#!/usr/bin/perl # The LearningOnline Network # lonsql - LON TCP-NSDL Query Handler. # # $Id: lonsql,v 1.8 2005/11/25 21:18:35 www Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # =pod =head1 NAME lonsql - LON TCP-MySQL-Server Daemon for handling database requests. =head1 SYNOPSIS This script should be run as user=www. Note that a lonsql.pid file contains the pid of the parent process. =head1 OVERVIEW =head2 Purpose within LON-CAPA LON-CAPA is meant to distribute A LOT of educational content to A LOT of people. It is ineffective to directly rely on contents within the ext2 filesystem to be speedily scanned for on-the-fly searches of content descriptions. (Simply put, it takes a cumbersome amount of time to open, read, analyze, and close thousands of files.) The solution is to index various data fields that are descriptive of the educational resources on a LON-CAPA server machine in a database. Descriptive data fields are referred to as "metadata". The question then arises as to how this metadata is handled in terms of the rest of the LON-CAPA network without burdening client and daemon processes. The obvious solution, using lonc to send a query to a lond process, doesn't work so well in general as you can see in the following example: lonc= loncapa client process A-lonc= a lonc process on Server A lond= loncapa daemon process database command A-lonc --------TCP/IP----------------> B-lond The problem emerges that A-lonc and B-lond are kept waiting for the MySQL server to "do its stuff", or in other words, perform the conceivably sophisticated, data-intensive, time-sucking database transaction. By tying up a lonc and lond process, this significantly cripples the capabilities of LON-CAPA servers. The solution is to offload the work onto another process, and use lonc and lond just for requests and notifications of completed processing: database command A-lonc ---------TCP/IP-----------------> B-lond =====> B-lonsql <---------------------------------/ | "ok, I'll get back to you..." | | / A-lond <------------------------------- B-lonc <====== "Guess what? I have the result!" Of course, depending on success or failure, the messages may vary, but the principle remains the same where a separate pool of children processes (lonsql's) handle the MySQL database manipulations. Thus, lonc and lond spend effectively no time waiting on results from the database. =head1 Internals =over 4 =cut use strict; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; use LONCAPA::lonmetadata(); use IO::Socket; use Symbol; use POSIX; use IO::Select; use IO::File; use Socket; use Fcntl; use Tie::RefHash; use HTML::LCParser(); use LWP::UserAgent(); use HTTP::Headers; use HTTP::Date; use File::Find; use localenroll; ######################################################## ######################################################## =pod =item Variables required for forking =over 4 =item $MAX_CLIENTS_PER_CHILD The number of clients each child should process. =item %children The keys to %children are the current child process IDs =item $children The current number of children =back =cut ######################################################## ######################################################## my $MAX_CLIENTS_PER_CHILD = 5; # number of clients each child should process my %children = (); # keys are current child process IDs my $children = 0; # current number of children ################################################################### ################################################################### =pod =item Main body of code. =over 4 =item Read data from loncapa_apache.conf and loncapa.conf. =item Ensure we can access the database. =item Determine if there are other instances of lonsql running. =item Read the hosts file. =item Create a socket for lonsql. =item Fork once and dissociate from parent. =item Write PID to disk. =item Prefork children and maintain the population of children. =back =cut ################################################################### ################################################################### my $childmaxattempts=10; my $run =0; # running counter to generate the query-id # # Read loncapa_apache.conf and loncapa.conf # my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); my %perlvar=%{$perlvarref}; # # Write the /home/www/.my.cnf file my $conf_file = '/home/www/.my.cnf'; if (! -e $conf_file) { if (open MYCNF, ">$conf_file") { print MYCNF <<"ENDMYCNF"; [client] user=www password=$perlvar{'lonSqlAccess'} ENDMYCNF close MYCNF; } else { warn "Unable to write $conf_file, continuing"; } } # # Check if other instance running # my $pidfile="$perlvar{'lonDaemons'}/logs/lonsql.pid"; if (-e $pidfile) { my $lfh=IO::File->new("$pidfile"); my $pide=<$lfh>; chomp($pide); if (kill 0 => $pide) { die "already running"; } } # # Read hosts file # my $thisserver; my $PREFORK=4; # number of children to maintain, at least four spare open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file"; while (my $configline=) { my ($id,$domain,$role,$name)=split(/:/,$configline); $name=~s/\s//g; $thisserver=$name if ($id eq $perlvar{'lonHostID'}); #$PREFORK++; } close(CONFIG); # #$PREFORK=int($PREFORK/4); # # Create a socket to talk to lond # my $unixsock = "mysqlsock"; my $localfile="$perlvar{'lonSockDir'}/$unixsock"; my $server; unlink ($localfile); unless ($server=IO::Socket::UNIX->new(Local =>"$localfile", Type => SOCK_STREAM, Listen => 10)) { print "in socket error:$@\n"; } # # Fork once and dissociate # my $fpid=fork; exit if $fpid; die "Couldn't fork: $!" unless defined ($fpid); POSIX::setsid() or die "Can't start new session: $!"; # # Write our PID on disk my $execdir=$perlvar{'lonDaemons'}; open (PIDSAVE,">$execdir/logs/lonsql.pid"); print PIDSAVE "$$\n"; close(PIDSAVE); &logthis("CRITICAL: ---------- Starting ----------"); # # Ignore signals generated during initial startup $SIG{HUP}=$SIG{USR1}='IGNORE'; # Now we are on our own # Fork off our children. for (1 .. $PREFORK) { make_new_child(); } # # Install signal handlers. $SIG{CHLD} = \&REAPER; $SIG{INT} = $SIG{TERM} = \&HUNTSMAN; $SIG{HUP} = \&HUPSMAN; # # And maintain the population. while (1) { sleep; # wait for a signal (i.e., child's death) for (my $i = $children; $i < $PREFORK; $i++) { make_new_child(); # top up the child pool } } ######################################################## ######################################################## =pod =item &make_new_child Inputs: None Returns: None =cut ######################################################## ######################################################## sub make_new_child { my $pid; my $sigset; # # block signal for fork $sigset = POSIX::SigSet->new(SIGINT); sigprocmask(SIG_BLOCK, $sigset) or die "Can't block SIGINT for fork: $!\n"; # die "fork: $!" unless defined ($pid = fork); # if ($pid) { # Parent records the child's birth and returns. sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; $children{$pid} = 1; $children++; return; } else { # Child can *not* return from this subroutine. $SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before # unblock signals sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; $SIG{TERM}=$SIG{INT}=$SIG{QUIT}=$SIG{__DIE__}=\&DISCONNECT; # handle connections until we've reached $MAX_CLIENTS_PER_CHILD for (my $i=0; $i < $MAX_CLIENTS_PER_CHILD; $i++) { my $client = $server->accept() or last; # do something with the connection $run = $run+1; my $userinput = <$client>; chomp($userinput); # my ($conserver,$query, $arg1,$arg2,$arg3)=split(/&/,$userinput); my $query=unescape($query); # #send query id which is pid_unixdatetime_runningcounter my $queryid = $thisserver; $queryid .="_".($$)."_"; $queryid .= time."_"; $queryid .= $run; print $client "$queryid\n"; # # &logthis("QUERY: $query - $arg1 - $arg2 - $arg3"); sleep 1; # my $result=''; # # At this point, query is received, query-ID assigned and sent # back, $query eq 'logquery' will mean that this is a query # against log-files if (($query eq 'userlog') || ($query eq 'courselog')) { # beginning of log query my $udom = &unescape($arg1); my $uname = &unescape($arg2); my $command = &unescape($arg3); my $path = &propath($udom,$uname); if (-e "$path/activity.log") { if ($query eq 'userlog') { $result=&userlog($path,$command); } else { $result=&courselog($path,$command); } } else { &logthis('Unable to do log query: '.$uname.'@'.$udom); $result='no_such_file'; } # end of log query } elsif ($query eq 'fetchenrollment') { # retrieve institutional class lists my $dom = &unescape($arg1); my %affiliates = (); my %replies = (); my $locresult = ''; my $querystr = &unescape($arg3); foreach (split/%%/,$querystr) { if (/^([^=]+)=([^=]+)$/) { @{$affiliates{$1}} = split/,/,$2; } } $locresult = &localenroll::fetch_enrollment($dom,\%affiliates,\%replies); $result = &escape($locresult.':'); if ($locresult) { $result .= &escape(join(':',map{$_.'='.$replies{$_}} keys %replies)); } } elsif ($query eq 'prepare activity log') { my ($cid,$domain) = map {&unescape($_);} ($arg1,$arg2); &logthis('preparing activity log tables for '.$cid); my $command = qq{$perlvar{'lonDaemons'}/parse_activity_log.pl -course=$cid -domain=$domain}; system($command); &logthis($command); my $returnvalue = $?>>8; if ($returnvalue) { $result = 'error: parse_activity_log.pl returned '. $returnvalue; } else { $result = 'success'; } } else { # Do an sql query $result = &nsdl_query($query,$arg1,$arg2); } # result does not need to be escaped because it has already been # escaped. #$result=&escape($result); &reply("queryreply:$queryid:$result",$conserver); } # tidy up gracefully and finish # # this exit is VERY important, otherwise the child will become # a producer of more and more children, forking yourself into # process death. exit; } } ######################################################## =pod =item &logthis Inputs: $message, the message to log Returns: nothing Writes $message to the logfile. =cut ######################################################## ######################################################## sub logthis { my $message=shift; my $execdir=$perlvar{'lonDaemons'}; my $fh=IO::File->new(">>$execdir/logs/lonsql.log"); my $now=time; my $local=localtime($now); print $fh "$local ($$): $message\n"; } # -------------------------------------------------- Non-critical communication ######################################################## ######################################################## =pod =item &subreply Sends a command to a server. Called only by &reply. Inputs: $cmd,$server Returns: The results of the message or 'con_lost' on error. =cut ######################################################## ######################################################## sub subreply { my ($cmd,$server)=@_; my $peerfile="$perlvar{'lonSockDir'}/$server"; my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", Type => SOCK_STREAM, Timeout => 10) or return "con_lost"; print $sclient "$cmd\n"; my $answer=<$sclient>; chomp($answer); $answer="con_lost" if (!$answer); return $answer; } ######################################################## ######################################################## =pod =item &reply Sends a command to a server. Inputs: $cmd,$server Returns: The results of the message or 'con_lost' on error. =cut ######################################################## ######################################################## sub reply { my ($cmd,$server)=@_; my $answer; if ($server ne $perlvar{'lonHostID'}) { $answer=subreply($cmd,$server); if ($answer eq 'con_lost') { $answer=subreply("ping",$server); $answer=subreply($cmd,$server); } } else { $answer='self_reply'; $answer=subreply($cmd,$server); } return $answer; } ######################################################## ######################################################## =pod =item &escape Escape special characters in a string. Inputs: string to escape Returns: The input string with special characters escaped. =cut ######################################################## ######################################################## sub escape { my $str=shift; $str =~ s/(\W)/"%".unpack('H2',$1)/eg; return $str; } ######################################################## ######################################################## =pod =item &unescape Unescape special characters in a string. Inputs: string to unescape Returns: The input string with special characters unescaped. =cut ######################################################## ######################################################## sub unescape { my $str=shift; $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; return $str; } ######################################################## ######################################################## =pod =item &ishome Determine if the current machine is the home server for a user. The determination is made by checking the filesystem for the users information. Inputs: $author Returns: 0 - this is not the authors home server, 1 - this is. =cut ######################################################## ######################################################## sub ishome { my $author=shift; $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; my ($udom,$uname)=split(/\//,$author); my $proname=propath($udom,$uname); if (-e $proname) { return 1; } else { return 0; } } ######################################################## ######################################################## =pod =item &propath Inputs: user name, user domain Returns: The full path to the users directory. =cut ######################################################## ######################################################## sub propath { my ($udom,$uname)=@_; $udom=~s/\W//g; $uname=~s/\W//g; my $subdir=$uname.'__'; $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; return $proname; } ######################################################## ######################################################## =pod =item &courselog Inputs: $path, $command Returns: unescaped string of values. =cut ######################################################## ######################################################## sub courselog { my ($path,$command)=@_; my %filters=(); foreach (split(/\:/,&unescape($command))) { my ($name,$value)=split(/\=/,$_); $filters{$name}=$value; } my @results=(); open(IN,$path.'/activity.log') or return ('file_error'); while (my $line=) { chomp($line); my ($timestamp,$host,$log)=split(/\:/,$line); # # $log has the actual log entries; currently still escaped, and # %26(timestamp)%3a(url)%3a(user)%3a(domain) # then additionally # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value) # or # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value) # # get delimiter between timestamped entries to be &&& $log=~s/\%26(\d+)\%3a/\&\&\&$1\%3a/g; # now go over all log entries foreach (split(/\&\&\&/,&unescape($log))) { my ($time,$res,$uname,$udom,$action,@values)=split(/\:/,$_); my $values=&unescape(join(':',@values)); $values=~s/\&/\:/g; $res=&unescape($res); my $include=1; if (($filters{'username'}) && ($uname ne $filters{'username'})) { $include=0; } if (($filters{'domain'}) && ($udom ne $filters{'domain'})) { $include=0; } if (($filters{'url'}) && ($res!~/$filters{'url'}/)) { $include=0; } if (($filters{'start'}) && ($time<$filters{'start'})) { $include=0; } if (($filters{'end'}) && ($time>$filters{'end'})) { $include=0; } if (($filters{'action'} eq 'view') && ($action)) { $include=0; } if (($filters{'action'} eq 'submit') && ($action ne 'POST')) { $include=0; } if (($filters{'action'} eq 'grade') && ($action ne 'CSTORE')) { $include=0; } if ($include) { push(@results,($time<1000000000?'0':'').$time.':'.$res.':'. $uname.':'.$udom.':'. $action.':'.$values); } } } close IN; return join('&',sort(@results)); } ######################################################## ######################################################## =pod =item &userlog Inputs: $path, $command Returns: unescaped string of values. =cut ######################################################## ######################################################## sub userlog { my ($path,$command)=@_; my %filters=(); foreach (split(/\:/,&unescape($command))) { my ($name,$value)=split(/\=/,$_); $filters{$name}=$value; } my @results=(); open(IN,$path.'/activity.log') or return ('file_error'); while (my $line=) { chomp($line); my ($timestamp,$host,$log)=split(/\:/,$line); $log=&unescape($log); my $include=1; if (($filters{'start'}) && ($timestamp<$filters{'start'})) { $include=0; } if (($filters{'end'}) && ($timestamp>$filters{'end'})) { $include=0; } if (($filters{'action'} eq 'log') && ($log!~/^Log/)) { $include=0; } if (($filters{'action'} eq 'check') && ($log!~/^Check/)) { $include=0; } if ($include) { push(@results,$timestamp.':'.$log); } } close IN; return join('&',sort(@results)); } ######################################################## ######################################################## =pod =item Functions required for forking =over 4 =item REAPER REAPER takes care of dead children. =item HUNTSMAN Signal handler for SIGINT. =item HUPSMAN Signal handler for SIGHUP =item DISCONNECT Disconnects from database. =back =cut ######################################################## ######################################################## sub REAPER { # takes care of dead children $SIG{CHLD} = \&REAPER; my $pid = wait; $children --; &logthis("Child $pid died"); delete $children{$pid}; } sub HUNTSMAN { # signal handler for SIGINT local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children kill 'INT' => keys %children; my $execdir=$perlvar{'lonDaemons'}; unlink("$execdir/logs/lonsql.pid"); &logthis("CRITICAL: Shutting down"); $unixsock = "mysqlsock"; my $port="$perlvar{'lonSockDir'}/$unixsock"; unlink($port); exit; # clean up with dignity } sub HUPSMAN { # signal handler for SIGHUP local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children kill 'INT' => keys %children; close($server); # free up socket &logthis("CRITICAL: Restarting"); my $execdir=$perlvar{'lonDaemons'}; $unixsock = "mysqlsock"; my $port="$perlvar{'lonSockDir'}/$unixsock"; unlink($port); exec("$execdir/lonsql"); # here we go again } # # Takes SQL query # sends it to NSDL # sub nsdl_query { my $query=shift; my ($keyword)=($query=~/\"\%([^\%]+)\%\"/); $keyword=&escape($keyword); my $url='http://search.nsdl.org?verb=Search&s=0&n=500&q=-link.primaryCollection:oai\:nsdl.org\:nsdl.nsdl\:00254%20'.$keyword; my $ua=new LWP::UserAgent; my $response=$ua->get($url); my $parser=HTML::LCParser->new(\$response->content); my $is=''; my $cont=''; my $token; my %result=(); my $allresults=''; while ($token=$parser->get_token) { if ($token->[0] eq 'T') { $cont.=$token->[1]; } elsif ($token->[0] eq 'S') { if ($token->[1] eq 'record') { %result=(); } elsif ($token->[1]=~/^dc\:/) { $is=$token->[1]; $cont=''; } } elsif ($token->[0] eq 'E') { if ($token->[1] eq 'record') { # # Now store it away # my $url=$result{'dc:identifier'}; if ($url=~/^http\:/) { $url=~s/^http:\//\/ext/; } else { $url=''; } if ($url) { my ($mime)=($url=~/\.(\w+)$/); $mime=~tr/A-Z/a-z/; $allresults.='&'. &escape($result{'dc:title'}).','. &escape($result{'dc:creator'}).','. &escape($result{'dc:subject'}).','. &escape($url).',,,,'. &escape($result{'dc:description'}).','. &escape($mime).',seniso,,,,public,nsdl,,,,,,,,,,,,,,,,,,,,,,,,,,,,'; } %result=(); } elsif ($token->[1]=~/^dc\:/) { $result{$is}=$cont; } } } $allresults=~s/^\&//; &logthis($allresults); return $allresults; } =pod =back =cut