Annotation of loncom/lonc, revision 1.31

1.1       albertel    1: #!/usr/bin/perl
                      2: 
                      3: # The LearningOnline Network
                      4: # lonc - LON TCP-Client Domain-Socket-Server
                      5: # provides persistent TCP connections to the other servers in the network
                      6: # through multiplexed domain sockets
                      7: #
1.31    ! harris41    8: # $Id: lonc,v 1.30 2002/02/25 20:43:15 www Exp $
1.22      www         9: #
                     10: # Copyright Michigan State University Board of Trustees
                     11: #
                     12: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                     13: #
                     14: # LON-CAPA is free software; you can redistribute it and/or modify
                     15: # it under the terms of the GNU General Public License as published by
                     16: # the Free Software Foundation; either version 2 of the License, or
                     17: # (at your option) any later version.
                     18: #
                     19: # LON-CAPA is distributed in the hope that it will be useful,
                     20: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     21: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     22: # GNU General Public License for more details.
                     23: #
                     24: # You should have received a copy of the GNU General Public License
                     25: # along with LON-CAPA; if not, write to the Free Software
                     26: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     27: #
                     28: # /home/httpd/html/adm/gpl.txt
                     29: #
                     30: # http://www.lon-capa.org/
                     31: #
1.1       albertel   32: # PID in subdir logs/lonc.pid
                     33: # kill kills
                     34: # HUP restarts
                     35: # USR1 tries to open connections again
                     36: 
1.2       www        37: # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19,
1.5       www        38: # 10/8,10/9,10/15,11/18,12/22,
1.10      www        39: # 2/8,7/25 Gerd Kortemeyer
                     40: # 12/05 Scott Harrison
                     41: # 12/05 Gerd Kortemeyer
1.23      harris41   42: # YEAR=2001
1.14      www        43: # 01/10/01 Scott Harrison
1.21      www        44: # 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer
1.23      harris41   45: # 12/20 Scott Harrison
1.26      www        46: # YEAR=2002
1.29      www        47: # 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer
1.10      www        48: # 
1.1       albertel   49: # based on nonforker from Perl Cookbook
                     50: # - server who multiplexes without forking
                     51: 
                     52: use POSIX;
                     53: use IO::Socket;
                     54: use IO::Select;
                     55: use IO::File;
                     56: use Socket;
                     57: use Fcntl;
                     58: use Tie::RefHash;
                     59: use Crypt::IDEA;
1.26      www        60: use Net::Ping;
                     61: use LWP::UserAgent();
1.1       albertel   62: 
1.30      www        63: $status='';
                     64: $lastlog='';
                     65: $conserver='SHELL';
1.26      www        66: 
1.8       harris41   67: # -------------------------------- Set signal handlers to record abnormal exits
                     68: 
1.29      www        69: &status("Init exception handlers");
1.26      www        70: $SIG{QUIT}=\&catchexception;
1.8       harris41   71: $SIG{__DIE__}=\&catchexception;
                     72: 
1.1       albertel   73: # ------------------------------------ Read httpd access.conf and get variables
1.29      www        74: &status("Read access.conf");
1.11      harris41   75: open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";
1.1       albertel   76: 
                     77: while ($configline=<CONFIG>) {
                     78:     if ($configline =~ /PerlSetVar/) {
                     79: 	my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
1.4       www        80:         chomp($varvalue);
1.1       albertel   81:         $perlvar{$varname}=$varvalue;
                     82:     }
                     83: }
                     84: close(CONFIG);
1.7       www        85: 
1.13      harris41   86: # ----------------------------- Make sure this process is running from user=www
1.29      www        87: &status("Check user ID");
1.13      harris41   88: my $wwwid=getpwnam('www');
                     89: if ($wwwid!=$<) {
                     90:    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
                     91:    $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
1.14      www        92:    system("echo 'User ID mismatch.  lonc must be run as user www.' |\
1.13      harris41   93:  mailto $emailto -s '$subj' > /dev/null");
                     94:    exit 1;
                     95: }
                     96: 
1.7       www        97: # --------------------------------------------- Check if other instance running
                     98: 
                     99: my $pidfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
                    100: 
                    101: if (-e $pidfile) {
                    102:    my $lfh=IO::File->new("$pidfile");
                    103:    my $pide=<$lfh>;
                    104:    chomp($pide);
1.11      harris41  105:    if (kill 0 => $pide) { die "already running"; }
1.7       www       106: }
1.1       albertel  107: 
                    108: # ------------------------------------------------------------- Read hosts file
                    109: 
1.11      harris41  110: open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file";
1.1       albertel  111: 
                    112: while ($configline=<CONFIG>) {
                    113:     my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
                    114:     chomp($ip);
1.28      www       115:     if ($ip) {
                    116:      $hostip{$id}=$ip;
                    117:      $hostname{$id}=$name;
                    118:     }
1.1       albertel  119: }
1.27      www       120: 
1.1       albertel  121: close(CONFIG);
                    122: 
                    123: # -------------------------------------------------------- Routines for forking
                    124: 
                    125: %children               = ();       # keys are current child process IDs,
                    126:                                     # values are hosts
                    127: %childpid               = ();       # the other way around
                    128: 
                    129: %childatt               = ();       # number of attempts to start server
                    130:                                     # for ID
                    131: 
1.30      www       132: $childmaxattempts=5;
1.3       www       133: 
1.1       albertel  134: # ---------------------------------------------------- Fork once and dissociate
1.29      www       135: &status("Fork and dissociate");
1.1       albertel  136: $fpid=fork;
                    137: exit if $fpid;
1.11      harris41  138: die "Couldn't fork: $!" unless defined ($fpid);
1.1       albertel  139: 
1.11      harris41  140: POSIX::setsid() or die "Can't start new session: $!";
1.1       albertel  141: 
1.30      www       142: $conserver='PARENT';
                    143: 
1.1       albertel  144: # ------------------------------------------------------- Write our PID on disk
1.29      www       145: &status("Write PID");
1.1       albertel  146: $execdir=$perlvar{'lonDaemons'};
                    147: open (PIDSAVE,">$execdir/logs/lonc.pid");
                    148: print PIDSAVE "$$\n";
                    149: close(PIDSAVE);
1.5       www       150: &logthis("<font color=red>CRITICAL: ---------- Starting ----------</font>");
1.1       albertel  151: 
                    152: # ----------------------------- Ignore signals generated during initial startup
                    153: $SIG{HUP}=$SIG{USR1}='IGNORE';
                    154: # ------------------------------------------------------- Now we are on our own
                    155:     
                    156: # Fork off our children, one for every server
                    157: 
1.18      www       158: &status("Forking ...");
                    159: 
1.1       albertel  160: foreach $thisserver (keys %hostip) {
1.26      www       161:     if (&online($hostname{$thisserver})) {
                    162:        make_new_child($thisserver);
                    163:     }
1.1       albertel  164: }
                    165: 
                    166: &logthis("Done starting initial servers");
                    167: # ----------------------------------------------------- Install signal handlers
                    168: 
                    169: $SIG{CHLD} = \&REAPER;
                    170: $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
                    171: $SIG{HUP}  = \&HUPSMAN;
                    172: $SIG{USR1} = \&USRMAN;
                    173: 
                    174: # And maintain the population.
                    175: while (1) {
1.18      www       176:     &status("Sleeping");
1.1       albertel  177:     sleep;                          # wait for a signal (i.e., child's death)
                    178:                                     # See who died and start new one
1.18      www       179:     &status("Woke up");
1.30      www       180:     my $skipping='';
1.1       albertel  181:     foreach $thisserver (keys %hostip) {
                    182:         if (!$childpid{$thisserver}) {
1.26      www       183: 	    if (($childatt{$thisserver}<$childmaxattempts) &&
                    184:                 (&online($hostname{$thisserver}))) {
1.6       www       185: 	       $childatt{$thisserver}++;
1.5       www       186:                &logthis(
                    187:    "<font color=yellow>INFO: Trying to reconnect for $thisserver "
1.30      www       188:   ."($childatt{$thisserver} of $childmaxattempts attempts)</font>"); 
1.1       albertel  189:                make_new_child($thisserver);
1.26      www       190: 	   } else {
1.30      www       191:                $skipping.=$thisserver.' ';
1.26      www       192:            } 
                    193:                
1.1       albertel  194:         }       
                    195:     }
1.30      www       196:     if ($skipping) { 
                    197:        &logthis("<font color=blue>WARNING: Skipped $skipping</font>");
                    198:     }
1.1       albertel  199: }
                    200: 
                    201: 
                    202: sub make_new_child {
                    203:    
1.30      www       204:     $newserver=shift;
1.1       albertel  205:     my $pid;
                    206:     my $sigset;
1.30      www       207:     &logthis("Attempting to start child for server $newserver");
1.1       albertel  208:     # block signal for fork
                    209:     $sigset = POSIX::SigSet->new(SIGINT);
                    210:     sigprocmask(SIG_BLOCK, $sigset)
1.11      harris41  211:         or die "Can't block SIGINT for fork: $!\n";
1.1       albertel  212:     
1.11      harris41  213:     die "fork: $!" unless defined ($pid = fork);
1.1       albertel  214:     
                    215:     if ($pid) {
                    216:         # Parent records the child's birth and returns.
                    217:         sigprocmask(SIG_UNBLOCK, $sigset)
1.11      harris41  218:             or die "Can't unblock SIGINT for fork: $!\n";
1.30      www       219:         $children{$pid} = $newserver;
1.1       albertel  220:         $childpid{$conserver} = $pid;
                    221:         return;
                    222:     } else {
1.30      www       223:         $conserver=$newserver;
1.1       albertel  224:         # Child can *not* return from this subroutine.
                    225:         $SIG{INT} = 'DEFAULT';      # make SIGINT kill us as it did before
1.18      www       226:         $SIG{USR1}= \&logstatus;
                    227:    
1.1       albertel  228:         # unblock signals
                    229:         sigprocmask(SIG_UNBLOCK, $sigset)
1.11      harris41  230:             or die "Can't unblock SIGINT for fork: $!\n";
1.1       albertel  231: 
                    232: # ----------------------------- This is the modified main program of non-forker
                    233: 
                    234: $port = "$perlvar{'lonSockDir'}/$conserver";
                    235: 
                    236: unlink($port);
1.18      www       237: 
1.29      www       238: # -------------------------------------------------------------- Open other end
1.1       albertel  239: 
1.29      www       240: &openremote($conserver);
1.1       albertel  241: 
1.3       www       242: # ----------------------------------------- We're online, send delayed messages
1.18      www       243:     &status("Checking for delayed messages");
1.4       www       244:     my @allbuffered;
1.3       www       245:     my $path="$perlvar{'lonSockDir'}/delayed";
1.4       www       246:     opendir(DIRHANDLE,$path);
                    247:     @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE;
                    248:     closedir(DIRHANDLE);
1.3       www       249:     my $dfname;
1.23      harris41  250:     foreach (@allbuffered) {
1.30      www       251:         &status("Sending delayed: $_");
1.4       www       252:         $dfname="$path/$_";
1.29      www       253:         &logthis('Sending '.$dfname);
1.3       www       254:         my $wcmd;
                    255:         {
                    256:          my $dfh=IO::File->new($dfname);
1.4       www       257:          $cmd=<$dfh>;
1.3       www       258:         }
                    259:         chomp($cmd);
                    260:         my $bcmd=$cmd;
                    261:         if ($cmd =~ /^encrypt\:/) {
                    262: 	    my $rcmd=$cmd;
                    263:             $rcmd =~ s/^encrypt\://;
                    264:             chomp($rcmd);
                    265:             my $cmdlength=length($rcmd);
                    266:             $rcmd.="         ";
                    267:             my $encrequest='';
                    268:             for (my $encidx=0;$encidx<=$cmdlength;$encidx+=8) {
                    269:                 $encrequest.=
                    270:                     unpack("H16",$cipher->encrypt(substr($rcmd,$encidx,8)));
                    271:             }
                    272:             $cmd="enc:$cmdlength:$encrequest\n";
                    273:         }
1.20      www       274:     $SIG{ALRM}=sub { die "timeout" };
                    275:     $SIG{__DIE__}='DEFAULT';
                    276:     eval {
                    277:         alarm(60);
1.3       www       278:         print $remotesock "$cmd\n";
                    279:         $answer=<$remotesock>;
                    280: 	chomp($answer);
1.20      www       281:         alarm(0);
                    282:     };
                    283:     $SIG{ALRM}='DEFAULT';
                    284:     $SIG{__DIE__}=\&catchexception;
                    285: 
                    286:         if (($answer ne '') && ($@!~/timeout/)) {
1.3       www       287: 	    unlink("$dfname");
1.30      www       288:             &logthis("Delayed $cmd: >$answer<");
1.3       www       289:             &logperm("S:$conserver:$bcmd");
                    290:         }        
1.23      harris41  291:     }
1.1       albertel  292: 
                    293: # ------------------------------------------------------- Listen to UNIX socket
1.30      www       294: &status("Opening socket");
1.1       albertel  295: unless (
                    296:   $server = IO::Socket::UNIX->new(Local  => $port,
                    297:                                   Type   => SOCK_STREAM,
                    298:                                   Listen => 10 )
1.5       www       299:    ) { 
                    300:        my $st=120+int(rand(240));
                    301:        &logthis(
                    302:          "<font color=blue>WARNING: ".
1.30      www       303:          "Can't make server socket ($st secs): $@</font>");
1.5       www       304:        sleep($st);
1.1       albertel  305:        exit; 
                    306:      };
                    307: 
                    308: # -----------------------------------------------------------------------------
                    309: 
1.5       www       310: &logthis("<font color=green>$conserver online</font>");
                    311: 
                    312: # -----------------------------------------------------------------------------
1.1       albertel  313: # begin with empty buffers
                    314: %inbuffer  = ();
                    315: %outbuffer = ();
                    316: %ready     = ();
                    317: 
                    318: tie %ready, 'Tie::RefHash';
                    319: 
                    320: nonblock($server);
                    321: $select = IO::Select->new($server);
                    322: 
                    323: # Main loop: check reads/accepts, check writes, check ready to process
                    324: while (1) {
                    325:     my $client;
                    326:     my $rv;
                    327:     my $data;
                    328: 
                    329:     # check for new information on the connections we have
                    330: 
                    331:     # anything to read or accept?
1.16      www       332:     foreach $client ($select->can_read(0.1)) {
1.1       albertel  333: 
                    334:         if ($client == $server) {
                    335:             # accept a new connection
1.18      www       336:             &status("Accept new connection: $conserver");
1.1       albertel  337:             $client = $server->accept();
                    338:             $select->add($client);
                    339:             nonblock($client);
                    340:         } else {
                    341:             # read data
                    342:             $data = '';
                    343:             $rv   = $client->recv($data, POSIX::BUFSIZ, 0);
                    344: 
                    345:             unless (defined($rv) && length $data) {
                    346:                 # This would be the end of file, so close the client
                    347:                 delete $inbuffer{$client};
                    348:                 delete $outbuffer{$client};
                    349:                 delete $ready{$client};
                    350: 
1.30      www       351:                 &status("Idle");
1.1       albertel  352:                 $select->remove($client);
                    353:                 close $client;
                    354:                 next;
                    355:             }
                    356: 
                    357:             $inbuffer{$client} .= $data;
                    358: 
                    359:             # test whether the data in the buffer or the data we
                    360:             # just read means there is a complete request waiting
                    361:             # to be fulfilled.  If there is, set $ready{$client}
                    362:             # to the requests waiting to be fulfilled.
                    363:             while ($inbuffer{$client} =~ s/(.*\n)//) {
                    364:                 push( @{$ready{$client}}, $1 );
                    365:             }
                    366:         }
                    367:     }
                    368: 
                    369:     # Any complete requests to process?
                    370:     foreach $client (keys %ready) {
1.25      albertel  371:         handle($client);
1.1       albertel  372:     }
                    373: 
                    374:     # Buffers to flush?
                    375:     foreach $client ($select->can_write(1)) {
                    376:         # Skip this client if we have nothing to say
                    377:         next unless exists $outbuffer{$client};
1.25      albertel  378: 
1.1       albertel  379:         $rv = $client->send($outbuffer{$client}, 0);
1.29      www       380: 
1.30      www       381:       unless ($outbuffer{$client} eq "con_lost\n") {
1.1       albertel  382:         unless (defined $rv) {
                    383:             # Whine, but move on.
1.15      www       384:             &logthis("I was told I could write, but I can't.\n");
1.1       albertel  385:             next;
                    386:         }
1.15      www       387:         $errno=$!;
1.1       albertel  388:         if (($rv == length $outbuffer{$client}) ||
1.15      www       389:             ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) {
1.1       albertel  390:             substr($outbuffer{$client}, 0, $rv) = '';
                    391:             delete $outbuffer{$client} unless length $outbuffer{$client};
                    392:         } else {
                    393:             # Couldn't write all the data, and it wasn't because
                    394:             # it would have blocked.  Shutdown and move on.
1.15      www       395: 
                    396: 	    &logthis("Dropping data with ".$errno.": ".
                    397:                      length($outbuffer{$client}).", $rv");
                    398: 
1.1       albertel  399:             delete $inbuffer{$client};
                    400:             delete $outbuffer{$client};
                    401:             delete $ready{$client};
                    402: 
                    403:             $select->remove($client);
                    404:             close($client);
                    405:             next;
                    406:         }
1.29      www       407:       } else {
                    408: # -------------------------------------------------------- Wow, connection lost
                    409:          &logthis(
1.30      www       410:      "<font color=red>CRITICAL: Closing connection</font>");
                    411: 	 &status("Connection lost");
1.29      www       412:          $remotesock->shutdown(2);
                    413:          &logthis("Attempting to open new connection");
                    414:          &openremote($conserver);          
                    415:       }
1.1       albertel  416:     }
1.29      www       417:    
1.1       albertel  418: }
                    419: }
1.25      albertel  420: 
1.1       albertel  421: # ------------------------------------------------------- End of make_new_child
                    422: 
                    423: # handle($socket) deals with all pending requests for $client
                    424: sub handle {
                    425:     # requests are in $ready{$client}
                    426:     # send output to $outbuffer{$client}
                    427:     my $client = shift;
                    428:     my $request;
                    429: 
                    430:     foreach $request (@{$ready{$client}}) {
                    431: # ============================================================= Process request
                    432:         # $request is the text of the request
                    433:         # put text of reply into $outbuffer{$client}
1.29      www       434: # ------------------------------------------------------------ Is this the end?
                    435:         if ($request eq "close_connection_exit\n") {
1.30      www       436: 	    &status("Request close connection");
1.29      www       437:            &logthis(
1.30      www       438:      "<font color=red>CRITICAL: Request Close Connection</font>");
1.29      www       439:            $remotesock->shutdown(2);
                    440:            $server->close();
                    441:            exit;
                    442:         }
1.1       albertel  443: # -----------------------------------------------------------------------------
                    444:         if ($request =~ /^encrypt\:/) {
                    445: 	    my $cmd=$request;
                    446:             $cmd =~ s/^encrypt\://;
                    447:             chomp($cmd);
                    448:             my $cmdlength=length($cmd);
                    449:             $cmd.="         ";
                    450:             my $encrequest='';
                    451:             for (my $encidx=0;$encidx<=$cmdlength;$encidx+=8) {
                    452:                 $encrequest.=
                    453:                     unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8)));
                    454:             }
                    455:             $request="enc:$cmdlength:$encrequest\n";
                    456:         }
1.19      www       457: # --------------------------------------------------------------- Main exchange
                    458:     $SIG{ALRM}=sub { die "timeout" };
                    459:     $SIG{__DIE__}='DEFAULT';
                    460:     eval {
                    461:         alarm(300);
1.30      www       462:         &status("Sending: $request");
1.1       albertel  463:         print $remotesock "$request";
1.18      www       464:         &status("Waiting for reply from $conserver: $request");
1.1       albertel  465:         $answer=<$remotesock>;
1.18      www       466:         &status("Received reply: $request");
1.19      www       467:         alarm(0);
                    468:     };
                    469:     if ($@=~/timeout/) { 
                    470:        $answer='';
                    471:        &logthis(
1.30      www       472:         "<font color=red>CRITICAL: Timeout: $request</font>");
1.19      www       473:     }  
                    474:     $SIG{ALRM}='DEFAULT';
                    475:     $SIG{__DIE__}=\&catchexception;
                    476: 
                    477: 
1.1       albertel  478:         if ($answer) {
                    479: 	   if ($answer =~ /^enc/) {
                    480:                my ($cmd,$cmdlength,$encinput)=split(/:/,$answer);
                    481:                chomp($encinput);
                    482: 	       $answer='';
                    483:                for (my $encidx=0;$encidx<length($encinput);$encidx+=16) {
                    484:                   $answer.=$cipher->decrypt(
                    485:                    pack("H16",substr($encinput,$encidx,16))
                    486:                   );
                    487: 	       }
                    488: 	      $answer=substr($answer,0,$cmdlength);
                    489: 	      $answer.="\n";
                    490: 	   }
                    491:            $outbuffer{$client} .= $answer;
                    492:         } else {
                    493:            $outbuffer{$client} .= "con_lost\n";
                    494:         }
                    495: 
1.30      www       496:      &status("Completed: $request");
                    497: 
1.1       albertel  498: # ===================================================== Done processing request
                    499:     }
                    500:     delete $ready{$client};
                    501: # -------------------------------------------------------------- End non-forker
                    502: }
                    503: # ---------------------------------------------------------- End make_new_child
1.25      albertel  504: }
1.1       albertel  505: 
                    506: # nonblock($socket) puts socket into nonblocking mode
                    507: sub nonblock {
                    508:     my $socket = shift;
                    509:     my $flags;
                    510: 
                    511:     
                    512:     $flags = fcntl($socket, F_GETFL, 0)
1.11      harris41  513:             or die "Can't get flags for socket: $!\n";
1.1       albertel  514:     fcntl($socket, F_SETFL, $flags | O_NONBLOCK)
1.11      harris41  515:             or die "Can't make socket nonblocking: $!\n";
1.29      www       516: }
                    517: 
                    518: 
                    519: sub openremote {
                    520: # ---------------------------------------------------- Client to network server
                    521: 
                    522:     my $conserver=shift;
                    523: 
1.30      www       524: &status("Opening TCP");
1.29      www       525: 
                    526: unless (
                    527:   $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
                    528:                                       PeerPort => $perlvar{'londPort'},
                    529:                                       Proto    => "tcp",
                    530:                                       Type     => SOCK_STREAM)
                    531:    ) { 
                    532:        my $st=120+int(rand(240));
                    533:        &logthis(
1.30      www       534: "<font color=blue>WARNING: Couldn't connect ($st secs): $@</font>");
1.29      www       535:        sleep($st);
                    536:        exit; 
                    537:      };
                    538: # ----------------------------------------------------------------- Init dialog
                    539: 
                    540: &status("Init dialogue: $conserver");
                    541: 
                    542:      $SIG{ALRM}=sub { die "timeout" };
                    543:      $SIG{__DIE__}='DEFAULT';
                    544:      eval {
                    545:          alarm(60);
                    546: print $remotesock "init\n";
                    547: $answer=<$remotesock>;
                    548: print $remotesock "$answer";
                    549: $answer=<$remotesock>;
                    550: chomp($answer);
                    551:           alarm(0);
                    552:      };
                    553:      $SIG{ALRM}='DEFAULT';
                    554:      $SIG{__DIE__}=\&catchexception;
                    555:  
                    556:      if ($@=~/timeout/) {
1.30      www       557: 	 &logthis("Timed out during init");
1.29      www       558:          exit;
                    559:      }
                    560: 
                    561: if ($answer ne 'ok') {
1.30      www       562:        &logthis("Init reply: >$answer<");
1.29      www       563:        my $st=120+int(rand(240));
                    564:        &logthis(
1.30      www       565: "<font color=blue>WARNING: Init failed ($st secs)</font>");
1.29      www       566:        sleep($st);
                    567:        exit; 
                    568: }
                    569: 
                    570: sleep 5;
1.30      www       571: &status("Ponging");
1.29      www       572: print $remotesock "pong\n";
                    573: $answer=<$remotesock>;
                    574: chomp($answer);
1.30      www       575: if ($answer!~/^$conserver/) {
                    576:    &logthis("Pong reply: >$answer<");
1.29      www       577: }
                    578: # ----------------------------------------------------------- Initialize cipher
                    579: 
1.30      www       580: &status("Initialize cipher");
1.29      www       581: print $remotesock "ekey\n";
                    582: my $buildkey=<$remotesock>;
                    583: my $key=$conserver.$perlvar{'lonHostID'};
                    584: $key=~tr/a-z/A-Z/;
                    585: $key=~tr/G-P/0-9/;
                    586: $key=~tr/Q-Z/0-9/;
                    587: $key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
                    588: $key=substr($key,0,32);
                    589: my $cipherkey=pack("H32",$key);
                    590: if ($cipher=new IDEA $cipherkey) {
1.30      www       591:    &logthis("Secure connection initialized");
1.29      www       592: } else {
                    593:    my $st=120+int(rand(240));
                    594:    &logthis(
                    595:      "<font color=blue>WARNING: ".
1.30      www       596:      "Could not establish secure connection ($st secs)!</font>");
1.29      www       597:    sleep($st);
                    598:    exit;
                    599: }
                    600: 
1.8       harris41  601: }
1.30      www       602: 
                    603: 
                    604: 
                    605: # grabs exception and records it to log before exiting
                    606: sub catchexception {
                    607:     my ($signal)=@_;
                    608:     $SIG{QUIT}='DEFAULT';
                    609:     $SIG{__DIE__}='DEFAULT';
                    610:     chomp($signal);
                    611:     &logthis("<font color=red>CRITICAL: "
                    612:      ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through "
                    613:      ."\"$signal\" with parameter [$@]</font>");
                    614:     die($@);
                    615: }
                    616: 
                    617: # -------------------------------------- Routines to see if other box available
                    618: 
                    619: sub online {
                    620:     my $host=shift;
                    621:     &status("Pinging ".$host);
                    622:     my $p=Net::Ping->new("tcp",20);
                    623:     my $online=$p->ping("$host");
                    624:     $p->close();
                    625:     undef ($p);
                    626:     return $online;
                    627: }
                    628: 
                    629: sub connected {
                    630:     my ($local,$remote)=@_;
                    631:     &status("Checking connection $local to $remote");
                    632:     $local=~s/\W//g;
                    633:     $remote=~s/\W//g;
                    634: 
                    635:     unless ($hostname{$local}) { return 'local_unknown'; }
                    636:     unless ($hostname{$remote}) { return 'remote_unknown'; }
                    637: 
                    638:     unless (&online($hostname{$local})) { return 'local_offline'; }
                    639: 
                    640:     my $ua=new LWP::UserAgent;
                    641:     
                    642:     my $request=new HTTP::Request('GET',
                    643:       "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote);
                    644: 
                    645:     my $response=$ua->request($request);
                    646: 
                    647:     unless ($response->is_success) { return 'local_error'; }
                    648: 
                    649:     my $reply=$response->content;
                    650:     $reply=(split("\n",$reply))[0];
                    651:     $reply=~s/\W//g;
                    652:     if ($reply ne $remote) { return $reply; }
                    653:     return 'ok';
                    654: }
                    655: 
                    656: 
                    657: sub REAPER {                        # takes care of dead children
                    658:     $SIG{CHLD} = \&REAPER;
                    659:     my $pid = wait;
                    660:     my $wasserver=$children{$pid};
                    661:     &logthis("<font color=red>CRITICAL: "
                    662:      ."Child $pid for server $wasserver died ($childatt{$wasserver})</font>");
                    663:     delete $children{$pid};
                    664:     delete $childpid{$wasserver};
                    665:     my $port = "$perlvar{'lonSockDir'}/$wasserver";
                    666:     unlink($port);
                    667: }
                    668: 
                    669: sub hangup {
                    670:     foreach (keys %children) {
                    671:         $wasserver=$children{$_};
                    672:         &status("Closing $wasserver");
                    673:         &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));
                    674:         &status("Kill PID $_ for $wasserver");
                    675: 	kill ('INT',$_);
                    676:     }
                    677: }
                    678: 
                    679: sub HUNTSMAN {                      # signal handler for SIGINT
                    680:     local($SIG{CHLD}) = 'IGNORE';   # we're going to kill our children
                    681:     &hangup();
                    682:     my $execdir=$perlvar{'lonDaemons'};
                    683:     unlink("$execdir/logs/lonc.pid");
                    684:     &logthis("<font color=red>CRITICAL: Shutting down</font>");
                    685:     exit;                           # clean up with dignity
                    686: }
                    687: 
                    688: sub HUPSMAN {                      # signal handler for SIGHUP
                    689:     local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children
                    690:     &hangup();
                    691:     &logthis("<font color=red>CRITICAL: Restarting</font>");
                    692:     unlink("$execdir/logs/lonc.pid");
                    693:     my $execdir=$perlvar{'lonDaemons'};
                    694:     exec("$execdir/lonc");         # here we go again
                    695: }
                    696: 
                    697: sub checkchildren {
                    698:     &initnewstatus();
                    699:     &logstatus();
                    700:     &logthis('Going to check on the children');
                    701:     foreach (sort keys %children) {
                    702: 	sleep 1;
                    703:         unless (kill 'USR1' => $_) {
                    704: 	    &logthis ('<font color=red>CRITICAL: Child '.$_.' is dead</font>');
                    705:             &logstatus($$.' is dead');
                    706:         } 
                    707:     }
                    708: }
                    709: 
                    710: sub USRMAN {
                    711:     &logthis("USR1: Trying to establish connections again");
                    712:     %childatt=();
                    713:     &checkchildren();
                    714: }
                    715: 
                    716: # -------------------------------------------------- Non-critical communication
                    717: sub subreply { 
                    718:  my ($cmd,$server)=@_;
                    719:  my $answer='';
                    720:  if ($server ne $perlvar{'lonHostID'}) { 
                    721:     my $peerfile="$perlvar{'lonSockDir'}/$server";
                    722:     my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
                    723:                                       Type    => SOCK_STREAM,
                    724:                                       Timeout => 10)
                    725:        or return "con_lost";
                    726: 
                    727: 
                    728:     $SIG{ALRM}=sub { die "timeout" };
                    729:     $SIG{__DIE__}='DEFAULT';
                    730:     eval {
                    731:      alarm(10);
                    732:      print $sclient "$cmd\n";
                    733:      $answer=<$sclient>;
                    734:      chomp($answer);
                    735:      alarm(0);
                    736:     };
                    737:     if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; }
                    738:     $SIG{ALRM}='DEFAULT';
                    739:     $SIG{__DIE__}=\&catchexception;
                    740:  } else { $answer='self_reply'; }
                    741:  return $answer;
                    742: }
                    743: 
                    744: # --------------------------------------------------------------------- Logging
                    745: 
                    746: sub logthis {
                    747:     my $message=shift;
                    748:     my $execdir=$perlvar{'lonDaemons'};
                    749:     my $fh=IO::File->new(">>$execdir/logs/lonc.log");
                    750:     my $now=time;
                    751:     my $local=localtime($now);
                    752:     $lastlog=$local.': '.$message;
                    753:     print $fh "$local ($$) [$conserver] [$status]: $message\n";
                    754: }
                    755: 
                    756: 
                    757: sub logperm {
                    758:     my $message=shift;
                    759:     my $execdir=$perlvar{'lonDaemons'};
                    760:     my $now=time;
                    761:     my $local=localtime($now);
                    762:     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
                    763:     print $fh "$now:$message:$local\n";
                    764: }
                    765: # ------------------------------------------------------------------ Log status
                    766: 
                    767: sub logstatus {
                    768:     my $docdir=$perlvar{'lonDocRoot'};
                    769:     my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt");
                    770:     print $fh $$."\t".$conserver."\t".$status."\t".$lastlog."\n";
                    771: }
                    772: 
                    773: sub initnewstatus {
                    774:     my $docdir=$perlvar{'lonDocRoot'};
                    775:     my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt");
                    776:     my $now=time;
                    777:     my $local=localtime($now);
                    778:     print $fh "LONC status $local - parent $$\n\n";
                    779: }
                    780: 
                    781: # -------------------------------------------------------------- Status setting
                    782: 
                    783: sub status {
                    784:     my $what=shift;
                    785:     my $now=time;
                    786:     my $local=localtime($now);
                    787:     $status=$local.': '.$what;
                    788: }
                    789: 
                    790: 
1.1       albertel  791: 
1.23      harris41  792: # ----------------------------------- POD (plain old documentation, CPAN style)
                    793: 
                    794: =head1 NAME
                    795: 
                    796: lonc - LON TCP-MySQL-Server Daemon for handling database requests.
                    797: 
                    798: =head1 SYNOPSIS
                    799: 
1.31    ! harris41  800: Usage: B<lonc>
        !           801: 
1.23      harris41  802: Should only be run as user=www.  This is a command-line script which
1.31    ! harris41  803: is invoked by B<loncron>.  There is no expectation that a typical user
        !           804: will manually start B<lonc> from the command-line.  (In other words,
        !           805: DO NOT START B<lonc> YOURSELF.)
1.23      harris41  806: 
                    807: =head1 DESCRIPTION
                    808: 
                    809: Provides persistent TCP connections to the other servers in the network
                    810: through multiplexed domain sockets
                    811: 
1.31    ! harris41  812: B<lonc> forks off children processes that correspond to the other servers
        !           813: in the network.  Management of these processes can be done at the
        !           814: parent process level or the child process level.
        !           815: 
        !           816: B<logs/lonc.log> is the location of log messages.
        !           817: 
        !           818: The process management is now explained in terms of linux shell commands,
        !           819: subroutines internal to this code, and signal assignments:
        !           820: 
        !           821: =over 4
        !           822: 
        !           823: =item *
        !           824: 
        !           825: PID is stored in B<logs/lonc.pid>
        !           826: 
        !           827: This is the process id number of the parent B<lonc> process.
        !           828: 
        !           829: =item *
        !           830: 
        !           831: SIGTERM and SIGINT
        !           832: 
        !           833: Parent signal assignment:
        !           834:  $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
        !           835: 
        !           836: Child signal assignment:
        !           837:  $SIG{INT}  = 'DEFAULT'; (and SIGTERM is DEFAULT also)
        !           838: (The child dies and a SIGALRM is sent to parent, awaking parent from slumber
        !           839:  to restart a new child.)
        !           840: 
        !           841: Command-line invocations:
        !           842:  B<kill> B<-s> SIGTERM I<PID>
        !           843:  B<kill> B<-s> SIGINT I<PID>
        !           844: 
        !           845: Subroutine B<HUNTSMAN>:
        !           846:  This is only invoked for the B<lonc> parent I<PID>.
        !           847: This kills all the children, and then the parent.
        !           848: The B<lonc.pid> file is cleared.
        !           849: 
        !           850: =item *
        !           851: 
        !           852: SIGHUP
        !           853: 
        !           854: Current bug:
        !           855:  This signal can only be processed the first time
        !           856: on the parent process.  Subsequent SIGHUP signals
        !           857: have no effect.
        !           858: 
        !           859: Parent signal assignment:
        !           860:  $SIG{HUP}  = \&HUPSMAN;
        !           861: 
        !           862: Child signal assignment:
        !           863:  none (nothing happens)
        !           864: 
        !           865: Command-line invocations:
        !           866:  B<kill> B<-s> SIGHUP I<PID>
        !           867: 
        !           868: Subroutine B<HUPSMAN>:
        !           869:  This is only invoked for the B<lonc> parent I<PID>,
        !           870: This kills all the children, and then the parent.
        !           871: The B<lonc.pid> file is cleared.
        !           872: 
        !           873: =item *
        !           874: 
        !           875: SIGUSR1
        !           876: 
        !           877: Parent signal assignment:
        !           878:  $SIG{USR1} = \&USRMAN;
        !           879: 
        !           880: Child signal assignment:
        !           881:  $SIG{USR1}= \&logstatus;
        !           882: 
        !           883: Command-line invocations:
        !           884:  B<kill> B<-s> SIGUSR1 I<PID>
        !           885: 
        !           886: Subroutine B<USRMAN>:
        !           887:  When invoked for the B<lonc> parent I<PID>,
        !           888: SIGUSR1 is sent to all the children, and the status of
        !           889: each connection is logged.
        !           890: 
        !           891: =item *
        !           892: 
        !           893: SIGCHLD
        !           894: 
        !           895: Parent signal assignment:
        !           896:  $SIG{CHLD} = \&REAPER;
        !           897: 
        !           898: Child signal assignment:
        !           899:  none
        !           900: 
        !           901: Command-line invocations:
        !           902:  B<kill> B<-s> SIGCHLD I<PID>
1.23      harris41  903: 
1.31    ! harris41  904: Subroutine B<REAPER>:
        !           905:  This is only invoked for the B<lonc> parent I<PID>.
        !           906: Information pertaining to the child is removed.
        !           907: The socket port is cleaned up.
1.23      harris41  908: 
1.31    ! harris41  909: =back
1.23      harris41  910: 
                    911: =head1 PREREQUISITES
                    912: 
                    913: POSIX
                    914: IO::Socket
                    915: IO::Select
                    916: IO::File
                    917: Socket
                    918: Fcntl
                    919: Tie::RefHash
                    920: Crypt::IDEA
                    921: 
                    922: =head1 COREQUISITES
                    923: 
                    924: =head1 OSNAMES
                    925: 
                    926: linux
                    927: 
                    928: =head1 SCRIPT CATEGORIES
                    929: 
                    930: Server/Process
                    931: 
                    932: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>