Annotation of loncom/loncnew, revision 1.88

1.1       foxr        1: #!/usr/bin/perl
1.2       albertel    2: # The LearningOnline Network with CAPA
                      3: # lonc maintains the connections to remote computers
                      4: #
1.88    ! foxr        5: # $Id: loncnew,v 1.87 2007/06/18 22:49:52 albertel Exp $
1.2       albertel    6: #
                      7: # Copyright Michigan State University Board of Trustees
                      8: #
                      9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
1.17      foxr       10: ## LON-CAPA is free software; you can redistribute it and/or modify
1.2       albertel   11: # it under the terms of the GNU General Public License as published by
                     12: # the Free Software Foundation; either version 2 of the License, or
                     13: # (at your option) any later version.
                     14: #
                     15: # LON-CAPA is distributed in the hope that it will be useful,
                     16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     18: # GNU General Public License for more details.
                     19: #
                     20: # You should have received a copy of the GNU General Public License
                     21: # along with LON-CAPA; if not, write to the Free Software
                     22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     23: #
                     24: # /home/httpd/html/adm/gpl.txt
                     25: #
                     26: # http://www.lon-capa.org/
                     27: #
1.1       foxr       28: #
1.15      foxr       29: # new lonc handles n request out bver m connections to londs.
1.1       foxr       30: # This module is based on the Event class.
                     31: #   Development iterations:
                     32: #    - Setup basic event loop.   (done)
                     33: #    - Add timer dispatch.       (done)
                     34: #    - Add ability to accept lonc UNIX domain sockets.  (done)
                     35: #    - Add ability to create/negotiate lond connections (done).
1.7       foxr       36: #    - Add general logic for dispatching requests and timeouts. (done).
                     37: #    - Add support for the lonc/lond requests.          (done).
1.38      foxr       38: #    - Add logging/status monitoring.                    (done)
                     39: #    - Add Signal handling - HUP restarts. USR1 status report. (done)
1.7       foxr       40: #    - Add Configuration file I/O                       (done).
1.38      foxr       41: #    - Add management/status request interface.         (done)
1.8       foxr       42: #    - Add deferred request capability.                  (done)
1.38      foxr       43: #    - Detect transmission timeouts.                     (done)
1.7       foxr       44: #
                     45: 
1.23      foxr       46: use strict;
1.1       foxr       47: use lib "/home/httpd/lib/perl/";
                     48: use Event qw(:DEFAULT );
                     49: use POSIX qw(:signal_h);
1.12      foxr       50: use POSIX;
1.1       foxr       51: use IO::Socket;
                     52: use IO::Socket::INET;
                     53: use IO::Socket::UNIX;
1.9       foxr       54: use IO::File;
1.6       foxr       55: use IO::Handle;
1.1       foxr       56: use Socket;
                     57: use Crypt::IDEA;
                     58: use LONCAPA::Queue;
                     59: use LONCAPA::Stack;
                     60: use LONCAPA::LondConnection;
1.7       foxr       61: use LONCAPA::LondTransaction;
1.1       foxr       62: use LONCAPA::Configuration;
1.67      albertel   63: use Fcntl qw(:flock);
1.1       foxr       64: 
                     65: 
                     66: # Read the httpd configuration file to get perl variables
                     67: # normally set in apache modules:
                     68: 
                     69: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
                     70: my %perlvar    = %{$perlvarref};
                     71: 
                     72: #
                     73: #  parent and shared variables.
                     74: 
1.83      albertel   75: my %ChildPid;			# by pid -> host.
                     76: my %ChildHost;			# by host.
1.62      foxr       77: my %listening_to;		# Socket->host table for who the parent
                     78:                                 # is listening to.
                     79: my %parent_dispatchers;         # host-> listener watcher events. 
1.1       foxr       80: 
1.65      foxr       81: my %parent_handlers;		# Parent signal handlers...
                     82: 
1.9       foxr       83: my $MaxConnectionCount = 10;	# Will get from config later.
1.1       foxr       84: my $ClientConnection = 0;	# Uniquifier for client events.
                     85: 
1.9       foxr       86: my $DebugLevel = 0;
1.29      foxr       87: my $NextDebugLevel= 2;		# So Sigint can toggle this.
1.50      albertel   88: my $IdleTimeout= 600;		# Wait 10 minutes before pruning connections.
1.1       foxr       89: 
1.39      foxr       90: my $LogTransactions = 0;	# When True, all transactions/replies get logged.
1.65      foxr       91: my $executable      = $0;	# Get the full path to me.
1.39      foxr       92: 
1.1       foxr       93: #
                     94: #  The variables below are only used by the child processes.
                     95: #
                     96: my $RemoteHost;			# Name of host child is talking to.
1.81      albertel   97: my $RemoteHostId;		# default lonid of host child is talking to.
1.82      albertel   98: my @all_host_ids;
1.20      albertel   99: my $UnixSocketDir= $perlvar{'lonSockDir'};
1.1       foxr      100: my $IdleConnections = Stack->new(); # Set of idle connections
                    101: my %ActiveConnections;		# Connections to the remote lond.
1.7       foxr      102: my %ActiveTransactions;		# LondTransactions in flight.
1.1       foxr      103: my %ActiveClients;		# Serial numbers of active clients by socket.
                    104: my $WorkQueue       = Queue->new(); # Queue of pending transactions.
                    105: my $ConnectionCount = 0;
1.4       foxr      106: my $IdleSeconds     = 0;	# Number of seconds idle.
1.9       foxr      107: my $Status          = "";	# Current status string.
1.14      foxr      108: my $RecentLogEntry  = "";
1.72      albertel  109: my $ConnectionRetries=5;	# Number of connection retries allowed.
                    110: my $ConnectionRetriesLeft=5;	# Number of connection retries remaining.
1.40      foxr      111: my $LondVersion     = "unknown"; # Version of lond we talk with.
1.49      foxr      112: my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.
1.54      foxr      113: my $LondConnecting  = 0;       # True when a connection is being built.
1.1       foxr      114: 
1.60      foxr      115: 
                    116: 
1.62      foxr      117: my $I_am_child      = 0;	# True if this is the child process.
1.57      foxr      118: 
1.1       foxr      119: #
1.9       foxr      120: #   The hash below gives the HTML format for log messages
                    121: #   given a severity.
                    122: #    
                    123: my %LogFormats;
                    124: 
1.45      albertel  125: $LogFormats{"CRITICAL"} = "<font color='red'>CRITICAL: %s</font>";
                    126: $LogFormats{"SUCCESS"}  = "<font color='green'>SUCCESS: %s</font>";
                    127: $LogFormats{"INFO"}     = "<font color='yellow'>INFO: %s</font>";
                    128: $LogFormats{"WARNING"}  = "<font color='blue'>WARNING: %s</font>";
1.9       foxr      129: $LogFormats{"DEFAULT"}  = " %s ";
                    130: 
1.10      foxr      131: 
1.57      foxr      132: #  UpdateStatus;
                    133: #    Update the idle status display to show how many connections
                    134: #    are left, retries and other stuff.
                    135: #
                    136: sub UpdateStatus {
                    137:     if ($ConnectionRetriesLeft > 0) {
                    138: 	ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount
                    139: 		   ." Retries remaining: ".$ConnectionRetriesLeft
                    140: 		   ." ($KeyMode)");
                    141:     } else {
                    142: 	ShowStatus(GetServerHost()." >> DEAD <<");
                    143:     }
                    144: }
                    145: 
1.10      foxr      146: 
                    147: =pod
                    148: 
                    149: =head2 LogPerm
                    150: 
                    151: Makes an entry into the permanent log file.
                    152: 
                    153: =cut
1.69      matthew   154: 
1.10      foxr      155: sub LogPerm {
                    156:     my $message=shift;
                    157:     my $execdir=$perlvar{'lonDaemons'};
                    158:     my $now=time;
                    159:     my $local=localtime($now);
                    160:     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
1.86      albertel  161:     chomp($message);
1.10      foxr      162:     print $fh "$now:$message:$local\n";
                    163: }
1.9       foxr      164: 
                    165: =pod
                    166: 
                    167: =head2 Log
                    168: 
                    169: Logs a message to the log file.
                    170: Parameters:
                    171: 
                    172: =item severity
                    173: 
                    174: One of CRITICAL, WARNING, INFO, SUCCESS used to select the
                    175: format string used to format the message.  if the severity is
                    176: not a defined severity the Default format string is used.
                    177: 
                    178: =item message
                    179: 
                    180: The base message.  In addtion to the format string, the message
                    181: will be appended to a string containing the name of our remote
                    182: host and the time will be formatted into the message.
                    183: 
                    184: =cut
                    185: 
                    186: sub Log {
1.47      foxr      187: 
                    188:     my ($severity, $message) = @_;
                    189: 
1.9       foxr      190:     if(!$LogFormats{$severity}) {
                    191: 	$severity = "DEFAULT";
                    192:     }
                    193: 
                    194:     my $format = $LogFormats{$severity};
                    195:     
                    196:     #  Put the window dressing in in front of the message format:
                    197: 
                    198:     my $now   = time;
                    199:     my $local = localtime($now);
                    200:     my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";
1.76      albertel  201:     $finalformat = $finalformat.$format."\n";
1.9       foxr      202: 
                    203:     # open the file and put the result.
                    204: 
                    205:     my $execdir = $perlvar{'lonDaemons'};
                    206:     my $fh      = IO::File->new(">>$execdir/logs/lonc.log");
                    207:     my $msg = sprintf($finalformat, $message);
1.14      foxr      208:     $RecentLogEntry = $msg;
1.9       foxr      209:     print $fh $msg;
                    210:     
1.10      foxr      211:     
1.9       foxr      212: }
1.6       foxr      213: 
1.3       albertel  214: 
1.1       foxr      215: =pod
1.3       albertel  216: 
                    217: =head2 GetPeerName
                    218: 
                    219: Returns the name of the host that a socket object is connected to.
                    220: 
1.1       foxr      221: =cut
                    222: 
                    223: sub GetPeername {
1.47      foxr      224: 
                    225: 
                    226:     my ($connection, $AdrFamily) = @_;
                    227: 
1.1       foxr      228:     my $peer       = $connection->peername();
                    229:     my $peerport;
                    230:     my $peerip;
                    231:     if($AdrFamily == AF_INET) {
                    232: 	($peerport, $peerip) = sockaddr_in($peer);
1.23      foxr      233: 	my $peername    = gethostbyaddr($peerip, $AdrFamily);
1.1       foxr      234: 	return $peername;
                    235:     } elsif ($AdrFamily == AF_UNIX) {
                    236: 	my $peerfile;
                    237: 	($peerfile) = sockaddr_un($peer);
                    238: 	return $peerfile;
                    239:     }
                    240: }
                    241: =pod
1.3       albertel  242: 
1.1       foxr      243: =head2 Debug
1.3       albertel  244: 
                    245: Invoked to issue a debug message.
                    246: 
1.1       foxr      247: =cut
1.3       albertel  248: 
1.1       foxr      249: sub Debug {
1.47      foxr      250: 
                    251:     my ($level, $message) = @_;
                    252: 
1.1       foxr      253:     if ($level <= $DebugLevel) {
1.23      foxr      254: 	Log("INFO", "-Debug- $message host = $RemoteHost");
1.1       foxr      255:     }
                    256: }
                    257: 
                    258: sub SocketDump {
1.47      foxr      259: 
                    260:     my ($level, $socket) = @_;
                    261: 
1.1       foxr      262:     if($level <= $DebugLevel) {
1.48      foxr      263: 	$socket->Dump(-1);	# Ensure it will get dumped.
1.1       foxr      264:     }
                    265: }
1.3       albertel  266: 
1.1       foxr      267: =pod
1.3       albertel  268: 
1.5       foxr      269: =head2 ShowStatus
                    270: 
                    271:  Place some text as our pid status.
1.10      foxr      272:  and as what we return in a SIGUSR1
1.5       foxr      273: 
                    274: =cut
1.69      matthew   275: 
1.5       foxr      276: sub ShowStatus {
1.10      foxr      277:     my $state = shift;
                    278:     my $now = time;
                    279:     my $local = localtime($now);
                    280:     $Status   = $local.": ".$state;
                    281:     $0='lonc: '.$state.' '.$local;
1.5       foxr      282: }
                    283: 
                    284: =pod
                    285: 
1.69      matthew   286: =head2 SocketTimeout
1.15      foxr      287: 
                    288:     Called when an action on the socket times out.  The socket is 
                    289:    destroyed and any active transaction is failed.
                    290: 
                    291: 
                    292: =cut
1.69      matthew   293: 
1.15      foxr      294: sub SocketTimeout {
                    295:     my $Socket = shift;
1.38      foxr      296:     Log("WARNING", "A socket timeout was detected");
1.52      foxr      297:     Debug(5, " SocketTimeout called: ");
1.48      foxr      298:     $Socket->Dump(0);
1.42      foxr      299:     if(exists($ActiveTransactions{$Socket})) {
1.43      albertel  300: 	FailTransaction($ActiveTransactions{$Socket});
1.42      foxr      301:     }
1.22      foxr      302:     KillSocket($Socket);	# A transaction timeout also counts as
                    303:                                 # a connection failure:
                    304:     $ConnectionRetriesLeft--;
1.42      foxr      305:     if($ConnectionRetriesLeft <= 0) {
1.52      foxr      306: 	Log("CRITICAL", "Host marked DEAD: ".GetServerHost());
1.56      foxr      307: 	$LondConnecting = 0;
1.42      foxr      308:     }
                    309: 
1.15      foxr      310: }
1.80      albertel  311: 
1.64      foxr      312: #
                    313: #   This function should be called by the child in all cases where it must
1.80      albertel  314: #   exit.  The child process must create a lock file for the AF_UNIX socket
                    315: #   in order to prevent connection requests from lonnet in the time between
                    316: #   process exit and the parent picking up the listen again.
                    317: #
1.64      foxr      318: # Parameters:
                    319: #     exit_code           - Exit status value, however see the next parameter.
                    320: #     message             - If this optional parameter is supplied, the exit
                    321: #                           is via a die with this message.
                    322: #
                    323: sub child_exit {
                    324:     my ($exit_code, $message) = @_;
                    325: 
                    326:     # Regardless of how we exit, we may need to do the lock thing:
                    327: 
1.80      albertel  328:     #
                    329:     #  Create a lock file since there will be a time window
                    330:     #  between our exit and the parent's picking up the listen
                    331:     #  during which no listens will be done on the
                    332:     #  lonnet client socket.
                    333:     #
                    334:     my $lock_file = &GetLoncSocketPath().".lock";
                    335:     open(LOCK,">$lock_file");
                    336:     print LOCK "Contents not important";
                    337:     close(LOCK);
1.81      albertel  338:     unlink(&GetLoncSocketPath());
1.64      foxr      339: 
1.80      albertel  340:     if ($message) {
                    341: 	die($message);
1.64      foxr      342:     } else {
                    343: 	exit($exit_code);
                    344:     }
                    345: }
1.35      foxr      346: #----------------------------- Timer management ------------------------
1.15      foxr      347: 
                    348: =pod
                    349: 
1.1       foxr      350: =head2 Tick
1.3       albertel  351: 
                    352: Invoked  each timer tick.
                    353: 
1.1       foxr      354: =cut
                    355: 
1.5       foxr      356: 
1.1       foxr      357: sub Tick {
1.52      foxr      358:     my ($Event)       = @_;
                    359:     my $clock_watcher = $Event->w;
                    360: 
1.1       foxr      361:     my $client;
1.57      foxr      362:     UpdateStatus();
                    363: 
1.4       foxr      364:     # Is it time to prune connection count:
                    365: 
                    366: 
                    367:     if($IdleConnections->Count()  && 
                    368:        ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do?
1.52      foxr      369: 	$IdleSeconds++;
1.4       foxr      370: 	if($IdleSeconds > $IdleTimeout) { # Prune a connection...
1.23      foxr      371: 	    my $Socket = $IdleConnections->pop();
1.6       foxr      372: 	    KillSocket($Socket);
1.54      foxr      373: 	    $IdleSeconds = 0;	# Otherwise all connections get trimmed to fast.
1.57      foxr      374: 	    UpdateStatus();
1.80      albertel  375: 	    if(($ConnectionCount == 0)) {
1.64      foxr      376: 		&child_exit(0);
                    377: 
1.57      foxr      378: 	    }
1.4       foxr      379: 	}
                    380:     } else {
                    381: 	$IdleSeconds = 0;	# Reset idle count if not idle.
                    382:     }
1.15      foxr      383:     #
                    384:     #  For each inflight transaction, tick down its timeout counter.
                    385:     #
1.35      foxr      386: 
1.34      albertel  387:     foreach my $item (keys %ActiveConnections) {
                    388: 	my $State = $ActiveConnections{$item}->data->GetState();
1.35      foxr      389: 	if ($State ne 'Idle') {
1.34      albertel  390: 	    Debug(5,"Ticking Socket $State $item");
                    391: 	    $ActiveConnections{$item}->data->Tick();
                    392: 	}
1.15      foxr      393:     }
1.5       foxr      394:     # Do we have work in the queue, but no connections to service them?
                    395:     # If so, try to make some new connections to get things going again.
                    396:     #
1.57      foxr      397:     #   Note this code is dead now...
                    398:     #
1.5       foxr      399:     my $Requests = $WorkQueue->Count();
1.56      foxr      400:     if (($ConnectionCount == 0)  && ($Requests > 0) && (!$LondConnecting)) { 
1.10      foxr      401: 	if ($ConnectionRetriesLeft > 0) {
1.56      foxr      402: 	    Debug(5,"Work but no connections, Make a new one");
                    403: 	    my $success;
                    404: 	    $success    = &MakeLondConnection;
                    405: 	    if($success == 0) { # All connections failed:
1.29      foxr      406: 		Debug(5,"Work in queue failed to make any connectiouns\n");
1.22      foxr      407: 		EmptyQueue();	# Fail pending transactions with con_lost.
1.42      foxr      408: 		CloseAllLondConnections(); # Should all be closed but....
1.10      foxr      409: 	    }
                    410: 	} else {
1.56      foxr      411: 	    $LondConnecting = 0;
1.22      foxr      412: 	    ShowStatus(GetServerHost()." >>> DEAD!!! <<<");
1.29      foxr      413: 	    Debug(5,"Work in queue, but gave up on connections..flushing\n");
1.10      foxr      414: 	    EmptyQueue();	# Connections can't be established.
1.42      foxr      415: 	    CloseAllLondConnections(); # Should all already be closed but...
1.5       foxr      416: 	}
                    417:        
                    418:     }
1.49      foxr      419:     if ($ConnectionCount == 0) {
                    420: 	$KeyMode = ""; 
1.52      foxr      421: 	$clock_watcher->cancel();
1.49      foxr      422:     }
1.66      albertel  423:     &UpdateStatus();
1.1       foxr      424: }
                    425: 
                    426: =pod
1.3       albertel  427: 
1.1       foxr      428: =head2 SetupTimer
                    429: 
1.3       albertel  430: Sets up a 1 per sec recurring timer event.  The event handler is used to:
1.1       foxr      431: 
1.3       albertel  432: =item
                    433: 
                    434: Trigger timeouts on communications along active sockets.
                    435: 
                    436: =item
                    437: 
                    438: Trigger disconnections of idle sockets.
1.1       foxr      439: 
                    440: =cut
                    441: 
                    442: sub SetupTimer {
1.52      foxr      443:     Debug(6, "SetupTimer");
                    444:     Event->timer(interval => 1, cb => \&Tick );
1.1       foxr      445: }
1.3       albertel  446: 
1.1       foxr      447: =pod
1.3       albertel  448: 
1.1       foxr      449: =head2 ServerToIdle
1.3       albertel  450: 
                    451: This function is called when a connection to the server is
                    452: ready for more work.
                    453: 
                    454: If there is work in the Work queue the top element is dequeued
1.1       foxr      455: and the connection will start to work on it.  If the work queue is
                    456: empty, the connection is pushed on the idle connection stack where
                    457: it will either get another work unit, or alternatively, if it sits there
                    458: long enough, it will be shut down and released.
                    459: 
1.3       albertel  460: =cut
1.1       foxr      461: 
                    462: sub ServerToIdle {
                    463:     my $Socket   = shift;	# Get the socket.
1.49      foxr      464:     $KeyMode = $Socket->{AuthenticationMode};
1.7       foxr      465:     delete($ActiveTransactions{$Socket}); # Server has no transaction
1.1       foxr      466: 
1.29      foxr      467:     &Debug(5, "Server to idle");
1.1       foxr      468: 
                    469:     #  If there's work to do, start the transaction:
                    470: 
1.23      foxr      471:     my $reqdata = $WorkQueue->dequeue(); # This is a LondTransaction
1.29      foxr      472:     if ($reqdata ne undef)  {
                    473: 	Debug(5, "Queue gave request data: ".$reqdata->getRequest());
1.7       foxr      474: 	&StartRequest($Socket,  $reqdata);
1.8       foxr      475: 
1.1       foxr      476:     } else {
                    477: 	
                    478:     #  There's no work waiting, so push the server to idle list.
1.29      foxr      479: 	&Debug(5, "No new work requests, server connection going idle");
1.1       foxr      480: 	$IdleConnections->push($Socket);
                    481:     }
                    482: }
1.3       albertel  483: 
1.1       foxr      484: =pod
1.3       albertel  485: 
1.1       foxr      486: =head2 ClientWritable
1.3       albertel  487: 
                    488: Event callback for when a client socket is writable.
                    489: 
                    490: This callback is established when a transaction reponse is
                    491: avaiable from lond.  The response is forwarded to the unix socket
                    492: as it becomes writable in this sub.
                    493: 
1.1       foxr      494: Parameters:
                    495: 
1.3       albertel  496: =item Event
                    497: 
                    498: The event that has been triggered. Event->w->data is
                    499: the data and Event->w->fd is the socket to write.
1.1       foxr      500: 
                    501: =cut
1.3       albertel  502: 
1.1       foxr      503: sub ClientWritable {
                    504:     my $Event    = shift;
                    505:     my $Watcher  = $Event->w;
1.84      albertel  506:     if (!defined($Watcher)) {
                    507: 	&child_exit(-1,'No watcher for event in ClientWritable');
                    508:     }
1.1       foxr      509:     my $Data     = $Watcher->data;
                    510:     my $Socket   = $Watcher->fd;
                    511: 
                    512:     # Try to send the data:
                    513: 
                    514:     &Debug(6, "ClientWritable writing".$Data);
                    515:     &Debug(9, "Socket is: ".$Socket);
                    516: 
1.6       foxr      517:     if($Socket->connected) {
                    518: 	my $result = $Socket->send($Data, 0);
                    519: 	
                    520: 	# $result undefined: the write failed.
                    521: 	# otherwise $result is the number of bytes written.
                    522: 	# Remove that preceding string from the data.
                    523: 	# If the resulting data is empty, destroy the watcher
                    524: 	# and set up a read event handler to accept the next
                    525: 	# request.
                    526: 	
                    527: 	&Debug(9,"Send result is ".$result." Defined: ".defined($result));
1.29      foxr      528: 	if($result ne undef) {
1.6       foxr      529: 	    &Debug(9, "send result was defined");
                    530: 	    if($result == length($Data)) { # Entire string sent.
                    531: 		&Debug(9, "ClientWritable data all written");
                    532: 		$Watcher->cancel();
                    533: 		#
                    534: 		#  Set up to read next request from socket:
                    535: 		
                    536: 		my $descr     = sprintf("Connection to lonc client %d",
                    537: 					$ActiveClients{$Socket});
                    538: 		Event->io(cb    => \&ClientRequest,
                    539: 			  poll  => 'r',
                    540: 			  desc  => $descr,
                    541: 			  data  => "",
                    542: 			  fd    => $Socket);
                    543: 		
                    544: 	    } else {		# Partial string sent.
                    545: 		$Watcher->data(substr($Data, $result));
1.15      foxr      546: 		if($result == 0) {    # client hung up on us!!
1.52      foxr      547: 		    # Log("INFO", "lonc pipe client hung up on us!");
1.15      foxr      548: 		    $Watcher->cancel;
                    549: 		    $Socket->shutdown(2);
                    550: 		    $Socket->close();
                    551: 		}
1.6       foxr      552: 	    }
                    553: 	    
                    554: 	} else {			# Error of some sort...
                    555: 	    
                    556: 	    # Some errnos are possible:
                    557: 	    my $errno = $!;
                    558: 	    if($errno == POSIX::EWOULDBLOCK   ||
                    559: 	       $errno == POSIX::EAGAIN        ||
                    560: 	       $errno == POSIX::EINTR) {
                    561: 		# No action taken?
                    562: 	    } else {		# Unanticipated errno.
                    563: 		&Debug(5,"ClientWritable error or peer shutdown".$RemoteHost);
                    564: 		$Watcher->cancel;	# Stop the watcher.
                    565: 		$Socket->shutdown(2); # Kill connection
                    566: 		$Socket->close();	# Close the socket.
                    567: 	    }
1.1       foxr      568: 	    
                    569: 	}
1.6       foxr      570:     } else {
                    571: 	$Watcher->cancel();	# A delayed request...just cancel.
1.84      albertel  572: 	return;
1.1       foxr      573:     }
                    574: }
                    575: 
                    576: =pod
1.3       albertel  577: 
1.1       foxr      578: =head2 CompleteTransaction
1.3       albertel  579: 
                    580: Called when the reply data has been received for a lond 
1.1       foxr      581: transaction.   The reply data must now be sent to the
                    582: ultimate client on the other end of the Unix socket.  This is
                    583: done by setting up a writable event for the socket with the
                    584: data the reply data.
1.3       albertel  585: 
1.1       foxr      586: Parameters:
1.3       albertel  587: 
                    588: =item Socket
                    589: 
                    590: Socket on which the lond transaction occured.  This is a
                    591: LondConnection. The data received is in the TransactionReply member.
                    592: 
1.7       foxr      593: =item Transaction
1.3       albertel  594: 
1.7       foxr      595: The transaction that is being completed.
1.1       foxr      596: 
                    597: =cut
1.3       albertel  598: 
1.1       foxr      599: sub CompleteTransaction {
1.29      foxr      600:     &Debug(5,"Complete transaction");
1.47      foxr      601: 
                    602:     my ($Socket, $Transaction) = @_;
1.1       foxr      603: 
1.7       foxr      604:     if (!$Transaction->isDeferred()) { # Normal transaction
                    605: 	my $data   = $Socket->GetReply(); # Data to send.
1.39      foxr      606: 	if($LogTransactions) {
                    607: 	    Log("SUCCESS", "Reply from lond: '$data'");
                    608: 	}
1.7       foxr      609: 	StartClientReply($Transaction, $data);
                    610:     } else {			# Delete deferred transaction file.
1.9       foxr      611: 	Log("SUCCESS", "A delayed transaction was completed");
1.86      albertel  612: 	LogPerm("S:".$Transaction->getClient().":".$Transaction->getRequest());
                    613: 	unlink($Transaction->getFile());
1.7       foxr      614:     }
1.6       foxr      615: }
1.42      foxr      616: 
1.6       foxr      617: =pod
1.42      foxr      618: 
1.6       foxr      619: =head1 StartClientReply
                    620: 
                    621:    Initiates a reply to a client where the reply data is a parameter.
                    622: 
1.7       foxr      623: =head2  parameters:
                    624: 
                    625: =item Transaction
                    626: 
                    627:     The transaction for which we are responding to the client.
                    628: 
                    629: =item data
                    630: 
                    631:     The data to send to apached client.
                    632: 
1.6       foxr      633: =cut
1.42      foxr      634: 
1.6       foxr      635: sub StartClientReply {
1.1       foxr      636: 
1.47      foxr      637:     my ($Transaction, $data) = @_;
1.12      foxr      638: 
1.7       foxr      639:     my $Client   = $Transaction->getClient();
                    640: 
1.1       foxr      641:     &Debug(8," Reply was: ".$data);
                    642:     my $Serial         = $ActiveClients{$Client};
                    643:     my $desc           = sprintf("Connection to lonc client %d",
                    644: 				 $Serial);
                    645:     Event->io(fd       => $Client,
                    646: 	      poll     => "w",
                    647: 	      desc     => $desc,
                    648: 	      cb       => \&ClientWritable,
                    649: 	      data     => $data);
                    650: }
1.42      foxr      651: 
1.4       foxr      652: =pod
1.42      foxr      653: 
1.4       foxr      654: =head2 FailTransaction
                    655: 
                    656:   Finishes a transaction with failure because the associated lond socket
1.7       foxr      657:   disconnected.  There are two possibilities:
                    658:   - The transaction is deferred: in which case we just quietly
                    659:     delete the transaction since there is no client connection.
                    660:   - The transaction is 'live' in which case we initiate the sending
                    661:     of "con_lost" to the client.
                    662: 
1.42      foxr      663: Deleting the transaction means killing it from the %ActiveTransactions hash.
1.4       foxr      664: 
                    665: Parameters:
                    666: 
                    667: =item client  
                    668:  
1.7       foxr      669:    The LondTransaction we are failing.
                    670:  
1.42      foxr      671: 
1.4       foxr      672: =cut
                    673: 
                    674: sub FailTransaction {
1.7       foxr      675:     my $transaction = shift;
1.52      foxr      676:     
                    677:     #  If the socket is dead, that's already logged.
                    678: 
                    679:     if ($ConnectionRetriesLeft > 0) {
                    680: 	Log("WARNING", "Failing transaction "
1.71      albertel  681: 	    .$transaction->getLoggableRequest());
1.52      foxr      682:     }
1.71      albertel  683:     Debug(1, "Failing transaction: ".$transaction->getLoggableRequest());
1.10      foxr      684:     if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
1.11      foxr      685: 	my $client  = $transaction->getClient();
1.30      foxr      686: 	Debug(1," Replying con_lost to ".$transaction->getRequest());
1.11      foxr      687: 	StartClientReply($transaction, "con_lost\n");
1.7       foxr      688:     }
1.4       foxr      689: 
                    690: }
                    691: 
                    692: =pod
1.69      matthew   693: 
1.6       foxr      694: =head1  EmptyQueue
1.7       foxr      695: 
1.6       foxr      696:   Fails all items in the work queue with con_lost.
1.7       foxr      697:   Note that each item in the work queue is a transaction.
                    698: 
1.6       foxr      699: =cut
1.69      matthew   700: 
1.6       foxr      701: sub EmptyQueue {
1.22      foxr      702:     $ConnectionRetriesLeft--;	# Counts as connection failure too.
1.6       foxr      703:     while($WorkQueue->Count()) {
1.10      foxr      704: 	my $request = $WorkQueue->dequeue(); # This is a transaction
1.7       foxr      705: 	FailTransaction($request);
1.6       foxr      706:     }
                    707: }
                    708: 
                    709: =pod
1.4       foxr      710: 
1.9       foxr      711: =head2 CloseAllLondConnections
                    712: 
                    713: Close all connections open on lond prior to exit e.g.
                    714: 
                    715: =cut
1.69      matthew   716: 
1.9       foxr      717: sub CloseAllLondConnections {
1.23      foxr      718:     foreach my $Socket (keys %ActiveConnections) {
1.42      foxr      719:       if(exists($ActiveTransactions{$Socket})) {
                    720: 	FailTransaction($ActiveTransactions{$Socket});
                    721:       }
                    722:       KillSocket($Socket);
1.9       foxr      723:     }
                    724: }
                    725: 
                    726: =pod
                    727: 
1.4       foxr      728: =head2 KillSocket
                    729:  
                    730: Destroys a socket.  This function can be called either when a socket
                    731: has died of 'natural' causes or because a socket needs to be pruned due to
                    732: idleness.  If the socket has died naturally, if there are no longer any 
                    733: live connections a new connection is created (in case there are transactions
                    734: in the queue).  If the socket has been pruned, it is never re-created.
                    735: 
                    736: Parameters:
1.1       foxr      737: 
1.4       foxr      738: =item Socket
                    739:  
                    740:   The socket to kill off.
                    741: 
                    742: =item Restart
                    743: 
                    744: nonzero if we are allowed to create a new connection.
                    745: 
1.69      matthew   746: =cut
1.4       foxr      747: 
                    748: sub KillSocket {
                    749:     my $Socket = shift;
                    750: 
1.17      foxr      751:     Log("WARNING", "Shutting down a socket");
1.9       foxr      752:     $Socket->Shutdown();
                    753: 
1.7       foxr      754:     #  If the socket came from the active connection set,
                    755:     #  delete its transaction... note that FailTransaction should
                    756:     #  already have been called!!!
                    757:     #  otherwise it came from the idle set.
                    758:     #  
1.4       foxr      759:     
                    760:     if(exists($ActiveTransactions{$Socket})) {
                    761: 	delete ($ActiveTransactions{$Socket});
                    762:     }
                    763:     if(exists($ActiveConnections{$Socket})) {
                    764: 	delete($ActiveConnections{$Socket});
1.37      albertel  765: 	$ConnectionCount--;
                    766: 	if ($ConnectionCount < 0) { $ConnectionCount = 0; }
1.4       foxr      767:     }
1.6       foxr      768:     #  If the connection count has gone to zero and there is work in the
                    769:     #  work queue, the work all gets failed with con_lost.
                    770:     #
                    771:     if($ConnectionCount == 0) {
1.22      foxr      772: 	EmptyQueue();
1.42      foxr      773: 	CloseAllLondConnections; # Should all already be closed but...
1.4       foxr      774:     }
                    775: }
1.1       foxr      776: 
                    777: =pod
1.3       albertel  778: 
1.1       foxr      779: =head2 LondReadable
1.3       albertel  780: 
1.1       foxr      781: This function is called whenever a lond connection
                    782: is readable.  The action is state dependent:
                    783: 
1.3       albertel  784: =head3 State=Initialized
                    785: 
                    786: We''re waiting for the challenge, this is a no-op until the
1.1       foxr      787: state changes.
1.3       albertel  788: 
1.1       foxr      789: =head3 State=Challenged 
1.3       albertel  790: 
                    791: The challenge has arrived we need to transition to Writable.
1.1       foxr      792: The connection must echo the challenge back.
1.3       albertel  793: 
1.1       foxr      794: =head3 State=ChallengeReplied
1.3       albertel  795: 
                    796: The challenge has been replied to.  The we are receiveing the 
1.1       foxr      797: 'ok' from the partner.
1.3       albertel  798: 
1.40      foxr      799: =head3  State=ReadingVersionString
                    800: 
                    801: We have requested the lond version and are reading the
                    802: version back.  Upon completion, we'll store the version away
                    803: for future use(?).
                    804: 
                    805: =head3 State=HostSet
                    806: 
                    807: We have selected the domain name of our peer (multhomed hosts)
                    808: and are getting the reply (presumably ok) back.
                    809: 
1.1       foxr      810: =head3 State=RequestingKey
1.3       albertel  811: 
                    812: The ok has been received and we need to send the request for
1.1       foxr      813: an encryption key.  Transition to writable for that.
1.3       albertel  814: 
1.1       foxr      815: =head3 State=ReceivingKey
1.3       albertel  816: 
                    817: The the key has been requested, now we are reading the new key.
                    818: 
1.1       foxr      819: =head3 State=Idle 
1.3       albertel  820: 
                    821: The encryption key has been negotiated or we have finished 
1.1       foxr      822: reading data from the a transaction.   If the callback data has
                    823: a client as well as the socket iformation, then we are 
                    824: doing a transaction and the data received is relayed to the client
                    825: before the socket is put on the idle list.
1.3       albertel  826: 
1.1       foxr      827: =head3 State=SendingRequest
1.3       albertel  828: 
                    829: I do not think this state can be received here, but if it is,
1.1       foxr      830: the appropriate thing to do is to transition to writable, and send
                    831: the request.
1.3       albertel  832: 
1.1       foxr      833: =head3 State=ReceivingReply
1.3       albertel  834: 
                    835: We finished sending the request to the server and now transition
1.1       foxr      836: to readable to receive the reply. 
                    837: 
                    838: The parameter to this function are:
1.3       albertel  839: 
1.1       foxr      840: The event. Implicit in this is the watcher and its data.  The data 
                    841: contains at least the lond connection object and, if a 
                    842: transaction is in progress, the socket attached to the local client.
                    843: 
1.3       albertel  844: =cut
1.1       foxr      845: 
                    846: sub LondReadable {
1.8       foxr      847: 
1.41      albertel  848:     my $Event      = shift;
                    849:     my $Watcher    = $Event->w;
                    850:     my $Socket     = $Watcher->data;
                    851:     my $client     = undef;
1.40      foxr      852: 
1.41      albertel  853:     &Debug(6,"LondReadable called state = ".$Socket->GetState());
1.40      foxr      854: 
                    855: 
1.41      albertel  856:     my $State = $Socket->GetState(); # All action depends on the state.
1.40      foxr      857: 
1.41      albertel  858:     SocketDump(6, $Socket);
                    859:     my $status = $Socket->Readable();
1.40      foxr      860: 
1.41      albertel  861:     &Debug(2, "Socket->Readable returned: $status");
1.40      foxr      862: 
1.41      albertel  863:     if($status != 0) {
                    864: 	# bad return from socket read. Currently this means that
                    865: 	# The socket has become disconnected. We fail the transaction.
1.40      foxr      866: 
1.41      albertel  867: 	Log("WARNING",
                    868: 	    "Lond connection lost.");
                    869: 	if(exists($ActiveTransactions{$Socket})) {
                    870: 	    FailTransaction($ActiveTransactions{$Socket});
1.56      foxr      871: 	} else {
                    872: 	    #  Socket is connecting and failed... need to mark
                    873: 	    #  no longer connecting.
                    874: 	   
                    875: 	    $LondConnecting = 0;
1.41      albertel  876: 	}
                    877: 	$Watcher->cancel();
                    878: 	KillSocket($Socket);
                    879: 	$ConnectionRetriesLeft--;       # Counts as connection failure
                    880: 	return;
                    881:     }
                    882:     SocketDump(6,$Socket);
1.17      foxr      883: 
1.41      albertel  884:     $State = $Socket->GetState(); # Update in case of transition.
                    885:     &Debug(6, "After read, state is ".$State);
1.1       foxr      886: 
1.41      albertel  887:     if($State eq "Initialized") {
1.1       foxr      888: 
                    889: 
1.41      albertel  890:     } elsif ($State eq "ChallengeReceived") {
1.1       foxr      891: 	#  The challenge must be echoed back;  The state machine
                    892: 	# in the connection takes care of setting that up.  Just
                    893: 	# need to transition to writable:
1.41      albertel  894: 	
                    895: 	$Watcher->cb(\&LondWritable);
                    896: 	$Watcher->poll("w");
1.1       foxr      897: 
1.41      albertel  898:     } elsif ($State eq "ChallengeReplied") {
1.1       foxr      899: 
1.41      albertel  900:     } elsif ($State eq "RequestingVersion") {
                    901: 	# Need to ask for the version... that is writiability:
1.1       foxr      902: 
1.41      albertel  903: 	$Watcher->cb(\&LondWritable);
                    904: 	$Watcher->poll("w");
                    905: 
                    906:     } elsif ($State eq "ReadingVersionString") {
                    907: 	# Read the rest of the version string... 
                    908:     } elsif ($State eq "SetHost") {
                    909: 	# Need to request the actual domain get set...
                    910: 
                    911: 	$Watcher->cb(\&LondWritable);
                    912: 	$Watcher->poll("w");
                    913:     } elsif ($State eq "HostSet") {
                    914: 	# Reading the 'ok' from the peer.
                    915: 
                    916:     } elsif ($State eq "RequestingKey") {
1.1       foxr      917: 	#  The ok was received.  Now we need to request the key
                    918: 	#  That requires us to be writable:
                    919: 
1.41      albertel  920: 	$Watcher->cb(\&LondWritable);
                    921: 	$Watcher->poll("w");
1.1       foxr      922: 
1.41      albertel  923:     } elsif ($State eq "ReceivingKey") {
1.1       foxr      924: 
1.41      albertel  925:     } elsif ($State eq "Idle") {
1.40      foxr      926:    
1.41      albertel  927: 	# This is as good a spot as any to get the peer version
                    928: 	# string:
1.40      foxr      929:    
1.41      albertel  930: 	if($LondVersion eq "unknown") {
                    931: 	    $LondVersion = $Socket->PeerVersion();
                    932: 	    Log("INFO", "Connected to lond version: $LondVersion");
                    933: 	}
1.1       foxr      934: 	# If necessary, complete a transaction and then go into the
                    935: 	# idle queue.
1.22      foxr      936: 	#  Note that a trasition to idle indicates a live lond
                    937: 	# on the other end so reset the connection retries.
                    938: 	#
1.41      albertel  939: 	$ConnectionRetriesLeft = $ConnectionRetries; # success resets the count
                    940: 	$Watcher->cancel();
                    941: 	if(exists($ActiveTransactions{$Socket})) {
                    942: 	    Debug(5,"Completing transaction!!");
                    943: 	    CompleteTransaction($Socket, 
                    944: 				$ActiveTransactions{$Socket});
                    945: 	} else {
                    946: 	    Log("SUCCESS", "Connection ".$ConnectionCount." to "
                    947: 		.$RemoteHost." now ready for action");
                    948: 	}
                    949: 	ServerToIdle($Socket);	# Next work unit or idle.
1.54      foxr      950: 
                    951: 	#
                    952: 	$LondConnecting = 0;	# Best spot I can think of for this.
                    953: 	# 
1.6       foxr      954: 	
1.41      albertel  955:     } elsif ($State eq "SendingRequest") {
1.1       foxr      956: 	#  We need to be writable for this and probably don't belong
                    957: 	#  here inthe first place.
                    958: 
1.73      albertel  959: 	Debug(6, "SendingRequest state encountered in readable");
1.41      albertel  960: 	$Watcher->poll("w");
                    961: 	$Watcher->cb(\&LondWritable);
1.1       foxr      962: 
1.41      albertel  963:     } elsif ($State eq "ReceivingReply") {
1.1       foxr      964: 
                    965: 
1.41      albertel  966:     } else {
                    967: 	# Invalid state.
                    968: 	Debug(4, "Invalid state in LondReadable");
                    969:     }
1.1       foxr      970: }
1.3       albertel  971: 
1.1       foxr      972: =pod
1.3       albertel  973: 
1.1       foxr      974: =head2 LondWritable
1.3       albertel  975: 
1.1       foxr      976: This function is called whenever a lond connection
                    977: becomes writable while there is a writeable monitoring
                    978: event.  The action taken is very state dependent:
1.3       albertel  979: 
1.1       foxr      980: =head3 State = Connected 
1.3       albertel  981: 
                    982: The connection is in the process of sending the 'init' hailing to the
                    983: lond on the remote end.  The connection object''s Writable member is
                    984: called.  On error, ConnectionError is called to destroy the connection
                    985: and remove it from the ActiveConnections hash
                    986: 
1.1       foxr      987: =head3 Initialized
1.3       albertel  988: 
                    989: 'init' has been sent, writability monitoring is removed and
                    990: readability monitoring is started with LondReadable as the callback.
                    991: 
1.1       foxr      992: =head3 ChallengeReceived
1.3       albertel  993: 
                    994: The connection has received the who are you challenge from the remote
                    995: system, and is in the process of sending the challenge
                    996: response. Writable is called.
                    997: 
1.1       foxr      998: =head3 ChallengeReplied
1.3       albertel  999: 
                   1000: The connection has replied to the initial challenge The we switch to
                   1001: monitoring readability looking for the server to reply with 'ok'.
                   1002: 
1.1       foxr     1003: =head3 RequestingKey
1.3       albertel 1004: 
                   1005: The connection is in the process of requesting its encryption key.
                   1006: Writable is called.
                   1007: 
1.1       foxr     1008: =head3 ReceivingKey
1.3       albertel 1009: 
                   1010: The connection has sent the request for a key.  Switch to readability
                   1011: monitoring to accept the key
                   1012: 
1.1       foxr     1013: =head3 SendingRequest
1.3       albertel 1014: 
                   1015: The connection is in the process of sending a request to the server.
                   1016: This request is part of a client transaction.  All the states until
                   1017: now represent the client setup protocol. Writable is called.
                   1018: 
1.1       foxr     1019: =head3 ReceivingReply
                   1020: 
1.3       albertel 1021: The connection has sent a request.  Now it must receive a reply.
                   1022: Readability monitoring is requested.
                   1023: 
                   1024: This function is an event handler and therefore receives as
1.1       foxr     1025: a parameter the event that has fired.  The data for the watcher
                   1026: of this event is a reference to a list of one or two elements,
                   1027: depending on state. The first (and possibly only) element is the
                   1028: socket.  The second (present only if a request is in progress)
                   1029: is the socket on which to return a reply to the caller.
                   1030: 
                   1031: =cut
1.3       albertel 1032: 
1.1       foxr     1033: sub LondWritable {
                   1034:     my $Event   = shift;
                   1035:     my $Watcher = $Event->w;
1.8       foxr     1036:     my $Socket  = $Watcher->data;
                   1037:     my $State   = $Socket->GetState();
1.1       foxr     1038: 
1.8       foxr     1039:     Debug(6,"LondWritable State = ".$State."\n");
1.1       foxr     1040: 
1.8       foxr     1041:  
1.1       foxr     1042:     #  Figure out what to do depending on the state of the socket:
                   1043:     
                   1044: 
                   1045: 
                   1046: 
                   1047:     SocketDump(6,$Socket);
                   1048: 
1.42      foxr     1049:     #  If the socket is writable, we must always write.
                   1050:     # Only by writing will we undergo state transitions.
                   1051:     # Old logic wrote in state specific code below, however
                   1052:     # That forces us at least through another invocation of
                   1053:     # this function after writability is possible again.
                   1054:     # This logic also factors out common code for handling
                   1055:     # write failures... in all cases, write failures 
                   1056:     # Kill the socket.
                   1057:     #  This logic makes the branches of the >big< if below
                   1058:     # so that the writing states are actually NO-OPs.
                   1059: 
                   1060:     if ($Socket->Writable() != 0) {
1.43      albertel 1061: 	#  The write resulted in an error.
                   1062: 	# We'll treat this as if the socket got disconnected:
                   1063: 	Log("WARNING", "Connection to ".$RemoteHost.
                   1064: 	    " has been disconnected");
                   1065: 	if(exists($ActiveTransactions{$Socket})) {
                   1066: 	    FailTransaction($ActiveTransactions{$Socket});
1.56      foxr     1067: 	} else {
                   1068: 	    #  In the process of conneting, so need to turn that off.
                   1069: 	    
                   1070: 	    $LondConnecting = 0;
1.43      albertel 1071: 	}
                   1072: 	$Watcher->cancel();
                   1073: 	KillSocket($Socket);
                   1074: 	return;
1.42      foxr     1075:     }
                   1076: 
                   1077: 
                   1078: 
1.41      albertel 1079:     if      ($State eq "Connected")         {
1.1       foxr     1080: 
1.41      albertel 1081: 	#  "init" is being sent...
1.42      foxr     1082:  
1.41      albertel 1083:     } elsif ($State eq "Initialized")       {
1.4       foxr     1084: 
1.41      albertel 1085: 	# Now that init was sent, we switch 
                   1086: 	# to watching for readability:
1.1       foxr     1087: 
1.41      albertel 1088: 	$Watcher->cb(\&LondReadable);
                   1089: 	$Watcher->poll("r");
                   1090: 	
                   1091:     } elsif ($State eq "ChallengeReceived") {
                   1092: 	# We received the challenge, now we 
                   1093: 	# are echoing it back. This is a no-op,
                   1094: 	# we're waiting for the state to change
1.1       foxr     1095: 	
1.41      albertel 1096:     } elsif ($State eq "ChallengeReplied")  {
                   1097: 	# The echo was sent back, so we switch
                   1098: 	# to watching readability.
                   1099: 
                   1100: 	$Watcher->cb(\&LondReadable);
                   1101: 	$Watcher->poll("r");
                   1102:     } elsif ($State eq "RequestingVersion") {
                   1103: 	# Sending the peer a version request...
1.42      foxr     1104: 
1.41      albertel 1105:     } elsif ($State eq "ReadingVersionString") {
                   1106: 	# Transition to read since we have sent the
                   1107: 	# version command and now just need to read the
                   1108: 	# version string from the peer:
1.40      foxr     1109:       
1.41      albertel 1110: 	$Watcher->cb(\&LondReadable);
                   1111: 	$Watcher->poll("r");
1.40      foxr     1112:       
1.41      albertel 1113:     } elsif ($State eq "SetHost") {
                   1114: 	#  Setting the remote domain...
1.42      foxr     1115: 
1.41      albertel 1116:     } elsif ($State eq "HostSet") {
                   1117: 	# Back to readable to get the ok.
1.40      foxr     1118:       
1.41      albertel 1119: 	$Watcher->cb(\&LondReadable);
                   1120: 	$Watcher->poll("r");
1.40      foxr     1121:       
                   1122: 
1.41      albertel 1123:     } elsif ($State eq "RequestingKey")     {
                   1124: 	# At this time we're requesting the key.
                   1125: 	# again, this is essentially a no-op.
                   1126: 
                   1127:     } elsif ($State eq "ReceivingKey")      {
                   1128: 	# Now we need to wait for the key
                   1129: 	# to come back from the peer:
                   1130: 
                   1131: 	$Watcher->cb(\&LondReadable);
                   1132: 	$Watcher->poll("r");
                   1133: 
                   1134:     } elsif ($State eq "SendingRequest")    {
1.40      foxr     1135:  
1.41      albertel 1136: 	# At this time we are sending a request to the
1.1       foxr     1137: 	# peer... write the next chunk:
                   1138: 
1.41      albertel 1139: 
                   1140:     } elsif ($State eq "ReceivingReply")    {
                   1141: 	# The send has completed.  Wait for the
                   1142: 	# data to come in for a reply.
                   1143: 	Debug(8,"Writable sent request/receiving reply");
                   1144: 	$Watcher->cb(\&LondReadable);
                   1145: 	$Watcher->poll("r");
1.1       foxr     1146: 
1.41      albertel 1147:     } else {
                   1148: 	#  Control only passes here on an error: 
                   1149: 	#  the socket state does not match any
                   1150: 	#  of the known states... so an error
                   1151: 	#  must be logged.
1.1       foxr     1152: 
1.41      albertel 1153: 	&Debug(4, "Invalid socket state ".$State."\n");
                   1154:     }
1.1       foxr     1155:     
                   1156: }
1.81      albertel 1157: 
1.6       foxr     1158: =pod
                   1159:     
                   1160: =cut
1.69      matthew  1161: 
1.81      albertel 1162: 
1.6       foxr     1163: sub QueueDelayed {
1.8       foxr     1164:     Debug(3,"QueueDelayed called");
                   1165: 
1.6       foxr     1166:     my $path = "$perlvar{'lonSockDir'}/delayed";
1.8       foxr     1167: 
                   1168:     Debug(4, "Delayed path: ".$path);
1.6       foxr     1169:     opendir(DIRHANDLE, $path);
1.75      albertel 1170: 
1.82      albertel 1171:     my $host_id_re = '(?:'.join('|',map {quotemeta($_)} (@all_host_ids)).')';
1.75      albertel 1172:     my @alldelayed = grep(/\.$host_id_re$/, readdir(DIRHANDLE));
1.6       foxr     1173:     closedir(DIRHANDLE);
1.75      albertel 1174:     foreach my $dfname (sort(@alldelayed)) {
                   1175: 	my $reqfile = "$path/$dfname";
                   1176: 	my ($host_id) = ($dfname =~ /\.([^.]*)$/);
                   1177: 	Debug(4, "queueing ".$reqfile." for $host_id");
1.6       foxr     1178: 	my $Handle = IO::File->new($reqfile);
                   1179: 	my $cmd    = <$Handle>;
1.8       foxr     1180: 	chomp $cmd;		# There may or may not be a newline...
1.12      foxr     1181: 	$cmd = $cmd."\n";	# now for sure there's exactly one newline.
1.75      albertel 1182: 	my $Transaction = LondTransaction->new("sethost:$host_id:$cmd");
1.7       foxr     1183: 	$Transaction->SetDeferred($reqfile);
                   1184: 	QueueTransaction($Transaction);
1.6       foxr     1185:     }
                   1186:     
                   1187: }
1.1       foxr     1188: 
                   1189: =pod
1.3       albertel 1190: 
1.1       foxr     1191: =head2 MakeLondConnection
1.3       albertel 1192: 
                   1193: Create a new lond connection object, and start it towards its initial
                   1194: idleness.  Once idle, it becomes elligible to receive transactions
                   1195: from the work queue.  If the work queue is not empty when the
                   1196: connection is completed and becomes idle, it will dequeue an entry and
                   1197: start off on it.
                   1198: 
1.1       foxr     1199: =cut
1.3       albertel 1200: 
1.1       foxr     1201: sub MakeLondConnection {     
                   1202:     Debug(4,"MakeLondConnection to ".GetServerHost()." on port "
                   1203: 	  .GetServerPort());
                   1204: 
                   1205:     my $Connection = LondConnection->new(&GetServerHost(),
1.81      albertel 1206: 					 &GetServerPort(),
                   1207: 					 &GetHostId());
1.1       foxr     1208: 
1.30      foxr     1209:     if($Connection eq undef) {	# Needs to be more robust later.
1.9       foxr     1210: 	Log("CRITICAL","Failed to make a connection with lond.");
1.10      foxr     1211: 	$ConnectionRetriesLeft--;
                   1212: 	return 0;		# Failure.
1.5       foxr     1213:     }  else {
1.22      foxr     1214: 
1.82      albertel 1215: 	$LondConnecting = 1;	# Connection in progress.
1.5       foxr     1216: 	# The connection needs to have writability 
                   1217: 	# monitored in order to send the init sequence
                   1218: 	# that starts the whole authentication/key
                   1219: 	# exchange underway.
                   1220: 	#
                   1221: 	my $Socket = $Connection->GetSocket();
1.30      foxr     1222: 	if($Socket eq undef) {
1.64      foxr     1223: 	    &child_exit(-1, "did not get a socket from the connection");
1.5       foxr     1224: 	} else {
                   1225: 	    &Debug(9,"MakeLondConnection got socket: ".$Socket);
                   1226: 	}
1.1       foxr     1227: 	
1.21      foxr     1228: 	$Connection->SetTimeoutCallback(\&SocketTimeout);
                   1229: 
1.23      foxr     1230: 	my $event = Event->io(fd       => $Socket,
1.5       foxr     1231: 			   poll     => 'w',
                   1232: 			   cb       => \&LondWritable,
1.8       foxr     1233: 			   data     => $Connection,
1.5       foxr     1234: 			   desc => 'Connection to lond server');
                   1235: 	$ActiveConnections{$Connection} = $event;
1.52      foxr     1236: 	if ($ConnectionCount == 0) {
                   1237: 	    &SetupTimer;	# Need to handle timeouts with connections...
                   1238: 	}
1.5       foxr     1239: 	$ConnectionCount++;
1.8       foxr     1240: 	Debug(4, "Connection count = ".$ConnectionCount);
1.6       foxr     1241: 	if($ConnectionCount == 1) { # First Connection:
                   1242: 	    QueueDelayed;
                   1243: 	}
1.9       foxr     1244: 	Log("SUCESS", "Created connection ".$ConnectionCount
                   1245: 	    ." to host ".GetServerHost());
1.10      foxr     1246: 	return 1;		# Return success.
1.1       foxr     1247:     }
                   1248:     
                   1249: }
1.3       albertel 1250: 
1.1       foxr     1251: =pod
1.3       albertel 1252: 
1.1       foxr     1253: =head2 StartRequest
1.3       albertel 1254: 
                   1255: Starts a lond request going on a specified lond connection.
                   1256: parameters are:
                   1257: 
                   1258: =item $Lond
                   1259: 
                   1260: Connection to the lond that will send the transaction and receive the
                   1261: reply.
                   1262: 
                   1263: =item $Client
                   1264: 
                   1265: Connection to the client that is making this request We got the
                   1266: request from this socket, and when the request has been relayed to
                   1267: lond and we get a reply back from lond it will get sent to this
                   1268: socket.
                   1269: 
                   1270: =item $Request
                   1271: 
                   1272: The text of the request to send.
                   1273: 
1.1       foxr     1274: =cut
                   1275: 
                   1276: sub StartRequest {
1.47      foxr     1277: 
                   1278:     my ($Lond, $Request) = @_;
1.1       foxr     1279:     
1.7       foxr     1280:     Debug(6, "StartRequest: ".$Request->getRequest());
1.1       foxr     1281: 
                   1282:     my $Socket = $Lond->GetSocket();
                   1283:     
1.7       foxr     1284:     $Request->Activate($Lond);
                   1285:     $ActiveTransactions{$Lond} = $Request;
1.1       foxr     1286: 
1.7       foxr     1287:     $Lond->InitiateTransaction($Request->getRequest());
1.23      foxr     1288:     my $event = Event->io(fd      => $Socket,
1.1       foxr     1289: 		       poll    => "w",
                   1290: 		       cb      => \&LondWritable,
                   1291: 		       data    => $Lond,
                   1292: 		       desc    => "lond transaction connection");
                   1293:     $ActiveConnections{$Lond} = $event;
                   1294:     Debug(8," Start Request made watcher data with ".$event->data."\n");
                   1295: }
                   1296: 
                   1297: =pod
1.3       albertel 1298: 
1.1       foxr     1299: =head2 QueueTransaction
1.3       albertel 1300: 
                   1301: If there is an idle lond connection, it is put to work doing this
                   1302: transaction.  Otherwise, the transaction is placed in the work queue.
                   1303: If placed in the work queue and the maximum number of connections has
                   1304: not yet been created, a new connection will be started.  Our goal is
                   1305: to eventually have a sufficient number of connections that the work
                   1306: queue will typically be empty.  parameters are:
                   1307: 
                   1308: =item Socket
                   1309: 
                   1310: open on the lonc client.
                   1311: 
                   1312: =item Request
                   1313: 
                   1314: data to send to the lond.
1.1       foxr     1315: 
                   1316: =cut
1.3       albertel 1317: 
1.1       foxr     1318: sub QueueTransaction {
                   1319: 
1.7       foxr     1320:     my $requestData   = shift;	# This is a LondTransaction.
                   1321:     my $cmd           = $requestData->getRequest();
                   1322: 
                   1323:     Debug(6,"QueueTransaction: ".$cmd);
1.1       foxr     1324: 
                   1325:     my $LondSocket    = $IdleConnections->pop();
                   1326:     if(!defined $LondSocket) {	# Need to queue request.
1.29      foxr     1327: 	Debug(5,"Must queue...");
1.1       foxr     1328: 	$WorkQueue->enqueue($requestData);
1.56      foxr     1329: 	Debug(5, "Queue Transaction startnew $ConnectionCount $LondConnecting");
                   1330: 	if(($ConnectionCount < $MaxConnectionCount)   && (! $LondConnecting)) {
                   1331: 
1.22      foxr     1332: 	    if($ConnectionRetriesLeft > 0) {
1.29      foxr     1333: 		Debug(5,"Starting additional lond connection");
1.56      foxr     1334: 		if(&MakeLondConnection() == 0) {
1.22      foxr     1335: 		    EmptyQueue();	# Fail transactions, can't make connection.
1.42      foxr     1336: 		    CloseAllLondConnections; # Should all be closed but...
1.22      foxr     1337: 		}
                   1338: 	    } else {
                   1339: 		ShowStatus(GetServerHost()." >>> DEAD !!!! <<<");
1.56      foxr     1340: 		$LondConnecting = 0;
1.22      foxr     1341: 		EmptyQueue();	# It's worse than that ... he's dead Jim.
1.42      foxr     1342: 		CloseAllLondConnections; # Should all be closed but..
1.17      foxr     1343: 	    }
1.1       foxr     1344: 	}
                   1345:     } else {			# Can start the request:
                   1346: 	Debug(8,"Can start...");
1.7       foxr     1347: 	StartRequest($LondSocket,  $requestData);
1.1       foxr     1348:     }
                   1349: }
                   1350: 
                   1351: #-------------------------- Lonc UNIX socket handling ---------------------
1.3       albertel 1352: 
1.1       foxr     1353: =pod
1.3       albertel 1354: 
1.1       foxr     1355: =head2 ClientRequest
1.3       albertel 1356: Callback that is called when data can be read from the UNIX domain
                   1357: socket connecting us with an apache server process.
1.1       foxr     1358: 
                   1359: =cut
                   1360: 
                   1361: sub ClientRequest {
                   1362:     Debug(6, "ClientRequest");
                   1363:     my $event   = shift;
                   1364:     my $watcher = $event->w;
                   1365:     my $socket  = $watcher->fd;
                   1366:     my $data    = $watcher->data;
                   1367:     my $thisread;
                   1368: 
                   1369:     Debug(9, "  Watcher named: ".$watcher->desc);
                   1370: 
                   1371:     my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0);
                   1372:     Debug(8, "rcv:  data length = ".length($thisread)
                   1373: 	  ." read =".$thisread);
1.29      foxr     1374:     unless (defined $rv  && length($thisread)) {
1.1       foxr     1375: 	 # Likely eof on socket.
                   1376: 	Debug(5,"Client Socket closed on lonc for ".$RemoteHost);
                   1377: 	close($socket);
                   1378: 	$watcher->cancel();
                   1379: 	delete($ActiveClients{$socket});
1.10      foxr     1380: 	return;
1.1       foxr     1381:     }
                   1382:     Debug(8,"Data: ".$data." this read: ".$thisread);
                   1383:     $data = $data.$thisread;	# Append new data.
                   1384:     $watcher->data($data);
1.44      albertel 1385:     if($data =~ /\n$/) {	# Request entirely read.
1.87      albertel 1386: 	if ($data eq "close_connection_exit\n") {
1.9       foxr     1387: 	    Log("CRITICAL",
                   1388: 		"Request Close Connection ... exiting");
                   1389: 	    CloseAllLondConnections();
                   1390: 	    exit;
1.87      albertel 1391: 	} elsif ($data eq "reset_retries\n") {
                   1392: 	    Log("INFO", "Resetting Connection Retries.");
                   1393: 	    $ConnectionRetriesLeft = $ConnectionRetries;
                   1394: 	    &UpdateStatus();
                   1395: 	    my $Transaction = LondTransaction->new($data);
                   1396: 	    $Transaction->SetClient($socket);
                   1397: 	    StartClientReply($Transaction, "ok\n");
                   1398: 	    $watcher->cancel();
                   1399: 	    return;
1.9       foxr     1400: 	}
1.1       foxr     1401: 	Debug(8, "Complete transaction received: ".$data);
1.87      albertel 1402: 	if ($LogTransactions) {
1.39      foxr     1403: 	    Log("SUCCESS", "Transaction: '$data'"); # Transaction has \n.
                   1404: 	}
1.8       foxr     1405: 	my $Transaction = LondTransaction->new($data);
1.7       foxr     1406: 	$Transaction->SetClient($socket);
                   1407: 	QueueTransaction($Transaction);
1.1       foxr     1408: 	$watcher->cancel();	# Done looking for input data.
                   1409:     }
                   1410: 
                   1411: }
                   1412: 
1.62      foxr     1413: #
                   1414: #     Accept a connection request for a client (lonc child) and
                   1415: #    start up an event watcher to keep an eye on input from that 
                   1416: #    Event.  This can be called both from NewClient and from
1.80      albertel 1417: #    ChildProcess.
1.62      foxr     1418: # Parameters:
                   1419: #    $socket       - The listener socket.
                   1420: # Returns:
                   1421: #   NONE
                   1422: # Side Effects:
                   1423: #    An event is made to watch the accepted connection.
                   1424: #    Active clients hash is updated to reflect the new connection.
                   1425: #    The client connection count is incremented.
                   1426: #
                   1427: sub accept_client {
                   1428:     my ($socket) = @_;
                   1429: 
                   1430:     Debug(8, "Entering accept for lonc UNIX socket\n");
                   1431:     my $connection = $socket->accept();	# Accept the client connection.
                   1432:     Debug(8,"Connection request accepted from "
                   1433: 	  .GetPeername($connection, AF_UNIX));
                   1434: 
                   1435: 
                   1436:     my $description = sprintf("Connection to lonc client %d",
                   1437: 			      $ClientConnection);
                   1438:     Debug(9, "Creating event named: ".$description);
                   1439:     Event->io(cb      => \&ClientRequest,
                   1440: 	      poll    => 'r',
                   1441: 	      desc    => $description,
                   1442: 	      data    => "",
                   1443: 	      fd      => $connection);
                   1444:     $ActiveClients{$connection} = $ClientConnection;
                   1445:     $ClientConnection++;
                   1446: }
1.1       foxr     1447: 
                   1448: =pod
1.3       albertel 1449: 
1.1       foxr     1450: =head2  NewClient
1.3       albertel 1451: 
                   1452: Callback that is called when a connection is received on the unix
                   1453: socket for a new client of lonc.  The callback is parameterized by the
                   1454: event.. which is a-priori assumed to be an io event, and therefore has
                   1455: an fd member that is the Listener socket.  We Accept the connection
                   1456: and register a new event on the readability of that socket:
                   1457: 
1.1       foxr     1458: =cut
1.3       albertel 1459: 
1.1       foxr     1460: sub NewClient {
                   1461:     Debug(6, "NewClient");
                   1462:     my $event      = shift;		# Get the event parameters.
                   1463:     my $watcher    = $event->w; 
                   1464:     my $socket     = $watcher->fd;	# Get the event' socket.
                   1465: 
1.62      foxr     1466:     &accept_client($socket);
1.1       foxr     1467: }
1.3       albertel 1468: 
                   1469: =pod
                   1470: 
                   1471: =head2 GetLoncSocketPath
                   1472: 
                   1473: Returns the name of the UNIX socket on which to listen for client
                   1474: connections.
1.1       foxr     1475: 
1.58      foxr     1476: =head2 Parameters:
                   1477: 
                   1478:     host (optional)  - Name of the host socket to return.. defaults to
                   1479:                        the return from GetServerHost().
                   1480: 
1.1       foxr     1481: =cut
1.3       albertel 1482: 
1.1       foxr     1483: sub GetLoncSocketPath {
1.58      foxr     1484: 
                   1485:     my $host = GetServerHost();	# Default host.
                   1486:     if (@_) {
                   1487: 	($host)  = @_;		# Override if supplied.
                   1488:     }
                   1489:     return $UnixSocketDir."/".$host;
1.1       foxr     1490: }
                   1491: 
1.3       albertel 1492: =pod
                   1493: 
                   1494: =head2 GetServerHost
                   1495: 
                   1496: Returns the host whose lond we talk with.
                   1497: 
1.1       foxr     1498: =cut
1.3       albertel 1499: 
1.7       foxr     1500: sub GetServerHost {
1.1       foxr     1501:     return $RemoteHost;		# Setup by the fork.
                   1502: }
1.3       albertel 1503: 
                   1504: =pod
                   1505: 
1.81      albertel 1506: =head2 GetServerId
                   1507: 
                   1508: Returns the hostid whose lond we talk with.
                   1509: 
                   1510: =cut
                   1511: 
                   1512: sub GetHostId {
                   1513:     return $RemoteHostId;		# Setup by the fork.
                   1514: }
                   1515: 
                   1516: =pod
                   1517: 
1.3       albertel 1518: =head2 GetServerPort
                   1519: 
                   1520: Returns the lond port number.
                   1521: 
1.1       foxr     1522: =cut
1.3       albertel 1523: 
1.7       foxr     1524: sub GetServerPort {
1.1       foxr     1525:     return $perlvar{londPort};
                   1526: }
1.3       albertel 1527: 
                   1528: =pod
                   1529: 
                   1530: =head2 SetupLoncListener
                   1531: 
                   1532: Setup a lonc listener event.  The event is called when the socket
                   1533: becomes readable.. that corresponds to the receipt of a new
                   1534: connection.  The event handler established will accept the connection
                   1535: (creating a communcations channel), that int turn will establish
                   1536: another event handler to subess requests.
1.1       foxr     1537: 
1.58      foxr     1538: =head2  Parameters:
                   1539: 
                   1540:    host (optional)   Name of the host to set up a unix socket to.
                   1541: 
1.1       foxr     1542: =cut
1.3       albertel 1543: 
1.1       foxr     1544: sub SetupLoncListener {
1.78      albertel 1545:     my ($host,$SocketName) = @_;
                   1546:     if (!$host) { $host = &GetServerHost(); }
                   1547:     if (!$SocketName) { $SocketName = &GetLoncSocketPath($host); }
1.1       foxr     1548: 
1.78      albertel 1549: 
                   1550:     unlink($SocketName);
1.58      foxr     1551: 
1.1       foxr     1552:     my $socket;
1.7       foxr     1553:     unless ($socket =IO::Socket::UNIX->new(Local  => $SocketName,
1.55      albertel 1554: 					    Listen => 250, 
1.1       foxr     1555: 					    Type   => SOCK_STREAM)) {
1.64      foxr     1556: 	if($I_am_child) {
                   1557: 	    &child_exit(-1, "Failed to create a lonc listener socket");
                   1558: 	} else {
                   1559: 	    die "Failed to create a lonc listner socket";
                   1560: 	}
1.1       foxr     1561:     }
1.59      foxr     1562:     return $socket;
1.1       foxr     1563: }
                   1564: 
1.39      foxr     1565: #
                   1566: #   Toggle transaction logging.
                   1567: #  Implicit inputs:  
                   1568: #     LogTransactions
                   1569: #  Implicit Outputs:
                   1570: #     LogTransactions
                   1571: sub ToggleTransactionLogging {
                   1572:     print STDERR "Toggle transaction logging...\n";
                   1573:     if(!$LogTransactions) {
                   1574: 	$LogTransactions = 1;
                   1575:     } else {
                   1576: 	$LogTransactions = 0;
                   1577:     }
                   1578: 
                   1579: 
                   1580:     Log("SUCCESS", "Toggled transaction logging: $LogTransactions \n");
                   1581: }
                   1582: 
1.14      foxr     1583: =pod 
                   1584: 
                   1585: =head2 ChildStatus
                   1586:  
                   1587: Child USR1 signal handler to report the most recent status
                   1588: into the status file.
                   1589: 
1.22      foxr     1590: We also use this to reset the retries count in order to allow the
                   1591: client to retry connections with a previously dead server.
1.69      matthew  1592: 
1.14      foxr     1593: =cut
1.46      albertel 1594: 
1.14      foxr     1595: sub ChildStatus {
                   1596:     my $event = shift;
                   1597:     my $watcher = $event->w;
                   1598: 
                   1599:     Debug(2, "Reporting child status because : ".$watcher->data);
                   1600:     my $docdir = $perlvar{'lonDocRoot'};
1.67      albertel 1601:     
                   1602:     open(LOG,">>$docdir/lon-status/loncstatus.txt");
                   1603:     flock(LOG,LOCK_EX);
                   1604:     print LOG $$."\t".$RemoteHost."\t".$Status."\t".
1.14      foxr     1605: 	$RecentLogEntry."\n";
1.38      foxr     1606:     #
                   1607:     #  Write out information about each of the connections:
                   1608:     #
1.46      albertel 1609:     if ($DebugLevel > 2) {
1.67      albertel 1610: 	print LOG "Active connection statuses: \n";
1.46      albertel 1611: 	my $i = 1;
                   1612: 	print STDERR  "================================= Socket Status Dump:\n";
                   1613: 	foreach my $item (keys %ActiveConnections) {
                   1614: 	    my $Socket = $ActiveConnections{$item}->data;
                   1615: 	    my $state  = $Socket->GetState();
1.67      albertel 1616: 	    print LOG "Connection $i State: $state\n";
1.46      albertel 1617: 	    print STDERR "---------------------- Connection $i \n";
1.48      foxr     1618: 	    $Socket->Dump(-1);	# Ensure it gets dumped..
1.46      albertel 1619: 	    $i++;	
                   1620: 	}
1.38      foxr     1621:     }
1.67      albertel 1622:     flock(LOG,LOCK_UN);
                   1623:     close(LOG);
1.22      foxr     1624:     $ConnectionRetriesLeft = $ConnectionRetries;
1.70      albertel 1625:     UpdateStatus();
1.14      foxr     1626: }
                   1627: 
1.1       foxr     1628: =pod
1.3       albertel 1629: 
1.10      foxr     1630: =head2 SignalledToDeath
                   1631: 
                   1632: Called in response to a signal that causes a chid process to die.
                   1633: 
                   1634: =cut
                   1635: 
                   1636: 
                   1637: sub SignalledToDeath {
1.14      foxr     1638:     my $event  = shift;
                   1639:     my $watcher= $event->w;
                   1640: 
                   1641:     Debug(2,"Signalled to death! via ".$watcher->data);
1.17      foxr     1642:     my ($signal) = $watcher->data;
1.10      foxr     1643:     chomp($signal);
                   1644:     Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
                   1645: 	."died through "."\"$signal\"");
1.68      albertel 1646:     #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
                   1647: #	    ."\"$signal\"");
1.12      foxr     1648:     exit 0;
1.10      foxr     1649: 
                   1650: }
1.16      foxr     1651: 
1.69      matthew  1652: =pod
                   1653: 
1.16      foxr     1654: =head2 ToggleDebug
                   1655: 
                   1656: This sub toggles trace debugging on and off.
                   1657: 
                   1658: =cut
                   1659: 
                   1660: sub ToggleDebug {
                   1661:     my $Current    = $DebugLevel;
                   1662:        $DebugLevel = $NextDebugLevel;
                   1663:        $NextDebugLevel = $Current;
                   1664: 
                   1665:     Log("SUCCESS", "New debugging level for $RemoteHost now $DebugLevel");
                   1666: 
                   1667: }
                   1668: 
1.69      matthew  1669: =pod
                   1670: 
1.1       foxr     1671: =head2 ChildProcess
                   1672: 
                   1673: This sub implements a child process for a single lonc daemon.
1.61      foxr     1674: Optional parameter:
                   1675:    $socket  - if provided, this is a socket already open for listen
                   1676:               on the client socket. Otherwise, a new listen is set up.
1.1       foxr     1677: 
                   1678: =cut
                   1679: 
                   1680: sub ChildProcess {
1.80      albertel 1681:     #  We've inherited all the
1.62      foxr     1682:     #  events of our parent and those have to be cancelled or else
                   1683:     #  all holy bloody chaos will result.. trust me, I already made
                   1684:     #  >that< mistake.
                   1685: 
                   1686:     my $host = GetServerHost();
                   1687:     foreach my $listener (keys %parent_dispatchers) {
                   1688: 	my $watcher = $parent_dispatchers{$listener};
                   1689: 	my $s       = $watcher->fd;
                   1690: 	if ($listener ne $host) { # Close everyone but me.
                   1691: 	    Debug(5, "Closing listen socket for $listener");
                   1692: 	    $s->close();
                   1693: 	}
                   1694: 	Debug(5, "Killing watcher for $listener");
                   1695: 
                   1696: 	$watcher->cancel();
1.65      foxr     1697: 	delete($parent_dispatchers{$listener});
1.62      foxr     1698: 
                   1699:     }
1.65      foxr     1700: 
                   1701:     #  kill off the parent's signal handlers too!  
                   1702:     #
                   1703: 
                   1704:     for my $handler (keys %parent_handlers) {
                   1705: 	my $watcher = $parent_handlers{$handler};
                   1706: 	$watcher->cancel();
                   1707: 	delete($parent_handlers{$handler});
                   1708:     }
                   1709: 
1.64      foxr     1710:     $I_am_child    = 1;		# Seems like in spite of it all I may still getting
                   1711:                                 # parent event dispatches.. flag I'm a child.
1.1       foxr     1712: 
                   1713: 
1.14      foxr     1714:     #
                   1715:     #  Signals must be handled by the Event framework...
1.61      foxr     1716:     #
1.14      foxr     1717: 
                   1718:     Event->signal(signal   => "QUIT",
                   1719: 		  cb       => \&SignalledToDeath,
                   1720: 		  data     => "QUIT");
                   1721:     Event->signal(signal   => "HUP",
                   1722: 		  cb       => \&ChildStatus,
                   1723: 		  data     => "HUP");
                   1724:     Event->signal(signal   => "USR1",
                   1725: 		  cb       => \&ChildStatus,
                   1726: 		  data     => "USR1");
1.39      foxr     1727:     Event->signal(signal   => "USR2",
                   1728: 		  cb       => \&ToggleTransactionLogging);
1.16      foxr     1729:     Event->signal(signal   => "INT",
                   1730: 		  cb       => \&ToggleDebug,
                   1731: 		  data     => "INT");
1.1       foxr     1732: 
1.62      foxr     1733:     #  Figure out if we got passed a socket or need to open one to listen for
                   1734:     #  client requests.
                   1735: 
1.61      foxr     1736:     my ($socket) = @_;
                   1737:     if (!$socket) {
                   1738: 
                   1739: 	$socket =  SetupLoncListener();
                   1740:     }
1.62      foxr     1741:     #  Establish an event to listen for client connection requests.
                   1742: 
                   1743: 
1.59      foxr     1744:     Event->io(cb   => \&NewClient,
                   1745: 	      poll => 'r',
                   1746: 	      desc => 'Lonc Listener Unix Socket',
                   1747: 	      fd   => $socket);
1.1       foxr     1748:     
1.76      albertel 1749:     $Event::DebugLevel = $DebugLevel;
1.1       foxr     1750:     
                   1751:     Debug(9, "Making initial lond connection for ".$RemoteHost);
                   1752: 
                   1753: # Setup the initial server connection:
                   1754:     
1.62      foxr     1755:      # &MakeLondConnection(); // let first work request do it.
1.10      foxr     1756: 
1.80      albertel 1757:     #  need to accept the connection since the event may  not fire.
1.62      foxr     1758: 
1.80      albertel 1759:     &accept_client($socket);
1.5       foxr     1760: 
1.1       foxr     1761:     Debug(9,"Entering event loop");
                   1762:     my $ret = Event::loop();		#  Start the main event loop.
                   1763:     
                   1764:     
1.64      foxr     1765:     &child_exit (-1,"Main event loop exited!!!");
1.1       foxr     1766: }
                   1767: 
                   1768: #  Create a new child for host passed in:
                   1769: 
                   1770: sub CreateChild {
1.81      albertel 1771:     my ($host, $hostid) = @_;
1.52      foxr     1772: 
1.12      foxr     1773:     my $sigset = POSIX::SigSet->new(SIGINT);
                   1774:     sigprocmask(SIG_BLOCK, $sigset);
1.1       foxr     1775:     $RemoteHost = $host;
1.9       foxr     1776:     Log("CRITICAL", "Forking server for ".$host);
1.23      foxr     1777:     my $pid          = fork;
1.1       foxr     1778:     if($pid) {			# Parent
1.17      foxr     1779: 	$RemoteHost = "Parent";
1.83      albertel 1780: 	$ChildPid{$pid} = $host;
1.12      foxr     1781: 	sigprocmask(SIG_UNBLOCK, $sigset);
1.82      albertel 1782: 	undef(@all_host_ids);
1.1       foxr     1783:     } else {			# child.
1.81      albertel 1784: 	$RemoteHostId = $hostid;
1.5       foxr     1785: 	ShowStatus("Connected to ".$RemoteHost);
1.23      foxr     1786: 	$SIG{INT} = 'DEFAULT';
1.12      foxr     1787: 	sigprocmask(SIG_UNBLOCK, $sigset);
1.81      albertel 1788: 	&ChildProcess();		# Does not return.
1.1       foxr     1789:     }
1.61      foxr     1790: }
1.1       foxr     1791: 
1.61      foxr     1792: # parent_client_connection:
                   1793: #    Event handler that processes client connections for the parent process.
                   1794: #    This sub is called when the parent is listening on a socket and
                   1795: #    a connection request arrives.  We must:
                   1796: #     Start a child process to accept the connection request.
                   1797: #     Kill our listen on the socket.
                   1798: # Parameter:
                   1799: #    event       - The event object that was created to monitor this socket.
                   1800: #                  event->w->fd is the socket.
                   1801: # Returns:
                   1802: #    NONE
                   1803: #
                   1804: sub parent_client_connection {
1.62      foxr     1805:     if ($I_am_child) {
                   1806: 	#  Should not get here, but seem to anyway:
                   1807: 	&Debug(5," Child caught parent client connection event!!");
                   1808: 	my ($event) = @_;
                   1809: 	my $watcher = $event->w;
                   1810: 	$watcher->cancel();	# Try to kill it off again!!
                   1811:     } else {
                   1812: 	&Debug(9, "parent_client_connection");
                   1813: 	my ($event)   = @_;
                   1814: 	my $watcher   = $event->w;
                   1815: 	my $socket    = $watcher->fd;
1.81      albertel 1816: 	my $connection = $socket->accept();	# Accept the client connection.
                   1817: 	Event->io(cb      => \&get_remote_hostname,
                   1818: 		  poll    => 'r',
                   1819: 		  data    => "",
                   1820: 		  fd      => $connection);
1.77      albertel 1821:     }
                   1822: }
                   1823: 
                   1824: sub get_remote_hostname {
1.82      albertel 1825:     my ($event)   = @_;
                   1826:     my $watcher   = $event->w;
                   1827:     my $socket    = $watcher->fd;
                   1828: 
                   1829:     my $thisread;
                   1830:     my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0);
                   1831:     Debug(8, "rcv:  data length = ".length($thisread)." read =".$thisread);
                   1832:     if (!defined($rv) || length($thisread) == 0) {
                   1833: 	# Likely eof on socket.
                   1834: 	Debug(5,"Client Socket closed on lonc for p_c_c");
                   1835: 	close($socket);
                   1836: 	$watcher->cancel();
                   1837: 	return;
                   1838:     }
                   1839: 
                   1840:     my $data    = $watcher->data().$thisread;
                   1841:     $watcher->data($data);
                   1842:     if($data =~ /\n$/) {	# Request entirely read.
                   1843: 	chomp($data);
                   1844:     } else {
                   1845: 	return;
                   1846:     }
1.77      albertel 1847: 
1.82      albertel 1848:     &Debug(5,"Creating child for $data (parent_client_connection)");
                   1849:     (my $hostname,my $lonid,@all_host_ids) = split(':',$data);
1.83      albertel 1850:     $ChildHost{$hostname}++;
                   1851:     if ($ChildHost{$hostname} == 1) {
                   1852: 	&CreateChild($hostname,$lonid);
                   1853:     } else {
                   1854: 	&Log('WARNING',"Request for a second child on $hostname");
                   1855:     }
1.82      albertel 1856:     # Clean up the listen since now the child takes over until it exits.
                   1857:     $watcher->cancel();		# Nolonger listening to this event
                   1858:     $socket->send("done\n");
                   1859:     $socket->close();
1.61      foxr     1860: }
                   1861: 
                   1862: # parent_listen:
                   1863: #    Opens a socket and starts a listen for the parent process on a client UNIX
                   1864: #    domain socket.
                   1865: #
                   1866: #    This involves:
                   1867: #       Creating a socket for listen.
                   1868: #       Removing any socket lock file
                   1869: #       Adding an event handler for this socket becoming readable
                   1870: #         To the parent's event dispatcher.
                   1871: # Parameters:
                   1872: #    loncapa_host    - LonCAPA cluster name of the host represented by the client
                   1873: #                      socket.
                   1874: # Returns:
                   1875: #    NONE
                   1876: #
                   1877: sub parent_listen {
                   1878:     my ($loncapa_host) = @_;
                   1879:     Debug(5, "parent_listen: $loncapa_host");
                   1880: 
1.78      albertel 1881:     my ($socket,$file);
                   1882:     if (!$loncapa_host) {
                   1883: 	$loncapa_host = 'common_parent';
                   1884: 	$file         = $perlvar{'lonSockCreate'};
                   1885:     } else {
                   1886: 	$file         = &GetLoncSocketPath($loncapa_host);
                   1887:     }
                   1888:     $socket = &SetupLoncListener($loncapa_host,$file);
                   1889: 
1.62      foxr     1890:     $listening_to{$socket} = $loncapa_host;
1.61      foxr     1891:     if (!$socket) {
                   1892: 	die "Unable to create a listen socket for $loncapa_host";
                   1893:     }
                   1894:     
1.78      albertel 1895:     my $lock_file = $file.".lock";
1.61      foxr     1896:     unlink($lock_file);		# No problem if it doesn't exist yet [startup e.g.]
                   1897: 
1.77      albertel 1898:     my $watcher = 
                   1899: 	Event->io(cb    => \&parent_client_connection,
                   1900: 		  poll  => 'r',
                   1901: 		  desc  => "Parent listener unix socket ($loncapa_host)",
                   1902: 		  data => "",
                   1903: 		  fd    => $socket);
1.62      foxr     1904:     $parent_dispatchers{$loncapa_host} = $watcher;
1.61      foxr     1905: 
                   1906: }
                   1907: 
1.77      albertel 1908: sub parent_clean_up {
                   1909:     my ($loncapa_host) = @_;
1.87      albertel 1910:     Debug(1, "parent_clean_up: $loncapa_host");
1.77      albertel 1911: 
                   1912:     my $socket_file = &GetLoncSocketPath($loncapa_host);
                   1913:     unlink($socket_file);	# No problem if it doesn't exist yet [startup e.g.]
                   1914:     my $lock_file   = $socket_file.".lock";
                   1915:     unlink($lock_file);		# No problem if it doesn't exist yet [startup e.g.]
                   1916: }
                   1917: 
1.61      foxr     1918: 
1.83      albertel 1919: 
                   1920: #    This sub initiates a listen on the common unix domain lonc client socket.
                   1921: #    loncnew starts up with no children, and only spawns off children when a
                   1922: #    connection request occurs on the common client unix socket.  The spawned
                   1923: #    child continues to run until it has been idle a while at which point it
                   1924: #    eventually exits and once more the parent picks up the listen.
1.61      foxr     1925: #
                   1926: #  Parameters:
                   1927: #      NONE
                   1928: #  Implicit Inputs:
                   1929: #    The configuration file that has been read in by LondConnection.
                   1930: #  Returns:
                   1931: #     NONE
                   1932: #
1.77      albertel 1933: sub listen_on_common_socket {
                   1934:     Debug(5, "listen_on_common_socket");
1.78      albertel 1935:     &parent_listen();
1.77      albertel 1936: }
                   1937: 
1.63      foxr     1938: #   server_died is called whenever a child process exits.
                   1939: #   Since this is dispatched via a signal, we must process all
                   1940: #   dead children until there are no more left.  The action
                   1941: #   is to:
                   1942: #      - Remove the child from the bookeeping hashes
                   1943: #      - Re-establish a listen on the unix domain socket associated
                   1944: #        with that host.
                   1945: # Parameters:
                   1946: #    The event, but we don't actually care about it.
                   1947: sub server_died {
                   1948:     &Debug(9, "server_died called...");
                   1949:     
                   1950:     while(1) {			# Loop until waitpid nowait fails.
                   1951: 	my $pid = waitpid(-1, WNOHANG);
                   1952: 	if($pid <= 0) {
                   1953: 	    return;		# Nothing left to wait for.
                   1954: 	}
                   1955: 	# need the host to restart:
                   1956: 
1.83      albertel 1957: 	my $host = $ChildPid{$pid};
1.63      foxr     1958: 	if($host) {		# It's for real...
                   1959: 	    &Debug(9, "Caught sigchild for $host");
1.83      albertel 1960: 	    delete($ChildPid{$pid});
                   1961: 	    delete($ChildHost{$host});
1.81      albertel 1962: 	    &parent_clean_up($host);
                   1963: 
1.63      foxr     1964: 	} else {
                   1965: 	    &Debug(5, "Caught sigchild for pid not in hosts hash: $pid");
                   1966: 	}
                   1967:     }
                   1968: 
                   1969: }
                   1970: 
1.1       foxr     1971: #
                   1972: #  Parent process logic pass 1:
                   1973: #   For each entry in the hosts table, we will
                   1974: #  fork off an instance of ChildProcess to service the transactions
                   1975: #  to that host.  Each pid will be entered in a global hash
                   1976: #  with the value of the key, the host.
                   1977: #  The parent will then enter a loop to wait for process exits.
                   1978: #  Each exit gets logged and the child gets restarted.
                   1979: #
                   1980: 
1.5       foxr     1981: #
                   1982: #   Fork and start in new session so hang-up isn't going to 
                   1983: #   happen without intent.
                   1984: #
                   1985: 
                   1986: 
1.6       foxr     1987: 
                   1988: 
1.8       foxr     1989: 
1.6       foxr     1990: 
                   1991: ShowStatus("Forming new session");
                   1992: my $childpid = fork;
                   1993: if ($childpid != 0) {
                   1994:     sleep 4;			# Give child a chacne to break to
                   1995:     exit 0;			# a new sesion.
                   1996: }
1.8       foxr     1997: #
                   1998: #   Write my pid into the pid file so I can be located
                   1999: #
                   2000: 
                   2001: ShowStatus("Parent writing pid file:");
1.23      foxr     2002: my $execdir = $perlvar{'lonDaemons'};
1.8       foxr     2003: open (PIDSAVE, ">$execdir/logs/lonc.pid");
                   2004: print PIDSAVE "$$\n";
                   2005: close(PIDSAVE);
1.6       foxr     2006: 
1.17      foxr     2007: 
                   2008: 
1.6       foxr     2009: if (POSIX::setsid() < 0) {
                   2010:     print "Could not create new session\n";
                   2011:     exit -1;
                   2012: }
1.5       foxr     2013: 
                   2014: ShowStatus("Forking node servers");
                   2015: 
1.9       foxr     2016: Log("CRITICAL", "--------------- Starting children ---------------");
                   2017: 
1.31      foxr     2018: LondConnection::ReadConfig;               # Read standard config files.
1.1       foxr     2019: 
1.80      albertel 2020: $RemoteHost = "[parent]";
1.81      albertel 2021: &listen_on_common_socket();
1.60      foxr     2022: 
1.12      foxr     2023: $RemoteHost = "Parent Server";
1.1       foxr     2024: 
                   2025: # Maintain the population:
1.5       foxr     2026: 
                   2027: ShowStatus("Parent keeping the flock");
1.1       foxr     2028: 
1.12      foxr     2029: 
1.80      albertel 2030: # We need to setup a SIGChild event to handle the exit (natural or otherwise)
                   2031: # of the children.
1.61      foxr     2032: 
1.80      albertel 2033: Event->signal(cb       => \&server_died,
                   2034: 	      desc     => "Child exit handler",
                   2035: 	      signal   => "CHLD");
                   2036: 
                   2037: 
                   2038: # Set up all the other signals we set up.
                   2039: 
                   2040: $parent_handlers{INT} = Event->signal(cb       => \&Terminate,
                   2041: 				      desc     => "Parent INT handler",
                   2042: 				      signal   => "INT");
                   2043: $parent_handlers{TERM} = Event->signal(cb       => \&Terminate,
                   2044: 				       desc     => "Parent TERM handler",
                   2045: 				       signal   => "TERM");
1.81      albertel 2046: $parent_handlers{HUP}  = Event->signal(cb       => \&KillThemAll,
                   2047: 				       desc     => "Parent HUP handler.",
                   2048: 				       signal   => "HUP");
1.80      albertel 2049: $parent_handlers{USR1} = Event->signal(cb       => \&CheckKids,
                   2050: 				       desc     => "Parent USR1 handler",
                   2051: 				       signal   => "USR1");
                   2052: $parent_handlers{USR2} = Event->signal(cb       => \&UpdateKids,
                   2053: 				       desc     => "Parent USR2 handler.",
                   2054: 				       signal   => "USR2");
                   2055: 
                   2056: #  Start procdesing events.
                   2057: 
                   2058: $Event::DebugLevel = $DebugLevel;
                   2059: Debug(9, "Parent entering event loop");
                   2060: my $ret = Event::loop();
                   2061: die "Main Event loop exited: $ret";
1.14      foxr     2062: 
                   2063: =pod
                   2064: 
                   2065: =head1 CheckKids
                   2066: 
                   2067:   Since kids do not die as easily in this implementation
                   2068: as the previous one, there  is no need to restart the
                   2069: dead ones (all dead kids get restarted when they die!!)
                   2070: The only thing this function does is to pass USR1 to the
                   2071: kids so that they report their status.
                   2072: 
                   2073: =cut
                   2074: 
                   2075: sub CheckKids {
                   2076:     Debug(2, "Checking status of children");
                   2077:     my $docdir = $perlvar{'lonDocRoot'};
                   2078:     my $fh = IO::File->new(">$docdir/lon-status/loncstatus.txt");
                   2079:     my $now=time;
                   2080:     my $local=localtime($now);
                   2081:     print $fh "LONC status $local - parent $$ \n\n";
1.65      foxr     2082:     foreach my $host (keys %parent_dispatchers) {
                   2083: 	print $fh "LONC Parent process listening for $host\n";
                   2084:     }
1.83      albertel 2085:     foreach my $pid (keys %ChildPid) {
1.14      foxr     2086: 	Debug(2, "Sending USR1 -> $pid");
                   2087: 	kill 'USR1' => $pid;	# Tell Child to report status.
                   2088:     }
1.65      foxr     2089: 
1.14      foxr     2090: }
1.24      foxr     2091: 
                   2092: =pod
                   2093: 
                   2094: =head1  UpdateKids
                   2095: 
1.25      foxr     2096: parent's SIGUSR2 handler.  This handler:
1.24      foxr     2097: 
                   2098: =item
                   2099: 
                   2100: Rereads the hosts file.
                   2101: 
                   2102: =item
                   2103:  
                   2104: Kills off (via sigint) children for hosts that have disappeared.
                   2105: 
                   2106: =item
                   2107: 
1.27      foxr     2108: QUITs  children for hosts that already exist (this just forces a status display
1.24      foxr     2109: and resets the connection retry count for that host.
                   2110: 
                   2111: =item
                   2112: 
                   2113: Starts new children for hosts that have been added to the hosts.tab file since
                   2114: the start of the master program and maintains them.
                   2115: 
                   2116: =cut
                   2117: 
                   2118: sub UpdateKids {
1.27      foxr     2119: 
1.25      foxr     2120:     Log("INFO", "Updating connections via SIGUSR2");
1.27      foxr     2121: 
1.65      foxr     2122:     #  I'm not sure what I was thinking in the first implementation.
                   2123:     # someone will have to work hard to convince me the effect is any
                   2124:     # different than Restart, especially now that we don't start up 
                   2125:     # per host servers automatically, may as well just restart.
                   2126:     # The down side is transactions that are in flight will get timed out
                   2127:     # (lost unless they are critical).
1.27      foxr     2128: 
1.81      albertel 2129:     &KillThemAll();
1.24      foxr     2130: }
                   2131: 
1.14      foxr     2132: 
1.13      foxr     2133: =pod
                   2134: 
                   2135: =head1 Restart
                   2136: 
                   2137: Signal handler for HUP... all children are killed and
                   2138: we self restart.  This is an el-cheapo way to re read
                   2139: the config file.
                   2140: 
                   2141: =cut
                   2142: 
                   2143: sub Restart {
1.23      foxr     2144:     &KillThemAll;		# First kill all the children.
1.13      foxr     2145:     Log("CRITICAL", "Restarting");
                   2146:     my $execdir = $perlvar{'lonDaemons'};
                   2147:     unlink("$execdir/logs/lonc.pid");
1.65      foxr     2148:     exec("$executable");
1.10      foxr     2149: }
1.12      foxr     2150: 
                   2151: =pod
                   2152: 
                   2153: =head1 KillThemAll
                   2154: 
                   2155: Signal handler that kills all children by sending them a 
1.17      foxr     2156: SIGHUP.  Responds to sigint and sigterm.
1.12      foxr     2157: 
                   2158: =cut
                   2159: 
1.10      foxr     2160: sub KillThemAll {
1.12      foxr     2161:     Debug(2, "Kill them all!!");
1.85      albertel 2162:     
                   2163:     #local($SIG{CHLD}) = 'IGNORE';
                   2164:     # Our children >will< die.
                   2165:     # but we need to catch their death and cleanup after them in case this is 
                   2166:     # a restart set of kills
                   2167:     my @allpids = keys(%ChildPid);
                   2168:     foreach my $pid (@allpids) {
1.83      albertel 2169: 	my $serving = $ChildPid{$pid};
1.52      foxr     2170: 	ShowStatus("Nicely Killing lonc for $serving pid = $pid");
                   2171: 	Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");
1.17      foxr     2172: 	kill 'QUIT' => $pid;
1.12      foxr     2173:     }
1.85      albertel 2174:     ShowStatus("Finished killing child processes off.");
1.1       foxr     2175: }
1.12      foxr     2176: 
1.52      foxr     2177: 
                   2178: #
                   2179: #  Kill all children via KILL.  Just in case the
                   2180: #  first shot didn't get them.
                   2181: 
                   2182: sub really_kill_them_all_dammit
                   2183: {
                   2184:     Debug(2, "Kill them all Dammit");
                   2185:     local($SIG{CHLD} = 'IGNORE'); # In case some purist reenabled them.
1.83      albertel 2186:     foreach my $pid (keys %ChildPid) {
                   2187: 	my $serving = $ChildPid{$pid};
1.52      foxr     2188: 	&ShowStatus("Nastily killing lonc for $serving pid = $pid");
                   2189: 	Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid");
                   2190: 	kill 'KILL' => $pid;
1.83      albertel 2191: 	delete($ChildPid{$pid});
1.52      foxr     2192: 	my $execdir = $perlvar{'lonDaemons'};
                   2193: 	unlink("$execdir/logs/lonc.pid");
                   2194:     }
                   2195: }
1.69      matthew  2196: 
1.14      foxr     2197: =pod
                   2198: 
                   2199: =head1 Terminate
                   2200:  
                   2201: Terminate the system.
                   2202: 
                   2203: =cut
                   2204: 
                   2205: sub Terminate {
1.52      foxr     2206:     &Log("CRITICAL", "Asked to kill children.. first be nice...");
                   2207:     &KillThemAll;
                   2208:     #
                   2209:     #  By now they really should all be dead.. but just in case 
                   2210:     #  send them all SIGKILL's after a bit of waiting:
                   2211: 
                   2212:     sleep(4);
                   2213:     &Log("CRITICAL", "Now kill children nasty");
                   2214:     &really_kill_them_all_dammit;
1.17      foxr     2215:     Log("CRITICAL","Master process exiting");
                   2216:     exit 0;
1.14      foxr     2217: 
                   2218: }
1.81      albertel 2219: 
                   2220: sub my_hostname {
                   2221:     use Sys::Hostname;
                   2222:     my $name = &hostname();
                   2223:     &Debug(9,"Name is $name");
                   2224:     return $name;
                   2225: }
                   2226: 
1.12      foxr     2227: =pod
1.1       foxr     2228: 
                   2229: =head1 Theory
1.3       albertel 2230: 
                   2231: The event class is used to build this as a single process with an
                   2232: event driven model.  The following events are handled:
1.1       foxr     2233: 
                   2234: =item UNIX Socket connection Received
                   2235: 
                   2236: =item Request data arrives on UNIX data transfer socket.
                   2237: 
                   2238: =item lond connection becomes writable.
                   2239: 
                   2240: =item timer fires at 1 second intervals.
                   2241: 
                   2242: All sockets are run in non-blocking mode.  Timeouts managed by the timer
                   2243: handler prevents hung connections.
                   2244: 
                   2245: Key data structures:
                   2246: 
1.3       albertel 2247: =item RequestQueue
                   2248: 
                   2249: A queue of requests received from UNIX sockets that are
                   2250: waiting for a chance to be forwarded on a lond connection socket.
                   2251: 
                   2252: =item ActiveConnections
                   2253: 
                   2254: A hash of lond connections that have transactions in process that are
                   2255: available to be timed out.
                   2256: 
                   2257: =item ActiveTransactions
                   2258: 
                   2259: A hash indexed by lond connections that contain the client reply
                   2260: socket for each connection that has an active transaction on it.
                   2261: 
                   2262: =item IdleConnections
                   2263: 
                   2264: A hash of lond connections that have no work to do.  These connections
                   2265: can be closed if they are idle for a long enough time.
1.1       foxr     2266: 
                   2267: =cut
1.88    ! foxr     2268: 
        !          2269: =pod
        !          2270: 
        !          2271: =head1 Log messages
        !          2272: 
        !          2273: The following is a list of log messages that can appear in the 
        !          2274: lonc.log file.  Each log file has a severity and a message.
        !          2275: 
        !          2276: =over 2
        !          2277: 
        !          2278: =item Warning  A socket timeout was detected
        !          2279: 
        !          2280: If there are pending transactions in the socket's queue,
        !          2281: they are failed (saved if critical).  If the connection
        !          2282: retry count gets exceeded by this, the
        !          2283: remote host is marked as dead.
        !          2284: Called when timeouts occured during the connection and
        !          2285: connection dialog with a remote host.
        !          2286: 
        !          2287: =item Critical Host makred DEAD <hostname>   
        !          2288: 
        !          2289: The numer of retry counts for contacting a host was
        !          2290: exceeded. The host is marked dead an no 
        !          2291: further attempts will be made by that child.
        !          2292: 
        !          2293: =item Info lonc pipe client hung up on us     
        !          2294: 
        !          2295: Write to the client pipe indicated no data transferred
        !          2296: Socket to remote host is shut down.  Reply to the client 
        !          2297: is discarded.  Note: This is commented out in &ClientWriteable
        !          2298: 
        !          2299: =item Success  Reply from lond: <data>   
        !          2300: 
        !          2301: Can be enabled for debugging by setting LogTransactions to nonzero.
        !          2302: Indicates a successful transaction with lond, <data> is the data received
        !          2303: from the remote lond.
        !          2304: 
        !          2305: =item Success A delayed transaction was completed  
        !          2306: 
        !          2307: A transaction that must be reliable was executed and completed
        !          2308: as lonc restarted.  This is followed by a mesage of the form
        !          2309: 
        !          2310:   S: client-name : request
        !          2311: 
        !          2312: =item WARNING  Failing transaction <cmd>:<subcmd>  
        !          2313: 
        !          2314: Transaction failed on a socket, but the failure retry count for the remote
        !          2315: node has not yet been exhausted (the node is not yet marked dead).
        !          2316: cmd is the command, subcmd is the subcommand.  This results from a con_lost
        !          2317: when communicating with lond.
        !          2318: 
        !          2319: =item WARNING Shutting down a socket     
        !          2320: 
        !          2321: Called when a socket is being closed to lond.  This is emitted both when 
        !          2322: idle pruning is being done and when the socket has been disconnected by the remote.
        !          2323: 
        !          2324: =item WARNING Lond connection lost.
        !          2325: 
        !          2326: Called when a read from lond's socket failed indicating lond has closed the 
        !          2327: connection or died.  This should be followed by one or more
        !          2328: 
        !          2329:  "WARNING Failing transaction..." msgs for each in-flight or queued transaction.
        !          2330: 
        !          2331: =item INFO Connected to lond version:  <version> 
        !          2332: 
        !          2333: When connection negotiation is complete, the lond version is requested and logged here.
        !          2334: 
        !          2335: =item SUCCESS Connection n to host now ready for action
        !          2336: 
        !          2337: Emitted when connection has been completed with lond. n is then number of 
        !          2338: concurrent connections and host, the host to which the connection has just
        !          2339: been established.
        !          2340: 
        !          2341: =item WARNING Connection to host has been disconnected
        !          2342: 
        !          2343: Write to a lond resulted in failure status.  Connection to lond is dropped.
        !          2344: 
        !          2345: =item SUCCESS Created connection n to host host 
        !          2346: 
        !          2347: Initial connection request to host..(before negotiation).
        !          2348: 
        !          2349: =item CRITICAL Request Close Connection ... exiting
        !          2350: 
        !          2351: Client has sent "close_connection_exit"   The loncnew server is exiting.
        !          2352: 
        !          2353: =item INFO Resetting Connection Retries 
        !          2354: 
        !          2355: Client has sent "reset_retries" The lond connection retries are reset to zero for the
        !          2356: corresponding lond.
        !          2357: 
        !          2358: =item SUCCESS Transaction <data>
        !          2359: 
        !          2360: Only emitted if the global variable $LogTransactions was set to true.
        !          2361: A client has requested a lond transaction <data> is the contents of the request.
        !          2362: 
        !          2363: =item SUCCESS Toggled transaction logging <LogTransactions>
        !          2364:                                     
        !          2365: The state of the $LogTransactions global has been toggled, and its current value
        !          2366: (after being toggled) is displayed.  When non zero additional logging of transactions
        !          2367: is enabled for debugging purposes.  Transaction logging is toggled on receipt of a USR2
        !          2368: signal.
        !          2369: 
        !          2370: =item CRITICAL Abnormal exit. Child <pid> for <host> died thorugh signal.
        !          2371: 
        !          2372: QUIT signal received.  lonc child process is exiting.
        !          2373: 
        !          2374: =item SUCCESS New debugging level for <RemoteHost> now <DebugLevel>
        !          2375:                                     
        !          2376: Debugging toggled for the host loncnew is talking with.
        !          2377: Currently debugging is a level based scheme with higher number 
        !          2378: conveying more information.  The daemon starts out at
        !          2379: DebugLevel 0 and can toggle back and forth between that and
        !          2380: DebugLevel 2  These are controlled by
        !          2381: the global variables $DebugLevel and $NextDebugLevel
        !          2382: The debug level can go up to 9.
        !          2383: SIGINT toggles the debug level.  The higher the debug level the 
        !          2384: more debugging information is spewed.  See the Debug
        !          2385: sub in loncnew.
        !          2386: 
        !          2387: =item CRITICAL Forking server for host  
        !          2388: 
        !          2389: A child is being created to service requests for the specified host.
        !          2390: 
        !          2391: 
        !          2392: =item WARNING Request for a second child on hostname
        !          2393:                                     
        !          2394: Somehow loncnew was asked to start a second child on a host that already had a child
        !          2395: servicing it.  This request is not honored, but themessage is emitted.  This could happen
        !          2396: due to a race condition.  When a client attempts to contact loncnew for a new host, a child
        !          2397: is forked off to handle the requests for that server.  The parent then backs off the Unix
        !          2398: domain socket leaving it for the child to service all requests.  If in the time between
        !          2399: creating the child, and backing off, a new connection request comes in to the unix domain
        !          2400: socket, this could trigger (unlikely but remotely possible),.
        !          2401: 
        !          2402: =item CRITICAL ------ Starting Children ----
        !          2403: 
        !          2404: This message should probably be changed to "Entering event loop"  as the loncnew only starts
        !          2405: children as needed.  This message is emitted as new events are established and
        !          2406: the event processing loop is entered.
        !          2407: 
        !          2408: =item INFO Updating connections via SIGUSR2
        !          2409:                                     
        !          2410: SIGUSR2 received. The original code would kill all clients, re-read the host file,
        !          2411: then restart children for each host.  Now that childrean aree started on demand, this
        !          2412: just kills all child processes and lets requests start them as needed again.
        !          2413: 
        !          2414: 
        !          2415: =item CRITICAL Restarting
        !          2416: 
        !          2417: SigHUP received.  all the children are killed and the script exec's itself to start again.
        !          2418: 
        !          2419: =item CRITICAL Nicely killing lonc for host pid = <pid>
        !          2420: 
        !          2421: Attempting to kill the child that is serving the specified host (pid given) cleanly via
        !          2422: SIGQUIT  The child should handle that, clean up nicely and exit.
        !          2423: 
        !          2424: =item CRITICAL Nastily killing lonc for host pid = <pid>
        !          2425: 
        !          2426: The child specified did not die when requested via SIGQUIT.  Therefore it is killed
        !          2427: via SIGKILL.
        !          2428: 
        !          2429: =item CRITICAL Asked to kill children.. first be nice..
        !          2430: 
        !          2431: In the parent's INT handler.  INT kills the child processes.  This inidicate loncnew
        !          2432: is about to attempt to kill all known children via SIGQUIT.  This message should be followed 
        !          2433: by one "Nicely killing" message for each extant child.
        !          2434: 
        !          2435: =item CRITICAL Now kill children nasty 
        !          2436: 
        !          2437: In the parent's INT handler. remaining children are about to be killed via
        !          2438: SIGKILL. Should be followed by a Nastily killing... for each lonc child that 
        !          2439: refused to die.
        !          2440: 
        !          2441: =item CRITICAL Master process exiting
        !          2442: 
        !          2443: In the parent's INT handler. just prior to the exit 0 call.
        !          2444: 
        !          2445: =back
        !          2446: 
        !          2447: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.