--- loncom/loncnew 2004/10/05 11:16:57 1.65 +++ loncom/loncnew 2007/03/28 00:05:38 1.78 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.65 2004/10/05 11:16:57 foxr Exp $ +# $Id: loncnew,v 1.78 2007/03/28 00:05:38 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -61,6 +61,7 @@ use LONCAPA::LondConnection; use LONCAPA::LondTransaction; use LONCAPA::Configuration; use LONCAPA::HashIterator; +use Fcntl qw(:flock); # Read the httpd configuration file to get perl variables @@ -105,8 +106,8 @@ my $ConnectionCount = 0; my $IdleSeconds = 0; # Number of seconds idle. my $Status = ""; # Current status string. my $RecentLogEntry = ""; -my $ConnectionRetries=2; # Number of connection retries allowed. -my $ConnectionRetriesLeft=2; # Number of connection retries remaining. +my $ConnectionRetries=5; # Number of connection retries allowed. +my $ConnectionRetriesLeft=5; # Number of connection retries remaining. my $LondVersion = "unknown"; # Version of lond we talk with. my $KeyMode = ""; # e.g. ssl, local, insecure from last connect. my $LondConnecting = 0; # True when a connection is being built. @@ -114,6 +115,7 @@ my $LondConnecting = 0; # True wh my $DieWhenIdle = 1; # When true children die when trimmed -> 0. +my $hosts_tab = 1; # True if we are using a static hosts.tab my $I_am_child = 0; # True if this is the child process. # @@ -151,6 +153,7 @@ sub UpdateStatus { Makes an entry into the permanent log file. =cut + sub LogPerm { my $message=shift; my $execdir=$perlvar{'lonDaemons'}; @@ -196,7 +199,7 @@ sub Log { my $now = time; my $local = localtime($now); my $finalformat = "$local ($$) [$RemoteHost] [$Status] "; - my $finalformat = $finalformat.$format."\n"; + $finalformat = $finalformat.$format."\n"; # open the file and put the result. @@ -270,6 +273,7 @@ sub SocketDump { and as what we return in a SIGUSR1 =cut + sub ShowStatus { my $state = shift; my $now = time; @@ -280,13 +284,14 @@ sub ShowStatus { =pod -=head 2 SocketTimeout +=head2 SocketTimeout Called when an action on the socket times out. The socket is destroyed and any active transaction is failed. =cut + sub SocketTimeout { my $Socket = shift; Log("WARNING", "A socket timeout was detected"); @@ -327,11 +332,13 @@ sub child_exit { # during which no listens will be done on the # lonnet client socket. # - my $lock_file = GetLoncSocketPath().".lock"; + my $lock_file = &GetLoncSocketPath().".lock"; open(LOCK,">$lock_file"); print LOCK "Contents not important"; close(LOCK); - + if ($hosts_tab) { + unlink(&GetLoncSocketPath()); + } exit(0); } # Now figure out how we exit: @@ -419,6 +426,7 @@ sub Tick { $KeyMode = ""; $clock_watcher->cancel(); } + &UpdateStatus(); } =pod @@ -672,9 +680,9 @@ sub FailTransaction { if ($ConnectionRetriesLeft > 0) { Log("WARNING", "Failing transaction " - .$transaction->getRequest()); + .$transaction->getLoggableRequest()); } - Debug(1, "Failing transaction: ".$transaction->getRequest()); + Debug(1, "Failing transaction: ".$transaction->getLoggableRequest()); if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it. my $client = $transaction->getClient(); Debug(1," Replying con_lost to ".$transaction->getRequest()); @@ -684,12 +692,14 @@ sub FailTransaction { } =pod + =head1 EmptyQueue Fails all items in the work queue with con_lost. Note that each item in the work queue is a transaction. =cut + sub EmptyQueue { $ConnectionRetriesLeft--; # Counts as connection failure too. while($WorkQueue->Count()) { @@ -705,6 +715,7 @@ sub EmptyQueue { Close all connections open on lond prior to exit e.g. =cut + sub CloseAllLondConnections { foreach my $Socket (keys %ActiveConnections) { if(exists($ActiveTransactions{$Socket})) { @@ -713,7 +724,6 @@ sub CloseAllLondConnections { KillSocket($Socket); } } -=cut =pod @@ -735,8 +745,8 @@ Parameters: nonzero if we are allowed to create a new connection. - =cut + sub KillSocket { my $Socket = shift; @@ -948,7 +958,7 @@ sub LondReadable { # We need to be writable for this and probably don't belong # here inthe first place. - Deubg(6, "SendingRequest state encountered in readable"); + Debug(6, "SendingRequest state encountered in readable"); $Watcher->poll("w"); $Watcher->cb(\&LondWritable); @@ -1149,6 +1159,7 @@ sub LondWritable { =pod =cut + sub QueueDelayed { Debug(3,"QueueDelayed called"); @@ -1156,19 +1167,28 @@ sub QueueDelayed { Debug(4, "Delayed path: ".$path); opendir(DIRHANDLE, $path); - - my @alldelayed = grep /\.$RemoteHost$/, readdir DIRHANDLE; + + my @all_host_ids; + my $host_iterator = &LondConnection::GetHostIterator(); + while (!$host_iterator->end()) { + my ($host_id,$host_name) = @{$host_iterator->get()}[0,3]; + if ($host_name eq $RemoteHost) { + push(@all_host_ids, $host_id); + } + $host_iterator->next(); + } + my $host_id_re = '(?:'.join('|',@all_host_ids).')'; + my @alldelayed = grep(/\.$host_id_re$/, readdir(DIRHANDLE)); closedir(DIRHANDLE); - my $dfname; - my $reqfile; - foreach $dfname (sort @alldelayed) { - $reqfile = "$path/$dfname"; - Debug(4, "queueing ".$reqfile); + foreach my $dfname (sort(@alldelayed)) { + my $reqfile = "$path/$dfname"; + my ($host_id) = ($dfname =~ /\.([^.]*)$/); + Debug(4, "queueing ".$reqfile." for $host_id"); my $Handle = IO::File->new($reqfile); my $cmd = <$Handle>; chomp $cmd; # There may or may not be a newline... $cmd = $cmd."\n"; # now for sure there's exactly one newline. - my $Transaction = LondTransaction->new($cmd); + my $Transaction = LondTransaction->new("sethost:$host_id:$cmd"); $Transaction->SetDeferred($reqfile); QueueTransaction($Transaction); } @@ -1509,15 +1529,14 @@ another event handler to subess requests =cut sub SetupLoncListener { + my ($host,$SocketName) = @_; + if (!$host) { $host = &GetServerHost(); } + if (!$SocketName) { $SocketName = &GetLoncSocketPath($host); } - my $host = GetServerHost(); # Default host. - if (@_) { - ($host) = @_ # Override host with parameter. - } - my $socket; - my $SocketName = GetLoncSocketPath($host); unlink($SocketName); + + my $socket; unless ($socket =IO::Socket::UNIX->new(Local => $SocketName, Listen => 250, Type => SOCK_STREAM)) { @@ -1557,6 +1576,7 @@ into the status file. We also use this to reset the retries count in order to allow the client to retry connections with a previously dead server. + =cut sub ChildStatus { @@ -1565,26 +1585,31 @@ sub ChildStatus { Debug(2, "Reporting child status because : ".$watcher->data); my $docdir = $perlvar{'lonDocRoot'}; - my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt"); - print $fh $$."\t".$RemoteHost."\t".$Status."\t". + + open(LOG,">>$docdir/lon-status/loncstatus.txt"); + flock(LOG,LOCK_EX); + print LOG $$."\t".$RemoteHost."\t".$Status."\t". $RecentLogEntry."\n"; # # Write out information about each of the connections: # if ($DebugLevel > 2) { - print $fh "Active connection statuses: \n"; + print LOG "Active connection statuses: \n"; my $i = 1; print STDERR "================================= Socket Status Dump:\n"; foreach my $item (keys %ActiveConnections) { my $Socket = $ActiveConnections{$item}->data; my $state = $Socket->GetState(); - print $fh "Connection $i State: $state\n"; + print LOG "Connection $i State: $state\n"; print STDERR "---------------------- Connection $i \n"; $Socket->Dump(-1); # Ensure it gets dumped.. $i++; } } + flock(LOG,LOCK_UN); + close(LOG); $ConnectionRetriesLeft = $ConnectionRetries; + UpdateStatus(); } =pod @@ -1605,12 +1630,14 @@ sub SignalledToDeath { chomp($signal); Log("CRITICAL", "Abnormal exit. Child $$ for $RemoteHost " ."died through "."\"$signal\""); - LogPerm("F:lonc: $$ on $RemoteHost signalled to death: " - ."\"$signal\""); + #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: " +# ."\"$signal\""); exit 0; } +=pod + =head2 ToggleDebug This sub toggles trace debugging on and off. @@ -1626,6 +1653,8 @@ sub ToggleDebug { } +=pod + =head2 ChildProcess This sub implements a child process for a single lonc daemon. @@ -1704,7 +1733,7 @@ sub ChildProcess { desc => 'Lonc Listener Unix Socket', fd => $socket); - $Event::Debuglevel = $DebugLevel; + $Event::DebugLevel = $DebugLevel; Debug(9, "Making initial lond connection for ".$RemoteHost); @@ -1778,25 +1807,67 @@ sub parent_client_connection { my ($event) = @_; my $watcher = $event->w; my $socket = $watcher->fd; + if ($hosts_tab) { - # Lookup the host associated with this socket: - - my $host = $listening_to{$socket}; + # Lookup the host associated with this socket: + + my $host = $listening_to{$socket}; - # Start the child: + # Start the child: + + + + &Debug(9,"Creating child for $host (parent_client_connection)"); + &CreateChild($host, $socket); + + # Clean up the listen since now the child takes over until it exits. + $watcher->cancel(); # Nolonger listening to this event + delete($listening_to{$socket}); + delete($parent_dispatchers{$host}); + $socket->close(); + + } else { + my $connection = $socket->accept(); # Accept the client connection. + Event->io(cb => \&get_remote_hostname, + poll => 'r', + data => "", + fd => $connection); + } + } +} + +sub get_remote_hostname { + my ($event) = @_; + my $watcher = $event->w; + my $socket = $watcher->fd; + my $thisread; + my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0); + Debug(8, "rcv: data length = ".length($thisread)." read =".$thisread); + if (!defined($rv) || length($thisread) == 0) { + # Likely eof on socket. + Debug(5,"Client Socket closed on lonc for p_c_c"); + close($socket); + $watcher->cancel(); + return; + } + + my $data = $watcher->data().$thisread; + $watcher->data($data); + if($data =~ /\n$/) { # Request entirely read. + chomp($data); + } else { + return; + } - &Debug(9,"Creating child for $host (parent_client_connection)"); - &CreateChild($host, $socket); + &Debug(5,"Creating child for $data (parent_client_connection)"); + &CreateChild($data); # Clean up the listen since now the child takes over until it exits. - $watcher->cancel(); # Nolonger listening to this event - delete($listening_to{$socket}); - delete($parent_dispatchers{$host}); + $socket->send("done\n"); $socket->close(); - } } # parent_listen: @@ -1818,23 +1889,43 @@ sub parent_listen { my ($loncapa_host) = @_; Debug(5, "parent_listen: $loncapa_host"); - my $socket = &SetupLoncListener($loncapa_host); + my ($socket,$file); + if (!$loncapa_host) { + $loncapa_host = 'common_parent'; + $file = $perlvar{'lonSockCreate'}; + } else { + $file = &GetLoncSocketPath($loncapa_host); + } + $socket = &SetupLoncListener($loncapa_host,$file); + $listening_to{$socket} = $loncapa_host; if (!$socket) { die "Unable to create a listen socket for $loncapa_host"; } - my $lock_file = &GetLoncSocketPath($loncapa_host).".lock"; + my $lock_file = $file.".lock"; unlink($lock_file); # No problem if it doesn't exist yet [startup e.g.] - my $watcher = Event->io(cb => \&parent_client_connection, - poll => 'r', - desc => "Parent listener unix socket ($loncapa_host)", - fd => $socket); + my $watcher = + Event->io(cb => \&parent_client_connection, + poll => 'r', + desc => "Parent listener unix socket ($loncapa_host)", + data => "", + fd => $socket); $parent_dispatchers{$loncapa_host} = $watcher; } +sub parent_clean_up { + my ($loncapa_host) = @_; + Debug(5, "parent_clean_up: $loncapa_host"); + + my $socket_file = &GetLoncSocketPath($loncapa_host); + unlink($socket_file); # No problem if it doesn't exist yet [startup e.g.] + my $lock_file = $socket_file.".lock"; + unlink($lock_file); # No problem if it doesn't exist yet [startup e.g.] +} + # listen_on_all_unix_sockets: # This sub initiates a listen on all unix domain lonc client sockets. @@ -1857,13 +1948,18 @@ sub listen_on_all_unix_sockets { my $host_iterator = &LondConnection::GetHostIterator(); while (!$host_iterator->end()) { my $host_entry_ref = $host_iterator->get(); - my $host_name = $host_entry_ref->[0]; + my $host_name = $host_entry_ref->[3]; Debug(9, "Listen for $host_name"); &parent_listen($host_name); $host_iterator->next(); } } +sub listen_on_common_socket { + Debug(5, "listen_on_common_socket"); + &parent_listen(); +} + # server_died is called whenever a child process exits. # Since this is dispatched via a signal, we must process all # dead children until there are no more left. The action @@ -1888,8 +1984,12 @@ sub server_died { &Debug(9, "Caught sigchild for $host"); delete($ChildHash{$pid}); delete($HostToPid{$host}); - &parent_listen($host); - + if ($hosts_tab) { + &parent_listen($host); + } else { + &parent_clean_up($host); + } + } else { &Debug(5, "Caught sigchild for pid not in hosts hash: $pid"); } @@ -1949,7 +2049,11 @@ my $HostIterator = LondConnection::GetHo if ($DieWhenIdle) { $RemoteHost = "[parent]"; - &listen_on_all_unix_sockets(); + if ($hosts_tab) { + &listen_on_all_unix_sockets(); + } else { + &listen_on_common_socket(); + } } else { while (! $HostIterator->end()) { @@ -1987,9 +2091,15 @@ if ($DieWhenIdle) { $parent_handlers{TERM} = Event->signal(cb => \&Terminate, desc => "Parent TERM handler", signal => "TERM"); - $parent_handlers{HUP} = Event->signal(cb => \&Restart, - desc => "Parent HUP handler.", - signal => "HUP"); + if ($hosts_tab) { + $parent_handlers{HUP} = Event->signal(cb => \&Restart, + desc => "Parent HUP handler.", + signal => "HUP"); + } else { + $parent_handlers{HUP} = Event->signal(cb => \&KillThemAll, + desc => "Parent HUP handler.", + signal => "HUP"); + } $parent_handlers{USR1} = Event->signal(cb => \&CheckKids, desc => "Parent USR1 handler", signal => "USR1"); @@ -2012,7 +2122,11 @@ if ($DieWhenIdle) { $SIG{INT} = \&Terminate; $SIG{TERM} = \&Terminate; - $SIG{HUP} = \&Restart; + if ($hosts_tab) { + $SIG{HUP} = \&Restart; + } else { + $SIG{HUP} = \&KillThemAll; + } $SIG{USR1} = \&CheckKids; $SIG{USR2} = \&UpdateKids; # LonManage update request. @@ -2056,7 +2170,6 @@ sub CheckKids { foreach my $pid (keys %ChildHash) { Debug(2, "Sending USR1 -> $pid"); kill 'USR1' => $pid; # Tell Child to report status. - sleep 1; # Wait so file doesn't intermix. } } @@ -2098,8 +2211,11 @@ sub UpdateKids { # The down side is transactions that are in flight will get timed out # (lost unless they are critical). - &Restart(); - + if ($hosts_tab) { + &Restart(); + } else { + &KillThemAll(); + } } @@ -2139,8 +2255,6 @@ sub KillThemAll { Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid"); kill 'QUIT' => $pid; } - - } @@ -2162,6 +2276,7 @@ sub really_kill_them_all_dammit unlink("$execdir/logs/lonc.pid"); } } + =pod =head1 Terminate