Patch: loncom/loncnew 1.3 -> 1.5 (idle-connection pruning, lond disconnect
handling, process status line, pid file).
Review amendments, all confined to added ('+') lines so hunk counts are intact:
  * KillSocket: '($ConnectionCount = 0)' was an assignment; now '=='.
  * Tick: $Socket and $i are lexical ('my') instead of package globals.
  * MakeLondConnection: '$Socket == undef' replaced by '!defined($Socket)'.
Indentation was reconstructed; apply with whitespace-insensitive matching.

--- loncom/loncnew 2003/04/18 03:10:36 1.3
+++ loncom/loncnew 2003/04/29 03:24:51 1.5
@@ -2,7 +2,7 @@
 # The LearningOnline Network with CAPA
 # lonc maintains the connections to remote computers
 #
-# $Id: loncnew,v 1.3 2003/04/18 03:10:36 albertel Exp $
+# $Id: loncnew,v 1.5 2003/04/29 03:24:51 foxr Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -100,7 +100,7 @@ my %ActiveClients; # Serial numbers of
 my $WorkQueue = Queue->new(); # Queue of pending transactions.
 my $ClientQueue = Queue->new(); # Queue of clients causing xactinos.
 my $ConnectionCount = 0;
-
+my $IdleSeconds = 0; # Number of seconds idle.
 
 #
 
@@ -155,19 +155,61 @@ sub SocketDump {
 
 =pod
 
+=head2 ShowStatus
+
+ Place some text as our pid status.
+
+=cut
+sub ShowStatus {
+    my $status = shift;
+    $0 = "lonc: ".$status;
+}
+
+=pod
+
 =head2 Tick
 
 Invoked each timer tick.
 
 =cut
 
+
 sub Tick {
     my $client;
+    ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount);
     Debug(6, "Tick");
     Debug(6, " Current connection count: ".$ConnectionCount);
     foreach $client (keys %ActiveClients) {
         Debug(7, " Have client: with id: ".$ActiveClients{$client});
     }
+    # Is it time to prune connection count:
+
+
+    if($IdleConnections->Count() &&
+       ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do?
+        $IdleSeconds++;
+        if($IdleSeconds > $IdleTimeout) { # Prune a connection...
+            my $Socket = $IdleConnections->pop(); # lexical: do not clobber a global
+            KillSocket($Socket, 0);
+        }
+    } else {
+        $IdleSeconds = 0; # Reset idle count if not idle.
+    }
+
+    # Do we have work in the queue, but no connections to service them?
+    # If so, try to make some new connections to get things going again.
+    #
+
+    my $Requests = $WorkQueue->Count();
+    if (($ConnectionCount == 0) && ($Requests > 0)) {
+        my $Connections = ($Requests <= $MaxConnectionCount) ?
+                           $Requests : $MaxConnectionCount;
+        Debug(1,"Work but no connections, starting ".$Connections." of them");
+        for (my $i = 0; $i < $Connections; $i++) {
+            MakeLondConnection();
+        }
+
+    }
 }
 
 =pod
@@ -348,7 +390,77 @@ sub CompleteTransaction {
                cb => \&ClientWritable,
                data => $data);
 }
+=pod
 
+=head2 FailTransaction
+
+ Finishes a transaction with failure because the associated lond socket
+ disconnected. It is up to our client to retry if desired.
+Parameters:
+
+=item client
+
+ The UNIX domain socket open on our client.
+
+=cut
+
+sub FailTransaction {
+    my $client = shift;
+
+    &Debug(8, "Failing transaction due to disconnect");
+    my $Serial = $ActiveClients{$client};
+    my $desc = sprintf("Connection to lonc client %d", $Serial);
+    my $data = "error: Connection to lond lost\n";
+
+    Event->io(fd => $client,
+              poll => "w",
+              desc => $desc,
+              cb => \&ClientWritable,
+              data => $data);
+
+}
+
+=pod
+
+=head2 KillSocket
+
+Destroys a socket. This function can be called either when a socket
+has died of 'natural' causes or because a socket needs to be pruned due to
+idleness. If the socket has died naturally, if there are no longer any
+live connections a new connection is created (in case there are transactions
+in the queue). If the socket has been pruned, it is never re-created.
+
+Parameters:
+
+=item Socket
+
+ The socket to kill off.
+
+=item Restart
+
+nonzero if we are allowed to create a new connection.
+
+
+=cut
+sub KillSocket {
+    my $Socket = shift;
+    my $Restart= shift;
+
+    # If the socket came from the active connection set, delete it.
+    # otherwise it came from the idle set and has already been destroyed:
+
+    if(exists($ActiveTransactions{$Socket})) {
+        delete ($ActiveTransactions{$Socket});
+    }
+    if(exists($ActiveConnections{$Socket})) {
+        delete($ActiveConnections{$Socket});
+    }
+    $ConnectionCount--;
+    if( ($ConnectionCount == 0) && ($Restart)) { # '==': compare, never zero the count
+        MakeLondConnection();
+    }
+
+}
 
 =pod
 
@@ -421,7 +533,16 @@ sub LondReadable {
     SocketDump(6, $Socket);
 
     if($Socket->Readable() != 0) {
-        # bad return from socket read.
+        # bad return from socket read. Currently this means that
+        # The socket has become disconnected. We fail the transaction.
+
+        if(exists($ActiveTransactions{$Socket})) {
+            Debug(3,"Lond connection lost failing transaction");
+            FailTransaction($ActiveTransactions{$Socket});
+        }
+        $Watcher->cancel();
+        KillSocket($Socket, 1);
+        return;
     }
     SocketDump(6,$Socket);
 
@@ -557,11 +678,17 @@ sub LondWritable {
     SocketDump(6,$Socket);
 
     if ($State eq "Connected") {
-        # "init" is being sent...
 
         if ($Socket->Writable() != 0) {
             # The write resulted in an error.
+            # We'll treat this as if the socket got disconnected:
+
+            $Watcher->cancel();
+            KillSocket($Socket, 1);
+            return;
         }
+        # "init" is being sent...
+
 
     } elsif ($State eq "Initialized") {
 
@@ -577,7 +704,10 @@ sub LondWritable {
         # we're waiting for the state to change
 
         if($Socket->Writable() != 0) {
-            # Write of the next chunk resulted in an error.
+
+            $Watcher->cancel();
+            KillSocket($Socket, 1);
+            return;
         }
 
     } elsif ($State eq "ChallengeReplied") {
@@ -595,8 +725,12 @@ sub LondWritable {
 
         if($Socket->Writable() != 0) {
             # Write resulted in an error.
-        }
 
+            $Watcher->cancel();
+            KillSocket($Socket, 1);
+            return;
+
+        }
     } elsif ($State eq "ReceivingKey") {
         # Now we need to wait for the key
         # to come back from the peer:
@@ -609,8 +743,15 @@ sub LondWritable {
         # peer... write the next chunk:
 
         if($Socket->Writable() != 0) {
-            # Write resulted in an error.
 
+            if(exists($ActiveTransactions{$Socket})) {
+                Debug(3, "Lond connection lost, failing transactions");
+                FailTransaction($ActiveTransactions{$Socket});
+            }
+            $Watcher->cancel();
+            KillSocket($Socket, 1);
+            return;
+
         }
 
     } elsif ($State eq "ReceivingReply") {
@@ -651,31 +792,30 @@ sub MakeLondConnection {
                                          &GetServerPort());
 
     if($Connection == undef) { # Needs to be more robust later.
-        die "Failed to make a connection!!".$!."\n";
+        Debug(0,"Failed to make a connection with lond.");
+    } else {
+        # The connection needs to have writability
+        # monitored in order to send the init sequence
+        # that starts the whole authentication/key
+        # exchange underway.
+        #
+        my $Socket = $Connection->GetSocket();
+        if(!defined($Socket)) {
+            die "did not get a socket from the connection";
+        } else {
+            &Debug(9,"MakeLondConnection got socket: ".$Socket);
+        }
 
-    }
-    # The connection needs to have writability
-    # monitored in order to send the init sequence
-    # that starts the whole authentication/key
-    # exchange underway.
-    #
-    my $Socket = $Connection->GetSocket();
-    if($Socket == undef) {
-        die "did not get a socket from the connection";
-    } else {
-        &Debug(9,"MakeLondConnection got socket: ".$Socket);
+
+        $event = Event->io(fd => $Socket,
+                           poll => 'w',
+                           cb => \&LondWritable,
+                           data => ($Connection, undef),
+                           desc => 'Connection to lond server');
+        $ActiveConnections{$Connection} = $event;
+
+        $ConnectionCount++;
     }
-
-
-    $event = Event->io(fd => $Socket,
-                       poll => 'w',
-                       cb => \&LondWritable,
-                       data => ($Connection, undef),
-                       desc => 'Connection to lond server');
-    $ActiveConnections{$Lond} = $event;
-
-    $ConnectionCount++;
-
 }
 
 
@@ -941,7 +1081,11 @@ sub ChildProcess {
     # Setup the initial server connection:
 
     &MakeLondConnection();
-
+
+    if($ConnectionCount == 0) {
+        Debug(1,"Could not make initial connection..\n");
+        Debug(1,"Will retry when there's work to do\n");
+    }
     Debug(9,"Entering event loop");
     my $ret = Event::loop(); # Start the main event loop.
 
@@ -960,6 +1104,7 @@ sub CreateChild {
     if($pid) { # Parent
         $ChildHash{$pid} = $RemoteHost;
     } else { # child.
+        ShowStatus("Connected to ".$RemoteHost);
         ChildProcess;
     }
 
@@ -974,6 +1119,20 @@ sub CreateChild {
 # Each exit gets logged and the child gets restarted.
 #
 
+#
+# Fork and start in new session so hang-up isn't going to
+# happen without intent.
+#
+
+
+ShowStatus("Parent writing pid file:");
+$execdir = $perlvar{'lonDaemons'};
+open (PIDSAVE, ">$execdir/logs/lonc.pid");
+print PIDSAVE "$$\n";
+close(PIDSAVE);
+
+ShowStatus("Forking node servers");
+
 my $HostIterator = LondConnection::GetHostIterator;
 while (! $HostIterator->end()) {
 
@@ -984,6 +1143,8 @@ while (! $HostIterator->end()) {
 
 # Maintain the population:
 
+ShowStatus("Parent keeping the flock");
+
 while(1) {
     $deadchild = wait();
     if(exists $ChildHash{$deadchild}) { # need to restart.