--- loncom/loncnew 2018/06/24 18:45:18 1.104 +++ loncom/loncnew 2018/08/07 17:12:09 1.105 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.104 2018/06/24 18:45:18 raeburn Exp $ +# $Id: loncnew,v 1.105 2018/08/07 17:12:09 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -74,6 +74,7 @@ my %perlvar = %{$perlvarref}; my %ChildPid; # by pid -> host. my %ChildHost; # by host. +my %ChildKeyMode; # by pid -> keymode my %listening_to; # Socket->host table for who the parent # is listening to. my %parent_dispatchers; # host-> listener watcher events. @@ -741,7 +742,7 @@ Parameters: The socket to kill off. -=item Restart +=item restart non-zero if we are allowed to create a new connection. @@ -749,6 +750,7 @@ non-zero if we are allowed to create a n sub KillSocket { my $Socket = shift; + my $restart = shift; Log("WARNING", "Shutting down a socket"); $Socket->Shutdown(); @@ -765,16 +767,24 @@ sub KillSocket { if(exists($ActiveConnections{$Socket})) { $ActiveConnections{$Socket}->cancel; delete($ActiveConnections{$Socket}); - $ConnectionCount--; + # Decrement ConnectionCount unless we will immediately + # re-connect (i.e., $restart is true), because this was + # a connection where the SSL channel for exchange of the + # shared key failed, and we may use an insecure channel. + unless ($restart) { + $ConnectionCount--; + } if ($ConnectionCount < 0) { $ConnectionCount = 0; } } # If the connection count has gone to zero and there is work in the # work queue, the work all gets failed with con_lost. # + if($ConnectionCount == 0) { $LondConnecting = 0; # No connections so also not connecting. EmptyQueue(); - CloseAllLondConnections; # Should all already be closed but... + CloseAllLondConnections(); # Should all already be closed but... + &clear_childpid($$); } UpdateStatus(); } @@ -871,17 +881,33 @@ sub LondReadable { Log("WARNING", "Lond connection lost."); + my $state_on_exit = $Socket->GetState(); if(exists($ActiveTransactions{$Socket})) { FailTransaction($ActiveTransactions{$Socket}); } else { # Socket is connecting and failed... need to mark # no longer connecting. - $LondConnecting = 0; } $Watcher->cancel(); - KillSocket($Socket); - $ConnectionRetriesLeft--; # Counts as connection failure + if ($state_on_exit eq 'ReInitNoSSL') { + # SSL certificate verification failed, and insecure connection + # allowed. Send restart arg to KillSocket(), so EmptyQueue() + # is not called, as we still hope to process queued request. + + KillSocket($Socket,1); + + # Re-initiate creation of Lond Connection for use with queued + # request. + + ShowStatus("Connected to ".$RemoteHost); + Log("WARNING","No SSL channel (verification failed), will try with insecure channel"); + &MakeLondConnection(1); + + } else { + KillSocket($Socket); + $ConnectionRetriesLeft--; # Counts as connection failure + } return; } SocketDump(6,$Socket); @@ -892,6 +918,8 @@ sub LondReadable { if($State eq "Initialized") { + } elsif ($State eq "ReInitNoSSL") { + } elsif ($State eq "ChallengeReceived") { # The challenge must be echoed back; The state machine # in the connection takes care of setting that up. Just @@ -928,7 +956,14 @@ sub LondReadable { } elsif ($State eq "ReceivingKey") { } elsif ($State eq "Idle") { - + + if ($ConnectionCount == 1) { + # Write child Pid file to keep track of ssl and insecure + # connections + + &record_childpid($Socket); + } + # This is as good a spot as any to get the peer version # string: @@ -1093,7 +1128,9 @@ sub LondWritable { $Watcher->cb(\&LondReadable); $Watcher->poll("r"); - + + } elsif ($State eq "ReInitNoSSL") { + } elsif ($State eq "ChallengeReceived") { # We received the challenge, now we # are echoing it back. This is a no-op, @@ -1204,7 +1241,8 @@ start off on it. =cut -sub MakeLondConnection { +sub MakeLondConnection { + my ($restart) = @_; Debug(4,"MakeLondConnection to ".GetServerHost()." on port " .GetServerPort()); @@ -1212,12 +1250,11 @@ sub MakeLondConnection { &GetServerPort(), &GetHostId()); - if($Connection eq undef) { + if($Connection eq undef) { Log("CRITICAL","Failed to make a connection with lond."); $ConnectionRetriesLeft--; return 0; # Failure. } else { - $LondConnecting = 1; # Connection in progress. # The connection needs to have writability # monitored in order to send the init sequence @@ -1242,7 +1279,9 @@ sub MakeLondConnection { if ($ConnectionCount == 0) { &SetupTimer; # Need to handle timeouts with connections... } - $ConnectionCount++; + unless ($restart) { + $ConnectionCount++; + } $Connection->SetClientData($ConnectionCount); Debug(4, "Connection count = ".$ConnectionCount); if($ConnectionCount == 1) { # First Connection: @@ -1652,6 +1691,7 @@ sub SignalledToDeath { ."died through "."\"$signal\""); #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: " # ."\"$signal\""); + &clear_childpid($$); exit 0; } @@ -1972,6 +2012,7 @@ sub server_died { my $host = $ChildPid{$pid}; if($host) { # It's for real... &Debug(9, "Caught sigchild for $host"); + &clear_childpid($pid); delete($ChildPid{$pid}); delete($ChildHost{$host}); &parent_clean_up($host); @@ -2143,6 +2184,7 @@ sub UpdateKids { &KillThemAll(); LondConnection->ResetReadConfig(); + ShowStatus('Parent keeping the flock'); } @@ -2181,12 +2223,14 @@ sub KillThemAll { # Our children >will< die. # but we need to catch their death and cleanup after them in case this is # a restart set of kills + my $execdir = $perlvar{'lonDaemons'}; my @allpids = keys(%ChildPid); foreach my $pid (@allpids) { my $serving = $ChildPid{$pid}; ShowStatus("Nicely Killing lonc for $serving pid = $pid"); Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid"); kill 'QUIT' => $pid; + &clear_childpid($pid); } ShowStatus("Finished killing child processes off."); } @@ -2206,6 +2250,7 @@ sub really_kill_them_all_dammit Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid"); kill 'KILL' => $pid; delete($ChildPid{$pid}); + delete($ChildKeyMode{$pid}); my $execdir = $perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); } @@ -2234,6 +2279,10 @@ sub Terminate { } +=pod + +=cut + sub my_hostname { use Sys::Hostname::FQDN(); my $name = Sys::Hostname::FQDN::fqdn(); @@ -2241,6 +2290,49 @@ sub my_hostname { return $name; } +sub record_childpid { + my ($Socket) = @_; + my $docdir = $perlvar{'lonDocRoot'}; + my $authmode = $Socket->GetKeyMode(); + my $peer = $Socket->PeerLoncapaHim(); + if (($authmode eq 'ssl') || ($authmode eq 'insecure')) { + my $childpid = $$; + if ($childpid) { + unless (exists($ChildKeyMode{$childpid})) { + $ChildKeyMode{$childpid} = $authmode; + } + if (-d "$docdir/lon-status/loncchld") { + unless (-e "$docdir/lon-status/loncchld/$childpid") { + if (open (my $pidfh,'>',"$docdir/lon-status/loncchld/$childpid")) { + print $pidfh "$peer:$authmode\n"; + close($pidfh); + } + } + } + } + } + return; +} + +sub clear_childpid { + my ($childpid) = @_; + my $docdir = $perlvar{'lonDocRoot'}; + if (-d "$docdir/lon-status/loncchld") { + if ($childpid =~ /^\d+$/) { + if (($ChildKeyMode{$childpid} eq 'insecure') || + ($ChildKeyMode{$childpid} eq 'ssl')) { + if (-e "$docdir/lon-status/loncchld/$childpid") { + unlink("$docdir/lon-status/loncchld/$childpid"); + } + } + } + } + if (exists($ChildKeyMode{$childpid})) { + delete($ChildKeyMode{$childpid}); + } + return; +} + =pod =head1 Theory @@ -2345,6 +2437,12 @@ connection or died. This should be foll "WARNING Failing transaction..." msgs for each in-flight or queued transaction. +=item WARNING No SSL channel (verification failed), will try with insecure channel. + +Called when promotion of a socket to SSL failed because SSL certificate verification failed. +Domain configuration must also permit insecure channel use for key exchange. Connection +negotiation will start again from the beginning, but with Authentication Mode not set to ssl. + =item INFO Connected to lond version: When connection negotiation is complete, the lond version is requested and logged here.