--- loncom/LONCAPA.pm	2006/05/30 19:26:34	1.4
+++ loncom/LONCAPA.pm	2008/11/17 13:22:01	1.25
@@ -1,7 +1,7 @@
 # The LearningOnline Network
 # Base routines
 #
-# $Id: LONCAPA.pm,v 1.4 2006/05/30 19:26:34 albertel Exp $
+# $Id: LONCAPA.pm,v 1.25 2008/11/17 13:22:01 jms Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -27,6 +27,42 @@
 #
 ###
 
+=head1 NAME
+
+Apache::LONCAPA
+
+LONCAPA - Basic routines
+
+=head1 SYNOPSIS
+
+Generally useful routines
+
+=head1 EXPORTED SUBROUTINES
+
+=over 4
+
+=item *
+
+escape() : unpack non-word characters into CGI-compatible hex codes
+
+=item *
+
+unescape() : pack CGI-compatible hex codes into actual non-word ASCII character
+
+=item *
+
+add_get_param() :
+ Inputs:  url (with or without exit GET from parameters), hash ref of
+              form name => value pairs
+
+ Return: url with properly added the form name elements and values to the 
+         the url doing proper escaping of the values and joining with ? or &
+         as needed
+
+=back
+
+=cut
+
 package LONCAPA;
 
 use strict;
@@ -38,11 +74,35 @@ use POSIX;
 
 my $loncapa_max_wait_time = 13;
 
+
+use vars qw($match_domain   $match_not_domain
+	    $match_username $match_not_username
+	    $match_courseid $match_not_courseid
+	    $match_name
+            $match_lonid
+	    $match_handle   $match_not_handle);
+
 require Exporter;
 our @ISA = qw (Exporter);
-our @EXPORT = qw(&add_get_param &escape &unescape &tie_domain_hash &untie_domain_hash &tie_user_hash &untie_user_hash &propath);
+our @EXPORT = qw(&add_get_param    &escape            &unescape       
+		 &tie_domain_hash  &untie_domain_hash &tie_user_hash
+		 &untie_user_hash  &propath);
+our @EXPORT_OK = qw($match_domain   $match_not_domain
+		    $match_username $match_not_username
+		    $match_courseid $match_not_courseid
+		    $match_name
+		    $match_lonid
+		    $match_handle   $match_not_handle);
+our %EXPORT_TAGS = ( 'match' =>[qw($match_domain   $match_not_domain
+				   $match_username $match_not_username
+				   $match_courseid $match_not_courseid
+				   $match_name
+				   $match_lonid
+				   $match_handle   $match_not_handle)],);
 my %perlvar;
 
+
+
 # Inputs are a url, and a hash ref of
 # form name => value pairs
 # takes care of properly adding the form name elements and values to the 
@@ -81,12 +141,63 @@ sub unescape {
     return $str;
 }
 
+$match_domain     = $LONCAPA::domain_re     = qr{[\w\-.]+};
+$match_not_domain = $LONCAPA::not_domain_re = qr{[^\w\-.]+};
+sub clean_domain {
+    my ($domain) = @_;
+    $domain =~ s/$match_not_domain//g;
+    return $domain;
+}
+
+$match_username     = $LONCAPA::username_re     = qr{\w[\w\-.@]+};
+$match_not_username = $LONCAPA::not_username_re = qr{[^\w\-.@]+};
+sub clean_username {
+    my ($username) = @_;
+    $username =~ s/^\W+//;
+    $username =~ s/$match_not_username//g;
+    return $username;
+}
+
+
+$match_courseid     = $LONCAPA::courseid_re     = qr{\d[\w\-.]+};
+$match_not_courseid = $LONCAPA::not_courseid_re = qr{[^\w\-.]+};
+sub clean_courseid {
+    my ($courseid) = @_;
+    $courseid =~ s/^\D+//;
+    $courseid =~ s/$match_not_courseid//g;
+    return $courseid;
+}
+
+$match_name         = $LONCAPA::name_re = qr{$match_username|$match_courseid};
+sub clean_name {
+    my ($name) = @_;
+    $name =~ s/$match_not_username//g;
+    return $name;
+}
+
+$match_lonid     = $LONCAPA::lonid_re     = qr{[\w\-.]+};
+
+sub split_courseid {
+    my ($courseid) = @_;
+    my  ($domain,$coursenum) = 
+	($courseid=~m{^/($match_domain)/($match_courseid)});
+    return ($domain,$coursenum);
+}
+
+$match_handle     = $LONCAPA::handle_re     = qr{[\w\-.@]+};
+$match_not_handle = $LONCAPA::not_handle_re = qr{[^\w\-.@]+};
+sub clean_handle {
+    my ($handle) = @_;
+    $handle =~ s/$match_not_handle//g;
+    return $handle;
+}
+
 # -------------------------------------------- Return path to profile directory
 
 sub propath {
     my ($udom,$uname)=@_;
-    $udom=~s/\W//g;
-    $uname=~s/\W//g;
+    $udom = &clean_domain($udom);
+    $uname= &clean_name($uname);
     my $subdir=$uname.'__';
     $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
     my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
@@ -95,32 +206,38 @@ sub propath {
 
 
 #---------------------------------------------------------------
-#
-# Manipulation of hash based databases (factoring out common code
-# for later use as we refactor.
-#
-#  Ties a domain level resource file to a hash.
-#  If requested a history entry is created in the associated hist file.
-#
-#  Parameters:
-#     domain    - Name of the domain in which the resource file lives.
-#     namespace - Name of the hash within that domain.
-#     how       - How to tie the hash (e.g. GDBM_WRCREAT()).
-#     loghead   - Optional parameter, if present a log entry is created
-#                 in the associated history file and this is the first part
-#                  of that entry.
-#     logtail   - Goes along with loghead,  The actual logentry is of the
-#                 form $loghead:<timestamp>:logtail.
-# Returns:
-#    Reference to a hash bound to the db file or alternatively undef
-#    if the tie failed.
-#
+
+=pod
+
+=item tie_domain_hash()
+
+Manipulation of hash based databases (factoring out common code
+for later use as we refactor.
+
+ Ties a domain level resource file to a hash.
+ If requested a history entry is created in the associated hist file.
+
+ Parameters:
+    domain    - Name of the domain in which the resource file lives.
+    namespace - Name of the hash within that domain.
+    how       - How to tie the hash (e.g. GDBM_WRCREAT()).
+    loghead   - Optional parameter, if present a log entry is created
+                in the associated history file and this is the first part
+                 of that entry.
+    logtail   - Goes along with loghead,  The actual logentry is of the
+                form $loghead:<timestamp>:logtail.
+Returns:
+   Reference to a hash bound to the db file or alternatively undef
+   if the tie failed.
+
+=cut
+
 sub tie_domain_hash {
     my ($domain,$namespace,$how,$loghead,$logtail) = @_;
     
     # Filter out any whitespace in the domain name:
     
-    $domain =~ s/\W//g;
+    $domain = &clean_domain($domain);
     
     # We have enough to go on to tie the hash:
     
@@ -133,32 +250,37 @@ sub tie_domain_hash {
 sub untie_domain_hash {
     return &_locking_hash_untie(@_);
 }
-#
-#   Ties a user's resource file to a hash.  
-#   If necessary, an appropriate history
-#   log file entry is made as well.
-#   This sub factors out common code from the subs that manipulate
-#   the various gdbm files that keep keyword value pairs.
-# Parameters:
-#   domain       - Name of the domain the user is in.
-#   user         - Name of the 'current user'.
-#   namespace    - Namespace representing the file to tie.
-#   how          - What the tie is done to (e.g. GDBM_WRCREAT().
-#   loghead      - Optional first part of log entry if there may be a
-#                  history file.
-#   what         - Optional tail of log entry if there may be a history
-#                  file.
-# Returns:
-#   hash to which the database is tied.  It's up to the caller to untie.
-#   undef if the has could not be tied.
-#
+
+=pod
+
+=item tie_user_hash()
+
+  Ties a user's resource file to a hash.  
+  If necessary, an appropriate history
+  log file entry is made as well.
+  This sub factors out common code from the subs that manipulate
+  the various gdbm files that keep keyword value pairs.
+Parameters:
+  domain       - Name of the domain the user is in.
+  user         - Name of the 'current user'.
+  namespace    - Namespace representing the file to tie.
+  how          - What the tie is done to (e.g. GDBM_WRCREAT().
+  loghead      - Optional first part of log entry if there may be a
+                 history file.
+  what         - Optional tail of log entry if there may be a history
+                 file.
+Returns:
+  hash to which the database is tied.  It's up to the caller to untie.
+  undef if the has could not be tied.
+
+=cut
+
 sub tie_user_hash {
     my ($domain,$user,$namespace,$how,$loghead,$what) = @_;
 
-    $namespace=~s/\//\_/g;	# / -> _
-    $namespace=~s/\W//g;		# whitespace eliminated.
-    my $proname     = &propath($domain, $user);
-
+    $namespace=~s{/}{_}g;	# / -> _
+    $namespace     = &clean_username($namespace);
+    my $proname    = &propath($domain, $user);
     my $file_prefix="$proname/$namespace";
     return &_locking_hash_tie($file_prefix,$namespace,$how,$loghead,$what);
 }
@@ -167,6 +289,33 @@ sub untie_user_hash {
     return &_locking_hash_untie(@_);
 }
 
+=pod
+
+=item locking_hash_tie()
+
+routines if you just have a filename
+return tied hashref or undef
+
+=cut
+
+sub locking_hash_tie {
+    my ($filename,$how)=@_;
+    my ($file_prefix,$namespace)=&db_filename_parts($filename);
+    if ($namespace eq '') { return undef; }
+    return &_locking_hash_tie($file_prefix,$namespace,$how);
+}
+
+sub locking_hash_untie {
+    return &_locking_hash_untie(@_);
+}
+
+sub db_filename_parts {
+    my ($filename)=@_;
+    my ($file_path,$namespace)=($filename=~/^(.*)\/([^\/]+)\.db$/);
+    if ($namespace eq '') { return undef; }
+    return ($file_path.'/'.$namespace,$namespace);
+}
+
 # internal routines that handle the actual tieing and untieing process
 
 sub _do_hash_tie {
@@ -176,11 +325,10 @@ sub _do_hash_tie {
 	# If this is a namespace for which a history is kept,
 	# make the history log entry:    
 	if (($namespace !~/^nohist\_/) && (defined($loghead))) {
-	    my $args = scalar @_;
 	    my $hfh = IO::File->new(">>$file_prefix.hist"); 
 	    if($hfh) {
-		my $now = time;
-		print $hfh "$loghead:$now:$what\n";
+		my $now = time();
+		print $hfh ("$loghead:$now:$what\n");
 	    }
 	    $hfh->close;
 	}
@@ -198,9 +346,32 @@ sub _do_hash_untie {
 
 {
     my $sym;
+    my @pushed_syms;
+
+    sub clean_sym {
+	undef($sym);
+    }
+    sub push_locking_hash_tie {
+	if (!defined($sym)) {
+	    die("Invalid used of push_locking_hash_tie, should only be called after a lock has occurred and before and unlock.");
+	}
+	push(@pushed_syms,$sym);
+	undef($sym);
+    }
+
+    sub pop_locking_hash_tie {
+	if (defined($sym)) {
+	    die("Invalid nested used of pop_locking_hash_tie, should only be called after a unlock has occurred.");
+	}
+	$sym = pop(@pushed_syms);
+    }
 
     sub _locking_hash_tie {
 	my ($file_prefix,$namespace,$how,$loghead,$what) = @_;
+	if (defined($sym)) {
+	    die('Nested locking attempted without proper use of push_locking_hash_tie, this is unsupported');
+	}
+
         my $lock_type=LOCK_SH;
 # Are we reading or writing?
         if ($how eq &GDBM_READER()) {
@@ -212,25 +383,34 @@ sub _do_hash_untie {
                if ((! -e "$file_prefix.db") && (! -e "$file_prefix.db.gz")) {
 # No such file. Forget it.                
                    $! = 2;
+		   &clean_sym();
                    return undef;
                }
 # Apparently just no lock file yet. Make one
                open($sym,">>$file_prefix.db.lock");
            }
 # Do a shared lock
-           if (!&flock_sym(LOCK_SH)) { return undef; } 
+           if (!&flock_sym(LOCK_SH)) { 
+	       &clean_sym();
+	       return undef; 
+	   } 
 # If this is compressed, we will actually need an exclusive lock
 	   if (-e "$file_prefix.db.gz") {
-	       if (!&flock_sym(LOCK_EX)) { return undef; }
+	       if (!&flock_sym(LOCK_EX)) {
+		   &clean_sym();
+		   return undef;
+	       }
 	   }
         } elsif ($how eq &GDBM_WRCREAT()) {
 # We are writing
            open($sym,">>$file_prefix.db.lock");
 # Writing needs exclusive lock
-           if (!&flock_sym(LOCK_EX)) { return undef; }
+           if (!&flock_sym(LOCK_EX)) {
+	       &clean_sym();
+	       return undef;
+	   }
         } else {
-           &logthis("Unknown method $how for $file_prefix");
-           die();
+           die("Unknown method $how for $file_prefix");
         }
 # The file is ours!
 # If it is archived, un-archive it now
@@ -243,7 +423,12 @@ sub _do_hash_untie {
 # Change access mode to non-blocking
        $how=$how|&GDBM_NOLOCK();
 # Go ahead and tie the hash
-       return &_do_hash_tie($file_prefix,$namespace,$how,$loghead,$what);
+      	my $result = 
+	    &_do_hash_tie($file_prefix,$namespace,$how,$loghead,$what);
+	if (!$result) {
+	    &clean_sym();
+	}
+	return $result;
     }
 
     sub flock_sym {
@@ -272,7 +457,7 @@ sub _do_hash_untie {
 	my $result = untie(%$hashref);
 	flock($sym,LOCK_UN);
 	close($sym);
-	undef($sym);
+	&clean_sym();
 	return $result;
     }
 }
@@ -285,36 +470,4 @@ BEGIN {
 
 __END__
 
-=pod
 
-=head1 NAME
-
-LONCAPA - Basic routines
-
-=head1 SYNOPSIS
-
-Generally useful routines
-
-=head1 EXPORTED SUBROUTINES
-
-=over 4
-
-=item *
-
-escape() : unpack non-word characters into CGI-compatible hex codes
-
-=item *
-
-unescape() : pack CGI-compatible hex codes into actual non-word ASCII character
-
-=item *
-
-add_get_param() :
- Inputs:  url (with or without exit GET from parameters), hash ref of
-              form name => value pairs
-
- Return: url with properly added the form name elements and values to the 
-         the url doing proper escaping of the values and joining with ? or &
-         as needed
-
-=back