File:
[LON-CAPA] /
loncom /
metadata_database /
searchcat.pl
Revision
1.28:
download - view:
text,
annotated -
select for diffs
Mon Feb 3 05:39:37 2003 UTC (21 years, 3 months ago) by
harris41
Branches:
MAIN
CVS tags:
HEAD
beautified; BUG 1215 FIXED documentation that was horrible is now non-horrible
and
it now exists in every place that it should be; BUG 1216 FIXED stale records that
are not accounted for on the system are deleted; BUG 1217 FIXED, the
loncapa:metadata table can now change structure, and this script will
verify and alter the table structure if necessary; BUG 1218 PARTIALLY FIXED
(a small solid step toward The Great Metadata Overhaul); also using File::Find
now instead of find.pl
1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
4: #
5: # $Id: searchcat.pl,v 1.28 2003/02/03 05:39:37 harris41 Exp $
6: #
7: # Copyright Michigan State University Board of Trustees
8: #
9: # This file is part of the LearningOnline Network with a
10: # Computer assisted personalized approach (loncapa).
11: #
12: # Loncapa is free software; you can redistribute it and/or modify
13: # it under the terms of the GNU General Public License as published by
14: # the Free Software Foundation; either version 2 of the License, or
15: # (at your option) any later version.
16: #
17: # Loncapa is distributed in the hope that it will be useful,
18: # but WITHOUT ANY WARRANTY; without even the implied warranty of
19: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20: # GNU General Public License for more details.
21: #
22: # You should have received a copy of the GNU General Public License
23: # along with loncapa; if not, write to the Free Software
24: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25: #
26: # /home/httpd/html/adm/gpl.txt
27: #
28: # http://www.loncapa.org/
29: #
30: # YEAR=2001
31: # 04/14/2001, 04/16/2001 Scott Harrison
32: #
33: # YEAR=2002
34: # 05/11/2002 Scott Harrison
35: #
36: # YEAR=2003
37: # Scott Harrison
38: #
39: ###
40:
41: =pod
42:
43: =head1 NAME
44:
45: B<searchcat.pl> - put authoritative filesystem data into sql database.
46:
47: =head1 SYNOPSIS
48:
49: Ordinarily this script is to be called from a loncapa cron job
50: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
51: filesystem installation location: F</etc/cron.d/loncapa>).
52:
53: Here is the cron job entry.
54:
55: C<# Repopulate and refresh the metadata database used for the search catalog.>
56:
57: C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
58:
59: This script only allows itself to be run as the user C<www>.
60:
61: =head1 DESCRIPTION
62:
63: This script goes through a loncapa resource directory and gathers metadata.
64: The metadata is entered into a SQL database.
65:
66: This script also does general database maintenance such as reformatting
67: the C<loncapa:metadata> table if it is deprecated.
68:
69: This script also builds dynamic temporal metadata and stores this inside
70: a F<nohist_resevaldata.db> database file.
71:
72: This script is playing an increasingly important role for a loncapa
73: library server. The proper operation of this script is critical for a smooth
74: and correct user experience.
75:
76: =cut
77:
78: # ========================================================== Setting things up.
79:
80: # ------------------------------------------------------ Use external modules.
81:
82: use lib '/home/httpd/lib/perl/';
83: use LONCAPA::Configuration;
84:
85: use IO::File;
86: use HTML::TokeParser;
87: use DBI;
88: use GDBM_File;
89: use POSIX qw(strftime mktime);
90:
91: # ----------------- Code to enable 'find' subroutine listing of the .meta files
92: use File::Find;
93:
# List of .meta files for the author currently being processed; it is
# populated by &wanted() during the File::Find traversal and reset at the
# top of each iteration of the main per-user loop.
my @metalist;

# --------------- Read loncapa_apache.conf and loncapa.conf and get variables.
my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar = %{$perlvarref};
undef($perlvarref); # Remove since sensitive and not needed.
delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.

# ------------------------------------- Only run if machine is a library server
# Exit quietly (status 0) on any machine whose configured role is not
# 'library', so the cron job is a harmless no-op elsewhere.
if ($perlvar{'lonRole'} ne 'library')
{
    exit(0);
}
108:
# ------------------------------ Make sure this process is running as user=www.
# If the real UID differs from www's UID, mail the LON-CAPA admin addresses
# and exit non-zero.  (The script never enables strict, but $emailto and
# $subj are now declared lexically instead of leaking as package globals.)
my $wwwid = getpwnam('www');
if ($wwwid != $<)
{
    my $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    my $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
    # NOTE(review): the message is built by shell interpolation; the
    # address/subject values come from trusted local configuration.
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
           "mailto $emailto -s '$subj' > /dev/null");
    exit(1);
}
119:
# ------------------------------------------------------ Initialize log output.
# The log is truncated ('>') on every run, so it only records the most
# recent searchcat pass.
open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");

my $dbh; # Database object reference handle.

# ----------------------------- Verify connection to loncapa:metadata database.
# RaiseError/PrintError are both off, so every DBI failure in this script
# must be (and is) handled by explicit checks like this one.
unless (
        $dbh = DBI->connect('DBI:mysql:loncapa','www',
                            $perlvar{'lonSqlAccess'},
                            { RaiseError => 0,PrintError => 0})
        )
{
    print(LOG '**** ERROR **** Cannot connect to database!'."\n");
    exit(0);
}
136:
# ------------------------------ Create loncapa:metadata table if non-existent.
# One row per resource; every TEXT column carries a FULLTEXT index for the
# search catalog.  utilitysemaphore marks rows touched by the current run.
# NOTE(review): 'TYPE=MYISAM' is the old-style storage-engine clause
# (modern MySQL spells it ENGINE=MYISAM).
my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
    'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
    'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
    'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
    'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
    'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
    'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
    'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
    'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
    'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
    'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';

$dbh->do($make_metadata_table); # Generate the table.

# ----------------------------- Verify format of the loncapa:metadata database.
# Structure check: build a throw-away CONTROL_metadata table from the same
# DDL, compare the output of 'describe' for both tables, and if the live
# table's structure differs (i.e. the schema above has changed since the
# table was created), drop and recreate it.
# NOTE(review): a mismatch discards all existing rows; they are repopulated
# by the main loop below.

# Make a positive control for verifying table structure.
my $make_metadata_table_CONTROL = $make_metadata_table;
$make_metadata_table_CONTROL =~
    s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;

$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
$dbh->do($make_metadata_table_CONTROL);

my $table_description; # selectall reference to the table description.

my $CONTROL_table_string; # What the table description should look like.
my $table_string; # What the table description does look like.

# Calculate the CONTROL table description (what it should be).
$table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
foreach my $table_row (@{$table_description})
{
    $CONTROL_table_string .= join(',',@{$table_row})."\n";
}

# Calculate the current table description (what it currently looks like).
$table_description = $dbh->selectall_arrayref('describe metadata');
foreach my $table_row (@{$table_description})
{
    $table_string .= join(',',@{$table_row})."\n";
}

if ($table_string ne $CONTROL_table_string)
{
    # Log this incident.
    print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
          '.'."\n");
    # Delete the table.
    $dbh->do('DROP TABLE IF EXISTS metadata');
    # Generate the table.
    $dbh->do($make_metadata_table);
}

$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay. Done with control.

# ----------------------------------------------- Set utilitysemaphore to zero.
# Mark every existing row as stale; rows re-inserted by this run get the
# semaphore set to 1, and leftover 0-rows are deleted in the main loop.
$dbh->do('UPDATE metadata SET utilitysemaphore = 0');
197:
# ========================================================= Main functionality.

# - Determine home authors on this server based on resources dir and user tree.

# RESOURCES: the resources directory (subdirs correspond to author usernames).
opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
    (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
     and exit(0));

# query_home_server_status will look for user home directories on this machine;
# only authors whose home directory exists here are (re)indexed.
my @homeusers =
    grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
                                    $perlvar{'lonDefDomain'}.'/'.$_)
          } grep {!/^\.\.?$/} readdir(RESOURCES);
closedir(RESOURCES);

unless (@homeusers)
{
    print(LOG '=== No home users found on this server.'."\n");
}

# Consider each author individually.
foreach my $user (@homeusers)
{
    # Make a log entry.
    print(LOG "\n".'=== User: '.$user."\n\n");

    # Get filesystem path to this user's directory.
    my $user_directory =
        &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);

    # Remove left-over db-files from a potentially crashed searchcat run.
    unlink($user_directory.'/nohist_new_resevaldata.db');

    # Cleanup the metalist array (file-scoped; filled by &wanted below).
    undef(@metalist);
    @metalist = ();

    # This will add entries to the @metalist array.
    &File::Find::find(\&wanted,
                      $perlvar{'lonDocRoot'}.'/res/'.
                      $perlvar{'lonDefDomain'}.'/'.$user);

    # -- process file to get metadata and put into search catalog SQL database
    # Also, build and store dynamic metadata.
    # Also, delete record entries before refreshing.
    foreach my $m (@metalist)
    {
        # Log this action.
        print(LOG "- ".$m."\n");

        # Get metadata from the file.
        my $ref = get_metadata_from_file($m);

        # Make a datarecord identifier for this resource
        # ('/res/<domain>/<user>/...', .meta suffix stripped).
        my $m2 = '/res/'.declutter($m);
        $m2 =~ s/\.meta$//;

        # Build and store dynamic metadata inside nohist_resevaldata.db.
        # NOTE(review): the return value is deliberately ignored; only the
        # subroutine's writes to nohist_new_resevaldata.db matter here.
        build_on_the_fly_dynamic_metadata($m2);

        # Delete record if it already exists (case-sensitive url match via
        # 'like binary').
        my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
        my $sth = $dbh->prepare($q2);
        $sth->execute();
        my $r1 = $sth->fetchall_arrayref;
        if (@$r1)
        {
            $sth =
                $dbh->prepare('delete from metadata where url like binary '.
                              "'".$m2."'");
            $sth->execute();
        }

        # Add new/replacement record into the loncapa:metadata table.
        # The trailing literal 1 sets utilitysemaphore, marking the row as
        # refreshed by this run.
        # NOTE(review): metadata values are interpolated directly into the
        # SQL text; a value containing a double-quote would break the
        # statement.  Placeholders would be safer — confirm before changing.
        $sth = $dbh->prepare('insert into metadata values ('.
                             '"'.delete($ref->{'title'}).'"'.','.
                             '"'.delete($ref->{'author'}).'"'.','.
                             '"'.delete($ref->{'subject'}).'"'.','.
                             '"'.$m2.'"'.','.
                             '"'.delete($ref->{'keywords'}).'"'.','.
                             '"'.'current'.'"'.','.
                             '"'.delete($ref->{'notes'}).'"'.','.
                             '"'.delete($ref->{'abstract'}).'"'.','.
                             '"'.delete($ref->{'mime'}).'"'.','.
                             '"'.delete($ref->{'language'}).'"'.','.
                             '"'.sql_formatted_time(
                                 delete($ref->{'creationdate'})).'"'.','.
                             '"'.sql_formatted_time(
                                 delete($ref->{'lastrevisiondate'})).'"'.','.
                             '"'.delete($ref->{'owner'}).'"'.','.
                             '"'.delete($ref->{'copyright'}).'"'.','.
                             '1'.')');
        $sth->execute();
    }

    # ----------------------- Clean up database, remove stale SQL database records.
    # NOTE(review): this DELETE sits inside the per-user loop (see the
    # closing brace below), so it runs once per author, transiently removing
    # rows of authors not yet re-indexed in this pass; they are re-inserted
    # on their own iteration.
    $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');

    # -------------------------------------------------- Copy over the new db-files
    system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
           $user_directory.'/nohist_resevaldata.db');
}

# --------------------------------------------------- Close database connection
$dbh->disconnect;
print LOG "\n==== Searchcat completed ".localtime()." ====\n";
close(LOG);
exit(0);
307:
308: # ================================================================ Subroutines.
309:
310: =pod
311:
312: =head1 SUBROUTINES
313:
314: =cut
315:
316: =pod
317:
318: B<unescape> - translate escaped %XX syntax back into the characters it encodes.
319:
320: =over 4
321:
322: Parameters:
323:
324: =item I<$str> - string with unweird characters.
325:
326: =back
327:
328: =over 4
329:
330: Returns:
331:
332: =item C<string> - string with potentially weird characters.
333:
334: =back
335:
336: =cut
337:
sub unescape ($)
{
    # Decode %XX hex escapes back into the characters they represent.
    my ($encoded) = @_;
    $encoded =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/eg;
    return($encoded);
}
344:
345: =pod
346:
347: B<escape> - translate strange characters to unstrange escaped syntax.
348:
349: =over 4
350:
351: Parameters:
352:
353: =item I<$str> - string with potentially weird characters to unweird-ify.
354:
355: =back
356:
357: =over 4
358:
359: Returns:
360:
361: =item C<string> - unweird-ified string.
362:
363: =back
364:
365: =cut
366:
sub escape ($)
{
    # Encode every non-word character as a lowercase %XX hex escape.
    my ($raw) = @_;
    $raw =~ s/(\W)/sprintf('%%%02x',ord($1))/eg;
    return($raw);
}
373:
374: =pod
375:
376: B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
377:
378: Dynamic metadata is stored in a nohist_resevaldata GDBM database.
379: Most of the calculations in this subroutine are totally pointless
380: and not useful for anything that this subroutine does.
381: (THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.)
382: The only thing that this subroutine really makes happen is adjusting
383: a 'count' value inside the F<nohist_new_resevaldata.db> as well
384: as updating F<nohist_new_resevaldata.db> with information from
385: F<nohist_resevaldata.db>.
386:
387: =over 4
388:
389: Parameters:
390:
391: =item I<$url> - the filesystem path (url may be a misnomer...)
392:
393: =back
394:
395: =over 4
396:
397: Returns:
398:
399: =item C<hash> - key-value table of dynamically evaluated metadata.
400:
401: =back
402:
403: =cut
404:
sub build_on_the_fly_dynamic_metadata ($)
{
    # Recompute per-resource dynamic metadata (usage counts, averaged
    # evaluations, appended comment strings) from the author's
    # nohist_resevaldata.db, copy every non-'count' record into
    # nohist_new_resevaldata.db, and append one fresh aggregated 'count'
    # record there.  Returns the aggregated values as a hash, although the
    # caller in the main loop ignores that return value.
    # NOTE(review): %evaldata and %newevaldata are undeclared package
    # globals (the script never enables strict); they are only meaningful
    # between the tie()/untie() calls below.

    # Normalize the argument to 'domain/author/...' form and locate the
    # author's user directory on this filesystem.
    my $url = &declutter(shift(@_));
    $url =~ s/\.meta$//;
    my %returnhash = ();
    my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
    my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);

    # Proceed only if BOTH GDBM databases can be tied: the existing one
    # read-only, the new one created/writable.
    if ((tie(%evaldata,'GDBM_File',
             $user_directory.
             '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
        (tie(%newevaldata,'GDBM_File',
             $user_directory.
             '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
    {
        # Per-category running totals and entry counts.
        my %sum = ();
        my %cnt = ();

        # How each category aggregates: 'add' = summed, 'avg' = arithmetic
        # mean, 'cnt' = number of entries, 'app' = string values appended
        # with an '<hr />' separator.
        my %listitems=('count'      => 'add',
                       'course'     => 'add',
                       'avetries'   => 'avg',
                       'stdno'      => 'add',
                       'difficulty' => 'avg',
                       'clear'      => 'avg',
                       'technical'  => 'avg',
                       'helpful'    => 'avg',
                       'correct'    => 'avg',
                       'depth'      => 'avg',
                       'comments'   => 'app',
                       'usage'      => 'cnt'
                       );

        # Untaint the url and use as part of a regular expression that
        # matches unescaped keys of the form '...___<url>___<category>'.
        my $regexp = $url;
        $regexp =~ s/(\W)/\\$1/g;
        $regexp = '___'.$regexp.'___([a-z]+)$';

        # Walk every record in the old database, keeping those whose
        # (unescaped) key refers to this url.  The 'count' tally feeds the
        # fresh record written below; all non-'count' records are copied
        # verbatim into the new database.
        foreach (keys %evaldata)
        {
            my $key = &unescape($_);
            if ($key =~ /$regexp/) # If url-based entry exists.
            {
                my $ctype = $1; # Category captured by $regexp above.

                # Count this entry toward its category.
                if (defined($cnt{$ctype}))
                {
                    $cnt{$ctype}++;
                }
                else
                {
                    $cnt{$ctype} = 1;
                }
                unless ($listitems{$ctype} eq 'app')
                {
                    # Numeric categories: accumulate the running sum.
                    if (defined($sum{$ctype}))
                    {
                        $sum{$ctype} += $evaldata{$_};
                    }
                    else
                    {
                        $sum{$ctype} = $evaldata{$_};
                    }
                }
                else # 'app' mode, means to use '<hr />' as a separator
                {
                    if (defined($sum{$ctype}))
                    {
                        if ($evaldata{$_})
                        {
                            $sum{$ctype} .= '<hr />'.$evaldata{$_};
                        }
                    }
                    else
                    {
                        $sum{$ctype} = ''.$evaldata{$_};
                    }
                }
                if ($ctype ne 'count')
                {
                    # Refresh this non-'count' record into the new db.
                    $newevaldata{$_} = $evaldata{$_};
                }
            }
        }

        # Reduce the per-category tallies into the return hash.
        foreach (keys %cnt)
        {
            if ($listitems{$_} eq 'avg')
            {
                # Mean rounded to two decimals ($cnt{$_} >= 1 here, so no
                # division by zero is possible).
                $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
            }
            elsif ($listitems{$_} eq 'cnt')
            {
                $returnhash{$_} = $cnt{$_};
            }
            else
            {
                $returnhash{$_} = $sum{$_};
            }
        }

        # Write the aggregated access count into the new database under a
        # fresh key stamped with this process id and the current time.
        if ($returnhash{'count'})
        {
            my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
            $newevaldata{$newkey} = $returnhash{'count'};
        }

        untie(%evaldata); # Close/release the original nohist database.
        untie(%newevaldata); # Close/release the new nohist database.
    }
    # Empty when either tie() failed.
    return(%returnhash);
}
538:
539: =pod
540:
541: B<wanted> - used by B<File::Find::find> subroutine.
542:
543: This evaluates whether a file is wanted, and pushes it onto the
544: I<@metalist> array. This subroutine was, for the most part, auto-generated
545: by the B<find2perl> command.
546:
547: =over 4
548:
549: Parameters:
550:
551: =item I<$file> - a path to the file.
552:
553: =back
554:
555: =over 4
556:
557: Returns:
558:
559: =item C<boolean> - true or false based on logical statement.
560:
561: =back
562:
563: =cut
564:
sub wanted ($)
{
    # Callback for File::Find::find(): $_ holds the current entry's
    # basename and $File::Find::dir its directory.  Keeps plain files
    # ending in '.meta' while skipping versioned copies of the form
    # 'name.<number>.<ext>.meta'; matches are accumulated (as full paths)
    # in the file-scoped @metalist.
    # NOTE(review): $dev..$gid are undeclared package globals (no strict
    # in this script); only lstat's success/failure is actually used.
    (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
    -f $_ &&
    /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
    push(@metalist,$File::Find::dir.'/'.$_);
}
572:
573: =pod
574:
575: B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
576:
577: I<Note that this is significantly altered from a subroutine present in lonnet.>
578:
579: =over 4
580:
581: Parameters:
582:
583: =item I<$file> - a path to the file.
584:
585: =back
586:
587: =over 4
588:
589: Returns:
590:
591: =item C<hash reference> - a hash array (keys and values).
592:
593: =back
594:
595: =cut
596:
sub get_metadata_from_file ($)
{
    # Parse a resource's .meta XML file and return a reference to a hash
    # of its metadata.
    #
    # Parameter: $filename - path to the resource (a '.meta' suffix is
    # appended when absent; the path is decluttered first).
    # Returns: hash reference where, for each tag <entry part="p" name="n">,
    # key 'entry_p_n' holds the tag's text content (falling back to the
    # tag's 'default' attribute when the text is empty/false),
    # 'entry_p_n.<attr>' holds each attribute's value, and 'keys'
    # accumulates a comma-separated list of all tag identifiers seen.
    my ($filename) = @_;
    my %metatable; # Used to store return value of hash-tabled metadata.
    $filename = &declutter($filename); # Remove non-identifying filesystem info
    my $uri = ''; # Key prefix; empty since the URI is not relevant here.
    unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
    {
        $filename .= '.meta'; # Append a .meta suffix.
    }

    # Get the file contents.
    my $metadata_string =
        &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);

    # Parse the file based on its XML tags.
    my $parser = HTML::TokeParser->new(\$metadata_string);
    my $token;
    while ($token = $parser->get_token) # Loop through tokens.
    {
        if ($token->[0] eq 'S') # If it is a start token.
        {
            my $entry = $token->[1];
            my $unikey = $entry; # A unique identifier for this xml tag key.
            if (defined($token->[2]->{'part'}))
            {
                $unikey .= '_'.$token->[2]->{'part'};
            }
            if (defined($token->[2]->{'name'}))
            {
                $unikey .= '_'.$token->[2]->{'name'};
            }
            # Append $unikey to metatable's keys entry.
            if ($metatable{$uri.'keys'})
            {
                $metatable{$uri.'keys'} .= ','.$unikey;
            }
            else
            {
                $metatable{$uri.'keys'} = $unikey;
            }
            # Store each attribute of the tag under '<unikey>.<attr>'.
            # BUG FIX: index with the loop variable $t3 — the previous
            # code mistakenly used $_, so attribute values were stored
            # under keys built from an unrelated variable.
            foreach my $t3 (@{$token->[3]})
            {
                $metatable{$uri.''.$unikey.'.'.$t3} = $token->[2]->{$t3};
            }
            # If there was no text contained inside the tags, set = default.
            unless
                (
                 $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
                )
            {
                $metatable{$uri.''.$unikey} =
                    $metatable{$uri.''.$unikey.'.default'};
            }
        }
    }
    # Return with a key-value table of XML tags and their tag contents.
    return(\%metatable);
}
656:
657: =pod
658:
659: B<get_file_contents> - returns either the contents of the file or a -1.
660:
661: =over 4
662:
663: Parameters:
664:
665: =item I<$file> - a complete filesystem path to the file.
666:
667: =back
668:
669: =over 4
670:
671: Returns:
672:
673: =item C<string> - file contents or a -1.
674:
675: =back
676:
677: =cut
678:
sub get_file_contents ($)
{
    # Return the entire contents of $path as one string, or -1 when the
    # file does not exist (this sentinel distinguishes "missing" from an
    # empty file, which yields '').
    my ($path) = @_;

    return(-1) unless (-e $path);

    # Slurp the whole file in a single read instead of accumulating
    # line by line; $/ is localized so slurp mode does not leak out.
    my $handle = IO::File->new($path);
    local $/ = undef;
    my $contents = <$handle>;
    $contents = '' unless (defined($contents));

    return($contents);
}
700:
701: =pod
702:
703: B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
704:
705: =over 4
706:
707: Parameters:
708:
709: =item I<$filesystem_path> - a complete filesystem path.
710:
711: =back
712:
713: =over 4
714:
715: Returns:
716:
717: =item C<string> - remnants of the filesystem path (beginning portion removed).
718:
719: =back
720:
721: =cut
722:
sub declutter
{
    # Strip the document root, then a leading slash, then a leading
    # 'res/' component, leaving the 'domain/author/...' remnant.
    my $path = shift(@_);

    my $docroot = $perlvar{'lonDocRoot'};
    $path =~ s/^$docroot//;
    $path =~ s{^/}{};
    $path =~ s{^res/}{};

    return($path);
}
735:
736: =pod
737:
738: B<query_home_server_status> - Is this the home server of an author's directory?
739:
740: =over 4
741:
742: Parameters:
743:
744: =item I<$author_filesystem_path> - directory path for a user.
745:
746: =back
747:
748: =over 4
749:
750: Returns:
751:
752: =item C<boolean> - 1 if true; 0 if false.
753:
754: =back
755:
756: =cut
757:
sub query_home_server_status ($)
{
    # Decide whether this machine is the home server for the author whose
    # resource directory path is given: true exactly when the author's
    # ordinary user directory exists on the local filesystem.
    my $res_path = shift(@_);

    # Reduce '/home/httpd/html/res/<domain>/<user>...' to '<domain>/<user>'.
    $res_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;

    # Split into the two components and build the user directory path.
    my ($domain,$name) = split(m!/!,$res_path);
    my $home = construct_path_to_user_directory($domain,$name);

    return((-e $home) ? 1 : 0);
}
780:
781: =pod
782:
783: B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
784:
785: =over 4
786:
787: Parameters:
788:
789: =item I<$user_domain> - the loncapa domain of the user.
790:
791: =item I<$username> - the unique username (user id) of the user.
792:
793: =back
794:
795: =over 4
796:
797: Returns:
798:
799: =item C<string> - representing the path on the filesystem.
800:
801: =back
802:
803: =cut
804:
sub construct_path_to_user_directory ($$)
{
    # Build the filesystem path of a user's home directory under
    # lonUsersDir, hashing the user into three single-character
    # subdirectory levels taken from the first characters of the
    # (underscore-padded) username.
    my ($domain,$name) = @_;

    # Untaint both components: keep word characters only.
    $domain =~ s/\W//g;
    $name =~ s/\W//g;

    # 'abc...' -> 'a/b/c/' (padding guarantees at least three chars).
    my $hashed_levels = $name.'__';
    $hashed_levels =~ s!(.)(.)(.).*!$1/$2/$3/!;

    # Note: $hashed_levels ends in '/', so join() yields a double slash
    # before the username — preserved for compatibility.
    my $full_path = join('/',($perlvar{'lonUsersDir'},
                              $domain,
                              $hashed_levels,
                              $name));

    return($full_path);
}
829:
830: =pod
831:
832: B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
833:
834: =over 4
835:
836: Parameters:
837:
838: =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
839:
840: =back
841:
842: =over 4
843:
844: Returns:
845:
846: =item C<string> - datetime sql formatted string.
847:
848: =back
849:
850: =cut
851:
sub sql_formatted_time ($)
{
    # Convert a time value (seconds since epoch, or an sql-style string
    # that &sanitize_time normalizes) into MySQL's canonical DATETIME
    # representation 'YYYY-MM-DD HH:MM:SS'.
    #
    # Parameter: $epochtime-ish value (sanitized internally).
    # Returns: zero-padded datetime string in local time.
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
        localtime(&sanitize_time(shift(@_)));

    # localtime months run 0..11 and years are offset from 1900.
    # IMPROVEMENT: fields are now zero-padded (e.g. '2003-02-03 05:39:07'
    # instead of '2003-2-3 5:39:7') to match the canonical DATETIME format.
    return(sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                   $year+1900,$mon+1,$mday,$hour,$min,$sec));
}
870:
871:
872: # ==================================== The following two subroutines are needed
873: # for accommodating incorrect time formats inside the metadata.
874:
875: =pod
876:
877: B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
878:
879: =over 4
880:
881: Parameters:
882:
883: =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
884:
885: =back
886:
887: =over 4
888:
889: Returns:
890:
891: =item C<integer> - seconds since epoch.
892:
893: =back
894:
895: =cut
896:
sub make_seconds_since_epoch (@)
{
    # Convert a key-value time description (year, month, day, hours,
    # minutes, seconds, dlsav) into seconds since the epoch
    # (January 1, 1970, 00:00:00 UTC) via POSIX::mktime.
    my %when = @_;

    # mktime expects (sec,min,hour,mday,mon-1,year-1900,wday,yday,isdst);
    # the weekday/yearday slots are ignored, so zeroes are passed.
    my @mktime_args = ($when{'seconds'},
                       $when{'minutes'},
                       $when{'hours'},
                       $when{'day'},
                       $when{'month'} - 1,
                       $when{'year'} - 1900,
                       0,
                       0,
                       $when{'dlsav'});

    return(POSIX::mktime(@mktime_args));
}
916:
917: =pod
918:
919: B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
920:
921: Somebody described this subroutine as
922: "retro-fixing of un-backward-compatible time format".
923:
924: What this means, is that a part of this code expects to get UTC seconds
925: since the epoch (beginning of 1970). Yet, some of the .meta files have
926: sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
927: integers (e.g. 1044147435). These time strings do not encode the timezone
928: and, in this sense, can be considered "un-backwards-compatible".
929:
930: =over 4
931:
932: Parameters:
933:
934: =item I<$potentially_badformat_string> - string to "retro-fix".
935:
936: =back
937:
938: =over 4
939:
940: Returns:
941:
942: =item C<integer> - seconds since epoch.
943:
944: =back
945:
946: =cut
947:
sub sanitize_time ($)
{
    # Normalize a time value: an sql-style 'YYYY-MM-DD HH:MM:SS' string
    # is converted to seconds since epoch; anything else is assumed to
    # already be seconds since epoch and is returned unchanged.
    my $raw = shift(@_);

    if ($raw =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
    {
        return(&make_seconds_since_epoch('year'    => $1,
                                         'month'   => $2,
                                         'day'     => $3,
                                         'hours'   => $4,
                                         'minutes' => $5,
                                         'seconds' => $6));
    }

    return($raw);
}
967:
968: =pod
969:
970: =head1 AUTHOR
971:
972: Written to help the loncapa project.
973:
974: Scott Harrison, sharrison@users.sourceforge.net
975:
976: This is distributed under the same terms as loncapa (i.e. "freeware").
977:
978: =cut
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>