--- loncom/interface/loncoursedata.pm 2008/11/17 14:16:55 1.189 +++ loncom/interface/loncoursedata.pm 2008/11/20 21:22:21 1.190 @@ -1,6 +1,6 @@ # The LearningOnline Network with CAPA # -# $Id: loncoursedata.pm,v 1.189 2008/11/17 14:16:55 jms Exp $ +# $Id: loncoursedata.pm,v 1.190 2008/11/20 21:22:21 jms Exp $ # # Copyright Michigan State University Board of Trustees # @@ -25,7 +25,6 @@ # http://www.lon-capa.org/ # ### - =pod =head1 NAME @@ -44,7 +43,7 @@ Set of functions that download and proce Apache::lonmysql LONCAPA Digest::MD5 - + =cut package Apache::loncoursedata; @@ -57,25 +56,6 @@ use Apache::lonmysql(); use LONCAPA; use Digest::MD5(); -=pod - -=head1 DOWNLOAD INFORMATION - -This section contains all the functions that get data from other servers -and/or itself. - -=item &make_into_hash($values); - -Returns a reference to a hash as described by $values. $values is -assumed to be the result of - join(':',map {&escape($_)} %orighash); - -This is a helper function for get_current_state. - -=cut - -################################################ -################################################ sub make_into_hash { my $values = shift; my %tmp = map { &unescape($_); } split(':',$values); @@ -83,193 +63,6 @@ sub make_into_hash { } -################################################ -################################################ - -=pod - -=head1 LOCAL DATA CACHING SUBROUTINES - -The local caching is done using MySQL. There is no fall-back implementation -if MySQL is not running. - -The programmers interface is to call &get_current_state() or some other -primary interface subroutine (described below). The internals of this -storage system are documented here. - -There are six tables used to store student performance data (the results of -a dumpcurrent). Each of these tables is created in MySQL with a name of -$courseid_*****, where ***** is 'symb', 'part', or whatever is appropriate -for the table. The tables and their purposes are described below. 
- -Some notes before we get started. - -Each table must have a PRIMARY KEY, which is a column or set of columns which -will serve to uniquely identify a row of data. NULL is not allowed! - -INDEXes work best on integer data. - -JOIN is used to combine data from many tables into one output. - -lonmysql.pm is used for some of the interface, specifically the table creation -calls. The inserts are done in bulk by directly calling the database handler. -The SELECT ... JOIN statement used to retrieve the data does not have an -interface in lonmysql.pm and I shudder at the thought of writing one. - -=head3 Table Descriptions - -=over 4 - -=item Tables used to store meta information - -The following tables hold data required to keep track of the current status -of a students data in the tables or to look up the students data in the tables. - -=over 4 - -=item $symb_table - -The symb_table has two columns. The first is a 'symb_id' and the second -is the text name for the 'symb' (limited to 64k). The 'symb_id' is generated -automatically by MySQL so inserts should be done on this table with an -empty first element. This table has its PRIMARY KEY on the 'symb_id'. - -=item $part_table - -The part_table has two columns. The first is a 'part_id' and the second -is the text name for the 'part' (limited to 100 characters). The 'part_id' is -generated automatically by MySQL so inserts should be done on this table with -an empty first element. This table has its PRIMARY KEY on the 'part' (100 -characters) and a KEY on 'part_id'. - -=item $student_table - -The student_table has 7 columns. The first is a 'student_id' assigned by -MySQL. The second is 'student' which is username:domain. The third through -fifth are 'section', 'status' (enrollment status), and 'classification' -(to be used in the future). The sixth and seventh ('updatetime' and -'fullupdatetime') contain the time of last update and full update of student -data. 
This table has its PRIMARY KEY on the 'student_id' column and is indexed -on 'student', 'section', and 'status'. - -=item $groupnames_table - -The groupnames_table has 2 columns. The first is a 'group_id' assigned by -MySQL. The second is 'groupname' which is the name of the group in the course. - -=item $students_groups_table - -The students_groups_table has 2 columns. The first is the 'student_id', and the -second is the 'group_id'. These two columns comprise the PRIMARY KEY for this -table, as an individual student may be affiliated with more than one group at -any time. This table is indexed on both student_id and group_id. - -=back - -=item Tables used to store current status data - -The following tables store data only about the students current status on -a problem, meaning only the data related to the last attempt on a problem. - -=over 4 - -=item $performance_table - -The performance_table has 9 columns. The first three are 'symb_id', -'student_id', and 'part_id'. These comprise the PRIMARY KEY for this table -and are directly related to the $symb_table, $student_table, and $part_table -described above. MySQL does better indexing on numeric items than text, -so we use these three "index tables". The remaining columns are -'solved', 'tries', 'awarded', 'award', 'awarddetail', and 'timestamp'. -These are either the MySQL type TINYTEXT or various integers ('tries' and -'timestamp'). This table has KEYs of 'student_id' and 'symb_id'. -For use of this table, see the functions described below. - -=item $parameters_table - -The parameters_table holds the data that does not fit neatly into the -performance_table. The parameters table has four columns: 'symb_id', -'student_id', 'parameter', and 'value'. 'symb_id', 'student_id', and -'parameter' comprise the PRIMARY KEY for this table. 'parameter' is -limited to 255 characters. 'value' is limited to 64k characters. 
- -=back - -=item Tables used for storing historic data - -The following tables are used to store almost all of the transactions a student -has made on a homework problem. See loncapa/docs/homework/datastorage for -specific information about each of the parameters stored. - -=over 4 - -=item $fulldump_response_table - -The response table holds data (documented in loncapa/docs/homework/datastorage) -associated with a particular response id which is stored when a student -attempts a problem. The following are the columns of the table, in order: -'symb_id','part_id','response_id','student_id','transaction','tries', -'awarddetail', 'response_specific', 'response_specific_value', -'response_specific_2', 'response_specific_value_2', and 'submission -(the text of the students submission). The primary key is based on the -first five columns listed above. - -=item $fulldump_part_table - -The part table holds data (documented in loncapa/docs/homework/datastorage) -associated with a particular part id which is stored when a student attempts -a problem. The following are the columns of the table, in order: -'symb_id','part_id','student_id','transaction','tries','award','awarded', -and 'previous'. The primary key is based on the first five columns listed -above. - -=item $fulldump_timestamp_table - -The timestamp table holds the timestamps of the transactions which are -stored in $fulldump_response_table and $fulldump_part_table. This data is -about both the response and part data. Columns: 'symb_id','student_id', -'transaction', and 'timestamp'. -The primary key is based on the first 3 columns. - -=item $weight_table - -The weight table holds the weight for the problems used in the class. -Whereas the weight of a problem can vary by section and student the data -here is applied to the class as a whole. -Columns: 'symb_id','part_id','response_id','weight'. 
- -=back - -=back - -=head3 Important Subroutines - -Here is a brief overview of the subroutines which are likely to be of -interest: - -=over 4 - -=item &get_current_state(): programmers interface. - -=item &init_dbs(): table creation - -=item &update_student_data(): data storage calls - -=item &get_student_data_from_performance_cache(): data retrieval - -=back - -=head3 Main Documentation - -=over 4 - -=cut - -################################################ -################################################ - -################################################ -################################################ { # Begin scope of table identifiers my $current_course =''; @@ -286,24 +79,9 @@ my $fulldump_timestamp_table; my $weight_table; my @Tables; -################################################ -################################################ -=pod -=item &init_dbs() -Input: course id - -Output: 0 on success, positive integer on error - -This routine issues the calls to lonmysql to create the tables used to -store student data. - -=cut - -################################################ -################################################ sub init_dbs { my ($courseid,$drop) = @_; &setup_table_names($courseid); @@ -658,24 +436,8 @@ sub init_dbs { return 0; } -################################################ -################################################ - -=pod - -=item &delete_caches() - -This routine drops all the tables associated with a course from the -MySQL database. -Input: course id (optional, determined by environment if omitted) - -Returns: nothing - -=cut -################################################ -################################################ sub delete_caches { my $courseid = shift; $courseid = $env{'request.course.id'} if (! 
defined($courseid)); @@ -693,31 +455,6 @@ sub delete_caches { return; } -################################################ -################################################ - -=pod - -=item &get_part_id() - -Get the MySQL id of a problem part string. - -Input: $part - -Output: undef on error, integer $part_id on success. - -=item &get_part() - -Get the string describing a part from the MySQL id of the problem part. - -Input: $part_id - -Output: undef on error, $part string on success. - -=cut - -################################################ -################################################ my $have_read_part_table = 0; my %ids_by_part; @@ -759,31 +496,6 @@ sub get_part { return undef; # error } -################################################ -################################################ - -=pod - -=item &get_symb_id() - -Get the MySQL id of a symb. - -Input: $symb - -Output: undef on error, integer $symb_id on success. - -=item &get_symb() - -Get the symb associated with a MySQL symb_id. - -Input: $symb_id - -Output: undef on error, $symb on success. - -=cut - -################################################ -################################################ my $have_read_symb_table = 0; my %ids_by_symb; @@ -824,32 +536,6 @@ sub get_symb { return undef; # error } -################################################ -################################################ - -=pod - -=item &get_student_id() - -Get the MySQL id of a student. - -Input: $sname, $dom - -Output: undef on error, integer $student_id on success. - -=item &get_student() - -Get student username:domain associated with the MySQL student_id. - -Input: $student_id - -Output: undef on error, string $student (username:domain) on success. 
- -=cut - -################################################ -################################################ - my $have_read_student_table = 0; my %ids_by_student; my %students_by_id; @@ -1052,23 +738,6 @@ sub populate_students_groups_table { return; } -################################################ -################################################ - -=pod - -=item &clear_internal_caches() - -Causes the internal caches used in get_student_id, get_student, -get_symb_id, get_symb, get_part_id, and get_part to be undef'd. - -Needs to be called before the first operation with the MySQL database -for a given Apache request. - -=cut - -################################################ -################################################ sub clear_internal_caches { $have_read_part_table = 0; undef(%ids_by_part); @@ -1083,53 +752,12 @@ sub clear_internal_caches { undef(%ids_by_groupname); } - -################################################ -################################################ - sub symb_is_for_task { my ($symb) = @_; return ($symb =~ /\.task$/); } -################################################ -################################################ - -=pod - -=item &update_full_student_data($sname,$sdom,$courseid) - -Does a lonnet::dump on a student to populate the courses tables. - -Input: $sname, $sdom, $courseid - -Output: $returnstatus - -$returnstatus is a string describing any errors that occurred. 'okay' is the -default. - -This subroutine loads a students data using lonnet::dump and inserts -it into the MySQL database. The inserts are done on three tables, -$fulldump_response_table, $fulldump_part_table, and $fulldump_timestamp_table. -The INSERT calls are made directly by this subroutine, not through lonmysql -because we do a 'bulk'insert which takes advantage of MySQLs non-SQL -compliant INSERT command to insert multiple rows at a time. -If anything has gone wrong during this process, $returnstatus is updated with -a description of the error. 
- -Once the "fulldump" tables are updated, the tables used for chart and -spreadsheet (which hold only the current state of the student on their -homework, not historical data) are updated. If all updates have occurred -successfully, $student_table is updated to reflect the time of the update. - -Notice we do not insert the data and immediately query it. This means it -is possible for there to be data returned this first time that is not -available the second time. CYA. - -=cut -################################################ -################################################ sub update_full_student_data { my ($sname,$sdom,$courseid) = @_; # @@ -1380,40 +1008,7 @@ sub update_full_student_data { return $returnstatus; } -################################################ -################################################ -=pod - -=item &update_student_data() - -Input: $sname, $sdom, $courseid - -Output: $returnstatus, \%student_data - -$returnstatus is a string describing any errors that occurred. 'okay' is the -default. -\%student_data is the data returned by a call to lonnet::currentdump. - -This subroutine loads a students data using lonnet::currentdump and inserts -it into the MySQL database. The inserts are done on two tables, -$performance_table and $parameters_table. $parameters_table holds the data -that is not included in $performance_table. See the description of -$performance_table elsewhere in this file. The INSERT calls are made -directly by this subroutine, not through lonmysql because we do a 'bulk' -insert which takes advantage of MySQLs non-SQL compliant INSERT command to -insert multiple rows at a time. If anything has gone wrong during this -process, $returnstatus is updated with a description of the error and -\%student_data is returned. - -Notice we do not insert the data and immediately query it. This means it -is possible for there to be data returned this first time that is not -available the second time. CYA. 
- -=cut - -################################################ -################################################ sub update_student_data { my ($sname,$sdom,$courseid) = @_; # @@ -1580,22 +1175,7 @@ sub store_student_data { return ($returnstatus,$student_data); } -###################################### -###################################### - -=pod - -=item &ensure_tables_are_set_up($courseid) - -Checks to be sure the MySQL tables for the given class are set up. -If $courseid is omitted it will be obtained from the environment. - -Returns nothing on success and 'error' on failure - -=cut -###################################### -###################################### sub ensure_tables_are_set_up { my ($courseid) = @_; $courseid = $env{'request.course.id'} if (! defined($courseid)); @@ -1633,26 +1213,6 @@ sub ensure_tables_are_set_up { } } -################################################ -################################################ - -=pod - -=item &ensure_current_data() - -Input: $sname, $sdom, $courseid - -Output: $status, $data - -This routine ensures the data for a given student is up to date. -The $student_table is queried to determine the time of the last update. -If the students data is out of date, &update_student_data() is called. -The return values from the call to &update_student_data() are returned. - -=cut - -################################################ -################################################ sub ensure_current_data { my ($sname,$sdom,$courseid) = @_; my $status = 'okay'; # return value @@ -1683,29 +1243,7 @@ sub ensure_current_data { return ($status,$data); } -################################################ -################################################ - -=pod - -=item &ensure_current_full_data($sname,$sdom,$courseid) -Input: $sname, $sdom, $courseid - -Output: $status - -This routine ensures the fulldata (the data from a lonnet::dump, not a -lonnet::currentdump) for a given student is up to date. 
-The $student_table is queried to determine the time of the last update. -If the students fulldata is out of date, &update_full_student_data() is -called. - -The return value from the call to &update_full_student_data() is returned. - -=cut - -################################################ -################################################ sub ensure_current_full_data { my ($sname,$sdom,$courseid) = @_; my $status = 'okay'; # return value @@ -1731,37 +1269,7 @@ sub ensure_current_full_data { return $status; } -################################################ -################################################ - -=pod - -=item &get_student_data_from_performance_cache() - -Input: $sname, $sdom, $symb, $courseid - -Output: hash reference containing the data for the given student. -If $symb is undef, all the students data is returned. - -This routine is the heart of the local caching system. See the description -of $performance_table, $symb_table, $student_table, and $part_table. The -main task is building the MySQL request. The tables appear in the request -in the order in which they should be parsed by MySQL. When searching -on a student the $student_table is used to locate the 'student_id'. All -rows in $performance_table which have a matching 'student_id' are returned, -with data from $part_table and $symb_table which match the entries in -$performance_table, 'part_id' and 'symb_id'. When searching on a symb, -the $symb_table is processed first, with matching rows grabbed from -$performance_table and filled in from $part_table and $student_table in -that order. - -Running 'EXPLAIN ' on the 'SELECT' statements generated can be quite -interesting, especially if you play with the order the tables are listed. 
- -=cut -################################################ -################################################ sub get_student_data_from_performance_cache { my ($sname,$sdom,$symb,$courseid)=@_; my $student = $sname.':'.$sdom if (defined($sname) && defined($sdom)); @@ -1845,44 +1353,7 @@ sub get_student_data_from_performance_ca return $studentdata; } -################################################ -################################################ - -=pod - -=item &get_current_state() - -Input: $sname,$sdom,$symb,$courseid - -Output: Described below - -Retrieve the current status of a students performance. $sname and -$sdom are the only required parameters. If $symb is undef the results -of an &Apache::lonnet::currentdump() will be returned. -If $courseid is undef it will be retrieved from the environment. - -The return structure is based on &Apache::lonnet::currentdump. If -$symb is unspecified, all the students data is returned in a hash of -the form: -( - symb1 => { param1 => value1, param2 => value2 ... }, - symb2 => { param1 => value1, param2 => value2 ... }, -) - -If $symb is specified, a hash of -( - param1 => value1, - param2 => value2, -) -is returned. -If no data is found for $symb, or if the student has no performance data, -an empty list is returned. - -=cut - -################################################ -################################################ sub get_current_state { my ($sname,$sdom,$symb,$courseid,$forcedownload)=@_; # @@ -1922,74 +1393,7 @@ sub get_current_state { return (); } -################################################ -################################################ - -=pod - -=item &get_problem_statistics() - -Gather data on a given problem. The database is assumed to be -populated and all local caching variables are assumed to be set -properly. This means you need to call &ensure_current_data for -the students you are concerned with prior to calling this routine. 
- -Inputs: $Sections, Groups, $status, $symb, $part, $courseid, $starttime, - $endtime - -=over 4 - -=item $Sections Array ref containing section names for students. -'all' is allowed to be the first (and only) item in the array. - -=item $Groups Array ref containing group names for students. -'all' is allowed to be the first (and only) item in the array. - -=item $status String describing the status of students - -=item $symb is the symb for the problem. - -=item $part is the part id you need statistics for - -=item $courseid is the course id, of course! - -=item $starttime and $endtime are unix times which to use to limit -the statistical data. - -=back - -Outputs: See the code for up to date information. A hash reference is -returned. The hash has the following keys defined: - -=over 4 - -=item num_students The number of students attempting the problem - -=item tries The total number of tries for the students - -=item max_tries The maximum number of tries taken - -=item mean_tries The average number of tries - -=item num_solved The number of students able to solve the problem - -=item num_override The number of students whose answer is 'correct_by_override' - -=item deg_of_diff The degree of difficulty of the problem - -=item std_tries The standard deviation of the number of tries - -=item skew_tries The skew of the number of tries - -=item per_wrong The number of students attempting the problem who were not -able to answer it correctly. -=back - -=cut - -################################################ -################################################ sub get_problem_statistics { my ($Sections,$Groups,$status,$symb,$part,$courseid,$starttime,$endtime) = @_; return if (! defined($symb) || ! 
defined($part)); @@ -2139,17 +1543,7 @@ sub execute_SQL_request { return (); } -###################################################### -###################################################### -=pod - -=item &populate_weight_table - -=cut - -###################################################### -###################################################### sub populate_weight_table { my ($courseid) = @_; if (! defined($courseid)) { @@ -2208,24 +1602,6 @@ sub populate_weight_table { return; } -########################################################## -########################################################## - -=pod - -=item &limit_by_start_end_times - -Build SQL WHERE condition which limits the data collected by the start -and end times provided - -Inputs: $starttime, $endtime, $table - -Returns: $time_limits - -=cut - -########################################################## -########################################################## sub limit_by_start_end_time { my ($starttime,$endtime,$table) = @_; my $time_requirements = undef; @@ -2240,26 +1616,7 @@ sub limit_by_start_end_time { return $time_requirements; } -########################################################## -########################################################## - -=pod - -=item &limit_by_section_and_status - -Build SQL WHERE condition which limits the data collected by section and -student status. 
- -Inputs: $Sections (array ref) - $enrollment (string: 'any', 'expired', 'active') - $tablename The name of the table that holds the student data - -Returns: $student_requirements,$enrollment_requirements - -=cut -########################################################## -########################################################## sub limit_by_section_and_status { my ($Sections,$enrollment,$tablename) = @_; my $student_requirements = undef; @@ -2288,23 +1645,7 @@ sub limit_by_section_and_status { return ($student_requirements,$enrollment_requirements); } -###################################################### -###################################################### - -=pod - -=item &limit_by_group - -Build SQL LEFT JOIN statement to include students_groups and groupnames tables and SQL WHERE condition which limits the data collected by group. - -Inputs: $Groups (array ref) - $stutable The name of the table which holds the student data. - $grptable The name of the table which maps group_id to groupname. - $stugrptab The name of the table which holds student group affiliations. -Returns: $groups_join,$group_limits - $groups_join JOIN part of SQL statement (to include group related tables) - $group_limits SQL WHERE condition limiting to requested groups -=cut + sub limit_by_group { my ($Groups,$stutable,$grptable,$stugrptab) = @_; @@ -2326,32 +1667,7 @@ sub limit_by_group { return ($groups_join,$group_limits); } -=pod - -=item rank_students_by_scores_on_resources - -Inputs: - $resources: array ref of hash ref. Each hash ref needs key 'symb'. - $Sections: array ref of sections to include, - $Groups: array ref of groups to include. - $enrollment: string, - $courseid (may be omitted) - $starttime (may be omitted) - $endtime (may be omitted) - $has_award_for (may be omitted) - -Returns; An array of arrays. The sub arrays contain a student name and -their score on the resources. $starttime and $endtime constrain the -list to awards obtained during the given time limits. 
$has_score_on -constrains the list to those students who at least attempted the -resource identified by the given symb, which is used to filter out -such students for statistics that would be adversely affected by such -students. - -=cut -###################################################### -###################################################### sub RNK_student { return 0; }; sub RNK_score { return 1; }; @@ -2413,25 +1729,6 @@ sub rank_students_by_scores_on_resources return ($rows); } -######################################################## -######################################################## - -=pod - -=item &get_sum_of_scores - -Inputs: $resource (hash ref, needs {'symb'} key), -$part, (the part id), -$students (array ref, contents of array are scalars holding 'sname:sdom'), -$courseid - -Returns: the sum of the score on the problem part over the students and the - maximum possible value for the sum (taken from the weight table). - -=cut - -######################################################## -######################################################## sub get_sum_of_scores { my ($symb,$part,$students,$courseid,$starttime,$endtime) = @_; if (! defined($courseid)) { @@ -2475,27 +1772,7 @@ sub get_sum_of_scores { return ($rows->[0],$rows->[1]); } -######################################################## -######################################################## - -=pod - -=item &score_stats - -Inputs: $Sections, $enrollment, $symbs, $starttime, - $endtime, $courseid - -$Sections, $enrollment, $starttime, $endtime, and $courseid are the same as -elsewhere in this module. -$symbs is an array ref of symbs -Returns: minimum, maximum, mean, s.d., number of students, and maximum - possible of student scores on the given resources - -=cut - -######################################################## -######################################################## sub score_stats { my ($Sections,$Groups,$enrollment,$symbs,$starttime,$endtime,$courseid)=@_; if (! 
defined($courseid)) { @@ -2554,27 +1831,7 @@ sub score_stats { } -######################################################## -######################################################## - -=pod - -=item &count_stats - -Inputs: $Sections, $Groups, $enrollment, $symbs, $starttime, - $endtime, $courseid - -$Sections, $Groups $enrollment, $starttime, $endtime, and $courseid are the -same as elsewhere in this module. -$symbs is an array ref of symbs - -Returns: minimum, maximum, mean, s.d., and number of students - of the number of items correct on the given resources - -=cut -######################################################## -######################################################## sub count_stats { my ($Sections,$Groups,$enrollment,$symbs,$starttime,$endtime,$courseid)=@_; if (! defined($courseid)) { @@ -2626,17 +1883,7 @@ sub count_stats { return($min,$max,$ave,$std,$count); } -###################################################### -###################################################### - -=pod - -=item get_student_data -=cut - -###################################################### -###################################################### sub get_student_data { my ($students,$courseid) = @_; $courseid = $env{'request.course.id'} if (! defined($courseid)); @@ -2871,17 +2118,6 @@ sub get_response_time_data { } -################################################ -################################################ - -=pod - -=item &get_student_scores($Sections,$Groups,$Symbs,$enrollment,$courseid) - -=cut - -################################################ -################################################ sub get_student_scores { my ($sections,$groups,$Symbs,$enrollment,$courseid,$starttime,$endtime) = @_; $courseid = $env{'request.course.id'} if (! 
defined($courseid)); @@ -2959,23 +2195,8 @@ sub get_student_scores { return $dataset; } -################################################ -################################################ -=pod -=item &setup_table_names() - -input: course id - -output: none - -Cleans up the package variables for local caching. - -=cut - -################################################ -################################################ sub setup_table_names { my ($courseid) = @_; if (! defined($courseid)) { @@ -3035,52 +2256,10 @@ sub temp_table_name { return $base_id.'_'.$affix; } -################################################ -################################################ - -=pod - -=back - -=item End of Local Data Caching Subroutines - -=cut - -################################################ -################################################ } # End scope of table identifiers -################################################ -################################################ - -=pod - -=head3 Classlist Subroutines - -=item &get_classlist(); - -Retrieve the classist of a given class or of the current class. Student -information is returned from the classlist.db file and, if needed, -from the students environment. - -Optional arguments are $cdom, and $cnum (course domain, -and course number, respectively). If either is ommitted the course -will be taken from the current environment ($env{'request.course.id'}, -$env{'course.'.$cid.'.domain'}, and $env{'course.'.$cid.'.num'}). - -Returns a reference to a hash which contains: - keys '$sname:$sdom' - values [$sdom,$sname,$end,$start,$id,$section,$fullname,$status,$type,$lockedtype] - -The constant values CL_SDOM, CL_SNAME, CL_END, etc. can be used -as indices into the returned list to future-proof clients against -changes in the list order. 
- -=cut -################################################ -################################################ sub CL_SDOM { return 0; } sub CL_SNAME { return 1; } @@ -3261,3 +2440,645 @@ sub get_students_groups { 1; __END__ + +=pod + +=head1 NAME + +Apache::loncoursedata + +=head1 SYNOPSIS + +Set of functions that download and process student and course information. + +=head1 PACKAGES USED + + Apache::lonnet + Apache::longroup + Time::HiRes + Apache::lonmysql + LONCAPA + Digest::MD5 + +=head1 DOWNLOAD INFORMATION + +This section contains all the functions that get data from other servers +and/or itself. + + + +=head1 LOCAL DATA CACHING SUBROUTINES + +The local caching is done using MySQL. There is no fall-back implementation +if MySQL is not running. + +The programmers interface is to call &get_current_state() or some other +primary interface subroutine (described below). The internals of this +storage system are documented here. + +There are six tables used to store student performance data (the results of +a dumpcurrent). Each of these tables is created in MySQL with a name of +$courseid_*****, where ***** is 'symb', 'part', or whatever is appropriate +for the table. The tables and their purposes are described below. + +Some notes before we get started. + +Each table must have a PRIMARY KEY, which is a column or set of columns which +will serve to uniquely identify a row of data. NULL is not allowed! + +INDEXes work best on integer data. + +JOIN is used to combine data from many tables into one output. + +lonmysql.pm is used for some of the interface, specifically the table creation +calls. The inserts are done in bulk by directly calling the database handler. +The SELECT ... JOIN statement used to retrieve the data does not have an +interface in lonmysql.pm and I shudder at the thought of writing one. 
+ +=head2 Table Descriptions + +=over 4 + +=head2 Tables used to store meta information + +The following tables hold data required to keep track of the current status +of a students data in the tables or to look up the students data in the tables. + +=over 4 + +=item C<$symb_table> + +The symb_table has two columns. The first is a 'symb_id' and the second +is the text name for the 'symb' (limited to 64k). The 'symb_id' is generated +automatically by MySQL so inserts should be done on this table with an +empty first element. This table has its PRIMARY KEY on the 'symb_id'. + +=item C<$part_table> + +The part_table has two columns. The first is a 'part_id' and the second +is the text name for the 'part' (limited to 100 characters). The 'part_id' is +generated automatically by MySQL so inserts should be done on this table with +an empty first element. This table has its PRIMARY KEY on the 'part' (100 +characters) and a KEY on 'part_id'. + +=item C<$student_table> + +The student_table has 7 columns. The first is a 'student_id' assigned by +MySQL. The second is 'student' which is username:domain. The third through +fifth are 'section', 'status' (enrollment status), and 'classification' +(to be used in the future). The sixth and seventh ('updatetime' and +'fullupdatetime') contain the time of last update and full update of student +data. This table has its PRIMARY KEY on the 'student_id' column and is indexed +on 'student', 'section', and 'status'. + +=item C<$groupnames_table> + +The groupnames_table has 2 columns. The first is a 'group_id' assigned by +MySQL. The second is 'groupname' which is the name of the group in the course. + +=item C<$students_groups_table> + +The students_groups_table has 2 columns. The first is the 'student_id', and the +second is the 'group_id'. These two columns comprise the PRIMARY KEY for this +table, as an individual student may be affiliated with more than one group at +any time. This table is indexed on both student_id and group_id. 
+ +=back + +=head2 Tables used to store current status data + +The following tables store data only about the students current status on +a problem, meaning only the data related to the last attempt on a problem. + +=over 4 + +=item C<$performance_table> + +The performance_table has 9 columns. The first three are 'symb_id', +'student_id', and 'part_id'. These comprise the PRIMARY KEY for this table +and are directly related to the $symb_table, $student_table, and $part_table +described above. MySQL does better indexing on numeric items than text, +so we use these three "index tables". The remaining columns are +'solved', 'tries', 'awarded', 'award', 'awarddetail', and 'timestamp'. +These are either the MySQL type TINYTEXT or various integers ('tries' and +'timestamp'). This table has KEYs of 'student_id' and 'symb_id'. +For use of this table, see the functions described below. + +=item C<$parameters_table> + +The parameters_table holds the data that does not fit neatly into the +performance_table. The parameters table has four columns: 'symb_id', +'student_id', 'parameter', and 'value'. 'symb_id', 'student_id', and +'parameter' comprise the PRIMARY KEY for this table. 'parameter' is +limited to 255 characters. 'value' is limited to 64k characters. + +=back + +=head2 Tables used for storing historic data + +The following tables are used to store almost all of the transactions a student +has made on a homework problem. See loncapa/docs/homework/datastorage for +specific information about each of the parameters stored. + +=over 4 + +=item C<$fulldump_response_table> + +The response table holds data (documented in loncapa/docs/homework/datastorage) +associated with a particular response id which is stored when a student +attempts a problem. 
The following are the columns of the table, in order: +'symb_id','part_id','response_id','student_id','transaction','tries', +'awarddetail', 'response_specific', 'response_specific_value', +'response_specific_2', 'response_specific_value_2', and 'submission' +(the text of the student's submission). The primary key is based on the +first five columns listed above. + +=item C<$fulldump_part_table> + +The part table holds data (documented in loncapa/docs/homework/datastorage) +associated with a particular part id which is stored when a student attempts +a problem. The following are the columns of the table, in order: +'symb_id','part_id','student_id','transaction','tries','award','awarded', +and 'previous'. The primary key is based on the first five columns listed +above. + +=item C<$fulldump_timestamp_table> + +The timestamp table holds the timestamps of the transactions which are +stored in $fulldump_response_table and $fulldump_part_table. This data is +about both the response and part data. Columns: 'symb_id','student_id', +'transaction', and 'timestamp'. +The primary key is based on the first 3 columns. + +=item C<$weight_table> + +The weight table holds the weight for the problems used in the class. +Whereas the weight of a problem can vary by section and student the data +here is applied to the class as a whole. +Columns: 'symb_id','part_id','response_id','weight'. + +=back + + +=head1 IMPORTANT SUBROUTINES + +Here is a brief overview of the subroutines which are likely to be of +interest: + +=over 4 + +=item C<&get_current_state()> + +programmers interface. + +=item C<&init_dbs()> + +table creation + +=item C<&update_student_data()> + +data storage calls + +=item C<&get_student_data_from_performance_cache()> + +data retrieval + +=back + +=head1 OTHER SUBROUTINES + +=over 4 + +=item C<&make_into_hash($values)> + +Returns a reference to a hash as described by $values.
$values is +assumed to be the result of + join(':',map {&escape($_)} %orighash); + +This is a helper function for get_current_state. + +=item C<&init_dbs()> + +Input: course id + +Output: 0 on success, positive integer on error + +This routine issues the calls to lonmysql to create the tables used to +store student data. + +=item C<&delete_caches()> + +This routine drops all the tables associated with a course from the +MySQL database. + +Input: course id (optional, determined by environment if omitted) + +Returns: nothing + +=item C<&get_part_id()> + +Get the MySQL id of a problem part string. + +Input: $part + +Output: undef on error, integer $part_id on success. + +=item C<&get_part()> + +Get the string describing a part from the MySQL id of the problem part. + +Input: $part_id + +Output: undef on error, $part string on success. + +=item C<&get_symb_id()> + +Get the MySQL id of a symb. + +Input: $symb + +Output: undef on error, integer $symb_id on success. + +=item C<&get_symb()> + +Get the symb associated with a MySQL symb_id. + +Input: $symb_id + +Output: undef on error, $symb on success. + +=item C<&get_student_id()> + +Get the MySQL id of a student. + +Input: $sname, $dom + +Output: undef on error, integer $student_id on success. + +=item C<&get_student()> + +Get student username:domain associated with the MySQL student_id. + +Input: $student_id + +Output: undef on error, string $student (username:domain) on success. + +=item C<&clear_internal_caches()> + +Causes the internal caches used in get_student_id, get_student, +get_symb_id, get_symb, get_part_id, and get_part to be undef'd. + +Needs to be called before the first operation with the MySQL database +for a given Apache request. + +=item C<&update_full_student_data($sname,$sdom,$courseid)> + +Does a lonnet::dump on a student to populate the courses tables. + +Input: $sname, $sdom, $courseid + +Output: $returnstatus + +$returnstatus is a string describing any errors that occurred. 'okay' is the +default.
+ +This subroutine loads a student's data using lonnet::dump and inserts +it into the MySQL database. The inserts are done on three tables, +$fulldump_response_table, $fulldump_part_table, and $fulldump_timestamp_table. +The INSERT calls are made directly by this subroutine, not through lonmysql +because we do a 'bulk' insert which takes advantage of MySQL's non-SQL +compliant INSERT command to insert multiple rows at a time. +If anything has gone wrong during this process, $returnstatus is updated with +a description of the error. + +Once the "fulldump" tables are updated, the tables used for chart and +spreadsheet (which hold only the current state of the student on their +homework, not historical data) are updated. If all updates have occurred +successfully, $student_table is updated to reflect the time of the update. + +Notice we do not insert the data and immediately query it. This means it +is possible for there to be data returned this first time that is not +available the second time. CYA. + + +=item C<&update_student_data()> + +Input: $sname, $sdom, $courseid + +Output: $returnstatus, \%student_data + +$returnstatus is a string describing any errors that occurred. 'okay' is the +default. +\%student_data is the data returned by a call to lonnet::currentdump. + +This subroutine loads a student's data using lonnet::currentdump and inserts +it into the MySQL database. The inserts are done on two tables, +$performance_table and $parameters_table. $parameters_table holds the data +that is not included in $performance_table. See the description of +$performance_table elsewhere in this file. The INSERT calls are made +directly by this subroutine, not through lonmysql because we do a 'bulk' +insert which takes advantage of MySQL's non-SQL compliant INSERT command to +insert multiple rows at a time. If anything has gone wrong during this +process, $returnstatus is updated with a description of the error and +\%student_data is returned.
+ +Notice we do not insert the data and immediately query it. This means it +is possible for there to be data returned this first time that is not +available the second time. CYA. + +=item &ensure_tables_are_set_up($courseid) + +Checks to be sure the MySQL tables for the given class are set up. +If $courseid is omitted it will be obtained from the environment. + +Returns nothing on success and 'error' on failure + + +=item C<&ensure_current_data()> + +Input: $sname, $sdom, $courseid + +Output: $status, $data + +This routine ensures the data for a given student is up to date. +The $student_table is queried to determine the time of the last update. +If the students data is out of date, &update_student_data() is called. +The return values from the call to &update_student_data() are returned. + +=item C<&ensure_current_full_data($sname,$sdom,$courseid)> + +Input: $sname, $sdom, $courseid + +Output: $status + +This routine ensures the fulldata (the data from a lonnet::dump, not a +lonnet::currentdump) for a given student is up to date. +The $student_table is queried to determine the time of the last update. +If the students fulldata is out of date, &update_full_student_data() is +called. + +The return value from the call to &update_full_student_data() is returned. + +=item C<&get_student_data_from_performance_cache()> + +Input: $sname, $sdom, $symb, $courseid + +Output: hash reference containing the data for the given student. +If $symb is undef, all the students data is returned. + +This routine is the heart of the local caching system. See the description +of $performance_table, $symb_table, $student_table, and $part_table. The +main task is building the MySQL request. The tables appear in the request +in the order in which they should be parsed by MySQL. When searching +on a student the $student_table is used to locate the 'student_id'. 
All +rows in $performance_table which have a matching 'student_id' are returned, +with data from $part_table and $symb_table which match the entries in +$performance_table, 'part_id' and 'symb_id'. When searching on a symb, +the $symb_table is processed first, with matching rows grabbed from +$performance_table and filled in from $part_table and $student_table in +that order. + +Running 'EXPLAIN ' on the 'SELECT' statements generated can be quite +interesting, especially if you play with the order the tables are listed. + + +=item C<&get_current_state()> + +Input: $sname,$sdom,$symb,$courseid + +Output: Described below + +Retrieve the current status of a students performance. $sname and +$sdom are the only required parameters. If $symb is undef the results +of an &Apache::lonnet::currentdump() will be returned. +If $courseid is undef it will be retrieved from the environment. + +The return structure is based on &Apache::lonnet::currentdump. If +$symb is unspecified, all the students data is returned in a hash of +the form: +( + symb1 => { param1 => value1, param2 => value2 ... }, + symb2 => { param1 => value1, param2 => value2 ... }, +) + +If $symb is specified, a hash of +( + param1 => value1, + param2 => value2, +) +is returned. + +If no data is found for $symb, or if the student has no performance data, +an empty list is returned. + +=item C<&get_problem_statistics()> + +Gather data on a given problem. The database is assumed to be +populated and all local caching variables are assumed to be set +properly. This means you need to call &ensure_current_data for +the students you are concerned with prior to calling this routine. + +Inputs: $Sections, $Groups, $status, $symb, $part, $courseid, $starttime, + $endtime + +=over 4 + +=item $Sections Array ref containing section names for students. +'all' is allowed to be the first (and only) item in the array. + +=item $Groups Array ref containing group names for students.
+'all' is allowed to be the first (and only) item in the array. + +=item $status String describing the status of students + +=item $symb is the symb for the problem. + +=item $part is the part id you need statistics for + +=item $courseid is the course id, of course! + +=item $starttime and $endtime are unix times which to use to limit +the statistical data. + +=back + +Outputs: See the code for up to date information. A hash reference is +returned. The hash has the following keys defined: + +=over 4 + +=item num_students + +The number of students attempting the problem + +=item tries + +The total number of tries for the students + +=item max_tries + +The maximum number of tries taken + +=item mean_tries + +The average number of tries + +=item num_solved + +The number of students able to solve the problem + +=item num_override + +The number of students whose answer is 'correct_by_override' + +=item deg_of_diff + +The degree of difficulty of the problem + +=item std_tries + +The standard deviation of the number of tries + +=item skew_tries + +The skew of the number of tries + +=item per_wrong + +The number of students attempting the problem who were not +able to answer it correctly. + +=back + +=item C<&populate_weight_table()> + +=item C<&limit_by_start_end_times()> + +Build SQL WHERE condition which limits the data collected by the start +and end times provided + +Inputs: $starttime, $endtime, $table + +Returns: $time_limits + + +=item C<&limit_by_section_and_status()> + +Build SQL WHERE condition which limits the data collected by section and +student status. + +Inputs: $Sections (array ref) + $enrollment (string: 'any', 'expired', 'active') + $tablename The name of the table that holds the student data + +Returns: $student_requirements,$enrollment_requirements + +=item C<&limit_by_group()> + +Build SQL LEFT JOIN statement to include students_groups and groupnames tables and SQL WHERE condition which limits the data collected by group.
+ +Inputs: $Groups (array ref) + $stutable The name of the table which holds the student data. + $grptable The name of the table which maps group_id to groupname. + $stugrptab The name of the table which holds student group affiliations. +Returns: $groups_join,$group_limits + $groups_join JOIN part of SQL statement (to include group related tables) + $group_limits SQL WHERE condition limiting to requested groups + +=item C<&rank_students_by_scores_on_resources()> + +Inputs: + $resources: array ref of hash ref. Each hash ref needs key 'symb'. + $Sections: array ref of sections to include, + $Groups: array ref of groups to include. + $enrollment: string, + $courseid (may be omitted) + $starttime (may be omitted) + $endtime (may be omitted) + $has_award_for (may be omitted) + +Returns: An array of arrays. The sub arrays contain a student name and +their score on the resources. $starttime and $endtime constrain the +list to awards obtained during the given time limits. $has_award_for +constrains the list to those students who at least attempted the +resource identified by the given symb, which is used to filter out +such students for statistics that would be adversely affected by such +students. + +=item C<&get_sum_of_scores()> + +Inputs: $resource (hash ref, needs {'symb'} key), +$part, (the part id), +$students (array ref, contents of array are scalars holding 'sname:sdom'), +$courseid + +Returns: the sum of the score on the problem part over the students and the + maximum possible value for the sum (taken from the weight table). + + +=item C<&score_stats()> + +Inputs: $Sections, $enrollment, $symbs, $starttime, + $endtime, $courseid + +$Sections, $enrollment, $starttime, $endtime, and $courseid are the same as +elsewhere in this module.
+$symbs is an array ref of symbs + +Returns: minimum, maximum, mean, s.d., number of students, and maximum + possible of student scores on the given resources + +=item C<&count_stats()> + +Inputs: $Sections, $Groups, $enrollment, $symbs, $starttime, + $endtime, $courseid + +$Sections, $Groups, $enrollment, $starttime, $endtime, and $courseid are the +same as elsewhere in this module. +$symbs is an array ref of symbs + +Returns: minimum, maximum, mean, s.d., and number of students + of the number of items correct on the given resources + +=item C + +=item C<&get_student_scores($Sections,$Groups,$Symbs,$enrollment,$courseid)> + +=item C<&setup_table_names()> + +input: course id + +output: none + +=back + +=head3 End of Local Data Caching Subroutines + +=head3 Classlist Subroutines + +=over + +=item C<&get_classlist()> + +Retrieve the classlist of a given class or of the current class. Student +information is returned from the classlist.db file and, if needed, +from the student's environment. + +Optional arguments are $cdom, and $cnum (course domain, +and course number, respectively). If either is omitted the course +will be taken from the current environment ($env{'request.course.id'}, +$env{'course.'.$cid.'.domain'}, and $env{'course.'.$cid.'.num'}). + +Returns a reference to a hash which contains: + keys '$sname:$sdom' + values [$sdom,$sname,$end,$start,$id,$section,$fullname,$status,$type,$lockedtype] + +The constant values CL_SDOM, CL_SNAME, CL_END, etc. can be used +as indices into the returned list to future-proof clients against +changes in the list order. + +=back + +=cut + +