# The LearningOnline Network # convert_file takes a well-formed XML file content and converts it to LON-CAPA syntax. # # $Id: xml_to_loncapa.pm,v 1.9 2016/01/21 22:11:13 damieng Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # ### #!/usr/bin/perl package Apache::xml_to_loncapa; use strict; use utf8; use warnings; use XML::LibXML; # LON-CAPA block elements that cannot be found within startouttext/endouttext my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup'); # LON-CAPA elements that can be found within startouttext/endouttext: my @loncapa_in_text = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto','translated','lang'); # not textline # HTML elements that trigger the addition of startouttext/endouttext my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table'); my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend','comment'); my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse'); # see http://www.w3.org/TR/html-polyglot/#empty-elements # and http://tiffanybbrown.com/2011/03/23/html5-does-not-allow-self-closing-tags/ # HTML elements that do not have an empty content, and must never use a self-closing tag: my @non_empty_html = ('title','style','script','noscript','body','section','header','footer','article','aside','nav','h1','h2','h3','h4','h5','h6','div','p','li','dt','dd','caption','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','address','blockquote','bdo','ruby','rb','rp','rt','rtc','figure','figcaption','object','applet','video','audio','canvas','label','option','textarea','fieldset','legend','button','iframe'); # Converts a file and return the modified contents sub convert_file { my ($contents) = @_; my $dom_doc = XML::LibXML->load_xml(string => $contents); my $root = $dom_doc->documentElement(); if (defined $root && $root->nodeName ne 'html') { add_outtext($dom_doc); } return node_to_string($dom_doc); } sub node_to_string { my ($node) = @_; if ($node->nodeType == XML_DOCUMENT_NODE) { my $root = $node->documentElement(); return node_to_string($root); } elsif ($node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE) { my $parent = $node->parentNode; my $parent_name = $parent->nodeName; my $grandparent_name; if (defined $parent->parentNode) { $grandparent_name = $parent->parentNode->nodeName; } my @no_escape = ('m', 'script', 'style', 'display', 'parse', 'answer'); if (string_in_array(\@no_escape, $parent_name) && ($parent_name ne 'answer' || (defined $grandparent_name && $grandparent_name ne 'numericalresponse' && $grandparent_name ne 'formularesponse'))) { return $node->nodeValue; } else { return $node->toString(); } } elsif ($node->nodeType == XML_ELEMENT_NODE) { my $s = ''; my $tag = $node->nodeName; $s .= "<$tag"; my @attributes = $node->attributes(); foreach my $attribute (@attributes) { $s .= ' '; $s .= $attribute->nodeName; $s .= '="'; $s .= escape_attribute($attribute->nodeValue); $s .= '"'; } if ($node->hasChildNodes() || string_in_array(\@non_empty_html, $tag)) { $s .= '>'; foreach my $child ($node->childNodes) { $s .= node_to_string($child); } $s .= ""; } else { $s .= '/>'; } return $s; } else { return $node->toString(); } } # Escapes an attribute value sub escape_attribute { my ($s) = @_; # normal XML escapes do not work with LON-CAPA, for instance with reactionresponse #$s =~ s/&/&/sg; #$s =~ s//>/sg; $s =~ s/"/"/sg; return $s; } # Adds startouttext and endouttext where useful for the colorful editor sub add_outtext { my ($node) = @_; if ($node->nodeType == XML_DOCUMENT_NODE) { my $root = $node->documentElement(); add_outtext($root); return; } if ($node->nodeType != XML_ELEMENT_NODE) { return; } if (string_in_array(\@simple_data, $node->nodeName)) { return; } convert_paragraphs($node); if ($node->nodeName eq 'hintgroup' && !defined $node->firstChild) { # empty hintgroup: colorful editor needs start/end outtext add_endouttext($node, undef); add_startouttext($node, $node->firstChild); } my $next; my $in_outtext = 0; for (my $child=$node->firstChild; defined $child; $child=$next) { $next = $child->nextSibling; if (!$in_outtext && inside_outtext($child)) { add_startouttext($node, $child); $in_outtext = 1; } elsif ($in_outtext && !continue_outtext($child)) { add_endouttext($node, $child); $in_outtext = 0; } if (!$in_outtext) { add_outtext($child); } } if ($in_outtext) { add_endouttext($node); } } # Returns 1 if this node should trigger the addition of startouttext before it sub inside_outtext { my ($node) = @_; if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) { return 1; } if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_trigger, $node->nodeName)) { if (contains_loncapa_block($node)) { return 0; } else { return 1; } } if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_in_text, $node->nodeName)) { return 1; } return 0; } # Returns 1 if the outtext environment can continue with this node sub continue_outtext { my ($node) = @_; if (inside_outtext($node)) { return 1; } if ($node->nodeType == XML_TEXT_NODE) { return 1; # continue even if this is just spaces } return 0; } # Returns 1 if the node contains a LON-CAPA block in a descendant. sub contains_loncapa_block { my ($node) = @_; foreach my $child ($node->childNodes) { if ($child->nodeType == XML_ELEMENT_NODE) { if (string_in_array(\@loncapa_block, $child->nodeName)) { return 1; } if (contains_loncapa_block($child)) { return 1; } } } return 0; } sub add_startouttext { my ($parent, $before_node) = @_; my $doc = $parent->ownerDocument; if ($before_node->nodeType == XML_TEXT_NODE) { # split space at the beginning of the node if ($before_node->nodeValue =~ /^(\s+)(.*?)$/s) { my $space_node = $doc->createTextNode($1); $before_node->setData($2); $parent->insertBefore($space_node, $before_node); } } my $startouttext = $doc->createElement('startouttext'); $parent->insertBefore($startouttext, $before_node); } sub add_endouttext { my ($parent, $before_node) = @_; my $doc = $parent->ownerDocument; my $endouttext = $doc->createElement('endouttext'); my $before_before; if (defined $before_node) { $before_before = $before_node->previousSibling; } else { $before_before = $parent->lastChild; } if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) { # split space at the end of the node if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) { $before_before->setData($1); my $space_node = $doc->createTextNode($2); if (defined $before_node) { $parent->insertBefore($space_node, $before_node); } else { $parent->appendChild($space_node); } $before_node = $space_node; } } if (defined $before_node) { $parent->insertBefore($endouttext, $before_node); } else { $parent->appendChild($endouttext); } # replace spaces afterwards by a \n + indentation my $next = $endouttext->nextSibling; if (defined $next && $next->nodeType == XML_TEXT_NODE) { my $v = $next->nodeValue; if ($v =~ /^ /) { $v =~ s/^ +//; if ($parent->firstChild->nodeType == XML_TEXT_NODE && $parent->firstChild->nodeValue =~ /^\n +$/) { $v = $parent->firstChild->nodeValue.$v; } else { $v = "\n".$v; } $next->setData($v); } } } # Convert paragraph children when one contains an inline response into content +
# (the colorful editor does not support paragraphs containing inline responses) sub convert_paragraphs { my ($parent) = @_; my $p_child_with_inline_response = 0; foreach my $child ($parent->childNodes) { if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') { foreach my $child2 ($child->childNodes) { if ($child2->nodeType == XML_ELEMENT_NODE) { if (string_in_array(\@inline_responses, $child2->nodeName)) { $p_child_with_inline_response = 1; last; } } } } if ($p_child_with_inline_response) { last; } } if ($p_child_with_inline_response) { my $doc = $parent->ownerDocument; my $next; for (my $child=$parent->firstChild; defined $child; $child=$next) { $next = $child->nextSibling; if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') { replace_by_children($child); if (defined $next && (defined $next->nextSibling || $next->nodeType != XML_TEXT_NODE || $next->nodeValue !~ /^\s*$/)) { # we only add a br if there is something after my $br = $doc->createElement('br'); $parent->insertBefore($br, $next); } } } } } ## # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array) # @param {Array} array - reference to the array of strings # @param {string} value - the string to look for # @returns 1 if found, 0 otherwise ## sub string_in_array { my ($array, $value) = @_; foreach my $v (@{$array}) { if ($v eq $value) { return 1; } } return 0; } ## # replaces a node by its children # @param {Node} node - the DOM node ## sub replace_by_children { my ($node) = @_; my $parent = $node->parentNode; my $next; my $previous; for (my $child=$node->firstChild; defined $child; $child=$next) { $next = $child->nextSibling; if ((!defined $previous || !defined $next) && $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) { next; # do not keep first and last whitespace nodes } else { if (!defined $previous && $child->nodeType == XML_TEXT_NODE) { # remove whitespace at the beginning my $value = $child->nodeValue; $value =~ s/^\s+//; $child->setData($value); } if (!defined $next && $child->nodeType == XML_TEXT_NODE) { # and at the end my $value = $child->nodeValue; $value =~ s/\s+$//; $child->setData($value); } } $node->removeChild($child); $parent->insertBefore($child, $node); $previous = $child; } $parent->removeChild($node); } 1; __END__