Annotation of loncom/homework/cleanxml/xml_to_loncapa.pm, revision 1.4

1.1       damieng     1: # The LearningOnline Network
                      2: # convert_file takes a well-formed XML file content and converts it to LON-CAPA syntax.
                      3: #
1.4     ! damieng     4: # $Id: xml_to_loncapa.pm,v 1.3 2015/12/23 20:33:10 damieng Exp $
1.1       damieng     5: #
                      6: # Copyright Michigan State University Board of Trustees
                      7: #
                      8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                      9: #
                     10: # LON-CAPA is free software; you can redistribute it and/or modify
                     11: # it under the terms of the GNU General Public License as published by
                     12: # the Free Software Foundation; either version 2 of the License, or
                     13: # (at your option) any later version.
                     14: #
                     15: # LON-CAPA is distributed in the hope that it will be useful,
                     16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     18: # GNU General Public License for more details.
                     19: #
                     20: # You should have received a copy of the GNU General Public License
                     21: # along with LON-CAPA; if not, write to the Free Software
                     22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     23: #
                     24: # /home/httpd/html/adm/gpl.txt
                     25: #
                     26: # http://www.lon-capa.org/
                     27: #
                     28: ###
                     29: 
                     30: #!/usr/bin/perl
                     31: 
                     32: package Apache::xml_to_loncapa;
                     33: 
                     34: use strict;
                     35: use utf8;
                     36: use warnings;
                     37: 
                     38: use XML::LibXML;
                     39: 
                     40: 
                     41: my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
                     42: 
                     43: my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline
                     44: 
                     45: # HTML elements that trigger the addition of startouttext/endouttext
                     46: my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');
                     47: 
                     48: my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );
                     49: 
                     50: my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
                     51: 
1.4     ! damieng    52: # see http://www.w3.org/TR/html-polyglot/#empty-elements
        !            53: # and http://tiffanybbrown.com/2011/03/23/html5-does-not-allow-self-closing-tags/
        !            54: # HTML elements that do not have an empty content, and must never use a self-closing tag:
        !            55: my @non_empty_html = ('title','style','script','noscript','body','section','header','footer','article','aside','nav','h1','h2','h3','h4','h5','h6','div','p','li','dt','dd','caption','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','address','blockquote','bdo','ruby','rb','rp','rt','rtc','figure','figcaption','object','applet','video','audio','canvas','label','option','textarea','fieldset','legend','button','iframe');
        !            56: 
1.1       damieng    57: 
                     58: # Converts a file and return the modified contents
                     59: sub convert_file {
                     60:   my ($contents) = @_;
                     61: 
                     62:   my $dom_doc = XML::LibXML->load_xml(string => $contents);
                     63:   add_outtext($dom_doc);
                     64:   return node_to_string($dom_doc);
                     65: }
                     66: 
                     67: 
                     68: sub node_to_string {
                     69:   my ($node) = @_;
                     70:   
                     71:   if ($node->nodeType == XML_DOCUMENT_NODE) {
                     72:     my $root = $node->documentElement();
                     73:     return node_to_string($root);
                     74:   } elsif ($node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE) {
                     75:     my $parent = $node->parentNode;
                     76:     my $parent_name = $parent->nodeName;
                     77:     my $grandparent_name;
                     78:     if (defined $parent->parentNode) {
                     79:       $grandparent_name = $parent->parentNode->nodeName;
                     80:     }
                     81:     my @no_escape = ('m', 'script', 'display', 'parse', 'answer');
                     82:     if (string_in_array(\@no_escape, $parent_name) &&
                     83:         ($parent_name ne 'answer' ||
                     84:         (defined $grandparent_name &&
                     85:         $grandparent_name ne 'numericalresponse' &&
                     86:         $grandparent_name ne 'formularesponse'))) {
                     87:       return $node->nodeValue;
                     88:     } else {
                     89:       return $node->toString();
                     90:     }
                     91:   } elsif ($node->nodeType == XML_ELEMENT_NODE) {
                     92:     my $s = '';
                     93:     my $tag = $node->nodeName;
                     94:     $s .= "<$tag";
                     95:     my @attributes = $node->attributes();
                     96:     foreach my $attribute (@attributes) {
                     97:       $s .= ' ';
                     98:       $s .= $attribute->nodeName;
                     99:       $s .= '="';
1.2       damieng   100:       $s .= escape_attribute($attribute->nodeValue);
1.1       damieng   101:       $s .= '"';
                    102:     }
1.4     ! damieng   103:     if ($node->hasChildNodes() || string_in_array(\@non_empty_html, $tag)) {
1.1       damieng   104:       $s .= '>';
                    105:       foreach my $child ($node->childNodes) {
                    106:         $s .= node_to_string($child);
                    107:       }
                    108:       $s .= "</$tag>";
                    109:     } else {
                    110:       $s .= '/>';
                    111:     }
                    112:     return $s;
                    113:   } else {
                    114:     return $node->toString();
                    115:   }
                    116: }
                    117: 
1.2       damieng   118: # Escapes an attribute value
                    119: sub escape_attribute {
1.1       damieng   120:   my ($s) = @_;
1.2       damieng   121:   # normal XML escapes do not work with LON-CAPA, for instance with reactionresponse
                    122:   #$s =~ s/&/&amp;/sg;
                    123:   #$s =~ s/</&lt;/sg;
                    124:   #$s =~ s/>/&gt;/sg;
                    125:   $s =~ s/"/&quot;/sg;
1.1       damieng   126:   return $s;
                    127: }
                    128: 
                    129: # Adds startouttext and endouttext where useful for the colorful editor
                    130: sub add_outtext {
                    131:   my ($node) = @_;
                    132:   
                    133:   if ($node->nodeType == XML_DOCUMENT_NODE) {
                    134:     my $root = $node->documentElement();
                    135:     add_outtext($root);
                    136:     return;
                    137:   }
                    138:   if ($node->nodeType != XML_ELEMENT_NODE) {
                    139:     return;
                    140:   }
                    141:   if (string_in_array(\@simple_data, $node->nodeName)) {
                    142:     return;
                    143:   }
                    144:   convert_paragraphs($node);
                    145:   my $next;
                    146:   my $in_outtext = 0;
                    147:   for (my $child=$node->firstChild; defined $child; $child=$next) {
                    148:     $next = $child->nextSibling;
                    149:     if (!$in_outtext && inside_outtext($child)) {
                    150:       add_startouttext($node, $child);
                    151:       $in_outtext = 1;
                    152:     } elsif ($in_outtext && !continue_outtext($child)) {
                    153:       add_endouttext($node, $child);
                    154:       $in_outtext = 0;
                    155:     }
                    156:     if (!$in_outtext) {
                    157:       add_outtext($child);
                    158:     }
                    159:   }
                    160:   if ($in_outtext) {
                    161:     add_endouttext($node);
                    162:   }
                    163: }
                    164: 
                    165: # Returns 1 if this node should trigger the addition of startouttext before it
                    166: sub inside_outtext {
                    167:   my ($node) = @_;
                    168:   if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) {
                    169:     return 1;
                    170:   }
                    171:   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_trigger, $node->nodeName)) {
                    172:     if (contains_loncapa_block($node)) {
                    173:       return 0;
                    174:     } else {
                    175:       return 1;
                    176:     }
                    177:   }
                    178:   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_inline, $node->nodeName)) {
                    179:     return 1;
                    180:   }
                    181:   return 0;
                    182: }
                    183: 
                    184: # Returns 1 if the outtext environment can continue with this node
                    185: sub continue_outtext {
                    186:   my ($node) = @_;
                    187:   if (inside_outtext($node)) {
                    188:     return 1;
                    189:   }
                    190:   if ($node->nodeType == XML_TEXT_NODE) {
                    191:     return 1; # continue even if this is just spaces
                    192:   }
                    193:   return 0;
                    194: }
                    195: 
                    196: # Returns 1 if the node contains a LON-CAPA block in a descendant.
                    197: sub contains_loncapa_block {
                    198:   my ($node) = @_;
                    199:   foreach my $child ($node->childNodes) {
                    200:     if ($child->nodeType == XML_ELEMENT_NODE) {
                    201:       if (string_in_array(\@loncapa_block, $child->nodeName)) {
                    202:         return 1;
                    203:       }
                    204:       if (contains_loncapa_block($child)) {
                    205:         return 1;
                    206:       }
                    207:     }
                    208:   }
                    209:   return 0;
                    210: }
                    211: 
                    212: sub add_startouttext {
                    213:   my ($parent, $before_node) = @_;
                    214:   my $doc = $parent->ownerDocument;
                    215:   if ($before_node->nodeType == XML_TEXT_NODE) {
                    216:     # split space at the beginning of the node
                    217:     if ($before_node->nodeValue =~ /^(\s+)(.*?)$/s) {
                    218:       my $space_node = $doc->createTextNode($1);
                    219:       $before_node->setData($2);
                    220:       $parent->insertBefore($space_node, $before_node);
                    221:     }
                    222:   }
                    223:   my $startouttext = $doc->createElement('startouttext');
                    224:   $parent->insertBefore($startouttext, $before_node);
                    225: }
                    226: 
                    227: sub add_endouttext {
                    228:   my ($parent, $before_node) = @_;
                    229:   my $doc = $parent->ownerDocument;
                    230:   my $endouttext = $doc->createElement('endouttext');
                    231:   my $before_before;
                    232:   if (defined $before_node) {
                    233:     $before_before = $before_node->previousSibling;
                    234:   } else {
                    235:     $before_before = $parent->lastChild;
                    236:   }
                    237:   if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {
                    238:     # split space at the end of the node
                    239:     if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) {
                    240:       $before_before->setData($1);
                    241:       my $space_node = $doc->createTextNode($2);
                    242:       if (defined $before_node) {
                    243:         $parent->insertBefore($space_node, $before_node);
                    244:       } else {
                    245:         $parent->appendChild($space_node);
                    246:       }
                    247:       $before_node = $space_node;
                    248:     }
                    249:   }
                    250:   if (defined $before_node) {
                    251:     $parent->insertBefore($endouttext, $before_node);
                    252:   } else {
                    253:     $parent->appendChild($endouttext);
                    254:   }
                    255: }
                    256: 
                    257: # Convert paragraph children when one contains an inline response into content + <br>
                    258: # (the colorful editor does not support paragraphs containing inline responses)
                    259: sub convert_paragraphs {
                    260:   my ($parent) = @_;
                    261:   my $p_child_with_inline_response = 0;
                    262:   foreach my $child ($parent->childNodes) {
                    263:     if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
                    264:       foreach my $child2 ($child->childNodes) {
                    265:         if ($child2->nodeType == XML_ELEMENT_NODE) {
                    266:           if (string_in_array(\@inline_responses, $child2->nodeName)) {
                    267:             $p_child_with_inline_response = 1;
                    268:             last;
                    269:           }
                    270:         }
                    271:       }
                    272:     }
                    273:     if ($p_child_with_inline_response) {
                    274:       last;
                    275:     }
                    276:   }
                    277:   if ($p_child_with_inline_response) {
                    278:     my $doc = $parent->ownerDocument;
                    279:     my $next;
                    280:     for (my $child=$parent->firstChild; defined $child; $child=$next) {
                    281:       $next = $child->nextSibling;
                    282:       if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
                    283:         replace_by_children($child);
                    284:         if (defined $next && (defined $next->nextSibling || $next->nodeType != XML_TEXT_NODE ||
                    285:             $next->nodeValue !~ /^\s*$/)) {
                    286:           # we only add a br if there is something after
                    287:           my $br = $doc->createElement('br');
                    288:           $parent->insertBefore($br, $next);
                    289:         }
                    290:       }
                    291:     }
                    292:   }
                    293: }
                    294: 
                    295: ##
                    296: # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
                    297: # @param {Array<string>} array - reference to the array of strings
                    298: # @param {string} value - the string to look for
                    299: # @returns 1 if found, 0 otherwise
                    300: ##
                    301: sub string_in_array {
                    302:   my ($array, $value) = @_;
                    303:   foreach my $v (@{$array}) {
                    304:     if ($v eq $value) {
                    305:       return 1;
                    306:     }
                    307:   }
                    308:   return 0;
                    309: }
                    310: 
                    311: ##
                    312: # replaces a node by its children
                    313: # @param {Node} node - the DOM node
                    314: ##
                    315: sub replace_by_children {
                    316:   my ($node) = @_;
                    317:   my $parent = $node->parentNode;
                    318:   my $next;
                    319:   my $previous;
                    320:   for (my $child=$node->firstChild; defined $child; $child=$next) {
                    321:     $next = $child->nextSibling;
                    322:     if ((!defined $previous || !defined $next) &&
                    323:         $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) {
                    324:       next; # do not keep first and last whitespace nodes
                    325:     } else {
                    326:       if (!defined $previous && $child->nodeType == XML_TEXT_NODE) {
                    327:         # remove whitespace at the beginning
                    328:         my $value = $child->nodeValue;
                    329:         $value =~ s/^\s+//;
                    330:         $child->setData($value);
                    331:       }
                    332:       if (!defined $next && $child->nodeType == XML_TEXT_NODE) {
                    333:         # and at the end
                    334:         my $value = $child->nodeValue;
                    335:         $value =~ s/\s+$//;
                    336:         $child->setData($value);
                    337:       }
                    338:     }
                    339:     $node->removeChild($child);
                    340:     $parent->insertBefore($child, $node);
                    341:     $previous = $child;
                    342:   }
                    343:   $parent->removeChild($node);
                    344: }
                    345: 
                    346: 1;
                    347: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>