Annotation of loncom/homework/cleanxml/xml_to_loncapa.pm, revision 1.3

1.1       damieng     1: # The LearningOnline Network
                      2: # convert_file takes a well-formed XML file content and converts it to LON-CAPA syntax.
                      3: #
1.3     ! damieng     4: # $Id: xml_to_loncapa.pm,v 1.2 2015/12/18 22:08:51 damieng Exp $
1.1       damieng     5: #
                      6: # Copyright Michigan State University Board of Trustees
                      7: #
                      8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
                      9: #
                     10: # LON-CAPA is free software; you can redistribute it and/or modify
                     11: # it under the terms of the GNU General Public License as published by
                     12: # the Free Software Foundation; either version 2 of the License, or
                     13: # (at your option) any later version.
                     14: #
                     15: # LON-CAPA is distributed in the hope that it will be useful,
                     16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     18: # GNU General Public License for more details.
                     19: #
                     20: # You should have received a copy of the GNU General Public License
                     21: # along with LON-CAPA; if not, write to the Free Software
                     22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     23: #
                     24: # /home/httpd/html/adm/gpl.txt
                     25: #
                     26: # http://www.lon-capa.org/
                     27: #
                     28: ###
                     29: 
                     30: #!/usr/bin/perl
                     31: 
                     32: package Apache::xml_to_loncapa;
                     33: 
                     34: use strict;
                     35: use utf8;
                     36: use warnings;
                     37: 
                     38: use XML::LibXML;
                     39: 
                     40: 
                     41: my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
                     42: 
                     43: my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline
                     44: 
                     45: # HTML elements that trigger the addition of startouttext/endouttext
                     46: my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');
                     47: 
                     48: my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );
                     49: 
                     50: my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
                     51: 
                     52: 
                     53: # Converts a file and return the modified contents
                     54: sub convert_file {
                     55:   my ($contents) = @_;
                     56: 
                     57:   my $dom_doc = XML::LibXML->load_xml(string => $contents);
                     58:   add_outtext($dom_doc);
                     59:   return node_to_string($dom_doc);
                     60: }
                     61: 
                     62: 
                     63: sub node_to_string {
                     64:   my ($node) = @_;
                     65:   
                     66:   if ($node->nodeType == XML_DOCUMENT_NODE) {
                     67:     my $root = $node->documentElement();
                     68:     return node_to_string($root);
                     69:   } elsif ($node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE) {
                     70:     my $parent = $node->parentNode;
                     71:     my $parent_name = $parent->nodeName;
                     72:     my $grandparent_name;
                     73:     if (defined $parent->parentNode) {
                     74:       $grandparent_name = $parent->parentNode->nodeName;
                     75:     }
                     76:     my @no_escape = ('m', 'script', 'display', 'parse', 'answer');
                     77:     if (string_in_array(\@no_escape, $parent_name) &&
                     78:         ($parent_name ne 'answer' ||
                     79:         (defined $grandparent_name &&
                     80:         $grandparent_name ne 'numericalresponse' &&
                     81:         $grandparent_name ne 'formularesponse'))) {
                     82:       return $node->nodeValue;
                     83:     } else {
                     84:       return $node->toString();
                     85:     }
                     86:   } elsif ($node->nodeType == XML_ELEMENT_NODE) {
                     87:     my $s = '';
                     88:     my $tag = $node->nodeName;
                     89:     $s .= "<$tag";
                     90:     my @attributes = $node->attributes();
                     91:     foreach my $attribute (@attributes) {
                     92:       $s .= ' ';
                     93:       $s .= $attribute->nodeName;
                     94:       $s .= '="';
1.2       damieng    95:       $s .= escape_attribute($attribute->nodeValue);
1.1       damieng    96:       $s .= '"';
                     97:     }
1.3     ! damieng    98:     if ($node->hasChildNodes() || $tag eq 'script') {
1.1       damieng    99:       $s .= '>';
                    100:       foreach my $child ($node->childNodes) {
                    101:         $s .= node_to_string($child);
                    102:       }
                    103:       $s .= "</$tag>";
                    104:     } else {
                    105:       $s .= '/>';
                    106:     }
                    107:     return $s;
                    108:   } else {
                    109:     return $node->toString();
                    110:   }
                    111: }
                    112: 
1.2       damieng   113: # Escapes an attribute value
                    114: sub escape_attribute {
1.1       damieng   115:   my ($s) = @_;
1.2       damieng   116:   # normal XML escapes do not work with LON-CAPA, for instance with reactionresponse
                    117:   #$s =~ s/&/&amp;/sg;
                    118:   #$s =~ s/</&lt;/sg;
                    119:   #$s =~ s/>/&gt;/sg;
                    120:   $s =~ s/"/&quot;/sg;
1.1       damieng   121:   return $s;
                    122: }
                    123: 
                    124: # Adds startouttext and endouttext where useful for the colorful editor
                    125: sub add_outtext {
                    126:   my ($node) = @_;
                    127:   
                    128:   if ($node->nodeType == XML_DOCUMENT_NODE) {
                    129:     my $root = $node->documentElement();
                    130:     add_outtext($root);
                    131:     return;
                    132:   }
                    133:   if ($node->nodeType != XML_ELEMENT_NODE) {
                    134:     return;
                    135:   }
                    136:   if (string_in_array(\@simple_data, $node->nodeName)) {
                    137:     return;
                    138:   }
                    139:   convert_paragraphs($node);
                    140:   my $next;
                    141:   my $in_outtext = 0;
                    142:   for (my $child=$node->firstChild; defined $child; $child=$next) {
                    143:     $next = $child->nextSibling;
                    144:     if (!$in_outtext && inside_outtext($child)) {
                    145:       add_startouttext($node, $child);
                    146:       $in_outtext = 1;
                    147:     } elsif ($in_outtext && !continue_outtext($child)) {
                    148:       add_endouttext($node, $child);
                    149:       $in_outtext = 0;
                    150:     }
                    151:     if (!$in_outtext) {
                    152:       add_outtext($child);
                    153:     }
                    154:   }
                    155:   if ($in_outtext) {
                    156:     add_endouttext($node);
                    157:   }
                    158: }
                    159: 
                    160: # Returns 1 if this node should trigger the addition of startouttext before it
                    161: sub inside_outtext {
                    162:   my ($node) = @_;
                    163:   if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) {
                    164:     return 1;
                    165:   }
                    166:   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_trigger, $node->nodeName)) {
                    167:     if (contains_loncapa_block($node)) {
                    168:       return 0;
                    169:     } else {
                    170:       return 1;
                    171:     }
                    172:   }
                    173:   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_inline, $node->nodeName)) {
                    174:     return 1;
                    175:   }
                    176:   return 0;
                    177: }
                    178: 
                    179: # Returns 1 if the outtext environment can continue with this node
                    180: sub continue_outtext {
                    181:   my ($node) = @_;
                    182:   if (inside_outtext($node)) {
                    183:     return 1;
                    184:   }
                    185:   if ($node->nodeType == XML_TEXT_NODE) {
                    186:     return 1; # continue even if this is just spaces
                    187:   }
                    188:   return 0;
                    189: }
                    190: 
                    191: # Returns 1 if the node contains a LON-CAPA block in a descendant.
                    192: sub contains_loncapa_block {
                    193:   my ($node) = @_;
                    194:   foreach my $child ($node->childNodes) {
                    195:     if ($child->nodeType == XML_ELEMENT_NODE) {
                    196:       if (string_in_array(\@loncapa_block, $child->nodeName)) {
                    197:         return 1;
                    198:       }
                    199:       if (contains_loncapa_block($child)) {
                    200:         return 1;
                    201:       }
                    202:     }
                    203:   }
                    204:   return 0;
                    205: }
                    206: 
                    207: sub add_startouttext {
                    208:   my ($parent, $before_node) = @_;
                    209:   my $doc = $parent->ownerDocument;
                    210:   if ($before_node->nodeType == XML_TEXT_NODE) {
                    211:     # split space at the beginning of the node
                    212:     if ($before_node->nodeValue =~ /^(\s+)(.*?)$/s) {
                    213:       my $space_node = $doc->createTextNode($1);
                    214:       $before_node->setData($2);
                    215:       $parent->insertBefore($space_node, $before_node);
                    216:     }
                    217:   }
                    218:   my $startouttext = $doc->createElement('startouttext');
                    219:   $parent->insertBefore($startouttext, $before_node);
                    220: }
                    221: 
                    222: sub add_endouttext {
                    223:   my ($parent, $before_node) = @_;
                    224:   my $doc = $parent->ownerDocument;
                    225:   my $endouttext = $doc->createElement('endouttext');
                    226:   my $before_before;
                    227:   if (defined $before_node) {
                    228:     $before_before = $before_node->previousSibling;
                    229:   } else {
                    230:     $before_before = $parent->lastChild;
                    231:   }
                    232:   if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {
                    233:     # split space at the end of the node
                    234:     if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) {
                    235:       $before_before->setData($1);
                    236:       my $space_node = $doc->createTextNode($2);
                    237:       if (defined $before_node) {
                    238:         $parent->insertBefore($space_node, $before_node);
                    239:       } else {
                    240:         $parent->appendChild($space_node);
                    241:       }
                    242:       $before_node = $space_node;
                    243:     }
                    244:   }
                    245:   if (defined $before_node) {
                    246:     $parent->insertBefore($endouttext, $before_node);
                    247:   } else {
                    248:     $parent->appendChild($endouttext);
                    249:   }
                    250: }
                    251: 
                    252: # Convert paragraph children when one contains an inline response into content + <br>
                    253: # (the colorful editor does not support paragraphs containing inline responses)
                    254: sub convert_paragraphs {
                    255:   my ($parent) = @_;
                    256:   my $p_child_with_inline_response = 0;
                    257:   foreach my $child ($parent->childNodes) {
                    258:     if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
                    259:       foreach my $child2 ($child->childNodes) {
                    260:         if ($child2->nodeType == XML_ELEMENT_NODE) {
                    261:           if (string_in_array(\@inline_responses, $child2->nodeName)) {
                    262:             $p_child_with_inline_response = 1;
                    263:             last;
                    264:           }
                    265:         }
                    266:       }
                    267:     }
                    268:     if ($p_child_with_inline_response) {
                    269:       last;
                    270:     }
                    271:   }
                    272:   if ($p_child_with_inline_response) {
                    273:     my $doc = $parent->ownerDocument;
                    274:     my $next;
                    275:     for (my $child=$parent->firstChild; defined $child; $child=$next) {
                    276:       $next = $child->nextSibling;
                    277:       if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
                    278:         replace_by_children($child);
                    279:         if (defined $next && (defined $next->nextSibling || $next->nodeType != XML_TEXT_NODE ||
                    280:             $next->nodeValue !~ /^\s*$/)) {
                    281:           # we only add a br if there is something after
                    282:           my $br = $doc->createElement('br');
                    283:           $parent->insertBefore($br, $next);
                    284:         }
                    285:       }
                    286:     }
                    287:   }
                    288: }
                    289: 
                    290: ##
                    291: # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
                    292: # @param {Array<string>} array - reference to the array of strings
                    293: # @param {string} value - the string to look for
                    294: # @returns 1 if found, 0 otherwise
                    295: ##
                    296: sub string_in_array {
                    297:   my ($array, $value) = @_;
                    298:   foreach my $v (@{$array}) {
                    299:     if ($v eq $value) {
                    300:       return 1;
                    301:     }
                    302:   }
                    303:   return 0;
                    304: }
                    305: 
                    306: ##
                    307: # replaces a node by its children
                    308: # @param {Node} node - the DOM node
                    309: ##
                    310: sub replace_by_children {
                    311:   my ($node) = @_;
                    312:   my $parent = $node->parentNode;
                    313:   my $next;
                    314:   my $previous;
                    315:   for (my $child=$node->firstChild; defined $child; $child=$next) {
                    316:     $next = $child->nextSibling;
                    317:     if ((!defined $previous || !defined $next) &&
                    318:         $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) {
                    319:       next; # do not keep first and last whitespace nodes
                    320:     } else {
                    321:       if (!defined $previous && $child->nodeType == XML_TEXT_NODE) {
                    322:         # remove whitespace at the beginning
                    323:         my $value = $child->nodeValue;
                    324:         $value =~ s/^\s+//;
                    325:         $child->setData($value);
                    326:       }
                    327:       if (!defined $next && $child->nodeType == XML_TEXT_NODE) {
                    328:         # and at the end
                    329:         my $value = $child->nodeValue;
                    330:         $value =~ s/\s+$//;
                    331:         $child->setData($value);
                    332:       }
                    333:     }
                    334:     $node->removeChild($child);
                    335:     $parent->insertBefore($child, $node);
                    336:     $previous = $child;
                    337:   }
                    338:   $parent->removeChild($node);
                    339: }
                    340: 
                    341: 1;
                    342: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>