Annotation of loncom/homework/cleanxml/xml_to_loncapa.pm, revision 1.1

1.1     ! damieng     1: # The LearningOnline Network
        !             2: # convert_file takes a well-formed XML file content and converts it to LON-CAPA syntax.
        !             3: #
        !             4: # $Id$
        !             5: #
        !             6: # Copyright Michigan State University Board of Trustees
        !             7: #
        !             8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
        !             9: #
        !            10: # LON-CAPA is free software; you can redistribute it and/or modify
        !            11: # it under the terms of the GNU General Public License as published by
        !            12: # the Free Software Foundation; either version 2 of the License, or
        !            13: # (at your option) any later version.
        !            14: #
        !            15: # LON-CAPA is distributed in the hope that it will be useful,
        !            16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            18: # GNU General Public License for more details.
        !            19: #
        !            20: # You should have received a copy of the GNU General Public License
        !            21: # along with LON-CAPA; if not, write to the Free Software
        !            22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
        !            23: #
        !            24: # /home/httpd/html/adm/gpl.txt
        !            25: #
        !            26: # http://www.lon-capa.org/
        !            27: #
        !            28: ###
        !            29: 
        !            30: #!/usr/bin/perl
        !            31: 
        !            32: package Apache::xml_to_loncapa;
        !            33: 
        !            34: use strict;
        !            35: use utf8;
        !            36: use warnings;
        !            37: 
        !            38: use XML::LibXML;
        !            39: 
        !            40: 
        # LON-CAPA block elements. An HTML element that has one of these among
        # its descendants does not start an outtext section (see
        # contains_loncapa_block).
        my @loncapa_block = qw(
          parameter location answer foil image polygon rectangle text
          conceptgroup itemgroup item label data function array unit
          answergroup functionplotresponse functionplotruleset
          functionplotelements functionplotcustomrule essayresponse hintpart
          formulahint numericalhint reactionhint organichint optionhint
          radiobuttonhint stringhint customhint mathhint
          formulahintcondition numericalhintcondition reactionhintcondition
          organichintcondition optionhintcondition radiobuttonhintcondition
          stringhintcondition customhintcondition mathhintcondition
          imageresponse foilgroup datasubmission textfield hiddensubmission
          radiobuttonresponse rankresponse matchresponse import style script
          window block library notsolved part postanswerdate preduedate
          problem problemtype randomlabel bgimg labelgroup randomlist solved
          while tex print web gnuplot curve Task IntroParagraph
          ClosingParagraph Question QuestionText Setup Instance InstanceText
          Criteria CriteriaText GraderNote languageblock translated lang
          instructorcomment dataresponse togglebox standalone comment
          drawimage allow displayduedate displaytitle responseparam
          organicstructure scriptlib parserlib drawoptionlist spline
          backgroundplot plotobject plotvector drawvectorsum
          functionplotrule functionplotvectorrule functionplotvectorsumrule
          axis key xtics ytics title xlabel ylabel hiddenline dtm
          stringresponse optionresponse numericalresponse formularesponse
          mathresponse organicresponse reactionresponse customresponse
          externalresponse hint hintgroup
        );

        # LON-CAPA inline elements: they start (or continue) an outtext section.
        my @loncapa_inline = qw(
          display m lm chem num parse algebra displayweight displaystudentphoto
        ); # not textline

        # HTML elements that trigger the addition of startouttext/endouttext
        my @html_trigger = qw(
          header footer aside h1 h2 h3 h4 h5 h6 li dd dt tbody tr caption thead
          tfoot td th span a em strong b i sup sub pre code kbd samp cite q tt
          ins del var small big br hr address blockquote img figure figcaption
          object param embed applet video source audio map area canvas form
          input select optgroup option textarea fieldset legend button iframe
          section div p ul ol dl table
        );

        # Elements whose content is left alone: add_outtext returns immediately
        # for them, so no startouttext/endouttext is ever added inside.
        # (Each name is listed once; the list is only used for membership tests.)
        my @simple_data = qw(
          polygon rectangle vector value answer title data function xlabel
          ylabel tic parserlib scriptlib import tex text image display m lm num
          algebra chem parse style script ins del label option textarea legend
        );

        # Response elements that may appear directly inside a paragraph; their
        # presence makes convert_paragraphs replace <p> elements by content + <br>.
        my @inline_responses = qw(
          stringresponse optionresponse numericalresponse formularesponse
          mathresponse organicresponse reactionresponse customresponse
          externalresponse
        );
        !            51: 
        !            52: 
        # Converts the content of a well-formed XML file and returns the result:
        # the content is parsed, startouttext/endouttext elements are added by
        # add_outtext, and the document is serialized with node_to_string.
        sub convert_file {
          my $contents = shift;

          my $document = XML::LibXML->load_xml(string => $contents);
          add_outtext($document);

          my $converted = node_to_string($document);
          return $converted;
        }
        !            61: 
        !            62: 
        ##
        # Serializes a DOM node and its descendants to a string.
        # - document node: the document element is serialized;
        # - text and CDATA nodes: the raw value is returned (no escaping) when the
        #   parent is m, script, display or parse, or when the parent is an answer
        #   whose own parent exists and is neither numericalresponse nor
        #   formularesponse; otherwise the node's toString() is returned;
        # - element node: tag, attributes and children are written out, with the
        #   empty-element form <tag/> when there is no child;
        # - any other node type: the node's toString() is returned.
        # @param {Node} node - the DOM node
        # @returns {string} the serialized node
        ##
        sub node_to_string {
          my ($node) = @_;

          my $type = $node->nodeType;

          if ($type == XML_DOCUMENT_NODE) {
            return node_to_string($node->documentElement());
          }

          if ($type == XML_TEXT_NODE || $type == XML_CDATA_SECTION_NODE) {
            my $parent = $node->parentNode;
            my $parent_name = $parent->nodeName;
            my $grandparent_name;
            if (defined $parent->parentNode) {
              $grandparent_name = $parent->parentNode->nodeName;
            }
            my @no_escape = ('m', 'script', 'display', 'parse', 'answer');
            if (string_in_array(\@no_escape, $parent_name) &&
                ($parent_name ne 'answer' ||
                (defined $grandparent_name &&
                $grandparent_name ne 'numericalresponse' &&
                $grandparent_name ne 'formularesponse'))) {
              return $node->nodeValue;
            }
            return $node->toString();
          }

          if ($type == XML_ELEMENT_NODE) {
            my $tag = $node->nodeName;
            my $s = "<$tag";
            foreach my $attribute ($node->attributes()) {
              my $value = escape($attribute->nodeValue);
              # escape() leaves double quotes alone, but the value is written
              # between double quotes: an unescaped " would end it too early.
              $value =~ s/"/&quot;/sg;
              $s .= ' ' . $attribute->nodeName . '="' . $value . '"';
            }
            if ($node->hasChildNodes()) {
              $s .= '>';
              foreach my $child ($node->childNodes) {
                $s .= node_to_string($child);
              }
              $s .= "</$tag>";
            } else {
              $s .= '/>';
            }
            return $s;
          }

          return $node->toString();
        }
        !           112: 
        ##
        # Escapes the characters &, < and > in a string for LON-CAPA output.
        # Quotes (" and ') are returned unchanged by this function; note that
        # node_to_string passes attribute values through it.
        # @param {string} s - the string to escape
        # @returns {string} the escaped string
        ##
        sub escape {
          my ($s) = @_;
          # & has to be replaced first, so that the & of the entities added
          # afterwards is not escaped again
          for ($s) {
            s/&/&amp;/sg;
            s/</&lt;/sg;
            s/>/&gt;/sg;
          }
          return $s;
        }
        !           122: 
        ##
        # Adds startouttext and endouttext elements where useful for the colorful
        # editor. For a document, the document element is processed. Nodes that
        # are not elements, and elements listed in @simple_data, are left alone.
        # Otherwise paragraphs are converted first (convert_paragraphs), then the
        # children are scanned: a startouttext is added before the first child for
        # which inside_outtext is true, and an endouttext before the first
        # following child for which continue_outtext is false (or at the end of
        # the element). Children outside of an outtext section are processed
        # recursively.
        # @param {Node} node - the DOM node to process (modified in place)
        ##
        sub add_outtext {
          my ($node) = @_;

          my $type = $node->nodeType;
          if ($type == XML_DOCUMENT_NODE) {
            add_outtext($node->documentElement());
            return;
          }
          return if $type != XML_ELEMENT_NODE;
          return if string_in_array(\@simple_data, $node->nodeName);

          convert_paragraphs($node);

          my $outtext_open = 0;
          my $child = $node->firstChild;
          while (defined $child) {
            # remember the sibling now: elements are inserted around $child below
            my $following = $child->nextSibling;
            if ($outtext_open) {
              if (!continue_outtext($child)) {
                add_endouttext($node, $child);
                $outtext_open = 0;
              }
            } elsif (inside_outtext($child)) {
              add_startouttext($node, $child);
              $outtext_open = 1;
            }
            if (!$outtext_open) {
              add_outtext($child);
            }
            $child = $following;
          }
          if ($outtext_open) {
            # the section runs up to the end of the element
            add_endouttext($node);
          }
        }
        !           158: 
        ##
        # Tells if a node should trigger the addition of startouttext before it.
        # This is the case for a text node that is not only whitespace, for an
        # element of @html_trigger that does not contain a LON-CAPA block, and for
        # an element of @loncapa_inline.
        # @param {Node} node - the DOM node
        # @returns 1 if the node triggers an outtext section, 0 otherwise
        ##
        sub inside_outtext {
          my ($node) = @_;

          my $type = $node->nodeType;
          if ($type == XML_TEXT_NODE) {
            return ($node->nodeValue !~ /^\s*$/) ? 1 : 0;
          }
          if ($type != XML_ELEMENT_NODE) {
            return 0;
          }

          my $name = $node->nodeName;
          if (string_in_array(\@html_trigger, $name)) {
            # HTML wrapping a LON-CAPA block must stay outside of the outtext
            return contains_loncapa_block($node) ? 0 : 1;
          }
          return string_in_array(\@loncapa_inline, $name) ? 1 : 0;
        }
        !           177: 
        ##
        # Tells if an outtext section that is already open can continue with this
        # node: any node accepted by inside_outtext, and any text node (even one
        # that contains only spaces).
        # @param {Node} node - the DOM node
        # @returns 1 if the section can continue, 0 otherwise
        ##
        sub continue_outtext {
          my ($node) = @_;
          my $can_continue = inside_outtext($node) || $node->nodeType == XML_TEXT_NODE;
          return $can_continue ? 1 : 0;
        }
        !           189: 
        ##
        # Tells if a node has a LON-CAPA block (an element named in
        # @loncapa_block) among its descendants. The node itself is not tested.
        # @param {Node} node - the DOM node
        # @returns 1 if a LON-CAPA block was found, 0 otherwise
        ##
        sub contains_loncapa_block {
          my ($node) = @_;

          # work list of descendants still to be looked at
          my @pending = $node->childNodes;
          while (@pending) {
            my $candidate = shift @pending;
            next if $candidate->nodeType != XML_ELEMENT_NODE;
            if (string_in_array(\@loncapa_block, $candidate->nodeName)) {
              return 1;
            }
            push @pending, $candidate->childNodes;
          }
          return 0;
        }
        !           205: 
        ##
        # Inserts a startouttext element before a node. When the node is a text
        # node starting with whitespace, that whitespace is moved to a new text
        # node placed before the startouttext element; the rest of the text
        # (including any trailing newline) stays in the original node.
        # @param {Node} parent - the parent of before_node
        # @param {Node} before_node - the first node of the outtext section
        ##
        sub add_startouttext {
          my ($parent, $before_node) = @_;
          my $doc = $parent->ownerDocument;
          if ($before_node->nodeType == XML_TEXT_NODE) {
            # split space at the beginning of the node
            # (\z and not $: $ also matches just before a final newline, which
            # would then be dropped from the text)
            if ($before_node->nodeValue =~ /\A(\s+)(.*)\z/s) {
              my ($leading_space, $remaining_text) = ($1, $2);
              my $space_node = $doc->createTextNode($leading_space);
              $before_node->setData($remaining_text);
              $parent->insertBefore($space_node, $before_node);
            }
          }
          my $startouttext = $doc->createElement('startouttext');
          $parent->insertBefore($startouttext, $before_node);
        }
        !           220: 
        ##
        # Inserts an endouttext element before a node, or at the end of the parent
        # when no node is given. When the node preceding the insertion point is a
        # text node ending with whitespace, that whitespace is moved to a new text
        # node placed after the endouttext element.
        # @param {Node} parent - the element containing the outtext section
        # @param {Node} [before_node] - the first node after the outtext section
        ##
        sub add_endouttext {
          my ($parent, $before_node) = @_;

          my $doc = $parent->ownerDocument;
          my $endouttext = $doc->createElement('endouttext');

          # last node of the outtext section
          my $last_in_section = defined $before_node
            ? $before_node->previousSibling
            : $parent->lastChild;

          if (defined $last_in_section && $last_in_section->nodeType == XML_TEXT_NODE) {
            # split space at the end of the node
            my ($kept_text, $trailing_space) =
              $last_in_section->nodeValue =~ /^(.*?)(\s+)$/s;
            if (defined $trailing_space) {
              $last_in_section->setData($kept_text);
              my $space_node = $doc->createTextNode($trailing_space);
              if (defined $before_node) {
                $parent->insertBefore($space_node, $before_node);
              } else {
                $parent->appendChild($space_node);
              }
              # endouttext now goes before the whitespace that was split off
              $before_node = $space_node;
            }
          }

          if (defined $before_node) {
            $parent->insertBefore($endouttext, $before_node);
          } else {
            $parent->appendChild($endouttext);
          }
        }
        !           250: 
        ##
        # Converts the paragraph children of a node into content + <br> when at
        # least one of them has an inline response as a direct child
        # (the colorful editor does not support paragraphs containing inline
        # responses). In that case every p child is replaced by its children, and
        # a br is added after them unless nothing, or only a final whitespace
        # text node, follows.
        # @param {Node} parent - the node whose p children might be converted
        ##
        sub convert_paragraphs {
          my ($parent) = @_;

          my $needs_conversion = 0;
          PARAGRAPH:
          foreach my $candidate ($parent->childNodes) {
            next PARAGRAPH if $candidate->nodeType != XML_ELEMENT_NODE;
            next PARAGRAPH if $candidate->nodeName ne 'p';
            foreach my $inner ($candidate->childNodes) {
              next if $inner->nodeType != XML_ELEMENT_NODE;
              if (string_in_array(\@inline_responses, $inner->nodeName)) {
                $needs_conversion = 1;
                last PARAGRAPH;
              }
            }
          }
          return if !$needs_conversion;

          my $doc = $parent->ownerDocument;
          my $child = $parent->firstChild;
          while (defined $child) {
            # remember the sibling now: $child might be removed below
            my $following = $child->nextSibling;
            if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
              replace_by_children($child);
              # we only add a br if there is something after
              my $something_follows = defined $following &&
                (defined $following->nextSibling ||
                 $following->nodeType != XML_TEXT_NODE ||
                 $following->nodeValue !~ /^\s*$/);
              if ($something_follows) {
                $parent->insertBefore($doc->createElement('br'), $following);
              }
            }
            $child = $following;
          }
        }
        !           288: 
        ##
        # Looks for a string in an array of strings, comparing with eq
        # (this avoids the Smartmatch warnings of $value ~~ @array).
        # @param {Array<string>} array - reference to the array of strings
        # @param {string} value - the string to look for
        # @returns 1 if found, 0 otherwise
        ##
        sub string_in_array {
          my ($array, $value) = @_;
          for my $candidate (@{$array}) {
            return 1 if $candidate eq $value;
          }
          return 0;
        }
        !           304: 
        ##
        # Replaces a node by its children: the children are moved before the node
        # in its parent, then the node is removed. Whitespace-only text nodes at
        # the start and at the very end of the node are not kept, and the
        # whitespace at the beginning of a first text node and at the end of a
        # last text node is removed.
        # @param {Node} node - the DOM node
        ##
        sub replace_by_children {
          my ($node) = @_;

          my $parent = $node->parentNode;
          my $moved_one = 0; # true once a child has been moved to the parent
          my $child = $node->firstChild;
          while (defined $child) {
            my $following = $child->nextSibling;
            my $is_text = ($child->nodeType == XML_TEXT_NODE);
            my $is_first = !$moved_one;
            my $is_last = !defined $following;

            if (($is_first || $is_last) && $is_text && $child->nodeValue =~ /^\s*$/) {
              # do not keep first and last whitespace nodes:
              # the node is left in place and disappears with its parent
            } else {
              if ($is_first && $is_text) {
                # remove whitespace at the beginning
                my $trimmed = $child->nodeValue;
                $trimmed =~ s/^\s+//;
                $child->setData($trimmed);
              }
              if ($is_last && $is_text) {
                # and at the end
                my $trimmed = $child->nodeValue;
                $trimmed =~ s/\s+$//;
                $child->setData($trimmed);
              }
              $node->removeChild($child);
              $parent->insertBefore($child, $node);
              $moved_one = 1;
            }
            $child = $following;
          }
          $parent->removeChild($node);
        }
        !           339: 
        !           340: 1;
        !           341: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>