File:  [LON-CAPA] / loncom / homework / cleanxml / xml_to_loncapa.pm
Revision 1.8: download - view: text, annotated - select for diffs
Wed Jan 20 21:24:22 2016 UTC (8 years, 3 months ago) by damieng
Branches: MAIN
CVS tags: HEAD
add startouttext and endouttext inside empty hintgroup; put translated and lang within startouttext/endouttext

    1: # The LearningOnline Network
    2: # convert_file takes a well-formed XML file content and converts it to LON-CAPA syntax.
    3: #
    4: # $Id: xml_to_loncapa.pm,v 1.8 2016/01/20 21:24:22 damieng Exp $
    5: #
    6: # Copyright Michigan State University Board of Trustees
    7: #
    8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
    9: #
   10: # LON-CAPA is free software; you can redistribute it and/or modify
   11: # it under the terms of the GNU General Public License as published by
   12: # the Free Software Foundation; either version 2 of the License, or
   13: # (at your option) any later version.
   14: #
   15: # LON-CAPA is distributed in the hope that it will be useful,
   16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18: # GNU General Public License for more details.
   19: #
   20: # You should have received a copy of the GNU General Public License
   21: # along with LON-CAPA; if not, write to the Free Software
   22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23: #
   24: # /home/httpd/html/adm/gpl.txt
   25: #
   26: # http://www.lon-capa.org/
   27: #
   28: ###
   29: 
   30: #!/usr/bin/perl
   31: 
   32: package Apache::xml_to_loncapa;
   33: 
   34: use strict;
   35: use utf8;
   36: use warnings;
   37: 
   38: use XML::LibXML;
   39: 
   40: 
   41: # LON-CAPA block elements that cannot be found within startouttext/endouttext
   42: my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
   43: 
   44: # LON-CAPA elements that can be found within startouttext/endouttext:
   45: my @loncapa_in_text = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto','translated','lang'); # not textline
   46: 
   47: # HTML elements that trigger the addition of startouttext/endouttext
   48: my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');
   49: 
   50: my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );
   51: 
   52: my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
   53: 
   54: # see http://www.w3.org/TR/html-polyglot/#empty-elements
   55: # and http://tiffanybbrown.com/2011/03/23/html5-does-not-allow-self-closing-tags/
   56: # HTML elements that do not have an empty content, and must never use a self-closing tag:
   57: my @non_empty_html = ('title','style','script','noscript','body','section','header','footer','article','aside','nav','h1','h2','h3','h4','h5','h6','div','p','li','dt','dd','caption','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','address','blockquote','bdo','ruby','rb','rp','rt','rtc','figure','figcaption','object','applet','video','audio','canvas','label','option','textarea','fieldset','legend','button','iframe');
   58: 
   59: 
   60: # Converts a file and return the modified contents
   61: sub convert_file {
   62:   my ($contents) = @_;
   63: 
   64:   my $dom_doc = XML::LibXML->load_xml(string => $contents);
   65:   my $root = $dom_doc->documentElement();
   66:   if (defined $root && $root->nodeName ne 'html') {
   67:     add_outtext($dom_doc);
   68:   }
   69:   return node_to_string($dom_doc);
   70: }
   71: 
   72: 
   73: sub node_to_string {
   74:   my ($node) = @_;
   75:   
   76:   if ($node->nodeType == XML_DOCUMENT_NODE) {
   77:     my $root = $node->documentElement();
   78:     return node_to_string($root);
   79:   } elsif ($node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE) {
   80:     my $parent = $node->parentNode;
   81:     my $parent_name = $parent->nodeName;
   82:     my $grandparent_name;
   83:     if (defined $parent->parentNode) {
   84:       $grandparent_name = $parent->parentNode->nodeName;
   85:     }
   86:     my @no_escape = ('m', 'script', 'style', 'display', 'parse', 'answer');
   87:     if (string_in_array(\@no_escape, $parent_name) &&
   88:         ($parent_name ne 'answer' ||
   89:         (defined $grandparent_name &&
   90:         $grandparent_name ne 'numericalresponse' &&
   91:         $grandparent_name ne 'formularesponse'))) {
   92:       return $node->nodeValue;
   93:     } else {
   94:       return $node->toString();
   95:     }
   96:   } elsif ($node->nodeType == XML_ELEMENT_NODE) {
   97:     my $s = '';
   98:     my $tag = $node->nodeName;
   99:     $s .= "<$tag";
  100:     my @attributes = $node->attributes();
  101:     foreach my $attribute (@attributes) {
  102:       $s .= ' ';
  103:       $s .= $attribute->nodeName;
  104:       $s .= '="';
  105:       $s .= escape_attribute($attribute->nodeValue);
  106:       $s .= '"';
  107:     }
  108:     if ($node->hasChildNodes() || string_in_array(\@non_empty_html, $tag)) {
  109:       $s .= '>';
  110:       foreach my $child ($node->childNodes) {
  111:         $s .= node_to_string($child);
  112:       }
  113:       $s .= "</$tag>";
  114:     } else {
  115:       $s .= '/>';
  116:     }
  117:     return $s;
  118:   } else {
  119:     return $node->toString();
  120:   }
  121: }
  122: 
  123: # Escapes an attribute value
  124: sub escape_attribute {
  125:   my ($s) = @_;
  126:   # normal XML escapes do not work with LON-CAPA, for instance with reactionresponse
  127:   #$s =~ s/&/&amp;/sg;
  128:   #$s =~ s/</&lt;/sg;
  129:   #$s =~ s/>/&gt;/sg;
  130:   $s =~ s/"/&quot;/sg;
  131:   return $s;
  132: }
  133: 
  134: # Adds startouttext and endouttext where useful for the colorful editor
  135: sub add_outtext {
  136:   my ($node) = @_;
  137:   
  138:   if ($node->nodeType == XML_DOCUMENT_NODE) {
  139:     my $root = $node->documentElement();
  140:     add_outtext($root);
  141:     return;
  142:   }
  143:   if ($node->nodeType != XML_ELEMENT_NODE) {
  144:     return;
  145:   }
  146:   if (string_in_array(\@simple_data, $node->nodeName)) {
  147:     return;
  148:   }
  149:   convert_paragraphs($node);
  150:   if ($node->nodeName eq 'hintgroup' && !defined $node->firstChild) {
  151:     # empty hintgroup: colorful editor needs start/end outtext
  152:     add_endouttext($node, undef);
  153:     add_startouttext($node, $node->firstChild);
  154:   }
  155:   my $next;
  156:   my $in_outtext = 0;
  157:   for (my $child=$node->firstChild; defined $child; $child=$next) {
  158:     $next = $child->nextSibling;
  159:     if (!$in_outtext && inside_outtext($child)) {
  160:       add_startouttext($node, $child);
  161:       $in_outtext = 1;
  162:     } elsif ($in_outtext && !continue_outtext($child)) {
  163:       add_endouttext($node, $child);
  164:       $in_outtext = 0;
  165:     }
  166:     if (!$in_outtext) {
  167:       add_outtext($child);
  168:     }
  169:   }
  170:   if ($in_outtext) {
  171:     add_endouttext($node);
  172:   }
  173: }
  174: 
  175: # Returns 1 if this node should trigger the addition of startouttext before it
  176: sub inside_outtext {
  177:   my ($node) = @_;
  178:   if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) {
  179:     return 1;
  180:   }
  181:   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_trigger, $node->nodeName)) {
  182:     if (contains_loncapa_block($node)) {
  183:       return 0;
  184:     } else {
  185:       return 1;
  186:     }
  187:   }
  188:   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_in_text, $node->nodeName)) {
  189:     return 1;
  190:   }
  191:   return 0;
  192: }
  193: 
  194: # Returns 1 if the outtext environment can continue with this node
  195: sub continue_outtext {
  196:   my ($node) = @_;
  197:   if (inside_outtext($node)) {
  198:     return 1;
  199:   }
  200:   if ($node->nodeType == XML_TEXT_NODE) {
  201:     return 1; # continue even if this is just spaces
  202:   }
  203:   return 0;
  204: }
  205: 
  206: # Returns 1 if the node contains a LON-CAPA block in a descendant.
  207: sub contains_loncapa_block {
  208:   my ($node) = @_;
  209:   foreach my $child ($node->childNodes) {
  210:     if ($child->nodeType == XML_ELEMENT_NODE) {
  211:       if (string_in_array(\@loncapa_block, $child->nodeName)) {
  212:         return 1;
  213:       }
  214:       if (contains_loncapa_block($child)) {
  215:         return 1;
  216:       }
  217:     }
  218:   }
  219:   return 0;
  220: }
  221: 
  222: sub add_startouttext {
  223:   my ($parent, $before_node) = @_;
  224:   my $doc = $parent->ownerDocument;
  225:   if ($before_node->nodeType == XML_TEXT_NODE) {
  226:     # split space at the beginning of the node
  227:     if ($before_node->nodeValue =~ /^(\s+)(.*?)$/s) {
  228:       my $space_node = $doc->createTextNode($1);
  229:       $before_node->setData($2);
  230:       $parent->insertBefore($space_node, $before_node);
  231:     }
  232:   }
  233:   my $startouttext = $doc->createElement('startouttext');
  234:   $parent->insertBefore($startouttext, $before_node);
  235: }
  236: 
  237: sub add_endouttext {
  238:   my ($parent, $before_node) = @_;
  239:   my $doc = $parent->ownerDocument;
  240:   my $endouttext = $doc->createElement('endouttext');
  241:   my $before_before;
  242:   if (defined $before_node) {
  243:     $before_before = $before_node->previousSibling;
  244:   } else {
  245:     $before_before = $parent->lastChild;
  246:   }
  247:   if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {
  248:     # split space at the end of the node
  249:     if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) {
  250:       $before_before->setData($1);
  251:       my $space_node = $doc->createTextNode($2);
  252:       if (defined $before_node) {
  253:         $parent->insertBefore($space_node, $before_node);
  254:       } else {
  255:         $parent->appendChild($space_node);
  256:       }
  257:       $before_node = $space_node;
  258:     }
  259:   }
  260:   if (defined $before_node) {
  261:     $parent->insertBefore($endouttext, $before_node);
  262:   } else {
  263:     $parent->appendChild($endouttext);
  264:   }
  265:   # replace spaces afterwards by a \n + indentation
  266:   my $next = $endouttext->nextSibling;
  267:   if (defined $next && $next->nodeType == XML_TEXT_NODE) {
  268:     my $v = $next->nodeValue;
  269:     if ($v =~ /^ /) {
  270:       $v =~ s/^ +//;
  271:       if ($parent->firstChild->nodeType == XML_TEXT_NODE &&
  272:           $parent->firstChild->nodeValue =~ /^\n +$/) {
  273:         $v = $parent->firstChild->nodeValue.$v;
  274:       } else {
  275:         $v = "\n".$v;
  276:       }
  277:       $next->setData($v);
  278:     }
  279:   }
  280: }
  281: 
  282: # Convert paragraph children when one contains an inline response into content + <br>
  283: # (the colorful editor does not support paragraphs containing inline responses)
  284: sub convert_paragraphs {
  285:   my ($parent) = @_;
  286:   my $p_child_with_inline_response = 0;
  287:   foreach my $child ($parent->childNodes) {
  288:     if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
  289:       foreach my $child2 ($child->childNodes) {
  290:         if ($child2->nodeType == XML_ELEMENT_NODE) {
  291:           if (string_in_array(\@inline_responses, $child2->nodeName)) {
  292:             $p_child_with_inline_response = 1;
  293:             last;
  294:           }
  295:         }
  296:       }
  297:     }
  298:     if ($p_child_with_inline_response) {
  299:       last;
  300:     }
  301:   }
  302:   if ($p_child_with_inline_response) {
  303:     my $doc = $parent->ownerDocument;
  304:     my $next;
  305:     for (my $child=$parent->firstChild; defined $child; $child=$next) {
  306:       $next = $child->nextSibling;
  307:       if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
  308:         replace_by_children($child);
  309:         if (defined $next && (defined $next->nextSibling || $next->nodeType != XML_TEXT_NODE ||
  310:             $next->nodeValue !~ /^\s*$/)) {
  311:           # we only add a br if there is something after
  312:           my $br = $doc->createElement('br');
  313:           $parent->insertBefore($br, $next);
  314:         }
  315:       }
  316:     }
  317:   }
  318: }
  319: 
  320: ##
  321: # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
  322: # @param {Array<string>} array - reference to the array of strings
  323: # @param {string} value - the string to look for
  324: # @returns 1 if found, 0 otherwise
  325: ##
  326: sub string_in_array {
  327:   my ($array, $value) = @_;
  328:   foreach my $v (@{$array}) {
  329:     if ($v eq $value) {
  330:       return 1;
  331:     }
  332:   }
  333:   return 0;
  334: }
  335: 
  336: ##
  337: # replaces a node by its children
  338: # @param {Node} node - the DOM node
  339: ##
  340: sub replace_by_children {
  341:   my ($node) = @_;
  342:   my $parent = $node->parentNode;
  343:   my $next;
  344:   my $previous;
  345:   for (my $child=$node->firstChild; defined $child; $child=$next) {
  346:     $next = $child->nextSibling;
  347:     if ((!defined $previous || !defined $next) &&
  348:         $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) {
  349:       next; # do not keep first and last whitespace nodes
  350:     } else {
  351:       if (!defined $previous && $child->nodeType == XML_TEXT_NODE) {
  352:         # remove whitespace at the beginning
  353:         my $value = $child->nodeValue;
  354:         $value =~ s/^\s+//;
  355:         $child->setData($value);
  356:       }
  357:       if (!defined $next && $child->nodeType == XML_TEXT_NODE) {
  358:         # and at the end
  359:         my $value = $child->nodeValue;
  360:         $value =~ s/\s+$//;
  361:         $child->setData($value);
  362:       }
  363:     }
  364:     $node->removeChild($child);
  365:     $parent->insertBefore($child, $node);
  366:     $previous = $child;
  367:   }
  368:   $parent->removeChild($node);
  369: }
  370: 
  371: 1;
  372: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>