Diff for /modules/damieng/clean_xml/xml_to_loncapa.pl between versions 1.1 and 1.2

version 1.1, 2015/04/29 19:26:05 version 1.2, 2015/05/16 20:17:13
Line 8  use warnings; Line 8  use warnings;
   
 use XML::LibXML;  use XML::LibXML;
   
   
   # Element names treated as LON-CAPA blocks: contains_loncapa_block() returns 1
   # as soon as it finds a descendant element whose name is in this list.
   my @loncapa_block = ('parameter','location','answer','foil','image','polygon','rectangle','text','conceptgroup','itemgroup','item','label','data','function','array','unit','answergroup','functionplotresponse','functionplotruleset','functionplotelements','functionplotcustomrule','essayresponse','hintpart','formulahint','numericalhint','reactionhint','organichint','optionhint','radiobuttonhint','stringhint','customhint','mathhint','formulahintcondition','numericalhintcondition','reactionhintcondition','organichintcondition','optionhintcondition','radiobuttonhintcondition','stringhintcondition','customhintcondition','mathhintcondition','imageresponse','foilgroup','datasubmission','textfield','hiddensubmission','radiobuttonresponse','rankresponse','matchresponse','import','style','script','window','block','library','notsolved','part','postanswerdate','preduedate','problem','problemtype','randomlabel','bgimg','labelgroup','randomlist','solved','while','tex','print','web','gnuplot','curve','Task','IntroParagraph','ClosingParagraph','Question','QuestionText','Setup','Instance','InstanceText','Criteria','CriteriaText','GraderNote','languageblock','translated','lang','instructorcomment','dataresponse','togglebox','standalone','comment','drawimage','allow','displayduedate','displaytitle','responseparam','organicstructure','scriptlib','parserlib','drawoptionlist','spline','backgroundplot','plotobject','plotvector','drawvectorsum','functionplotrule','functionplotvectorrule','functionplotvectorsumrule','axis','key','xtics','ytics','title','xlabel','ylabel','hiddenline','dtm','stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse', 'hint', 'hintgroup');
   
   # Element names treated as LON-CAPA inline content: inside_outtext() returns 1
   # for an element whose name is in this list.
   my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # textline is not included
   
   # Element names treated as HTML: inside_outtext() returns 1 for such an element
   # unless contains_loncapa_block() finds a LON-CAPA block among its descendants.
   my @html_elements = ('html','meta','head','title','base','link','style','noscript','body','header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','label','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table'); # script is not included
   
   # Element names whose content is left untouched: add_outtext() returns without
   # looking at the children of an element whose name is in this list.
   # NOTE(review): 'title' is listed twice; harmless for a membership test.
   my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );
   
   
 binmode(STDOUT, ':encoding(UTF-8)');  binmode(STDOUT, ':encoding(UTF-8)');
   
 if (scalar(@ARGV) != 1) {  if (scalar(@ARGV) != 1) {
Line 39  sub convert_file { Line 49  sub convert_file {
   my $dom_doc = XML::LibXML->load_xml(location => $pathname);    my $dom_doc = XML::LibXML->load_xml(location => $pathname);
       
   open my $out, '>:encoding(UTF-8)', $newpath;    open my $out, '>:encoding(UTF-8)', $newpath;
     add_outtext($dom_doc);
   print $out node_to_string($dom_doc);    print $out node_to_string($dom_doc);
   close $out;    close $out;
 }  }
Line 103  sub escape { Line 114  sub escape {
   return $s;    return $s;
 }  }
   
   # Adds startouttext and endouttext where useful for the colorfull editor
   sub add_outtext {
     my ($node) = @_;
     
     if ($node->nodeType == XML_DOCUMENT_NODE) {
       my $root = $node->documentElement();
       add_outtext($root);
       return;
     }
     if ($node->nodeType != XML_ELEMENT_NODE) {
       return;
     }
     if (string_in_array(\@simple_data, $node->nodeName)) {
       return;
     }
     my $next;
     my $in_outtext = 0;
     for (my $child=$node->firstChild; defined $child; $child=$next) {
       $next = $child->nextSibling;
       if (!$in_outtext && inside_outtext($child)) {
         # Add startouttext
         my $doc = $node->ownerDocument;
         my $startouttext = $doc->createElement('startouttext');
         $node->insertBefore($startouttext, $child);
         $in_outtext = 1;
       } elsif ($in_outtext && !continue_outtext($child)) {
         # Add endouttext
         add_endouttext($node, $child);
         $in_outtext = 0;
       }
       if (!$in_outtext) {
         add_outtext($child);
       }
     }
     if ($in_outtext) {
       # Add endouttext
       add_endouttext($node);
     }
   }
   
   # Returns 1 if this node should trigger the addition of startouttext before it
   sub inside_outtext {
     my ($node) = @_;
     if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) {
       return 1;
     }
     if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_elements, $node->nodeName)) {
       if (contains_loncapa_block($node)) {
         return 0;
       } else {
         return 1;
       }
     }
     if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@loncapa_inline, $node->nodeName)) {
       return 1;
     }
     return 0;
   }
   
   # Returns 1 if the outtext environment can continue with this node
   sub continue_outtext {
     my ($node) = @_;
     if (inside_outtext($node)) {
       return 1;
     }
     if ($node->nodeType == XML_TEXT_NODE) {
       return 1; # continue even if this is just spaces
     }
     return 0;
   }
   
   # Returns 1 if the node contains a LON-CAPA block in a descendant.
   sub contains_loncapa_block {
     my ($node) = @_;
     foreach my $child ($node->childNodes) {
       if ($child->nodeType == XML_ELEMENT_NODE) {
         if (string_in_array(\@loncapa_block, $child->nodeName)) {
           return 1;
         }
         if (contains_loncapa_block($child)) {
           return 1;
         }
       }
     }
     return 0;
   }
   
   sub add_endouttext {
     my ($parent, $before_node) = @_;
     my $doc = $parent->ownerDocument;
     my $endouttext = $doc->createElement('endouttext');
     my $before_before;
     if (defined $before_node) {
       $before_before = $before_node->previousSibling;
     } else {
       $before_before = $parent->lastChild;
     }
     if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {
       # split space at the end of the node before endouttext
       if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) {
         $before_before->setData($1);
         my $space_node = $doc->createTextNode($2);
         if (defined $before_node) {
           $parent->insertBefore($space_node, $before_node);
         } else {
           $parent->appendChild($space_node);
         }
         $before_node = $space_node;
       }
     }
     if (defined $before_node) {
       $parent->insertBefore($endouttext, $before_node);
     } else {
       $parent->appendChild($endouttext);
     }
   }
   
 ##  ##
 # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)  # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
 # @param {Array<string>} array - reference to the array of strings  # @param {Array<string>} array - reference to the array of strings

Removed from v.1.1  
changed lines
  Added in v.1.2


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>