Diff for /modules/damieng/clean_xml/xml_to_loncapa.pl between versions 1.2 and 1.3

version 1.2, 2015/05/16 20:17:13 version 1.3, 2015/05/19 15:36:01
Line 13  my @loncapa_block = ('parameter','locati Line 13  my @loncapa_block = ('parameter','locati
   
 my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline  my @loncapa_inline = ('display','m','lm','chem','num','parse','algebra','displayweight','displaystudentphoto'); # not textline
   
 my @html_elements = ('html','meta','head','title','base','link','style','noscript','body','header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','label','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table'); # without script  # HTML elements that trigger the addition of startouttext/endouttext
   my @html_trigger = ('header','footer','aside','h1','h2','h3','h4','h5','h6','li','dd','dt','tbody','tr','caption','thead','tfoot','td','th','span','a','em','strong','b','i','sup','sub','pre','code','kbd','samp','cite','q','tt','ins','del','var','small','big','br','hr','address','blockquote','img','figure','figcaption','object','param','embed','applet','video','source','audio','map','area','canvas','form','input','select','optgroup','option','textarea','fieldset','legend','button','iframe','section','div','p','ul','ol','dl','table');
   
 my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );  my @simple_data = ('polygon', 'rectangle', 'vector', 'value', 'answer', 'title', 'data', 'function', 'xlabel', 'ylabel', 'tic', 'parserlib', 'scriptlib', 'import', 'tex', 'text', 'image', 'display', 'm', 'lm', 'num', 'algebra', 'chem', 'parse', 'title', 'style', 'script', 'ins', 'del', 'label', 'option', 'textarea', 'legend' );
   
   my @inline_responses = ('stringresponse','optionresponse','numericalresponse','formularesponse','mathresponse','organicresponse','reactionresponse','customresponse','externalresponse');
   
   
 binmode(STDOUT, ':encoding(UTF-8)');  binmode(STDOUT, ':encoding(UTF-8)');
Line 114  sub escape { Line 117  sub escape {
   return $s;    return $s;
 }  }
   
 # Adds startouttext and endouttext where useful for the colorfull editor  # Adds startouttext and endouttext where useful for the colorful editor
 sub add_outtext {  sub add_outtext {
   my ($node) = @_;    my ($node) = @_;
       
Line 129  sub add_outtext { Line 132  sub add_outtext {
   if (string_in_array(\@simple_data, $node->nodeName)) {    if (string_in_array(\@simple_data, $node->nodeName)) {
     return;      return;
   }    }
     convert_paragraphs($node);
   my $next;    my $next;
   my $in_outtext = 0;    my $in_outtext = 0;
   for (my $child=$node->firstChild; defined $child; $child=$next) {    for (my $child=$node->firstChild; defined $child; $child=$next) {
     $next = $child->nextSibling;      $next = $child->nextSibling;
     if (!$in_outtext && inside_outtext($child)) {      if (!$in_outtext && inside_outtext($child)) {
       # Add startouttext        add_startouttext($node, $child);
       my $doc = $node->ownerDocument;  
       my $startouttext = $doc->createElement('startouttext');  
       $node->insertBefore($startouttext, $child);  
       $in_outtext = 1;        $in_outtext = 1;
     } elsif ($in_outtext && !continue_outtext($child)) {      } elsif ($in_outtext && !continue_outtext($child)) {
       # Add endouttext  
       add_endouttext($node, $child);        add_endouttext($node, $child);
       $in_outtext = 0;        $in_outtext = 0;
     }      }
Line 149  sub add_outtext { Line 149  sub add_outtext {
     }      }
   }    }
   if ($in_outtext) {    if ($in_outtext) {
     # Add endouttext  
     add_endouttext($node);      add_endouttext($node);
   }    }
 }  }
Line 160  sub inside_outtext { Line 159  sub inside_outtext {
   if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) {    if ($node->nodeType == XML_TEXT_NODE && $node->nodeValue !~ /^\s*$/) {
     return 1;      return 1;
   }    }
   if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_elements, $node->nodeName)) {    if ($node->nodeType == XML_ELEMENT_NODE && string_in_array(\@html_trigger, $node->nodeName)) {
     if (contains_loncapa_block($node)) {      if (contains_loncapa_block($node)) {
       return 0;        return 0;
     } else {      } else {
Line 201  sub contains_loncapa_block { Line 200  sub contains_loncapa_block {
   return 0;    return 0;
 }  }
   
   sub add_startouttext {
     my ($parent, $before_node) = @_;
     my $doc = $parent->ownerDocument;
     if ($before_node->nodeType == XML_TEXT_NODE) {
       # split space at the beginning of the node
       if ($before_node->nodeValue =~ /^(\s+)(.*?)$/s) {
         my $space_node = $doc->createTextNode($1);
         $before_node->setData($2);
         $parent->insertBefore($space_node, $before_node);
       }
     }
     my $startouttext = $doc->createElement('startouttext');
     $parent->insertBefore($startouttext, $before_node);
   }
   
 sub add_endouttext {  sub add_endouttext {
   my ($parent, $before_node) = @_;    my ($parent, $before_node) = @_;
   my $doc = $parent->ownerDocument;    my $doc = $parent->ownerDocument;
Line 212  sub add_endouttext { Line 226  sub add_endouttext {
     $before_before = $parent->lastChild;      $before_before = $parent->lastChild;
   }    }
   if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {    if (defined $before_before && $before_before->nodeType == XML_TEXT_NODE) {
     # split space at the end of the node before endouttext      # split space at the end of the node
     if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) {      if ($before_before->nodeValue =~ /^(.*?)(\s+)$/s) {
       $before_before->setData($1);        $before_before->setData($1);
       my $space_node = $doc->createTextNode($2);        my $space_node = $doc->createTextNode($2);
Line 231  sub add_endouttext { Line 245  sub add_endouttext {
   }    }
 }  }
   
   # Convert paragraph children when one contains an inline response into content + <br>
   # (the colorful editor does not support paragraphs containing inline responses)
   sub convert_paragraphs {
     my ($parent) = @_;
     my $p_child_with_inline_response = 0;
     foreach my $child ($parent->childNodes) {
       if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
         foreach my $child2 ($child->childNodes) {
           if ($child2->nodeType == XML_ELEMENT_NODE) {
             if (string_in_array(\@inline_responses, $child2->nodeName)) {
               $p_child_with_inline_response = 1;
               last;
             }
           }
         }
       }
       if ($p_child_with_inline_response) {
         last;
       }
     }
     if ($p_child_with_inline_response) {
       my $doc = $parent->ownerDocument;
       my $next;
       for (my $child=$parent->firstChild; defined $child; $child=$next) {
         $next = $child->nextSibling;
         if ($child->nodeType == XML_ELEMENT_NODE && $child->nodeName eq 'p') {
           replace_by_children($child);
           if (defined $next && (defined $next->nextSibling || $next->nodeType != XML_TEXT_NODE ||
               $next->nodeValue !~ /^\s*$/)) {
             # we only add a br if there is something after
             my $br = $doc->createElement('br');
             $parent->insertBefore($br, $next);
           }
         }
       }
     }
   }
   
 ##  ##
 # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)  # Tests if a string is in an array (using eq) (to avoid Smartmatch warnings with $value ~~ @array)
 # @param {Array<string>} array - reference to the array of strings  # @param {Array<string>} array - reference to the array of strings
Line 246  sub string_in_array { Line 298  sub string_in_array {
   }    }
   return 0;    return 0;
 }  }
   
   ##
   # replaces a node by its children
   # @param {Node} node - the DOM node
   ##
   sub replace_by_children {
     my ($node) = @_;
     my $parent = $node->parentNode;
     my $next;
     my $previous;
     for (my $child=$node->firstChild; defined $child; $child=$next) {
       $next = $child->nextSibling;
       if ((!defined $previous || !defined $next) &&
           $child->nodeType == XML_TEXT_NODE && $child->nodeValue =~ /^\s*$/) {
         next; # do not keep first and last whitespace nodes
       } else {
         if (!defined $previous && $child->nodeType == XML_TEXT_NODE) {
           # remove whitespace at the beginning
           my $value = $child->nodeValue;
           $value =~ s/^\s+//;
           $child->setData($value);
         }
         if (!defined $next && $child->nodeType == XML_TEXT_NODE) {
           # and at the end
           my $value = $child->nodeValue;
           $value =~ s/\s+$//;
           $child->setData($value);
         }
       }
       $node->removeChild($child);
       $parent->insertBefore($child, $node);
       $previous = $child;
     }
     $parent->removeChild($node);
   }
   

Removed from v.1.2  
changed lines
  Added in v.1.3


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>