File:  [LON-CAPA] / loncom / homework / math_parser / Parser.pm
Revision 1.3: download - view: text, annotated - select for diffs
Mon Mar 13 22:31:22 2023 UTC (14 months, 3 weeks ago) by raeburn
Branches: MAIN
CVS tags: version_2_12_X, version_2_11_4_msu, HEAD
- Add $Id$ line in comments for display of version.

    1: # The LearningOnline Network with CAPA - LON-CAPA
    2: # Parser
    3: #
    4: # $Id: Parser.pm,v 1.3 2023/03/13 22:31:22 raeburn Exp $
    5: #
    6: # Copyright (C) 2014 Michigan State University Board of Trustees
    7: #
    8: # This program is free software: you can redistribute it and/or modify
    9: # it under the terms of the GNU General Public License as published by
   10: # the Free Software Foundation, either version 3 of the License, or
   11: # (at your option) any later version.
   12: #
   13: # This program is distributed in the hope that it will be useful,
   14: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   15: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   16: # GNU General Public License for more details.
   17: #
   18: # You should have received a copy of the GNU General Public License
   19: # along with this program. If not, see <http://www.gnu.org/licenses/>.
   20: #
   21: 
   22: ##
   23: # Equation parser
   24: ##
   25: package Apache::math_parser::Parser;
   26: 
   27: use strict;
   28: use warnings;
   29: use utf8;
   30: 
   31: use aliased 'Apache::math_parser::Definitions';
   32: use aliased 'Apache::math_parser::ENode';
   33: use aliased 'Apache::math_parser::Operator';
   34: use aliased 'Apache::math_parser::ParseException';
   35: use aliased 'Apache::math_parser::Token';
   36: use aliased 'Apache::math_parser::Tokenizer';
   37: 
   38: ##
   39: # Constructor
   40: # @optional {boolean} implicit_operators - assume hidden multiplication and unit operators in some cases (unlike maxima)
   41: # @optional {boolean} unit_mode - handle only numerical expressions with units (no variable)
   42: ##
   43: sub new {
   44:     my ($class, $implicit_operators, $unit_mode) = @_;
   45:     if (!defined $implicit_operators) {
   46:         $implicit_operators = 0;
   47:     }
   48:     if (!defined $unit_mode) {
   49:         $unit_mode = 0;
   50:     }
   51:     my $self = {
   52:         _implicit_operators => $implicit_operators,
   53:         _unit_mode => $unit_mode,
   54:         _defs => Definitions->new(),
   55:     };
   56:     $self->{_defs}->define();
   57:     bless $self, $class;
   58:     return $self;
   59: }
   60: 
   61: # Attribute helpers
   62: 
   63: ##
   64: # Implicit operators ?
   65: # @returns {boolean}
   66: ##
   67: sub implicit_operators {
   68:     my $self = shift;
   69:     return $self->{_implicit_operators};
   70: }
   71: 
   72: ##
   73: # Unit mode ?
   74: # @returns {boolean}
   75: ##
   76: sub unit_mode {
   77:     my $self = shift;
   78:     return $self->{_unit_mode};
   79: }
   80: 
   81: ##
   82: # Definitions
   83: # @returns {Definitions}
   84: ##
   85: sub defs {
   86:     my $self = shift;
   87:     return $self->{_defs};
   88: }
   89: 
   90: ##
   91: # Tokens
   92: # @returns {Token[]}
   93: ##
   94: sub tokens {
   95:     my $self = shift;
   96:     return $self->{_tokens};
   97: }
   98: 
   99: ##
  100: # Current token
  101: # @returns {Token}
  102: ##
  103: sub current_token {
  104:     my $self = shift;
  105:     return $self->{_current_token};
  106: }
  107: 
  108: ##
  109: # Current token number
  110: # @returns {int}
  111: ##
  112: sub token_nr {
  113:     my $self = shift;
  114:     return $self->{_token_nr};
  115: }
  116: 
  117: 
  118: ##
  119: # Returns the right node at the current token, based on top-down operator precedence.
  120: # @param {integer} rbp - Right binding power
  121: # @returns {ENode}
  122: ##
  123: sub expression {
  124:     my( $self, $rbp ) = @_;
  125:     my $left; # ENode
  126:     my $t = $self->current_token;
  127:     if (! defined $t) {
  128:         die ParseException->new("Expected something at the end.",
  129:             $self->tokens->[scalar(@{$self->tokens}) - 1]->to + 1);
  130:     }
  131:     $self->advance();
  132:     if (! defined $t->op) {
  133:         $left = ENode->new($t->type, undef, $t->value, undef);
  134:     } elsif (! defined $t->op->nud) {
  135:         die ParseException->new("Unexpected operator '[_1]'.", $t->from, $t->from, $t->op->id);
  136:     } else {
  137:         $left = $t->op->nud->($t->op, $self);
  138:     }
  139:     while (defined $self->current_token && defined $self->current_token->op &&
  140:             $rbp < $self->current_token->op->lbp) {
  141:         $t = $self->current_token;
  142:         $self->advance();
  143:         $left = $t->op->led->($t->op, $self, $left);
  144:     }
  145:     return $left;
  146: }
  147: 
  148: ##
  149: # Advance to the next token,
  150: # expecting the given operator id if it is provided.
  151: # Throws a ParseException if a given operator id is not found.
  152: # @optional {string} id - Operator id
  153: ##
  154: sub advance {
  155:     my ( $self, $id ) = @_;
  156:     if (defined $id && (!defined $self->current_token || !defined $self->current_token->op ||
  157:             $self->current_token->op->id ne $id)) {
  158:         if (!defined $self->current_token) {
  159:             die ParseException->new("Expected '[_1]' at the end.",
  160:                 $self->tokens->[scalar(@{$self->tokens}) - 1]->to + 1, undef, $id);
  161:         } else {
  162:             die ParseException->new("Expected '[_1]'.", $self->current_token->from, undef, $id);
  163:         }
  164:     }
  165:     if ($self->token_nr >= scalar(@{$self->tokens})) {
  166:         $self->{_current_token} = undef;
  167:         return;
  168:     }
  169:     $self->{_current_token} = $self->tokens->[$self->token_nr];
  170:     $self->{_token_nr} += 1;
  171: }
  172: 
  173: 
  174: ##
  175: # Adds hidden multiplication and unit operators to the token stream
  176: ##
  177: sub addHiddenOperators {
  178:     my ( $self ) = @_;
  179:     my $multiplication = $self->defs->findOperator("*");
  180:     my $unit_operator = $self->defs->findOperator("`");
  181:     my $in_units = 0; # we check if we are already in the units to avoid adding two ` operators inside
  182:     my $in_exp = 0;
  183:     for (my $i=0; $i<scalar(@{$self->tokens}) - 1; $i++) {
  184:         my $token = $self->tokens->[$i];
  185:         my $next_token = $self->tokens->[$i + 1];
  186:         if ($self->unit_mode) {
  187:             if ($token->value eq "`") {
  188:                 $in_units = 1;
  189:             } elsif ($in_units) {
  190:                 if ($token->value eq "^") {
  191:                     $in_exp = 1;
  192:                 } elsif ($in_exp && $token->type == Token->NUMBER) {
  193:                     $in_exp = 0;
  194:                 } elsif (!$in_exp && $token->type == Token->NUMBER) {
  195:                     $in_units = 0;
  196:                 } elsif (!$in_exp && $token->type == Token->OPERATOR && index("*/^()", $token->value) == -1) {
  197:                     $in_units = 0;
  198:                 } elsif ($token->type == Token->NAME && $next_token->value eq "(") {
  199:                     $in_units = 0;
  200:                 }
  201:             }
  202:         }
  203:         my $token_type = $token->type;
  204:         my $next_token_type = $next_token->type;
  205:         my $token_value = $token->value;
  206:         my $next_token_value = $next_token->value;
  207:         if (
  208:                 ($token_type == Token->NAME && $next_token_type == Token->NAME) ||
  209:                 ($token_type == Token->NUMBER && $next_token_type == Token->NAME) ||
  210:                 ($token_type == Token->NUMBER && $next_token_type == Token->NUMBER) ||
  211:                 ($token_type == Token->NUMBER && string_in_array(["(","[","{"], $next_token_value)) ||
  212:                 # ($token_type == Token->NAME && $next_token_value eq "(") ||
  213:                 # name ( could be a function call
  214:                 (string_in_array([")","]","}"], $token_value) && $next_token_type == Token->NAME) ||
  215:                 (string_in_array([")","]","}"], $token_value) && $next_token_type == Token->NUMBER) ||
  216:                 (string_in_array([")","]","}"], $token_value) && $next_token_value eq "(")
  217:            ) {
  218:             # support for things like "(1/2) (m/s)" is complex...
  219:             my $units = ($self->unit_mode && !$in_units &&
  220:                 ($token_type == Token->NUMBER || string_in_array([")","]","}"], $token_value)) &&
  221:                 ($next_token_type == Token->NAME ||
  222:                     (string_in_array(["(","[","{"], $next_token_value) && scalar(@{$self->tokens}) > $i + 2 &&
  223:                     $self->tokens->[$i + 2]->type == Token->NAME)));
  224:             if ($units) {
  225:                 my( $test_token, $index_test);
  226:                 if ($next_token_type == Token->NAME) {
  227:                     $test_token = $next_token;
  228:                     $index_test = $i + 1;
  229:                 } else {
  230:                     # for instance for "2 (m/s)"
  231:                     $index_test = $i + 2;
  232:                     $test_token = $self->tokens->[$index_test];
  233:                 }
  234:                 if (scalar(@{$self->tokens}) > $index_test + 1 && $self->tokens->[$index_test + 1]->value eq "(") {
  235:                     my @known_functions = ("pow", "sqrt", "abs", "exp", "factorial", "diff",
  236:                         "integrate", "sum", "product", "limit", "binomial", "matrix",
  237:                         "ln", "log", "log10", "mod", "sgn", "ceil", "floor",
  238:                         "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
  239:                         "sinh", "cosh", "tanh", "asinh", "acosh", "atanh");
  240:                     for (my $j=0; $j<scalar(@known_functions); $j++) {
  241:                         if ($test_token->value eq $known_functions[$j]) {
  242:                             $units = 0;
  243:                             last;
  244:                         }
  245:                     }
  246:                 }
  247:             }
  248:             my $new_token;
  249:             if ($units) {
  250:                 $new_token = Token->new(Token->OPERATOR, $next_token->from,
  251:                     $next_token->from, $unit_operator->id, $unit_operator);
  252:             } else {
  253:                 $new_token = Token->new(Token->OPERATOR, $next_token->from,
  254:                     $next_token->from, $multiplication->id, $multiplication);
  255:             }
  256:             splice(@{$self->{_tokens}}, $i+1, 0, $new_token);
  257:         }
  258:     }
  259: }
  260: 
  261: ##
  262: # Parse the string, returning an ENode tree.
  263: # @param {string} text - The text to parse.
  264: # @returns {ENode}
  265: ##
  266: sub parse {
  267:     my ( $self, $text ) = @_;
  268:     
  269:     my $tokenizer = Tokenizer->new($self->defs, $text);
  270:     @{$self->{_tokens}} = $tokenizer->tokenize();
  271:     if (scalar(@{$self->tokens}) == 0) {
  272:         die ParseException->new("No information found.");
  273:     }
  274:     if ($self->implicit_operators) {
  275:         $self->addHiddenOperators();
  276:     }
  277:     $self->{_token_nr} = 0;
  278:     $self->{_current_token} = $self->tokens->[$self->token_nr];
  279:     $self->advance();
  280:     my $root = $self->expression(0);
  281:     if (defined $self->current_token) {
  282:         die ParseException->new("Expected the end.", $self->current_token->from);
  283:     }
  284:     return $root;
  285: }
  286: 
  287: ##
  288: # Tests if a string is in an array (using eq) (to avoid using $value ~~ @array)
  289: # @param {Array<string>} array - reference to the array of strings
  290: # @param {string} value - the string to look for
  291: # @returns 1 if found, 0 otherwise
  292: ##
  293: sub string_in_array {
  294:   my ($array, $value) = @_;
  295:   foreach my $v (@{$array}) {
  296:     if ($v eq $value) {
  297:       return 1;
  298:     }
  299:   }
  300:   return 0;
  301: }
  302: 
  303: 1;
  304: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>