I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,2 @@
from .latex_parser import parse_latex_lark, LarkLaTeXParser # noqa
from .transformer import TransformToSymPyExpr # noqa

View File

@ -0,0 +1,28 @@
// Greek symbols
// One terminal per lowercase Greek-letter LaTeX command; the "\\var..." spellings are
// accepted as alternatives of the corresponding plain letter. GREEK_SYMBOL at the bottom
// is the union of all terminals defined in this file.
// TODO: Should we include the uppercase variants for the symbols where the uppercase variant doesn't have a separate meaning?
ALPHA: "\\alpha"
BETA: "\\beta"
GAMMA: "\\gamma"
DELTA: "\\delta" // TODO: Should this be included? Delta usually denotes other things.
EPSILON: "\\epsilon" | "\\varepsilon"
ZETA: "\\zeta"
ETA: "\\eta"
THETA: "\\theta" | "\\vartheta"
// TODO: Should I add iota to the list?
KAPPA: "\\kappa"
LAMBDA: "\\lambda" // TODO: What about the uppercase variant?
MU: "\\mu"
NU: "\\nu"
XI: "\\xi"
// TODO: Should there be a separate note for transforming \pi into sympy.pi?
RHO: "\\rho" | "\\varrho"
// TODO: What should we do about sigma?
TAU: "\\tau"
UPSILON: "\\upsilon"
PHI: "\\phi" | "\\varphi"
CHI: "\\chi"
PSI: "\\psi"
OMEGA: "\\omega"
// Union of every Greek terminal above (iota, pi and sigma are not defined, see the TODOs).
GREEK_SYMBOL: ALPHA | BETA | GAMMA | DELTA | EPSILON | ZETA | ETA | THETA | KAPPA
| LAMBDA | MU | NU | XI | RHO | TAU | UPSILON | PHI | CHI | PSI | OMEGA

View File

@ -0,0 +1,327 @@
// Input that is dropped by the lexer and never reaches the grammar rules:
// plain whitespace, LaTeX spacing commands, box/skip commands, a few escaped
// punctuation commands, the \left/\right delimiter sizers, \limits/\nolimits
// and \displaystyle.
%ignore /[ \t\n\r]+/
%ignore "\\," | "\\thinspace" | "\\:" | "\\medspace" | "\\;" | "\\thickspace"
%ignore "\\quad" | "\\qquad"
%ignore "\\!" | "\\negthinspace" | "\\negmedspace" | "\\negthickspace"
%ignore "\\vrule" | "\\vcenter" | "\\vbox" | "\\vskip" | "\\vspace" | "\\hfill"
// NOTE(review): "\\(" is ignored here but "\\)" is not -- confirm whether that is intentional.
%ignore "\\*" | "\\-" | "\\." | "\\/" | "\\\\" | "\\(" | "\\="
%ignore "\\left" | "\\right"
%ignore "\\limits" | "\\nolimits"
%ignore "\\displaystyle"
///////////////////// tokens ///////////////////////
// basic binary operators
ADD: "+"
SUB: "-"
MUL: "*"
DIV: "/"
// tokens with distinct left and right symbols
L_BRACE: "{"
R_BRACE: "}"
L_BRACE_LITERAL: "\\{"
R_BRACE_LITERAL: "\\}"
L_BRACKET: "["
R_BRACKET: "]"
L_CEIL: "\\lceil"
R_CEIL: "\\rceil"
L_FLOOR: "\\lfloor"
R_FLOOR: "\\rfloor"
L_PAREN: "("
R_PAREN: ")"
// limit, integral, sum, and product symbols
FUNC_LIM: "\\lim"
LIM_APPROACH_SYM: "\\to" | "\\rightarrow" | "\\Rightarrow" | "\\longrightarrow" | "\\Longrightarrow"
FUNC_INT: "\\int" | "\\intop"
FUNC_SUM: "\\sum"
FUNC_PROD: "\\prod"
// common functions
FUNC_EXP: "\\exp"
FUNC_LOG: "\\log"
FUNC_LN: "\\ln"
FUNC_LG: "\\lg"
FUNC_MIN: "\\min"
FUNC_MAX: "\\max"
// trigonometric functions
FUNC_SIN: "\\sin"
FUNC_COS: "\\cos"
FUNC_TAN: "\\tan"
FUNC_CSC: "\\csc"
FUNC_SEC: "\\sec"
FUNC_COT: "\\cot"
// inverse trigonometric functions
FUNC_ARCSIN: "\\arcsin"
FUNC_ARCCOS: "\\arccos"
FUNC_ARCTAN: "\\arctan"
FUNC_ARCCSC: "\\arccsc"
FUNC_ARCSEC: "\\arcsec"
FUNC_ARCCOT: "\\arccot"
// hyperbolic trigonometric functions (the "ar..." commands are the inverse hyperbolic ones)
FUNC_SINH: "\\sinh"
FUNC_COSH: "\\cosh"
FUNC_TANH: "\\tanh"
FUNC_ARSINH: "\\arsinh"
FUNC_ARCOSH: "\\arcosh"
FUNC_ARTANH: "\\artanh"
// roots
FUNC_SQRT: "\\sqrt"
// miscellaneous symbols
CMD_TIMES: "\\times"
CMD_CDOT: "\\cdot"
CMD_DIV: "\\div"
CMD_FRAC: "\\frac" | "\\dfrac" | "\\tfrac" | "\\nicefrac"
CMD_BINOM: "\\binom" | "\\dbinom" | "\\tbinom"
CMD_OVERLINE: "\\overline"
CMD_LANGLE: "\\langle"
CMD_RANGLE: "\\rangle"
CMD_MATHIT: "\\mathit"
CMD_INFTY: "\\infty"
BANG: "!"
BAR: "|"
CARET: "^"
COLON: ":"
UNDERSCORE: "_"
// relational symbols
EQUAL: "="
NOT_EQUAL: "\\neq" | "\\ne"
LT: "<"
LTE: "\\leq" | "\\le" | "\\leqslant"
GT: ">"
GTE: "\\geq" | "\\ge" | "\\geqslant"
// composite operator terminals: every accepted spelling of division and multiplication
DIV_SYMBOL: CMD_DIV | DIV
MUL_SYMBOL: MUL | CMD_TIMES | CMD_CDOT
// GREEK_SYMBOL comes from the sibling grammar file greek_symbols.lark
%import .greek_symbols.GREEK_SYMBOL
// differential markers: a plain "d", or an upright "d" written with \text or \mathrm
UPRIGHT_DIFFERENTIAL_SYMBOL: "\\text{d}" | "\\mathrm{d}"
DIFFERENTIAL_SYMBOL: "d" | UPRIGHT_DIFFERENTIAL_SYMBOL
// disallow "d" as a variable name because we want to parse "d" as a differential symbol.
// NOTE(review): as written, SYMBOL below still matches "d" (the character class is the full
// [a-zA-Z] range), so the statement above is an intent rather than something the regex enforces.
SYMBOL: /[a-zA-Z]/
// a Latin letter with a subscript that is a single alphanumeric character or a run of
// letters, optionally wrapped in braces
BASIC_SUBSCRIPTED_SYMBOL: /([a-zA-Z])_(([A-Za-z0-9]|[a-zA-Z]+)|\{([A-Za-z0-9]|[a-zA-Z]+)\})/
SYMBOL_WITH_GREEK_SUBSCRIPT: /([a-zA-Z])_/ GREEK_SYMBOL | /([a-zA-Z])_/ L_BRACE GREEK_SYMBOL R_BRACE
// best to define the variant with braces like that instead of shoving it all into one case like in
// /([a-zA-Z])_/ L_BRACE? GREEK_SYMBOL R_BRACE? because then we can easily error out on input like
// r"h_{\theta"
GREEK_SUBSCRIPTED_SYMBOL: GREEK_SYMBOL /_(([A-Za-z0-9]|[a-zA-Z]+)|\{([A-Za-z0-9]|[a-zA-Z]+)\})/
%import common.DIGIT -> DIGIT
//////////////////// grammar //////////////////////
// Rules whose names start with an underscore are inlined by Lark: their children appear
// directly in the parent node, which is what the index arithmetic in the transformer relies on.
// start rule: the whole input is either a relation or a bare expression
latex_string: _relation | _expression
// symbols, numbers and other atoms
_one_letter_symbol: SYMBOL
| BASIC_SUBSCRIPTED_SYMBOL
| SYMBOL_WITH_GREEK_SUBSCRIPT
| GREEK_SUBSCRIPTED_SYMBOL
| GREEK_SYMBOL
// \mathit{...} holding one or more whitespace-separated words
multi_letter_symbol: CMD_MATHIT L_BRACE /[a-zA-Z]+(\s+[a-zA-Z]+)*/ R_BRACE
// digits with an optional fractional part; a trailing "." with no digits after it is accepted
number: /\d+(\.\d*)?/
_atomic_expr: _one_letter_symbol
| multi_letter_symbol
| number
| CMD_INFTY
// bracketed groups
group_round_parentheses: L_PAREN _expression R_PAREN
group_square_brackets: L_BRACKET _expression R_BRACKET
group_curly_parentheses: L_BRACE _expression R_BRACE
// relations: exactly one relational operator between two expressions
_relation: eq | ne | lt | lte | gt | gte
eq: _expression EQUAL _expression
ne: _expression NOT_EQUAL _expression
lt: _expression LT _expression
lte: _expression LTE _expression
gt: _expression GT _expression
gte: _expression GTE _expression
// expression levels, layered from _expression_core (atoms and braces) up through
// _expression_func, _expression_power and _expression_mul to _expression (sums and differences)
_expression_core: _atomic_expr | group_curly_parentheses
add: _expression ADD _expression_mul
sub: _expression SUB _expression_mul
| SUB _expression_mul
mul: _expression_mul MUL_SYMBOL _expression_power
div: _expression_mul DIV_SYMBOL _expression_power
// two expressions written next to each other with no operator in between
adjacent_expressions: (_one_letter_symbol | number) _expression_mul
| group_round_parentheses (group_round_parentheses | _one_letter_symbol)
| _function _function
| fraction _expression
_expression_func: _expression_core
| group_round_parentheses
| fraction
| binomial
| _function
_expression_power: _expression_func | superscript
_expression_mul: _expression_power
| mul | div | adjacent_expressions
| _integral// | derivative
| summation | product
| limit
_expression: _expression_mul | add | sub
// limits: \lim_{x \to a} f, with an optional one-sided direction written as a^+ / a^- / a^{+} / a^{-}
_limit_dir: "+" | "-" | L_BRACE ("+" | "-") R_BRACE
limit_dir_expr: _expression CARET _limit_dir
group_curly_parentheses_lim: L_BRACE _expression LIM_APPROACH_SYM (limit_dir_expr | _expression) R_BRACE
limit: FUNC_LIM UNDERSCORE group_curly_parentheses_lim _expression
// a differential such as "dx"
differential: DIFFERENTIAL_SYMBOL _one_letter_symbol
//_derivative_operator: CMD_FRAC L_BRACE DIFFERENTIAL_SYMBOL R_BRACE L_BRACE differential R_BRACE
//derivative: _derivative_operator _expression
// integrals: the bounds are optional and may be given as ^upper_lower or _lower^upper;
// the integrand is optional as well (as in "\int dx")
_integral: normal_integral | integral_with_special_fraction
normal_integral: FUNC_INT _expression DIFFERENTIAL_SYMBOL _one_letter_symbol
| FUNC_INT (CARET _expression_core UNDERSCORE _expression_core)? _expression? DIFFERENTIAL_SYMBOL _one_letter_symbol
| FUNC_INT (UNDERSCORE _expression_core CARET _expression_core)? _expression? DIFFERENTIAL_SYMBOL _one_letter_symbol
// integrals whose differential sits in the numerator of a fraction, e.g. \int \frac{dx}{x}
group_curly_parentheses_int: L_BRACE _expression? differential R_BRACE
special_fraction: CMD_FRAC group_curly_parentheses_int group_curly_parentheses
integral_with_special_fraction: FUNC_INT special_fraction
| FUNC_INT (CARET _expression_core UNDERSCORE _expression_core)? special_fraction
| FUNC_INT (UNDERSCORE _expression_core CARET _expression_core)? special_fraction
// bounds of a sum or product, in either order: _{i = a}^b or ^b_{i = a}
group_curly_parentheses_special: UNDERSCORE L_BRACE _atomic_expr EQUAL _atomic_expr R_BRACE CARET _expression_core
| CARET _expression_core UNDERSCORE L_BRACE _atomic_expr EQUAL _atomic_expr R_BRACE
// \sum with bounds followed by the summand. Both orderings of the bounds are already
// covered by group_curly_parentheses_special, so one alternative is enough (the second,
// byte-identical alternative that used to follow was redundant).
summation: FUNC_SUM group_curly_parentheses_special _expression
// \prod with bounds followed by the factor. Both orderings of the bounds are already
// covered by group_curly_parentheses_special, so one alternative is enough (the second,
// byte-identical alternative that used to follow was redundant).
product: FUNC_PROD group_curly_parentheses_special _expression
// exponentiation: base ^ exponent
superscript: _expression_func CARET _expression_power
// fractions: LaTeX allows the braces to be dropped around a single-character argument, so
// \frac 1 2, \frac 1 {x}, \frac {x} 2 and \frac {x} {y} are all accepted
fraction: _basic_fraction
| _simple_fraction
| _general_fraction
_basic_fraction: CMD_FRAC DIGIT (DIGIT | SYMBOL | GREEK_SYMBOL)
_simple_fraction: CMD_FRAC DIGIT group_curly_parentheses
| CMD_FRAC group_curly_parentheses (DIGIT | SYMBOL | GREEK_SYMBOL)
_general_fraction: CMD_FRAC group_curly_parentheses group_curly_parentheses
// binomial coefficients, with the same brace-dropping variants as fractions
binomial: _basic_binomial
| _simple_binomial
| _general_binomial
_basic_binomial: CMD_BINOM DIGIT (DIGIT | SYMBOL | GREEK_SYMBOL)
_simple_binomial: CMD_BINOM DIGIT group_curly_parentheses
| CMD_BINOM group_curly_parentheses (DIGIT | SYMBOL | GREEK_SYMBOL)
_general_binomial: CMD_BINOM group_curly_parentheses group_curly_parentheses
// comma-separated arguments and the constructs that take them
list_of_expressions: _expression ("," _expression)*
function_applied: _one_letter_symbol L_PAREN list_of_expressions R_PAREN
min: FUNC_MIN L_PAREN list_of_expressions R_PAREN
max: FUNC_MAX L_PAREN list_of_expressions R_PAREN
// bra-ket notation
bra: CMD_LANGLE _expression BAR
ket: BAR _expression CMD_RANGLE
inner_product: CMD_LANGLE _expression BAR _expression CMD_RANGLE
// everything that counts as a function-like expression
_function: function_applied
| abs | floor | ceil
| _trigonometric_function | _inverse_trigonometric_function
| _trigonometric_function_power
| _hyperbolic_trigonometric_function | _inverse_hyperbolic_trigonometric_function
| exponential
| log
| square_root
| factorial
| conjugate
| max | min
| bra | ket | inner_product
exponential: FUNC_EXP _expression
// \log, \ln, \lg, and \log with an explicit base given as a subscript
log: FUNC_LOG _expression
| FUNC_LN _expression
| FUNC_LG _expression
| FUNC_LOG UNDERSCORE (DIGIT | _one_letter_symbol) _expression
| FUNC_LOG UNDERSCORE group_curly_parentheses _expression
// \sqrt{x}, or \sqrt[n]{x} with the index in square brackets
square_root: FUNC_SQRT group_curly_parentheses
| FUNC_SQRT group_square_brackets group_curly_parentheses
factorial: _expression BANG
conjugate: CMD_OVERLINE group_curly_parentheses
| CMD_OVERLINE DIGIT
// trigonometric functions applied to an expression
_trigonometric_function: sin | cos | tan | csc | sec | cot
sin: FUNC_SIN _expression
cos: FUNC_COS _expression
tan: FUNC_TAN _expression
csc: FUNC_CSC _expression
sec: FUNC_SEC _expression
cot: FUNC_COT _expression
// trigonometric functions with an exponent between the name and the argument, e.g. \sin^2 x
_trigonometric_function_power: sin_power | cos_power | tan_power | csc_power | sec_power | cot_power
sin_power: FUNC_SIN CARET _expression_core _expression
cos_power: FUNC_COS CARET _expression_core _expression
tan_power: FUNC_TAN CARET _expression_core _expression
csc_power: FUNC_CSC CARET _expression_core _expression
sec_power: FUNC_SEC CARET _expression_core _expression
cot_power: FUNC_COT CARET _expression_core _expression
// hyperbolic functions
_hyperbolic_trigonometric_function: sinh | cosh | tanh
sinh: FUNC_SINH _expression
cosh: FUNC_COSH _expression
tanh: FUNC_TANH _expression
// inverse trigonometric functions
_inverse_trigonometric_function: arcsin | arccos | arctan | arccsc | arcsec | arccot
arcsin: FUNC_ARCSIN _expression
arccos: FUNC_ARCCOS _expression
arctan: FUNC_ARCTAN _expression
arccsc: FUNC_ARCCSC _expression
arcsec: FUNC_ARCSEC _expression
arccot: FUNC_ARCCOT _expression
// inverse hyperbolic functions (written \arsinh, \arcosh, \artanh in the input)
_inverse_hyperbolic_trigonometric_function: asinh | acosh | atanh
asinh: FUNC_ARSINH _expression
acosh: FUNC_ARCOSH _expression
atanh: FUNC_ARTANH _expression
// delimiter pairs that act as functions
abs: BAR _expression BAR
floor: L_FLOOR _expression R_FLOOR
ceil: L_CEIL _expression R_CEIL

View File

@ -0,0 +1,146 @@
import os
import logging
import re
from sympy.external import import_module
from sympy.parsing.latex.lark.transformer import TransformToSymPyExpr
_lark = import_module("lark")
class LarkLaTeXParser:
    r"""Convert `\mathrm{\LaTeX}` input strings into SymPy expressions using Lark.

    An instance owns the ``lark.Lark`` parser built from a grammar file and the
    transformer object that is run on the resulting parse trees.

    Parameters
    ==========

    print_debug_output : bool, optional

        If set to ``True``, the Lark logger is switched to ``DEBUG`` level and the
        input, the parse tree and the resulting expression are logged. Defaults to
        ``False``.

    transform : bool, optional

        If set to ``True``, the transformer is run on the parse tree generated by
        ``Lark.parse``. Defaults to ``True``. Setting it to ``False`` makes
        :py:meth:`doparse` return the raw parse tree, which can help with debugging
        the `\mathrm{\LaTeX}` grammar.

    grammar_file : str, optional

        The path to the grammar file that the parser should use. If set to ``None``,
        the default grammar is used, which is ``grammar/latex.lark`` relative to the
        directory containing this module.

    transformer : callable, optional

        A Transformer class; it is called with no arguments to create the
        transformer instance. If set to ``None``, the default transformer class,
        :py:class:`TransformToSymPyExpr`, is used.
    """

    def __init__(self, print_debug_output=False, transform=True, grammar_file=None, transformer=None):
        grammar_dir_path = os.path.join(os.path.dirname(__file__), "grammar/")

        # Fall back to the bundled grammar when the caller did not supply one.
        grammar_path = grammar_file
        if grammar_path is None:
            grammar_path = os.path.join(grammar_dir_path, "latex.lark")
        with open(grammar_path, encoding="utf-8") as grammar_source:
            latex_grammar = grammar_source.read()

        # source_path is the grammar directory, so that the relative
        # "%import .greek_symbols..." in the grammar can be resolved.
        self.parser = _lark.Lark(
            latex_grammar,
            source_path=grammar_dir_path,
            parser="earley",
            start="latex_string",
            lexer="auto",
            ambiguity="explicit",
            propagate_positions=False,
            maybe_placeholders=False,
            keep_all_tokens=True)

        self.print_debug_output = print_debug_output
        self.transform_expr = transform
        self.transformer = TransformToSymPyExpr() if transformer is None else transformer()

    def doparse(self, s: str):
        """Parse ``s`` and return the SymPy expression built from it.

        When the instance was created with ``transform=False`` the parse tree is
        logged at debug level and returned as is, without running the transformer.
        """
        debug_enabled = self.print_debug_output
        if debug_enabled:
            _lark.logger.setLevel(logging.DEBUG)

        parse_tree = self.parser.parse(s)

        if not self.transform_expr:
            # Raw-tree mode: log what we have and hand the tree back untouched.
            _lark.logger.debug("expression = %s", s)
            _lark.logger.debug(parse_tree)
            _lark.logger.debug(parse_tree.pretty())
            return parse_tree

        if debug_enabled:
            # Logged before the transformer runs, so the tree is visible even if
            # the transformation fails.
            _lark.logger.debug("expression = %s", s)
            _lark.logger.debug(parse_tree.pretty())

        sympy_expression = self.transformer.transform(parse_tree)

        if debug_enabled:
            _lark.logger.debug("SymPy expression = %s", sympy_expression)

        return sympy_expression
# The shared parser instance is only created when Lark could be imported; building the
# Lark parser is done once at import time rather than on every call.
if _lark is not None:
    _lark_latex_parser = LarkLaTeXParser()


def parse_latex_lark(s: str):
    """
    Experimental LaTeX parser using Lark.

    Parses ``s`` with the module-level :py:class:`LarkLaTeXParser` instance and
    returns the result of its ``doparse`` method. Raises ``ImportError`` when
    Lark is not available.

    This function is still under development and its API may change with the
    next releases of SymPy.
    """
    if _lark is None:
        raise ImportError("Lark is probably not installed")

    return _lark_latex_parser.doparse(s)
def _pretty_print_lark_trees(tree, indent=0, show_expr=True):
    """Render a Lark parse tree as a compact ``name(child,child,...)`` string.

    Tokens are rendered as their value. Node names starting with ``expression``
    are abbreviated to ``E`` and, when ``show_expr`` is false, omitted entirely
    (only their children are printed). The children of an ``_ambig`` node are
    placed on separate lines, indented two spaces deeper than ``indent``.
    """
    if isinstance(tree, _lark.Token):
        return tree.value

    label = str(tree.data)
    is_expr = label.startswith("expression")
    if is_expr:
        label = re.sub(r"^expression", "E", label)

    is_ambig = label == "_ambig"
    child_indent = indent + 2 if is_ambig else indent

    rendered_children = [
        _pretty_print_lark_trees(child, child_indent, show_expr)
        for child in tree.children
    ]

    if is_ambig:
        # one alternative per line, each prefixed with the deeper indentation
        body = "\n" + "\n".join(" " * child_indent + text for text in rendered_children)
    else:
        body = ",".join(rendered_children)

    if is_expr and not show_expr:
        return body
    return str(label) + "(" + body + ")"

View File

@ -0,0 +1,557 @@
import re
import sympy
from sympy.external import import_module
from sympy.parsing.latex.errors import LaTeXParsingError
lark = import_module("lark")
if lark:
from lark import Transformer, Token # type: ignore
else:
class Transformer: # type: ignore
def transform(self, *args):
pass
class Token: # type: ignore
pass
# noinspection PyPep8Naming,PyMethodMayBeStatic
class TransformToSymPyExpr(Transformer):
    """Returns a SymPy expression that is generated by traversing the ``lark.Tree``
    passed to the ``.transform()`` function.

    Methods named after a grammar rule receive the list of that rule's (already
    transformed) children; upper-case methods and attributes named after a terminal
    receive the matched token. Because the parser keeps all tokens, punctuation such
    as braces, carets and underscores is present in those child lists, which is why
    most methods pick their operands by position.

    Notes
    =====

    **This class is never supposed to be used directly.**

    In order to tweak the behavior of this class, it has to be subclassed and then after
    the required modifications are made, the new class should be passed to
    the :py:class:`LarkLaTeXParser` class by using the ``transformer`` argument in the
    constructor.

    Parameters
    ==========

    visit_tokens : bool, optional
        For information about what this option does, see `here
        <https://lark-parser.readthedocs.io/en/latest/visitors.html#lark.visitors.Transformer>`_.

        Note that the option must be set to ``True`` for the default parser to work.
    """

    # Terminal callbacks that are plain constructors: the token text is the argument.
    SYMBOL = sympy.Symbol
    DIGIT = sympy.core.numbers.Integer

    # ------------------------------------------------------------------ helpers

    def _greek_name(self, command):
        """Return ``command`` with every "var" removed ("varphi" -> "phi")."""
        return re.sub("var", "", command)

    def _subscripted_symbol(self, base, subscript):
        """Return the Symbol named ``base_{subscript}``."""
        return sympy.Symbol("%s_{%s}" % (base, subscript))

    def _find_integral_bounds(self, tokens):
        """Locate the optional bounds of an integral in ``tokens``.

        Returns ``(underscore_index, caret_index, lower_bound, upper_bound)``; each
        entry is ``None`` when the corresponding part is absent. A bound is the item
        that directly follows its ``_`` or ``^`` token.
        """
        underscore_index = tokens.index("_") if "_" in tokens else None
        caret_index = tokens.index("^") if "^" in tokens else None

        # Compare against None explicitly: an index is a position, not a flag.
        lower_bound = tokens[underscore_index + 1] if underscore_index is not None else None
        upper_bound = tokens[caret_index + 1] if caret_index is not None else None

        return underscore_index, caret_index, lower_bound, upper_bound

    def _check_integral_bounds(self, lower_bound, upper_bound):
        """Raise ``LaTeXParsingError`` when only one of the two bounds was given."""
        # we can't simply do something like `if (lower_bound and not upper_bound) ...` because this would
        # evaluate to `True` if the `lower_bound` is 0 and upper bound is non-zero
        if lower_bound is not None and upper_bound is None:
            raise LaTeXParsingError("Lower bound for the integral was found, but upper bound was not found.")

        if upper_bound is not None and lower_bound is None:
            raise LaTeXParsingError("Upper bound for the integral was found, but lower bound was not found.")

    def _build_integral(self, integrand, variable, lower_bound, upper_bound):
        """Return a definite integral when bounds are present, else an indefinite one."""
        if lower_bound is not None:
            # the bounds were validated as a pair, so the upper bound is present too
            return sympy.Integral(integrand, (variable, lower_bound, upper_bound))
        return sympy.Integral(integrand, variable)

    def _trig_power(self, tokens, function, inverse_function):
        """Transform ``\\func^{exponent} argument``.

        An exponent of ``-1`` is read as the inverse function; any other exponent
        raises the function value to that power.
        """
        exponent = tokens[2]
        argument = tokens[-1]
        if exponent == -1:
            return inverse_function(argument)
        return sympy.Pow(function(argument), exponent)

    def _extract_differential_symbol(self, s):
        """Return the differential symbol contained in the token list ``s``, or ``None``.

        The candidates are checked in a fixed order so the result does not depend on
        set iteration order.
        """
        differential_symbols = ("d", r"\text{d}", r"\mathrm{d}")
        return next((symbol for symbol in differential_symbols if symbol in s), None)

    # ---------------------------------------------------------------- terminals

    def CMD_INFTY(self, tokens):
        """Map the infinity command to ``sympy.oo``."""
        return sympy.oo

    def GREEK_SYMBOL(self, tokens):
        """Turn a Greek-letter command into a Symbol named after the letter."""
        # we omit the first character because it is a backslash. Also, if the variable name has "var" in it,
        # like "varphi" or "varepsilon", we remove that too
        return sympy.Symbol(self._greek_name(tokens[1:]))

    def BASIC_SUBSCRIPTED_SYMBOL(self, tokens):
        """Turn ``x_1`` / ``x_{ab}`` into the Symbol ``x_{1}`` / ``x_{ab}``."""
        symbol, sub = tokens.value.split("_")
        if sub.startswith("{"):
            sub = sub[1:-1]  # strip the surrounding braces
        return self._subscripted_symbol(symbol, sub)

    def GREEK_SUBSCRIPTED_SYMBOL(self, tokens):
        """Turn a Greek letter with a subscript into a Symbol ``letter_{sub}``."""
        greek_letter, sub = tokens.value.split("_")
        greek_letter = self._greek_name(greek_letter[1:])  # drop the backslash
        if sub.startswith("{"):
            sub = sub[1:-1]  # strip the surrounding braces
        return self._subscripted_symbol(greek_letter, sub)

    def SYMBOL_WITH_GREEK_SUBSCRIPT(self, tokens):
        """Turn a Latin letter with a Greek subscript into a Symbol ``x_{letter}``."""
        symbol, sub = tokens.value.split("_")
        if sub.startswith("{"):
            # strip the opening brace together with the backslash, and the closing brace
            greek_letter = sub[2:-1]
        else:
            # strip only the backslash
            greek_letter = sub[1:]
        return self._subscripted_symbol(symbol, self._greek_name(greek_letter))

    # ------------------------------------------------------------- atoms, groups

    def multi_letter_symbol(self, tokens):
        """Return a Symbol named after the text inside ``\\mathit{...}``."""
        return sympy.Symbol(tokens[2])

    def number(self, tokens):
        """Return a Float when the literal contains a ".", otherwise an Integer."""
        if "." in tokens[0]:
            return sympy.core.numbers.Float(tokens[0])
        else:
            return sympy.core.numbers.Integer(tokens[0])

    def latex_string(self, tokens):
        """Return the single child of the start rule."""
        return tokens[0]

    def group_round_parentheses(self, tokens):
        """Return the expression between the parentheses."""
        return tokens[1]

    def group_square_brackets(self, tokens):
        """Return the expression between the square brackets."""
        return tokens[1]

    def group_curly_parentheses(self, tokens):
        """Return the expression between the braces."""
        return tokens[1]

    # ----------------------------------------------------------------- relations

    def eq(self, tokens):
        return sympy.Eq(tokens[0], tokens[2])

    def ne(self, tokens):
        return sympy.Ne(tokens[0], tokens[2])

    def lt(self, tokens):
        return sympy.Lt(tokens[0], tokens[2])

    def lte(self, tokens):
        return sympy.Le(tokens[0], tokens[2])

    def gt(self, tokens):
        return sympy.Gt(tokens[0], tokens[2])

    def gte(self, tokens):
        return sympy.Ge(tokens[0], tokens[2])

    # ---------------------------------------------------------------- arithmetic

    def add(self, tokens):
        return sympy.Add(tokens[0], tokens[2])

    def sub(self, tokens):
        """Handle both unary minus (two children) and binary subtraction (three)."""
        if len(tokens) == 2:
            return -tokens[1]
        elif len(tokens) == 3:
            return sympy.Add(tokens[0], -tokens[2])

    def mul(self, tokens):
        return sympy.Mul(tokens[0], tokens[2])

    def div(self, tokens):
        return sympy.Mul(tokens[0], sympy.Pow(tokens[2], -1))

    def adjacent_expressions(self, tokens):
        """Interpret two expressions written next to each other."""
        # Most of the time, if two expressions are next to each other, it means implicit multiplication,
        # but not always
        from sympy.physics.quantum import Bra, Ket
        if isinstance(tokens[0], Ket) and isinstance(tokens[1], Bra):
            from sympy.physics.quantum import OuterProduct
            return OuterProduct(tokens[0], tokens[1])
        elif tokens[0] == sympy.Symbol("d"):
            # If the leftmost token is a "d", then it is highly likely that this is a differential
            return tokens[0], tokens[1]
        elif isinstance(tokens[0], tuple):
            # then we have a derivative
            return sympy.Derivative(tokens[1], tokens[0][1])
        else:
            return sympy.Mul(tokens[0], tokens[1])

    def superscript(self, tokens):
        return sympy.Pow(tokens[0], tokens[2])

    def fraction(self, tokens):
        """Return numerator / denominator, or a derivative marker.

        When the denominator was transformed into a tuple (a differential), the
        pair ``("derivative", variable)`` is returned instead so that the caller
        further up can build the derivative.
        """
        numerator = tokens[1]
        if isinstance(tokens[2], tuple):
            # we only need the variable w.r.t. which we are differentiating
            _, variable = tokens[2]

            # we will pass this information upwards
            return "derivative", variable

        denominator = tokens[2]
        return sympy.Mul(numerator, sympy.Pow(denominator, -1))

    def binomial(self, tokens):
        return sympy.binomial(tokens[1], tokens[2])

    # ----------------------------------------------------------------- integrals

    def normal_integral(self, tokens):
        """Build a ``sympy.Integral`` from an integral with an explicit differential.

        Raises ``LaTeXParsingError`` when no differential symbol is found or when
        only one of the two bounds is given.
        """
        underscore_index, caret_index, lower_bound, upper_bound = self._find_integral_bounds(tokens)

        differential_symbol = self._extract_differential_symbol(tokens)
        if differential_symbol is None:
            raise LaTeXParsingError("Differential symbol was not found in the expression. "
                                    "Valid differential symbols are \"d\", \"\\text{d}\", and \"\\mathrm{d}\".")

        # the variable of integration is the item right after the differential symbol
        differential_variable_index = tokens.index(differential_symbol) + 1
        differential_variable = tokens[differential_variable_index]

        self._check_integral_bounds(lower_bound, upper_bound)

        # The item at differential_variable_index - 2 should be the integrand. However, if going one more
        # step backwards gives us the underscore or the caret of the bounds, then the item at
        # differential_variable_index - 2 is a bound and there _was_ no integrand.
        # Examples: \int^7_0 dx and \int_0^7 dx
        last_bound_marker_index = differential_variable_index - 3
        if underscore_index is not None and underscore_index == last_bound_marker_index:
            integrand = 1
        elif caret_index is not None and caret_index == last_bound_marker_index:
            integrand = 1
        elif differential_variable_index == 2:
            # this means we have something like "\int dx", because the "\int" symbol will always be
            # at index 0 in `tokens`
            integrand = 1
        else:
            # The item at differential_variable_index - 1 is the differential symbol itself, so we need
            # to go one more step before that.
            integrand = tokens[differential_variable_index - 2]

        return self._build_integral(integrand, differential_variable, lower_bound, upper_bound)

    def group_curly_parentheses_int(self, tokens):
        """Return ``(numerator, variable)`` for a braced group that ends in a differential."""
        # return signature is a tuple consisting of the expression in the numerator, along with the variable of
        # integration
        if len(tokens) == 3:
            # only the differential is present, so the numerator is 1
            return 1, tokens[1]
        elif len(tokens) == 4:
            return tokens[1], tokens[2]
        # there are no other possibilities

    def special_fraction(self, tokens):
        """Return ``(integrand, variable)`` for a fraction with the differential in its numerator."""
        numerator, variable = tokens[1]
        denominator = tokens[2]

        # We pass the integrand, along with information about the variable of integration, upwards
        return sympy.Mul(numerator, sympy.Pow(denominator, -1)), variable

    def integral_with_special_fraction(self, tokens):
        """Build a ``sympy.Integral`` from an integral over a special fraction.

        Raises ``LaTeXParsingError`` when only one of the two bounds is given.
        """
        _, _, lower_bound, upper_bound = self._find_integral_bounds(tokens)
        self._check_integral_bounds(lower_bound, upper_bound)

        # the special fraction is always the last child
        integrand, differential_variable = tokens[-1]

        return self._build_integral(integrand, differential_variable, lower_bound, upper_bound)

    # ------------------------------------------------------- sums, products, limits

    def group_curly_parentheses_special(self, tokens):
        """Return ``(index_variable, lower_limit, upper_limit)`` of a sum or product."""
        underscore_index = tokens.index("_")
        caret_index = tokens.index("^")

        # given the type of expressions we are parsing, we can assume that the lower limit
        # will always use braces around its arguments. This is because we don't support
        # converting unconstrained sums into SymPy expressions.

        # first we isolate the bottom limit
        left_brace_index = tokens.index("{", underscore_index)
        right_brace_index = tokens.index("}", underscore_index)

        bottom_limit = tokens[left_brace_index + 1: right_brace_index]

        # next, we isolate the upper limit
        top_limit = tokens[caret_index + 1:]

        # TODO: to support things like `\sum_{n = 0}^{n = 5} n^2`, look for a "{" after the
        # caret and take the items up to the matching "}" as the top limit.

        index_variable = bottom_limit[0]
        lower_limit = bottom_limit[-1]
        upper_limit = top_limit[0]  # for now, the index will always be 0

        return index_variable, lower_limit, upper_limit

    def summation(self, tokens):
        return sympy.Sum(tokens[2], tokens[1])

    def product(self, tokens):
        return sympy.Product(tokens[2], tokens[1])

    def limit_dir_expr(self, tokens):
        """Return ``(destination, direction)`` for a one-sided limit target."""
        caret_index = tokens.index("^")

        if "{" in tokens:
            # the sign is wrapped in braces; it follows the opening brace
            left_curly_brace_index = tokens.index("{", caret_index)
            direction = tokens[left_curly_brace_index + 1]
        else:
            direction = tokens[caret_index + 1]

        if direction == "+":
            return tokens[0], "+"
        elif direction == "-":
            return tokens[0], "-"
        else:
            return tokens[0], "+-"

    def group_curly_parentheses_lim(self, tokens):
        """Return ``(limit_variable, destination, direction)`` for the subscript of a limit."""
        limit_variable = tokens[1]
        if isinstance(tokens[3], tuple):
            # a one-sided limit: limit_dir_expr already produced the pair
            destination, direction = tokens[3]
        else:
            destination = tokens[3]
            direction = "+-"

        return limit_variable, destination, direction

    def limit(self, tokens):
        limit_variable, destination, direction = tokens[2]

        return sympy.Limit(tokens[-1], limit_variable, destination, direction)

    def differential(self, tokens):
        """Return the variable that follows the differential symbol."""
        return tokens[1]

    def derivative(self, tokens):
        return sympy.Derivative(tokens[-1], tokens[5])

    # ----------------------------------------------------------------- functions

    def list_of_expressions(self, tokens):
        """Return the expressions of a comma-separated list, without the commas.

        Raises ``LaTeXParsingError`` when a token other than a comma is found
        between the expressions.
        """
        if len(tokens) == 1:
            # we return it verbatim because the function_applied node expects
            # a list
            return tokens

        expressions = []
        for item in tokens:
            if isinstance(item, Token):
                if item.type != "COMMA":
                    # An unexpected token was encountered
                    raise LaTeXParsingError("A comma token was expected, but some other token was encountered.")
                continue
            expressions.append(item)
        # A real list (rather than a lazy, single-use filter object) so that both
        # branches return the same kind of value.
        return expressions

    def function_applied(self, tokens):
        return sympy.Function(tokens[0])(*tokens[2])

    def min(self, tokens):
        return sympy.Min(*tokens[2])

    def max(self, tokens):
        return sympy.Max(*tokens[2])

    def bra(self, tokens):
        from sympy.physics.quantum import Bra
        return Bra(tokens[1])

    def ket(self, tokens):
        from sympy.physics.quantum import Ket
        return Ket(tokens[1])

    def inner_product(self, tokens):
        from sympy.physics.quantum import Bra, Ket, InnerProduct
        return InnerProduct(Bra(tokens[1]), Ket(tokens[3]))

    def sin(self, tokens):
        return sympy.sin(tokens[1])

    def cos(self, tokens):
        return sympy.cos(tokens[1])

    def tan(self, tokens):
        return sympy.tan(tokens[1])

    def csc(self, tokens):
        return sympy.csc(tokens[1])

    def sec(self, tokens):
        return sympy.sec(tokens[1])

    def cot(self, tokens):
        return sympy.cot(tokens[1])

    def sin_power(self, tokens):
        return self._trig_power(tokens, sympy.sin, sympy.asin)

    def cos_power(self, tokens):
        return self._trig_power(tokens, sympy.cos, sympy.acos)

    def tan_power(self, tokens):
        return self._trig_power(tokens, sympy.tan, sympy.atan)

    def csc_power(self, tokens):
        return self._trig_power(tokens, sympy.csc, sympy.acsc)

    def sec_power(self, tokens):
        return self._trig_power(tokens, sympy.sec, sympy.asec)

    def cot_power(self, tokens):
        return self._trig_power(tokens, sympy.cot, sympy.acot)

    def arcsin(self, tokens):
        return sympy.asin(tokens[1])

    def arccos(self, tokens):
        return sympy.acos(tokens[1])

    def arctan(self, tokens):
        return sympy.atan(tokens[1])

    def arccsc(self, tokens):
        return sympy.acsc(tokens[1])

    def arcsec(self, tokens):
        return sympy.asec(tokens[1])

    def arccot(self, tokens):
        return sympy.acot(tokens[1])

    def sinh(self, tokens):
        return sympy.sinh(tokens[1])

    def cosh(self, tokens):
        return sympy.cosh(tokens[1])

    def tanh(self, tokens):
        return sympy.tanh(tokens[1])

    def asinh(self, tokens):
        return sympy.asinh(tokens[1])

    def acosh(self, tokens):
        return sympy.acosh(tokens[1])

    def atanh(self, tokens):
        return sympy.atanh(tokens[1])

    def abs(self, tokens):
        return sympy.Abs(tokens[1])

    def floor(self, tokens):
        return sympy.floor(tokens[1])

    def ceil(self, tokens):
        return sympy.ceiling(tokens[1])

    def factorial(self, tokens):
        return sympy.factorial(tokens[0])

    def conjugate(self, tokens):
        return sympy.conjugate(tokens[1])

    def square_root(self, tokens):
        """Return a square root, or an n-th root when a bracketed index is given."""
        if len(tokens) == 2:
            # then there was no square bracket argument
            return sympy.sqrt(tokens[1])
        elif len(tokens) == 3:
            # then there _was_ a square bracket argument
            return sympy.root(tokens[2], tokens[1])

    def exponential(self, tokens):
        return sympy.exp(tokens[1])

    def log(self, tokens):
        """Transform the three logarithm commands, with an optional base for the log command."""
        if tokens[0].type == "FUNC_LG":
            # we don't need to check if there's an underscore or not because having one
            # in this case would be meaningless
            # TODO: ANTLR refers to ISO 80000-2:2019. should we keep base 10 or base 2?
            return sympy.log(tokens[1], 10)
        elif tokens[0].type == "FUNC_LN":
            return sympy.log(tokens[1])
        elif tokens[0].type == "FUNC_LOG":
            # we check if a base was specified or not
            if "_" in tokens:
                # then a base was specified
                return sympy.log(tokens[3], tokens[2])
            else:
                # a base was not specified
                return sympy.log(tokens[1])