I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,28 @@
// Greek symbols
// TODO: Shouold we include the uppercase variants for the symbols where the uppercase variant doesn't have a separate meaning?
ALPHA: "\\alpha"
BETA: "\\beta"
GAMMA: "\\gamma"
DELTA: "\\delta" // TODO: Should this be included? Delta usually denotes other things.
EPSILON: "\\epsilon" | "\\varepsilon"
ZETA: "\\zeta"
ETA: "\\eta"
THETA: "\\theta" | "\\vartheta"
// TODO: Should I add iota to the list?
KAPPA: "\\kappa"
LAMBDA: "\\lambda" // TODO: What about the uppercase variant?
MU: "\\mu"
NU: "\\nu"
XI: "\\xi"
// TODO: Should there be a separate note for transforming \pi into sympy.pi?
RHO: "\\rho" | "\\varrho"
// TODO: What should we do about sigma?
TAU: "\\tau"
UPSILON: "\\upsilon"
PHI: "\\phi" | "\\varphi"
CHI: "\\chi"
PSI: "\\psi"
OMEGA: "\\omega"
GREEK_SYMBOL: ALPHA | BETA | GAMMA | DELTA | EPSILON | ZETA | ETA | THETA | KAPPA
| LAMBDA | MU | NU | XI | RHO | TAU | UPSILON | PHI | CHI | PSI | OMEGA

View File

@ -0,0 +1,327 @@
%ignore /[ \t\n\r]+/
%ignore "\\," | "\\thinspace" | "\\:" | "\\medspace" | "\\;" | "\\thickspace"
%ignore "\\quad" | "\\qquad"
%ignore "\\!" | "\\negthinspace" | "\\negmedspace" | "\\negthickspace"
%ignore "\\vrule" | "\\vcenter" | "\\vbox" | "\\vskip" | "\\vspace" | "\\hfill"
%ignore "\\*" | "\\-" | "\\." | "\\/" | "\\\\" | "\\(" | "\\="
%ignore "\\left" | "\\right"
%ignore "\\limits" | "\\nolimits"
%ignore "\\displaystyle"
///////////////////// tokens ///////////////////////
// basic binary operators
ADD: "+"
SUB: "-"
MUL: "*"
DIV: "/"
// tokens with distinct left and right symbols
L_BRACE: "{"
R_BRACE: "}"
L_BRACE_LITERAL: "\\{"
R_BRACE_LITERAL: "\\}"
L_BRACKET: "["
R_BRACKET: "]"
L_CEIL: "\\lceil"
R_CEIL: "\\rceil"
L_FLOOR: "\\lfloor"
R_FLOOR: "\\rfloor"
L_PAREN: "("
R_PAREN: ")"
// limit, integral, sum, and product symbols
FUNC_LIM: "\\lim"
LIM_APPROACH_SYM: "\\to" | "\\rightarrow" | "\\Rightarrow" | "\\longrightarrow" | "\\Longrightarrow"
FUNC_INT: "\\int" | "\\intop"
FUNC_SUM: "\\sum"
FUNC_PROD: "\\prod"
// common functions
FUNC_EXP: "\\exp"
FUNC_LOG: "\\log"
FUNC_LN: "\\ln"
FUNC_LG: "\\lg"
FUNC_MIN: "\\min"
FUNC_MAX: "\\max"
// trigonometric functions
FUNC_SIN: "\\sin"
FUNC_COS: "\\cos"
FUNC_TAN: "\\tan"
FUNC_CSC: "\\csc"
FUNC_SEC: "\\sec"
FUNC_COT: "\\cot"
// inverse trigonometric functions
FUNC_ARCSIN: "\\arcsin"
FUNC_ARCCOS: "\\arccos"
FUNC_ARCTAN: "\\arctan"
FUNC_ARCCSC: "\\arccsc"
FUNC_ARCSEC: "\\arcsec"
FUNC_ARCCOT: "\\arccot"
// hyperbolic trigonometric functions
FUNC_SINH: "\\sinh"
FUNC_COSH: "\\cosh"
FUNC_TANH: "\\tanh"
FUNC_ARSINH: "\\arsinh"
FUNC_ARCOSH: "\\arcosh"
FUNC_ARTANH: "\\artanh"
FUNC_SQRT: "\\sqrt"
// miscellaneous symbols
CMD_TIMES: "\\times"
CMD_CDOT: "\\cdot"
CMD_DIV: "\\div"
CMD_FRAC: "\\frac" | "\\dfrac" | "\\tfrac" | "\\nicefrac"
CMD_BINOM: "\\binom" | "\\dbinom" | "\\tbinom"
CMD_OVERLINE: "\\overline"
CMD_LANGLE: "\\langle"
CMD_RANGLE: "\\rangle"
CMD_MATHIT: "\\mathit"
CMD_INFTY: "\\infty"
BANG: "!"
BAR: "|"
CARET: "^"
COLON: ":"
UNDERSCORE: "_"
// relational symbols
EQUAL: "="
NOT_EQUAL: "\\neq" | "\\ne"
LT: "<"
LTE: "\\leq" | "\\le" | "\\leqslant"
GT: ">"
GTE: "\\geq" | "\\ge" | "\\geqslant"
DIV_SYMBOL: CMD_DIV | DIV
MUL_SYMBOL: MUL | CMD_TIMES | CMD_CDOT
%import .greek_symbols.GREEK_SYMBOL
UPRIGHT_DIFFERENTIAL_SYMBOL: "\\text{d}" | "\\mathrm{d}"
DIFFERENTIAL_SYMBOL: "d" | UPRIGHT_DIFFERENTIAL_SYMBOL
// disallow "d" as a variable name because we want to parse "d" as a differential symbol.
SYMBOL: /[a-zA-Z]/
BASIC_SUBSCRIPTED_SYMBOL: /([a-zA-Z])_(([A-Za-z0-9]|[a-zA-Z]+)|\{([A-Za-z0-9]|[a-zA-Z]+)\})/
SYMBOL_WITH_GREEK_SUBSCRIPT: /([a-zA-Z])_/ GREEK_SYMBOL | /([a-zA-Z])_/ L_BRACE GREEK_SYMBOL R_BRACE
// best to define the variant with braces like that instead of shoving it all into one case like in
// /([a-zA-Z])_/ L_BRACE? GREEK_SYMBOL R_BRACE? because then we can easily error out on input like
// r"h_{\theta"
GREEK_SUBSCRIPTED_SYMBOL: GREEK_SYMBOL /_(([A-Za-z0-9]|[a-zA-Z]+)|\{([A-Za-z0-9]|[a-zA-Z]+)\})/
%import common.DIGIT -> DIGIT
//////////////////// grammar //////////////////////
latex_string: _relation | _expression
_one_letter_symbol: SYMBOL
| BASIC_SUBSCRIPTED_SYMBOL
| SYMBOL_WITH_GREEK_SUBSCRIPT
| GREEK_SUBSCRIPTED_SYMBOL
| GREEK_SYMBOL
multi_letter_symbol: CMD_MATHIT L_BRACE /[a-zA-Z]+(\s+[a-zA-Z]+)*/ R_BRACE
number: /\d+(\.\d*)?/
_atomic_expr: _one_letter_symbol
| multi_letter_symbol
| number
| CMD_INFTY
group_round_parentheses: L_PAREN _expression R_PAREN
group_square_brackets: L_BRACKET _expression R_BRACKET
group_curly_parentheses: L_BRACE _expression R_BRACE
_relation: eq | ne | lt | lte | gt | gte
eq: _expression EQUAL _expression
ne: _expression NOT_EQUAL _expression
lt: _expression LT _expression
lte: _expression LTE _expression
gt: _expression GT _expression
gte: _expression GTE _expression
_expression_core: _atomic_expr | group_curly_parentheses
add: _expression ADD _expression_mul
sub: _expression SUB _expression_mul
| SUB _expression_mul
mul: _expression_mul MUL_SYMBOL _expression_power
div: _expression_mul DIV_SYMBOL _expression_power
adjacent_expressions: (_one_letter_symbol | number) _expression_mul
| group_round_parentheses (group_round_parentheses | _one_letter_symbol)
| _function _function
| fraction _expression
_expression_func: _expression_core
| group_round_parentheses
| fraction
| binomial
| _function
_expression_power: _expression_func | superscript
_expression_mul: _expression_power
| mul | div | adjacent_expressions
| _integral// | derivative
| summation | product
| limit
_expression: _expression_mul | add | sub
_limit_dir: "+" | "-" | L_BRACE ("+" | "-") R_BRACE
limit_dir_expr: _expression CARET _limit_dir
group_curly_parentheses_lim: L_BRACE _expression LIM_APPROACH_SYM (limit_dir_expr | _expression) R_BRACE
limit: FUNC_LIM UNDERSCORE group_curly_parentheses_lim _expression
differential: DIFFERENTIAL_SYMBOL _one_letter_symbol
//_derivative_operator: CMD_FRAC L_BRACE DIFFERENTIAL_SYMBOL R_BRACE L_BRACE differential R_BRACE
//derivative: _derivative_operator _expression
_integral: normal_integral | integral_with_special_fraction
normal_integral: FUNC_INT _expression DIFFERENTIAL_SYMBOL _one_letter_symbol
| FUNC_INT (CARET _expression_core UNDERSCORE _expression_core)? _expression? DIFFERENTIAL_SYMBOL _one_letter_symbol
| FUNC_INT (UNDERSCORE _expression_core CARET _expression_core)? _expression? DIFFERENTIAL_SYMBOL _one_letter_symbol
group_curly_parentheses_int: L_BRACE _expression? differential R_BRACE
special_fraction: CMD_FRAC group_curly_parentheses_int group_curly_parentheses
integral_with_special_fraction: FUNC_INT special_fraction
| FUNC_INT (CARET _expression_core UNDERSCORE _expression_core)? special_fraction
| FUNC_INT (UNDERSCORE _expression_core CARET _expression_core)? special_fraction
group_curly_parentheses_special: UNDERSCORE L_BRACE _atomic_expr EQUAL _atomic_expr R_BRACE CARET _expression_core
| CARET _expression_core UNDERSCORE L_BRACE _atomic_expr EQUAL _atomic_expr R_BRACE
summation: FUNC_SUM group_curly_parentheses_special _expression
| FUNC_SUM group_curly_parentheses_special _expression
product: FUNC_PROD group_curly_parentheses_special _expression
| FUNC_PROD group_curly_parentheses_special _expression
superscript: _expression_func CARET _expression_power
fraction: _basic_fraction
| _simple_fraction
| _general_fraction
_basic_fraction: CMD_FRAC DIGIT (DIGIT | SYMBOL | GREEK_SYMBOL)
_simple_fraction: CMD_FRAC DIGIT group_curly_parentheses
| CMD_FRAC group_curly_parentheses (DIGIT | SYMBOL | GREEK_SYMBOL)
_general_fraction: CMD_FRAC group_curly_parentheses group_curly_parentheses
binomial: _basic_binomial
| _simple_binomial
| _general_binomial
_basic_binomial: CMD_BINOM DIGIT (DIGIT | SYMBOL | GREEK_SYMBOL)
_simple_binomial: CMD_BINOM DIGIT group_curly_parentheses
| CMD_BINOM group_curly_parentheses (DIGIT | SYMBOL | GREEK_SYMBOL)
_general_binomial: CMD_BINOM group_curly_parentheses group_curly_parentheses
list_of_expressions: _expression ("," _expression)*
function_applied: _one_letter_symbol L_PAREN list_of_expressions R_PAREN
min: FUNC_MIN L_PAREN list_of_expressions R_PAREN
max: FUNC_MAX L_PAREN list_of_expressions R_PAREN
bra: CMD_LANGLE _expression BAR
ket: BAR _expression CMD_RANGLE
inner_product: CMD_LANGLE _expression BAR _expression CMD_RANGLE
_function: function_applied
| abs | floor | ceil
| _trigonometric_function | _inverse_trigonometric_function
| _trigonometric_function_power
| _hyperbolic_trigonometric_function | _inverse_hyperbolic_trigonometric_function
| exponential
| log
| square_root
| factorial
| conjugate
| max | min
| bra | ket | inner_product
exponential: FUNC_EXP _expression
log: FUNC_LOG _expression
| FUNC_LN _expression
| FUNC_LG _expression
| FUNC_LOG UNDERSCORE (DIGIT | _one_letter_symbol) _expression
| FUNC_LOG UNDERSCORE group_curly_parentheses _expression
square_root: FUNC_SQRT group_curly_parentheses
| FUNC_SQRT group_square_brackets group_curly_parentheses
factorial: _expression BANG
conjugate: CMD_OVERLINE group_curly_parentheses
| CMD_OVERLINE DIGIT
_trigonometric_function: sin | cos | tan | csc | sec | cot
sin: FUNC_SIN _expression
cos: FUNC_COS _expression
tan: FUNC_TAN _expression
csc: FUNC_CSC _expression
sec: FUNC_SEC _expression
cot: FUNC_COT _expression
_trigonometric_function_power: sin_power | cos_power | tan_power | csc_power | sec_power | cot_power
sin_power: FUNC_SIN CARET _expression_core _expression
cos_power: FUNC_COS CARET _expression_core _expression
tan_power: FUNC_TAN CARET _expression_core _expression
csc_power: FUNC_CSC CARET _expression_core _expression
sec_power: FUNC_SEC CARET _expression_core _expression
cot_power: FUNC_COT CARET _expression_core _expression
_hyperbolic_trigonometric_function: sinh | cosh | tanh
sinh: FUNC_SINH _expression
cosh: FUNC_COSH _expression
tanh: FUNC_TANH _expression
_inverse_trigonometric_function: arcsin | arccos | arctan | arccsc | arcsec | arccot
arcsin: FUNC_ARCSIN _expression
arccos: FUNC_ARCCOS _expression
arctan: FUNC_ARCTAN _expression
arccsc: FUNC_ARCCSC _expression
arcsec: FUNC_ARCSEC _expression
arccot: FUNC_ARCCOT _expression
_inverse_hyperbolic_trigonometric_function: asinh | acosh | atanh
asinh: FUNC_ARSINH _expression
acosh: FUNC_ARCOSH _expression
atanh: FUNC_ARTANH _expression
abs: BAR _expression BAR
floor: L_FLOOR _expression R_FLOOR
ceil: L_CEIL _expression R_CEIL