389 lines
14 KiB
C++
389 lines
14 KiB
C++
|
/* -------------------------------------------------------------------------- *
|
||
|
* Lepton *
|
||
|
* -------------------------------------------------------------------------- *
|
||
|
* This is part of the Lepton expression parser originating from *
|
||
|
* Simbios, the NIH National Center for Physics-Based Simulation of *
|
||
|
* Biological Structures at Stanford, funded under the NIH Roadmap for *
|
||
|
* Medical Research, grant U54 GM072970. See https://simtk.org. *
|
||
|
* *
|
||
|
* Portions copyright (c) 2009-2013 Stanford University and the Authors. *
|
||
|
* Authors: Peter Eastman *
|
||
|
* Contributors: *
|
||
|
* *
|
||
|
* Permission is hereby granted, free of charge, to any person obtaining a *
|
||
|
* copy of this software and associated documentation files (the "Software"), *
|
||
|
* to deal in the Software without restriction, including without limitation *
|
||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
|
||
|
* and/or sell copies of the Software, and to permit persons to whom the *
|
||
|
* Software is furnished to do so, subject to the following conditions: *
|
||
|
* *
|
||
|
* The above copyright notice and this permission notice shall be included in *
|
||
|
* all copies or substantial portions of the Software. *
|
||
|
* *
|
||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
|
||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
|
||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
|
||
|
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
|
||
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
|
||
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
|
||
|
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
|
||
|
* -------------------------------------------------------------------------- */
|
||
|
|
||
|
#include "Parser.h"
|
||
|
#include "CustomFunction.h"
|
||
|
#include "ExpressionTreeNode.h"
|
||
|
#include "Operation.h"
|
||
|
#include "ParsedExpression.h"
|
||
|
#include <exception>
|
||
|
#include <cctype>
|
||
|
#include <iostream>
|
||
|
|
||
|
using namespace Lepton;
|
||
|
|
||
|
static const std::string Digits = "0123456789";
|
||
|
static const std::string Operators = "+-*/^";
|
||
|
static const bool LeftAssociative[] = { true, true, true, true, false };
|
||
|
static const int Precedence[] = { 0, 0, 1, 1, 3 };
|
||
|
static const Operation::Id OperationId[] = { Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER };
|
||
|
|
||
|
class Lepton::ParseToken
|
||
|
{
|
||
|
public:
|
||
|
enum Type { Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace };
|
||
|
|
||
|
ParseToken(std::string text, Type type) : text(text), type(type) { }
|
||
|
|
||
|
const std::string& getText() const { return text; }
|
||
|
|
||
|
Type getType() const { return type; }
|
||
|
|
||
|
private:
|
||
|
std::string text;
|
||
|
Type type;
|
||
|
};
|
||
|
|
||
|
std::string Parser::trim(const std::string& expression)
|
||
|
{
|
||
|
// Remove leading and trailing spaces.
|
||
|
|
||
|
size_t start, end;
|
||
|
for (start = 0; start < expression.size() && isspace(expression[start]); ++start) { }
|
||
|
for (end = expression.size() - 1; end > start && isspace(expression[end]); end--) { }
|
||
|
if (start == end && isspace(expression[end])) { return ""; }
|
||
|
return expression.substr(start, end - start + 1);
|
||
|
}
|
||
|
|
||
|
ParseToken Parser::getNextToken(const std::string& expression, size_t start)
|
||
|
{
|
||
|
char c = expression[start];
|
||
|
if (c == '(') { return ParseToken("(", ParseToken::LeftParen); }
|
||
|
if (c == ')') { return ParseToken(")", ParseToken::RightParen); }
|
||
|
if (c == ',') { return ParseToken(",", ParseToken::Comma); }
|
||
|
if (Operators.find(c) != std::string::npos) { return ParseToken(std::string(1, c), ParseToken::Operator); }
|
||
|
if (isspace(c))
|
||
|
{
|
||
|
// White space
|
||
|
|
||
|
for (size_t pos = start + 1; pos < expression.size(); ++pos)
|
||
|
{
|
||
|
if (!isspace(expression[pos])) { return ParseToken(expression.substr(start, pos - start), ParseToken::Whitespace); }
|
||
|
}
|
||
|
return ParseToken(expression.substr(start, std::string::npos), ParseToken::Whitespace);
|
||
|
}
|
||
|
if (c == '.' || Digits.find(c) != std::string::npos)
|
||
|
{
|
||
|
// A number
|
||
|
|
||
|
bool foundDecimal = (c == '.');
|
||
|
bool foundExp = false;
|
||
|
size_t pos = start + 1;
|
||
|
for (; pos < expression.size(); ++pos)
|
||
|
{
|
||
|
c = expression[pos];
|
||
|
if (Digits.find(c) != std::string::npos) { continue; }
|
||
|
if (c == '.' && !foundDecimal)
|
||
|
{
|
||
|
foundDecimal = true;
|
||
|
continue;
|
||
|
}
|
||
|
if ((c == 'e' || c == 'E') && !foundExp)
|
||
|
{
|
||
|
foundExp = true;
|
||
|
if (pos < expression.size() - 1 && (expression[pos + 1] == '-' || expression[pos + 1] == '+')) { pos++; }
|
||
|
continue;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
return ParseToken(expression.substr(start, pos - start), ParseToken::Number);
|
||
|
}
|
||
|
|
||
|
// A variable, function, or left parenthesis
|
||
|
|
||
|
for (size_t pos = start; pos < expression.size(); ++pos)
|
||
|
{
|
||
|
c = expression[pos];
|
||
|
if (c == '(') { return ParseToken(expression.substr(start, pos - start + 1), ParseToken::Function); }
|
||
|
if (Operators.find(c) != std::string::npos || c == ',' || c == ')' || isspace(c))
|
||
|
{
|
||
|
return ParseToken(expression.substr(start, pos - start), ParseToken::Variable);
|
||
|
}
|
||
|
}
|
||
|
return ParseToken(expression.substr(start, std::string::npos), ParseToken::Variable);
|
||
|
}
|
||
|
|
||
|
std::vector<ParseToken> Parser::tokenize(const std::string& expression)
|
||
|
{
|
||
|
std::vector<ParseToken> tokens;
|
||
|
size_t pos = 0;
|
||
|
while (pos < expression.size())
|
||
|
{
|
||
|
ParseToken token = getNextToken(expression, pos);
|
||
|
if (token.getType() != ParseToken::Whitespace) { tokens.push_back(token); }
|
||
|
pos += token.getText().size();
|
||
|
}
|
||
|
return tokens;
|
||
|
}
|
||
|
|
||
|
ParsedExpression Parser::parse(const std::string& expression) { return parse(expression, std::map<std::string, CustomFunction*>()); }
|
||
|
|
||
|
ParsedExpression Parser::parse(const std::string& expression, const std::map<std::string, CustomFunction*>& customFunctions)
|
||
|
{
|
||
|
// First split the expression into subexpressions.
|
||
|
|
||
|
std::string primaryExpression = expression;
|
||
|
std::vector<std::string> subexpressions;
|
||
|
while (true)
|
||
|
{
|
||
|
std::string::size_type pos = primaryExpression.find_last_of(';');
|
||
|
if (pos == std::string::npos) { break; }
|
||
|
std::string sub = trim(primaryExpression.substr(pos + 1));
|
||
|
if (!sub.empty()) { subexpressions.push_back(sub); }
|
||
|
primaryExpression = primaryExpression.substr(0, pos);
|
||
|
}
|
||
|
|
||
|
// Parse the subexpressions.
|
||
|
|
||
|
std::map<std::string, ExpressionTreeNode> subexpDefs;
|
||
|
for (size_t i = 0; i < subexpressions.size(); ++i)
|
||
|
{
|
||
|
size_t equalsPos = subexpressions[i].find('=');
|
||
|
if (equalsPos == std::string::npos) { throw Exception("Parse error: subexpression does not specify a name"); }
|
||
|
std::string name = trim(subexpressions[i].substr(0, equalsPos));
|
||
|
if (name.empty()) { throw Exception("Parse error: subexpression does not specify a name"); }
|
||
|
std::vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos + 1));
|
||
|
size_t pos = 0;
|
||
|
subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
|
||
|
if (pos != tokens.size()) { throw Exception("Parse error: unexpected text at end of subexpression: " + tokens[pos].getText()); }
|
||
|
}
|
||
|
|
||
|
// Now parse the primary expression.
|
||
|
|
||
|
std::vector<ParseToken> tokens = tokenize(primaryExpression);
|
||
|
size_t pos = 0;
|
||
|
ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
|
||
|
if (pos != tokens.size()) { throw Exception("Parse error: unexpected text at end of expression: " + tokens[pos].getText()); }
|
||
|
return ParsedExpression(result);
|
||
|
}
|
||
|
|
||
|
ExpressionTreeNode Parser::parsePrecedence(const std::vector<ParseToken>& tokens, size_t& pos, const std::map<std::string, CustomFunction*>& customFunctions,
|
||
|
const std::map<std::string, ExpressionTreeNode>& subexpressionDefs, int precedence)
|
||
|
{
|
||
|
if (pos == tokens.size()) { throw Exception("Parse error: unexpected end of expression"); }
|
||
|
|
||
|
// Parse the next value (number, variable, function, parenthesized expression)
|
||
|
|
||
|
ParseToken token = tokens[pos];
|
||
|
ExpressionTreeNode result;
|
||
|
if (token.getType() == ParseToken::Number)
|
||
|
{
|
||
|
double value;
|
||
|
std::stringstream(token.getText()) >> value;
|
||
|
result = ExpressionTreeNode(new Operation::Constant(value));
|
||
|
pos++;
|
||
|
}
|
||
|
else if (token.getType() == ParseToken::Variable)
|
||
|
{
|
||
|
const auto subexp = subexpressionDefs.find(token.getText());
|
||
|
if (subexp == subexpressionDefs.end())
|
||
|
{
|
||
|
Operation* op = new Operation::Variable(token.getText());
|
||
|
result = ExpressionTreeNode(op);
|
||
|
}
|
||
|
else { result = subexp->second; }
|
||
|
pos++;
|
||
|
}
|
||
|
else if (token.getType() == ParseToken::LeftParen)
|
||
|
{
|
||
|
pos++;
|
||
|
result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
|
||
|
if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen) { throw Exception("Parse error: unbalanced parentheses"); }
|
||
|
pos++;
|
||
|
}
|
||
|
else if (token.getType() == ParseToken::Function)
|
||
|
{
|
||
|
pos++;
|
||
|
std::vector<ExpressionTreeNode> args;
|
||
|
bool moreArgs;
|
||
|
do
|
||
|
{
|
||
|
args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
|
||
|
moreArgs = (pos < tokens.size() && tokens[pos].getType() == ParseToken::Comma);
|
||
|
if (moreArgs) { pos++; }
|
||
|
} while (moreArgs);
|
||
|
if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen) { throw Exception("Parse error: unbalanced parentheses"); }
|
||
|
pos++;
|
||
|
Operation* op = getFunctionOperation(token.getText(), customFunctions);
|
||
|
try { result = ExpressionTreeNode(op, args); }
|
||
|
catch (...)
|
||
|
{
|
||
|
delete op;
|
||
|
throw;
|
||
|
}
|
||
|
}
|
||
|
else if (token.getType() == ParseToken::Operator && token.getText() == "-")
|
||
|
{
|
||
|
pos++;
|
||
|
const ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
|
||
|
result = ExpressionTreeNode(new Operation::Negate(), toNegate);
|
||
|
}
|
||
|
else { throw Exception("Parse error: unexpected token: " + token.getText()); }
|
||
|
|
||
|
// Now deal with the next binary operator.
|
||
|
|
||
|
while (pos < tokens.size() && tokens[pos].getType() == ParseToken::Operator)
|
||
|
{
|
||
|
token = tokens[pos];
|
||
|
int opIndex = int(Operators.find(token.getText()));
|
||
|
int opPrecedence = Precedence[opIndex];
|
||
|
if (opPrecedence < precedence) { return result; }
|
||
|
pos++;
|
||
|
ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence + 1 : opPrecedence);
|
||
|
Operation* op = getOperatorOperation(token.getText());
|
||
|
try { result = ExpressionTreeNode(op, result, arg); }
|
||
|
catch (...)
|
||
|
{
|
||
|
delete op;
|
||
|
throw;
|
||
|
}
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
Operation* Parser::getOperatorOperation(const std::string& name)
|
||
|
{
|
||
|
switch (OperationId[Operators.find(name)])
|
||
|
{
|
||
|
case Operation::ADD:
|
||
|
return new Operation::Add();
|
||
|
case Operation::SUBTRACT:
|
||
|
return new Operation::Subtract();
|
||
|
case Operation::MULTIPLY:
|
||
|
return new Operation::Multiply();
|
||
|
case Operation::DIVIDE:
|
||
|
return new Operation::Divide();
|
||
|
case Operation::POWER:
|
||
|
return new Operation::Power();
|
||
|
default:
|
||
|
throw Exception("Parse error: unknown operator");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
Operation* Parser::getFunctionOperation(const std::string& name, const std::map<std::string, CustomFunction*>& customFunctions)
|
||
|
{
|
||
|
static std::map<std::string, Operation::Id> opMap;
|
||
|
if (opMap.empty())
|
||
|
{
|
||
|
opMap["sqrt"] = Operation::SQRT;
|
||
|
opMap["exp"] = Operation::EXP;
|
||
|
opMap["log"] = Operation::LOG;
|
||
|
opMap["sin"] = Operation::SIN;
|
||
|
opMap["cos"] = Operation::COS;
|
||
|
opMap["sec"] = Operation::SEC;
|
||
|
opMap["csc"] = Operation::CSC;
|
||
|
opMap["tan"] = Operation::TAN;
|
||
|
opMap["cot"] = Operation::COT;
|
||
|
opMap["asin"] = Operation::ASIN;
|
||
|
opMap["acos"] = Operation::ACOS;
|
||
|
opMap["atan"] = Operation::ATAN;
|
||
|
opMap["sinh"] = Operation::SINH;
|
||
|
opMap["cosh"] = Operation::COSH;
|
||
|
opMap["tanh"] = Operation::TANH;
|
||
|
opMap["erf"] = Operation::ERF;
|
||
|
opMap["erfc"] = Operation::ERFC;
|
||
|
opMap["step"] = Operation::STEP;
|
||
|
opMap["delta"] = Operation::DELTA;
|
||
|
opMap["square"] = Operation::SQUARE;
|
||
|
opMap["cube"] = Operation::CUBE;
|
||
|
opMap["recip"] = Operation::RECIPROCAL;
|
||
|
opMap["min"] = Operation::MIN;
|
||
|
opMap["max"] = Operation::MAX;
|
||
|
opMap["abs"] = Operation::ABS;
|
||
|
}
|
||
|
const std::string trimmed = name.substr(0, name.size() - 1);
|
||
|
|
||
|
// First check custom functions.
|
||
|
|
||
|
const auto custom = customFunctions.find(trimmed);
|
||
|
if (custom != customFunctions.end()) { return new Operation::Custom(trimmed, custom->second->clone()); }
|
||
|
|
||
|
// Now try standard functions.
|
||
|
|
||
|
const auto iter = opMap.find(trimmed);
|
||
|
if (iter == opMap.end()) { throw Exception("Parse error: unknown function: " + trimmed); }
|
||
|
switch (iter->second)
|
||
|
{
|
||
|
case Operation::SQRT:
|
||
|
return new Operation::Sqrt();
|
||
|
case Operation::EXP:
|
||
|
return new Operation::Exp();
|
||
|
case Operation::LOG:
|
||
|
return new Operation::Log();
|
||
|
case Operation::SIN:
|
||
|
return new Operation::Sin();
|
||
|
case Operation::COS:
|
||
|
return new Operation::Cos();
|
||
|
case Operation::SEC:
|
||
|
return new Operation::Sec();
|
||
|
case Operation::CSC:
|
||
|
return new Operation::Csc();
|
||
|
case Operation::TAN:
|
||
|
return new Operation::Tan();
|
||
|
case Operation::COT:
|
||
|
return new Operation::Cot();
|
||
|
case Operation::ASIN:
|
||
|
return new Operation::Asin();
|
||
|
case Operation::ACOS:
|
||
|
return new Operation::Acos();
|
||
|
case Operation::ATAN:
|
||
|
return new Operation::Atan();
|
||
|
case Operation::SINH:
|
||
|
return new Operation::Sinh();
|
||
|
case Operation::COSH:
|
||
|
return new Operation::Cosh();
|
||
|
case Operation::TANH:
|
||
|
return new Operation::Tanh();
|
||
|
case Operation::ERF:
|
||
|
return new Operation::Erf();
|
||
|
case Operation::ERFC:
|
||
|
return new Operation::Erfc();
|
||
|
case Operation::STEP:
|
||
|
return new Operation::Step();
|
||
|
case Operation::DELTA:
|
||
|
return new Operation::Delta();
|
||
|
case Operation::SQUARE:
|
||
|
return new Operation::Square();
|
||
|
case Operation::CUBE:
|
||
|
return new Operation::Cube();
|
||
|
case Operation::RECIPROCAL:
|
||
|
return new Operation::Reciprocal();
|
||
|
case Operation::MIN:
|
||
|
return new Operation::Min();
|
||
|
case Operation::MAX:
|
||
|
return new Operation::Max();
|
||
|
case Operation::ABS:
|
||
|
return new Operation::Abs();
|
||
|
default:
|
||
|
throw Exception("Parse error: unknown function");
|
||
|
}
|
||
|
}
|