You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Parser.cpp 14KB

3 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. /* -------------------------------------------------------------------------- *
  2. * Lepton *
  3. * -------------------------------------------------------------------------- *
  4. * This is part of the Lepton expression parser originating from *
  5. * Simbios, the NIH National Center for Physics-Based Simulation of *
  6. * Biological Structures at Stanford, funded under the NIH Roadmap for *
  7. * Medical Research, grant U54 GM072970. See https://simtk.org. *
  8. * *
  9. * Portions copyright (c) 2009-2013 Stanford University and the Authors. *
  10. * Authors: Peter Eastman *
  11. * Contributors: *
  12. * *
  13. * Permission is hereby granted, free of charge, to any person obtaining a *
  14. * copy of this software and associated documentation files (the "Software"), *
  15. * to deal in the Software without restriction, including without limitation *
  16. * the rights to use, copy, modify, merge, publish, distribute, sublicense, *
  17. * and/or sell copies of the Software, and to permit persons to whom the *
  18. * Software is furnished to do so, subject to the following conditions: *
  19. * *
  20. * The above copyright notice and this permission notice shall be included in *
  21. * all copies or substantial portions of the Software. *
  22. * *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
  24. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
  25. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
  26. * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
  27. * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
  28. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
  29. * USE OR OTHER DEALINGS IN THE SOFTWARE. *
  30. * -------------------------------------------------------------------------- */
  31. #include "Parser.h"
  32. #include "CustomFunction.h"
  33. #include "ExpressionTreeNode.h"
  34. #include "Operation.h"
  35. #include "ParsedExpression.h"
  36. #include <exception>
  37. #include <cctype>
  38. #include <iostream>
  39. using namespace Lepton;
  40. static const std::string Digits = "0123456789";
  41. static const std::string Operators = "+-*/^";
  42. static const bool LeftAssociative[] = { true, true, true, true, false };
  43. static const int Precedence[] = { 0, 0, 1, 1, 3 };
  44. static const Operation::Id OperationId[] = { Operation::ADD, Operation::SUBTRACT, Operation::MULTIPLY, Operation::DIVIDE, Operation::POWER };
  45. class Lepton::ParseToken
  46. {
  47. public:
  48. enum Type { Number, Operator, Variable, Function, LeftParen, RightParen, Comma, Whitespace };
  49. ParseToken(std::string text, Type type) : text(text), type(type) { }
  50. const std::string& getText() const { return text; }
  51. Type getType() const { return type; }
  52. private:
  53. std::string text;
  54. Type type;
  55. };
  56. std::string Parser::trim(const std::string& expression)
  57. {
  58. // Remove leading and trailing spaces.
  59. size_t start, end;
  60. for (start = 0; start < expression.size() && isspace(expression[start]); ++start) { }
  61. for (end = expression.size() - 1; end > start && isspace(expression[end]); end--) { }
  62. if (start == end && isspace(expression[end])) { return ""; }
  63. return expression.substr(start, end - start + 1);
  64. }
  65. ParseToken Parser::getNextToken(const std::string& expression, size_t start)
  66. {
  67. char c = expression[start];
  68. if (c == '(') { return ParseToken("(", ParseToken::LeftParen); }
  69. if (c == ')') { return ParseToken(")", ParseToken::RightParen); }
  70. if (c == ',') { return ParseToken(",", ParseToken::Comma); }
  71. if (Operators.find(c) != std::string::npos) { return ParseToken(std::string(1, c), ParseToken::Operator); }
  72. if (isspace(c))
  73. {
  74. // White space
  75. for (size_t pos = start + 1; pos < expression.size(); ++pos)
  76. {
  77. if (!isspace(expression[pos])) { return ParseToken(expression.substr(start, pos - start), ParseToken::Whitespace); }
  78. }
  79. return ParseToken(expression.substr(start, std::string::npos), ParseToken::Whitespace);
  80. }
  81. if (c == '.' || Digits.find(c) != std::string::npos)
  82. {
  83. // A number
  84. bool foundDecimal = (c == '.');
  85. bool foundExp = false;
  86. size_t pos = start + 1;
  87. for (; pos < expression.size(); ++pos)
  88. {
  89. c = expression[pos];
  90. if (Digits.find(c) != std::string::npos) { continue; }
  91. if (c == '.' && !foundDecimal)
  92. {
  93. foundDecimal = true;
  94. continue;
  95. }
  96. if ((c == 'e' || c == 'E') && !foundExp)
  97. {
  98. foundExp = true;
  99. if (pos < expression.size() - 1 && (expression[pos + 1] == '-' || expression[pos + 1] == '+')) { pos++; }
  100. continue;
  101. }
  102. break;
  103. }
  104. return ParseToken(expression.substr(start, pos - start), ParseToken::Number);
  105. }
  106. // A variable, function, or left parenthesis
  107. for (size_t pos = start; pos < expression.size(); ++pos)
  108. {
  109. c = expression[pos];
  110. if (c == '(') { return ParseToken(expression.substr(start, pos - start + 1), ParseToken::Function); }
  111. if (Operators.find(c) != std::string::npos || c == ',' || c == ')' || isspace(c))
  112. {
  113. return ParseToken(expression.substr(start, pos - start), ParseToken::Variable);
  114. }
  115. }
  116. return ParseToken(expression.substr(start, std::string::npos), ParseToken::Variable);
  117. }
  118. std::vector<ParseToken> Parser::tokenize(const std::string& expression)
  119. {
  120. std::vector<ParseToken> tokens;
  121. size_t pos = 0;
  122. while (pos < expression.size())
  123. {
  124. ParseToken token = getNextToken(expression, pos);
  125. if (token.getType() != ParseToken::Whitespace) { tokens.push_back(token); }
  126. pos += token.getText().size();
  127. }
  128. return tokens;
  129. }
  130. ParsedExpression Parser::parse(const std::string& expression) { return parse(expression, std::map<std::string, CustomFunction*>()); }
  131. ParsedExpression Parser::parse(const std::string& expression, const std::map<std::string, CustomFunction*>& customFunctions)
  132. {
  133. // First split the expression into subexpressions.
  134. std::string primaryExpression = expression;
  135. std::vector<std::string> subexpressions;
  136. while (true)
  137. {
  138. std::string::size_type pos = primaryExpression.find_last_of(';');
  139. if (pos == std::string::npos) { break; }
  140. std::string sub = trim(primaryExpression.substr(pos + 1));
  141. if (!sub.empty()) { subexpressions.push_back(sub); }
  142. primaryExpression = primaryExpression.substr(0, pos);
  143. }
  144. // Parse the subexpressions.
  145. std::map<std::string, ExpressionTreeNode> subexpDefs;
  146. for (size_t i = 0; i < subexpressions.size(); ++i)
  147. {
  148. size_t equalsPos = subexpressions[i].find('=');
  149. if (equalsPos == std::string::npos) { throw Exception("Parse error: subexpression does not specify a name"); }
  150. std::string name = trim(subexpressions[i].substr(0, equalsPos));
  151. if (name.empty()) { throw Exception("Parse error: subexpression does not specify a name"); }
  152. std::vector<ParseToken> tokens = tokenize(subexpressions[i].substr(equalsPos + 1));
  153. size_t pos = 0;
  154. subexpDefs[name] = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
  155. if (pos != tokens.size()) { throw Exception("Parse error: unexpected text at end of subexpression: " + tokens[pos].getText()); }
  156. }
  157. // Now parse the primary expression.
  158. std::vector<ParseToken> tokens = tokenize(primaryExpression);
  159. size_t pos = 0;
  160. ExpressionTreeNode result = parsePrecedence(tokens, pos, customFunctions, subexpDefs, 0);
  161. if (pos != tokens.size()) { throw Exception("Parse error: unexpected text at end of expression: " + tokens[pos].getText()); }
  162. return ParsedExpression(result);
  163. }
  164. ExpressionTreeNode Parser::parsePrecedence(const std::vector<ParseToken>& tokens, size_t& pos, const std::map<std::string, CustomFunction*>& customFunctions,
  165. const std::map<std::string, ExpressionTreeNode>& subexpressionDefs, int precedence)
  166. {
  167. if (pos == tokens.size()) { throw Exception("Parse error: unexpected end of expression"); }
  168. // Parse the next value (number, variable, function, parenthesized expression)
  169. ParseToken token = tokens[pos];
  170. ExpressionTreeNode result;
  171. if (token.getType() == ParseToken::Number)
  172. {
  173. double value;
  174. std::stringstream(token.getText()) >> value;
  175. result = ExpressionTreeNode(new Operation::Constant(value));
  176. pos++;
  177. }
  178. else if (token.getType() == ParseToken::Variable)
  179. {
  180. const auto subexp = subexpressionDefs.find(token.getText());
  181. if (subexp == subexpressionDefs.end())
  182. {
  183. Operation* op = new Operation::Variable(token.getText());
  184. result = ExpressionTreeNode(op);
  185. }
  186. else { result = subexp->second; }
  187. pos++;
  188. }
  189. else if (token.getType() == ParseToken::LeftParen)
  190. {
  191. pos++;
  192. result = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0);
  193. if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen) { throw Exception("Parse error: unbalanced parentheses"); }
  194. pos++;
  195. }
  196. else if (token.getType() == ParseToken::Function)
  197. {
  198. pos++;
  199. std::vector<ExpressionTreeNode> args;
  200. bool moreArgs;
  201. do
  202. {
  203. args.push_back(parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 0));
  204. moreArgs = (pos < tokens.size() && tokens[pos].getType() == ParseToken::Comma);
  205. if (moreArgs) { pos++; }
  206. } while (moreArgs);
  207. if (pos == tokens.size() || tokens[pos].getType() != ParseToken::RightParen) { throw Exception("Parse error: unbalanced parentheses"); }
  208. pos++;
  209. Operation* op = getFunctionOperation(token.getText(), customFunctions);
  210. try { result = ExpressionTreeNode(op, args); }
  211. catch (...)
  212. {
  213. delete op;
  214. throw;
  215. }
  216. }
  217. else if (token.getType() == ParseToken::Operator && token.getText() == "-")
  218. {
  219. pos++;
  220. const ExpressionTreeNode toNegate = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, 2);
  221. result = ExpressionTreeNode(new Operation::Negate(), toNegate);
  222. }
  223. else { throw Exception("Parse error: unexpected token: " + token.getText()); }
  224. // Now deal with the next binary operator.
  225. while (pos < tokens.size() && tokens[pos].getType() == ParseToken::Operator)
  226. {
  227. token = tokens[pos];
  228. int opIndex = int(Operators.find(token.getText()));
  229. int opPrecedence = Precedence[opIndex];
  230. if (opPrecedence < precedence) { return result; }
  231. pos++;
  232. ExpressionTreeNode arg = parsePrecedence(tokens, pos, customFunctions, subexpressionDefs, LeftAssociative[opIndex] ? opPrecedence + 1 : opPrecedence);
  233. Operation* op = getOperatorOperation(token.getText());
  234. try { result = ExpressionTreeNode(op, result, arg); }
  235. catch (...)
  236. {
  237. delete op;
  238. throw;
  239. }
  240. }
  241. return result;
  242. }
  243. Operation* Parser::getOperatorOperation(const std::string& name)
  244. {
  245. switch (OperationId[Operators.find(name)])
  246. {
  247. case Operation::ADD:
  248. return new Operation::Add();
  249. case Operation::SUBTRACT:
  250. return new Operation::Subtract();
  251. case Operation::MULTIPLY:
  252. return new Operation::Multiply();
  253. case Operation::DIVIDE:
  254. return new Operation::Divide();
  255. case Operation::POWER:
  256. return new Operation::Power();
  257. default:
  258. throw Exception("Parse error: unknown operator");
  259. }
  260. }
  261. Operation* Parser::getFunctionOperation(const std::string& name, const std::map<std::string, CustomFunction*>& customFunctions)
  262. {
  263. static std::map<std::string, Operation::Id> opMap;
  264. if (opMap.empty())
  265. {
  266. opMap["sqrt"] = Operation::SQRT;
  267. opMap["exp"] = Operation::EXP;
  268. opMap["log"] = Operation::LOG;
  269. opMap["sin"] = Operation::SIN;
  270. opMap["cos"] = Operation::COS;
  271. opMap["sec"] = Operation::SEC;
  272. opMap["csc"] = Operation::CSC;
  273. opMap["tan"] = Operation::TAN;
  274. opMap["cot"] = Operation::COT;
  275. opMap["asin"] = Operation::ASIN;
  276. opMap["acos"] = Operation::ACOS;
  277. opMap["atan"] = Operation::ATAN;
  278. opMap["sinh"] = Operation::SINH;
  279. opMap["cosh"] = Operation::COSH;
  280. opMap["tanh"] = Operation::TANH;
  281. opMap["erf"] = Operation::ERF;
  282. opMap["erfc"] = Operation::ERFC;
  283. opMap["step"] = Operation::STEP;
  284. opMap["delta"] = Operation::DELTA;
  285. opMap["square"] = Operation::SQUARE;
  286. opMap["cube"] = Operation::CUBE;
  287. opMap["recip"] = Operation::RECIPROCAL;
  288. opMap["min"] = Operation::MIN;
  289. opMap["max"] = Operation::MAX;
  290. opMap["abs"] = Operation::ABS;
  291. }
  292. const std::string trimmed = name.substr(0, name.size() - 1);
  293. // First check custom functions.
  294. const auto custom = customFunctions.find(trimmed);
  295. if (custom != customFunctions.end()) { return new Operation::Custom(trimmed, custom->second->clone()); }
  296. // Now try standard functions.
  297. const auto iter = opMap.find(trimmed);
  298. if (iter == opMap.end()) { throw Exception("Parse error: unknown function: " + trimmed); }
  299. switch (iter->second)
  300. {
  301. case Operation::SQRT:
  302. return new Operation::Sqrt();
  303. case Operation::EXP:
  304. return new Operation::Exp();
  305. case Operation::LOG:
  306. return new Operation::Log();
  307. case Operation::SIN:
  308. return new Operation::Sin();
  309. case Operation::COS:
  310. return new Operation::Cos();
  311. case Operation::SEC:
  312. return new Operation::Sec();
  313. case Operation::CSC:
  314. return new Operation::Csc();
  315. case Operation::TAN:
  316. return new Operation::Tan();
  317. case Operation::COT:
  318. return new Operation::Cot();
  319. case Operation::ASIN:
  320. return new Operation::Asin();
  321. case Operation::ACOS:
  322. return new Operation::Acos();
  323. case Operation::ATAN:
  324. return new Operation::Atan();
  325. case Operation::SINH:
  326. return new Operation::Sinh();
  327. case Operation::COSH:
  328. return new Operation::Cosh();
  329. case Operation::TANH:
  330. return new Operation::Tanh();
  331. case Operation::ERF:
  332. return new Operation::Erf();
  333. case Operation::ERFC:
  334. return new Operation::Erfc();
  335. case Operation::STEP:
  336. return new Operation::Step();
  337. case Operation::DELTA:
  338. return new Operation::Delta();
  339. case Operation::SQUARE:
  340. return new Operation::Square();
  341. case Operation::CUBE:
  342. return new Operation::Cube();
  343. case Operation::RECIPROCAL:
  344. return new Operation::Reciprocal();
  345. case Operation::MIN:
  346. return new Operation::Min();
  347. case Operation::MAX:
  348. return new Operation::Max();
  349. case Operation::ABS:
  350. return new Operation::Abs();
  351. default:
  352. throw Exception("Parse error: unknown function");
  353. }
  354. }