diff --git a/src/Lib/Script/FrontEnd/Lexer.cc b/src/Lib/Script/FrontEnd/Lexer.cc
index 3ee1cfd1082d9a912611e758d71712e0ca851e4c..e3b5f2b0cf9c49206cb50e0978acc6f90784bba9 100644
--- a/src/Lib/Script/FrontEnd/Lexer.cc
+++ b/src/Lib/Script/FrontEnd/Lexer.cc
@@ -43,6 +43,26 @@ getRideOfComments(std::string *retString)
     } while (begin != std::string::npos);
 }
 
+/* Replace, in place, every QNAME token whose text is exactly equal to one of
+ * the SIMPLE_TOKENS by the corresponding simple token (same location). */
+static void
+promoteQNameToSimpleTokens(TokenList *tokens) noexcept
+{
+    for (size_t i = 0; i < tokens->size(); i += 1) {
+        Token tok = tokens->at(i);
+        if (!tok.isQName())
+            continue;
+        StrV qnameToPromote = tok.asQName();
+
+        for (const auto &simpleToken : Vivy::Script::SIMPLE_TOKENS) {
+            if (StrV::equal(qnameToPromote, simpleToken)) {
+                tokens->at(i) = Token::fromSimple(tok.location(), qnameToPromote);
+                break; /* Promoted, the other simple tokens can't match anymore */
+            }
+        }
+    }
+}
+
 void
 tokenizeFile(const char *file, TokenList *tokens, std::string *storage)
 {
@@ -69,10 +89,10 @@ tokenizeFile(const char *file, TokenList *tokens, std::string *storage)
 
     for (;;) {
     global_continue:
-        /* First simple tokens */
+        /* First simple tokens that are not alpha-numeric */
         fileContent = StrV::trimL(fileContent, &trimmedAmount);
         loc = Location::shift(loc, trimmedAmount);
-        for (const auto &simpleToken : Vivy::Script::SIMPLE_TOKENS) {
+        for (const auto &simpleToken : Vivy::Script::SIMPLE_TOKENS_NON_ALPHANUM) {
             if (StrV::startsWith(fileContent, simpleToken)) {
                 /* If the charatecr is an UTF8 character, the Location can skip
                  * multiple chars because the glyph is multiple characters long! */
@@ -132,10 +152,12 @@ tokenizeFile(const char *file, TokenList *tokens, std::string *storage)
             continue;
         }
 
-        /* EOF? */
+        /* EOF? => May promote QNAME to SIMPLE */
         fileContent = StrV::trimL(fileContent, &trimmedAmount);
         loc = Location::shift(loc, trimmedAmount);
         if (fileContent.len() == 0) {
+            /* Promote the QNAMEs that are in fact alpha-numeric simple tokens */
+            promoteQNameToSimpleTokens(tokens);
             return;
         }
 
diff --git a/src/Lib/Script/FrontEnd/Tokens.hh b/src/Lib/Script/FrontEnd/Tokens.hh
index c6df5e0c4952b0ee4d39075238e0869b3f445b67..5d532142163a00d39b51d8e9767d6140f8e5d7aa 100644
--- a/src/Lib/Script/FrontEnd/Tokens.hh
+++ b/src/Lib/Script/FrontEnd/Tokens.hh
@@ -64,12 +64,41 @@ namespace Vivy::Script
 }
 
 #define TOKEN_RULE_INT "(+|-)?[0-9]+"
-#define TOKEN_RULE_ID "[a-zA-Z\u0391-\u03C9\u220F_\u221A_][a-zA-Z\u0391-\u03C9_0-9]*"
+#define TOKEN_RULE_ID "[a-zA-Z\u0391-\u03C9\u220F_\u221A_][a-zA-Z\u0391-\u03C9_0-9\\-\\+]*"
 #define TOKEN_RULE_REAL TOKEN_RULE_INT ".(" TOKEN_RULE_INT "((e|E)(+|-)" TOKEN_RULE_INT ")?)?"
 #define TOKEN_RULE_QNAME TOKEN_RULE_ID "(." TOKEN_RULE_ID ")*"
 #define TOKEN_RULE_UPPER_QNAME "[A-Z\u0391-\u03C9\u220F_\u221A_][A-Z\u0391-\u03C9_0-9]*"
 
+/* The simple tokens that are not alpha-numeric: the lexer matches them
+ * directly as a prefix of the remaining input, before trying the QNAMEs. */
+[[maybe_unused]] static constexpr auto SIMPLE_TOKENS_NON_ALPHANUM = { TOKEN_PARENT_LEFT,
+                                                                      TOKEN_PARENT_RIGHT,
+                                                                      TOKEN_BRACKET_LEFT,
+                                                                      TOKEN_BRACKET_RIGHT,
+                                                                      TOKEN_HAT,
+                                                                      TOKEN_HASHTAG,
+                                                                      TOKEN_CURLY_BRACKET_LEFT,
+                                                                      TOKEN_CURLY_BRACKET_RIGHT,
+                                                                      TOKEN_SEMICOL,
+                                                                      TOKEN_COL,
+                                                                      TOKEN_PIPE,
+                                                                      TOKEN_INTEROGATION,
+                                                                      TOKEN_COMMA,
+                                                                      TOKEN_DOT,
+                                                                      TOKEN_ARROW,
+                                                                      TOKEN_PLUS,
+                                                                      TOKEN_MINUS,
+                                                                      TOKEN_DIV,
+                                                                      TOKEN_TIMES,
+                                                                      TOKEN_EQ,
+                                                                      TOKEN_NEQ,
+                                                                      TOKEN_LT,
+                                                                      TOKEN_LE,
+                                                                      TOKEN_GT,
+                                                                      TOKEN_GE,
+                                                                      TOKEN_ASSIGN };
+
 /* The list of all possible simple tokens */
 [[maybe_unused]] static constexpr auto SIMPLE_TOKENS = { TOKEN_PARENT_LEFT,
                                                          TOKEN_PARENT_RIGHT,
                                                          TOKEN_BRACKET_LEFT,