Skip to content
Extraits de code Groupes Projets
Vérifiée Valider 2fa4c782 rédigé par Kubat's avatar Kubat
Parcourir les fichiers

FIX: Fix lexer => correct simple and qname lexing

Some qnames were cut and parsed as simple tokens because of the chop
thing; now we parse every simple token as a qname, then we try to promote
it to a simple token if it entirely matches the said simple token.
parent 99de86a4
Aucune branche associée trouvée
Aucune étiquette associée trouvée
1 requête de fusion!25Draft: New Vivy module spec
......@@ -43,6 +43,22 @@ getRideOfComments(std::string *retString)
} while (begin != std::string::npos);
}
/* Promote, in place, every qname token whose text is exactly equal to one of
 * the entries of Vivy::Script::SIMPLE_TOKENS into a simple token.
 *
 * Words are first lexed as qnames so that a qname is not cut and lexed as a
 * simple token; this pass then converts the qnames that match a simple token
 * entirely.
 *
 * tokens: the list to rewrite. Entries that are not qnames, or that match no
 *         simple token, are left untouched. A promoted token keeps the
 *         location of the qname it replaces. */
static void
promoteQNameToSimpleTokens(TokenList *tokens) noexcept
{
    for (size_t i = 0; i < tokens->size(); i += 1) {
        Token tok = tokens->at(i);
        if (!tok.isQName())
            continue;

        StrV qnameToPromote = tok.asQName();
        for (const auto &simpleToken : Vivy::Script::SIMPLE_TOKENS) {
            if (StrV::equal(qnameToPromote, simpleToken)) {
                tokens->at(i) = Token::fromSimple(tok.location(), qnameToPromote);
                /* Promoted: comparing against the remaining simple tokens
                 * would only repeat work. */
                break;
            }
        }
    }
}
void
tokenizeFile(const char *file, TokenList *tokens, std::string *storage)
{
......@@ -69,10 +85,10 @@ tokenizeFile(const char *file, TokenList *tokens, std::string *storage)
for (;;) {
global_continue:
/* First simple tokens */
/* First simple tokens that are not alpha-numeric */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
for (const auto &simpleToken : Vivy::Script::SIMPLE_TOKENS) {
for (const auto &simpleToken : Vivy::Script::SIMPLE_TOKENS_NON_ALPHANUM) {
if (StrV::startsWith(fileContent, simpleToken)) {
/* If the character is a UTF-8 character, the Location can skip
* multiple chars because the glyph is multiple characters long! */
......@@ -132,10 +148,12 @@ tokenizeFile(const char *file, TokenList *tokens, std::string *storage)
continue;
}
/* EOF? */
/* EOF? => May promote QNAME to SIMPLE */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
if (fileContent.len() == 0) {
/* Parse simple tokens that are alpha numeric */
promoteQNameToSimpleTokens(tokens);
return;
}
......
......@@ -64,12 +64,38 @@ namespace Vivy::Script
}
/* Token rules, written as pattern strings. They are plain string literals
 * composed through adjacent string-literal concatenation: TOKEN_RULE_REAL is
 * built from TOKEN_RULE_INT, and TOKEN_RULE_QNAME from TOKEN_RULE_ID.
 *
 * NOTE(review): TOKEN_RULE_ID appears twice below with different bodies; the
 * second one additionally accepts '-' and '+' after the first character. Only
 * one definition should remain.
 * NOTE(review): '.' and the '+' in "(+|-)" are not escaped. If these strings
 * are handed to a regex engine, confirm that this is intended. */
#define TOKEN_RULE_INT "(+|-)?[0-9]+"
#define TOKEN_RULE_ID "[a-zA-Z\u0391-\u03C9\u220F_\u221A_][a-zA-Z\u0391-\u03C9_0-9]*"
#define TOKEN_RULE_ID "[a-zA-Z\u0391-\u03C9\u220F_\u221A_][a-zA-Z\u0391-\u03C9_0-9\\-\\+]*"
#define TOKEN_RULE_REAL TOKEN_RULE_INT ".(" TOKEN_RULE_INT "((e|E)(+|-)" TOKEN_RULE_INT ")?)?"
#define TOKEN_RULE_QNAME TOKEN_RULE_ID "(." TOKEN_RULE_ID ")*"
#define TOKEN_RULE_UPPER_QNAME "[A-Z\u0391-\u03C9\u220F_\u221A_][A-Z\u0391-\u03C9_0-9]*"
/* The simple tokens that are not alpha-numeric (judging by their names:
 * brackets, punctuation and operators). The lexer walks this list and tests
 * each entry as a prefix of the remaining input with StrV::startsWith.
 *
 * NOTE(review): entries are tried in list order. If matching stops at the
 * first hit, an entry that is a prefix of a later one would shadow it (e.g.
 * TOKEN_LT before TOKEN_LE, TOKEN_GT before TOKEN_GE). Confirm against the
 * token spellings, which are not visible here. */
[[maybe_unused]] static constexpr auto SIMPLE_TOKENS_NON_ALPHANUM = { TOKEN_PARENT_LEFT,
TOKEN_PARENT_RIGHT,
TOKEN_BRACKET_LEFT,
TOKEN_BRACKET_RIGHT,
TOKEN_HAT,
TOKEN_HASHTAG,
TOKEN_CURLY_BRACKET_LEFT,
TOKEN_CURLY_BRACKET_RIGHT,
TOKEN_SEMICOL,
TOKEN_COL,
TOKEN_PIPE,
TOKEN_INTEROGATION,
TOKEN_COMMA,
TOKEN_DOT,
TOKEN_ARROW,
TOKEN_PLUS,
TOKEN_MINUS,
TOKEN_DIV,
TOKEN_TIMES,
TOKEN_EQ,
TOKEN_NEQ,
TOKEN_LT,
TOKEN_LE,
TOKEN_GT,
TOKEN_GE,
TOKEN_ASSIGN };
[[maybe_unused]] static constexpr auto SIMPLE_TOKENS = { TOKEN_PARENT_LEFT,
TOKEN_PARENT_RIGHT,
TOKEN_BRACKET_LEFT,
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Veuillez vous inscrire ou vous pour commenter