Sélectionner une révision Git
-
Kubat a rédigé
Some qnames were cut and parsed as simple tokens because of the chop thing; now we parse every simple token as a qname, then we try to promote it to a simple token if it matches the said simple token entirely.
Kubat a rédigé: Some qnames were cut and parsed as simple tokens because of the chop thing; now we parse every simple token as a qname, then we try to promote it to a simple token if it matches the said simple token entirely.
Lexer.cc 8,22 Kio
#include "StrV.hh"
#include "Lexer.hh"
#include "Tokens.hh"
#include <fstream>
namespace Vivy::Script
{
/*
 * Load the whole content of the file at `filePath` into `*retString`,
 * replacing whatever the string held before.
 *
 * Throws std::runtime_error when the file can't be opened or when its size
 * can't be queried.
 */
static void
readFileIntoStdString(const char *filePath, std::string *retString)
{
    std::ifstream input(filePath);
    if (!input.is_open())
        throw std::runtime_error("Failed to open the file " + std::string(filePath));

    /* Measure the file by seeking to its end, tellg() reports a negative
     * position on failure. */
    input.seekg(0, std::ios::end);
    const std::streamoff length = input.tellg();
    if (length < 0)
        throw std::runtime_error("Failed to get the size of the file " + std::string(filePath));

    /* Pre-size the destination, then rewind and slurp everything. */
    retString->reserve(static_cast<std::size_t>(length));
    input.seekg(0, std::ios::beg);

    using CharIterator = std::istreambuf_iterator<char>;
    retString->assign(CharIterator(input), CharIterator());

    /* The stream is closed when `input` goes out of scope. */
}
/*
 * Strip single-line comments from `*retString` in place.
 *
 * A comment starts at `--` and runs up to (not including) the next '\n'.
 * - A comment followed by a newline is overwritten with spaces, so the string
 *   keeps its length and every remaining character keeps its offset.
 * - A comment that runs to the end of the string (no trailing newline) is cut
 *   off by shrinking the string.
 *
 * The scan is purely textual: a `--` that appears inside a string literal is
 * treated as the beginning of a comment too.
 */
static void
getRideOfComments(std::string *retString)
{
    /* Everything before `searchFrom` is already known to be comment-free, so
     * each search resumes there instead of rescanning from the beginning. */
    std::size_t searchFrom = 0;

    for (;;) {
        const std::size_t begin = retString->find("--", searchFrom);
        if (begin == std::string::npos)
            return;

        const std::size_t end = retString->find('\n', begin);
        if (end == std::string::npos) {
            /* Unterminated last line: drop the comment altogether. */
            retString->resize(begin);
            return;
        }

        /* Blank [begin, end) with spaces, the newline itself is kept. */
        retString->replace(begin, end - begin, end - begin, ' ');
        searchFrom = end;
    }
}
/*
 * Walk the token list and replace every qualified-name token whose text is
 * equal to one of the entries of SIMPLE_TOKENS by a simple token built from
 * the same location and the same text. Other tokens are left untouched.
 */
static void
promoteQNameToSimpleTokens(TokenList *tokens) noexcept
{
    for (size_t index = 0; index < tokens->size(); ++index) {
        Token current = tokens->at(index);
        if (current.isQName()) {
            StrV candidate = current.asQName();
            /* Every entry is compared, the scan does not stop at the first
             * match. */
            for (const auto &keyword : Vivy::Script::SIMPLE_TOKENS) {
                if (!StrV::equal(candidate, keyword))
                    continue;
                tokens->at(index) = Token::fromSimple(current.location(), candidate);
            }
        }
    }
}
/*
 * Tokenize the script file `file` and append the resulting tokens to `tokens`.
 *
 * The file is read into `*storage`, its single-line `--` comments are removed
 * in place, then a StrV view built from `storage->data()` is consumed from the
 * left until nothing remains.
 * NOTE(review): tokens presumably keep views into `*storage`, so the caller
 * most likely has to keep `*storage` alive and unmodified while the tokens are
 * in use -- confirm against StrV / Token.
 *
 * Each iteration of the main loop tries, in this order:
 *   1. a non alpha-numeric simple token (SIMPLE_TOKENS_NON_ALPHANUM),
 *   2. a string literal,
 *   3. a floating point number,
 *   4. an integer,
 *   5. a qualified name.
 * When the remaining content is empty, qualified names that are equal to an
 * entry of SIMPLE_TOKENS are promoted to simple tokens and the function
 * returns.
 *
 * Throws std::runtime_error when:
 *   - `storage` is null,
 *   - the file can't be opened or measured (from readFileIntoStdString),
 *   - the storage is empty once the comments are removed,
 *   - a string / floating / integer literal is directly followed by an
 *     identifier character,
 *   - no token can be recognized at the current position.
 */
void
tokenizeFile(const char *file, TokenList *tokens, std::string *storage)
{
/* Scratch slot receiving the value of the token being parsed. The members
 * share the same storage, only the one written by the last successful chop is
 * meaningful. */
union {
double floating;
int integer;
StrV qualifiedName = STRV_NULL;
};
/* Out-flag written by chopNextIdChar and chopQualifiedName. */
bool ok;
if (storage == nullptr) {
throw std::runtime_error("No storage was passed to the tokenizeFile function");
}
readFileIntoStdString(file, storage);
getRideOfComments(storage);
StrV fileContent = StrV::fromStr(storage->data());
Location loc = Location::beginOfFile(file, fileContent);
int trimmedAmount = 0;
/* The emptiness check is done after the comments were removed. */
if (storage->size() == 0) {
throw std::runtime_error("Found an empty file!");
}
for (;;) {
/* Target of the `goto` below: restarts the iteration from inside the nested
 * range-for. */
global_continue:
/* First simple tokens that are not alpha-numeric */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
for (const auto &simpleToken : Vivy::Script::SIMPLE_TOKENS_NON_ALPHANUM) {
if (StrV::startsWith(fileContent, simpleToken)) {
/* If the character is a UTF-8 character, the Location can skip
 * multiple chars because the glyph is multiple characters long! */
tokens->push_back(Token::fromSimple(loc, simpleToken));
fileContent.chopLeft(simpleToken.len());
loc = Location::shift(loc, simpleToken.len());
goto global_continue;
}
}
/* Find a string literal? */
/* NOTE(review): fileContent has not been modified since the trim done at the
 * top of the iteration, so this trim (and the ones opening the next stages) is
 * presumably a no-op -- confirm that trimL always writes trimmedAmount,
 * otherwise loc would be shifted again by a stale amount. */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
/* The `qualifiedName` union member is reused to receive the literal.
 * NOTE(review): trimmedAmount presumably receives the number of characters
 * consumed by the literal -- confirm against tryChopEscapedString. */
if (fileContent.tryChopEscapedString(&qualifiedName, &trimmedAmount)) {
if (fileContent.chopNextIdChar(&ok); ok) {
throw std::runtime_error("Invalid string literal, directly "
"followed by an identifier...");
}
/* The token is recorded at the location where the literal starts, loc is
 * moved only afterwards. */
tokens->push_back(Token::fromStringLit(loc, qualifiedName));
loc = Location::shift(loc, trimmedAmount);
continue;
}
/* Find a floating? */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
if (fileContent.tryChopFloating(&floating, &trimmedAmount)) {
if (fileContent.chopNextIdChar(&ok); ok) {
throw std::runtime_error("Invalid floating point found, directly "
"followed by an identifier...");
}
tokens->push_back(Token::fromFloating(loc, floating));
loc = Location::shift(loc, trimmedAmount);
continue;
}
/* Find an integer? (tried after the floating case) */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
if (fileContent.tryChopInteger(&integer, &trimmedAmount)) {
if (fileContent.chopNextIdChar(&ok); ok) {
throw std::runtime_error("Invalid integer found, directly "
"followed by an identifier...");
}
tokens->push_back(Token::fromInteger(loc, integer));
loc = Location::shift(loc, trimmedAmount);
continue;
}
/* Find a qualified name? */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
qualifiedName = fileContent.chopQualifiedName(&ok);
if (ok) {
/* Alpha-numeric simple tokens are stored as qualified names at this point,
 * they are promoted once the whole file has been tokenized. */
tokens->push_back(Token::fromQName(loc, qualifiedName));
loc = Location::shift(loc, qualifiedName.len());
continue;
}
/* EOF? => May promote QNAME to SIMPLE */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
if (fileContent.len() == 0) {
/* Parse simple tokens that are alpha numeric */
promoteQNameToSimpleTokens(tokens);
return;
}
/* ERROR! Try to print the token we couldn't parse! */
else {
StrV unknownToken;
/* Try to isolate the offending text up to the next space, or else up to the
 * next newline, to make the error message more helpful. */
if (fileContent.tryChopByDelim(' ', &unknownToken) ||
fileContent.tryChopByDelim('\n', &unknownToken)) {
throw std::runtime_error(
"Failed to find a token. Last parsed location is " + loc.toString() +
". The possible unknown token might be: " + unknownToken.toStdString());
}
/* No delimiter found: report the location only. */
throw std::runtime_error("Failed to find a token... Last location is: " +
loc.toString());
}
}
}
std::optional<Token>
tokenListPeek(const TokenList *tokens) noexcept
{
return (tokens->size() > 0) ? std::optional((*tokens)[0]) : std::nullopt;
}
/*
 * Remove the first token of the list and return it, or return std::nullopt
 * (leaving the list untouched) when the list holds no token.
 */
std::optional<Token>
tokenListPop(TokenList *tokens) noexcept
{
    if (tokens->size() == 0)
        return std::nullopt;

    /* Copy the head before erasing it from the list. */
    Token head = (*tokens)[0];
    tokens->erase(tokens->begin());
    return head;
}
std::optional<Token>
tokenListPeekNext(const TokenList *tokens) noexcept
{
return (tokens->size() < 2) ? std::nullopt : std::optional((*tokens)[1]);
}
/* Tell whether the list still holds at least one token. */
bool
tokenListIsNotEmpty(const TokenList *tokens) noexcept
{
    return tokens->size() != 0;
}
/*
 * Unwrap an optional token: return the contained token, or throw a
 * std::runtime_error when the optional is empty.
 *
 * NOTE(review): the message reads "Excepted", "Expected" was presumably
 * meant. It is left as is because callers or tests may match on the text.
 */
Token
getInnerTokenOpt(const std::optional<Token> &tokOpt)
{
    if (tokOpt.has_value())
        return *tokOpt;
    throw std::runtime_error("Excepted a token, found nothing, EOF?");
}
/*
 * Pop tokens from the front of the list up to and including the next simple
 * token equal to `limitToken`, and return them in order.
 *
 * The first token of the list is always popped without being compared to the
 * limit; the comparison only applies to the tokens that follow it.
 *
 * Throws std::runtime_error when the list is empty on entry, or (through
 * getInnerTokenOpt) when the list runs out before the limit token is found.
 */
std::vector<Token>
tokenListPopToNextSimpleToken(TokenList *tokens, StrV limitToken)
{
    if (tokens->size() == 0)
        throw std::runtime_error("No more tokens to pop, EOF?");

    std::vector<Token> popped;
    bool limitIsNext = false;
    do {
        popped.push_back(getInnerTokenOpt(tokenListPop(tokens)));
        Token upcoming = getInnerTokenOpt(tokenListPeek(tokens));
        limitIsNext = (upcoming.valueType() == Token::Type::SIMPLE) && upcoming.isSimple(limitToken);
    } while (!limitIsNext);

    /* The limit token itself is part of the result. */
    popped.push_back(getInnerTokenOpt(tokenListPop(tokens)));
    return popped;
}
/*
 * Pop tokens from the front of the list up to, but not including, the next
 * simple token equal to `limitToken`, and return them in order. The limit
 * token stays at the front of the list; the result is empty when the limit is
 * already the first token.
 *
 * Throws std::runtime_error when the list is empty on entry, or (through
 * getInnerTokenOpt) when the list runs out before the limit token is found.
 */
std::vector<Token>
tokenListPopToNextSimpleTokenExcluded(TokenList *tokens, StrV limitToken)
{
    if (tokens->size() == 0)
        throw std::runtime_error("No more tokens to pop, EOF?");

    /* True when the head of the list is the limit token, throws on EOF. */
    const auto headIsLimit = [&]() -> bool {
        Token upcoming = getInnerTokenOpt(tokenListPeek(tokens));
        return (upcoming.valueType() == Token::Type::SIMPLE) && upcoming.isSimple(limitToken);
    };

    std::vector<Token> popped;
    while (!headIsLimit())
        popped.push_back(getInnerTokenOpt(tokenListPop(tokens)));
    return popped;
}
}