// [web viewer residue] "Sélectionner une révision Git" (i.e. "Select a Git revision")
// [web viewer residue] Lexer.cc, 8.22 KiB
#include "StrV.hh"
#include "Lexer.hh"
#include "Tokens.hh"
#include <fstream>
namespace Vivy::Script
{
/* Load the whole content of the file at `filePath` into `*retString`.
 *
 * The stream is opened with the default (text) mode. The size reported by
 * tellg() is only used to reserve capacity up front; the content itself is
 * copied through stream buffer iterators and replaces whatever `*retString`
 * held before.
 *
 * Throws std::runtime_error when the file cannot be opened or when its size
 * cannot be queried. `retString` is dereferenced without a null check. */
static void
readFileIntoStdString(const char *filePath, std::string *retString)
{
    std::ifstream input(filePath);
    if (!input.is_open())
        throw std::runtime_error("Failed to open the file " + std::string(filePath));

    /* Measure the file by jumping to its end. */
    input.seekg(0, std::ios::end);
    const std::streamoff byteCount = input.tellg();
    if (byteCount < 0) /* Also covers the -1 failure value of tellg() */
        throw std::runtime_error("Failed to get the size of the file " + std::string(filePath));

    retString->reserve(static_cast<std::size_t>(byteCount));

    /* Rewind, then slurp everything. The stream is closed by its destructor. */
    input.seekg(0, std::ios::beg);
    retString->assign(std::istreambuf_iterator<char>{ input }, std::istreambuf_iterator<char>{});
}
/* Erase every `--` single line comment from `*retString`, in place.
 *
 * A comment runs from `--` up to (not including) the next '\n'. Its characters
 * are overwritten with spaces, so the length of the string and the offset of
 * every remaining character are unchanged. A comment that reaches the end of
 * the string without a newline is cut off by truncating the string.
 *
 * The search resumes right after each blanked comment instead of restarting
 * from the beginning of the string, so the whole pass is linear.
 *
 * NOTE: `--` is matched textually, anywhere in the string; an occurrence
 * inside a string literal of the script is treated as a comment too.
 * `retString` is dereferenced without a null check. */
static void
getRideOfComments(std::string *retString)
{
    std::size_t searchFrom = 0;
    for (;;) {
        const std::size_t begin = retString->find("--", searchFrom);
        if (begin == std::string::npos)
            return; /* No comment left */

        const std::size_t end = retString->find('\n', begin);
        if (end == std::string::npos) {
            /* Unterminated comment: it is the tail of the string, drop it */
            retString->resize(begin);
            return;
        }

        /* Same length replacement: [begin, end) becomes spaces, the newline
         * at `end` is kept. Nothing before `end` can match anymore. */
        retString->replace(begin, end - begin, end - begin, ' ');
        searchFrom = end;
    }
}
/* Turn qualified-name tokens whose text is one of SIMPLE_TOKENS into simple
 * tokens.
 *
 * Every token of `*tokens` is visited; tokens that are not qualified names are
 * left alone. A matching token is replaced, at the same index, by a simple
 * token built from the original token's location and the original name. */
static void
promoteQNameToSimpleTokens(TokenList *tokens) noexcept
{
    for (size_t index = 0; index < tokens->size(); ++index) {
        Token current = tokens->at(index);
        if (current.isQName()) {
            StrV candidate = current.asQName();

            /* The whole table is scanned, there is no early exit on a match */
            for (const auto &keyword : Vivy::Script::SIMPLE_TOKENS) {
                const bool sameText = StrV::equal(candidate, keyword);
                if (sameText)
                    tokens->at(index) = Token::fromSimple(current.location(), candidate);
            }
        }
    }
}
/* Tokenize the script at `file` and append the resulting tokens to `*tokens`.
 *
 * The file is loaded into `*storage` by readFileIntoStdString and its `--`
 * comments are removed by getRideOfComments; the lexer then walks a StrV built
 * from `storage->data()`. NOTE(review): the produced tokens presumably keep
 * views into `*storage`, so the caller should keep it alive for as long as
 * the tokens are used. Confirm against StrV/Token.
 *
 * Each loop iteration tries, in this order: the non alpha-numeric simple
 * tokens, a string literal, a floating point, an integer, a qualified name.
 * When none matches and nothing is left, the qualified names are passed
 * through promoteQNameToSimpleTokens and the function returns.
 *
 * Throws std::runtime_error when `storage` is null, when the storage is empty
 * once the comments are removed, when a string/floating/integer literal is
 * directly followed by an identifier character, or when no token can be
 * recognized. Errors thrown by readFileIntoStdString propagate unchanged.
 * `tokens` is dereferenced without a null check. */
void
tokenizeFile(const char *file, TokenList *tokens, std::string *storage)
{
/* Scratch out-values handed to the chop helpers. The members share the same
 * storage: only the one written most recently is meaningful. */
union {
double floating;
int integer;
StrV qualifiedName = STRV_NULL;
};
/* Out-flag filled by chopNextIdChar and chopQualifiedName. */
bool ok;
if (storage == nullptr) {
throw std::runtime_error("No storage was passed to the tokenizeFile function");
}
readFileIntoStdString(file, storage);
getRideOfComments(storage);
/* NOTE(review): fromStr receives a bare char pointer, so it presumably
 * measures the content up to the first NUL byte. Confirm. */
StrV fileContent = StrV::fromStr(storage->data());
Location loc = Location::beginOfFile(file, fileContent);
int trimmedAmount = 0;
/* Checked after the comments were removed: a file that only holds one
 * unterminated `--` comment is truncated to nothing and is reported as empty
 * too. */
if (storage->size() == 0) {
throw std::runtime_error("Found an empty file!");
}
for (;;) {
/* Jump target of the simple token scan below: it restarts the outer loop from
 * inside the range-for, where a plain `continue` would only affect the inner
 * loop. */
global_continue:
/* First the simple tokens that are not alpha-numeric. Trim the left side of
 * the remaining content and shift the location by the reported amount. */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
for (const auto &simpleToken : Vivy::Script::SIMPLE_TOKENS_NON_ALPHANUM) {
if (StrV::startsWith(fileContent, simpleToken)) {
/* If the character is an UTF8 character, the Location can skip
 * multiple chars because the glyph is multiple characters long! */
tokens->push_back(Token::fromSimple(loc, simpleToken));
fileContent.chopLeft(simpleToken.len());
loc = Location::shift(loc, simpleToken.len());
goto global_continue;
}
}
/* Find a string literal?
 * NOTE(review): the content was already trimmed at the top of the iteration,
 * so this trim (and the ones before the next stages) looks redundant. It is
 * only harmless if trimL reports 0 when there is nothing to trim. Confirm. */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
if (fileContent.tryChopEscapedString(&qualifiedName, &trimmedAmount)) {
/* A literal glued to an identifier character is rejected. */
if (fileContent.chopNextIdChar(&ok); ok) {
throw std::runtime_error("Invalid string literal, directly "
"followed by an identifier...");
}
/* The token is recorded with the location it started at; only then is the
 * location shifted by what the helper stored in trimmedAmount (presumably
 * the consumed length, to be confirmed). */
tokens->push_back(Token::fromStringLit(loc, qualifiedName));
loc = Location::shift(loc, trimmedAmount);
continue;
}
/* Find a floating? It is tried before the integer; presumably so that the
 * integral part of a floating literal is not taken as an integer. Confirm. */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
if (fileContent.tryChopFloating(&floating, &trimmedAmount)) {
if (fileContent.chopNextIdChar(&ok); ok) {
throw std::runtime_error("Invalid floating point found, directly "
"followed by an identifier...");
}
tokens->push_back(Token::fromFloating(loc, floating));
loc = Location::shift(loc, trimmedAmount);
continue;
}
/* Find an integer? */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
if (fileContent.tryChopInteger(&integer, &trimmedAmount)) {
if (fileContent.chopNextIdChar(&ok); ok) {
throw std::runtime_error("Invalid integer found, directly "
"followed by an identifier...");
}
tokens->push_back(Token::fromInteger(loc, integer));
loc = Location::shift(loc, trimmedAmount);
continue;
}
/* Find a qualified name? Here the location is shifted by the length of the
 * chopped name itself. */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
qualifiedName = fileContent.chopQualifiedName(&ok);
if (ok) {
tokens->push_back(Token::fromQName(loc, qualifiedName));
loc = Location::shift(loc, qualifiedName.len());
continue;
}
/* EOF? => May promote QNAME to SIMPLE. Only reached when none of the stages
 * above produced a token. */
fileContent = StrV::trimL(fileContent, &trimmedAmount);
loc = Location::shift(loc, trimmedAmount);
if (fileContent.len() == 0) {
/* Parse simple tokens that are alpha numeric */
promoteQNameToSimpleTokens(tokens);
return;
}
/* ERROR! Try to print the token we couldn't parse! The text up to the next
 * space is tried first, then the text up to the next newline. */
else {
StrV unknownToken;
if (fileContent.tryChopByDelim(' ', &unknownToken) ||
fileContent.tryChopByDelim('\n', &unknownToken)) {
throw std::runtime_error(
"Failed to find a token. Last parsed location is " + loc.toString() +
". The possible unknown token might be: " + unknownToken.toStdString());
}
/* No delimiter found: report the location alone. */
throw std::runtime_error("Failed to find a token... Last location is: " +
loc.toString());
}
}
}
std::optional<Token>
tokenListPeek(const TokenList *tokens) noexcept
{
return (tokens->size() > 0) ? std::optional((*tokens)[0]) : std::nullopt;
}
/* Remove the first token of `*tokens` and return it, or return std::nullopt
 * (leaving the list untouched) when the list holds no token. */
std::optional<Token>
tokenListPop(TokenList *tokens) noexcept
{
    if (tokens->size() == 0)
        return std::nullopt;

    /* Copy the head before it is erased from the list */
    Token head = (*tokens)[0];
    tokens->erase(tokens->begin());
    return head;
}
std::optional<Token>
tokenListPeekNext(const TokenList *tokens) noexcept
{
return (tokens->size() < 2) ? std::nullopt : std::optional((*tokens)[1]);
}
/* Tell whether `*tokens` still holds at least one token. */
bool
tokenListIsNotEmpty(const TokenList *tokens) noexcept
{
    return tokens->size() != 0;
}
/* Unwrap `tokOpt`: return a copy of the token it holds, or throw
 * std::runtime_error when it holds nothing. */
Token
getInnerTokenOpt(const std::optional<Token> &tokOpt)
{
    if (tokOpt.has_value())
        return *tokOpt;

    throw std::runtime_error("Excepted a token, found nothing, EOF?");
}
/* Pop tokens from the front of `*tokens` up to and including the next simple
 * token equal to `limitToken`, and return them in order.
 *
 * The first token is always popped without being compared to the limit; the
 * limit is only looked for among the tokens that follow it.
 *
 * Throws std::runtime_error when the list is empty on entry, or (through
 * getInnerTokenOpt) when the list runs out before the limit is found. */
std::vector<Token>
tokenListPopToNextSimpleToken(TokenList *tokens, StrV limitToken)
{
    if (tokens->size() == 0)
        throw std::runtime_error("No more tokens to pop, EOF?");

    std::vector<Token> popped;
    bool limitIsNext = false;
    while (!limitIsNext) {
        popped.push_back(getInnerTokenOpt(tokenListPop(tokens)));

        Token upcoming = getInnerTokenOpt(tokenListPeek(tokens));
        limitIsNext    = (upcoming.valueType() == Token::Type::SIMPLE) &&
                      upcoming.isSimple(limitToken);
    }

    /* The limit token itself is part of the result */
    popped.push_back(getInnerTokenOpt(tokenListPop(tokens)));
    return popped;
}
/* Pop tokens from the front of `*tokens` until the next token is the simple
 * token `limitToken`, and return the popped tokens in order. The limit token
 * is neither returned nor removed from the list.
 *
 * Throws std::runtime_error when the list is empty on entry, or (through
 * getInnerTokenOpt) when the list runs out before the limit is found. */
std::vector<Token>
tokenListPopToNextSimpleTokenExcluded(TokenList *tokens, StrV limitToken)
{
    if (tokens->size() == 0)
        throw std::runtime_error("No more tokens to pop, EOF?");

    /* True when the token at the front of the list is the limit */
    const auto limitIsNext = [&]() -> bool {
        Token upcoming = getInnerTokenOpt(tokenListPeek(tokens));
        return (upcoming.valueType() == Token::Type::SIMPLE) && upcoming.isSimple(limitToken);
    };

    std::vector<Token> popped;
    while (!limitIsNext())
        popped.push_back(getInnerTokenOpt(tokenListPop(tokens)));
    return popped;
}
}