From 0d5f651d45b2d8eaca0010cf671a34107643249b Mon Sep 17 00:00:00 2001 From: Kubat <maelle.martin@proton.me> Date: Thu, 1 May 2025 11:48:04 +0200 Subject: [PATCH] [AST] Differentiate between parser span and AST span, and add conversion to get the AST span for each of the AST elements --- grimoire/src/ast/expr.rs | 24 +++++++++--- grimoire/src/ast/location.rs | 5 --- grimoire/src/ast/mod.rs | 4 +- grimoire/src/ast/span.rs | 32 ++++++++++++++++ grimoire/src/parser/error.rs | 14 +++++-- grimoire/src/parser/expr.rs | 17 ++++----- grimoire/src/parser/location.rs | 11 ++++++ grimoire/src/parser/mod.rs | 2 + grimoire/src/parser/span.rs | 66 +++++++++++++++++---------------- grimoire/src/parser/utils.rs | 10 ++--- 10 files changed, 121 insertions(+), 64 deletions(-) delete mode 100644 grimoire/src/ast/location.rs create mode 100644 grimoire/src/ast/span.rs create mode 100644 grimoire/src/parser/location.rs diff --git a/grimoire/src/ast/expr.rs b/grimoire/src/ast/expr.rs index 623b087..89ad2fd 100644 --- a/grimoire/src/ast/expr.rs +++ b/grimoire/src/ast/expr.rs @@ -1,19 +1,31 @@ -use crate::ast::Location; +use crate::ast::AstSpan; -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub enum VarOrConstant { - Var(String), + /// Integer, like 1, 42, -1, etc Int(i32), + + /// A floating point number, like 3.14, .01, 3, etc Flt(f32), + + /// A string, can be multiline or not: "something", '''Some other things''' Str(String), + + /// A variable, just an identifier preceded by `$`, like: `$CynthiaLike` + Var(String), + + /// A boolean value, `true` or `false` Bool(bool), + + /// An identifier, follows the same rules as rust identifiers. 
+ Ident(String), } #[derive(Debug)] pub enum Expression { - Binary(Location, Location, Box<Expression>, BinOp, Box<Expression>), - Unary(Location, Location, UnOp, Box<Expression>), - Leaf(Location, Location, VarOrConstant), + Binary(AstSpan, Box<Expression>, BinOp, Box<Expression>), + Unary(AstSpan, UnOp, Box<Expression>), + Leaf(AstSpan, VarOrConstant), } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/grimoire/src/ast/location.rs b/grimoire/src/ast/location.rs deleted file mode 100644 index 32a6f60..0000000 --- a/grimoire/src/ast/location.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct Location { - offset: usize, - line: u32, -} diff --git a/grimoire/src/ast/mod.rs b/grimoire/src/ast/mod.rs index 598d762..19b1f1b 100644 --- a/grimoire/src/ast/mod.rs +++ b/grimoire/src/ast/mod.rs @@ -1,4 +1,4 @@ pub mod expr; -mod location; +mod span; -pub use location::Location; +pub use span::Span as AstSpan; diff --git a/grimoire/src/ast/span.rs b/grimoire/src/ast/span.rs new file mode 100644 index 0000000..9996b6f --- /dev/null +++ b/grimoire/src/ast/span.rs @@ -0,0 +1,32 @@ +use crate::parser::Location; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Span { + offset: usize, + line: usize, + size: usize, +} + +impl From<(Location, Location)> for Span { + fn from((from, to): (Location, Location)) -> Self { + use std::cmp::Ordering; + let (from, to) = match PartialOrd::partial_cmp(&from, &to) { + Some(Ordering::Less | Ordering::Equal) => (from, to), + Some(Ordering::Greater) => (to, from), + None => unreachable!(), + }; + + debug_assert!(from.offset <= to.offset, "error in implementation..."); + Span { + offset: from.offset, + line: from.line, + size: to.offset - from.offset, + } + } +} + +impl From<(crate::parser::Span<'_>, crate::parser::Span<'_>)> for Span { + fn from((from, to): (crate::parser::Span<'_>, crate::parser::Span<'_>)) -> Self { + (from.into_location(), to.into_location()).into() + 
} +} diff --git a/grimoire/src/parser/error.rs b/grimoire/src/parser/error.rs index 9ac891d..2185b01 100644 --- a/grimoire/src/parser/error.rs +++ b/grimoire/src/parser/error.rs @@ -1,4 +1,4 @@ -use crate::{ast::Location, parser::Span}; +use crate::parser::{Location, Span}; use nom::{IResult, error::*}; use std::num::{ParseFloatError, ParseIntError}; @@ -24,7 +24,7 @@ impl<I> ContextError<I> for ParserError { impl ParseError<Span<'_>> for ParserError { fn from_error_kind(input: Span<'_>, kind: ErrorKind) -> Self { - Self::Nom(input.get_location(), kind) + Self::Nom(input.location(), kind) } fn append(_: Span<'_>, _: ErrorKind, other: Self) -> Self { @@ -34,12 +34,18 @@ impl ParseError<Span<'_>> for ParserError { impl nom::error::FromExternalError<Span<'_>, ParseFloatError> for ParserError { fn from_external_error(input: Span<'_>, _: ErrorKind, e: ParseFloatError) -> Self { - Self::ParseFloat(input.get_location(), e) + Self::ParseFloat(input.location(), e) } } impl nom::error::FromExternalError<Span<'_>, ParseIntError> for ParserError { fn from_external_error(input: Span<'_>, _: ErrorKind, e: ParseIntError) -> Self { - Self::ParseInt(input.get_location(), e) + Self::ParseInt(input.location(), e) + } +} + +impl FromExternalError<Span<'_>, ParserError> for ParserError { + fn from_external_error(_: Span<'_>, _: ErrorKind, e: ParserError) -> Self { + e } } diff --git a/grimoire/src/parser/expr.rs b/grimoire/src/parser/expr.rs index 78bf3ca..cb91241 100644 --- a/grimoire/src/parser/expr.rs +++ b/grimoire/src/parser/expr.rs @@ -1,5 +1,5 @@ use crate::{ - ast::{Location, expr::*}, + ast::{AstSpan, expr::*}, parser::*, }; @@ -40,29 +40,26 @@ fn unop<'a>( ) -> impl Parser<Span<'a>, Output = Expression, Error = ParserError> { utils::map_with_locaiton( (multispace0, tag(op.as_str()), next), - move |begin, end, (.., expr)| Expression::Unary(begin, end, op, Box::new(expr)), + move |ast_span, (.., expr)| Expression::Unary(ast_span, op, Box::new(expr)), ) } fn binop<'a>( op: 
BinOp, next: impl Parser<Span<'a>, Output = Expression, Error = ParserError>, -) -> impl Parser<Span<'a>, Output = (Location, Location, BinOp, Expression), Error = ParserError> { +) -> impl Parser<Span<'a>, Output = (AstSpan, BinOp, Expression), Error = ParserError> { preceded( multispace0, utils::with_location((map(tag(op.as_str()), move |_| op), next)) - .map(|(begin, end, (op, expr))| (begin, end, op, expr)), + .map(|(ast_span, (op, expr))| (ast_span, op, expr)), ) } -fn fold_exprs( - initial: Expression, - remainder: Vec<(Location, Location, BinOp, Expression)>, -) -> Expression { +fn fold_exprs(initial: Expression, remainder: Vec<(AstSpan, BinOp, Expression)>) -> Expression { remainder .into_iter() - .fold(initial, |acc, (begin, end, op, expr)| { - Expression::Binary(begin, end, Box::new(acc), op, Box::new(expr)) + .fold(initial, |acc, (ast_span, op, expr)| { + Expression::Binary(ast_span, Box::new(acc), op, Box::new(expr)) }) } diff --git a/grimoire/src/parser/location.rs b/grimoire/src/parser/location.rs new file mode 100644 index 0000000..13a6dec --- /dev/null +++ b/grimoire/src/parser/location.rs @@ -0,0 +1,11 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Location { + pub offset: usize, + pub line: usize, +} + +impl PartialOrd for Location { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + PartialOrd::partial_cmp(&self.offset, &other.offset) + } +} diff --git a/grimoire/src/parser/mod.rs b/grimoire/src/parser/mod.rs index fe3d064..245e522 100644 --- a/grimoire/src/parser/mod.rs +++ b/grimoire/src/parser/mod.rs @@ -1,7 +1,9 @@ mod error; mod expr; +mod location; mod span; pub mod utils; pub use error::*; +pub use location::*; pub use span::*; diff --git a/grimoire/src/parser/span.rs b/grimoire/src/parser/span.rs index 432b9f9..9ada95e 100644 --- a/grimoire/src/parser/span.rs +++ b/grimoire/src/parser/span.rs @@ -1,4 +1,4 @@ -use crate::ast::Location; +use crate::parser::Location; use nom::{Compare, FindSubstring, 
Input, Offset}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -9,7 +9,7 @@ pub struct Span<'a> { /// The line number of the fragment relatively to the input of the /// parser. It starts at line 1. - line: u32, + line: usize, /// The fragment that is spanned. /// The fragment represents a part of the input of the parser. @@ -23,15 +23,21 @@ impl<'a> core::ops::Deref for Span<'a> { } } +impl From<Span<'_>> for Location { + fn from(value: Span<'_>) -> Self { + value.into_location() + } +} + impl From<Span<'_>> for String { fn from(value: Span<'_>) -> Self { - value.fragment().to_string() + value.into_fragment().to_string() } } impl<'a> From<Span<'a>> for &'a str { fn from(value: Span<'a>) -> Self { - value.fragment + value.into_fragment() } } @@ -44,55 +50,51 @@ impl<'a> Span<'a> { } } - pub fn get_location(&self) -> Location { - todo!() + pub fn location(&self) -> Location { + let Self { offset, line, .. } = *self; + Location { offset, line } } - /// The offset represents the position of the fragment relatively to - /// the input of the parser. It starts at offset 0. + pub fn into_location(self) -> Location { + let Self { offset, line, .. } = self; + Location { offset, line } + } + + /// The offset represents the position of the fragment relatively to the input of the parser. + /// It starts at offset 0. pub fn location_offset(&self) -> usize { self.offset } - /// The line number of the fragment relatively to the input of the - /// parser. It starts at line 1. - pub fn location_line(&self) -> u32 { + /// The line number of the fragment relatively to the input of the parser. It starts at line 1. + pub fn location_line(&self) -> usize { self.line } - /// The fragment that is spanned. - /// The fragment represents a part of the input of the parser. + /// The fragment that is spanned. The fragment represents a part of the input of the parser. pub fn fragment(&self) -> &str { self.fragment } + /// The fragment that is spanned. 
The fragment represents a part of the input of the parser. pub fn into_fragment(self) -> &'a str { self.fragment } fn slice_by(&self, next_fragment: &'a str) -> Self { - let consumed_len = self.fragment.offset(next_fragment); - if consumed_len == 0 { - return Self { + match self.fragment.offset(next_fragment) { + 0 => Self { line: self.line, offset: self.offset, fragment: next_fragment, - }; - } - - let consumed = self.fragment.take(consumed_len); - - let next_offset = self.offset + consumed_len; - - let consumed_as_bytes = consumed.as_bytes(); - let iter = memchr::Memchr::new(b'\n', consumed_as_bytes); - let number_of_lines = iter.count() as u32; - let next_line = self.line + number_of_lines; - - Self { - line: next_line, - offset: next_offset, - fragment: next_fragment, + }, + consumed_len => Self { + line: self.line + + memchr::Memchr::new(b'\n', self.fragment.take(consumed_len).as_bytes()) + .count(), + offset: self.offset + consumed_len, + fragment: next_fragment, + }, } } } diff --git a/grimoire/src/parser/utils.rs b/grimoire/src/parser/utils.rs index 6f5c7c6..dcdbb44 100644 --- a/grimoire/src/parser/utils.rs +++ b/grimoire/src/parser/utils.rs @@ -1,5 +1,5 @@ use crate::{ - ast::Location, + ast::AstSpan, parser::{error::*, span::*}, }; use nom::{ @@ -23,17 +23,17 @@ pub fn parse_paren<'a, T>( pub fn with_location<'a, O>( mut parser: impl Parser<Span<'a>, Output = O, Error = ParserError>, -) -> impl Parser<Span<'a>, Output = (Location, Location, O), Error = ParserError> { +) -> impl Parser<Span<'a>, Output = (AstSpan, O), Error = ParserError> { move |span: Span<'a>| { let begin_s = multispace0(span)?.0; let (end_s, res) = parser.parse(span)?; - Ok((end_s, (begin_s.get_location(), end_s.get_location(), res))) + Ok((end_s, (AstSpan::from((begin_s, end_s)), res))) } } pub fn map_with_locaiton<'a, F, O>( mut parser: F, - mut cb: impl FnMut(Location, Location, <F as Parser<Span<'a>>>::Output) -> O, + mut cb: impl FnMut(AstSpan, <F as Parser<Span<'a>>>::Output) -> 
O, ) -> impl Parser<Span<'a>, Output = O, Error = ParserError> where F: Parser<Span<'a>, Error = ParserError>, @@ -41,7 +41,7 @@ where move |span: Span<'a>| { let begin_s = multispace0(span)?.0; let (end_s, res) = parser.parse(span)?; - Ok((end_s, cb(begin_s.get_location(), end_s.get_location(), res))) + Ok((end_s, cb(AstSpan::from((begin_s, end_s)), res))) } } -- GitLab