From 63890e46d92a00e097f224f4433fc9aff87e8533 Mon Sep 17 00:00:00 2001 From: Mia Date: Mon, 9 Mar 2026 00:52:28 +0100 Subject: [PATCH] Full AST based syntax highlighting --- .gitignore | 1 + Cargo.lock | 19 ++ compiler/src/lib.rs | 7 +- compiler/src/main.rs | 7 +- compiler/src/metadata.rs | 40 +--- compiler/src/scope.rs | 489 ++++++++++++++++++++++----------------- lsp/Cargo.toml | 1 + lsp/src/main.rs | 18 +- lsp/src/parsing.rs | 334 ++++++++++++++++++++++++++ lsp/src/utils.rs | 45 +++- lsp/src/workspace.rs | 400 ++++++++++---------------------- parser/Cargo.toml | 5 + parser/src/ast.rs | 338 ++++++++++++--------------- parser/src/lib.rs | 59 ++++- parser/src/main.rs | 7 +- parser/src/parser.rs | 321 ++++++++++++++++--------- 16 files changed, 1240 insertions(+), 851 deletions(-) mode change 100644 => 100755 compiler/src/scope.rs create mode 100644 lsp/src/parsing.rs diff --git a/.gitignore b/.gitignore index 2f127e3..d259efe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ target/ *.leaf *.asm +*.bak.rs a.out .zed/ diff --git a/Cargo.lock b/Cargo.lock index 69e6756..4ab44db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -484,6 +484,7 @@ dependencies = [ "leaf_compiler", "leaf_parser", "rangemap", + "ropey", "rust_search", "scc", "tokio", @@ -498,6 +499,8 @@ dependencies = [ "derive_more", "indexmap", "peg", + "rangemap", + "ropey", ] [[package]] @@ -730,6 +733,16 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "ropey" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93411e420bcd1a75ddd1dc3caf18c23155eda2c090631a85af21ba19e97093b5" +dependencies = [ + "smallvec", + "str_indices", +] + [[package]] name = "rust_search" version = "2.1.0" @@ -856,6 +869,12 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "str_indices" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d08889ec5408683408db66ad89e0e1f93dff55c73a4ccc71c427d5b277ee47e6" + [[package]] name = "strsim" version = "0.10.0" diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 838cb4f..2676bc1 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -7,7 +7,10 @@ use leaf_assembly::{ types::Type, values::{AnyConst, AnyValue}, }; -use leaf_parser::{SourceCode, ast}; +use leaf_parser::{ + SourceCode, + ast::{self, AstNode}, +}; use std::{ any::TypeId, collections::{HashMap, VecDeque}, @@ -22,7 +25,7 @@ mod scope; pub type SourceFile = Arc; -type FuncQueue<'l> = VecDeque<(&'l Function<'l>, Arc, Scope<'l>)>; +type FuncQueue<'l> = VecDeque<(&'l Function<'l>, Arc>, Scope<'l>)>; pub struct CompilationContext<'l> { ctx: &'l Context<'l>, diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 1483ccc..4bb64f8 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -26,10 +26,9 @@ fn main() { }; if let Err(err) = context.extend( ident.clone(), - &[Arc::new(SourceCode { - text: ArcStr::from(std::fs::read_to_string("test.leaf").unwrap()), - file: PathBuf::from("test.leaf"), - })], + &[Arc::new( + SourceCode::from_file_arcstr(&"test.leaf").unwrap(), + )], ) { println!("{:#?}", err); return; diff --git a/compiler/src/metadata.rs b/compiler/src/metadata.rs index 098818c..373f4d1 100644 --- a/compiler/src/metadata.rs +++ b/compiler/src/metadata.rs @@ -1,4 +1,4 @@ -use leaf_parser::{LineCol, SourceCode}; +use leaf_parser::{LineCol, Parse, SourceCode}; use std::{ any::Any, ops::Range, @@ -26,41 +26,9 @@ impl CodePosition { pub fn line_col(&self) -> &Range { self.line_col.get_or_init(|| { - let mut line_col_range = LineCol { - line: 0, - column: 0, - offset: self.range.start, - }..LineCol { - line: 0, - column: 0, - offset: self.range.end, - }; - let mut line = 0; - 
let mut col = 0; - - for (byte_idx, ch) in self.file.text.char_indices() { - if byte_idx == self.range.start { - line_col_range.start.line = line; - line_col_range.start.column = col; - } - if byte_idx == self.range.end { - line_col_range.end.line = line; - line_col_range.end.column = col; - } - - if ch == '\n' { - line += 1; - col = 0; - } else { - col += 1; - } - } - - if self.range.end == self.file.text.len() { - line_col_range.end.line = line; - line_col_range.end.column = col; - } - line_col_range + let start = self.file.position_repr(self.range.start); + let end = self.file.position_repr(self.range.end); + start..end }) } } diff --git a/compiler/src/scope.rs b/compiler/src/scope.rs old mode 100644 new mode 100755 index 42d3604..17e262a --- a/compiler/src/scope.rs +++ b/compiler/src/scope.rs @@ -15,13 +15,12 @@ use leaf_assembly::{ }; use leaf_parser::{ SourceCode, - ast::{ - self, AccessExpr, BinaryExpr, BinaryOp, Block, ConstDecl, Else, Expr, Ident, If, - IndexingExpr, NamePattern, While, - }, + ast::{self, *}, }; use std::{ + borrow::Cow, collections::HashMap, + ops::Range, sync::{Arc, OnceLock}, }; @@ -79,11 +78,12 @@ impl<'l> Scope<'l> { ); } - pub fn declare_constants(&mut self, decl: &[ConstDecl]) { + pub fn declare_constants(&mut self, decl: &[AstNode]) { for val in decl { - for Ident(name) in val.names.as_slice() { + for range in val.names.as_slice() { + let name = self.get_text_arc(range); self.values.insert( - name.clone(), + name, Variable { value: Arc::default(), }, @@ -94,7 +94,7 @@ impl<'l> Scope<'l> { pub fn define_constants( &mut self, - decl: &[ConstDecl], + decl: &[AstNode], fn_queue: &mut FuncQueue<'l>, ) -> Result<(), Diagnostic> { for val in decl { @@ -107,10 +107,12 @@ impl<'l> Scope<'l> { fn_queue, }, )?; - match &val.names { + match &*val.names { NamePattern::Single(ident) => { + let name = self.get_text(ident).to_string(); + self.values - .get_mut(&ident.0) + .get_mut(&*name) .unwrap() .value .set(expr) @@ -118,7 +120,7 @@ impl<'l> 
Scope<'l> { self.ctx.emit_event(Event::Definition { value: expr, - position: CodePosition::new(self.source.clone(), ident.range()), + position: CodePosition::new(self.source.clone(), ident.clone()), }); } NamePattern::Tuple(_) => todo!(), @@ -131,7 +133,7 @@ impl<'l> Scope<'l> { pub fn compile_function( &mut self, func: &'l Function<'l>, - block: &Arc, + block: &Arc>, fn_queue: &mut FuncQueue<'l>, ) -> Result<(), Diagnostic> { let mut builder = func.create_body().unwrap(); @@ -154,7 +156,14 @@ impl<'l> Scope<'l> { if ret.is_lvalue() { ret = builder.load(ret).unwrap(); } - self.assert_ty_eq(&ret, &Expr::Block(block.clone()), &func.ty.ret_t)?; + self.assert_ty_eq( + &ret, + &AstNode { + range: block.range.clone(), + node: Expr::Block(block.clone()), + }, + &func.ty.ret_t, + )?; builder.ret(Some(ret)).unwrap(); } }; @@ -166,35 +175,38 @@ impl<'l> Scope<'l> { fn compile_expression( &mut self, - expr: &Expr, + expr: &AstNode, ctx: &mut ExpressionContext<'l, '_>, ) -> Result, Diagnostic> { - match expr { - Expr::Ident(Ident(name)) => match self.values.get(name) { - None => Err(Diagnostic { - kind: Kind::Error, - code: Code::SymbolNotFound, - message: format!("Symbol `{name}` does not exist in the current scope."), - position: CodePosition::new(self.source.clone(), name.range()), - cause: None, - }), - Some(Variable { value, .. 
}) => match value.get() { + match &**expr { + Expr::Ident(range) => { + let name = self.get_text(range); + match self.values.get(&*name) { None => Err(Diagnostic { kind: Kind::Error, - code: Code::UninitializedSymbol, - message: format!("Symbol `{name}` is not initialized at this time."), - position: CodePosition::new(self.source.clone(), name.range()), + code: Code::SymbolNotFound, + message: format!("Symbol `{name}` does not exist in the current scope."), + position: CodePosition::new(self.source.clone(), range.clone()), cause: None, }), - Some(value) => { - self.ctx.emit_event(Event::Symbol { - value: *value, - position: CodePosition::new(self.source.clone(), name.range()), - }); - Ok(*value) - } - }, - }, + Some(Variable { value, .. }) => match value.get() { + None => Err(Diagnostic { + kind: Kind::Error, + code: Code::UninitializedSymbol, + message: format!("Symbol `{name}` is not initialized at this time."), + position: CodePosition::new(self.source.clone(), range.clone()), + cause: None, + }), + Some(value) => { + self.ctx.emit_event(Event::Symbol { + value: *value, + position: CodePosition::new(self.source.clone(), range.clone()), + }); + Ok(*value) + } + }, + } + } Expr::Func(func) => self .make_function(func, ctx) @@ -203,7 +215,7 @@ impl<'l> Scope<'l> { kind: Kind::Error, code: Code::FunctionCompilationFailed, message: "Could not compile function.".to_string(), - position: CodePosition::new(self.source.clone(), func.text.range()), + position: CodePosition::new(self.source.clone(), expr.range.clone()), cause: Some(Box::new(err)), }), @@ -212,23 +224,25 @@ impl<'l> Scope<'l> { Expr::Number(n) => { macro_rules! 
parse_number { - ($ty: ty, $id: ident) => { - match n.text.split_at_checked(2) { + ($ty: ty, $id: ident) => {{ + let text = self.get_text(&n.number); + match text.split_at_checked(2) { Some(("0b", value)) => <$ty>::from_str_radix(value, 2), Some(("0x", value)) => <$ty>::from_str_radix(value, 16), - _ => n.text.parse::<$ty>(), + _ => text.parse::<$ty>(), } .map(|v| AnyValue::Constant(AnyConst::Int(Int::$id(v)))) .map_err(|_| Diagnostic { kind: Kind::Error, code: Code::InvalidInteger, - message: format!("`{}` is not a valid integer.", n.text), - position: CodePosition::new(self.source.clone(), n.text.range()), + message: format!("`{}` is not a valid integer.", text), + position: CodePosition::new(self.source.clone(), n.number.clone()), cause: None, }) - }; + }}; } - let value = match n.r#type.as_ref().map(|v| v.as_str()) { + let ty = n.ty.as_ref().map(|v| self.get_text(v)); + let value = match ty.as_ref().map(|ty| &**ty) { None if ctx.type_hint == Some(Type::I8) => parse_number!(i8, I8), None if ctx.type_hint == Some(Type::I16) => parse_number!(i16, I16), None if ctx.type_hint == Some(Type::I32) => parse_number!(i32, I32), @@ -259,24 +273,25 @@ impl<'l> Scope<'l> { kind: Kind::Error, code: Code::InvalidIntegerType, message: format!("`{ty}` is not a valid integer type."), - position: CodePosition::new(self.source.clone(), n.text.range()), + position: CodePosition::new(self.source.clone(), n.ty.clone().unwrap()), cause: None, }), }?; self.ctx.emit_event(Event::Symbol { value: value, - position: CodePosition::new(self.source.clone(), n.text.range()), + position: CodePosition::new(self.source.clone(), n.number.clone()), }); Ok(value) } - Expr::Access(expr) => { - let AccessExpr { - value: value_expr, - field, - } = &**expr; + Expr::Access { + value: value_expr, + operator, + field, + } => { let value = self.compile_expression(value_expr, ctx)?; - if let Some(value) = value.get_associated_value(&field.0) { + let name = self.get_text(field); + if let Some(value) = 
value.get_associated_value(&*name) { return Ok(value); } @@ -284,7 +299,7 @@ impl<'l> Scope<'l> { match value.ty() { Type::Struct(StructT { fields, .. }) => { if let Some(fields) = fields.get() { - if let Some(field) = fields.get(field.0.as_str()) { + if let Some(field) = fields.get(&*name) { let builder = ctx.builder.as_mut().unwrap(); break 'value Some( builder @@ -301,7 +316,7 @@ impl<'l> Scope<'l> { .. }) => { if let Some(fields) = fields.get() { - if let Some(field) = fields.get(field.0.as_str()) { + if let Some(field) = fields.get(&*name) { let builder = ctx.builder.as_mut().unwrap(); let inst = builder .get_element_ptr(value, field.name.as_any_value()) @@ -326,7 +341,7 @@ impl<'l> Scope<'l> { if let Some(value) = value { self.ctx.emit_event(Event::Symbol { value: value, - position: CodePosition::new(self.source.clone(), field.range()), + position: CodePosition::new(self.source.clone(), field.clone()), }); return Ok(value); } @@ -334,25 +349,23 @@ impl<'l> Scope<'l> { return Err(Diagnostic { kind: Kind::Error, code: Code::FieldNotFound, - message: format!("Value does not contain field `{}`.", field.0), - position: CodePosition::new(self.source.clone(), field.range()), + message: format!("Value does not contain field `{name}`."), + position: CodePosition::new(self.source.clone(), field.clone()), cause: None, }); } - Expr::Binary(bin_expr) => { - let BinaryExpr { - lhs: lhs_expr, - rhs: rhs_expr, - op, - } = &**bin_expr; - + Expr::Binary { + lhs: lhs_expr, + rhs: rhs_expr, + operator, + } => { let mut lhs = self.compile_expression(lhs_expr, ctx)?; let type_hint = if lhs.is_lvalue() { let Type::Ptr(PtrT { base, .. 
}) = lhs.ty() else { unreachable!(); }; - if !matches!(op, BinaryOp::Assign(_)) { + if !matches!(operator.node, BinaryOperator::Assign) { lhs = ctx.builder.as_mut().unwrap().load(lhs).unwrap(); } *base @@ -372,7 +385,7 @@ impl<'l> Scope<'l> { const exact $([$($ty:ident),*] $op:pat => |$a:ident, $b:ident| $expr:expr,)* ) => { - match op { + match operator.node { $( $op => match (lhs, rhs) { $( @@ -391,7 +404,7 @@ impl<'l> Scope<'l> { const auto $([$($ty:ident),*] $op:pat => |$a:ident, $b:ident| $expr:expr,)* ) => { - match op { + match operator.node { $( $op => match (lhs, rhs) { $( @@ -411,20 +424,20 @@ impl<'l> Scope<'l> { if lhs.is_const() && rhs.is_const() { int_bin_ops! { const exact - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Add(_) => |a, b| a + b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Sub(_) => |a, b| a - b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Mul(_) => |a, b| a - b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Div(_) => |a, b| a - b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Mod(_) => |a, b| a - b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Add => |a, b| a + b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Sub => |a, b| a - b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Mul => |a, b| a - b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Div => |a, b| a - b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Mod => |a, b| a - b, } int_bin_ops! 
{ const auto - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Eq(_) => |a, b| a == b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Ne(_) => |a, b| a == b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Lt(_) => |a, b| a == b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Gt(_) => |a, b| a == b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Le(_) => |a, b| a == b, - [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOp::Ge(_) => |a, b| a == b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Eq => |a, b| a == b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Ne => |a, b| a == b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Lt => |a, b| a == b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Gt => |a, b| a == b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Le => |a, b| a == b, + [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, ISize, USize] BinaryOperator::Ge => |a, b| a == b, } } @@ -434,19 +447,19 @@ impl<'l> Scope<'l> { (Type::Int(a_ty), Type::Int(b_ty)) => a_ty == b_ty, _ => false, } { - return Ok(match op { - BinaryOp::Add(_) => builder.add(lhs, rhs).unwrap(), - BinaryOp::Sub(_) => builder.sub(lhs, rhs).unwrap(), - BinaryOp::Mul(_) => builder.mul(lhs, rhs).unwrap(), - BinaryOp::Div(_) => builder.div(lhs, rhs).unwrap(), - BinaryOp::Mod(_) => builder.modulo(lhs, rhs).unwrap(), - BinaryOp::Eq(_) => builder.cmp(lhs, rhs, Cmp::Eq).unwrap(), - BinaryOp::Ne(_) => builder.cmp(lhs, rhs, Cmp::Ne).unwrap(), - BinaryOp::Lt(_) => builder.cmp(lhs, rhs, Cmp::Lt).unwrap(), - BinaryOp::Gt(_) => builder.cmp(lhs, rhs, Cmp::Gt).unwrap(), - BinaryOp::Le(_) => builder.cmp(lhs, rhs, Cmp::Le).unwrap(), - BinaryOp::Ge(_) => 
builder.cmp(lhs, rhs, Cmp::Ge).unwrap(), - _ => todo!("{lhs:?} {op:?} {rhs:?}"), + return Ok(match operator.node { + BinaryOperator::Add => builder.add(lhs, rhs).unwrap(), + BinaryOperator::Sub => builder.sub(lhs, rhs).unwrap(), + BinaryOperator::Mul => builder.mul(lhs, rhs).unwrap(), + BinaryOperator::Div => builder.div(lhs, rhs).unwrap(), + BinaryOperator::Mod => builder.modulo(lhs, rhs).unwrap(), + BinaryOperator::Eq => builder.cmp(lhs, rhs, Cmp::Eq).unwrap(), + BinaryOperator::Ne => builder.cmp(lhs, rhs, Cmp::Ne).unwrap(), + BinaryOperator::Lt => builder.cmp(lhs, rhs, Cmp::Lt).unwrap(), + BinaryOperator::Gt => builder.cmp(lhs, rhs, Cmp::Gt).unwrap(), + BinaryOperator::Le => builder.cmp(lhs, rhs, Cmp::Le).unwrap(), + BinaryOperator::Ge => builder.cmp(lhs, rhs, Cmp::Ge).unwrap(), + _ => todo!("{lhs:?} {:?} {rhs:?}", operator.node), }); } @@ -456,26 +469,27 @@ impl<'l> Scope<'l> { } { let lhs = builder.ptr_to_int(lhs, IntT::USIZE).unwrap(); let rhs = builder.ptr_to_int(rhs, IntT::USIZE).unwrap(); - return Ok(match op { - BinaryOp::Eq(_) => builder.cmp(lhs, rhs, Cmp::Eq).unwrap(), - BinaryOp::Ne(_) => builder.cmp(lhs, rhs, Cmp::Ne).unwrap(), - BinaryOp::Lt(_) => builder.cmp(lhs, rhs, Cmp::Lt).unwrap(), - BinaryOp::Gt(_) => builder.cmp(lhs, rhs, Cmp::Gt).unwrap(), - BinaryOp::Le(_) => builder.cmp(lhs, rhs, Cmp::Le).unwrap(), - BinaryOp::Ge(_) => builder.cmp(lhs, rhs, Cmp::Ge).unwrap(), - _ => todo!("{lhs:?} {op:?} {rhs:?}"), + return Ok(match operator.node { + BinaryOperator::Eq => builder.cmp(lhs, rhs, Cmp::Eq).unwrap(), + BinaryOperator::Ne => builder.cmp(lhs, rhs, Cmp::Ne).unwrap(), + BinaryOperator::Lt => builder.cmp(lhs, rhs, Cmp::Lt).unwrap(), + BinaryOperator::Gt => builder.cmp(lhs, rhs, Cmp::Gt).unwrap(), + BinaryOperator::Le => builder.cmp(lhs, rhs, Cmp::Le).unwrap(), + BinaryOperator::Ge => builder.cmp(lhs, rhs, Cmp::Ge).unwrap(), + _ => todo!("{lhs:?} {:?} {rhs:?}", operator.node), }); } - match (lhs_ty, rhs_ty, op) { - (Type::Ptr(ptr @ PtrT { base, .. 
}), Type::USIZE, BinaryOp::Add(_)) => { + match (lhs_ty, rhs_ty, operator.node) { + (Type::Ptr(ptr @ PtrT { base, .. }), Type::USIZE, BinaryOperator::Add) => { let mut value = builder.ptr_to_int(lhs, IntT::USIZE).unwrap(); let add = builder.mul(rhs, AnyConst::SizeOf(*base).into()).unwrap(); value = builder.add(value, add).unwrap(); value = builder.int_to_ptr(value, ptr).unwrap(); Ok(value) } - (Type::Ptr(PtrT { base, .. }), ty, BinaryOp::Assign(_)) => match *base == ty { + (Type::Ptr(PtrT { base, .. }), ty, BinaryOperator::Assign) => match *base == ty + { true => Ok(builder.store(lhs, rhs).unwrap()), false => Err(Diagnostic { kind: Kind::Error, @@ -483,16 +497,16 @@ impl<'l> Scope<'l> { message: format!( "Cannot assign a value of type `{ty}` to a value of type `{base}`." ), - position: CodePosition::new(self.source.clone(), expr.range()), + position: CodePosition::new(self.source.clone(), expr.range.clone()), cause: None, }), }, - (src_ty, Type::Type, BinaryOp::Cast(_)) => { + (src_ty, Type::Type, BinaryOperator::Cast) => { let dst_ty = self.assert_ty(rhs, rhs_expr).map_err(|err| Diagnostic { kind: Kind::Error, code: Code::InvalidCast, message: "Cannot perform cast.".to_string(), - position: CodePosition::new(self.source.clone(), expr.range()), + position: CodePosition::new(self.source.clone(), expr.range.clone()), cause: Some(Box::new(err)), })?; if src_ty == dst_ty { @@ -514,14 +528,17 @@ impl<'l> Scope<'l> { _ => todo!("{src_ty} as {dst_ty}"), } } - (a, b, _) => todo!("{a} {op:?} {b} | {lhs:?} {op:?} {rhs:?}"), + (a, b, _) => todo!( + "{a} {op:?} {b} | {lhs:?} {op:?} {rhs:?}", + op = operator.node, + ), } } Expr::If(expr) => self.compile_if(expr, ctx), Expr::While(expr) => { - let While { cond, block } = &**expr; + let While { cond, block, .. 
} = &**expr; let mut builder = ctx.builder.as_mut().unwrap(); let cond_block = builder.create_block(); @@ -555,7 +572,7 @@ impl<'l> Scope<'l> { kind: Kind::Error, code: Code::NotAFunction, message: "Value is not a function".into(), - position: CodePosition::new(self.source.clone(), func.range()), + position: CodePosition::new(self.source.clone(), func.range.clone()), cause: None, }); } @@ -573,86 +590,84 @@ impl<'l> Scope<'l> { let builder = ctx.builder.as_mut().unwrap(); Ok(builder.call(func, args).unwrap()) } - - Expr::Type(ty_expr) => match &**ty_expr { - ast::Type::Struct(ast::Struct { fields }) => { - let name = match &ctx.decl_names { - Some(NamePattern::Single(func_name)) => func_name.0.as_str(), - _ => "", - }; - let struct_ty = self.assembly.create_struct(name); - let mut scope = self.clone(); - let mut expr_ctx = ExpressionContext { - builder: None, - decl_names: None, - type_hint: Some(Type::Type), - fn_queue: ctx.fn_queue, - }; - let ctx = self.assembly.ctx(); - - scope.insert(literal_substr!("Self"), struct_ty.as_any_value()); - let mut field_map = FieldMap::default(); - for ast::Field { - name, - ty: ty_expr, - public, - mutable, - } in fields - { - let ty = scope.compile_expression(ty_expr, &mut expr_ctx)?; - let name = ctx.intern_str(&name.0); - field_map.insert( - name, - Field { - name, - ty: self.assert_ty(ty, ty_expr)?, - public: public.is_some(), - mutable: mutable.is_some(), - }, - ); - } - struct_ty.fields.set(field_map).unwrap(); - Ok(struct_ty.as_any_value()) + Expr::Ptr { mutable, base, .. } => match self.compile_expression(base, ctx)? { + AnyValue::Constant(AnyConst::Type(ty)) => { + Ok(AnyConst::Type(Type::Ptr(ty.make_ptr(mutable.is_some()))).into()) } - ast::Type::Ptr { base, mutable } => match self.compile_expression(base, ctx)? 
{ - AnyValue::Constant(AnyConst::Type(ty)) => { - Ok(AnyConst::Type(Type::Ptr(ty.make_ptr(mutable.is_some()))).into()) + AnyValue::Instruction(inst) if inst.is_lvalue() => { + let Type::Ptr(PtrT { + base, + mutable: is_mut, + .. + }) = inst.ty() + else { + unreachable!() + }; + if mutable.is_some() && !*is_mut { + return Err(Diagnostic { + kind: Kind::Error, + code: Code::NotAFunction, + message: "Cannot obtain a mutable pointer to an immutable value." + .into(), + position: CodePosition::new(self.source.clone(), expr.range.clone()), + cause: None, + }); } - AnyValue::Instruction(inst) if inst.is_lvalue() => { - let Type::Ptr(PtrT { - base, - mutable: is_mut, - .. - }) = inst.ty() - else { - unreachable!() - }; - if mutable.is_some() && !*is_mut { - return Err(Diagnostic { - kind: Kind::Error, - code: Code::NotAFunction, - message: "Cannot obtain a mutable pointer to an immutable value." - .into(), - position: CodePosition::new(self.source.clone(), expr.range()), - cause: None, - }); - } - let mut flags = inst.flags(); - let builder = ctx.builder.as_mut().unwrap(); - flags.remove( - ValueFlags::Mutable | ValueFlags::Volatile | ValueFlags::LValue, - ); - let ptr = Type::Ptr(base.make_ptr(mutable.is_some())); - unsafe { - Ok(builder - .reinterpret(AnyValue::Instruction(inst), ptr, flags) - .unwrap()) - } + let mut flags = inst.flags(); + let builder = ctx.builder.as_mut().unwrap(); + flags.remove(ValueFlags::Mutable | ValueFlags::Volatile | ValueFlags::LValue); + let ptr = Type::Ptr(base.make_ptr(mutable.is_some())); + unsafe { + Ok(builder + .reinterpret(AnyValue::Instruction(inst), ptr, flags) + .unwrap()) } - v => todo!("{v:?}"), - }, - v => todo!("{v:#?}"), + } + v => todo!("{v:?}"), }, + Expr::Struct { fields, .. 
} => { + let name = match &ctx.decl_names { + Some(NamePattern::Single(func_name)) => &*self.get_text(func_name), + _ => "", + }; + let struct_ty = self.assembly.create_struct(name); + let mut scope = self.clone(); + let mut expr_ctx = ExpressionContext { + builder: None, + decl_names: None, + type_hint: Some(Type::Type), + fn_queue: ctx.fn_queue, + }; + let ctx = self.assembly.ctx(); + + scope.insert(literal_substr!("Self"), struct_ty.as_any_value()); + let mut field_map = FieldMap::default(); + for AstNode { + range, + node: + ast::Field { + name, + ty: ty_expr, + public, + mutable, + }, + } in fields + { + let ty = scope.compile_expression(ty_expr, &mut expr_ctx)?; + let name = ctx.intern_str(&self.get_text(name)); + field_map.insert( + name, + Field { + name, + ty: self.assert_ty(ty, ty_expr)?, + public: public.is_some(), + mutable: mutable.is_some(), + }, + ); + } + struct_ty.fields.set(field_map).unwrap(); + Ok(struct_ty.as_any_value()) + } Expr::List(expr) => { let mut expr = expr.iter(); @@ -683,7 +698,7 @@ impl<'l> Scope<'l> { todo!() } - Expr::Deref(expr) => { + Expr::Deref { value: expr, .. 
} => { let value = self.compile_expression(expr, ctx)?; let builder = ctx.builder.as_mut().unwrap(); let ty = value.ty(); @@ -695,7 +710,10 @@ impl<'l> Scope<'l> { kind: Kind::Error, code: Code::CannotDereference, message: format!("Cannot dereference a value of type `{ty}`."), - position: CodePosition::new(self.source.clone(), expr.range()), + position: CodePosition::new( + self.source.clone(), + expr.range.clone(), + ), cause: None, }); } @@ -719,7 +737,10 @@ impl<'l> Scope<'l> { kind: Kind::Error, code: Code::CannotDereference, message: format!("Cannot dereference a value of type `{base}`."), - position: CodePosition::new(self.source.clone(), expr.range()), + position: CodePosition::new( + self.source.clone(), + expr.range.clone(), + ), cause: None, }); } @@ -733,10 +754,12 @@ impl<'l> Scope<'l> { } } - Expr::Index(expr) => { - let IndexingExpr { value, index } = &**expr; + Expr::Index { + value, + index: index_expr, + } if index_expr.len() == 1 => { let mut value = self.compile_expression(value, ctx)?; - let mut index = self.compile_expression(index, ctx)?; + let mut index = self.compile_expression(&index_expr[0], ctx)?; let builder = ctx.builder.as_mut().unwrap(); if index.is_lvalue() { @@ -749,7 +772,10 @@ impl<'l> Scope<'l> { kind: Kind::Error, code: Code::InvalidType, message: "Value is not of type `usize`".into(), - position: CodePosition::new(self.source.clone(), expr.index.range()), + position: CodePosition::new( + self.source.clone(), + index_expr[0].range.clone(), + ), cause: None, }); } @@ -777,8 +803,11 @@ impl<'l> Scope<'l> { todo!("{:#?}", value.ty()); } - Expr::Struct(ctor) => { - let ty = match &ctor.r#type { + Expr::StructCtor { + ty: ctor_ty, + values: ctor_values, + } => { + let ty = match ctor_ty { Some(ty) => self.compile_expression(ty, ctx)?, None => match ctx.type_hint { Some(ty) => AnyValue::Constant(AnyConst::Type(ty)), @@ -787,7 +816,10 @@ impl<'l> Scope<'l> { kind: Kind::Error, code: Code::CannotInferType, message: "Type cannot be 
inferred.".into(), - position: CodePosition::new(self.source.clone(), ctor.range()), + position: CodePosition::new( + self.source.clone(), + expr.range.clone(), + ), cause: None, }); } @@ -803,9 +835,9 @@ impl<'l> Scope<'l> { message: format!("Expected struct type, got value of type `{}`.", ty.ty()), position: CodePosition::new( self.source.clone(), - match &ctor.r#type { - None => ctor.range(), - Some(ty) => ty.range(), + match ctor_ty { + None => expr.range.clone(), + Some(ty) => ty.range.clone(), }, ), cause: None, @@ -819,12 +851,18 @@ impl<'l> Scope<'l> { name: fld_name, ty, .. } in fields.values() { - let Some(name_value_pair) = ctor.values.get(*fld_name) else { + let Some(name_value_pair) = ctor_values.iter().find_map(|v| { + let name = self.get_text(&v.name); + match name == *fld_name { + true => Some(&v.node), + false => None, + } + }) else { return Err(Diagnostic { kind: Kind::Error, code: Code::UninitializedField, message: format!("Uninitialized field `{fld_name}`."), - position: CodePosition::new(self.source.clone(), expr.range()), + position: CodePosition::new(self.source.clone(), expr.range.clone()), cause: None, }); }; @@ -862,14 +900,17 @@ impl<'l> Scope<'l> { fn make_function( &mut self, - ast: &Arc, + ast: &Arc>, ctx: &mut ExpressionContext<'l, '_>, ) -> Result<&'l Function<'l>, Diagnostic> { let ret_ty = match ast.ret.as_ref() { None => AnyValue::Constant(AnyConst::Type(Type::Void)), Some(ty) => self.compile_expression(ty, ctx)?, }; - let ast_as_expr = Expr::Func(ast.clone()); + let ast_as_expr = AstNode { + range: ast.range.clone(), + node: Expr::Func(ast.clone()), + }; let ret_ty = self.assert_ty(ret_ty, ast.ret.as_ref().unwrap_or(&ast_as_expr))?; let mut par_ty = Vec::with_capacity(ast.args.len()); for arg in &ast.args { @@ -880,7 +921,7 @@ impl<'l> Scope<'l> { let fn_ty = ret_ty.make_fn(par_ty); let name = match &ctx.decl_names { - Some(NamePattern::Single(func_name)) => func_name.0.as_str(), + Some(NamePattern::Single(func_name)) => 
&*self.get_text(func_name), _ => "", }; let func = self.assembly.create_function(fn_ty, name); @@ -890,7 +931,8 @@ impl<'l> Scope<'l> { let mut scope = self.clone(); for (i, arg) in ast.args.iter().enumerate() { - scope.insert(arg.name.0.clone(), AnyValue::Parameter(i, func)); + let name = self.get_text_arc(&arg.name); + scope.insert(name, AnyValue::Parameter(i, func)); } ctx.fn_queue.push_back((func, block.clone(), scope)); @@ -901,7 +943,7 @@ impl<'l> Scope<'l> { fn compile_decl( &mut self, names: &NamePattern, - value: &Expr, + value: &AstNode, mutable: bool, ctx: &mut ExpressionContext<'l, '_>, ) -> Result, Diagnostic> { @@ -923,8 +965,9 @@ impl<'l> Scope<'l> { } match names { NamePattern::Single(ident) => { + let name = self.get_text_arc(ident); self.values.insert( - ident.0.clone(), + name, Variable { value: Arc::new(OnceLock::from(value)), }, @@ -932,7 +975,7 @@ impl<'l> Scope<'l> { self.ctx.emit_event(Event::Definition { value: value, - position: CodePosition::new(self.source.clone(), ident.range()), + position: CodePosition::new(self.source.clone(), ident.clone()), }); } NamePattern::Tuple(_) => todo!(), @@ -970,7 +1013,9 @@ impl<'l> Scope<'l> { expr: &If, ctx: &mut ExpressionContext<'l, '_>, ) -> Result, Diagnostic> { - let If { cond, block, else_ } = expr; + let If { + cond, block, else_, .. + } = expr; let condition = self.compile_expression(cond, ctx)?; self.assert_ty_eq(&condition, cond, &Type::Bool)?; @@ -997,8 +1042,8 @@ impl<'l> Scope<'l> { builder.set_current_block(else_block); let else_val = match &**else_ { - Else::Block(block) => self.compile_block(block, ctx)?, - Else::If(if_) => self.compile_if(if_, ctx)?, + Else::Block { expr, .. } => self.compile_block(expr, ctx)?, + Else::If { expr, .. 
} => self.compile_if(expr, ctx)?, }; let builder = ctx.builder.as_mut().unwrap(); @@ -1027,14 +1072,18 @@ impl<'l> Scope<'l> { .into()) } - fn assert_ty(&self, val: AnyValue<'l>, value_expr: &Expr) -> Result, Diagnostic> { + fn assert_ty( + &self, + val: AnyValue<'l>, + value_expr: &AstNode, + ) -> Result, Diagnostic> { match val { AnyValue::Constant(AnyConst::Type(ty)) => Ok(ty), _ => Err(Diagnostic { kind: Kind::Error, code: Code::NotAType, message: "Value is not a type.".to_string(), - position: CodePosition::new(self.source.clone(), value_expr.range()), + position: CodePosition::new(self.source.clone(), value_expr.range.clone()), cause: None, }), } @@ -1043,7 +1092,7 @@ impl<'l> Scope<'l> { pub fn assert_ty_eq( &self, value: &AnyValue<'l>, - value_expr: &Expr, + value_expr: &AstNode, expected: &Type<'l>, ) -> Result, Diagnostic> { let value_ty = value.ty(); @@ -1054,10 +1103,24 @@ impl<'l> Scope<'l> { kind: Kind::Error, code: Code::InvalidType, message: format!("Expected value of type `{expected}`, found `{value_ty}`."), - position: CodePosition::new(self.source.clone(), value_expr.range()), + position: CodePosition::new(self.source.clone(), value_expr.range.clone()), cause: None, }); } } } + + fn get_text(&self, range: &Range) -> Cow<'_, str> { + match &self.source.text { + leaf_parser::Text::ArcStr(arc_str, _) => { + Cow::Borrowed(&arc_str.as_str()[range.clone()]) + } + } + } + + fn get_text_arc(&self, range: &Range) -> Substr { + match &self.source.text { + leaf_parser::Text::ArcStr(arc_str, _) => arc_str.substr(range.clone()), + } + } } diff --git a/lsp/Cargo.toml b/lsp/Cargo.toml index bf83b99..37be73a 100644 --- a/lsp/Cargo.toml +++ b/lsp/Cargo.toml @@ -14,3 +14,4 @@ scc = "3.6.9" rust_search = "2.1.0" rangemap = "1.7.1" boxcar = "0.2.14" +ropey = "1.6.1" diff --git a/lsp/src/main.rs b/lsp/src/main.rs index 7eb459b..b264106 100644 --- a/lsp/src/main.rs +++ b/lsp/src/main.rs @@ -1,3 +1,4 @@ +use crate::parsing::semantic_tokens; use crate::utils::UriUtils; 
use crate::workspace::{Workspace, start_workspace_thread}; use std::sync::Arc; @@ -6,6 +7,8 @@ use tower_lsp_server::jsonrpc::Result; use tower_lsp_server::ls_types::request::{GotoDeclarationParams, GotoDeclarationResponse}; use tower_lsp_server::{Client, LspService, Server}; use tower_lsp_server::{LanguageServer, ls_types::*}; + +mod parsing; mod utils; mod workspace; @@ -36,6 +39,7 @@ impl LanguageServer for Backend { save: Some(TextDocumentSyncSaveOptions::SaveOptions(SaveOptions { include_text: Some(false), })), + change: Some(TextDocumentSyncKind::INCREMENTAL), ..Default::default() }, )), @@ -43,15 +47,11 @@ impl LanguageServer for Backend { SemanticTokensServerCapabilities::SemanticTokensOptions( SemanticTokensOptions { legend: SemanticTokensLegend { - token_types: vec![ - SemanticTokenType::TYPE, - SemanticTokenType::FUNCTION, - SemanticTokenType::NUMBER, - ], + token_types: semantic_tokens::TOKENS.to_vec(), token_modifiers: vec![], }, full: Some(SemanticTokensFullOptions::Bool(true)), - range: None, + range: Some(false), ..Default::default() }, ), @@ -104,6 +104,12 @@ impl LanguageServer for Backend { Ok(None) } + async fn did_change(&self, params: DidChangeTextDocumentParams) { + if let Some(w) = self.find_workspace(&params.text_document.uri).await { + w.file_changed(params).await; + } + } + async fn did_save(&self, params: DidSaveTextDocumentParams) { if let Some(w) = self.find_workspace(&params.text_document.uri).await { w.reload().await; diff --git a/lsp/src/parsing.rs b/lsp/src/parsing.rs new file mode 100644 index 0000000..d0755f9 --- /dev/null +++ b/lsp/src/parsing.rs @@ -0,0 +1,334 @@ +use crate::utils::{correct_semantic_token_deltas, make_diagnostics}; +use leaf_compiler::metadata::CodePosition; +use leaf_parser::{SourceCode, Text, ast::*}; +use std::{sync::Arc, time::Instant}; +use tower_lsp_server::ls_types::{Diagnostic, SemanticToken, SemanticTokens, SemanticTokensResult}; + +pub mod semantic_tokens { + use tower_lsp_server::ls_types::SemanticTokenType;
+ + pub const TOKENS: &[SemanticTokenType] = &[ + SemanticTokenType::KEYWORD, + SemanticTokenType::VARIABLE, + SemanticTokenType::TYPE, + SemanticTokenType::FUNCTION, + SemanticTokenType::NUMBER, + SemanticTokenType::PARAMETER, + SemanticTokenType::PROPERTY, + SemanticTokenType::OPERATOR, + ]; + pub const KEYWORD: u32 = 0; + pub const VARIABLE: u32 = 1; + pub const TYPE: u32 = 2; + pub const FUNCTION: u32 = 3; + pub const NUMBER: u32 = 4; + pub const PARAMETER: u32 = 5; + pub const PROPERTY: u32 = 6; + pub const OPERATOR: u32 = 7; +} + +pub struct DocumentParsingResult { + pub tokens: SemanticTokensResult, + pub diagnostics: Vec, +} + +pub fn parse_document(code: &Arc) -> DocumentParsingResult { + let now = Instant::now(); + let ast = match code.ast() { + Ok(d) => d, + Err(err) => { + return DocumentParsingResult { + tokens: SemanticTokensResult::Tokens(SemanticTokens::default()), + diagnostics: make_diagnostics( + &leaf_compiler::diagnostics::Diagnostic::parsing_err(code.clone(), err.clone()), + ), + }; + } + }; + + let mut tokens = SemanticTokens::default(); + for decl in &ast.decls { + decl.push_semantic_tokens(code, &mut tokens, None); + } + correct_semantic_token_deltas(&mut tokens); + + let size = match &code.text { + // #[cfg(feature = "rope")] + // Text::Rope(rope) => rope.byte_len(), + Text::ArcStr(arc_str, _) => arc_str.len(), + }; + + eprintln!( + "Parsed {:?} ({}KB) in {:?}", + code.file, + size as f32 / 1000.0, + now.elapsed() + ); + DocumentParsingResult { + tokens: SemanticTokensResult::Tokens(tokens), + diagnostics: vec![], + } +} + +macro_rules! 
push_token { + ($file: expr, $tokens: expr, $range: expr, $type: expr) => {{ + let position = CodePosition::new($file.clone(), $range.clone()); + let line_col = position.line_col(); + $tokens.data.push(SemanticToken { + delta_line: line_col.start.line as u32, + delta_start: line_col.start.column as u32, + length: position.range.len() as u32, + token_type: $type, + token_modifiers_bitset: 0, + }); + }}; +} + +trait PushSemanticTokens { + fn push_semantic_tokens( + &self, + file: &Arc, + tokens: &mut SemanticTokens, + hint: Option, + ); +} + +impl PushSemanticTokens for AstNode { + fn push_semantic_tokens( + &self, + file: &Arc, + tokens: &mut SemanticTokens, + hint: Option, + ) { + match self.names.as_slice() { + [ident] => { + push_token!( + file, + tokens, + ident, + match &self.value.node { + Expr::Func(_) => semantic_tokens::FUNCTION, + Expr::Ptr { .. } => semantic_tokens::TYPE, + Expr::Struct { .. } => semantic_tokens::TYPE, + _ => semantic_tokens::VARIABLE, + } + ); + } + _ => {} + } + self.value.push_semantic_tokens(file, tokens, hint); + } +} + +impl PushSemanticTokens for AstNode { + fn push_semantic_tokens( + &self, + file: &Arc, + tokens: &mut SemanticTokens, + hint: Option, + ) { + match &self.node { + Expr::Ident(range) => { + push_token!( + file, + tokens, + range, + hint.unwrap_or(semantic_tokens::VARIABLE) + ); + } + Expr::Number(_) => { + push_token!(file, tokens, self.range, semantic_tokens::NUMBER); + } + Expr::If(expr) => expr.push_semantic_tokens(file, tokens, None), + Expr::While(expr) => expr.push_semantic_tokens(file, tokens, None), + Expr::Func(func) => { + let Function { + fn_tok, + args, + ret, + block, + .. + } = &func.node; + push_token!(file, tokens, fn_tok, semantic_tokens::KEYWORD); + for AstNode { + node: Parameter { name, value }, + .. 
+ } in args + { + push_token!(file, tokens, name, semantic_tokens::PARAMETER); + value.push_semantic_tokens(file, tokens, Some(semantic_tokens::TYPE)); + } + if let Some(ret) = ret { + push_token!(file, tokens, ret.range, semantic_tokens::TYPE); + } + if let Some(block) = block { + block.push_semantic_tokens(file, tokens, hint); + } + } + Expr::VarDecl(expr) => { + let VarDecl { + names, + r#type, + value, + } = &**expr; + for range in names.as_slice() { + push_token!(file, tokens, range, semantic_tokens::VARIABLE); + } + if let Some(ty) = r#type { + ty.push_semantic_tokens(file, tokens, Some(semantic_tokens::TYPE)); + } + value.push_semantic_tokens(file, tokens, hint); + } + Expr::ConstDecl(expr) => { + let ConstDecl { + names, + r#type, + value, + } = &**expr; + for range in names.as_slice() { + push_token!(file, tokens, range, semantic_tokens::VARIABLE); + } + if let Some(ty) = r#type { + ty.push_semantic_tokens(file, tokens, Some(semantic_tokens::TYPE)); + } + value.push_semantic_tokens(file, tokens, hint); + } + Expr::Ptr { mutable, base, .. } => { + if let Some(mutable) = mutable { + push_token!(file, tokens, mutable, semantic_tokens::KEYWORD); + } + base.push_semantic_tokens(file, tokens, hint); + } + Expr::Struct { fields, struct_tok } => { + push_token!(file, tokens, struct_tok, semantic_tokens::KEYWORD); + for AstNode { + node: Field { + name, + public, + mutable, + ty, + }, + .. 
+ } in fields + { + if let Some(public) = public { + push_token!(file, tokens, public, semantic_tokens::KEYWORD); + } + if let Some(mutable) = mutable { + push_token!(file, tokens, mutable, semantic_tokens::KEYWORD); + } + push_token!(file, tokens, name, semantic_tokens::PROPERTY); + ty.push_semantic_tokens(file, tokens, Some(semantic_tokens::TYPE)); + } + } + Expr::StructCtor { ty, values } => { + if let Some(ty) = ty { + push_token!(file, tokens, ty.range, semantic_tokens::TYPE); + } + for value in values { + value.push_semantic_tokens(file, tokens, None); + } + } + Expr::Deref { value, operator } => { + value.push_semantic_tokens(file, tokens, None); + push_token!(file, tokens, operator, semantic_tokens::OPERATOR); + } + Expr::Binary { + lhs, + operator: AstNode { + node: BinaryOperator::Cast, + range, + }, + rhs, + } => { + lhs.push_semantic_tokens(file, tokens, None); + push_token!(file, tokens, range, semantic_tokens::KEYWORD); + rhs.push_semantic_tokens(file, tokens, Some(semantic_tokens::TYPE)); + } + Expr::Binary { + lhs, + rhs, + operator: AstNode { range, .. }, + } => { + lhs.push_semantic_tokens(file, tokens, None); + rhs.push_semantic_tokens(file, tokens, None); + push_token!(file, tokens, range, semantic_tokens::OPERATOR); + } + Expr::Access { value, field, .. 
} => { + push_token!(file, tokens, value.range, semantic_tokens::VARIABLE); + push_token!(file, tokens, field, semantic_tokens::PROPERTY); + value.push_semantic_tokens(file, tokens, None); + } + Expr::Call { func, args } => { + push_token!(file, tokens, func.range, semantic_tokens::FUNCTION); + for arg in args.iter() { + arg.push_semantic_tokens(file, tokens, None); + } + } + _ => {} + } + } +} + +impl PushSemanticTokens for Block { + fn push_semantic_tokens( + &self, + file: &Arc, + tokens: &mut SemanticTokens, + _: Option, + ) { + for expr in &self.0 { + expr.push_semantic_tokens(file, tokens, None); + } + } +} + +impl PushSemanticTokens for If { + fn push_semantic_tokens( + &self, + file: &Arc, + tokens: &mut SemanticTokens, + _: Option, + ) { + push_token!(file, tokens, self.if_tok, semantic_tokens::KEYWORD); + self.cond.push_semantic_tokens(file, tokens, None); + self.block.push_semantic_tokens(file, tokens, None); + if let Some(expr) = &self.else_ { + match &**expr { + Else::If { else_tok, expr } => { + push_token!(file, tokens, else_tok, semantic_tokens::KEYWORD); + expr.push_semantic_tokens(file, tokens, None); + } + Else::Block { else_tok, expr } => { + push_token!(file, tokens, else_tok, semantic_tokens::KEYWORD); + expr.push_semantic_tokens(file, tokens, None); + } + } + } + } +} + +impl PushSemanticTokens for While { + fn push_semantic_tokens( + &self, + file: &Arc, + tokens: &mut SemanticTokens, + _: Option, + ) { + push_token!(file, tokens, self.while_tok, semantic_tokens::KEYWORD); + self.cond.push_semantic_tokens(file, tokens, None); + self.block.push_semantic_tokens(file, tokens, None); + } +} + +impl PushSemanticTokens for NameValuePair { + fn push_semantic_tokens( + &self, + file: &Arc, + tokens: &mut SemanticTokens, + _: Option, + ) { + push_token!(file, tokens, self.name, semantic_tokens::PROPERTY); + self.value.push_semantic_tokens(file, tokens, None); + } +} diff --git a/lsp/src/utils.rs b/lsp/src/utils.rs index c851be9..e6133e4 100644 --- 
a/lsp/src/utils.rs +++ b/lsp/src/utils.rs @@ -1,5 +1,8 @@ use leaf_compiler::metadata::CodePosition; -use tower_lsp_server::ls_types::{Position, Range, Uri}; +use tower_lsp_server::ls_types::{ + Diagnostic, DiagnosticSeverity, NumberOrString, Position, Range, SemanticToken, SemanticTokens, + Uri, +}; pub trait UriUtils { fn strip_header(&self) -> &str; @@ -33,3 +36,43 @@ impl CodePositionUtils for CodePosition { } } } + +pub fn make_diagnostics(diag: &leaf_compiler::diagnostics::Diagnostic) -> Vec { + vec![Diagnostic { + range: diag.position.lsp_range(), + severity: Some(match diag.kind { + leaf_compiler::diagnostics::Kind::Info => DiagnosticSeverity::INFORMATION, + leaf_compiler::diagnostics::Kind::Warning => DiagnosticSeverity::WARNING, + leaf_compiler::diagnostics::Kind::Error => DiagnosticSeverity::ERROR, + }), + code: Some(NumberOrString::String(format!("{:#06X}", diag.code as u32))), + code_description: None, + source: Some("Leaf compiler".into()), + message: diag.message.clone(), + related_information: None, + tags: None, + data: None, + }] +} + +pub fn correct_semantic_token_deltas(tokens: &mut SemanticTokens) { + let mut previous_line = 0; + let mut previous_start = 0; + tokens.data.sort_by_key(|v| (v.delta_line, v.delta_start)); + for SemanticToken { + delta_line, + delta_start, + .. 
+ } in tokens.data.iter_mut() + { + let line = *delta_line; + let start = *delta_start; + *delta_start = match line == previous_line { + false => start, + true => start - previous_start, + }; + *delta_line = line - previous_line; + previous_line = line; + previous_start = start; + } +} diff --git a/lsp/src/workspace.rs b/lsp/src/workspace.rs index 324fa0e..0554196 100644 --- a/lsp/src/workspace.rs +++ b/lsp/src/workspace.rs @@ -5,8 +5,9 @@ use leaf_assembly::{ values::{AnyConst, AnyValue, Value}, }; use leaf_compiler::{CompilationContext, events::Event, metadata::CodePosition}; -use leaf_parser::{ArcStr, SourceCode}; +use leaf_parser::{ArcStr, SourceCode, Text}; use rangemap::RangeMap; +use ropey::Rope; use rust_search::SearchBuilder; use std::{ borrow::Cow, @@ -14,6 +15,7 @@ use std::{ fmt::Write, ops::Range, path::PathBuf, + str::FromStr, sync::{Arc, OnceLock}, }; use tokio::sync::{ @@ -23,15 +25,17 @@ use tokio::sync::{ use tower_lsp_server::{ Client, ls_types::{ - Diagnostic, DiagnosticSeverity, Hover, HoverContents, HoverParams, Location, MarkedString, - NumberOrString, Position, Range as LspRange, SemanticToken, SemanticTokenType, - SemanticTokens, SemanticTokensParams, SemanticTokensResult, TextDocumentPositionParams, - Uri, + Diagnostic, DidChangeTextDocumentParams, Hover, HoverContents, HoverParams, Location, + MarkedString, Position, SemanticToken, SemanticTokens, SemanticTokensParams, + SemanticTokensResult, TextDocumentPositionParams, Uri, request::{GotoDeclarationParams, GotoDeclarationResponse}, }, }; -use crate::utils::{CodePositionUtils, UriUtils}; +use crate::{ + parsing::{DocumentParsingResult, parse_document}, + utils::{CodePositionUtils, UriUtils, correct_semantic_token_deltas, make_diagnostics}, +}; pub struct Workspace { pub base: String, @@ -85,6 +89,13 @@ impl Workspace { result.0.get().cloned() } + pub async fn file_changed(&self, params: DidChangeTextDocumentParams) { + self.sender + .send(Request::FileChanged(params)) + .await + 
.unwrap(); + } + pub async fn reload(&self) { self.sender.send(Request::Reload).await.unwrap(); } @@ -93,6 +104,7 @@ impl Workspace { enum Request { Reload, Close(Arc), + FileChanged(DidChangeTextDocumentParams), Hover(HoverParams, Arc<(OnceLock, Notify)>), GoToDeclaration( GotoDeclarationParams, @@ -104,251 +116,125 @@ enum Request { ), } -struct State<'l> { - context: CompilationContext<'l>, - files: Arc>>, -} - -#[derive(Default)] -struct FileInfo { - line_ranges: HashMap>, - symbol_ranges: RangeMap>()]>, - symbol_positions: HashMap<[u8; size_of::>()], CodePosition>, - symbol_definitions: HashMap<[u8; size_of::>()], CodePosition>, -} - -impl<'l> State<'l> { - pub fn new(alloc: &'l SyncArenaAllocator) -> Self { - State { - context: CompilationContext::new(alloc), - files: Default::default(), - } - } -} - pub fn start_workspace_thread(url: Uri, client: Client) -> Result, String> { let base = url.strip_header().to_string(); - let path = base.clone(); let (sender, mut receiver) = channel(1); + let _handle = tokio::task::spawn(async move { let mut alloc = SyncArenaAllocator::default(); + let mut file_text = HashMap::::new(); + let mut files = HashMap::, i32)>::new(); + let mut parsed = HashMap::::new(); + + macro_rules! 
get_file_text { + ($uri:expr, $version:expr) => { + file_text.entry($uri).or_insert_with_key(|key| { + let path = key.strip_header(); + ( + Rope::from_str(&std::fs::read_to_string(path).unwrap()), + $version, + ) + }) + }; + } + 'global: loop { alloc.reset(); - let mut state = State::new(&alloc); - let mut diagnostics = HashMap::>::new(); - - let files: Vec<_> = tokio::task::block_in_place(|| { - SearchBuilder::default() - .location(&path) - .ext("leaf") - .build() - .map(|f| { - let mut info = state.files.blocking_write(); - let info = info.entry(f.clone()).or_default(); - let text: ArcStr = std::fs::read_to_string(&f).unwrap().into(); - info.line_ranges = calc_line_ranges(text.as_str()); - diagnostics.entry(PathBuf::from(&f)).or_default(); - Arc::new(SourceCode { - text, - file: f.into(), - }) - }) - .collect() - }); - - let diagnostics = Arc::new(Mutex::new(diagnostics)); - - { - let info = state.files.clone(); - let diagnostics = diagnostics.clone(); - state.context.add_event_callback(move |e| unsafe { - tokio::task::block_in_place(|| match e { - Event::Symbol { value, position } => { - let mut info = info.blocking_write(); - let info = info - .entry(position.file.file.to_string_lossy().to_string()) - .or_default(); - info.symbol_ranges - .insert(position.range.clone(), std::mem::transmute(*value)); - info.symbol_positions - .insert(std::mem::transmute(*value), position.clone()); - } - Event::Definition { value, position } => { - let mut info = info.blocking_write(); - let info = info - .entry(position.file.file.to_string_lossy().to_string()) - .or_default(); - info.symbol_definitions - .insert(std::mem::transmute(*value), position.clone()); - info.symbol_ranges - .insert(position.range.clone(), std::mem::transmute(*value)); - info.symbol_positions - .insert(std::mem::transmute(*value), position.clone()); - } - Event::Diagnostic(diagnostic) => { - let mut diagnostics = diagnostics.blocking_lock(); - diagnostics - .entry(diagnostic.position.file.file.clone()) - 
.or_default() - .extend(make_diagnostics(diagnostic)); - } - }); - }); - } - - if let Err(err) = state.context.extend( - AssemblyIdentifier { - version: Version::default(), - name: Cow::Borrowed("Leaf lsp tmp"), - }, - &files, - ) { - let mut diagnostics = diagnostics.lock().await; - diagnostics - .entry(err.position.file.file.clone()) - .or_default() - .extend(make_diagnostics(&err)); - } - - { - let mut diagnostics = diagnostics.lock().await; - for (file, diagnostics) in diagnostics.drain() { - client - .publish_diagnostics(Uri::from_file_path(file).unwrap(), diagnostics, None) - .await; - } - } - while let Some(event) = receiver.recv().await { match event { - Request::Reload => { - alloc = SyncArenaAllocator::default(); - continue 'global; - } + Request::Reload => continue 'global, Request::Hover(params, result) => { - let value = state - .with_file_and_range( - &params.text_document_position_params, - |info, range| unsafe { - let Some(symbol) = info.symbol_ranges.get( - &(range.start - + params - .text_document_position_params - .position - .character as usize), - ) else { - return None; - }; - - // This is blasphemy but what can I do? :3 - Some(std::mem::transmute::<_, AnyValue>(*symbol)) - }, - ) - .await; - - if let Some(Some(value)) = value { - let mut message = String::new(); - let _ = writeln!( - message, - "Type: {}", - match value.is_lvalue() { - false => value.ty(), - true => match value.ty() { - Type::Ptr(PtrT { base, ..
}) => *base, - _ => unreachable!(), - }, - } - ); - result - .0 - .set(Hover { - contents: HoverContents::Scalar(MarkedString::String(message)), - range: None, - }) - .unwrap(); - } - result.1.notify_one(); } Request::GoToDeclaration(params, result) => { - let declaration = state - .with_file_and_range( - &params.text_document_position_params, - |info, range| { - let Some(symbol) = info.symbol_ranges.get( - &(range.start - + params - .text_document_position_params - .position - .character as usize), - ) else { - return None; - }; - let Some(position) = info.symbol_definitions.get(symbol) else { - return None; - }; - Some(position.clone()) - }, + result.1.notify_one(); + } + Request::FileChanged(params) => { + let (file, version) = get_file_text!( + params.text_document.uri.clone(), + params.text_document.version + ); + eprintln!("File {:?} changed", params.text_document.uri.as_str()); + for change in params.content_changes { + let Some(range) = change.range else { + *file = Rope::from_str(&change.text); + continue; + }; + + fn pos_to_char_index(rope: &ropey::Rope, pos: Position) -> usize { + let line_start = rope.line_to_char(pos.line as usize); + line_start + pos.character as usize + } + let start = pos_to_char_index(file, range.start); + let end = pos_to_char_index(file, range.end); + + file.remove(start..end); + file.insert(start, &change.text); + } + *version = params.text_document.version; + client.semantic_tokens_refresh().await.unwrap(); + } + Request::SemanticTokens(params, result) => { + eprintln!( + "Sending semantic tokens for file {:?}", + params.text_document.uri.as_str() + ); + let (file, version) = get_file_text!(params.text_document.uri.clone(), 0); + let source = match files.get_mut(&params.text_document.uri) { + None => { + &files + .entry(params.text_document.uri.clone()) + .or_insert_with_key(|uri| { + ( + SourceCode { + file: uri.strip_header().into(), + text: Text::ArcStr( + ArcStr::from(file.to_string()), + OnceLock::new(), + ), + ast:
OnceLock::new(), + } + .into(), + *version, + ) + }) + .0 + } + Some((source, v)) => { + if *v != *version { + eprintln!( + "Updating file {:?}", + params.text_document.uri.as_str() + ); + *source = SourceCode { + file: params.text_document.uri.strip_header().into(), + text: Text::ArcStr( + ArcStr::from(file.to_string()), + OnceLock::new(), + ), + ast: OnceLock::new(), + } + .into(); + *v = *version; + parsed.remove(&params.text_document.uri); + } + source + } + }; + + let parsed = parsed + .entry(params.text_document.uri.clone()) + .or_insert_with(|| parse_document(source)); + + let _ = client + .publish_diagnostics( + params.text_document.uri, + parsed.diagnostics.clone(), + None, ) .await; - if let Some(Some(decl)) = declaration { - result - .0 - .set(GotoDeclarationResponse::Scalar(Location { - uri: Uri::from_file_path(&decl.file.file).unwrap(), - range: decl.lsp_range(), - })) - .unwrap(); - } - - result.1.notify_one(); - } - Request::SemanticTokens(params, result) => { - let info = state.files.read().await; - let Some(file) = info.get(params.text_document.uri.strip_header()) else { - result.1.notify_one(); - continue; - }; - let mut tokens = SemanticTokens::default(); - for (symbol, position) in file.symbol_positions.iter() { - let symbol: AnyValue = unsafe { std::mem::transmute(*symbol) }; - let line_col = position.line_col(); - tokens.data.push(SemanticToken { - delta_line: line_col.start.line as u32, - delta_start: line_col.start.column as u32, - length: position.range.len() as u32, - token_type: match symbol { - AnyValue::Constant(AnyConst::Type(_)) => 0, - AnyValue::Constant(AnyConst::Function(_)) => 1, - AnyValue::Constant(AnyConst::Int(_) | AnyConst::Float(_)) => 2, - _ => continue, - }, - token_modifiers_bitset: 0, - }); - } - - let mut previous_line = 0; - let mut previous_start = 0; - tokens.data.sort_by_key(|v| (v.delta_line, v.delta_start)); - for SemanticToken { - delta_line, - delta_start, - ..
- } in tokens.data.iter_mut() - { - let line = *delta_line; - let start = *delta_start; - *delta_start = match line == previous_line { - false => start, - true => start - previous_start, - }; - *delta_line = line - previous_line; - previous_line = line; - previous_start = start; - } - - result.0.set(SemanticTokensResult::Tokens(tokens)).unwrap(); + result.0.set(parsed.tokens.clone()).unwrap(); result.1.notify_one(); } Request::Close(notify) => { @@ -361,47 +247,3 @@ pub fn start_workspace_thread(url: Uri, client: Client) -> Result }); Ok(Arc::new(Workspace { base, sender })) } - -fn calc_line_ranges(text: &str) -> HashMap> { - let mut map = HashMap::new(); - for line in text.split('\n') { - let start = line.as_ptr() as usize - text.as_ptr() as usize; - map.insert(map.len() as u32, start..start + line.len()); - } - map -} - -impl State<'_> { - async fn with_file_and_range( - &self, - params: &TextDocumentPositionParams, - action: impl FnOnce(&FileInfo, Range) -> T, - ) -> Option { - let info = self.files.read().await; - let Some(info) = info.get(params.text_document.uri.strip_header()) else { - return None; - }; - let Some(range) = info.line_ranges.get(&params.position.line) else { - return None; - }; - Some(action(info, range.clone())) - } -} - -fn make_diagnostics(diag: &leaf_compiler::diagnostics::Diagnostic) -> Vec { - vec![Diagnostic { - range: diag.position.lsp_range(), - severity: Some(match diag.kind { - leaf_compiler::diagnostics::Kind::Info => DiagnosticSeverity::INFORMATION, - leaf_compiler::diagnostics::Kind::Warning => DiagnosticSeverity::WARNING, - leaf_compiler::diagnostics::Kind::Error => DiagnosticSeverity::ERROR, - }), - code: Some(NumberOrString::Number(diag.code as i32)), - code_description: None, - source: Some("Leaf compiler".into()), - message: diag.message.clone(), - related_information: None, - tags: None, - data: None, - }] -} diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 6d4105f..1190386 100644 --- a/parser/Cargo.toml +++
b/parser/Cargo.toml @@ -8,3 +8,8 @@ arcstr = "1.2.0" derive_more = { version = "2.1.0", features = ["deref", "debug", "display"] } indexmap = "2.13.0" peg = "0.8.5" +rangemap = "1.7.1" +ropey = { version = "1.6.1", optional = true } + +[features] +rope = ["dep:ropey"] diff --git a/parser/src/ast.rs b/parser/src/ast.rs index 0a27d68..b9a3ea1 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -1,146 +1,117 @@ -use arcstr::Substr; use derive_more::Deref; -use indexmap::IndexMap; -use std::ops::Range; -use std::sync::Arc; +use std::{ops::Range, sync::Arc}; -#[derive(Debug, Deref)] -pub struct Ident(pub Substr); - -#[derive(Debug, Deref)] -pub struct Import(pub Expr); - -#[derive(Debug)] -pub struct Number { - pub text: Substr, - pub r#type: Option, +#[rustfmt::skip] +#[derive(derive_more::Debug, Clone, Copy)] +pub enum BinaryOperator { + #[debug("+")] Add, + #[debug("-")] Sub, + #[debug("*")] Mul, + #[debug("/")] Div, + #[debug("%")] Mod, + #[debug("==")] Eq, + #[debug("!=")] Ne, + #[debug("<")] Lt, + #[debug(">")] Gt, + #[debug("<=")] Le, + #[debug(">=")] Ge, + #[debug("..")] Range, + #[debug("as")] Cast, + #[debug("as")] Assign, } -#[derive(derive_more::Debug)] +#[derive(Debug, Deref, Clone)] +pub struct AstNode { + pub range: Range, + #[deref] + pub node: T, +} + +#[derive(Debug, Clone)] +pub struct Number { + pub number: Range, + pub ty: Option>, +} + +#[derive(Debug, Clone)] pub enum Expr { - #[debug("uninit")] - Uninit(Substr), - #[debug("Ident({:?})", _0.0)] - Ident(Ident), - #[debug("{_0:?}")] Number(Number), - String(Substr), - #[debug("{_0:?}")] - Binary(Box), - Index(Box), - Access(Box), - Deref(Box), - Tuple(Vec), - List(Vec), - Struct(Box), - #[debug("{_0:?}")] - Block(Arc), - #[debug("{_0:?}")] - Func(Arc), - #[debug("{_0:?}")] - Type(Box), - - #[debug("{_0:?}")] - ConstDecl(Box), - #[debug("{_0:?}")] - VarDecl(Box), - #[debug("{_0:?}")] - For(Box), - #[debug("{_0:?}")] - While(Box), - #[debug("{_0:?}")] + String(Range), + Ident(Range), + 
Uninit(Range), If(Box), - + Func(Arc>), + While(Box), + Block(Arc>), + VarDecl(Box), + ConstDecl(Box), + List(Vec>), + Tuple(Vec>), + Access { + value: Box>, + operator: Range, + field: Range, + }, + Deref { + value: Box>, + operator: Range, + }, + Index { + value: Box>, + index: Vec>, + }, + Binary { + lhs: Box>, + operator: AstNode, + rhs: Box>, + }, Call { - func: Box, - args: Vec, + func: Box>, + args: Vec>, + }, + Ptr { + ptr_tok: Range, + mutable: Option>, + base: Box>, + }, + Ref { + ref_tok: Range, + mutable: Range, + base: Box>, + }, + Struct { + struct_tok: Range, + fields: Vec>, + }, + StructCtor { + ty: Option>>, + values: Vec>, }, } -impl Expr { - pub fn range(&self) -> Range { - match self { - Self::Ident(e) => e.range(), - Self::Access(e) => e.range(), - Self::Number(e) => e.text.range(), - _ => todo!("{self:?}"), - } - } +#[derive(Debug, Clone)] +pub struct Import { + pub import_tok: Range, + pub expr: AstNode, } -#[derive(Debug)] -pub struct BinaryExpr { - pub lhs: Expr, - pub op: BinaryOp, - pub rhs: Expr, +#[derive(Debug, Clone)] +pub struct Field { + pub name: Range, + pub ty: AstNode, + pub public: Option>, + pub mutable: Option>, } -#[derive(Debug)] -pub struct IndexingExpr { - pub value: Expr, - pub index: Expr, -} - -#[derive(Debug)] -pub struct AccessExpr { - pub value: Expr, - pub field: Ident, -} - -impl AccessExpr { - pub fn range(&self) -> Range { - self.value.range().start..self.field.0.range().end - } -} - -#[rustfmt::skip] -#[derive(derive_more::Debug)] -pub enum BinaryOp { - #[debug("{_0}")] Add(Substr), - #[debug("{_0}")] Sub(Substr), - #[debug("{_0}")] Mul(Substr), - #[debug("{_0}")] Div(Substr), - #[debug("{_0}")] Mod(Substr), - #[debug("{_0}")] Eq(Substr), - #[debug("{_0}")] Ne(Substr), - #[debug("{_0}")] Lt(Substr), - #[debug("{_0}")] Gt(Substr), - #[debug("{_0}")] Le(Substr), - #[debug("{_0}")] Ge(Substr), - #[debug("{_0}")] Range(Substr), - #[debug("{_0}")] Assign(Substr), - #[debug("{_0}")] Cast(Substr), -} - 
-#[derive(Debug)] -pub enum Type { - Ptr { base: Expr, mutable: Option }, - Struct(Struct), -} - -#[derive(Debug)] -pub struct ConstDecl { - pub names: NamePattern, - pub r#type: Option, - pub value: Expr, -} - -#[derive(Debug)] -pub struct VarDecl { - pub names: NamePattern, - pub r#type: Option, - pub value: Expr, -} - -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum NamePattern { - Single(Ident), - Tuple(Vec), - List(Vec), + Single(Range), + Tuple(Vec>), + List(Vec>), } impl NamePattern { - pub fn as_slice(&self) -> &[Ident] { + pub fn as_slice(&self) -> &[Range] { match self { NamePattern::Single(ident) => std::slice::from_ref(ident), NamePattern::Tuple(idents) => idents.as_slice(), @@ -149,77 +120,72 @@ impl NamePattern { } } -#[derive(Debug)] -pub struct Function { - pub text: Substr, - pub args: Vec, - pub ret: Option, - pub block: Option>, +#[derive(Debug, Clone)] +pub struct ConstDecl { + pub names: AstNode, + pub r#type: Option>, + pub value: AstNode, } -#[derive(Debug)] -pub struct NameValuePair { - pub name: Ident, - pub value: Expr, +#[derive(Debug, Clone)] +pub struct VarDecl { + pub names: AstNode, + pub r#type: Option>, + pub value: AstNode, } -#[derive(Debug)] -pub struct Struct { - pub fields: Vec, -} +#[derive(Debug, Clone)] +pub struct Block(pub Vec>); -#[derive(Debug)] -pub struct Field { - pub name: Ident, - pub ty: Expr, - pub public: Option, - pub mutable: Option, -} - -#[derive(Debug)] -pub struct StructCtor { - pub r#type: Option, - pub r#values: IndexMap, - pub(crate) range: Range, -} - -impl StructCtor { - pub fn range(&self) -> Range { - self.range.clone() - } -} - -#[derive(Debug)] -pub struct Block(pub Vec); - -#[derive(Debug)] -pub struct For { - pub names: NamePattern, - pub value: Expr, - pub block: Block, -} - -#[derive(Debug)] -pub struct While { - pub cond: Expr, - pub block: Block, -} - -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct If { - pub cond: Expr, - pub block: Block, + pub if_tok: Range, + pub cond: AstNode, + 
pub block: AstNode, pub else_: Option>, } -#[derive(Debug)] -pub enum Else { - If(If), - Block(Block), +#[derive(Debug, Clone)] +pub struct While { + pub while_tok: Range, + pub cond: AstNode, + pub block: AstNode, } -#[derive(Debug)] -pub struct CompilationUnit { - pub imports: Vec, - pub decls: Vec, +#[derive(Debug, Clone)] +pub enum Else { + If { + else_tok: Range, + expr: AstNode, + }, + Block { + else_tok: Range, + expr: AstNode, + }, +} + +#[derive(Debug, Clone)] +pub struct Function { + pub fn_tok: Range, + pub args: Vec>, + pub ret: Option>, + pub block: Option>>, +} + +#[derive(Debug, Clone)] +pub struct Parameter { + pub name: Range, + pub value: AstNode, +} + +#[derive(Debug, Clone)] +pub struct NameValuePair { + pub name: Range, + pub value: AstNode, +} + +#[derive(Debug, Clone)] +pub struct AstRoot { + pub imports: Vec>, + pub decls: Vec>, } diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 2495c1e..82b2453 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -1,19 +1,70 @@ -pub use arcstr::ArcStr; +pub use arcstr::{ArcStr, Substr}; +use derive_more::Display; pub use parser::{compilation_unit as parse, *}; -use std::{fmt::Debug, path::PathBuf}; +use rangemap::RangeMap; +use std::{ + fmt::Debug, + path::{Path, PathBuf}, + sync::OnceLock, +}; + +#[cfg(feature = "rope")] +pub use ropey::Rope; + +use crate::ast::AstRoot; pub mod ast; mod parser; +#[derive(Display)] +pub enum Text { + #[cfg(feature = "rope")] + #[display("{_0}")] + Rope(Rope), + #[display("{_0}")] + ArcStr(ArcStr, OnceLock>), +} + pub struct SourceCode { - pub text: ArcStr, + pub text: Text, pub file: PathBuf, + pub ast: OnceLock>>, +} + +impl SourceCode { + #[cfg(feature = "rope")] + pub fn from_file_rope(path: &dyn AsRef) -> std::io::Result { + let file = path.as_ref().to_path_buf(); + let text = Text::Rope(Rope::from(std::fs::read_to_string(path)?)); + Ok(Self { + text, + file, + ast: OnceLock::new(), + }) + } + + pub fn from_file_arcstr(path: &dyn AsRef) -> std::io::Result { 
+ let file = path.as_ref().to_path_buf(); + let text = Text::ArcStr( + ArcStr::from(std::fs::read_to_string(path)?), + OnceLock::new(), + ); + Ok(Self { + text, + file, + ast: OnceLock::new(), + }) + } + + pub fn ast(&self) -> &Result> { + self.ast.get_or_init(|| crate::parse(self)) + } } impl Debug for SourceCode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("SourceCode") - .field("text", &self.text) + .field("text", &format_args!("{}", self.text)) .finish_non_exhaustive() } } diff --git a/parser/src/main.rs b/parser/src/main.rs index 3874fa3..ba31c97 100644 --- a/parser/src/main.rs +++ b/parser/src/main.rs @@ -1,12 +1,7 @@ -use arcstr::ArcStr; use leaf_parser::SourceCode; -use std::path::PathBuf; fn main() { - let source = SourceCode { - text: ArcStr::from(std::fs::read_to_string("../test.leaf").unwrap()), - file: PathBuf::from("../test.leaf"), - }; + let source = SourceCode::from_file_arcstr(&"../test.leaf").unwrap(); let unit = leaf_parser::parse(&source); let _ = dbg!(unit); } diff --git a/parser/src/parser.rs b/parser/src/parser.rs index e004aad..79927b0 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1,26 +1,62 @@ use crate::SourceCode; use crate::ast::*; -use arcstr::Substr; -use peg::{Parse, ParseElem, ParseLiteral, ParseSlice}; +pub use peg::Parse; +use peg::ParseSlice; +use peg::{ParseElem, ParseLiteral}; pub use peg::{error::*, str::LineCol}; -use std::sync::Arc; +use rangemap::RangeMap; +use std::ops::Range; impl Parse for SourceCode { type PositionRepr = LineCol; #[inline] fn start(&self) -> usize { - self.text.as_str().start() + 0 } #[inline] fn is_eof(&self, p: usize) -> bool { - self.text.as_str().is_eof(p) + match &self.text { + #[cfg(feature = "rope")] + crate::Text::Rope(s) => p >= s.len_bytes(), + crate::Text::ArcStr(s, _) => s.as_str().is_eof(p), + } } #[inline] fn position_repr(&self, p: usize) -> Self::PositionRepr { - self.text.as_str().position_repr(p) + match &self.text { + 
#[cfg(feature = "rope")] + crate::Text::Rope(s) => { + let line = s.byte_to_line(p); + let line_s = s.line_to_byte(line); + let column = p - line_s; + Self::PositionRepr { + line, + column, + offset: p, + } + } + crate::Text::ArcStr(s, lines) => { + let lines = lines.get_or_init(|| { + let mut lines = RangeMap::new(); + for (i, text) in s.split_inclusive('\n').enumerate() { + let text = s.substr_from(text); + let range = text.range(); + lines.insert(range, (i, text)); + } + lines + }); + let (line, start) = lines.get(&p.min(s.len().saturating_sub(1))).map_or((0, 0), |(i, t)| (*i, t.range().start)); // the map covers [0, len): clamp EOF positions to the last line (or 0:0 for empty text) instead of panicking + let column = p - start; + Self::PositionRepr { + line, + column, + offset: p, + } + } + } } } @@ -29,173 +65,230 @@ impl<'input> ParseElem<'input> for SourceCode { #[inline] fn parse_elem(&'input self, pos: usize) -> peg::RuleResult { - self.text.as_str().parse_elem(pos) + match &self.text { + #[cfg(feature = "rope")] + crate::Text::Rope(s) => match s.try_byte_to_char(pos).ok().and_then(|i| s.get_char(i)) { // pos is a byte offset, ropey's get_char takes a char index + Some(c) => peg::RuleResult::Matched(pos + c.len_utf8(), c), + None => peg::RuleResult::Failed, + }, + crate::Text::ArcStr(s, _) => s.as_str().parse_elem(pos), + } } } impl ParseLiteral for SourceCode { #[inline] fn parse_string_literal(&self, pos: usize, literal: &str) -> peg::RuleResult<()> { - self.text.as_str().parse_string_literal(pos, literal) + match &self.text { + #[cfg(feature = "rope")] + crate::Text::Rope(s) => match s.get_byte_slice(pos..pos + literal.len()) { // byte range: pos and literal.len() are both in bytes + None => peg::RuleResult::Failed, + Some(text) => match text == literal { + false => peg::RuleResult::Failed, + true => peg::RuleResult::Matched(pos + literal.len(), ()), + }, + }, + crate::Text::ArcStr(s, _) => s.as_str().parse_string_literal(pos, literal), + } } } impl<'input> ParseSlice<'input> for SourceCode { - type Slice = Substr; + type Slice = Range; - #[inline] fn parse_slice(&'input self, p1: usize, p2: usize) -> Self::Slice { - self.text.substr(p1..p2) + p1..p2 } } peg::parser!
{ grammar leaf_parser() for SourceCode { // #### ATOMS #### - rule number() -> Number - = text:$(['0'..='9']+) r#type:ident()? { Number { text, r#type } } - / "0x" text:$(['0'..='9'|'a'..='f'|'A'..'F']+) r#type:ident()? { Number { text, r#type } } - / "0b" text:$(['0'|'1']+) r#type:ident()? { Number { text, r#type } } + rule number() -> AstNode // NOTE(review): the decimal alternative also matches the leading "0" of a "0x"/"0b" literal (the remainder then parses as the `ty` ident), so the prefixed alternatives look unreachable; confirm the intended order + = s:position!() number:$(['0'..='9']+) ty:ident()? e:position!() + { AstNode { node: Number { number, ty }, range: s..e } } + / s:position!() "0x" number:$(['0'..='9'|'a'..='f'|'A'..='F']+) ty:ident()? e:position!() + { AstNode { node: Number { number, ty }, range: s..e } } + / s:position!() "0b" number:$(['0'|'1']+) ty:ident()? e:position!() + { AstNode { node: Number { number, ty }, range: s..e } } - rule ident() -> Ident - = text:$(['_'|'a'..='z'|'A'..='Z']['_'|'a'..='z'|'A'..='Z'|'0'..='9']*) { Ident(text) } + rule ident() -> Range + = text:$(['_'|'a'..='z'|'A'..='Z']['_'|'a'..='z'|'A'..='Z'|'0'..='9']*) + { text } - rule ident2() -> Ident - = text:$("#"? ['_'|'a'..='z'|'A'..='Z']['_'|'a'..='z'|'A'..='Z'|'0'..='9']*) { Ident(text) } + rule ident2() -> Range + = text:$(['#']? ['_'|'a'..='z'|'A'..='Z']['_'|'a'..='z'|'A'..='Z'|'0'..='9']*) + { text } - rule string() -> Substr - = str:$("\"" char()* "\"") { str } - - rule char() -> char = normal() - - rule normal() -> char = [^'\\'|'"'] + rule string() -> Range + = text:$("\"" [^'\\'|'"']* "\"") + { text } // ### EXPRESSIONS #### - rule expr() -> Expr = precedence!
{ - lhs:(@) __ op:$("as") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Cast(op), rhs }.into()) } - -- - lhs:@ __ op:$("=") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Assign(op), rhs }.into()) } - value:@ __ op:$(".") __ field:ident2() { Expr::Access(AccessExpr { value, field }.into()) } - value:@ __ op:$(".^") { Expr::Deref(value.into()) } - lhs:@ "(" __ args:(expr() ** list_separator()) __ ")" { Expr::Call { func: lhs.into(), args } } - value:@ "[" __ index:expr() __ "]" { Expr::Index(IndexingExpr { value, index }.into()) } + rule bop0() -> AstNode + = op:$("as") { AstNode { range: op, node: BinaryOperator::Cast } } + / op:$("=") { AstNode { range: op, node: BinaryOperator::Assign } } + + rule bop1() -> AstNode + = op:$("+") { AstNode { range: op, node: BinaryOperator::Add } } + / op:$("-") { AstNode { range: op, node: BinaryOperator::Sub } } + + rule bop2() -> AstNode + = op:$("*") { AstNode { range: op, node: BinaryOperator::Mul } } + / op:$("/") { AstNode { range: op, node: BinaryOperator::Div } } + / op:$("%") { AstNode { range: op, node: BinaryOperator::Mod } } + + rule bop3() -> AstNode // ordered choice commits to the first match, so two-character operators must precede their one-character prefixes + = op:$("==") { AstNode { range: op, node: BinaryOperator::Eq } } + / op:$("!=") { AstNode { range: op, node: BinaryOperator::Ne } } + / op:$("<=") { AstNode { range: op, node: BinaryOperator::Le } } + / op:$(">=") { AstNode { range: op, node: BinaryOperator::Ge } } + / op:$("<") { AstNode { range: op, node: BinaryOperator::Lt } } + / op:$(">") { AstNode { range: op, node: BinaryOperator::Gt } } + + rule bop4() -> AstNode + = op:$("..") { AstNode { range: op, node: BinaryOperator::Range } } + + rule expr() -> AstNode = precedence!
{ + lhs:@ __ op:bop0() __ rhs:expr() + { AstNode { range: lhs.range.start..rhs.range.end, node: Expr::Binary { lhs: lhs.into(), rhs: rhs.into(), operator: op } } } + // -- + value:@ __ op:$(".") __ field:ident2() + { AstNode { range: value.range.start..field.end, node: Expr::Access { value: value.into(), field, operator: op } } } + + value:@ __ op:$(".^") + { AstNode { range: value.range.start..op.end, node: Expr::Deref { value: value.into(), operator: op } } } + + lhs:@ "(" __ args:(expr() ** list_separator()) __ e:$")" + { AstNode { range: lhs.range.start..e.end, node: Expr::Call { func: lhs.into(), args } } } + + value:@ "[" __ index:(expr() **<1,> ("," __)) __ e:$"]" + { AstNode { range: value.range.start..e.end, node: Expr::Index { value: value.into(), index } } } + + ty:@ __ "#{" __ values:name_value_pairs() __ "}" e:position!() + { AstNode { range: ty.range.start..e, node: Expr::StructCtor { ty: Some(ty.into()), values } } } + + s:$"#{" __ values:name_value_pairs() __ "}" e:position!() + { AstNode { range: s.start..e, node: Expr::StructCtor { ty: None, values } } } - ty:@ __ "#{" __ values:name_value_pairs() __ "}" e:position!() { Expr::Struct( - StructCtor { - range: ty.range().start..e, - r#type: Some(ty), - values: values.into_iter().map(|v| (v.name.0.clone(), v)).collect(), - }.into() - ) } - s:position!() "#{" __ values:name_value_pairs() __ "}" e:position!() { Expr::Struct( - StructCtor { - r#type: None, - values: values.into_iter().map(|v| (v.name.0.clone(), v)).collect(), - range: s..e, - }.into() - ) } -- - lhs:@ __ op:$("+") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Add(op), rhs }.into()) } - lhs:@ __ op:$("-") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Sub(op), rhs }.into()) } + lhs:@ __ op:bop1() __ rhs:expr() + { AstNode { range: lhs.range.start..rhs.range.end, node: Expr::Binary { lhs: lhs.into(), rhs: rhs.into(), operator: op } } } -- - lhs:@ __ op:$("*") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: 
BinaryOp::Mul(op), rhs }.into()) } - lhs:@ __ op:$("/") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Div(op), rhs }.into()) } - lhs:@ __ op:$("%") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Mod(op), rhs }.into()) } + lhs:@ __ op:bop2() __ rhs:expr() + { AstNode { range: lhs.range.start..rhs.range.end, node: Expr::Binary { lhs: lhs.into(), rhs: rhs.into(), operator: op } } } -- - lhs:@ __ op:$("..") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Range(op), rhs }.into()) } + lhs:@ __ op:bop3() __ rhs:expr() + { AstNode { range: lhs.range.start..rhs.range.end, node: Expr::Binary { lhs: lhs.into(), rhs: rhs.into(), operator: op } } } -- - lhs:@ __ op:$("==") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Eq(op), rhs }.into()) } - lhs:@ __ op:$("!=") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Ne(op), rhs }.into()) } - lhs:@ __ op:$("<") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Lt(op), rhs }.into()) } - lhs:@ __ op:$(">") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Gt(op), rhs }.into()) } - lhs:@ __ op:$("<=") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Le(op), rhs }.into()) } - lhs:@ __ op:$(">=") __ rhs:expr() { Expr::Binary(BinaryExpr { lhs, op: BinaryOp::Ge(op), rhs }.into()) } + lhs:@ __ op:bop4() __ rhs:expr() + { AstNode { range: lhs.range.start..rhs.range.end, node: Expr::Binary { lhs: lhs.into(), rhs: rhs.into(), operator: op } } } -- - block:block() { Expr::Block(block.into())} - for_loop:for_loop() { Expr::For(for_loop.into())} - while_loop:while_loop() { Expr::While(while_loop.into())} - if_statement:if_statement() { Expr::If(if_statement.into())} - func:func() { Expr::Func(Arc::new(func))} - var_decl:var_decl() { Expr::VarDecl(var_decl.into()) } - const_decl:const_decl() { Expr::ConstDecl(const_decl.into()) } - "(" __ tuple:(expr() **<2,> ("," __)) __ ")" { Expr::Tuple(tuple) } - "[" __ list:(expr() ** ("," __)) __ "]" { 
Expr::List(list) } - "(" __ v:expr() __ ")" { v } - "*" __ m:$"mut"? __ v:expr() { Expr::Type(Type::Ptr { base:v, mutable: m }.into()) } - v:struct_t() { Expr::Type(Type::Struct(v).into()) } - v:string() { Expr::String(v) } - v:number() { Expr::Number(v) } - v:ident() { Expr::Ident(v) } + block:block() { AstNode { range: block.range.clone(), node: Expr::Block(block.into()) } } + + i:if_statement() { AstNode { range: i.range, node: Expr::If(i.node.into()) }} + l:while_loop() { AstNode { range: l.range, node: Expr::While(l.node.into()) }} + + func:func() { AstNode { range: func.range.clone(), node: Expr::Func(func.into()) } } + + var_decl:var_decl() + { AstNode { range: var_decl.range, node: Expr::VarDecl(var_decl.node.into()) } } + + const_decl:const_decl() + { AstNode { range: const_decl.range, node: Expr::ConstDecl(const_decl.node.into()) } } + + s:$"(" __ elements:(expr() **<2,> ("," __)) __ e:$")" + { AstNode { range: s.start..e.end, node: Expr::Tuple(elements) } } + + s:$"[" __ elements:(expr() ** ("," __)) __ e:$"]" // unlike tuples, a list literal may hold zero or one element + { AstNode { range: s.start..e.end, node: Expr::List(elements) } } + + s:$"(" __ v:expr() __ e:$")" + { AstNode { range: s.start..e.end, node: v.node } } + + t:$"*" __ m:$"mut"? __ v:expr() + { AstNode { range: t.start..v.range.end, node: Expr::Ptr { ptr_tok: t, mutable: m, base: v.into() } } } + + v:struct_t() { v } + v:string() { AstNode { range: v.clone(), node: Expr::String(v) } } + v:number() { AstNode { range: v.range, node: Expr::Number(v.node) } } + v:ident() { AstNode { range: v.clone(), node: Expr::Ident(v) } } } - rule block() -> Block - = "{" __ exprs:(i:expr() statement_separator() {i})* __ "}" { Block(exprs) } + rule block() -> AstNode + = s:$"{" __ exprs:(i:expr() statement_separator() {i})* __ e:$"}" + { AstNode { range: s.start..e.end, node: Block(exprs) } } - rule func() -> Function - = s:position!() t:$"fn" __ "(" __ args:name_type_pairs() __ ")" __ ret:("->" __ e:expr() {e})? __ block:block()?
e:position!() - { Function { args, ret, block: block.map(Into::into), text: t.parent().substr(s..e), } } + rule func() -> AstNode + = s:position!() t:$"fn" __ "(" __ args:parameters() __ ")" __ ret:("->" __ e:expr() {e})? __ block:block()? e:position!() + { AstNode { range: s..e, node: Function { args, ret, block: block.map(Into::into), fn_tok: t, } } } - rule name_type_pair() -> NameValuePair - = name:ident() __ ":" __ value:expr() { NameValuePair { name, value } } + rule parameter() -> AstNode + = name:ident() __ ":" __ value:expr() + { AstNode { range: name.start..value.range.end, node: Parameter { name, value } } } - rule name_type_pairs() -> Vec - = v:(name_type_pair() **<1,> list_separator()) list_separator()? { v } + rule parameters() -> Vec> + = v:(parameter() **<1,> list_separator()) list_separator()? { v } / { vec![] } - rule name_value_pair() -> NameValuePair - = name:ident() __ "=" __ value:expr() { NameValuePair { name, value } } + rule name_value_pair() -> AstNode + = name:ident() __ "=" __ value:expr() + { AstNode { range: name.start..value.range.end, node: NameValuePair { name, value } } } - rule name_value_pairs() -> Vec + rule name_value_pairs() -> Vec> = v:(name_value_pair() **<1,> list_separator()) list_separator()? { v } / { vec![] } - rule struct_t() -> Struct - = "struct" __ "{" __ fields:fields() __ "}" { Struct { fields } } + rule struct_t() -> AstNode + = t:$"struct" __ "{" __ fields:fields() __ e:$"}" + { AstNode { range: t.start..e.end, node: Expr::Struct { fields, struct_tok: t } } } - rule field() -> Field - = public:$"pub"? __ mutable:$"mut"? __ name:ident() __ ":" __ ty:expr() { Field { name, ty, public, mutable } } + rule field() -> AstNode + = s:position!() public:$"pub"? __ mutable:$"mut"? __ name:ident() __ ":" __ ty:expr() e:position!() + { AstNode { node: Field { name, ty, public, mutable }, range: s..e } } - rule fields() -> Vec + rule fields() -> Vec> = v:(field() **<1,> list_separator()) list_separator()? 
{ v } / { vec![] } - rule import() -> Import - = "import" _ expr:expr() { Import(expr) } + rule import() -> AstNode + = t:$"import" _ expr:expr() + { AstNode { range: t.start..expr.range.end, node: Import { import_tok: t, expr } } } - rule name_pattern() -> NamePattern - = "(" __ tuple:(ident() ** ("," __)) __ ")" { NamePattern::Tuple(tuple) } - / "[" __ slice:(ident() ** ("," __)) __ "]" { NamePattern::List(slice) } - / ident:ident() { NamePattern::Single(ident) } + rule name_pattern() -> AstNode + = s:$"(" __ tuple:(ident() ** ("," __)) __ e:$")" + { AstNode { range: s.start..e.end, node: NamePattern::Tuple(tuple) } } + / s:$"[" __ slice:(ident() ** ("," __)) __ e:$"]" + { AstNode { range: s.start..e.end, node: NamePattern::List(slice) } } + / ident:ident() + { AstNode { range: ident.clone(), node: NamePattern::Single(ident) } } - rule const_decl() -> ConstDecl = + rule const_decl() -> AstNode = names:name_pattern() _? ":" r#type:(_ t:expr() _ {t})? ":" _ value:expr() - { ConstDecl { names, r#type, value } } + { AstNode { range: names.range.start..value.range.end, node: ConstDecl { names, r#type, value } } } - rule var_decl() -> VarDecl = + rule var_decl() -> AstNode = names:name_pattern() _? ":" r#type:(_ t:expr() _ {t})? "=" _ value:expr() - { VarDecl { names, r#type, value } } + { AstNode { range: names.range.start..value.range.end, node: VarDecl { names, r#type, value } } } - rule for_loop() -> For = - "for" _ names:name_pattern() _ "in" _ value:expr() _ block:block() - { For { names, value, block } } + rule while_loop() -> AstNode + = t:$"while" _ cond:expr() _ block:block() + { AstNode { range: t.start..block.range.end, node: While { cond, block, while_tok: t } } } - rule while_loop() -> While = - "while" _ cond:expr() _ block:block() - { While { cond, block } } - - rule if_statement() -> If = - "if" _ cond:expr() __ block:block() e:(__ e:else_statement() {e})? 
{ If { cond, block, else_: e.map(Box::new) } } + rule if_statement() -> AstNode + = t:$"if" _ cond:expr() __ block:block() el:(__ e:else_statement() {e})? e:position!() + { AstNode { range: t.start..e, node: If { cond, block, else_: el.map(Box::new), if_tok: t } } } rule else_statement() -> Else - = "else" _ i:if_statement() { Else::If(i) } - / "else" __ b:block() { Else::Block(b) } + = t:$"else" _ i:if_statement() { Else::If { else_tok: t, expr: i } } + / t:$"else" __ b:block() { Else::Block { else_tok: t, expr: b } } - pub rule compilation_unit() -> CompilationUnit = + pub rule compilation_unit() -> AstRoot = __ imports:(i:import() statement_separator() {i})* __ decls:(d:const_decl() statement_separator() {d})* __ - { CompilationUnit { imports, decls } } + { AstRoot { imports, decls } } // #### MISC #### rule _ = quiet! { [' '|'\t']+ }