From 65b48fe497f2040ad4fe11065c7648c465107f43 Mon Sep 17 00:00:00 2001 From: Mia Date: Mon, 10 Nov 2025 16:01:34 +0100 Subject: [PATCH] Initial commit --- Cargo.lock | 509 ++++++++++++++++++++++++++++++ Cargo.toml | 3 + allocators/Cargo.toml | 8 + allocators/src/arena.rs | 73 +++++ allocators/src/global.rs | 41 +++ allocators/src/lib.rs | 46 +++ assembly/Cargo.lock | 176 +++++++++++ assembly/Cargo.toml | 13 + assembly/src/context.rs | 175 ++++++++++ assembly/src/functions/ir.rs | 316 +++++++++++++++++++ assembly/src/functions/mod.rs | 72 +++++ assembly/src/lib.rs | 4 + assembly/src/main.rs | 15 + assembly/src/types/derivations.rs | 159 ++++++++++ assembly/src/types/intrinsics.rs | 98 ++++++ assembly/src/types/mod.rs | 61 ++++ assembly/src/values/mod.rs | 150 +++++++++ parsing/.gitignore | 1 + parsing/Cargo.toml | 9 + parsing/src/ast.rs | 93 ++++++ parsing/src/lib.rs | 11 + parsing/src/main.rs | 25 ++ parsing/src/parser.rs | 147 +++++++++ parsing/src/tokenizer.rs | 161 ++++++++++ rust-toolchain.toml | 4 + rustfmt.toml | 1 + 26 files changed, 2371 insertions(+) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 allocators/Cargo.toml create mode 100644 allocators/src/arena.rs create mode 100644 allocators/src/global.rs create mode 100644 allocators/src/lib.rs create mode 100644 assembly/Cargo.lock create mode 100644 assembly/Cargo.toml create mode 100644 assembly/src/context.rs create mode 100644 assembly/src/functions/ir.rs create mode 100644 assembly/src/functions/mod.rs create mode 100644 assembly/src/lib.rs create mode 100644 assembly/src/main.rs create mode 100644 assembly/src/types/derivations.rs create mode 100644 assembly/src/types/intrinsics.rs create mode 100644 assembly/src/types/mod.rs create mode 100644 assembly/src/values/mod.rs create mode 100644 parsing/.gitignore create mode 100644 parsing/Cargo.toml create mode 100644 parsing/src/ast.rs create mode 100644 parsing/src/lib.rs create mode 100644 parsing/src/main.rs create mode 100644 parsing/src/parser.rs create mode 100644 parsing/src/tokenizer.rs create mode 100644 rust-toolchain.toml create mode 100644 rustfmt.toml diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..b796e99 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,509 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + +[[package]] +name = "arcstr" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03918c3dbd7701a85c6b9887732e2921175f26c350b4563841d0958c21d57e6d" + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "boxcar" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "cc" +version = "1.2.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35900b6c8d709fb1d854671ae27aeaa9eec2f8b01b364e1619a40da3e6fe2afe" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chumsky" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acc17a6284abccac6e50db35c1cee87f605474a72939b959a3a67d9371800efd" +dependencies = [ + "hashbrown", + "regex-automata", + "serde", + "stacker", + "unicode-ident", + "unicode-segmentation", +] + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "unicode-xid", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leaf_allocators" +version = "0.1.0" +dependencies = [ + "boxcar", + "bumpalo", +] + +[[package]] +name = "leaf_assembly" +version = "0.1.0" +dependencies = [ + "bitflags", + "boxcar", + "derive_more", + "half", + "leaf_allocators", + "scc", +] + +[[package]] +name = "leaf_parsing" +version = "0.1.0" +dependencies = [ + "arcstr", + "chumsky", + "logos", +] + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "logos" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax 0.8.8", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470" +dependencies = [ + "logos-codegen", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "saa" +version = "5.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dfac11c0cd0606aaf7eb9ef66f82c119438a96dc487715abb8b57fdf08ad4fe" + +[[package]] +name = "scc" +version = "3.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3904515dbe9c10a126a54faa1d44fd40b893224247a1198d8c5dab6ef028bbdc" +dependencies = [ + "saa", + "sdd", +] + +[[package]] +name = "sdd" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd556f4cb1d30590889c126f4951eb87f8a70d736ed5a2f62cc89d290721a1c2" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "stacker" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys", +] + +[[package]] +name = "syn" +version = "2.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..4c59d24 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,3 @@ +[workspace] +resolver = "3" +members = ["allocators","assembly", "parsing"] diff --git a/allocators/Cargo.toml b/allocators/Cargo.toml new file mode 100644 index 0000000..f6c8041 --- /dev/null +++ b/allocators/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "leaf_allocators" +version = "0.1.0" +edition = "2024" + +[dependencies] +boxcar = "0.2.14" +bumpalo = "3.19.0" diff --git a/allocators/src/arena.rs b/allocators/src/arena.rs new file mode 100644 index 0000000..2da5ee2 --- /dev/null +++ b/allocators/src/arena.rs @@ -0,0 +1,73 @@ +use crate::{AllocationEntry, Allocator, DropFn}; +use boxcar::Vec; +use bumpalo::Bump; +use std::{alloc::Layout, sync::Mutex}; + +#[derive(Default)] +pub struct ArenaAllocator { + bump: Bump, + allocations: Vec, +} + +impl Allocator for ArenaAllocator { + unsafe fn alloc_unsafe(&self, data: *const u8, layout: Layout, drop_fn: DropFn) -> *mut u8 { + unsafe { + let ptr = self.bump.alloc_layout(layout).as_ptr(); + std::ptr::copy_nonoverlapping(data, ptr, layout.size()); + self.allocations.push(AllocationEntry { + ptr, + layout, + drop_fn, + }); + ptr + } + } +} + +impl Drop for ArenaAllocator { + fn drop(&mut self) { + unsafe { + for AllocationEntry { ptr, drop_fn, .. } in std::mem::take(&mut self.allocations) { + if let Some(drop) = drop_fn { + drop(ptr); + } + } + } + } +} + +#[derive(Default)] +pub struct SyncArenaAllocator { + bump: Mutex, + allocations: Vec, +} + +impl Allocator for SyncArenaAllocator { + unsafe fn alloc_unsafe(&self, data: *const u8, layout: Layout, drop_fn: DropFn) -> *mut u8 { + unsafe { + let ptr = { + let bump = self.bump.lock().unwrap(); + bump.alloc_layout(layout).as_ptr() + }; + std::ptr::copy_nonoverlapping(data, ptr, layout.size()); + self.allocations.push(AllocationEntry { + ptr, + layout, + drop_fn, + }); + ptr + } + } +} + +impl Drop for SyncArenaAllocator { + fn drop(&mut self) { + unsafe { + for AllocationEntry { ptr, drop_fn, .. } in std::mem::take(&mut self.allocations) { + if let Some(drop) = drop_fn { + drop(ptr); + } + } + } + } +} diff --git a/allocators/src/global.rs b/allocators/src/global.rs new file mode 100644 index 0000000..c2ee7c3 --- /dev/null +++ b/allocators/src/global.rs @@ -0,0 +1,41 @@ +use crate::{AllocationEntry, Allocator, DropFn}; +use boxcar::Vec; +use std::alloc::Layout; + +#[derive(Default)] +pub struct GlobalAllocator { + allocations: Vec, +} + +impl Allocator for GlobalAllocator { + unsafe fn alloc_unsafe(&self, data: *const u8, layout: Layout, drop_fn: DropFn) -> *mut u8 { + unsafe { + let ptr = std::alloc::alloc(layout); + std::ptr::copy_nonoverlapping(data, ptr, layout.size()); + self.allocations.push(AllocationEntry { + ptr, + layout, + drop_fn, + }); + ptr + } + } +} + +impl Drop for GlobalAllocator { + fn drop(&mut self) { + unsafe { + for AllocationEntry { + ptr, + layout, + drop_fn, + } in std::mem::take(&mut self.allocations) + { + if let Some(drop) = drop_fn { + drop(ptr); + } + std::alloc::dealloc(ptr, layout); + } + } + } +} diff --git a/allocators/src/lib.rs b/allocators/src/lib.rs new file mode 100644 index 0000000..ee559e8 --- /dev/null +++ b/allocators/src/lib.rs @@ -0,0 +1,46 @@ +mod arena; +mod global; + +pub use arena::*; +pub use global::*; +use std::{alloc::Layout, mem::MaybeUninit}; + +pub type DropFn = Option; + +pub struct AllocationEntry { + ptr: *mut u8, + layout: Layout, + drop_fn: DropFn, +} + +unsafe impl Send for AllocationEntry {} +unsafe impl Sync for AllocationEntry {} + +pub trait Allocator { + unsafe fn alloc_unsafe(&self, data: *const u8, layout: Layout, drop: DropFn) -> *mut u8; +} + +pub trait SyncAllocator: Allocator + Send + Sync {} + +impl SyncAllocator for T {} + +impl<'l> dyn Allocator + 'l { + pub fn alloc(&'l self, value: T) -> &'l mut T { + unsafe { + let value = MaybeUninit::new(value); + let data = value.as_ptr() as *const u8; + let layout = Layout::new::(); + let drop: DropFn = match std::mem::needs_drop::() { + false => None, + true => Some(|ptr: *mut u8| std::ptr::drop_in_place(ptr as *mut T)), + }; + &mut *(self.alloc_unsafe(data, layout, drop) as *mut T) + } + } +} + +impl<'l> dyn SyncAllocator + 'l { + pub fn alloc(&'l self, value: T) -> &'l mut T { + ::alloc(self, value) + } +} diff --git a/assembly/Cargo.lock b/assembly/Cargo.lock new file mode 100644 index 0000000..3b75d70 --- /dev/null +++ b/assembly/Cargo.lock @@ -0,0 +1,176 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "boxcar" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "unicode-xid", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + +[[package]] +name = "leaf" +version = "0.1.0" +dependencies = [ + "bitflags", + "boxcar", + "bumpalo", + "derive_more", + "fxhash", + "half", + "scc", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "saa" +version = "5.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dfac11c0cd0606aaf7eb9ef66f82c119438a96dc487715abb8b57fdf08ad4fe" + +[[package]] +name = "scc" +version = "3.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3904515dbe9c10a126a54faa1d44fd40b893224247a1198d8c5dab6ef028bbdc" +dependencies = [ + "saa", + "sdd", +] + +[[package]] +name = "sdd" +version = "4.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd556f4cb1d30590889c126f4951eb87f8a70d736ed5a2f62cc89d290721a1c2" + +[[package]] +name = "syn" +version = "2.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f17c7e013e88258aa9543dcbe81aca68a667a9ac37cd69c9fbc07858bfe0e2f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/assembly/Cargo.toml b/assembly/Cargo.toml new file mode 100644 index 0000000..5445556 --- /dev/null +++ b/assembly/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "leaf_assembly" +version = "0.1.0" +edition = "2024" + +[dependencies] +bitflags = "2.10.0" +boxcar = "0.2.14" +derive_more = { version = "2.0.1", features = ["deref", "debug", "display"] } +half = "2.7.1" +scc = "3.3.7" + +leaf_allocators = { path = "../allocators" } diff --git a/assembly/src/context.rs b/assembly/src/context.rs new file mode 100644 index 0000000..4ae1671 --- /dev/null +++ b/assembly/src/context.rs @@ -0,0 +1,175 @@ +use crate::{ + functions::Function, + types::{ + Type, + derivations::{FuncT, TypeDerivations}, + intrinsics::*, + }, + values::{Const, Int}, +}; +use leaf_allocators::SyncAllocator; +use scc::HashMap; +use std::{hash::Hash, sync::OnceLock}; + +pub type Ctx<'l> = &'l Context<'l>; + +pub struct Context<'l> { + pub(crate) alloc: &'l dyn SyncAllocator, + pub(crate) derivations: TypeDerivations<'l>, + + constants: HashMap<&'l Const<'l>, &'l Const<'l>>, + intrinsics: OnceLock>>, +} + +impl Eq for Context<'_> {} + +impl PartialEq for Context<'_> { + fn eq(&self, other: &Self) -> bool { + std::ptr::eq(self, other) + } +} + +impl Hash for Context<'_> { + fn hash(&self, state: &mut H) { + (self as *const Self).hash(state); + } +} + +impl<'l> Context<'l> { + pub fn new(alloc: &'l dyn SyncAllocator) -> &'l Context<'l> { + let ctx = alloc.alloc(Self { + alloc, + constants: HashMap::new(), + derivations: TypeDerivations::new(alloc), + intrinsics: OnceLock::new(), + }); + ctx.intrinsics + .set(Box::new(intrinsic_types::make_intrinsic_types(ctx))) + .unwrap(); + ctx + } + + pub fn create_function(&'l self, ty: &'l FuncT<'l>) -> &'l Function<'l> { + self.alloc.alloc(Function { + ty, + body: OnceLock::new(), + ctx: self, + }) + } +} + +pub trait CreateConst<'l, T> { + fn create_const(&'l self, value: T) -> &'l Const<'l>; +} + +macro_rules! create_const { + ($(impl $ty:ty : $expr:expr,)*) => { + $( + impl<'l> CreateConst<'l, $ty> for Context<'l> { + fn create_const(&'l self, value: $ty) -> &'l Const<'l> { + let constant = ($expr)(self, value); + if let Some(existing) = self.constants.get_sync(&constant) { + return *existing; + } + let constant = self.alloc.alloc(constant); + *self.constants.entry_sync(constant).or_insert(constant) + } + } + )* + }; +} + +create_const! { + impl i8: |ctx, val| Const::Int(Int::I8(val), ctx), + impl i16: |ctx, val| Const::Int(Int::I16(val), ctx), + impl i32: |ctx, val| Const::Int(Int::I32(val), ctx), + impl i64: |ctx, val| Const::Int(Int::I64(val), ctx), + impl i128: |ctx, val| Const::Int(Int::I128(val), ctx), + + impl u8: |ctx, val| Const::Int(Int::U8(val), ctx), + impl u16: |ctx, val| Const::Int(Int::U16(val), ctx), + impl u32: |ctx, val| Const::Int(Int::U32(val), ctx), + impl u64: |ctx, val| Const::Int(Int::U64(val), ctx), + impl u128: |ctx, val| Const::Int(Int::U128(val), ctx), + + impl (): |ctx, _| Const::Void(ctx), + impl char: |ctx, val| Const::Char(val, ctx), + impl bool: |ctx, val| Const::Bool(val, ctx), + impl &'l str: |ctx, val| Const::Str(val, ctx), +} + +mod intrinsic_types { + use super::*; + use std::mem::MaybeUninit; + + macro_rules! create_intrinsics { + ($(($id:ident, $fn_id:ident, $init:expr),)*) => { + pub enum IntrinsicTypeKind { + $($id,)* + COUNT, + } + + pub type IntrinsicTypesArray<'l> = [Type<'l>; IntrinsicTypeKind::COUNT as usize]; + + pub fn make_intrinsic_types<'l>(ctx: Ctx<'l>) -> IntrinsicTypesArray<'l> { + unsafe { + const C: usize = IntrinsicTypeKind::COUNT as usize; + let mut intrinsics = [MaybeUninit::::uninit(); C]; + + $( + intrinsics[IntrinsicTypeKind::$id as usize] + .write((&*ctx.alloc.alloc($init(ctx))).into()); + )* + + std::mem::transmute(intrinsics) + } + } + + impl<'l> Context<'l> { + $( + #[inline] + pub fn $fn_id(&'l self) -> Type<'l> { + self.get_intrinsic(IntrinsicTypeKind::$id) + } + )* + + #[inline] + fn get_intrinsic(&'l self, intrinsic: IntrinsicTypeKind) -> Type<'l> { + unsafe { + self.intrinsics + .get() + .unwrap_unchecked() + .get_unchecked(intrinsic as usize) + .clone() + } + } + } + }; + } + + create_intrinsics! { + (Void, void_t, |ctx| VoidT { ctx }), + (Char, char_t, |ctx| CharT { ctx }), + (Bool, bool_t, |ctx| BoolT { ctx }), + (Type, type_t, |ctx| TypeT { ctx, }), + (ConstStr, const_str_t, |ctx| ConstStrT { ctx, }), + + (I8, i8_t, |ctx| IntT { ctx, signed: true, precision: 8 }), + (I16, i16_t, |ctx| IntT { ctx, signed: true, precision: 16 }), + (I32, i32_t, |ctx| IntT { ctx, signed: true, precision: 32 }), + (I64, i64_t, |ctx| IntT { ctx, signed: true, precision: 64 }), + (I128, i128_t, |ctx| IntT { ctx, signed: true, precision: 128 }), + (ISize, isize_t, |ctx| IntT { ctx, signed: true, precision: u32::MAX }), + + (U8, u8_t, |ctx| IntT { ctx, signed: false, precision: 8 }), + (U16, u16_t, |ctx| IntT { ctx, signed: false, precision: 16 }), + (U32, u32_t, |ctx| IntT { ctx, signed: false, precision: 32 }), + (U64, u64_t, |ctx| IntT { ctx, signed: false, precision: 64 }), + (U128, u128_t, |ctx| IntT { ctx, signed: false, precision: 128 }), + (USize, usize_t, |ctx| IntT { ctx, signed: false, precision: u32::MAX }), + + (F16, f16_t, |ctx| FloatT { ctx, precision: 16 }), + (F32, f32_t, |ctx| FloatT { ctx, precision: 32 }), + (F64, f64_t, |ctx| FloatT { ctx, precision: 64 }), + } +} diff --git a/assembly/src/functions/ir.rs b/assembly/src/functions/ir.rs new file mode 100644 index 0000000..3eb79fd --- /dev/null +++ b/assembly/src/functions/ir.rs @@ -0,0 +1,316 @@ +use crate::{ + context::Ctx, + functions::{Function, FunctionBody}, + types::{Type, derivations::*}, + values::{Value, ValueFlags}, +}; +use derive_more::Debug; +use std::{ + borrow::Cow, + hash::Hash, + ops::Deref, + sync::{ + OnceLock, + atomic::{AtomicU32, Ordering}, + }, + u32, +}; + +#[derive(Debug)] +#[debug("%{}: {variant:?}", self.id())] +pub struct Instruction<'l> { + id: AtomicU32, + pub parent_block: &'l Block<'l>, + pub variant: InstructionVariant<'l>, +} + +impl Eq for Instruction<'_> {} + +impl PartialEq for Instruction<'_> { + #[inline] + fn eq(&self, _: &Self) -> bool { + false + } +} + +impl Hash for Instruction<'_> { + #[inline] + fn hash(&self, state: &mut H) { + (self as *const Self).hash(state); + } +} + +impl<'l> Deref for Instruction<'l> { + type Target = InstructionVariant<'l>; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.variant + } +} + +impl<'l> Into> for &'l Instruction<'l> { + #[inline] + fn into(self) -> Value<'l> { + Value::Instruction(self) + } +} + +impl<'l> Instruction<'l> { + #[inline] + pub fn ctx(&self) -> Ctx<'l> { + self.parent_block.func.ctx + } + + #[inline] + pub fn id(&self) -> u32 { + self.id.load(Ordering::Relaxed) + } + + pub fn value_flags(&self) -> ValueFlags { + match self.variant { + _ => todo!(), + } + } + + pub fn value_ty(&self) -> Type<'l> { + match self.variant { + _ => todo!(), + } + } +} + +#[derive(Debug, PartialEq, Eq, Hash)] +pub enum InstructionVariant<'l> { + #[debug("stackalloc {_0}")] + StackAlloc(Type<'l>), + + #[debug("gcalloc {_0}")] + GCAlloc(Type<'l>), + + #[debug("load {_0}")] + Load(Value<'l>), + + #[debug("store {_0}, {_1}")] + Store(Value<'l>, Value<'l>), + + #[debug("return{}", _0.map(|v| format!(" {v}")).unwrap_or_default())] + Return(Option>), +} + +impl InstructionVariant<'_> { + pub fn is_block_termination(&self) -> bool { + match self { + Self::Return(_) => true, + _ => false, + } + } +} + +pub struct Block<'l> { + pub id: u32, + func: &'l Function<'l>, + instructions: OnceLock>>, +} + +impl<'l> Block<'l> { + #[inline] + pub fn instructions(&self) -> &[&'l Instruction<'l>] { + match self.instructions.get() { + None => &[], + Some(v) => v, + } + } +} + +pub struct BlockBuilder<'l> { + block: &'l Block<'l>, + instructions: Vec<&'l Instruction<'l>>, +} + +pub type BlockBuilderError<'l> = Cow<'l, str>; +pub type BlockBuilderResult<'l, T> = Result>; + +impl<'l> BlockBuilder<'l> { + pub fn stack_alloc(&mut self, ty: Type<'l>) -> BlockBuilderResult<'l, Value<'l>> { + let inst = self.push_instruction(InstructionVariant::StackAlloc(ty))?; + Ok(inst.into()) + } + + pub fn store( + &mut self, + target: Value<'l>, + value: Value<'l>, + ) -> BlockBuilderResult<'l, Value<'l>> { + let value_ty = value.ty(); + let target_ty = target.ty(); + match target_ty { + Type::Ptr(PtrT { + base, + mutable: true, + }) if *base == value_ty => {} + + Type::Ref(RefT { + base, + mutable: true, + }) if *base == value_ty => {} + + _ => { + return Err(format!( + "Cannot store value of type `{value_ty}` into target of type `{target_ty}.`" + ) + .into()); + } + }; + let inst = self.push_instruction(InstructionVariant::Store(target, value))?; + Ok(inst.into()) + } + + pub fn load(&mut self, value: Value<'l>) -> BlockBuilderResult<'l, Value<'l>> { + let value_ty = value.ty(); + match value_ty { + Type::Ptr(PtrT { .. }) => {} + Type::Ref(RefT { .. }) => {} + _ => { + return Err(format!("Cannot load value of type `{}`.`", value_ty).into()); + } + } + let inst = self.push_instruction(InstructionVariant::Load(value))?; + Ok(inst.into()) + } + + pub fn ret(&mut self, value: Option>) -> BlockBuilderResult<'l, Value<'l>> { + let ret_t = self.block.func.ty.ret_t; + let value_ty = match value { + Some(v) => v.ty(), + None => self.block.func.ctx.void_t(), + }; + if value_ty != ret_t { + return Err(format!( + "Cannot return value of type `{value_ty}`. Expected type `{ret_t}`." + ) + .into()); + } + let inst = self.push_instruction(InstructionVariant::Return(value))?; + Ok(inst.into()) + } + + #[inline] + pub fn has_termination(&self) -> bool { + match self.instructions.as_slice() { + [.., i] => i.is_block_termination(), + _ => false, + } + } + + pub fn build(self) -> BlockBuilderResult<'l, &'l Block<'l>> { + if !self.has_termination() { + return Err(format!("Block #{} has not termination.", self.block.id).into()); + } + self.block.instructions.set(self.instructions).unwrap(); + Ok(self.block) + } + + fn push_instruction( + &mut self, + variant: InstructionVariant<'l>, + ) -> BlockBuilderResult<'l, &'l Instruction<'l>> { + if self.has_termination() { + return Err(format!("Block #{} has already terminated", self.block.id).into()); + } + let instruction = &*self.block.func.ctx.alloc.alloc(Instruction { + id: AtomicU32::new(u32::MAX), + parent_block: self.block, + variant, + }); + self.instructions.push(instruction); + Ok(instruction) + } +} + +pub struct FunctionBodyBuilder<'l> { + current_block: usize, + func: &'l Function<'l>, + blocks: Vec>, +} + +impl<'l> FunctionBodyBuilder<'l> { + pub fn new(func: &'l Function<'l>) -> Self { + let mut builder = Self { + func, + blocks: vec![], + current_block: 0, + }; + builder.create_block(); + builder + } + + pub fn current_block(&self) -> &'l Block<'l> { + self.blocks[self.current_block].block + } + + pub fn set_current_block(&mut self, block: &'l Block<'l>) -> Option<&'l Block<'l>> { + match std::ptr::eq(block.func, self.func) { + false => None, + true => { + let current = self.current_block(); + self.current_block = block.id as usize; + Some(current) + } + } + } + + pub fn create_block(&mut self) -> &'l Block<'l> { + let block = &*self.func.ctx.alloc.alloc(Block { + id: 0, + func: self.func, + instructions: OnceLock::new(), + }); + let builder = BlockBuilder { + block, + instructions: vec![], + }; + self.blocks.push(builder); + block + } + + pub fn build(self) -> Result<&'l FunctionBody<'l>, Cow<'l, str>> { + let mut next_id = 0..; + let mut blocks = Vec::with_capacity(self.blocks.len()); + for block in self.blocks { + let block = block.build()?; + for inst in block.instructions() { + inst.id.store(next_id.next().unwrap(), Ordering::Relaxed); + } + blocks.push(block); + } + if self.func.body.set(FunctionBody { blocks }).is_err() { + return Err("Function body already exists.".into()); + } + Ok(self.func.body.get().unwrap()) + } + + pub fn stack_alloc(&mut self, ty: Type<'l>) -> BlockBuilderResult<'l, Value<'l>> { + self.current_builder().stack_alloc(ty) + } + + pub fn store( + &mut self, + target: Value<'l>, + value: Value<'l>, + ) -> BlockBuilderResult<'l, Value<'l>> { + self.current_builder().store(target, value) + } + + pub fn load(&mut self, value: Value<'l>) -> BlockBuilderResult<'l, Value<'l>> { + self.current_builder().load(value) + } + + pub fn ret(&mut self, value: Option>) -> BlockBuilderResult<'l, Value<'l>> { + self.current_builder().ret(value) + } + + fn current_builder(&mut self) -> &mut BlockBuilder<'l> { + &mut self.blocks[self.current_block] + } +} diff --git a/assembly/src/functions/mod.rs b/assembly/src/functions/mod.rs new file mode 100644 index 0000000..dc34e54 --- /dev/null +++ b/assembly/src/functions/mod.rs @@ -0,0 +1,72 @@ +use crate::{ + context::Ctx, + functions::ir::{Block, FunctionBodyBuilder}, + types::derivations::FuncT, +}; +use std::{fmt::Debug as FmtDebug, sync::OnceLock}; + +pub mod ir; + +#[non_exhaustive] +pub struct Function<'l> { + pub ty: &'l FuncT<'l>, + pub(crate) body: OnceLock>, + + pub(crate) ctx: Ctx<'l>, +} + +impl<'l> Function<'l> { + pub fn body(&self) -> Option<&FunctionBody<'l>> { + self.body.get() + } + + pub fn create_body(&'l self) -> Option> { + match self.body() { + Some(_) => None, + None => Some(FunctionBodyBuilder::new(self)), + } + } +} + +impl FmtDebug for Function<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let body: &dyn FmtDebug = match self.body() { + Some(b) => b, + None => &None::<()>, + }; + + f.debug_struct("Function") + .field("ty", &format_args!("{}", self.ty)) + .field("body", body) + .finish_non_exhaustive() + } +} + +#[non_exhaustive] +pub struct FunctionBody<'l> { + pub blocks: Vec<&'l Block<'l>>, +} + +impl<'l> FmtDebug for FunctionBody<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + struct BlockDbg<'l>(&'l Block<'l>); + + impl FmtDebug for BlockDbg<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use std::fmt::Write; + let mut name = format!("#{}:", self.0.id); + let mut dbg = f.debug_struct(&name); + for inst in self.0.instructions() { + name.clear(); + write!(name, "%{}", inst.id())?; + dbg.field(&name, &format_args!("{:?}", inst.variant)); + } + dbg.finish() + } + } + + f.debug_set() + .entries(self.blocks.iter().map(|b| BlockDbg(b))) + .finish() + } +} diff --git a/assembly/src/lib.rs b/assembly/src/lib.rs new file mode 100644 index 0000000..d26d103 --- /dev/null +++ b/assembly/src/lib.rs @@ -0,0 +1,4 @@ +pub mod context; +pub mod functions; +pub mod types; +pub mod values; diff --git a/assembly/src/main.rs b/assembly/src/main.rs new file mode 100644 index 0000000..e0b6f14 --- /dev/null +++ b/assembly/src/main.rs @@ -0,0 +1,15 @@ +use leaf_allocators::SyncArenaAllocator; +use leaf_assembly::context::{Context, CreateConst}; +use leaf_assembly::types::derivations::MakeTypeDerivations; + +fn main() { + let allocator = SyncArenaAllocator::default(); + let context = Context::new(&allocator); + let func = context.create_function(context.u32_t().make_fn([])); + let mut builder = func.create_body().unwrap(); + builder + .ret(Some(context.create_const(42u32).into())) + .unwrap(); + builder.build().unwrap(); + println!("{func:#?}"); +} diff --git a/assembly/src/types/derivations.rs b/assembly/src/types/derivations.rs new file mode 100644 index 0000000..5cb7898 --- /dev/null +++ b/assembly/src/types/derivations.rs @@ -0,0 +1,159 @@ +use crate::types::*; +use derive_more::{Debug, Display}; +use leaf_allocators::SyncAllocator; +use scc::HashMap; +use std::{fmt::Display, hash::Hash, sync::Arc}; + +pub struct TypeDerivations<'l> { + alloc: &'l dyn SyncAllocator, + ptr_t: HashMap<(Type<'l>, bool), Type<'l>>, + ref_t: HashMap<(Type<'l>, bool), Type<'l>>, + fun_t: HashMap<(Type<'l>, Arc<[Type<'l>]>), Type<'l>>, +} + +impl<'l> TypeDerivations<'l> { + pub fn new(alloc: &'l dyn SyncAllocator) -> Self { + Self { + alloc, + ptr_t: HashMap::new(), + ref_t: HashMap::new(), + fun_t: HashMap::new(), + } + } + + pub fn make_ptr(&self, base: Type<'l>, mutable: bool) -> &'l PtrT<'l> { + let Type::Ptr(ty) = *self + .ptr_t + .entry_sync((base, mutable)) + .or_insert_with(|| (&*self.alloc.alloc(PtrT { base, mutable })).into()) + else { + unreachable!() + }; + ty + } + + pub fn make_ref(&'l self, base: Type<'l>, mutable: bool) -> &'l RefT<'l> { + let Type::Ref(ty) = *self + .ref_t + .entry_sync((base, mutable)) + .or_insert_with(|| (&*self.alloc.alloc(RefT { base, mutable })).into()) + else { + unreachable!() + }; + ty + } + + pub fn make_fn(&'l self, ret_t: Type<'l>, par_t: Arc<[Type<'l>]>) -> &'l FuncT<'l> { + let Type::Func(ty) = *self + .fun_t + .entry_sync((ret_t, par_t.clone())) + .or_insert_with(|| { + let ctx = ret_t.ctx(); + for ty in par_t.iter() { + if ty.ctx() != ctx { + panic!("All types must share the same context."); + } + } + (&*self.alloc.alloc(FuncT { ret_t, par_t })).into() + }) + else { + unreachable!() + }; + ty + } +} + +impl Eq for TypeDerivations<'_> {} + +impl PartialEq for TypeDerivations<'_> { + fn eq(&self, other: &Self) -> bool { + std::ptr::eq(self, other) + } +} + +impl Hash for TypeDerivations<'_> { + fn hash(&self, state: &mut H) { + (self as *const Self).hash(state); + } +} + +#[non_exhaustive] +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("*{}{}", if *mutable { "mut " } else { "" }, *base)] +pub struct PtrT<'l> { + #[debug("{base}")] + pub base: Type<'l>, + pub mutable: bool, +} + +impl<'l> Into> for &'l PtrT<'l> { + fn into(self) -> Type<'l> { + Type::Ptr(self) + } +} + +#[non_exhaustive] +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("&{}{}", if *mutable { "mut " } else { "" }, *base)] +pub struct RefT<'l> { + #[debug("{base}")] + pub base: Type<'l>, + pub mutable: bool, +} + +impl<'l> Into> for &'l RefT<'l> { + fn into(self) -> Type<'l> { + Type::Ref(self) + } +} + +#[non_exhaustive] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FuncT<'l> { + pub ret_t: Type<'l>, + pub par_t: Arc<[Type<'l>]>, +} + +impl Display for FuncT<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut separator = ""; + write!(f, "fn(")?; + for ty in &*self.par_t { + write!(f, "{separator}{ty}")?; + separator = ", "; + } + write!(f, ") -> {}", self.ret_t)?; + Ok(()) + } +} + +impl<'l> Into> for &'l FuncT<'l> { + fn into(self) -> Type<'l> { + Type::Func(self) + } +} + +pub trait MakeTypeDerivations<'l> { + fn make_ptr(self, mutable: bool) -> &'l PtrT<'l>; + fn make_ref(self, mutable: bool) -> &'l RefT<'l>; + fn make_fn(self, par_t: impl IntoIterator>) -> &'l FuncT<'l>; +} + +impl<'l, T: 'l + Into>> MakeTypeDerivations<'l> for T { + fn make_ptr(self, mutable: bool) -> &'l PtrT<'l> { + let ty = self.into(); + ty.ctx().derivations.make_ptr(ty, mutable) + } + + fn make_ref(self, mutable: bool) -> &'l RefT<'l> { + let ty = self.into(); + ty.ctx().derivations.make_ref(ty, mutable) + } + + fn make_fn(self, par_t: impl IntoIterator>) -> &'l FuncT<'l> { + let ty = self.into(); + ty.ctx() + .derivations + .make_fn(ty, par_t.into_iter().collect()) + } +} diff --git a/assembly/src/types/intrinsics.rs b/assembly/src/types/intrinsics.rs new file mode 100644 index 0000000..2562bb8 --- /dev/null +++ b/assembly/src/types/intrinsics.rs @@ -0,0 +1,98 @@ +use crate::{context::Ctx, types::Type}; +use derive_more::{Debug, Display}; + +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("type")] +#[debug("type")] +pub struct TypeT<'l> { + pub(crate) ctx: Ctx<'l>, +} + +impl<'l> Into> for &'l TypeT<'l> { + fn into(self) -> Type<'l> { + Type::Type(self) + } +} + +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("void")] +#[debug("void")] +pub struct VoidT<'l> { + pub(crate) ctx: Ctx<'l>, +} + +impl<'l> Into> for &'l VoidT<'l> { + fn into(self) -> Type<'l> { + Type::Void(self) + } +} + +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("char")] +#[debug("char")] +pub struct CharT<'l> { + pub(crate) ctx: Ctx<'l>, +} + +impl<'l> Into> for &'l CharT<'l> { + fn into(self) -> Type<'l> { + Type::Char(self) + } +} + +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("bool")] +#[debug("bool")] +pub struct BoolT<'l> { + pub(crate) ctx: Ctx<'l>, +} + +impl<'l> Into> for &'l BoolT<'l> { + fn into(self) -> Type<'l> { + Type::Bool(self) + } +} + +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("const_str")] +#[debug("const_str")] +pub struct ConstStrT<'l> { + pub(crate) ctx: Ctx<'l>, +} + +impl<'l> Into> for &'l ConstStrT<'l> { + fn into(self) -> Type<'l> { + Type::ConstStr(self) + } +} + +#[non_exhaustive] +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("{}{}", if *signed { "i" } else { "u" }, precision)] +#[debug("{self}")] +pub struct IntT<'l> { + pub signed: bool, + pub precision: u32, + pub(crate) ctx: Ctx<'l>, +} + +impl<'l> Into> for &'l IntT<'l> { + fn into(self) -> Type<'l> { + Type::Int(self) + } +} + +#[non_exhaustive] +#[derive(Debug, Display, PartialEq, Eq, Hash)] +#[display("f{}", precision)] +#[debug("{self}")] +pub struct FloatT<'l> { + pub precision: u32, + pub(crate) ctx: Ctx<'l>, +} + +impl<'l> Into> for &'l FloatT<'l> { + fn into(self) -> Type<'l> { + Type::Float(self) + } +} diff --git a/assembly/src/types/mod.rs b/assembly/src/types/mod.rs new file mode 100644 index 0000000..7501cec --- /dev/null +++ b/assembly/src/types/mod.rs @@ -0,0 +1,61 @@ +pub mod derivations; +pub mod intrinsics; + +use crate::{ + context::Ctx, + types::{derivations::*, intrinsics::*}, +}; +use derive_more::{Debug, Display}; + +#[non_exhaustive] +#[derive(Debug, Display, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Type<'l> { + Void(&'l VoidT<'l>), + Char(&'l CharT<'l>), + Bool(&'l BoolT<'l>), + + #[debug("{_0:?}")] + #[display("{_0}")] + Int(&'l IntT<'l>), + + #[debug("{_0:?}")] + #[display("{_0}")] + Float(&'l FloatT<'l>), + + #[debug("{_0:?}")] + #[display("{_0}")] + Ptr(&'l PtrT<'l>), + + #[debug("{_0:?}")] + #[display("{_0}")] + Ref(&'l RefT<'l>), + + #[debug("{_0:?}")] + #[display("{_0}")] + Func(&'l FuncT<'l>), + + #[debug("{_0:?}")] + #[display("{_0}")] + Type(&'l TypeT<'l>), + + #[debug("{_0:?}")] + #[display("{_0}")] + ConstStr(&'l ConstStrT<'l>), +} + +impl<'l> Type<'l> { + pub fn ctx(&self) -> Ctx<'l> { + match self { + Type::Void(t) => t.ctx, + Type::Char(t) => t.ctx, + Type::Bool(t) => t.ctx, + Type::Int(t) => t.ctx, + Type::Float(t) => t.ctx, + Type::Ptr(t) => t.base.ctx(), + Type::Ref(t) => t.base.ctx(), + Type::Func(t) => t.ret_t.ctx(), + Type::Type(t) => t.ctx, + Type::ConstStr(t) => t.ctx, + } + } +} diff --git a/assembly/src/values/mod.rs b/assembly/src/values/mod.rs new file mode 100644 index 0000000..f4724e9 --- /dev/null +++ b/assembly/src/values/mod.rs @@ -0,0 +1,150 @@ +use crate::{context::Ctx, functions::ir::Instruction, types::Type}; +use bitflags::bitflags; +use derive_more::{Debug, Display}; +use half::f16; +use std::hash::Hash; + +bitflags! { + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct ValueFlags: u16 { + const Mutable = 0b00000000_00000001; + const Volatile = 0b00000000_00000010; + const Const = 0b00000001_00000000; + const ConstOnly = 0b00000011_00000000; + const Type = 0b00000111_00000000; + } +} + +#[derive(Debug, Display, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Int { + I8(i8), + I16(i16), + I32(i32), + I64(i64), + I128(i128), + + U8(u8), + U16(u16), + U32(u32), + U64(u64), + U128(u128), +} + +#[derive(Debug, Display, Clone, Copy, PartialEq)] +pub enum Float { + F16(f16), + F32(f32), + F64(f64), +} + +impl Eq for Float {} + +impl Hash for Float { + fn hash(&self, state: &mut H) { + match self { + Float::F16(v) => v.to_ne_bytes().hash(state), + Float::F32(v) => v.to_ne_bytes().hash(state), + Float::F64(v) => v.to_ne_bytes().hash(state), + } + } +} + +#[derive(Debug, Display, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Const<'l> { + #[display("()")] + #[debug("()")] + Void(Ctx<'l>), + + #[debug("{:?}", _0)] + #[display("{}", _0)] + Bool(bool, Ctx<'l>), + + #[debug("{:?}", _0)] + #[display("{}", _0)] + Char(char, Ctx<'l>), + + #[debug("{:?}", _0)] + #[display("\"{}\"", _0)] + Str(&'l str, Ctx<'l>), + + #[debug("{:?}", _0)] + #[display("{}", _0)] + Int(Int, Ctx<'l>), + + #[debug("{:?}", _0)] + #[display("{}", _0)] + Float(Float, Ctx<'l>), +} + +impl<'l> Const<'l> { + pub fn flags(&self) -> ValueFlags { + match self { + Const::Void(_) => ValueFlags::Const, + Const::Char(_, _) => ValueFlags::Const, + Const::Bool(_, _) => ValueFlags::Const, + Const::Str(_, _) => ValueFlags::Const, + Const::Int(_, _) => ValueFlags::Const, + Const::Float(_, _) => ValueFlags::Const, + } + } + + pub fn ty(&self) -> Type<'l> { + match self { + Const::Void(ctx) => ctx.void_t(), + Const::Char(_, ctx) => ctx.char_t(), + Const::Bool(_, ctx) => ctx.bool_t(), + Const::Str(_, ctx) => ctx.const_str_t(), + Const::Int(v, ctx) => match v { + Int::I8(_) => ctx.i8_t(), + Int::I16(_) => ctx.i16_t(), + Int::I32(_) => ctx.i32_t(), + Int::I64(_) => ctx.i64_t(), + Int::I128(_) => ctx.i128_t(), + Int::U8(_) => ctx.u8_t(), + Int::U16(_) => ctx.u16_t(), + Int::U32(_) => ctx.u32_t(), + Int::U64(_) => ctx.u64_t(), + Int::U128(_) => ctx.u128_t(), + }, + Const::Float(v, ctx) => match v { + Float::F16(_) => ctx.f16_t(), + Float::F32(_) => ctx.f32_t(), + Float::F64(_) => ctx.f64_t(), + }, + } + } +} + +impl<'l> Into> for &'l Const<'l> { + fn into(self) -> Value<'l> { + Value::Const(self) + } +} + +#[derive(Debug, Display, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Value<'l> { + #[display("{}", _0)] + Const(&'l Const<'l>), + #[display("{}", _0)] + Type(Type<'l>), + #[display("%{}", _0.id())] + Instruction(&'l Instruction<'l>), +} + +impl<'l> Value<'l> { + pub fn flags(&self) -> ValueFlags { + match self { + Value::Const(c) => c.flags(), + Value::Type(_) => ValueFlags::Type, + Value::Instruction(i) => i.value_flags(), + } + } + + pub fn ty(&self) -> Type<'l> { + match self { + Value::Const(v) => v.ty(), + Value::Type(v) => v.ctx().type_t(), + Value::Instruction(v) => v.value_ty(), + } + } +} diff --git a/parsing/.gitignore b/parsing/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/parsing/.gitignore @@ -0,0 +1 @@ +/target diff --git a/parsing/Cargo.toml b/parsing/Cargo.toml new file mode 100644 index 0000000..7e68ae6 --- /dev/null +++ b/parsing/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "leaf_parsing" +version = "0.1.0" +edition = "2024" + +[dependencies] +arcstr = "1.2.0" +chumsky = { version = "0.11.1", features = ["pratt"] } +logos = "0.15.1" diff --git a/parsing/src/ast.rs b/parsing/src/ast.rs new file mode 100644 index 0000000..a5d6078 --- /dev/null +++ b/parsing/src/ast.rs @@ -0,0 +1,93 @@ +use crate::tokenizer::Token; + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct AstRoot { + pub imports: Vec, + pub decls: Vec, +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct Ident { + pub tok_ident: Token, +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct Import { + pub tok_pub: Option, + pub tok_import: Token, + pub value: Value, +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct Struct { + pub tok_struct: Token, + pub tok_open_curly: Token, + pub fields: Vec, + pub tok_close_curly: Token, +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct StructField { + pub ident: Ident, + pub tok_colon: Token, + pub r#type: Value, +} + +pub enum Number { + Integer { value: i128, token: Token }, + Decimal { value: f64, token: Token }, +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub enum Value { + Ident(Ident), + Struct(Struct), + Statement(Statement), + + BinaryOp { + lhs: Box, + tok_op: Token, + rhs: Box, + }, +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub enum Statement { + While(While), + Return(Return), + ConstDecl(ConstDecl), +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct ConstDecl { + pub name: Ident, + pub tok_colon_t: Token, + pub r#type: Option>, + pub tok_colon_v: Token, + pub value: Box, +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct While { + pub tok_while: Token, + pub condition: Box, + pub tok_open_curly: Token, + pub statements: Vec, + pub tok_close_curly: Token, +} + +#[derive(Debug, Clone)] +#[non_exhaustive] +pub struct Return { + pub tok_return: Token, + pub value: Box, +} diff --git a/parsing/src/lib.rs b/parsing/src/lib.rs new file mode 100644 index 0000000..1abcb7c --- /dev/null +++ b/parsing/src/lib.rs @@ -0,0 +1,11 @@ +use arcstr::Substr; +use std::path::PathBuf; + +pub mod ast; +pub mod parser; +pub mod tokenizer; + +pub struct SourceCode { + pub text: Substr, + pub file: PathBuf, +} diff --git a/parsing/src/main.rs b/parsing/src/main.rs new file mode 100644 index 0000000..40dba9e --- /dev/null +++ b/parsing/src/main.rs @@ -0,0 +1,25 @@ +use std::path::PathBuf; + +use arcstr::literal_substr; +use chumsky::{Parser, input::Stream}; +use leaf_parsing::{SourceCode, tokenizer::tokenize}; + +fn main() { + let source = SourceCode { + text: literal_substr!( + " + import leaf.intrinsics; + import leaf.values; + + Vector3 :: struct { + x: 2, + } + " + ), + file: PathBuf::from("test.leaf"), + }; + let tokens = tokenize(&source).unwrap(); + let stream = Stream::from_iter(tokens.iter().cloned()); + let ast = leaf_parsing::parser::ast_root().parse(stream); + println!("{ast:#?}") +} diff --git a/parsing/src/parser.rs b/parsing/src/parser.rs new file mode 100644 index 0000000..cfd2f54 --- /dev/null +++ b/parsing/src/parser.rs @@ -0,0 +1,147 @@ +use crate::{ast::*, tokenizer::*}; +use chumsky::{ + extra::Err, + input::Stream, + pratt::{infix, left}, + prelude::*, + util::Maybe, +}; + +pub type Tokens = Stream; + +pub fn parse<'l>(tokens: Vec) -> ParseResult> { + let stream = Stream::from_iter(tokens); + ast_root().parse(stream) +} + +macro_rules! make_parsers { + ( + $( + $vis:vis fn $name:ident() -> $type:ty { + $($tt:tt)* + } + )* + ) => { + $( + $vis fn $name<'l, T: Iterator + 'l>() -> impl Parser<'l, Tokens, $type, Err>> { + $($tt)* + } + )* + }; +} + +make_parsers! { + pub fn ident() -> Ident { + tok(TokenKind::Ident).map(|tok_ident| Ident { tok_ident }) + } + + pub fn value() -> Value { + let atom = choice(( + choice(( + ident().map(Value::Ident), + type_struct().map(Value::Struct), + )), + statement_val(), + )); + atom.pratt(( + infix(left(1), tok(TokenKind::Add), |lhs, tok_op, rhs, _| Value::BinaryOp { lhs: Box::new(lhs), tok_op, rhs: Box::new(rhs) }), + infix(left(1), tok(TokenKind::Sub), |lhs, tok_op, rhs, _| Value::BinaryOp { lhs: Box::new(lhs), tok_op, rhs: Box::new(rhs) }), + infix(left(2), tok(TokenKind::Mul), |lhs, tok_op, rhs, _| Value::BinaryOp { lhs: Box::new(lhs), tok_op, rhs: Box::new(rhs) }), + infix(left(2), tok(TokenKind::Div), |lhs, tok_op, rhs, _| Value::BinaryOp { lhs: Box::new(lhs), tok_op, rhs: Box::new(rhs) }), + infix(left(2), tok(TokenKind::Mod), |lhs, tok_op, rhs, _| Value::BinaryOp { lhs: Box::new(lhs), tok_op, rhs: Box::new(rhs) }), + infix(left(3), tok(TokenKind::Period), |lhs, tok_op, rhs, _| Value::BinaryOp { lhs: Box::new(lhs), tok_op, rhs: Box::new(rhs) }), + )).boxed() + } + + pub fn type_struct() -> Struct { + let struct_fld = ident() + .then(tok(TokenKind::Colon)) + .then(value()) + .map(|((ident, tok_colon), r#type)| StructField { ident, tok_colon, r#type }); + + tok(TokenKind::Struct) + .then(tok(TokenKind::OpenCurly)) + .then(struct_fld.separated_by(tok(TokenKind::Comma)).allow_trailing().collect()) + .then(tok(TokenKind::CloseCurly)) + .map(|(((tok_struct, tok_open_curly), fields), tok_close_curly)| { + Struct { tok_struct, tok_open_curly, fields, tok_close_curly } + }) + } + + pub fn statement() -> Statement { + choice(( + const_decl().map(Statement::ConstDecl), + statement_while().map(Statement::While), + statement_return().map(Statement::Return), + )).boxed() + } + + pub fn statement_val() -> Value { + choice(( + statement_while().map(Statement::While), + statement_return().map(Statement::Return), + )).map(Value::Statement) + } + + pub fn statement_return() -> Return { + tok(TokenKind::Return) + .then(value()) + .map(|(tok_return, value)| Return { tok_return, value: Box::new(value) }) + } + + pub fn statement_while() -> While { + tok(TokenKind::While) + .then(value()) + .then(tok(TokenKind::OpenCurly)) + .then(statement().repeated().collect()) + .then(tok(TokenKind::CloseCurly)) + .map(| ((((tok_while, condition), tok_open_curly), statements), tok_close_curly)| While { + tok_while, condition: Box::new(condition), tok_open_curly, statements, tok_close_curly + }) + } + + pub fn const_decl() -> ConstDecl { + ident() + .then(tok(TokenKind::Colon)) + .then(value().or_not()) + .then(tok(TokenKind::Colon)) + .then(value()) + .map(|((((name, tok_colon_t), r#type), tok_colon_v), value)| ConstDecl { + name, + tok_colon_t, + r#type: r#type.map(Box::new), + tok_colon_v, + value: Box::new(value), + }) + } + + pub fn import() -> Import { + tok(TokenKind::Pub).or_not() + .then(tok(TokenKind::Import)) + .then(value()) + .map(|((tok_pub, tok_import), value)| Import { tok_pub, tok_import, value }) + } + + pub fn ast_root() -> AstRoot { + let imports = import().then_ignore(tok(TokenKind::Semicolon)).repeated().collect::>(); + let decls = const_decl().repeated().collect::>(); + imports.then(decls).map(|(imports, decls)| AstRoot { imports, decls }) + } +} + +#[inline(always)] +fn tok<'l, T: Iterator + 'l>( + k: TokenKind, +) -> impl Parser<'l, Tokens, Token, Err>> { + custom(move |inp| { + let before = inp.cursor(); + match inp.next() { + Some(token @ Token { kind, .. }) if kind == k => Ok(token), + None => Err(Simple::new(None, inp.span_since(&before))), + Some(token) => Err(Simple::new( + Some(Maybe::Val(token)), + inp.span_since(&before), + )), + } + }) +} diff --git a/parsing/src/tokenizer.rs b/parsing/src/tokenizer.rs new file mode 100644 index 0000000..770da6a --- /dev/null +++ b/parsing/src/tokenizer.rs @@ -0,0 +1,161 @@ +use arcstr::Substr; +use logos::{Logos, Source, source::Chunk}; +use std::ops::Range; + +use crate::SourceCode; + +#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[logos(source = SourceCode)] +pub enum TokenKind { + #[regex(r"[ \t\n\f\r]+", logos::skip)] + Whitespace, + + #[token("fn")] + Fn, + #[token("enum")] + Enum, + #[token("union")] + Union, + #[token("struct")] + Struct, + #[token("interface")] + Interface, + + #[token("pub")] + Pub, + #[token("mut")] + Mut, + #[token("for")] + For, + #[token("while")] + While, + #[token("yield")] + Yield, + #[token("import")] + Import, + #[token("return")] + Return, + #[token("continue")] + Continue, + + #[token("+")] + Add, + #[token("-")] + Sub, + #[token("*")] + Mul, + #[token("/")] + Div, + #[token("%")] + Mod, + #[token(":")] + Colon, + #[token(",")] + Comma, + #[token(".")] + Period, + #[token(";")] + Semicolon, + + #[token("=")] + Eq, + #[token("==")] + CmpEq, + #[token("!=")] + CmpNe, + #[token("<")] + CmpLt, + #[token("<=")] + CmpLe, + #[token(">")] + CmpGt, + #[token(">=")] + CmpGe, + + #[token("(")] + OpenRound, + #[token("[")] + OpenSquare, + #[token("{")] + OpenCurly, + #[token(")")] + CloseRound, + #[token("]")] + CloseSquare, + #[token("}")] + CloseCurly, + + #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")] + Ident, + #[regex(r"[0-9]+")] + #[regex(r"[0-9]+\.[0-9]+")] + Number, +} + +impl Source for SourceCode { + type Slice<'a> = Substr; + + #[inline] + fn len(&self) -> usize { + Source::len(self.text.as_str()) + } + + #[inline] + fn read<'a, Chunk>(&'a self, offset: usize) -> Option + where + Chunk: self::Chunk<'a>, + { + Source::read(self.text.as_str(), offset) + } + + #[inline] + unsafe fn read_byte_unchecked(&self, offset: usize) -> u8 { + unsafe { Source::read_byte_unchecked(self.text.as_str(), offset) } + } + + #[inline] + fn slice(&self, range: std::ops::Range) -> Option> { + _ = self.text.get(range.clone())?; + Some(self.text.substr(range)) + } + + #[inline] + unsafe fn slice_unchecked(&self, range: std::ops::Range) -> Self::Slice<'_> { + self.text.substr(range) + } + + #[inline] + fn is_boundary(&self, index: usize) -> bool { + Source::is_boundary(self.text.as_str(), index) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Token { + pub text: Substr, + pub kind: TokenKind, +} + +impl PartialEq for Token { + #[inline] + fn eq(&self, other: &TokenKind) -> bool { + self.kind == *other + } +} + +pub fn tokenize(source: &SourceCode) -> Result, Range> { + let mut tokens = Vec::with_capacity(1024); + let mut lexer = TokenKind::lexer(&source).spanned(); + while let Some((token, span)) = lexer.next() { + match token { + Ok(kind) => tokens.push(Token { + kind, + text: source.text.substr(span), + }), + Err(()) => { + return Err(lexer.slice().range()); + } + } + } + Ok(tokens) +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..9a2357f --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,4 @@ +[toolchain] +channel = "1.91.0" +components = ["clippy", "rustfmt"] +profile = "default" diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..218e203 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +hard_tabs = true