From eb3c5fa06bb2071a38713db5245806151289897b Mon Sep 17 00:00:00 2001 From: Kennedy Tedesco Date: Wed, 8 Feb 2023 09:43:15 -0300 Subject: [PATCH 1/5] new: source content is now optional --- Cargo.toml | 1 + src/hash.rs | 22 ++++++++++ src/lib.rs | 7 ++-- src/loader.rs | 4 +- src/source.rs | 110 ++++++++++++++++++++++++++++++++++---------------- 5 files changed, 105 insertions(+), 39 deletions(-) create mode 100644 src/hash.rs diff --git a/Cargo.toml b/Cargo.toml index 2b50bc0..225ca56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ keywords = ["ara", "file-source", "source-map"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +rustc-hash = { version = "1.1.0" } diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 0000000..5a45e50 --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,22 @@ +use std::hash::Hasher; + +pub trait ContentHasher: Send + Sync { + fn hash(&self, content: &str) -> u64; +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct FxHasher; + +impl FxHasher { + pub fn new() -> Self { + Self + } +} + +impl ContentHasher for FxHasher { + fn hash(&self, content: &str) -> u64 { + let mut hasher = rustc_hash::FxHasher::default(); + hasher.write(content.as_bytes()); + hasher.finish() + } +} diff --git a/src/lib.rs b/src/lib.rs index 41bebc7..f67865e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ use crate::error::Error; use crate::source::Source; pub mod error; +pub(crate) mod hash; pub mod loader; pub mod source; @@ -62,13 +63,13 @@ mod tests { map.add(Source::new( SourceKind::Script, + "/Documents/Project", "foo.ara", - "function foo(): void {}", )); map.add(Source::new( SourceKind::Script, + "/Documents/Project", "bar.ara", - "function bar(): void {}", )); assert_eq!(map.get(1).unwrap().origin, Some("foo.ara".to_string())); @@ -89,8 +90,8 @@ mod tests { other.add(Source::new( SourceKind::Script, + "/Documents/Project", "baz.ara", - "function baz(): void {}", )); map.merge(&mut other); diff --git a/src/loader.rs b/src/loader.rs index 66d8b62..ed2a9af 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -109,18 +109,18 @@ impl SourceLoader for FileSourceLoader { file.to_path_buf() }; - let content = std::fs::read_to_string(&file)?; let origin = file .strip_prefix(&self.root) .map(|path| path.to_string_lossy()) .unwrap(); + let kind = if origin.ends_with(ARA_DEFINTION_EXTENSION) { SourceKind::Definition } else { SourceKind::Script }; - Ok(SourceMap::new(vec![Source::new(kind, origin, content)])) + Ok(SourceMap::new(vec![Source::new(kind, &self.root, origin)])) } } diff --git a/src/source.rs b/src/source.rs index 5368e32..b629af6 100644 --- a/src/source.rs +++ b/src/source.rs @@ -1,20 +1,36 @@ +use std::fs; +use std::path::PathBuf; + +use crate::hash::ContentHasher; +use crate::hash::FxHasher; + pub const DEFAULT_NAME: &str = ""; #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum SourceKind { /// A definition is a piece of code that is not executed, but can be used - /// to define foriegn symbols ( e.g from PHP ). + /// to define foreign symbols ( e.g from PHP ). Definition, /// A script is a piece of code that is executed. Script, } +pub trait SourceTrait { + /// Reads the source content. + fn content(&mut self) -> std::io::Result; + + /// Returns the source content hash. + fn hash(&mut self) -> std::io::Result; +} + #[derive(Debug, PartialEq, Eq, Clone)] pub struct Source { pub kind: SourceKind, + pub root: Option, pub origin: Option, - pub content: String, + pub content: Option, + hasher: FxHasher, } /// A source. @@ -27,40 +43,24 @@ pub struct Source { /// use ara_source::source::Source; /// use ara_source::source::SourceKind; /// -/// let source = Source::new(SourceKind::Script, "main.ara", "function main(): void {}"); +/// let source = Source::new(SourceKind::Script, "/Documents/Project", "src/main.ara"); /// /// assert_eq!(source.kind, SourceKind::Script); -/// assert_eq!(source.origin, Some("main.ara".to_string())); -/// assert_eq!(source.content, "function main(): void {}"); +/// assert_eq!(source.origin, Some("src/main.ara".to_string())); +/// assert_eq!(source.root, Some("/Documents/Project".into())); +/// assert_eq!(source.content, None); /// -/// assert_eq!(source.name(), "main.ara"); +/// assert_eq!(source.name(), "src/main.ara"); /// ``` impl Source { - /// Create a new source with the given content. - /// - /// Example: - /// - /// ```rust - /// use ara_source::source::Source; - /// use ara_source::source::SourceKind; - /// - /// let source = Source::inline(SourceKind::Definition, "function main(): void {}"); - /// - /// assert_eq!(source.kind, SourceKind::Definition); - /// assert_eq!(source.origin, None); - /// assert_eq!(source.content, "function main(): void {}"); - /// - /// assert_eq!(source.name(), ""); - /// ``` - pub fn new, C: Into>( - kind: SourceKind, - origin: O, - content: C, - ) -> Source { + /// Create a new source with the given origin. + pub fn new, R: Into>(kind: SourceKind, root: R, origin: O) -> Source { Source { kind, + root: Some(root.into()), origin: Some(origin.into()), - content: content.into(), + content: None, + hasher: FxHasher::new(), } } @@ -75,14 +75,19 @@ impl Source { /// let source = Source::inline(SourceKind::Definition, "function main(): void {}"); /// /// assert_eq!(source.kind, SourceKind::Definition); + /// assert_eq!(source.root, None); /// assert_eq!(source.origin, None); - /// assert_eq!(source.content, "function main(): void {}"); + /// assert_eq!(source.content, Some("function main(): void {}".to_string())); + /// + /// assert_eq!(source.name(), ""); /// ``` pub fn inline>(kind: SourceKind, content: C) -> Source { Source { kind, + root: None, origin: None, - content: content.into(), + content: Some(content.into()), + hasher: FxHasher::new(), } } @@ -97,16 +102,53 @@ impl Source { /// use ara_source::source::Source; /// use ara_source::source::SourceKind; /// - /// let source = Source::new(SourceKind::Definition, "main.ara", "function main(): void {}"); - /// assert_eq!(source.name(), "main.ara"); + /// let source = Source::new(SourceKind::Definition, "/Documents/Project", "src/Foo/main.ara"); + /// assert_eq!(source.name(), "src/Foo/main.ara"); /// /// let source = Source::inline(SourceKind::Definition, "function main(): void {}"); /// assert_eq!(source.name(), ""); /// ``` pub fn name(&self) -> &str { - match self.origin { - Some(ref origin) => origin, + match &self.origin { + Some(origin) => origin, None => DEFAULT_NAME, } } + + /// Returns the complete path of the source. + /// + /// Example: + /// + /// ```rust + /// use ara_source::source::Source; + /// use ara_source::source::SourceKind; + /// + /// let source = Source::new(SourceKind::Definition, "/Documents/Project", "src/Foo/main.ara"); + /// assert_eq!(source.source_path(), Some("/Documents/Project/src/Foo/main.ara".into())); + /// ``` + pub fn source_path(&self) -> Option { + self.root + .as_ref() + .map(|root| root.join(self.origin.as_ref().unwrap())) + } +} + +impl SourceTrait for Source { + fn content(&mut self) -> std::io::Result { + let path = self + .source_path() + .expect("Both root and origin must be present in order to read the source content"); + + fs::read_to_string(path) + } + + fn hash(&mut self) -> std::io::Result { + if self.content.is_some() { + return Ok(self.hasher.hash(self.content.as_ref().unwrap())); + } + + let content = self.content()?; + + Ok(self.hasher.hash(&content)) + } } From 06dd184611e18964c101750cd5abae56ebe1323f Mon Sep 17 00:00:00 2001 From: Kennedy Tedesco Date: Wed, 8 Feb 2023 20:12:39 -0300 Subject: [PATCH 2/5] no need to be mutable --- src/source.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/source.rs b/src/source.rs index b629af6..09c5acd 100644 --- a/src/source.rs +++ b/src/source.rs @@ -18,10 +18,10 @@ pub enum SourceKind { pub trait SourceTrait { /// Reads the source content. - fn content(&mut self) -> std::io::Result; + fn content(&self) -> std::io::Result; /// Returns the source content hash. - fn hash(&mut self) -> std::io::Result; + fn hash(&self) -> std::io::Result; } #[derive(Debug, PartialEq, Eq, Clone)] @@ -134,7 +134,7 @@ impl Source { } impl SourceTrait for Source { - fn content(&mut self) -> std::io::Result { + fn content(&self) -> std::io::Result { let path = self .source_path() .expect("Both root and origin must be present in order to read the source content"); @@ -142,7 +142,7 @@ impl SourceTrait for Source { fs::read_to_string(path) } - fn hash(&mut self) -> std::io::Result { + fn hash(&self) -> std::io::Result { if self.content.is_some() { return Ok(self.hasher.hash(self.content.as_ref().unwrap())); } From 8aab9a832353930e03ecff71fd831e77332692e2 Mon Sep 17 00:00:00 2001 From: Kennedy Tedesco Date: Wed, 8 Feb 2023 20:50:45 -0300 Subject: [PATCH 3/5] if it is an inline source, returns its content --- src/source.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/source.rs b/src/source.rs index 09c5acd..e0689a5 100644 --- a/src/source.rs +++ b/src/source.rs @@ -135,6 +135,10 @@ impl Source { impl SourceTrait for Source { fn content(&self) -> std::io::Result { + if self.content.is_some() { + return Ok(self.content.as_ref().unwrap().clone()); + } + let path = self .source_path() .expect("Both root and origin must be present in order to read the source content"); From eda602c4079daf9120953921da4e565732c1f589 Mon Sep 17 00:00:00 2001 From: Kennedy Tedesco Date: Thu, 16 Feb 2023 18:36:25 -0300 Subject: [PATCH 4/5] Remove SourceTrait and make content() stores the content --- src/hash.rs | 6 ++++++ src/lib.rs | 2 +- src/source.rs | 42 ++++++++++++++++++++---------------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/hash.rs b/src/hash.rs index 5a45e50..34c002e 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -13,6 +13,12 @@ impl FxHasher { } } +impl Default for FxHasher { + fn default() -> Self { + Self::new() + } +} + impl ContentHasher for FxHasher { fn hash(&self, content: &str) -> u64 { let mut hasher = rustc_hash::FxHasher::default(); diff --git a/src/lib.rs b/src/lib.rs index f67865e..9232695 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,7 @@ use crate::error::Error; use crate::source::Source; pub mod error; -pub(crate) mod hash; +pub mod hash; pub mod loader; pub mod source; diff --git a/src/source.rs b/src/source.rs index e0689a5..3476e9d 100644 --- a/src/source.rs +++ b/src/source.rs @@ -1,5 +1,6 @@ use std::fs; use std::path::PathBuf; +use std::sync::Arc; use crate::hash::ContentHasher; use crate::hash::FxHasher; @@ -16,20 +17,12 @@ pub enum SourceKind { Script, } -pub trait SourceTrait { - /// Reads the source content. - fn content(&self) -> std::io::Result; - - /// Returns the source content hash. - fn hash(&self) -> std::io::Result; -} - #[derive(Debug, PartialEq, Eq, Clone)] pub struct Source { pub kind: SourceKind, pub root: Option, pub origin: Option, - pub content: Option, + pub content: Option>, hasher: FxHasher, } @@ -77,7 +70,7 @@ impl Source { /// assert_eq!(source.kind, SourceKind::Definition); /// assert_eq!(source.root, None); /// assert_eq!(source.origin, None); - /// assert_eq!(source.content, Some("function main(): void {}".to_string())); + /// assert_eq!(source.content.as_ref().unwrap().as_str(), "function main(): void {}"); /// /// assert_eq!(source.name(), ""); /// ``` @@ -86,7 +79,7 @@ impl Source { kind, root: None, origin: None, - content: Some(content.into()), + content: Some(Arc::new(content.into())), hasher: FxHasher::new(), } } @@ -131,28 +124,33 @@ impl Source { .as_ref() .map(|root| root.join(self.origin.as_ref().unwrap())) } -} -impl SourceTrait for Source { - fn content(&self) -> std::io::Result { - if self.content.is_some() { - return Ok(self.content.as_ref().unwrap().clone()); + /// Returns the content of the source. + /// If the source has no content, the content is read from the file system. + pub fn content(&mut self) -> std::io::Result> { + if let Some(content) = self.content.as_ref() { + return Ok(content.clone()); } let path = self .source_path() .expect("Both root and origin must be present in order to read the source content"); - fs::read_to_string(path) - } + let content = Arc::new(fs::read_to_string(path)?); + self.content = Some(content.clone()); - fn hash(&self) -> std::io::Result { - if self.content.is_some() { - return Ok(self.hasher.hash(self.content.as_ref().unwrap())); - } + Ok(content) + } + /// Returns the hash of the source content. + pub fn hash(&mut self) -> std::io::Result { let content = self.content()?; Ok(self.hasher.hash(&content)) } + + /// Dispose the content of the source. + pub fn dispose_content(&mut self) { + self.content = None; + } } From ea1097442c887871a8221ec4ce6009c829ddca46 Mon Sep 17 00:00:00 2001 From: Kennedy Tedesco Date: Sat, 25 Feb 2023 11:22:40 -0300 Subject: [PATCH 5/5] use BufReader --- src/source.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/source.rs b/src/source.rs index 3476e9d..9db0bba 100644 --- a/src/source.rs +++ b/src/source.rs @@ -1,4 +1,6 @@ use std::fs; +use std::io::BufReader; +use std::io::Read; use std::path::PathBuf; use std::sync::Arc; @@ -136,10 +138,14 @@ impl Source { .source_path() .expect("Both root and origin must be present in order to read the source content"); - let content = Arc::new(fs::read_to_string(path)?); - self.content = Some(content.clone()); + let mut reader = BufReader::new(fs::File::open(path)?); + let mut file_contents = String::new(); + reader.read_to_string(&mut file_contents)?; - Ok(content) + let content_reference = Arc::new(file_contents); + self.content = Some(content_reference.clone()); + + Ok(content_reference) } /// Returns the hash of the source content.