diff --git a/Cargo.lock b/Cargo.lock index 4f2b4d8..e0bc3b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -201,7 +201,7 @@ dependencies = [ [[package]] name = "futhorc" -version = "0.1.12" +version = "0.1.13" dependencies = [ "atom_syndication", "chrono", @@ -214,6 +214,7 @@ dependencies = [ "serde_yaml", "slug", "url", + "walkdir", ] [[package]] @@ -391,6 +392,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "serde" version = "1.0.124" @@ -551,6 +561,17 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -573,6 +594,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 40d4e77..3f476b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ clap = "2.33.3" atom_syndication = "0.11.0" chrono = "0.4.19" url = { version = "2.2.2", features = ["serde"] } +walkdir = "2.3.2" [features] fail-on-warnings = [] diff --git a/src/build.rs b/src/build.rs index b3db7be..d9cd1fa 100644 --- a/src/build.rs +++ b/src/build.rs @@ -25,7 +25,8 @@ pub fn build_site(config: Config) -> Result<()> { ); // collect all posts - let posts = post_parser.parse_posts(&config.posts_source_directory)?; + let (posts, static_files) = + post_parser.parse_posts(&config.posts_source_directory)?; // Parse the template files. let index_template = parse_template(config.index_template.iter())?; @@ -56,6 +57,9 @@ pub fn build_site(config: Config) -> Result<()> { }; writer.write_posts(&posts)?; + // write the static files + writer.write_static_files(&static_files)?; + // copy static directory copy_dir( &config.static_source_directory, diff --git a/src/markdown.rs b/src/markdown.rs index 3bbbed1..18549e5 100644 --- a/src/markdown.rs +++ b/src/markdown.rs @@ -56,6 +56,29 @@ impl<'a> EventConverter<'a> { // intercepting heading tags and returning the tag size + 2. Tag::Heading(s) => Tag::Heading(s + 2), + // Internal image links (links from blog posts, pages, and assets + // *to* posts, pages, and assets) need to be converted from their + // input formats to their output formats (e.g., a post linking to + // another post as `foo.md` will need to be converted to an + // equivalent link ending in `foo.html`). + Tag::Image( + link @ (LinkType::Inline + | LinkType::Reference + | LinkType::ReferenceUnknown + | LinkType::Shortcut + | LinkType::Autolink + | LinkType::Collapsed + | LinkType::CollapsedUnknown), + url, + title, + ) => Tag::Image( + link, + CowStr::Boxed( + self.link_converter.convert(&url)?.into_boxed_str(), + ), + title, + ), + // Internal links (links from blog posts, pages, and assets *to* // posts, pages, and assets) need to be converted from their input // formats to their output formats (e.g., a post linking to another diff --git a/src/parser.rs b/src/parser.rs index ad8787b..e2b8ba6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,7 +7,7 @@ use std::{ collections::HashSet, fmt, fs::{read_dir, File}, - path::Path, + path::{Path, PathBuf}, }; use serde::Deserialize; @@ -47,21 +47,63 @@ impl<'a> Parser<'a> { } } + fn parse_post_bundle( + &self, + posts_source_directory: &Path, + relative_path: &Path, + static_files: &mut Vec, + ) -> Result { + // We want to make sure we can parse a post before we mutate + // `static_files` + let post = self.parse_post( + posts_source_directory, + &relative_path.join("index.md"), + )?; + + // Mutate `static_files` only after we've confirmed that we've parsed a + // valid post. + use walkdir::WalkDir; + let abs = posts_source_directory.join(relative_path); + for result in WalkDir::new(&abs) { + let entry = result?; + if entry.file_type().is_file() && entry.file_name() != "index.md" { + static_files.push(( + entry.path().to_owned(), + self.posts_directory + .join(relative_path.file_name().unwrap()) + // strip_prefix shouldn't fail since `abs` is always an + // ancestor of `entry_path` + .join(entry.path().strip_prefix(&abs).unwrap()), + )); + } + } + + Ok(post) + } + /// Parses a single [`Post`] from an `id` and `input` strings. The `id` is /// the path of the file relative to the `posts_source_directory` less the /// extension (e.g., the ID for a post whose source file is /// `{posts_source_directory}/foo/bar.md` is `foo/bar`). - fn parse_post(&self, id: &str, input: &str) -> Result { - match self._parse_post(id, input) { + fn parse_post( + &self, + posts_source_directory: &Path, + relative_path: &Path, + ) -> Result { + match self._parse_post(posts_source_directory, relative_path) { Ok(p) => Ok(p), Err(e) => Err(Error::Annotated( - format!("parsing post `{}`", id), + format!("parsing post `{:?}`", relative_path), Box::new(e), )), } } - fn _parse_post(&self, id: &str, input: &str) -> Result { + fn _parse_post( + &self, + posts_source_directory: &Path, + relative_path: &Path, + ) -> Result { fn frontmatter_indices(input: &str) -> Result<(usize, usize, usize)> { const FENCE: &str = "---"; if !input.starts_with(FENCE) { @@ -77,15 +119,34 @@ impl<'a> Parser<'a> { } } + use std::io::Read; + let mut contents = String::new(); + File::open(posts_source_directory.join(relative_path))? + .read_to_string(&mut contents)?; + let input: &str = &contents; + let (yaml_start, yaml_stop, body_start) = frontmatter_indices(input)?; let frontmatter: Frontmatter = serde_yaml::from_str(&input[yaml_start..yaml_stop])?; - let file_name = format!("{}.html", id); + + let with_extension = if relative_path.ends_with("index.md") { + relative_path.parent().unwrap() + } else { + relative_path + } + .with_extension("html"); + + let file_name = with_extension + .file_name() + .ok_or_else(|| InvalidFileNameError(relative_path.to_owned()))? + .to_str() + .ok_or_else(|| InvalidFileNameError(relative_path.to_owned()))?; + let mut post = Post { title: frontmatter.title, date: frontmatter.date, file_path: self.posts_directory.join(&file_name), - url: self.posts_url.join(&file_name)?, + url: self.posts_url.join(file_name)?, tags: frontmatter .tags .iter() @@ -116,7 +177,7 @@ impl<'a> Parser<'a> { markdown::to_html( &mut post.body, self.posts_url, - id, + file_name, &input[body_start..], post.url.as_str(), )?; @@ -144,25 +205,38 @@ impl<'a> Parser<'a> { /// /// World /// ``` - pub fn parse_posts(&self, source_directory: &Path) -> Result> { - use std::io::Read; + pub fn parse_posts(&self, source_directory: &Path) -> Result { const MARKDOWN_EXTENSION: &str = ".md"; let mut posts = Vec::new(); + let mut static_files = Vec::new(); for result in read_dir(source_directory)? { let entry = result?; let os_file_name = entry.file_name(); let file_name = os_file_name.to_string_lossy(); - if file_name.ends_with(MARKDOWN_EXTENSION) { - let base_name = file_name.trim_end_matches(MARKDOWN_EXTENSION); - let mut contents = String::new(); - File::open(entry.path())?.read_to_string(&mut contents)?; - posts.push(self.parse_post(base_name, &contents)?); + if Self::is_bundle(&entry)? { + posts.push(self.parse_post_bundle( + source_directory, + // strip_prefix() should never fail + entry.path().strip_prefix(source_directory).unwrap(), + &mut static_files, + )?) + } else if file_name.ends_with(MARKDOWN_EXTENSION) { + posts.push(self.parse_post( + source_directory, + // should never fail + entry.path().strip_prefix(source_directory).unwrap(), + )?); } } posts.sort_by(|a, b| b.date.cmp(&a.date)); - Ok(posts) + Ok((posts, static_files)) + } + + fn is_bundle(entry: &std::fs::DirEntry) -> std::io::Result { + Ok(entry.file_type()?.is_dir() + && entry.path().join("index.md").is_file()) } } @@ -181,6 +255,27 @@ struct Frontmatter { pub tags: HashSet, } +#[derive(Debug)] +pub struct InvalidFileNameError(PathBuf); + +impl fmt::Display for InvalidFileNameError { + /// Displays an [`InvalidFileNameError`] as human-readable text. + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "invalid file name: {:?}", &self.0) + } +} + +impl std::error::Error for InvalidFileNameError { + /// Implements the [`std::error::Error`] trait for [`InvalidFileNameError`]. + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + None + } +} + +pub type Posts = (Vec, Vec); + +pub type StaticFile = (PathBuf, PathBuf); + /// Represents the result of a [`Post`]-parse operation. pub type Result = std::result::Result; @@ -205,6 +300,12 @@ pub enum Error { /// Returned for other I/O errors. Io(std::io::Error), + /// Returned for WalkDir I/O errors. + WalkDir(walkdir::Error), + + /// Returned when a source file isn't valid UTF-8. + InvalidFileName(InvalidFileNameError), + /// An error with an annotation. Annotated(String, Box), } @@ -222,6 +323,8 @@ impl fmt::Display for Error { Error::DeserializeYaml(err) => err.fmt(f), Error::UrlParse(err) => err.fmt(f), Error::Io(err) => err.fmt(f), + Error::WalkDir(err) => err.fmt(f), + Error::InvalidFileName(err) => err.fmt(f), Error::Annotated(annotation, err) => { write!(f, "{}: {}", &annotation, err) } @@ -238,11 +341,19 @@ impl std::error::Error for Error { Error::DeserializeYaml(err) => Some(err), Error::UrlParse(err) => Some(err), Error::Io(err) => Some(err), + Error::WalkDir(err) => Some(err), + Error::InvalidFileName(err) => Some(err), Error::Annotated(_, err) => Some(err), } } } +impl From for Error { + fn from(err: InvalidFileNameError) -> Error { + Error::InvalidFileName(err) + } +} + impl From for Error { fn from(err: markdown::Error) -> Error { match err { @@ -268,6 +379,14 @@ impl From for Error { } } +impl From for Error { + /// Converts a [`walkdir::Error`] into an [`Error`]. It allows us to + // use the `?` operator for fallible I/O functions. + fn from(err: walkdir::Error) -> Error { + Error::WalkDir(err) + } +} + impl From for Error { /// Converts a [`std::io::Error`] into an [`Error`]. It allows us to // use the `?` operator for fallible I/O functions. @@ -275,3 +394,35 @@ impl From for Error { Error::Io(err) } } + +#[cfg(test)] +mod test { + use std::path::PathBuf; + + use super::*; + + #[test] + fn test_parse_posts() -> Result<()> { + let index_url = Url::parse("https://example.com")?; + let posts_url = Url::parse("https://example.com/posts/")?; + let posts_directory = Path::new("./testdata/posts/"); + let parser = Parser::new(&index_url, &posts_url, &posts_directory); + let (posts, static_files) = + parser.parse_posts(Path::new("./testdata/posts/"))?; + + let wanted_posts = vec![Post { + file_path: PathBuf::from("./testdata/posts/"), + title: String::from("Simple"), + url: Url::parse("https://example.com/posts/simple.html")?, + date: String::from("0000-01-01"), + body: String::from("Today is the first day of the Common Era."), + tags: HashSet::new(), + }]; + + let wanted_static_files: Vec = Vec::new(); + + assert_eq!(wanted_posts, posts); + assert_eq!(wanted_static_files, static_files); + Ok(()) + } +} diff --git a/src/post.rs b/src/post.rs index e678cf4..cc6a58f 100644 --- a/src/post.rs +++ b/src/post.rs @@ -7,7 +7,7 @@ use std::path::PathBuf; use url::Url; /// Represents a blog post. -#[derive(Clone)] +#[derive(Clone, Debug, PartialEq)] pub struct Post { /// The output path where the final post file will be rendered. pub file_path: PathBuf, diff --git a/src/url.rs b/src/url.rs index 9e12e79..85bde68 100644 --- a/src/url.rs +++ b/src/url.rs @@ -23,18 +23,32 @@ impl<'a> Converter<'a> { }) } + fn parse_bundle_base(normalized: &str) -> Option<&str> { + let base = normalized.trim_end_matches("/index.md"); + if base == normalized || base.contains('/') { + None + } else { + Some(base) + } + } + fn convert_absolute(&self, absolute: Url) -> Result { if let Some(relative) = self.posts_root.make_relative(&absolute) { if !relative.starts_with("../") && relative.ends_with(MARKDOWN_EXTENSION) { - let abs = absolute.to_string(); - return Ok(Url::parse(&format!( - "{}{}", - &abs[..abs.len() - MARKDOWN_EXTENSION.len()], - HTML_EXTENSION, - )) - .unwrap()); // this should never fail + return Ok(self + .posts_root + .join(&format!( + "{}{}", + match Self::parse_bundle_base(&relative) { + Some(base) => base, + None => + relative.trim_end_matches(MARKDOWN_EXTENSION), + }, + HTML_EXTENSION, + )) + .unwrap()); } } Ok(absolute) @@ -68,10 +82,7 @@ mod test { #[test] fn test_convert_relative_post_leading_dotslash() -> Result<()> { - fixture_basic( - "https://example.org/posts/relative.html", - "./relative.md", - ) + fixture_basic("https://example.org/posts/relative.html", "relative.md") } #[test] @@ -103,6 +114,40 @@ mod test { ) } + #[test] + fn test_convert_relative_bundle() -> Result<()> { + fixture_basic( + "https://example.org/posts/relative.html", + "relative/index.md", + ) + } + + #[test] + fn test_convert_relative_bundle_leading_dotslash() -> Result<()> { + fixture_basic( + "https://example.org/posts/relative.html", + "./relative.md", + ) + } + + #[test] + fn test_convert_relative_bundle_asset() -> Result<()> { + fixture( + "relative/index.md", + "https://example.org/posts/relative/image.jpg", + "image.jpg", + ) + } + + #[test] + fn test_convert_relative_bundle_asset_leading_dotslash() -> Result<()> { + fixture( + "relative/index.md", + "https://example.org/posts/relative/image.jpg", + "./image.jpg", + ) + } + #[test] fn test_convert_absolute_post() -> Result<()> { fixture_basic( @@ -144,13 +189,14 @@ mod test { } fn fixture_basic(wanted: &str, target: &str) -> Result<()> { + fixture("index.html", wanted, target) + } + + fn fixture(base: &str, wanted: &str, target: &str) -> Result<()> { assert_eq!( wanted, - Converter::new( - &Url::parse("https://example.org/posts/")?, - "index.html" - )? - .convert(target)?, + Converter::new(&Url::parse("https://example.org/posts/")?, base)? + .convert(target)?, ); Ok(()) } diff --git a/src/write.rs b/src/write.rs index 576bb0c..7e8dd89 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1,8 +1,10 @@ //! Takes [`Post`] objects created by the [`crate::post`] module and turns them //! into index and post HTML files on the file system. +use crate::parser::StaticFile; use crate::post::*; use gtmpl::{Template, Value}; +use std::collections::HashSet; use std::fmt; use std::io; use std::path::{Path, PathBuf}; @@ -78,7 +80,6 @@ impl Writer<'_> { /// Takes a slice of [`Post`], indexes it by tag, and writes post and index /// pages to disk. pub fn write_posts(&self, posts: &[Post]) -> Result<()> { - use std::collections::HashSet; let mut seen_dirs: HashSet = HashSet::new(); pages( posts, @@ -89,13 +90,33 @@ impl Writer<'_> { self.index_template, ) .try_for_each(|page| { - let dir = page.file_path.parent().unwrap(); // there should always be a dir - if seen_dirs.insert(dir.to_owned()) { - std::fs::create_dir_all(dir)?; - } + Self::make_parent_dir(&mut seen_dirs, &page.file_path)?; self.write_page(&page) }) } + + pub fn write_static_files( + &self, + static_files: &[StaticFile], + ) -> Result<()> { + let mut seen_dirs: HashSet = HashSet::new(); + for (src, dst) in static_files { + Self::make_parent_dir(&mut seen_dirs, dst)?; + std::fs::hard_link(src, dst)?; + } + Ok(()) + } + + fn make_parent_dir<'a>( + seen_dirs: &mut HashSet, + path: &Path, + ) -> Result<()> { + let parent = path.parent().unwrap(); + if seen_dirs.insert(parent.to_owned()) { + std::fs::create_dir_all(&parent)?; + } + Ok(()) + } } /// An object representing an output HTML file. A [`Page`] can be converted to diff --git a/testdata/posts/bundle-with-asset/asset.jpg b/testdata/posts/bundle-with-asset/asset.jpg new file mode 100644 index 0000000..a696a27 Binary files /dev/null and b/testdata/posts/bundle-with-asset/asset.jpg differ diff --git a/testdata/posts/bundle-with-asset/index.md b/testdata/posts/bundle-with-asset/index.md new file mode 100644 index 0000000..11b2ca3 --- /dev/null +++ b/testdata/posts/bundle-with-asset/index.md @@ -0,0 +1,8 @@ +--- +Title: Bundle with an asset +Date: 2023-02-21 +--- + +This is a bundle with an asset. This asset was sourced from +[Wikipedia](https://upload.wikimedia.org/wikipedia/commons/a/a8/Anastasius_I_%28emperor%29.jpg). +![The asset](./asset.jpg) \ No newline at end of file diff --git a/testdata/posts/bundle-with-different-asset-same-name/asset.jpg b/testdata/posts/bundle-with-different-asset-same-name/asset.jpg new file mode 100644 index 0000000..a22f727 Binary files /dev/null and b/testdata/posts/bundle-with-different-asset-same-name/asset.jpg differ diff --git a/testdata/posts/bundle-with-different-asset-same-name/index.md b/testdata/posts/bundle-with-different-asset-same-name/index.md new file mode 100644 index 0000000..eec3a96 --- /dev/null +++ b/testdata/posts/bundle-with-different-asset-same-name/index.md @@ -0,0 +1,10 @@ +--- +Title: Bundle with different asset same name +Date: 2023-02-21 +--- + +This is a post bundle with a different asset (compared to that of [the previous +post](../bundle-with-asset/index.md)) but sharing the same name (asset.jpg). +This asset was also sourced from +[Wikipedia](https://upload.wikimedia.org/wikipedia/commons/8/89/Tremissis_Avitus-RIC_2402.jpg). +![The asset](./asset.jpg). \ No newline at end of file diff --git a/testdata/posts/simple.md b/testdata/posts/simple.md new file mode 100644 index 0000000..b4d009c --- /dev/null +++ b/testdata/posts/simple.md @@ -0,0 +1,6 @@ +--- +Title: The title +Date: 0000-01-01 +--- + +Today is the first day of the Common Era. \ No newline at end of file