diff --git a/.gitignore b/.gitignore index 250af8f8..b06df2f0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ /libs /bin /Settings.toml +/.articles /.metadata.json /.reading-states /.fat32-epoch diff --git a/Cargo.lock b/Cargo.lock index 19c65089..844726d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -236,6 +236,35 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +[[package]] +name = "cookie" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +dependencies = [ + "percent-encoding", + "time", + "version_check", +] + +[[package]] +name = "cookie_store" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eac901828f88a5241ee0600950ab981148a18f2f756900ffba1b125ca6a3ef9" +dependencies = [ + "cookie", + "document-features", + "idna", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "time", + "url", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -339,6 +368,15 @@ dependencies = [ "syn", ] +[[package]] +name = "document-features" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d" +dependencies = [ + "litrs", +] + [[package]] name = "downcast-rs" version = "1.2.1" @@ -541,9 +579,9 @@ dependencies = [ [[package]] name = "http" -version = "1.1.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" 
dependencies = [ "bytes", "fnv", @@ -613,7 +651,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots", + "webpki-roots 0.26.11", ] [[package]] @@ -901,6 +939,12 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +[[package]] +name = "litrs" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" + [[package]] name = "lockfree-object-pool" version = "0.1.6" @@ -909,9 +953,9 @@ checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e" [[package]] name = "log" -version = "0.4.22" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lzma-rs" @@ -1059,6 +1103,7 @@ dependencies = [ "flate2", "fxhash", "globset", + "http", "indexmap", "kl-hyphenate", "lazy_static", @@ -1078,6 +1123,8 @@ dependencies = [ "titlecase", "toml", "unicode-normalization", + "ureq", + "url", "walkdir", "xi-unicode", "zip", @@ -1288,7 +1335,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 0.26.11", "windows-registry", ] @@ -1321,10 +1368,11 @@ checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustls" -version = "0.23.18" +version = "0.23.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f" +checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" dependencies = [ + "log", "once_cell", "ring", "rustls-pki-types", @@ -1344,18 +1392,19 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.0" 
+version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" dependencies = [ "web-time", + "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.102.8" +version = "0.103.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" dependencies = [ "ring", "rustls-pki-types", @@ -1602,10 +1651,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", + "itoa", "num-conv", "powerfmt", "serde", "time-core", + "time-macros", ] [[package]] @@ -1614,6 +1665,16 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.7.6" @@ -1772,6 +1833,40 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "3.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f0fde9bc91026e381155f8c67cb354bcd35260b2f4a29bcc84639f762760c39" +dependencies = [ + "base64", + "cookie_store", + "flate2", + "log", + "percent-encoding", + "rustls", + "rustls-pemfile", + "rustls-pki-types", + "serde", + "serde_json", + "ureq-proto", + "url", + "utf-8", + "webpki-roots 
0.26.11", +] + +[[package]] +name = "ureq-proto" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59db78ad1923f2b1be62b6da81fe80b173605ca0d57f85da2e005382adf693f7" +dependencies = [ + "base64", + "http", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.4" @@ -1783,6 +1878,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf16_iter" version = "1.0.5" @@ -1921,9 +2022,18 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.7" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.1", +] + +[[package]] +name = "webpki-roots" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" +checksum = "8782dd5a41a24eed3a4f40b606249b3e236ca61adf1f25ea4d45c73de122b502" dependencies = [ "rustls-pki-types", ] diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 25a48e61..9ee93be7 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -39,3 +39,6 @@ rand_core = "0.6.4" rand_xoshiro = "0.6.0" percent-encoding = "2.3.1" chrono = { version = "0.4.38", features = ["serde", "clock"], default-features = false } +ureq = { version = "3.0.12", features = ["cookies", "json"] } +http = "1.3.1" +url = "2.5.4" diff --git a/crates/core/src/articles/dummy.rs b/crates/core/src/articles/dummy.rs new file mode 100644 index 00000000..e5d5fc54 --- /dev/null +++ b/crates/core/src/articles/dummy.rs @@ -0,0 +1,29 @@ +use std::sync::{Arc, Mutex}; + +use crate::{ + articles::{ArticleIndex, Service}, + view::Hub, +}; + +pub struct Dummy { 
+ index: Arc>, +} + +impl Dummy { + pub fn new() -> Dummy { + Dummy { + index: Arc::new(Mutex::new(ArticleIndex::default())), + } + } +} + +impl Service for Dummy { + fn index(&self) -> Arc> { + self.index.clone() + } + fn save_index(&self) {} + fn update(&mut self, _hub: &Hub) -> bool { + // nothing to do, always finishes immediately + true + } +} diff --git a/crates/core/src/articles/mod.rs b/crates/core/src/articles/mod.rs new file mode 100644 index 00000000..f99c7a3d --- /dev/null +++ b/crates/core/src/articles/mod.rs @@ -0,0 +1,235 @@ +mod dummy; +mod readeck; +mod wallabag; + +use chrono::FixedOffset; +use fxhash::FxHashSet; +use serde::{Deserialize, Serialize}; +use std::fs::OpenOptions; +use std::io::prelude::*; +use std::{ + collections::{BTreeMap, BTreeSet}, + fs::{self, File}, + io::{self, Error, Write}, + os::unix::fs::MetadataExt, + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use crate::settings::ArticleAuth; +use crate::{ + articles::readeck::Readeck, + articles::wallabag::Wallabag, + metadata::{FileInfo, Info}, + settings::{self, ArticleList}, + view::Hub, +}; + +pub const ARTICLES_DIR: &str = ".articles"; + +#[derive(Serialize, Deserialize)] +pub struct ArticleIndex { + pub articles: BTreeMap, +} + +impl Default for ArticleIndex { + fn default() -> Self { + ArticleIndex { + articles: BTreeMap::new(), + } + } +} + +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Hash)] +#[serde(rename_all = "kebab-case")] +pub enum Changes { + Deleted, + Starred, + Archived, +} + +#[derive(Serialize, Deserialize, Clone)] +pub struct Article { + pub id: String, + #[serde(skip_serializing_if = "FxHashSet::is_empty")] + #[serde(default)] + pub changed: FxHashSet, + pub loaded: bool, + pub title: String, + pub domain: String, + pub authors: Vec, + pub format: String, + pub language: String, + pub reading_time: u32, + pub added: chrono::DateTime, + pub starred: bool, + pub archived: bool, +} + +impl Article { + fn path(&self) -> PathBuf { + 
std::path::absolute(PathBuf::from(format!( + "{}/article-{}.{}", + ARTICLES_DIR, self.id, self.format + ))) + .unwrap() + } + + pub fn file(&self) -> FileInfo { + let path = self.path(); + let size = match fs::metadata(&path) { + Ok(metadata) => metadata.size(), + Err(_err) => 0, + }; + FileInfo { + path: path, + kind: self.format.to_owned(), + size: size, + } + } + + pub fn info(&self) -> Info { + Info { + title: self.title.to_owned(), + subtitle: self.domain.to_owned(), + author: self.authors.join(", "), + year: "".to_string(), + language: self.language.to_owned(), + publisher: "".to_string(), + series: "".to_string(), + edition: "".to_string(), + volume: "".to_string(), + number: "".to_string(), + identifier: "".to_string(), + categories: BTreeSet::new(), + file: self.file(), + reader: None, + reader_info: None, + toc: None, + added: self.added.naive_local(), + } + } +} + +pub trait Service { + fn index(&self) -> Arc>; + + fn save_index(&self); + + // Update the list of articles. + // Returns true when the update was started, false when an update is already + // in progress. + fn update(&mut self, hub: &Hub) -> bool; +} + +/// The name of a given service (identified by API). 
+pub fn name(api: &String) -> &'static str { + match api.as_str() { + "readeck" => "Readeck", + "wallabag" => "Wallabag", + _ => "(unknown)", + } +} + +fn read_index() -> Result { + let file = File::open(ARTICLES_DIR.to_owned() + "/index.json")?; + let index: ArticleIndex = serde_json::from_reader(file)?; + + Ok(index) +} + +pub fn load(auth: settings::ArticleAuth) -> Box { + let index = read_index().unwrap_or_default(); + match auth.api.as_str() { + "readeck" => Box::new(Readeck::load(auth, index)), + "wallabag" => Box::new(Wallabag::load(auth, index)), + _ => Box::new(dummy::Dummy::new()), + } +} + +pub fn authenticate( + api: String, + server: String, + username: String, + password: String, +) -> Result { + match api.as_str() { + "readeck" => readeck::authenticate(server, "Plato".to_string(), username, password), + "wallabag" => wallabag::authenticate(server, "Plato".to_string(), username, password), + _ => Err(format!("unknown API: {api}")), + } +} + +pub fn filter(service: &Box, list: crate::settings::ArticleList) -> Vec
{ + // TODO: perhaps only return a list of articles on the current page, to + // reduce the amount of cloning? + let mut articles: Vec
= service.index() + .lock() + .unwrap() + .articles + .values() + .filter(|article| match list { + ArticleList::Unread => !article.archived, + ArticleList::Starred => article.starred, + ArticleList::Archive => article.archived, + } && !article.changed.contains(&Changes::Deleted)) + .cloned() + .collect(); + + // Sort newest first. + articles.sort_by(|a, b| b.added.cmp(&a.added)); + + articles +} + +fn save_index(index: &ArticleIndex) -> io::Result<()> { + let buf = serde_json::to_string(index).unwrap(); + let mut file = File::create(ARTICLES_DIR.to_owned() + "/index.json.tmp")?; + file.write_all(buf.as_bytes())?; + fs::rename( + ARTICLES_DIR.to_owned() + "/index.json.tmp", + ARTICLES_DIR.to_owned() + "/index.json", + ) +} + +static QUEUE_MUTEX: Mutex = Mutex::new(0); + +pub fn queue_link(link: String) { + let lock = QUEUE_MUTEX.lock().unwrap(); + let path = format!("{ARTICLES_DIR}/queued.txt"); + if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(&path) { + if let Err(e) = writeln!(file, "{}", link) { + eprintln!("Couldn't write to {}: {:#}.", path, e); + } + } + std::mem::drop(lock); +} + +pub fn read_queued() -> Vec { + // Lock the queue to avoid race conditions between adding a link and reading + // the links. + let lock = QUEUE_MUTEX.lock().unwrap(); + + // Read all the data in the file. + let path = format!("{ARTICLES_DIR}/queued.txt"); + let mut file = match File::open(&path) { + Ok(file) => file, + Err(_) => return Vec::new(), + }; + let mut data = String::new(); + if let Err(_) = file.read_to_string(&mut data) { + return Vec::new(); + } + + // Remove the file. + fs::remove_file(path).ok(); + + // Make sure the lock stays locked until here. + std::mem::drop(lock); + + // Split each line in the file. 
+ data.split("\n") + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()) + .collect() +} diff --git a/crates/core/src/articles/readeck.rs b/crates/core/src/articles/readeck.rs new file mode 100644 index 00000000..7508e4f6 --- /dev/null +++ b/crates/core/src/articles/readeck.rs @@ -0,0 +1,359 @@ +use std::{ + collections::{BTreeMap, BTreeSet}, + fs::{self, File}, + io::Write, + ops::Deref, + sync::{ + atomic::{ + AtomicBool, + Ordering::{Acquire, Release}, + }, + Arc, Mutex, + }, + thread, +}; + +use fxhash::FxHashSet; +use serde::{Deserialize, Serialize}; +use ureq::Agent; + +use crate::{ + articles::{ + queue_link, read_queued, save_index, Article, ArticleIndex, Changes, Service, ARTICLES_DIR, + }, + settings::ArticleAuth, + view::{ArticleUpdateProgress, Event, Hub}, +}; + +pub struct Readeck { + auth: ArticleAuth, + index: Arc>, + updating: Arc, +} + +impl Readeck { + pub fn load(auth: ArticleAuth, index: ArticleIndex) -> Readeck { + Readeck { + auth: auth, + index: Arc::new(Mutex::new(index)), + updating: Arc::new(AtomicBool::new(false)), + } + } +} + +impl Service for Readeck { + fn index(&self) -> Arc> { + self.index.clone() + } + + fn save_index(&self) { + let index = self.index.lock().unwrap(); + if let Err(err) = save_index(index.deref()) { + eprintln!("failed to save index: {}", err); + }; + } + + fn update(&mut self, hub: &crate::view::Hub) -> bool { + if self.updating.swap(true, Acquire) { + return false; + } + hub.send(Event::ArticleUpdateProgress( + ArticleUpdateProgress::ListStart, + )) + .ok(); + let hub = hub.clone(); + let auth = self.auth.clone(); + let updating = self.updating.clone(); + let index = self.index.clone(); + thread::spawn(move || { + if let Err(err) = update(&hub, auth, index) { + eprintln!("while fetching article list: {err}"); + hub.send(Event::Notify(err)).ok(); + }; + hub.send(Event::ArticleUpdateProgress(ArticleUpdateProgress::Finish)) + .ok(); + updating.store(false, Release); + }); + return true; + } +} + 
+#[derive(Serialize)] +struct APIAuth { + application: String, + username: String, + password: String, +} + +#[derive(Deserialize)] +struct APIAuthResponse { + token: String, +} + +#[derive(Deserialize)] +struct APIBookmarks { + id: String, + created: String, + loaded: bool, + title: String, + site_name: String, + authors: Option>, + lang: String, + has_article: bool, + is_marked: bool, + is_archived: bool, + reading_time: Option, +} + +#[derive(Serialize)] +struct APIBookmarkUpdate { + is_archived: Option, + is_marked: Option, +} + +pub fn authenticate( + server: String, + client_name: String, + username: String, + password: String, +) -> Result { + let url = url_from_server(&server); + let agent: Agent = Agent::config_builder().max_redirects(0).build().into(); + + let login_url = url.to_owned() + "api/auth"; + let mut response = match agent + .post(&login_url) + .content_type("application/json") + .send_json(APIAuth { + application: client_name, + username: username, + password: password, + }) { + Ok(response) => response, + Err(ureq::Error::Io(_)) => { + // Any I/O error, but most likely there's a networking issue. + return Err("failed to connect to the server".to_string()); + } + Err(ureq::Error::StatusCode(404)) => { + // Special case: provide better error message for invalid + // (mistyped?) addresses. 
+ return Err(format!("login page does not exist: {login_url}")); + } + Err(err) => { + return Err(format!("could not fetch login page: {err}")); + } + }; + if response.status().is_redirection() { + return Err(format!("login page does not exist: {login_url}")); + } + + let response_body = response + .body_mut() + .read_json::() + .map_err(|err| format!("could not fetch authentication response: {err}"))?; + Ok(ArticleAuth { + api: "readeck".to_string(), + server: server, + access_token: response_body.token, + ..Default::default() + }) +} + +fn update(hub: &Hub, auth: ArticleAuth, index: Arc>) -> Result<(), String> { + let url = url_from_server(&auth.server); + let agent: Agent = Agent::config_builder().max_redirects(0).build().into(); + + // Submit new URLs. + let queued = read_queued(); + if !queued.is_empty() { + if let Err(err) = agent + .post(format!("{url}bookmarks/import/text")) + .header("Authorization", "Bearer ".to_owned() + &auth.access_token) + .send_form([("data", queued.join("\n"))]) + { + // Add the links back (this is inefficient, but it should work). + for link in queued { + queue_link(link); + } + + return Err(format!("submitting articles failed: {err}")); + }; + } + + // Sync local changes. 
+ let mut changes: BTreeMap = BTreeMap::new(); + let mut deleted: BTreeSet = BTreeSet::new(); + for (id, article) in index.lock().unwrap().articles.iter_mut() { + if article.changed.contains(&Changes::Deleted) { + deleted.insert(id.clone()); + continue; + } + let update = APIBookmarkUpdate { + is_marked: if article.changed.contains(&Changes::Starred) { + Some(article.starred) + } else { + None + }, + is_archived: if article.changed.contains(&Changes::Archived) { + Some(article.archived) + } else { + None + }, + }; + if update.is_marked.is_some() || update.is_archived.is_some() { + changes.insert(id.clone(), update); + } + } + for id in deleted { + match agent + .delete(format!("{url}api/bookmarks/{id}")) + .header("Authorization", "Bearer ".to_owned() + &auth.access_token) + .header("accept", "application/json") + .call() + { + Ok(_) | Err(ureq::Error::StatusCode(404)) => { + // Either successfully deleted or the article was already + // deleted on the server, so we can remove the entry locally. + index.lock().unwrap().articles.remove(&id); + } + Err(err) => { + return Err(format!("deleting article failed: {err}")); + } + }; + } + for (id, update) in changes { + match agent + .patch(format!("{url}api/bookmarks/{id}")) + .header("Authorization", "Bearer ".to_owned() + &auth.access_token) + .header("accept", "application/json") + .content_type("application/json") + .send_json(update) + { + Ok(_) => { + // Change was successfully sent, so we can remove the change + // flags. + if let Some(article) = index.lock().unwrap().articles.get_mut(&id) { + article.changed.remove(&Changes::Starred); + article.changed.remove(&Changes::Archived); + } + } + Err(ureq::Error::StatusCode(404)) => { + // Article was deleted on the server. + // We'll just leave it as-is; the article will be removed locally + // when updating the list of articles. 
+ } + Err(err) => { + return Err(format!("sending local changes failed: {err}")); + } + }; + } + + // Create articles directory if it doesn't exist yet. + std::fs::create_dir(ARTICLES_DIR).ok(); + + // Fetch the list of articles. + let mut response = match agent + .get(format!("{url}api/bookmarks")) + .header("accept", "application/json") + .header("authorization", format!("Bearer {}", auth.access_token)) + .call() + { + Ok(response) => response, + Err(err) => { + return Err(format!("could not get list of bookmarks: {err}")); + } + }; + let bookmarks = response + .body_mut() + .read_json::>() + .map_err(|err| format!("could not get list of bookmarks: {err}"))?; + + // Create articles index. + let articles: BTreeMap = bookmarks + .into_iter() + .filter(|bookmark| bookmark.has_article) + .map(|bookmark| Article { + id: bookmark.id, + changed: FxHashSet::default(), + loaded: bookmark.loaded, + title: bookmark.title, + domain: bookmark.site_name, + format: "epub".to_string(), + authors: bookmark.authors.unwrap_or_default(), + language: bookmark.lang, + reading_time: bookmark.reading_time.unwrap_or(0), + added: chrono::DateTime::parse_from_rfc3339(&bookmark.created).unwrap_or_default(), + starred: bookmark.is_marked, + archived: bookmark.is_archived, + }) + .map(|article| (article.id.clone(), article)) + .collect(); + + // Make a list of articles to download. + let to_download: Vec
= articles + .values() + .filter(|article| article.loaded) + .filter(|article| match fs::exists(article.path()) { + Ok(exists) => !exists, + Err(_) => false, + }) + .cloned() + .collect(); + + // Update the in-memory list of articles, and save. + { + let mut index = index.lock().unwrap(); + index.articles.clear(); + index.articles.extend(articles); + save_index(&index).map_err(|err| err.to_string())?; + } + + // Notify the Articles app that the list of articles has been updated, and + // the shelf can be updated. + hub.send(Event::ArticleUpdateProgress( + ArticleUpdateProgress::ListFinished, + )) + .ok(); + + // Download all articles. + for (i, article) in to_download.iter().enumerate() { + hub.send(Event::ArticleUpdateProgress( + ArticleUpdateProgress::Download(i + 1, to_download.len()), + )) + .ok(); + + // Download now. + let mut response = agent + .get(format!( + "{}api/bookmarks/{}/article.{}", + url, article.id, article.format + )) + .header("Authorization", "Bearer ".to_owned() + &auth.access_token) + .call() + .map_err(|err| format!("article fetch failed: {err}"))?; + let response_body = response + .body_mut() + .read_to_vec() + .map_err(|err| format!("article fetch failed: {err}"))?; + + // Write article to filesystem. 
+ let path = format!("{}/article-{}.{}", ARTICLES_DIR, article.id, article.format); + let tmppath = path.to_owned() + ".tmp"; + let mut file = File::create(&tmppath).map_err(|err| err.to_string())?; + file.write_all(&response_body) + .map_err(|err| err.to_string())?; + file.flush().map_err(|err| err.to_string())?; + drop(file); + fs::rename(tmppath, path).map_err(|err| err.to_string())?; + } + + Ok(()) +} + +fn url_from_server(server: &String) -> String { + let mut url = "https://".to_owned() + &server; + if !url.ends_with("/") { + url += "/"; + } + url +} diff --git a/crates/core/src/articles/wallabag.rs b/crates/core/src/articles/wallabag.rs new file mode 100644 index 00000000..91d25501 --- /dev/null +++ b/crates/core/src/articles/wallabag.rs @@ -0,0 +1,709 @@ +use fxhash::FxHashSet; +use http::{HeaderValue, StatusCode}; +use regex::{Captures, Regex}; +use serde::Deserialize; +use std::{ + collections::{BTreeMap, BTreeSet}, + fs::{self, File}, + io::{Error, Write}, + ops::Deref, + sync::{ + atomic::{ + AtomicBool, + Ordering::{Acquire, Release}, + }, + Arc, Mutex, + }, + thread, + time::{SystemTime, UNIX_EPOCH}, +}; +use ureq::Agent; +use url::Url; + +use crate::{ + articles::{ + queue_link, read_queued, save_index, Article, ArticleIndex, Changes, Service, ARTICLES_DIR, + }, + settings::ArticleAuth, + view::{ArticleUpdateProgress, Event, Hub}, +}; + +struct ClientCredentials { + client_id: String, + client_secret: String, +} + +pub struct Wallabag { + auth: ArticleAuth, + index: Arc>, + updating: Arc, +} + +impl Wallabag { + pub fn load(auth: ArticleAuth, index: ArticleIndex) -> Wallabag { + Wallabag { + auth: auth, + index: Arc::new(Mutex::new(index)), + updating: Arc::new(AtomicBool::new(false)), + } + } +} + +impl Service for Wallabag { + fn index(&self) -> std::sync::Arc> { + self.index.clone() + } + + fn save_index(&self) { + let index = self.index.lock().unwrap(); + if let Err(err) = save_index(index.deref()) { + eprintln!("failed to save index: {}", err); 
+ }; + } + + fn update(&mut self, hub: &Hub) -> bool { + if self.updating.swap(true, Acquire) { + return false; + } + hub.send(Event::ArticleUpdateProgress( + ArticleUpdateProgress::ListStart, + )) + .ok(); + let hub = hub.clone(); + let auth = self.auth.clone(); + let updating = self.updating.clone(); + let index = self.index.clone(); + thread::spawn(move || { + if let Err(err) = update(&hub, auth, index) { + eprintln!("while fetching article list: {err}"); + hub.send(Event::Notify(err.to_string())).ok(); + }; + hub.send(Event::ArticleUpdateProgress(ArticleUpdateProgress::Finish)) + .ok(); + updating.store(false, Release); + }); + return true; + } +} + +// Create a new API client by doing HTTP requests and reading the response HTML, +// similar to how the Android app does it. If an API client with the given name +// already exists, that API client is returned instead of creating a new one. +// +// This is a terrible idea, but there doesn't seem to be an alternative that +// doesn't involve asking the user to manually create an API client and copying +// over the client ID and secret. Since the Android app does something similar, +// I hope the Wallabag authors won't break this. +fn create_api_client( + server: String, + client_name: String, + username: String, + password: String, +) -> Result { + let url = "https://".to_owned() + &server + "/"; + + // Create an HTTP client that works similarly to a browser. + // Disable redirection though since that saves a request during login. + let agent: Agent = Agent::config_builder().max_redirects(0).build().into(); + + // Fetch the login page (which importantly includes the CSRF token). + let login_url = url.clone() + "login"; + let mut response = match agent.get(&login_url).call() { + Ok(response) => response, + Err(ureq::Error::Io(_)) => { + // Any I/O error, but most likely there's a networking issue. 
+ return Err(Error::other("failed to connect to the server")); + } + Err(ureq::Error::StatusCode(404)) => { + // Special case: provide better error message for invalid + // (mistyped?) addresses. + return Err(Error::other(format!( + "login page does not exist: {login_url}", + ))); + } + Err(err) => { + return Err(Error::other(format!("could not fetch login page: {err}"))); + } + }; + let response_body = match response.body_mut().read_to_string() { + Ok(text) => text, + Err(err) => { + return Err(Error::other(format!( + "could not fetch response body of login page: {err}", + ))) + } + }; + + // Extract the CSRF token of the login page. + let csrf_token = match Regex::new("name=\"_csrf_token\" value=\"(.*+)\"") + .unwrap() + .captures(&response_body) + { + Some(caps) => caps[1].to_owned(), + None => return Err(Error::other("could not find CSRF token in login page")), + }; + + // Log in to Wallabag. + let login_result_url = url.clone() + "login_check"; + let response = match agent.post(&login_result_url).send_form([ + ("_username", username), + ("_password", password), + ("_csrf_token", csrf_token), + ]) { + Ok(response) => response, + Err(err) => return Err(Error::other(format!("failed to log in: {err}"))), + }; + + // Check that we got a 302 redirect to the homepage (which indicates a + // successful login). + if response.status() != StatusCode::FOUND { + return Err(Error::other(format!( + "could not log in, expected 302 redirect but got {}", + response.status() + ))); + } + let empty_header_value = &HeaderValue::from_str("").unwrap(); + let redirect_url = response + .headers() + .get("Location") + .unwrap_or(empty_header_value) + .to_str() + .unwrap(); + if redirect_url != url { + // Try to determine why the login failed. + // Not really handling any errors here, since we'll fall back to the + // "could not log in" error message below. 
+ if redirect_url == login_url { + if let Ok(mut response) = agent.get(redirect_url).call() { + if let Ok(body) = response.body_mut().read_to_string() { + if let Some(caps) = Regex::new("