refactor(wip): use Metadata and Entry to form Inventory

also remove old functions
This commit is contained in:
Compositr 2025-01-20 09:46:46 +11:00
parent 375d547bc1
commit 2f78459ef6
2 changed files with 65 additions and 50 deletions

View file

@ -68,19 +68,25 @@ fn main() {
let inventory = Inventory::take(&config).unwrap_or_fatal("Failed to take inventory");
let last_entry = inventory.entries.last();
println!("*** Inventory ***");
println!(
"For the {day_ordinal} of {month_name}, {year} in {}",
time.timezone
);
println!();
println!("You have {} diary entries so far.", inventory.entries);
match last_entry {
Some(entry) => println!("Your last entry was written in {} at {}", entry.timezone, entry.timestamp.to_string()),
None => println!("Your last entry is not known!")
}
println!("You have {} diary entries so far.", inventory.entries_count);
println!(
"You have written {} words so far, with an average of {} words per entry.",
inventory.word_count, inventory.avg_word_count
);
println!();
println!("*** Happy journaling! ***")
println!("*** Happy journaling! ***");
}
}
}

View file

@ -1,12 +1,14 @@
use chrono::TimeZone;
use chrono::FixedOffset;
use diaryrs::macros::UnwrapOrFatalAble;
use regex::Regex;
use serde::Deserialize;
use crate::args::Config;
use std::{fs, path};
// Management for existing diary entries
// TODO: Proper error handling for all the times we used `?`
/// Overall stats and information about existing diary entries
///
@ -14,7 +16,12 @@ use std::{fs, path};
#[derive(Debug)]
pub struct Inventory {
/// Number of entries in the diary
pub entries: usize,
pub entries_count: usize,
/// A vector of [`Entry`] representing each diary entry
///
/// Sorted oldest to newest based on enclosed timestamp
pub entries: Vec<Entry>,
/// Total word count of all entries
///
@ -26,15 +33,32 @@ pub struct Inventory {
}
/// Represents the front-matter metadata included within each entry. Does not necessarily have to be associated with any real diary entry.
#[derive(Debug)]
///
/// "Lower-level" (more representative of what is on disk) than an [`Entry`]. For example, it does not do any validation or processing of the timezone
#[derive(Debug, Deserialize)]
pub struct Metadata {
/// Raw timestamp string from the front-matter; [`Entry::read`] parses it as RFC 3339
timestamp: String,
/// Raw timezone string from the front-matter; [`Entry::read`] drops its last 8 bytes and parses the rest as a [`chrono_tz::Tz`]
timezone: String,
}
impl Metadata {
/// Extracts the YAML front-matter from the full text of an entry and deserializes it into a [`Metadata`].
///
/// # Errors
///
/// Returns an error if no `---`-delimited section is found in `contents`, or if `serde_yaml` cannot deserialize that section.
pub fn read_from_entry(contents: String) {
pub fn read_from_entry(contents: &str) -> Result<Self, Box<dyn std::error::Error>> {
// Match YAML section
// `(?U)` swaps greediness, so the capture group stops at the first closing `---\n`.
// The pattern is not anchored: the first `---\n … ---\n` pair anywhere in `contents` is used.
let yaml_regex = Regex::new(r"(?U)---\n([\S\s]+)---\n")
.unwrap_or_fatal("Failed to compile YAML regex. Something is very wrong!");
let matches = yaml_regex
.captures(&contents)
.ok_or("YAML front-matter not found!")?;
// Capture group 1 is the text between the two `---` delimiter lines
let yaml_str = matches
.get(1)
.ok_or("YAML front-matter match not found!")?
.as_str();
let meta = serde_yaml::from_str::<Metadata>(yaml_str)?;
Ok(meta)
}
}
@ -42,23 +66,30 @@ impl Metadata {
///
/// Does not include the actual contents of the entry, only stats
///
/// The timezone of the entry is determined by the `Tz` type parameter
/// The offset of this entry can be retrieved from the timestamp. The tzdb-style timezone (e.g. `Australia/Sydney`)
/// is separately specified. Using the offset is probably preferred as the offset should already account for regional daylight saving and other oddities.
#[derive(Debug)]
pub struct Entry<Tz: TimeZone> {
pub struct Entry {
/// Path of the entry file, as passed to [`Entry::read`]
pub path: path::PathBuf,
/// Word count produced by `count_words` from the file's contents
pub word_count: u64,
pub timestamp: chrono::DateTime<Tz>,
/// Fixed-offset timestamp; [`Entry::read`] parses it from the metadata's RFC 3339 `timestamp` string
pub timestamp: chrono::DateTime<FixedOffset>,
/// Timezone parsed by [`Entry::read`] from the metadata's `timezone` string
pub timezone: chrono_tz::Tz,
}
impl<Tz: TimeZone> Entry<Tz> {
//
impl Entry {
/// Reads the file at `path` and builds an [`Entry`] from its front-matter and contents.
///
/// # Errors
///
/// Returns an error if the file cannot be read, the front-matter cannot be extracted,
/// the timestamp is not valid RFC 3339, the timezone string does not parse, or word counting fails.
///
/// # Panics
///
/// Panics if the metadata's timezone string is shorter than 8 bytes, or if cutting 8 bytes
/// off its end does not land on a character boundary (see the note in the body).
pub fn read(path: path::PathBuf) -> Result<Self, Box<dyn std::error::Error>> {
let contents = fs::read_to_string(&path)?;
let meta = Metadata::read_from_entry(&contents)?;
let timestamp = chrono::DateTime::parse_from_rfc3339(&meta.timestamp)?;
// NOTE(review): the last 8 bytes of the timezone string are dropped before parsing —
// presumably a fixed-width suffix in the on-disk format; confirm against real entries.
// The range index panics instead of returning `Err` on short or non-ASCII-aligned input;
// `str::get(..)` with `ok_or` would keep this failure inside the `Result`.
let timezone: chrono_tz::Tz = meta.timezone[0..meta.timezone.len() - 8].parse()?;
Ok(Self {
path,
word_count: count_words(&contents),
word_count: count_words(&contents)?,
timestamp,
timezone,
})
}
}
@ -78,11 +109,21 @@ impl Inventory {
.flatten()
.collect();
let word_count = word_count_mds(&md_paths)?;
let entries_count = md_paths.len();
let mut entries = Vec::new();
for path in md_paths {
entries.push(Entry::read(path)?)
}
// Sort entry oldest to newest
entries.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
let word_count = entries.iter().fold(0, |acc, e| acc + e.word_count);
let entries_count = entries.len();
Ok(Self {
entries: entries_count,
entries_count,
entries,
word_count,
avg_word_count: (word_count as f64 / entries_count as f64).round(),
})
@ -121,39 +162,7 @@ fn recurse_paths_md(dir: fs::DirEntry) -> Result<Vec<path::PathBuf>, Box<dyn std
Ok(paths)
}
/// Counts the words across all markdown files in `paths`.
///
/// For each file the YAML header, markdown titles, HTML comments and images are
/// stripped, and the remaining whitespace-separated tokens are counted.
///
/// # Errors
///
/// Returns an error if any of the files cannot be read.
fn word_count_mds(paths: &[path::PathBuf]) -> Result<u64, Box<dyn std::error::Error>> {
    // temp regexes
    let re_titles = Regex::new(r"(?m)^#{1,6} .+")
        .unwrap_or_fatal("Failed to compile title regex. Something is very wrong!");
    // Lazy `.+?`: with the `s` flag a greedy `.+` would run from the first `<!--` to the
    // LAST `-->` in the file and delete all the prose between two separate comments.
    let re_comments = Regex::new(r"(?s-m)<!---?.+?-->")
        .unwrap_or_fatal("Failed to compile comment regex. Something is very wrong!");
    let re_images = Regex::new(r"!\[.*\]\(.+\)")
        .unwrap_or_fatal("Failed to compile image regex. Something is very wrong!");

    let mut word_count = 0;
    for path in paths {
        let contents = fs::read_to_string(path)?;
        // Cut the YAML header. Splitting at most twice keeps any later `---` (e.g. a
        // thematic break) inside the body instead of discarding everything before it.
        let body = contents
            .splitn(3, "---")
            .last()
            .ok_or("No content found")?;
        let body = re_titles.replace_all(body, "");
        let body = re_comments.replace_all(&body, "");
        let body = re_images.replace_all(&body, "");
        word_count += body.split_whitespace().count();
    }
    Ok(word_count as u64)
}
fn count_words(contents: &str) -> u64 {
fn count_words(contents: &str) -> Result<u64, Box<dyn std::error::Error>> {
let re_titles = Regex::new(r"(?m)^#{1,6} .+")
.unwrap_or_fatal("Failed to compile title regex. Something is very wrong!");
let re_comments = Regex::new(r"(?s-m)<!---?.+-->")
@ -173,5 +182,5 @@ fn count_words(contents: &str) -> u64 {
let contents = &re_comments.replace_all(contents, "");
let contents = &re_images.replace_all(contents, "");
contents.split_whitespace().count() as u64
Ok(contents.split_whitespace().count() as u64)
}