diff --git a/Cargo.lock b/Cargo.lock index 80b8848..f2b8275 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -235,6 +235,40 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "displaydoc" version = "0.2.5" @@ -367,6 +401,7 @@ dependencies = [ "gix", "itertools", "mockd", + "rayon", ] [[package]] @@ -550,12 +585,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bfdd4838a8d42bd482c9f0cb526411d003ee94cc7c7b08afe5007329c71d554" dependencies = [ "crc32fast", + "crossbeam-channel", "flate2", "gix-hash", "gix-trace", "gix-utils", "libc", "once_cell", + "parking_lot", "prodash", "sha1_smol", "thiserror", @@ -1380,6 +1417,26 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.8" diff --git a/Cargo.toml b/Cargo.toml index 3349d59..303f99a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,11 +21,15 @@ bench = false unsafe_code = { level = "forbid" } [dependencies] -gix = { version = "0.70.0", default-features = false, features = ["mailmap"] } +gix = { version = "0.70.0", default-features = false, features = [ + "mailmap", + "parallel", +] } clap = { version = "4.5.26", features = ["derive"] } chrono = { version = "0.4.39" } itertools = { version = "0.14.0" } anyhow = { version = "1.0.95" } +rayon = { version = "1.10.0" } [dev-dependencies] divan = { version = "0.1.17" } diff --git a/src/lib.rs b/src/lib.rs index 332b19c..c1dc382 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,6 @@ use std::{ cmp::Reverse, - collections::HashSet, path::{self, PathBuf}, sync::OnceLock, }; @@ -13,10 +12,12 @@ use clap::Parser; use cli::CliArgs; use gix::{ bstr::ByteSlice, revision::walk::Sorting, traverse::commit::simple::CommitTimeOrder, ObjectId, + ThreadSafeRepository, }; use heatmap::{ColorLogic, HeatmapColors}; use itertools::Itertools; use mailmap::Mailmap; +use rayon::prelude::*; use rgb::Rgb; pub mod cli; @@ -104,7 +105,7 @@ pub fn get_commits( start_date: NaiveDate, end_date: NaiveDate, ) -> anyhow::Result<(usize, usize, Vec)> { - let mut commits: HashSet = HashSet::new(); + let mut commits: Vec = vec![]; let ignored_repos = args.ignored_repos.as_ref().unwrap_or(&vec![]).to_owned(); @@ -145,145 +146,186 @@ pub fn get_commits( let end_date = Local.from_local_datetime(&end_date).unwrap(); let authors = args.authors.unwrap_or_default(); - let mut repos_count = 0; - let mut branches_count = 0; + + let mut repos_count: usize = 0; + let mut branches_count: usize = 0; for (i, repo_path) in repos.iter().enumerate() { - let repo = gix::open(repo_path).unwrap(); + let repo = ThreadSafeRepository::open(repo_path).unwrap(); let branch_names = &*branches[i]; - let mut branches = vec![]; - if branch_names.is_empty() { - branches = repo - .references()? - .prefixed("refs/heads")? - .filter_map(Result::ok) - .filter_map(|b| { - b.inner - .name - .to_string() - .strip_prefix("refs/heads/") - .map(|s| s.to_string()) - }) - .collect_vec(); - } - else { - let branch_names = branch_names.split(' ').map(|s| s.to_string()); - branches.extend(branch_names); - } + let branches = get_repo_branches(&repo, branch_names).unwrap(); let mailmap = Mailmap::new(repo_path); - let mut has_commits = false; - for branch in &branches { - // When passing the default @ (HEAD) branch this might actually not exist at all - // locally so we're skipping it - let Ok(rev) = repo.rev_parse(&**branch) - else { - continue; - }; + let branch_commits: Vec<_> = branches + .par_iter() + .filter_map(|branch| get_commit_ids(&repo, branch, start_date)) + .reduce(Vec::new, |mut c, n| { + c.extend(n); + c + }); - let branch_commits = rev - .single() - .unwrap() - .ancestors() - .sorting(Sorting::ByCommitTimeCutoff { - order: CommitTimeOrder::NewestFirst, - seconds: start_date.timestamp(), - }) - .all()?; + let repo = repo.to_thread_local(); - branch_commits - .filter_map(|c| c.ok()) - .filter_map(|c| c.object().ok()) - .filter_map(|c| { - let title = c - .message() - .ok()? - .title - .trim_ascii() - .to_str() - .ok()? - .to_string(); + let branch_commits = branch_commits + .into_iter() + .unique() + .filter_map(|c| repo.find_commit(c).ok()) + .filter_map(|c| { + let title = c + .message() + .ok()? + .title + .trim_ascii() + .to_str() + .ok()? + .to_string(); - if args.no_merges { - let is_merge = c.parent_ids().count() > 1; - if is_merge { - return None; - } - } - - let author = c.author().ok()?; - - let email = author.email.to_string(); - let name = author.name.to_string(); - - let author = Author { name, email }; - let author = mailmap.resolve(author); - - if !authors.is_empty() && !authors.contains(&author.name) { + if args.no_merges { + let is_merge = c.parent_ids().count() > 1; + if is_merge { return None; } + } - let time = c.time().ok()?; - let time = - DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local); - if time < start_date || time > end_date { - return None; - } + let author = c.author().ok()?; - has_commits = true; + let email = author.email.to_string(); + let name = author.name.to_string(); - Some(Commit { - id: c.id, - title, - author, - time, - }) + let author = Author { name, email }; + let author = mailmap.resolve(author); + + if !authors.is_empty() && !authors.contains(&author.name) { + return None; + } + + let time = c.time().ok()?; + let time = + DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local); + if time < start_date || time > end_date { + return None; + } + + Some(Commit { + id: c.id, + title, + author, + time, }) - .for_each(|c| { - commits.insert(c); - }); - } + }) + .collect_vec(); - if has_commits { + if !branch_commits.is_empty() { repos_count += 1; branches_count += branches.len(); } + + commits.extend(branch_commits); } - let commits = commits - .into_iter() - .sorted_by_cached_key(|a| Reverse(a.time)) - .collect_vec(); + commits.par_sort_by_cached_key(|a| Reverse(a.time)); Ok((repos_count, branches_count, commits)) } +fn get_repo_branches(repo: &ThreadSafeRepository, branch_names: &str) -> Option> { + if branch_names.is_empty() { + let repo = repo.to_thread_local(); + let Ok(refs) = repo.references() + else { + return None; + }; + + let Ok(prefix) = refs.prefixed("refs/heads") + else { + return None; + }; + + let branches = prefix + .filter_map(Result::ok) + .filter_map(|b| { + b.inner + .name + .to_string() + .strip_prefix("refs/heads/") + .map(|s| s.to_string()) + }) + .collect(); + + Some(branches) + } + else { + Some(branch_names.split(' ').map(|s| s.to_string()).collect()) + } +} + +fn get_commit_ids( + repo: &ThreadSafeRepository, + branch: &str, + start_date: DateTime, +) -> Option> { + let repo = repo.to_thread_local(); + + // When passing the default @ (HEAD) branch this might actually not exist at all + // locally so we're skipping it + let rev = repo.rev_parse(branch).ok()?; + + let branch_commits = rev + .single() + .unwrap() + .ancestors() + .sorting(Sorting::ByCommitTimeCutoff { + order: CommitTimeOrder::NewestFirst, + seconds: start_date.timestamp(), + }) + .all() + .ok()?; + + let commits = branch_commits + .filter_map(|c| c.ok()) + .map(|c| c.id) + .collect(); + + Some(commits) +} + fn find_git_repos(scan_path: &path::Path, repos: &mut Vec, ignored_repos: &Vec) { + if let Some(path) = walk_dir(scan_path, ignored_repos) { + repos.extend(path) + } +} + +pub fn walk_dir(scan_path: &path::Path, ignored_repos: &Vec) -> Option> { let Ok(dirs) = scan_path.read_dir() else { - return; + return None; }; - let dirs: Vec<_> = dirs + let dirs: Vec = dirs + .par_bridge() .filter_map(|d| d.ok()) .filter(|d| { let dir_name = d.file_name().to_string_lossy().to_string(); !ignored_repos.contains(&dir_name) }) .filter(|d| d.file_type().is_ok_and(|t| t.is_dir())) - .collect_vec(); + .filter_map(|d| { + let dir = d.path(); + let filename = dir.file_name().unwrap_or_default().to_string_lossy(); - let dirs = dirs.iter().map(|d| d.path()); + match filename.as_ref() { + ".git" => Some(vec![dir]), + _ => walk_dir(&dir, ignored_repos), + } + }) + .reduce(Vec::new, |mut c, n| { + c.extend(n); + c + }); - for dir in dirs { - let filename = dir.file_name().unwrap_or_default().to_string_lossy(); - match filename.as_ref() { - ".git" => repos.push(dir), - _ => find_git_repos(&dir, repos, ignored_repos), - } - } + Some(dirs) } pub fn get_default_until(since: NaiveDate) -> String {