Merge branch 'feat/rayon'

master v1.3.0
Wynd 2025-01-19 00:50:52 +02:00
commit 728ad3ff26
3 changed files with 206 additions and 103 deletions

Cargo.lock (generated): 57 changed lines

@@ -235,6 +235,40 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
 [[package]]
 name = "displaydoc"
 version = "0.2.5"
@@ -367,6 +401,7 @@ dependencies = [
  "gix",
  "itertools",
  "mockd",
+ "rayon",
 ]
 
 [[package]]
@@ -550,12 +585,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8bfdd4838a8d42bd482c9f0cb526411d003ee94cc7c7b08afe5007329c71d554"
 dependencies = [
  "crc32fast",
+ "crossbeam-channel",
  "flate2",
  "gix-hash",
  "gix-trace",
  "gix-utils",
  "libc",
  "once_cell",
+ "parking_lot",
  "prodash",
  "sha1_smol",
  "thiserror",
@@ -1380,6 +1417,26 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "redox_syscall"
 version = "0.5.8"

Cargo.toml

@@ -21,11 +21,15 @@ bench = false
 unsafe_code = { level = "forbid" }
 
 [dependencies]
-gix = { version = "0.70.0", default-features = false, features = ["mailmap"] }
+gix = { version = "0.70.0", default-features = false, features = [
+    "mailmap",
+    "parallel",
+] }
 clap = { version = "4.5.26", features = ["derive"] }
 chrono = { version = "0.4.39" }
 itertools = { version = "0.14.0" }
 anyhow = { version = "1.0.95" }
+rayon = { version = "1.10.0" }
 
 [dev-dependencies]
 divan = { version = "0.1.17" }
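The new rayon dependency (together with the "parallel" feature now enabled on gix) is what the source changes below build on. As a minimal, self-contained sketch of the core pattern, fan work out with par_iter and merge the per-item Vecs with reduce; the names and data here are illustrative and not taken from this commit:

use rayon::prelude::*;

fn main() {
    // Hypothetical branch list; stands in for a repository's branches below.
    let branches = vec!["main", "develop", "feature/x"];

    // Map each branch to its own Vec in parallel, then merge the Vecs.
    let commits: Vec<String> = branches
        .par_iter()
        .map(|b| vec![format!("{b}@abc123"), format!("{b}@def456")])
        .reduce(Vec::new, |mut acc, next| {
            acc.extend(next);
            acc
        });

    assert_eq!(commits.len(), 6);
}

Using Vec::new as the reduce identity keeps every worker appending to a local Vec, so no shared lock is needed while the items are processed.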

(Rust source file; path not preserved in this view)

@@ -2,7 +2,6 @@
 use std::{
     cmp::Reverse,
-    collections::HashSet,
     path::{self, PathBuf},
     sync::OnceLock,
 };
@@ -13,10 +12,12 @@ use clap::Parser;
 use cli::CliArgs;
 use gix::{
     bstr::ByteSlice, revision::walk::Sorting, traverse::commit::simple::CommitTimeOrder, ObjectId,
+    ThreadSafeRepository,
 };
 use heatmap::{ColorLogic, HeatmapColors};
 use itertools::Itertools;
 use mailmap::Mailmap;
+use rayon::prelude::*;
 use rgb::Rgb;
 
 pub mod cli;
@@ -104,7 +105,7 @@ pub fn get_commits(
     start_date: NaiveDate,
     end_date: NaiveDate,
 ) -> anyhow::Result<(usize, usize, Vec<Commit>)> {
-    let mut commits: HashSet<Commit> = HashSet::new();
+    let mut commits: Vec<Commit> = vec![];
 
     let ignored_repos = args.ignored_repos.as_ref().unwrap_or(&vec![]).to_owned();
@@ -145,145 +146,186 @@ pub fn get_commits(
     let end_date = Local.from_local_datetime(&end_date).unwrap();
     let authors = args.authors.unwrap_or_default();
 
-    let mut repos_count = 0;
-    let mut branches_count = 0;
+    let mut repos_count: usize = 0;
+    let mut branches_count: usize = 0;
 
     for (i, repo_path) in repos.iter().enumerate() {
-        let repo = gix::open(repo_path).unwrap();
+        let repo = ThreadSafeRepository::open(repo_path).unwrap();
         let branch_names = &*branches[i];
 
-        let mut branches = vec![];
-        if branch_names.is_empty() {
-            branches = repo
-                .references()?
-                .prefixed("refs/heads")?
-                .filter_map(Result::ok)
-                .filter_map(|b| {
-                    b.inner
-                        .name
-                        .to_string()
-                        .strip_prefix("refs/heads/")
-                        .map(|s| s.to_string())
-                })
-                .collect_vec();
-        }
-        else {
-            let branch_names = branch_names.split(' ').map(|s| s.to_string());
-            branches.extend(branch_names);
-        }
+        let branches = get_repo_branches(&repo, branch_names).unwrap();
 
         let mailmap = Mailmap::new(repo_path);
 
-        let mut has_commits = false;
-
-        for branch in &branches {
-            // When passing the default @ (HEAD) branch this might actually not exist at all
-            // locally so we're skipping it
-            let Ok(rev) = repo.rev_parse(&**branch)
-            else {
-                continue;
-            };
-
-            let branch_commits = rev
-                .single()
-                .unwrap()
-                .ancestors()
-                .sorting(Sorting::ByCommitTimeCutoff {
-                    order: CommitTimeOrder::NewestFirst,
-                    seconds: start_date.timestamp(),
-                })
-                .all()?;
-
-            branch_commits
-                .filter_map(|c| c.ok())
-                .filter_map(|c| c.object().ok())
-                .filter_map(|c| {
-                    let title = c
-                        .message()
-                        .ok()?
-                        .title
-                        .trim_ascii()
-                        .to_str()
-                        .ok()?
-                        .to_string();
-
-                    if args.no_merges {
-                        let is_merge = c.parent_ids().count() > 1;
-                        if is_merge {
-                            return None;
-                        }
-                    }
-
-                    let author = c.author().ok()?;
-
-                    let email = author.email.to_string();
-                    let name = author.name.to_string();
-
-                    let author = Author { name, email };
-                    let author = mailmap.resolve(author);
-
-                    if !authors.is_empty() && !authors.contains(&author.name) {
-                        return None;
-                    }
-
-                    let time = c.time().ok()?;
-                    let time =
-                        DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local);
-                    if time < start_date || time > end_date {
-                        return None;
-                    }
-
-                    has_commits = true;
-
-                    Some(Commit {
-                        id: c.id,
-                        title,
-                        author,
-                        time,
-                    })
-                })
-                .for_each(|c| {
-                    commits.insert(c);
-                });
-        }
-
-        if has_commits {
+        let branch_commits: Vec<_> = branches
+            .par_iter()
+            .filter_map(|branch| get_commit_ids(&repo, branch, start_date))
+            .reduce(Vec::new, |mut c, n| {
+                c.extend(n);
+                c
+            });
+
+        let repo = repo.to_thread_local();
+
+        let branch_commits = branch_commits
+            .into_iter()
+            .unique()
+            .filter_map(|c| repo.find_commit(c).ok())
+            .filter_map(|c| {
+                let title = c
+                    .message()
+                    .ok()?
+                    .title
+                    .trim_ascii()
+                    .to_str()
+                    .ok()?
+                    .to_string();
+
+                if args.no_merges {
+                    let is_merge = c.parent_ids().count() > 1;
+                    if is_merge {
+                        return None;
+                    }
+                }
+
+                let author = c.author().ok()?;
+
+                let email = author.email.to_string();
+                let name = author.name.to_string();
+
+                let author = Author { name, email };
+                let author = mailmap.resolve(author);
+
+                if !authors.is_empty() && !authors.contains(&author.name) {
+                    return None;
+                }
+
+                let time = c.time().ok()?;
+                let time =
+                    DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local);
+                if time < start_date || time > end_date {
+                    return None;
+                }
+
+                Some(Commit {
+                    id: c.id,
+                    title,
+                    author,
+                    time,
+                })
+            })
+            .collect_vec();
+
+        if !branch_commits.is_empty() {
             repos_count += 1;
             branches_count += branches.len();
         }
+
+        commits.extend(branch_commits);
     }
 
-    let commits = commits
-        .into_iter()
-        .sorted_by_cached_key(|a| Reverse(a.time))
-        .collect_vec();
+    commits.par_sort_by_cached_key(|a| Reverse(a.time));
 
     Ok((repos_count, branches_count, commits))
 }
 
+fn get_repo_branches(repo: &ThreadSafeRepository, branch_names: &str) -> Option<Vec<String>> {
+    if branch_names.is_empty() {
+        let repo = repo.to_thread_local();
+
+        let Ok(refs) = repo.references()
+        else {
+            return None;
+        };
+
+        let Ok(prefix) = refs.prefixed("refs/heads")
+        else {
+            return None;
+        };
+
+        let branches = prefix
+            .filter_map(Result::ok)
+            .filter_map(|b| {
+                b.inner
+                    .name
+                    .to_string()
+                    .strip_prefix("refs/heads/")
+                    .map(|s| s.to_string())
+            })
+            .collect();
+
+        Some(branches)
+    }
+    else {
+        Some(branch_names.split(' ').map(|s| s.to_string()).collect())
+    }
+}
+
+fn get_commit_ids(
+    repo: &ThreadSafeRepository,
+    branch: &str,
+    start_date: DateTime<Local>,
+) -> Option<Vec<ObjectId>> {
+    let repo = repo.to_thread_local();
+
+    // When passing the default @ (HEAD) branch this might actually not exist at all
+    // locally so we're skipping it
+    let rev = repo.rev_parse(branch).ok()?;
+
+    let branch_commits = rev
+        .single()
+        .unwrap()
+        .ancestors()
+        .sorting(Sorting::ByCommitTimeCutoff {
+            order: CommitTimeOrder::NewestFirst,
+            seconds: start_date.timestamp(),
+        })
+        .all()
+        .ok()?;
+
+    let commits = branch_commits
+        .filter_map(|c| c.ok())
+        .map(|c| c.id)
+        .collect();
+
+    Some(commits)
+}
+
 fn find_git_repos(scan_path: &path::Path, repos: &mut Vec<PathBuf>, ignored_repos: &Vec<String>) {
+    if let Some(path) = walk_dir(scan_path, ignored_repos) {
+        repos.extend(path)
+    }
+}
+
+pub fn walk_dir(scan_path: &path::Path, ignored_repos: &Vec<String>) -> Option<Vec<PathBuf>> {
     let Ok(dirs) = scan_path.read_dir()
     else {
-        return;
+        return None;
     };
 
-    let dirs: Vec<_> = dirs
+    let dirs: Vec<PathBuf> = dirs
+        .par_bridge()
         .filter_map(|d| d.ok())
         .filter(|d| {
            let dir_name = d.file_name().to_string_lossy().to_string();
            !ignored_repos.contains(&dir_name)
         })
         .filter(|d| d.file_type().is_ok_and(|t| t.is_dir()))
-        .collect_vec();
-
-    let dirs = dirs.iter().map(|d| d.path());
-
-    for dir in dirs {
-        let filename = dir.file_name().unwrap_or_default().to_string_lossy();
-        match filename.as_ref() {
-            ".git" => repos.push(dir),
-            _ => find_git_repos(&dir, repos, ignored_repos),
-        }
-    }
+        .filter_map(|d| {
+            let dir = d.path();
+            let filename = dir.file_name().unwrap_or_default().to_string_lossy();
+
+            match filename.as_ref() {
+                ".git" => Some(vec![dir]),
+                _ => walk_dir(&dir, ignored_repos),
+            }
+        })
+        .reduce(Vec::new, |mut c, n| {
+            c.extend(n);
+            c
+        });
+
+    Some(dirs)
 }
 
 pub fn get_default_until(since: NaiveDate) -> String {
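The rewritten walk_dir above bridges std::fs::ReadDir onto the rayon thread pool and merges the results of recursive calls. A stand-alone sketch of that par_bridge + reduce shape, with simplified filtering (the ignore list and the commit's exact error handling are omitted here):

use rayon::prelude::*;
use std::path::{Path, PathBuf};

// Collect every ".git" directory under `root`, recursing in parallel.
fn git_dirs(root: &Path) -> Vec<PathBuf> {
    let Ok(entries) = root.read_dir() else {
        return Vec::new();
    };

    entries
        .par_bridge() // ReadDir is a serial iterator; bridge it onto the pool
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.file_type().is_ok_and(|t| t.is_dir()))
        .map(|entry| {
            let dir = entry.path();
            match dir.file_name().and_then(|n| n.to_str()) {
                Some(".git") => vec![dir],
                _ => git_dirs(&dir),
            }
        })
        // Merge the per-entry Vecs into a single result.
        .reduce(Vec::new, |mut acc, next| {
            acc.extend(next);
            acc
        })
}

fn main() {
    for dir in git_dirs(Path::new(".")) {
        println!("{}", dir.display());
    }
}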