Merge branch 'feat/rayon'

master v1.3.0
Wynd 2025-01-19 00:50:52 +02:00
commit 728ad3ff26
3 changed files with 206 additions and 103 deletions

57
Cargo.lock generated
View File

@ -235,6 +235,40 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "displaydoc"
version = "0.2.5"
@ -367,6 +401,7 @@ dependencies = [
"gix",
"itertools",
"mockd",
"rayon",
]
[[package]]
@ -550,12 +585,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bfdd4838a8d42bd482c9f0cb526411d003ee94cc7c7b08afe5007329c71d554"
dependencies = [
"crc32fast",
"crossbeam-channel",
"flate2",
"gix-hash",
"gix-trace",
"gix-utils",
"libc",
"once_cell",
"parking_lot",
"prodash",
"sha1_smol",
"thiserror",
@ -1380,6 +1417,26 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.8"

View File

@ -21,11 +21,15 @@ bench = false
unsafe_code = { level = "forbid" }
[dependencies]
gix = { version = "0.70.0", default-features = false, features = ["mailmap"] }
gix = { version = "0.70.0", default-features = false, features = [
"mailmap",
"parallel",
] }
clap = { version = "4.5.26", features = ["derive"] }
chrono = { version = "0.4.39" }
itertools = { version = "0.14.0" }
anyhow = { version = "1.0.95" }
rayon = { version = "1.10.0" }
[dev-dependencies]
divan = { version = "0.1.17" }

View File

@ -2,7 +2,6 @@
use std::{
cmp::Reverse,
collections::HashSet,
path::{self, PathBuf},
sync::OnceLock,
};
@ -13,10 +12,12 @@ use clap::Parser;
use cli::CliArgs;
use gix::{
bstr::ByteSlice, revision::walk::Sorting, traverse::commit::simple::CommitTimeOrder, ObjectId,
ThreadSafeRepository,
};
use heatmap::{ColorLogic, HeatmapColors};
use itertools::Itertools;
use mailmap::Mailmap;
use rayon::prelude::*;
use rgb::Rgb;
pub mod cli;
@ -104,7 +105,7 @@ pub fn get_commits(
start_date: NaiveDate,
end_date: NaiveDate,
) -> anyhow::Result<(usize, usize, Vec<Commit>)> {
// NOTE(review): this is a forge diff view — removed (old) and added (new)
// lines are interleaved below with no +/- markers. Comments mark the
// segments as best as can be inferred from duplication and the final code.
// old: commits were deduplicated via a HashSet.
let mut commits: HashSet<Commit> = HashSet::new();
// new: a plain Vec; dedup presumably moves to `.unique()` on commit ids below — TODO confirm.
let mut commits: Vec<Commit> = vec![];
let ignored_repos = args.ignored_repos.as_ref().unwrap_or(&vec![]).to_owned();
@ -145,145 +146,186 @@ pub fn get_commits(
let end_date = Local.from_local_datetime(&end_date).unwrap();
let authors = args.authors.unwrap_or_default();
// old: untyped counters.
let mut repos_count = 0;
let mut branches_count = 0;
// new: counters with explicit `usize` annotations.
let mut repos_count: usize = 0;
let mut branches_count: usize = 0;
for (i, repo_path) in repos.iter().enumerate() {
// old: plain thread-local repository handle.
let repo = gix::open(repo_path).unwrap();
// new: thread-safe handle so branches can be walked with rayon.
let repo = ThreadSafeRepository::open(repo_path).unwrap();
let branch_names = &*branches[i];
// old: branch enumeration done inline here...
let mut branches = vec![];
if branch_names.is_empty() {
branches = repo
.references()?
.prefixed("refs/heads")?
.filter_map(Result::ok)
.filter_map(|b| {
b.inner
.name
.to_string()
.strip_prefix("refs/heads/")
.map(|s| s.to_string())
})
.collect_vec();
}
else {
let branch_names = branch_names.split(' ').map(|s| s.to_string());
branches.extend(branch_names);
}
// new: ...extracted into the `get_repo_branches` helper.
let branches = get_repo_branches(&repo, branch_names).unwrap();
let mailmap = Mailmap::new(repo_path);
// old: sequential per-branch loop with a `has_commits` flag.
let mut has_commits = false;
for branch in &branches {
// When passing the default @ (HEAD) branch this might actually not exist at all
// locally so we're skipping it
let Ok(rev) = repo.rev_parse(&**branch)
else {
continue;
};
// new: branches walked in parallel; ids gathered via `get_commit_ids`
// and merged with a reduce.
let branch_commits: Vec<_> = branches
.par_iter()
.filter_map(|branch| get_commit_ids(&repo, branch, start_date))
.reduce(Vec::new, |mut c, n| {
c.extend(n);
c
});
// old: ancestor walk performed inline per branch.
let branch_commits = rev
.single()
.unwrap()
.ancestors()
.sorting(Sorting::ByCommitTimeCutoff {
order: CommitTimeOrder::NewestFirst,
seconds: start_date.timestamp(),
})
.all()?;
// new: promote the thread-safe handle for the object lookups below.
let repo = repo.to_thread_local();
// old: mapped walk items straight to commit objects.
branch_commits
.filter_map(|c| c.ok())
.filter_map(|c| c.object().ok())
.filter_map(|c| {
let title = c
.message()
.ok()?
.title
.trim_ascii()
.to_str()
.ok()?
.to_string();
// new: dedup ids across branches with `.unique()`, then look each up.
let branch_commits = branch_commits
.into_iter()
.unique()
.filter_map(|c| repo.find_commit(c).ok())
.filter_map(|c| {
let title = c
.message()
.ok()?
.title
.trim_ascii()
.to_str()
.ok()?
.to_string();
// The filter body below appears twice: first the old copy, then the
// new copy (same logic: merge filter, author/mailmap, time window).
if args.no_merges {
let is_merge = c.parent_ids().count() > 1;
if is_merge {
return None;
}
}
let author = c.author().ok()?;
let email = author.email.to_string();
let name = author.name.to_string();
let author = Author { name, email };
let author = mailmap.resolve(author);
if !authors.is_empty() && !authors.contains(&author.name) {
if args.no_merges {
let is_merge = c.parent_ids().count() > 1;
if is_merge {
return None;
}
}
let time = c.time().ok()?;
let time =
DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local);
if time < start_date || time > end_date {
return None;
}
let author = c.author().ok()?;
// old: flag set as a side effect inside the closure.
has_commits = true;
let email = author.email.to_string();
let name = author.name.to_string();
Some(Commit {
id: c.id,
title,
author,
time,
})
let author = Author { name, email };
let author = mailmap.resolve(author);
if !authors.is_empty() && !authors.contains(&author.name) {
return None;
}
let time = c.time().ok()?;
let time =
DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local);
if time < start_date || time > end_date {
return None;
}
Some(Commit {
id: c.id,
title,
author,
time,
})
// old: inserted each commit into the HashSet.
.for_each(|c| {
commits.insert(c);
});
}
// new: collect the filtered commits for this repo.
})
.collect_vec();
// old: counted via the side-effect flag; new: via emptiness check.
if has_commits {
if !branch_commits.is_empty() {
repos_count += 1;
branches_count += branches.len();
}
// new: append this repo's commits to the result Vec.
commits.extend(branch_commits);
}
// old: sort by building a new Vec with itertools.
let commits = commits
.into_iter()
.sorted_by_cached_key(|a| Reverse(a.time))
.collect_vec();
// new: in-place parallel sort (rayon), newest first.
commits.par_sort_by_cached_key(|a| Reverse(a.time));
Ok((repos_count, branches_count, commits))
}
/// Resolves the set of branch names to scan for one repository.
///
/// When `branch_names` is empty, every local branch is enumerated by listing
/// the repository's `refs/heads/*` references; otherwise `branch_names` is
/// treated as a space-separated list and split into individual names.
///
/// Returns `None` when the repository's references cannot be read.
fn get_repo_branches(repo: &ThreadSafeRepository, branch_names: &str) -> Option<Vec<String>> {
    if branch_names.is_empty() {
        // Reference access needs a thread-local `Repository`, not the
        // thread-safe handle.
        let repo = repo.to_thread_local();
        // `.ok()?` replaces the verbose `let Ok(..) else { return None }`
        // chains: any failure short-circuits to `None` identically.
        let refs = repo.references().ok()?;
        let prefixed = refs.prefixed("refs/heads").ok()?;
        let branches = prefixed
            .filter_map(Result::ok)
            .filter_map(|b| {
                // Strip the ref namespace so only the bare branch name remains.
                b.inner
                    .name
                    .to_string()
                    .strip_prefix("refs/heads/")
                    .map(|s| s.to_string())
            })
            .collect();
        Some(branches)
    }
    else {
        Some(branch_names.split(' ').map(|s| s.to_string()).collect())
    }
}
/// Collects the ids of all ancestors of `branch`, newest first, cutting the
/// walk off at commits older than `start_date`.
///
/// Returns `None` when the branch cannot be resolved (e.g. the default `@`
/// HEAD spec for a branch that does not exist locally) or the ancestor walk
/// cannot be started.
fn get_commit_ids(
    repo: &ThreadSafeRepository,
    branch: &str,
    start_date: DateTime<Local>,
) -> Option<Vec<ObjectId>> {
    let repo = repo.to_thread_local();
    // When passing the default @ (HEAD) branch this might actually not exist at all
    // locally so we're skipping it
    let rev = repo.rev_parse(branch).ok()?;
    // `single()` yields no id when the spec is not a single revision
    // (e.g. a range); propagate `None` instead of panicking on `unwrap()`.
    let branch_commits = rev
        .single()?
        .ancestors()
        .sorting(Sorting::ByCommitTimeCutoff {
            order: CommitTimeOrder::NewestFirst,
            seconds: start_date.timestamp(),
        })
        .all()
        .ok()?;
    let commits = branch_commits
        .filter_map(|c| c.ok())
        .map(|c| c.id)
        .collect();
    Some(commits)
}
/// Appends every git repository discovered under `scan_path` to `repos`,
/// skipping directories named in `ignored_repos`.
fn find_git_repos(scan_path: &path::Path, repos: &mut Vec<PathBuf>, ignored_repos: &Vec<String>) {
    // An unreadable scan path yields no repositories; treat it as empty.
    let found = walk_dir(scan_path, ignored_repos).unwrap_or_default();
    repos.extend(found);
}
pub fn walk_dir(scan_path: &path::Path, ignored_repos: &Vec<String>) -> Option<Vec<PathBuf>> {
let Ok(dirs) = scan_path.read_dir()
else {
return;
return None;
};
let dirs: Vec<_> = dirs
let dirs: Vec<PathBuf> = dirs
.par_bridge()
.filter_map(|d| d.ok())
.filter(|d| {
let dir_name = d.file_name().to_string_lossy().to_string();
!ignored_repos.contains(&dir_name)
})
.filter(|d| d.file_type().is_ok_and(|t| t.is_dir()))
.collect_vec();
.filter_map(|d| {
let dir = d.path();
let filename = dir.file_name().unwrap_or_default().to_string_lossy();
let dirs = dirs.iter().map(|d| d.path());
match filename.as_ref() {
".git" => Some(vec![dir]),
_ => walk_dir(&dir, ignored_repos),
}
})
.reduce(Vec::new, |mut c, n| {
c.extend(n);
c
});
for dir in dirs {
let filename = dir.file_name().unwrap_or_default().to_string_lossy();
match filename.as_ref() {
".git" => repos.push(dir),
_ => find_git_repos(&dir, repos, ignored_repos),
}
}
Some(dirs)
}
pub fn get_default_until(since: NaiveDate) -> String {