From b1a5df8659ab9f115a7d5262a371273403da02cf Mon Sep 17 00:00:00 2001 From: Wynd Date: Sat, 18 Jan 2025 10:34:03 +0200 Subject: [PATCH 1/3] Rayon testing --- .gitignore | 3 +- src/lib.rs | 395 +++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 325 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index c96fa24..e4c67ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target -profile.json \ No newline at end of file +profile.json +perf.data* \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 2347e07..a7e9fc6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,7 @@ use std::{ cmp::Reverse, collections::HashSet, - path::{self, PathBuf}, + path::{self, Path, PathBuf}, sync::OnceLock, }; @@ -11,10 +11,11 @@ use anyhow::anyhow; use chrono::{DateTime, Duration, Local, NaiveDate, NaiveTime, TimeZone}; use clap::Parser; use cli::CliArgs; -use gix::{bstr::ByteSlice, traverse::commit::simple::Sorting, ObjectId}; +use gix::{bstr::ByteSlice, traverse::commit::simple::Sorting, ObjectId, ThreadSafeRepository}; use heatmap::{ColorLogic, HeatmapColors}; use itertools::Itertools; use mailmap::Mailmap; +use rayon::prelude::*; use rgb::Rgb; pub mod cli; @@ -143,10 +144,15 @@ pub fn get_commits( let end_date = Local.from_local_datetime(&end_date).unwrap(); let authors = args.authors.unwrap_or_default(); - let mut repos_count = 0; - let mut branches_count = 0; + + //NOTE: rayon on branches + let mut repos_count: usize = 0; + let mut branches_count: usize = 0; for (i, repo_path) in repos.iter().enumerate() { + // let repo = ThreadSafeRepository::open(repo_path) + // .unwrap() + // .to_thread_local(); let repo = gix::open(repo_path).unwrap(); let branch_names = &*branches[i]; @@ -171,90 +177,335 @@ pub fn get_commits( } let mailmap = Mailmap::new(repo_path); - let mut has_commits = false; - for branch in &branches { - // When passing the default @ (HEAD) branch this might actually not exist at all - // locally so we're skipping it - let Ok(rev) = repo.rev_parse(&**branch) - else { - continue; - }; + let branch_commits: Vec = branches + .par_iter() + .filter_map(|branch| { + let mut repo_commits: Vec = vec![]; - let branch_commits = rev - .single() - .unwrap() - .ancestors() - .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - seconds: start_date.timestamp(), - }) - .all()?; + let repo = ThreadSafeRepository::open(repo_path) + .unwrap() + .to_thread_local(); - branch_commits - .filter_map(|c| c.ok()) - .filter_map(|c| c.object().ok()) - .filter_map(|c| { - let title = c - .message() - .ok()? - .title - .trim_ascii() - .to_str() - .ok()? - .to_string(); + // When passing the default @ (HEAD) branch this might actually not exist at all + // locally so we're skipping it + let rev = repo.rev_parse(&**branch).ok()?; - if args.no_merges { - let is_merge = c.parent_ids().count() > 1; - if is_merge { + let branch_commits = rev + .single() + .unwrap() + .ancestors() + .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + seconds: start_date.timestamp(), + }) + .all() + .ok()?; + + branch_commits + .filter_map(|c| c.ok()) + .filter_map(|c| c.object().ok()) + .filter_map(|c| { + let title = c + .message() + .ok()? + .title + .trim_ascii() + .to_str() + .ok()? + .to_string(); + + if args.no_merges { + let is_merge = c.parent_ids().count() > 1; + if is_merge { + return None; + } + } + + let author = c.author().ok()?; + + let email = author.email.to_string(); + let name = author.name.to_string(); + + let author = Author { name, email }; + let author = mailmap.resolve(author); + + if !authors.is_empty() && !authors.contains(&author.name) { return None; } - } - let author = c.author().ok()?; + let time = c.time().ok()?; + let time = DateTime::from_timestamp_millis(time.seconds * 1000)? + .with_timezone(&Local); + if time < start_date || time > end_date { + return None; + } - let email = author.email.to_string(); - let name = author.name.to_string(); - - let author = Author { name, email }; - let author = mailmap.resolve(author); - - if !authors.is_empty() && !authors.contains(&author.name) { - return None; - } - - let time = c.time().ok()?; - let time = - DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local); - if time < start_date || time > end_date { - return None; - } - - has_commits = true; - - Some(Commit { - id: c.id, - title, - author, - time, + Some(Commit { + id: c.id, + title, + author, + time, + }) }) - }) - .for_each(|c| { - commits.insert(c); - }); - } + .for_each(|c| { + repo_commits.push(c); + }); - if has_commits { + Some(repo_commits) + }) + .reduce(Vec::new, |mut c, n| { + c.extend(n); + c + }); + + if !branch_commits.is_empty() { repos_count += 1; branches_count += branches.len(); } + + // for vec in branch_commits { + commits.extend(branch_commits); + // } } - let commits = commits - .into_iter() - .sorted_by_cached_key(|a| Reverse(a.time)) - .collect_vec(); + //NOTE: rayon on the entire repos set - Ok((repos_count, branches_count, commits)) + // let dataset: Vec<(Vec, usize, usize)> = repos + // .par_iter() + // .enumerate() + // .filter_map(|(i, repo_path)| { + // let mut repos_count: usize = 0; + // let mut branches_count: usize = 0; + // let mut repo_commits: Vec = vec![]; + // + // let repo = gix::open(repo_path).unwrap(); + // + // let branch_names = &*branches[i]; + // let mut branches: Vec = vec![]; + // + // if branch_names.is_empty() { + // branches = repo + // .references() + // .ok()? + // .prefixed("refs/heads") + // .ok()? + // .filter_map(Result::ok) + // .filter_map(|b| { + // b.inner + // .name + // .to_string() + // .strip_prefix("refs/heads/") + // .map(|s| s.to_string()) + // }) + // .collect_vec(); + // } + // else { + // let branch_names = branch_names.split(' ').map(|s| s.to_string()); + // branches.extend(branch_names); + // } + // + // let mailmap = Mailmap::new(repo_path); + // let mut has_commits = false; + // + // for branch in &branches { + // // When passing the default @ (HEAD) branch this might actually not exist at all + // // locally so we're skipping it + // let Ok(rev) = repo.rev_parse(&**branch) + // else { + // continue; + // }; + // + // let branch_commits = rev + // .single() + // .unwrap() + // .ancestors() + // .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + // seconds: start_date.timestamp(), + // }) + // .all() + // .ok()?; + // + // branch_commits + // .filter_map(|c| c.ok()) + // .filter_map(|c| c.object().ok()) + // .filter_map(|c| { + // let title = c + // .message() + // .ok()? + // .title + // .trim_ascii() + // .to_str() + // .ok()? + // .to_string(); + // + // if args.no_merges { + // let is_merge = c.parent_ids().count() > 1; + // if is_merge { + // return None; + // } + // } + // + // let author = c.author().ok()?; + // + // let email = author.email.to_string(); + // let name = author.name.to_string(); + // + // let author = Author { name, email }; + // let author = mailmap.resolve(author); + // + // if !authors.is_empty() && !authors.contains(&author.name) { + // return None; + // } + // + // let time = c.time().ok()?; + // let time = DateTime::from_timestamp_millis(time.seconds * 1000)? + // .with_timezone(&Local); + // if time < start_date || time > end_date { + // return None; + // } + // + // has_commits = true; + // + // Some(Commit { + // id: c.id, + // title, + // author, + // time, + // }) + // }) + // .for_each(|c| { + // repo_commits.push(c); + // }); + // } + // + // if has_commits { + // repos_count += 1; + // branches_count += branches.len(); + // } + // + // Some((repo_commits, repos_count, branches_count)) + // }) + // .collect(); + // + // let mut repos_count = 0; + // let mut branches_count = 0; + // let mut repo_commits: Vec = vec![]; + + //NOTE: simple for loop + + // let dataset: (usize, &PathBuf) = repos.par_iter().enumerate().collect(); + // + // for (i, repo_path) in dataset { + // let repo = gix::open(repo_path).unwrap(); + // + // let branch_names = &*branches[i]; + // let mut branches = vec![]; + // if branch_names.is_empty() { + // branches = repo + // .references()? + // .prefixed("refs/heads")? + // .filter_map(Result::ok) + // .filter_map(|b| { + // b.inner + // .name + // .to_string() + // .strip_prefix("refs/heads/") + // .map(|s| s.to_string()) + // }) + // .collect_vec(); + // } + // else { + // let branch_names = branch_names.split(' ').map(|s| s.to_string()); + // branches.extend(branch_names); + // } + // + // let mailmap = Mailmap::new(repo_path); + // let mut has_commits = false; + // + // for branch in &branches { + // // When passing the default @ (HEAD) branch this might actually not exist at all + // // locally so we're skipping it + // let Ok(rev) = repo.rev_parse(&**branch) + // else { + // continue; + // }; + // + // let branch_commits = rev + // .single() + // .unwrap() + // .ancestors() + // .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + // seconds: start_date.timestamp(), + // }) + // .all()?; + // + // branch_commits + // .filter_map(|c| c.ok()) + // .filter_map(|c| c.object().ok()) + // .filter_map(|c| { + // let title = c + // .message() + // .ok()? + // .title + // .trim_ascii() + // .to_str() + // .ok()? + // .to_string(); + // + // if args.no_merges { + // let is_merge = c.parent_ids().count() > 1; + // if is_merge { + // return None; + // } + // } + // + // let author = c.author().ok()?; + // + // let email = author.email.to_string(); + // let name = author.name.to_string(); + // + // let author = Author { name, email }; + // let author = mailmap.resolve(author); + // + // if !authors.is_empty() && !authors.contains(&author.name) { + // return None; + // } + // + // let time = c.time().ok()?; + // let time = + // DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local); + // if time < start_date || time > end_date { + // return None; + // } + // + // has_commits = true; + // + // Some(Commit { + // id: c.id, + // title, + // author, + // time, + // }) + // }) + // .for_each(|c| { + // commits.insert(c); + // }); + // } + // + // if has_commits { + // repos_count += 1; + // branches_count += branches.len(); + // } + // } + + let mut commits: Vec = commits.into_par_iter().collect::>(); + + commits.par_sort_by_cached_key(|a| Reverse(a.time)); + // .sorted_by_cached_key(|a| Reverse(a.time)) + // .collect_vec(); + + Ok((0, 0, commits)) + // Ok((repos_count, branches_count, commits)) } fn find_git_repos(scan_path: &path::Path, repos: &mut Vec, ignored_repos: &Vec) { From 54d36fafcbf06c6c48a69ecfc367d59826881951 Mon Sep 17 00:00:00 2001 From: Wynd Date: Sat, 18 Jan 2025 21:13:26 +0200 Subject: [PATCH 2/3] Using rayon for discovering repos via dir walking --- src/lib.rs | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a7e9fc6..778c213 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,7 @@ use std::{ cmp::Reverse, collections::HashSet, - path::{self, Path, PathBuf}, + path::{self, PathBuf}, sync::OnceLock, }; @@ -504,34 +504,44 @@ pub fn get_commits( // .sorted_by_cached_key(|a| Reverse(a.time)) // .collect_vec(); - Ok((0, 0, commits)) - // Ok((repos_count, branches_count, commits)) + Ok((repos_count, branches_count, commits)) } fn find_git_repos(scan_path: &path::Path, repos: &mut Vec, ignored_repos: &Vec) { + if let Some(path) = walk_dir(scan_path, ignored_repos) { + repos.extend(path) + } +} + +pub fn walk_dir(scan_path: &path::Path, ignored_repos: &Vec) -> Option> { let Ok(dirs) = scan_path.read_dir() else { - return; + return None; }; - let dirs: Vec<_> = dirs + let dirs: Vec = dirs + .par_bridge() .filter_map(|d| d.ok()) .filter(|d| { let dir_name = d.file_name().to_string_lossy().to_string(); !ignored_repos.contains(&dir_name) }) .filter(|d| d.file_type().is_ok_and(|t| t.is_dir())) - .collect_vec(); + .filter_map(|d| { + let dir = d.path(); + let filename = dir.file_name().unwrap_or_default().to_string_lossy(); - let dirs = dirs.iter().map(|d| d.path()); + match filename.as_ref() { + ".git" => Some(vec![dir]), + _ => walk_dir(&dir, ignored_repos), + } + }) + .reduce(Vec::new, |mut c, n| { + c.extend(n); + c + }); - for dir in dirs { - let filename = dir.file_name().unwrap_or_default().to_string_lossy(); - match filename.as_ref() { - ".git" => repos.push(dir), - _ => find_git_repos(&dir, repos, ignored_repos), - } - } + Some(dirs) } pub fn get_default_until(since: NaiveDate) -> String { From 21e0247d7bf4803c5f90f7c08e275c53fee35ab6 Mon Sep 17 00:00:00 2001 From: Wynd Date: Sun, 19 Jan 2025 00:15:43 +0200 Subject: [PATCH 3/3] Some cleanup and parallel gix feature --- Cargo.lock | 11 ++ Cargo.toml | 5 +- src/lib.rs | 452 ++++++++++++++--------------------------------------- 3 files changed, 132 insertions(+), 336 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c27f429..f3211ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -234,6 +234,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -536,12 +545,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac7045ac9fe5f9c727f38799d002a7ed3583cd777e3322a7c4b43e3cf437dc69" dependencies = [ "crc32fast", + "crossbeam-channel", "flate2", "gix-hash", "gix-trace", "gix-utils", "libc", "once_cell", + "parking_lot", "prodash", "sha1_smol", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index 2ea07e2..f703372 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,10 @@ bench = false unsafe_code = { level = "forbid" } [dependencies] -gix = { version = "0.66.0", default-features = false, features = ["mailmap"] } +gix = { version = "0.66.0", default-features = false, features = [ + "mailmap", + "parallel", +] } clap = { version = "4.5.20", features = ["derive"] } chrono = { version = "0.4.38" } itertools = { version = "0.13.0" } diff --git a/src/lib.rs b/src/lib.rs index 778c213..e434745 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,6 @@ use std::{ cmp::Reverse, - collections::HashSet, path::{self, PathBuf}, sync::OnceLock, }; @@ -103,7 +102,7 @@ pub fn get_commits( start_date: NaiveDate, end_date: NaiveDate, ) -> anyhow::Result<(usize, usize, Vec)> { - let mut commits: HashSet = HashSet::new(); + let mut commits: Vec = vec![]; let ignored_repos = args.ignored_repos.as_ref().unwrap_or(&vec![]).to_owned(); @@ -150,363 +149,146 @@ pub fn get_commits( let mut branches_count: usize = 0; for (i, repo_path) in repos.iter().enumerate() { - // let repo = ThreadSafeRepository::open(repo_path) - // .unwrap() - // .to_thread_local(); - let repo = gix::open(repo_path).unwrap(); + let repo = ThreadSafeRepository::open(repo_path).unwrap(); + // let repo = gix::open(repo_path).unwrap(); let branch_names = &*branches[i]; - let mut branches = vec![]; - if branch_names.is_empty() { - branches = repo - .references()? - .prefixed("refs/heads")? - .filter_map(Result::ok) - .filter_map(|b| { - b.inner - .name - .to_string() - .strip_prefix("refs/heads/") - .map(|s| s.to_string()) - }) - .collect_vec(); - } - else { - let branch_names = branch_names.split(' ').map(|s| s.to_string()); - branches.extend(branch_names); - } + let branches = get_repo_branches(&repo, branch_names).unwrap(); let mailmap = Mailmap::new(repo_path); - let branch_commits: Vec = branches + let branch_commits: Vec<_> = branches .par_iter() - .filter_map(|branch| { - let mut repo_commits: Vec = vec![]; - - let repo = ThreadSafeRepository::open(repo_path) - .unwrap() - .to_thread_local(); - - // When passing the default @ (HEAD) branch this might actually not exist at all - // locally so we're skipping it - let rev = repo.rev_parse(&**branch).ok()?; - - let branch_commits = rev - .single() - .unwrap() - .ancestors() - .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - seconds: start_date.timestamp(), - }) - .all() - .ok()?; - - branch_commits - .filter_map(|c| c.ok()) - .filter_map(|c| c.object().ok()) - .filter_map(|c| { - let title = c - .message() - .ok()? - .title - .trim_ascii() - .to_str() - .ok()? - .to_string(); - - if args.no_merges { - let is_merge = c.parent_ids().count() > 1; - if is_merge { - return None; - } - } - - let author = c.author().ok()?; - - let email = author.email.to_string(); - let name = author.name.to_string(); - - let author = Author { name, email }; - let author = mailmap.resolve(author); - - if !authors.is_empty() && !authors.contains(&author.name) { - return None; - } - - let time = c.time().ok()?; - let time = DateTime::from_timestamp_millis(time.seconds * 1000)? - .with_timezone(&Local); - if time < start_date || time > end_date { - return None; - } - - Some(Commit { - id: c.id, - title, - author, - time, - }) - }) - .for_each(|c| { - repo_commits.push(c); - }); - - Some(repo_commits) - }) + .filter_map(|branch| get_commit_ids(&repo, branch, start_date)) .reduce(Vec::new, |mut c, n| { c.extend(n); c }); + let repo = repo.to_thread_local(); + + let branch_commits = branch_commits + .into_iter() + .unique() + .filter_map(|c| repo.find_commit(c).ok()) + .filter_map(|c| { + let title = c + .message() + .ok()? + .title + .trim_ascii() + .to_str() + .ok()? + .to_string(); + + if args.no_merges { + let is_merge = c.parent_ids().count() > 1; + if is_merge { + return None; + } + } + + let author = c.author().ok()?; + + let email = author.email.to_string(); + let name = author.name.to_string(); + + let author = Author { name, email }; + let author = mailmap.resolve(author); + + if !authors.is_empty() && !authors.contains(&author.name) { + return None; + } + + let time = c.time().ok()?; + let time = + DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local); + if time < start_date || time > end_date { + return None; + } + + Some(Commit { + id: c.id, + title, + author, + time, + }) + }) + .collect_vec(); + if !branch_commits.is_empty() { repos_count += 1; branches_count += branches.len(); } - // for vec in branch_commits { commits.extend(branch_commits); - // } } - //NOTE: rayon on the entire repos set - - // let dataset: Vec<(Vec, usize, usize)> = repos - // .par_iter() - // .enumerate() - // .filter_map(|(i, repo_path)| { - // let mut repos_count: usize = 0; - // let mut branches_count: usize = 0; - // let mut repo_commits: Vec = vec![]; - // - // let repo = gix::open(repo_path).unwrap(); - // - // let branch_names = &*branches[i]; - // let mut branches: Vec = vec![]; - // - // if branch_names.is_empty() { - // branches = repo - // .references() - // .ok()? - // .prefixed("refs/heads") - // .ok()? - // .filter_map(Result::ok) - // .filter_map(|b| { - // b.inner - // .name - // .to_string() - // .strip_prefix("refs/heads/") - // .map(|s| s.to_string()) - // }) - // .collect_vec(); - // } - // else { - // let branch_names = branch_names.split(' ').map(|s| s.to_string()); - // branches.extend(branch_names); - // } - // - // let mailmap = Mailmap::new(repo_path); - // let mut has_commits = false; - // - // for branch in &branches { - // // When passing the default @ (HEAD) branch this might actually not exist at all - // // locally so we're skipping it - // let Ok(rev) = repo.rev_parse(&**branch) - // else { - // continue; - // }; - // - // let branch_commits = rev - // .single() - // .unwrap() - // .ancestors() - // .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - // seconds: start_date.timestamp(), - // }) - // .all() - // .ok()?; - // - // branch_commits - // .filter_map(|c| c.ok()) - // .filter_map(|c| c.object().ok()) - // .filter_map(|c| { - // let title = c - // .message() - // .ok()? - // .title - // .trim_ascii() - // .to_str() - // .ok()? - // .to_string(); - // - // if args.no_merges { - // let is_merge = c.parent_ids().count() > 1; - // if is_merge { - // return None; - // } - // } - // - // let author = c.author().ok()?; - // - // let email = author.email.to_string(); - // let name = author.name.to_string(); - // - // let author = Author { name, email }; - // let author = mailmap.resolve(author); - // - // if !authors.is_empty() && !authors.contains(&author.name) { - // return None; - // } - // - // let time = c.time().ok()?; - // let time = DateTime::from_timestamp_millis(time.seconds * 1000)? - // .with_timezone(&Local); - // if time < start_date || time > end_date { - // return None; - // } - // - // has_commits = true; - // - // Some(Commit { - // id: c.id, - // title, - // author, - // time, - // }) - // }) - // .for_each(|c| { - // repo_commits.push(c); - // }); - // } - // - // if has_commits { - // repos_count += 1; - // branches_count += branches.len(); - // } - // - // Some((repo_commits, repos_count, branches_count)) - // }) - // .collect(); - // - // let mut repos_count = 0; - // let mut branches_count = 0; - // let mut repo_commits: Vec = vec![]; - - //NOTE: simple for loop - - // let dataset: (usize, &PathBuf) = repos.par_iter().enumerate().collect(); - // - // for (i, repo_path) in dataset { - // let repo = gix::open(repo_path).unwrap(); - // - // let branch_names = &*branches[i]; - // let mut branches = vec![]; - // if branch_names.is_empty() { - // branches = repo - // .references()? - // .prefixed("refs/heads")? - // .filter_map(Result::ok) - // .filter_map(|b| { - // b.inner - // .name - // .to_string() - // .strip_prefix("refs/heads/") - // .map(|s| s.to_string()) - // }) - // .collect_vec(); - // } - // else { - // let branch_names = branch_names.split(' ').map(|s| s.to_string()); - // branches.extend(branch_names); - // } - // - // let mailmap = Mailmap::new(repo_path); - // let mut has_commits = false; - // - // for branch in &branches { - // // When passing the default @ (HEAD) branch this might actually not exist at all - // // locally so we're skipping it - // let Ok(rev) = repo.rev_parse(&**branch) - // else { - // continue; - // }; - // - // let branch_commits = rev - // .single() - // .unwrap() - // .ancestors() - // .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - // seconds: start_date.timestamp(), - // }) - // .all()?; - // - // branch_commits - // .filter_map(|c| c.ok()) - // .filter_map(|c| c.object().ok()) - // .filter_map(|c| { - // let title = c - // .message() - // .ok()? - // .title - // .trim_ascii() - // .to_str() - // .ok()? - // .to_string(); - // - // if args.no_merges { - // let is_merge = c.parent_ids().count() > 1; - // if is_merge { - // return None; - // } - // } - // - // let author = c.author().ok()?; - // - // let email = author.email.to_string(); - // let name = author.name.to_string(); - // - // let author = Author { name, email }; - // let author = mailmap.resolve(author); - // - // if !authors.is_empty() && !authors.contains(&author.name) { - // return None; - // } - // - // let time = c.time().ok()?; - // let time = - // DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local); - // if time < start_date || time > end_date { - // return None; - // } - // - // has_commits = true; - // - // Some(Commit { - // id: c.id, - // title, - // author, - // time, - // }) - // }) - // .for_each(|c| { - // commits.insert(c); - // }); - // } - // - // if has_commits { - // repos_count += 1; - // branches_count += branches.len(); - // } - // } - - let mut commits: Vec = commits.into_par_iter().collect::>(); - commits.par_sort_by_cached_key(|a| Reverse(a.time)); - // .sorted_by_cached_key(|a| Reverse(a.time)) - // .collect_vec(); Ok((repos_count, branches_count, commits)) } +fn get_repo_branches(repo: &ThreadSafeRepository, branch_names: &str) -> Option> { + if branch_names.is_empty() { + let repo = repo.to_thread_local(); + let Ok(refs) = repo.references() + else { + return None; + }; + + let Ok(prefix) = refs.prefixed("refs/heads") + else { + return None; + }; + + let branches = prefix + .filter_map(Result::ok) + .filter_map(|b| { + b.inner + .name + .to_string() + .strip_prefix("refs/heads/") + .map(|s| s.to_string()) + }) + .collect(); + + Some(branches) + } + else { + Some(branch_names.split(' ').map(|s| s.to_string()).collect()) + } +} + +fn get_commit_ids( + repo: &ThreadSafeRepository, + branch: &str, + start_date: DateTime, +) -> Option> { + let repo = repo.to_thread_local(); + + // When passing the default @ (HEAD) branch this might actually not exist at all + // locally so we're skipping it + let rev = repo.rev_parse(branch).ok()?; + + let branch_commits = rev + .single() + .unwrap() + .ancestors() + .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + seconds: start_date.timestamp(), + }) + .all() + .ok()?; + + let commits = branch_commits + .filter_map(|c| c.ok()) + .map(|c| c.id) + .collect(); + + Some(commits) +} + fn find_git_repos(scan_path: &path::Path, repos: &mut Vec, ignored_repos: &Vec) { if let Some(path) = walk_dir(scan_path, ignored_repos) { repos.extend(path)