From b1a5df8659ab9f115a7d5262a371273403da02cf Mon Sep 17 00:00:00 2001 From: Wynd Date: Sat, 18 Jan 2025 10:34:03 +0200 Subject: [PATCH] Rayon testing --- .gitignore | 3 +- src/lib.rs | 395 +++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 325 insertions(+), 73 deletions(-) diff --git a/.gitignore b/.gitignore index c96fa24..e4c67ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target -profile.json \ No newline at end of file +profile.json +perf.data* \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 2347e07..a7e9fc6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,7 +3,7 @@ use std::{ cmp::Reverse, collections::HashSet, - path::{self, PathBuf}, + path::{self, Path, PathBuf}, sync::OnceLock, }; @@ -11,10 +11,11 @@ use anyhow::anyhow; use chrono::{DateTime, Duration, Local, NaiveDate, NaiveTime, TimeZone}; use clap::Parser; use cli::CliArgs; -use gix::{bstr::ByteSlice, traverse::commit::simple::Sorting, ObjectId}; +use gix::{bstr::ByteSlice, traverse::commit::simple::Sorting, ObjectId, ThreadSafeRepository}; use heatmap::{ColorLogic, HeatmapColors}; use itertools::Itertools; use mailmap::Mailmap; +use rayon::prelude::*; use rgb::Rgb; pub mod cli; @@ -143,10 +144,15 @@ pub fn get_commits( let end_date = Local.from_local_datetime(&end_date).unwrap(); let authors = args.authors.unwrap_or_default(); - let mut repos_count = 0; - let mut branches_count = 0; + + //NOTE: rayon on branches + let mut repos_count: usize = 0; + let mut branches_count: usize = 0; for (i, repo_path) in repos.iter().enumerate() { + // let repo = ThreadSafeRepository::open(repo_path) + // .unwrap() + // .to_thread_local(); let repo = gix::open(repo_path).unwrap(); let branch_names = &*branches[i]; @@ -171,90 +177,335 @@ pub fn get_commits( } let mailmap = Mailmap::new(repo_path); - let mut has_commits = false; - for branch in &branches { - // When passing the default @ (HEAD) branch this might actually not exist at all - // locally so we're skipping it - let Ok(rev) = repo.rev_parse(&**branch) - else { - continue; - }; + let branch_commits: Vec = branches + .par_iter() + .filter_map(|branch| { + let mut repo_commits: Vec = vec![]; - let branch_commits = rev - .single() - .unwrap() - .ancestors() - .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { - seconds: start_date.timestamp(), - }) - .all()?; + let repo = ThreadSafeRepository::open(repo_path) + .unwrap() + .to_thread_local(); - branch_commits - .filter_map(|c| c.ok()) - .filter_map(|c| c.object().ok()) - .filter_map(|c| { - let title = c - .message() - .ok()? - .title - .trim_ascii() - .to_str() - .ok()? - .to_string(); + // When passing the default @ (HEAD) branch this might actually not exist at all + // locally so we're skipping it + let rev = repo.rev_parse(&**branch).ok()?; - if args.no_merges { - let is_merge = c.parent_ids().count() > 1; - if is_merge { + let branch_commits = rev + .single() + .unwrap() + .ancestors() + .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + seconds: start_date.timestamp(), + }) + .all() + .ok()?; + + branch_commits + .filter_map(|c| c.ok()) + .filter_map(|c| c.object().ok()) + .filter_map(|c| { + let title = c + .message() + .ok()? + .title + .trim_ascii() + .to_str() + .ok()? + .to_string(); + + if args.no_merges { + let is_merge = c.parent_ids().count() > 1; + if is_merge { + return None; + } + } + + let author = c.author().ok()?; + + let email = author.email.to_string(); + let name = author.name.to_string(); + + let author = Author { name, email }; + let author = mailmap.resolve(author); + + if !authors.is_empty() && !authors.contains(&author.name) { return None; } - } - let author = c.author().ok()?; + let time = c.time().ok()?; + let time = DateTime::from_timestamp_millis(time.seconds * 1000)? + .with_timezone(&Local); + if time < start_date || time > end_date { + return None; + } - let email = author.email.to_string(); - let name = author.name.to_string(); - - let author = Author { name, email }; - let author = mailmap.resolve(author); - - if !authors.is_empty() && !authors.contains(&author.name) { - return None; - } - - let time = c.time().ok()?; - let time = - DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local); - if time < start_date || time > end_date { - return None; - } - - has_commits = true; - - Some(Commit { - id: c.id, - title, - author, - time, + Some(Commit { + id: c.id, + title, + author, + time, + }) }) - }) - .for_each(|c| { - commits.insert(c); - }); - } + .for_each(|c| { + repo_commits.push(c); + }); - if has_commits { + Some(repo_commits) + }) + .reduce(Vec::new, |mut c, n| { + c.extend(n); + c + }); + + if !branch_commits.is_empty() { repos_count += 1; branches_count += branches.len(); } + + // for vec in branch_commits { + commits.extend(branch_commits); + // } } - let commits = commits - .into_iter() - .sorted_by_cached_key(|a| Reverse(a.time)) - .collect_vec(); + //NOTE: rayon on the entire repos set - Ok((repos_count, branches_count, commits)) + // let dataset: Vec<(Vec, usize, usize)> = repos + // .par_iter() + // .enumerate() + // .filter_map(|(i, repo_path)| { + // let mut repos_count: usize = 0; + // let mut branches_count: usize = 0; + // let mut repo_commits: Vec = vec![]; + // + // let repo = gix::open(repo_path).unwrap(); + // + // let branch_names = &*branches[i]; + // let mut branches: Vec = vec![]; + // + // if branch_names.is_empty() { + // branches = repo + // .references() + // .ok()? + // .prefixed("refs/heads") + // .ok()? + // .filter_map(Result::ok) + // .filter_map(|b| { + // b.inner + // .name + // .to_string() + // .strip_prefix("refs/heads/") + // .map(|s| s.to_string()) + // }) + // .collect_vec(); + // } + // else { + // let branch_names = branch_names.split(' ').map(|s| s.to_string()); + // branches.extend(branch_names); + // } + // + // let mailmap = Mailmap::new(repo_path); + // let mut has_commits = false; + // + // for branch in &branches { + // // When passing the default @ (HEAD) branch this might actually not exist at all + // // locally so we're skipping it + // let Ok(rev) = repo.rev_parse(&**branch) + // else { + // continue; + // }; + // + // let branch_commits = rev + // .single() + // .unwrap() + // .ancestors() + // .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + // seconds: start_date.timestamp(), + // }) + // .all() + // .ok()?; + // + // branch_commits + // .filter_map(|c| c.ok()) + // .filter_map(|c| c.object().ok()) + // .filter_map(|c| { + // let title = c + // .message() + // .ok()? + // .title + // .trim_ascii() + // .to_str() + // .ok()? + // .to_string(); + // + // if args.no_merges { + // let is_merge = c.parent_ids().count() > 1; + // if is_merge { + // return None; + // } + // } + // + // let author = c.author().ok()?; + // + // let email = author.email.to_string(); + // let name = author.name.to_string(); + // + // let author = Author { name, email }; + // let author = mailmap.resolve(author); + // + // if !authors.is_empty() && !authors.contains(&author.name) { + // return None; + // } + // + // let time = c.time().ok()?; + // let time = DateTime::from_timestamp_millis(time.seconds * 1000)? + // .with_timezone(&Local); + // if time < start_date || time > end_date { + // return None; + // } + // + // has_commits = true; + // + // Some(Commit { + // id: c.id, + // title, + // author, + // time, + // }) + // }) + // .for_each(|c| { + // repo_commits.push(c); + // }); + // } + // + // if has_commits { + // repos_count += 1; + // branches_count += branches.len(); + // } + // + // Some((repo_commits, repos_count, branches_count)) + // }) + // .collect(); + // + // let mut repos_count = 0; + // let mut branches_count = 0; + // let mut repo_commits: Vec = vec![]; + + //NOTE: simple for loop + + // let dataset: (usize, &PathBuf) = repos.par_iter().enumerate().collect(); + // + // for (i, repo_path) in dataset { + // let repo = gix::open(repo_path).unwrap(); + // + // let branch_names = &*branches[i]; + // let mut branches = vec![]; + // if branch_names.is_empty() { + // branches = repo + // .references()? + // .prefixed("refs/heads")? + // .filter_map(Result::ok) + // .filter_map(|b| { + // b.inner + // .name + // .to_string() + // .strip_prefix("refs/heads/") + // .map(|s| s.to_string()) + // }) + // .collect_vec(); + // } + // else { + // let branch_names = branch_names.split(' ').map(|s| s.to_string()); + // branches.extend(branch_names); + // } + // + // let mailmap = Mailmap::new(repo_path); + // let mut has_commits = false; + // + // for branch in &branches { + // // When passing the default @ (HEAD) branch this might actually not exist at all + // // locally so we're skipping it + // let Ok(rev) = repo.rev_parse(&**branch) + // else { + // continue; + // }; + // + // let branch_commits = rev + // .single() + // .unwrap() + // .ancestors() + // .sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan { + // seconds: start_date.timestamp(), + // }) + // .all()?; + // + // branch_commits + // .filter_map(|c| c.ok()) + // .filter_map(|c| c.object().ok()) + // .filter_map(|c| { + // let title = c + // .message() + // .ok()? + // .title + // .trim_ascii() + // .to_str() + // .ok()? + // .to_string(); + // + // if args.no_merges { + // let is_merge = c.parent_ids().count() > 1; + // if is_merge { + // return None; + // } + // } + // + // let author = c.author().ok()?; + // + // let email = author.email.to_string(); + // let name = author.name.to_string(); + // + // let author = Author { name, email }; + // let author = mailmap.resolve(author); + // + // if !authors.is_empty() && !authors.contains(&author.name) { + // return None; + // } + // + // let time = c.time().ok()?; + // let time = + // DateTime::from_timestamp_millis(time.seconds * 1000)?.with_timezone(&Local); + // if time < start_date || time > end_date { + // return None; + // } + // + // has_commits = true; + // + // Some(Commit { + // id: c.id, + // title, + // author, + // time, + // }) + // }) + // .for_each(|c| { + // commits.insert(c); + // }); + // } + // + // if has_commits { + // repos_count += 1; + // branches_count += branches.len(); + // } + // } + + let mut commits: Vec = commits.into_par_iter().collect::>(); + + commits.par_sort_by_cached_key(|a| Reverse(a.time)); + // .sorted_by_cached_key(|a| Reverse(a.time)) + // .collect_vec(); + + Ok((0, 0, commits)) + // Ok((repos_count, branches_count, commits)) } fn find_git_repos(scan_path: &path::Path, repos: &mut Vec, ignored_repos: &Vec) {