1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
use std::convert::Infallible;
use std::path::PathBuf;
use criterion::measurement::Measurement;
use criterion::{
black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion,
};
use git_repository::object::tree::diff::{Action, Change};
use git_repository::Id;
use imara_diff::intern::InternedInput;
use imara_diff::sink::Counter;
use imara_diff::Algorithm;
fn extract_diff(change: &Change) -> Option<(Vec<u8>, Vec<u8>)> {
use git_repository::object::tree::diff::change::Event::Modification;
let (previous_id, id) = match change.event {
Modification {
previous_entry_mode,
previous_id,
entry_mode,
id,
} if previous_entry_mode.is_blob() && entry_mode.is_blob() => (previous_id, id),
_ => return None,
};
let old = previous_id.object().ok()?.detach().data;
let new = id.object().ok()?.detach().data;
Some((new, old))
}
fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec<u8>, Vec<u8>, usize)>) {
let from_tree = from.object().unwrap().peel_to_tree().unwrap();
let to_tree = to.object().unwrap().peel_to_tree().unwrap();
from_tree
.changes()
.track_filename()
.for_each_to_obtain_tree(&to_tree, |change| -> Result<_, Infallible> {
if let Some((old, new)) = extract_diff(&change) {
let input = InternedInput::new(&*old, &*new);
let changes =
imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total();
let complexity = changes * (old.len() + new.len());
diffs.push((old, new, complexity));
}
Ok(Action::Continue)
})
.unwrap();
}
pub fn project_root() -> PathBuf {
let dir = env!("CARGO_MANIFEST_DIR");
let mut res = PathBuf::from(dir);
while !res.join("README.md").exists() {
res = res
.parent()
.expect("reached fs root without finding project root")
.to_owned()
}
res
}
fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) {
let path = project_root().join("bench_data").join("repos").join(name);
let repo = git_repository::open(path).unwrap();
let tag1 = repo
.find_reference(tag1)
.unwrap()
.into_fully_peeled_id()
.unwrap();
let tag2 = repo
.find_reference(tag2)
.unwrap()
.into_fully_peeled_id()
.unwrap();
let mut diffs = Vec::new();
git_tree_diff(tag1, tag2, &mut diffs);
let mut last_commit = tag2;
tag2.object()
.unwrap()
.into_commit()
.ancestors()
.all()
.unwrap()
.take(num_commits as usize)
.for_each(|parent| {
let parent = parent.unwrap();
git_tree_diff(last_commit, parent, &mut diffs);
last_commit = parent;
});
diffs.sort_unstable_by_key(|(_, _, complexity)| *complexity);
if std::env::var("PLOT").is_ok() {
let mut group = c.benchmark_group(format!("{name}_plot"));
group.sample_size(15);
bench_file_diffs(group, &diffs, 12, true);
} else {
bench_file_diffs(c.benchmark_group(name), &diffs, 2, false);
}
}
fn bench_file_diffs<M: Measurement>(
mut group: BenchmarkGroup<M>,
files: &[(Vec<u8>, Vec<u8>, usize)],
num_chunks: usize,
compare_to_similar: bool,
) {
let mut run = |name, f: fn(&[u8], &[u8]) -> usize| {
let mut i = 0;
for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) {
let mut average_complexity: usize = chunk.iter().map(|(_, _, it)| *it).sum();
average_complexity /= chunk.len();
println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len());
i += chunk.len();
group.bench_function(
BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())),
|b| {
b.iter(|| {
for (old, new, _) in chunk {
// myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths
// we use that as an x axis to find a meaningful way to plot a
black_box(f(old, new));
}
});
},
);
}
};
run("imara_diff-histogram", |file1, file2| {
let input = InternedInput::new(file1, file2);
imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total()
});
run("imara_diff-myers", |file1, file2| {
let input = InternedInput::new(file1, file2);
imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total()
});
if compare_to_similar {
run("similar", |file1, file2| {
let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2);
diff.len()
});
}
group.finish();
}
fn rust(c: &mut Criterion) {
bench_repo(c, "rust", "1.64.0", "1.50.0", 30);
}
fn vscode(c: &mut Criterion) {
bench_repo(c, "vscode", "1.72.2", "1.41.0", 30);
}
fn linux(c: &mut Criterion) {
bench_repo(c, "linux", "v6.0", "v5.7", 30);
}
fn helix(c: &mut Criterion) {
bench_repo(c, "helix", "22.08.1", "v0.5.0", 30);
}
criterion_group!(realworld_repos, helix, rust, vscode, linux);
criterion_main!(realworld_repos);
|