File: git_repo.rs

package info (click to toggle)
rust-imara-diff 0.1.8-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 920 kB
sloc: makefile: 2
file content (168 lines) | stat: -rw-r--r-- 5,514 bytes
parent folder | download | duplicates (7)
use std::convert::Infallible;
use std::path::PathBuf;

use criterion::measurement::Measurement;
use criterion::{
    black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion,
};
use git_repository::object::tree::diff::{Action, Change};
use git_repository::Id;
use imara_diff::intern::InternedInput;
use imara_diff::sink::Counter;
use imara_diff::Algorithm;

fn extract_diff(change: &Change) -> Option<(Vec<u8>, Vec<u8>)> {
    use git_repository::object::tree::diff::change::Event::Modification;

    let (previous_id, id) = match change.event {
        Modification {
            previous_entry_mode,
            previous_id,
            entry_mode,
            id,
        } if previous_entry_mode.is_blob() && entry_mode.is_blob() => (previous_id, id),
        _ => return None,
    };

    let old = previous_id.object().ok()?.detach().data;
    let new = id.object().ok()?.detach().data;

    Some((new, old))
}

fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec<u8>, Vec<u8>, usize)>) {
    let from_tree = from.object().unwrap().peel_to_tree().unwrap();
    let to_tree = to.object().unwrap().peel_to_tree().unwrap();
    from_tree
        .changes()
        .track_filename()
        .for_each_to_obtain_tree(&to_tree, |change| -> Result<_, Infallible> {
            if let Some((old, new)) = extract_diff(&change) {
                let input = InternedInput::new(&*old, &*new);
                let changes =
                    imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total();
                let complexity = changes * (old.len() + new.len());
                diffs.push((old, new, complexity));
            }
            Ok(Action::Continue)
        })
        .unwrap();
}

pub fn project_root() -> PathBuf {
    let dir = env!("CARGO_MANIFEST_DIR");
    let mut res = PathBuf::from(dir);
    while !res.join("README.md").exists() {
        res = res
            .parent()
            .expect("reached fs root without finding project root")
            .to_owned()
    }
    res
}

fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) {
    let path = project_root().join("bench_data").join("repos").join(name);
    let repo = git_repository::open(path).unwrap();
    let tag1 = repo
        .find_reference(tag1)
        .unwrap()
        .into_fully_peeled_id()
        .unwrap();
    let tag2 = repo
        .find_reference(tag2)
        .unwrap()
        .into_fully_peeled_id()
        .unwrap();
    let mut diffs = Vec::new();
    git_tree_diff(tag1, tag2, &mut diffs);
    let mut last_commit = tag2;
    tag2.object()
        .unwrap()
        .into_commit()
        .ancestors()
        .all()
        .unwrap()
        .take(num_commits as usize)
        .for_each(|parent| {
            let parent = parent.unwrap();
            git_tree_diff(last_commit, parent, &mut diffs);
            last_commit = parent;
        });
    diffs.sort_unstable_by_key(|(_, _, complexity)| *complexity);

    if std::env::var("PLOT").is_ok() {
        let mut group = c.benchmark_group(format!("{name}_plot"));
        group.sample_size(15);
        bench_file_diffs(group, &diffs, 12, true);
    } else {
        bench_file_diffs(c.benchmark_group(name), &diffs, 2, false);
    }
}

fn bench_file_diffs<M: Measurement>(
    mut group: BenchmarkGroup<M>,
    files: &[(Vec<u8>, Vec<u8>, usize)],
    num_chunks: usize,
    compare_to_similar: bool,
) {
    let mut run = |name, f: fn(&[u8], &[u8]) -> usize| {
        let mut i = 0;
        for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) {
            let mut average_complexity: usize = chunk.iter().map(|(_, _, it)| *it).sum();
            average_complexity /= chunk.len();
            println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len());
            i += chunk.len();
            group.bench_function(
                BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())),
                |b| {
                    b.iter(|| {
                        for (old, new, _) in chunk {
                            // myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths
                            // we use that as an x axis to find a meaningful way to plot a
                            black_box(f(old, new));
                        }
                    });
                },
            );
        }
    };

    run("imara_diff-histogram", |file1, file2| {
        let input = InternedInput::new(file1, file2);
        imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total()
    });

    run("imara_diff-myers", |file1, file2| {
        let input = InternedInput::new(file1, file2);
        imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total()
    });

    if compare_to_similar {
        run("similar", |file1, file2| {
            let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2);
            diff.len()
        });
    }

    group.finish();
}

fn rust(c: &mut Criterion) {
    bench_repo(c, "rust", "1.64.0", "1.50.0", 30);
}

fn vscode(c: &mut Criterion) {
    bench_repo(c, "vscode", "1.72.2", "1.41.0", 30);
}

fn linux(c: &mut Criterion) {
    bench_repo(c, "linux", "v6.0", "v5.7", 30);
}

fn helix(c: &mut Criterion) {
    bench_repo(c, "helix", "22.08.1", "v0.5.0", 30);
}

criterion_group!(realworld_repos, helix, rust, vscode, linux);
criterion_main!(realworld_repos);