1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use multihash::Multihash;
/// Registers the `ingest-tar` benchmark, which repeatedly ingests the contents of
/// `benchmark.tar` (read from the current working directory).
///
/// When the archive does not exist, a hint is printed to stderr and no benchmark is registered.
///
/// # Panics
///
/// Panics when reading the archive fails for any reason other than the file being absent.
pub fn criterion_benchmark(c: &mut Criterion) {
    let file = "benchmark.tar";

    let tar_bytes = match std::fs::read(file) {
        Ok(contents) => contents,
        Err(e) => {
            if e.kind() != std::io::ErrorKind::NotFound {
                panic!("failed to read the {file:?}: {e}");
            }
            eprintln!("could not find {file:?}:");
            eprintln!("please download a linux kernel and unpack it to enable benchmark. specific version doesn't matter.");
            return;
        }
    };

    // Both scratch values are shared by every iteration; the warm-up runs are expected to grow
    // them to their working size.
    let mut buffer = Vec::new();
    let mut path = String::new();

    c.bench_function("ingest-tar", |b| {
        b.iter(|| ingest_tar(&tar_bytes, &mut buffer, &mut path))
    });
}
/// Ingests an in-memory tar archive into a UnixFS tree and passes the last node produced by the
/// tree builder to `black_box`.
///
/// * `bytes` - the raw tar archive.
/// * `buffer` - scratch space, used both for serialized symlink blocks and as the read buffer
///   for file contents. It is never shrunk here, so a caller that passes the same `Vec` again
///   keeps whatever size it has grown to.
/// * `path` - scratch `String`, overwritten with the path of each entry.
///
/// # Panics
///
/// Panics if the archive or one of its entries cannot be read, if an entry path or link target
/// is not valid UTF-8, if the tree builder rejects an entry, or if building the tree yields no
/// node at all.
fn ingest_tar(bytes: &[u8], buffer: &mut Vec<u8>, path: &mut String) {
    // NOTE(review): this bare `use Cid;` looks like it lost its crate path -- confirm the
    // intended import.
    use Cid;
    use rust_unixfs::dir::builder::{BufferingTreeBuilder, TreeOptions};
    use rust_unixfs::file::adder::FileAdder;
    use sha2::{Digest, Sha256};
    use std::io::Read;

    let mut archive = tar::Archive::new(std::io::Cursor::new(bytes));
    let entries = archive.entries().unwrap();

    let mut opts = TreeOptions::default();
    opts.wrap_with_directory();
    let mut tree = BufferingTreeBuilder::new(opts);

    for entry in entries {
        let mut entry = entry.expect("assuming good tar");

        let path_bytes = entry.path_bytes();
        let tmp_path = std::str::from_utf8(&path_bytes).unwrap();
        path.clear();
        path.push_str(tmp_path);

        // Entries that carry a link name: serialize a symlink block, hash it, and link it.
        if let Some(link_name) = entry.link_name_bytes() {
            let link_name =
                std::str::from_utf8(&link_name).expect("symlink targets should be utf8");

            buffer.clear();
            rust_unixfs::symlink::serialize_symlink_block(link_name, buffer);

            let len = buffer.len();

            let mh = Multihash::wrap(
                multihash_codetable::Code::Sha2_256.into(),
                &Sha256::digest(&buffer),
            )
            .unwrap();
            let cid = Cid::new_v0(mh).expect("sha2_256 is the correct multihash for cidv0");

            tree.put_link(path, cid, len as u64).unwrap();

            // a real importer would save the &buffer[..] here; the benchmark discards it
            continue;
        }

        // Paths with a trailing '/' are treated as directories: only metadata is recorded,
        // under the path without that slash.
        if let Some(dir) = path.strip_suffix('/') {
            tree.set_metadata(dir, rust_unixfs::Metadata::default())
                .unwrap();
            continue;
        }

        // TODO: reusing of adder
        let mut adder = FileAdder::default();

        // with the std::io::Read it'd be good to read into the fileadder, or read into ...
        // something. trying to access the buffer from inside FileAdder does not seem to be the
        // way to go.
        //
        // reusing the buffers between files would make a lot of sense as well

        // Make sure the read buffer holds at least `size_hint()` bytes; a buffer that is already
        // larger is left as it is.
        let wanted = adder.size_hint();
        if buffer.len() < wanted {
            buffer.resize(wanted, 0);
        }

        let mut total_written = 0usize;

        loop {
            match entry.read(&mut buffer[..]).unwrap() {
                0 => {
                    // End of the entry: flush the adder. The CID of the last block it yields is
                    // the one linked into the tree.
                    let mut last_cid = None;
                    for (cid, block) in adder.finish() {
                        total_written += block.len();
                        last_cid = Some(cid);
                    }
                    let cid = last_cid.expect("this is probably always present");

                    tree.put_link(path, cid, total_written as u64).unwrap();
                    break;
                }
                n => {
                    // `push` may consume only part of the slice, so keep offering the
                    // remainder until all `n` bytes have been taken.
                    let mut read = 0;
                    while read < n {
                        let (blocks, consumed) = adder.push(&buffer[read..n]);
                        read += consumed;
                        total_written += blocks.map(|(_, bytes)| bytes.len()).sum::<usize>();
                    }
                }
            }
        }
    }

    // Drain the tree builder, keeping only a summary of the last node it yields.
    let mut iter = tree.build();
    let mut last: Option<(Cid, u64, usize)> = None;

    while let Some(res) = iter.next_borrowed() {
        let res = res.unwrap();
        last = Some((res.cid.to_owned(), res.total_size, res.block.len()));
    }

    let last = last.unwrap();

    black_box(last);
}
// Declares a benchmark group named `benches` containing `criterion_benchmark`.
criterion_group!(benches, criterion_benchmark);
// Generates the benchmark binary's entry point, which runs the `benches` group.
criterion_main!(benches);
|