1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
|
extern crate pdf;
use std::env::args;
use std::time::SystemTime;
use std::fs;
use std::collections::HashMap;
use pdf::file::{FileOptions, Log};
use pdf::object::*;
use pdf::primitive::Primitive;
use pdf::error::PdfError;
use pdf::enc::StreamFilter;
/// Logger that echoes every object reference it is handed to standard output.
struct VerboseLog;

impl Log for VerboseLog {
    /// Prints `load <reference>` using the reference's `Debug` representation.
    fn load_object(&self, reference: PlainRef) {
        println!("load {:?}", reference);
    }

    /// Prints `get <reference>` using the reference's `Debug` representation.
    fn log_get(&self, reference: PlainRef) {
        println!("get {:?}", reference);
    }
}
/// Reads the PDF named by the first command-line argument and dumps its contents.
///
/// In order, this:
/// 1. prints the title/author found in the trailer's info dictionary (if any),
/// 2. collects the fonts and image XObjects referenced by every page's resources,
/// 3. writes each image with a recognised stream filter to
///    `extracted_image_<index>.<ext>` in the current directory,
/// 4. writes the embedded data of each font to `font_<name>`,
/// 5. prints the name and value of every form field,
/// 6. prints the elapsed wall-clock time.
///
/// # Errors
///
/// Returns the underlying `PdfError` if the file cannot be opened, a page or
/// its resources cannot be loaded, an XObject cannot be resolved, image data
/// cannot be read, or an output file cannot be written.
///
/// # Panics
///
/// Panics if no file path is given on the command line.
#[cfg(feature="cache")]
fn main() -> Result<(), PdfError> {
    let path = args().nth(1).expect("no file given");
    println!("read: {}", path);
    let now = SystemTime::now();

    // Failures are reported through the `Result` return value instead of panicking.
    let file = FileOptions::cached().log(VerboseLog).open(&path)?;
    let resolver = file.resolver();

    if let Some(ref info) = file.trailer.info_dict {
        let title = info.title.as_ref().map(|p| p.to_string_lossy());
        let author = info.author.as_ref().map(|p| p.to_string_lossy());

        let descr = match (title, author) {
            (Some(title), None) => title,
            (None, Some(author)) => format!("[no title] – {}", author),
            (Some(title), Some(author)) => format!("{} – {}", title, author),
            _ => "PDF".into()
        };
        println!("{}", descr);
    }

    let mut images: Vec<_> = vec![];
    let mut fonts = HashMap::new();
    for page in file.pages() {
        let page = page?;
        let resources = page.resources()?;

        // Fonts are keyed by name, so a font that appears on several pages is kept once.
        // NOTE(review): the fallback index restarts on every page, so unnamed fonts
        // from different pages share keys and later ones replace earlier ones.
        for (i, font) in resources.fonts.values().enumerate() {
            let name = match &font.name {
                Some(name) => name.as_str().into(),
                None => i.to_string(),
            };
            fonts.insert(name, font.clone());
        }

        // Keep only the XObjects that are images; a resolution failure aborts with an error.
        for &r in resources.xobjects.values() {
            let xobject = resolver.get(r)?;
            if matches!(*xobject, XObject::Image(_)) {
                images.push(xobject);
            }
        }
    }

    for (i, o) in images.iter().enumerate() {
        let img = match **o {
            XObject::Image(ref im) => im,
            _ => continue
        };
        let (data, filter) = img.raw_image_data(&resolver)?;
        // The file extension is chosen from the stream filter; images with any
        // other filter (or none) are skipped.
        // NOTE(review): Flate data is written with a `.png` extension; whether the
        // bytes form a valid PNG file is not established here -- confirm.
        let ext = match filter {
            Some(StreamFilter::DCTDecode(_)) => "jpeg",
            Some(StreamFilter::JBIG2Decode(_)) => "jbig2",
            Some(StreamFilter::JPXDecode) => "jp2k",
            Some(StreamFilter::FlateDecode(_)) => "png",
            _ => continue,
        };

        let fname = format!("extracted_image_{}.{}", i, ext);
        fs::write(&fname, data)?;
        println!("Wrote file {}", fname);
    }
    // The count includes images that were skipped above because of their filter.
    println!("Found {} image(s).", images.len());

    for (name, font) in fonts.iter() {
        // Font names come from the document; replace path separators so the name
        // cannot be interpreted as a directory component.
        let safe_name = name.replace(|c: char| c == '/' || c == '\\', "_");
        let fname = format!("font_{}", safe_name);
        // Fonts without embedded data, or whose data fails to load, are skipped.
        if let Some(Ok(data)) = font.embedded_data(&resolver) {
            fs::write(&fname, data)?;
            println!("Wrote file {}", fname);
        }
    }
    println!("Found {} font(s).", fonts.len());

    if let Some(ref forms) = file.get_root().forms {
        println!("Forms:");
        for field in forms.fields.iter() {
            print!("  {:?} = ", field.name);
            match field.value {
                Primitive::String(ref s) => println!("{}", s.to_string_lossy()),
                Primitive::Integer(i) => println!("{}", i),
                Primitive::Name(ref s) => println!("{}", s),
                ref p => println!("{:?}", p),
            }
        }
    }

    // `elapsed` fails if the system clock moved backwards; the timing line is then omitted.
    if let Ok(elapsed) = now.elapsed() {
        println!("Time: {}s", elapsed.as_secs_f64());
    }
    Ok(())
}
|