use std::env;
use std::fs;
use std::io::{self, Write};
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
use indicatif::{ProgressBar, ProgressStyle};
use pdf_extract::*;
use lopdf::*;
use std::fs::File;
use std::panic::{self, AssertUnwindSafe};
fn main() {
let args: Vec = env::args().collect();
if args.len() < 3 {
eprintln!("Usage: {}
let pdf_dir = Path::new(&args[1]);
let output_dir = Path::new(&args[2]);
if !output_dir.exists() {
fs::create_dir_all(&output_dir).unwrap_or_else(|_| panic!("Could not create output directory: {:?}", output_dir));
}
process_directory(&pdf_dir, &output_dir);
}
fn process_directory(pdf_dir: &Path, output_dir: &Path) {
for entry in fs::read_dir(pdf_dir).unwrap() {
let entry = entry.unwrap();
let path = entry.path();
if path.is_dir() {
println!("Processed directory: {:?}", pdf_dir);
println!("Next directory: {:?}", path);
println!("Do you wish to proceed? (yes/no)");
let mut input = String::new();
io::stdin().read_line(&mut input).unwrap();
if input.trim().eq_ignore_ascii_case("yes") {
process_directory(&path, output_dir);
} else {
continue;
}
} else if path.extension().and_then(|s| s.to_str()) == Some("pdf") {
// Wrap the call to process_pdf with catch_unwind to handle panics
let result = panic::catch_unwind(AssertUnwindSafe(|| {
process_pdf(&path, &output_dir);
}));
if let Err(e) = result {
eprintln!("An error occurred while processing {:?}: {:?}", path, e);
}
}
}
let filename = pdf_path.file_stem().unwrap().to_str().unwrap();
let output_path = output_dir.join(filename).with_extension("txt");
let mut file = File::create(&output_path).expect("Could not create output file");
match Document::load(pdf_path) {
Ok(doc) => {
print_metadata(&doc);
let mut output: Box<dyn OutputDev> = Box::new(PlainTextOutput::new(&mut file as &mut dyn Write));
if let Err(e) = output_doc(&doc, output.as_mut()) {
eprintln!("Error processing document {}: {}", pdf_path.display(), e);
}
}
Err(e) => eprintln!("Failed to load document {}: {}", pdf_path.display(), e),
}
pb.finish_with_message("Done.");
let time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs();
let mut log_file = fs::OpenOptions::new().append(true).create(true).open(output_dir.join(format!("processed_{}.log", time))).unwrap();
writeln!(log_file, "Processed PDF: {:?}", pdf_path).unwrap();
}
fn printmetadata(doc: &Document) {
= doc; // Simulate using the doc variable, or implement logic here
}
warning: fields `name`, `alternate_space`, and `tint_transform` are never read
    --> src/lib.rs:1310:5
     |
1309 | pub struct Separation {
     |            ---------- fields in this struct
1310 |     name: String,
     |     ^^^^
1311 |     alternate_space: AlternateColorSpace,
     |     ^^^^^^^^^^^^^^^
1312 |     tint_transform: Box,
     |     ^^^^^^^^^^^^^^
     |
     = note: `Separation` has a derived impl for the trait `Clone`, but this is intentionally ignored during dead code analysis
warning: pdf-extract (lib) generated 7 warnings
Compiling pdf-extract v0.7.2 (/home/walter/programs/pdf-extract)
warning: unused `Result` that must be used
  --> bin/extract.rs:72:5
   |
72 |     output_doc(&doc, output.as_mut());
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   |
   = note: this `Result` may be an `Err` variant, which should be handled
   = note: `#[warn(unused_must_use)]` on by default
help: use `let _ = ...` to ignore the resulting value
   |
72 |     let _ = output_doc(&doc, output.as_mut());
   |     +++++++
example code:
extern crate pdf_extract; extern crate lopdf; extern crate indicatif;
use std::env; use std::fs; use std::io::{self, Write}; use std::path::Path; use std::time::{SystemTime, UNIX_EPOCH}; use indicatif::{ProgressBar, ProgressStyle}; use pdf_extract::; use lopdf::; use std::fs::File; use std::panic::{self, AssertUnwindSafe};
fn main() { let args: Vec = env::args().collect();
if args.len() < 3 {
eprintln!("Usage: {}
}
fn process_directory(pdf_dir: &Path, output_dir: &Path) { for entry in fs::read_dir(pdf_dir).unwrap() { let entry = entry.unwrap(); let path = entry.path();
}
/// Abbreviated excerpt of `process_pdf`: only the spinner setup is shown.
///
/// Builds a green spinner with a steady tick and the message
/// "Processing PDF...". Neither `pdf_path` nor `output_dir` is used in this
/// excerpt.
fn process_pdf(pdf_path: &Path, output_dir: &Path) {
    let spinner_style = ProgressStyle::default_spinner().template("{spinner:.green} {msg}");
    let spinner = ProgressBar::new_spinner();
    spinner.set_style(spinner_style);
    spinner.enable_steady_tick(120);
    spinner.set_message("Processing PDF...");
}
fn printmetadata(doc: &Document) { = doc; // Simulate using the doc variable, or implement logic here }
warning: fields `name`, `alternate_space`, and `tint_transform` are never read --> src/lib.rs:1310:5
warning: `pdf-extract` (lib) generated 7 warnings
Compiling pdf-extract v0.7.2 (/home/walter/programs/pdf-extract)
warning: unused `Result` that must be used --> bin/extract.rs:72:5
= note: this `Result` may be an `Err` variant, which should be handled
= note: `#[warn(unused_must_use)]` on by default
help: use `let _ = ...` to ignore the resulting value
| 72 | let _ = output_doc(&doc, output.as_mut()); | +++++++
warning: `pdf-extract` (bin "pdf-extract") generated 1 warning
Finished dev [unoptimized + debuginfo] target(s) in 1.09s
Running `target/debug/pdf-extract /media/ /media/extract_pdfminer`
⠁ Done. Done. ⠚ Processing PDF...
thread 'main' panicked at 'missing char 33 in map {48: "∙", 34: "(", 36: ")"} for <</Type /Font/Subtype /TrueType/BaseFont /AAAAAI+CambriaMath/FontDescriptor 161 0 R/ToUnicode 90 0 R/FirstChar 33/LastChar 48/Widths [698 415 672 415 351 469 605 728 579 579 728 579 440 507 579 247]>>', src/lib.rs:750:27

I keep getting this "thread 'main' panicked" error.