I wanted to be able to get an idea of the content of a fax before opening the
email attachment or downloading the PDF from the web interface.
You need to have tesseract installed:
apt-get install tesseract-ocr tesseract-ocr-eng
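(For other languages, install the matching language pack, e.g. for German: apt-get install tesseract-ocr-deu, and set $ocr_lang accordingly.)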
Index: fax_to_email.php
===================================================================
--- fax_to_email.php (revision 2492)
+++ fax_to_email.php (working copy)
@@ -49,6 +49,9 @@
ob_end_clean();
ob_start();
+// OCR "configuration"
+$ocr_lang = "eng";
+
//add a delimeter to the log
echo "\n---------------------------------\n";
@@ -155,6 +158,15 @@
exec($cmd);
}
}
+ // if tesseract is installed, create ocr representation of received fax
+ if (!file_exists($dir_fax.'/'.$fax_name.".txt")) {
+ $tmp_tesseract = exec("which tesseract");
+ if (strlen($tmp_tesseract) > 0) {
+ $cmd = "cd ".$dir_fax."; nice ".$tmp_tesseract."
".$dir_fax."/".$fax_name.".tif ".$dir_fax."/".$fax_name." -l
".$ocr_lang.">".sys_get_temp_dir()."/lastfax.log 2>&1";
+ echo $cmd."\n";
+ exec($cmd);
+ }
+ }
}
else {
$fax_file_warning = " Fax image not available on server.";
@@ -225,6 +237,13 @@
if ($fax_retry == 'yes') {
$tmp_text_plain .= "This message arrived earlier and has been queued until now due to email server issues.\n";
}
+ if (file_exists($dir_fax."/".$fax_name.".txt")) {
+ // An OCR representation has been found
+ // Include it in the mail body
+ $tmp_text_plain .= "\n ================= My guess on the content of this
fax: =================\n";
+ $tmp_text_plain .= file_get_contents($dir_fax."/".$fax_name.".txt", NULL,
NULL, 0, 100000);
+ $tmp_text_plain .= "\n
========================================================================\n";
+ }
$tmp_text_html = $tmp_text_plain;
//prepare the mail object
@@ -271,6 +290,9 @@
if (file_exists($dir_fax.'/'.$fax_name.".pdf")) {
$mail->AddAttachment($dir_fax.'/'.$fax_name.'.pdf'); // pdf attachment
}
+ if (file_exists($dir_fax.'/'.$fax_name.".txt")) {
+ $mail->AddAttachment($dir_fax.'/'.$fax_name.'.txt'); // OCR
representation as TXT file
+ }
//$filename='fax.tif'; $encoding = "base64"; $type = "image/tif";
//$mail->AddStringAttachment(base64_decode($strfax),$filename,$encoding,$type);
}
@@ -337,4 +359,4 @@
//write the contents of the buffer
fwrite($fp, $content);
fclose($fp);
I know Tesseract is far from perfect, but for most of our faxes it gives me an
idea of whether the fax is a signed contract or just spam.
Maybe there is an intelligent way to add this generated text to the
web interface (mouseover popups or something like that).
Regards,
Johannes Jakob
Original issue reported on code.google.com by lists...@googlemail.com on 3 May 2012 at 6:05