J-F-Liu / lopdf

A Rust library for PDF document manipulation.
MIT License
1.67k stars 176 forks source link

Adding a Chinese watermark produces garbled text #316

Closed YSOcean closed 2 weeks ago

YSOcean commented 3 months ago

this is my code: `use lopdf::{Document, Object, Stream, content::Content, content::Operation, Dictionary, dictionary}; use std::fs::File; use std::io::Read; use std::f64::consts::PI; use hex::ToHex;

fn add_watermark_to_existing_pdf(input_path: &str, output_path: &str, watermark_texts: &Vec, font_path: &str, opacity: f32) { let mut doc = Document::load(input_path).unwrap();

// 加载字体文件
let mut font_file = Vec::new();
File::open(font_path).unwrap().read_to_end(&mut font_file).unwrap();
let font_stream = Stream::new(dictionary! { "Length1" => font_file.len() as i64 }, font_file);
let font_stream_id = doc.add_object(font_stream);

// 创建FontDescriptor字典
let font_descriptor = dictionary! {
    "Type" => "FontDescriptor",
    "FontName" => "SourceHanSansCN-Normal",
    "FontFile2" => font_stream_id,
    "Flags" => 4,  // Symbolic
    "FontBBox" => vec![Object::Integer(-1000), Object::Integer(-500), Object::Integer(2000), Object::Integer(1000)],
    "ItalicAngle" => Object::Integer(0),
    "Ascent" => Object::Integer(1000),
    "Descent" => Object::Integer(-200),
    "CapHeight" => Object::Integer(750),
    "StemV" => Object::Integer(80),
};
let font_descriptor_id = doc.add_object(font_descriptor);

// 创建CIDFontType2字体字典
let cid_font_dict = dictionary! {
"Type" => "Font",
"Subtype" => "CIDFontType2",
"BaseFont" => "SourceHanSansCN-Normal",
"CIDSystemInfo" => dictionary! {
    "Registry" => "Adobe",
    "Ordering" => "Identity",
    "Supplement" => 0,
},
"FontDescriptor" => font_descriptor_id,
"DW" => 1000,

}; let cid_font_id = doc.add_object(cid_font_dict);

// 创建Type0字体字典
let font_dict = dictionary! {
    "Type" => "Font",
    "Subtype" => "Type0",
    "BaseFont" => "SourceHanSansCN-Normal",
    "Encoding" => "Identity-H",
    "DescendantFonts" => vec![cid_font_id.into()],
    "ToUnicode" => create_to_unicode_cmap(),
};
let font_id = doc.add_object(font_dict);

// 创建ExtGState字典用于透明度设置
let ext_gstate = dictionary! {
    "Type" => "ExtGState",
    "ca" => Object::Real(opacity),   // 字体透明度
    "CA" => Object::Real(opacity),   // 图像透明度
    "BM" => "Normal",
};
let ext_gstate_id = doc.add_object(ext_gstate);

// 计算旋转角度和位置
let angle = 45.0 * PI / 180.0; // 45度旋转
let (cos_theta, sin_theta) = (angle.cos() as f32, angle.sin() as f32);

let page_width = 595.0;
let page_height = 842.0;

// 遍历每个页面,将水印添加到每个页面的内容流中
for (_, page_id) in doc.get_pages() {
    let page_content = doc.get_page_content(page_id).unwrap();

    let mut watermark_operations = vec![
        Operation::new("q", vec![]), // 保存图形状态
        Operation::new("BT", vec![]), // 开始文本块
        Operation::new("gs", vec![ext_gstate_id.into()]), // 设置透明度
        Operation::new("Tf", vec![font_id.into(), Object::Real(24.0)]), // 设置字体和大小
        Operation::new("0.65 0.65 0.65 rg", vec![]), // 设置浅灰色(RGB各0.65)
    ];

    let font_size = 24.0;
    let line_height = font_size * 1.5; // 行高

    // 计算整个水印块的中心点
    let center_x = page_width / 2.0;
    let center_y = page_height / 2.0;

    for (i, text) in watermark_texts.iter().enumerate() {
        let text_width = text.len() as f32 * font_size * 0.5;

        // 计算每行文本相对于水印中心的偏移
        let offset_y = (i as f32 - (watermark_texts.len() - 1) as f32 / 2.0) * line_height;

        // 应用旋转并移动到正确的位置
        let rotated_x = -offset_y * sin_theta;
        let rotated_y = offset_y * cos_theta;

        watermark_operations.push(Operation::new("Tm", vec![
            Object::Real(cos_theta), Object::Real(sin_theta),
            Object::Real(-sin_theta), Object::Real(cos_theta),
            Object::Real(center_x + rotated_x), Object::Real(center_y + rotated_y)
        ]));

        // 移动文本起始点,使其居中
        watermark_operations.push(Operation::new("Td", vec![
            Object::Real(-text_width / 2.0), Object::Real(0.0)
        ]));
        let text = "中";
        let utf16be = text.encode_utf16().flat_map(|c| c.to_be_bytes()).collect::<Vec<u8>>();
        watermark_operations.push(Operation::new("Tj", vec![Object::String(utf16be, lopdf::StringFormat::Literal)]));

    }
    watermark_operations.push(Operation::new("ET", vec![])); // 结束文本块
    watermark_operations.push(Operation::new("Q", vec![])); // 恢复图形状态

    let mut watermark_content = Content {
        operations: watermark_operations,
    };

    let watermark_encoded = watermark_content.encode().unwrap();
    let new_content = [watermark_encoded.as_slice(), &page_content].concat(); // 先绘制水印,再添加原内容

    doc.change_page_content(page_id, new_content);
}

doc.save(output_path).unwrap();

} fn utf16be_hex_encode(text: &str) -> String { text.encode_utf16() .flat_map(|c| c.to_be_bytes()) .map(|b| format!("{:02X}", b)) .collect() } // 创建ToUnicode映射 fn create_to_unicode_cmap() -> Object { let cmap = b"/CIDInit /ProcSet findresource begin 12 dict begin begincmap /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> def /CMapName /Adobe-Identity-UCS def /CMapType 2 def 1 begincodespacerange

<0000> endcodespacerange 1 beginbfchar <4E2D> <4E2D> endbfchar endcmap CMapName currentdict /CMap defineresource pop end end"; let stream = Stream::new(dictionary!{}, cmap.to_vec()); Object::Stream(stream) } // PDF水印居中 fn main() { let input_pdf = "input-01.pdf"; let output_pdf = "output.pdf"; let watermark_texts = vec!["2024/08/23 17:30".to_string(),"6002008".to_string(),"中文".to_string()]; let font_path = "./font/SourceHanSansCN-Normal.otf"; // 确保字体文件路径正确 let opacity = 0.5; // 透明度, 范围为 0.0 (完全透明) 到 1.0 (完全不透明) add_watermark_to_existing_pdf(input_pdf, output_pdf, &watermark_texts, font_path, opacity); } `
Heinenen commented 2 months ago

If you could formulate a question (best in English, but I can use Google Translate too), I can try to help you.

I tried formatting your code a little better:

use lopdf::{Document, Object, Stream, content::Content, content::Operation, Dictionary, dictionary};
use std::fs::File;
use std::io::Read;
use std::f64::consts::PI;
use hex::ToHex;

fn add_watermark_to_existing_pdf(input_path: &str, output_path: &str, watermark_texts: &Vec<String>, font_path: &str, opacity: f32) {
    let mut doc = Document::load(input_path).unwrap();

    // 加载字体文件
    let mut font_file = Vec::new();
    File::open(font_path).unwrap().read_to_end(&mut font_file).unwrap();
    let font_stream = Stream::new(dictionary! { "Length1" => font_file.len() as i64 }, font_file);
    let font_stream_id = doc.add_object(font_stream);

    // 创建FontDescriptor字典
    let font_descriptor = dictionary! {
        "Type" => "FontDescriptor",
        "FontName" => "SourceHanSansCN-Normal",
        "FontFile2" => font_stream_id,
        "Flags" => 4,  // Symbolic
        "FontBBox" => vec![Object::Integer(-1000), Object::Integer(-500), Object::Integer(2000), Object::Integer(1000)],
        "ItalicAngle" => Object::Integer(0),
        "Ascent" => Object::Integer(1000),
        "Descent" => Object::Integer(-200),
        "CapHeight" => Object::Integer(750),
        "StemV" => Object::Integer(80),
    };
    let font_descriptor_id = doc.add_object(font_descriptor);

    // 创建CIDFontType2字体字典
    let cid_font_dict = dictionary! {
    "Type" => "Font",
    "Subtype" => "CIDFontType2",
    "BaseFont" => "SourceHanSansCN-Normal",
    "CIDSystemInfo" => dictionary! {
        "Registry" => "Adobe",
        "Ordering" => "Identity",
        "Supplement" => 0,
    },
    "FontDescriptor" => font_descriptor_id,
    "DW" => 1000,
};
    let cid_font_id = doc.add_object(cid_font_dict);

    // 创建Type0字体字典
    let font_dict = dictionary! {
        "Type" => "Font",
        "Subtype" => "Type0",
        "BaseFont" => "SourceHanSansCN-Normal",
        "Encoding" => "Identity-H",
        "DescendantFonts" => vec![cid_font_id.into()],
        "ToUnicode" => create_to_unicode_cmap(),
    };
    let font_id = doc.add_object(font_dict);

    // 创建ExtGState字典用于透明度设置
    let ext_gstate = dictionary! {
        "Type" => "ExtGState",
        "ca" => Object::Real(opacity),   // 字体透明度
        "CA" => Object::Real(opacity),   // 图像透明度
        "BM" => "Normal",
    };
    let ext_gstate_id = doc.add_object(ext_gstate);

    // 计算旋转角度和位置
    let angle = 45.0 * PI / 180.0; // 45度旋转
    let (cos_theta, sin_theta) = (angle.cos() as f32, angle.sin() as f32);

    let page_width = 595.0;
    let page_height = 842.0;

    // 遍历每个页面,将水印添加到每个页面的内容流中
    for (_, page_id) in doc.get_pages() {
        let page_content = doc.get_page_content(page_id).unwrap();

        let mut watermark_operations = vec![
            Operation::new("q", vec![]), // 保存图形状态
            Operation::new("BT", vec![]), // 开始文本块
            Operation::new("gs", vec![ext_gstate_id.into()]), // 设置透明度
            Operation::new("Tf", vec![font_id.into(), Object::Real(24.0)]), // 设置字体和大小
            Operation::new("0.65 0.65 0.65 rg", vec![]), // 设置浅灰色(RGB各0.65)
        ];

        let font_size = 24.0;
        let line_height = font_size * 1.5; // 行高

        // 计算整个水印块的中心点
        let center_x = page_width / 2.0;
        let center_y = page_height / 2.0;

        for (i, text) in watermark_texts.iter().enumerate() {
            let text_width = text.len() as f32 * font_size * 0.5;

            // 计算每行文本相对于水印中心的偏移
            let offset_y = (i as f32 - (watermark_texts.len() - 1) as f32 / 2.0) * line_height;

            // 应用旋转并移动到正确的位置
            let rotated_x = -offset_y * sin_theta;
            let rotated_y = offset_y * cos_theta;

            watermark_operations.push(Operation::new("Tm", vec![
                Object::Real(cos_theta), Object::Real(sin_theta),
                Object::Real(-sin_theta), Object::Real(cos_theta),
                Object::Real(center_x + rotated_x), Object::Real(center_y + rotated_y)
            ]));

            // 移动文本起始点,使其居中
            watermark_operations.push(Operation::new("Td", vec![
                Object::Real(-text_width / 2.0), Object::Real(0.0)
            ]));
            let text = "中";
            let utf16be = text.encode_utf16().flat_map(|c| c.to_be_bytes()).collect::<Vec<u8>>();
            watermark_operations.push(Operation::new("Tj", vec![Object::String(utf16be, lopdf::StringFormat::Literal)]));

        }
        watermark_operations.push(Operation::new("ET", vec![])); // 结束文本块
        watermark_operations.push(Operation::new("Q", vec![])); // 恢复图形状态

        let mut watermark_content = Content {
            operations: watermark_operations,
        };

        let watermark_encoded = watermark_content.encode().unwrap();
        let new_content = [watermark_encoded.as_slice(), &page_content].concat(); // 先绘制水印,再添加原内容

        doc.change_page_content(page_id, new_content);
    }

    doc.save(output_path).unwrap();
}
/// Encodes `text` as UTF-16 and returns the big-endian bytes as uppercase hexadecimal,
/// four digits per UTF-16 code unit (characters outside the BMP yield two units).
fn utf16be_hex_encode(text: &str) -> String {
    // A UTF-16 code unit never needs more than four hex digits, and there are never more
    // code units than UTF-8 bytes, so this capacity is an upper bound.
    let mut hex = String::with_capacity(text.len() * 4);
    for unit in text.encode_utf16() {
        // Zero-padded to four digits, which equals the two big-endian bytes in hex.
        hex.push_str(&format!("{:04X}", unit));
    }
    hex
}
// Builds the ToUnicode CMap stream object used by the watermark font.
fn create_to_unicode_cmap() -> Object {
    // CMap program text: it declares the code space <0000>..<FFFF> and a single
    // bfchar entry that maps code <4E2D> to U+4E2D.
    const CMAP_PROGRAM: &[u8] = b"/CIDInit /ProcSet findresource begin
    12 dict begin
    begincmap
    /CIDSystemInfo << /Registry (Adobe) /Ordering (Identity) /Supplement 0 >> def
    /CMapName /Adobe-Identity-UCS def
    /CMapType 2 def
    1 begincodespacerange
    <0000> <FFFF>
    endcodespacerange
    1 beginbfchar
    <4E2D> <4E2D>
    endbfchar
    endcmap
    CMapName currentdict /CMap defineresource pop
    end
    end";

    // Wrap the program in a stream with an empty dictionary and hand it back as an object.
    Object::Stream(Stream::new(dictionary!{}, CMAP_PROGRAM.to_vec()))
}

// Entry point: adds a centred watermark to a PDF.
fn main() {
    // One entry per watermark line.
    let lines: Vec<String> = ["2024/08/23 17:30", "6002008", "中文"]
        .iter()
        .map(|line| line.to_string())
        .collect();

    add_watermark_to_existing_pdf(
        "input-01.pdf",
        "output.pdf",
        &lines,
        // Make sure this font file path is correct.
        "./font/SourceHanSansCN-Normal.otf",
        // Opacity, from 0.0 (fully transparent) to 1.0 (fully opaque).
        0.5,
    );
}