from googletrans import Translator
from google.cloud import translate_v2 as translate
from google.oauth2 import service_account
from transformers import pipeline
from fpdf import FPDF
import openai
import PyPDF4
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
# NOTE(review): these five statements look like the tail of
# create_pdf_from_text -- the definition further down in this file stops
# right after building `lines` -- that got separated from the function and
# lost its indentation. At module level `lines`, `text_object` and `c` are
# not defined. Confirm, then move the statements back into that function.
for line in lines:
text_object.textLine(line)
c.drawText(text_object)
c.showPage()
c.save()
# 1. Extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        One string made of each page's ``extractText()`` output, with
        nothing inserted between pages; empty when the file has no pages.

    NOTE(review): a second definition of this function appears later in
    this file and would replace this one at import time -- confirm which
    copy should be kept.
    """
    # Extraction stays inside the `with` block so the file handle is still
    # open while the reader is asked for each page.
    with open(pdf_file, 'rb') as file:
        pdf_reader = PyPDF4.PdfFileReader(file)
        # join() avoids rebuilding the accumulated string once per page.
        return ''.join(
            pdf_reader.getPage(page).extractText()
            for page in range(pdf_reader.numPages)
        )
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/5/9 9:06
# @Author : linjingyu
# @File : 一个基于Google服务和ChatGPT3服务的文档翻译工具.py
from googletrans import Translator
from google.cloud import translate_v2 as translate
from google.oauth2 import service_account
from transformers import pipeline
from fpdf import FPDF
import openai
import PyPDF4
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
# Writes text content into a PDF file
def create_pdf_from_text(text, output_pdf):
    """Write ``text`` to a letter-sized PDF, one text line per input line.

    Lines are drawn starting at (50, 750). When the cursor drops below the
    bottom margin the current page is finished and drawing continues at
    the top of a new page, so long text is not lost off the page.

    Args:
        text: Text to write; split on ``'\\n'`` into individual lines.
        output_pdf: Path of the PDF file to create.

    NOTE(review): lines are not wrapped, so a line wider than the page is
    presumably clipped at the right edge. The canvas default font is used;
    non-Latin output (e.g. the "zh-CN" target used by the script entry
    point) likely needs a registered CJK-capable font -- confirm.
    """
    left_margin, top_y, bottom_y = 50, 750, 50

    pdf = canvas.Canvas(output_pdf, pagesize=letter)
    text_object = pdf.beginText(left_margin, top_y)
    for line in text.split('\n'):
        # Flush the page once the cursor has moved past the bottom margin.
        if text_object.getY() < bottom_y:
            pdf.drawText(text_object)
            pdf.showPage()
            text_object = pdf.beginText(left_margin, top_y)
        text_object.textLine(line)

    pdf.drawText(text_object)
    pdf.showPage()
    pdf.save()
# 1. Extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        One string made of each page's ``extractText()`` output, with
        nothing inserted between pages; empty when the file has no pages.
    """
    # Extraction stays inside the `with` block so the file handle is still
    # open while the reader is asked for each page.
    with open(pdf_file, 'rb') as file:
        pdf_reader = PyPDF4.PdfFileReader(file)
        page_texts = [
            pdf_reader.getPage(page).extractText()
            for page in range(pdf_reader.numPages)
        ]
    # join() avoids rebuilding the accumulated string once per page.
    return ''.join(page_texts)
# 2. Translate the text into the target language with Google Translate
def google_translate_text(text, target_language='en', google_api_key=None):
    """Translate ``text`` with the Google Cloud Translation (v2) client.

    Args:
        text: Source text to translate.
        target_language: Language code to translate into (default ``'en'``).
        google_api_key: Path of a service-account key file used to build
            the client credentials. When omitted, the client is created
            without explicit credentials and relies on whatever default
            credentials the environment provides.

    Returns:
        The translated text. Empty input is returned unchanged without
        contacting the service.

    NOTE(review): the whole text is sent in a single request; very large
    documents may exceed the service's request size limit -- confirm and
    chunk if needed.
    """
    if not text:
        return text

    if google_api_key:
        credentials = service_account.Credentials.from_service_account_file(
            google_api_key
        )
        translate_client = translate.Client(credentials=credentials)
    else:
        translate_client = translate.Client()

    # Ask for plain-text handling so the result is not HTML-escaped before
    # it is written into a PDF.
    result = translate_client.translate(
        text,
        target_language=target_language,
        format_='text',
    )
    return result['translatedText']
# 3. Polish the translation result with a GPT model
def polish_translation(text, openai_api_key):
    """Ask an OpenAI completion model to polish machine-translated text.

    Args:
        text: Machine-translated text to polish.
        openai_api_key: API key assigned to ``openai.api_key`` before the
            request is made.

    Returns:
        The first completion choice with surrounding whitespace stripped,
        or ``''`` when ``text`` is empty or whitespace-only (no request is
        made in that case).

    NOTE(review): the completion is capped at 1024 tokens, so a long
    document is presumably cut short -- consider polishing in chunks.
    NOTE(review): ``openai.Completion.create(engine=...)`` is the pre-1.0
    interface of the openai package -- confirm the installed version.
    """
    # Nothing to polish: skip the paid API call instead of prompting the
    # model with an empty body.
    if not text or not text.strip():
        return ''

    openai.api_key = openai_api_key
    prompt = f"请对以下机器翻译的文本进行润色:\n\n{text}\n\n润色后的文本:"
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response.choices[0].text.strip()
# 4. Save the text as a PDF file
def save_text_to_pdf(text, output_file):
    """Write ``text`` to a single-document PDF using FPDF.

    The text is laid out with ``multi_cell`` at full page width and a line
    height of 10, in 12-point Arial.

    Args:
        text: Text to place in the document.
        output_file: Path of the PDF file to create.

    NOTE(review): ``main`` in this file writes its output through
    create_pdf_from_text, not this function. The built-in "Arial" font
    presumably cannot encode non-Latin text such as Chinese -- confirm
    before using this for "zh-CN" output.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, text)
    pdf.output(output_file)
# Main program
def main(input_pdf, output_pdf, target_language, openai_api_key, google_api_key_file):
    """Run the pipeline: extract, translate, polish, then write a PDF.

    Args:
        input_pdf: Path of the source PDF file.
        output_pdf: Path of the PDF file to create.
        target_language: Language code passed to google_translate_text.
        openai_api_key: API key passed to polish_translation.
        google_api_key_file: Key-file path passed to google_translate_text
            as its third positional argument.
    """
    source_text = extract_text_from_pdf(input_pdf)
    translated_text = google_translate_text(
        source_text, target_language, google_api_key_file
    )
    polished_text = polish_translation(translated_text, openai_api_key)
    create_pdf_from_text(polished_text, output_pdf)
# Script entry point. The values below are placeholders to be replaced
# before running.
if __name__ == "__main__":
    input_pdf = r"Addrs"  # path of the source PDF file
    output_pdf = r"Addrs"  # path of the output PDF file
    target_language = "zh-CN"  # target language
    openai_api_key = ' Your OpenAi_key! '
    google_api_key_file = r'你的Google密钥路径信息'  # path of the Google key file
    main(input_pdf, output_pdf, target_language, openai_api_key, google_api_key_file)