Open TimothyWang123 opened 7 years ago
Missing font. Did you use export-fo, or Plutext's commercial converter?
Hi, I wrote my code following the example on GitHub. Here it is:
import java.io.File; import java.io.OutputStream;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.docx4j.Docx4J; import org.docx4j.convert.out.FOSettings; import org.docx4j.fonts.IdentityPlusMapper; import org.docx4j.fonts.Mapper; import org.docx4j.fonts.PhysicalFont; import org.docx4j.fonts.PhysicalFonts; import org.docx4j.model.fields.FieldUpdater; import org.docx4j.openpackaging.exceptions.Docx4JException; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.samples.AbstractSample; import org.docx4j.services.client.ConversionException;
public class ConvertOutPDF extends AbstractSample { // Config for non-command line use static {
inputfilepath = null; // to generate a docx (and PDF output) containing font samples
inputfilepath = "C:/Users/wangzh20785/Desktop/swarm.docx";
// URL of converter instance
// Docx4jProperties.setProperty("com.plutext.converter.URL", "http://localhost:9016/v1/00000000-0000-0000-0000-000000000000/convert");
// XSL-FO only
saveFO = true;
}
// For demo/debugging purposes, save the intermediate XSL FO
// Don't do this in production!
static boolean saveFO;
public static void main(String[] args) throws Exception {
try {
getInputFilePath(args);
} catch (IllegalArgumentException e) {
}
// Font regex (optional)
// Set regex if you want to restrict to some defined subset of fonts
// Here we have to do this before calling createContent,
// since that discovers fonts
String regex = null;
// Windows:
// String
// regex=".*(calibri|camb|cour|arial|symb|times|Times|zapf).*";
// regex=".(calibri|camb|cour|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingding)."; // Mac // String
// Document loading (required)
WordprocessingMLPackage wordMLPackage;
if (inputfilepath==null) {
// Create a docx
System.out.println("No imput path passed, creating dummy document");
wordMLPackage = WordprocessingMLPackage.createPackage();
// SampleDocument.createContent(wordMLPackage.getMainDocumentPart()); } else { // Load .docx or Flat OPC .xml System.out.println("Loading file from " + inputfilepath); wordMLPackage = WordprocessingMLPackage.load(new java.io.File(inputfilepath)); }
// Refresh the values of DOCPROPERTY fields
FieldUpdater updater = new FieldUpdater(wordMLPackage);
updater.update(true);
String outputfilepath;
if (inputfilepath==null) {
outputfilepath = "C:/Users/wangzh20785/Desktop/swarm.pdf";
} else {
outputfilepath = "C:/Users/wangzh20785/Desktop/swarm.pdf";
}
// All methods write to an output stream
OutputStream os = new java.io.FileOutputStream(outputfilepath);
if (!Docx4J.pdfViaFO()) {
// Since 3.3.0, Plutext's PDF Converter is used by default
System.out.println("Using Plutext's PDF Converter; add docx4j-export-fo if you don't want that");
Mapper fontMapper = new IdentityPlusMapper();
// Mapper fontMapper = new IdentityPlusMapper(); // String fontFamily = "SimSun"; // PhysicalFont simsunFont = PhysicalFonts.get(fontFamily); // fontMapper.put(fontFamily, simsunFont); // regex=".(SimSun|calibri|camb|cour|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingding)."; // PhysicalFonts.setRegex(regex);
// PhysicalFont font = PhysicalFonts.getPhysicalFonts().get("Arial Unicode MS"); // if (font!=null) { // fontMapper.getFontMappings().put("Times New Roman", font); // } fontMapper.put("隶书", PhysicalFonts.get("LiSu")); fontMapper.put("宋体",PhysicalFonts.get("SimSun")); fontMapper.put("微软雅黑",PhysicalFonts.get("Microsoft Yahei")); fontMapper.put("黑体",PhysicalFonts.get("SimHei")); fontMapper.put("楷体",PhysicalFonts.get("KaiTi")); fontMapper.put("新宋体",PhysicalFonts.get("NSimSun")); fontMapper.put("华文行楷", PhysicalFonts.get("STXingkai")); fontMapper.put("华文仿宋", PhysicalFonts.get("STFangsong")); fontMapper.put("宋体扩展",PhysicalFonts.get("simsun-extB")); fontMapper.put("仿宋",PhysicalFonts.get("FangSong")); fontMapper.put("仿宋_GB2312",PhysicalFonts.get("FangSong_GB2312")); fontMapper.put("幼圆",PhysicalFonts.get("YouYuan")); fontMapper.put("华文宋体",PhysicalFonts.get("STSong")); fontMapper.put("华文中宋",PhysicalFonts.get("STZhongsong")); wordMLPackage.setFontMapper(fontMapper);
try {
System.out.println("我要开始转换啦!");
// FOSettings foSettings = Docx4J.createFOSettings();
// foSettings.setWmlPackage(wordMLPackage);
// Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
Docx4J.toPDF(wordMLPackage, os);
} catch (Docx4JException e) {
System.out.println("我有错!");
e.printStackTrace();
IOUtils.closeQuietly(os);
System.out.println(FileUtils.readFileToString(new File(outputfilepath)));
if (e.getCause()!=null&& e.getCause() instanceof ConversionException) {
ConversionException ce = (ConversionException)e.getCause();
ce.printStackTrace();
}
}
System.out.println("Saved: " + outputfilepath);
return;
}
System.out.println("Attempting to use XSL FO");
regex=".*(Courier New|Arial|Times New Roman|Comic Sans|Georgia|Impact|Lucida Console|Lucida Sans Unicode|Palatino Linotype|Tahoma|Trebuchet|Verdana|Symbol|Webdings|Wingdings|MS Sans Serif|MS Serif|SimSun).*";
PhysicalFonts.setRegex(regex);
// Set up font mapper (optional)
Mapper fontMapper = new IdentityPlusMapper();
wordMLPackage.setFontMapper(fontMapper);
PhysicalFont font = PhysicalFonts.get("Arial Unicode MS");
// make sure this is in your regex (if any)!!!
// if (font!=null) { // fontMapper.put("Times New Roman", font); // fontMapper.put("Arial", font); fontMapper.put("隶书", PhysicalFonts.get("LiSu")); fontMapper.put("宋体",PhysicalFonts.get("SimSun")); fontMapper.put("微软雅黑",PhysicalFonts.get("Microsoft Yahei")); fontMapper.put("黑体",PhysicalFonts.get("SimHei")); fontMapper.put("楷体",PhysicalFonts.get("KaiTi")); fontMapper.put("新宋体",PhysicalFonts.get("NSimSun")); fontMapper.put("华文行楷", PhysicalFonts.get("STXingkai")); fontMapper.put("华文仿宋", PhysicalFonts.get("STFangsong")); fontMapper.put("宋体扩展",PhysicalFonts.get("simsun-extB")); fontMapper.put("仿宋",PhysicalFonts.get("FangSong")); fontMapper.put("仿宋_GB2312",PhysicalFonts.get("FangSong_GB2312")); fontMapper.put("幼圆",PhysicalFonts.get("YouYuan")); fontMapper.put("华文宋体",PhysicalFonts.get("STSong")); fontMapper.put("华文中宋",PhysicalFonts.get("STZhongsong")); // } // fontMapper.put("Libian SC Regular", PhysicalFonts.get("SimSun"));
FOSettings foSettings = Docx4J.createFOSettings();
if (saveFO) {
foSettings.setFoDumpFile(new java.io.File(inputfilepath + ".fo"));
}
foSettings.setWmlPackage(wordMLPackage);
// Document format:
// The default implementation of the FORenderer that uses Apache Fop will output
// a PDF document if nothing is passed via
// foSettings.setApacheFopMime(apacheFopMime)
// apacheFopMime can be any of the output formats defined in org.apache.fop.apps.MimeConstants eg org.apache.fop.apps.MimeConstants.MIME_FOP_IF or
// FOSettings.INTERNAL_FO_MIME if you want the fo document as the result.
//foSettings.setApacheFopMime(FOSettings.INTERNAL_FO_MIME);
// Specify whether PDF export uses XSLT or not to create the FO
// (XSLT takes longer, but is more complete).
// Don't care what type of exporter you use
Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
// Prefer the exporter, that uses a xsl transformation
// Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
// Prefer the exporter, that doesn't use a xsl transformation (= uses a visitor)
// .. faster, but not yet at feature parity
// Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_NONXSL);
System.out.println("Saved: " + outputfilepath+"这是第二个saved");
// Clean up, so any ObfuscatedFontPart temp files can be deleted
if (wordMLPackage.getMainDocumentPart().getFontTablePart()!=null) {
wordMLPackage.getMainDocumentPart().getFontTablePart().deleteEmbeddedFontTempFiles();
}
// This would also do it, via finalize() methods
updater = null;
foSettings = null;
wordMLPackage = null;
}
}
Is docx4j-export-FO on your class path? If not, you are using Plutext's commercial converter. If you are using Plutext's commercial converter, download/install your own instance (from converter-eval.plutext.com), and if necessary, add the relevant Chinese fonts to its install dir.
@plutext Hi, thanks. When I tried to download from converter-eval.plutext.com, it asked for a work email address, but I am still a student in China. Is there any free way to convert Word to PDF purely in code, on both Windows and Linux?
@plutext Hi, I set the font but it still didn't work, sadly. I also tried the demo website linked on GitHub, and that didn't work either.
We don't have Chinese fonts installed on our instance at converter-eval.plutext.com; happy to send you a link if you tell me which download you'd like.
the fontMapper stuff is only for docx4j-export-FO; have you got that on your classpath now?
I've just done some testing. Should work if you use export-FO jar; you can try https://www.docx4java.org/docx4j/docx4j-export-fo-nightly-20170721.jar
Win和Mac用TimothyWang123的方法可以解决docx转pdf乱码问题。关于中文的docx转pdf乱码问题,Linux系统需要安装字体库。
I used docx4j to convert a .docx file to a .pdf file. Everything is OK except that the Chinese characters come out as garbled text.