使用Asposetxt转Word或PDF乱码问题--慧智精品网

使用Aspose将txt转Word或PDF的乱码问题

出现乱码问题有两种可能：

1. 系统无中文字体（通常发生在Linux系统下，自行百度Linux如何安装中文字体）

2. TXT的byte流没有选择合适的编码：不能写死为UTF-8，需要根据实际文本的编码来决定

使用 ExchangeUtil 可以自动识别文本byte编码。如果需要使用@Autowired注入，需自行修改该类；否则需要按照

File originFile = new File("/path/to/source.txt");

ExchangeUtil s = new ExchangeUtil();

String charsetName = ExchangeUtil.javaname[s.detectEncoding(originFile)];

这样来获取文件编码

// Validate the license first; without it the generated PDF carries a watermark.
if (!getWordLicense()) {
    return false;
}

try {
    // Read the whole source file into memory before decoding. Decoding chunk by chunk
    // could split a multi-byte character across two reads, and the previous loop kept
    // only the last chunk because it reassigned (rather than appended to) the text.
    byte[] buf = new byte[(int) originFile.length()];
    int length = 0;
    try (FileInputStream fileInputStream = new FileInputStream(originFile)) {
        int read;
        // read() returns -1 once the end of the file has been reached.
        while (length < buf.length
                && (read = fileInputStream.read(buf, length, buf.length - length)) != -1) {
            length += read;
        }
    }

    // Detect the charset once from the file instead of hard-coding UTF-8.
    ExchangeUtil s = new ExchangeUtil();
    String text = new String(buf, 0, length, ExchangeUtil.javaname[s.detectEncoding(originFile)]);

    // Build an empty document and write the decoded text into it.
    Document doc = new Document();
    DocumentBuilder builder = new DocumentBuilder(doc);
    builder.write(text);

    // targetFile is the PDF to produce; the stream is closed even if save() throws.
    try (FileOutputStream os = new FileOutputStream(targetFile)) {
        doc.save(os, com.aspose.words.SaveFormat.PDF);
    }
} catch (Exception e) {
    e.printStackTrace();
    return false;
}

return true;

package com.ucs.service.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

public class ExchangeUtil extends Encoding {

// Frequency tables to hold the GB, Big5, and EUC-TW character

// frequencies

// Every table below is allocated by the constructor; the sizes noted are the
// dimensions the constructor passes to "new int[..][..]".

// 94 x 94; indexed as [lead byte - 0xA1][trail byte - 0xA1] by gb2312_probability
// and gbk_probability.
int GBFreq[][];

// 126 x 191; indexed as [lead byte - 0x81][trail byte - 0x40] by gbk_probability.
int GBKFreq[][];

// 94 x 158.
int Big5Freq[][];

// 126 x 191.
int Big5PFreq[][];

// 94 x 94.
int EUC_TWFreq[][];

// 94 x 94.
int KRFreq[][];

// 94 x 94.
int JPFreq[][];

// int UnicodeFreq[94][128];

// public static String[] nicename;

// public static String[] codings;

// Debug switch; the constructor sets it to false and detectEncoding(byte[]) checks it
// while tabulating scores.
public boolean debug;

/**
 * Creates a detector with debug output disabled, allocates the character-frequency
 * tables, and fills them.
 *
 * <p>The tables are read by the scoring methods (for example {@code gb2312_probability}
 * and {@code gbk_probability}); if they are left unfilled, the frequency half of every
 * score is computed from zeros.
 */
public ExchangeUtil() {
    super();
    debug = false;

    GBFreq = new int[94][94];
    GBKFreq = new int[126][191];
    Big5Freq = new int[94][158];
    Big5PFreq = new int[126][191];
    EUC_TWFreq = new int[94][94];
    KRFreq = new int[94][94];
    JPFreq = new int[94][94];

    // Initialize the Frequency Table for GB, GBK, Big5, EUC-TW, KR, JP.
    // NOTE(review): this call had been fused into the comment above and therefore never
    // ran; initialize_frequencies() is expected to be defined further down in this class.
    initialize_frequencies();
}

// public static void main(String argc[]) {
//     ExchangeUtil sinodetector;
//     int result = OTHER;
//     int i;
//     sinodetector = new ExchangeUtil();
//     for (i = 0; i < argc.length; i++) {
//         if (argc[i].startsWith("http://") == true) {
//             try {
//                 result = sinodetector.detectEncoding(new URL(argc[i]));
//             } catch (Exception e) {
//                 System.err.println("Bad URL " + e.toString());
//             }
//         } else if (argc[i].equals("-d")) {
//             sinodetector.debug = true;
//             continue;
//         } else {
//             result = sinodetector.detectEncoding(new File(argc[i]));
//         }
//         System.out.println(nicename[result]);
//     }
// }

/**

* Function : detectEncoding Aruguments: URL Returns : One of the encodings from the Encoding en

umeration (GB2312, HZ, BIG5, * EUC_TW, ASCII, or OTHER) Description: This function looks at the URL contents and assigns it a probability score for each

* encoding type. The encoding type with the highest probability is returned.

public int detectEncoding(URL testurl) {

byte[] rawtext = new byte[10000];

int bytesread = 0, byteoffset = 0;

int guess = OTHER;

InputStream chinesestream;

try {

chinesestream = testurl.openStream();

while ((bytesread = ad(rawtext, byteoffset, rawtext.length - byteoffset)) > 0) {

byteoffset += bytesread;

}

;

chinesestream.close();

guess = detectEncoding(rawtext);

} catch (Exception e) {

guess = -1;

}

return guess;

}

* Function : detectEncoding Aruguments: File Returns : One of the encodings from the Encoding enumeration (GB2312, HZ, BIG5, * EUC_TW, ASCII, or OTHER) Description: This function looks at the file and assigns it a probability score for each encoding

* type. The encoding type with the highest probability is returned.

public int detectEncoding(File testfile) {

FileInputStream chinesefile;

byte[] rawtext;

rawtext = new byte[(int) testfile.length()];

try {

chinesefile = new FileInputStream(testfile);

chinesefile.close();

} catch (Exception e) {

}

return detectEncoding(rawtext);

}

/**

* Function : detectEncoding Aruguments: byte array Returns : One of the encodings from the Encoding enumeration (GB2312, HZ, * BIG5, EUC_TW, ASCII, or OTHER) Description: This function looks at the byte array and assigns it a probability score for

* each encoding type. The encoding type with the highest probability is returned.

1升是几斤

public int detectEncoding(byte[] rawtext) {

int[] scores;

int index, maxscore = 0;

int encoding_guess = OTHER;

scores = new int[TOTALTYPES];

// Assign Scores

scores[GB2312] = gb2312_probability(rawtext);

scores[GBK] = gbk_probability(rawtext);

scores[GB18030] = gb18030_probability(rawtext);

scores[HZ] = hz_probability(rawtext);

scores[BIG5] = big5_probability(rawtext);

scores[CNS11643] = euc_tw_probability(rawtext);

scores[ISO2022CN] = iso_2022_cn_probability(rawtext);

scores[UTF8] = utf8_probability(rawtext);

scores[UNICODE] = utf16_probability(rawtext);

scores[EUC_KR] = euc_kr_probability(rawtext);

scores[CP949] = cp949_probability(rawtext);

scores[JOHAB] = 0;

scores[ISO2022KR] = iso_2022_kr_probability(rawtext);

scores[ASCII] = ascii_probability(rawtext);

scores[SJIS] = sjis_probability(rawtext);

scores[EUC_JP] = euc_jp_probability(rawtext);

scores[ISO2022JP] = iso_2022_jp_probability(rawtext);

scores[UNICODET] = 0;

scores[UNICODES] = 0;

scores[ISO2022CN_GB] = 0;

scores[ISO2022CN_CNS] = 0;

scores[OTHER] = 0;

// Tabulate Scores

for (index = 0; index < TOTALTYPES; index++) {

if (debug)

吴君如的鬼片if (scores[index] > maxscore) {

encoding_guess = index;

maxscore = scores[index];

}

// Return OTHER if nothing scored above 50

if (maxscore <= 50) {

encoding_guess = OTHER;

}

有什么好听的歌推荐

return encoding_guess;

}

* Function: gb2312_probability Argument: pointer to byte array Returns : number from 0 to 100 representing probability text * in array uses GB-2312 encoding

int gb2312_probability(byte[] rawtext) {

int i, rawtextlen = 0;

int dbchars = 1, gbchars = 1;

long gbfreq = 0, totalfreq = 1;

float rangeval = 0, freqval = 0;

int row, column;

// Stage 1: Check to see if characters fit into acceptable ranges

rawtextlen = rawtext.length;

for (i = 0; i < rawtextlen - 1; i++) {

// println(rawtext[i]);

if (rawtext[i] >= 0) {

// asciichars++;

} else {

dbchars++;

if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 && (byte) 0xA1 <= rawtext[i + 1]

&& rawtext[i + 1] <= (byte) 0xFE) {

gbchars++;

totalfreq += 500;

row = rawtext[i] + 256 - 0xA1;

column = rawtext[i + 1] + 256 - 0xA1;

if (GBFreq[row][column] != 0) {

gbfreq += GBFreq[row][column];

} else if (15 <= row && row < 55) {

// In GB high-freq character range

gbfreq += 200;

}

i++;

}

rangeval = 50 * ((float) gbchars / (float) dbchars);

freqval = 50 * ((float) gbfreq / (float) totalfreq);

return (int) (rangeval + freqval);

}

* Function: gbk_probability Argument: pointer to byte array Returns : number from 0 to 100 representing probability text in

* array uses GBK encoding

int gbk_probability(byte[] rawtext) {

int i, rawtextlen = 0;

int dbchars = 1, gbchars = 1;

long gbfreq = 0, totalfreq = 1;

float rangeval = 0, freqval = 0;

int row, column;

// Stage 1: Check to see if characters fit into acceptable ranges

rawtextlen = rawtext.length;

for (i = 0; i < rawtextlen - 1; i++) {

// println(rawtext[i]);

if (rawtext[i] >= 0) {

// asciichars++;

} else {

dbchars++;

if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 && // Original GB range

(byte) 0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) {

gbchars++;

totalfreq += 500;

row = rawtext[i] + 256 - 0xA1;

column = rawtext[i + 1] + 256 - 0xA1;

// System.out.println("original row " + row + " column " + column);

if (GBFreq[row][column] != 0) {

gbfreq += GBFreq[row][column];

} else if (15 <= row && row < 55) {

gbfreq += 200;

}

} else if ((byte) 0x81 <= rawtext[i]

&& rawtext[i] <= (byte) 0xFE

&& // Extended GB range

(((byte) 0x80 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) || ((byte) 0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E))) { gbchars++;

totalfreq += 500;

row = rawtext[i] + 256 - 0x81;

if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) {

column = rawtext[i + 1] - 0x40;

} else {

column = rawtext[i + 1] + 256 - 0x40;

}

// System.out.println("extended row " + row + " column " + column + " rawtext[i] " + rawtext[i]);

if (GBKFreq[row][column] != 0) {

gbfreq += GBKFreq[row][column];

}

i++;

}

rangeval = 50 * ((float) gbchars / (float) dbchars);

freqval = 50 * ((float) gbfreq / (float) totalfreq);

// For regular GB files, this would give the same score, so I handicap it slightly

return (int) (rangeval + freqval) - 1;

}

* Function: gb18030_probability Argument: pointer to byte array Returns : number from 0 to 100 representing probability text

* in array uses GBK encoding

int gb18030_probability(byte[] rawtext) {

int i, rawtextlen = 0;

word打开是乱码int dbchars = 1, gbchars = 1;

>qq怎么创建

慧智精品网

使用Asposetxt转Word或PDF乱码问题

发表评论

推荐文章

【中国历史十五讲】读书说明与指导(吴树国)

中药泡脚的历史典故

关于司马迁的历史评价

3-真题专练-沈阳历史中考中国古代史-材料解析题

历史上对陶渊明的评价

热门文章

史记素材作文(实用)

汉代文学和经学的关系

汉代散文知识点总结

中国历史的六条脉络

简述汉代丝绸之路开辟的历史意义

汉代城址与墓区的择地规律

少年读史记汉帝国风云录概括300字

汉代文学在中国文学史中的地位与影响

汉试制度与科举制度的关系

汉代生产方式

汉代的思想大一知识点

汉代的科技成就与文化启示

马王堆汉墓的文化内涵与社会背景从文物解读历史

汉代社会的缩影

汉代经由古丝路上的文化交流与影响

汉书的内容

2022国开中国近代史纲要大作业

汉代经学知识点总结图解

秦汉时期的历史观与历史记载方式

汉代的文化成就

最新文章

中药泡脚的历史典故

关于司马迁的历史评价

“亲亲相隐”现象及容隐制度在中国历史中的演进

人教版七年级中国历史新增文物

国子监历史及简介

列举汉代碑刻隶书10种

标签列表

慧智精品网

使用Asposetxt转Word或PDF乱码问题

发表评论

推荐文章

【中国历史十五讲】读书说明与指导(吴树国)

中药泡脚的历史典故

关于司马迁的历史评价

3-真题专练-沈阳历史中考中国古代史-材料解析题

历史上对陶渊明的评价

热门文章

史记素材作文(实用)

汉代文学和经学的关系

汉代散文知识点总结

中国历史的六条脉络

简述汉代丝绸之路开辟的历史意义

汉代城址与墓区的择地规律

少年读史记 汉帝国风云录概括300字

汉代文学在中国文学史中的地位与影响

汉试制度与科举制度的关系

汉代生产方式

汉代的思想大一知识点

汉代的科技成就与文化启示

马王堆汉墓的文化内涵与社会背景从文物解读历史

汉代社会的缩影

汉代经由古丝路上的文化交流与影响

汉书的内容

2022国开中国近代史纲要大作业

汉代经学知识点总结图解

秦汉时期的历史观与历史记载方式

汉代的文化成就

最新文章

中药泡脚的历史典故

关于司马迁的历史评价

“亲亲相隐”现象及容隐制度在中国历史中的演进

人教版 七年级中国历史新增文物

国子监历史及简介

列举汉代碑刻隶书10种

标签列表

少年读史记汉帝国风云录概括300字

人教版七年级中国历史新增文物