使用 Aspose 将 TXT 转为 Word 或 PDF 时的乱码问题
使用 Aspose 将 TXT 转为 Word 或 PDF 时出现乱码,通常有两种可能:
1. 系统无中文字体(通常发生在 Linux 系统下,自行百度 Linux 如何安装中文字体)
2. TXT 的 byte 流没有选择合适的编码:不能写死为 UTF-8,需要根据实际文本的编码来决定
使用 ExchangeUtil 可以自动识别文本 byte 流的编码。如果需要使用 @Autowired,需自行修改该类;否则需要按照:
File originFile = new File(path);
ExchangeUtil s = new ExchangeUtil();
String encoding = ExchangeUtil.javaname[s.detectEncoding(originFile)];
这样来获取文件编码。
// Validate the license first; without it the generated PDF carries a watermark.
if (!getWordLicense()) {
    return false;
}
FileInputStream fileInputStream = null;
FileOutputStream os = null;
try {
    // Read the whole source file into one buffer so the text is decoded in a
    // single piece. Decoding chunk by chunk could split a multi-byte character
    // across two reads, and assigning each chunk to `text` would keep only the
    // last chunk.
    byte[] buf = new byte[(int) originFile.length()];
    fileInputStream = new FileInputStream(originFile.getAbsolutePath());
    int filled = 0;
    int length;
    // read() may return fewer bytes than requested; keep going until the
    // buffer is full or end of stream (-1) is reached.
    while (filled < buf.length
            && (length = fileInputStream.read(buf, filled, buf.length - filled)) != -1) {
        filled += length;
    }

    // Detect the encoding once, from the bytes that were just read, instead of
    // hard-coding UTF-8.
    ExchangeUtil s = new ExchangeUtil();
    String charsetName = ExchangeUtil.javaname[s.detectEncoding(buf)];
    String text = new String(buf, 0, filled, charsetName);

    // Build a blank document, write the decoded text into it and save it as PDF.
    Document doc = new Document();
    DocumentBuilder builder = new DocumentBuilder(doc);
    builder.write(text);
    os = new FileOutputStream(targetFile);
    doc.save(os, com.aspose.words.SaveFormat.PDF);
} catch (Exception e) {
    e.printStackTrace();
    return false;
} finally {
    // Always release both streams, on success and on failure.
    if (fileInputStream != null) {
        try {
            fileInputStream.close();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }
    if (os != null) {
        try {
            os.close();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }
}
return true;
package com.ucs.service.util;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URL;
public class ExchangeUtil extends Encoding {
// Frequency tables to hold the GB, Big5, and EUC-TW character
// frequencies
int GBFreq[][];
int GBKFreq[][];
int Big5Freq[][];
int Big5PFreq[][];
int EUC_TWFreq[][];
int KRFreq[][];
int JPFreq[][];
// int UnicodeFreq[94][128];
// public static String[] nicename;
// public static String[] codings;
public boolean debug;
/**
 * Creates a detector with debug output disabled, allocates the per-encoding
 * frequency tables and populates them.
 */
public ExchangeUtil() {
    super();
    debug = false;
    GBFreq = new int[94][94];
    GBKFreq = new int[126][191];
    Big5Freq = new int[94][158];
    Big5PFreq = new int[126][191];
    EUC_TWFreq = new int[94][94];
    KRFreq = new int[94][94];
    JPFreq = new int[94][94];
    // Initialize the Frequency Table for GB, GBK, Big5, EUC-TW, KR, JP.
    // This call had been swallowed into the comment above, which left every
    // table zero-filled. NOTE(review): initialize_frequencies() is defined
    // outside this excerpt - confirm it exists in the class.
    initialize_frequencies();
}
//    public static void main(String argc[]) {
//      ExchangeUtil sinodetector;
//      int result = OTHER;
//      int i;
//      sinodetector = new ExchangeUtil();
//      for (i = 0; i < argc.length; i++) {
//        if (argc[i].startsWith("http://") == true) {
//          try {
//            result = sinodetector.detectEncoding(new URL(argc[i]));
//          } catch (Exception e) {
//            System.out.println("Bad URL " + e.toString());
//          }
//        } else if (argc[i].equals("-d")) {
//          sinodetector.debug = true;
//          continue;
//        } else {
//          result = sinodetector.detectEncoding(new File(argc[i]));
//        }
//        System.out.println(nicename[result]);
//      }
//    }
/**
 * Detects the encoding of the content behind a URL.
 *
 * Reads at most the first 10000 bytes from the URL and passes exactly the
 * bytes that were read to {@link #detectEncoding(byte[])}.
 *
 * @param testurl the URL whose content is examined
 * @return the encoding index chosen by {@link #detectEncoding(byte[])}, or -1
 *         if opening or reading the URL throws
 */
public int detectEncoding(URL testurl) {
    byte[] rawtext = new byte[10000];
    int bytesread = 0, byteoffset = 0;
    int guess = OTHER;
    InputStream chinesestream = null;
    try {
        chinesestream = testurl.openStream();
        // Once the buffer is full the requested length is 0, read() returns 0
        // and the loop ends; end of stream (-1) ends it as well.
        while ((bytesread = chinesestream.read(rawtext, byteoffset, rawtext.length - byteoffset)) > 0) {
            byteoffset += bytesread;
        }
        // Score only the bytes actually read, not the zero padding left in the
        // unused tail of the fixed-size buffer.
        guess = detectEncoding(java.util.Arrays.copyOf(rawtext, byteoffset));
    } catch (Exception e) {
        guess = -1;
    } finally {
        if (chinesestream != null) {
            try {
                chinesestream.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if close() fails; the guess stands.
            }
        }
    }
    return guess;
}
/
**
* Function : detectEncoding Aruguments: File Returns : One of the encodings from the Encoding enumeration (GB2312, HZ, BIG5,    * EUC_TW, ASCII, or OTHER) Description: This function looks at the file and assigns it a probability score for each encoding
* type. The encoding type with the highest probability is returned.
*/
public int detectEncoding(File testfile) {
FileInputStream chinesefile;
byte[] rawtext;
rawtext = new byte[(int) testfile.length()];
try {
chinesefile = new FileInputStream(testfile);
chinesefile.close();
} catch (Exception e) {
}
return detectEncoding(rawtext);
}
/**
* Function : detectEncoding Aruguments: byte array Returns : One of the encodings from the Encoding enumeration (GB2312, HZ,    * BIG5, EUC_TW, ASCII, or OTHER) Description: This function looks at the byte array and assigns it a probability score for
* each encoding type. The encoding type with the highest probability is returned.
1升是几斤
*/
public int detectEncoding(byte[] rawtext) {
int[] scores;
int index, maxscore = 0;
int encoding_guess = OTHER;
scores = new int[TOTALTYPES];
// Assign Scores
scores[GB2312] = gb2312_probability(rawtext);
scores[GBK] = gbk_probability(rawtext);
scores[GB18030] = gb18030_probability(rawtext);
scores[HZ] = hz_probability(rawtext);
scores[BIG5] = big5_probability(rawtext);
scores[CNS11643] = euc_tw_probability(rawtext);
scores[ISO2022CN] = iso_2022_cn_probability(rawtext);
scores[UTF8] = utf8_probability(rawtext);
scores[UNICODE] = utf16_probability(rawtext);
scores[EUC_KR] = euc_kr_probability(rawtext);
scores[EUC_KR] = euc_kr_probability(rawtext);
scores[CP949] = cp949_probability(rawtext);
scores[JOHAB] = 0;
scores[ISO2022KR] = iso_2022_kr_probability(rawtext);
scores[ASCII] = ascii_probability(rawtext);
scores[SJIS] = sjis_probability(rawtext);
scores[EUC_JP] = euc_jp_probability(rawtext);
scores[ISO2022JP] = iso_2022_jp_probability(rawtext);
scores[UNICODET] = 0;
scores[UNICODES] = 0;
scores[ISO2022CN_GB] = 0;
scores[ISO2022CN_CNS] = 0;
scores[OTHER] = 0;
// Tabulate Scores
for (index = 0; index < TOTALTYPES; index++) {
if (debug)
吴君如的鬼片if (scores[index] > maxscore) {
encoding_guess = index;
maxscore = scores[index];
}
}
// Return OTHER if nothing scored above 50
if (maxscore <= 50) {
encoding_guess = OTHER;
}
有什么好听的歌推荐
return encoding_guess;
}
/*
 * Function: gb2312_probability Argument: byte array Returns: a score
 * (nominally 0 to 100) for the text being GB-2312 encoded.
 *
 * The score is the sum of two halves, each scaled to 50: the share of
 * high-bit byte pairs that fall inside the GB-2312 lead/trail ranges, and the
 * accumulated GBFreq weight relative to 500 per in-range pair.
 */
int gb2312_probability(byte[] rawtext) {
    int i, rawtextlen = 0;
    // Counters start at 1 so the divisions below never divide by zero.
    int dbchars = 1, gbchars = 1;
    long gbfreq = 0, totalfreq = 1;
    float rangeval = 0, freqval = 0;
    int row, column;
    // Stage 1: Check to see if characters fit into acceptable ranges
    rawtextlen = rawtext.length;
    for (i = 0; i < rawtextlen - 1; i++) {
        if (rawtext[i] >= 0) {
            // Plain ASCII byte (high bit clear): not counted.
        } else {
            dbchars++;
            // Bytes are signed, so both sides are compared as (byte) values.
            if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 && (byte) 0xA1 <= rawtext[i + 1]
                    && rawtext[i + 1] <= (byte) 0xFE) {
                gbchars++;
                totalfreq += 500;
                // + 256 turns the negative signed byte into its unsigned value.
                row = rawtext[i] + 256 - 0xA1;
                column = rawtext[i + 1] + 256 - 0xA1;
                if (GBFreq[row][column] != 0) {
                    gbfreq += GBFreq[row][column];
                } else if (15 <= row && row < 55) {
                    // In GB high-freq character range
                    gbfreq += 200;
                }
            }
            // Skip the trail byte of the pair just examined.
            i++;
        }
    }
    rangeval = 50 * ((float) gbchars / (float) dbchars);
    freqval = 50 * ((float) gbfreq / (float) totalfreq);
    // A second, identical return statement used to follow; it was unreachable
    // code, which javac rejects.
    return (int) (rangeval + freqval);
}
/*
* Function: gbk_probability Argument: pointer to byte array Returns : number from 0 to 100 representing probability text in
* array uses GBK encoding
*/
int gbk_probability(byte[] rawtext) {
int i, rawtextlen = 0;
int dbchars = 1, gbchars = 1;
long gbfreq = 0, totalfreq = 1;
float rangeval = 0, freqval = 0;
int row, column;
// Stage 1: Check to see if characters fit into acceptable ranges
rawtextlen = rawtext.length;
for (i = 0; i < rawtextlen - 1; i++) {
// println(rawtext[i]);
if (rawtext[i] >= 0) {
// asciichars++;
} else {
dbchars++;
if ((byte) 0xA1 <= rawtext[i] && rawtext[i] <= (byte) 0xF7 && // Original GB range
(byte) 0xA1 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) {
gbchars++;
totalfreq += 500;
row = rawtext[i] + 256 - 0xA1;
column = rawtext[i + 1] + 256 - 0xA1;
// System.out.println("original row " + row + " column " + column);
if (GBFreq[row][column] != 0) {
gbfreq += GBFreq[row][column];
} else if (15 <= row && row < 55) {
gbfreq += 200;
}
} else if ((byte) 0x81 <= rawtext[i]
&& rawtext[i] <= (byte) 0xFE
&& // Extended GB range
(((byte) 0x80 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0xFE) || ((byte) 0x40 <= rawtext[i + 1] && rawtext[i + 1] <= (byte) 0x7E))) {                    gbchars++;
totalfreq += 500;
row = rawtext[i] + 256 - 0x81;
if (0x40 <= rawtext[i + 1] && rawtext[i + 1] <= 0x7E) {
column = rawtext[i + 1] - 0x40;
} else {
column = rawtext[i + 1] + 256 - 0x40;
}
// System.out.println("extended row " + row + " column " + column + " rawtext[i] " + rawtext[i]);
if (GBKFreq[row][column] != 0) {
gbfreq += GBKFreq[row][column];
}
}
i++;
}
}
rangeval = 50 * ((float) gbchars / (float) dbchars);
freqval = 50 * ((float) gbfreq / (float) totalfreq);
// For regular GB files, this would give the same score, so I handicap it slightly
return (int) (rangeval + freqval) - 1;
}
/*
* Function: gb18030_probability Argument: pointer to byte array Returns : number from 0 to 100 representing probability text
* in array uses GBK encoding
*/
int gb18030_probability(byte[] rawtext) {
int i, rawtextlen = 0;
word打开是乱码int dbchars = 1, gbchars = 1;
>qq怎么创建

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。