replaceall()
poi实现word⽂档的导⼊(针对.doc.docxrtf)public void importUnitInfo() throws IOException{
String filePath = HttpServletRequest().getParameter("docWord");
File file = new File(filePath);
POIFSFileSystem pfs = null;
List<String> list = new ArrayList<String>();
organization = new Organization();
String info="";
Boolean sign =true;
FileInputStream ins = new FileInputStream(filePath);// 载⼊⽂档
// WordExtractor extractor = new WordExtractor(ins);
// // 对DOC⽂件进⾏提取
/
/ String text = Text();
XWPFWordExtractor docx = null;
int index = filePath.lastIndexOf(".");
String fileType =filePath.substring(index);
try {
if(fileType.equals(".docx")){
docx = new XWPFWordExtractor(POIXMLDocument.openPackage(filePath));//对docx⽂档的操作
}else if(fileType.equals(".doc")){
pfs = new POIFSFileSystem(ins); // 对doc⽂档的操作
}
} catch (Exception e) {
if(pfs==null&&docx==null){
sign=false;
}
}
if(sign==true){//导⼊的⽂件格式是word(doc或者docx)
try {
if(fileType.equals(".doc")){
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = Range();// 得到⽂档的读取范围
TableIterator it = new TableIterator(range);// 迭代⽂档中的表格
String cellString="";
if (it.hasNext()) {
TableRow tr = null;
TableCell td = null;
org.apache.poi.hwpf.usermodel.Paragraph para = null;
org.apache.poi.hwpf.usermodel.Table tb = it.next();
// 迭代⾏,从第1⾏开始
for (int i = 0; i < tb.numRows(); i++) {
tr = tb.getRow(i);
for (int j = 0; j < tr.numCells(); j++) {
td = tr.getCell(j);// 取得单元格
// 取得单元格的内容
for (int k = 0; k < td.numParagraphs(); k++) {
para = td.getParagraph(k);
cellString = ();
boolean flag = true;
if (cellString != null && cellStringpareTo("") != 0&&flag==true) {
// 如果不trim,取出的内容后会有⼀个乱码字符
cellString = im();
}
info+= cellString;
}
}
}
}
}else if(fileType.equals(".docx")){
info = Text();
info =placeAll("\n", "");
info = placeAll("\r", "");
}
info = placeAll("\\s*", "");
.......(获得word中的内容(info)后,对具体内容操作很简单就不列出了)
} catch (Exception e) {
}
}else {//导⼊的⽂件是rtf格式
try {
RTFEditorKit rtf = new RTFEditorKit();
DefaultStyledDocument styledDoc = new DefaultStyledDocument();
FileInputStream in = new FileInputStream(filePath);
info = new Text(Length()).getBytes("GBK")); //提取⽂本,读取中⽂需要使⽤ISO8859_1编码,否则会出现乱码
info = placeAll("\n", "");
info = placeAll("\\s*", "");
.......(获得word中的内容(info)后,对具体内容操作很简单就不列出了) [color=gray][/color][color=black][/color][size=xx-small] [/size] HttpServletResponse().getWriter().write("success");
} catch (IOException e) {
e.printStackTrace();
} catch (BadLocationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}