Apache的POI项目可以用来处理MS Office文档,CodePlex上还有一个它的.NET版本。POI项目的目标是创建和维护用于操作各种基于OOXML和OLE2文件格式的Java API。大多数MS Office文档都是OLE2格式的。POI通过HSMF子项目来支持Outlook,通过HDGF子项目来支持Visio,通过HPBF子项目来支持Publisher。
使用POI抽取Word简单示例:
要引入poi-3.7.jar和poi-scratchpad-3.7.jar这两个包。
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;

/**
 * Example of extracting text from a Word document with Apache POI (HWPF).
 *
 * <p>Shows two approaches: pulling the whole text in one call, and walking
 * the document section by section, paragraph by paragraph, run by run.
 */
public class word {

    /**
     * Extracts the entire text of a Word document in one call.
     *
     * @param is stream positioned at the start of the document; this method
     *           does not close it
     * @return the text reported by {@code WordExtractor.getText()}
     * @throws IOException if the document cannot be read
     */
    public static String readdoc1(InputStream is) throws IOException {
        WordExtractor extractor = new WordExtractor(is);
        return extractor.getText();
    }

    /**
     * Walks the document by section, paragraph and character run, printing
     * the text of every character run to standard output (no separator is
     * added between runs).
     *
     * @param is stream positioned at the start of the document; this method
     *           does not close it
     * @throws IOException if the document cannot be read
     */
    public static void readdoc2(InputStream is) throws IOException {
        HWPFDocument doc = new HWPFDocument(is);
        Range range = doc.getRange();
        for (int x = 0; x < range.numSections(); x++) {
            Section section = range.getSection(x);
            for (int y = 0; y < section.numParagraphs(); y++) {
                Paragraph paragraph = section.getParagraph(y);
                for (int z = 0; z < paragraph.numCharacterRuns(); z++) {
                    CharacterRun run = paragraph.getCharacterRun(z);
                    System.out.print(run.text());
                }
            }
        }
    }

    /**
     * Runs both extraction approaches against a fixed sample file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File file = new File("/home/orisun/1.doc");
        try {
            // A fresh stream is opened for each pass (presumably because the
            // first pass consumes the stream). try-with-resources closes the
            // stream even when extraction throws.
            try (FileInputStream fin = new FileInputStream(file)) {
                String cont = readdoc1(fin);
                System.out.println(cont);
            }
            try (FileInputStream fin = new FileInputStream(file)) {
                readdoc2(fin);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
POI抽取PPT示例:
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.SlideShow;

/**
 * Example of extracting text from a PowerPoint presentation with Apache POI
 * (HSLF).
 *
 * <p>Shows two approaches: pulling all text in one call, and reading the
 * presentation slide by slide.
 */
public class ppt {

    /**
     * Extracts the entire text of a presentation in one call.
     *
     * @param is stream positioned at the start of the presentation; this
     *           method does not close it
     * @return the text reported by {@code PowerPointExtractor.getText()}
     * @throws IOException if the presentation cannot be read
     */
    public static String readdoc1(InputStream is) throws IOException {
        PowerPointExtractor extractor = new PowerPointExtractor(is);
        return extractor.getText();
    }

    /**
     * Reads the presentation one slide at a time, printing each slide's title
     * followed by the text of each of its text runs to standard output.
     *
     * @param is stream positioned at the start of the presentation; this
     *           method does not close it
     * @throws IOException if the presentation cannot be read
     */
    public static void readdoc2(InputStream is) throws IOException {
        SlideShow show = new SlideShow(new HSLFSlideShow(is));
        Slide[] slides = show.getSlides();
        for (Slide slide : slides) {
            // Title of the slide.
            String title = slide.getTitle();
            System.out.println("标题:" + title);
            // Text runs of the slide; per the original author's note these
            // include the title again.
            TextRun[] runs = slide.getTextRuns();
            for (TextRun run : runs) {
                System.out.println(run.getText());
            }
        }
    }

    /**
     * Runs both extraction approaches against a fixed sample file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File file = new File("/home/orisun/2.ppt");
        try {
            // A fresh stream is opened for each pass; try-with-resources
            // closes it even when extraction throws.
            try (FileInputStream fin = new FileInputStream(file)) {
                String cont = readdoc1(fin);
                System.out.println(cont);
            }
            try (FileInputStream fin = new FileInputStream(file)) {
                readdoc2(fin);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
一个Excel文件对应一个Workbook,一个Workbook由多个Sheet组成。
POI抽取Excel简单示例:
import java.io.file;
import java.io.fileinputstream;
import java.io.ioexception;
import java.io.inputstream;
import java.util.iterator;
import org.apache.poi.hssf.usermodel.hssfcell;
import org.apache.poi.hssf.usermodel.hssfrow;
import org.apache.poi.hssf.usermodel.hssfsheet;
import org.apache.poi.hssf.usermodel.hssfworkbook;
import org.apache.poi.hssf.extractor.excelextractor;
import org.apache.poi.poifs.filesystem.poifsfilesystem;
import org.apache.poi.ss.usermodel.row;
public class excel {
//直接读取excel的全部内容
public static string readdoc1(inputstream is)throws ioexception{
hssfworkbook wb=new hssfworkbook(new poifsfilesystem(is));
excelextractor extractor=new excelextractor(wb);
extractor.setformulasnotresults(false);
extractor.setincludesheetnames(true);
return extractor.gettext();
}
//读取时细化到sheet、行甚至单元格
public static double getavg(inputstream is)throws ioexception{
hssfworkbook wb=new hssfworkbook(new poifsfilesystem(is));
//获取第一张sheet
hssfsheet sheet=wb.getsheetat(0);
double molecule=0.0;
double denominator=0.0;
//按行遍历sheet
iterator<row> riter=sheet.rowiterator();
while(riter.hasnext()){
hssfrow row=(hssfrow)riter.next();
hssfcell cell1=row.getcell(4);
hssfcell cell2=row.getcell(4);
if(cell1.getcelltype()!=hssfcell.cell_type_numeric){
system.err.println("数字类型错误!");
system.exit(-2);
}
if(cell2.getcelltype()!=hssfcell.cell_type_numeric){
system.err.println("数字类型错误!");
system.exit(-2);
}
denominator+=double.parsedouble(cell2.tostring().trim());
molecule+=double.parsedouble(cell2.tostring().trim())*float.parsefloat(cell1.tostring().trim());
}
return molecule/denominator;
}
public static void main(string[] args){
file file = new file("/home/orisun/3.xls");
try{
fileinputstream fin=new fileinputstream(file);
string cont=readdoc1(fin);
system.out.println(cont);
fin.close();
fin=new fileinputstream(file);
system.out.println("加权平均分"+getavg(fin));
fin.close();
}catch(ioexception e){
e.printstacktrace();
}
}
}
如对本文有疑问, 点击进行留言回复!!
springcloud中feign调用处理mybatis-plus Ipage反序列化问题。
Flume 史上最全面的大数据学习第十篇(一) 别再说不知道flume是什么了
网友评论