当前位置: 移动技术网 > IT编程>开发语言>Java > java使用poi读取ppt文件和poi读取excel、word示例

java使用poi读取ppt文件和poi读取excel、word示例

2019年07月22日  | 移动技术网IT编程  | 我要评论

Apache的POI项目可以用来处理MS Office文档,CodePlex上还有一个它的.NET版本。POI项目创建并维护用于操作各种基于OOXML和OLE2文件格式的Java API。大多数MS Office文档都是OLE2格式的。POI通过HSMF子项目来支持Outlook,通过HDGF子项目来支持Visio,通过HPBF子项目来支持Publisher。

使用poi抽取word简单示例:

要引入poi-3.7.jar和poi-scratchpad-3.7.jar这两个包。

复制代码 代码如下:

package msoffice;

import java.io.file;
import java.io.fileinputstream;
import java.io.ioexception;
import java.io.inputstream;

import org.apache.poi.hwpf.hwpfdocument;
import org.apache.poi.hwpf.extractor.wordextractor;
import org.apache.poi.hwpf.usermodel.characterrun;
import org.apache.poi.hwpf.usermodel.paragraph;
import org.apache.poi.hwpf.usermodel.range;
import org.apache.poi.hwpf.usermodel.section;

public class word {

    // 直接抽取全部内容
    public static string readdoc1(inputstream is) throws ioexception {
        wordextractor extractor = new wordextractor(is);
        return extractor.gettext();
    }

    //分章节section、段落paragraph、字符串characterrun抽取
    public static void readdoc2(inputstream is) throws ioexception {
        hwpfdocument doc=new hwpfdocument(is);
        range r=doc.getrange();
        for(int x=0;x<r.numsections();x++){
            section s=r.getsection(x);
            for(int y=0;y<s.numparagraphs();y++){
                paragraph p=s.getparagraph(y);
                for(int z=0;z<p.numcharacterruns();z++){
                    characterrun run=p.getcharacterrun(z);
                    string text=run.text();
                    system.out.print(text);
                }
            }
        }
    }

    public static void main(string[] args) {
        file file = new file("/home/orisun/1.doc");
        try {
            fileinputstream fin = new fileinputstream(file);
            string cont = readdoc1(fin);
            system.out.println(cont);
            fin.close();
            fin = new fileinputstream(file);
            readdoc2(fin);
            fin.close();
        } catch (ioexception e) {
            e.printstacktrace();
        }
    }
}

poi抽取ppt示例:

复制代码 代码如下:

package msoffice;

import java.io.file;
import java.io.fileinputstream;
import java.io.ioexception;
import java.io.inputstream;

import org.apache.poi.hslf.hslfslideshow;
import org.apache.poi.hslf.extractor.powerpointextractor;
import org.apache.poi.hslf.model.slide;
import org.apache.poi.hslf.model.textrun;
import org.apache.poi.hslf.usermodel.slideshow;

public class ppt {

    //直接抽取幻灯片的全部内容
    public static string readdoc1(inputstream is) throws ioexception{
        powerpointextractor extractor=new powerpointextractor(is);
        return extractor.gettext();
    }

    //一张幻灯片一张幻灯片地读取
    public static void readdoc2(inputstream is) throws ioexception{
        slideshow ss=new slideshow(new hslfslideshow(is));
        slide[] slides=ss.getslides();
        for(int i=0;i<slides.length;i++){
            //读取一张幻灯片的标题
            string title=slides[i].gettitle();
            system.out.println("标题:"+title);
            //读取一张幻灯片的内容(包括标题)
            textrun[] runs=slides[i].gettextruns();
            for(int j=0;j<runs.length;j++){
                system.out.println(runs[j].gettext());
            }
        }
    }

    public static void main(string[] args){
        file file = new file("/home/orisun/2.ppt");
        try{
            fileinputstream fin=new fileinputstream(file);
            string cont=readdoc1(fin);
            system.out.println(cont);
            fin.close();
            fin=new fileinputstream(file);
            readdoc2(fin);
            fin.close();
        }catch(ioexception e){
            e.printstacktrace();
        }
    }
}

一个Excel文件对应一个workbook,一个workbook由多个sheet组成。

poi抽取excel简单示例:

复制代码 代码如下:

package msoffice;

import java.io.file;
import java.io.fileinputstream;
import java.io.ioexception;
import java.io.inputstream;
import java.util.iterator;

import org.apache.poi.hssf.usermodel.hssfcell;
import org.apache.poi.hssf.usermodel.hssfrow;
import org.apache.poi.hssf.usermodel.hssfsheet;
import org.apache.poi.hssf.usermodel.hssfworkbook;
import org.apache.poi.hssf.extractor.excelextractor;
import org.apache.poi.poifs.filesystem.poifsfilesystem;
import org.apache.poi.ss.usermodel.row;

public class excel {

    //直接读取excel的全部内容
    public static string readdoc1(inputstream is)throws ioexception{
        hssfworkbook wb=new hssfworkbook(new poifsfilesystem(is));
        excelextractor extractor=new excelextractor(wb);
        extractor.setformulasnotresults(false);
        extractor.setincludesheetnames(true);
        return extractor.gettext();
    }

    //读取时细化到sheet、行甚至单元格
    public static double getavg(inputstream is)throws ioexception{
        hssfworkbook wb=new hssfworkbook(new poifsfilesystem(is));
        //获取第一张sheet
        hssfsheet sheet=wb.getsheetat(0);
        double molecule=0.0;
        double denominator=0.0;
        //按行遍历sheet
        iterator<row> riter=sheet.rowiterator();
        while(riter.hasnext()){
            hssfrow row=(hssfrow)riter.next();
            hssfcell cell1=row.getcell(4);
            hssfcell cell2=row.getcell(4);
            if(cell1.getcelltype()!=hssfcell.cell_type_numeric){
                system.err.println("数字类型错误!");
                system.exit(-2);
            }
            if(cell2.getcelltype()!=hssfcell.cell_type_numeric){
                system.err.println("数字类型错误!");
                system.exit(-2);
            }
            denominator+=double.parsedouble(cell2.tostring().trim());
            molecule+=double.parsedouble(cell2.tostring().trim())*float.parsefloat(cell1.tostring().trim());
        }
        return molecule/denominator;
    }

    public static void main(string[] args){
        file file = new file("/home/orisun/3.xls");
        try{
            fileinputstream fin=new fileinputstream(file);
            string cont=readdoc1(fin);
            system.out.println(cont);
            fin.close();
            fin=new fileinputstream(file);
            system.out.println("加权平均分"+getavg(fin));
            fin.close();
        }catch(ioexception e){
            e.printstacktrace();
        }
    }
}

如对本文有疑问, 点击进行留言回复!!

相关文章:

验证码:
移动技术网