当前位置: 移动技术网 > IT编程>开发语言>c# > C#使用iTextSharp将PDF转成文本的方法

C#使用iTextSharp将PDF转成文本的方法

2019年07月18日  | 移动技术网IT编程  | 我要评论

本文实例讲述了 C# 使用 iTextSharp 将 PDF 转成文本的方法。分享给大家供大家参考。具体实现方法如下:

using System;
using System.Diagnostics;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;

/// <summary>
/// Console example that writes the string tokens found in the content stream
/// of each page of a PDF to a text file, one token per line.
/// </summary>
public class ParsingPdf
{
  /// <summary>
  /// Parses the PDF using <c>PRTokeniser</c> and writes every string token
  /// to the destination file on its own line.
  /// </summary>
  /// <param name="src">The path to the original PDF file.</param>
  /// <param name="dest">The path to the resulting text file (created or overwritten).</param>
  public void ParsePdf(string src, string dest)
  {
    // The reader is opened first so that an unreadable source fails before
    // the destination file is created.
    PdfReader reader = new PdfReader(src);
    try
    {
      // Disposing the writer flushes it and closes the underlying FileStream,
      // including when tokenising throws part-way through.
      using (StreamWriter output = new StreamWriter(new FileStream(dest, FileMode.Create)))
      {
        int pageCount = reader.NumberOfPages;
        for (int pg = 1; pg <= pageCount; pg++)
        {
          // We can inspect the syntax of the imported page.
          byte[] streamBytes = reader.GetPageContent(pg);
          // NOTE(review): the byte[] constructor is the one the original listing
          // used; confirm it exists in the iTextSharp version you reference.
          PRTokeniser tokenizer = new PRTokeniser(streamBytes);
          while (tokenizer.NextToken())
          {
            // Only string tokens are written; every other token type is skipped.
            if (tokenizer.TokenType == PRTokeniser.TokType.STRING)
            {
              output.WriteLine(tokenizer.StringValue);
            }
          }
        }
      }
    }
    finally
    {
      // Explicit Close() instead of a using block so the code does not depend
      // on PdfReader implementing IDisposable.
      reader.Close();
    }
  }

  /// <summary>
  /// Main method. Expects one or two arguments: the input PDF and, optionally,
  /// the output text file. With one argument the output name is the input's
  /// file name (without directory or extension) plus ".txt".
  /// </summary>
  /// <param name="args">Command-line arguments.</param>
  static void Main(string[] args)
  {
    if (args.Length < 1 || args.Length > 2)
    {
      Console.WriteLine("usage: parsepdf infile.pdf <outfile.txt>");
      return;
    }

    string pdf = args[0];
    string text2 = args.Length == 2
      ? args[1]
      : Path.GetFileNameWithoutExtension(pdf) + ".txt";

    try
    {
      // Stopwatch measures elapsed time independently of wall-clock adjustments.
      Stopwatch timer = Stopwatch.StartNew();
      ParsingPdf example = new ParsingPdf();
      example.ParsePdf(pdf, text2);
      timer.Stop();
      Console.WriteLine("parsing completed in {0:0.00} seconds.", timer.Elapsed.TotalSeconds);
    }
    catch (Exception ex)
    {
      // Top-level handler: report the failure message instead of crashing.
      Console.WriteLine("error: " + ex.Message);
    }
  } // Main

  /// <summary>
  /// Render listener that writes the text it receives to a
  /// <see cref="StreamWriter"/>, wrapping each text block and each text
  /// chunk in angle brackets. Not used by <see cref="ParsePdf"/>.
  /// </summary>
  public class MyTextRenderListener : IRenderListener
  {
    /// <summary>The writer to which the information will be written.</summary>
    protected StreamWriter output;

    /// <summary>
    /// Creates a render listener that will look for text.
    /// </summary>
    /// <param name="output">The writer that receives the extracted text.</param>
    public MyTextRenderListener(StreamWriter output)
    {
      this.output = output;
    }

    /// <summary>Writes the opening "&lt;" of a text block.</summary>
    public void BeginTextBlock()
    {
      output.Write("<");
    }

    /// <summary>Writes the closing "&gt;" of a text block and ends the line.</summary>
    public void EndTextBlock()
    {
      output.WriteLine(">");
    }

    /// <summary>Images are ignored.</summary>
    public void RenderImage(ImageRenderInfo renderInfo)
    {
    }

    /// <summary>Writes the text of the chunk surrounded by "&lt;" and "&gt;".</summary>
    public void RenderText(TextRenderInfo renderInfo)
    {
      output.Write("<");
      output.Write(renderInfo.GetText());
      output.Write(">");
    }
  } // class MyTextRenderListener
} // class ParsingPdf

希望本文所述对大家的 C# 程序设计有所帮助。

如对本文有疑问, 点击进行留言回复!!

相关文章:

验证码:
移动技术网