当前位置: 移动技术网 > IT编程>开发语言>Java > 使用阿里云的图片识别成表格ocr(将图片表格转换成excel)

使用阿里云表格识别 OCR(将图片中的表格转换成 Excel)

2019年04月09日  | 移动技术网IT编程  | 我要评论

死神来了5快播,谢园春子,婴儿奶粉排行

财务人员经常需要对照别人发来的表格图片,手工录入成自己的表格。为了省事,可以用 OCR 把图片中的表格自动识别并转换成 Excel。

  • 图片识别 识别成表格 表格识别 OCR
  • 使用阿里云 API
  • 购买(印刷文字识别-表格识别) https://market.aliyun.com/products/57124001/cmapi024968.html
  • 获得阿里云图片识别表格的 AppCode
效果图如下

效果图

整合的代码
package com.xai.wuye.controller.api;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
// NOTE(review): the casing of the five com.xai.wuye classes below was reconstructed from a
// lower-cased listing; confirm it against the real project sources.
import com.xai.wuye.common.JsonResult;
import com.xai.wuye.exception.ResultException;
import com.xai.wuye.model.AParam;
import com.xai.wuye.service.CarService;
import com.xai.wuye.util.HttpUtils;
import org.apache.http.HttpResponse;
import org.apache.http.util.EntityUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import static org.apache.tomcat.util.codec.binary.Base64.encodeBase64;

@controller
@enableasync
@requestmapping("/api/ocr")
public class aliocrimages {


    @autowired
    carservice carservice;


    private string ocrpath = "/home/runapp/car/orc/";


    @responsebody
    @requestmapping("table")
    public jsonresult getfirstlicence(@requestparam(value = "file", required = false) multipartfile file) {
        if (file == null || file.isempty()||file.getsize() > 1204*1204*3)
            throw new resultexception(0,"文件为null,且不能大于3m");

        string filename = file.getoriginalfilename();
        string filepath = ocrpath+"temp/"+filename;
        file newfile = new file(filepath);
        try {
            file.transferto(newfile);

            string host = "https://form.market.alicloudapi.com";
            string path = "/api/predict/ocr_table_parse";
            
            // 输入阿里的code
            string appcode = "4926a667ee6c41329c278361*****";
            string imgfile = "图片路径";
            boolean is_old_format = false;//如果文档的输入中含有inputs字段,设置为true, 否则设置为false
            //请根据线上文档修改configure字段
            jsonobject configobj = new jsonobject();
            configobj.put("format", "xlsx");
            configobj.put("finance", false);
            configobj.put("dir_assure", false);
            string config_str = configobj.tostring();
            //            configobj.put("min_size", 5);
            //string config_str = "";

            string method = "post";
            map<string, string> headers = new hashmap<string, string>();
            //最后在header中的格式(中间是英文空格)为authorization:appcode 83359fd73fe94948385f570e3c139105
            headers.put("authorization", "appcode " + appcode);

            map<string, string> querys = new hashmap<string, string>();

            // 对图像进行base64编码
            string imgbase64 = "";
            try {

                byte[] content = new byte[(int) newfile.length()];
                fileinputstream finputstream = new fileinputstream(newfile);
                finputstream.read(content);
                finputstream.close();
                imgbase64 = new string(encodebase64(content));
            } catch (ioexception e) {
                e.printstacktrace();
                return null;
            }
            // 拼装请求body的json字符串
            jsonobject requestobj = new jsonobject();
            try {
                if(is_old_format) {
                    jsonobject obj = new jsonobject();
                    obj.put("image", getparam(50, imgbase64));
                    if(config_str.length() > 0) {
                        obj.put("configure", getparam(50, config_str));
                    }
                    jsonarray inputarray = new jsonarray();
                    inputarray.add(obj);
                    requestobj.put("inputs", inputarray);
                }else{
                    requestobj.put("image", imgbase64);
                    if(config_str.length() > 0) {
                        requestobj.put("configure", config_str);
                    }
                }
            } catch (jsonexception e) {
                e.printstacktrace();
            }
            string bodys = requestobj.tostring();

            try {
                /**
                 * 重要提示如下:
                 * httputils请从
                 * https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/src/main/java/com/aliyun/api/gateway/demo/util/httputils.java
                 * 下载
                 *
                 * 相应的依赖请参照
                 * https://github.com/aliyun/api-gateway-demo-sign-java/blob/master/pom.xml
                 */
                httpresponse response = httputils.dopost(host, path, method, headers, querys, bodys);
                int stat = response.getstatusline().getstatuscode();
                if(stat != 200){
                    system.out.println("http code: " + stat);
                    system.out.println("http header error msg: "+ response.getfirstheader("x-ca-error-message"));
                    system.out.println("http body error msg:" + entityutils.tostring(response.getentity()));
                    return null;
                }

                string res = entityutils.tostring(response.getentity());
                jsonobject res_obj = json.parseobject(res);
                long filename = system.currenttimemillis();
                if(is_old_format) {



                    jsonarray outputarray = res_obj.getjsonarray("outputs");
                    string output = outputarray.getjsonobject(0).getjsonobject("outputvalue").getstring("datavalue");
                    jsonobject out = json.parseobject(output);
                    system.out.println(out.tojsonstring());


                }else{

                    string tmp_base64path = ocrpath + filename;
                    file tmp_base64file = new file(tmp_base64path);
                    if(!tmp_base64file.exists()){
                        tmp_base64file.getparentfile().mkdirs();
                    }
                    tmp_base64file.createnewfile();

                    // write
                    filewriter fw = new filewriter(tmp_base64file, true);
                    bufferedwriter bw = new bufferedwriter(fw);
                    bw.write(res_obj.getstring("tables"));
                    bw.flush();
                    bw.close();
                    fw.close();

                    string exelfilepath = ocrpath + filename + "_1.xlsx";
                    runtime.getruntime().exec("touch "+exelfilepath).destroy();
                    process exec = runtime.getruntime().exec("sed -i -e 's/\\\\n/\\n/g' " + tmp_base64path);
                    exec.waitfor();
                    exec.destroy();

                    process exec1 = null;
                    string[] cmd = { "/bin/sh", "-c", "base64 -d " + tmp_base64path + " > " + exelfilepath };
                    exec1 = runtime.getruntime().exec(cmd);
                    exec1.waitfor();
                    exec1.destroy();


                    return jsonresult.success(filename);
                }
            } catch (exception e) {
                e.printstacktrace();
            }



        } catch (ioexception e) {
            e.printstacktrace();
        }

        return null;
    }


    @responsebody
    @requestmapping("getid")
    public responseentity<filesystemresource> getfirstlicence(string id) {
        string exelfilepath = ocrpath + id + "_1.xlsx";
        return export(new file(exelfilepath));
    }


    public responseentity<filesystemresource> export(file file) {
        if (file == null) {
            return null;
        }
        httpheaders headers = new httpheaders();
        headers.add("cache-control", "no-cache, no-store, must-revalidate");
        headers.add("content-disposition", "attachment; filename=" + system.currenttimemillis() + ".xls");
        headers.add("pragma", "no-cache");
        headers.add("expires", "0");
        headers.add("last-modified", new date().tostring());
        headers.add("etag", string.valueof(system.currenttimemillis()));

        return responseentity
                .ok()
                .headers(headers)
                .contentlength(file.length())
                .contenttype(mediatype.parsemediatype("application/octet-stream"))
                .body(new filesystemresource(file));
    }

    public static jsonobject getparam(int type, string datavalue) {
        jsonobject obj = new jsonobject();
        try {
            obj.put("datatype", type);
            obj.put("datavalue", datavalue);
        } catch (jsonexception e) {
            e.printstacktrace();
        }
        return obj;
    }

}

大功告成

  • 以下是静态页面代码
<!doctype html>
<!--
  Single-page upload form for the table OCR service.
  Dropping an image uploads it to the "table" endpoint; clicking the uploaded entry in the file
  list opens the generated workbook through the "getid" endpoint.
-->
<html>
<head>
  <meta charset="utf-8">
  <!-- Element UI stylesheet -->
  <link rel="stylesheet" href="https://unpkg.com/element-ui/lib/theme-chalk/index.css">
    <title>table</title>
</head>
<body>
  <div id="app">
      <!-- NOTE(review): the upload URL below contains a "/car" prefix while the download URL
           used in pre() does not; confirm which base path the server is actually deployed on. -->
      <el-upload
              class="upload-demo"
              drag
              action="https://www.***.com/car/api/ocr/table"
              :file-list="imagelist"
              :on-preview="pre"
                >
          <i class="el-icon-upload"></i>
          <div class="el-upload__text">将文件拖到此处,或<em>点击上传</em></div>
          <!-- The limit shown here matches the 3 MB limit enforced by the server. -->
          <div class="el-upload__tip" slot="tip">只能上传jpg/png文件,且不超过3MB</div>
      </el-upload>
      <div class="img-content" v-for="(item,key) in imagelist" :key="key">
          <img :src="item.url">
          <div class="name">
              <div>{{ item.name }}</div>
              <el-button type="text" @click="handlefilename(item,key)">修改名字</el-button>
          </div>
          <!-- delete icon -->
          <div class="del">
              <i @click="handlefileremove(item,key)" class="el-icon-delete"></i>
          </div>
          <!-- enlarge icon -->
          <div class="layer" @click="handlefileenlarge(item.url)">
              <i class="el-icon-view"></i>
          </div>
      </div>
  </div>
  <!-- Scripts belong inside <body>; Vue must be loaded before Element UI. -->
  <script src="https://unpkg.com/vue/dist/vue.js"></script>
  <script src="https://unpkg.com/element-ui/lib/index.js"></script>
  <script>
    new Vue({
      el: '#app',
      data: function() {
        return {
            visible: false,
            imagelist: []
        }
      },
        methods: {
            // Opens the generated workbook for an uploaded file.
            // Does nothing when the upload produced no usable server response.
            pre(res) {
                var reply = res && res.response;
                if (!reply || reply.data === undefined || reply.data === null) {
                    return;
                }
                console.log(reply.msg)
                window.open("https://www.***.com/api/ocr/getid?id=" + encodeURIComponent(reply.data));
            },
            // Asks for a new display name for one preview entry.
            handlefilename(item, key) {
                var renamed = window.prompt("修改名字", item.name);
                if (renamed) {
                    item.name = renamed;
                }
            },
            // Removes one preview entry from the list.
            handlefileremove(item, key) {
                this.imagelist.splice(key, 1);
            },
            // Shows the image at full size in a new window.
            handlefileenlarge(url) {
                if (url) {
                    window.open(url);
                }
            }
        }

    })
  </script>
</body>
</html>

如对本文有疑问,请在下面进行留言讨论,广大热心网友会与你互动!! 点击进行留言回复

相关文章:

验证码:
移动技术网