Following on from the previous posts, I'm going to finish up the class that runs OCR.
There's still plenty of room for improvement, but I can't keep tinkering with this forever, so this wraps up the OCR part for now.
The flow is:
1) decide whether the cell is blank -> if blank, stop there 2) crop out just the text region 3) run it through tesseract 4) if the result is a number, we're done; otherwise send it to the Vision API
A simple implementation looks like this.
import requests
import base64
import json
import cv2
import re
import numpy as np
from PIL import Image
import sys
import pyocr
import pyocr.builders
from enum import Enum
from collections import namedtuple
CellKind = Enum('CellKind','empty string number')
DetectText = namedtuple('DetectText', ['kind', 'text'])
class HybridOCR:
    def __init__(self):
        tools = pyocr.get_available_tools()
        if len(tools) == 0:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]

    # A cell is blank if every grayscale pixel is pure white (255).
    def _isBlankCell(self, img_gray):
        return bool((img_gray == 255).all())

    # Find the leftmost and rightmost columns that contain non-white pixels.
    def _getEndPoint(self, img_gray):
        left_pt = sys.maxsize
        right_pt = 0
        for row in img_gray:
            for index, val in enumerate(row):
                if val == 255:
                    continue
                if left_pt > index:
                    left_pt = index
                if right_pt < index:
                    right_pt = index
        return (left_pt, right_pt)

    # Cut out the text region horizontally, keeping a margin on both sides.
    def _cutOut(self, points, img, margin=20):
        left = max(points[0] - margin, 0)
        return img[:, left:points[1] + margin]

    # Run tesseract; return a numeric DetectText, or None if the text is not a number.
    def _tesseract_ocr(self, img):
        txt = self.tool.image_to_string(
            img,
            lang="jpn+eng",
            builder=pyocr.builders.TextBuilder(tesseract_layout=6)
        )
        txt = re.sub(r',|\s', "", txt)
        try:
            return DetectText(kind=CellKind.number, text=int(txt))
        except ValueError:
            pass
        try:
            return DetectText(kind=CellKind.number, text=float(txt))
        except ValueError:
            return None

    # Fall back to the Cloud Vision API for cells that are not numeric.
    def _visionapi_ocr(self, img_path):
        with open(img_path, 'rb') as f:
            img = f.read()
        req_body = json.dumps({
            'requests': [{
                'image': {
                    'content': base64.b64encode(img).decode('UTF-8')
                },
                'features': [{
                    'type': 'TEXT_DETECTION',
                    'maxResults': 10,
                }]
            }]
        })
        response = requests.post(
            url='https://vision.googleapis.com/v1/images:annotate?key=<API_KEY>',
            data=req_body,
            headers={'Content-Type': 'application/json'}
        )
        res = response.json()
        try:
            txt = res['responses'][0]['textAnnotations'][0]['description'].replace('\n', '')
            return DetectText(kind=CellKind.string, text=txt)
        except (KeyError, IndexError):
            return DetectText(kind=CellKind.empty, text="")

    def recognize(self, file_path):
        image = cv2.imread(file_path, 1)
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # 1) blank cell -> stop here
        if self._isBlankCell(img_gray):
            return DetectText(kind=CellKind.empty, text="")
        # 2) crop to the text region (fall back to the full image if cropping fails)
        cut_image = self._cutOut(self._getEndPoint(img_gray), image)
        try:
            pil_image = Image.fromarray(cut_image)
        except Exception:
            pil_image = Image.fromarray(image)
        # 3) tesseract first; 4) if it did not yield a number, ask the Vision API
        detect = self._tesseract_ocr(pil_image)
        if detect is not None:
            return detect
        return self._visionapi_ocr(file_path)
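
To make the examples below concrete, here's a minimal usage sketch; the cell image file names are placeholders I made up for illustration, not files from the actual project.

ocr = HybridOCR()
# recognize() takes the path to one cell image cropped out of the table
print(ocr.recognize('numeric_cell.png'))   # a cell containing a number
print(ocr.recognize('header_cell.png'))    # a cell containing a Japanese header
print(ocr.recognize('blank_cell.png'))     # an empty cell

Run against a few real cells, the output looks like this.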
(input)
(output)
DetectText(kind=<CellKind.number: 3>, text=42.1)
(input)
(output)
DetectText(kind=<CellKind.string: 2>, text='住宅の所有の関係(6区分)')
(input)
(output)
DetectText(kind=<CellKind.empty: 1>, text='')
Looking good! Next up: implementing the feature builder!