1/1/1970
5/10 videos ✅
Workflow
[Pytesseract documentation]
Install
pip install pillow
pip install opencv-python
Add the directory C:\Program Files\Tesseract-OCR to the system PATH environment variable
then
pip install pytesseract
from PIL import Image
import cv2
import pytesseract
As Pillow is easy to use, use the Pillow library to open images and for simpler tasks: processing, loading, opening, passing, saving, and minor cropping.
As OpenCV is computationally expensive, use OpenCV for the more complex robustness tasks: binarization of an image, controlling the threshold of an image.
from PIL import Image

# Assign file path
img_file = "/data/page_01.jpg"
# Load this image in memory
img = Image.open(img_file)
# Open image in viewer
img.show()
# Rotate 180 degrees and show
img.rotate(180).show()
# Save as new file
img.save("temp/img_file2.jpg")

import cv2

img_file = "data.jpg"
img = cv2.imread(img_file)
# Display image in viewer
cv2.imshow("original image", img)
# The OpenCV function is spelled waitKey (capital K); cv2.waitkey does not
# exist and raises AttributeError. 0 means "wait until a key is pressed".
cv2.waitKey(0)
# https://stackoverflow.com/questions/28816046/displaying-different-images-with-actual-size-in-matplotlib-subplot
from matplotlib import pyplot as plt
# Function to Display Image Inline
def display_image_in_actual_size(im_path):
    """Show the image stored at ``im_path`` at its native pixel size.

    The figure is sized in inches as (pixels / dpi) so that one image pixel
    maps to one figure pixel, and a single axes fills the whole figure.

    Args:
        im_path: Path of the image file, passed to ``plt.imread``.
    """
    dpi = 80
    im_data = plt.imread(im_path)
    # Only the first two dimensions are needed. Grayscale / black-and-white
    # images load as 2-D arrays with no channel axis, so unpacking three
    # values from ``shape`` would raise ValueError for them.
    height, width = im_data.shape[:2]
    # What size does the figure need to be in inches to fit the image?
    figsize = width / float(dpi), height / float(dpi)
    # Create a figure of the right size with one axes that takes up the full figure
    fig = plt.figure(figsize=figsize)
    ax = fig.add_axes([0, 0, 1, 1])
    # Hide spines, ticks, etc.
    ax.axis('off')
    # Display the image.
    ax.imshow(im_data, cmap='gray')
    plt.show()
# Display the image
display_image_in_actual_size(img_file)
# Invert
inverted_image = cv2.bitwise_not(img)
# save file (the original passed the misspelled, undefined name `inverted_imgae`)
cv2.imwrite("temp/inverted.jpg", inverted_image)
# Function to convert an image into grayscale
def grayscale(image):
    """Return a grayscale version of a BGR image.

    Args:
        image: Image array in BGR channel order, as expected by
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The result of ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``.
    """
    # Convert the argument itself; the original converted the module-level
    # ``img`` and silently ignored whatever the caller passed in.
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Convert image into grayscale
gray_image = grayscale(img)
# save file
cv2.imwrite("temp/gray.jpg", gray_image)
# Note: converting to grayscale before converting the image to black and
# white makes the process easier.
# Convert into black and white
thres, img_bw = cv2.threshold(gray_image, 200, 230, cv2.THRESH_BINARY)
# save file
cv2.imwrite("temp/bw_image.jpg", img_bw)
# display (uses the helper defined in these notes; `display` is not defined here)
display_image_in_actual_size("temp/bw_image.jpg")
# Note: make sure to adjust the threshold() arguments for a better result.
# Function to remove noise
def noise_removal(image):
    """Apply a small morphological clean-up followed by a median blur.

    The image is dilated, eroded and morphologically closed with a 1x1
    kernel of ones (one iteration each), then passed through a median
    blur with aperture size 3.

    Args:
        image: Image array accepted by the OpenCV morphology functions.

    Returns:
        The processed image.
    """
    import numpy as np

    # One kernel is enough: the original built the same 1x1 kernel twice
    # (once misspelled as ``kernal``, once with a missing comma).
    kernel = np.ones((1, 1), np.uint8)
    image = cv2.dilate(image, kernel, iterations=1)
    image = cv2.erode(image, kernel, iterations=1)
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
    image = cv2.medianBlur(image, 3)
    return image
# Remove noise (the thresholded image is named `img_bw`, not `im_bw`)
no_noise = noise_removal(img_bw)
# save file
cv2.imwrite("temp/no_noise.jpg", no_noise)
# display
display_image_in_actual_size("temp/no_noise.jpg")
# Erosion - Thin
# function to thin font
def thin_font(image):
    """Erode the inverted image with a 2x2 kernel and invert it back.

    Eroding the inverted image shrinks the regions that are dark in the
    input, which is what thins the strokes.

    Args:
        image: Image array accepted by ``cv2.bitwise_not`` / ``cv2.erode``.

    Returns:
        The re-inverted, eroded image.
    """
    import numpy as np

    structuring_element = np.ones((2, 2), np.uint8)
    negative = cv2.bitwise_not(image)
    shrunk = cv2.erode(negative, structuring_element, iterations=1)
    return cv2.bitwise_not(shrunk)
# thin font (the denoised image is named `no_noise`, not `not_noise`)
eroded_image = thin_font(no_noise)
# save
cv2.imwrite("temp/eroded_image.jpg", eroded_image)
# display
display_image_in_actual_size("temp/eroded_image.jpg")
# Note: adjust the parameters of np.ones() for a better result.
Dilation - Thick
def thick_font(image):
    """Dilate the inverted image with a 2x2 kernel and invert it back.

    Dilating the inverted image grows the regions that are dark in the
    input, which is what thickens the strokes.

    Args:
        image: Image array accepted by ``cv2.bitwise_not`` / ``cv2.dilate``.

    Returns:
        The re-inverted, dilated image.
    """
    import numpy as np

    structuring_element = np.ones((2, 2), np.uint8)
    negative = cv2.bitwise_not(image)
    grown = cv2.dilate(negative, structuring_element, iterations=1)
    return cv2.bitwise_not(grown)
# dilated_image
dilated_image = thick_font(no_noise)
# save
cv2.imwrite("temp/dilated_image.jpg", dilated_image)
# display (uses the helper defined in these notes; `display` is not defined here)
display_image_in_actual_size("temp/dilated_image.jpg")
# getSkewAngle() function
# Note: contours allow you to draw bounding boxes.
# deskew() function
# NOTE(review): neither `deskew` nor `new` is defined in these notes; the
# function bodies were left out. Define/load them before running this cell.
fixed = deskew(new)
cv2.imwrite("temp/rotated_fixed.jpg", fixed)
display_image_in_actual_size("temp/rotated_fixed.jpg")
# remove border function
# Note: if you are dealing with a PDF whose margins/borders are consistently
# the same on every side, do not use this method. Preprocess the PDF with a
# PDF editor or image editor to crop in bulk if you already know the
# determined size.
# Otherwise, if you know the determined size and numbers such as width,
# height, x and y, you can go ahead and do that in OpenCV as well by passing
# in those arguments manually. What the `contours` step is doing is creating
# those bounding boxes and finding the borders automatically. This is
# particularly good if you are dealing with inconsistent borders across images.
# call function
# save
# display
color = [255, 255, 255]
top, bottom, left, right = [150] * 4
# NOTE(review): `no_borders` is not defined in these notes; presumably it is
# the output of the remove-border function that was left out. Confirm.
image_with_border = cv2.copyMakeBorder(no_borders, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
# save
# display
import pytesseract
from PIL import Image

img_file = "img.jpg"
# Path of the preprocessed (denoised) image written earlier in these notes.
no_noise = "temp/no_noise.jpg"
img = Image.open(no_noise)
ocr_result = pytesseract.image_to_string(img)
print(ocr_result)
# Note: pass a preprocessed image or a good source file for a better result;
# preprocessing with OpenCV was covered in the last lecture.
There may still be some wrong output; these errors are known as the "dragons" of OCR.
Machine learning for OCR is not 100% accurate; there may be a trade-off of around 2%.