Search This Blog

Wednesday, February 26, 2020

Pytesseract: read an image and rename the file to its recognized text

pytesseract_image.py

import cv2
import os
import pytesseract
try:
    from PIL import Image
except ImportError:
    import Image

# pytesseract runs the Tesseract executable as an external program; point it
# at the installed binary.
pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract.exe'

# Folder whose images are renamed after the text recognised in them.
folderpath = "D:/Temp/Captcha/program/"

# Characters that Windows does not allow in a file name.
_INVALID_FILENAME_CHARS = frozenset('<>:"/\\|?*')


def _label_from_text(text):
    """Return *text* reduced to something usable as a file name stem.

    Runs of whitespace (including the trailing newline / form feed that the
    OCR output ends with) are collapsed to single spaces, non-printable
    characters and characters forbidden in Windows file names are dropped,
    and leading/trailing spaces and dots are stripped. The result is an
    empty string when nothing usable remains.
    """
    collapsed = " ".join(text.split())
    cleaned = "".join(
        ch for ch in collapsed
        if ch.isprintable() and ch not in _INVALID_FILENAME_CHARS
    )
    return cleaned.strip(" .")


for filename in os.listdir(folderpath):
    src = os.path.join(folderpath, filename)

    img_cv = cv2.imread(src)
    if img_cv is None:
        # imread signals "could not decode" (sub-directory, non-image file,
        # unreadable file) by returning None instead of raising.
        print("Skipping (not a readable image): " + filename)
        continue

    # By default OpenCV stores images in BGR format and since pytesseract
    # assumes RGB format, we need to convert from BGR to RGB format/mode:
    img_rgb = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)
    imagetext = pytesseract.image_to_string(img_rgb)
    print(imagetext)

    label = _label_from_text(imagetext)
    if not label:
        # Nothing recognised: keep the original name rather than creating
        # a file called just ".png".
        print("Skipping (no usable text recognised): " + filename)
        continue

    dst = os.path.join(folderpath, label + ".png")
    if os.path.normcase(dst) == os.path.normcase(src):
        # Already carries the recognised name.
        continue
    if os.path.exists(dst):
        # os.rename would raise on Windows and silently overwrite on POSIX;
        # never clobber another image.
        print("Skipping (target already exists): " + filename + " -> " + label + ".png")
        continue

    os.rename(src, dst)


NOTE:
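Save the script under a file name other than pytesseract.py. Otherwise `import pytesseract` imports the script itself instead of the library, and the script fails with errors such as "pytesseract.image_to_string not found".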

No comments:

Hit Counter


View My Stats