Reading Images

This code uses OpenCV to load an image from disk and PyTesseract to read (OCR) the text it contains.

The image being read: image info

import cv2
import pytesseract

# Read the image from disk with OpenCV (NOTE(review): the load result is never checked before use)
path = "images/img.png"
img = cv2.imread(path)
# Run OCR on the loaded image; pytesseract launches the `tesseract` executable, which must be on PATH
text = pytesseract.image_to_string(img)
print(text)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:255, in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice, timeout)
    254 try:
--> 255     proc = subprocess.Popen(cmd_args, **subprocess_args())
    256 except OSError as e:

File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/subprocess.py:858, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
    855             self.stderr = io.TextIOWrapper(self.stderr,
    856                     encoding=encoding, errors=errors)
--> 858     self._execute_child(args, executable, preexec_fn, close_fds,
    859                         pass_fds, cwd, env,
    860                         startupinfo, creationflags, shell,
    861                         p2cread, p2cwrite,
    862                         c2pread, c2pwrite,
    863                         errread, errwrite,
    864                         restore_signals, start_new_session)
    865 except:
    866     # Cleanup if the child failed starting.

File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/subprocess.py:1704, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
   1703         err_msg = os.strerror(errno_num)
-> 1704     raise child_exception_type(errno_num, err_msg, err_filename)
   1705 raise child_exception_type(err_msg)

FileNotFoundError: [Errno 2] No such file or directory: 'tesseract'

During handling of the above exception, another exception occurred:

TesseractNotFoundError                    Traceback (most recent call last)
Cell In[1], line 8
      5 path = "images/img.png"
      6 img = cv2.imread(path)
----> 8 text = pytesseract.image_to_string(img)
      9 print(text)

File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:423, in image_to_string(image, lang, config, nice, output_type, timeout)
    418 """
    419 Returns the result of a Tesseract OCR run on the provided image to string
    420 """
    421 args = [image, 'txt', lang, config, nice, timeout]
--> 423 return {
    424     Output.BYTES: lambda: run_and_get_output(*(args + [True])),
    425     Output.DICT: lambda: {'text': run_and_get_output(*args)},
    426     Output.STRING: lambda: run_and_get_output(*args),
    427 }[output_type]()

File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:426, in image_to_string.<locals>.<lambda>()
    418 """
    419 Returns the result of a Tesseract OCR run on the provided image to string
    420 """
    421 args = [image, 'txt', lang, config, nice, timeout]
    423 return {
    424     Output.BYTES: lambda: run_and_get_output(*(args + [True])),
    425     Output.DICT: lambda: {'text': run_and_get_output(*args)},
--> 426     Output.STRING: lambda: run_and_get_output(*args),
    427 }[output_type]()

File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:288, in run_and_get_output(image, extension, lang, config, nice, timeout, return_bytes)
    277 with save(image) as (temp_name, input_filename):
    278     kwargs = {
    279         'input_filename': input_filename,
    280         'output_filename_base': temp_name,
   (...)
    285         'timeout': timeout,
    286     }
--> 288     run_tesseract(**kwargs)
    289     filename = f"{kwargs['output_filename_base']}{extsep}{extension}"
    290     with open(filename, 'rb') as output_file:

File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:260, in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice, timeout)
    258         raise
    259     else:
--> 260         raise TesseractNotFoundError()
    262 with timeout_manager(proc, timeout) as error_string:
    263     if proc.returncode:

TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information.