Reading Images
Reading Images
This code uses OpenCV and PyTesseract.
It reads text from images.
The image being read:
import cv2
import pytesseract
# Read the image from disk. cv2.imread does not raise on a missing or
# unreadable file; it returns None, so check explicitly and fail with a
# clear message instead of a confusing error further down.
path = "images/img.png"
img = cv2.imread(path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {path}")

# OpenCV loads pixels in BGR order, whereas pytesseract assumes RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Run OCR. pytesseract invokes the external `tesseract` executable, which
# must be installed and on PATH; otherwise TesseractNotFoundError is raised.
text = pytesseract.image_to_string(img)
print(text)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:255, in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice, timeout)
254 try:
--> 255 proc = subprocess.Popen(cmd_args, **subprocess_args())
256 except OSError as e:
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/subprocess.py:858, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
855 self.stderr = io.TextIOWrapper(self.stderr,
856 encoding=encoding, errors=errors)
--> 858 self._execute_child(args, executable, preexec_fn, close_fds,
859 pass_fds, cwd, env,
860 startupinfo, creationflags, shell,
861 p2cread, p2cwrite,
862 c2pread, c2pwrite,
863 errread, errwrite,
864 restore_signals, start_new_session)
865 except:
866 # Cleanup if the child failed starting.
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/subprocess.py:1704, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
1703 err_msg = os.strerror(errno_num)
-> 1704 raise child_exception_type(errno_num, err_msg, err_filename)
1705 raise child_exception_type(err_msg)
FileNotFoundError: [Errno 2] No such file or directory: 'tesseract'
During handling of the above exception, another exception occurred:
TesseractNotFoundError Traceback (most recent call last)
Cell In[1], line 8
5 path = "images/img.png"
6 img = cv2.imread(path)
----> 8 text = pytesseract.image_to_string(img)
9 print(text)
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:423, in image_to_string(image, lang, config, nice, output_type, timeout)
418 """
419 Returns the result of a Tesseract OCR run on the provided image to string
420 """
421 args = [image, 'txt', lang, config, nice, timeout]
--> 423 return {
424 Output.BYTES: lambda: run_and_get_output(*(args + [True])),
425 Output.DICT: lambda: {'text': run_and_get_output(*args)},
426 Output.STRING: lambda: run_and_get_output(*args),
427 }[output_type]()
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:426, in image_to_string.<locals>.<lambda>()
418 """
419 Returns the result of a Tesseract OCR run on the provided image to string
420 """
421 args = [image, 'txt', lang, config, nice, timeout]
423 return {
424 Output.BYTES: lambda: run_and_get_output(*(args + [True])),
425 Output.DICT: lambda: {'text': run_and_get_output(*args)},
--> 426 Output.STRING: lambda: run_and_get_output(*args),
427 }[output_type]()
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:288, in run_and_get_output(image, extension, lang, config, nice, timeout, return_bytes)
277 with save(image) as (temp_name, input_filename):
278 kwargs = {
279 'input_filename': input_filename,
280 'output_filename_base': temp_name,
(...)
285 'timeout': timeout,
286 }
--> 288 run_tesseract(**kwargs)
289 filename = f"{kwargs['output_filename_base']}{extsep}{extension}"
290 with open(filename, 'rb') as output_file:
File /opt/hostedtoolcache/Python/3.8.14/x64/lib/python3.8/site-packages/pytesseract/pytesseract.py:260, in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice, timeout)
258 raise
259 else:
--> 260 raise TesseractNotFoundError()
262 with timeout_manager(proc, timeout) as error_string:
263 if proc.returncode:
TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information.