Add initial project structure with OCR functionality and dependencies
- Create .gitignore to exclude Python-generated files and virtual environments - Add .python-version for Python version management - Implement main OCR script (main.py) to process PDF files and extract text - Add PDF processing functions in pdf_ocr.py - Update README.md with project description, requirements, and usage instructions - Include pyproject.toml for project metadata and dependencies - Add uv.lock for dependency resolution
This commit is contained in:
+12
@@ -0,0 +1,12 @@
|
|||||||
|
# Python-generated files
|
||||||
|
__pycache__/
|
||||||
|
*.py[oc]
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
wheels/
|
||||||
|
*.egg-info
|
||||||
|
|
||||||
|
# Virtual environments
|
||||||
|
.venv
|
||||||
|
*.txt
|
||||||
|
*.pdf
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
3.14.3
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
# PDF Range OCR Script
|
||||||
|
|
||||||
|
This project provides a command-line script that runs OCR on a selected range of PDF pages and saves the recognized text to a UTF-8 text file.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
1. Linux with Tesseract OCR installed:
|
||||||
|
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y tesseract-ocr tesseract-ocr-rus tesseract-ocr-eng
|
||||||
|
|
||||||
|
2. Python dependencies:
|
||||||
|
|
||||||
|
uv sync
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Run OCR for an inclusive, 1-based page range and write the recognized text to a text file:
|
||||||
|
|
||||||
|
uv run python main.py --input "input.pdf" --start 5 --end 12 --output "result.txt"
|
||||||
|
|
||||||
|
Optional flags:
|
||||||
|
|
||||||
|
- --lang (default: rus+eng)
|
||||||
|
- --dpi (default: 300)
|
||||||
|
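- --rotate (default: 0; clockwise rotation in degrees applied to each rendered page image before OCR; allowed values: 0, 90, 180, 270, -90, -180, -270)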
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
uv run python main.py \
|
||||||
|
--input "Красавчикова. Личные права. 1994.pdf" \
|
||||||
|
--start 1 \
|
||||||
|
--end 3 \
|
||||||
|
--output "ocr_output.txt" \
|
||||||
|
--lang "rus+eng" \
|
||||||
|
--dpi 300 \
|
||||||
|
--rotate 90
|
||||||
|
|
||||||
|
The output file is UTF-8 text with page separators:
|
||||||
|
|
||||||
|
=== Page 1 ===
|
||||||
|
<recognized text>
|
||||||
|
|
||||||
|
=== Page 2 ===
|
||||||
|
<recognized text>
|
||||||
@@ -0,0 +1,121 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytesseract
|
||||||
|
|
||||||
|
from pdf_ocr import get_total_pages, ocr_pdf_range
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the OCR script.

    Returns:
        A parser with four required options (``--input``, ``--start``,
        ``--end``, ``--output``) and three optional ones (``--lang``,
        ``--dpi``, ``--rotate``) that fall back to defaults.
    """
    cli = argparse.ArgumentParser(
        description="Recognize text from selected PDF pages and save to a text file.",
    )

    # Required options share the same shape, so register them from a table.
    required_options = (
        ("--input", Path, "Path to source PDF file."),
        ("--start", int, "Start page (1-based, inclusive)."),
        ("--end", int, "End page (1-based, inclusive)."),
        ("--output", Path, "Path to output TXT file."),
    )
    for flag, converter, description in required_options:
        cli.add_argument(flag, required=True, type=converter, help=description)

    # Optional tuning knobs with defaults.
    cli.add_argument(
        "--lang",
        default="rus+eng",
        help="Tesseract language(s), example: rus+eng.",
    )
    cli.add_argument(
        "--dpi",
        type=int,
        default=300,
        help="Render DPI before OCR. Default: 300.",
    )
    cli.add_argument(
        "--rotate",
        type=int,
        default=0,
        help="Rotate page image before OCR. Allowed: 0, 90, 180, 270, -90, -180, -270.",
    )
    return cli
|
||||||
|
|
||||||
|
|
||||||
|
def validate_args(args: argparse.Namespace) -> int:
    """Check parsed command-line arguments and return the document page count.

    Checks run in a fixed order: the input path, its extension, the page
    range, the DPI, the rotation angle, and finally the end page against
    the actual page count reported by ``get_total_pages``.

    Args:
        args: Namespace carrying ``input``, ``start``, ``end``, ``dpi`` and
            ``rotate`` attributes.

    Returns:
        Total number of pages in the input document.

    Raises:
        ValueError: On the first check that fails.
    """
    source: Path = args.input

    # A directory (or any non-regular file) is reported the same way as a
    # missing path.
    if not (source.exists() and source.is_file()):
        raise ValueError(f"Input file not found: {source}")

    extension = source.suffix.lower()
    if extension != ".pdf":
        raise ValueError(f"Input file must have .pdf extension: {source}")

    first_page = args.start
    last_page = args.end
    if first_page < 1:
        raise ValueError("--start must be >= 1.")
    if last_page < first_page:
        raise ValueError("--end must be >= --start.")

    if args.dpi < 72:
        raise ValueError("--dpi must be >= 72.")

    if args.rotate not in {-270, -180, -90, 0, 90, 180, 270}:
        raise ValueError("--rotate must be one of: 0, 90, 180, 270, -90, -180, -270.")

    # Opening the document is deferred until the cheap checks have passed.
    page_count = get_total_pages(source)
    if last_page > page_count:
        raise ValueError(
            f"--end ({args.end}) is out of range. Document has {page_count} pages.",
        )
    return page_count
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_tesseract_available() -> None:
    """Verify that a usable Tesseract binary can be reached.

    First looks for a ``tesseract`` executable on ``PATH``, then asks
    pytesseract for the binary's version to confirm it can be invoked.

    Raises:
        RuntimeError: If the executable is not on ``PATH`` or the version
            query fails.
    """
    binary_location = shutil.which("tesseract")
    if binary_location is None:
        raise RuntimeError(
            "Tesseract is not installed or not in PATH. Install it, for example: sudo apt-get install tesseract-ocr",
        )

    # The version itself is not needed; the call is only a smoke test.
    try:
        pytesseract.get_tesseract_version()
    except Exception as exc:
        raise RuntimeError("Cannot use Tesseract binary from current environment.") from exc
|
||||||
|
|
||||||
|
|
||||||
|
def write_output(output_path: Path, pages: list[tuple[int, str]]) -> None:
    """Write recognized pages to a UTF-8 text file with page separators.

    Each page is emitted as a ``=== Page N ===`` header line, the page text
    with trailing whitespace removed, and a blank line. Missing parent
    directories of ``output_path`` are created first.

    Args:
        output_path: Destination file; overwritten if it already exists.
        pages: ``(page_number, text)`` pairs in the order to be written.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8") as sink:
        sink.writelines(
            f"=== Page {number} ===\n{body.rstrip()}\n\n" for number, body in pages
        )
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str] | None = None) -> int:
    """Run the OCR command-line workflow end to end.

    Parses arguments, validates them, checks that Tesseract is usable, runs
    OCR over the requested page range while printing progress, and writes
    the result to the output file.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behavior of a plain
            ``main()`` call.

    Returns:
        ``0`` on success, ``1`` if validation, OCR, or writing the output
        failed. The error message is printed to stderr.
    """
    args = build_parser().parse_args(argv)

    def report_progress(index: int, total: int, page: int) -> None:
        # Flush so progress is visible immediately even when stdout is piped.
        print(f"[{index}/{total}] OCR page {page}", flush=True)

    try:
        total_pages = validate_args(args)
        ensure_tesseract_available()

        print(
            (
                f"Running OCR for pages {args.start}-{args.end} of {total_pages} "
                f"from {args.input} with rotate={args.rotate}..."
            ),
            flush=True,
        )

        pages = ocr_pdf_range(
            args.input,
            args.start,
            args.end,
            lang=args.lang,
            dpi=args.dpi,
            rotate=args.rotate,
            on_progress=report_progress,
        )
        write_output(args.output, pages)
    except (ValueError, RuntimeError, OSError) as exc:
        # OSError covers filesystem failures (e.g. an unwritable output
        # path), which would otherwise surface as a raw traceback after the
        # whole OCR run has completed.
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    print(f"Saved OCR text to: {args.output}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
+61
@@ -0,0 +1,61 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
import fitz
|
||||||
|
import pytesseract
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
|
def get_total_pages(pdf_path: Path) -> int:
    """Open the PDF at ``pdf_path`` and report how many pages it has.

    The document is closed again before returning.
    """
    document = fitz.open(pdf_path)
    try:
        return document.page_count
    finally:
        document.close()
|
||||||
|
|
||||||
|
|
||||||
|
def ocr_pdf_range(
    pdf_path: Path,
    start_page: int,
    end_page: int,
    *,
    lang: str,
    dpi: int,
    rotate: int = 0,
    on_progress: Callable[[int, int, int], None] | None = None,
) -> list[tuple[int, str]]:
    """Run OCR for the selected inclusive page range and return extracted text.

    Each page is rendered to an RGB image at the requested DPI, optionally
    rotated, and passed to Tesseract through pytesseract.

    Args:
        pdf_path: Path to the source PDF.
        start_page: First page to process (1-based, inclusive).
        end_page: Last page to process (1-based, inclusive). If it is
            smaller than ``start_page`` no pages are processed.
        lang: Tesseract language specification, e.g. ``"rus+eng"``.
        dpi: Render resolution; the page is scaled by ``dpi / 72``.
        rotate: Degrees to turn the rendered image before OCR. ``0`` skips
            rotation.
        on_progress: Optional callback invoked before each page with
            ``(index_in_range, total_in_range, page_number)``.

    Returns:
        ``(page_number, text)`` pairs in page order.

    Raises:
        ValueError: If ``start_page`` is smaller than 1.
        RuntimeError: If rotating or recognizing a page fails; the original
            exception is chained as the cause.
    """
    # PyMuPDF interprets negative page indices as counting from the end of
    # the document, so a start page below 1 would silently OCR the wrong
    # page instead of failing.
    if start_page < 1:
        raise ValueError(f"start_page must be >= 1, got {start_page}.")

    # PDF page units are 1/72 inch, so this zoom factor renders at `dpi`.
    scale = dpi / 72.0
    matrix = fitz.Matrix(scale, scale)
    total_in_range = end_page - start_page + 1
    results: list[tuple[int, str]] = []

    with fitz.open(pdf_path) as document:
        for index, page_number in enumerate(range(start_page, end_page + 1), start=1):
            if on_progress is not None:
                on_progress(index, total_in_range, page_number)

            page = document.load_page(page_number - 1)
            pixmap = page.get_pixmap(matrix=matrix, alpha=False)
            image = Image.frombytes(
                "RGB",
                (pixmap.width, pixmap.height),
                pixmap.samples,
            )

            # Reset per page so the cleanup below never touches an unbound
            # name or an image left over from a previous iteration when the
            # rotation itself fails.
            rotated_image = None
            try:
                if rotate:
                    # Pillow rotates counter-clockwise for positive angles;
                    # negating turns the page clockwise by `rotate` degrees.
                    rotated_image = image.rotate(-rotate, expand=True)

                ocr_input = image if rotated_image is None else rotated_image
                text = pytesseract.image_to_string(ocr_input, lang=lang)
            except Exception as exc:
                raise RuntimeError(f"OCR failed on page {page_number}") from exc
            finally:
                if rotated_image is not None:
                    rotated_image.close()
                image.close()

            results.append((page_number, text))

    return results
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
[project]
|
||||||
|
name = "pdf-reader"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Add your description here"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.14.3"
|
||||||
|
dependencies = [
|
||||||
|
"pillow>=11.3.0",
|
||||||
|
"pymupdf>=1.26.4",
|
||||||
|
"pytesseract>=0.3.13",
|
||||||
|
]
|
||||||
@@ -0,0 +1,91 @@
|
|||||||
|
version = 1
|
||||||
|
revision = 3
|
||||||
|
requires-python = ">=3.14.3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "packaging"
|
||||||
|
version = "26.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pdf-reader"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = { virtual = "." }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "pillow" },
|
||||||
|
{ name = "pymupdf" },
|
||||||
|
{ name = "pytesseract" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.metadata]
|
||||||
|
requires-dist = [
|
||||||
|
{ name = "pillow", specifier = ">=11.3.0" },
|
||||||
|
{ name = "pymupdf", specifier = ">=1.26.4" },
|
||||||
|
{ name = "pytesseract", specifier = ">=0.3.13" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pillow"
|
||||||
|
version = "12.1.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/03/d0/bebb3ffbf31c5a8e97241476c4cf8b9828954693ce6744b4a2326af3e16b/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:417423db963cb4be8bac3fc1204fe61610f6abeed1580a7a2cbb2fbda20f12af", size = 4062652, upload-time = "2026-02-11T04:21:53.19Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/2d/c0/0e16fb0addda4851445c28f8350d8c512f09de27bbb0d6d0bbf8b6709605/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:b957b71c6b2387610f556a7eb0828afbe40b4a98036fc0d2acfa5a44a0c2036f", size = 4138823, upload-time = "2026-02-11T04:22:03.088Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6b/fb/6170ec655d6f6bb6630a013dd7cf7bc218423d7b5fa9071bf63dc32175ae/pillow-12.1.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:097690ba1f2efdeb165a20469d59d8bb03c55fb6621eb2041a060ae8ea3e9642", size = 3601143, upload-time = "2026-02-11T04:22:04.909Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/59/04/dc5c3f297510ba9a6837cbb318b87dd2b8f73eb41a43cc63767f65cb599c/pillow-12.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2815a87ab27848db0321fb78c7f0b2c8649dee134b7f2b80c6a45c6831d75ccd", size = 5266254, upload-time = "2026-02-11T04:22:07.656Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/05/30/5db1236b0d6313f03ebf97f5e17cda9ca060f524b2fcc875149a8360b21c/pillow-12.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7ed2c6543bad5a7d5530eb9e78c53132f93dfa44a28492db88b41cdab885202", size = 4657499, upload-time = "2026-02-11T04:22:09.613Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6f/18/008d2ca0eb612e81968e8be0bbae5051efba24d52debf930126d7eaacbba/pillow-12.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:652a2c9ccfb556235b2b501a3a7cf3742148cd22e04b5625c5fe057ea3e3191f", size = 6232137, upload-time = "2026-02-11T04:22:11.434Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/70/f1/f14d5b8eeb4b2cd62b9f9f847eb6605f103df89ef619ac68f92f748614ea/pillow-12.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6e4571eedf43af33d0fc233a382a76e849badbccdf1ac438841308652a08e1f", size = 8042721, upload-time = "2026-02-11T04:22:13.321Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5a/d6/17824509146e4babbdabf04d8171491fa9d776f7061ff6e727522df9bd03/pillow-12.1.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b574c51cf7d5d62e9be37ba446224b59a2da26dc4c1bb2ecbe936a4fb1a7cb7f", size = 6347798, upload-time = "2026-02-11T04:22:15.449Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/d1/ee/c85a38a9ab92037a75615aba572c85ea51e605265036e00c5b67dfafbfe2/pillow-12.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a37691702ed687799de29a518d63d4682d9016932db66d4e90c345831b02fb4e", size = 7039315, upload-time = "2026-02-11T04:22:17.24Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ec/f3/bc8ccc6e08a148290d7523bde4d9a0d6c981db34631390dc6e6ec34cacf6/pillow-12.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f95c00d5d6700b2b890479664a06e754974848afaae5e21beb4d83c106923fd0", size = 6462360, upload-time = "2026-02-11T04:22:19.111Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f6/ab/69a42656adb1d0665ab051eec58a41f169ad295cf81ad45406963105408f/pillow-12.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:559b38da23606e68681337ad74622c4dbba02254fc9cb4488a305dd5975c7eeb", size = 7165438, upload-time = "2026-02-11T04:22:21.041Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/02/46/81f7aa8941873f0f01d4b55cc543b0a3d03ec2ee30d617a0448bf6bd6dec/pillow-12.1.1-cp314-cp314-win32.whl", hash = "sha256:03edcc34d688572014ff223c125a3f77fb08091e4607e7745002fc214070b35f", size = 6431503, upload-time = "2026-02-11T04:22:22.833Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/40/72/4c245f7d1044b67affc7f134a09ea619d4895333d35322b775b928180044/pillow-12.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:50480dcd74fa63b8e78235957d302d98d98d82ccbfac4c7e12108ba9ecbdba15", size = 7176748, upload-time = "2026-02-11T04:22:24.64Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e4/ad/8a87bdbe038c5c698736e3348af5c2194ffb872ea52f11894c95f9305435/pillow-12.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:5cb1785d97b0c3d1d1a16bc1d710c4a0049daefc4935f3a8f31f827f4d3d2e7f", size = 2544314, upload-time = "2026-02-11T04:22:26.685Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/6c/9d/efd18493f9de13b87ede7c47e69184b9e859e4427225ea962e32e56a49bc/pillow-12.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1f90cff8aa76835cba5769f0b3121a22bd4eb9e6884cfe338216e557a9a548b8", size = 5268612, upload-time = "2026-02-11T04:22:29.884Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f8/f1/4f42eb2b388eb2ffc660dcb7f7b556c1015c53ebd5f7f754965ef997585b/pillow-12.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1f1be78ce9466a7ee64bfda57bdba0f7cc499d9794d518b854816c41bf0aa4e9", size = 4660567, upload-time = "2026-02-11T04:22:31.799Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/01/54/df6ef130fa43e4b82e32624a7b821a2be1c5653a5fdad8469687a7db4e00/pillow-12.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42fc1f4677106188ad9a55562bbade416f8b55456f522430fadab3cef7cd4e60", size = 6269951, upload-time = "2026-02-11T04:22:33.921Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a9/48/618752d06cc44bb4aae8ce0cd4e6426871929ed7b46215638088270d9b34/pillow-12.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98edb152429ab62a1818039744d8fbb3ccab98a7c29fc3d5fcef158f3f1f68b7", size = 8074769, upload-time = "2026-02-11T04:22:35.877Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c3/bd/f1d71eb39a72fa088d938655afba3e00b38018d052752f435838961127d8/pillow-12.1.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d470ab1178551dd17fdba0fef463359c41aaa613cdcd7ff8373f54be629f9f8f", size = 6381358, upload-time = "2026-02-11T04:22:37.698Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/64/ef/c784e20b96674ed36a5af839305f55616f8b4f8aa8eeccf8531a6e312243/pillow-12.1.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6408a7b064595afcab0a49393a413732a35788f2a5092fdc6266952ed67de586", size = 7068558, upload-time = "2026-02-11T04:22:39.597Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/73/cb/8059688b74422ae61278202c4e1ad992e8a2e7375227be0a21c6b87ca8d5/pillow-12.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5d8c41325b382c07799a3682c1c258469ea2ff97103c53717b7893862d0c98ce", size = 6493028, upload-time = "2026-02-11T04:22:42.73Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c6/da/e3c008ed7d2dd1f905b15949325934510b9d1931e5df999bb15972756818/pillow-12.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c7697918b5be27424e9ce568193efd13d925c4481dd364e43f5dff72d33e10f8", size = 7191940, upload-time = "2026-02-11T04:22:44.543Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/01/4a/9202e8d11714c1fc5951f2e1ef362f2d7fbc595e1f6717971d5dd750e969/pillow-12.1.1-cp314-cp314t-win32.whl", hash = "sha256:d2912fd8114fc5545aa3a4b5576512f64c55a03f3ebcca4c10194d593d43ea36", size = 6438736, upload-time = "2026-02-11T04:22:46.347Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f3/ca/cbce2327eb9885476b3957b2e82eb12c866a8b16ad77392864ad601022ce/pillow-12.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4ceb838d4bd9dab43e06c363cab2eebf63846d6a4aeaea283bbdfd8f1a8ed58b", size = 7182894, upload-time = "2026-02-11T04:22:48.114Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ec/d2/de599c95ba0a973b94410477f8bf0b6f0b5e67360eb89bcb1ad365258beb/pillow-12.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7b03048319bfc6170e93bd60728a1af51d3dd7704935feb228c4d4faab35d334", size = 2546446, upload-time = "2026-02-11T04:22:50.342Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pymupdf"
|
||||||
|
version = "1.27.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/a4/fb/d80374ab091ab7ad5a5e7981a45c877ae094db668c1ab4d30f1109a4ec6a/pymupdf-1.27.2.tar.gz", hash = "sha256:37fc9cedeafb40839f86a074d4d9feab725144bdd4bbfd20308ff8957e2b10af", size = 85353104, upload-time = "2026-03-10T12:53:01.697Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/98/ee/2c10b6bde83ee42f5150b690ace952a802a7e632776dadd42bbfe5b68601/pymupdf-1.27.2-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:a60ff9010d7025428e31d92ac2c9b4218c7c4844409d0b31a050565ea0a955fd", size = 23987468, upload-time = "2026-03-10T12:37:06.593Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/44/06/c8cc8c8ade83f5a75ac0f543edc2bc3c52d8c38c1d55d1e0713558258540/pymupdf-1.27.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:5095efb242cfe1c46fec1c864a13f000098564829c98366582dde7ad9e61aa32", size = 23262964, upload-time = "2026-03-10T12:37:23.915Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/1a/8e/df2ab91a680a77c82bc4501cdca60767b3758d75552e4d2849647a16cbc0/pymupdf-1.27.2-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1081235fcfad268d801cd73a7b69c629939e2c46ed4d97035cb1bb7b5b90dc54", size = 24318675, upload-time = "2026-03-10T12:37:42.249Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ab/56/c6c16fa2dcfe2476ec28a9aaaca773dc35c593699e81e573211c91442770/pymupdf-1.27.2-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:917f4dd52daea504d5c60e1430c17d637b5014a43e66d068b4b356effe087dba", size = 24947974, upload-time = "2026-03-10T12:38:00.779Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/7b/4f/1659f1d80b5d2f5aad134c2ca63894c63daf47a3ffb7e18987fe25e49097/pymupdf-1.27.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9617d5e71c334937c804544fa201946c5f73d0a97b5842b96857bdabfefbc343", size = 25169417, upload-time = "2026-03-10T12:38:18.912Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/05/23/e34d704f7242885dd1d67cfbe1040051a04b4b7e2cf1cbd27af9bd4500a3/pymupdf-1.27.2-cp310-abi3-win32.whl", hash = "sha256:6deef49e06c9a5d8670bf5835a911ab887dac4b3ed4bd60ab7d93da6aa8ff6f1", size = 18008725, upload-time = "2026-03-10T12:38:31.915Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f5/fb/a3f1f8813f6e93c65d1f7ebca6530a889f1ae109229b537f7a617b2aab57/pymupdf-1.27.2-cp310-abi3-win_amd64.whl", hash = "sha256:acdfdb7329882246545a0f6bc85f91739e2773ed81f9301c1687cffb826470f3", size = 19237944, upload-time = "2026-03-10T12:38:45.603Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e6/a4/e9257882f0569a21d51207a58f7586a799e76dc6b4008029a04f2329194c/pymupdf-1.27.2-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:261c916915cede4c546559810d3210277f86f31b52dd3de138f1e12d95a4c6b6", size = 24985149, upload-time = "2026-03-10T12:39:02.636Z" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pytesseract"
|
||||||
|
version = "0.3.13"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "packaging" },
|
||||||
|
{ name = "pillow" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/9f/a6/7d679b83c285974a7cb94d739b461fa7e7a9b17a3abfd7bf6cbc5c2394b0/pytesseract-0.3.13.tar.gz", hash = "sha256:4bf5f880c99406f52a3cfc2633e42d9dc67615e69d8a509d74867d3baddb5db9", size = 17689, upload-time = "2024-08-16T02:33:56.762Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/7a/33/8312d7ce74670c9d39a532b2c246a853861120486be9443eebf048043637/pytesseract-0.3.13-py3-none-any.whl", hash = "sha256:7a99c6c2ac598360693d83a416e36e0b33a67638bb9d77fdcac094a3589d4b34", size = 14705, upload-time = "2024-08-16T02:36:10.09Z" },
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user