From 5af97638c973c69b2853b6c75ca8814a2b5b34a9 Mon Sep 17 00:00:00 2001 From: k1nq Date: Mon, 16 Mar 2026 20:28:28 +0500 Subject: [PATCH] Update README and main.py to clarify optional page range arguments --- README.md | 2 ++ main.py | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 15cb234..3b5bab5 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,8 @@ Run OCR for an inclusive 1-based page range and write to a text file: uv run python main.py --input "input.pdf" --start 5 --end 12 --output "result.txt" +`--start` and `--end` are each optional: `--start` defaults to the first page and `--end` defaults to the last page, so omitting both runs OCR on the whole document. + Optional flags: - --lang (default: rus+eng) diff --git a/main.py b/main.py index ce82af6..368ae91 100644 --- a/main.py +++ b/main.py @@ -15,8 +15,8 @@ def build_parser() -> argparse.ArgumentParser: description="Recognize text from selected PDF pages and save to a text file.", ) parser.add_argument("--input", required=True, type=Path, help="Path to source PDF file.") - parser.add_argument("--start", required=True, type=int, help="Start page (1-based, inclusive).") - parser.add_argument("--end", required=True, type=int, help="End page (1-based, inclusive).") + parser.add_argument("--start", type=int, help="Start page (1-based, inclusive). Default: 1.") + parser.add_argument("--end", type=int, help="End page (1-based, inclusive). Default: last page.") parser.add_argument("--output", required=True, type=Path, help="Path to output TXT file.") parser.add_argument("--lang", default="rus+eng", help="Tesseract language(s), example: rus+eng.") parser.add_argument("--dpi", default=300, type=int, help="Render DPI before OCR. 
Default: 300.") @@ -37,6 +37,17 @@ def validate_args(args: argparse.Namespace) -> int: if input_path.suffix.lower() != ".pdf": raise ValueError(f"Input file must have .pdf extension: {input_path}") + total_pages = get_total_pages(input_path) + + if args.start is None and args.end is None: + args.start = 1 + args.end = total_pages + else: + if args.start is None: + args.start = 1 + if args.end is None: + args.end = total_pages + if args.start < 1: raise ValueError("--start must be >= 1.") @@ -50,7 +61,6 @@ def validate_args(args: argparse.Namespace) -> int: if args.rotate not in allowed_rotations: raise ValueError("--rotate must be one of: 0, 90, 180, 270, -90, -180, -270.") - total_pages = get_total_pages(input_path) if args.end > total_pages: raise ValueError( f"--end ({args.end}) is out of range. Document has {total_pages} pages.",