# anki-from-dictionary/main.py

import argparse
import os
from pdf2image import convert_from_path
import dict_to_anki  # ensure dict_to_anki is imported


# NOTE(review): not referenced anywhere in the visible code; batching is driven
# by the --batch-size CLI flag instead — confirm whether this is still needed.
IMGS_PER_REQUEST = 2

def is_parsable_to_int(input_string):
    """Report whether ``int(input_string)`` succeeds.

    Returns:
        True when the conversion works, False when it raises ``ValueError``.
        Any other exception raised by ``int`` propagates to the caller.
    """
    try:
        int(input_string)
    except ValueError:
        return False
    else:
        return True

def _parse_page_range(pages):
    """Turn a ``--pages`` value into an inclusive ``(start, end)`` pair.

    Accepts a single page (``"7"``) or a range (``"3-9"``).

    Raises:
        ValueError: if the value is not ``<num>`` or ``<num>-<num>``, if a
            page number is below 1, or if the range runs backwards.
    """
    first, dash, last = pages.partition('-')
    start = int(first)
    end = int(last) if dash else start
    if start < 1 or end < start:
        raise ValueError(f'invalid page range: {pages!r}')
    return start, end


def _wants_retry(user_response):
    """Return True only for an explicit yes ('y' / 'yes', any casing)."""
    return user_response.strip().lower() in ('y', 'yes')


def main():
    """Command-line entry point: render PDF pages to PNGs and convert them to cards.

    Steps:
      1. Parse and validate the CLI arguments (bad ``--pages`` or
         ``--batch-size`` values exit through ``parser.error`` before any
         file is touched).
      2. Create the images directory and check that the output file is writable.
      3. Render the requested pages with ``convert_from_path`` and save each
         one as a PNG in the images directory.
      4. Pass the PNGs to ``dict_to_anki.image_to_anki`` in batches of
         ``--batch-size``; on a failed batch the user is asked whether to retry.
      5. Print token usage / cost estimates and write all collected cards to
         the output file.

    Raises:
        Exception: if the output file cannot be created.
        SystemExit: on invalid command-line arguments.
    """
    parser = argparse.ArgumentParser(description='Parse PDF pages to MD.')
    parser.add_argument('--pages', type=str, required=True, help='Specify pages to parse in format <num> or <num>-<num>')
    parser.add_argument('--output-file', type=str, default='out.md', help='Specify output file')
    parser.add_argument('--images-path', type=str, default='./.img/', help='Specify directory for the extracted page images')
    parser.add_argument('--ocr', type=str, default=None, help='languages to use for local OCR, e.g. deu+chi_sim for german and simplified chinese (tesseract langpacks needed)')
    parser.add_argument('--batch-size', type=int, default=3, help='Decide how many pages are processed in parallel')
    parser.add_argument('pdf_file', type=str, help='Specify PDF file name')

    args = parser.parse_args()

    # Validate everything up front so a typo never leaves stray files behind.
    try:
        start_page, end_page = _parse_page_range(args.pages)
    except ValueError:
        parser.error('--pages must be <num> or <num>-<num> with 1 <= start <= end')

    if args.batch_size < 1:
        parser.error('--batch-size must be at least 1')

    # Create the images directory if it doesn't exist
    os.makedirs(args.images_path, exist_ok=True)

    # Fail early (before the expensive conversion) if the output file
    # cannot be created. Note that this truncates an existing file.
    try:
        with open(args.output_file, 'w'):
            pass
    except OSError as err:
        raise Exception("Couldn't create output file") from err

    # convert PDF to images for the given page range
    images = convert_from_path(args.pdf_file, first_page=start_page, last_page=end_page)

    # Use only the file name of the PDF so that a PDF given with a directory
    # component does not point into a non-existent subfolder of images_path.
    pdf_name = os.path.basename(args.pdf_file)

    paths = []
    for page_number, image in enumerate(images, start=start_page):
        print(f'extracting image for page {page_number}...')
        image_path = os.path.join(args.images_path, f'{pdf_name}_{page_number}.png')
        image.save(image_path, 'PNG')
        paths.append(image_path)

    use_ocr = bool(args.ocr)  # local OCR is enabled whenever --ocr is given
    card_chunks = []
    cost = 0.0
    aborted = False

    for i, offset in enumerate(range(0, len(paths), args.batch_size)):
        to_process = paths[offset:offset + args.batch_size]

        # Retry loop: repeat the same batch until it succeeds or the user gives up.
        while True:
            print(f'processing {len(to_process)} image{"s" if len(to_process) != 1 else ""}')

            cards, meta = dict_to_anki.image_to_anki(to_process, do_ocr=use_ocr, lang=args.ocr)
            if cards:
                break

            print("Error processing! Response: " + str(meta))
            if not _wants_retry(input("Retry? [y/N]  > ")):
                aborted = True
                break

        if aborted:
            # Stop processing; cards collected so far are still written below.
            break

        # usage logging
        # NOTE(review): 0.01 / 0.03 look like per-1K-token prices and 0.00745 a
        # per-image estimate — confirm against the current API pricing.
        usage = meta['usage']
        prompt_cost = usage["prompt_tokens"] * 0.01 / 1000
        completion_cost = usage["completion_tokens"] * 0.03 / 1000
        print(f'usage for page {i}:\n{usage["prompt_tokens"]} prompt tokens and {usage["completion_tokens"]} completion tokens')
        print(f'approx. cost: , {prompt_cost}$ for prompt tokens (pictures approx {0.00745 * len(to_process)}$ of this), {completion_cost}$ for completion tokens')

        cost_this = prompt_cost + completion_cost
        cost += cost_this
        print(f'this page: {cost_this}$, total: {cost}$')

        card_chunks.append(cards + '\n\n\n')

    print("total cost:", cost)

    # Cards contain dictionary text in arbitrary languages; write as UTF-8 so
    # the result does not depend on the platform's default encoding.
    with open(args.output_file, 'w', encoding='utf-8') as file:
        file.write(''.join(card_chunks))

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
INITIAL COMMIT 2024-02-01 11:56:34 +00:00			`import argparse`
			`import os`
			`from pdf2image import convert_from_path`
			`import dict_to_anki # ensure dict_to_anki is imported`


			`IMGS_PER_REQUEST = 2`

			`def is_parsable_to_int(input_string):`
			`try:`
			`int(input_string)`
			`return True`
			`except ValueError:`
			`return False`

			`def main():`
			`parser = argparse.ArgumentParser(description='Parse PDF pages to MD.')`
			`parser.add_argument('--pages', type=str, required=True, help='Specify pages to parse in format <num>-<num>')`
			`parser.add_argument('--output-file', type=str, default='out.md', help='Specify output file')`
			`parser.add_argument('--images-path', type=str, default='./.img/', help='Specify output file')`
fix: fixed help msg for --ocr argument 2024-02-05 17:36:44 +00:00			`parser.add_argument('--ocr', type=str, default=None, help='languages to use for local OCR, e.g. deu+chi_sim for german and simplified chinese (tesseract langpacks needed)')`
Implement local OCR and batch processing CLI flag Implemented optical character recognition (OCR) in the image_to_anki function to vastly enhance performance. Additionally, allowed batch processing of images via explicitly specified batch size in command-line arguments 2024-02-05 08:47:49 +00:00			`parser.add_argument('--batch-size', type=int, default=3, help='Decide how many pages are processed in parallel')`
INITIAL COMMIT 2024-02-01 11:56:34 +00:00			`parser.add_argument('pdf_file', type=str, help='Specify PDF file name')`

			`args = parser.parse_args()`

			`# check if page is a single digit`

			`if is_parsable_to_int(args.pages):`
			`start_page = int(args.pages)`
			`end_page = start_page`
			`else:`
			`# parse pages into start and end page`
			`start_page, end_page = map(int, args.pages.split('-'))`

			`# todo: validate output file`

			`# Create the images directory if it doesn't exist`
			`os.makedirs(args.images_path, exist_ok=True)`

			`# Create output file or throw error if not possible`
			`try:`
			`with open(args.output_file, 'w') as file:`
			`pass`
			`except OSError:`
			`raise Exception("Couldn't create output file")`

			`# convert PDF to images for the given page`
			`images = convert_from_path(args.pdf_file, first_page=start_page, last_page=end_page)`

			`# for each image, run dict_to_anki.convert(filename)`

			`out = ''`

			`cost = 0.0`

			`paths = []`

			`for i, image in enumerate(images, start=start_page):`
			`print(f'extracting image for page {i}...')`
			`image_path = f"{args.images_path.rstrip('/')}/{args.pdf_file}_{i}.png"`
			`image.save(image_path, 'PNG')`
			`paths.append(image_path)`

			`break_outer = False`

Implement local OCR and batch processing CLI flag Implemented optical character recognition (OCR) in the image_to_anki function to vastly enhance performance. Additionally, allowed batch processing of images via explicitly specified batch size in command-line arguments 2024-02-05 08:47:49 +00:00			`for i in range(len(paths) // args.batch_size + 1): # the batch size argument is used here`
INITIAL COMMIT 2024-02-01 11:56:34 +00:00			`# print(i)`
Implement local OCR and batch processing CLI flag Implemented optical character recognition (OCR) in the image_to_anki function to vastly enhance performance. Additionally, allowed batch processing of images via explicitly specified batch size in command-line arguments 2024-02-05 08:47:49 +00:00
INITIAL COMMIT 2024-02-01 11:56:34 +00:00			`# collect images`
			`while True:`
Implement local OCR and batch processing CLI flag Implemented optical character recognition (OCR) in the image_to_anki function to vastly enhance performance. Additionally, allowed batch processing of images via explicitly specified batch size in command-line arguments 2024-02-05 08:47:49 +00:00			`to_process = paths[i * args.batch_size:i * args.batch_size + args.batch_size] # the batch size argument is used here`
INITIAL COMMIT 2024-02-01 11:56:34 +00:00			`# print(to_process)`
			`if len(to_process) == 0:`
			`# skip if remaining list is empty (e.g. if 4 pages at package size 2)`
			`break`

			`print(f'processing {len(to_process)} image{"s" if len(to_process) != 1 else ""}')`

Implement local OCR and batch processing CLI flag Implemented optical character recognition (OCR) in the image_to_anki function to vastly enhance performance. Additionally, allowed batch processing of images via explicitly specified batch size in command-line arguments 2024-02-05 08:47:49 +00:00			`ocr = True if args.ocr else False # set OCR to True if --ocr parameter is present`

			`cards, meta = dict_to_anki.image_to_anki(to_process, do_ocr=ocr, lang=args.ocr)`
INITIAL COMMIT 2024-02-01 11:56:34 +00:00
			`if not cards:`
fix(debug): if request fails, prints proper output 2024-02-05 14:07:25 +00:00			`print("Error processing! Response: " + str(meta))`
INITIAL COMMIT 2024-02-01 11:56:34 +00:00			`user_response = input("Retry? [y/N] > ")`
			`if user_response != 'y' or 'yes' or 'Y' or 'YES' or 'Yes':`
			`break_outer = True`
			`break`
			`else:`
			`continue`


			`# usage logging`
			`usage = meta['usage']`
			`print(f'usage for page {i}:\n{usage["prompt_tokens"]} prompt tokens and {usage["completion_tokens"]} completion tokens')`
			`print(f'approx. cost: , {usage["prompt_tokens"] * 0.01 / 1000}$ for prompt tokens (pictures approx {0.00745 * len(to_process)}$ of this), {usage["completion_tokens"] * 0.03 / 1000}$ for completion tokens')`

			`cost_this = (usage["prompt_tokens"] * 0.01 / 1000`
			`+ usage["completion_tokens"] * 0.03 / 1000)`
			`cost += cost_this`
			`print(f'this page: {cost_this}$, total: {cost}$')`

			`out += cards + '\n\n\n'`
			`break`

			`if break_outer:`
			`break`


			`print("total cost:", cost)`

			`with open(args.output_file, 'w') as file:`
			`file.write(out)`

			`if __name__ == "__main__":`
			`main()`