# anki-from-dictionary/main.py

import argparse
import os
from pdf2image import convert_from_path
import dict_to_anki  # ensure dict_to_anki is imported


# NOTE(review): not referenced anywhere in the visible part of this file;
# main() sizes its batches from the --batch-size argument instead. Possibly a
# leftover -- confirm no other module reads it before removing.
IMGS_PER_REQUEST = 2

def is_parsable_to_int(input_string):
    """Report whether ``int()`` accepts *input_string*.

    Returns:
        True when ``int(input_string)`` succeeds, False when it raises
        ``ValueError``. Any other exception raised by ``int()`` propagates.
    """
    try:
        int(input_string)
    except ValueError:
        return False
    else:
        return True

def _parse_page_range(spec):
    """Turn a ``--pages`` value into an inclusive ``(start, end)`` pair.

    Accepts a single page number (``"7"``) or a range (``"3-9"``).

    Raises:
        ValueError: if the value is not of that form, a page number is
            below 1, or the range runs backwards.
    """
    first, dash, last = spec.partition('-')
    if not dash:
        # No dash at all: a single page, so the range is that page only.
        last = first
    start, end = int(first), int(last)
    if start < 1 or end < start:
        raise ValueError(f'invalid page range: {spec!r}')
    return start, end


def _confirm_retry():
    """Ask on stdin whether a failed batch should be retried (default: no)."""
    answer = input("Retry? [y/N]  > ")
    return answer.strip().lower() in ('y', 'yes')


def main(argv=None):
    """Convert a range of PDF pages to cards and write them to a file.

    Each requested page is rendered to a PNG in the images directory, the
    PNGs are passed in batches of ``--batch-size`` to
    ``dict_to_anki.image_to_anki``, and the returned card text is written to
    the output file. Token usage and an approximate cost are printed per
    batch. If a batch yields no cards the user is asked whether to retry;
    declining stops processing and writes whatever was collected so far.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv``.

    Raises:
        SystemExit: on invalid command-line arguments (raised by argparse).
        Exception: if the output file cannot be created.
    """
    parser = argparse.ArgumentParser(description='Parse PDF pages to MD.')
    parser.add_argument('--pages', type=str, required=True, help='Specify pages to parse in format <num>-<num>')
    parser.add_argument('--output-file', type=str, default='out.md', help='Specify output file')
    parser.add_argument('--images-path', type=str, default='./.img/', help='Specify directory for extracted page images')
    parser.add_argument('--ocr', type=str, default=None, help='languages to use for local OCR, e.g. deu+chi_sim for german and simplified chinese (tesseract langpacks needed)')
    parser.add_argument('--batch-size', type=int, default=3, help='Decide how many pages are processed in parallel')
    parser.add_argument('pdf_file', type=str, help='Specify PDF file name')

    args = parser.parse_args(argv)

    # Validate everything before touching the filesystem so that a bad
    # invocation has no side effects.
    try:
        start_page, end_page = _parse_page_range(args.pages)
    except ValueError:
        parser.error(
            f'--pages must be <num> or <num>-<num> with 1 <= start <= end, '
            f'got {args.pages!r}'
        )
    if args.batch_size < 1:
        parser.error('--batch-size must be at least 1')

    # Create the images directory if it doesn't exist
    os.makedirs(args.images_path, exist_ok=True)

    # Fail early (before any expensive processing) if the output file
    # cannot be created.
    try:
        with open(args.output_file, 'w', encoding='utf-8'):
            pass
    except OSError as err:
        raise Exception("Couldn't create output file") from err

    # convert PDF to images for the given pages
    images = convert_from_path(args.pdf_file, first_page=start_page, last_page=end_page)

    # Use only the file name of the PDF: embedding a path with directories
    # would point into sub-directories of images_path that do not exist.
    pdf_name = os.path.basename(args.pdf_file)
    paths = []
    for page_no, image in enumerate(images, start=start_page):
        print(f'extracting image for page {page_no}...')
        image_path = os.path.join(args.images_path, f'{pdf_name}_{page_no}.png')
        image.save(image_path, 'PNG')
        paths.append(image_path)

    ocr = bool(args.ocr)  # OCR is enabled whenever --ocr names any languages
    chunks = []
    cost = 0.0
    aborted = False

    for i, offset in enumerate(range(0, len(paths), args.batch_size)):
        to_process = paths[offset:offset + args.batch_size]

        # Repeat the same batch until it yields cards or the user gives up.
        while True:
            print(f'processing {len(to_process)} image{"s" if len(to_process) != 1 else ""}')
            cards, meta = dict_to_anki.image_to_anki(to_process, do_ocr=ocr, lang=args.ocr)
            if cards:
                break

            print("Error processing! Response: " + str(meta))
            if not _confirm_retry():
                aborted = True
                break

        if aborted:
            break

        # usage logging
        # NOTE: the index printed below is the zero-based batch index, not a
        # PDF page number.
        usage = meta['usage']
        print(f'usage for page {i}:\n{usage["prompt_tokens"]} prompt tokens and {usage["completion_tokens"]} completion tokens')
        print(f'approx. cost: , {usage["prompt_tokens"] * 0.01 / 1000}$ for prompt tokens (pictures approx {0.00745 * len(to_process)}$ of this), {usage["completion_tokens"] * 0.03 / 1000}$ for completion tokens')

        cost_this = (usage["prompt_tokens"] * 0.01 / 1000
                     + usage["completion_tokens"] * 0.03 / 1000)
        cost += cost_this
        print(f'this page: {cost_this}$, total: {cost}$')

        chunks.append(cards + '\n\n\n')

    print("total cost:", cost)

    # Explicit UTF-8: the card text may contain characters the platform
    # default encoding cannot represent.
    with open(args.output_file, 'w', encoding='utf-8') as file:
        file.write(''.join(chunks))

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()