INITIAL COMMIT

2023-10-23 17:49:26 +02:00
commit 9121ad819d
10 changed files with 2815 additions and 0 deletions
--- a/.fleet/run.json
+++ b/.fleet/run.json
@ -0,0 +1,12 @@
+{
+    "configurations": [
+        {
+            "type": "command",
+            "name": "Command configuration",
+            "program": "poetry",
+            "args": ["run", "python", "main.py"]
+        }
+        
+        
+    ]
+}
--- a/README.md
+++ b/README.md
@ -0,0 +1,23 @@
+# Flüsterpost
+
+Flüsterpost takes audio files and turns them into text. It's made to do so easily, quickly, and locally.
+
+## How to Run
+
+This repo uses the poetry package manager for Python. If you don't have it, you'll need to install it before using this.
+(I can recommend it; it's great.)
+
+`cd` into the repository. Then, run `poetry install`.
+Then, do `poetry run python main.py`.
+
+### Deps
+This project uses [flet](https://flet.dev/) for the GUI. On Linux systems, you'll need `gstreamer` installed.
+You probably have this shipped with your system, but if you get the error message
+`error while loading shared libraries: libgstapp-1.0.so.0: cannot open shared object file: No such file or directory`,
+install it. More info on the [flet docs page](https://flet.dev/docs/guides/python/getting-started).
+
+## License
+This project is licensed under the MIT license.
+
+## Contributions
+Any contributions to this project will be licensed under the MIT license, unless explicitly noted otherwise.
--- a/pycache/nn_model_manager.cpython-311.pyc
+++ b/pycache/nn_model_manager.cpython-311.pyc
--- a/pycache/utils.cpython-311.pyc
+++ b/pycache/utils.cpython-311.pyc
--- a/main.py
+++ b/main.py
@ -0,0 +1,322 @@
+import os
+import pprint
+import traceback
+
+import utils
+import flet as ft
+
+from typing import DefaultDict
+
+import pygame
+
+import nn_model_manager as mm
+
+
+def main(page):
+    # Entry point handed to ft.app(): builds the UI on `page` and defines the
+    # event handlers (as closures) that operate on the refs declared below.
+    pygame.mixer.init()
+
+    # NOTE(review): these three refs are only read by btn_click and are never
+    # passed as `ref=` to any control inside main - presumably leftovers; confirm.
+    first_name = ft.Ref[ft.TextField]()
+    last_name = ft.Ref[ft.TextField]()
+    greetings = ft.Ref[ft.Column]()
+
+    # left pane: column holding the folder trees, plus its "empty" placeholder text
+    file_tree = ft.Ref[ft.Column]()
+    file_tree_empty_text = ft.Ref[ft.Text]()
+    
+    # model controls: status text, parameter dropdowns, load/unload button, spinner
+    load_model_text = ft.Ref[ft.Text]()
+    model_size_select = ft.Ref[ft.Dropdown]()
+    model_device_select = ft.Ref[ft.Dropdown]()
+    # model_bits_select = ft.Ref[ft.Dropdown]()
+    model_load_unload_button = ft.Ref[ft.IconButton]()
+    model_loading_spinner = ft.Ref[ft.ProgressRing]()
+    
+    # one ref per transcribe button in the file tree, so they can all be
+    # enabled/disabled together when the model state changes
+    transcribe_buttons: list[ft.Ref[ft.IconButton]] = []
+    
+    # right pane: container and column that display the transcription output
+    output_text_container = ft.Ref[ft.Container]()
+    output_text_col = ft.Ref[ft.Column]()
+
+    def generate_file_tree(path: str, tree_dict: dict | DefaultDict):
+        if path[-1] == os.sep:
+            path = path[:-1]
+
+        folder_name = utils.get_last_segment(path)
+        print(f"DEBUG: generating tree for folder {folder_name}")
+
+        # find folders, and add dict for each
+        print(f"adding name {folder_name} to ui")
+
+        controls = [
+            ft.Row(
+                [
+                    ft.Icon(ft.icons.FOLDER, color=ft.colors.BLUE),
+                    ft.Text(folder_name, size=14, weight=ft.FontWeight.BOLD),
+                ]
+            )
+        ]
+
+        for folder_name, value in tree_dict.items():
+            if folder_name == utils.FILES_KEY or folder_name == '.':
+                continue  # skip for now
+
+            controls.append(generate_file_tree(path + os.sep + folder_name, value))
+
+        # now folders are there, let's do files
+        if utils.FILES_KEY not in tree_dict and '.' in tree_dict:
+            tree_dict = tree_dict['.']  # if root dir, enter root dir (.) directory
+
+        files_controls = []
+
+        for file in tree_dict[utils.FILES_KEY]:
+            control = [ft.Text(file)]
+            
+            if not file.endswith('.mp3'):
+                continue
+
+            def start_playing(filepath: str, button_ref: ft.Ref[ft.IconButton]):
+                print(f"trying to play {filepath}...")
+                if pygame.mixer.music.get_busy() or not os.path.isfile(filepath):
+                    return
+
+                print("starting playback")
+
+                pygame.mixer.music.load(filepath)
+                pygame.mixer.music.play()
+
+                button_ref.current.icon = ft.icons.PAUSE_CIRCLE_FILLED_OUTLINED
+                button_ref.current.on_click = lambda _, f=filepath, r=button_ref: stop_playing(f, r)
+                page.update()
+
+            def stop_playing(filepath: str, button_ref: ft.Ref[ft.IconButton]):
+                print("stopping playback")
+
+                pygame.mixer.music.stop()
+
+                button_ref.current.icon = ft.icons.PLAY_CIRCLE_OUTLINED
+                button_ref.current.on_click = lambda _, f=filepath, r=button_ref: start_playing(f, r)
+                page.update()
+
+            full_file_path = path + os.sep + file
+
+            _button_ref = ft.Ref[ft.IconButton]()
+
+            control.append(ft.IconButton(icon=ft.icons.PLAY_CIRCLE_OUTLINED, ref=_button_ref,
+                                on_click=lambda _, f=full_file_path, r=_button_ref: start_playing(f, r)))
+            
+            def transcribe(filepath: str):
+                print(f"DEBUG: trying to transcribe file {filepath}")
+                if not mm.is_model_loaded() or not filepath.endswith('.mp3'):
+                    return
+                
+                print(f"DEBUG: starting transcription")
+                output_text_container.current.alignment = ft.alignment.center
+                output_text_col.current.controls = [ft.ProgressRing()]
+                
+                # set all transcribe buttons to disabled
+                for btn in transcribe_buttons:
+                    btn.current.disabled = True
+                page.update()
+                
+                try:
+                    segments, info = mm.transcribe_from_file(filepath)
+                    
+                    txt = ''
+                    
+                    for seg in segments:
+                        txt += seg.text + '\n'
+                    
+                    output_text_container.current.alignment = ft.alignment.top_left
+                    output_text_col.current.controls = [ft.Text(txt, selectable=True)]  # TODO
+                        
+                except Exception as e:
+                    output_text_container.current.alignment = ft.alignment.center
+                    output_text_col.current.controls = [ft.Text(f"Transcribing failed: {str(e)}")]  # TODO
+                    
+                finally:
+                    # set all transcribe buttons to disabled
+                    for btn in transcribe_buttons:
+                        btn.current.disabled = False
+                    page.update()
+                    
+                
+            transcribe_button_ref = ft.Ref[ft.IconButton]()
+            
+            control.append(ft.IconButton(icon=ft.icons.FORMAT_ALIGN_LEFT, disabled=not mm.is_model_loaded(), ref=transcribe_button_ref,
+                                         on_click=lambda _, f=full_file_path: transcribe(f)))
+            
+            transcribe_buttons.append(transcribe_button_ref)
+
+            files_controls.append(ft.Row(control))
+
+        if len(files_controls) == 0:
+            files_controls.append(ft.Text('No mp3 Files found', color='grey'))
+
+        return ft.Row([
+            ft.VerticalDivider(),
+            ft.Column(controls + [ft.Row([ft.VerticalDivider(), ft.Column(files_controls)])])
+        ]
+        )
+
+    def btn_click(e):
+        greetings.current.controls.append(
+            ft.Text(f"Hello, {first_name.current.value} {last_name.current.value}!")
+        )
+        first_name.current.value = ""
+        last_name.current.value = ""
+        page.update()
+        first_name.current.focus()
+
+    def on_dialog_result(e: ft.FilePickerResultEvent):
+        path = e.path
+        if path:
+            print(f"path is {path}")
+            try:
+                if os.path.isdir(path):
+                    tree = utils.build_file_tree(path)
+
+                    if '.' in tree:  # if there is actually a proper file tree
+                        # add to view
+                        file_tree.current.controls.append(
+                            generate_file_tree(path, utils.defaultdict_to_dict(tree))
+                        )
+                        file_tree_empty_text.current.visible = False
+
+                    page.update()
+            except e:
+                print("didn't work aaa")  # TODO: fix
+    
+    def load_model():
+        
+        load_model_text.current.value = 'Loading... This may take a while.'
+        
+        model_size_select.current.disabled = True
+        model_device_select.current.disabled = True
+        # model_bits_select.current.disabled = True
+        model_load_unload_button.current.disabled = True
+        model_loading_spinner.current.visible = True
+        page.update()
+        
+        try:
+            mm.set_model(
+                size=model_size_select.current.value or 'base',
+                device=model_device_select.current.value or 'auto',
+                # compute_type=model_bits_select.current.value or '16bit',
+            )
+        except Exception as e:
+            print(f"loading model failed. Exception: {str(e)}")
+            print(traceback.format_exc())
+            load_model_text.current.value = f'Loading failed. Reason:\n{str(e)}'
+            model_size_select.current.disabled = False
+            model_device_select.current.disabled = False
+            # model_bits_select.current.disabled = False
+            
+            # raise e
+                
+        model_loading_spinner.current.visible = False
+        model_load_unload_button.current.disabled = False
+            
+        if mm.is_model_loaded():
+            load_model_text.current.value = f'Loaded.'
+            model_load_unload_button.current.icon = ft.icons.CLOSE
+            model_load_unload_button.current.on_click = lambda _: unload_model()
+            
+            # if successful, save to shared preferences
+            page.client_storage.set('model_size', model_size_select.current.value)
+            page.client_storage.set('device_select', model_device_select.current.value)
+            
+            # set all transcribe buttons to enabled
+            for btn in transcribe_buttons:
+                btn.current.disabled = False
+        
+        page.update()
+            
+    def unload_model():
+        model_load_unload_button.current.disabled = True
+        
+        # set all transcribe buttons to disabled
+        for btn in transcribe_buttons:
+            btn.current.disabled = True
+            
+        page.update()
+        
+        if mm.is_model_loaded():
+            mm.unload_model()
+            
+        load_model_text.current.value = 'Select parameters, and then load transcription model.'
+        model_size_select.current.disabled = False
+        model_device_select.current.disabled = False
+        # model_bits_select.current.disabled = False
+        model_load_unload_button.current.disabled = False
+        model_load_unload_button.current.icon = ft.icons.START
+        model_load_unload_button.current.on_click = lambda _: load_model()
+        model_loading_spinner.current.visible = False
+        page.update()
+        
+            
+            
+
+    # set up file picker
+    file_picker = ft.FilePicker(on_result=on_dialog_result)
+
+    page.overlay.append(file_picker)
+
+    page.add(
+        ft.Text("Flüsterpost", style=ft.TextThemeStyle.TITLE_LARGE),
+        ft.Divider()
+    )
+
+    page.add(
+        ft.ResponsiveRow([
+            ft.Container(
+                ft.Column([
+                    ft.ElevatedButton("Add Folder", on_click=lambda _: file_picker.get_directory_path()),
+                    ft.Column(ref=file_tree, scroll=ft.ScrollMode.ALWAYS, expand=True),
+                    # ft.ListView(ref=file_tree),
+                    ft.Text("No Folder Open Yet", style=ft.TextTheme.body_small, color="grey",
+                            ref=file_tree_empty_text),
+                ], expand=True), expand=True, col=4),
+            ft.Container(expand=True, content=ft.Column(expand=True, controls=[
+                ft.Column([
+                    ft.Text('Select parameters, and then load transcription model.', ref=load_model_text),
+                    ft.Row([
+                        ft.Dropdown(
+                            ref=model_size_select,
+                            width=100,
+                            hint_text='model size',
+                            value=page.client_storage.get('model_size') if page.client_storage.contains_key('model_size') else 'base',
+                            options=[ft.dropdown.Option(x) for x in mm.ModelSize.__args__],  # __args__ is not perfect here. But works.
+                        ),
+                        ft.Dropdown(
+                            ref=model_device_select,
+                            width=100,
+                            hint_text='device',
+                            value=page.client_storage.get('device_select') if page.client_storage.contains_key('device_select') else 'auto',
+                            options=[ft.dropdown.Option(x) for x in mm.Device.__args__]  # __args__ is not perfect here. But works.
+                        ),
+                        # ft.Dropdown(
+                        #    ref=model_bits_select,
+                        #    width=100,
+                        #    hint_text='bits',
+                        #    value='16bit',
+                        #    options=[ft.dropdown.Option(x) for x in mm.ComputeType.__args__]  # __args__ is not perfect here. But works.
+                        #),
+                        ft.IconButton(
+                            icon=ft.icons.START,
+                            ref=model_load_unload_button,
+                            on_click=lambda _: load_model(),
+                        ),
+                        ft.ProgressRing(ref=model_loading_spinner, visible=False)
+                    ])
+                ]),
+                ft.Container(expand=True, padding=12, border=ft.border.all(2, 'grey'), 
+                             alignment=ft.alignment.center,
+                             ref=output_text_container,
+                             content=ft.Column(
+                                 [ft.Text('Nothing to see here!', text_align=ft.TextAlign.CENTER)],
+                                                ref=output_text_col,
+                                                expand=True,
+                                                scroll=ft.ScrollMode.ADAPTIVE)),
+            ]), col=8)
+        ], expand=True),
+    )
+
+
+ft.app(target=main)
--- a/nn_model_manager.py
+++ b/nn_model_manager.py
@ -0,0 +1,65 @@
+import threading
+
+from faster_whisper import WhisperModel
+import faster_whisper
+from typing import Literal, Iterable, Tuple
+
+_model: WhisperModel | None = None
+
+ModelSize = Literal["tiny", "base", "small", "medium", "large-v1", "large-v2"]
+Device = Literal["cuda", "cpu", "auto"]
+ComputeType = Literal["8bit", "16bit", "32bit"]
+
+
+def set_model(size: ModelSize, device: Device):  #, compute_type: ComputeType):
+    
+    '''
+    compute = None
+    if compute_type == '8bit':
+        if device == 'cuda' or device == 'auto':
+            compute = 'int8_float16'
+        else:
+            compute = 'int8'
+    elif compute_type == '16bit':
+        if device == 'cuda' or device == 'auto':
+            compute = 'int8'
+        else:
+            raise Exception("Cannot do 16 bit computing on CPU")
+    elif compute_type == '32bit':
+        compute = 'float'
+    else:
+        raise Exception(f"Invalid Compute / Device configuration (device {device} with {compute_type})")
+    '''
+
+    global _model
+    _model = WhisperModel(size, device=device)
+
+
+def unload_model():
+    if not is_model_loaded():
+        return
+    
+    global _model
+    _model = None  # TODO: check if this works
+
+def is_model_loaded() -> bool:
+    global _model
+    return _model is not None
+
+
+def transcribe_from_file(mp3_path: str) -> Tuple[Iterable[faster_whisper.transcribe.Segment], faster_whisper.transcribe.TranscriptionInfo] | None:
+    """
+        Transcribe audio from an MP3 file.
+        Note that this can - and will - crash if you don't catch exceptions.
+        
+        If the model isn't loaded yet, this will return None.
+        Otherwise, it will return the raw transcription from `faster-whisper`.
+    """
+    if not is_model_loaded():
+        return None
+    
+    global _model
+    segments, info = _model.transcribe(mp3_path, beam_size=5)
+    # transcribe, and throw all exceptions to application to handle
+    
+    return segments, info
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,19 @@
+[tool.poetry]
+name = "fluesterpost"
+version = "0.1.0"
+description = "App for transcribing audio files to text"
+authors = ["Yandrik <me@yandrik.dev>"]
+license = "MIT"
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+flet = "^0.10.3"
+faster-whisper = "^0.9.0"
+pygame = "^2.5.2"
+torch = "2.0.0"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
--- a/285
+++ b/285
@ -0,0 +1,285 @@
+{   '.': {   '_i_files': [   'getting-started.md',
+                             'inbox.md',
+                             'readme.md',
+                             'skill_stack.md',
+                             'player_famillarity.md',
+                             'todo.md']},
+    '.foam': {   '_i_files': [],
+                 'templates': {   '_i_files': [   'anki-cards.md',
+                                                  'your-first-template.md',
+                                                  'zettel-refs.md',
+                                                  'zettel.md']}},
+    '.git': {   '_i_files': [   'description',
+                                'packed-refs',
+                                'HEAD',
+                                'config',
+                                'COMMIT_EDITMSG',
+                                'FETCH_HEAD',
+                                'ORIG_HEAD',
+                                'index'],
+                'branches': {'_i_files': []},
+                'hooks': {   '_i_files': [   'applypatch-msg.sample',
+                                             'commit-msg.sample',
+                                             'post-update.sample',
+                                             'pre-applypatch.sample',
+                                             'pre-commit.sample',
+                                             'pre-merge-commit.sample',
+                                             'pre-push.sample',
+                                             'pre-receive.sample',
+                                             'push-to-checkout.sample',
+                                             'update.sample',
+                                             'fsmonitor-watchman.sample',
+                                             'pre-rebase.sample',
+                                             'prepare-commit-msg.sample']},
+                'info': {'_i_files': ['exclude']},
+                'logs': {   '_i_files': ['HEAD'],
+                            'refs': {   '_i_files': [],
+                                        'heads': {'_i_files': ['master']},
+                                        'remotes': {   '_i_files': [],
+                                                       'origin': {   '_i_files': [   'HEAD',
+                                                                                     'master']}}}},
+                'objects': {   '01': {   '_i_files': [   '92178cee01a3ad18cb621cac1ba64ad8e4e8a3']},
+                               '02': {   '_i_files': [   '8c8217c849a00a010fec6cfa99a9e92e8c9375',
+                                                         '557a1b00b8c95a84b967c50c1e1aa7974a69ce']},
+                               '05': {   '_i_files': [   'f4816f07fc4f96c0c2010cf0d7449f00fcce19',
+                                                         '0a51af521fb44d8aacd7604d14b8766238d66f']},
+                               '0b': {   '_i_files': [   'f3fdf9465baf34897ad3252be2341bc1bf7db8']},
+                               '0f': {   '_i_files': [   'd094c8d57913f667e462632d49b2f052bab5de']},
+                               '11': {   '_i_files': [   'ae42db7bb29c0a5f1987e75db0c49fbdf8f957']},
+                               '13': {   '_i_files': [   'b1d98f14fd2acff4afa59cf80bf7150a44d8d5']},
+                               '18': {   '_i_files': [   'b5e3934b5d99910733c118c634fcd17c58b369']},
+                               '1d': {   '_i_files': [   '8661f5e25c86324e1bba1933948c3900126c06']},
+                               '20': {   '_i_files': [   '0e31f1ae6fcde97e64e5dc1ea7d5f10008dea4']},
+                               '24': {   '_i_files': [   '5d1d3c006b36d48b007298349f2f33bf3b14d8']},
+                               '28': {   '_i_files': [   'b6a59bbb64bd2705e586f53a880ce2d4c00515']},
+                               '2b': {   '_i_files': [   '25ddb4990745c5df76289a21a4d7d6e333d054']},
+                               '30': {   '_i_files': [   'e393907000c0219d86dbffd28bb1e548c7a078']},
+                               '32': {   '_i_files': [   '711ab584d672d6e5aa62da57b5a45c8d231689']},
+                               '34': {   '_i_files': [   '7998770c00191c4a76a470c5a4a1311c2265fc']},
+                               '35': {   '_i_files': [   'bb5e4cb76375d51162c6143c3ebd998f469714']},
+                               '38': {   '_i_files': [   'b873fbb24e1d38a5896f9161b0d4e98aec472f']},
+                               '39': {   '_i_files': [   'cdb1347792036be3a4ceb5fe49faace3c5f579']},
+                               '3a': {   '_i_files': [   'ba671d641c06e76036016b91e577f91a4b02ad',
+                                                         '44ad749af1d6dc5866356beb964f766eee5128']},
+                               '42': {   '_i_files': [   'a79846fbcee6a7b10a0ce6a6fca8994fc80876']},
+                               '45': {   '_i_files': [   'dc2d782c3fe9faf18d6517f11f183cf78f177a']},
+                               '53': {   '_i_files': [   '196f6105b690fd51e81484deb1321976b1daa9']},
+                               '56': {   '_i_files': [   '729062c3620d0cc078c7b7033f93e64982bfc4',
+                                                         'b44a575108bdb87f385b47c30f68d418a3eaaa']},
+                               '5c': {   '_i_files': [   '13490222138a9d52e50f4782969284fd43e8ce']},
+                               '5f': {   '_i_files': [   '3ff320973f57bb17f5abd596af5f73b15bd601']},
+                               '67': {   '_i_files': [   'e12b1d2f0a5307acc8f06fa0edb883ef86acfe']},
+                               '6e': {   '_i_files': [   'd2d0031d1ffb6c25f58046a1971661e0cf1cc4']},
+                               '70': {   '_i_files': [   '92cf531a29dd7fdc0a4d09acb194c4a57d8803',
+                                                         'b9d0a5d2154b5be599f50ca39883642ef681ec']},
+                               '71': {   '_i_files': [   '45b359be3dd773a9ccbc6aaeba7b9be9ba074c']},
+                               '74': {   '_i_files': [   '83a25779adf27f8c37221076cb0e61a2025a5c']},
+                               '75': {   '_i_files': [   '490a494632a140b2119a894e6521161d302769']},
+                               '76': {   '_i_files': [   'd9e26bda3f744c90704e94004f2350a3b145ba']},
+                               '78': {   '_i_files': [   '4442d08c18a84bd0570ecc56518073f4a02430']},
+                               '79': {   '_i_files': [   'bcbd1fa1badcf3677e52ddd76721bf7ddfc115']},
+                               '7a': {   '_i_files': [   '5177de86b242d198edb87bd20a5613f106e596']},
+                               '82': {   '_i_files': [   '65b4da954fcdc135ffaf0019b31cdb228e59ec',
+                                                         'a06086a94bc1fa94b8f054e7c48c3a8d45a580']},
+                               '85': {   '_i_files': [   '29e2398baf641413f194a652305c3632c757c7']},
+                               '88': {   '_i_files': [   '83fca5338d4419f32e57377106986fea85f81e']},
+                               '8c': {   '_i_files': [   '972d8832336104982d5f66037246ed75050c94']},
+                               '8d': {   '_i_files': [   '200ac5a1dd8d235bb786bcdf94527297c6019c']},
+                               '90': {   '_i_files': [   'f26b565b09edc34d98bfe9b01dfc88d01b3e05']},
+                               '92': {   '_i_files': [   '048c50fec01e3a6b9cc7d301b48f62f0d47ad6',
+                                                         'b464c0c03c745287234e4ccbbd0dc5285fc545']},
+                               '93': {   '_i_files': [   'a00093b5319ab79d4b7a9c490cb4e2a661907e']},
+                               '94': {   '_i_files': [   '05bfdc224d3e5dfe9fe6116a4fd2955ab10a72']},
+                               '95': {   '_i_files': [   '0027f29e86396e4d7416487d16100fc559fb2d',
+                                                         '7a01dc4f30e7f3424d004a17d7b5571d0e3879']},
+                               '96': {   '_i_files': [   '2e3e24df9db27f23050dc22c057793b40455cc']},
+                               '97': {   '_i_files': [   'f9215e573ba5b7d2273bc7b368d025b37f0f7a']},
+                               '9c': {   '_i_files': [   '6b61bb1fdfba8f5b242f7f98284a528a9e17c8']},
+                               '9d': {   '_i_files': [   '9fe831567fe46ae777733412cad5157982c44a']},
+                               '9e': {   '_i_files': [   '26dfeeb6e641a33dae4961196235bdb965b21b',
+                                                         'a7cd9ba86b57ada4037f24526eee3579efd26d']},
+                               '_i_files': [],
+                               'a6': {   '_i_files': [   'ffab47a1ec45c43ed1927ddc6097a088fcad4f']},
+                               'a8': {   '_i_files': [   'a84552bead93885c351952ca9781d4ddbe5780',
+                                                         'edeadbf5b40ab9eb7598d729263e270ddd9e63']},
+                               'b3': {   '_i_files': [   '2e1b1d3d1bbd905aca74818820b184001fea13']},
+                               'b5': {   '_i_files': [   'cd6cf2d939352ff74ac7ca30018decc0647b65']},
+                               'b9': {   '_i_files': [   '0227ebf336c51d2364f2c6ed9cb9896a236f49']},
+                               'ba': {   '_i_files': [   '4fad78807ee9ec608eca8c6355b54146b52a68']},
+                               'bb': {   '_i_files': [   '90408b3c6db106c5fe19e6bd142d2bf6b7b528']},
+                               'bc': {   '_i_files': [   '4057206a45a261e46f8b6f4896d7c5b843876f']},
+                               'c0': {   '_i_files': [   'd770dce01d3d0ca290ddd7e494714960c77326',
+                                                         '190d5c75633f5ea107de72845dc5387a518400',
+                                                         '8336e9a181a2788e9bbcba484415698701638a',
+                                                         '0153d83030dad37d32e38628fe10e304f3377c']},
+                               'c3': {   '_i_files': [   '31566dd29ec2f17a6ba736a2fe0a45fa65e89d']},
+                               'c4': {   '_i_files': [   '1c5acd753957dda0d5bdeb25fb47e793080453']},
+                               'c8': {   '_i_files': [   'c365d89b8b1bad158cba37051fc32cbc784c65']},
+                               'ce': {   '_i_files': [   '8bbb33980fd7c40f017d1063950fd8fdab2d6b']},
+                               'cf': {   '_i_files': [   'dc0b4420769cfc9219fa741447ae43b8129b75']},
+                               'd8': {   '_i_files': [   'ee8b9b4c05ce67098abbdfe9e351d63b21f18e',
+                                                         'eeb04612a7818d4c0872b312f4e2241f996190']},
+                               'd9': {   '_i_files': [   '54566657c342600667bb4836f83a8407e32807']},
+                               'dd': {   '_i_files': [   '2a192a1c242d0691fb8db2fdd08733c63a0dfb']},
+                               'e1': {   '_i_files': [   '2b226b929c2c1996829b1dd45539f71fba1d4b']},
+                               'e5': {   '_i_files': [   'c523af28c3a765a63e7e5fbf44a40a44cd0d05']},
+                               'e6': {   '_i_files': [   '80ac775c8245442f5dec43644a2aa7aac6dbc2',
+                                                         'a347474ea106d1b0979e5aead00e695710eb34']},
+                               'ea': {   '_i_files': [   '514bc29f92feccb4fc8fb53dcbee24cb9863b2']},
+                               'ee': {   '_i_files': [   '41eb2f83cb8a36fb5773cc3501a73d6b5a484f']},
+                               'ef': {   '_i_files': [   '998d5afc023c58a624a7b01261f15b416ef286']},
+                               'f1': {   '_i_files': [   '3dba7d7450e0ea935bc15b087c2e62e3f9f62b',
+                                                         '053bfb26234ebea971e874a0150b221f5f598d',
+                                                         'ae4aac4535bfe3d569f4376e1992acdd5e5fde']},
+                               'f2': {   '_i_files': [   '6065a6e08f05d5d0dad0e8f9de825648938a04',
+                                                         '432313c1f432e922ed0dc4baea17c8d5043cfb']},
+                               'f3': {   '_i_files': [   'ebbe30d46a1d7d56b95ab0d64f64b7cfede59a']},
+                               'f6': {   '_i_files': [   'd65179b69864c0b1d88f4fa32b35a9089bfa83']},
+                               'f8': {   '_i_files': [   '5851a1afaca442587934e62f1b182f3e09e3d1',
+                                                         'af88bf54d71d06fba7335ff4662722019a1f9b',
+                                                         '61ad4ac94746c2347df1aa6ae2dafafe7f9538']},
+                               'fb': {   '_i_files': [   '8be1fd027914d656729cc5217b6f9a7c1cb4f3']},
+                               'fd': {   '_i_files': [   '0d011ded41de68be2f0039c97890ed4117f7ff',
+                                                         '204664448169af25c5126c3c8c11917ec5cb6f']},
+                               'info': {'_i_files': []},
+                               'pack': {   '_i_files': [   'pack-8b0ef94d8431e1b37110e10fedb27318a852f391.pack',
+                                                           'pack-8b0ef94d8431e1b37110e10fedb27318a852f391.idx']}},
+                'refs': {   '_i_files': [],
+                            'heads': {'_i_files': ['master']},
+                            'remotes': {   '_i_files': [],
+                                           'origin': {   '_i_files': [   'HEAD',
+                                                                         'master']}},
+                            'tags': {'_i_files': []}}},
+    '.obsidian': {   '_i_files': [   'hotkeys.json',
+                                     'core-plugins.json',
+                                     'core-plugins-migration.json',
+                                     'app.json',
+                                     'appearance.json',
+                                     'workspace.json',
+                                     'graph.json',
+                                     'community-plugins.json'],
+                     'plugins': {   '_i_files': [],
+                                    'dataview': {   '_i_files': [   'manifest.json',
+                                                                    'main.js',
+                                                                    'styles.css']},
+                                    'obsidian-git': {   '_i_files': [   'manifest.json',
+                                                                        'main.js',
+                                                                        'styles.css',
+                                                                        'data.json']}}},
+    '.vscode': {   '_i_files': [   'extensions.json',
+                                   'foam.json',
+                                   'settings.json',
+                                   'spellright.dict']},
+    '_layouts': {'_i_files': ['home.html', 'page.html']},
+    'abawo': {   '_i_files': ['corporate-design.md'],
+                 'keant': {   '_i_files': [],
+                              'emc': {'_i_files': ['keant-emc-testing.md']}}},
+    'ai': {   '_i_files': [   'clustering.md',
+                              'gradient_descent.md',
+                              'nn_neurons.md'],
+              'basics': {   '_i_files': [   'embedding.md',
+                                            'latent-space.md',
+                                            'few-shot.md',
+                                            'zero-shot.md']},
+              'combination': {'_i_files': ['query-net.md']},
+              'ffn': {'_i_files': ['multi_layer_perceptron.md']},
+              'gan': {'_i_files': ['gan.md']},
+              'learn': {   '_i_files': [   'active_learning.md',
+                                           'backpropagation_learn.md',
+                                           'fitting.md',
+                                           'learning_kinds.md',
+                                           'semi-supervised_text_classification.md']},
+              'llm': {   '_i_files': [   'autoregressive.md',
+                                         'language_models.md',
+                                         'llm.md'],
+                         'google': {'_i_files': ['bard.md']},
+                         'langchain': {'_i_files': ['langchain.md']},
+                         'openai': {'_i_files': ['openai.md', 'gpt4.md']},
+                         'oss': {'_i_files': ['vicuna.md']},
+                         'prompting': {'_i_files': ['cot.md']},
+                         'research': {   '_i_files': [   'using-llms-for-reading-thoughts.md']}},
+              'multi-receptors': {'_i_files': ['nn-neurotransmitters.md']},
+              'news': {'_i_files': ['google-codered.md', 'we-have-no-moat.md']},
+              'rnn': {   '_i_files': [   'bci-rnn.md',
+                                         'bionets.md',
+                                         'echo-state-networks.md',
+                                         'rnn.md',
+                                         'rnn_overlearning.md',
+                                         'rnn_timer_example.md',
+                                         'rnn_usefulness.md',
+                                         'rockets_rnn.md']},
+              'tools': {   '_i_files': [   'chatgpt.md',
+                                           'github-copilot.md',
+                                           'vector-database.md',
+                                           'bing-chat.md']}},
+    'assets': {'_i_files': [], 'css': {'_i_files': ['style.scss']}},
+    'attachments': {   '_i_files': [   '2022-04-26-19-39-39.png',
+                                       'Josefin_Sans.zip',
+                                       'Montserrat.zip',
+                                       'abawo-color-palette-v1.pdf',
+                                       'abawo-color-palette-v1.png',
+                                       'abawo-logo-square.png',
+                                       'abawo-logo-square.svg',
+                                       'abawo-logo.png',
+                                       'abawo-logo.svg',
+                                       'bid_ask_spread.png',
+                                       'clustering_ni-seminar_fabian-karl_2022-02-03-12-37-28.png',
+                                       'foam-icon.png',
+                                       'gamedev_tutorial_properties.png',
+                                       'gen-discr-training.svg',
+                                       'jaeger_feedforward.png',
+                                       'rnn-esn-talk_daniel-ruepp_ni-seminar_3.2.2022.png',
+                                       'rnn_feedforward_net.png',
+                                       'rnn_structure_jaeger_tutorial_timer.png',
+                                       'viofs-folder-on-iso.png',
+                                       'virt-manager-folder-share.png',
+                                       '2023-05-12-12-09-30-langchain-memory.png',
+                                       '2023-05-19-12-22-28.png',
+                                       'cot-prompting-yaoTreeThoughtsDeliberate2023.png',
+                                       'evalResults-yaoTreeThoughtsDeliberate2023.png',
+                                       'evalTable-yaoTreeThoughtsDeliberate2023.png',
+                                       'titleImg-panDragYourGAN.png',
+                                       'tot-prompting-yaoTreeThoughtsDeliberate2023.png']},
+    'cad': {'_i_files': ['CalculiX.md']},
+    'cs': {'_i_files': ['reservoir-computing.md']},
+    'furntiture': {'_i_files': ['element-shelving-system.md']},
+    'gamedev': {   '_i_files': ['prototyping.md'],
+                   'mechanics': {'_i_files': ['difficulty.md', 'tutorial.md']},
+                   'target_audience': {'_i_files': ['game_literacy.md']}},
+    'helium': {   '_i_files': ['helium.md'],
+                  'manufacturing': {   '_i_files': [],
+                                       'hs1': {   '_i_files': [   'hs1-ethernet-driver.md',
+                                                                  'hs1-processor.md',
+                                                                  'hs1.md']}}},
+    'investing': {   '_i_files': [   'diversification.md',
+                                     'international_investing.md',
+                                     'value_vs_growth_stocks.md'],
+                     'brokers': {   '_i_files': [   'broker.md',
+                                                    'zero_commission_brokers.md']},
+                     'value': {   '_i_files': [   'bottom_up_market_research.md',
+                                                  'in-depth_company_research.md',
+                                                  'value_investing.md']}},
+    'job_search': {'_i_files': ['ats.md', 'resume_writing.md']},
+    'linux': {   '_i_files': [],
+                 'kvm': {   '_i_files': [   'kvm-virtualization.md',
+                                            'libvirt.md',
+                                            'libvirtd-folder-share-windows-guest.md',
+                                            'qemu.md',
+                                            'virt-manager.md']}},
+    'manufacturing': {   '_i_files': [],
+                         'oem': {   '_i_files': [   'oem-in-china.md',
+                                                    'oem.md',
+                                                    'value-added-reseller.md']}},
+    'maths': {   '_i_files': [],
+                 'game_theory': {   '_i_files': [   'positive_sum_game.md',
+                                                    'zero_sum_game.md']}},
+    'meditation': {'_i_files': ['shunya.md']},
+    'people': {'_i_files': [], 'developers': {'_i_files': ['hwchase17.md']}},
+    'psychology': {'_i_files': ['the-faschist-mindset.md']},
+    'robotics': {'_i_files': ['arco.md']},
+    'skills': {   '_i_files': [   'jack_of_all_trades_master_of_one.md',
+                                  'skill_stacking.md']},
+    'uni': {   '_i_files': [],
+               's5': {   '_i_files': ['thomas-thuem-cb-pruefung-stuff.md'],
+                         'compilers': {'_i_files': ['lexing_anki.md']},
+                         'grn': {'_i_files': ['questions.md']}}}}
--- a/utils.py
+++ b/utils.py
@ -0,0 +1,46 @@
+from collections import defaultdict
+import os
+
+from typing import DefaultDict, Dict, List
+
+
+def tree() -> DefaultDict:
+    """Return an empty dictionary that nests itself on demand.
+
+    Looking up a missing key creates another ``tree()`` for that key, so a
+    nested path such as ``t['a']['b']`` can be assigned to without creating
+    the intermediate levels first.
+    """
+    nested: DefaultDict = defaultdict(tree)
+    return nested
+
+
+def get_last_segment(path: str) -> str:
+    """Return the final component of ``path``, ignoring trailing separators.
+
+    Args:
+        path: A path whose components are joined by ``os.sep``. Any trailing
+            ``'/'`` or ``os.sep`` characters are ignored.
+
+    Returns:
+        The text after the last ``os.sep`` once trailing separators have been
+        removed. An empty string is returned when nothing remains, e.g. for
+        ``''`` or ``'/'``.
+    """
+    # rstrip is safe on an empty string (indexing path[-1] raised IndexError)
+    # and also copes with repeated trailing separators such as 'dir//'.
+    trimmed = path.rstrip('/' + os.sep)
+    return trimmed.split(os.sep)[-1]
+
+
+# Key under which build_file_tree stores the list of file names of a directory
+# node; all other keys of a node are the names of its subdirectories.
+FILES_KEY = '_i_files'
+
+
+def build_file_tree(root_dir: str) -> DefaultDict:
+    """Walk ``root_dir`` and mirror its directory layout as nested dicts.
+
+    Each visited directory becomes a node reached by following the
+    components of its path relative to ``root_dir``. The node's
+    ``FILES_KEY`` entry holds the file names ``os.walk`` reported for that
+    directory. The root itself has the relative path ``'.'``, so its files
+    are stored under the top-level ``'.'`` key.
+
+    Args:
+        root_dir: Directory to scan; normalised with ``os.path.normpath``.
+
+    Returns:
+        A ``tree()`` keyed by path component, in which every visited
+        directory's node carries a ``FILES_KEY`` entry.
+    """
+    base = os.path.normpath(root_dir)
+    result = tree()
+    for current_dir, _subdirs, filenames in os.walk(base):
+        # Descend along the relative path, creating nodes on demand.
+        node = result
+        for part in os.path.relpath(current_dir, base).split(os.sep):
+            node = node[part]
+        node[FILES_KEY] = filenames
+    return result
+
+
+# Recursively convert a defaultdict tree into plain dicts (for readability)
+def defaultdict_to_dict(d: defaultdict) -> dict:
+    """Return ``d`` with nested ``defaultdict`` levels replaced by ``dict``.
+
+    A value that is not a ``defaultdict`` is returned unchanged, so the
+    recursion only continues through ``defaultdict`` values.
+    """
+    if not isinstance(d, defaultdict):
+        return d
+    return {key: defaultdict_to_dict(value) for key, value in d.items()}