Fixes to get voice working properly. I'm not fully confident it will hold up under load, but it has done well in small tests.

2026-01-01 17:00:39 -05:00
parent 81b8085b51
commit 7df2bb6dfc
5 changed files with 39 additions and 15 deletions
--- a/bin/breadbot_common.py
+++ b/bin/breadbot_common.py
@@ -169,11 +169,11 @@ def mix_audio_with_ffmpeg(files: list[TranscriptableFile], media_folder_path: st

    output_file_name = Path(
        media_folder_path,
-        call_id,
+        str(call_id),
        "output.mp3" if is_final_pass else "intermediate-" + "".join(random.choices(string.ascii_uppercase + string.digits, k=10)) + ".mp3"
    )

-    command_list.append(output_file_name)
+    command_list.append(str(output_file_name))

    # TODO shell = True isn't great, I don't remember the reason why it has to be this way
    # I *think* it had something to do with me not using ffmpeg's absolute path
--- a/bin/breadmixer.py
+++ b/bin/breadmixer.py
@@ -2,7 +2,7 @@ import json
 import os
 import copy
 from pathlib import Path
-from datetime import datetime
+from datetime import datetime, timezone
 from breadbot_common import SQLite, MySQL, TranscriptableFile, mix_audio_with_ffmpeg
 from txtai.pipeline import Transcription

@@ -36,20 +36,25 @@ transcriber = Transcription("openai/whisper-base")
 for call in calls_needing_work[1]:
    all_files = os.listdir(Path(
        config_json["media_voice_folder"],
-        call[0]
+        str(call[0])
    ))

    transcriptable_files = []

    for file in all_files:
+        print(file)
        file_name_no_extension = file.split('.')[0]
        timestamp = int(file_name_no_extension.split('-')[0])
        user_snowflake = file_name_no_extension.split('-')[1]
-        file_stamp_as_datetime = datetime.fromtimestamp(timestamp / 1000)
-        time_diff = file_stamp_as_datetime - call[1]
+        file_stamp_as_datetime = datetime.fromtimestamp(timestamp / 1000, timezone.utc)
+        print(file_stamp_as_datetime)
+        print(type(call[1]))
+        print(call[1])
+        time_diff = file_stamp_as_datetime - datetime.fromisoformat(call[1] + 'Z')
+        print(time_diff)

        transcriptable_files.append(TranscriptableFile(
-            file_path = file,
+            file_path = str(Path(config_json["media_voice_folder"], str(call[0]), file)),
            real_date = file_stamp_as_datetime,
            milliseconds_from_start = int((time_diff.seconds * 1000) + (time_diff.microseconds / 1000)),
            user_snowflake = user_snowflake
@@ -60,6 +65,11 @@ for call in calls_needing_work[1]:
    # TODO Possibly RAM abusive solution to wanting to keep the original list around
    ffmpeg_files = copy.deepcopy(transcriptable_files)

+    for file in ffmpeg_files:
+        print(file.file_path)
+        print(file.real_date)
+        print(file.milliseconds_from_start)
+
    # TODO Error handling for all ffmpeg operations
    while len(ffmpeg_files) > MAX_FILES_PER_CYCLE:
        ffmpeg_files = [
@@ -84,9 +94,9 @@ for call in calls_needing_work[1]:
        "compare": "="
    }])

-    for file in os.listdir(Path(config_json["media_voice_folder"], call[0])):
+    for file in os.listdir(Path(config_json["media_voice_folder"], str(call[0]))):
        if file.startswith("intermediate"):
-            os.remove(Path(config_json["media_voice_folder"], call[0], file))
+            os.remove(Path(config_json["media_voice_folder"], str(call[0]), file))

    for file in transcriptable_files:
        text = transcriber(file.file_path)