Fixes to get voice working properly. Not fully confident it will hold up under pressure, but it has done well in small tests.
This commit is contained in:
@@ -2,7 +2,7 @@ import json
|
||||
import os
|
||||
import copy
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from breadbot_common import SQLite, MySQL, TranscriptableFile, mix_audio_with_ffmpeg
|
||||
from txtai.pipeline import Transcription
|
||||
|
||||
@@ -36,20 +36,25 @@ transcriber = Transcription("openai/whisper-base")
|
||||
for call in calls_needing_work[1]:
|
||||
all_files = os.listdir(Path(
|
||||
config_json["media_voice_folder"],
|
||||
call[0]
|
||||
str(call[0])
|
||||
))
|
||||
|
||||
transcriptable_files = []
|
||||
|
||||
for file in all_files:
|
||||
print(file)
|
||||
file_name_no_extension = file.split('.')[0]
|
||||
timestamp = int(file_name_no_extension.split('-')[0])
|
||||
user_snowflake = file_name_no_extension.split('-')[1]
|
||||
file_stamp_as_datetime = datetime.fromtimestamp(timestamp / 1000)
|
||||
time_diff = file_stamp_as_datetime - call[1]
|
||||
file_stamp_as_datetime = datetime.fromtimestamp(timestamp / 1000, timezone.utc)
|
||||
print(file_stamp_as_datetime)
|
||||
print(type(call[1]))
|
||||
print(call[1])
|
||||
time_diff = file_stamp_as_datetime - datetime.fromisoformat(call[1] + 'Z')
|
||||
print(time_diff)
|
||||
|
||||
transcriptable_files.append(TranscriptableFile(
|
||||
file_path = file,
|
||||
file_path = str(Path(config_json["media_voice_folder"], str(call[0]), file)),
|
||||
real_date = file_stamp_as_datetime,
|
||||
milliseconds_from_start = int((time_diff.seconds * 1000) + (time_diff.microseconds / 1000)),
|
||||
user_snowflake = user_snowflake
|
||||
@@ -60,6 +65,11 @@ for call in calls_needing_work[1]:
|
||||
# TODO: Deep-copying the list just to keep the original around is possibly RAM-abusive; find a lighter-weight approach
|
||||
ffmpeg_files = copy.deepcopy(transcriptable_files)
|
||||
|
||||
for file in ffmpeg_files:
|
||||
print(file.file_path)
|
||||
print(file.real_date)
|
||||
print(file.milliseconds_from_start)
|
||||
|
||||
# TODO Error handling for all ffmpeg operations
|
||||
while len(ffmpeg_files) > MAX_FILES_PER_CYCLE:
|
||||
ffmpeg_files = [
|
||||
@@ -84,9 +94,9 @@ for call in calls_needing_work[1]:
|
||||
"compare": "="
|
||||
}])
|
||||
|
||||
for file in os.listdir(Path(config_json["media_voice_folder"], call[0])):
|
||||
for file in os.listdir(Path(config_json["media_voice_folder"], str(call[0]))):
|
||||
if file.startswith("intermediate"):
|
||||
os.remove(Path(config_json["media_voice_folder"], call[0], file))
|
||||
os.remove(Path(config_json["media_voice_folder"], str(call[0]), file))
|
||||
|
||||
for file in transcriptable_files:
|
||||
text = transcriber(file.file_path)
|
||||
|
||||
Reference in New Issue
Block a user