|
|
@@ -0,0 +1,130 @@ |
|
|
#!/usr/bin/env ruby |
|
|
|
|
|
require 'open3' |
|
|
require 'signal' |
|
|
require 'openai' |
|
|
|
|
|
# Silence-detection settings. Both values are interpolated into ffmpeg's
# `silencedetect` filter: the threshold as its `n` option and the duration as
# its `d` option.
@silence_threshold, @silence_duration = '-30dB', 0.5
|
|
|
|
|
# Wrapper around the OpenAI client that transcribes an audio file with the
# "whisper-1" model and retries failed requests with exponential backoff.
class AudioTranscriberApi
  # @param access_token [String] token passed to OpenAI::Client
  def initialize(access_token:)
    @access_token = access_token
    @client = OpenAI::Client.new(
      access_token: @access_token,
      request_timeout: 20
    )
  end

  # Uploads +audio_file+ for transcription and returns the "text" entry of the
  # response.
  #
  # Every StandardError is printed and the request is retried up to five
  # times, sleeping 2, 4, 8, 16 and 32 seconds between attempts. The error
  # raised by the final attempt is re-raised to the caller.
  #
  # @param audio_file [String] path of the audio file to upload
  # @return [Object] the value stored under "text" in the response
  # @raise [StandardError] the last error once all retries are exhausted
  def transcribe(audio_file)
    retries = 0
    max_retries = 5

    begin
      # Block form closes the handle after each attempt; without it every
      # retry would leave another open file behind until GC runs.
      File.open(audio_file, "rb") do |audio|
        response = @client.audio.transcribe(
          parameters: {
            model: "whisper-1",
            file: audio,
            language: "en"
          }
        )
        response["text"]
      end
    rescue StandardError => e
      puts "Error transcribing audio file: #{e.message}"
      # Out of retries: re-raise the error currently being handled.
      raise if retries >= max_retries

      retries += 1
      puts "Retrying audio transcription for #{audio_file} (#{retries} times) after exponential backoff (#{2 ** retries} seconds)"
      sleep 2 ** retries
      retry
    end
  end
end
|
|
|
|
|
# Builds the WAV file name for a recording segment.
#
# The index is converted to a string and left-padded with zeros to at least
# three characters, e.g. 7 becomes "output_007.wav".
#
# @param file_index [#to_s] segment number
# @return [String] the output file name
def filename(file_index)
  padded_index = file_index.to_s.rjust(3, '0')
  ['output_', padded_index, '.wav'].join
end
|
|
|
|
|
# Starts one recording segment.
#
# A `sox` process writes WAV data to its stdout. A background thread copies
# that stream into an `ffmpeg` process, which runs the `silencedetect` filter
# and writes the audio to filename(file_index).
#
# @param file_index [Integer] segment number used to derive the output file
# @return [Array] sox stdin, ffmpeg stdout, ffmpeg stderr, sox wait thread,
#   ffmpeg wait thread (in that order)
def start_ffmpeg(file_index)
  file_name = filename(file_index)

  sox_cmd = [
    'sox',
    '-t', 'coreaudio',
    '-d', # Use default audio input device
    '-c', '2', # Stereo channels
    '-r', '44100', # Sample rate
    '-b', '16', # Bit depth
    '-e', 'signed-integer',
    '-t', 'wav', # Output as WAV format for piping
    '-'
  ]

  ffmpeg_cmd = [
    'ffmpeg',
    '-f', 'wav',
    '-i', '-', # Input from sox via pipe
    '-af', "silencedetect=n=#{@silence_threshold}:d=#{@silence_duration}",
    '-c:a', 'pcm_s16le',
    '-y', # Overwrite output files
    file_name
  ]

  sox_stdin, sox_stdout, sox_stderr, sox_wait_thr = Open3.popen3(*sox_cmd)
  ffmpeg_stdin, ffmpeg_stdout, ffmpeg_stderr, ffmpeg_wait_thr = Open3.popen3(*ffmpeg_cmd)

  # The stream is binary audio: a line-based gets/puts relay would append a
  # newline to a final chunk that lacks one, so copy raw bytes instead.
  sox_stdout.binmode
  ffmpeg_stdin.binmode

  Thread.new do
    IO.copy_stream(sox_stdout, ffmpeg_stdin)
  rescue Errno::EPIPE, IOError
    # Either process was terminated while the copy was in flight; nothing
    # more to relay.
  ensure
    # Closing ffmpeg's stdin signals end of input; closing both ends releases
    # the descriptors instead of leaving them for GC.
    [sox_stdout, ffmpeg_stdin].each { |io| io.close unless io.closed? }
  end

  # Nothing else reads sox's stderr. Drain and discard it so sox cannot stall
  # on a full pipe, then release the descriptor.
  Thread.new do
    IO.copy_stream(sox_stderr, File::NULL)
  rescue IOError
    # Stream was closed underneath us; nothing left to drain.
  ensure
    sox_stderr.close unless sox_stderr.closed?
  end

  [sox_stdin, ffmpeg_stdout, ffmpeg_stderr, sox_wait_thr, ffmpeg_wait_thr]
end
|
|
|
|
|
# Transcribes one recorded segment on a background thread.
#
# The thread builds an AudioTranscriberApi from ENV['OPENAI_API_KEY'], prints
# the transcription of filename(file_index), and then deletes that file. If
# the transcription raises, the file is not deleted.
#
# @param file_index [Integer] segment number of the file to transcribe
# @return [Thread] the thread doing the work
def transcribe_audio(file_index)
  Thread.new do
    wav_path = filename(file_index)
    api = AudioTranscriberApi.new(access_token: ENV['OPENAI_API_KEY'])
    puts api.transcribe(wav_path)
    File.delete(wav_path)
  end
end
|
|
|
|
|
# Watches ffmpeg's stderr on a background thread and rotates the recording
# when silence is reported.
#
# The thread reads @stderr until a line contains "silence_start" but not
# "silence_start: 0". Because that is a substring test, it also skips values
# such as 0.5. On a match it:
#   1. sends TERM to the current sox and ffmpeg processes,
#   2. starts the next segment and a new monitor thread for it,
#   3. waits for the old ffmpeg to exit so its output file is complete,
#   4. closes the old segment's pipes and hands the file to transcribe_audio.
# If stderr reaches EOF without a match, the thread simply ends.
#
# @return [Thread] the monitoring thread
def monitor_for_silence
  Thread.new do
    silence_seen = @stderr.any? do |line|
      line.include?("silence_start") && !line.include?("silence_start: 0")
    end
    next unless silence_seen

    # Remember the finished segment before the instance variables are
    # overwritten by the next one.
    finished_index = @file_index
    finished_ffmpeg = @ffmpeg_wait_thr
    finished_pipes = [@stdin, @stdout, @stderr]

    # Gracefully interrupt the current sox and ffmpeg processes.
    [@sox_wait_thr, @ffmpeg_wait_thr].each do |waiter|
      Process.kill("TERM", waiter.pid) if waiter.alive?
    rescue Errno::ESRCH
      # The process exited between the alive? check and the kill.
    end

    # Start the next recording first so the gap in captured audio stays short.
    @file_index += 1
    @stdin, @stdout, @stderr, @sox_wait_thr, @ffmpeg_wait_thr = start_ffmpeg(@file_index)
    monitor_for_silence

    # Give the old ffmpeg time to exit so the file is finished before it is
    # uploaded. The wait is bounded so a stuck process cannot block
    # transcription forever.
    finished_ffmpeg.join(10)
    finished_pipes.each { |io| io.close unless io.closed? }

    transcribe_audio(finished_index)
  end
end
|
|
|
|
|
# Start the first recording segment and watch it for silence.
@file_index = 1
recorder_handles = start_ffmpeg(@file_index)
@stdin, @stdout, @stderr, @sox_wait_thr, @ffmpeg_wait_thr = recorder_handles
monitor_for_silence

# Block the main thread indefinitely so the background threads keep running.
sleep