immersive-home/app/content/system/assist/assist.gd

129 lines
3.1 KiB
GDScript3
Raw Normal View History

2024-03-15 01:13:05 +02:00
extends Node3D
## Voice-assist widget: streams microphone audio to the home voice assistant
## and shows the recognised request and the assistant's reply as chat bubbles.

const VoiceAssistant = preload("res://lib/home_apis/voice_handler.gd")
const sample_hold = preload("res://lib/utils/sample_hold.gd")
const Chat = preload("./chat.gd")

## Sample rate the captured audio is assumed to have, in Hz.
const audio_freq = 44100
## Sample rate the audio is reduced towards before being sent, in Hz.
const target_freq = 16000
## Step passed to sample_and_hold() when reducing the captured audio.
# NOTE(review): both operands of the division are ints, so GDScript performs
# integer division here (44100 / 16000 == 2) and the constant evaluates to 3.0.
# Confirm whether that is intended before changing the expression.
const sample_rate_ratio: float = audio_freq / target_freq * 1.5

## Peak sample level above which captured audio (re)starts the audio timer.
@export var input_threshold: float = 0.05

@onready var audio_recorder: AudioStreamPlayer = $AudioStreamRecord
@onready var audio_player_3d: AudioStreamPlayer3D = $AudioStreamPlayer3D
@onready var audio_timer: Timer = $AudioTimer
@onready var visual_timer: Timer = $VisualTimer
@onready var chat_user: Chat = $ChatUser
@onready var chat_assistant: Chat = $ChatAssistant
@onready var loader: Node3D = $Loader
@onready var camera = get_node("/root/Main/XROrigin3D/XRCamera3D")

## Effect at slot 0 of the "Record" audio bus; assigned in _ready().
var effect: AudioEffectCapture
## Voice assistant obtained from HomeApi; stays null until _ready() sets it.
var voice_assistant: VoiceAssistant
## True from wake-word detection until a spoken reply arrives or an error occurs.
var running: bool = false
2024-03-15 01:13:05 +02:00
## Hides the assistant UI, resolves the capture effect on the "Record" bus,
## waits for the Home API connection and then wires the voice assistant and
## timer signals to the UI.
func _ready():
	chat_assistant.visible = false
	chat_user.visible = false
	loader.visible = false

	# get_bus_index() returns -1 when the bus does not exist; without a usable
	# capture effect _process() would fail on every frame, so bail out early.
	var index := AudioServer.get_bus_index("Record")
	if index < 0 or AudioServer.get_bus_effect_count(index) == 0:
		push_error("Assist: audio bus \"Record\" is missing or has no effect in slot 0")
		return

	effect = AudioServer.get_bus_effect(index, 0) as AudioEffectCapture
	if effect == null:
		push_error("Assist: effect 0 on audio bus \"Record\" is not an AudioEffectCapture")
		return

	if !HomeApi.has_connected():
		await HomeApi.on_connect

	voice_assistant = HomeApi.get_voice_assistant()
	if voice_assistant == null:
		# No voice assistant available: leave the widget inert.
		return

	finish()

	chat_assistant.flip = true

	# When the audio timer runs out, send an empty packet to the assistant.
	audio_timer.timeout.connect(func():
		voice_assistant.send_data(PackedByteArray())
	)

	# Wake word detected: show the loader and move the widget in front of the user.
	voice_assistant.on_wake_word.connect(func(_text):
		loader.visible = true
		chat_user.visible = false
		chat_assistant.visible = false

		# Half a unit along the camera's -Z axis, then scale the height to 70 %.
		global_position = camera.global_position + camera.global_transform.basis.z * -0.5
		global_position.y *= 0.7
		# Orient along the camera -> widget direction.
		global_transform.basis = Basis.looking_at(global_position - camera.global_position)

		running = true
	)

	# Speech-to-text result: replace the loader with the user's chat bubble.
	voice_assistant.on_stt_message.connect(func(text):
		loader.visible = false
		chat_user.visible = true
		chat_user.text = text
	)

	# Text of the assistant's reply.
	voice_assistant.on_tts_message.connect(func(text):
		chat_assistant.visible = true
		chat_assistant.text = text
	)

	# Spoken reply: play it and start the timer that eventually hides the UI.
	voice_assistant.on_tts_sound.connect(func(audio):
		audio_player_3d.stream = audio
		audio_player_3d.play()
		visual_timer.start()
		running = false
	)

	voice_assistant.on_error.connect(func():
		running = false
		finish()
	)

	# Hide the UI once the visual timer elapses, but never while the reply is
	# still being played back.
	visual_timer.timeout.connect(func():
		if audio_player_3d.playing:
			await audio_player_3d.finished
		finish()
	)
## Hides both chat bubbles and the loader, unless an interaction is still
## marked as running.
func finish():
	if not running:
		for element in [chat_user, chat_assistant, loader]:
			element.visible = false
2024-03-15 01:13:05 +02:00
## Drains the capture effect every frame, converts the audio to 16-bit PCM
## and forwards it to the voice assistant while the audio timer is running.
func _process(_delta):
	# Nothing to do until _ready() has set up both the assistant and the effect.
	if voice_assistant == null or effect == null:
		return

	var stereo_frames: PackedVector2Array = effect.get_buffer(effect.get_frames_available())
	if stereo_frames.is_empty():
		return

	var mono_samples := sample_hold.sample_and_hold(stereo_frames, sample_rate_ratio)

	# 16 bit PCM: two bytes per sample.
	var data := PackedByteArray()
	data.resize(mono_samples.size() * 2)

	var peak := 0.0
	for i in range(mono_samples.size()):
		var raw: float = mono_samples[i]
		# Use the magnitude so negative-going peaks count towards the threshold.
		peak = maxf(peak, absf(raw))
		# Clamp first: samples outside [-1, 1] would overflow the 16-bit range.
		data.encode_s16(i * 2, int(clampf(raw, -1.0, 1.0) * 32767))

	if peak > input_threshold:
		# Start wake-word detection only when a new burst of audio begins.
		if audio_timer.is_stopped():
			voice_assistant.start_wakeword()

		# (Re)start the timer so streaming continues while input stays loud.
		audio_timer.start()

	if not audio_timer.is_stopped():
		voice_assistant.send_data(data)