add initial work for voice assistant

This commit is contained in:
Nitwel 2024-03-15 00:13:05 +01:00
parent 7278b68437
commit 30d3ef6004
13 changed files with 222 additions and 5 deletions

View File

@ -1,4 +1,4 @@
[gd_scene load_steps=16 format=3 uid="uid://eecv28y6jxk4"]
[gd_scene load_steps=17 format=3 uid="uid://eecv28y6jxk4"]
[ext_resource type="PackedScene" uid="uid://clc5dre31iskm" path="res://addons/godot-xr-tools/xr/start_xr.tscn" id="1_i4c04"]
[ext_resource type="Script" path="res://content/main.gd" id="1_uvrd4"]
@ -11,6 +11,7 @@
[ext_resource type="PackedScene" uid="uid://c3kdssrmv84kv" path="res://content/ui/menu/menu.tscn" id="8_du83w"]
[ext_resource type="PackedScene" uid="uid://lrehk38exd5n" path="res://content/system/keyboard/keyboard.tscn" id="9_e5n3p"]
[ext_resource type="PackedScene" uid="uid://cbemihbxkd4ll" path="res://content/system/house/house.tscn" id="9_np6mw"]
[ext_resource type="PackedScene" uid="uid://oydbwnek6xb4" path="res://content/system/assist/assist.tscn" id="12_8av8q"]
[sub_resource type="Sky" id="Sky_vhymk"]
sky_material = ExtResource("5_wgwf8")
@ -83,4 +84,6 @@ transform = Transform3D(0.499999, -0.000139169, -6.50204e-05, 5.24307e-05, 0.353
[node name="House" parent="." instance=ExtResource("9_np6mw")]
[node name="Assist" parent="." instance=ExtResource("12_8av8q")]
[editable path="XROrigin3D/XRControllerLeft"]

View File

@ -0,0 +1,50 @@
extends Node3D
# Helper script that provides `sample_and_hold`, used in _process to thin out the captured frames.
const sample_hold = preload ("res://lib/utils/sample_hold.gd")
# Sample rate (Hz) the captured audio is assumed to have.
# NOTE(review): this is never compared with the real audio mix rate - confirm.
const audio_freq = 44100
# Sample rate (Hz) the reduced audio is meant to have.
const target_freq = 16000
# Step passed to sample_and_hold. Both operands are integer constants, so
# `audio_freq / target_freq` is an integer division (44100 / 16000 == 2) and the
# resulting value is 2 * 1.5 = 3.0, not 2.75625 * 1.5.
# NOTE(review): the 1.5 factor is not explained anywhere in this script - confirm
# whether the truncating division is intended before changing it.
const sample_rate_ratio: float = audio_freq / target_freq * 1.5
# Capture effect that _process reads from; assigned in _ready from slot 0 of the "Record" bus.
var effect: AudioEffectCapture
# Peak sample value a chunk has to exceed before it is treated as input.
@export var input_threshold: float = 0.05
# Player node at $AudioStreamRecord; not referenced by the code in this script.
@onready var audio_recorder: AudioStreamPlayer = $AudioStreamRecord
# Timer node at $Timer; audio is forwarded to the assist handler while it is running.
@onready var timer: Timer = $Timer
# Resolves the capture effect in slot 0 of the "Record" bus and hooks up the
# timer. If the bus or the effect is not available, an error is reported and
# per-frame processing is switched off instead of failing on every frame.
func _ready():
	var bus_index := AudioServer.get_bus_index("Record")

	# get_bus_index reports -1 for an unknown bus name.
	if bus_index < 0 or AudioServer.get_bus_effect_count(bus_index) == 0:
		push_error("Assist: audio bus \"Record\" with an effect in slot 0 not found")
		set_process(false)
		return

	# `as` yields null when slot 0 holds some other effect type.
	effect = AudioServer.get_bus_effect(bus_index, 0) as AudioEffectCapture
	if effect == null:
		push_error("Assist: effect 0 on audio bus \"Record\" is not an AudioEffectCapture")
		set_process(false)
		return

	# Once the timer runs out, an empty payload is handed to the assist handler.
	# NOTE(review): presumably this marks the end of the audio stream - confirm.
	timer.timeout.connect(func():
		HomeApi.api.assist_handler.send_data(PackedByteArray())
	)
# Drains the frames captured since the previous call, reduces them with
# sample_and_hold, encodes them as signed 16 bit PCM and forwards the bytes to
# the assist handler for as long as the timer is running. A chunk whose peak
# exceeds `input_threshold` requests a wake word run (if the timer was stopped)
# and (re)starts the timer.
func _process(_delta):
	# Nothing to read from when the capture effect could not be resolved.
	if effect == null:
		return

	var stereo_data: PackedVector2Array = effect.get_buffer(effect.get_frames_available())
	if stereo_data.is_empty():
		return

	var mono_sampled := sample_hold.sample_and_hold(stereo_data, sample_rate_ratio)

	# 16 bit PCM
	var data := PackedByteArray()
	data.resize(mono_sampled.size() * 2)

	var max_amplitude := 0.0
	for i in range(mono_sampled.size()):
		var sample: float = mono_sampled[i]

		# Loudness is the magnitude of the sample, so negative peaks count too.
		max_amplitude = maxf(max_amplitude, absf(sample))

		# Keep the value inside [-1, 1] so that it cannot wrap around when it is
		# truncated to a signed 16 bit integer.
		data.encode_s16(i * 2, int(clampf(sample, -1.0, 1.0) * 32767))

	if max_amplitude > input_threshold:
		if timer.is_stopped():
			HomeApi.api.assist_handler.start_wakeword()

		# Every loud chunk restarts the timer.
		timer.start()

	if not timer.is_stopped():
		HomeApi.api.assist_handler.send_data(data)

View File

@ -0,0 +1,17 @@
[gd_scene load_steps=3 format=3 uid="uid://oydbwnek6xb4"]
[ext_resource type="Script" path="res://content/system/assist/assist.gd" id="1_5obhy"]
[sub_resource type="AudioStreamMicrophone" id="AudioStreamMicrophone_6tv2x"]
[node name="Assist" type="Node3D"]
script = ExtResource("1_5obhy")
[node name="AudioStreamRecord" type="AudioStreamPlayer" parent="."]
stream = SubResource("AudioStreamMicrophone_6tv2x")
autoplay = true
bus = &"Record"
[node name="Timer" type="Timer" parent="."]
wait_time = 2.0
one_shot = true

3
default_bus_layout.tres Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fb9d247646174775b00db7902c224ac62f734b3a6467af32919d12d2a6861c38
size 555

View File

@ -155,7 +155,7 @@ permissions/receive_boot_completed=false
permissions/receive_mms=false
permissions/receive_sms=false
permissions/receive_wap_push=false
permissions/record_audio=false
permissions/record_audio=true
permissions/reorder_tasks=false
permissions/restart_packages=false
permissions/send_respond_via_message=false
@ -377,7 +377,7 @@ permissions/receive_boot_completed=false
permissions/receive_mms=false
permissions/receive_sms=false
permissions/receive_wap_push=false
permissions/record_audio=false
permissions/record_audio=true
permissions/reorder_tasks=false
permissions/restart_packages=false
permissions/send_respond_via_message=false

View File

@ -1,3 +1,3 @@
extends Node
@onready var body = get_node("/root/Main/House")
@onready var body = get_node_or_null("/root/Main/House")

View File

@ -0,0 +1,70 @@
# Script of the API object this handler sends its packets through.
const HASS_API = preload ("../hass.gd")
# API instance handed in through _init.
var api: HASS_API
# True from a "run-start" event until the matching "run-end" event.
var pipe_running := false
# `stt_binary_handler_id` taken from the latest "run-start" event; reset to 0 on "run-end".
var handler_id := 0
# Keeps a reference to the API object used for all outgoing packets.
func _init(hass: HASS_API):
	api = hass
# Connection hook; currently a no-op.
func on_connect():
	pass
# Sends an "assist_pipeline/run" request that starts at the wake word stage and
# ends at the intent stage. Does nothing while a pipeline run is already active.
func start_wakeword():
	if not pipe_running:
		print("wake start")

		var input_options := {
			"timeout": 5,
			"sample_rate": 16000
		}
		var request := {
			"type": "assist_pipeline/run",
			"start_stage": "wake_word",
			"end_stage": "intent",
			"input": input_options,
			"timeout": 60
		}

		# `true` asks send_packet to attach a packet id.
		api.send_packet(request, true)
# Sends one chunk of bytes as a raw packet, prefixed with the current handler
# id. The chunk is dropped when no pipeline run is active.
func send_data(data: PackedByteArray):
	if not pipe_running:
		return

	# prepend the handler id to the data in 8 bits
	var framed := PackedByteArray()
	framed.resize(1)
	framed.encode_s8(0, handler_id)
	framed.append_array(data)

	print("sending data")
	api.send_raw(framed)
# Updates the pipeline state from an incoming packet.
# Only packets of type "event" whose event payload carries its own "type" are
# looked at: "run-start" marks the pipeline as running and stores the binary
# handler id, "run-end" resets both. Packets with an unexpected shape are
# ignored instead of aborting on a missing key.
func handle_message(message: Dictionary):
	if message.get("type", "") != "event":
		return

	var event = message.get("event")
	if not (event is Dictionary) or not event.has("type"):
		return

	print(event["type"])

	match event["type"]:
		"run-start":
			var data = event.get("data")
			var runner_data = data.get("runner_data") if data is Dictionary else null

			# Without a handler id no audio could be addressed to this run.
			if not (runner_data is Dictionary) or not runner_data.has("stt_binary_handler_id"):
				push_warning("Assist: run-start event without stt_binary_handler_id")
				return

			print("Pipeline started")
			pipe_running = true
			handler_id = int(runner_data["stt_binary_handler_id"])
		"run-end":
			pipe_running = false
			handler_id = 0
		"wake_word-start":
			# handle trigger message
			pass
		"wake_word-end":
			# handle trigger message
			pass
		_:
			pass

View File

@ -2,6 +2,7 @@ extends Node
const AuthHandler = preload ("./handlers/auth.gd")
const IntegrationHandler = preload ("./handlers/integration.gd")
const AssistHandler = preload ("./handlers/assist.gd")
signal on_connect()
signal on_disconnect()
@ -25,6 +26,7 @@ var packet_callbacks := CallbackMap.new()
var auth_handler: AuthHandler
var integration_handler: IntegrationHandler
var assist_handler: AssistHandler
func _init(url:=self.url, token:=self.token):
self.url = url
@ -32,6 +34,7 @@ func _init(url:=self.url, token:=self.token):
auth_handler = AuthHandler.new(self, url, token)
integration_handler = IntegrationHandler.new(self)
assist_handler = AssistHandler.new(self)
devices_template = devices_template.replace("\n", " ").replace("\t", "").replace("\r", " ")
connect_ws()
@ -82,6 +85,7 @@ func handle_packet(packet: Dictionary):
if LOG_MESSAGES: print("Received packet: %s" % str(packet).substr(0, 1000))
auth_handler.handle_message(packet)
assist_handler.handle_message(packet)
if packet.has("id"):
packet_callbacks.call_key(int(packet.id), [packet])
@ -117,6 +121,7 @@ func start_subscriptions():
func handle_connect():
integration_handler.on_connect()
assist_handler.on_connect()
connected = true
on_connect.emit()
@ -176,7 +181,15 @@ func send_request_packet(packet: Dictionary, ignore_initial:=false):
return await promise.settled
func send_packet(packet: Dictionary):
func send_raw(packet: PackedByteArray):
if LOG_MESSAGES: print("Sending binary: %s" % packet.hex_encode())
socket.send(packet)
func send_packet(packet: Dictionary, with_id:=false):
if with_id:
packet.id = id
id += 1
if LOG_MESSAGES: print("Sending packet: %s" % encode_packet(packet))
socket.send_text(encode_packet(packet))

11
lib/utils/sample_hold.gd Normal file
View File

@ -0,0 +1,11 @@
# Reduces `data` by keeping one frame out of every `sample_rate` frames and
# returning the y component of each kept frame.
#
# data:        input frames.
# sample_rate: step between kept frames, measured in input frames (a ratio,
#              e.g. 2.0 keeps every second frame).
# Returns int(data.size() / sample_rate) values; an empty array when `data` is
# empty or the step is not positive.
static func sample_and_hold(data: PackedVector2Array, sample_rate: float) -> PackedFloat32Array:
	var new_data: PackedFloat32Array = PackedFloat32Array()

	# A step of zero or less would divide by zero or produce a negative size.
	if data.is_empty() or sample_rate <= 0.0:
		return new_data

	new_data.resize(int(data.size() / sample_rate))

	var last_index := data.size() - 1
	for i in range(new_data.size()):
		# Derive the position from i rather than summing up a float counter, so
		# rounding errors cannot accumulate; the clamp keeps it inside `data`.
		new_data[i] = data[mini(int(i * sample_rate), last_index)].y

	return new_data

View File

@ -0,0 +1,8 @@
[gd_scene load_steps=2 format=3 uid="uid://b4l22m7bxamsc"]
[ext_resource type="Script" path="res://test/lib/utils/sample_hold/sample_hold.gd" id="1_t0y35"]
[node name="Node2D" type="Node2D"]
script = ExtResource("1_t0y35")
[node name="CanvasLayer" type="CanvasLayer" parent="."]

View File

@ -15,6 +15,10 @@ run/main_scene="res://content/main.tscn"
config/features=PackedStringArray("4.2", "Mobile")
config/icon="res://assets/logo.png"
[audio]
driver/enable_input=true
[autoload]
XRToolsUserSettings="*res://addons/godot-xr-tools/user_settings/user_settings.gd"

View File

@ -0,0 +1,35 @@
# Visual check for sample_and_hold: draws the input frames and the reduced output.
# Declared as @tool, so the script also runs inside the editor.
@tool
extends Node2D
# Script under test.
const sample_hold = preload ("res://lib/utils/sample_hold.gd")
# Input frames, filled in _ready.
var data = PackedVector2Array()
# Output of sample_and_hold for `data`, assigned in _ready.
var result: PackedFloat32Array
# Fills `data` with 44100 frames of a single sine period (same value in both
# components) and stores the reduced signal in `result`.
func _ready():
	print("test")

	for n in 44100:
		var s = sin(n * 2 * PI / 44100.0)
		data.append(Vector2(s, s))

	var step = 44100.0 / 16000.0 * 1.5
	result = sample_hold.sample_and_hold(data, step)
# Draws a white horizontal line at half height, then one vertical line per
# input frame (red, x component) and one per output value (green), each scaled
# by half of the drawing height. The drawing area is the visible viewport
# rectangle stretched by 10 horizontally and by 4 vertically.
func _draw():
	var area = get_viewport().get_visible_rect().size
	area.x *= 10
	area.y *= 4

	var half = area / 2

	var white = Color(1, 1, 1)
	var red = Color(1, 0, 0)
	var green = Color(0, 1, 0)

	draw_line(Vector2(0, area.y / 2), Vector2(area.x, area.y / 2), white)

	for idx in data.size():
		var frame = data[idx]
		var px = idx * (area.x / data.size())
		var top = Vector2(px, 0)
		var tip = Vector2(px, frame.x * half.y)
		draw_line(top, tip, red)

	for idx in result.size():
		var sample = result[idx]
		var px = idx * (area.x / result.size())
		var top = Vector2(px, 0)
		var tip = Vector2(px, sample * half.y)
		draw_line(top, tip, green)

View File

@ -0,0 +1,3 @@
[gd_scene format=3 uid="uid://bpy811vonnq2u"]
[node name="Node2D" type="Node2D"]