From 30d3ef60049cddfb2af75b6e44f786d253dbaddd Mon Sep 17 00:00:00 2001
From: Nitwel
Date: Fri, 15 Mar 2024 00:13:05 +0100
Subject: [PATCH] add initial work for voice assistant

---
Review notes (ignored by git am):
- content/system/assist/assist.gd, lib/home_apis/hass_ws/handlers/assist.gd,
  lib/utils/sample_hold.gd and test/lib/utils/sample_hold/sample_hold.gd were
  amended during review; the blob ids on their "index" lines predate that.
- The downsampling ratio is now derived from the audio server mix rate. The
  former "44100 / 16000 * 1.5" was an integer division and evaluated to 3.0.

 content/main.tscn                           |  5 +-
 content/system/assist/assist.gd             | 71 ++++++++++++++++++
 content/system/assist/assist.tscn           | 17 ++++
 default_bus_layout.tres                     |  3 +
 export_presets.cfg                          |  4 +-
 lib/globals/house_body.gd                   |  2 +-
 lib/home_apis/hass_ws/handlers/assist.gd    | 81 +++++++++++++++++++++
 lib/home_apis/hass_ws/hass.gd               | 15 +++-
 lib/utils/sample_hold.gd                    | 25 ++++++
 lib/utils/sample_hold.tscn                  |  8 ++
 project.godot                               |  4 +
 test/lib/utils/sample_hold/sample_hold.gd   | 35 +++++++++
 test/lib/utils/sample_hold/sample_hold.tscn |  3 +
 13 files changed, 268 insertions(+), 5 deletions(-)
 create mode 100644 content/system/assist/assist.gd
 create mode 100644 content/system/assist/assist.tscn
 create mode 100644 default_bus_layout.tres
 create mode 100644 lib/home_apis/hass_ws/handlers/assist.gd
 create mode 100644 lib/utils/sample_hold.gd
 create mode 100644 lib/utils/sample_hold.tscn
 create mode 100644 test/lib/utils/sample_hold/sample_hold.gd
 create mode 100644 test/lib/utils/sample_hold/sample_hold.tscn

diff --git a/content/main.tscn b/content/main.tscn
index 421b116..b852603 100644
--- a/content/main.tscn
+++ b/content/main.tscn
@@ -1,4 +1,4 @@
-[gd_scene load_steps=16 format=3 uid="uid://eecv28y6jxk4"]
+[gd_scene load_steps=17 format=3 uid="uid://eecv28y6jxk4"]
 
 [ext_resource type="PackedScene" uid="uid://clc5dre31iskm" path="res://addons/godot-xr-tools/xr/start_xr.tscn" id="1_i4c04"]
 [ext_resource type="Script" path="res://content/main.gd" id="1_uvrd4"]
@@ -11,6 +11,7 @@
 [ext_resource type="PackedScene" uid="uid://c3kdssrmv84kv" path="res://content/ui/menu/menu.tscn" id="8_du83w"]
 [ext_resource type="PackedScene" uid="uid://lrehk38exd5n" path="res://content/system/keyboard/keyboard.tscn" id="9_e5n3p"]
 [ext_resource type="PackedScene" uid="uid://cbemihbxkd4ll" path="res://content/system/house/house.tscn" id="9_np6mw"]
+[ext_resource type="PackedScene" uid="uid://oydbwnek6xb4" path="res://content/system/assist/assist.tscn" id="12_8av8q"]
 
 [sub_resource type="Sky" id="Sky_vhymk"]
 sky_material = ExtResource("5_wgwf8")
@@ -83,4 +84,6 @@ transform = Transform3D(0.499999, -0.000139169, -6.50204e-05, 5.24307e-05, 0.353
 
 [node name="House" parent="." instance=ExtResource("9_np6mw")]
 
+[node name="Assist" parent="." instance=ExtResource("12_8av8q")]
+
 [editable path="XROrigin3D/XRControllerLeft"]
diff --git a/content/system/assist/assist.gd b/content/system/assist/assist.gd
new file mode 100644
index 0000000..e277c3d
--- /dev/null
+++ b/content/system/assist/assist.gd
@@ -0,0 +1,71 @@
+extends Node3D
+
+# Streams microphone audio to the Home Assistant assist pipeline as 16 kHz,
+# 16 bit mono PCM for as long as the input is louder than input_threshold.
+
+const sample_hold = preload ("res://lib/utils/sample_hold.gd")
+
+# Nominal mix rate, used until the audio server reports the real one.
+const audio_freq = 44100
+# Must match the sample_rate sent by assist_handler.start_wakeword().
+const target_freq = 16000
+
+# Captured frames consumed per emitted sample; refined in _ready().
+var sample_rate_ratio: float = float(audio_freq) / target_freq
+
+var effect: AudioEffectCapture
+@export var input_threshold: float = 0.05
+@onready var audio_recorder: AudioStreamPlayer = $AudioStreamRecord
+@onready var timer: Timer = $Timer
+
+func _ready():
+	# The capture effect delivers frames at the server mix rate, which depends
+	# on the device (48000 is common), so the ratio cannot be a constant.
+	var mix_rate := AudioServer.get_mix_rate()
+	if mix_rate > 0:
+		sample_rate_ratio = mix_rate / target_freq
+
+	var index = AudioServer.get_bus_index("Record")
+	if index >= 0 and AudioServer.get_bus_effect_count(index) > 0:
+		effect = AudioServer.get_bus_effect(index, 0) as AudioEffectCapture
+
+	if effect == null:
+		push_error("Assist: no AudioEffectCapture on the \"Record\" bus")
+		set_process(false)
+		return
+
+	# When the timer runs out an empty chunk closes the audio stream.
+	timer.timeout.connect(func():
+		HomeApi.api.assist_handler.send_data(PackedByteArray())
+	)
+
+func _process(_delta):
+	var stereo_data: PackedVector2Array = effect.get_buffer(effect.get_frames_available())
+
+	if stereo_data.size() == 0:
+		return
+
+	var mono_sampled := sample_hold.sample_and_hold(stereo_data, sample_rate_ratio)
+
+	# 16 bit PCM
+	var data := PackedByteArray()
+	data.resize(mono_sampled.size() * 2)
+
+	var max_amplitude = 0.0
+
+	for i in range(mono_sampled.size()):
+		# Saturate instead of wrapping when a sample exceeds full scale.
+		var value = clampf(mono_sampled[i], -1.0, 1.0)
+		# Loudness is a magnitude, negative peaks count as well.
+		max_amplitude = max(max_amplitude, abs(value))
+
+		data.encode_s16(i * 2, int(value * 32767))
+
+	if max_amplitude > input_threshold:
+		if timer.is_stopped():
+			HomeApi.api.assist_handler.start_wakeword()
+
+		timer.start()
+
+	if timer.is_stopped() == false:
+		HomeApi.api.assist_handler.send_data(data)
diff --git a/content/system/assist/assist.tscn b/content/system/assist/assist.tscn
new file mode 100644
index 0000000..86b284b
--- /dev/null
+++ b/content/system/assist/assist.tscn
@@ -0,0 +1,17 @@
+[gd_scene load_steps=3 format=3 uid="uid://oydbwnek6xb4"]
+
+[ext_resource type="Script" path="res://content/system/assist/assist.gd" id="1_5obhy"]
+
+[sub_resource type="AudioStreamMicrophone" id="AudioStreamMicrophone_6tv2x"]
+
+[node name="Assist" type="Node3D"]
+script = ExtResource("1_5obhy")
+
+[node name="AudioStreamRecord" type="AudioStreamPlayer" parent="."]
+stream = SubResource("AudioStreamMicrophone_6tv2x")
+autoplay = true
+bus = &"Record"
+
+[node name="Timer" type="Timer" parent="."]
+wait_time = 2.0
+one_shot = true
diff --git a/default_bus_layout.tres b/default_bus_layout.tres
new file mode 100644
index 0000000..7386b28
--- /dev/null
+++ b/default_bus_layout.tres
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb9d247646174775b00db7902c224ac62f734b3a6467af32919d12d2a6861c38
+size 555
diff --git a/export_presets.cfg b/export_presets.cfg
index 1ef519e..76601c7 100644
--- a/export_presets.cfg
+++ b/export_presets.cfg
@@ -155,7 +155,7 @@ permissions/receive_boot_completed=false
 permissions/receive_mms=false
 permissions/receive_sms=false
 permissions/receive_wap_push=false
-permissions/record_audio=false
+permissions/record_audio=true
 permissions/reorder_tasks=false
 permissions/restart_packages=false
 permissions/send_respond_via_message=false
@@ -377,7 +377,7 @@ permissions/receive_boot_completed=false
 permissions/receive_mms=false
 permissions/receive_sms=false
 permissions/receive_wap_push=false
-permissions/record_audio=false
+permissions/record_audio=true
 permissions/reorder_tasks=false
 permissions/restart_packages=false
 permissions/send_respond_via_message=false
diff --git a/lib/globals/house_body.gd b/lib/globals/house_body.gd
index c10f30a..4b0b520 100644
--- a/lib/globals/house_body.gd
+++ b/lib/globals/house_body.gd
@@ -1,3 +1,3 @@
 extends Node
 
-@onready var body = get_node("/root/Main/House")
\ No newline at end of file
+@onready var body = get_node_or_null("/root/Main/House")
\ No newline at end of file
diff --git a/lib/home_apis/hass_ws/handlers/assist.gd b/lib/home_apis/hass_ws/handlers/assist.gd
new file mode 100644
index 0000000..d0601ee
--- /dev/null
+++ b/lib/home_apis/hass_ws/handlers/assist.gd
@@ -0,0 +1,81 @@
+const HASS_API = preload ("../hass.gd")
+
+# Runs a Home Assistant assist pipeline over the websocket connection:
+# requests a run, forwards binary audio chunks and tracks the run state.
+
+var api: HASS_API
+# True between the "run-start" and "run-end" events of a pipeline run.
+var pipe_running := false
+# Binary handler id announced by "run-start"; it prefixes every audio chunk.
+var handler_id := 0
+
+func _init(hass: HASS_API):
+	self.api = hass
+
+func on_connect():
+	# A run cannot outlive its connection. Without this reset a dropped
+	# socket leaves pipe_running set and start_wakeword() refuses forever.
+	pipe_running = false
+	handler_id = 0
+
+func start_wakeword():
+	if pipe_running:
+		return
+
+	print("wake start")
+
+	api.send_packet({
+		"type": "assist_pipeline/run",
+		"start_stage": "wake_word",
+		"end_stage": "intent",
+		"input": {
+			"timeout": 5,
+			"sample_rate": 16000
+		},
+		"timeout": 60
+	}, true)
+
+func send_data(data: PackedByteArray):
+	if pipe_running == false:
+		return
+
+	# prepend the handler id to the data in 8 bits; the id is an unsigned
+	# byte, so values above 127 must not go through a signed encoder
+	var stream = PackedByteArray()
+
+	stream.resize(1)
+	stream.encode_u8(0, handler_id)
+	stream.append_array(data)
+
+	print("sending data")
+
+	api.send_raw(stream)
+
+func handle_message(message: Dictionary):
+	if message.get("type") != "event":
+		return
+
+	var event = message.get("event")
+
+	# Every packet passes through here, so the payload may have any shape.
+	if not (event is Dictionary) or event.has("type") == false:
+		return
+
+	print(event["type"])
+
+	match event["type"]:
+		"run-start":
+			print("Pipeline started")
+			pipe_running = true
+			handler_id = int(event["data"]["runner_data"]["stt_binary_handler_id"])
+		"run-end":
+			pipe_running = false
+			handler_id = 0
+		"wake_word-start":
+			# handle trigger message
+			pass
+		"wake_word-end":
+			# handle trigger message
+			pass
+		_:
+			pass
diff --git a/lib/home_apis/hass_ws/hass.gd b/lib/home_apis/hass_ws/hass.gd
index ceca25e..91907e9 100644
--- a/lib/home_apis/hass_ws/hass.gd
+++ b/lib/home_apis/hass_ws/hass.gd
@@ -2,6 +2,7 @@ extends Node
 
 const AuthHandler = preload ("./handlers/auth.gd")
 const IntegrationHandler = preload ("./handlers/integration.gd")
+const AssistHandler = preload ("./handlers/assist.gd")
 
 signal on_connect()
 signal on_disconnect()
@@ -25,6 +26,7 @@ var packet_callbacks := CallbackMap.new()
 
 var auth_handler: AuthHandler
 var integration_handler: IntegrationHandler
+var assist_handler: AssistHandler
 
 func _init(url:=self.url, token:=self.token):
 	self.url = url
@@ -32,6 +34,7 @@ func _init(url:=self.url, token:=self.token):
 
 	auth_handler = AuthHandler.new(self, url, token)
 	integration_handler = IntegrationHandler.new(self)
+	assist_handler = AssistHandler.new(self)
 
 	devices_template = devices_template.replace("\n", " ").replace("\t", "").replace("\r", " ")
 	connect_ws()
@@ -82,6 +85,7 @@ func handle_packet(packet: Dictionary):
 	if LOG_MESSAGES: print("Received packet: %s" % str(packet).substr(0, 1000))
 
 	auth_handler.handle_message(packet)
+	assist_handler.handle_message(packet)
 
 	if packet.has("id"):
 		packet_callbacks.call_key(int(packet.id), [packet])
@@ -117,6 +121,7 @@ func start_subscriptions():
 
 func handle_connect():
 	integration_handler.on_connect()
+	assist_handler.on_connect()
 	connected = true
 	on_connect.emit()
 
@@ -176,7 +181,15 @@ func send_request_packet(packet: Dictionary, ignore_initial:=false):
 
 	return await promise.settled
 
-func send_packet(packet: Dictionary):
+func send_raw(packet: PackedByteArray):
+	if LOG_MESSAGES: print("Sending binary: %s" % packet.hex_encode())
+	socket.send(packet)
+
+func send_packet(packet: Dictionary, with_id:=false):
+	if with_id:
+		packet.id = id
+		id += 1
+
 	if LOG_MESSAGES: print("Sending packet: %s" % encode_packet(packet))
 	socket.send_text(encode_packet(packet))
 
diff --git a/lib/utils/sample_hold.gd b/lib/utils/sample_hold.gd
new file mode 100644
index 0000000..97f25c5
--- /dev/null
+++ b/lib/utils/sample_hold.gd
@@ -0,0 +1,25 @@
+# Downsamples stereo frames to mono by sample and hold: every output sample
+# is the frame found `sample_rate` input frames after the previous one. No
+# filtering is applied and only the second channel (`y`) is read.
+#
+# data: captured stereo frames.
+# sample_rate: input frames consumed per output sample.
+# Returns int(data.size() / sample_rate) samples, or an empty array when
+# sample_rate is not positive.
+static func sample_and_hold(data: PackedVector2Array, sample_rate: float) -> PackedFloat32Array:
+	var new_data: PackedFloat32Array = PackedFloat32Array()
+
+	# A zero or negative step would divide by zero or never advance.
+	if sample_rate <= 0.0 or data.is_empty():
+		return new_data
+
+	new_data.resize(int(data.size() / sample_rate))
+
+	var last_index := data.size() - 1
+
+	for i in range(new_data.size()):
+		# Computing the position from i avoids the rounding drift of a running
+		# sum; the clamp keeps the read inside the buffer in any case.
+		new_data[i] = data[mini(int(i * sample_rate), last_index)].y
+
+	return new_data
diff --git a/lib/utils/sample_hold.tscn b/lib/utils/sample_hold.tscn
new file mode 100644
index 0000000..9cc2184
--- /dev/null
+++ b/lib/utils/sample_hold.tscn
@@ -0,0 +1,8 @@
+[gd_scene load_steps=2 format=3 uid="uid://b4l22m7bxamsc"]
+
+[ext_resource type="Script" path="res://test/lib/utils/sample_hold/sample_hold.gd" id="1_t0y35"]
+
+[node name="Node2D" type="Node2D"]
+script = ExtResource("1_t0y35")
+
+[node name="CanvasLayer" type="CanvasLayer" parent="."]
diff --git a/project.godot b/project.godot
index 0ca5bc8..ef8aca1 100644
--- a/project.godot
+++ b/project.godot
@@ -15,6 +15,10 @@ run/main_scene="res://content/main.tscn"
 config/features=PackedStringArray("4.2", "Mobile")
 config/icon="res://assets/logo.png"
 
+[audio]
+
+driver/enable_input=true
+
 [autoload]
 
 XRToolsUserSettings="*res://addons/godot-xr-tools/user_settings/user_settings.gd"
diff --git a/test/lib/utils/sample_hold/sample_hold.gd b/test/lib/utils/sample_hold/sample_hold.gd
new file mode 100644
index 0000000..f8be129
--- /dev/null
+++ b/test/lib/utils/sample_hold/sample_hold.gd
@@ -0,0 +1,35 @@
+@tool
+extends Node2D
+
+const sample_hold = preload ("res://lib/utils/sample_hold.gd")
+
+var data = PackedVector2Array()
+var result: PackedFloat32Array
+
+func _ready():
+	print("test")
+	for i in range(0, 44100):
+		var value = sin(i * 2 * PI / 44100.0)
+		data.push_back(Vector2(value, value))
+
+	result = sample_hold.sample_and_hold(data, 44100.0 / 16000.0)
+
+func _draw():
+	var size = get_viewport().get_visible_rect().size
+	size.x *= 10
+	size.y *= 4
+	var center = size / 2
+
+	draw_line(Vector2(0, size.y / 2), Vector2(size.x, size.y / 2), Color(1, 1, 1))
+
+	for i in range(0, data.size()):
+		var value = data[i]
+		var x = i * (size.x / data.size())
+
+		draw_line(Vector2(x, 0), Vector2(x, value.x * center.y), Color(1, 0, 0))
+
+	for i in range(0, result.size()):
+		var value = result[i]
+		var x = i * (size.x / result.size())
+
+		draw_line(Vector2(x, 0), Vector2(x, value * center.y), Color(0, 1, 0))
\ No newline at end of file
diff --git a/test/lib/utils/sample_hold/sample_hold.tscn b/test/lib/utils/sample_hold/sample_hold.tscn
new file mode 100644
index 0000000..cba1c37
--- /dev/null
+++ b/test/lib/utils/sample_hold/sample_hold.tscn
@@ -0,0 +1,3 @@
+[gd_scene format=3 uid="uid://bpy811vonnq2u"]
+
+[node name="Node2D" type="Node2D"]