finish basic voice assistant
This commit is contained in:
parent
30d3ef6004
commit
aff66884ca
3
assets/chat_bubble.blend
Normal file
3
assets/chat_bubble.blend
Normal file
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c5fa5f006a42d87f43a8b411e4e4bf64a8b4fbbaedd0d02579134a8fa59161eb
|
||||
size 894176
|
BIN
assets/chat_bubble.blend1
Normal file
BIN
assets/chat_bubble.blend1
Normal file
Binary file not shown.
3
assets/models/chat_bubble/chat_bubble.glb
Normal file
3
assets/models/chat_bubble/chat_bubble.glb
Normal file
|
@ -0,0 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f215158ae0aba0113e0077040342fc1b508cfec0a3a1e022c9ce0c16973e2ee1
|
||||
size 17828
|
34
assets/models/chat_bubble/chat_bubble.glb.import
Normal file
34
assets/models/chat_bubble/chat_bubble.glb.import
Normal file
|
@ -0,0 +1,34 @@
|
|||
[remap]
|
||||
|
||||
importer="scene"
|
||||
importer_version=1
|
||||
type="PackedScene"
|
||||
uid="uid://b12raorbby1xd"
|
||||
path="res://.godot/imported/chat_bubble.glb-03622c64b96f5698360bcfb8a4904483.scn"
|
||||
|
||||
[deps]
|
||||
|
||||
source_file="res://assets/models/chat_bubble/chat_bubble.glb"
|
||||
dest_files=["res://.godot/imported/chat_bubble.glb-03622c64b96f5698360bcfb8a4904483.scn"]
|
||||
|
||||
[params]
|
||||
|
||||
nodes/root_type=""
|
||||
nodes/root_name=""
|
||||
nodes/apply_root_scale=true
|
||||
nodes/root_scale=1.0
|
||||
meshes/ensure_tangents=true
|
||||
meshes/generate_lods=true
|
||||
meshes/create_shadow_meshes=true
|
||||
meshes/light_baking=1
|
||||
meshes/lightmap_texel_size=0.2
|
||||
meshes/force_disable_compression=false
|
||||
skins/use_named_skins=true
|
||||
animation/import=true
|
||||
animation/fps=30
|
||||
animation/trimming=false
|
||||
animation/remove_immutable_tracks=true
|
||||
import_script/path=""
|
||||
_subresources={}
|
||||
gltf/naming_version=1
|
||||
gltf/embedded_image_handling=1
|
|
@ -85,5 +85,6 @@ transform = Transform3D(0.499999, -0.000139169, -6.50204e-05, 5.24307e-05, 0.353
|
|||
[node name="House" parent="." instance=ExtResource("9_np6mw")]
|
||||
|
||||
[node name="Assist" parent="." instance=ExtResource("12_8av8q")]
|
||||
transform = Transform3D(1, -1.39636e-11, 0, 9.47986e-12, 1, 0, 0, 0, 1, 0.000231838, -4.01369e-06, -0.855612)
|
||||
|
||||
[editable path="XROrigin3D/XRControllerLeft"]
|
||||
|
|
|
@ -1,24 +1,79 @@
|
|||
extends Node3D
|
||||
|
||||
const sample_hold = preload ("res://lib/utils/sample_hold.gd")
|
||||
const Chat = preload ("./chat.gd")
|
||||
|
||||
const audio_freq = 44100
|
||||
const target_freq = 16000
|
||||
const sample_rate_ratio: float = audio_freq / target_freq * 1.5
|
||||
|
||||
var effect: AudioEffectCapture
|
||||
@export var input_threshold: float = 0.05
|
||||
@export var input_threshold: float = 0.1
|
||||
@onready var audio_recorder: AudioStreamPlayer = $AudioStreamRecord
|
||||
@onready var timer: Timer = $Timer
|
||||
@onready var audio_timer: Timer = $AudioTimer
|
||||
@onready var visual_timer: Timer = $VisualTimer
|
||||
@onready var audio_player_3d: AudioStreamPlayer3D = $AudioStreamPlayer3D
|
||||
@onready var chat_user: Chat = $ChatUser
|
||||
@onready var chat_assistant: Chat = $ChatAssistant
|
||||
@onready var loader: Node3D = $Loader
|
||||
@onready var camera = $"/root/Main/XROrigin3D/XRCamera3D"
|
||||
|
||||
var running := true
|
||||
|
||||
func _ready():
|
||||
var index = AudioServer.get_bus_index("Record")
|
||||
effect = AudioServer.get_bus_effect(index, 0)
|
||||
|
||||
timer.timeout.connect(func():
|
||||
finish()
|
||||
|
||||
audio_timer.timeout.connect(func():
|
||||
HomeApi.api.assist_handler.send_data(PackedByteArray())
|
||||
)
|
||||
|
||||
HomeApi.api.assist_handler.on_wake_word.connect(func(text):
|
||||
loader.visible=true
|
||||
chat_user.visible=false
|
||||
chat_assistant.visible=false
|
||||
global_position=camera.global_position + camera.global_transform.basis.z * - 0.5
|
||||
global_position.y *= 0.7
|
||||
global_transform.basis=Basis.looking_at((camera.global_position - global_position) * - 1)
|
||||
running=true
|
||||
)
|
||||
|
||||
HomeApi.api.assist_handler.on_stt_message.connect(func(text):
|
||||
loader.visible=false
|
||||
chat_user.visible=true
|
||||
chat_user.text=text
|
||||
)
|
||||
HomeApi.api.assist_handler.on_tts_message.connect(func(text):
|
||||
chat_assistant.visible=true
|
||||
chat_assistant.text=text
|
||||
)
|
||||
|
||||
HomeApi.api.assist_handler.on_tts_sound.connect(func(audio):
|
||||
print("Playing TTS ", audio.data.size())
|
||||
audio_player_3d.stream=audio
|
||||
audio_player_3d.play()
|
||||
visual_timer.start()
|
||||
running=false
|
||||
)
|
||||
|
||||
visual_timer.timeout.connect(func():
|
||||
if audio_player_3d.playing == false:
|
||||
finish()
|
||||
else:
|
||||
await audio_player_3d.finished
|
||||
finish()
|
||||
)
|
||||
|
||||
## Hides the user bubble, the assistant bubble and the loader.
## Does nothing while `running` is still true.
func finish():
	if not running:
		for node in [chat_user, chat_assistant, loader]:
			node.visible = false
|
||||
|
||||
func _process(_delta):
|
||||
var sterioData: PackedVector2Array = effect.get_buffer(effect.get_frames_available())
|
||||
|
||||
|
@ -41,10 +96,10 @@ func _process(_delta):
|
|||
data.encode_s16(i * 2, int(value * 32767))
|
||||
|
||||
if max_amplitude > input_threshold:
|
||||
if timer.is_stopped():
|
||||
if audio_timer.is_stopped():
|
||||
HomeApi.api.assist_handler.start_wakeword()
|
||||
|
||||
timer.start()
|
||||
audio_timer.start()
|
||||
|
||||
if timer.is_stopped() == false:
|
||||
if audio_timer.is_stopped() == false:
|
||||
HomeApi.api.assist_handler.send_data(data)
|
|
@ -1,6 +1,8 @@
|
|||
[gd_scene load_steps=3 format=3 uid="uid://oydbwnek6xb4"]
|
||||
[gd_scene load_steps=5 format=3 uid="uid://oydbwnek6xb4"]
|
||||
|
||||
[ext_resource type="Script" path="res://content/system/assist/assist.gd" id="1_5obhy"]
|
||||
[ext_resource type="PackedScene" uid="uid://cy6jklyde3pgo" path="res://content/system/assist/chat.tscn" id="2_laew1"]
|
||||
[ext_resource type="PackedScene" uid="uid://b0d1582vpkr8m" path="res://content/system/assist/loader.tscn" id="3_25iy1"]
|
||||
|
||||
[sub_resource type="AudioStreamMicrophone" id="AudioStreamMicrophone_6tv2x"]
|
||||
|
||||
|
@ -12,6 +14,22 @@ stream = SubResource("AudioStreamMicrophone_6tv2x")
|
|||
autoplay = true
|
||||
bus = &"Record"
|
||||
|
||||
[node name="Timer" type="Timer" parent="."]
|
||||
[node name="AudioTimer" type="Timer" parent="."]
|
||||
wait_time = 2.0
|
||||
one_shot = true
|
||||
|
||||
[node name="AudioStreamPlayer3D" type="AudioStreamPlayer3D" parent="."]
|
||||
|
||||
[node name="VisualTimer" type="Timer" parent="."]
|
||||
wait_time = 4.0
|
||||
one_shot = true
|
||||
|
||||
[node name="ChatUser" parent="." instance=ExtResource("2_laew1")]
|
||||
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, -0.109997, 0.025, 0)
|
||||
flip = false
|
||||
|
||||
[node name="ChatAssistant" parent="." instance=ExtResource("2_laew1")]
|
||||
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, -0.0499932, -0.025, 0)
|
||||
text = "Hello, World!"
|
||||
|
||||
[node name="Loader" parent="." instance=ExtResource("3_25iy1")]
|
||||
|
|
37
content/system/assist/chat.gd
Normal file
37
content/system/assist/chat.gd
Normal file
|
@ -0,0 +1,37 @@
|
|||
@tool
|
||||
extends Node3D
|
||||
|
||||
const FontTools = preload ("res://lib/utils/font_tools.gd")
|
||||
|
||||
@onready var label: Label3D = $Label3D
|
||||
@onready var chat: Skeleton3D = $chat_bubble/Armature/Skeleton3D
|
||||
@onready var model: MeshInstance3D = $chat_bubble/Armature/Skeleton3D/Cube
|
||||
|
||||
## Text displayed in the bubble. Assigning it waits until the node is ready,
## copies the value onto the Label3D and then calls update().
@export var text := "Hello, World!":
	set(value):
		# The @onready references are only valid once the node is ready.
		if not is_node_ready():
			await ready

		label.text = value
		text = value
		update()
|
||||
|
||||
## Orientation of the bubble. Assigning it waits until the node is ready and
## then rotates the bubble mesh to -90 degrees (true) or 90 degrees (false)
## around its local X axis.
@export var flip: bool = false:
	set(value):
		# The @onready references are only valid once the node is ready.
		if not is_node_ready():
			await ready

		flip = value

		if value:
			model.rotation_degrees.x = -90
		else:
			model.rotation_degrees.x = 90
|
||||
|
||||
const base_width = 0.8 * 0.2
|
||||
|
||||
## Moves one bone of the bubble skeleton according to the label text width.
## Text narrower than base_width leaves the bone at offset 0. When `flip` is
## set, bone 1 is moved by the negated offset, otherwise bone 0 is moved by
## the positive offset.
func update():
	# Amount by which the text exceeds base_width, divided by 0.2 and never
	# negative.
	var overflow = max(0.0, (FontTools.get_font_size(label).x - base_width) / 0.2)

	if flip:
		chat.set_bone_pose_position(1, Vector3(0, -overflow, 0))
	else:
		chat.set_bone_pose_position(0, Vector3(0, overflow, 0))
|
33
content/system/assist/chat.tscn
Normal file
33
content/system/assist/chat.tscn
Normal file
|
@ -0,0 +1,33 @@
|
|||
[gd_scene load_steps=5 format=3 uid="uid://cy6jklyde3pgo"]
|
||||
|
||||
[ext_resource type="PackedScene" uid="uid://b12raorbby1xd" path="res://assets/models/chat_bubble/chat_bubble.glb" id="1_lsdcs"]
|
||||
[ext_resource type="Script" path="res://content/system/assist/chat.gd" id="1_rbrak"]
|
||||
[ext_resource type="Material" uid="uid://bujy3egn1oqac" path="res://assets/materials/pri-500.material" id="2_ps3pl"]
|
||||
[ext_resource type="FontVariation" uid="uid://d2ofyimg5s65q" path="res://assets/fonts/ui_font_500.tres" id="4_gxfp3"]
|
||||
|
||||
[node name="Chat" type="Node3D"]
|
||||
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 3.41237e-06, 0, 0)
|
||||
script = ExtResource("1_rbrak")
|
||||
text = "Hello World"
|
||||
flip = true
|
||||
|
||||
[node name="chat_bubble" parent="." instance=ExtResource("1_lsdcs")]
|
||||
transform = Transform3D(0.2, 0, 0, 0, 0.2, 0, 0, 0, 0.2, -0.0154175, 0, 0.0710473)
|
||||
|
||||
[node name="Armature" parent="chat_bubble" index="0"]
|
||||
transform = Transform3D(1, 0, 0, 0, 0, 1, 0, -1, 0, 0.5, 0, 0)
|
||||
|
||||
[node name="Cube" parent="chat_bubble/Armature/Skeleton3D" index="0"]
|
||||
transform = Transform3D(-4.37114e-08, -1, -4.37114e-08, 0, -4.37114e-08, 1, -1, 4.37114e-08, 1.91069e-15, 0, 0.35, 0)
|
||||
material_override = ExtResource("2_ps3pl")
|
||||
|
||||
[node name="Label3D" type="Label3D" parent="."]
|
||||
transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0.006)
|
||||
pixel_size = 0.001
|
||||
text = "Hello World"
|
||||
font = ExtResource("4_gxfp3")
|
||||
font_size = 20
|
||||
outline_size = 0
|
||||
horizontal_alignment = 0
|
||||
|
||||
[editable path="chat_bubble"]
|
42
content/system/assist/loader.gd
Normal file
42
content/system/assist/loader.gd
Normal file
|
@ -0,0 +1,42 @@
|
|||
@tool
|
||||
extends Node3D
|
||||
|
||||
const material: StandardMaterial3D = preload ("res://assets/materials/pri-500.material")
|
||||
var time: float = 0.0
|
||||
const DOT_COUNT = 8
|
||||
const RADIUS = 0.025
|
||||
|
||||
## Populates the loader with its dot meshes.
func _ready():
	generate_meshes()
|
||||
|
||||
## Adds DOT_COUNT small cylinder meshes as children, evenly spaced on a circle
## of radius RADIUS in the local XY plane. Every dot receives its own
## duplicate of the shared material with alpha transparency enabled, so each
## dot's alpha can be changed independently.
func generate_meshes():
	for i in range(DOT_COUNT):
		var cylinder := CylinderMesh.new()
		cylinder.top_radius = 0.005
		cylinder.bottom_radius = 0.005
		cylinder.height = 0.005

		var dot_material = material.duplicate()
		dot_material.transparency = BaseMaterial3D.TRANSPARENCY_ALPHA

		var dot := MeshInstance3D.new()
		dot.mesh = cylinder
		dot.material_override = dot_material

		add_child(dot)

		# Angle of the i-th dot on the circle.
		var angle = i * PI / DOT_COUNT * 2
		dot.position = Vector3(sin(angle), cos(angle), 0) * RADIUS
		dot.rotation_degrees = Vector3(90, 0, 0)
|
||||
|
||||
## Animates the loader. While the node is visible, `time` is advanced by
## `delta` and every dot's albedo alpha is set from saw_tooth(), phase-shifted
## by the dot's child index so the fade travels around the ring.
##
## delta: seconds elapsed since the previous frame.
func _process(delta):
	if !visible:
		return

	time += delta

	# The child count does not change during the loop; read it once.
	var child_count := get_child_count()

	for i in range(child_count):
		# get_child() never returns null for an index in range, so the useful
		# guard is on the type: skip children that are not dots carrying a
		# BaseMaterial3D override instead of failing on a missing property
		# (this script also runs in the editor because of @tool).
		var dot := get_child(i) as MeshInstance3D
		if dot == null:
			continue

		var dot_material := dot.material_override as BaseMaterial3D
		if dot_material == null:
			continue

		dot_material.albedo_color.a = saw_tooth(i / float(child_count) + time)
|
||||
|
||||
## Falling sawtooth with period 1: returns 1 minus the remainder of x
## divided by 1.
func saw_tooth(x: float) -> float:
	var remainder := fmod(x, 1)
	return 1 - remainder
|
6
content/system/assist/loader.tscn
Normal file
6
content/system/assist/loader.tscn
Normal file
|
@ -0,0 +1,6 @@
|
|||
[gd_scene load_steps=2 format=3 uid="uid://b0d1582vpkr8m"]
|
||||
|
||||
[ext_resource type="Script" path="res://content/system/assist/loader.gd" id="1_3bi3s"]
|
||||
|
||||
[node name="Loader" type="Node3D"]
|
||||
script = ExtResource("1_3bi3s")
|
|
@ -1,5 +1,7 @@
|
|||
extends RefCounted
|
||||
|
||||
const FontTools = preload ("res://lib/utils/font_tools.gd")
|
||||
|
||||
var label: Label3D
|
||||
|
||||
var text: String = ""
|
||||
|
@ -73,14 +75,13 @@ func _calculate_caret_position(click_pos_x: float):
|
|||
return gap_offsets.size() - 1
|
||||
|
||||
func _calculate_text_gaps():
|
||||
var font = label.get_font()
|
||||
var offsets = [0.0]
|
||||
|
||||
for i in range(text.length()):
|
||||
var chars = text.substr(0, i + 1) # Can't use single chars because of kerning.
|
||||
var size = font.get_string_size(chars, HORIZONTAL_ALIGNMENT_CENTER, -1, label.font_size)
|
||||
var size = FontTools.get_font_size(label, chars)
|
||||
|
||||
offsets.append(size.x * label.pixel_size)
|
||||
offsets.append(size.x)
|
||||
|
||||
return offsets
|
||||
|
||||
|
|
|
@ -1,8 +1,36 @@
|
|||
const HASS_API = preload ("../hass.gd")
|
||||
|
||||
signal on_wake_word(wake_word: String)
|
||||
signal on_stt_message(message: String)
|
||||
signal on_tts_message(message: String)
|
||||
signal on_tts_sound(sound: AudioStreamMP3)
|
||||
|
||||
var api: HASS_API
|
||||
var pipe_running := false
|
||||
var handler_id := 0
|
||||
## Last detected wake word. Assigning a non-null value that differs from the
## stored one emits on_wake_word before the value is stored.
var wake_word = null:
	set(value):
		var is_new = value != null and value != wake_word
		if is_new:
			on_wake_word.emit(value)

		wake_word = value
|
||||
|
||||
## Last speech-to-text result. Assigning a non-null value that differs from
## the stored one emits on_stt_message before the value is stored.
# NOTE(review): the visible "run-end" branch of handle_message resets only
# wake_word, so an identical transcript in a later run would not re-emit.
# Confirm this is intended.
var stt_message = null:
	set(value):
		var is_new = value != null and value != stt_message
		if is_new:
			on_stt_message.emit(value)

		stt_message = value
|
||||
|
||||
## Last assistant response text. Assigning a non-null value that differs from
## the stored one emits on_tts_message before the value is stored.
# NOTE(review): the visible "run-end" branch of handle_message resets only
# wake_word, so an identical response in a later run would not re-emit.
# Confirm this is intended.
var tts_message = null:
	set(value):
		var is_new = value != null and value != tts_message
		if is_new:
			on_tts_message.emit(value)

		tts_message = value
|
||||
|
||||
## Last assistant response audio. Assigning a non-null value that differs
## from the stored one emits on_tts_sound before the value is stored.
var tts_sound = null:
	set(value):
		var is_new = value != null and value != tts_sound
		if is_new:
			on_tts_sound.emit(value)

		tts_sound = value
|
||||
|
||||
func _init(hass: HASS_API):
|
||||
self.api = hass
|
||||
|
@ -19,7 +47,7 @@ func start_wakeword():
|
|||
api.send_packet({
|
||||
"type": "assist_pipeline/run",
|
||||
"start_stage": "wake_word",
|
||||
"end_stage": "intent",
|
||||
"end_stage": "tts",
|
||||
"input": {
|
||||
"timeout": 5,
|
||||
"sample_rate": 16000
|
||||
|
@ -50,21 +78,59 @@ func handle_message(message: Dictionary):
|
|||
if event.has("type") == false:
|
||||
return
|
||||
|
||||
print(event["type"])
|
||||
print(message)
|
||||
|
||||
match event["type"]:
|
||||
"run-start":
|
||||
print("Pipeline started")
|
||||
pipe_running = true
|
||||
handler_id = event["data"]["runner_data"]["stt_binary_handler_id"]
|
||||
"wake_word-end":
|
||||
if pipe_running == false:
|
||||
return
|
||||
|
||||
if event["data"]["wake_word_output"].has("wake_word_phrase") == false:
|
||||
return
|
||||
|
||||
wake_word = event["data"]["wake_word_output"]["wake_word_phrase"]
|
||||
"stt-end":
|
||||
if pipe_running == false:
|
||||
return
|
||||
|
||||
if event["data"]["stt_output"].has("text") == false:
|
||||
return
|
||||
|
||||
stt_message = event["data"]["stt_output"]["text"]
|
||||
"intent-end":
|
||||
if pipe_running == false:
|
||||
return
|
||||
|
||||
tts_message = event["data"]["intent_output"]["response"]["speech"]["plain"]["speech"]
|
||||
"tts-end":
|
||||
if pipe_running == false:
|
||||
return
|
||||
|
||||
if event["data"]["tts_output"].has("url") == false:
|
||||
return
|
||||
|
||||
var headers = PackedStringArray(["Authorization: Bearer %s" % api.token, "Content-Type: application/json"])
|
||||
var url = "%s://%s%s" % ["https" if api.url.begins_with("wss") else "http", api.url.split("//")[1],event["data"]["tts_output"]["url"]]
|
||||
|
||||
Request.request(url, headers, HTTPClient.METHOD_GET)
|
||||
|
||||
var response = await Request.request_completed
|
||||
|
||||
if response[0] != HTTPRequest.RESULT_SUCCESS:
|
||||
return
|
||||
|
||||
var sound = AudioStreamMP3.new()
|
||||
sound.data = response[3]
|
||||
|
||||
tts_sound = sound
|
||||
|
||||
"run-end":
|
||||
pipe_running = false
|
||||
wake_word = null
|
||||
handler_id = 0
|
||||
"wake_word-start":
|
||||
# handle trigger message
|
||||
pass
|
||||
"wake_word-end":
|
||||
# handle trigger message
|
||||
pass
|
||||
_:
|
||||
pass
|
||||
|
|
9
lib/utils/font_tools.gd
Normal file
9
lib/utils/font_tools.gd
Normal file
|
@ -0,0 +1,9 @@
|
|||
## Measures a string with the label's font and returns the size multiplied by
## the label's pixel_size.
##
## label: Label3D whose font, font_size, width, horizontal_alignment and
##        pixel_size are used for the measurement.
## chars: string to measure; when null, label.text is measured instead.
##
## Returns Vector2(0, 0) when the label has no font assigned.
static func get_font_size(label: Label3D, chars=null):
	var font = label.font
	if font == null:
		return Vector2(0, 0)

	var measured = chars
	if measured == null:
		measured = label.text

	var unscaled = font.get_string_size(measured, label.horizontal_alignment, label.width, label.font_size)
	return unscaled * label.pixel_size
|
Loading…
Reference in New Issue
Block a user