/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the license found in the
 * LICENSE file in the root directory of this source tree.
 */

using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Meta.Conduit;
using Meta.Voice;
using Meta.Voice.TelemetryUtilities;
using Meta.WitAi.Configuration;
using Meta.WitAi.Data;
using Meta.WitAi.Data.Configuration;
using Meta.WitAi.Data.Intents;
using Meta.WitAi.Events;
using Meta.WitAi.Events.UnityEventListeners;
using Meta.WitAi.Interfaces;
using Meta.WitAi.Json;
using Meta.WitAi.Requests;
using UnityEngine;

namespace Meta.WitAi
{
    public abstract class VoiceService : BaseSpeechService, IVoiceService, IInstanceResolver, IAudioEventProvider
    {
        /// <summary>
        /// Enables/disables all attribute-based intent handling.
        /// </summary>
        private bool UseIntentAttributes => WitConfiguration && WitConfiguration.useIntentAttributes;

        /// <summary>
        /// When set to true, Conduit will be used. Otherwise, the legacy dispatching will be used.
        /// </summary>
        private bool UseConduit => UseIntentAttributes && WitConfiguration.useConduit;

        /// <summary>
        /// When set to true, the service will use platform integration.
        /// </summary>
        public virtual bool UsePlatformIntegrations
        {
            get => false;
            set => throw new NotImplementedException();
        }

        /// <summary>
        /// Wit configuration accessor via IWitConfigurationProvider.
        /// </summary>
        public WitConfiguration WitConfiguration
        {
            get
            {
                if (_witConfiguration == null)
                {
                    _witConfiguration = GetComponent<IWitConfigurationProvider>()?.Configuration;
                }
                return _witConfiguration;
            }
            set => _witConfiguration = value;
        }
        private WitConfiguration _witConfiguration;

        /// <summary>
        /// The Conduit parameter provider.
        /// </summary>
        private readonly IParameterProvider _conduitParameterProvider = new ParameterProvider();

        /// <summary>
        /// This field should not be accessed outside the Wit-Unity library. If you need access
        /// to events you should be using the VoiceService.VoiceEvents property instead.
        /// </summary>
        [Tooltip("Events that will fire before, during and after an activation")]
        [SerializeField] protected VoiceEvents events = new VoiceEvents();

        /// <summary>
        /// Internal events used to report telemetry. These events are reserved for internal
        /// use only and should not be used for any other purpose.
        /// </summary>
        protected TelemetryEvents telemetryEvents = new TelemetryEvents();

        /// <summary>
        /// The Conduit-based dispatcher that dispatches incoming invocations based on a manifest.
        /// </summary>
        internal IConduitDispatcher ConduitDispatcher { get; set; }

        /// <summary>
        /// Returns true if the service is actively communicating with Wit.ai during an activation.
        /// The mic may or may not still be active while this is true.
        /// </summary>
        public virtual bool IsRequestActive => base.Active;
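        // Usage sketch (illustrative only; "VoiceEventLogger" and its serialized field
        // are hypothetical names, not part of this library): external code should observe
        // the request lifecycle through the VoiceEvents property rather than the
        // serialized "events" field above.
        //
        //   public class VoiceEventLogger : MonoBehaviour
        //   {
        //       [SerializeField] private VoiceService voiceService;
        //
        //       private void OnEnable()
        //       {
        //           voiceService.VoiceEvents.OnFullTranscription.AddListener(t => Debug.Log($"Heard: {t}"));
        //           voiceService.VoiceEvents.OnResponse.AddListener(r => Debug.Log("Response received"));
        //       }
        //   }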
        /// <summary>
        /// Gets/Sets a custom transcription provider. This can be used to replace any built-in ASR
        /// with an on-device model or other provided source.
        /// </summary>
        public abstract ITranscriptionProvider TranscriptionProvider { get; set; }

        /// <summary>
        /// Returns true if this voice service is currently reading data from the microphone.
        /// </summary>
        public abstract bool MicActive { get; }

        public virtual VoiceEvents VoiceEvents
        {
            get => events;
            set => events = value;
        }

        // Return voice events
        protected override SpeechEvents GetSpeechEvents() => VoiceEvents;

        public virtual TelemetryEvents TelemetryEvents
        {
            get => telemetryEvents;
            set => telemetryEvents = value;
        }

        /// <summary>
        /// A subset of events around collection of audio data.
        /// </summary>
        public IAudioInputEvents AudioEvents => VoiceEvents;

        /// <summary>
        /// A subset of events around receiving transcriptions.
        /// </summary>
        public ITranscriptionEvent TranscriptionEvents => VoiceEvents;

        /// <summary>
        /// Returns true if the audio input should be read in an activation.
        /// </summary>
        protected abstract bool ShouldSendMicData { get; }

        /// <summary>
        /// Constructs a VoiceService.
        /// </summary>
        protected VoiceService()
        {
            _conduitParameterProvider.SetSpecializedParameter(ParameterProvider.WitResponseNodeReservedName, typeof(WitResponseNode));
            _conduitParameterProvider.SetSpecializedParameter(ParameterProvider.VoiceSessionReservedName, typeof(VoiceSession));
            var conduitDispatcherFactory = new ConduitDispatcherFactory(this);
            ConduitDispatcher = conduitDispatcherFactory.GetDispatcher();
        }

        #region TEXT REQUESTS
        /// <summary>
        /// Send text data for NLU processing. Results will return the same way a voice-based activation would.
        /// </summary>
        /// <param name="text">Text to be used for NLU processing</param>
        public void Activate(string text) =>
            ThreadUtility.BackgroundAsync(Logger, async () => await Activate(text, new WitRequestOptions())).WrapErrors();

        /// <summary>
        /// Send text data for NLU processing. Results will return the same way a voice-based activation would.
        /// </summary>
        /// <param name="text">Text to be used for NLU processing</param>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        public Task<VoiceServiceRequest> Activate(string text, WitRequestOptions requestOptions) =>
            Activate(text, requestOptions, new VoiceServiceRequestEvents());

        /// <summary>
        /// Send text data for NLU processing. Results will return the same way a voice-based activation would.
        /// </summary>
        /// <param name="text">Text to be used for NLU processing</param>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        public Task<VoiceServiceRequest> Activate(string text, VoiceServiceRequestEvents requestEvents) =>
            Activate(text, new WitRequestOptions(), requestEvents);

        /// <summary>
        /// Send text data for NLU processing with custom request options.
        /// </summary>
        /// <param name="text">Text to be used for NLU processing</param>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        public abstract Task<VoiceServiceRequest> Activate(string text, WitRequestOptions requestOptions, VoiceServiceRequestEvents requestEvents);
        #endregion TEXT REQUESTS

        #region AUDIO REQUESTS
        // Whether the next partial response will be the first received for the current request
        protected bool _waitingForFirstPartialAudio = true;

        /// <summary>
        /// Start listening for sound or speech from the user and start sending data to Wit.ai once sound or speech has been detected.
        /// </summary>
        public void Activate() => Activate(new WitRequestOptions());

        /// <summary>
        /// Start listening for sound or speech from the user and start sending data to Wit.ai once sound or speech has been detected.
        /// </summary>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        public void Activate(WitRequestOptions requestOptions) =>
            Activate(requestOptions, new VoiceServiceRequestEvents());
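        // Usage sketch (illustrative only; "PushToTalk" is a hypothetical caller): a
        // typical push-to-talk flow pairs Activate(), which waits for speech before
        // streaming audio to Wit.ai, with Deactivate(), which submits whatever was heard.
        //
        //   public class PushToTalk : MonoBehaviour
        //   {
        //       [SerializeField] private VoiceService voiceService;
        //
        //       public void OnButtonPressed()
        //       {
        //           if (voiceService.CanActivateAudio() && voiceService.CanSend())
        //           {
        //               voiceService.Activate();
        //           }
        //       }
        //
        //       public void OnButtonReleased() => voiceService.Deactivate();
        //   }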
        /// <summary>
        /// Start listening for sound or speech from the user and start sending data to Wit.ai once sound or speech has been detected.
        /// </summary>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        public VoiceServiceRequest Activate(VoiceServiceRequestEvents requestEvents) =>
            Activate(new WitRequestOptions(), requestEvents);

        /// <summary>
        /// Start listening for sound or speech from the user and start sending data to Wit.ai once sound or speech has been detected.
        /// </summary>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        public abstract VoiceServiceRequest Activate(WitRequestOptions requestOptions, VoiceServiceRequestEvents requestEvents);

        /// <summary>
        /// Activate the microphone and send data for NLU processing immediately without waiting for sound/speech from the user to begin.
        /// </summary>
        public void ActivateImmediately() => ActivateImmediately(new WitRequestOptions());

        /// <summary>
        /// Activate the microphone and send data for NLU processing immediately without waiting for sound/speech from the user to begin.
        /// </summary>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        public void ActivateImmediately(WitRequestOptions requestOptions) =>
            ActivateImmediately(requestOptions, new VoiceServiceRequestEvents());

        /// <summary>
        /// Activate the microphone and send data for NLU processing immediately without waiting for sound/speech from the user to begin.
        /// </summary>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        public VoiceServiceRequest ActivateImmediately(VoiceServiceRequestEvents requestEvents) =>
            ActivateImmediately(new WitRequestOptions(), requestEvents);

        /// <summary>
        /// Activate the microphone and send data for NLU processing immediately without waiting for sound/speech from the user to begin.
        /// </summary>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        public abstract VoiceServiceRequest ActivateImmediately(WitRequestOptions requestOptions, VoiceServiceRequestEvents requestEvents);
        #endregion AUDIO REQUESTS

        // Called when VoiceServiceRequest OnPartialResponse is returned & tries to end early if possible
        protected override void OnRequestPartialResponse(VoiceServiceRequest request, WitResponseNode responseNode)
        {
            if (_waitingForFirstPartialAudio)
            {
                _waitingForFirstPartialAudio = false;
                RuntimeTelemetry.Instance.LogPoint((OperationID)request.Options.OperationId, RuntimeTelemetryPoint.FirstPartialAudioFromServer);
            }
            base.OnRequestPartialResponse(request, responseNode);
            OnValidateEarly(request, responseNode);
        }

        // Request send resets the partial audio flag
        protected override void OnRequestSend(VoiceServiceRequest request)
        {
            _waitingForFirstPartialAudio = true;
            base.OnRequestSend(request);
        }
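        // Usage sketch (illustrative only; "StopListener" and the "stop" intent are
        // hypothetical): a handler registered on VoiceEvents.OnValidatePartialResponse
        // can mark a partial result as valid, which causes OnValidateEarly below to
        // complete the request before the full server response arrives.
        //
        //   public class StopListener : MonoBehaviour
        //   {
        //       [SerializeField] private VoiceService voiceService;
        //
        //       private void OnEnable() =>
        //           voiceService.VoiceEvents.OnValidatePartialResponse.AddListener(OnValidate);
        //
        //       private void OnValidate(VoiceSession session)
        //       {
        //           var intent = session.response.GetFirstIntentData();
        //           if (intent != null && intent.name == "stop")
        //           {
        //               session.validResponse = true;
        //           }
        //       }
        //   }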
        // Attempts to validate early if possible
        protected virtual void OnValidateEarly(VoiceServiceRequest request, WitResponseNode responseNode)
        {
            // Ignore unless the request can be validated
            if (request == null
                || request.State != VoiceRequestState.Transmitting
                || responseNode == null
                || VoiceEvents.OnValidatePartialResponse == null)
            {
                return;
            }

            // Create short response data
            VoiceSession validationData = GetVoiceSession(responseNode);

            // Call short response
            VoiceEvents.OnValidatePartialResponse.Invoke(validationData);

            // Invoke via Conduit
            if (UseConduit)
            {
                // Ignore without an intent
                WitIntentData intent = responseNode.GetFirstIntentData();
                if (intent != null)
                {
                    _conduitParameterProvider.PopulateParametersFromNode(responseNode);
                    _conduitParameterProvider.AddParameter(ParameterProvider.VoiceSessionReservedName, validationData);
                    _conduitParameterProvider.AddParameter(ParameterProvider.WitResponseNodeReservedName, responseNode);
                    ConduitDispatcher.InvokeAction(_conduitParameterProvider, intent.name, _witConfiguration.relaxedResolution, intent.confidence, true);
                }
            }

            // Deactivate & abort immediately but use the response data as results
            if (validationData.validResponse)
            {
                VLog.I("Validated Early");
                request.CompleteEarly();
            }
        }

        /// <summary>
        /// Returns objects of the specified type.
        /// </summary>
        /// <param name="type">The type.</param>
        /// <returns>Objects of the specified type.</returns>
        public IEnumerable<object> GetObjectsOfType(Type type)
        {
            return FindObjectsByType(type, FindObjectsSortMode.None);
        }

        protected virtual void Awake()
        {
            InitializeEventListeners();
        }

        private void InitializeEventListeners()
        {
            var audioEventListener = GetComponent<AudioEventListener>();
            if (!audioEventListener)
            {
                gameObject.AddComponent<AudioEventListener>();
            }

            var transcriptionEventListener = GetComponent<TranscriptionEventListener>();
            if (!transcriptionEventListener)
            {
                gameObject.AddComponent<TranscriptionEventListener>();
            }
        }

        protected override void OnEnable()
        {
            base.OnEnable();
            if (UseConduit)
            {
                InitializeConduit().WrapErrors();
            }
            else if (UseIntentAttributes)
            {
                MatchIntentRegistry.Initialize();
            }
            TranscriptionProvider?.OnFullTranscription.AddListener(OnFinalTranscription);
            VoiceEvents.OnResponse.AddListener(HandleResponse);
        }

        private async Task InitializeConduit()
        {
            await ConduitDispatcher.Initialize(_witConfiguration.ManifestLocalPath);
            if (_witConfiguration.relaxedResolution)
            {
                if (!ConduitDispatcher.Manifest.ResolveEntities())
                {
                    VLog.E("Failed to resolve Conduit entities");
                }
                foreach (var entity in ConduitDispatcher.Manifest.CustomEntityTypes)
                {
                    _conduitParameterProvider.AddCustomType(entity.Key, entity.Value);
                }
            }
        }

        protected override void OnDisable()
        {
            base.OnDisable();
            TranscriptionProvider?.OnFullTranscription.RemoveListener(OnFinalTranscription);
            VoiceEvents.OnResponse.RemoveListener(HandleResponse);
        }

        /// <summary>
        /// Activates with the transcribed text if a custom transcription provider returns a final transcription.
        /// </summary>
        protected virtual void OnFinalTranscription(string transcription)
        {
            if (TranscriptionProvider != null)
            {
                Activate(transcription);
            }
        }

        private VoiceSession GetVoiceSession(WitResponseNode response)
        {
            return new VoiceSession
            {
                service = this,
                response = response,
                validResponse = false
            };
        }

        protected virtual void HandleResponse(WitResponseNode response)
        {
            HandleIntents(response);
        }

        private void HandleIntents(WitResponseNode response)
        {
            var intents = response.GetIntents();
            foreach (var intent in intents)
            {
                HandleIntent(intent, response);
            }
        }

        private void HandleIntent(WitIntentData intent, WitResponseNode response)
        {
            if (UseConduit)
            {
                _conduitParameterProvider.PopulateParametersFromNode(response);
                _conduitParameterProvider.AddParameter(ParameterProvider.WitResponseNodeReservedName, response);
                ConduitDispatcher.InvokeAction(_conduitParameterProvider, intent.name, _witConfiguration.relaxedResolution, intent.confidence, false);
            }
            else if (UseIntentAttributes)
            {
                var methods = MatchIntentRegistry.RegisteredMethods[intent.name];
                foreach (var method in methods)
                {
                    ExecuteRegisteredMatch(method, intent, response);
                }
            }
        }
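        // Usage sketch (illustrative only; "LightController" and the "turn_on_light"
        // intent are hypothetical): with useIntentAttributes enabled and Conduit
        // disabled, HandleIntent above dispatches to methods tagged with [MatchIntent].
        // Per ExecuteRegisteredMatch below, such methods must take either no parameters
        // or a single WitResponseNode.
        //
        //   public class LightController : MonoBehaviour
        //   {
        //       [MatchIntent("turn_on_light")]
        //       public void TurnOnLight(WitResponseNode response)
        //       {
        //           Debug.Log("Matched utterance: " + response["text"]);
        //       }
        //   }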
        private void ExecuteRegisteredMatch(RegisteredMatchIntent registeredMethod, WitIntentData intent, WitResponseNode response)
        {
            if (intent.confidence >= registeredMethod.matchIntent.MinConfidence
                && intent.confidence <= registeredMethod.matchIntent.MaxConfidence)
            {
                foreach (var obj in GetObjectsOfType(registeredMethod.type))
                {
                    var parameters = registeredMethod.method.GetParameters();
                    if (parameters.Length == 0)
                    {
                        registeredMethod.method.Invoke(obj, Array.Empty<object>());
                        continue;
                    }
                    if (parameters[0].ParameterType != typeof(WitResponseNode) || parameters.Length > 2)
                    {
                        VLog.E("Match intent only supports methods with no parameters or with a WitResponseNode parameter. Enable Conduit or adjust the parameters");
                        continue;
                    }
                    if (parameters.Length == 1)
                    {
                        registeredMethod.method.Invoke(obj, new object[] { response });
                    }
                }
            }
        }
    }

    public interface IVoiceService : IVoiceEventProvider, ITelemetryEventsProvider, IVoiceActivationHandler
    {
        /// <summary>
        /// Returns true if the voice service is currently active or a request is transmitting.
        /// </summary>
        bool IsRequestActive { get; }

        /// <summary>
        /// When set to true, the service will use platform integration.
        /// </summary>
        bool UsePlatformIntegrations { get; set; }

        /// <summary>
        /// The currently running voice requests.
        /// </summary>
        HashSet<VoiceServiceRequest> Requests { get; }

        /// <summary>
        /// Returns true if the mic is still enabled.
        /// </summary>
        bool MicActive { get; }

        /// <summary>
        /// All events used for a voice service.
        /// </summary>
        new VoiceEvents VoiceEvents { get; set; }

        /// <summary>
        /// All events used for voice service telemetry.
        /// </summary>
        new TelemetryEvents TelemetryEvents { get; set; }

        /// <summary>
        /// Easy access for transcription.
        /// </summary>
        ITranscriptionProvider TranscriptionProvider { get; set; }

        /// <summary>
        /// Whether or not this service can listen to audio.
        /// </summary>
        /// <returns>True if audio can be listened to</returns>
        bool CanActivateAudio();

        /// <summary>
        /// Whether or not this service can perform requests.
        /// </summary>
        /// <returns>True if a request can be sent</returns>
        bool CanSend();
    }

    public interface IVoiceActivationHandler
    {
        /// <summary>
        /// Returns true if this voice service is currently active and listening with the mic.
        /// </summary>
        bool Active { get; }

        /// <summary>
        /// Send text data for NLU processing with custom request options & events.
        /// </summary>
        /// <param name="text">Text to be used for NLU processing</param>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        Task<VoiceServiceRequest> Activate(string text, WitRequestOptions requestOptions, VoiceServiceRequestEvents requestEvents);

        /// <summary>
        /// Activate the microphone, wait for the audio threshold, and then send data.
        /// </summary>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        VoiceServiceRequest Activate(WitRequestOptions requestOptions, VoiceServiceRequestEvents requestEvents);

        /// <summary>
        /// Activate the microphone and send data for NLU processing with custom request options.
        /// </summary>
        /// <param name="requestOptions">Additional options such as dynamic entities</param>
        /// <param name="requestEvents">Events specific to the request's lifecycle</param>
        VoiceServiceRequest ActivateImmediately(WitRequestOptions requestOptions, VoiceServiceRequestEvents requestEvents);

        /// <summary>
        /// Stop listening and submit the collected microphone data for processing.
        /// </summary>
        void Deactivate();

        /// <summary>
        /// Stop listening and abort any requests that may be active, without waiting for a response.
        /// </summary>
        void DeactivateAndAbortRequest();

        /// <summary>
        /// Deactivate the mic & abort a specific request.
        /// </summary>
        void DeactivateAndAbortRequest(VoiceServiceRequest request);
    }
}
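// Usage sketch (illustrative only; "CancelButton" is a hypothetical caller): the two
// deactivation paths above differ in intent. Deactivate() stops listening but still
// submits the captured audio for processing; DeactivateAndAbortRequest() discards it.
//
//   public class CancelButton : MonoBehaviour
//   {
//       [SerializeField] private VoiceService voiceService;
//
//       public void OnCancelPressed()
//       {
//           if (voiceService.IsRequestActive)
//           {
//               voiceService.DeactivateAndAbortRequest();
//           }
//       }
//   }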