// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.dialogflow.v2beta1;

import "google/api/annotations.proto";
import "google/api/resource.proto";
import "google/cloud/dialogflow/v2beta1/agent.proto";
import "google/cloud/dialogflow/v2beta1/audio_config.proto";
import "google/cloud/dialogflow/v2beta1/context.proto";
import "google/cloud/dialogflow/v2beta1/intent.proto";
import "google/cloud/dialogflow/v2beta1/session_entity_type.proto";
import "google/protobuf/struct.proto";
import "google/rpc/status.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.V2beta1";
option go_package = "google.golang.org/genproto/googleapis/cloud/dialogflow/v2beta1;dialogflow";
option java_multiple_files = true;
option java_outer_classname = "SessionProto";
option java_package = "com.google.cloud.dialogflow.v2beta1";
option objc_class_prefix = "DF";

// A session represents an interaction with a user. You retrieve user input
// and pass it to the [DetectIntent][google.cloud.dialogflow.v2beta1.Sessions.DetectIntent] (or
// [StreamingDetectIntent][google.cloud.dialogflow.v2beta1.Sessions.StreamingDetectIntent]) method to determine
// user intent and respond.
service Sessions {
  // Processes a natural language query and returns structured, actionable data
  // as a result. This method is not idempotent, because it may cause contexts
  // and session entity types to be updated, which in turn might affect
  // results of future queries.
  rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/{session=projects/*/agent/sessions/*}:detectIntent"
      body: "*"
      additional_bindings {
        post: "/v2beta1/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
    };
  }

  // Processes a natural language query in audio format in a streaming fashion
  // and returns structured, actionable data as a result. This method is only
  // available via the gRPC API (not REST).
  rpc StreamingDetectIntent(stream StreamingDetectIntentRequest) returns (stream StreamingDetectIntentResponse) {
  }
}

// The request to detect user's intent.
message DetectIntentRequest {
  // Required. The name of the session this query is sent to. Format:
  // `projects/<Project ID>/agent/sessions/<Session ID>`, or
  // `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  // ID>/sessions/<Session ID>`. If `Environment ID` is not specified, we assume
  // default 'draft' environment. If `User ID` is not specified, we are using
  // "-". It’s up to the API caller to choose an appropriate `Session ID` and
  // `User Id`. They can be a random numbers or some type of user and session
  // identifiers (preferably hashed). The length of the `Session ID` and
  // `User ID` must not exceed 36 characters.
  string session = 1;

  // Optional. The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1.  an audio config
  //     which instructs the speech recognizer how to process the speech audio,
  //
  // 2.  a conversational query in the form of text, or
  //
  // 3.  an event that specifies which intent to trigger.
  QueryInput query_input = 3;

  // Optional. Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and agent-level speech synthesizer is not
  // configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 4;

  // Optional. The natural language speech audio to be processed. This field
  // should be populated iff `query_input` is set to an input audio config.
  // A single request can contain up to 1 minute of speech audio data.
  bytes input_audio = 5;
}

// The message returned from the DetectIntent method.
message DetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 2;

  // If Knowledge Connectors are enabled, there could be more than one result
  // returned for a given query or event, and this field will contain all
  // results except for the top one, which is captured in query_result. The
  // alternative results are ordered by decreasing
  // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
  // disabled, this field will be empty until multiple responses for regular
  // intents are supported, at which point those additional results will be
  // surfaced here.
  repeated QueryResult alternative_query_results = 5;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 3;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  bytes output_audio = 4;

  // Instructs the speech synthesizer how to generate the output audio. This
  // field is populated from the agent-level speech synthesizer configuration,
  // if enabled.
  OutputAudioConfig output_audio_config = 6;
}

// Represents the parameters of the conversational query.
message QueryParameters {
  // Optional. The time zone of this conversational query from the
  // [time zone database](https://www.iana.org/time-zones), e.g.,
  // America/New_York, Europe/Paris. If not provided, the time zone specified in
  // agent settings is used.
  string time_zone = 1;

  // Optional. The geo location of this conversational query.
  google.type.LatLng geo_location = 2;

  // Optional. The collection of contexts to be activated before this query is
  // executed.
  repeated Context contexts = 3;

  // Optional. Specifies whether to delete all contexts in the current session
  // before the new ones are activated.
  bool reset_contexts = 4;

  // Optional. Additional session entity types to replace or extend developer
  // entity types with. The entity synonyms apply to all languages and persist
  // for the session of this query.
  repeated SessionEntityType session_entity_types = 5;

  // Optional. This field can be used to pass custom data into the webhook
  // associated with the agent. Arbitrary JSON objects are supported.
  google.protobuf.Struct payload = 6;

  // Optional. KnowledgeBases to get alternative results from. If not set, the
  // KnowledgeBases enabled in the agent (through UI) will be used.
  // Format:  `projects/<Project ID>/knowledgeBases/<Knowledge Base ID>`.
  repeated string knowledge_base_names = 12;

  // Optional. Configures the type of sentiment analysis to perform. If not
  // provided, sentiment analysis is not performed.
  // Note: Sentiment Analysis is only currently available for Enterprise Edition
  // agents.
  SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10;
}

// Represents the query input. It can contain either:
//
// 1.  An audio config which
//     instructs the speech recognizer how to process the speech audio.
//
// 2.  A conversational query in the form of text,.
//
// 3.  An event that specifies which intent to trigger.
message QueryInput {
  // Required. The input specification.
  oneof input {
    // Instructs the speech recognizer how to process the speech audio.
    InputAudioConfig audio_config = 1;

    // The natural language text to be processed.
    TextInput text = 2;

    // The event to be processed.
    EventInput event = 3;
  }
}

// Represents the result of conversational query or event processing.
message QueryResult {
  // The original conversational query text:
  // - If natural language text was provided as input, `query_text` contains
  //   a copy of the input.
  // - If natural language speech audio was provided as input, `query_text`
  //   contains the speech recognition result. If speech recognizer produced
  //   multiple alternatives, a particular one is picked.
  // - If an event was provided as input, `query_text` is not set.
  string query_text = 1;

  // The language that was triggered during intent detection.
  // See [Language
  // Support](https://cloud.google.com/dialogflow-enterprise/docs/reference/language)
  // for a list of the currently supported language codes.
  string language_code = 15;

  // The Speech recognition confidence between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. The default of 0.0 is a sentinel value indicating that confidence
  // was not set.
  //
  // This field is not guaranteed to be accurate or set. In particular this
  // field isn't set for StreamingDetectIntent since the streaming endpoint has
  // separate confidence estimates per portion of the audio in
  // StreamingRecognitionResult.
  float speech_recognition_confidence = 2;

  // The action name from the matched intent.
  string action = 3;

  // The collection of extracted parameters.
  google.protobuf.Struct parameters = 4;

  // This field is set to:
  // - `false` if the matched intent has required parameters and not all of
  //    the required parameter values have been collected.
  // - `true` if all required parameter values have been collected, or if the
  //    matched intent doesn't contain any required parameters.
  bool all_required_params_present = 5;

  // The text to be pronounced to the user or shown on the screen.
  // Note: This is a legacy field, `fulfillment_messages` should be preferred.
  string fulfillment_text = 6;

  // The collection of rich messages to present to the user.
  repeated Intent.Message fulfillment_messages = 7;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `source` field returned in the webhook response.
  string webhook_source = 8;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `payload` field returned in the webhook response.
  google.protobuf.Struct webhook_payload = 9;

  // The collection of output contexts. If applicable,
  // `output_contexts.parameters` contains entries with name
  // `<parameter name>.original` containing the original parameter values
  // before the query.
  repeated Context output_contexts = 10;

  // The intent that matched the conversational query. Some, not
  // all fields are filled in this message, including but not limited to:
  // `name`, `display_name` and `webhook_state`.
  Intent intent = 11;

  // The intent detection confidence. Values range from 0.0
  // (completely uncertain) to 1.0 (completely certain).
  // If there are `multiple knowledge_answers` messages, this value is set to
  // the greatest `knowledgeAnswers.match_confidence` value in the list.
  float intent_detection_confidence = 12;

  // The free-form diagnostic info. For example, this field could contain
  // webhook call latency. The string keys of the Struct's fields map can change
  // without notice.
  google.protobuf.Struct diagnostic_info = 14;

  // The sentiment analysis result, which depends on the
  // `sentiment_analysis_request_config` specified in the request.
  SentimentAnalysisResult sentiment_analysis_result = 17;

  // The result from Knowledge Connector (if any), ordered by decreasing
  // `KnowledgeAnswers.match_confidence`.
  KnowledgeAnswers knowledge_answers = 18;
}

// Represents the result of querying a Knowledge base.
message KnowledgeAnswers {
  // An answer from Knowledge Connector.
  message Answer {
    // Represents the system's confidence that this knowledge answer is a good
    // match for this conversational query.
    enum MatchConfidenceLevel {
      // Not specified.
      MATCH_CONFIDENCE_LEVEL_UNSPECIFIED = 0;

      // Indicates that the confidence is low.
      LOW = 1;

      // Indicates our confidence is medium.
      MEDIUM = 2;

      // Indicates our confidence is high.
      HIGH = 3;
    }

    // Indicates which Knowledge Document this answer was extracted from.
    // Format: `projects/<Project ID>/knowledgeBases/<Knowledge Base
    // ID>/documents/<Document ID>`.
    string source = 1;

    // The corresponding FAQ question if the answer was extracted from a FAQ
    // Document, empty otherwise.
    string faq_question = 2;

    // The piece of text from the `source` knowledge base document that answers
    // this conversational query.
    string answer = 3;

    // The system's confidence level that this knowledge answer is a good match
    // for this conversational query.
    // NOTE: The confidence level for a given `<query, answer>` pair may change
    // without notice, as it depends on models that are constantly being
    // improved. However, it will change less frequently than the confidence
    // score below, and should be preferred for referencing the quality of an
    // answer.
    MatchConfidenceLevel match_confidence_level = 4;

    // The system's confidence score that this Knowledge answer is a good match
    // for this conversational query.
    // The range is from 0.0 (completely uncertain) to 1.0 (completely certain).
    // Note: The confidence score is likely to vary somewhat (possibly even for
    // identical requests), as the underlying model is under constant
    // improvement. It may be deprecated in the future. We recommend using
    // `match_confidence_level` which should be generally more stable.
    float match_confidence = 5;
  }

  // A list of answers from Knowledge Connector.
  repeated Answer answers = 1;
}

// The top-level message sent by the client to the
// `StreamingDetectIntent` method.
//
// Multiple request messages should be sent in order:
//
// 1.  The first message must contain `session`, `query_input` plus optionally
//     `query_params` and/or `single_utterance`. If the client wants to receive
//     an audio response, it should also contain `output_audio_config`.
//     The message must not contain `input_audio`.
//
// 2.  If `query_input` was set to a streaming input audio config,
//     all subsequent messages must contain only `input_audio`.
//     Otherwise, finish the request stream.
message StreamingDetectIntentRequest {
  // Required. The name of the session the query is sent to.
  // Format of the session name:
  // `projects/<Project ID>/agent/sessions/<Session ID>`, or
  // `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  // ID>/sessions/<Session ID>`. If `Environment ID` is not specified, we assume
  // default 'draft' environment. If `User ID` is not specified, we are using
  // "-". It’s up to the API caller to choose an appropriate `Session ID` and
  // `User Id`. They can be a random numbers or some type of user and session
  // identifiers (preferably hashed). The length of the `Session ID` and
  // `User ID` must not exceed 36 characters.
  string session = 1;

  // Optional. The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1.  an audio config which instructs the speech recognizer how to process
  //     the speech audio,
  //
  // 2.  a conversational query in the form of text, or
  //
  // 3.  an event that specifies which intent to trigger.
  QueryInput query_input = 3;

  // Optional. If `false` (default), recognition does not cease until the
  // client closes the stream.
  // If `true`, the recognizer will detect a single spoken utterance in input
  // audio. Recognition ceases when it detects the audio's voice has
  // stopped or paused. In this case, once a detected intent is received, the
  // client should close the stream and start a new request with a new stream as
  // needed.
  // This setting is ignored when `query_input` is a piece of text or an event.
  bool single_utterance = 4;

  // Optional. Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and agent-level speech synthesizer is not
  // configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 5;

  // Optional. The input audio content to be recognized. Must be sent if
  // `query_input` was set to a streaming input audio config. The complete audio
  // over all streaming messages must not exceed 1 minute.
  bytes input_audio = 6;
}

// The top-level message returned from the
// `StreamingDetectIntent` method.
//
// Multiple response messages can be returned in order:
//
// 1.  If the input was set to streaming audio, the first one or more messages
//     contain `recognition_result`. Each `recognition_result` represents a more
//     complete transcript of what the user said. The last `recognition_result`
//     has `is_final` set to `true`.
//
// 2.  The next message contains `response_id`, `query_result`,
//     `alternative_query_results` and optionally `webhook_status` if a WebHook
//     was called.
//
// 3.  If `output_audio_config` was specified in the request or agent-level
//     speech synthesizer is configured, all subsequent messages contain
//     `output_audio` and `output_audio_config`.
message StreamingDetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The result of speech recognition.
  StreamingRecognitionResult recognition_result = 2;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 3;

  // If Knowledge Connectors are enabled, there could be more than one result
  // returned for a given query or event, and this field will contain all
  // results except for the top one, which is captured in query_result. The
  // alternative results are ordered by decreasing
  // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
  // disabled, this field will be empty until multiple responses for regular
  // intents are supported, at which point those additional results will be
  // surfaced here.
  repeated QueryResult alternative_query_results = 7;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 4;

  // The audio data bytes encoded as specified in the request.
  bytes output_audio = 5;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;
}

// Contains a speech recognition result corresponding to a portion of the audio
// that is currently being processed or an indication that this is the end
// of the single requested utterance.
//
// Example:
//
// 1.  transcript: "tube"
//
// 2.  transcript: "to be a"
//
// 3.  transcript: "to be"
//
// 4.  transcript: "to be or not to be"
//     is_final: true
//
// 5.  transcript: " that's"
//
// 6.  transcript: " that is"
//
// 7.  message_type: `MESSAGE_TYPE_END_OF_SINGLE_UTTERANCE`
//
// 8.  transcript: " that is the question"
//     is_final: true
//
// Only two of the responses contain final results (#4 and #8 indicated by
// `is_final: true`). Concatenating these generates the full transcript: "to be
// or not to be that is the question".
//
// In each response we populate:
//
// *  for `MESSAGE_TYPE_TRANSCRIPT`: `transcript` and possibly `is_final`.
//
// *  for `MESSAGE_TYPE_END_OF_SINGLE_UTTERANCE`: only `message_type`.
message StreamingRecognitionResult {
  // Type of the response message.
  enum MessageType {
    // Not specified. Should never be used.
    MESSAGE_TYPE_UNSPECIFIED = 0;

    // Message contains a (possibly partial) transcript.
    TRANSCRIPT = 1;

    // Event indicates that the server has detected the end of the user's speech
    // utterance and expects no additional speech. Therefore, the server will
    // not process additional audio (although it may subsequently return
    // additional results). The client should stop sending additional audio
    // data, half-close the gRPC connection, and wait for any additional results
    // until the server closes the gRPC connection. This message is only sent if
    // `single_utterance` was set to `true`, and is not used otherwise.
    END_OF_SINGLE_UTTERANCE = 2;
  }

  // Type of the result message.
  MessageType message_type = 1;

  // Transcript text representing the words that the user spoke.
  // Populated if and only if `message_type` = `MESSAGE_TYPE_TRANSCRIPT`.
  string transcript = 2;

  // If `false`, the `StreamingRecognitionResult` represents an
  // interim result that may change. If `true`, the recognizer will not return
  // any further hypotheses about this piece of the audio. May only be populated
  // for `message_type` = `MESSAGE_TYPE_TRANSCRIPT`.
  bool is_final = 3;

  // The Speech confidence between 0.0 and 1.0 for the current portion of audio.
  // A higher number indicates an estimated greater likelihood that the
  // recognized words are correct. The default of 0.0 is a sentinel value
  // indicating that confidence was not set.
  //
  // This field is typically only provided if `is_final` is true and you should
  // not rely on it being accurate or even set.
  float confidence = 4;
}

// Represents the natural language text to be processed.
message TextInput {
  // Required. The UTF-8 encoded natural language text to be processed.
  // Text length must not exceed 256 characters.
  string text = 1;

  // Required. The language of this conversational query. See [Language
  // Support](https://cloud.google.com/dialogflow-enterprise/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 2;
}

// Events allow for matching intents by event name instead of the natural
// language input. For instance, input `<event: { name: "welcome_event",
// parameters: { name: "Sam" } }>` can trigger a personalized welcome response.
// The parameter `name` may be used by the agent in the response:
// `"Hello #welcome_event.name! What can I do for you today?"`.
message EventInput {
  // Required. The unique identifier of the event.
  string name = 1;

  // Optional. The collection of parameters associated with the event.
  google.protobuf.Struct parameters = 2;

  // Required. The language of this query. See [Language
  // Support](https://cloud.google.com/dialogflow-enterprise/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 3;
}

// Configures the types of sentiment analysis to perform.
message SentimentAnalysisRequestConfig {
  // Optional. Instructs the service to perform sentiment analysis on
  // `query_text`. If not provided, sentiment analysis is not performed on
  // `query_text`.
  bool analyze_query_text_sentiment = 1;
}

// The result of sentiment analysis as configured by
// `sentiment_analysis_request_config`.
message SentimentAnalysisResult {
  // The sentiment analysis result for `query_text`.
  Sentiment query_text_sentiment = 1;
}

// The sentiment, such as positive/negative feeling or association, for a unit
// of analysis, such as the query text.
message Sentiment {
  // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
  // sentiment).
  float score = 1;

  // A non-negative number in the [0, +inf) range, which represents the absolute
  // magnitude of sentiment, regardless of score (positive or negative).
  float magnitude = 2;
}