// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.translation.v3beta1;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Translate.V3Beta1";
option go_package = "google.golang.org/genproto/googleapis/cloud/translate/v3beta1;translate";
option java_multiple_files = true;
option java_outer_classname = "TranslationServiceProto";
option java_package = "com.google.cloud.translate.v3beta1";
option php_namespace = "Google\\Cloud\\Translate\\V3beta1";
option ruby_package = "Google::Cloud::Translate::V3beta1";

// Proto file for the Cloud Translation API (v3beta1).

// Provides natural language translation operations: text translation
// (synchronous and batch), language detection, supported-language lookup, and
// glossary management.
service TranslationService {
  // Translates the input text and returns the translated text.
  rpc TranslateText(TranslateTextRequest) returns (TranslateTextResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:translateText"
      body: "*"
    };
  }

  // Detects the language of the text within a request.
  rpc DetectLanguage(DetectLanguageRequest) returns (DetectLanguageResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:detectLanguage"
      body: "*"
    };
  }

  // Returns the list of languages supported for translation.
  rpc GetSupportedLanguages(GetSupportedLanguagesRequest)
      returns (SupportedLanguages) {
    option (google.api.http) = {
      get: "/v3beta1/{parent=projects/*/locations/*}/supportedLanguages"
    };
  }

  // Translates a large volume of text in asynchronous batch mode.
  // Output is produced in real time while the inputs are being processed. If
  // the caller cancels the request, partial results may still be available at
  // the specified output location (for each individual input file, output is
  // all or nothing).
  //
  // This call returns immediately; use google.longrunning.Operation.name to
  // poll the status of the call. On success, the operation's `response` field
  // holds a BatchTranslateResponse.
  rpc BatchTranslateText(BatchTranslateTextRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:batchTranslateText"
      body: "*"
    };
  }

  // Creates a glossary and returns the long-running operation, whose
  // `metadata` field holds a CreateGlossaryMetadata.
  // Returns NOT_FOUND if the project doesn't exist.
  rpc CreateGlossary(CreateGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}/glossaries"
      body: "glossary"
    };
  }

  // Lists the glossaries in a project.
  // Returns NOT_FOUND if the project doesn't exist.
  rpc ListGlossaries(ListGlossariesRequest) returns (ListGlossariesResponse) {
    option (google.api.http) = {
      get: "/v3beta1/{parent=projects/*/locations/*}/glossaries"
    };
  }

  // Gets a glossary.
  // Returns NOT_FOUND if the glossary doesn't exist.
  rpc GetGlossary(GetGlossaryRequest) returns (Glossary) {
    option (google.api.http) = {
      get: "/v3beta1/{name=projects/*/locations/*/glossaries/*}"
    };
  }

  // Deletes a glossary, or cancels glossary construction if the glossary
  // isn't created yet. The returned long-running operation carries a
  // DeleteGlossaryMetadata in `metadata` and a DeleteGlossaryResponse in
  // `response`.
  // Returns NOT_FOUND if the glossary doesn't exist.
  rpc DeleteGlossary(DeleteGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v3beta1/{name=projects/*/locations/*/glossaries/*}"
    };
  }
}

// Configures which glossary should be used for a specific target language,
// and defines options for applying that glossary.
//
// Used as `TranslateTextRequest.glossary_config`, echoed back in
// `Translation.glossary_config`, and used as the value type of
// `BatchTranslateTextRequest.glossaries` (keyed by target language code).
message TranslateTextGlossaryConfig {
  // Required. The resource name of the glossary to use for this translation,
  // in the format `projects/*/locations/*/glossaries/*`.
  string glossary = 1;

  // Optional. Whether glossary terms are matched case-insensitively.
  // Defaults to false when not set.
  bool ignore_case = 2;
}

// The request message for synchronous translation (the TranslateText RPC).
//
// NOTE(review): field number 2 is not declared in this message; if it
// belonged to a removed field it should be marked `reserved` -- confirm.
message TranslateTextRequest {
  // Required. The content of the input in string format.
  // We recommend keeping the total content under 30k codepoints.
  // Use BatchTranslateText for larger text.
  repeated string contents = 1;

  // Optional. The format of the source text, for example, "text/html" or
  // "text/plain". If left blank, the MIME type is assumed to be "text/html".
  string mime_type = 3;

  // Optional. The BCP-47 language code of the input text if
  // known, for example, "en-US" or "sr-Latn". Supported language codes are
  // listed in Language Support. If the source language isn't specified, the API
  // attempts to identify the source language automatically and returns the
  // source language within the response.
  string source_language_code = 4;

  // Required. The BCP-47 language code to use for translation of the input
  // text, set to one of the language codes listed in Language Support.
  string target_language_code = 5;

  // Optional. Only used when making a regionalized call.
  // Format:
  // projects/{project-id}/locations/{location-id}.
  //
  // Only a custom model/glossary within the same location-id can be used.
  // Otherwise 400 is returned.
  //
  // NOTE(review): the TranslateText HTTP binding takes this field from the
  // URL path (`{parent=projects/*/locations/*}`), so REST callers always
  // supply it; confirm whether "Optional" still holds.
  string parent = 8;

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on the model type:
  // 1. Custom models:
  // projects/{project-id}/locations/{location-id}/models/{model-id}.
  // 2. General (built-in) models:
  // projects/{project-id}/locations/{location-id}/models/general/nmt
  // projects/{project-id}/locations/{location-id}/models/general/base
  //
  // For global (non-regionalized) requests, use {location-id} 'global'.
  // For example,
  // projects/{project-id}/locations/global/models/general/nmt
  //
  // If missing, the system decides which Google base model to use.
  string model = 6;

  // Optional. Glossary to be applied. The glossary needs to be in the same
  // region as the model, otherwise an INVALID_ARGUMENT error is returned.
  TranslateTextGlossaryConfig glossary_config = 7;
}

// The main language translation response message, returned by the
// TranslateText RPC.
//
// NOTE(review): field number 2 is not declared in this message; if it
// belonged to a removed field it should be marked `reserved` -- confirm.
message TranslateTextResponse {
  // Text translation responses with no glossary applied.
  // This field has the same length as `contents` in TranslateTextRequest.
  repeated Translation translations = 1;

  // Text translation responses if a glossary is provided in the request.
  // This could be the same as `translations` above if no terms apply.
  // This field has the same length as `contents` in TranslateTextRequest.
  repeated Translation glossary_translations = 3;
}

// A single translation response; one element of
// `TranslateTextResponse.translations` or
// `TranslateTextResponse.glossary_translations`.
message Translation {
  // Text translated into the target language.
  string translated_text = 1;

  // Only present when `model` is present in the request.
  // This is the same as the `model` provided in the request.
  string model = 2;

  // The BCP-47 language code of the source text in the initial request,
  // detected automatically, if no source language was passed within the
  // initial request. If the source language was passed, auto-detection of the
  // language does not occur and this field will be empty.
  string detected_language_code = 4;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 3;
}

// The request message for language detection (the DetectLanguage RPC).
//
// NOTE(review): field number 2 is not declared in this message; if it
// belonged to a removed field it should be marked `reserved` -- confirm.
message DetectLanguageRequest {
  // Optional. Only used when making a regionalized call.
  // Format:
  // projects/{project-id}/locations/{location-id}.
  //
  // Only a custom model within the same location-id can be used.
  // Otherwise 400 is returned.
  //
  // NOTE(review): the DetectLanguage HTTP binding takes this field from the
  // URL path (`{parent=projects/*/locations/*}`), so REST callers always
  // supply it; confirm whether "Optional" still holds.
  string parent = 5;

  // Optional. The language detection model to be used, in the format
  // projects/{project-id}/locations/{location-id}/models/language-detection/{model-id}
  // If not specified, the default model is used.
  string model = 4;

  // Required. The source of the document from which to detect the language.
  oneof source {
    // The content of the input stored as a string.
    string content = 1;
  }

  // Optional. The format of the source text, for example, "text/html" or
  // "text/plain". If left blank, the MIME type is assumed to be "text/html".
  string mime_type = 3;
}

// A single detected language together with its detection confidence; one
// element of `DetectLanguageResponse.languages`.
message DetectedLanguage {
  // The BCP-47 language code of the source content in the request, detected
  // automatically.
  string language_code = 1;

  // The confidence of the detection result for this language.
  float confidence = 2;
}

// The response message for language detection (the DetectLanguage RPC).
message DetectLanguageResponse {
  // A list of detected languages sorted by detection confidence in descending
  // order; the most probable language comes first.
  repeated DetectedLanguage languages = 1;
}

// The request message for discovering supported languages (the
// GetSupportedLanguages RPC).
message GetSupportedLanguagesRequest {
  // Optional. Used for making regionalized calls.
  // Format: projects/{project-id}/locations/{location-id}.
  // For global calls, use projects/{project-id}/locations/global.
  // If missing, the call is treated as a global call.
  //
  // Only custom models within the same location-id can be used.
  // Otherwise 400 is returned.
  //
  // NOTE(review): the GetSupportedLanguages HTTP binding takes this field
  // from the URL path (`{parent=projects/*/locations/*}`), so REST callers
  // always supply it; confirm how "If missing" applies.
  string parent = 3;

  // Optional. The language to use to return localized, human-readable names
  // of supported languages. If missing, the default language is English.
  string display_language_code = 1;

  // Optional. Get supported languages of this model.
  // The format depends on the model type:
  // 1. Custom models:
  // projects/{project-id}/locations/{location-id}/models/{model-id}.
  // 2. General (built-in) models:
  // projects/{project-id}/locations/{location-id}/models/general/nmt
  // projects/{project-id}/locations/{location-id}/models/general/base
  // Returns languages supported by the specified model.
  // If missing, we get supported languages of the Google general NMT model.
  string model = 2;
}

// The response message for discovering supported languages (returned by the
// GetSupportedLanguages RPC).
message SupportedLanguages {
  // A list of supported language responses. This list contains an entry
  // for each language the Translation API supports.
  repeated SupportedLanguage languages = 1;
}

// A single supported language response; holds the information related to one
// supported language. One element of `SupportedLanguages.languages`.
message SupportedLanguage {
  // Supported language code, generally consisting of its ISO 639-1
  // identifier, for example, 'en', 'ja'. In certain cases, BCP-47 codes
  // including language and region identifiers are returned (for example,
  // 'zh-TW' and 'zh-CN').
  string language_code = 1;

  // Human-readable name of the language, localized in the display language
  // specified in the request.
  string display_name = 2;

  // Whether this language can be used as a source language.
  bool support_source = 3;

  // Whether this language can be used as a target language.
  bool support_target = 4;
}

// The Google Cloud Storage (GCS) location for the input content.
// Used by both InputConfig and GlossaryInputConfig.
message GcsSource {
  // Required. Source data URI. For example, `gs://my_bucket/my_object`.
  string input_uri = 1;
}

// Input configuration for batch translation; one element of
// `BatchTranslateTextRequest.input_configs`.
message InputConfig {
  // Optional. Can be "text/plain" or "text/html".
  // For `.tsv`, "text/html" is used if mime_type is missing.
  // For `.html`, this field must be "text/html" or empty.
  // For `.txt`, this field must be "text/plain" or empty.
  string mime_type = 1;

  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.tsv`) or a wildcard (for example,
    // `gs://translation-test/*`). If a file extension is `.tsv`, it can
    // contain either one or two columns. The first column (optional) is the id
    // of the text request. If the first column is missing, we use the row
    // number (0-based) from the input file as the ID in the output file. The
    // second column is the actual text to be translated. We recommend each row
    // be <= 10K Unicode codepoints, otherwise an error might be returned.
    //
    // The other supported file extensions are `.txt` and `.html`; such a file
    // is treated as a single large chunk of text.
    GcsSource gcs_source = 2;
  }
}

// The Google Cloud Storage (GCS) location for the output content.
message GcsDestination {
  // Required. There must be no files under 'output_uri_prefix'.
  // 'output_uri_prefix' must end with "/". Otherwise error 400 is returned.
  string output_uri_prefix = 1;
}

// Output configuration for batch translation; see
// `BatchTranslateTextRequest.output_config`.
message OutputConfig {
  // Required. The destination of output.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input file (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files, where n is the number of
    // target_language_codes in the BatchTranslateTextRequest.
    //
    // Output files (tsv) generated are compliant with RFC 4180 except that
    // record delimiters are '\n' instead of '\r\n'. We don't provide any way to
    // change record delimiters.
    //
    // While the input files are being processed, we write/update an index file
    // 'index.csv' under 'output_uri_prefix' (for example,
    // gs://translation-test/index.csv). The index file is generated/updated as
    // new files are being translated. The format is:
    //
    // input_file,target_language_code,translations_file,errors_file,
    // glossary_translations_file,glossary_errors_file
    //
    // input_file is one file we matched using gcs_source.input_uri.
    // target_language_code is provided in the request.
    // translations_file contains the translations (details provided below).
    // errors_file contains the errors during processing of the file (details
    // below). Both translations_file and errors_file could be empty
    // strings if we have no content to output.
    // glossary_translations_file and glossary_errors_file are always empty
    // strings if input_file is tsv. They could also be empty if we have no
    // content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect that all the content in input_file
    // has been processed and is ready to be consumed (that is, no partial
    // output file is written).
    //
    // The format of translations_file (for target language code 'trg') is:
    // gs://translation_test/a_b_c_'trg'_translations.[extension]
    //
    // If the input file extension is tsv, the output has the following
    // columns:
    // Column 1: ID of the request provided in the input; if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: translation without applying a glossary. Empty string if there
    // is an error.
    // Column 4 (only present if a glossary is provided in the request):
    // translation after applying the glossary. Empty string if there is an
    // error applying the glossary. Could be the same string as column 3 if
    // no glossary is applied.
    //
    // If the input file extension is txt or html, the translation is directly
    // written to the output file. If a glossary is requested, a separate
    // glossary_translations_file is written, with the format
    // gs://translation_test/a_b_c_'trg'_glossary_translations.[extension]
    //
    // The format of errors_file (for target language code 'trg') is:
    // gs://translation_test/a_b_c_'trg'_errors.[extension]
    //
    // If the input file extension is tsv, errors_file has the following
    // columns:
    // Column 1: ID of the request provided in the input; if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: Error detail for the translation. Could be empty.
    // Column 4 (only present if a glossary is provided in the request):
    // Error when applying the glossary.
    //
    // If the input file extension is txt or html, a glossary_errors_file that
    // contains error details is generated. It has the format
    // gs://translation_test/a_b_c_'trg'_glossary_errors.[extension]
    GcsDestination gcs_destination = 1;
  }
}

// The batch translation request (the BatchTranslateText RPC).
message BatchTranslateTextRequest {
  // Optional. Only used when making a regionalized call.
  // Format:
  // projects/{project-id}/locations/{location-id}.
  //
  // Only custom models/glossaries within the same location-id can be used.
  // Otherwise 400 is returned.
  //
  // NOTE(review): the BatchTranslateText HTTP binding takes this field from
  // the URL path (`{parent=projects/*/locations/*}`), so REST callers always
  // supply it; confirm whether "Optional" still holds.
  string parent = 1;

  // Required. Source language code.
  string source_language_code = 2;

  // Required. Specify up to 10 language codes here.
  repeated string target_language_codes = 3;

  // Optional. The models to use for translation. The map's key is the target
  // language code. The map's value is the model name. The value can be a
  // built-in general model, or a custom model built by AutoML.
  //
  // The value format depends on the model type:
  // 1. Custom models:
  // projects/{project-id}/locations/{location-id}/models/{model-id}.
  // 2. General (built-in) models:
  // projects/{project-id}/locations/{location-id}/models/general/nmt
  // projects/{project-id}/locations/{location-id}/models/general/base
  //
  // If the map is empty or a specific model is not requested for a language
  // pair, then the default Google model is used.
  map<string, string> models = 4;

  // Required. Input configurations.
  // The total number of files matched should be <= 1000.
  // The total content size should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated InputConfig input_configs = 5;

  // Required. Output configuration.
  // If 2 input configs match the same file (that is, the same input path),
  // we don't generate output for duplicate inputs.
  OutputConfig output_config = 6;

  // Optional. Glossaries to be applied for translation.
  // The map is keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7;
}

// State metadata for the batch translation operation.
message BatchTranslateMetadata {
  // State of the job.
  //
  // The value names are not prefixed with the enum name; because this enum is
  // nested, they are scoped to BatchTranslateMetadata and do not collide with
  // the identically named values in other messages' `State` enums.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item has been successfully
    // processed.
    SUCCEEDED = 2;

    // The batch is done and no item has been successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after the caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Number of successfully translated characters so far (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process so far (Unicode
  // codepoints).
  int64 failed_characters = 3;

  // Total number of characters (Unicode codepoints).
  // This is the total number of codepoints from input files times the number of
  // target languages. It appears here shortly after the call is submitted.
  int64 total_characters = 4;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 5;
}

// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateText if at least one sentence is translated
// successfully.
message BatchTranslateResponse {
  // Total number of characters (Unicode codepoints).
  int64 total_characters = 1;

  // Number of successfully translated characters (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process (Unicode codepoints).
  int64 failed_characters = 3;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 4;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done]
  // is set to true.
  google.protobuf.Timestamp end_time = 5;
}

// Input configuration for glossaries; see `Glossary.input_config`.
message GlossaryInputConfig {
  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location of glossary data.
    // The file format is determined based on the file name extension. The API
    // returns [google.rpc.Code.INVALID_ARGUMENT] for unsupported URIs and file
    // formats. Wildcards are not allowed. This must be a single file in one of
    // the following formats:
    //
    // For `UNIDIRECTIONAL` glossaries:
    //
    // - TSV/CSV (`.tsv`/`.csv`): 2 column file, tab- or comma-separated.
    //   The first column is source text. The second column is target text.
    //   The file must not contain headers. That is, the first row is data, not
    //   column names.
    //
    // - TMX (`.tmx`): TMX file with parallel data defining source/target term
    //   pairs.
    //
    // For `EQUIVALENT_TERMS_SET` glossaries:
    //
    // - CSV (`.csv`): Multi-column CSV file defining equivalent glossary terms
    //   in multiple languages. The format is defined for Google Translation
    //   Toolkit and documented here:
    //   `https://support.google.com/translatortoolkit/answer/6306379?hl=en`.
    GcsSource gcs_source = 1;
  }
}

// Represents a glossary built from user-provided data.
//
// NOTE(review): field number 2 is not declared in this message; if it
// belonged to a removed field it should be marked `reserved` -- confirm.
message Glossary {
  // Used with UNIDIRECTIONAL.
  message LanguageCodePair {
    // Required. The BCP-47 language code of the input text, for example,
    // "en-US". Expected to be an exact match for GlossaryTerm.language_code.
    string source_language_code = 1;

    // Required. The BCP-47 language code for translation output, for example,
    // "zh-CN". Expected to be an exact match for GlossaryTerm.language_code.
    string target_language_code = 2;
  }

  // Used with EQUIVALENT_TERMS_SET.
  message LanguageCodesSet {
    // The BCP-47 language code(s) for terms defined in the glossary.
    // All entries are unique. The list contains at least two entries.
    // Expected to be an exact match for GlossaryTerm.language_code.
    repeated string language_codes = 1;
  }

  // Required. The resource name of the glossary. Glossary names have the form
  // `projects/{project-id}/locations/{location-id}/glossaries/{glossary-id}`.
  string name = 1;

  // Languages supported by the glossary. Exactly one of the members describes
  // the glossary, depending on its kind.
  oneof languages {
    // Used with UNIDIRECTIONAL.
    LanguageCodePair language_pair = 3;

    // Used with EQUIVALENT_TERMS_SET.
    LanguageCodesSet language_codes_set = 4;
  }

  // Required. Provides examples to build the glossary from.
  // The total glossary must not exceed 10M Unicode codepoints.
  GlossaryInputConfig input_config = 5;

  // Output only. The number of entries defined in the glossary.
  int32 entry_count = 6;

  // Output only. When CreateGlossary was called.
  google.protobuf.Timestamp submit_time = 7;

  // Output only. When the glossary creation was finished.
  google.protobuf.Timestamp end_time = 8;
}

// Request message for CreateGlossary.
message CreateGlossaryRequest {
  // Required. The parent resource under which the glossary is created. The
  // CreateGlossary HTTP binding matches it against `projects/*/locations/*`,
  // so it names a location within a project rather than a bare project.
  string parent = 1;

  // Required. The glossary to create. This field is the HTTP request body of
  // the CreateGlossary binding.
  Glossary glossary = 2;
}

// Request message for GetGlossary.
message GetGlossaryRequest {
  // Required. The name of the glossary to retrieve, in the form
  // `projects/*/locations/*/glossaries/*`.
  string name = 1;
}

// Request message for DeleteGlossary.
message DeleteGlossaryRequest {
  // Required. The name of the glossary to delete, in the form
  // `projects/*/locations/*/glossaries/*`.
  string name = 1;
}

// Request message for ListGlossaries.
message ListGlossariesRequest {
  // Required. The name of the parent resource from which to list all of the
  // glossaries. The ListGlossaries HTTP binding matches it against
  // `projects/*/locations/*`.
  string parent = 1;

  // Optional. Requested page size. The server may return fewer glossaries than
  // requested. If unspecified, the server picks an appropriate default.
  int32 page_size = 2;

  // Optional. A token identifying a page of results the server should return.
  // Typically, this is the value of [ListGlossariesResponse.next_page_token]
  // returned from the previous call to the `ListGlossaries` method.
  // The first page is returned if `page_token` is empty or missing.
  string page_token = 3;

  // Optional. Filter specifying constraints of a list operation.
  // For example, `tags.glossary_name="products*"`.
  // If missing, no filtering is performed.
  string filter = 4;
}

// Response message for ListGlossaries.
message ListGlossariesResponse {
  // The list of glossaries for a project.
  repeated Glossary glossaries = 1;

  // A token to retrieve the next page of results. Pass this value in the
  // [ListGlossariesRequest.page_token] field in the subsequent call to the
  // `ListGlossaries` method to retrieve the next page of results.
  string next_page_token = 2;
}

// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by CreateGlossary.
message CreateGlossaryMetadata {
  // Enumerates the possible states that the creation request can be in.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary has been successfully created.
    SUCCEEDED = 2;

    // Failed to create the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after the caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary creation request has been successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being created.
  string name = 1;

  // The current state of the glossary creation operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}

// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by DeleteGlossary.
message DeleteGlossaryMetadata {
  // Enumerates the possible states that the deletion request can be in.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary was successfully deleted.
    SUCCEEDED = 2;

    // Failed to delete the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after the caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary deletion request has been successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being deleted.
  string name = 1;

  // The current state of the glossary deletion operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}

// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by DeleteGlossary.
message DeleteGlossaryResponse {
  // The name of the deleted glossary.
  string name = 1;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 2;

  // The time when the glossary deletion is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done]
  // is set to true.
  google.protobuf.Timestamp end_time = 3;
}