// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.translation.v3beta1;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Translate.V3Beta1";
option go_package = "google.golang.org/genproto/googleapis/cloud/translate/v3beta1;translate";
option java_multiple_files = true;
option java_outer_classname = "TranslationServiceProto";
option java_package = "com.google.cloud.translate.v3beta1";
option php_namespace = "Google\\Cloud\\Translate\\V3beta1";
option ruby_package = "Google::Cloud::Translate::V3beta1";

// Proto file for the Cloud Translation API (v3beta1).

// Provides natural language translation operations.
service TranslationService {
  // Translates input text and returns translated text.
  rpc TranslateText(TranslateTextRequest) returns (TranslateTextResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:translateText"
      body: "*"
    };
  }

  // Detects the language of text within a request.
  rpc DetectLanguage(DetectLanguageRequest) returns (DetectLanguageResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:detectLanguage"
      body: "*"
    };
  }

  // Returns a list of supported languages for translation.
  rpc GetSupportedLanguages(GetSupportedLanguagesRequest)
      returns (SupportedLanguages) {
    option (google.api.http) = {
      get: "/v3beta1/{parent=projects/*/locations/*}/supportedLanguages"
    };
  }

  // Translates a large volume of text in asynchronous batch mode.
  // This function provides real-time output as the inputs are being processed.
  // If caller cancels a request, the partial results (for an input file, it's
  // all or nothing) may still be available on the specified output location.
  //
  // This call returns immediately and you can
  // use google.longrunning.Operation.name to poll the status of the call.
  rpc BatchTranslateText(BatchTranslateTextRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:batchTranslateText"
      body: "*"
    };
  }

  // Creates a glossary and returns the long-running operation. Returns
  // NOT_FOUND, if the project doesn't exist.
  rpc CreateGlossary(CreateGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}/glossaries"
      body: "glossary"
    };
  }

  // Lists glossaries in a project. Returns NOT_FOUND, if the project doesn't
  // exist.
  rpc ListGlossaries(ListGlossariesRequest) returns (ListGlossariesResponse) {
    option (google.api.http) = {
      get: "/v3beta1/{parent=projects/*/locations/*}/glossaries"
    };
  }

  // Gets a glossary. Returns NOT_FOUND, if the glossary doesn't
  // exist.
  rpc GetGlossary(GetGlossaryRequest) returns (Glossary) {
    option (google.api.http) = {
      get: "/v3beta1/{name=projects/*/locations/*/glossaries/*}"
    };
  }

  // Deletes a glossary, or cancels glossary construction
  // if the glossary isn't created yet.
  // Returns NOT_FOUND, if the glossary doesn't exist.
  rpc DeleteGlossary(DeleteGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v3beta1/{name=projects/*/locations/*/glossaries/*}"
    };
  }
}

// Configures which glossary should be used for a specific target language,
// and defines options for applying that glossary.
message TranslateTextGlossaryConfig {
  // Required. Specifies the glossary used for this translation. Use
  // this format: projects/*/locations/*/glossaries/*
  string glossary = 1;

  // Optional. Indicates whether we should do a case-insensitive match.
  // Default value is false if missing.
  bool ignore_case = 2;
}

// The request message for synchronous translation.
message TranslateTextRequest {
  // Required. The content of the input in string format.
  // We recommend the total contents to be less than 30k codepoints.
  // Please use BatchTranslateText for larger text.
  repeated string contents = 1;

  // Optional. The format of the source text, for example, "text/html",
  // "text/plain". If left blank, the MIME type is assumed to be "text/html".
  string mime_type = 3;

  // Optional. The BCP-47 language code of the input text if
  // known, for example, "en-US" or "sr-Latn". Supported language codes are
  // listed in Language Support. If the source language isn't specified, the API
  // attempts to identify the source language automatically and returns
  // the source language within the response.
  string source_language_code = 4;

  // Required. The BCP-47 language code to use for translation of the input
  // text, set to one of the language codes listed in Language Support.
  string target_language_code = 5;

  // Optional. Only used when making regionalized call.
  // Format:
  // projects/{project-id}/locations/{location-id}.
  //
  // Only custom model/glossary within the same location-id can be used.
  // Otherwise 400 is returned.
  string parent = 8;

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on model type:
  // 1. Custom models:
  // projects/{project-id}/locations/{location-id}/models/{model-id}.
  // 2. General (built-in) models:
  // projects/{project-id}/locations/{location-id}/models/general/nmt
  // projects/{project-id}/locations/{location-id}/models/general/base
  //
  // For global (non-regionalized) requests, use {location-id} 'global'.
  // For example,
  // projects/{project-id}/locations/global/models/general/nmt
  //
  // If missing, the system decides which google base model to use.
  string model = 6;

  // Optional. Glossary to be applied. The glossary needs to be in the same
  // region as the model, otherwise an INVALID_ARGUMENT error is returned.
  TranslateTextGlossaryConfig glossary_config = 7;
}

// The main language translation response message.
message TranslateTextResponse {
  // Text translation responses with no glossary applied.
  // This field has the same length as `contents` in TranslateTextRequest.
  repeated Translation translations = 1;

  // Text translation responses if a glossary is provided in the request.
  // This could be the same as `translations` above if no terms apply.
  // This field has the same length as `contents` in TranslateTextRequest.
  repeated Translation glossary_translations = 3;
}

// A single translation response.
message Translation {
  // Text translated into the target language.
  string translated_text = 1;

  // Only present when `model` is present in the request.
  // This is same as `model` provided in the request.
  string model = 2;

  // The BCP-47 language code of source text in the initial request, detected
  // automatically, if no source language was passed within the initial
  // request. If the source language was passed, auto-detection of the language
  // does not occur and this field will be empty.
  string detected_language_code = 4;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 3;
}

// The request message for language detection.
message DetectLanguageRequest {
  // Optional. Only used when making regionalized call.
  // Format:
  // projects/{project-id}/locations/{location-id}.
  //
  // Only custom model within the same location-id can be used.
  // Otherwise 400 is returned.
  string parent = 5;

  // Optional. The language detection model to be used.
  // projects/{project-id}/locations/{location-id}/models/language-detection/{model-id}
  // If not specified, default will be used.
  string model = 4;

  // Required. The source of the document from which to detect the language.
  oneof source {
    // The content of the input stored as a string.
    string content = 1;
  }

  // Optional. The format of the source text, for example, "text/html",
  // "text/plain". If left blank, the MIME type is assumed to be "text/html".
  string mime_type = 3;
}

// A single detected language and the confidence of that detection. Entries of
// this type make up `languages` in DetectLanguageResponse.
message DetectedLanguage {
  // The BCP-47 language code of source content in the request, detected
  // automatically.
  string language_code = 1;

  // The confidence of the detection result for this language.
  float confidence = 2;
}

// The response message for language detection.
message DetectLanguageResponse {
  // A list of detected languages sorted by detection confidence in descending
  // order. The most probable language first.
  repeated DetectedLanguage languages = 1;
}

// The request message for discovering supported languages.
message GetSupportedLanguagesRequest {
  // Optional. Used for making regionalized calls.
  // Format: projects/{project-id}/locations/{location-id}.
  // For global calls, use projects/{project-id}/locations/global.
  // If missing, the call is treated as a global call.
  //
  // Only custom model within the same location-id can be used.
  // Otherwise 400 is returned.
  string parent = 3;

  // Optional. The language to use to return localized, human readable names
  // of supported languages. If missing, default language is ENGLISH.
  string display_language_code = 1;

  // Optional. Get supported languages of this model.
  // The format depends on model type:
  // 1. Custom models:
  // projects/{project-id}/locations/{location-id}/models/{model-id}.
  // 2. General (built-in) models:
  // projects/{project-id}/locations/{location-id}/models/general/nmt
  // projects/{project-id}/locations/{location-id}/models/general/base
  // Returns languages supported by the specified model.
  // If missing, we get supported languages of Google general NMT model.
  string model = 2;
}

// The response message for discovering supported languages.
message SupportedLanguages {
  // A list of supported language responses. This list contains an entry
  // for each language the Translation API supports.
  repeated SupportedLanguage languages = 1;
}

// A single supported language response corresponds to information related
// to one supported language.
message SupportedLanguage {
  // Supported language code, generally consisting of its ISO 639-1
  // identifier, for example, 'en', 'ja'. In certain cases, BCP-47 codes
  // including language and region identifiers are returned (for example,
  // 'zh-TW' and 'zh-CN')
  string language_code = 1;

  // Human readable name of the language localized in the display language
  // specified in the request.
  string display_name = 2;

  // Can be used as source language.
  bool support_source = 3;

  // Can be used as target language.
  bool support_target = 4;
}

// The GCS location for the input content.
message GcsSource {
  // Required. Source data URI. For example, `gs://my_bucket/my_object`.
  string input_uri = 1;
}

// Input configuration.
message InputConfig {
  // Optional. Can be "text/plain" or "text/html".
  // For `.tsv`, "text/html" is used if mime_type is missing.
  // For `.html`, this field must be "text/html" or empty.
  // For `.txt`, this field must be "text/plain" or empty.
  string mime_type = 1;

  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.tsv`) or a wildcard (for example,
    // `gs://translation-test/*`). If a file extension is `.tsv`, it can
    // contain either one or two columns. The first column (optional) is the id
    // of the text request. If the first column is missing, we use the row
    // number (0-based) from the input file as the ID in the output file. The
    // second column is the actual text to be
    // translated. We recommend each row be <= 10K Unicode codepoints,
    // otherwise an error might be returned.
    //
    // The other supported file extensions are `.txt` or `.html`, which is
    // treated as a single large chunk of text.
    GcsSource gcs_source = 2;
  }
}

// The GCS location for the output content
message GcsDestination {
  // Required. There must be no files under 'output_uri_prefix'.
  // 'output_uri_prefix' must end with "/". Otherwise error 400 is returned.
  string output_uri_prefix = 1;
}

// Output configuration.
message OutputConfig {
  // Required. The destination of output.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input file (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files. (n is the # of target_language_codes
    // in the BatchTranslateTextRequest).
    //
    // Output files (tsv) generated are compliant with RFC 4180 except that
    // record delimiters are '\n' instead of '\r\n'. We don't provide any way to
    // change record delimiters.
    //
    // While the input files are being processed, we write/update an index file
    // 'index.csv' under 'output_uri_prefix' (for example,
    // gs://translation-test/index.csv) The index file is generated/updated as
    // new files are being translated. The format is:
    //
    // input_file,target_language_code,translations_file,errors_file,
    // glossary_translations_file,glossary_errors_file
    //
    // input_file is one file we matched using gcs_source.input_uri.
    // target_language_code is provided in the request.
    // translations_file contains the translations. (details provided below)
    // errors_file contains the errors during processing of the file. (details
    // below). Both translations_file and errors_file could be empty
    // strings if we have no content to output.
    // glossary_translations_file,glossary_errors_file are always empty string
    // if input_file is tsv. They could also be empty if we have no content to
    // output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect all the content in input_file are
    // processed and ready to be consumed (that is, No partial output file is
    // written).
    //
    // The format of translations_file (for target language code 'trg') is:
    // gs://translation_test/a_b_c_'trg'_translations.[extension]
    //
    // If the input file extension is tsv, the output has the following
    // columns:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: translation without applying a glossary. Empty string if there
    // is an error.
    // Column 4 (only present if a glossary is provided in the request):
    // translation after applying the glossary. Empty string if there is an
    // error applying the glossary. Could be same string as column 3 if there is
    // no glossary applied.
    //
    // If input file extension is a txt or html, the translation is directly
    // written to the output file. If glossary is requested, a separate
    // glossary_translations_file has format of
    // gs://translation_test/a_b_c_'trg'_glossary_translations.[extension]
    //
    // The format of errors file (for target language code 'trg') is:
    // gs://translation_test/a_b_c_'trg'_errors.[extension]
    //
    // If the input file extension is tsv, errors_file has the following
    // columns:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: Error detail for the translation. Could be empty.
    // Column 4 (only present if a glossary is provided in the request):
    // Error when applying the glossary.
    //
    // If the input file extension is txt or html, glossary_errors_file will be
    // generated that contains error details. glossary_errors_file has format of
    // gs://translation_test/a_b_c_'trg'_glossary_errors.[extension]
    GcsDestination gcs_destination = 1;
  }
}

// The batch translation request.
message BatchTranslateTextRequest {
  // Optional. Only used when making regionalized call.
  // Format:
  // projects/{project-id}/locations/{location-id}.
  //
  // Only custom models/glossaries within the same location-id can be used.
  // Otherwise 400 is returned.
  string parent = 1;

  // Required. Source language code.
  string source_language_code = 2;

  // Required. Specify up to 10 language codes here.
  repeated string target_language_codes = 3;

  // Optional. The models to use for translation. Map's key is target language
  // code. Map's value is model name. Value can be a built-in general model,
  // or a custom model built by AutoML.
  //
  // The value format depends on model type:
  // 1. Custom models:
  // projects/{project-id}/locations/{location-id}/models/{model-id}.
  // 2. General (built-in) models:
  // projects/{project-id}/locations/{location-id}/models/general/nmt
  // projects/{project-id}/locations/{location-id}/models/general/base
  //
  // If the map is empty or a specific model is
  // not requested for a language pair, then default google model is used.
  map<string, string> models = 4;

  // Required. Input configurations.
  // The total number of files matched should be <= 1000.
  // The total content size should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated InputConfig input_configs = 5;

  // Required. Output configuration.
  // If 2 input configs match to the same file (that is, same input path),
  // we don't generate output for duplicate inputs.
  OutputConfig output_config = 6;

  // Optional. Glossaries to be applied for translation.
  // It's keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7;
}

// State metadata for the batch translation operation.
message BatchTranslateMetadata {
  // State of the job.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item has been successfully
    // processed.
    SUCCEEDED = 2;

    // The batch is done and no item has been successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called the
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Number of successfully translated characters so far (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process so far (Unicode
  // codepoints).
  int64 failed_characters = 3;

  // Total number of characters (Unicode codepoints).
  // This is the total number of codepoints from input files times the number of
  // target languages. It appears here shortly after the call is submitted.
  int64 total_characters = 4;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 5;
}

// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateText if at least one sentence is translated
// successfully.
message BatchTranslateResponse {
  // Total number of characters (Unicode codepoints).
  int64 total_characters = 1;

  // Number of successfully translated characters (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process (Unicode codepoints).
  int64 failed_characters = 3;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 4;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 5;
}

// Input configuration for glossaries.
message GlossaryInputConfig {
  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location of glossary data.
    // File format is determined based on file name extension. API returns
    // [google.rpc.Code.INVALID_ARGUMENT] for unsupported URI-s and file
    // formats. Wildcards are not allowed. This must be a single file in one of
    // the following formats:
    //
    // For `UNIDIRECTIONAL` glossaries:
    //
    // - TSV/CSV (`.tsv`/`.csv`): 2 column file, tab- or comma-separated.
    //   The first column is source text. The second column is target text.
    //   The file must not contain headers. That is, the first row is data, not
    //   column names.
    //
    // - TMX (`.tmx`): TMX file with parallel data defining source/target term
    //   pairs.
    //
    // For `EQUIVALENT_TERMS_SET` glossaries:
    //
    // - CSV (`.csv`): Multi-column CSV file defining equivalent glossary terms
    //   in multiple languages. The format is defined for Google Translation
    //   Toolkit and documented here:
    //   `https://support.google.com/translatortoolkit/answer/6306379?hl=en`.
    GcsSource gcs_source = 1;
  }
}

// Represents a glossary built from user provided data.
message Glossary {
  // Used with UNIDIRECTIONAL.
  message LanguageCodePair {
    // Required. The BCP-47 language code of the input text, for example,
    // "en-US". Expected to be an exact match for GlossaryTerm.language_code.
    string source_language_code = 1;

    // Required. The BCP-47 language code for translation output, for example,
    // "zh-CN". Expected to be an exact match for GlossaryTerm.language_code.
    string target_language_code = 2;
  }

  // Used with EQUIVALENT_TERMS_SET.
  message LanguageCodesSet {
    // The BCP-47 language code(s) for terms defined in the glossary.
    // All entries are unique. The list contains at least two entries.
    // Expected to be an exact match for GlossaryTerm.language_code.
    repeated string language_codes = 1;
  }

  // Required. The resource name of the glossary. Glossary names have the form
  // `projects/{project-id}/locations/{location-id}/glossaries/{glossary-id}`.
  string name = 1;

  // Languages supported by the glossary.
  oneof languages {
    // Used with UNIDIRECTIONAL.
    LanguageCodePair language_pair = 3;

    // Used with EQUIVALENT_TERMS_SET.
    LanguageCodesSet language_codes_set = 4;
  }

  // Required. Provides examples to build the glossary from.
  // Total glossary must not exceed 10M Unicode codepoints.
  GlossaryInputConfig input_config = 5;

  // Output only. The number of entries defined in the glossary.
  int32 entry_count = 6;

  // Output only. When CreateGlossary was called.
  google.protobuf.Timestamp submit_time = 7;

  // Output only. When the glossary creation was finished.
  google.protobuf.Timestamp end_time = 8;
}

// Request message for CreateGlossary.
message CreateGlossaryRequest {
  // Required. The project name.
  string parent = 1;

  // Required. The glossary to create.
  Glossary glossary = 2;
}

// Request message for GetGlossary.
message GetGlossaryRequest {
  // Required. The name of the glossary to retrieve.
  string name = 1;
}

// Request message for DeleteGlossary.
message DeleteGlossaryRequest {
  // Required. The name of the glossary to delete.
  string name = 1;
}

// Request message for ListGlossaries.
message ListGlossariesRequest {
  // Required. The name of the project from which to list all of the glossaries.
  string parent = 1;

  // Optional. Requested page size. The server may return fewer glossaries than
  // requested. If unspecified, the server picks an appropriate default.
  int32 page_size = 2;

  // Optional. A token identifying a page of results the server should return.
  // Typically, this is the value of [ListGlossariesResponse.next_page_token]
  // returned from the previous call to `ListGlossaries` method.
  // The first page is returned if `page_token` is empty or missing.
  string page_token = 3;

  // Optional. Filter specifying constraints of a list operation.
  // For example, `tags.glossary_name="products*"`.
  // If missing, no filtering is performed.
  string filter = 4;
}

// Response message for ListGlossaries.
message ListGlossariesResponse {
  // The list of glossaries for a project.
  repeated Glossary glossaries = 1;

  // A token to retrieve a page of results. Pass this value in the
  // [ListGlossariesRequest.page_token] field in the subsequent call to
  // `ListGlossaries` method to retrieve the next page of results.
  string next_page_token = 2;
}

// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by CreateGlossary.
message CreateGlossaryMetadata {
  // Enumerates the possible states that the creation request can be in.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary has been successfully created.
    SUCCEEDED = 2;

    // Failed to create the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary creation request has been successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being created.
  string name = 1;

  // The current state of the glossary creation operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}

// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by DeleteGlossary.
message DeleteGlossaryMetadata {
  // Enumerates the possible states that the deletion request can be in.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary was successfully deleted.
    SUCCEEDED = 2;

    // Failed to delete the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary deletion request has been successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being deleted.
  string name = 1;

  // The current state of the glossary deletion operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}

// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by DeleteGlossary.
message DeleteGlossaryResponse {
  // The name of the deleted glossary.
  string name = 1;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 2;

  // The time when the glossary deletion is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 3;
}