// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.language.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1;language";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to
  // [AnalyzeEntities][google.cloud.language.v1.LanguageService.AnalyzeEntities]
  // in the text and analyzes sentiment associated with each entity and its
  // mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest)
      returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all the features that analyzeSentiment,
  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// ################################################################ #

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI. At most one of the members can be set.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language Support](/natural-language/docs/languages)
  // lists currently supported languages for each API method.
  // If the language (either specified by the caller or automatically detected)
  // is not supported by the called API method, an `INVALID_ARGUMENT` error
  // is returned.
  string language = 4;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to
  // [AnalyzeSentiment][google.cloud.language.v1.LanguageService.AnalyzeSentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // NOTE(review): value 8 is not assigned in this enum; confirm whether it
    // was retired upstream before reusing it.

    // Phone number<br><br>
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:<ul>
    // <li><code>number</code> – the actual number, broken down into
    // sections as per local convention</li> <li><code>national_prefix</code>
    // – country code, if detected</li> <li><code>area_code</code> –
    // region or area code, if detected</li> <li><code>extension</code> –
    // phone extension (to be dialed after connection), if detected</li></ul>
    PHONE_NUMBER = 9;

    // Address<br><br>
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:<ul>
    // <li><code>street_number</code> – street number</li>
    // <li><code>locality</code> – city or town</li>
    // <li><code>street_name</code> – street/route name, if detected</li>
    // <li><code>postal_code</code> – postal code, if detected</li>
    // <li><code>country</code> – country, if detected</li>
    // <li><code>broad_region</code> – administrative area, such as the
    // state, if detected</li> <li><code>narrow_region</code> – smaller
    // administrative area, such as county, if detected</li>
    // <li><code>sublocality</code> – used in Asian addresses to demark a
    // district within a city, if detected</li></ul>
    ADDRESS = 10;

    // Date<br><br>
    // The metadata identifies the components of the date:<ul>
    // <li><code>year</code> – four digit year, if detected</li>
    // <li><code>month</code> – two digit month number, if detected</li>
    // <li><code>day</code> – two digit day number, if detected</li></ul>
    DATE = 11;

    // Number<br><br>
    // The metadata is the number itself.
    NUMBER = 12;

    // Price<br><br>
    // The metadata identifies the <code>value</code> and
    // <code>currency</code>.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the per-value documentation of the
  // `Type` enum declared in this message.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to
  // [AnalyzeEntitySentiment][google.cloud.language.v1.LanguageService.AnalyzeEntitySentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // NOTE(review): field number 1 is not assigned in this message; confirm
  // whether it was retired upstream before giving it to a new field.

  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token. Parts of speech
// are as defined in
// http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles.
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnominal
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token. For more
// information on dependency labels, see
// http://www.aclweb.org/anthology/P13-2017
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to
  // [AnalyzeEntitySentiment][google.cloud.language.v1.LanguageService.AnalyzeEntitySentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v1.EncodingType] specified in the API
  // request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document, from the [predefined
  // taxonomy](/natural-language/docs/categories).
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // NOTE(review): field number 5 is not assigned in this message; confirm
    // whether it was retired upstream before giving it to a new field.

    // Classify the full document into categories.
    bool classify_text = 6;
  }

  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field
  // for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;
}