You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
913 lines
30 KiB
913 lines
30 KiB
// Copyright 2019 Google LLC.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
|
|
// Language version used by every definition in this file.
syntax = "proto3";

// Namespace shared by all messages, enums and services declared below.
package google.cloud.vision.v1;
|
|
|
|
import "google/api/annotations.proto";
|
|
import "google/cloud/vision/v1/geometry.proto";
|
|
import "google/cloud/vision/v1/product_search.proto";
|
|
import "google/cloud/vision/v1/text_annotation.proto";
|
|
import "google/cloud/vision/v1/web_detection.proto";
|
|
import "google/longrunning/operations.proto";
|
|
import "google/protobuf/field_mask.proto";
|
|
import "google/protobuf/timestamp.proto";
|
|
import "google/rpc/status.proto";
|
|
import "google/type/color.proto";
|
|
import "google/type/latlng.proto";
|
|
|
|
// Code-generation settings for the C++, Go, Java and Objective-C targets.
// These values are part of the published API surface and must not change.
option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";
|
|
|
|
// Performs Google Cloud Vision API detection tasks over client images, such
// as face, landmark, logo, label, and text detection, and returns the
// entities detected in those images.
service ImageAnnotator {
  // Runs image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1/images:annotate"
      body: "*"
    };
  }

  // Runs image detection and annotation for a batch of files. Currently only
  // "application/pdf", "image/tiff" and "image/gif" are supported.
  //
  // At most 5 frames (gif) or pages (pdf or tiff) are extracted from each
  // provided file (customers can choose which 5 through
  // AnnotateFileRequest.pages), and detection and annotation are performed
  // on each extracted image.
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
      returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1/files:annotate"
      body: "*"
    };
  }

  // Runs asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse`
  // (results).
  //
  // Image annotation outputs are written as json files to the customer's GCS
  // bucket; each json file contains a BatchAnnotateImagesResponse proto.
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/images:asyncBatchAnnotate"
      body: "*"
    };
  }

  // Runs asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse`
  // (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/files:asyncBatchAnnotate"
      body: "*"
    };
  }
}
|
|
|
|
// Selects one kind of Google Cloud Vision API detection to perform and caps
// the number of results returned for it. The `features` list may carry
// several `Feature` objects.
message Feature {
  // Kinds of Google Cloud Vision API feature that can be extracted.
  enum Type {
    // No feature type was specified.
    TYPE_UNSPECIFIED = 0;

    // Face detection.
    FACE_DETECTION = 1;

    // Landmark detection.
    LANDMARK_DETECTION = 2;

    // Logo detection.
    LOGO_DETECTION = 3;

    // Label detection.
    LABEL_DETECTION = 4;

    // Text detection / optical character recognition (OCR). Optimized for
    // areas of text within a larger image; when the image is a document, use
    // `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Dense text document OCR. Wins when both `DOCUMENT_TEXT_DETECTION` and
    // `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Safe Search, which detects potentially unsafe or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Computation of a set of image properties, such as the image's dominant
    // colors.
    IMAGE_PROPERTIES = 7;

    // Crop hints.
    CROP_HINTS = 9;

    // Web detection.
    WEB_DETECTION = 10;

    // Product Search.
    PRODUCT_SEARCH = 12;

    // Localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // Which feature to run.
  Type type = 1;

  // Upper bound on the number of results of this type. Ignored for
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, and `CROP_HINTS`.
  int32 max_results = 2;

  // Model used for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 3;
}
|
|
|
|
// Likelihood expressed as a small set of buckets, intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // The likelihood is unknown.
  UNKNOWN = 0;

  // The image very unlikely belongs to the specified vertical.
  VERY_UNLIKELY = 1;

  // The image unlikely belongs to the specified vertical.
  UNLIKELY = 2;

  // The image possibly belongs to the specified vertical.
  POSSIBLE = 3;

  // The image likely belongs to the specified vertical.
  LIKELY = 4;

  // The image very likely belongs to the specified vertical.
  VERY_LIKELY = 5;
}
|
|
|
|
// Location of an image held outside the request: either Google Cloud Storage
// or a web URL.
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // A Google Cloud Storage URI of the form `gs://bucket_name/object_name`.
  // Object versioning is not supported. For more info see
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris).
  string gcs_image_uri = 1;

  // URI of the source image. One of:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported.
  //    For more info see
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris).
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. Google cannot guarantee
  //    that a fetch from an HTTP/HTTPS URL will be completed. The request
  //    may fail if the specified host denies it (e.g. due to request
  //    throttling or DOS prevention), or if Google throttles requests to the
  //    site for abuse prevention. Production applications should not depend
  //    on externally-hosted images.
  //
  // If both `gcs_image_uri` and `image_uri` are set, `image_uri` takes
  // precedence.
  string image_uri = 2;
}
|
|
|
|
// A client image on which Google Cloud Vision API tasks are performed.
message Image {
  // The image bytes.
  // Note: As with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  bytes content = 1;

  // A Google Cloud Storage image location or a publicly-accessible image
  // URL. When an image carries both `content` and `source`, `content` takes
  // precedence and is the one used for the image annotation request.
  ImageSource source = 2;
}
|
|
|
|
// Holds the results of face detection for one face.
message FaceAnnotation {
  // A landmark specific to a face (for example, a face feature).
  message Landmark {
    // Kind of face landmark (feature).
    // "Left" and "right" are taken from the vantage of the viewer of the
    // image, without considering the mirror projections typical of photos.
    // So, `LEFT_EYE`, typically, is the person's right eye.
    enum Type {
      // An unknown face landmark was detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // The left eye.
      LEFT_EYE = 1;

      // The right eye.
      RIGHT_EYE = 2;

      // The left of the left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // The right of the left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // The left of the right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // The right of the right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // The midpoint between the eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // The tip of the nose.
      NOSE_TIP = 8;

      // The upper lip.
      UPPER_LIP = 9;

      // The lower lip.
      LOWER_LIP = 10;

      // The left of the mouth.
      MOUTH_LEFT = 11;

      // The right of the mouth.
      MOUTH_RIGHT = 12;

      // The center of the mouth.
      MOUTH_CENTER = 13;

      // The bottom right of the nose.
      NOSE_BOTTOM_RIGHT = 14;

      // The bottom left of the nose.
      NOSE_BOTTOM_LEFT = 15;

      // The bottom center of the nose.
      NOSE_BOTTOM_CENTER = 16;

      // The top boundary of the left eye.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // The right corner of the left eye.
      LEFT_EYE_RIGHT_CORNER = 18;

      // The bottom boundary of the left eye.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // The left corner of the left eye.
      LEFT_EYE_LEFT_CORNER = 20;

      // The top boundary of the right eye.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // The right corner of the right eye.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // The bottom boundary of the right eye.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // The left corner of the right eye.
      RIGHT_EYE_LEFT_CORNER = 24;

      // The upper midpoint of the left eyebrow.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // The upper midpoint of the right eyebrow.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // The left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // The right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // The pupil of the left eye.
      LEFT_EYE_PUPIL = 29;

      // The pupil of the right eye.
      RIGHT_EYE_PUPIL = 30;

      // The forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // The chin gnathion.
      CHIN_GNATHION = 32;

      // The left gonion of the chin.
      CHIN_LEFT_GONION = 33;

      // The right gonion of the chin.
      CHIN_RIGHT_GONION = 34;
    }

    // Kind of this face landmark.
    Type type = 3;

    // Position of this face landmark.
    Position position = 4;
  }

  // Bounding polygon around the face, with coordinates in the original
  // image's scale.
  // The box is computed to "frame" the face in accordance with human
  // expectations and is based on the landmarker results.
  // Note that if only a partial face appears in the image to be annotated,
  // one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded).
  BoundingPoly bounding_poly = 1;

  // A bounding polygon that is tighter than the `boundingPoly` and encloses
  // only the skin part of the face. It is typically used to eliminate the
  // face from any image analysis that detects the "amount of skin" visible
  // in an image. It is based only on the initial face detection, not on the
  // landmarker results, hence the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // The face landmarks that were detected.
  repeated Landmark landmarks = 3;

  // Roll angle: the amount of clockwise/anti-clockwise rotation of the face
  // relative to the image vertical about the axis perpendicular to the face.
  // Range [-180,180].
  float roll_angle = 4;

  // Yaw angle: the leftward/rightward angle that the face is pointing
  // relative to the vertical plane perpendicular to the image.
  // Range [-180,180].
  float pan_angle = 5;

  // Pitch angle: the upwards/downwards angle that the face is pointing
  // relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Confidence of the detection. Range [0, 1].
  float detection_confidence = 7;

  // Confidence of the face landmarking. Range [0, 1].
  float landmarking_confidence = 8;

  // Likelihood of joy.
  Likelihood joy_likelihood = 9;

  // Likelihood of sorrow.
  Likelihood sorrow_likelihood = 10;

  // Likelihood of anger.
  Likelihood anger_likelihood = 11;

  // Likelihood of surprise.
  Likelihood surprise_likelihood = 12;

  // Likelihood of being under-exposed.
  Likelihood under_exposed_likelihood = 13;

  // Likelihood of being blurred.
  Likelihood blurred_likelihood = 14;

  // Likelihood of headwear.
  Likelihood headwear_likelihood = 15;
}
|
|
|
|
// Location information for a detected entity.
message LocationInfo {
  // The lat/long coordinates of the location.
  google.type.LatLng lat_lng = 1;
}
|
|
|
|
// A user-supplied name/value pair.
message Property {
  // The property's name.
  string name = 1;

  // The property's value.
  string value = 2;

  // The value, for numeric properties.
  uint64 uint64_value = 3;
}
|
|
|
|
// The set of features detected for one entity.
message EntityAnnotation {
  // An opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // Language code of the locale in which the entity's textual `description`
  // is expressed.
  string locale = 2;

  // Textual description of the entity, expressed in its `locale` language.
  string description = 3;

  // The result's overall score. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // Accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is
  // detected, this field represents the confidence that there is a tower in
  // the query image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // How relevant the ICA (Image Content Annotation) label is to the image.
  // For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // The image region this entity belongs to. Not produced for
  // `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // Location information for the detected entity. Several `LocationInfo`
  // elements can be present because one location may indicate the location
  // of the scene in the image, and another location may indicate the
  // location of the place where the image was taken. Location information
  // is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Optional user-supplied `Property` (name/value) fields that some entities
  // may have, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}
|
|
|
|
// A detected object together with its bounding box.
message LocalizedObjectAnnotation {
  // The object ID, which should align with EntityAnnotation mid.
  string mid = 1;

  // A BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Name of the object, expressed in its `language_code` language.
  string name = 3;

  // The result's score. Range [0, 1].
  float score = 4;

  // The image region this object belongs to. This must be populated.
  BoundingPoly bounding_poly = 5;
}
|
|
|
|
// Features of the image computed by computer vision methods over safe-search
// verticals (for example, adult, spoof, medical, violence).
message SafeSearchAnnotation {
  // Likelihood that the image contains adult content. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Likelihood of a spoof: that a modification was made to the image's
  // canonical version to make it appear funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that the image is a medical image.
  Likelihood medical = 3;

  // Likelihood that the image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content
  // may include (but is not limited to) skimpy or sheer clothing,
  // strategically covered nudity, lewd or provocative poses, or close-ups of
  // sensitive body areas.
  Likelihood racy = 9;
}
|
|
|
|
// A rectangle defined by a min and a max `LatLng` pair.
message LatLongRect {
  // The min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // The max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}
|
|
|
|
// Describes one color: its RGB channels, its score, and the fraction of the
// image that the color occupies.
message ColorInfo {
  // The color's RGB components.
  google.type.Color color = 1;

  // Score of this color, specific to the image. Value in range [0, 1].
  float score = 2;

  // Fraction of the image's pixels that the color occupies.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}
|
|
|
|
// The dominant colors of an image and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values, each with its score and pixel fraction.
  repeated ColorInfo colors = 1;
}
|
|
|
|
// Holds properties of an image, such as its dominant colors.
message ImageProperties {
  // When present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}
|
|
|
|
// One crop hint, used to generate a new crop when serving an image.
message CropHint {
  // Bounding polygon of the crop region, with coordinates in the original
  // image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence that this is a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region relative to the original
  // image.
  float importance_fraction = 3;
}
|
|
|
|
// The crop hints used to generate new crops when serving images.
message CropHintsAnnotation {
  // The crop hint results.
  repeated CropHint crop_hints = 1;
}
|
|
|
|
// Parameters of a crop hints annotation request.
message CropHintsParams {
  // Desired aspect ratios as floats, each being the ratio of the width to
  // the height of the image. For example, a desired aspect ratio of 4/3
  // corresponds to the float value 1.33333. When not specified, the best
  // possible crop is returned. At most 16 aspect ratios may be provided; any
  // aspect ratios provided after the 16th are ignored.
  repeated float aspect_ratios = 1;
}
|
|
|
|
// Parameters of a web detection request.
message WebDetectionParams {
  // Whether results derived from the geo information in the image should be
  // included.
  bool include_geo_results = 2;
}
|
|
|
|
// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty
  // value yields the best results since it enables automatic language
  // detection. For languages based on the Latin alphabet, setting
  // `language_hints` is not needed. In rare cases, when the language of the
  // text in the image is known, setting a hint will help get better results
  // (although it will be a significant hindrance if the hint is wrong). Text
  // detection returns an error if one or more of the specified languages is
  // not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;
}
|
|
|
|
// Asks for Google Cloud Vision API tasks to be performed over a
// user-provided image, with user-requested features and with context
// information.
message AnnotateImageRequest {
  // Image that is to be processed.
  Image image = 1;

  // The features being requested.
  repeated Feature features = 2;

  // Extra context that may accompany the image.
  ImageContext image_context = 3;
}
|
|
|
|
// Describes the source of an image when that image was produced from a file
// (e.g. a PDF).
message ImageAnnotationContext {
  // URI of the file from which the image was produced.
  string uri = 1;

  // For a PDF or TIFF file, the page number within the file from which the
  // image was produced.
  int32 page_number = 2;
}
|
|
|
|
// The response to one image annotation request.
message AnnotateImageResponse {
  // When present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // When present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // When present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // When present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // When present, localized object detection has completed successfully.
  // Entries are sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // When present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // When present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation gives the structural hierarchy of the OCR detected text.
  TextAnnotation full_text_annotation = 12;

  // When present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // When present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // When present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // When present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // When present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // When set, the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be correct, even
  // when `error` is set.
  google.rpc.Status error = 9;

  // When present, contextual information is needed to understand where this
  // image comes from.
  ImageAnnotationContext context = 21;
}
|
|
|
|
// The response to one file annotation request. A file may hold one or more
// images, each of which has its own response.
message AnnotateFileResponse {
  // Describes the file this response was generated for.
  InputConfig input_config = 1;

  // The individual responses for the images found within the file.
  repeated AnnotateImageResponse responses = 2;

  // The total number of pages in the file.
  int32 total_pages = 3;
}
|
|
|
|
// Batches multiple image annotation requests into a single service call.
message BatchAnnotateImagesRequest {
  // The individual image annotation requests of this batch.
  repeated AnnotateImageRequest requests = 1;
}
|
|
|
|
// The response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // The individual responses to the image annotation requests within the
  // batch.
  repeated AnnotateImageResponse responses = 1;
}
|
|
|
|
// Asks for one single file, e.g. a PDF, TIFF or GIF file, to be annotated.
message AnnotateFileRequest {
  // Required. Describes the input file.
  InputConfig input_config = 1;

  // Required. The features being requested.
  repeated Feature features = 2;

  // Extra context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // The pages of the file on which to perform image annotation.
  //
  // Page numbering starts from 1: the first page of the file is assumed to
  // be page 1. A request supports at most 5 pages. Pages can be negative.
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second to the last page.
  //
  // For a GIF file (rather than PDF or TIFF), "page" refers to GIF frames.
  //
  // When this field is empty, the service by default performs image
  // annotation for the first 5 pages of the file.
  repeated int32 pages = 4;
}
|
|
|
|
// Requests to annotate files through the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // The file annotation requests. Only one AnnotateFileRequest per
  // BatchAnnotateFilesRequest is supported right now.
  repeated AnnotateFileRequest requests = 1;
}
|
|
|
|
// The file annotation responses of a batch.
message BatchAnnotateFilesResponse {
  // The file annotation responses; each response corresponds to an
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}
|
|
|
|
// A request for offline annotation of a file.
message AsyncAnnotateFileRequest {
  // Required. Describes the input file.
  InputConfig input_config = 1;

  // Required. The features being requested.
  repeated Feature features = 2;

  // Extra context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. Where the output should go, plus its metadata (e.g. format).
  OutputConfig output_config = 4;
}
|
|
|
|
// Response to one offline file annotation request.
message AsyncAnnotateFileResponse {
  // Output location and metadata taken from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}
|
|
|
|
// Asks for asynchronous annotation of a list of images.
message AsyncBatchAnnotateImagesRequest {
  // The individual image annotation requests of this batch.
  repeated AnnotateImageRequest requests = 1;

  // Required. Where the output should go, plus its metadata (e.g. format).
  OutputConfig output_config = 2;
}
|
|
|
|
// The response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // Output location and metadata taken from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}
|
|
|
|
// Batches multiple async file annotation requests into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // The individual async file annotation requests of this batch.
  repeated AsyncAnnotateFileRequest requests = 1;
}
|
|
|
|
// The response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The file annotation responses, one per request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}
|
|
|
|
// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}
|
|
|
|
// Where the output should go, plus its metadata.
message OutputConfig {
  // Google Cloud Storage location that the output(s) are written to.
  GcsDestination gcs_destination = 1;

  // Maximum number of response protos placed into each output JSON file on
  // Google Cloud Storage.
  // Valid values lie in the range [1, 100]. The default, when not specified,
  // is 20.
  //
  // For example, one pdf file with 100 pages generates 100 response protos.
  // With `batch_size` = 20, 5 json files each containing 20 response protos
  // are then written under the prefix `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}
|
|
|
|
// Google Cloud Storage location that the input is read from.
message GcsSource {
  // The Google Cloud Storage URI of the input file. It must only be a Google
  // Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}
|
|
|
|
// Google Cloud Storage location that the output is written to.
message GcsDestination {
  // The Google Cloud Storage URI prefix under which the results are stored.
  // Results will be in JSON format and preceded by its corresponding input
  // URI prefix. The field may represent either a gcs file prefix or a gcs
  // directory. Either way, the uri should be unique, because getting all of
  // the output files requires a wildcard gcs search on the uri prefix you
  // provide.
  //
  // Examples:
  //
  // *  File Prefix: gs://bucket-name/here/filenameprefix
  //    The output files will be created in gs://bucket-name/here/ and the
  //    names of the output files will begin with "filenameprefix".
  //
  // *  Directory Prefix: gs://bucket-name/some/location/
  //    The output files will be created in gs://bucket-name/some/location/
  //    and the names of the output files could be anything because there was
  //    no filename prefix specified.
  //
  // With multiple outputs, each response is still AnnotateFileResponse, each
  // of which contains some subset of the full list of AnnotateImageResponse.
  // Multiple outputs can happen if, for example, the output JSON is too
  // large and overflows into multiple sharded files.
  string uri = 1;
}
|
|
|
|
// Contains metadata for the long-running operations returned by
// AsyncBatchAnnotateImages and AsyncBatchAnnotateFiles; it is the message
// carried in `Operation.metadata` for those calls.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}
|
|
|