// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.datalabeling.v1beta1;

import "google/cloud/datalabeling/v1beta1/annotation.proto";
import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/datalabeling/v1beta1;datalabeling";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";

// Type of the data being imported; referenced by InputConfig.data_type.
// Value numbers are part of the wire contract and must not be renumbered.
enum DataType {
  DATA_TYPE_UNSPECIFIED = 0;

  IMAGE = 1;

  VIDEO = 2;

  TEXT = 4;

  AUDIO = 5;
}

// Dataset is the resource to hold your data. You can request multiple labeling
// tasks for a dataset while each one will generate an AnnotatedDataset.
message Dataset {
  // Output only.
  // Dataset resource name, format is:
  // projects/{project_id}/datasets/{dataset_id}
  string name = 1;

  // Required. The display name of the dataset. Maximum of 64 characters.
  string display_name = 2;

  // Optional. User-provided description of the dataset.
  // The description can be up to 10000 characters long.
  string description = 3;

  // Output only. Time the dataset is created.
  google.protobuf.Timestamp create_time = 4;

  // Output only. This is populated with the original input configs
  // where ImportData is called. It is available only after the clients
  // import data to this dataset.
  repeated InputConfig input_configs = 5;
}

// The configuration of input data, including data type, location, etc.
message InputConfig {
  // Required. Where the data is from.
  oneof source {
    // Import from a GCS file; see GcsSource.
    GcsSource gcs_source = 2;
  }

  // Required. Data type must be specified when user tries to import data.
  DataType data_type = 1;
}

// Source of the GCS file to be imported. Only gcs path is allowed in
// input_uri.
message GcsSource {
  // Required. The input uri of source file.
  string input_uri = 1;

  // Required. The format of the gcs source. Only "text/csv" is supported.
  string mime_type = 2;
}

// The configuration of output data.
message OutputConfig {
  // Required. Location to output data to.
  oneof destination {
    // Output to a GCS file. Should be used for labeling output other than Audio
    // transcription.
    GcsDestination gcs_destination = 1;

    // Output to a GCS folder. Should be used for Audio transcription
    // labeling output.
    GcsFolderDestination gcs_folder_destination = 2;
  }
}

// Export destination of the data. Only gcs path is allowed in
// output_uri.
message GcsDestination {
  // Required. The output uri of destination file.
  string output_uri = 1;

  // Required. The format of the gcs destination. Only "text/csv" and
  // "application/json" are supported.
  string mime_type = 2;
}

// Export folder destination of the data.
message GcsFolderDestination {
  // Required. GCS folder to export data to.
  string output_folder_uri = 1;
}

// DataItem is a piece of data, without annotation. For example, an image.
message DataItem {
  // Output only.
  oneof payload {
    // The image payload, a container of the image bytes/uri.
    ImagePayload image_payload = 2;

    // The text payload, a container of text content.
    TextPayload text_payload = 3;

    // The video payload, a container of the video uri.
    VideoPayload video_payload = 4;

    // The audio payload, a container of the audio uri.
    AudioPayload audio_payload = 5;
  }

  // Output only. Name of the data item, in format of:
  // projects/{project_id}/datasets/{dataset_id}/dataItems/{data_item_id}
  string name = 1;
}

// AnnotatedDataset is a set holding annotations for data in a Dataset. Each
// labeling task will generate an AnnotatedDataset under the Dataset that the
// task is requested for.
message AnnotatedDataset {
  // Output only.
  // AnnotatedDataset resource name in format of:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}
  string name = 1;

  // Output only. The display name of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when user starts a labeling task. Maximum of 64
  // characters.
  string display_name = 2;

  // Output only. The description of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when user starts a labeling task. Maximum of 10000
  // characters.
  string description = 9;

  // Output only. Source of the annotation.
  AnnotationSource annotation_source = 3;

  // Output only. Type of the annotation. It is specified when starting labeling
  // task.
  AnnotationType annotation_type = 8;

  // Output only. Number of examples in the annotated dataset.
  int64 example_count = 4;

  // Output only. Number of examples that have annotation in the annotated
  // dataset.
  int64 completed_example_count = 5;

  // Output only. Per label statistics.
  LabelStats label_stats = 6;

  // Output only. Time the AnnotatedDataset was created.
  google.protobuf.Timestamp create_time = 7;

  // Output only. Additional information about AnnotatedDataset.
  AnnotatedDatasetMetadata metadata = 10;
}

// Metadata on AnnotatedDataset.
message AnnotatedDatasetMetadata {
  // HumanAnnotationConfig used when requesting the human labeling task for this
  // AnnotatedDataset.
  HumanAnnotationConfig human_annotation_config = 1;

  // Specific request configuration used when requesting the labeling task.
  oneof annotation_request_config {
    // Configuration for image classification task.
    ImageClassificationConfig image_classification_config = 2;

    // Configuration for image bounding box and bounding poly task.
    BoundingPolyConfig bounding_poly_config = 3;

    // Configuration for image polyline task.
    PolylineConfig polyline_config = 4;

    // Configuration for image segmentation task.
    SegmentationConfig segmentation_config = 5;

    // Configuration for video classification task.
    VideoClassificationConfig video_classification_config = 6;

    // Configuration for video object detection task.
    ObjectDetectionConfig object_detection_config = 7;

    // Configuration for video object tracking task.
    ObjectTrackingConfig object_tracking_config = 8;

    // Configuration for video event labeling task.
    EventConfig event_config = 9;

    // Configuration for text classification task.
    TextClassificationConfig text_classification_config = 10;

    // Configuration for text entity extraction task.
    TextEntityExtractionConfig text_entity_extraction_config = 11;
  }
}

// Statistics about annotation specs.
message LabelStats {
  // Map of each annotation spec's example count. Key is the annotation spec
  // name and value is the number of examples for that annotation spec.
  map<string, int64> example_count = 1;
}

// An Example is a piece of data and its annotation. For example, an image with
// label "house".
message Example {
  // Output only. The data part of Example.
  oneof payload {
    // The image payload, a container of the image bytes/uri.
    ImagePayload image_payload = 2;

    // The text payload, a container of the text content.
    TextPayload text_payload = 6;

    // The video payload, a container of the video uri.
    VideoPayload video_payload = 7;

    // The audio payload, a container of the audio uri.
    AudioPayload audio_payload = 8;
  }

  // Output only. Name of the example, in format of:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}/examples/{example_id}
  string name = 1;

  // Output only. Annotations for the piece of data in Example.
  // One piece of data can have multiple annotations.
  repeated Annotation annotations = 5;
}

// Container of information about an image.
message ImagePayload {
  // Image format.
  string mime_type = 1;

  // A byte string of a full image.
  bytes image_thumbnail = 2;

  // Image uri from the user bucket.
  string image_uri = 3;
}

// Container of information about a piece of text.
message TextPayload {
  // Text content.
  string text_content = 1;
}

// Container of information of a video thumbnail.
message VideoThumbnail {
  // A byte string of the video frame.
  bytes thumbnail = 1;

  // Time offset relative to the beginning of the video, corresponding to the
  // video frame where the thumbnail has been extracted from.
  google.protobuf.Duration time_offset = 2;
}

// Container of information of a video.
message VideoPayload {
  // Video format.
  string mime_type = 1;

  // Video uri from the user bucket.
  string video_uri = 2;

  // The list of video thumbnails.
  repeated VideoThumbnail video_thumbnails = 3;

  // FPS of the video.
  float frame_rate = 4;
}

// Container of information of an audio.
message AudioPayload {
  // Audio uri in user bucket.
  string audio_uri = 1;

  // Sample rate in Hertz of the audio data sent in all
  // `RecognitionAudio` messages. This field is optional for `FLAC` and `WAV`
  // audio files and required for all other audio formats. For details,
  // see [AudioEncoding][google.cloud.datalabeling.v1beta1.AudioPayload.AudioEncoding].
  //
  // NOTE(review): no AudioEncoding type is defined in the visible part of this
  // file, so the cross-reference above may not resolve; confirm the target.
  int32 sample_rate_hertz = 3;
}