// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.bigquery.v2;

import "google/cloud/bigquery/v2/model_reference.proto";
import "google/cloud/bigquery/v2/standard_sql.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery";
option java_outer_classname = "ModelProto";
option java_package = "com.google.cloud.bigquery.v2";

// Service exposing get, list, patch and delete operations on the models of a
// dataset.
service ModelService {
  // Gets the specified model resource by model ID.
  rpc GetModel(GetModelRequest) returns (Model) {}

  // Lists all models in the specified dataset. Requires the READER dataset
  // role.
  rpc ListModels(ListModelsRequest) returns (ListModelsResponse) {}

  // Patch specific fields in the specified model.
  rpc PatchModel(PatchModelRequest) returns (Model) {}

  // Deletes the model specified by modelId from the dataset.
  rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) {}
}

// A model resource: its identifying reference and user-editable metadata,
// together with the training runs and evaluation metrics recorded for it.
message Model {
  // Evaluation metrics for regression models.
  message RegressionMetrics {
    // Mean absolute error.
    google.protobuf.DoubleValue mean_absolute_error = 1;

    // Mean squared error.
    google.protobuf.DoubleValue mean_squared_error = 2;

    // Mean squared log error.
    google.protobuf.DoubleValue mean_squared_log_error = 3;

    // Median absolute error.
    google.protobuf.DoubleValue median_absolute_error = 4;

    // R^2 score.
    google.protobuf.DoubleValue r_squared = 5;
  }

  // Aggregate metrics for classification models. For multi-class models,
  // the metrics are either macro-averaged: metrics are calculated for each
  // label and then an unweighted average is taken of those values or
  // micro-averaged: the metric is calculated globally by counting the total
  // number of correctly predicted rows.
  message AggregateClassificationMetrics {
    // Precision is the fraction of actual positive predictions that had
    // positive actual labels. For multiclass this is a macro-averaged
    // metric treating each class as a binary classifier.
    google.protobuf.DoubleValue precision = 1;

    // Recall is the fraction of actual positive labels that were given a
    // positive prediction. For multiclass this is a macro-averaged metric.
    google.protobuf.DoubleValue recall = 2;

    // Accuracy is the fraction of predictions given the correct label. For
    // multiclass this is a micro-averaged metric.
    google.protobuf.DoubleValue accuracy = 3;

    // Threshold at which the metrics are computed. For binary
    // classification models this is the positive class threshold.
    // For multi-class classification models this is the confidence
    // threshold.
    google.protobuf.DoubleValue threshold = 4;

    // The F1 score is an average of recall and precision. For multiclass
    // this is a macro-averaged metric.
    google.protobuf.DoubleValue f1_score = 5;

    // Logarithmic Loss. For multiclass this is a macro-averaged metric.
    google.protobuf.DoubleValue log_loss = 6;

    // Area Under a ROC Curve. For multiclass this is a macro-averaged
    // metric.
    google.protobuf.DoubleValue roc_auc = 7;
  }

  // Evaluation metrics for binary classification models.
  message BinaryClassificationMetrics {
    // Confusion matrix for binary classification models.
    message BinaryConfusionMatrix {
      // Threshold value used when computing each of the following metrics.
      google.protobuf.DoubleValue positive_class_threshold = 1;

      // Number of true samples predicted as true.
      google.protobuf.Int64Value true_positives = 2;

      // Number of false samples predicted as true.
      google.protobuf.Int64Value false_positives = 3;

      // Number of false samples predicted as false.
      google.protobuf.Int64Value true_negatives = 4;

      // Number of true samples predicted as false.
      google.protobuf.Int64Value false_negatives = 5;

      // Aggregate precision.
      google.protobuf.DoubleValue precision = 6;

      // Aggregate recall.
      google.protobuf.DoubleValue recall = 7;
    }

    // Aggregate classification metrics.
    AggregateClassificationMetrics aggregate_classification_metrics = 1;

    // Binary confusion matrix at multiple thresholds.
    repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2;
  }

  // Evaluation metrics for multi-class classification models.
  message MultiClassClassificationMetrics {
    // Confusion matrix for multi-class classification models.
    message ConfusionMatrix {
      // A single entry in the confusion matrix.
      message Entry {
        // The predicted label. For confidence_threshold > 0, we will
        // also add an entry indicating the number of items under the
        // confidence threshold.
        string predicted_label = 1;

        // Number of items being predicted as this label.
        google.protobuf.Int64Value item_count = 2;
      }

      // A single row in the confusion matrix.
      message Row {
        // The original label of this row.
        string actual_label = 1;

        // Info describing predicted label distribution.
        repeated Entry entries = 2;
      }

      // Confidence threshold used when computing the entries of the
      // confusion matrix.
      google.protobuf.DoubleValue confidence_threshold = 1;

      // One row per actual label.
      repeated Row rows = 2;
    }

    // Aggregate classification metrics.
    AggregateClassificationMetrics aggregate_classification_metrics = 1;

    // Confusion matrix at different thresholds.
    repeated ConfusionMatrix confusion_matrix_list = 2;
  }

  // Evaluation metrics for clustering models.
  message ClusteringMetrics {
    // Davies-Bouldin index.
    google.protobuf.DoubleValue davies_bouldin_index = 1;

    // Mean of squared distances between each sample to its cluster centroid.
    google.protobuf.DoubleValue mean_squared_distance = 2;
  }

  // Evaluation metrics of a model. These are either computed on all
  // training data or just the eval data based on whether eval data was used
  // during training.
  message EvaluationMetrics {
    oneof metrics {
      // Populated for regression models.
      RegressionMetrics regression_metrics = 1;

      // Populated for binary classification models.
      BinaryClassificationMetrics binary_classification_metrics = 2;

      // Populated for multi-class classification models.
      MultiClassClassificationMetrics multi_class_classification_metrics = 3;

      // [Beta] Populated for clustering models.
      ClusteringMetrics clustering_metrics = 4;
    }
  }

  // Information about a single training query run for the model.
  message TrainingRun {
    // Options used for a training run.
    message TrainingOptions {
      // The maximum number of iterations in training.
      int64 max_iterations = 1;

      // Type of loss function used during training run.
      LossType loss_type = 2;

      // Learning rate in training.
      double learn_rate = 3;

      // L1 regularization coefficient.
      google.protobuf.DoubleValue l1_regularization = 4;

      // L2 regularization coefficient.
      google.protobuf.DoubleValue l2_regularization = 5;

      // When early_stop is true, stops training when accuracy improvement is
      // less than 'min_relative_progress'.
      google.protobuf.DoubleValue min_relative_progress = 6;

      // Whether to train a model from the last checkpoint.
      google.protobuf.BoolValue warm_start = 7;

      // Whether to stop early when the loss doesn't improve significantly
      // any more (compared to min_relative_progress).
      google.protobuf.BoolValue early_stop = 8;

      // Name of input label columns in training data.
      repeated string input_label_columns = 9;

      // The data split type for training and evaluation, e.g. RANDOM.
      DataSplitMethod data_split_method = 10;

      // The fraction of evaluation data over the whole input data. The rest
      // of data will be used as training data. The format should be double.
      // Accurate to two decimal places.
      // Default value is 0.2.
      double data_split_eval_fraction = 11;

      // The column to split data with. This column won't be used as a
      // feature.
      // 1. When data_split_method is CUSTOM, the corresponding column should
      // be boolean. The rows with true value tag are eval data, and the false
      // are training data.
      // 2. When data_split_method is SEQUENTIAL, the first
      // DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in the
      // corresponding column are used as training data, and the rest are
      // eval data. It respects the order in Orderable data types:
      // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
      string data_split_column = 12;

      // The strategy to determine learning rate.
      LearnRateStrategy learn_rate_strategy = 13;

      // Specifies the initial learning rate for line search to start at.
      double initial_learn_rate = 16;

      // Weights associated with each label class, for rebalancing the
      // training data.
      map<string, double> label_class_weights = 17;

      // [Beta] Distance type for clustering models.
      DistanceType distance_type = 20;

      // [Beta] Number of clusters for clustering models.
      int64 num_clusters = 21;
    }

    // Information about a single iteration of the training run.
    message IterationResult {
      // Information about a single cluster for clustering model.
      message ClusterInfo {
        // Centroid id.
        int64 centroid_id = 1;

        // Cluster radius, the average distance from centroid
        // to each point assigned to the cluster.
        google.protobuf.DoubleValue cluster_radius = 2;

        // Cluster size, the total number of points assigned to the cluster.
        google.protobuf.Int64Value cluster_size = 3;
      }

      // Index of the iteration, 0 based.
      google.protobuf.Int32Value index = 1;

      // Time taken to run the iteration in milliseconds.
      google.protobuf.Int64Value duration_ms = 4;

      // Loss computed on the training data at the end of iteration.
      google.protobuf.DoubleValue training_loss = 5;

      // Loss computed on the eval data at the end of iteration.
      google.protobuf.DoubleValue eval_loss = 6;

      // Learn rate used for this iteration.
      double learn_rate = 7;

      // [Beta] Information about top clusters for clustering models.
      repeated ClusterInfo cluster_infos = 8;
    }

    // Options that were used for this training run, includes
    // user specified and default options that were used.
    TrainingOptions training_options = 1;

    // The start time of this training run.
    google.protobuf.Timestamp start_time = 8;

    // Output of each iteration run, results.size() <= max_iterations.
    repeated IterationResult results = 6;

    // The evaluation metrics over training/eval data that were computed at the
    // end of training.
    EvaluationMetrics evaluation_metrics = 7;
  }

  // Indicates the type of the Model.
  enum ModelType {
    MODEL_TYPE_UNSPECIFIED = 0;

    // Linear regression model.
    LINEAR_REGRESSION = 1;

    // Logistic regression model.
    LOGISTIC_REGRESSION = 2;

    // [Beta] K-means clustering model.
    KMEANS = 3;
  }

  // Loss metric to evaluate model training performance.
  enum LossType {
    LOSS_TYPE_UNSPECIFIED = 0;

    // Mean squared loss, used for linear regression.
    MEAN_SQUARED_LOSS = 1;

    // Mean log loss, used for logistic regression.
    MEAN_LOG_LOSS = 2;
  }

  // Distance metric used to compute the distance between two points.
  enum DistanceType {
    DISTANCE_TYPE_UNSPECIFIED = 0;

    // Euclidean distance.
    EUCLIDEAN = 1;

    // Cosine distance.
    COSINE = 2;
  }

  // Indicates the method to split input data into multiple tables.
  enum DataSplitMethod {
    DATA_SPLIT_METHOD_UNSPECIFIED = 0;

    // Splits data randomly.
    RANDOM = 1;

    // Splits data with the user provided tags.
    CUSTOM = 2;

    // Splits data sequentially.
    SEQUENTIAL = 3;

    // Data split will be skipped.
    NO_SPLIT = 4;

    // Splits data automatically: Uses NO_SPLIT if the data size is small.
    // Otherwise uses RANDOM.
    AUTO_SPLIT = 5;
  }

  // Indicates the learning rate optimization strategy to use.
  enum LearnRateStrategy {
    LEARN_RATE_STRATEGY_UNSPECIFIED = 0;

    // Use line search to determine learning rate.
    LINE_SEARCH = 1;

    // Use a constant learning rate.
    CONSTANT = 2;
  }

  // Output only. A hash of this resource.
  string etag = 1;

  // Required. Unique identifier for this model.
  ModelReference model_reference = 2;

  // Output only. The time when this model was created, in millisecs since the
  // epoch.
  int64 creation_time = 5;

  // Output only. The time when this model was last modified, in millisecs
  // since the epoch.
  int64 last_modified_time = 6;

  // [Optional] A user-friendly description of this model.
  // @mutable bigquery.models.patch
  string description = 12;

  // [Optional] A descriptive name for this model.
  // @mutable bigquery.models.patch
  string friendly_name = 14;

  // [Optional] The labels associated with this model. You can use these to
  // organize and group your models. Label keys and values can be no longer
  // than 63 characters, can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter and each
  // label in the list must have a different key.
  // @mutable bigquery.models.patch
  map<string, string> labels = 15;

  // [Optional] The time when this model expires, in milliseconds since the
  // epoch. If not present, the model will persist indefinitely. Expired models
  // will be deleted and their storage reclaimed. The defaultTableExpirationMs
  // property of the encapsulating dataset can be used to set a default
  // expirationTime on newly created models.
  // @mutable bigquery.models.patch
  int64 expiration_time = 16;

  // Output only. The geographic location where the model resides. This value
  // is inherited from the dataset.
  string location = 13;

  // Output only. Type of the model resource.
  ModelType model_type = 7;

  // Output only. Information for all training runs in increasing order of
  // start_time.
  repeated TrainingRun training_runs = 9;

  // Output only. Input feature columns that were used to train this model.
  repeated StandardSqlField feature_columns = 10;

  // Output only. Label columns that were used to train this model.
  // The output of the model will have a "predicted_" prefix to these columns.
  repeated StandardSqlField label_columns = 11;
}

// Request message for ModelService.GetModel.
message GetModelRequest {
  // Project ID of the requested model.
  string project_id = 1;

  // Dataset ID of the requested model.
  string dataset_id = 2;

  // Model ID of the requested model.
  string model_id = 3;
}

// Request message for ModelService.PatchModel.
message PatchModelRequest {
  // Project ID of the model to patch.
  string project_id = 1;

  // Dataset ID of the model to patch.
  string dataset_id = 2;

  // Model ID of the model to patch.
  string model_id = 3;

  // Patched model.
  // Follows patch semantics. Missing fields are not updated. To clear a field,
  // explicitly set to default value.
  Model model = 4;
}

// Request message for ModelService.DeleteModel.
message DeleteModelRequest {
  // Project ID of the model to delete.
  string project_id = 1;

  // Dataset ID of the model to delete.
  string dataset_id = 2;

  // Model ID of the model to delete.
  string model_id = 3;
}

// Request message for ModelService.ListModels.
message ListModelsRequest {
  // Project ID of the models to list.
  string project_id = 1;

  // Dataset ID of the models to list.
  string dataset_id = 2;

  // The maximum number of results per page.
  google.protobuf.UInt32Value max_results = 3;

  // Page token, returned by a previous call to request the next page of
  // results.
  string page_token = 4;
}

// Response message for ModelService.ListModels.
message ListModelsResponse {
  // Models in the requested dataset. Only the following fields are populated:
  // model_reference, model_type, creation_time, last_modified_time and
  // labels.
  repeated Model models = 1;

  // A token to request the next page of results.
  string next_page_token = 2;
}