kratos/third_party/google/cloud/automl/v1beta1/tables.proto

// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.automl.v1beta1;

import "google/api/annotations.proto";
import "google/cloud/automl/v1beta1/column_spec.proto";
import "google/cloud/automl/v1beta1/data_stats.proto";
import "google/cloud/automl/v1beta1/ranges.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";


// Metadata for a dataset used for AutoML Tables.
message TablesDatasetMetadata {
  // Output only. The table_spec_id of the primary table of this dataset.
  string primary_table_spec_id = 1;

  // column_spec_id of the primary table's column that should be used as the
  // training & prediction target.
  // This column must be non-nullable and have one of the following data types
  // (otherwise model creation will error):
  // * CATEGORY
  // * ARRAY(CATEGORY)
  // * FLOAT64
  //
  // Furthermore, if the type is CATEGORY or ARRAY(CATEGORY), then at most
  // 40 unique values may exist in that column across all rows. For
  // ARRAY(CATEGORY), unique values are counted over the elements of the
  // ARRAYs (e.g. the 3 ARRAYs [A, B], [A], [B] are counted as having 2 unique
  // values).
  //
  // NOTE: Updates of this field will instantly affect any other users
  // concurrently working with the dataset.
  string target_column_spec_id = 2;

  // column_spec_id of the primary table's column that should be used as the
  // weight column, i.e. the higher the value, the more important the row will
  // be during model training.
  // Required type: FLOAT64.
  // Allowed values: 0 to 10000, inclusive on both ends; 0 means the row is
  //                 ignored for training.
  // If not set, all rows are assumed to have an equal weight of 1.
  //
  // NOTE: Updates of this field will instantly affect any other users
  // concurrently working with the dataset.
  string weight_column_spec_id = 3;

  // column_spec_id of the primary table column which specifies a possible ML
  // use of the row, i.e. the column will be used to split the rows into TRAIN,
  // VALIDATE and TEST sets.
  // Required type: STRING.
  // This column, if set, must either have all of `TRAIN`, `VALIDATE`, `TEST`
  // among its values, or only have `TEST`, `UNASSIGNED` values. In the latter
  // case the rows with the `UNASSIGNED` value will be assigned by AutoML. Note
  // that if a given ml use distribution makes it impossible to create a "good"
  // model, that call will error describing the issue.
  // If neither this column_spec_id nor the primary table's
  // time_column_spec_id is set, then all rows are treated as `UNASSIGNED`.
  //
  // NOTE: Updates of this field will instantly affect any other users
  // concurrently working with the dataset.
  string ml_use_column_spec_id = 4;

  // Output only. Correlations between
  //
  // [target_column][google.cloud.automl.v1beta1.TablesDatasetMetadata.target_column_spec_id],
  // and other columns of the
  //
  // [primary_table][google.cloud.automl.v1beta1.TablesDatasetMetadata.primary_table_spec_id].
  // Only set if the target column is set. Mapping from other column spec id to
  // its CorrelationStats with the target column.
  // This field may be stale; see the stats_update_time field for the
  // timestamp at which these stats were last updated.
  map<string, CorrelationStats> target_column_correlations = 6;

  // The most recent timestamp when the target_column_correlations field and
  // all descendant ColumnSpec.data_stats and ColumnSpec.top_correlated_columns
  // fields were last (re-)generated. Any changes that happened to the dataset
  // afterwards are not reflected in these fields' values. The regeneration
  // happens in the background on a best effort basis.
  google.protobuf.Timestamp stats_update_time = 7;
}

// Model metadata specific to AutoML Tables.
message TablesModelMetadata {
  // Column spec of the dataset's primary table's column the model is
  // predicting. Snapshotted when model creation started.
  // Only 3 fields are used:
  // name - May be set on CreateModel; if it is not, then the ColumnSpec
  //        corresponding to the current target_column_spec_id of the dataset
  //        the model is trained from is used.
  //        If neither is set, CreateModel will error.
  // display_name - Output only.
  // data_type - Output only.
  ColumnSpec target_column_spec = 2;

  // Column specs of the dataset's primary table's columns, on which
  // the model is trained and which are used as the input for predictions.
  // The
  //
  // [target_column][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
  // as well as, according to the dataset's state upon model creation,
  //
  // [weight_column][google.cloud.automl.v1beta1.TablesDatasetMetadata.weight_column_spec_id],
  // and
  //
  // [ml_use_column][google.cloud.automl.v1beta1.TablesDatasetMetadata.ml_use_column_spec_id]
  // must never be included here.
  // Only 3 fields are used:
  // name - May be set on CreateModel; if set, only the columns specified are
  //        used, otherwise all of the primary table's columns (except the
  //        ones listed above) are used for the training and prediction input.
  // display_name - Output only.
  // data_type - Output only.
  repeated ColumnSpec input_feature_column_specs = 3;

  // Objective function the model is optimizing towards. The training process
  // creates a model that maximizes/minimizes the value of the objective
  // function over the validation set.
  //
  // The supported optimization objectives depend on the prediction_type.
  // If the field is not set, a default objective function is used.
  //
  // CLASSIFICATION_BINARY:
  //   "MAXIMIZE_AU_ROC" (default) - Maximize the area under the receiver
  //                                 operating characteristic (ROC) curve.
  //   "MINIMIZE_LOG_LOSS" - Minimize log loss.
  //   "MAXIMIZE_AU_PRC" - Maximize the area under the precision-recall curve.
  //
  // CLASSIFICATION_MULTI_CLASS:
  //   "MINIMIZE_LOG_LOSS" (default) - Minimize log loss.
  //
  // CLASSIFICATION_MULTI_LABEL:
  //   "MINIMIZE_LOG_LOSS" (default) - Minimize log loss.
  //
  // REGRESSION:
  //   "MINIMIZE_RMSE" (default) - Minimize root-mean-squared error (RMSE).
  //   "MINIMIZE_MAE" - Minimize mean-absolute error (MAE).
  //   "MINIMIZE_RMSLE" - Minimize root-mean-squared log error (RMSLE).
  //
  // FORECASTING:
  //   "MINIMIZE_RMSE" (default) - Minimize root-mean-squared error (RMSE).
  //   "MINIMIZE_MAE" - Minimize mean-absolute error (MAE).
  string optimization_objective = 4;

  // Output only. Auxiliary information for each of the
  // input_feature_column_specs, with respect to this particular model.
  repeated TablesModelColumnInfo tables_model_column_info = 5;

  // The train budget of creating this model, expressed in milli node hours,
  // i.e. a value of 1,000 in this field means 1 node hour.
  //
  // The training cost of the model will not exceed this budget. The backend
  // will attempt to bring the final cost close to the budget, though it may
  // end up being (even) noticeably smaller - at the backend's discretion. This
  // especially may happen when further model training ceases to provide any
  // improvements.
  //
  // If the budget is set to a value known to be insufficient to train a
  // model for the given dataset, the training won't be attempted and
  // will error.
  int64 train_budget_milli_node_hours = 6;

  // Output only. The actual training cost of the model, expressed in milli
  // node hours, i.e. a value of 1,000 in this field means 1 node hour.
  // Guaranteed to not exceed the train budget.
  int64 train_cost_milli_node_hours = 7;
}

// Contains annotation details specific to Tables.
message TablesAnnotation {
  // Output only. A confidence estimate between 0.0 and 1.0, inclusive. A higher
  // value means greater confidence in the returned value.
  // For
  //
  // [target_column_spec][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
  // of ARRAY(CATEGORY) data type, this is a confidence that one of the values
  // in the ARRAY would be the provided value.
  // For
  //
  // [target_column_spec][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
  // of FLOAT64 data type, the score is not populated.
  float score = 1;

  // Output only. Only populated when
  //
  // [target_column_spec][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]
  // has FLOAT64 data type (i.e. for regression predictions). An interval that
  // has a 95% chance of containing the exactly correct target value.
  DoubleRange prediction_interval = 4;

  // The predicted value of the row's
  //
  // [target_column][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec].
  // The value depends on the column's DataType:
  // CATEGORY - the predicted (with the above confidence `score`) CATEGORY
  //            value.
  // FLOAT64 - the predicted FLOAT64 value; see the above
  //           `prediction_interval` (`score` is not populated for this
  //           data type).
  // ARRAY(CATEGORY) - CATEGORY value meaning that this value would be in the
  //                   ARRAY in that column (with the above confidence `score`).
  google.protobuf.Value value = 2;

  // Output only. Auxiliary information for each of the model's
  //
  // [input_feature_column_specs][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]
  // with respect to this particular prediction.
  repeated TablesModelColumnInfo tables_model_column_info = 3;
}

// Information specific to a given column and Tables Model, in the context
// of the Model and the predictions created by it.
message TablesModelColumnInfo {
  // Output only. The name of the ColumnSpec describing the column. Not
  // populated when this proto is outputted to BigQuery.
  string column_spec_name = 1;

  // Output only. The display name of the column (same as the display_name of
  // its ColumnSpec).
  string column_display_name = 2;

  // Output only.
  //
  // When given as part of a Model:
  // Measurement of how much the correctness of the model's predictions on the
  // TEST data depends on the values in this column. A value between 0 and 1;
  // higher means higher influence. These values are normalized - for all input
  // feature columns of a given model they add up to 1.
  //
  // When given back by Predict or Batch Predict:
  // Measurement of how impactful the value in this column was for the
  // prediction returned for the given row. A value between 0 and 1; higher
  // means larger impact. These values are normalized - for all input feature
  // columns of a single predicted row they add up to 1.
  float feature_importance = 3;
}
add third_party proto 6 years ago			`// Copyright 2018 Google LLC.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`
			`//`

			`syntax = "proto3";`

			`package google.cloud.automl.v1beta1;`

			`import "google/api/annotations.proto";`
			`import "google/cloud/automl/v1beta1/column_spec.proto";`
			`import "google/cloud/automl/v1beta1/data_stats.proto";`
			`import "google/cloud/automl/v1beta1/ranges.proto";`
			`import "google/protobuf/struct.proto";`
			`import "google/protobuf/timestamp.proto";`

			`option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";`
			`option java_multiple_files = true;`
			`option java_package = "com.google.cloud.automl.v1beta1";`
			`option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";`


			`// Metadata for a dataset used for AutoML Tables.`
			`message TablesDatasetMetadata {`
			`// Output only. The table_spec_id of the primary table of this dataset.`
			`string primary_table_spec_id = 1;`

			`// column_spec_id of the primary table's column that should be used as the`
			`// training & prediction target.`
			`// This column must be non-nullable and have one of following data types`
			`// (otherwise model creation will error):`
			`// * CATEGORY`
			`// * ARRAY(CATEGORY)`
			`// * FLOAT64`
			`// Furthermore, if the type is CATEGORY or ARRAY(CATEGORY), then only up to`
			`// 40 unique values may exist in that column across all rows, but for`
			`// ARRAY(CATEGORY) unique values are counted as elements of the ARRAY (i.e.`
			`// following 3 ARRAY-s: [A, B], [A], [B] are counted as having 2 unique`
			`// values).`
			`//`
			`// NOTE: Updates of this field will instantly affect any other users`
			`// concurrently working with the dataset.`
			`string target_column_spec_id = 2;`

			`// column_spec_id of the primary table's column that should be used as the`
			`// weight column, i.e. the higher the value the more important the row will be`
			`// during model training.`
			`// Required type: FLOAT64.`
			`// Allowed values: 0 to 10000, inclusive on both ends; 0 means the row is`
			`// ignored for training.`
			`// If not set all rows are assumed to have equal weight of 1.`
			`// NOTE: Updates of this field will instantly affect any other users`
			`// concurrently working with the dataset.`
			`string weight_column_spec_id = 3;`

			`// column_spec_id of the primary table column which specifies a possible ML`
			`// use of the row, i.e. the column will be used to split the rows into TRAIN,`
			`// VALIDATE and TEST sets.`
			`// Required type: STRING.`
			// This column, if set, must either have all of `TRAIN`, `VALIDATE`, `TEST`
			// among its values, or only have `TEST`, `UNASSIGNED` values. In the latter
			// case the rows with `UNASSIGNED` value will be assigned by AutoML. Note
			`// that if a given ml use distribution makes it impossible to create a "good"`
			`// model, that call will error describing the issue.`
			`// If both this column_spec_id and primary table's time_column_spec_id are not`
			// set, then all rows are treated as `UNASSIGNED`.
			`// NOTE: Updates of this field will instantly affect any other users`
			`// concurrently working with the dataset.`
			`string ml_use_column_spec_id = 4;`

			`// Output only. Correlations between`
			`//`
			`// [target_column][google.cloud.automl.v1beta1.TablesDatasetMetadata.target_column],`
			`// and other columns of the`
			`//`
			`// [primary_table][google.cloud.automl.v1beta1.TablesDatasetMetadata.primary_table_spec_id].`
			`// Only set if the target column is set. Mapping from other column spec id to`
			`// its CorrelationStats with the target column.`
			`// This field may be stale, see the stats_update_time field for`
			`// for the timestamp at which these stats were last updated.`
			`map<string, CorrelationStats> target_column_correlations = 6;`

			`// The most recent timestamp when target_column_correlations field and all`
			`// descendant ColumnSpec.data_stats and ColumnSpec.top_correlated_columns`
			`// fields were last (re-)generated. Any changes that happened to the dataset`
			`// afterwards are not reflected in these fields values. The regeneration`
			`// happens in the background on a best effort basis.`
			`google.protobuf.Timestamp stats_update_time = 7;`
			`}`

			`// Model metadata specific to AutoML Tables.`
			`message TablesModelMetadata {`
			`// Column spec of the dataset's primary table's column the model is`
			`// predicting. Snapshotted when model creation started.`
			`// Only 3 fields are used:`
			`// name - May be set on CreateModel, if it's not then the ColumnSpec`
			`// corresponding to the current target_column_spec_id of the dataset`
			`// the model is trained from is used.`
			`// If neither is set, CreateModel will error.`
			`// display_name - Output only.`
			`// data_type - Output only.`
			`ColumnSpec target_column_spec = 2;`

			`// Column specs of the dataset's primary table's columns, on which`
			`// the model is trained and which are used as the input for predictions.`
			`// The`
			`//`
			`// [target_column][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]`
			`// as well as, according to dataset's state upon model creation,`
			`//`
			`// [weight_column][google.cloud.automl.v1beta1.TablesDatasetMetadata.weight_column_spec_id],`
			`// and`
			`//`
			`// [ml_use_column][google.cloud.automl.v1beta1.TablesDatasetMetadata.ml_use_column_spec_id]`
			`// must never be included here.`
			`// Only 3 fields are used:`
			`// name - May be set on CreateModel, if set only the columns specified are`
			`// used, otherwise all primary table's columns (except the ones listed`
			`// above) are used for the training and prediction input.`
			`// display_name - Output only.`
			`// data_type - Output only.`
			`repeated ColumnSpec input_feature_column_specs = 3;`

			`// Objective function the model is optimizing towards. The training process`
			`// creates a model that maximizes/minimizes the value of the objective`
			`// function over the validation set.`
			`//`
			`// The supported optimization objectives depend on the prediction_type.`
			`// If the field is not set, a default objective function is used.`
			`//`
			`// CLASSIFICATION_BINARY:`
			`// "MAXIMIZE_AU_ROC" (default) - Maximize the area under the receiver`
			`// operating characteristic (ROC) curve.`
			`// "MINIMIZE_LOG_LOSS" - Minimize log loss.`
			`// "MAXIMIZE_AU_PRC" - Maximize the area under the precision-recall curve.`
			`//`
			`// CLASSIFICATION_MULTI_CLASS :`
			`// "MINIMIZE_LOG_LOSS" (default) - Minimize log loss.`
			`//`
			`// CLASSIFICATION_MULTI_LABEL:`
			`// "MINIMIZE_LOG_LOSS" (default) - Minimize log loss.`
			`//`
			`// REGRESSION:`
			`// "MINIMIZE_RMSE" (default) - Minimize root-mean-squared error (RMSE).`
			`// "MINIMIZE_MAE" - Minimize mean-absolute error (MAE).`
			`// "MINIMIZE_RMSLE" - Minimize root-mean-squared log error (RMSLE).`
			`//`
			`// FORECASTING:`
			`// "MINIMIZE_RMSE" (default) - Minimize root-mean-squared error (RMSE).`
			`// "MINIMIZE_MAE" - Minimize mean-absolute error (MAE).`
			`string optimization_objective = 4;`

			`// Output only. Auxiliary information for each of the`
			`// input_feature_column_specs, with respect to this particular model.`
			`repeated TablesModelColumnInfo tables_model_column_info = 5;`

			`// The train budget of creating this model, expressed in milli node hours`
			`// i.e. 1,000 value in this field means 1 node hour.`
			`//`
			`// The training cost of the model will not exceed this budget. The final cost`
			`// will be attempted to be close to the budget, though may end up being (even)`
			`// noticeably smaller - at the backend's discretion. This especially may`
			`// happen when further model training ceases to provide any improvements.`
			`//`
			`// If the budget is set to a value known to be insufficient to train a`
			`// model for the given dataset, the training won't be attempted and`
			`// will error.`
			`int64 train_budget_milli_node_hours = 6;`

			`// Output only. The actual training cost of the model, expressed in milli`
			`// node hours, i.e. 1,000 value in this field means 1 node hour. Guaranteed`
			`// to not exceed the train budget.`
			`int64 train_cost_milli_node_hours = 7;`
			`}`

			`// Contains annotation details specific to Tables.`
			`message TablesAnnotation {`
			`// Output only. A confidence estimate between 0.0 and 1.0, inclusive. A higher`
			`// value means greater confidence in the returned value.`
			`// For`
			`//`
			`// [target_column_spec][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]`
			`// of ARRAY(CATEGORY) data type, this is a confidence that one of the values`
			`// in the ARRAY would be the provided value.`
			`// For`
			`//`
			`// [target_column_spec][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]`
			`// of FLOAT64 data type the score is not populated.`
			`float score = 1;`

			`// Output only. Only populated when`
			`//`
			`// [target_column_spec][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec]`
			`// has FLOAT64 data type (i.e. for regression predictions). An interval in`
			`// which the exactly correct target value has 95% chance to be in.`
			`DoubleRange prediction_interval = 4;`

			`// The predicted value of the row's`
			`//`
			`// [target_column][google.cloud.automl.v1beta1.TablesModelMetadata.target_column_spec].`
			`// The value depends on the column's DataType:`
			// CATEGORY - the predicted (with the above confidence `score`) CATEGORY
			`// value.`
			// FLOAT64 - the predicted (with the above confidence `score`) FLOAT64 value.
			`// ARRAY(CATEGORY) - CATEGORY value meaning that this value would be in the`
			// ARRAY in that column (with the above confidence `score`).
			`google.protobuf.Value value = 2;`

			`// Output only. Auxiliary information for each of the model's`
			`//`
			`// [input_feature_column_specs'][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]`
			`// with respect to this particular prediction.`
			`repeated TablesModelColumnInfo tables_model_column_info = 3;`
			`}`

			`// An information specific to given column and Tables Model, in context`
			`// of the Model and the predictions created by it.`
			`message TablesModelColumnInfo {`
			`// Output only. The name of the ColumnSpec describing the column. Not`
			`// populated when this proto is outputted to BigQuery.`
			`string column_spec_name = 1;`

			`// Output only. The display name of the column (same as the display_name of`
			`// its ColumnSpec).`
			`string column_display_name = 2;`

			`// Output only.`
			`//`
			`// When given as part of a Model:`
			`// Measurement of how much model predictions correctness on the TEST data`
			`// depend on values in this column. A value between 0 and 1, higher means`
			`// higher influence. These values are normalized - for all input feature`
			`// columns of a given model they add to 1.`
			`//`
			`// When given back by Predict or Batch Predict:`
			`// Measurement of how impactful for the prediction returned for the given row`
			`// the value in this column was. A value between 0 and 1, higher means larger`
			`// impact. These values are normalized - for all input feature columns of a`
			`// single predicted row they add to 1.`
			`float feature_importance = 3;`
			`}`