From b5e8a06a5f22bf5abab4641bd6cc3939720f3a16 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 9 Apr 2026 23:13:59 +0000 Subject: [PATCH 1/9] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.expression.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL". Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- .../bigframes/bigframes/bigquery/_operations/ml.py | 7 +++++-- packages/bigframes/bigframes/core/sql/ml.py | 11 +++++++++-- .../create_model_expression_option.sql | 3 +++ packages/bigframes/tests/unit/core/sql/test_ml.py | 14 ++++++++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql diff --git a/packages/bigframes/bigframes/bigquery/_operations/ml.py b/packages/bigframes/bigframes/bigquery/_operations/ml.py index 04d88c9b6a68..add2dac05fe6 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ml.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ml.py @@ -25,6 +25,7 @@ import bigframes.dataframe as dataframe import bigframes.ml.base import bigframes.session +import bigframes.core.expression as ex from bigframes.bigquery._operations import utils @@ -50,7 +51,9 @@ def create_model( input_schema: Optional[Mapping[str, str]] = None, output_schema: Optional[Mapping[str, str]] = None, connection_name: Optional[str] = None, - options: 
Optional[Mapping[str, Union[str, int, float, bool, list]]] = None, + options: Optional[ + Mapping[str, Union[str, int, float, bool, list, "ex.Expression"]] + ] = None, training_data: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None, custom_holiday: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None, session: Optional[bigframes.session.Session] = None, @@ -78,7 +81,7 @@ def create_model( The OUTPUT clause, which specifies the schema of the output data. connection_name (str, optional): The connection to use for the model. - options (Mapping[str, Union[str, int, float, bool, list]], optional): + options (Mapping[str, Union[str, int, float, bool, list, bigframes.core.expression.Expression]], optional): The OPTIONS clause, which specifies the model options. training_data (Union[bigframes.pandas.DataFrame, str], optional): The query or DataFrame to use for training the model. diff --git a/packages/bigframes/bigframes/core/sql/ml.py b/packages/bigframes/bigframes/core/sql/ml.py index 9694e19ad30e..4803bcd15049 100644 --- a/packages/bigframes/bigframes/core/sql/ml.py +++ b/packages/bigframes/bigframes/core/sql/ml.py @@ -16,7 +16,9 @@ from typing import Any, Dict, List, Mapping, Optional, Union +import bigframes.core.expression as ex from bigframes.core.compile.sqlglot import sql as sg_sql +from bigframes.core.compile.sqlglot.expression_compiler import expression_compiler def create_model_ddl( @@ -28,7 +30,9 @@ def create_model_ddl( input_schema: Optional[Mapping[str, str]] = None, output_schema: Optional[Mapping[str, str]] = None, connection_name: Optional[str] = None, - options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None, + options: Optional[ + Mapping[str, Union[str, int, float, bool, list, "ex.Expression"]] + ] = None, training_data: Optional[str] = None, custom_holiday: Optional[str] = None, ) -> str: @@ -70,7 +74,10 @@ def create_model_ddl( if options: rendered_options = [] for option_name, option_value in 
options.items(): - if isinstance(option_value, (list, tuple)): + if isinstance(option_value, ex.Expression): + sg_expr = expression_compiler.compile_expression(option_value) + rendered_val = sg_sql.to_sql(sg_expr) + elif isinstance(option_value, (list, tuple)): # Handle list options like model_registry="vertex_ai" # wait, usually options are key=value. # if value is list, it is [val1, val2] diff --git a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql new file mode 100644 index 000000000000..eabfb8a5e962 --- /dev/null +++ b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql @@ -0,0 +1,3 @@ +CREATE MODEL `my_model` +OPTIONS(l2_reg = 0.1, booster_type = 'gbtree') +AS SELECT * FROM t diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index bb3b61a949cf..718e4b81ca3b 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -97,6 +97,20 @@ def test_create_model_list_option(snapshot): snapshot.assert_match(sql, "create_model_list_option.sql") +def test_create_model_expression_option(snapshot): + import bigframes.core.expression as ex + + sql = bigframes.core.sql.ml.create_model_ddl( + model_name="my_model", + options={ + "l2_reg": ex.ScalarConstantExpression(0.1, None), + "booster_type": "gbtree", + }, + training_data="SELECT * FROM t", + ) + snapshot.assert_match(sql, "create_model_expression_option.sql") + + def test_evaluate_model_basic(snapshot): sql = bigframes.core.sql.ml.evaluate( model_name="my_project.my_dataset.my_model", From 7ea8bf595ea68fbbfc578f38eb20856cdec6b48c Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" 
<161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 03:44:10 +0000 Subject: [PATCH 2/9] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.expression.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL", using an expression that calls a function on a literal value (e.g. 0.1 * 10). Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- .../create_model_expression_option.sql | 2 +- packages/bigframes/tests/unit/core/sql/test_ml.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql index eabfb8a5e962..e328aab5fbfd 100644 --- a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql +++ b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql @@ -1,3 +1,3 @@ CREATE MODEL `my_model` -OPTIONS(l2_reg = 0.1, booster_type = 'gbtree') +OPTIONS(l2_reg = 0.1 * 10, booster_type = 'gbtree') AS SELECT * FROM t diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index 718e4b81ca3b..d46170b771c6 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ 
b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -99,11 +99,19 @@ def test_create_model_list_option(snapshot): def test_create_model_expression_option(snapshot): import bigframes.core.expression as ex + import bigframes.operations.numeric_ops as numeric_ops + import bigframes.dtypes as dtypes + + # An expression that calls a function on a literal value + # e.g. 0.1 * 10 + literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) + multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE) + math_expr = ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr)) sql = bigframes.core.sql.ml.create_model_ddl( model_name="my_model", options={ - "l2_reg": ex.ScalarConstantExpression(0.1, None), + "l2_reg": math_expr, "booster_type": "gbtree", }, training_data="SELECT * FROM t", From f92c089cdca3dc212c7d3d42afb7f716a3acf5cb Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 04:05:56 +0000 Subject: [PATCH 3/9] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.expression.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL", using an expression that calls a function on a literal value (e.g. 0.1 * 10). 
Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- packages/bigframes/tests/unit/core/sql/test_ml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index d46170b771c6..fe7630224ddb 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -106,7 +106,9 @@ def test_create_model_expression_option(snapshot): # e.g. 0.1 * 10 literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE) - math_expr = ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr)) + math_expr = ex.OpExpression( + op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr) + ) sql = bigframes.core.sql.ml.create_model_ddl( model_name="my_model", From 5d874d0f7b875b881a4bb08f6edd471af0112295 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 04:43:41 +0000 Subject: [PATCH 4/9] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.expression.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL", using an expression that calls a function on a literal value (e.g. 0.1 * 10). - Moved test imports to the top level to adhere to PEP 8. 
Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- packages/bigframes/tests/unit/core/sql/test_ml.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index fe7630224ddb..e7d9f628cc2e 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -14,7 +14,10 @@ import pytest +import bigframes.core.expression as ex import bigframes.core.sql.ml +import bigframes.dtypes as dtypes +import bigframes.operations.numeric_ops as numeric_ops pytest.importorskip("pytest_snapshot") @@ -98,10 +101,6 @@ def test_create_model_list_option(snapshot): def test_create_model_expression_option(snapshot): - import bigframes.core.expression as ex - import bigframes.operations.numeric_ops as numeric_ops - import bigframes.dtypes as dtypes - # An expression that calls a function on a literal value # e.g. 0.1 * 10 literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) From 58aa74e9d4ada8798850a245b4d2292c47bb223e Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 21:10:35 +0000 Subject: [PATCH 5/9] feat(bigframes): Support Expression objects in create_model options This change allows the `options` parameter of `bigframes.bigquery._operations.ml.create_model` to accept BigFrames `col.Expression` objects. These expressions are compiled to SQL scalar expressions and included in the generated `CREATE MODEL` DDL statement. - Added `bigframes.core.col.Expression` type support in the `options` dict. - Updated `create_model_ddl` to handle compiling expressions using `expression_compiler`. - Added `test_create_model_expression_option` snapshot test to verify the generated "golden SQL", using an expression that calls a function on a literal value (e.g. 0.1 * 10). 
- Moved test imports to the top level to adhere to PEP 8 and ran `ruff format`. Co-authored-by: tswast <247555+tswast@users.noreply.github.com> --- packages/bigframes/bigframes/bigquery/_operations/ml.py | 6 +++--- packages/bigframes/bigframes/core/sql/ml.py | 8 ++++---- packages/bigframes/tests/unit/core/sql/test_ml.py | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ml.py b/packages/bigframes/bigframes/bigquery/_operations/ml.py index add2dac05fe6..412b49b888f5 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ml.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ml.py @@ -25,7 +25,7 @@ import bigframes.dataframe as dataframe import bigframes.ml.base import bigframes.session -import bigframes.core.expression as ex +import bigframes.core.col as col from bigframes.bigquery._operations import utils @@ -52,7 +52,7 @@ def create_model( output_schema: Optional[Mapping[str, str]] = None, connection_name: Optional[str] = None, options: Optional[ - Mapping[str, Union[str, int, float, bool, list, "ex.Expression"]] + Mapping[str, Union[str, int, float, bool, list, "col.Expression"]] ] = None, training_data: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None, custom_holiday: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]] = None, @@ -81,7 +81,7 @@ def create_model( The OUTPUT clause, which specifies the schema of the output data. connection_name (str, optional): The connection to use for the model. - options (Mapping[str, Union[str, int, float, bool, list, bigframes.core.expression.Expression]], optional): + options (Mapping[str, Union[str, int, float, bool, list, bigframes.core.col.Expression]], optional): The OPTIONS clause, which specifies the model options. training_data (Union[bigframes.pandas.DataFrame, str], optional): The query or DataFrame to use for training the model. 
diff --git a/packages/bigframes/bigframes/core/sql/ml.py b/packages/bigframes/bigframes/core/sql/ml.py index 4803bcd15049..8d971e6c3e8d 100644 --- a/packages/bigframes/bigframes/core/sql/ml.py +++ b/packages/bigframes/bigframes/core/sql/ml.py @@ -16,7 +16,7 @@ from typing import Any, Dict, List, Mapping, Optional, Union -import bigframes.core.expression as ex +import bigframes.core.col as col from bigframes.core.compile.sqlglot import sql as sg_sql from bigframes.core.compile.sqlglot.expression_compiler import expression_compiler @@ -31,7 +31,7 @@ def create_model_ddl( output_schema: Optional[Mapping[str, str]] = None, connection_name: Optional[str] = None, options: Optional[ - Mapping[str, Union[str, int, float, bool, list, "ex.Expression"]] + Mapping[str, Union[str, int, float, bool, list, "col.Expression"]] ] = None, training_data: Optional[str] = None, custom_holiday: Optional[str] = None, @@ -74,8 +74,8 @@ def create_model_ddl( if options: rendered_options = [] for option_name, option_value in options.items(): - if isinstance(option_value, ex.Expression): - sg_expr = expression_compiler.compile_expression(option_value) + if isinstance(option_value, col.Expression): + sg_expr = expression_compiler.compile_expression(option_value._value) rendered_val = sg_sql.to_sql(sg_expr) elif isinstance(option_value, (list, tuple)): # Handle list options like model_registry="vertex_ai" diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index e7d9f628cc2e..61296638eec2 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -14,6 +14,7 @@ import pytest +import bigframes.core.col as col import bigframes.core.expression as ex import bigframes.core.sql.ml import bigframes.dtypes as dtypes @@ -105,8 +106,8 @@ def test_create_model_expression_option(snapshot): # e.g. 
0.1 * 10 literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE) - math_expr = ex.OpExpression( - op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr) + math_expr = col.Expression( + ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr)) ) sql = bigframes.core.sql.ml.create_model_ddl( From a95a36a2040d2511510f9bee0eb42309c3a81a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Mon, 13 Apr 2026 21:41:53 +0000 Subject: [PATCH 6/9] feat: add support for `hparam_range` and `hparam_candidates` to `bigframes.bigquery.create_model` --- .../bigframes/bigframes/bigquery/__init__.py | 10 ++- .../bigquery/_operations/mathematical.py | 68 +++++++++++++++++++ .../bigframes/bigquery/_operations/ml.py | 2 +- .../create_model_expression_option.sql | 3 - .../create_model_hparam_tuning.sql | 3 + .../bigframes/tests/unit/core/sql/test_ml.py | 18 ++--- packages/bigframes/tests/unit/test_col.py | 2 +- 7 files changed, 88 insertions(+), 18 deletions(-) delete mode 100644 packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql create mode 100644 packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_hparam_tuning/create_model_hparam_tuning.sql diff --git a/packages/bigframes/bigframes/bigquery/__init__.py b/packages/bigframes/bigframes/bigquery/__init__.py index f08388704585..a31d7dd83f93 100644 --- a/packages/bigframes/bigframes/bigquery/__init__.py +++ b/packages/bigframes/bigframes/bigquery/__init__.py @@ -87,7 +87,11 @@ to_json, to_json_string, ) -from bigframes.bigquery._operations.mathematical import rand +from bigframes.bigquery._operations.mathematical import ( + hparam_candidates, + hparam_range, + rand, +) from bigframes.bigquery._operations.search import create_vector_index, vector_search from bigframes.bigquery._operations.sql import sql_scalar from 
bigframes.bigquery._operations.struct import struct @@ -130,6 +134,8 @@ to_json, to_json_string, # mathematical ops + hparam_candidates, + hparam_range, rand, # search ops create_vector_index, @@ -187,6 +193,8 @@ "to_json", "to_json_string", # mathematical ops + "hparam_candidates", + "hparam_range", "rand", # search ops "create_vector_index", diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index 2e8351904775..bed9c307c3df 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -51,3 +51,71 @@ def rand() -> bigframes.core.col.Expression: is_deterministic=False, ) return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ())) + + +def hparam_range(min: float | int, max: float | int) -> bigframes.core.col.Expression: + """ + Defines the minimum and maximum bounds of the search space of continuous + values for a hyperparameter. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> # Specify a range of values for a hyperparameter. + >>> learn_rate = bbq.hparam_range(0.0001, 1.0) + + Args: + min (float or int): + The minimum bound of the search space. + max (float or int): + The maximum bound of the search space. + + Returns: + bigframes.pandas.api.typing.Expression: + An expression that can be used in model options. 
+ """ + min_expr = bigframes.core.expression.const(min) + max_expr = bigframes.core.expression.const(max) + + op = ops.SqlScalarOp( + _output_type=dtypes.FLOAT_DTYPE, + sql_template="HPARAM_RANGE({0}, {1})", + is_deterministic=True, + ) + return bigframes.core.col.Expression( + bigframes.core.expression.OpExpression(op, (min_expr, max_expr)) + ) + + +def hparam_candidates( + candidates: list[float | int | str], +) -> bigframes.core.col.Expression: + """ + Specifies the set of discrete values for the hyperparameter. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> # Specify a set of values for a hyperparameter. + >>> optimizer = bbq.hparam_candidates(['ADAGRAD', 'SGD', 'FTRL']) + + Args: + candidates (list): + The set of discrete values for the hyperparameter. + + Returns: + bigframes.pandas.api.typing.Expression: + An expression that can be used in model options. + """ + candidates_expr = bigframes.core.expression.const(candidates) + + op = ops.SqlScalarOp( + _output_type=dtypes.STRING_DTYPE, + sql_template="HPARAM_CANDIDATES({0})", + is_deterministic=True, + ) + return bigframes.core.col.Expression( + bigframes.core.expression.OpExpression(op, (candidates_expr,)) + ) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ml.py b/packages/bigframes/bigframes/bigquery/_operations/ml.py index 412b49b888f5..c6ef1f8bb7a7 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ml.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ml.py @@ -20,12 +20,12 @@ import google.cloud.bigquery import pandas as pd +import bigframes.core.col as col import bigframes.core.logging.log_adapter as log_adapter import bigframes.core.sql.ml import bigframes.dataframe as dataframe import bigframes.ml.base import bigframes.session -import bigframes.core.col as col from bigframes.bigquery._operations import utils diff --git 
a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql deleted file mode 100644 index e328aab5fbfd..000000000000 --- a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_expression_option/create_model_expression_option.sql +++ /dev/null @@ -1,3 +0,0 @@ -CREATE MODEL `my_model` -OPTIONS(l2_reg = 0.1 * 10, booster_type = 'gbtree') -AS SELECT * FROM t diff --git a/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_hparam_tuning/create_model_hparam_tuning.sql b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_hparam_tuning/create_model_hparam_tuning.sql new file mode 100644 index 000000000000..c7ed32e54fc8 --- /dev/null +++ b/packages/bigframes/tests/unit/core/sql/snapshots/test_ml/test_create_model_hparam_tuning/create_model_hparam_tuning.sql @@ -0,0 +1,3 @@ +CREATE MODEL `my_model` +OPTIONS(model_type = 'LINEAR_REG', learn_rate = HPARAM_RANGE(0.0001, 1.0), optimizer = HPARAM_CANDIDATES(['ADAGRAD', 'SGD'])) +AS SELECT * FROM t diff --git a/packages/bigframes/tests/unit/core/sql/test_ml.py b/packages/bigframes/tests/unit/core/sql/test_ml.py index 61296638eec2..d2f789fc6309 100644 --- a/packages/bigframes/tests/unit/core/sql/test_ml.py +++ b/packages/bigframes/tests/unit/core/sql/test_ml.py @@ -14,6 +14,7 @@ import pytest +import bigframes.bigquery as bbq import bigframes.core.col as col import bigframes.core.expression as ex import bigframes.core.sql.ml @@ -101,24 +102,17 @@ def test_create_model_list_option(snapshot): snapshot.assert_match(sql, "create_model_list_option.sql") -def test_create_model_expression_option(snapshot): - # An expression that calls a function on a literal value - # e.g. 
0.1 * 10 - literal_expr = ex.ScalarConstantExpression(0.1, dtypes.FLOAT_DTYPE) - multiplier_expr = ex.ScalarConstantExpression(10, dtypes.INT_DTYPE) - math_expr = col.Expression( - ex.OpExpression(op=numeric_ops.mul_op, inputs=(literal_expr, multiplier_expr)) - ) - +def test_create_model_hparam_tuning(snapshot): sql = bigframes.core.sql.ml.create_model_ddl( model_name="my_model", options={ - "l2_reg": math_expr, - "booster_type": "gbtree", + "model_type": "LINEAR_REG", + "learn_rate": bbq.hparam_range(0.0001, 1.0), + "optimizer": bbq.hparam_candidates(["ADAGRAD", "SGD"]), }, training_data="SELECT * FROM t", ) - snapshot.assert_match(sql, "create_model_expression_option.sql") + snapshot.assert_match(sql, "create_model_hparam_tuning.sql") def test_evaluate_model_basic(snapshot): diff --git a/packages/bigframes/tests/unit/test_col.py b/packages/bigframes/tests/unit/test_col.py index cf9aa5c4b86a..9f5bbca5d9bc 100644 --- a/packages/bigframes/tests/unit/test_col.py +++ b/packages/bigframes/tests/unit/test_col.py @@ -16,13 +16,13 @@ import pathlib from typing import Generator +import numpy as np import pandas as pd import pytest import bigframes import bigframes.pandas as bpd from bigframes.testing.utils import assert_frame_equal, convert_pandas_dtypes -import numpy as np pytest.importorskip("polars") pytest.importorskip("pandas", minversion="3.0.0") From 1ee0f93baa49405695d246d16632ae1c75c651dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 13 Apr 2026 16:48:11 -0500 Subject: [PATCH 7/9] Update packages/bigframes/bigframes/bigquery/_operations/mathematical.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../bigframes/bigframes/bigquery/_operations/mathematical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index 
bed9c307c3df..2c86ccc9991e 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -53,7 +53,7 @@ def rand() -> bigframes.core.col.Expression: return bigframes.core.col.Expression(bigframes.core.expression.OpExpression(op, ())) -def hparam_range(min: float | int, max: float | int) -> bigframes.core.col.Expression: +def hparam_range(min: float, max: float) -> bigframes.core.col.Expression: """ Defines the minimum and maximum bounds of the search space of continuous values for a hyperparameter. From 30506fa1609cab52da09fe5cc9f8e08e010a5a8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 13 Apr 2026 16:48:45 -0500 Subject: [PATCH 8/9] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../bigframes/bigframes/bigquery/_operations/mathematical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index 2c86ccc9991e..ed2ac120b175 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -89,7 +89,7 @@ def hparam_range(min: float, max: float) -> bigframes.core.col.Expression: def hparam_candidates( - candidates: list[float | int | str], + candidates: list[float | str], ) -> bigframes.core.col.Expression: """ Specifies the set of discrete values for the hyperparameter. 
From 32e7040916024ed3cd33dcb275a341e879645adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Tue, 28 Apr 2026 19:49:36 +0000 Subject: [PATCH 9/9] chore: mypy fixes --- .../bigframes/bigquery/_operations/mathematical.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py index ed2ac120b175..476d012bea4d 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/mathematical.py +++ b/packages/bigframes/bigframes/bigquery/_operations/mathematical.py @@ -14,6 +14,8 @@ from __future__ import annotations +from typing import Sequence + import bigframes.core.col import bigframes.core.expression from bigframes import dtypes @@ -89,7 +91,7 @@ def hparam_range(min: float, max: float) -> bigframes.core.col.Expression: def hparam_candidates( - candidates: list[float | str], + candidates: Sequence[float | str], ) -> bigframes.core.col.Expression: """ Specifies the set of discrete values for the hyperparameter. @@ -102,14 +104,14 @@ def hparam_candidates( >>> optimizer = bbq.hparam_candidates(['ADAGRAD', 'SGD', 'FTRL']) Args: - candidates (list): + candidates (Sequence[float | str]): The set of discrete values for the hyperparameter. Returns: bigframes.pandas.api.typing.Expression: An expression that can be used in model options. """ - candidates_expr = bigframes.core.expression.const(candidates) + candidates_expr = bigframes.core.expression.const(tuple(candidates)) op = ops.SqlScalarOp( _output_type=dtypes.STRING_DTYPE,