From 8605075902c7d8afc10bcf1a308d9a76a28bd2a2 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 22:17:15 +0000 Subject: [PATCH 01/26] refactor: deprecate and clean up multimodal blob APIs --- .../bigframes/bigquery/_operations/ai.py | 4 +- .../bigframes/bigframes/blob/_functions.py | 602 ---------- packages/bigframes/bigframes/dataframe.py | 53 +- packages/bigframes/bigframes/ml/llm.py | 4 +- .../bigframes/bigframes/operations/blob.py | 1029 +---------------- .../bigframes/bigframes/operations/strings.py | 29 +- packages/bigframes/bigframes/series.py | 77 +- .../bigframes/bigframes/session/__init__.py | 42 +- .../tests/system/large/blob/test_function.py | 853 -------------- .../sqlglot/expressions/test_blob_ops.py | 12 +- 10 files changed, 130 insertions(+), 2575 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/blob/test_function.py diff --git a/packages/bigframes/bigframes/bigquery/_operations/ai.py b/packages/bigframes/bigframes/bigquery/_operations/ai.py index 7a509d4f95ff..6164c863b391 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ai.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ai.py @@ -1003,7 +1003,7 @@ def _separate_context_and_series( if isinstance(prompt, series.Series): if prompt.dtype == dtypes.OBJ_REF_DTYPE: # Multi-model support - return [None], [prompt.blob.read_url()] + return [None], [prompt._blob._read_url()] return [None], [prompt] prompt_context: List[str | None] = [] @@ -1040,7 +1040,7 @@ def _convert_series( if result.dtype == dtypes.OBJ_REF_DTYPE: # Support multimodel - return result.blob.read_url() + return result._blob._read_url() return result diff --git a/packages/bigframes/bigframes/blob/_functions.py b/packages/bigframes/bigframes/blob/_functions.py index 5114f60058c1..3869416d1244 100644 --- a/packages/bigframes/bigframes/blob/_functions.py +++ b/packages/bigframes/bigframes/blob/_functions.py @@ -124,605 +124,3 @@ def udf(self): # TODO(b/404605969): remove cleanups when 
UDF fixes dataset deletion. self._session._function_session._update_temp_artifacts(udf_name, "") return self._session.read_gbq_function(udf_name) - - -def exif_func(src_obj_ref_rt: str, verbose: bool) -> str: - try: - import io - import json - - import requests - from PIL import ExifTags, Image - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - image = Image.open(io.BytesIO(bts)) - exif_data = image.getexif() - exif_dict = {} - - if exif_data: - for tag, value in exif_data.items(): - tag_name = ExifTags.TAGS.get(tag, tag) - # Convert non-serializable types to strings - try: - json.dumps(value) - exif_dict[tag_name] = value - except (TypeError, ValueError): - exif_dict[tag_name] = str(value) - - if verbose: - return json.dumps({"status": "", "content": json.dumps(exif_dict)}) - else: - return json.dumps(exif_dict) - - except Exception as e: - # Return error as JSON with error field - error_result = {"status": f"{type(e).__name__}: {str(e)}", "content": "{}"} - if verbose: - return json.dumps(error_result) - else: - return "{}" - - -exif_func_def = FunctionDef(exif_func, ["pillow", "requests"]) - - -# Blur images. Takes ObjectRefRuntime as JSON string. Outputs ObjectRefRuntime JSON string. 
-def image_blur_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - ksize_x: int, - ksize_y: int, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() # Raise exception for HTTP errors - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - - img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y)) - - success, encoded = cv.imencode(ext, img_blurred) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={"Content-Type": content_type}, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"]) - - -def image_blur_to_bytes_func( - src_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str, verbose: bool -) 
-> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_blurred = cv.blur(img, ksize=(ksize_x, ksize_y)) - success, encoded = cv.imencode(ext, img_blurred) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_blur_to_bytes_def = FunctionDef( - image_blur_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -def image_resize_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - dsize_x: int, - dsize_y: int, - fx: float, - fy: float, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = 
json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy) - - success, encoded = cv.imencode(ext, img_resized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={ - "Content-Type": content_type, - }, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_resize_def = FunctionDef( - image_resize_func, ["opencv-python", "numpy", "requests"] -) - - -def image_resize_to_bytes_func( - src_obj_ref_rt: str, - dsize_x: int, - dsize_y: int, - fx: float, - fy: float, - ext: str, - verbose: bool, -) -> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = 
session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_resized = cv.resize(img, dsize=(dsize_x, dsize_y), fx=fx, fy=fy) - success, encoded = cv.imencode(ext, img_resized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_resize_to_bytes_def = FunctionDef( - image_resize_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -def image_normalize_func( - src_obj_ref_rt: str, - dst_obj_ref_rt: str, - alpha: float, - beta: float, - norm_type: str, - ext: str, - verbose: bool, -) -> typing.Optional[str]: - try: - import json - - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - norm_type_mapping = { - "inf": cv.NORM_INF, - "l1": cv.NORM_L1, - "l2": cv.NORM_L2, - "minmax": cv.NORM_MINMAX, - } - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - dst_obj_ref_rt_json = json.loads(dst_obj_ref_rt) - - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - dst_url = dst_obj_ref_rt_json["access_urls"]["write_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - 
bts = response.content - - nparr = np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_normalized = cv.normalize( - img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type] - ) - - success, encoded = cv.imencode(ext, img_normalized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - bts = encoded.tobytes() - - ext = ext.replace(".", "") - ext_mappings = {"jpg": "jpeg", "tif": "tiff"} - ext = ext_mappings.get(ext, ext) - content_type = "image/" + ext - - put_response = session.put( - url=dst_url, - data=bts, - headers={ - "Content-Type": content_type, - }, - timeout=30, - ) - put_response.raise_for_status() - - if verbose: - return json.dumps({"status": "", "content": dst_obj_ref_rt}) - else: - return dst_obj_ref_rt - - except Exception as e: - if verbose: - error_result = { - "status": f"Error: {type(e).__name__}: {str(e)}", - "content": "", - } - return json.dumps(error_result) - else: - return None - - -image_normalize_def = FunctionDef( - image_normalize_func, ["opencv-python", "numpy", "requests"] -) - - -def image_normalize_to_bytes_func( - src_obj_ref_rt: str, - alpha: float, - beta: float, - norm_type: str, - ext: str, - verbose: bool, -) -> str: - import base64 - import json - - try: - import cv2 as cv # type: ignore - import numpy as np - import requests - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - ext = ext or ".jpeg" - - norm_type_mapping = { - "inf": cv.NORM_INF, - "l1": cv.NORM_L1, - "l2": cv.NORM_L2, - "minmax": cv.NORM_MINMAX, - } - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30) - response.raise_for_status() - bts = response.content - - nparr = 
np.frombuffer(bts, np.uint8) - img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED) - if img is None: - raise ValueError( - "Failed to decode image - possibly corrupted or unsupported format" - ) - img_normalized = cv.normalize( - img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type] - ) - success, encoded = cv.imencode(ext, img_normalized) - if not success: - raise ValueError(f"Failed to encode image with extension {ext}") - content = encoded.tobytes() - - encoded_content = base64.b64encode(content).decode("utf-8") - result_dict = {"status": "", "content": encoded_content} - - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - except Exception as e: - status = f"Error: {type(e).__name__}: {str(e)}" - encoded_content = base64.b64encode(b"").decode("utf-8") - result_dict = {"status": status, "content": encoded_content} - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -image_normalize_to_bytes_def = FunctionDef( - image_normalize_to_bytes_func, ["opencv-python", "numpy", "requests"] -) - - -# Extracts all text from a PDF url -def pdf_extract_func(src_obj_ref_rt: str, verbose: bool) -> str: - try: - import io - import json - - import requests - from pypdf import PdfReader # type: ignore - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30, stream=True) - response.raise_for_status() - pdf_bytes = response.content - - pdf_file = io.BytesIO(pdf_bytes) - reader = PdfReader(pdf_file, strict=False) - - all_text = "" - for page in reader.pages: - page_extract_text = page.extract_text() - if page_extract_text: - all_text += page_extract_text - - result_dict = {"status": "", "content": all_text} - - except Exception as e: - result_dict = {"status": 
str(e), "content": ""} - - if verbose: - return json.dumps(result_dict) - else: - return result_dict["content"] - - -pdf_extract_def = FunctionDef( - pdf_extract_func, ["pypdf>=5.3.1,<6.0.0", "requests", "cryptography==43.0.3"] -) - - -# Extracts text from a PDF url and chunks it simultaneously -def pdf_chunk_func( - src_obj_ref_rt: str, chunk_size: int, overlap_size: int, verbose: bool -) -> str: - try: - import io - import json - - import requests - from pypdf import PdfReader # type: ignore - from requests import adapters - - session = requests.Session() - session.mount("https://", adapters.HTTPAdapter(max_retries=3)) - - src_obj_ref_rt_json = json.loads(src_obj_ref_rt) - src_url = src_obj_ref_rt_json["access_urls"]["read_url"] - - response = session.get(src_url, timeout=30, stream=True) - response.raise_for_status() - pdf_bytes = response.content - - pdf_file = io.BytesIO(pdf_bytes) - reader = PdfReader(pdf_file, strict=False) - # extract and chunk text simultaneously - all_text_chunks = [] - curr_chunk = "" - for page in reader.pages: - page_text = page.extract_text() - if page_text: - curr_chunk += page_text - # split the accumulated text into chunks of a specific size with overlaop - # this loop implements a sliding window approach to create chunks - while len(curr_chunk) >= chunk_size: - split_idx = curr_chunk.rfind(" ", 0, chunk_size) - if split_idx == -1: - split_idx = chunk_size - actual_chunk = curr_chunk[:split_idx] - all_text_chunks.append(actual_chunk) - overlap = curr_chunk[split_idx + 1 : split_idx + 1 + overlap_size] - curr_chunk = overlap + curr_chunk[split_idx + 1 + overlap_size :] - if curr_chunk: - all_text_chunks.append(curr_chunk) - - result_dict = {"status": "", "content": all_text_chunks} - - except Exception as e: - result_dict = {"status": str(e), "content": []} - - if verbose: - return json.dumps(result_dict) - else: - return json.dumps(result_dict["content"]) - - -pdf_chunk_def = FunctionDef( - pdf_chunk_func, ["pypdf>=5.3.1,<6.0.0", 
"requests", "cryptography==43.0.3"] -) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index b89360c691d3..a98a44448737 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -833,7 +833,7 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: df = self.copy() for col in blob_cols: # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. - df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + df[col] = df[col]._blob._get_runtime(mode="R", with_metadata=True) return df, blob_cols def _repr_mimebundle_(self, include=None, exclude=None): @@ -1611,7 +1611,8 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: ... + ) -> pandas.DataFrame: + ... @overload def to_pandas( @@ -1623,7 +1624,8 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def to_pandas( self, @@ -1935,7 +1937,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def drop( @@ -1947,7 +1950,8 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: ... + ) -> None: + ... def drop( self, @@ -2091,17 +2095,20 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: + ... 
@overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: ... + ) -> None: + ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2118,7 +2125,8 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2127,7 +2135,8 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def rename_axis( @@ -2136,7 +2145,8 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... def rename_axis( self, @@ -2332,7 +2342,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def reset_index( @@ -2344,7 +2355,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: ... + ) -> None: + ... @overload def reset_index( @@ -2356,7 +2368,8 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: ... + ) -> Optional[DataFrame]: + ... def reset_index( self, @@ -2419,7 +2432,8 @@ def sort_index( ascending: bool = ..., inplace: Literal[False] = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_index( @@ -2428,7 +2442,8 @@ def sort_index( ascending: bool = ..., inplace: Literal[True] = ..., na_position: Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... 
def sort_index( self, @@ -2474,7 +2489,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: ... + ) -> DataFrame: + ... @overload def sort_values( @@ -2485,7 +2501,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... def sort_values( self, diff --git a/packages/bigframes/bigframes/ml/llm.py b/packages/bigframes/bigframes/ml/llm.py index bcf59d591f8e..d9e228c90c9f 100644 --- a/packages/bigframes/bigframes/ml/llm.py +++ b/packages/bigframes/bigframes/ml/llm.py @@ -397,7 +397,7 @@ def predict( # TODO(garrettwu): remove transform to ObjRefRuntime when BQML supports ObjRef as input if X["content"].dtype == dtypes.OBJ_REF_DTYPE: - X["content"] = X["content"].blob._get_runtime("R", with_metadata=True) + X["content"] = X["content"]._blob._get_runtime("R", with_metadata=True) options: dict = {} @@ -731,7 +731,7 @@ def predict( isinstance(item, bigframes.series.Series) and item.dtype == dtypes.OBJ_REF_DTYPE ): - item = item.blob._get_runtime("R", with_metadata=True) + item = item._blob._get_runtime("R", with_metadata=True) df_prompt[label] = item df_prompt = df_prompt.drop(columns="bigframes_placeholder_col") diff --git a/packages/bigframes/bigframes/operations/blob.py b/packages/bigframes/bigframes/operations/blob.py index b9a33af2d1ed..d29d1a1202c0 100644 --- a/packages/bigframes/bigframes/operations/blob.py +++ b/packages/bigframes/bigframes/operations/blob.py @@ -33,134 +33,17 @@ @log_adapter.class_logger -class BlobAccessor: +class _BlobAccessor: """ - Blob functions for Series and Index. - - .. note:: - BigFrames Blob is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). 
Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). + Internal blob functions for Series and Index. """ def __init__(self, data: bigframes.series.Series): self._data = data - def uri(self) -> bigframes.series.Series: - """URIs of the Blob. - - Returns: - bigframes.series.Series: URIs as string.""" - s = bigframes.series.Series(self._data._block) - - return s.struct.field("uri") - - def authorizer(self) -> bigframes.series.Series: - """Authorizers of the Blob. - - Returns: - bigframes.series.Series: Autorithers(connection) as string.""" - s = bigframes.series.Series(self._data._block) - - return s.struct.field("authorizer") - - def version(self) -> bigframes.series.Series: - """Versions of the Blob. - - Returns: - bigframes.series.Series: Version as string.""" - # version must be retrieved after fetching metadata - return self._data._apply_unary_op(ops.obj_fetch_metadata_op).struct.field( - "version" - ) - - def metadata(self) -> bigframes.series.Series: - """Retrieve the metadata of the Blob. - - Returns: - bigframes.series.Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time). 
- """ - series_to_check = bigframes.series.Series(self._data._block) - # Check if it's a struct series from a verbose operation - if dtypes.is_struct_like(series_to_check.dtype): - pyarrow_dtype = series_to_check.dtype.pyarrow_dtype - if "content" in [field.name for field in pyarrow_dtype]: - content_field_type = pyarrow_dtype.field("content").type - content_bf_type = dtypes.arrow_dtype_to_bigframes_dtype( - content_field_type - ) - if content_bf_type == dtypes.OBJ_REF_DTYPE: - series_to_check = series_to_check.struct.field("content") - details_json = series_to_check._apply_unary_op( - ops.obj_fetch_metadata_op - ).struct.field("details") - import bigframes.bigquery as bbq - - return bbq.json_extract(details_json, "$.gcs_metadata").rename("metadata") - - def content_type(self) -> bigframes.series.Series: - """Retrieve the content type of the Blob. - - Returns: - bigframes.series.Series: string of the content type.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.content_type")) - .rename("content_type") - ) - - def md5_hash(self) -> bigframes.series.Series: - """Retrieve the md5 hash of the Blob. - - Returns: - bigframes.series.Series: string of the md5 hash.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.md5_hash")) - .rename("md5_hash") - ) - - def size(self) -> bigframes.series.Series: - """Retrieve the file size of the Blob. - - Returns: - bigframes.series.Series: file size in bytes.""" - return ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.size")) - .rename("size") - .astype("Int64") - ) - - def updated(self) -> bigframes.series.Series: - """Retrieve the updated time of the Blob. 
- - Returns: - bigframes.series.Series: updated time as UTC datetime.""" - import bigframes.pandas as bpd - - updated = ( - self.metadata() - ._apply_unary_op(ops.JSONValue(json_path="$.updated")) - .rename("updated") - .astype("Int64") - ) - - return bpd.to_datetime(updated, unit="us", utc=True) - def _get_runtime( self, mode: str, with_metadata: bool = False ) -> bigframes.series.Series: - """Retrieve the ObjectRefRuntime as JSON. - - Args: - mode (str): mode for the URLs, "R" for read, "RW" for read & write. - metadata (bool, default False): whether to fetch the metadata in the ObjectRefRuntime. - - Returns: - bigframes.series.Series: ObjectRefRuntime JSON. - """ s = ( self._data._apply_unary_op(ops.obj_fetch_metadata_op) if with_metadata @@ -169,913 +52,7 @@ def _get_runtime( return s._apply_unary_op(ops.ObjGetAccessUrl(mode=mode)) - def _df_apply_udf( - self, df: bigframes.dataframe.DataFrame, udf - ) -> bigframes.series.Series: - # Catch and rethrow function axis=1 warning to be more user-friendly. 
- with warnings.catch_warnings(record=True) as catched_warnings: - s = df.apply(udf, axis=1) - for w in catched_warnings: - if isinstance(w.message, bfe.FunctionAxisOnePreviewWarning): - warnings.warn( - "Blob Functions use bigframes DataFrame Managed function with axis=1 senario, which is a preview feature.", - category=w.category, - stacklevel=2, - ) - else: - warnings.warn_explicit( - message=w.message, - category=w.category, - filename=w.filename, - lineno=w.lineno, - source=w.source, - ) - - return s - - def _apply_udf_or_raise_error( - self, df: bigframes.dataframe.DataFrame, udf, operation_name: str - ) -> bigframes.series.Series: - """Helper to apply UDF with consistent error handling.""" - try: - res = self._df_apply_udf(df, udf) - except Exception as e: - raise RuntimeError(f"{operation_name} UDF execution failed: {e}") from e - - if res is None: - raise RuntimeError(f"{operation_name} returned None result") - - return res - - def read_url(self) -> bigframes.series.Series: - """Retrieve the read URL of the Blob. - - Returns: - bigframes.series.Series: Read only URLs.""" + def _read_url(self) -> bigframes.series.Series: return self._get_runtime(mode="R")._apply_unary_op( ops.JSONValue(json_path="$.access_urls.read_url") ) - - def write_url(self) -> bigframes.series.Series: - """Retrieve the write URL of the Blob. - - Returns: - bigframes.series.Series: Writable URLs.""" - return self._get_runtime(mode="RW")._apply_unary_op( - ops.JSONValue(json_path="$.access_urls.write_url") - ) - - def display( - self, - n: int = 3, - *, - content_type: str = "", - width: Optional[int] = None, - height: Optional[int] = None, - ): - """Display the blob content in the IPython Notebook environment. Only works for image type now. - - Args: - n (int, default 3): number of sample blob objects to display. - content_type (str, default ""): content type of the blob. If unset, use the blob metadata of the storage. Possible values are "image", "audio" and "video". 
- width (int or None, default None): width in pixels that the image/video are constrained to. If unset, use the global setting in bigframes.options.display.blob_display_width, otherwise image/video's original size or ratio is used. No-op for other content types. - height (int or None, default None): height in pixels that the image/video are constrained to. If unset, use the global setting in bigframes.options.display.blob_display_height, otherwise image/video's original size or ratio is used. No-op for other content types. - """ - import IPython.display as ipy_display - - width = width or bigframes.options.display.blob_display_width - height = height or bigframes.options.display.blob_display_height - - # col name doesn't matter here. Rename to avoid column name conflicts - df = bigframes.series.Series(self._data._block).rename("blob_col").to_frame() - - df["read_url"] = df["blob_col"].blob.read_url() - - if content_type: - df["content_type"] = content_type - else: - df["content_type"] = df["blob_col"].blob.content_type() - - pandas_df, _, query_job = df._block.retrieve_repr_request_results(n) - df._set_internal_query_job(query_job) - - def display_single_url( - read_url: Union[str, pd._libs.missing.NAType], - content_type: Union[str, pd._libs.missing.NAType], - ): - if pd.isna(read_url): - ipy_display.display("") - return - - if pd.isna(content_type): # display as raw data or error - response = requests.get(read_url) - ipy_display.display(response.content) - return - - content_type = cast(str, content_type).casefold() - - if content_type.startswith("image"): - ipy_display.display( - ipy_display.Image(url=read_url, width=width, height=height) - ) - elif content_type.startswith("audio"): - # using url somehow doesn't work with audios - response = requests.get(read_url) - ipy_display.display(ipy_display.Audio(response.content)) - elif content_type.startswith("video"): - ipy_display.display( - ipy_display.Video(read_url, width=width, height=height) - ) - else: # 
display as raw data - response = requests.get(read_url) - ipy_display.display(response.content) - - for _, row in pandas_df.iterrows(): - display_single_url(row["read_url"], row["content_type"]) - - @property - def session(self): - return self._data._block.session - - def _resolve_connection(self, connection: Optional[str] = None) -> str: - """Resovle the BigQuery connection. - - Args: - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" is - str. If None, uses default connection of the session. - - Returns: - str: the resolved BigQuery connection string in the format: - "project.location.connection_id". - - Raises: - ValueError: If the connection cannot be resolved to a valid string. - """ - connection = connection or self._data._block.session.bq_connection - return clients.get_canonical_bq_connection_id( - connection, - default_project=self._data._block.session._project, - default_location=self._data._block.session._location, - ) - - def get_runtime_json_str( - self, mode: str = "R", *, with_metadata: bool = False - ) -> bigframes.series.Series: - """Get the runtime (contains signed URL to access gcs data) and apply the ToJSONSTring transformation. - - Args: - mode(str or str, default "R"): the mode for accessing the runtime. - Default to "R". Possible values are "R" (read-only) and - "RW" (read-write) - with_metadata (bool, default False): whether to include metadata - in the JSON string. Default to False. - - Returns: - str: the runtime object in the JSON string. - """ - runtime = self._get_runtime(mode=mode, with_metadata=with_metadata) - return runtime._apply_unary_op(ops.ToJSONString()) - - def exif( - self, - *, - engine: Literal[None, "pillow"] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extract EXIF data. 
Now only support image types. - - Args: - engine ('pillow' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: JSON series of key-value pairs if verbose=False, or struct with status and content if verbose=True. - - Raises: - ValueError: If engine is not 'pillow'. - RuntimeError: If EXIF extraction fails or returns invalid structure. 
- """ - if engine is None or engine.casefold() != "pillow": - raise ValueError("Must specify the engine, supported value is 'pillow'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - df["verbose"] = verbose - - exif_udf = blob_func.TransformFunction( - blob_func.exif_func_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - res = self._apply_udf_or_raise_error(df, exif_udf, "EXIF extraction") - - if verbose: - try: - exif_content_series = bbq.parse_json( - res._apply_unary_op(ops.JSONValue(json_path="$.content")) - ).rename("exif_content") - exif_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - except Exception as e: - raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e - results_df = bpd.DataFrame( - {"status": exif_status_series, "content": exif_content_series} - ) - results_struct = bbq.struct(results_df).rename("exif_results") - return results_struct - else: - try: - return bbq.parse_json(res) - except Exception as e: - raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e - - def image_blur( - self, - ksize: tuple[int, int], - *, - engine: Literal[None, "opencv"] = None, - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Blurs images. - - Args: - ksize (tuple(int, int)): Kernel size. - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. 
- dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image blur operation fails. 
- """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_blur_udf = blob_func.TransformFunction( - blob_func.image_blur_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["ksize_x"], df["ksize_y"] = ksize - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error(df, image_blur_udf, "Image blur") - - if verbose: - blurred_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - blurred_content_series = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[blurred_content_b64_series] - ) - blurred_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": blurred_status_series, "content": blurred_content_series} - ) - results_struct = bbq.struct(results_df).rename("blurred_results") - return results_struct - else: - blurred_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("blurred_bytes") - return blurred_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. 
- dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_blur_udf = blob_func.TransformFunction( - blob_func.image_blur_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["ksize_x"], df["ksize_y"] = ksize - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_blur_udf, "Image blur") - res.cache() # to execute the udf - - if verbose: - blurred_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": blurred_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("blurred_results") - return results_struct - else: - return dst - - def image_resize( - self, - dsize: tuple[int, int] = (0, 0), - *, - engine: Literal[None, "opencv"] = None, - fx: float = 0.0, - fy: float = 0.0, - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ): - """Resize images. - - Args: - dsize (tuple(int, int), default (0, 0)): Destination size. If set to 0, fx and fy parameters determine the size. - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - fx (float, default 0.0): scale factor along the horizontal axis. If set to 0.0, dsize parameter determines the output size. 
- fy (float, defalut 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size. - dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image resize operation fails. - """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - dsize_set = dsize[0] > 0 and dsize[1] > 0 - fsize_set = fx > 0.0 and fy > 0.0 - if not dsize_set ^ fsize_set: - raise ValueError( - "Only one of dsize or (fx, fy) parameters must be set. And the set values must be positive. 
" - ) - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_resize_udf = blob_func.TransformFunction( - blob_func.image_resize_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["dsize_x"], df["dsize_y"] = dsize - df["fx"], df["fy"] = fx, fy - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error(df, image_resize_udf, "Image resize") - - if verbose: - resized_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - resized_content_series = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[resized_content_b64_series] - ) - - resized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": resized_status_series, "content": resized_content_series} - ) - results_struct = bbq.struct(results_df).rename("resized_results") - return results_struct - else: - resized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("resized_bytes") - return resized_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. 
- dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_resize_udf = blob_func.TransformFunction( - blob_func.image_resize_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["dsize_x"], df["dsize_y"] = dsize - df["fx"], df["fy"] = fx, fy - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_resize_udf, "Image resize") - res.cache() # to execute the udf - - if verbose: - resized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": resized_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("resized_results") - return results_struct - else: - return dst - - def image_normalize( - self, - *, - engine: Literal[None, "opencv"] = None, - alpha: float = 1.0, - beta: float = 0.0, - norm_type: str = "l2", - dst: Optional[Union[str, bigframes.series.Series]] = None, - connection: Optional[str] = None, - max_batching_rows: int = 8192, - container_cpu: Union[float, int] = 0.33, - container_memory: str = "512Mi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Normalize images. - - Args: - engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - alpha (float, default 1.0): Norm value to normalize to or the lower range boundary in case of the range normalization. 
- beta (float, default 0.0): Upper range boundary in case of the range normalization; it is not used for the norm normalization. - norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax". - dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of: - str: GCS folder str. The output filenames are the same as the input files. - blob Series: The output file paths are determined by the uris of the blob Series. - None: Output to BQ as bytes. - Encoding is determined by the extension of the output filenames (or input filenames if doesn't have output filenames). If filename doesn't have an extension, use ".jpeg" for encoding. - connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session. - max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function. - container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "512Mi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default False): If True, returns a struct with status and content fields. If False, returns only the content. - - Returns: - bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content. - - Raises: - ValueError: If engine is not 'opencv' or parameters are invalid. - RuntimeError: If image normalize operation fails. 
- """ - if engine is None or engine.casefold() != "opencv": - raise ValueError("Must specify the engine, supported value is 'opencv'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - df = self.get_runtime_json_str(mode="R").to_frame() - - if dst is None: - ext = self.uri().str.extract(FILE_EXT_REGEX) - - image_normalize_udf = blob_func.TransformFunction( - blob_func.image_normalize_to_bytes_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df["alpha"] = alpha - df["beta"] = beta - df["norm_type"] = norm_type - df["ext"] = ext # type: ignore - df["verbose"] = verbose - res = self._apply_udf_or_raise_error( - df, image_normalize_udf, "Image normalize" - ) - - if verbose: - normalized_content_b64_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - normalized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[normalized_content_b64_series] - ) - normalized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - {"status": normalized_status_series, "content": normalized_bytes} - ) - results_struct = bbq.struct(results_df).rename("normalized_results") - return results_struct - else: - normalized_bytes = bbq.sql_scalar( - "FROM_BASE64({0})", columns=[res] - ).rename("normalized_bytes") - return normalized_bytes - - if isinstance(dst, str): - dst = os.path.join(dst, "") - # Replace src folder with dst folder, keep the file names. 
- dst_uri = self.uri().str.replace(FILE_FOLDER_REGEX, rf"{dst}\1", regex=True) - dst = cast( - bigframes.series.Series, dst_uri.str.to_blob(connection=connection) - ) - - ext = dst.blob.uri().str.extract(FILE_EXT_REGEX) - - image_normalize_udf = blob_func.TransformFunction( - blob_func.image_normalize_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - dst_rt = dst.blob.get_runtime_json_str(mode="RW") - - df = df.join(dst_rt, how="outer") - df["alpha"] = alpha - df["beta"] = beta - df["norm_type"] = norm_type - df["ext"] = ext # type: ignore - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, image_normalize_udf, "Image normalize") - res.cache() # to execute the udf - - if verbose: - normalized_status_series = res._apply_unary_op( - ops.JSONValue(json_path="$.status") - ) - results_df = bpd.DataFrame( - { - "status": normalized_status_series, - "content": dst.blob.uri().str.to_blob( - connection=self._resolve_connection(connection) - ), - } - ) - results_struct = bbq.struct(results_df).rename("normalized_results") - return results_struct - else: - return dst - - def pdf_extract( - self, - *, - engine: Literal[None, "pypdf"] = None, - connection: Optional[str] = None, - max_batching_rows: int = 1, - container_cpu: Union[float, int] = 2, - container_memory: str = "1Gi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extracts text from PDF URLs and saves the text as string. - - Args: - engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. 
- max_batching_rows (int, default 1): Max number of rows per batch - send to cloud run to execute the function. - container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "1Gi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the extracted content - are displayed. Conversely, when set to False, only the extracted - content is presented, suppressing error messages. - - Returns: - bigframes.series.Series: str or struct[str, str], - depend on the "verbose" parameter. - Contains the extracted text from the PDF file. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'pypdf'. - RuntimeError: If PDF extraction fails or returns invalid structure. - """ - if engine is None or engine.casefold() != "pypdf": - raise ValueError("Must specify the engine, supported value is 'pypdf'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - - pdf_extract_udf = blob_func.TransformFunction( - blob_func.pdf_extract_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df = self.get_runtime_json_str(mode="R").to_frame() - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, pdf_extract_udf, "PDF extraction") - - if verbose: - # Extract content with error handling - try: - content_series = res._apply_unary_op( - ops.JSONValue(json_path="$.content") - ) - except Exception as e: - raise RuntimeError( - f"Failed to extract content field from PDF result: {e}" - ) from e - try: - status_series = 
res._apply_unary_op(ops.JSONValue(json_path="$.status")) - except Exception as e: - raise RuntimeError( - f"Failed to extract status field from PDF result: {e}" - ) from e - - res_df = bpd.DataFrame({"status": status_series, "content": content_series}) - struct_series = bbq.struct(res_df).rename("extracted_results") - return struct_series - else: - return res.rename("extracted_content") - - def pdf_chunk( - self, - *, - engine: Literal[None, "pypdf"] = None, - connection: Optional[str] = None, - chunk_size: int = 2000, - overlap_size: int = 200, - max_batching_rows: int = 1, - container_cpu: Union[float, int] = 2, - container_memory: str = "1Gi", - verbose: bool = False, - ) -> bigframes.series.Series: - """Extracts and chunks text from PDF URLs and saves the text as - arrays of strings. - - Args: - engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. - chunk_size (int, default 2000): the desired size of each text chunk - (number of characters). - overlap_size (int, default 200): the number of overlapping characters - between consective chunks. The helps to ensure context is - perserved across chunk boundaries. - max_batching_rows (int, default 1): Max number of rows per batch - send to cloud run to execute the function. - container_cpu (int or float, default 2): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers. - container_memory (str, default "1Gi"): container memory size. String of the format . Possible values are from 512Mi to 32Gi. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the extracted content - are displayed. 
Conversely, when set to False, only the extracted - content is presented, suppressing error messages. - - Returns: - bigframe.series.Series: array[str] or struct[str, array[str]], - depend on the "verbose" parameter. - where each string is a chunk of text extracted from PDF. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'pypdf'. - RuntimeError: If PDF chunking fails or returns invalid structure. - """ - if engine is None or engine.casefold() != "pypdf": - raise ValueError("Must specify the engine, supported value is 'pypdf'.") - - import bigframes.bigquery as bbq - import bigframes.blob._functions as blob_func - import bigframes.pandas as bpd - - connection = self._resolve_connection(connection) - - if chunk_size <= 0: - raise ValueError("chunk_size must be a positive integer.") - if overlap_size < 0: - raise ValueError("overlap_size must be a non-negative integer.") - if overlap_size >= chunk_size: - raise ValueError("overlap_size must be smaller than chunk_size.") - - pdf_chunk_udf = blob_func.TransformFunction( - blob_func.pdf_chunk_def, - session=self._data._block.session, - connection=connection, - max_batching_rows=max_batching_rows, - container_cpu=container_cpu, - container_memory=container_memory, - ).udf() - - df = self.get_runtime_json_str(mode="R").to_frame() - df["chunk_size"] = chunk_size - df["overlap_size"] = overlap_size - df["verbose"] = verbose - - res = self._apply_udf_or_raise_error(df, pdf_chunk_udf, "PDF chunking") - - try: - content_series = bbq.json_extract_string_array(res, "$.content") - except Exception as e: - raise RuntimeError( - f"Failed to extract content array from PDF chunk result: {e}" - ) from e - - if verbose: - try: - status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status")) - except Exception as e: - raise RuntimeError( - f"Failed to extract status field from PDF chunk result: {e}" - ) from e - - results_df = bpd.DataFrame( - {"status": status_series, "content": 
content_series} - ) - resultes_struct = bbq.struct(results_df).rename("chunked_results") - return resultes_struct - else: - return bbq.json_extract_string_array(res, "$").rename("chunked_content") - - def audio_transcribe( - self, - *, - engine: Literal["bigquery"] = "bigquery", - connection: Optional[str] = None, - model_name: Optional[ - Literal[ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ] - ] = None, - verbose: bool = False, - ) -> bigframes.series.Series: - """ - Transcribe audio content using a Gemini multimodal model. - - Args: - engine ('bigquery'): The engine (bigquery or third party library) used for the function. - connection (str or None, default None): BQ connection used for - function internet transactions, and the output blob if "dst" - is str. If None, uses default connection of the session. - model_name (str): The model for natural language tasks. Accepted - values are "gemini-2.0-flash-lite-001", and "gemini-2.0-flash-001". - See "https://ai.google.dev/gemini-api/docs/models" for model choices. - verbose (bool, default "False"): controls the verbosity of the output. - When set to True, both error messages and the transcribed content - are displayed. Conversely, when set to False, only the transcribed - content is presented, suppressing error messages. - - Returns: - bigframes.series.Series: str or struct[str, str], - depend on the "verbose" parameter. - Contains the transcribed text from the audio file. - Includes error messages if verbosity is enabled. - - Raises: - ValueError: If engine is not 'bigquery'. - RuntimeError: If the transcription result structure is invalid. - """ - if engine.casefold() != "bigquery": - raise ValueError("Must specify the engine, supported value is 'bigquery'.") - - import bigframes.bigquery as bbq - import bigframes.pandas as bpd - - # col name doesn't matter here. 
Rename to avoid column name conflicts - audio_series = bigframes.series.Series(self._data._block) - - prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio." - - # Convert the audio series to the runtime representation required by the model. - audio_runtime = audio_series.blob._get_runtime("R", with_metadata=True) - - transcribed_results = bbq.ai.generate( - prompt=(prompt_text, audio_runtime), - connection_id=connection, - endpoint=model_name, - model_params={"generationConfig": {"temperature": 0.0}}, - ) - - # Validate that the result is not None - if transcribed_results is None: - raise RuntimeError("Transcription returned None result") - - transcribed_content_series = transcribed_results.struct.field("result").rename( - "transcribed_content" - ) - - if verbose: - transcribed_status_series = transcribed_results.struct.field("status") - results_df = bpd.DataFrame( - { - "status": transcribed_status_series, - "content": transcribed_content_series, - } - ) - results_struct = bbq.struct(results_df).rename("transcription_results") - return results_struct - else: - return transcribed_content_series.rename("transcribed_content") diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index 26ff2616a1b7..7cc93d34c07a 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -305,6 +305,18 @@ def join(self, sep: str) -> T: ops.ArrayReduceOp(aggregation=agg_ops.StringAggOp(sep=sep)) ) + def _to_blob(self, connection: Optional[str] = None) -> T: + import bigframes.core.blocks + + if hasattr(self._data, "_block") and isinstance( + self._data._block, bigframes.core.blocks.Block + ): + session = 
self._data._block.session + else: + raise ValueError("to_blob is only supported via Series.str") + connection = session._create_bq_connection(connection=connection) + return self._data._apply_binary_op(connection, ops.obj_make_ref_op) + def to_blob(self, connection: Optional[str] = None) -> T: """Create a BigFrames Blob series from a series of URIs. @@ -325,16 +337,15 @@ def to_blob(self, connection: Optional[str] = None) -> T: bigframes.series.Series: Blob Series. """ - import bigframes.core.blocks + import warnings + import bigframes.exceptions as bfe - if hasattr(self._data, "_block") and isinstance( - self._data._block, bigframes.core.blocks.Block - ): - session = self._data._block.session - else: - raise ValueError("to_blob is only supported via Series.str") - connection = session._create_bq_connection(connection=connection) - return self._data._apply_binary_op(connection, ops.obj_make_ref_op) + warnings.warn( + "Series.str.to_blob is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.", + category=bfe.ApiDeprecationWarning, + stacklevel=2, + ) + return self._to_blob(connection) def _parse_flags(flags: int) -> Optional[str]: diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index fbcc949855c2..3d29c19b9e41 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -321,16 +321,8 @@ def list(self) -> lists.ListAccessor: return lists.ListAccessor(self) @property - def blob(self) -> blob.BlobAccessor: - """ - Accessor for Blob operations. - """ - warnings.warn( - "The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - return blob.BlobAccessor(self) + def _blob(self) -> blob._BlobAccessor: + return blob._BlobAccessor(self) @property @validations.requires_ordering() @@ -383,7 +375,8 @@ def copy(self) -> Series: def rename( self, index: Union[blocks.Label, Mapping[Any, Any]] = None, - ) -> Series: ... + ) -> Series: + ... @overload def rename( @@ -392,7 +385,8 @@ def rename( *, inplace: Literal[False], **kwargs, - ) -> Series: ... + ) -> Series: + ... @overload def rename( @@ -401,7 +395,8 @@ def rename( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... def rename( self, @@ -462,7 +457,8 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> Series: ... + ) -> Series: + ... @overload def rename_axis( @@ -471,7 +467,8 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> Series: ... + ) -> Series: + ... @overload def rename_axis( @@ -480,7 +477,8 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: ... + ) -> None: + ... @validations.requires_index def rename_axis( @@ -524,7 +522,8 @@ def reset_index( drop: Literal[False] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> bigframes.dataframe.DataFrame: ... + ) -> bigframes.dataframe.DataFrame: + ... @overload def reset_index( @@ -535,7 +534,8 @@ def reset_index( drop: Literal[True] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> Series: ... + ) -> Series: + ... @overload def reset_index( @@ -546,7 +546,8 @@ def reset_index( drop: bool = ..., inplace: Literal[True] = ..., allow_duplicates: Optional[bool] = ..., - ) -> None: ... + ) -> None: + ... 
@validations.requires_ordering() def reset_index( @@ -1539,9 +1540,9 @@ def ne(self, other: object) -> Series: def items(self): for batch_df in self._block.to_pandas_batches(): - assert batch_df.shape[1] == 1, ( - f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." - ) + assert ( + batch_df.shape[1] == 1 + ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." for item in batch_df.squeeze(axis=1).items(): yield item @@ -1771,7 +1772,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: ... + ) -> None: + ... @typing.overload def sort_values( @@ -1782,7 +1784,8 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> Series: ... + ) -> Series: + ... def sort_values( self, @@ -1813,12 +1816,14 @@ def sort_values( @typing.overload # type: ignore[override] def sort_index( self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=... - ) -> Series: ... + ) -> Series: + ... @typing.overload def sort_index( self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=... - ) -> None: ... + ) -> None: + ... @validations.requires_index def sort_index( @@ -2693,28 +2698,18 @@ def _apply_binary_aggregation( @typing.overload def _align( self, other: Series, how="outer" - ) -> tuple[ - ex.DerefOp, - ex.DerefOp, - blocks.Block, - ]: ... + ) -> tuple[ex.DerefOp, ex.DerefOp, blocks.Block,]: + ... @typing.overload def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ - ex.DerefOp, - AlignedExprT, - blocks.Block, - ]: ... + ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: + ... 
def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ - ex.DerefOp, - AlignedExprT, - blocks.Block, - ]: + ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: """Aligns the series value with another scalar or series object. Returns new left column id, right column id and joined tabled expression.""" values, block = self._align_n( [ diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index a6bb3041764c..ea36cc1925f1 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,7 +432,8 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq( @@ -448,7 +449,8 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq( self, @@ -520,7 +522,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def _read_gbq_colab( @@ -529,7 +532,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -590,7 +594,8 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... 
@overload def read_gbq_query( @@ -606,7 +611,8 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_query( self, @@ -753,7 +759,8 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_table( @@ -767,7 +774,8 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_table( self, @@ -918,7 +926,8 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: ... + ) -> bigframes.core.indexes.Index: + ... @typing.overload def read_pandas( @@ -926,7 +935,8 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: ... + ) -> bigframes.series.Series: + ... @typing.overload def read_pandas( @@ -934,7 +944,8 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... def read_pandas( self, @@ -2248,12 +2259,17 @@ def from_glob_path( bigframes.pandas.DataFrame: Result BigFrames DataFrame. """ + warnings.warn( + "from_glob_path is deprecated and will be removed in a future release. Use read_gbq with 'ref' column instead.", + category=bfe.ApiDeprecationWarning, + stacklevel=2, + ) # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. 
connection = self._create_bq_connection(connection=connection) table = self._create_object_table(path, connection) - s = self._loader.read_gbq_table(table)["uri"].str.to_blob(connection) + s = self._loader.read_gbq_table(table)["uri"].str._to_blob(connection) return s.rename(name).to_frame() def _create_bq_connection( @@ -2312,7 +2328,7 @@ def read_gbq_object_table( table = self.bqclient.get_table(object_table) connection = table._properties["externalDataConfiguration"]["connectionId"] - s = self._loader.read_gbq_table(object_table)["uri"].str.to_blob(connection) + s = self._loader.read_gbq_table(object_table)["uri"].str._to_blob(connection) return s.rename(name).to_frame() # ========================================================================= diff --git a/packages/bigframes/tests/system/large/blob/test_function.py b/packages/bigframes/tests/system/large/blob/test_function.py deleted file mode 100644 index bc09baf268d1..000000000000 --- a/packages/bigframes/tests/system/large/blob/test_function.py +++ /dev/null @@ -1,853 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -import os -import traceback -import uuid -from typing import Generator - -import pandas as pd -import pytest -from google.cloud import storage - -import bigframes -import bigframes.pandas as bpd -from bigframes import dtypes - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -@pytest.fixture(scope="function") -def images_output_folder() -> Generator[str, None, None]: - id = uuid.uuid4().hex - folder = os.path.join("gs://bigframes_blob_test/output/", id) - yield folder - - # clean up - try: - cloud_storage_client = storage.Client() - bucket = cloud_storage_client.bucket("bigframes_blob_test") - blobs = bucket.list_blobs(prefix="output/" + id) - for blob in blobs: - blob.delete() - except Exception as exc: - traceback.print_exception(type(exc), exc, None) - - -@pytest.fixture(scope="function") -def images_output_uris(images_output_folder: str) -> list[str]: - return [ - os.path.join(images_output_folder, "img0.jpg"), - os.path.join(images_output_folder, "img1.jpg"), - ] - - -def test_blob_exif( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=False - ) - expected = bpd.Series( - ['{"ExifOffset": 47, "Make": "MyCamera"}'], - session=session, - dtype=dtypes.JSON_DTYPE, - ) - pd.testing.assert_series_equal( - actual.to_pandas(), - expected.to_pandas(), - check_dtype=False, - check_index_type=False, - ) - - -def test_blob_exif_verbose( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=True - ) - assert hasattr(actual, "struct") - 
actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.JSON_DTYPE - - -def test_blob_image_blur_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=False - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=True - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - # Content should be blob objects for GCS destination - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder( - 
images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, engine="opencv", verbose=False - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, 
engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - 
content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not content_series.blob.size().isna().any() - - -def test_blob_image_resize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=False - ) - 
- assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, 
- beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = 
actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_pdf_extract( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=False, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." 
- expected_len = len(expected_text) - - actual_text = actual[actual != ""].iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_extract_verbose( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=True, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = successful_results.apply(lambda x: x["content"]).iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. 
" - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=False, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # First entry is NA - actual_text = "".join(actual.dropna()) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=True, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." 
- expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = "".join(successful_results.apply(lambda x: x["content"]).iloc[0]) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ], -) -def test_blob_transcribe( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=False, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. 
- logging.warning(f"blob_transcribe() model {model_name} verbose=False failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in transcribed text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ], -) -def test_blob_transcribe_verbose( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=True, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0]["content"] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. 
- logging.warning(f"blob_transcribe() model {model_name} verbose=True failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in transcribed text. " - ) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py index 4bfd50fef4ec..7130c7ac1610 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -21,15 +21,9 @@ pytest.importorskip("pytest_snapshot") -def test_obj_fetch_metadata(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str.to_blob() - sql = blob_s.blob.version().to_frame().sql - snapshot.assert_match(sql, "out.sql") - - def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str.to_blob() - sql = blob_s.blob.read_url().to_frame().sql + blob_s = scalar_types_df["string_col"].str._to_blob() + sql = blob_s._blob._read_url().to_frame().sql snapshot.assert_match(sql, "out.sql") @@ -45,7 +39,7 @@ def test_obj_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapsh def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot): - blob_df = 
scalar_types_df["string_col"].str.to_blob() + blob_df = scalar_types_df["string_col"].str._to_blob() snapshot.assert_match(blob_df.to_frame().sql, "out.sql") From 1624846cc7ca387773f29ae3f0d4c923eeab573a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:00:30 +0000 Subject: [PATCH 02/26] refactor: cleanup blob API references --- packages/bigframes/docs/templates/toc.yml | 3 - ...with-bigframes-over-national-jukebox.ipynb | 701 +++++++++--------- .../tests/system/small/blob/test_io.py | 132 ---- .../system/small/blob/test_properties.py | 119 --- .../tests/system/small/blob/test_urls.py | 31 - 5 files changed, 364 insertions(+), 622 deletions(-) delete mode 100644 packages/bigframes/tests/system/small/blob/test_io.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_properties.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_urls.py diff --git a/packages/bigframes/docs/templates/toc.yml b/packages/bigframes/docs/templates/toc.yml index 5d043fd85f2a..562b857fee5c 100644 --- a/packages/bigframes/docs/templates/toc.yml +++ b/packages/bigframes/docs/templates/toc.yml @@ -87,9 +87,6 @@ uid: bigframes.operations.lists.ListAccessor - name: PlotAccessor uid: bigframes.operations.plotting.PlotAccessor - - name: BlobAccessor - uid: bigframes.operations.blob.BlobAccessor - status: beta name: Series - name: Window uid: bigframes.core.window.Window diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index fe2d567d1b31..e70ddfe4a845 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -1,23 +1,8 @@ { "cells": [ { + "id": "c62e292f", "cell_type": "markdown", - "metadata": { - "@deathbeds/jupyterlab-fonts": { - "styles": { - "": 
{ - "body[data-jp-deck-mode='presenting'] &": { - "zoom": "194%" - } - } - } - }, - "editable": true, - "slideshow": { - "slide_type": "subslide" - }, - "tags": [] - }, "source": [ "# Creating a searchable index of the National Jukebox\n", "\n", @@ -35,42 +20,42 @@ "To follow along, you'll need a Google Cloud project\n", "\n", "* Go to https://cloud.google.com/free to start a free trial." - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "z-index": "0", - "zoom": "216%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "194%" } } } }, + "editable": true, "slideshow": { - "slide_type": "slide" - } + "slide_type": "subslide" + }, + "tags": [] }, + "execution_count": null + }, + { + "id": "7dc312a4", + "cell_type": "markdown", "source": [ "The National Jukebox is a project of the USA Library of Congress to provide access to thousands of acoustic sound recordings from the very earliest days of the commercial record industry.\n", "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\"recording" - ] - }, - { - "cell_type": "markdown", + "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "0", - "zoom": "181%" + "zoom": "216%" } } } @@ -79,11 +64,16 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "07dcae4b", + "cell_type": "markdown", "source": [ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\"audio\n", + "\u003cimg 
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02Hzz
zeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr
0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0GdNEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoO
W0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cROXPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5Z
VL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzYmUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7
F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/
Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4l
lvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBrJ+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKam
H4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8w
mT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xK
G4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMngXzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" 
/\u003e\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -96,16 +86,14 @@ "3. BigQuery DataFrames to use Python instead of SQL.\n", "\n", " https://cloud.google.com/bigquery/docs/bigquery-dataframes-introduction" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "zoom": "275%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "z-index": "0", + "zoom": "181%" } } } @@ -114,20 +102,43 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "8dd2ddab", + "cell_type": "markdown", "source": [ "## Getting started with BigQuery DataFrames (bigframes)\n", "\n", "Install the bigframes package." - ] + ], + "metadata": { + "@deathbeds/jupyterlab-fonts": { + "styles": { + "": { + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "275%" + } + } + } + }, + "slideshow": { + "slide_type": "slide" + } + }, + "execution_count": null }, { + "id": "96cda443", "cell_type": "code", - "execution_count": null, + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "214%" } } @@ -142,18 +153,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ] + "execution_count": null }, { + "id": "acf12472", "cell_type": "markdown", + "source": [ + "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. 
Then," + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "4", "zoom": "236%" } @@ -161,15 +175,17 @@ } } }, - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ] + "execution_count": null }, { + "id": "fd321077", "cell_type": "code", - "execution_count": null, + "source": [ + "from kaggle_secrets import UserSecretsClient\n", + "user_secrets = UserSecretsClient()\n", + "user_credential = user_secrets.get_gcloud_credential()\n", + "user_secrets.set_tensorflow_credential(user_credential)" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:53:08.494636Z", @@ -180,22 +196,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from kaggle_secrets import UserSecretsClient\n", - "user_secrets = UserSecretsClient()\n", - "user_credential = user_secrets.get_gcloud_credential()\n", - "user_secrets.set_tensorflow_credential(user_credential)" - ] + "execution_count": null }, { + "id": "4d837a34", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes._config\n", + "import bigframes.pandas as bpd\n", + "\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "# Set to your GCP project ID.\n", + "bpd.options.bigquery.project = \"swast-scratch\"" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "193%" } } @@ -210,24 +229,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes._config\n", - "import bigframes.pandas as bpd\n", - "\n", - "bpd.options.bigquery.location = \"US\"\n", - "\n", - "# Set to your GCP project ID.\n", - 
"bpd.options.bigquery.project = \"swast-scratch\"" - ] + "execution_count": null }, { + "id": "008f0a87", "cell_type": "markdown", + "source": [ + "## Reading data\n", + "\n", + "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "207%" } } @@ -237,20 +253,24 @@ "slide_type": "slide" } }, - "source": [ - "## Reading data\n", - "\n", - "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." - ] + "execution_count": null }, { + "id": "9a4b35ab", "cell_type": "code", - "execution_count": null, + "source": [ + "df = bpd.read_json(\n", + " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", + " engine=\"bigquery\",\n", + " orient=\"records\",\n", + " lines=True,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "225%" } } @@ -265,24 +285,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df = bpd.read_json(\n", - " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", - " engine=\"bigquery\",\n", - " orient=\"records\",\n", - " lines=True,\n", - ")" - ] + "execution_count": null }, { + "id": "e00dcb01", "cell_type": "code", - "execution_count": null, + "source": [ + "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", + "df.peek()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "122%" } } @@ -300,20 +316,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", - "df.peek()" - ] + "execution_count": null }, { + "id": "335511be", "cell_type": "code", - "execution_count": null, + "source": [ + "df.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "134%" } } @@ -328,14 +343,17 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df.shape" - ] + "execution_count": null }, { + "id": "595126a1", "cell_type": "code", - "execution_count": null, + "source": [ + "# For the purposes of a demo, select only a subset of rows.\n", + "df = df.sample(n=250)\n", + "df.cache()\n", + "df.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:55:55.448664Z", @@ -346,22 +364,36 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# For the purposes of a demo, select only a subset of rows.\n", - "df = df.sample(n=250)\n", - "df.cache()\n", - "df.shape" - ] + "execution_count": null }, { + "id": "cbd59dd9", "cell_type": "code", - "execution_count": null, + "source": [ + "# As a side effect of how I extracted the song information from the HTML DOM,\n", + "# we ended up with lists in places where we only expect one item.\n", + "#\n", + "# We can \"explode\" to flatten these lists.\n", + "flattened = df.explode([\n", + " \"Recording Repository\",\n", + " \"Recording Label\",\n", + " \"Recording Take Number\",\n", + " \"Recording Date\",\n", + " \"Recording Matrix Number\",\n", + " \"Recording Catalog Number\",\n", + " \"Media Size\",\n", + " \"Recording Location\",\n", + " \"Summary\",\n", + " \"Rights Advisory\",\n", + " \"Title\",\n", + "])\n", + "flattened.peek()" + ], "metadata": { 
"@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "161%" } } @@ -379,31 +411,14 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# As a side effect of how I extracted the song information from the HTML DOM,\n", - "# we ended up with lists in places where we only expect one item.\n", - "#\n", - "# We can \"explode\" to flatten these lists.\n", - "flattened = df.explode([\n", - " \"Recording Repository\",\n", - " \"Recording Label\",\n", - " \"Recording Take Number\",\n", - " \"Recording Date\",\n", - " \"Recording Matrix Number\",\n", - " \"Recording Catalog Number\",\n", - " \"Media Size\",\n", - " \"Recording Location\",\n", - " \"Summary\",\n", - " \"Rights Advisory\",\n", - " \"Title\",\n", - "])\n", - "flattened.peek()" - ] + "execution_count": null }, { + "id": "84548649", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:56:06.546531Z", @@ -414,18 +429,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "flattened.shape" - ] + "execution_count": null }, { + "id": "8be3127f", "cell_type": "markdown", + "source": [ + "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "216%" } } @@ -437,18 +453,25 @@ }, "tags": [] }, - "source": [ - "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." 
- ] + "execution_count": null }, { + "id": "31277e21", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened = flattened.assign(**{\n", + " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", + " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", + "})\n", + "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", + "# Note: str.to_blob is deprecated.\n", + "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "211%" } } @@ -468,23 +491,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened = flattened.assign(**{\n", - " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", - " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", - "})\n", - "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", - "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" - ] + "execution_count": null }, { + "id": "d27756f5", "cell_type": "markdown", + "source": [ + "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "317%" } } @@ -496,13 +515,20 @@ }, "tags": [] }, - "source": [ - "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." - ] + "execution_count": null }, { + "id": "d1f7ad46", "cell_type": "code", - "execution_count": null, + "source": [ + "# Note: .blob.audio_transcribe is removed. 
This cell will fail.\n", + "# Use bigframes.bigquery.ai.generate instead.\n", + "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", + " model_name=\"gemini-2.0-flash-001\",\n", + " verbose=True,\n", + ")\n", + "flattened[\"Transcription\"]" + ], "metadata": { "editable": true, "execution": { @@ -518,22 +544,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", - " model_name=\"gemini-2.0-flash-001\",\n", - " verbose=True,\n", - ")\n", - "flattened[\"Transcription\"]" - ] + "execution_count": null }, { + "id": "1575c468", "cell_type": "markdown", + "source": [ + "Sometimes the model has transient errors. Check the status column to see if there are errors." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "229%" } } @@ -543,18 +566,21 @@ "slide_type": "slide" } }, - "source": [ - "Sometimes the model has transient errors. Check the status column to see if there are errors." 
- ] + "execution_count": null }, { + "id": "e53c7a0b", "cell_type": "code", - "execution_count": null, + "source": [ + "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", + "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", + "flattened.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "177%" } } @@ -574,21 +600,20 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", - "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", - "flattened.shape" - ] + "execution_count": null }, { + "id": "3629f4af", "cell_type": "code", - "execution_count": null, + "source": [ + "# Show transcribed lyrics.\n", + "flattened[\"Transcription\"].struct.field(\"content\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "141%" } } @@ -603,20 +628,23 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Show transcribed lyrics.\n", - "flattened[\"Transcription\"].struct.field(\"content\")" - ] + "execution_count": null }, { + "id": "09ef6c3d", "cell_type": "code", - "execution_count": null, + "source": [ + "# Find all instrumentatal songs\n", + "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", + "print(instrumental.shape)\n", + "song = instrumental.peek(1)\n", + "song" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -634,23 +662,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Find all 
instrumentatal songs\n", - "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", - "print(instrumental.shape)\n", - "song = instrumental.peek(1)\n", - "song" - ] + "execution_count": null }, { + "id": "cf15986a", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -670,25 +701,23 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + "execution_count": null }, { + "id": "778d0ac3", "cell_type": "markdown", + "source": [ + "## Creating a searchable index\n", + "\n", + "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "\n", + "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "181%" } } @@ -698,22 +727,21 @@ "slide_type": "slide" } }, - "source": [ - "## Creating a searchable index\n", - "\n", - "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", - "\n", - "See 
also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" - ] + "execution_count": null }, { + "id": "de7e4e11", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "163%" } } @@ -728,21 +756,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" - ] + "execution_count": null }, { + "id": "4acfb495", "cell_type": "code", - "execution_count": null, + "source": [ + "df_to_index = (\n", + " flattened\n", + " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", + " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", + ")\n", + "embedding = text_model.predict(df_to_index)\n", + "embedding.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "125%" } } @@ -757,25 +789,22 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df_to_index = (\n", - " flattened\n", - " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", - " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", - ")\n", - "embedding = text_model.predict(df_to_index)\n", - "embedding.peek(1)" - ] + "execution_count": null }, { + "id": "a49d1dde", "cell_type": "code", - "execution_count": null, + "source": [ + "# Check the status column to look for errors.\n", + "print(f\"Successful rows: 
{(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", + "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", + "embedding.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "178%" } } @@ -795,39 +824,39 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "# Check the status column to look for errors.\n", - "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", - "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", - "embedding.shape" - ] + "execution_count": null }, { + "id": "15a5bfd3", "cell_type": "markdown", + "source": [ + "We're now ready to save this to a table." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "224%" } } } } }, - "source": [ - "We're now ready to save this to a table." - ] + "execution_count": null }, { + "id": "8b49384c", "cell_type": "code", - "execution_count": null, + "source": [ + "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", + "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "172%" } } @@ -842,19 +871,24 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", - "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" - ] + "execution_count": null }, { + "id": "810c77d5", "cell_type": "markdown", + "source": [ + "## Searching the database\n", + "\n", + "To search by semantics, we:\n", + "\n", + "1. 
Turn our search string into an embedding using the same model as our index.\n", + "2. Find the closest matches to the search string." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "183%" } } @@ -864,23 +898,22 @@ "slide_type": "slide" } }, - "source": [ - "## Searching the database\n", - "\n", - "To search by semantics, we:\n", - "\n", - "1. Turn our search string into an embedding using the same model as our index.\n", - "2. Find the closest matches to the search string." - ] + "execution_count": null }, { + "id": "fb63ad94", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "df_written = bpd.read_gbq(embedding_table_id)\n", + "df_written.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "92%" } } @@ -898,22 +931,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "df_written = bpd.read_gbq(embedding_table_id)\n", - "df_written.peek(1)" - ] + "execution_count": null }, { + "id": "f19c88d3", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "search_string = \"walking home\"\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", + "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", + "search_embedding = text_model.predict(search_df)\n", + "search_embedding" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "127%" } } @@ -928,26 +965,28 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import 
TextEmbeddingGenerator\n", - "\n", - "search_string = \"walking home\"\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", - "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", - "search_embedding = text_model.predict(search_df)\n", - "search_embedding" - ] + "execution_count": null }, { + "id": "06f0312e", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "vector_search_results = bbq.vector_search(\n", + " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", + " column_to_search=\"ml_generate_embedding_result\",\n", + " query=search_embedding,\n", + " distance_type=\"COSINE\",\n", + " query_column_to_search=\"ml_generate_embedding_result\",\n", + " top_k=5,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "175%" } } @@ -967,23 +1006,14 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "vector_search_results = bbq.vector_search(\n", - " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", - " column_to_search=\"ml_generate_embedding_result\",\n", - " query=search_embedding,\n", - " distance_type=\"COSINE\",\n", - " query_column_to_search=\"ml_generate_embedding_result\",\n", - " top_k=5,\n", - ")" - ] + "execution_count": null }, { + "id": "fae3fcae", "cell_type": "code", - "execution_count": null, + "source": [ + "vector_search_results.dtypes" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T16:05:50.566930Z", @@ -994,19 +1024,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "vector_search_results.dtypes" - ] + "execution_count": null }, { + "id": "38423dde", "cell_type": "code", - "execution_count": null, + "source": [ + "results = vector_search_results[[\"Title\", \"Summary\", 
\"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", + "results" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "158%" } } @@ -1024,20 +1055,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", - "results" - ] + "execution_count": null }, { + "id": "37a1dfbd", "cell_type": "code", - "execution_count": null, + "source": [ + "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "138%" } } @@ -1052,14 +1082,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" - ] + "execution_count": null }, { + "id": "a4748e0f", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "editable": true, "execution": { @@ -1076,26 +1113,16 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + "execution_count": null }, { + "id": "ff22e7eb", "cell_type": "code", - "execution_count": null, + "source": [], "metadata": { "trusted": 
true }, - "outputs": [], - "source": [] + "execution_count": null } ], "metadata": { @@ -1132,6 +1159,6 @@ "version": "3.11.13" } }, - "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 4, + "nbformat": 4 } diff --git a/packages/bigframes/tests/system/small/blob/test_io.py b/packages/bigframes/tests/system/small/blob/test_io.py deleted file mode 100644 index c89fb4c6e6ed..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_io.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from unittest import mock - -import pandas as pd -import pytest - -import bigframes -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -idisplay = pytest.importorskip("IPython.display") - - -def test_blob_create_from_uri_str( - bq_connection: str, session: bigframes.Session, images_uris -): - uri_series = bpd.Series(images_uris, session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - - pd_blob_df = blob_series.struct.explode().to_pandas() - expected_pd_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_from_glob_path( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - blob_df = session.from_glob_path( - images_gcs_path, connection=bq_connection, name="blob_col" - ) - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_read_gbq_object_table( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - obj_table = session._create_object_table(images_gcs_path, bq_connection) - - blob_df = session.read_gbq_object_table(obj_table, name="blob_col") - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": 
[bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame): - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - images_mm_df["blob_col"].blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert isinstance(arg, idisplay.Image) - - -def test_display_nulls( - monkeypatch, - bq_connection: str, - session: bigframes.Session, -): - uri_series = bpd.Series([None, None, None], dtype="string", session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - blob_series.blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert arg == "" diff --git a/packages/bigframes/tests/system/small/blob/test_properties.py b/packages/bigframes/tests/system/small/blob/test_properties.py deleted file mode 100644 index f63de38a8ce9..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_properties.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pandas as pd -import pytest - -import bigframes.dtypes as dtypes -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.uri().to_pandas() - expected = pd.Series(images_uris, name="uri") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.authorizer().to_pandas() - expected = pd.Series( - [bq_connection.casefold(), bq_connection.casefold()], name="authorizer" - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_version(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.version().to_pandas() - expected = pd.Series(["1753907851152593", "1753907851111538"], name="version") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_metadata(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.metadata().to_pandas() - expected = pd.Series( - [ - ( - '{"content_type":"image/jpeg",' - '"md5_hash":"e130ad042261a1883cd2cc06831cf748",' - '"size":338390,' - '"updated":1753907851000000}' - ), - ( - '{"content_type":"image/jpeg",' - '"md5_hash":"e2ae3191ff2b809fd0935f01a537c650",' - '"size":43333,' - '"updated":1753907851000000}' - ), - ], - name="metadata", - dtype=dtypes.JSON_DTYPE, - ) - expected.index = expected.index.astype(dtypes.INT_DTYPE) - pd.testing.assert_series_equal(actual, expected) - - -def test_blob_content_type(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.content_type().to_pandas() - expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") - - pd.testing.assert_series_equal( - actual, expected, 
check_dtype=False, check_index_type=False - ) - - -def test_blob_md5_hash(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.md5_hash().to_pandas() - expected = pd.Series( - ["e130ad042261a1883cd2cc06831cf748", "e2ae3191ff2b809fd0935f01a537c650"], - name="md5_hash", - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_size(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.size().to_pandas() - expected = pd.Series([338390, 43333], name="size") - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) - - -def test_blob_updated(images_mm_df: bpd.DataFrame): - actual = images_mm_df["blob_col"].blob.updated().to_pandas() - expected = pd.Series( - [ - pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), - pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), - ], - name="updated", - ) - - pd.testing.assert_series_equal( - actual, expected, check_dtype=False, check_index_type=False - ) diff --git a/packages/bigframes/tests/system/small/blob/test_urls.py b/packages/bigframes/tests/system/small/blob/test_urls.py deleted file mode 100644 index b2dd6604343e..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_urls.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -def test_blob_read_url(images_mm_df: bpd.DataFrame): - urls = images_mm_df["blob_col"].blob.read_url() - - assert urls.str.startswith("https://storage.googleapis.com/").all() - - -def test_blob_write_url(images_mm_df: bpd.DataFrame): - urls = images_mm_df["blob_col"].blob.write_url() - - assert urls.str.startswith("https://storage.googleapis.com/").all() From b9476e683f180374e85d805ab9b2be641e7f6380 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:06:16 +0000 Subject: [PATCH 03/26] style: fix formatting and clean up imports --- packages/bigframes/bigframes/operations/blob.py | 8 -------- packages/bigframes/bigframes/session/polars_executor.py | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/packages/bigframes/bigframes/operations/blob.py b/packages/bigframes/bigframes/operations/blob.py index d29d1a1202c0..9cd7dd0db291 100644 --- a/packages/bigframes/bigframes/operations/blob.py +++ b/packages/bigframes/bigframes/operations/blob.py @@ -14,18 +14,10 @@ from __future__ import annotations -import os -import warnings -from typing import Literal, Optional, Union, cast - -import pandas as pd -import requests import bigframes.dataframe -import bigframes.exceptions as bfe import bigframes.operations as ops import bigframes.series -from bigframes import clients, dtypes from bigframes.core.logging import log_adapter FILE_FOLDER_REGEX = r"^.*\/(.*)$" diff --git a/packages/bigframes/bigframes/session/polars_executor.py b/packages/bigframes/bigframes/session/polars_executor.py index 43e3609ac3c1..06c7fcb925c4 100644 --- a/packages/bigframes/bigframes/session/polars_executor.py +++ b/packages/bigframes/bigframes/session/polars_executor.py @@ -122,7 +122,7 @@ def _is_node_polars_executable(node: nodes.BigFrameNode): return False for expr in node._node_expressions: if isinstance(expr, 
agg_expressions.Aggregation): - if not type(expr.op) in _COMPATIBLE_AGG_OPS: + if type(expr.op) not in _COMPATIBLE_AGG_OPS: return False if isinstance(expr, expression.Expression): if not set(map(type, _get_expr_ops(expr))).issubset(_COMPATIBLE_SCALAR_OPS): From d84ec947309ea54eec3ddc74d2678e2262e7e4af Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:11:51 +0000 Subject: [PATCH 04/26] style: enforce ruff formatting --- packages/bigframes/.python-version | 1 + packages/bigframes/bigframes/dataframe.py | 51 +++++---------- packages/bigframes/bigframes/series.py | 65 +++++++++---------- .../bigframes/bigframes/session/__init__.py | 33 ++++------ 4 files changed, 60 insertions(+), 90 deletions(-) create mode 100644 packages/bigframes/.python-version diff --git a/packages/bigframes/.python-version b/packages/bigframes/.python-version new file mode 100644 index 000000000000..95ed564f82b7 --- /dev/null +++ b/packages/bigframes/.python-version @@ -0,0 +1 @@ +3.14.2 diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index a98a44448737..b0ea81e003e1 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -1611,8 +1611,7 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: - ... + ) -> pandas.DataFrame: ... @overload def to_pandas( @@ -1624,8 +1623,7 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def to_pandas( self, @@ -1937,8 +1935,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[False] = False, - ) -> DataFrame: - ... + ) -> DataFrame: ... 
@overload def drop( @@ -1950,8 +1947,7 @@ def drop( columns: Union[blocks.Label, Sequence[blocks.Label]] = None, level: typing.Optional[LevelType] = None, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... def drop( self, @@ -2095,20 +2091,17 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: - ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: - ... + ) -> None: ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2125,8 +2118,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2135,8 +2127,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2145,8 +2136,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename_axis( self, @@ -2342,8 +2332,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def reset_index( @@ -2355,8 +2344,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> None: - ... + ) -> None: ... 
@overload def reset_index( @@ -2368,8 +2356,7 @@ def reset_index( col_fill: Hashable = ..., allow_duplicates: Optional[bool] = ..., names: Union[None, Hashable, Sequence[Hashable]] = ..., - ) -> Optional[DataFrame]: - ... + ) -> Optional[DataFrame]: ... def reset_index( self, @@ -2432,8 +2419,7 @@ def sort_index( ascending: bool = ..., inplace: Literal[False] = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_index( @@ -2442,8 +2428,7 @@ def sort_index( ascending: bool = ..., inplace: Literal[True] = ..., na_position: Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_index( self, @@ -2489,8 +2474,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_values( @@ -2501,8 +2485,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... def sort_values( self, diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index 3d29c19b9e41..17addef1ab0a 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -375,8 +375,7 @@ def copy(self) -> Series: def rename( self, index: Union[blocks.Label, Mapping[Any, Any]] = None, - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -385,8 +384,7 @@ def rename( *, inplace: Literal[False], **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -395,8 +393,7 @@ def rename( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename( self, @@ -457,8 +454,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> Series: - ... + ) -> Series: ... 
@overload def rename_axis( @@ -467,8 +463,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def rename_axis( @@ -477,8 +472,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... @validations.requires_index def rename_axis( @@ -522,8 +516,7 @@ def reset_index( drop: Literal[False] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> bigframes.dataframe.DataFrame: - ... + ) -> bigframes.dataframe.DataFrame: ... @overload def reset_index( @@ -534,8 +527,7 @@ def reset_index( drop: Literal[True] = ..., inplace: Literal[False] = ..., allow_duplicates: Optional[bool] = ..., - ) -> Series: - ... + ) -> Series: ... @overload def reset_index( @@ -546,8 +538,7 @@ def reset_index( drop: bool = ..., inplace: Literal[True] = ..., allow_duplicates: Optional[bool] = ..., - ) -> None: - ... + ) -> None: ... @validations.requires_ordering() def reset_index( @@ -1540,9 +1531,9 @@ def ne(self, other: object) -> Series: def items(self): for batch_df in self._block.to_pandas_batches(): - assert ( - batch_df.shape[1] == 1 - ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." + assert batch_df.shape[1] == 1, ( + f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." + ) for item in batch_df.squeeze(axis=1).items(): yield item @@ -1772,8 +1763,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... @typing.overload def sort_values( @@ -1784,8 +1774,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> Series: - ... + ) -> Series: ... 
def sort_values( self, @@ -1816,14 +1805,12 @@ def sort_values( @typing.overload # type: ignore[override] def sort_index( self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=... - ) -> Series: - ... + ) -> Series: ... @typing.overload def sort_index( self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=... - ) -> None: - ... + ) -> None: ... @validations.requires_index def sort_index( @@ -2698,18 +2685,28 @@ def _apply_binary_aggregation( @typing.overload def _align( self, other: Series, how="outer" - ) -> tuple[ex.DerefOp, ex.DerefOp, blocks.Block,]: - ... + ) -> tuple[ + ex.DerefOp, + ex.DerefOp, + blocks.Block, + ]: ... @typing.overload def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: - ... + ) -> tuple[ + ex.DerefOp, + AlignedExprT, + blocks.Block, + ]: ... def _align( self, other: typing.Union[Series, scalars.Scalar], how="outer" - ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: + ) -> tuple[ + ex.DerefOp, + AlignedExprT, + blocks.Block, + ]: """Aligns the series value with another scalar or series object. Returns new left column id, right column id and joined tabled expression.""" values, block = self._align_n( [ diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index ea36cc1925f1..a025256f2b1e 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,8 +432,7 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq( @@ -449,8 +448,7 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... 
def read_gbq( self, @@ -522,8 +520,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def _read_gbq_colab( @@ -532,8 +529,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -594,8 +590,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_query( @@ -611,8 +606,7 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, @@ -759,8 +753,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_table( @@ -774,8 +767,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_table( self, @@ -926,8 +918,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: - ... + ) -> bigframes.core.indexes.Index: ... @typing.overload def read_pandas( @@ -935,8 +926,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: - ... + ) -> bigframes.series.Series: ... 
@typing.overload def read_pandas( @@ -944,8 +934,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... def read_pandas( self, From 9c3bc9e3ff851917dd6aba3fb71a18aae6143a52 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 23:43:04 +0000 Subject: [PATCH 05/26] docs: fix notebook outputs --- ...with-bigframes-over-national-jukebox.ipynb | 153 ++++++++++-------- 1 file changed, 90 insertions(+), 63 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index e70ddfe4a845..3fd66abcbb44 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -25,7 +25,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "194%" } } @@ -47,13 +47,13 @@ "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + "\"recording" ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "216%" } @@ -73,7 +73,7 @@ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\u003cimg 
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02Hzz
zeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr
0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0GdNEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoO
W0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cROXPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5Z
VL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzYmUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7
F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/
Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4l
lvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBrJ+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKam
H4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8w
mT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xK
G4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMngXzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" 
/\u003e\n", + "\"audio\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -91,7 +91,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "181%" } @@ -116,7 +116,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "275%" } } @@ -138,7 +138,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "214%" } } @@ -153,21 +153,22 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "acf12472", "cell_type": "markdown", "source": [ - "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. Then," + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. 
Then," ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "4", "zoom": "236%" } @@ -196,7 +197,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "4d837a34", @@ -214,7 +216,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "193%" } } @@ -229,7 +231,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "008f0a87", @@ -243,7 +246,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "207%" } } @@ -270,7 +273,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "225%" } } @@ -285,7 +288,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "e00dcb01", @@ -298,7 +302,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "122%" } } @@ -316,7 +320,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "335511be", @@ -328,7 +333,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "134%" } } @@ -343,7 +348,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "595126a1", @@ -364,7 +370,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "cbd59dd9", @@ -393,7 +400,7 @@ "@deathbeds/jupyterlab-fonts": { 
"styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "161%" } } @@ -411,7 +418,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "84548649", @@ -429,7 +437,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "8be3127f", @@ -441,7 +450,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "216%" } } @@ -471,7 +480,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "211%" } } @@ -491,7 +500,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "d27756f5", @@ -503,7 +513,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "317%" } } @@ -544,7 +554,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "1575c468", @@ -556,7 +567,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "229%" } } @@ -580,7 +591,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "177%" } } @@ -600,7 +611,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "3629f4af", @@ -613,7 +625,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "141%" } } @@ -628,7 +640,8 @@ }, "trusted": true }, - 
"execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "09ef6c3d", @@ -644,7 +657,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -662,7 +675,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "cf15986a", @@ -681,7 +695,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -701,7 +715,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "778d0ac3", @@ -717,7 +732,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "181%" } } @@ -741,7 +756,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "163%" } } @@ -756,7 +771,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "4acfb495", @@ -774,7 +790,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "125%" } } @@ -789,7 +805,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "a49d1dde", @@ -804,7 +821,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "178%" } } @@ -824,7 +841,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "15a5bfd3", @@ -836,7 +854,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "224%" } } @@ -856,7 +874,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "172%" } } @@ -871,7 +889,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "810c77d5", @@ -888,7 +907,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "183%" } } @@ -913,7 +932,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "92%" } } @@ -931,7 +950,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "f19c88d3", @@ -950,7 +970,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "127%" } } @@ -965,7 +985,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "06f0312e", @@ -986,7 +1007,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "175%" } } @@ -1006,7 +1027,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "fae3fcae", @@ -1024,7 +1046,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "38423dde", @@ -1037,7 +1060,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "158%" } } @@ -1055,7 +1078,8 @@ }, "trusted": true }, - "execution_count": null + 
"execution_count": null, + "outputs": [] }, { "id": "37a1dfbd", @@ -1067,7 +1091,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "138%" } } @@ -1082,7 +1106,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "a4748e0f", @@ -1113,7 +1138,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "ff22e7eb", @@ -1122,7 +1148,8 @@ "metadata": { "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] } ], "metadata": { From 11f0b0b5307c26a2da127b33ac7cc486f25d57d2 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 10 Apr 2026 16:43:50 -0700 Subject: [PATCH 06/26] Update packages/bigframes/bigframes/operations/strings.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- packages/bigframes/bigframes/operations/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index 7cc93d34c07a..a5b9944424b0 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -313,7 +313,7 @@ def _to_blob(self, connection: Optional[str] = None) -> T: ): session = self._data._block.session else: - raise ValueError("to_blob is only supported via Series.str") + raise ValueError(f"{self._to_blob.__name__} is only supported via Series.str") connection = session._create_bq_connection(connection=connection) return self._data._apply_binary_op(connection, ops.obj_make_ref_op) From 38a7820633ee33cbaa24e3cda755f5b1b9ccafc3 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 15 Apr 2026 21:18:50 +0000 Subject: [PATCH 07/26] fix lint in strings.py --- packages/bigframes/bigframes/operations/strings.py | 4 
+++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index a5b9944424b0..0833ab5df802 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -313,7 +313,9 @@ def _to_blob(self, connection: Optional[str] = None) -> T: ): session = self._data._block.session else: - raise ValueError(f"{self._to_blob.__name__} is only supported via Series.str") + raise ValueError( + f"{self._to_blob.__name__} is only supported via Series.str" + ) connection = session._create_bq_connection(connection=connection) return self._data._apply_binary_op(connection, ops.obj_make_ref_op) From e765ef09009b2db06f9a88ac452d78126f29df39 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 15 Apr 2026 23:34:29 +0000 Subject: [PATCH 08/26] Address review comments for Blob API deprecation --- packages/bigframes/.python-version | 1 - .../bigframes/bigframes/blob/_functions.py | 126 ------------------ .../bigframes/bigframes/operations/blob.py | 1 - .../bigframes/bigframes/operations/strings.py | 44 ------ .../bigframes/bigframes/pandas/__init__.py | 12 +- packages/bigframes/bigframes/pandas/io/api.py | 61 +++------ .../bigframes/bigframes/session/__init__.py | 104 +++------------ .../bigframes/bigframes/session/loader.py | 14 +- packages/bigframes/tests/system/conftest.py | 22 ++- .../test_blob_get_access_url/out.sql | 4 + .../out.sql | 2 +- .../test_blob_ops/test_blob_make_ref/out.sql | 4 + .../test_blob_make_ref_json/out.sql | 3 + .../test_obj_fetch_metadata/out.sql | 6 - .../test_obj_get_access_url/out.sql | 10 -- .../test_blob_ops/test_obj_make_ref/out.sql | 4 - .../test_obj_make_ref_json/out.sql | 3 - .../sqlglot/expressions/test_blob_ops.py | 18 +-- 18 files changed, 96 insertions(+), 343 deletions(-) delete mode 100644 packages/bigframes/.python-version delete mode 100644 
packages/bigframes/bigframes/blob/_functions.py create mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql rename packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/{test_obj_get_access_url_with_duration => test_blob_get_access_url_with_duration}/out.sql (60%) create mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref/out.sql create mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref_json/out.sql delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref/out.sql delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql diff --git a/packages/bigframes/.python-version b/packages/bigframes/.python-version deleted file mode 100644 index 95ed564f82b7..000000000000 --- a/packages/bigframes/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.14.2 diff --git a/packages/bigframes/bigframes/blob/_functions.py b/packages/bigframes/bigframes/blob/_functions.py deleted file mode 100644 index 3869416d1244..000000000000 --- a/packages/bigframes/bigframes/blob/_functions.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import inspect -import typing -from dataclasses import dataclass -from typing import Callable, Iterable, Union - -import google.cloud.bigquery as bigquery - -import bigframes.session -import bigframes.session._io.bigquery as bf_io_bigquery - -_PYTHON_TO_BQ_TYPES = { - int: "INT64", - float: "FLOAT64", - str: "STRING", - bytes: "BYTES", - bool: "BOOL", -} - - -@dataclass(frozen=True) -class FunctionDef: - """Definition of a Python UDF.""" - - func: Callable # function body - requirements: Iterable[str] # required packages - - -# TODO(garrettwu): migrate to bigframes UDF when it is available -class TransformFunction: - """Simple transform function class to deal with Python UDF.""" - - def __init__( - self, - func_def: FunctionDef, - session: bigframes.session.Session, - connection: str, - max_batching_rows: int, - container_cpu: Union[float, int], - container_memory: str, - ): - self._func = func_def.func - self._requirements = func_def.requirements - self._session = session - self._connection = connection - self._max_batching_rows = ( - int(max_batching_rows) if max_batching_rows > 1 else max_batching_rows - ) - self._container_cpu = container_cpu - self._container_memory = container_memory - - def _input_bq_signature(self): - sig = inspect.signature(self._func) - inputs = [] - for k, v in sig.parameters.items(): - inputs.append(f"{k} {_PYTHON_TO_BQ_TYPES[v.annotation]}") - return ", ".join(inputs) - - def _output_bq_type(self): - sig = inspect.signature(self._func) - return_annotation = sig.return_annotation - origin = 
typing.get_origin(return_annotation) - if origin is Union: - args = typing.get_args(return_annotation) - if len(args) == 2 and args[1] is type(None): - return _PYTHON_TO_BQ_TYPES[args[0]] - return _PYTHON_TO_BQ_TYPES[sig.return_annotation] - - def _create_udf(self): - """Create Python UDF in BQ. Return name of the UDF.""" - udf_name = str( - self._session._anon_dataset_manager.generate_unique_resource_id() - ) - - func_body = "import typing\n" + inspect.getsource(self._func) - func_name = self._func.__name__ - packages = str(list(self._requirements)) - - sql = f""" -CREATE OR REPLACE FUNCTION `{udf_name}`({self._input_bq_signature()}) -RETURNS {self._output_bq_type()} LANGUAGE python -WITH CONNECTION `{self._connection}` -OPTIONS (entry_point='{func_name}', runtime_version='python-3.11', packages={packages}, max_batching_rows={self._max_batching_rows}, container_cpu={self._container_cpu}, container_memory='{self._container_memory}') -AS r\"\"\" - - -{func_body} - - -\"\"\" - """ - - bf_io_bigquery.start_query_with_client( - self._session.bqclient, - sql, - job_config=bigquery.QueryJobConfig(), - metrics=self._session._metrics, - location=None, - project=None, - timeout=None, - query_with_job=True, - publisher=self._session._publisher, - ) - - return udf_name - - def udf(self): - """Create and return the UDF object.""" - udf_name = self._create_udf() - - # TODO(b/404605969): remove cleanups when UDF fixes dataset deletion. 
- self._session._function_session._update_temp_artifacts(udf_name, "") - return self._session.read_gbq_function(udf_name) diff --git a/packages/bigframes/bigframes/operations/blob.py b/packages/bigframes/bigframes/operations/blob.py index 9cd7dd0db291..3666ee66602d 100644 --- a/packages/bigframes/bigframes/operations/blob.py +++ b/packages/bigframes/bigframes/operations/blob.py @@ -14,7 +14,6 @@ from __future__ import annotations - import bigframes.dataframe import bigframes.operations as ops import bigframes.series diff --git a/packages/bigframes/bigframes/operations/strings.py b/packages/bigframes/bigframes/operations/strings.py index 0833ab5df802..ff211f1b77d1 100644 --- a/packages/bigframes/bigframes/operations/strings.py +++ b/packages/bigframes/bigframes/operations/strings.py @@ -305,50 +305,6 @@ def join(self, sep: str) -> T: ops.ArrayReduceOp(aggregation=agg_ops.StringAggOp(sep=sep)) ) - def _to_blob(self, connection: Optional[str] = None) -> T: - import bigframes.core.blocks - - if hasattr(self._data, "_block") and isinstance( - self._data._block, bigframes.core.blocks.Block - ): - session = self._data._block.session - else: - raise ValueError( - f"{self._to_blob.__name__} is only supported via Series.str" - ) - connection = session._create_bq_connection(connection=connection) - return self._data._apply_binary_op(connection, ops.obj_make_ref_op) - - def to_blob(self, connection: Optional[str] = None) -> T: - """Create a BigFrames Blob series from a series of URIs. - - .. note:: - BigFrames Blob is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - - Args: - connection (str or None, default None): - Connection to connect with remote service. 
str of the format ... - If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach - permission if the connection isn't fully set up. - - Returns: - bigframes.series.Series: Blob Series. - - """ - import warnings - import bigframes.exceptions as bfe - - warnings.warn( - "Series.str.to_blob is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - return self._to_blob(connection) - def _parse_flags(flags: int) -> Optional[str]: re2flags = [] diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index 58833284ae96..c6f7500f9e89 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -99,14 +99,12 @@ from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( _read_gbq_colab, - from_glob_path, read_arrow, read_avro, read_csv, read_gbq, read_gbq_function, read_gbq_model, - read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -251,7 +249,8 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> bigframes.series.Series: ... +) -> bigframes.series.Series: + ... @typing.overload @@ -261,7 +260,8 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> Union[pandas.Timestamp, datetime.datetime]: ... +) -> Union[pandas.Timestamp, datetime.datetime]: + ... def to_datetime( @@ -455,7 +455,6 @@ def reset_session(): _read_gbq_colab, read_gbq_function, read_gbq_model, - read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -466,7 +465,6 @@ def reset_session(): remote_function, to_datetime, to_timedelta, - from_glob_path, ] # Use __all__ to let type checkers know what is part of the public API. 
@@ -492,7 +490,6 @@ def reset_session(): "_read_gbq_colab", "read_gbq_function", "read_gbq_model", - "read_gbq_object_table", "read_gbq_query", "read_gbq_table", "read_json", @@ -503,7 +500,6 @@ def reset_session(): "remote_function", "to_datetime", "to_timedelta", - "from_glob_path", # Other names "api", # pandas dtype attributes diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index b7ed1a65d922..1cf818f5ddec 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -206,7 +206,8 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @overload @@ -222,7 +223,8 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., -) -> pandas.Series: ... +) -> pandas.Series: + ... def read_gbq( @@ -302,7 +304,8 @@ def _read_gbq_colab( # type: ignore[overload-overlap] *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @overload @@ -311,7 +314,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: ... +) -> pandas.Series: + ... 
def _read_gbq_colab( @@ -394,21 +398,6 @@ def read_gbq_model(model_name: str): read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) -def read_gbq_object_table( - object_table: str, *, name: Optional[str] = None -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.read_gbq_object_table, - object_table, - name=name, - ) - - -read_gbq_object_table.__doc__ = inspect.getdoc( - bigframes.session.Session.read_gbq_object_table -) - - @overload def read_gbq_query( # type: ignore[overload-overlap] query: str, @@ -422,7 +411,8 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @overload @@ -438,7 +428,8 @@ def read_gbq_query( filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., -) -> pandas.Series: ... +) -> pandas.Series: + ... def read_gbq_query( @@ -484,7 +475,8 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @overload @@ -498,7 +490,8 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: ... +) -> pandas.Series: + ... def read_gbq_table( @@ -551,7 +544,8 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.dataframe.DataFrame: ... +) -> bigframes.dataframe.DataFrame: + ... @typing.overload @@ -559,7 +553,8 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.series.Series: ... +) -> bigframes.series.Series: + ... 
@typing.overload @@ -567,7 +562,8 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.core.indexes.Index: ... +) -> bigframes.core.indexes.Index: + ... def read_pandas( @@ -635,19 +631,6 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) -def from_glob_path( - path: str, *, connection: Optional[str] = None, name: Optional[str] = None -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.from_glob_path, - path=path, - connection=connection, - name=name, - ) - - -from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session.from_glob_path) - _default_location_lock = threading.Lock() diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index eba179411c17..1099e5dc527a 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,7 +432,8 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq( @@ -448,7 +449,8 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq( self, @@ -520,7 +522,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def _read_gbq_colab( @@ -529,7 +532,8 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... 
@log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -590,7 +594,8 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_query( @@ -606,7 +611,8 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_query( self, @@ -753,7 +759,8 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_table( @@ -767,7 +774,8 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_table( self, @@ -918,7 +926,8 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: ... + ) -> bigframes.core.indexes.Index: + ... @typing.overload def read_pandas( @@ -926,7 +935,8 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: ... + ) -> bigframes.series.Series: + ... @typing.overload def read_pandas( @@ -934,7 +944,8 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... 
def read_pandas( self, @@ -2303,45 +2314,6 @@ def _create_temp_table( schema=schema, cluster_cols=cluster_cols ) - def from_glob_path( - self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None - ) -> dataframe.DataFrame: - r"""Create a BigFrames DataFrame that contains a BigFrames `ObjectRef column `_ from a global wildcard path. - This operation creates a temporary BQ Object Table under the hood and requires bigquery.connections.delegate permission or BigQuery Connection Admin role. - If you have an existing BQ Object Table, use read_gbq_object_table(). - - .. note:: - BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - Args: - path (str): - The wildcard global path, such as "gs:////\*". - connection (str or None, default None): - Connection to connect with remote service. str of the format ... - If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach - permission if the connection isn't fully set up. - name (str): - The column name of the ObjectRef column. - Returns: - bigframes.pandas.DataFrame: - Result BigFrames DataFrame. - """ - warnings.warn( - "from_glob_path is deprecated and will be removed in a future release. Use read_gbq with 'ref' column instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. 
- connection = self._create_bq_connection(connection=connection) - - table = self._create_object_table(path, connection) - - s = self._loader.read_gbq_table(table)["uri"].str._to_blob(connection) - return s.rename(name).to_frame() - def _create_bq_connection( self, *, @@ -2369,38 +2341,6 @@ def _create_bq_connection( return connection - def read_gbq_object_table( - self, object_table: str, *, name: Optional[str] = None - ) -> dataframe.DataFrame: - """Read an existing object table to create a BigFrames `ObjectRef `_ DataFrame. Use the connection of the object table for the connection of the ObjectRef. - This function dosen't retrieve the object table data. If you want to read the data, use read_gbq() instead. - - .. note:: - BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - Args: - object_table (str): name of the object table of form ... - name (str or None): the returned ObjectRef column name. - - Returns: - bigframes.pandas.DataFrame: - Result BigFrames DataFrame. - """ - warnings.warn( - "read_gbq_object_table is deprecated and will be removed in a future release. Use read_gbq with 'ref' column instead.", - category=bfe.ApiDeprecationWarning, - stacklevel=2, - ) - # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. 
- table = self.bqclient.get_table(object_table) - connection = table._properties["externalDataConfiguration"]["connectionId"] - - s = self._loader.read_gbq_table(object_table)["uri"].str._to_blob(connection) - return s.rename(name).to_frame() - # ========================================================================= # bigframes.pandas attributes # diff --git a/packages/bigframes/bigframes/session/loader.py b/packages/bigframes/bigframes/session/loader.py index 960208063105..e8efe83cba80 100644 --- a/packages/bigframes/bigframes/session/loader.py +++ b/packages/bigframes/bigframes/session/loader.py @@ -54,6 +54,8 @@ from google.cloud import bigquery_storage_v1 from google.cloud.bigquery_storage_v1 import ( types as bq_storage_types, +) +from google.cloud.bigquery_storage_v1 import ( writer as bq_storage_writer, ) @@ -642,7 +644,8 @@ def read_gbq_table( # type: ignore[overload-overlap] n_rows: Optional[int] = None, index_col_in_columns: bool = False, publish_execution: bool = True, - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_table( @@ -665,7 +668,8 @@ def read_gbq_table( n_rows: Optional[int] = None, index_col_in_columns: bool = False, publish_execution: bool = True, - ) -> pandas.Series: ... + ) -> pandas.Series: + ... def read_gbq_table( self, @@ -1149,7 +1153,8 @@ def read_gbq_query( # type: ignore[overload-overlap] dry_run: Literal[False] = ..., force_total_order: Optional[bool] = ..., allow_large_results: bool, - ) -> dataframe.DataFrame: ... + ) -> dataframe.DataFrame: + ... @overload def read_gbq_query( @@ -1165,7 +1170,8 @@ def read_gbq_query( dry_run: Literal[True] = ..., force_total_order: Optional[bool] = ..., allow_large_results: bool, - ) -> pandas.Series: ... + ) -> pandas.Series: + ... 
def read_gbq_query( self, diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py index 361d9387bc77..f46f5dc3d371 100644 --- a/packages/bigframes/tests/system/conftest.py +++ b/packages/bigframes/tests/system/conftest.py @@ -1503,8 +1503,10 @@ def images_uris() -> list[str]: def images_mm_df( images_uris, session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: - blob_series = bpd.Series(images_uris, session=session).str.to_blob( - connection=bq_connection + import bigframes.bigquery.obj as obj + + blob_series = obj.make_ref( + bpd.Series(images_uris, session=session), authorizer=bq_connection ) return blob_series.rename("blob_col").to_frame() @@ -1527,7 +1529,12 @@ def pdf_gcs_path() -> str: def pdf_mm_df( pdf_gcs_path, session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: - return session.from_glob_path(pdf_gcs_path, name="pdf", connection=bq_connection) + import bigframes.bigquery.obj as obj + + table_id = session._create_object_table(pdf_gcs_path, bq_connection) + df = session.read_gbq(table_id) + blob_series = obj.make_ref(df["uri"], authorizer=bq_connection) + return blob_series.rename("pdf").to_frame() @pytest.fixture(scope="session") @@ -1539,6 +1546,9 @@ def audio_gcs_path() -> str: def audio_mm_df( audio_gcs_path, session: bigframes.Session, bq_connection: str ) -> bpd.DataFrame: - return session.from_glob_path( - audio_gcs_path, name="audio", connection=bq_connection - ) + import bigframes.bigquery.obj as obj + + table_id = session._create_object_table(audio_gcs_path, bq_connection) + df = session.read_gbq(table_id) + blob_series = obj.make_ref(df["uri"], authorizer=bq_connection) + return blob_series.rename("audio").to_frame() diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql new 
file mode 100644 index 000000000000..78bd19c32483 --- /dev/null +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql @@ -0,0 +1,4 @@ +SELECT + `rowindex`, + OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`string_col`, 'my-connection'), 'r') AS `string_col` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql similarity index 60% rename from packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql rename to packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql index 2e8b60230faa..ac2999e386d5 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url_with_duration/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql @@ -1,3 +1,3 @@ SELECT OBJ.GET_ACCESS_URL(`string_col`, 'READ', INTERVAL 3600 MICROSECOND) AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref/out.sql new file mode 100644 index 000000000000..3178a55cbfad --- /dev/null +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref/out.sql @@ -0,0 +1,4 @@ 
+SELECT + `rowindex`, + OBJ.MAKE_REF(`string_col`, 'my-connection') AS `string_col` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref_json/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref_json/out.sql new file mode 100644 index 000000000000..460675d3513b --- /dev/null +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_make_ref_json/out.sql @@ -0,0 +1,3 @@ +SELECT + OBJ.MAKE_REF(`string_col`) AS `string_col` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql deleted file mode 100644 index ca6f5842df1d..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql +++ /dev/null @@ -1,6 +0,0 @@ -SELECT - `rowindex`, - OBJ.FETCH_METADATA( - OBJ.MAKE_REF(`string_col`, 'bigframes-dev.test-region.bigframes-default-connection') - ).`version` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql deleted file mode 100644 index 6d612ee6b87d..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql +++ /dev/null @@ -1,10 +0,0 @@ -SELECT - `rowindex`, - JSON_VALUE( - OBJ.GET_ACCESS_URL( - OBJ.MAKE_REF(`string_col`, 
'bigframes-dev.test-region.bigframes-default-connection'), - 'R' - ), - '$.access_urls.read_url' - ) AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref/out.sql deleted file mode 100644 index 74ca601cd5db..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref/out.sql +++ /dev/null @@ -1,4 +0,0 @@ -SELECT - `rowindex`, - OBJ.MAKE_REF(`string_col`, 'bigframes-dev.test-region.bigframes-default-connection') AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql deleted file mode 100644 index dc84b3bec12e..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_make_ref_json/out.sql +++ /dev/null @@ -1,3 +0,0 @@ -SELECT - OBJ.MAKE_REF(`string_col`) AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py index 7130c7ac1610..502490404db9 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -14,6 +14,7 @@ import pytest +import bigframes.bigquery.obj as obj import bigframes.pandas as bpd from bigframes import operations 
as ops from bigframes.testing import utils @@ -21,13 +22,14 @@ pytest.importorskip("pytest_snapshot") -def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str._to_blob() - sql = blob_s._blob._read_url().to_frame().sql +def test_blob_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): + ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection") + url_s = obj.get_access_url(ref_s, mode="r") + sql = url_s.to_frame().sql snapshot.assert_match(sql, "out.sql") -def test_obj_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapshot): +def test_blob_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] sql = utils._apply_ops_to_sql( @@ -38,12 +40,12 @@ def test_obj_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapsh snapshot.assert_match(sql, "out.sql") -def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot): - blob_df = scalar_types_df["string_col"].str._to_blob() - snapshot.assert_match(blob_df.to_frame().sql, "out.sql") +def test_blob_make_ref(scalar_types_df: bpd.DataFrame, snapshot): + ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection") + snapshot.assert_match(ref_s.to_frame().sql, "out.sql") -def test_obj_make_ref_json(scalar_types_df: bpd.DataFrame, snapshot): +def test_blob_make_ref_json(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] sql = utils._apply_ops_to_sql( From 8af25322270d0571dbec682f17114678f924ead0 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 15 Apr 2026 23:51:31 +0000 Subject: [PATCH 09/26] chore: remove unused multimodal fixtures and tests --- .../bigframes/bigframes/pandas/__init__.py | 6 +- packages/bigframes/bigframes/pandas/io/api.py | 33 ++---- .../bigframes/bigframes/session/__init__.py | 33 ++---- .../bigframes/bigframes/session/loader.py | 12 +- 
packages/bigframes/tests/system/conftest.py | 46 -------- .../system/large/ml/test_multimodal_llm.py | 106 ------------------ 6 files changed, 28 insertions(+), 208 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/ml/test_multimodal_llm.py diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index c6f7500f9e89..11938a887785 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -249,8 +249,7 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> bigframes.series.Series: - ... +) -> bigframes.series.Series: ... @typing.overload @@ -260,8 +259,7 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> Union[pandas.Timestamp, datetime.datetime]: - ... +) -> Union[pandas.Timestamp, datetime.datetime]: ... def to_datetime( diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index 1cf818f5ddec..e2737fdbbd1a 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -206,8 +206,7 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -223,8 +222,7 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq( @@ -304,8 +302,7 @@ def _read_gbq_colab( # type: ignore[overload-overlap] *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... 
@overload @@ -314,8 +311,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def _read_gbq_colab( @@ -411,8 +407,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -428,8 +423,7 @@ def read_gbq_query( filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq_query( @@ -475,8 +469,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -490,8 +483,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq_table( @@ -544,8 +536,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @typing.overload @@ -553,8 +544,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.series.Series: - ... +) -> bigframes.series.Series: ... @typing.overload @@ -562,8 +552,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.core.indexes.Index: - ... +) -> bigframes.core.indexes.Index: ... 
def read_pandas( diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 1099e5dc527a..872aff2291d2 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -432,8 +432,7 @@ def read_gbq( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq( @@ -449,8 +448,7 @@ def read_gbq( col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq( self, @@ -522,8 +520,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def _read_gbq_colab( @@ -532,8 +529,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -594,8 +590,7 @@ def read_gbq_query( # type: ignore[overload-overlap] filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_query( @@ -611,8 +606,7 @@ def read_gbq_query( filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, @@ -759,8 +753,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... 
@overload def read_gbq_table( @@ -774,8 +767,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_table( self, @@ -926,8 +918,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.core.indexes.Index: - ... + ) -> bigframes.core.indexes.Index: ... @typing.overload def read_pandas( @@ -935,8 +926,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", - ) -> bigframes.series.Series: - ... + ) -> bigframes.series.Series: ... @typing.overload def read_pandas( @@ -944,8 +934,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... def read_pandas( self, diff --git a/packages/bigframes/bigframes/session/loader.py b/packages/bigframes/bigframes/session/loader.py index e8efe83cba80..a07b6fd71ca7 100644 --- a/packages/bigframes/bigframes/session/loader.py +++ b/packages/bigframes/bigframes/session/loader.py @@ -644,8 +644,7 @@ def read_gbq_table( # type: ignore[overload-overlap] n_rows: Optional[int] = None, index_col_in_columns: bool = False, publish_execution: bool = True, - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_table( @@ -668,8 +667,7 @@ def read_gbq_table( n_rows: Optional[int] = None, index_col_in_columns: bool = False, publish_execution: bool = True, - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_table( self, @@ -1153,8 +1151,7 @@ def read_gbq_query( # type: ignore[overload-overlap] dry_run: Literal[False] = ..., force_total_order: Optional[bool] = ..., allow_large_results: bool, - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... 
@overload def read_gbq_query( @@ -1170,8 +1167,7 @@ def read_gbq_query( dry_run: Literal[True] = ..., force_total_order: Optional[bool] = ..., allow_large_results: bool, - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py index f46f5dc3d371..3fe81b6d95bd 100644 --- a/packages/bigframes/tests/system/conftest.py +++ b/packages/bigframes/tests/system/conftest.py @@ -1499,18 +1499,6 @@ def images_uris() -> list[str]: ] -@pytest.fixture(scope="session") -def images_mm_df( - images_uris, session: bigframes.Session, bq_connection: str -) -> bpd.DataFrame: - import bigframes.bigquery.obj as obj - - blob_series = obj.make_ref( - bpd.Series(images_uris, session=session), authorizer=bq_connection - ) - return blob_series.rename("blob_col").to_frame() - - @pytest.fixture() def reset_default_session_and_location(): bpd.close_session() @@ -1518,37 +1506,3 @@ def reset_default_session_and_location(): yield bpd.close_session() bpd.options.bigquery.location = None - - -@pytest.fixture(scope="session") -def pdf_gcs_path() -> str: - return "gs://bigframes_blob_test/pdfs/*" - - -@pytest.fixture(scope="session") -def pdf_mm_df( - pdf_gcs_path, session: bigframes.Session, bq_connection: str -) -> bpd.DataFrame: - import bigframes.bigquery.obj as obj - - table_id = session._create_object_table(pdf_gcs_path, bq_connection) - df = session.read_gbq(table_id) - blob_series = obj.make_ref(df["uri"], authorizer=bq_connection) - return blob_series.rename("pdf").to_frame() - - -@pytest.fixture(scope="session") -def audio_gcs_path() -> str: - return "gs://bigframes_blob_test/audio/*" - - -@pytest.fixture(scope="session") -def audio_mm_df( - audio_gcs_path, session: bigframes.Session, bq_connection: str -) -> bpd.DataFrame: - import bigframes.bigquery.obj as obj - - table_id = session._create_object_table(audio_gcs_path, bq_connection) - df = session.read_gbq(table_id) 
- blob_series = obj.make_ref(df["uri"], authorizer=bq_connection) - return blob_series.rename("audio").to_frame() diff --git a/packages/bigframes/tests/system/large/ml/test_multimodal_llm.py b/packages/bigframes/tests/system/large/ml/test_multimodal_llm.py deleted file mode 100644 index 69c316e3dacc..000000000000 --- a/packages/bigframes/tests/system/large/ml/test_multimodal_llm.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pandas as pd -import pyarrow as pa -import pytest - -import bigframes.pandas as bpd -from bigframes.ml import llm -from bigframes.testing import utils - - -@pytest.mark.parametrize( - "model_name", - ( - "gemini-2.0-flash-001", - "gemini-2.0-flash-lite-001", - ), -) -@pytest.mark.flaky(retries=2) -def test_gemini_text_generator_multimodal_input( - images_mm_df: bpd.DataFrame, model_name, session, bq_connection -): - gemini_text_generator_model = llm.GeminiTextGenerator( - model_name=model_name, connection_name=bq_connection, session=session - ) - pd_df = gemini_text_generator_model.predict( - images_mm_df, prompt=["Describe", images_mm_df["blob_col"]] - ).to_pandas() - utils.check_pandas_df_schema_and_index( - pd_df, - columns=utils.ML_GENERATE_TEXT_OUTPUT + ["blob_col"], - index=2, - col_exact=False, - ) - - -@pytest.mark.flaky(retries=2) -def test_multimodal_embedding_generator_predict_default_params_success( - images_mm_df, session, bq_connection -): - 
text_embedding_model = llm.MultimodalEmbeddingGenerator( - connection_name=bq_connection, session=session - ) - df = text_embedding_model.predict(images_mm_df).to_pandas() - utils.check_pandas_df_schema_and_index( - df, - columns=utils.ML_MULTIMODAL_GENERATE_EMBEDDING_OUTPUT, - index=2, - col_exact=False, - ) - assert len(df["ml_generate_embedding_result"][0]) == 1408 - - -@pytest.mark.parametrize( - "model_name", - ("gemini-2.0-flash-001",), -) -@pytest.mark.flaky(retries=2) -def test_gemini_text_generator_multimodal_structured_output( - images_mm_df: bpd.DataFrame, model_name, session, bq_connection -): - gemini_text_generator_model = llm.GeminiTextGenerator( - model_name=model_name, connection_name=bq_connection, session=session - ) - output_schema = { - "bool_output": "bool", - "int_output": "int64", - "float_output": "float64", - "str_output": "string", - "array_output": "array", - "struct_output": "struct", - } - df = gemini_text_generator_model.predict( - images_mm_df, - prompt=["Describe", images_mm_df["blob_col"]], - output_schema=output_schema, - ) - assert df["bool_output"].dtype == pd.BooleanDtype() - assert df["int_output"].dtype == pd.Int64Dtype() - assert df["float_output"].dtype == pd.Float64Dtype() - assert df["str_output"].dtype == pd.StringDtype(storage="pyarrow") - assert df["array_output"].dtype == pd.ArrowDtype(pa.list_(pa.int64())) - assert df["struct_output"].dtype == pd.ArrowDtype( - pa.struct([("number", pa.int64())]) - ) - - pd_df = df.to_pandas() - utils.check_pandas_df_schema_and_index( - pd_df, - columns=list(output_schema.keys()) - + ["blob_col", "prompt", "full_response", "status"], - index=2, - col_exact=False, - ) From c73abe73f13a2860551050040311e433cb949a5a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 16 Apr 2026 20:19:59 +0000 Subject: [PATCH 10/26] fix tests related to blob api deprecation --- .../tests/system/small/bigquery/test_ai.py | 53 +++++++++++++------ .../system/small/pandas/test_describe.py | 27 ++++++++-- 
.../tests/system/small/test_dataframe.py | 31 +++++++++-- .../tests/system/small/test_dataframe_io.py | 36 ++++++++++--- .../sqlglot/expressions/test_blob_ops.py | 16 ------ 5 files changed, 118 insertions(+), 45 deletions(-) diff --git a/packages/bigframes/tests/system/small/bigquery/test_ai.py b/packages/bigframes/tests/system/small/bigquery/test_ai.py index 16e9cca9f136..f475f3780775 100644 --- a/packages/bigframes/tests/system/small/bigquery/test_ai.py +++ b/packages/bigframes/tests/system/small/bigquery/test_ai.py @@ -22,6 +22,31 @@ import bigframes.pandas as bpd from bigframes import dataframe, dtypes, series from bigframes.testing import utils as test_utils +import uuid +import google.cloud.bigquery + + +def _create_mock_obj_ref_df(session, uris, name="image"): + df = bpd.DataFrame({name: uris}, session=session) + table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}" + df.to_gbq(table_id, if_exists="replace") + + client = session.bqclient + table = client.get_table(table_id) + schema = list(table.schema) + for i, field in enumerate(schema): + if field.name == name: + schema[i] = google.cloud.bigquery.SchemaField( + name=field.name, + field_type=field.field_type, + mode=field.mode, + description="bigframes_dtype: OBJ_REF_DTYPE", + ) + break + table.schema = schema + client.update_table(table, ["schema"]) + + return session.read_gbq(table_id) def test_ai_function_pandas_input(session): @@ -159,8 +184,8 @@ def test_ai_generate_bool(session): def test_ai_generate_bool_multi_model(session): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.generate_bool((df["image"], " contains an animal")) @@ -196,8 +221,8 @@ def test_ai_generate_int(session): def test_ai_generate_int_multi_model(session): - df = session.from_glob_path( - 
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image" + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.generate_int( @@ -235,8 +260,8 @@ def test_ai_generate_double(session): def test_ai_generate_double_multi_model(session): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.generate_double( @@ -267,10 +292,8 @@ def test_ai_if(session): def test_ai_if_multi_model(session, bq_connection): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", - name="image", - connection=bq_connection, + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.if_((df["image"], " contains an animal")) @@ -289,10 +312,8 @@ def test_ai_classify(session): def test_ai_classify_multi_model(session, bq_connection): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", - name="image", - connection=bq_connection, + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) result = bbq.ai.classify(df["image"], ["photo", "cartoon"]) @@ -312,8 +333,8 @@ def test_ai_score(session): def test_ai_score_multi_model(session): - df = session.from_glob_path( - "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" + df = _create_mock_obj_ref_df( + session, ["gs://cloud-samples-data/vision/ocr/sign.jpg"], name="image" ) prompt = ("Rank the liveliness of ", df["image"], "on the scale from 1 to 3") diff --git a/packages/bigframes/tests/system/small/pandas/test_describe.py b/packages/bigframes/tests/system/small/pandas/test_describe.py index b8e427c10ea8..0db25ecd60c9 100644 --- a/packages/bigframes/tests/system/small/pandas/test_describe.py +++ 
b/packages/bigframes/tests/system/small/pandas/test_describe.py @@ -358,15 +358,36 @@ def test_series_groupby_describe(scalars_dfs): def test_describe_json_and_obj_ref_returns_count(session): # Test describe() works on JSON and OBJ_REF types (without nunique, which fails) + import uuid + import google.cloud.bigquery + sql = """ SELECT PARSE_JSON('{"a": 1}') AS json_col, 'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col """ - df = session.read_gbq(sql) + df_init = session.read_gbq(sql) + + table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}" + df_init.to_gbq(table_id, if_exists="replace") + + client = session.bqclient + table = client.get_table(table_id) + schema = list(table.schema) + for i, field in enumerate(schema): + if field.name == "uri_col": + schema[i] = google.cloud.bigquery.SchemaField( + name=field.name, + field_type=field.field_type, + mode=field.mode, + description="bigframes_dtype: OBJ_REF_DTYPE", + ) + break + table.schema = schema + client.update_table(table, ["schema"]) - df["obj_ref_col"] = df["uri_col"].str.to_blob() - df = df.drop(columns=["uri_col"]) + df = session.read_gbq(table_id) + df = df.rename(columns={"uri_col": "obj_ref_col"}) res = df.describe(include="all").to_pandas() diff --git a/packages/bigframes/tests/system/small/test_dataframe.py b/packages/bigframes/tests/system/small/test_dataframe.py index 8df13a5bcbda..a4b290a5d242 100644 --- a/packages/bigframes/tests/system/small/test_dataframe.py +++ b/packages/bigframes/tests/system/small/test_dataframe.py @@ -5919,9 +5919,34 @@ def test_to_gbq_table_labels(scalars_df_index): def test_to_gbq_obj_ref_persists(session): # Test that saving and loading an Object Reference retains its dtype - bdf = session.from_glob_path( - "gs://cloud-samples-data/vision/ocr/*.jpg", name="uris" - ).head(1) + import uuid + import google.cloud.bigquery + + sql = """ + SELECT STRUCT('gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri, CAST(NULL AS STRING) AS version, 
CAST(NULL AS STRING) AS authorizer, PARSE_JSON('{}') AS details) AS uris + """ + df_init = session.read_gbq(sql) + + tmp_table_id = f"bigframes-dev.bigframes_tests_sys.tmp_obj_ref_{uuid.uuid4().hex}" + df_init.to_gbq(tmp_table_id, if_exists="replace") + + client = session.bqclient + table = client.get_table(tmp_table_id) + schema = list(table.schema) + for i, field in enumerate(schema): + if field.name == "uris": + schema[i] = google.cloud.bigquery.SchemaField( + name=field.name, + field_type=field.field_type, + mode=field.mode, + description="bigframes_dtype: OBJ_REF_DTYPE", + fields=field.fields, + ) + break + table.schema = schema + client.update_table(table, ["schema"]) + + bdf = session.read_gbq(tmp_table_id) destination_table = "bigframes-dev.bigframes_tests_sys.test_obj_ref_persistence" bdf.to_gbq(destination_table, if_exists="replace") diff --git a/packages/bigframes/tests/system/small/test_dataframe_io.py b/packages/bigframes/tests/system/small/test_dataframe_io.py index 4db606afb749..1771b6485a25 100644 --- a/packages/bigframes/tests/system/small/test_dataframe_io.py +++ b/packages/bigframes/tests/system/small/test_dataframe_io.py @@ -1010,16 +1010,38 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client): + import uuid + import google.cloud.bigquery + destination_table = f"{dataset_id}.test_to_gbq_obj_ref" sql = """ - SELECT - 'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col + SELECT STRUCT('gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri, CAST(NULL AS STRING) AS version, CAST(NULL AS STRING) AS authorizer, PARSE_JSON('{}') AS details) AS uri_col """ - df = session.read_gbq(sql) - df["obj_ref_col"] = df["uri_col"].str.to_blob() - df = df.drop(columns=["uri_col"]) - - df.to_gbq(destination_table) + df_init = session.read_gbq(sql) + + tmp_table_id = f"{dataset_id}.tmp_obj_ref_{uuid.uuid4().hex}" + df_init.to_gbq(tmp_table_id, if_exists="replace") + + 
client = session.bqclient + table = client.get_table(tmp_table_id) + schema = list(table.schema) + for i, field in enumerate(schema): + if field.name == "uri_col": + schema[i] = google.cloud.bigquery.SchemaField( + name=field.name, + field_type=field.field_type, + mode=field.mode, + description="bigframes_dtype: OBJ_REF_DTYPE", + fields=field.fields, + ) + break + table.schema = schema + client.update_table(table, ["schema"]) + + df = session.read_gbq(tmp_table_id) + df = df.rename(columns={"uri_col": "obj_ref_col"}) + + df.to_gbq(destination_table, if_exists="replace") table = bigquery_client.get_table(destination_table) obj_ref_field = next(f for f in table.schema if f.name == "obj_ref_col") diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py index 502490404db9..33e656fd83b0 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -38,19 +38,3 @@ def test_blob_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snaps [col_name], ) snapshot.assert_match(sql, "out.sql") - - -def test_blob_make_ref(scalar_types_df: bpd.DataFrame, snapshot): - ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection") - snapshot.assert_match(ref_s.to_frame().sql, "out.sql") - - -def test_blob_make_ref_json(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, - [ops.obj_make_ref_json_op.as_expr(col_name)], - [col_name], - ) - snapshot.assert_match(sql, "out.sql") From 2ce7667fd6ff3da8d1fcf72ffe4293f852608fc9 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 16 Apr 2026 21:28:49 +0000 Subject: [PATCH 11/26] test: update blob ops snapshots --- .../snapshots/test_blob_ops/test_blob_get_access_url/out.sql | 2 +- 
.../test_blob_get_access_url_with_duration/out.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql index 78bd19c32483..6b21b68cb617 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url/out.sql @@ -1,4 +1,4 @@ SELECT `rowindex`, OBJ.GET_ACCESS_URL(OBJ.MAKE_REF(`string_col`, 'my-connection'), 'r') AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql index ac2999e386d5..2e8b60230faa 100644 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql +++ b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_blob_get_access_url_with_duration/out.sql @@ -1,3 +1,3 @@ SELECT OBJ.GET_ACCESS_URL(`string_col`, 'READ', INTERVAL 3600 MICROSECOND) AS `string_col` -FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` \ No newline at end of file From 8cc3a1412db268b0889cea5c79e7e1ad894e4709 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 22 Apr 2026 19:45:17 +0000 Subject: [PATCH 12/26] refactor: remove blob references --- 
packages/bigframes/bigframes/dataframe.py | 17 ++------- packages/bigframes/bigframes/display/html.py | 36 +++----------------- packages/bigframes/bigframes/series.py | 5 --- 3 files changed, 7 insertions(+), 51 deletions(-) diff --git a/packages/bigframes/bigframes/dataframe.py b/packages/bigframes/bigframes/dataframe.py index a6fe9a3fe244..ee6bf9c863ff 100644 --- a/packages/bigframes/bigframes/dataframe.py +++ b/packages/bigframes/bigframes/dataframe.py @@ -820,21 +820,8 @@ def __repr__(self) -> str: ) def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: - """Process ObjectRef columns for display.""" - df = self - blob_cols = [] - if bigframes.options.display.blob_display: - blob_cols = [ - series_name - for series_name, series in self.items() - if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE - ] - if blob_cols: - df = self.copy() - for col in blob_cols: - # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. - df[col] = df[col]._blob._get_runtime(mode="R", with_metadata=True) - return df, blob_cols + """Process ObjectRef columns for display. (Deprecated)""" + return self, [] def _repr_mimebundle_(self, include=None, exclude=None): """ diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index e9b91afa8e77..7dfb34693cd5 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -189,7 +189,6 @@ def create_html_representation( pandas_df: pd.DataFrame, total_rows: int, total_columns: int, - blob_cols: list[str], ) -> str: """Create an HTML representation of the DataFrame or Series.""" from bigframes.series import Series @@ -197,12 +196,8 @@ def create_html_representation( opts = options.display with display_options.pandas_repr(opts): if isinstance(obj, Series): - # Some pandas objects may not have a _repr_html_ method, or it might - # fail in certain environments. 
We fall back to a pre-formatted - # string representation to ensure something is always displayed. pd_series = pandas_df.iloc[:, 0] try: - # TODO(b/464053870): Support rich display for blob Series. html_string = pd_series._repr_html_() except AttributeError: html_string = f"
{pd_series.to_string()}
" @@ -212,26 +207,8 @@ def create_html_representation( html_string += f"

[{total_rows} rows]

" return html_string else: - # It's a DataFrame - # TODO(shuowei, b/464053870): Escaping HTML would be useful, but - # `escape=False` is needed to show images. We may need to implement - # a full-fledged repr module to better support types not in pandas. - if options.display.blob_display and blob_cols: - formatters = {blob_col: _obj_ref_rt_to_html for blob_col in blob_cols} - - # set max_colwidth so not to truncate the image url - with pandas.option_context("display.max_colwidth", None): - html_string = pandas_df.to_html( - escape=False, - notebook=True, - max_rows=pandas.get_option("display.max_rows"), - max_cols=pandas.get_option("display.max_columns"), - show_dimensions=pandas.get_option("display.show_dimensions"), - formatters=formatters, # type: ignore - ) - else: - # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. - html_string = pandas_df._repr_html_() # type:ignore + # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. + html_string = pandas_df._repr_html_() # type:ignore html_string += f"[{total_rows} rows x {total_columns} columns in total]" return html_string @@ -265,7 +242,7 @@ def get_anywidget_bundle( if isinstance(obj, Series): df = obj.to_frame() else: - df, blob_cols = obj._get_display_df_and_blob_cols() + df, _ = obj._get_display_df_and_blob_cols() widget = display.TableWidget(df) widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) @@ -288,7 +265,6 @@ def get_anywidget_bundle( cached_pd, total_rows, total_columns, - blob_cols if "blob_cols" in locals() else [], ) is_series, has_index = _get_obj_metadata(obj) widget_repr["text/plain"] = plaintext.create_text_representation( @@ -317,14 +293,12 @@ def repr_mimebundle_head( from bigframes.series import Series opts = options.display - blob_cols: list[str] if isinstance(obj, Series): pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results( opts.max_rows ) - blob_cols = [] else: - df, blob_cols = 
obj._get_display_df_and_blob_cols() + df, _ = obj._get_display_df_and_blob_cols() pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( opts.max_rows ) @@ -333,7 +307,7 @@ def repr_mimebundle_head( column_count = len(pandas_df.columns) html_string = create_html_representation( - obj, pandas_df, row_count, column_count, blob_cols + obj, pandas_df, row_count, column_count ) is_series, has_index = _get_obj_metadata(obj) diff --git a/packages/bigframes/bigframes/series.py b/packages/bigframes/bigframes/series.py index 75d9300d2a9b..a5303a2002a0 100644 --- a/packages/bigframes/bigframes/series.py +++ b/packages/bigframes/bigframes/series.py @@ -68,7 +68,6 @@ import bigframes.functions import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops -import bigframes.operations.blob as blob import bigframes.operations.lists as lists import bigframes.operations.plotting as plotting import bigframes.operations.python_op_maps as python_ops @@ -320,10 +319,6 @@ def struct(self) -> structs.StructAccessor: def list(self) -> lists.ListAccessor: return lists.ListAccessor(self) - @property - def _blob(self) -> blob._BlobAccessor: - return blob._BlobAccessor(self) - @property @validations.requires_ordering() def T(self) -> Series: From 7c92e4f2a23889a8f90e577c705a11dacd449ccf Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 24 Apr 2026 21:14:15 +0000 Subject: [PATCH 13/26] chore: update tests, ml, fix lint, and update hooks --- .pre-commit-config.yaml | 5 +- packages/bigframes/bigframes/display/html.py | 4 +- packages/bigframes/bigframes/ml/llm.py | 4 +- .../tests/system/large/bigquery/test_obj.py | 41 ------ .../tests/system/small/bigquery/test_ai.py | 4 +- .../system/small/pandas/test_describe.py | 1 + .../tests/system/small/test_dataframe.py | 1 + .../tests/system/small/test_dataframe_io.py | 1 + .../bigframes/tests/unit/bigquery/test_obj.py | 125 ------------------ .../sqlglot/expressions/test_blob_ops.py | 40 ------ 10 files 
changed, 11 insertions(+), 215 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/bigquery/test_obj.py delete mode 100644 packages/bigframes/tests/unit/bigquery/test_obj.py delete mode 100644 packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5405cc8ff1f3..f0f656999078 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,10 +22,11 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 24.3.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 + rev: 7.0.0 hooks: - id: flake8 + args: ["--ignore=E501", "--ignore=W503"] diff --git a/packages/bigframes/bigframes/display/html.py b/packages/bigframes/bigframes/display/html.py index 7dfb34693cd5..f067a6e11f1e 100644 --- a/packages/bigframes/bigframes/display/html.py +++ b/packages/bigframes/bigframes/display/html.py @@ -306,9 +306,7 @@ def repr_mimebundle_head( obj._set_internal_query_job(query_job) column_count = len(pandas_df.columns) - html_string = create_html_representation( - obj, pandas_df, row_count, column_count - ) + html_string = create_html_representation(obj, pandas_df, row_count, column_count) is_series, has_index = _get_obj_metadata(obj) text_representation = plaintext.create_text_representation( diff --git a/packages/bigframes/bigframes/ml/llm.py b/packages/bigframes/bigframes/ml/llm.py index d9e228c90c9f..3887453a2239 100644 --- a/packages/bigframes/bigframes/ml/llm.py +++ b/packages/bigframes/bigframes/ml/llm.py @@ -397,7 +397,7 @@ def predict( # TODO(garrettwu): remove transform to ObjRefRuntime when BQML supports ObjRef as input if X["content"].dtype == dtypes.OBJ_REF_DTYPE: - X["content"] = X["content"]._blob._get_runtime("R", with_metadata=True) + X["content"] = bbq.obj.get_access_url(X["content"], mode="r") options: dict = {} @@ -731,7 +731,7 @@ def predict( isinstance(item, 
bigframes.series.Series) and item.dtype == dtypes.OBJ_REF_DTYPE ): - item = item._blob._get_runtime("R", with_metadata=True) + item = bbq.obj.get_access_url(item, mode="r") df_prompt[label] = item df_prompt = df_prompt.drop(columns="bigframes_placeholder_col") diff --git a/packages/bigframes/tests/system/large/bigquery/test_obj.py b/packages/bigframes/tests/system/large/bigquery/test_obj.py deleted file mode 100644 index dcca7580b143..000000000000 --- a/packages/bigframes/tests/system/large/bigquery/test_obj.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import bigframes.bigquery as bbq - - -@pytest.fixture() -def objectrefs(bq_connection): - return bbq.obj.make_ref( - [ - "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/tick-terminator-for-dogs.png" - ], - bq_connection, - ) - - -def test_obj_fetch_metadata(objectrefs): - metadata = bbq.obj.fetch_metadata(objectrefs) - - result = metadata.to_pandas() - assert len(result) == len(objectrefs) - - -def test_obj_get_access_url(objectrefs): - access = bbq.obj.get_access_url(objectrefs, "r") - - result = access.to_pandas() - assert len(result) == len(objectrefs) diff --git a/packages/bigframes/tests/system/small/bigquery/test_ai.py b/packages/bigframes/tests/system/small/bigquery/test_ai.py index f475f3780775..8563a894a7fa 100644 --- a/packages/bigframes/tests/system/small/bigquery/test_ai.py +++ b/packages/bigframes/tests/system/small/bigquery/test_ai.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import uuid from unittest import mock +import google.cloud.bigquery import pandas as pd import pyarrow as pa import pytest @@ -22,8 +24,6 @@ import bigframes.pandas as bpd from bigframes import dataframe, dtypes, series from bigframes.testing import utils as test_utils -import uuid -import google.cloud.bigquery def _create_mock_obj_ref_df(session, uris, name="image"): diff --git a/packages/bigframes/tests/system/small/pandas/test_describe.py b/packages/bigframes/tests/system/small/pandas/test_describe.py index 0db25ecd60c9..beb7a1968fc2 100644 --- a/packages/bigframes/tests/system/small/pandas/test_describe.py +++ b/packages/bigframes/tests/system/small/pandas/test_describe.py @@ -359,6 +359,7 @@ def test_series_groupby_describe(scalars_dfs): def test_describe_json_and_obj_ref_returns_count(session): # Test describe() works on JSON and OBJ_REF types (without nunique, which fails) import uuid + import google.cloud.bigquery sql = """ diff --git a/packages/bigframes/tests/system/small/test_dataframe.py b/packages/bigframes/tests/system/small/test_dataframe.py index 8555c2a84897..a109c33ffa66 100644 --- a/packages/bigframes/tests/system/small/test_dataframe.py +++ b/packages/bigframes/tests/system/small/test_dataframe.py @@ -5944,6 +5944,7 @@ def test_to_gbq_table_labels(scalars_df_index): def test_to_gbq_obj_ref_persists(session): # Test that saving and loading an Object Reference retains its dtype import uuid + import google.cloud.bigquery sql = """ diff --git a/packages/bigframes/tests/system/small/test_dataframe_io.py b/packages/bigframes/tests/system/small/test_dataframe_io.py index 1771b6485a25..ef21e929afa3 100644 --- a/packages/bigframes/tests/system/small/test_dataframe_io.py +++ b/packages/bigframes/tests/system/small/test_dataframe_io.py @@ -1011,6 +1011,7 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client): import uuid + import google.cloud.bigquery 
destination_table = f"{dataset_id}.test_to_gbq_obj_ref" diff --git a/packages/bigframes/tests/unit/bigquery/test_obj.py b/packages/bigframes/tests/unit/bigquery/test_obj.py deleted file mode 100644 index 9eac234b8bc3..000000000000 --- a/packages/bigframes/tests/unit/bigquery/test_obj.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -from unittest import mock - -import bigframes.bigquery.obj as obj -import bigframes.operations as ops -import bigframes.series - - -def create_mock_series(): - result = mock.create_autospec(bigframes.series.Series, instance=True) - result.copy.return_value = result - return result - - -def test_fetch_metadata_op_structure(): - op = ops.obj_fetch_metadata_op - assert op.name == "obj_fetch_metadata" - - -def test_get_access_url_op_structure(): - op = ops.ObjGetAccessUrl(mode="r") - assert op.name == "obj_get_access_url" - assert op.mode == "r" - assert op.duration is None - - -def test_get_access_url_with_duration_op_structure(): - op = ops.ObjGetAccessUrl(mode="rw", duration=3600000000) - assert op.name == "obj_get_access_url" - assert op.mode == "rw" - assert op.duration == 3600000000 - - -def test_make_ref_op_structure(): - op = ops.obj_make_ref_op - assert op.name == "obj_make_ref" - - -def test_make_ref_json_op_structure(): - op = ops.obj_make_ref_json_op - assert op.name == "obj_make_ref_json" - - -def test_fetch_metadata_calls_apply_unary_op(): - series = 
create_mock_series() - - obj.fetch_metadata(series) - - series._apply_unary_op.assert_called_once() - args, _ = series._apply_unary_op.call_args - assert args[0] == ops.obj_fetch_metadata_op - - -def test_get_access_url_calls_apply_unary_op_without_duration(): - series = create_mock_series() - - obj.get_access_url(series, mode="r") - - series._apply_unary_op.assert_called_once() - args, _ = series._apply_unary_op.call_args - assert isinstance(args[0], ops.ObjGetAccessUrl) - assert args[0].mode == "r" - assert args[0].duration is None - - -def test_get_access_url_calls_apply_unary_op_with_duration(): - series = create_mock_series() - duration = datetime.timedelta(hours=1) - - obj.get_access_url(series, mode="rw", duration=duration) - - series._apply_unary_op.assert_called_once() - args, _ = series._apply_unary_op.call_args - assert isinstance(args[0], ops.ObjGetAccessUrl) - assert args[0].mode == "rw" - # 1 hour = 3600 seconds = 3600 * 1000 * 1000 microseconds - assert args[0].duration == 3600000000 - - -def test_make_ref_calls_apply_binary_op_with_authorizer(): - uri = create_mock_series() - auth = create_mock_series() - - obj.make_ref(uri, authorizer=auth) - - uri._apply_binary_op.assert_called_once() - args, _ = uri._apply_binary_op.call_args - assert args[0] == auth - assert args[1] == ops.obj_make_ref_op - - -def test_make_ref_calls_apply_binary_op_with_authorizer_string(): - uri = create_mock_series() - auth = "us.bigframes-test-connection" - - obj.make_ref(uri, authorizer=auth) - - uri._apply_binary_op.assert_called_once() - args, _ = uri._apply_binary_op.call_args - assert args[0] == auth - assert args[1] == ops.obj_make_ref_op - - -def test_make_ref_calls_apply_unary_op_without_authorizer(): - json_val = create_mock_series() - - obj.make_ref(json_val) - - json_val._apply_unary_op.assert_called_once() - args, _ = json_val._apply_unary_op.call_args - assert args[0] == ops.obj_make_ref_json_op diff --git 
a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py deleted file mode 100644 index 33e656fd83b0..000000000000 --- a/packages/bigframes/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -import bigframes.bigquery.obj as obj -import bigframes.pandas as bpd -from bigframes import operations as ops -from bigframes.testing import utils - -pytest.importorskip("pytest_snapshot") - - -def test_blob_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): - ref_s = obj.make_ref(scalar_types_df["string_col"], authorizer="my-connection") - url_s = obj.get_access_url(ref_s, mode="r") - sql = url_s.to_frame().sql - snapshot.assert_match(sql, "out.sql") - - -def test_blob_get_access_url_with_duration(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = utils._apply_ops_to_sql( - bf_df, - [ops.ObjGetAccessUrl(mode="READ", duration=3600).as_expr(col_name)], - [col_name], - ) - snapshot.assert_match(sql, "out.sql") From 0ce4ea7cae285f757df7a0a74f579a6bb1e50cb3 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 24 Apr 2026 21:26:54 +0000 Subject: [PATCH 14/26] revert: restore .pre-commit-config.yaml to original state --- .pre-commit-config.yaml | 5 ++--- 1 file changed, 2 
insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f0f656999078..5405cc8ff1f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,11 +22,10 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 24.3.0 + rev: 22.3.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 3.9.2 hooks: - id: flake8 - args: ["--ignore=E501", "--ignore=W503"] From 1ba46c0c794f2ad2b3d000604ed52c0211488353 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 27 Apr 2026 20:34:45 +0000 Subject: [PATCH 15/26] feat: add _from_glob_path and update tests --- packages/bigframes/bigframes/session/__init__.py | 10 ++++++++++ .../bigframes/tests/system/large/operations/test_ai.py | 4 ++-- .../tests/system/large/operations/test_semantics.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index af51bdc3680b..339ab165117e 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -2309,6 +2309,16 @@ def _start_query_ml_ddl( ) return iterator, query_job + def _from_glob_path( + self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None + ) -> dataframe.DataFrame: + """Create a BigFrames DataFrame that contains a BigFrames ObjectRef column from a global wildcard path.""" + import bigframes.bigquery as bq + connection = self._create_bq_connection(connection=connection) + table = self._create_object_table(path, connection) + s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection) + return s.rename(name).to_frame() + def _create_object_table(self, path: str, connection: str) -> str: """Create a random id Object Table from the input path and connection.""" table = str(self._anon_dataset_manager.generate_unique_resource_id()) diff --git 
a/packages/bigframes/tests/system/large/operations/test_ai.py b/packages/bigframes/tests/system/large/operations/test_ai.py index 57aa1966cbef..9c644e9c6b8a 100644 --- a/packages/bigframes/tests/system/large/operations/test_ai.py +++ b/packages/bigframes/tests/system/large/operations/test_ai.py @@ -64,7 +64,7 @@ def test_filter_multi_model(session, gemini_flash_model): THRESHOLD_OPTION, 10, ): - df = session.from_glob_path( + df = session._from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" ) df["prey"] = series.Series( @@ -238,7 +238,7 @@ def test_map_multimodel(session, gemini_flash_model): THRESHOLD_OPTION, 10, ): - df = session.from_glob_path( + df = session._from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" ) df["scenario"] = series.Series( diff --git a/packages/bigframes/tests/system/large/operations/test_semantics.py b/packages/bigframes/tests/system/large/operations/test_semantics.py index 56c90212e99c..983328f11b01 100644 --- a/packages/bigframes/tests/system/large/operations/test_semantics.py +++ b/packages/bigframes/tests/system/large/operations/test_semantics.py @@ -411,7 +411,7 @@ def test_filter_multi_model(session, gemini_flash_model): THRESHOLD_OPTION, 10, ): - df = session.from_glob_path( + df = session._from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" ) df["prey"] = series.Series( @@ -580,7 +580,7 @@ def test_map_multimodel(session, gemini_flash_model): THRESHOLD_OPTION, 10, ): - df = session.from_glob_path( + df = session._from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" ) df["scenario"] = series.Series( From 0ad748256fcd4a49d8a85ecb49f6c719137e5b7e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 28 Apr 2026 22:36:41 +0000 Subject: [PATCH 16/26] revert change --- .../bigframes/bigframes/pandas/__init__.py | 6 + packages/bigframes/bigframes/pandas/io/api.py | 28 ++++ .../bigframes/bigframes/session/__init__.py | 62 
++++++++ .../tests/system/small/blob/test_io.py | 132 ++++++++++++++++++ .../system/small/blob/test_properties.py | 120 ++++++++++++++++ .../tests/system/small/blob/test_urls.py | 34 +++++ .../bigframes/tests/unit/bigquery/test_obj.py | 125 +++++++++++++++++ 7 files changed, 507 insertions(+) create mode 100644 packages/bigframes/tests/system/small/blob/test_io.py create mode 100644 packages/bigframes/tests/system/small/blob/test_properties.py create mode 100644 packages/bigframes/tests/system/small/blob/test_urls.py create mode 100644 packages/bigframes/tests/unit/bigquery/test_obj.py diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index 11938a887785..58833284ae96 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -99,12 +99,14 @@ from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( _read_gbq_colab, + from_glob_path, read_arrow, read_avro, read_csv, read_gbq, read_gbq_function, read_gbq_model, + read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -453,6 +455,7 @@ def reset_session(): _read_gbq_colab, read_gbq_function, read_gbq_model, + read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -463,6 +466,7 @@ def reset_session(): remote_function, to_datetime, to_timedelta, + from_glob_path, ] # Use __all__ to let type checkers know what is part of the public API. 
@@ -488,6 +492,7 @@ def reset_session(): "_read_gbq_colab", "read_gbq_function", "read_gbq_model", + "read_gbq_object_table", "read_gbq_query", "read_gbq_table", "read_json", @@ -498,6 +503,7 @@ def reset_session(): "remote_function", "to_datetime", "to_timedelta", + "from_glob_path", # Other names "api", # pandas dtype attributes diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index e2737fdbbd1a..b7ed1a65d922 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -394,6 +394,21 @@ def read_gbq_model(model_name: str): read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) +def read_gbq_object_table( + object_table: str, *, name: Optional[str] = None +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session.read_gbq_object_table, + object_table, + name=name, + ) + + +read_gbq_object_table.__doc__ = inspect.getdoc( + bigframes.session.Session.read_gbq_object_table +) + + @overload def read_gbq_query( # type: ignore[overload-overlap] query: str, @@ -620,6 +635,19 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) +def from_glob_path( + path: str, *, connection: Optional[str] = None, name: Optional[str] = None +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session.from_glob_path, + path=path, + connection=connection, + name=name, + ) + + +from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session.from_glob_path) + _default_location_lock = threading.Lock() diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 339ab165117e..33745b6438ed 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -2349,6 +2349,68 @@ def 
_create_object_table(self, path: str, connection: str) -> str: return table + def from_glob_path( + self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None + ) -> dataframe.DataFrame: + r"""Create a BigFrames DataFrame that contains a BigFrames `ObjectRef column `_ from a global wildcard path. + This operation creates a temporary BQ Object Table under the hood and requires bigquery.connections.delegate permission or BigQuery Connection Admin role. + If you have an existing BQ Object Table, use read_gbq_object_table(). + + .. note:: + BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the + Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" + and might have limited support. For more information, see the launch stage descriptions + (https://cloud.google.com/products#product-launch-stages). + + Args: + path (str): + The wildcard global path, such as "gs:////\*". + connection (str or None, default None): + Connection to connect with remote service. str of the format ... + If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach + permission if the connection isn't fully set up. + name (str): + The column name of the ObjectRef column. + Returns: + bigframes.pandas.DataFrame: + Result BigFrames DataFrame. + """ + import bigframes.bigquery as bq + connection = self._create_bq_connection(connection=connection) + + table = self._create_object_table(path, connection) + + s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection) + return s.rename(name).to_frame() + + def read_gbq_object_table( + self, object_table: str, *, name: Optional[str] = None + ) -> dataframe.DataFrame: + """Read an existing object table to create a BigFrames `ObjectRef `_ DataFrame. Use the connection of the object table for the connection of the ObjectRef. 
+ This function dosen't retrieve the object table data. If you want to read the data, use read_gbq() instead. + + .. note:: + BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the + Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" + and might have limited support. For more information, see the launch stage descriptions + (https://cloud.google.com/products#product-launch-stages). + + Args: + object_table (str): name of the object table of form ... + name (str or None): the returned ObjectRef column name. + + Returns: + bigframes.pandas.DataFrame: + Result BigFrames DataFrame. + """ + import bigframes.bigquery as bq + # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. + table = self.bqclient.get_table(object_table) + connection = table._properties["externalDataConfiguration"]["connectionId"] + + s = bq.obj.make_ref(self._loader.read_gbq_table(object_table)["uri"], authorizer=connection) + return s.rename(name).to_frame() + def _create_temp_view(self, sql: str) -> bigquery.TableReference: """Create a random id view from the sql string.""" return self._anon_dataset_manager.create_temp_view(sql) diff --git a/packages/bigframes/tests/system/small/blob/test_io.py b/packages/bigframes/tests/system/small/blob/test_io.py new file mode 100644 index 000000000000..c89fb4c6e6ed --- /dev/null +++ b/packages/bigframes/tests/system/small/blob/test_io.py @@ -0,0 +1,132 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import pandas as pd +import pytest + +import bigframes +import bigframes.pandas as bpd + +pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) + + +idisplay = pytest.importorskip("IPython.display") + + +def test_blob_create_from_uri_str( + bq_connection: str, session: bigframes.Session, images_uris +): + uri_series = bpd.Series(images_uris, session=session) + blob_series = uri_series.str.to_blob(connection=bq_connection) + + pd_blob_df = blob_series.struct.explode().to_pandas() + expected_pd_df = pd.DataFrame( + { + "uri": images_uris, + "version": [None, None], + "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "details": [None, None], + } + ) + + pd.testing.assert_frame_equal( + pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False + ) + + +def test_blob_create_from_glob_path( + bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris +): + blob_df = session.from_glob_path( + images_gcs_path, connection=bq_connection, name="blob_col" + ) + pd_blob_df = ( + blob_df["blob_col"] + .struct.explode() + .to_pandas() + .sort_values("uri") + .reset_index(drop=True) + ) + + expected_df = pd.DataFrame( + { + "uri": images_uris, + "version": [None, None], + "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "details": [None, None], + } + ) + + pd.testing.assert_frame_equal( + pd_blob_df, expected_df, check_dtype=False, check_index_type=False + ) + + +def test_blob_create_read_gbq_object_table( + bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris +): + obj_table = session._create_object_table(images_gcs_path, bq_connection) + + blob_df = session.read_gbq_object_table(obj_table, name="blob_col") + pd_blob_df = ( + blob_df["blob_col"] + .struct.explode() + .to_pandas() + .sort_values("uri") + .reset_index(drop=True) 
+ ) + expected_df = pd.DataFrame( + { + "uri": images_uris, + "version": [None, None], + "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "details": [None, None], + } + ) + + pd.testing.assert_frame_equal( + pd_blob_df, expected_df, check_dtype=False, check_index_type=False + ) + + +def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame): + mock_display = mock.Mock() + monkeypatch.setattr(idisplay, "display", mock_display) + + images_mm_df["blob_col"].blob.display() + + for call in mock_display.call_args_list: + args, _ = call + arg = args[0] + assert isinstance(arg, idisplay.Image) + + +def test_display_nulls( + monkeypatch, + bq_connection: str, + session: bigframes.Session, +): + uri_series = bpd.Series([None, None, None], dtype="string", session=session) + blob_series = uri_series.str.to_blob(connection=bq_connection) + mock_display = mock.Mock() + monkeypatch.setattr(idisplay, "display", mock_display) + + blob_series.blob.display() + + for call in mock_display.call_args_list: + args, _ = call + arg = args[0] + assert arg == "" diff --git a/packages/bigframes/tests/system/small/blob/test_properties.py b/packages/bigframes/tests/system/small/blob/test_properties.py new file mode 100644 index 000000000000..ec18f05462d3 --- /dev/null +++ b/packages/bigframes/tests/system/small/blob/test_properties.py @@ -0,0 +1,120 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pandas as pd +import pytest + +import bigframes.dtypes as dtypes +import bigframes.pandas as bpd +import bigframes.bigquery as bbq + +pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) + + +def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame): + actual = images_mm_df["blob_col"].struct.field("uri").to_pandas() + expected = pd.Series(images_uris, name="uri") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): + actual = images_mm_df["blob_col"].struct.field("authorizer").to_pandas() + expected = pd.Series( + [bq_connection.casefold(), bq_connection.casefold()], name="authorizer" + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_version(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.version").to_pandas() + expected = pd.Series(["1753907851152593", "1753907851111538"], name="version") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_metadata(images_mm_df: bpd.DataFrame): + actual = images_mm_df["blob_col"].struct.field("details").to_pandas() + expected = pd.Series( + [ + ( + '{"content_type":"image/jpeg",' + '"md5_hash":"e130ad042261a1883cd2cc06831cf748",' + '"size":338390,' + '"updated":1753907851000000}' + ), + ( + '{"content_type":"image/jpeg",' + '"md5_hash":"e2ae3191ff2b809fd0935f01a537c650",' + '"size":43333,' + '"updated":1753907851000000}' + ), + ], + name="metadata", + dtype=dtypes.JSON_DTYPE, + ) + expected.index = expected.index.astype(dtypes.INT_DTYPE) + pd.testing.assert_series_equal(actual, expected) + + +def test_blob_content_type(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.content_type").to_pandas() + 
expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_md5_hash(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.md5_hash").to_pandas() + expected = pd.Series( + ["e130ad042261a1883cd2cc06831cf748", "e2ae3191ff2b809fd0935f01a537c650"], + name="md5_hash", + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_size(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.size").astype("Int64").to_pandas() + expected = pd.Series([338390, 43333], name="size") + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) + + +def test_blob_updated(images_mm_df: bpd.DataFrame): + actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.updated").to_pandas() + expected = pd.Series( + [ + pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), + pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), + ], + name="updated", + ) + + pd.testing.assert_series_equal( + actual, expected, check_dtype=False, check_index_type=False + ) diff --git a/packages/bigframes/tests/system/small/blob/test_urls.py b/packages/bigframes/tests/system/small/blob/test_urls.py new file mode 100644 index 000000000000..0a2db23db20d --- /dev/null +++ b/packages/bigframes/tests/system/small/blob/test_urls.py @@ -0,0 +1,34 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import bigframes.pandas as bpd +import bigframes.bigquery as bbq + +pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) + + +def test_blob_read_url(images_mm_df: bpd.DataFrame): + access_json = bbq.obj.get_access_url(images_mm_df["blob_col"], mode="r") + urls = bbq.json_value(access_json, "$.access_urls.read_url") + + assert urls.str.startswith("https://storage.googleapis.com/").all() + + +def test_blob_write_url(images_mm_df: bpd.DataFrame): + access_json = bbq.obj.get_access_url(images_mm_df["blob_col"], mode="rw") + urls = bbq.json_value(access_json, "$.access_urls.write_url") + + assert urls.str.startswith("https://storage.googleapis.com/").all() diff --git a/packages/bigframes/tests/unit/bigquery/test_obj.py b/packages/bigframes/tests/unit/bigquery/test_obj.py new file mode 100644 index 000000000000..9eac234b8bc3 --- /dev/null +++ b/packages/bigframes/tests/unit/bigquery/test_obj.py @@ -0,0 +1,125 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +from unittest import mock + +import bigframes.bigquery.obj as obj +import bigframes.operations as ops +import bigframes.series + + +def create_mock_series(): + result = mock.create_autospec(bigframes.series.Series, instance=True) + result.copy.return_value = result + return result + + +def test_fetch_metadata_op_structure(): + op = ops.obj_fetch_metadata_op + assert op.name == "obj_fetch_metadata" + + +def test_get_access_url_op_structure(): + op = ops.ObjGetAccessUrl(mode="r") + assert op.name == "obj_get_access_url" + assert op.mode == "r" + assert op.duration is None + + +def test_get_access_url_with_duration_op_structure(): + op = ops.ObjGetAccessUrl(mode="rw", duration=3600000000) + assert op.name == "obj_get_access_url" + assert op.mode == "rw" + assert op.duration == 3600000000 + + +def test_make_ref_op_structure(): + op = ops.obj_make_ref_op + assert op.name == "obj_make_ref" + + +def test_make_ref_json_op_structure(): + op = ops.obj_make_ref_json_op + assert op.name == "obj_make_ref_json" + + +def test_fetch_metadata_calls_apply_unary_op(): + series = create_mock_series() + + obj.fetch_metadata(series) + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert args[0] == ops.obj_fetch_metadata_op + + +def test_get_access_url_calls_apply_unary_op_without_duration(): + series = create_mock_series() + + obj.get_access_url(series, mode="r") + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert isinstance(args[0], ops.ObjGetAccessUrl) + assert args[0].mode == "r" + assert args[0].duration is None + + +def test_get_access_url_calls_apply_unary_op_with_duration(): + series = create_mock_series() + duration = datetime.timedelta(hours=1) + + obj.get_access_url(series, mode="rw", duration=duration) + + series._apply_unary_op.assert_called_once() + args, _ = series._apply_unary_op.call_args + assert isinstance(args[0], ops.ObjGetAccessUrl) + assert 
args[0].mode == "rw" + # 1 hour = 3600 seconds = 3600 * 1000 * 1000 microseconds + assert args[0].duration == 3600000000 + + +def test_make_ref_calls_apply_binary_op_with_authorizer(): + uri = create_mock_series() + auth = create_mock_series() + + obj.make_ref(uri, authorizer=auth) + + uri._apply_binary_op.assert_called_once() + args, _ = uri._apply_binary_op.call_args + assert args[0] == auth + assert args[1] == ops.obj_make_ref_op + + +def test_make_ref_calls_apply_binary_op_with_authorizer_string(): + uri = create_mock_series() + auth = "us.bigframes-test-connection" + + obj.make_ref(uri, authorizer=auth) + + uri._apply_binary_op.assert_called_once() + args, _ = uri._apply_binary_op.call_args + assert args[0] == auth + assert args[1] == ops.obj_make_ref_op + + +def test_make_ref_calls_apply_unary_op_without_authorizer(): + json_val = create_mock_series() + + obj.make_ref(json_val) + + json_val._apply_unary_op.assert_called_once() + args, _ = json_val._apply_unary_op.call_args + assert args[0] == ops.obj_make_ref_json_op From b84baf04d427253739aab90cd0c64dc81110b206 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:02:49 +0000 Subject: [PATCH 17/26] remove apis --- .../bigframes/bigframes/pandas/__init__.py | 6 -- packages/bigframes/bigframes/pandas/io/api.py | 23 ------- .../bigframes/bigframes/session/__init__.py | 60 ------------------- 3 files changed, 89 deletions(-) diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index 58833284ae96..11938a887785 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -99,14 +99,12 @@ from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( _read_gbq_colab, - from_glob_path, read_arrow, read_avro, read_csv, read_gbq, read_gbq_function, read_gbq_model, - read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -455,7 +453,6 @@ def 
reset_session(): _read_gbq_colab, read_gbq_function, read_gbq_model, - read_gbq_object_table, read_gbq_query, read_gbq_table, read_json, @@ -466,7 +463,6 @@ def reset_session(): remote_function, to_datetime, to_timedelta, - from_glob_path, ] # Use __all__ to let type checkers know what is part of the public API. @@ -492,7 +488,6 @@ def reset_session(): "_read_gbq_colab", "read_gbq_function", "read_gbq_model", - "read_gbq_object_table", "read_gbq_query", "read_gbq_table", "read_json", @@ -503,7 +498,6 @@ def reset_session(): "remote_function", "to_datetime", "to_timedelta", - "from_glob_path", # Other names "api", # pandas dtype attributes diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index b7ed1a65d922..3c7692b39037 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -394,20 +394,8 @@ def read_gbq_model(model_name: str): read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) -def read_gbq_object_table( - object_table: str, *, name: Optional[str] = None -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.read_gbq_object_table, - object_table, - name=name, - ) -read_gbq_object_table.__doc__ = inspect.getdoc( - bigframes.session.Session.read_gbq_object_table -) - @overload def read_gbq_query( # type: ignore[overload-overlap] @@ -635,18 +623,7 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) -def from_glob_path( - path: str, *, connection: Optional[str] = None, name: Optional[str] = None -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.from_glob_path, - path=path, - connection=connection, - name=name, - ) - -from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session.from_glob_path) _default_location_lock = threading.Lock() diff 
--git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 33745b6438ed..1bac3e8a08a5 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -2349,67 +2349,7 @@ def _create_object_table(self, path: str, connection: str) -> str: return table - def from_glob_path( - self, path: str, *, connection: Optional[str] = None, name: Optional[str] = None - ) -> dataframe.DataFrame: - r"""Create a BigFrames DataFrame that contains a BigFrames `ObjectRef column `_ from a global wildcard path. - This operation creates a temporary BQ Object Table under the hood and requires bigquery.connections.delegate permission or BigQuery Connection Admin role. - If you have an existing BQ Object Table, use read_gbq_object_table(). - - .. note:: - BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - Args: - path (str): - The wildcard global path, such as "gs:////\*". - connection (str or None, default None): - Connection to connect with remote service. str of the format ... - If None, use default connection in session context. BigQuery DataFrame will try to create the connection and attach - permission if the connection isn't fully set up. - name (str): - The column name of the ObjectRef column. - Returns: - bigframes.pandas.DataFrame: - Result BigFrames DataFrame. 
- """ - import bigframes.bigquery as bq - connection = self._create_bq_connection(connection=connection) - - table = self._create_object_table(path, connection) - - s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection) - return s.rename(name).to_frame() - - def read_gbq_object_table( - self, object_table: str, *, name: Optional[str] = None - ) -> dataframe.DataFrame: - """Read an existing object table to create a BigFrames `ObjectRef `_ DataFrame. Use the connection of the object table for the connection of the ObjectRef. - This function dosen't retrieve the object table data. If you want to read the data, use read_gbq() instead. - - .. note:: - BigFrames ObjectRef is subject to the "Pre-GA Offerings Terms" in the General Service Terms section of the - Service Specific Terms(https://cloud.google.com/terms/service-terms#1). Pre-GA products and features are available "as is" - and might have limited support. For more information, see the launch stage descriptions - (https://cloud.google.com/products#product-launch-stages). - - Args: - object_table (str): name of the object table of form ... - name (str or None): the returned ObjectRef column name. - - Returns: - bigframes.pandas.DataFrame: - Result BigFrames DataFrame. - """ - import bigframes.bigquery as bq - # TODO(garrettwu): switch to pseudocolumn when b/374988109 is done. 
- table = self.bqclient.get_table(object_table) - connection = table._properties["externalDataConfiguration"]["connectionId"] - - s = bq.obj.make_ref(self._loader.read_gbq_table(object_table)["uri"], authorizer=connection) - return s.rename(name).to_frame() def _create_temp_view(self, sql: str) -> bigquery.TableReference: """Create a random id view from the sql string.""" From 653a1b41fd9d47dff5ee0324bdd0e2f072a7f461 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:05:01 +0000 Subject: [PATCH 18/26] remove tests --- .../tests/system/large/blob/test_function.py | 853 ------------------ .../tests/system/small/blob/test_io.py | 132 --- .../tests/system/small/blob/test_urls.py | 34 - 3 files changed, 1019 deletions(-) delete mode 100644 packages/bigframes/tests/system/large/blob/test_function.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_io.py delete mode 100644 packages/bigframes/tests/system/small/blob/test_urls.py diff --git a/packages/bigframes/tests/system/large/blob/test_function.py b/packages/bigframes/tests/system/large/blob/test_function.py deleted file mode 100644 index e0996db4212a..000000000000 --- a/packages/bigframes/tests/system/large/blob/test_function.py +++ /dev/null @@ -1,853 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -import os -import traceback -import uuid -from typing import Generator - -import pandas as pd -import pytest -from google.cloud import storage - -import bigframes -import bigframes.pandas as bpd -from bigframes import dtypes - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -@pytest.fixture(scope="function") -def images_output_folder() -> Generator[str, None, None]: - id = uuid.uuid4().hex - folder = os.path.join("gs://bigframes_blob_test/output/", id) - yield folder - - # clean up - try: - cloud_storage_client = storage.Client() - bucket = cloud_storage_client.bucket("bigframes_blob_test") - blobs = bucket.list_blobs(prefix="output/" + id) - for blob in blobs: - blob.delete() - except Exception as exc: - traceback.print_exception(type(exc), exc, None) - - -@pytest.fixture(scope="function") -def images_output_uris(images_output_folder: str) -> list[str]: - return [ - os.path.join(images_output_folder, "img0.jpg"), - os.path.join(images_output_folder, "img1.jpg"), - ] - - -def test_blob_exif( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=False - ) - expected = bpd.Series( - ['{"ExifOffset": 47, "Make": "MyCamera"}'], - session=session, - dtype=dtypes.JSON_DTYPE, - ) - pd.testing.assert_series_equal( - actual.to_pandas(), - expected.to_pandas(), - check_dtype=False, - check_index_type=False, - ) - - -def test_blob_exif_verbose( - bq_connection: str, - session: bigframes.Session, -): - exif_image_df = session.from_glob_path( - "gs://bigframes_blob_test/images_exif/*", - name="blob_col", - connection=bq_connection, - ) - - actual = exif_image_df["blob_col"].blob.exif( - engine="pillow", connection=bq_connection, verbose=True - ) - assert hasattr(actual, "struct") - 
actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.JSON_DTYPE - - -def test_blob_image_blur_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=False - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), dst=series, connection=bq_connection, engine="opencv", verbose=True - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - # Content should be blob objects for GCS destination - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder( - 
images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_blur_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, engine="opencv", verbose=False - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_blur( - (8, 8), connection=bq_connection, 
engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - 
content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_resize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - # verify the files exist - assert not content_series.blob.size().isna().any() - - -def test_blob_image_resize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=False - ) - 
- assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_resize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_resize( - (200, 300), connection=bq_connection, engine="opencv", verbose=True - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_series( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_series_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_uris: list[str], - session: bigframes.Session, -): - series = bpd.Series(images_output_uris, session=session).str.to_blob( - connection=bq_connection - ) - - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, 
- beta=150.0, - norm_type="minmax", - dst=series, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_folder( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - expected_df = pd.DataFrame( - { - "uri": images_output_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - pd.testing.assert_frame_equal( - actual.struct.explode().to_pandas(), - expected_df, - check_dtype=False, - check_index_type=False, - ) - - # verify the files exist - assert not actual.blob.size().isna().any() - - -def test_blob_image_normalize_to_folder_verbose( - images_mm_df: bpd.DataFrame, - bq_connection: str, - images_output_folder: str, - images_output_uris: list[str], -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - dst=images_output_folder, - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = 
actual_exploded["content"] - # Content should be blob objects for GCS destination - assert hasattr(content_series, "blob") - - -def test_blob_image_normalize_to_bq(images_mm_df: bpd.DataFrame, bq_connection: str): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=False, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - assert actual.dtype == dtypes.BYTES_DTYPE - - -def test_blob_image_normalize_to_bq_verbose( - images_mm_df: bpd.DataFrame, bq_connection: str -): - actual = images_mm_df["blob_col"].blob.image_normalize( - alpha=50.0, - beta=150.0, - norm_type="minmax", - connection=bq_connection, - engine="opencv", - verbose=True, - ) - - assert isinstance(actual, bpd.Series) - assert len(actual) == 2 - - assert hasattr(actual, "struct") - actual_exploded = actual.struct.explode() - assert "status" in actual_exploded.columns - assert "content" in actual_exploded.columns - - status_series = actual_exploded["status"] - assert status_series.dtype == dtypes.STRING_DTYPE - - content_series = actual_exploded["content"] - assert content_series.dtype == dtypes.BYTES_DTYPE - - -def test_blob_pdf_extract( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=False, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." 
- expected_len = len(expected_text) - - actual_text = actual[actual != ""].iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_extract_verbose( - pdf_mm_df: bpd.DataFrame, - bq_connection: str, -): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_extract(connection=bq_connection, verbose=True, engine="pypdf") - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = successful_results.apply(lambda x: x["content"]).iloc[0] - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. 
" - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=False, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." - expected_len = len(expected_text) - - # First entry is NA - actual_text = "".join(actual.dropna()) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " - ) - - -def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str): - actual = ( - pdf_mm_df["pdf"] - .blob.pdf_chunk( - connection=bq_connection, - chunk_size=50, - overlap_size=10, - verbose=True, - engine="pypdf", - ) - .explode() - .to_pandas() - ) - - # check relative length - expected_text = "Sample PDF This is a testing file. Some dummy messages are used for testing purposes." 
- expected_len = len(expected_text) - - # The first entry is for a file that doesn't exist, so we check the second one - successful_results = actual[actual.apply(lambda x: x["status"] == "")] - actual_text = "".join(successful_results.apply(lambda x: x["content"]).iloc[0]) - actual_len = len(actual_text) - - relative_length_tolerance = 0.25 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Extracted text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["Sample", "PDF", "testing", "dummy", "messages"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.5-flash", - "gemini-2.5-flash-lite", - ], -) -def test_blob_transcribe( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=False, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. 
- logging.warning(f"blob_transcribe() model {model_name} verbose=False failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=False): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=False): Expected keyword '{keyword}' not found in transcribed text. " - ) - - -@pytest.mark.parametrize( - "model_name", - [ - "gemini-2.5-flash", - "gemini-2.5-flash-lite", - ], -) -def test_blob_transcribe_verbose( - audio_mm_df: bpd.DataFrame, - model_name: str, -): - actual = ( - audio_mm_df["audio"] - .blob.audio_transcribe( - model_name=model_name, # type: ignore - verbose=True, - ) - .to_pandas() - ) - - # check relative length - expected_text = "Now, as all books not primarily intended as picture-books consist principally of types composed to form letterpress" - expected_len = len(expected_text) - - actual_text = actual[0]["content"] - - if pd.isna(actual_text) or actual_text == "": - # Ensure the tests are robust to flakes in the model, which isn't - # particularly useful information for the bigframes team. 
- logging.warning(f"blob_transcribe() model {model_name} verbose=True failure") - return - - actual_len = len(actual_text) - - relative_length_tolerance = 0.2 - min_acceptable_len = expected_len * (1 - relative_length_tolerance) - max_acceptable_len = expected_len * (1 + relative_length_tolerance) - assert min_acceptable_len <= actual_len <= max_acceptable_len, ( - f"Item (verbose=True): Transcribed text length {actual_len} is outside the acceptable range " - f"[{min_acceptable_len:.0f}, {max_acceptable_len:.0f}]. " - f"Expected reference length was {expected_len}. " - ) - - # check for major keywords - major_keywords = ["book", "picture"] - for keyword in major_keywords: - assert keyword.lower() in actual_text.lower(), ( - f"Item (verbose=True): Expected keyword '{keyword}' not found in transcribed text. " - ) diff --git a/packages/bigframes/tests/system/small/blob/test_io.py b/packages/bigframes/tests/system/small/blob/test_io.py deleted file mode 100644 index c89fb4c6e6ed..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_io.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from unittest import mock - -import pandas as pd -import pytest - -import bigframes -import bigframes.pandas as bpd - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -idisplay = pytest.importorskip("IPython.display") - - -def test_blob_create_from_uri_str( - bq_connection: str, session: bigframes.Session, images_uris -): - uri_series = bpd.Series(images_uris, session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - - pd_blob_df = blob_series.struct.explode().to_pandas() - expected_pd_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_from_glob_path( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - blob_df = session.from_glob_path( - images_gcs_path, connection=bq_connection, name="blob_col" - ) - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_blob_create_read_gbq_object_table( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris -): - obj_table = session._create_object_table(images_gcs_path, bq_connection) - - blob_df = session.read_gbq_object_table(obj_table, name="blob_col") - pd_blob_df = ( - blob_df["blob_col"] - .struct.explode() - .to_pandas() - .sort_values("uri") - .reset_index(drop=True) - ) - expected_df = pd.DataFrame( - { - "uri": images_uris, - "version": [None, None], - "authorizer": 
[bq_connection.casefold(), bq_connection.casefold()], - "details": [None, None], - } - ) - - pd.testing.assert_frame_equal( - pd_blob_df, expected_df, check_dtype=False, check_index_type=False - ) - - -def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame): - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - images_mm_df["blob_col"].blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert isinstance(arg, idisplay.Image) - - -def test_display_nulls( - monkeypatch, - bq_connection: str, - session: bigframes.Session, -): - uri_series = bpd.Series([None, None, None], dtype="string", session=session) - blob_series = uri_series.str.to_blob(connection=bq_connection) - mock_display = mock.Mock() - monkeypatch.setattr(idisplay, "display", mock_display) - - blob_series.blob.display() - - for call in mock_display.call_args_list: - args, _ = call - arg = args[0] - assert arg == "" diff --git a/packages/bigframes/tests/system/small/blob/test_urls.py b/packages/bigframes/tests/system/small/blob/test_urls.py deleted file mode 100644 index 0a2db23db20d..000000000000 --- a/packages/bigframes/tests/system/small/blob/test_urls.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -import bigframes.pandas as bpd -import bigframes.bigquery as bbq - -pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) - - -def test_blob_read_url(images_mm_df: bpd.DataFrame): - access_json = bbq.obj.get_access_url(images_mm_df["blob_col"], mode="r") - urls = bbq.json_value(access_json, "$.access_urls.read_url") - - assert urls.str.startswith("https://storage.googleapis.com/").all() - - -def test_blob_write_url(images_mm_df: bpd.DataFrame): - access_json = bbq.obj.get_access_url(images_mm_df["blob_col"], mode="rw") - urls = bbq.json_value(access_json, "$.access_urls.write_url") - - assert urls.str.startswith("https://storage.googleapis.com/").all() From a24ab2812644e30834c3009b2e5eb3bb8307f77a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:16:40 +0000 Subject: [PATCH 19/26] format files --- .../bigframes/bigquery/_operations/ml.py | 2 +- packages/bigframes/bigframes/pandas/io/api.py | 5 ---- .../bigframes/bigframes/session/__init__.py | 7 +++--- .../system/small/blob/test_properties.py | 24 ++++++++++++++----- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/packages/bigframes/bigframes/bigquery/_operations/ml.py b/packages/bigframes/bigframes/bigquery/_operations/ml.py index 412b49b888f5..c6ef1f8bb7a7 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/ml.py +++ b/packages/bigframes/bigframes/bigquery/_operations/ml.py @@ -20,12 +20,12 @@ import google.cloud.bigquery import pandas as pd +import bigframes.core.col as col import bigframes.core.logging.log_adapter as log_adapter import bigframes.core.sql.ml import bigframes.dataframe as dataframe import bigframes.ml.base import bigframes.session -import bigframes.core.col as col from bigframes.bigquery._operations import utils diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index 3c7692b39037..e2737fdbbd1a 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py 
+++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -394,9 +394,6 @@ def read_gbq_model(model_name: str): read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) - - - @overload def read_gbq_query( # type: ignore[overload-overlap] query: str, @@ -623,8 +620,6 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) - - _default_location_lock = threading.Lock() diff --git a/packages/bigframes/bigframes/session/__init__.py b/packages/bigframes/bigframes/session/__init__.py index 1bac3e8a08a5..565ea8503a4c 100644 --- a/packages/bigframes/bigframes/session/__init__.py +++ b/packages/bigframes/bigframes/session/__init__.py @@ -2314,9 +2314,12 @@ def _from_glob_path( ) -> dataframe.DataFrame: """Create a BigFrames DataFrame that contains a BigFrames ObjectRef column from a global wildcard path.""" import bigframes.bigquery as bq + connection = self._create_bq_connection(connection=connection) table = self._create_object_table(path, connection) - s = bq.obj.make_ref(self._loader.read_gbq_table(table)["uri"], authorizer=connection) + s = bq.obj.make_ref( + self._loader.read_gbq_table(table)["uri"], authorizer=connection + ) return s.rename(name).to_frame() def _create_object_table(self, path: str, connection: str) -> str: @@ -2349,8 +2352,6 @@ def _create_object_table(self, path: str, connection: str) -> str: return table - - def _create_temp_view(self, sql: str) -> bigquery.TableReference: """Create a random id view from the sql string.""" return self._anon_dataset_manager.create_temp_view(sql) diff --git a/packages/bigframes/tests/system/small/blob/test_properties.py b/packages/bigframes/tests/system/small/blob/test_properties.py index ec18f05462d3..c3597b37116d 100644 --- a/packages/bigframes/tests/system/small/blob/test_properties.py +++ b/packages/bigframes/tests/system/small/blob/test_properties.py @@ -15,9 +15,9 @@ import pandas as pd import pytest +import bigframes.bigquery as bbq 
import bigframes.dtypes as dtypes import bigframes.pandas as bpd -import bigframes.bigquery as bbq pytest.skip("Skipping blob tests due to b/481790217", allow_module_level=True) @@ -43,7 +43,9 @@ def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): def test_blob_version(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.version").to_pandas() + actual = bbq.json_value( + images_mm_df["blob_col"].struct.field("details"), "$.version" + ).to_pandas() expected = pd.Series(["1753907851152593", "1753907851111538"], name="version") pd.testing.assert_series_equal( @@ -76,7 +78,9 @@ def test_blob_metadata(images_mm_df: bpd.DataFrame): def test_blob_content_type(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.content_type").to_pandas() + actual = bbq.json_value( + images_mm_df["blob_col"].struct.field("details"), "$.content_type" + ).to_pandas() expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") pd.testing.assert_series_equal( @@ -85,7 +89,9 @@ def test_blob_content_type(images_mm_df: bpd.DataFrame): def test_blob_md5_hash(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.md5_hash").to_pandas() + actual = bbq.json_value( + images_mm_df["blob_col"].struct.field("details"), "$.md5_hash" + ).to_pandas() expected = pd.Series( ["e130ad042261a1883cd2cc06831cf748", "e2ae3191ff2b809fd0935f01a537c650"], name="md5_hash", @@ -97,7 +103,11 @@ def test_blob_md5_hash(images_mm_df: bpd.DataFrame): def test_blob_size(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.size").astype("Int64").to_pandas() + actual = ( + bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.size") + .astype("Int64") + .to_pandas() + ) expected = pd.Series([338390, 43333], name="size") pd.testing.assert_series_equal( @@ -106,7 
+116,9 @@ def test_blob_size(images_mm_df: bpd.DataFrame): def test_blob_updated(images_mm_df: bpd.DataFrame): - actual = bbq.json_value(images_mm_df["blob_col"].struct.field("details"), "$.updated").to_pandas() + actual = bbq.json_value( + images_mm_df["blob_col"].struct.field("details"), "$.updated" + ).to_pandas() expected = pd.Series( [ pd.Timestamp("2025-07-30 20:37:31", tz="UTC"), From a72a4a53e2e7c008a631f5cc2c98c273aa068aea Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:18:19 +0000 Subject: [PATCH 20/26] restore notebook from main --- ...with-bigframes-over-national-jukebox.ipynb | 662 ++++++++---------- 1 file changed, 304 insertions(+), 358 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index 3fd66abcbb44..4faff4b8e768 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -1,8 +1,23 @@ { "cells": [ { - "id": "c62e292f", "cell_type": "markdown", + "metadata": { + "@deathbeds/jupyterlab-fonts": { + "styles": { + "": { + "body[data-jp-deck-mode='presenting'] &": { + "zoom": "194%" + } + } + } + }, + "editable": true, + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] + }, "source": [ "# Creating a searchable index of the National Jukebox\n", "\n", @@ -20,42 +35,42 @@ "To follow along, you'll need a Google Cloud project\n", "\n", "* Go to https://cloud.google.com/free to start a free trial." 
- ], + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { "body[data-jp-deck-mode='presenting'] &": { - "zoom": "194%" + "z-index": "0", + "zoom": "216%" } } } }, - "editable": true, "slideshow": { - "slide_type": "subslide" - }, - "tags": [] + "slide_type": "slide" + } }, - "execution_count": null - }, - { - "id": "7dc312a4", - "cell_type": "markdown", "source": [ "The National Jukebox is a project of the USA Library of Congress to provide access to thousands of acoustic sound recordings from the very earliest days of the commercial record industry.\n", "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", "\"recording" - ], + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", - "zoom": "216%" + "zoom": "181%" } } } @@ -64,11 +79,6 @@ "slide_type": "slide" } }, - "execution_count": null - }, - { - "id": "07dcae4b", - "cell_type": "markdown", "source": [ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", @@ -86,32 +96,10 @@ "3. BigQuery DataFrames to use Python instead of SQL.\n", "\n", " https://cloud.google.com/bigquery/docs/bigquery-dataframes-introduction" - ], - "metadata": { - "@deathbeds/jupyterlab-fonts": { - "styles": { - "": { - "body[data-jp-deck-mode='presenting'] &": { - "z-index": "0", - "zoom": "181%" - } - } - } - }, - "slideshow": { - "slide_type": "slide" - } - }, - "execution_count": null + ] }, { - "id": "8dd2ddab", "cell_type": "markdown", - "source": [ - "## Getting started with BigQuery DataFrames (bigframes)\n", - "\n", - "Install the bigframes package." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -126,14 +114,15 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Getting started with BigQuery DataFrames (bigframes)\n", + "\n", + "Install the bigframes package." + ] }, { - "id": "96cda443", "cell_type": "code", - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -153,17 +142,13 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ] }, { - "id": "acf12472", "cell_type": "markdown", - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -176,17 +161,15 @@ } } }, - "execution_count": null + "source": [ + "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. 
Then," + ] }, { - "id": "fd321077", "cell_type": "code", - "source": [ - "from kaggle_secrets import UserSecretsClient\n", - "user_secrets = UserSecretsClient()\n", - "user_credential = user_secrets.get_gcloud_credential()\n", - "user_secrets.set_tensorflow_credential(user_credential)" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:53:08.494636Z", @@ -197,21 +180,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from kaggle_secrets import UserSecretsClient\n", + "user_secrets = UserSecretsClient()\n", + "user_credential = user_secrets.get_gcloud_credential()\n", + "user_secrets.set_tensorflow_credential(user_credential)" + ] }, { - "id": "4d837a34", "cell_type": "code", - "source": [ - "import bigframes._config\n", - "import bigframes.pandas as bpd\n", - "\n", - "bpd.options.bigquery.location = \"US\"\n", - "\n", - "# Set to your GCP project ID.\n", - "bpd.options.bigquery.project = \"swast-scratch\"" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -231,17 +210,19 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes._config\n", + "import bigframes.pandas as bpd\n", + "\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "# Set to your GCP project ID.\n", + "bpd.options.bigquery.project = \"swast-scratch\"" + ] }, { - "id": "008f0a87", "cell_type": "markdown", - "source": [ - "## Reading data\n", - "\n", - "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -256,19 +237,15 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Reading data\n", + "\n", + "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." + ] }, { - "id": "9a4b35ab", "cell_type": "code", - "source": [ - "df = bpd.read_json(\n", - " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", - " engine=\"bigquery\",\n", - " orient=\"records\",\n", - " lines=True,\n", - ")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -288,16 +265,19 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df = bpd.read_json(\n", + " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", + " engine=\"bigquery\",\n", + " orient=\"records\",\n", + " lines=True,\n", + ")" + ] }, { - "id": "e00dcb01", "cell_type": "code", - "source": [ - "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", - "df.peek()" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -320,15 +300,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", + "df.peek()" + ] }, { - "id": "335511be", "cell_type": "code", - "source": [ - "df.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -348,18 +328,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df.shape" + ] }, { - "id": "595126a1", "cell_type": "code", - "source": [ - "# For the purposes of a demo, select only a subset of rows.\n", - 
"df = df.sample(n=250)\n", - "df.cache()\n", - "df.shape" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:55:55.448664Z", @@ -370,32 +346,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# For the purposes of a demo, select only a subset of rows.\n", + "df = df.sample(n=250)\n", + "df.cache()\n", + "df.shape" + ] }, { - "id": "cbd59dd9", "cell_type": "code", - "source": [ - "# As a side effect of how I extracted the song information from the HTML DOM,\n", - "# we ended up with lists in places where we only expect one item.\n", - "#\n", - "# We can \"explode\" to flatten these lists.\n", - "flattened = df.explode([\n", - " \"Recording Repository\",\n", - " \"Recording Label\",\n", - " \"Recording Take Number\",\n", - " \"Recording Date\",\n", - " \"Recording Matrix Number\",\n", - " \"Recording Catalog Number\",\n", - " \"Media Size\",\n", - " \"Recording Location\",\n", - " \"Summary\",\n", - " \"Rights Advisory\",\n", - " \"Title\",\n", - "])\n", - "flattened.peek()" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -418,15 +379,31 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# As a side effect of how I extracted the song information from the HTML DOM,\n", + "# we ended up with lists in places where we only expect one item.\n", + "#\n", + "# We can \"explode\" to flatten these lists.\n", + "flattened = df.explode([\n", + " \"Recording Repository\",\n", + " \"Recording Label\",\n", + " \"Recording Take Number\",\n", + " \"Recording Date\",\n", + " \"Recording Matrix Number\",\n", + " \"Recording Catalog Number\",\n", + " \"Media Size\",\n", + " \"Recording Location\",\n", + " \"Summary\",\n", + " \"Rights Advisory\",\n", + " \"Title\",\n", + "])\n", + "flattened.peek()" + ] }, { - "id": "84548649", "cell_type": "code", - "source": [ - "flattened.shape" - ], + 
"execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:56:06.546531Z", @@ -437,15 +414,13 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened.shape" + ] }, { - "id": "8be3127f", "cell_type": "markdown", - "source": [ - "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -462,20 +437,13 @@ }, "tags": [] }, - "execution_count": null + "source": [ + "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." + ] }, { - "id": "31277e21", "cell_type": "code", - "source": [ - "flattened = flattened.assign(**{\n", - " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", - " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", - "})\n", - "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", - "# Note: str.to_blob is deprecated.\n", - "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -500,15 +468,18 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened = flattened.assign(**{\n", + " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", + " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", + "})\n", + "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", + "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" + ] }, { - 
"id": "d27756f5", "cell_type": "markdown", - "source": [ - "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -525,20 +496,13 @@ }, "tags": [] }, - "execution_count": null + "source": [ + "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." + ] }, { - "id": "d1f7ad46", "cell_type": "code", - "source": [ - "# Note: .blob.audio_transcribe is removed. This cell will fail.\n", - "# Use bigframes.bigquery.ai.generate instead.\n", - "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", - " model_name=\"gemini-2.0-flash-001\",\n", - " verbose=True,\n", - ")\n", - "flattened[\"Transcription\"]" - ], + "execution_count": null, "metadata": { "editable": true, "execution": { @@ -554,15 +518,17 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", + " model_name=\"gemini-2.5-flash\",\n", + " verbose=True,\n", + ")\n", + "flattened[\"Transcription\"]" + ] }, { - "id": "1575c468", "cell_type": "markdown", - "source": [ - "Sometimes the model has transient errors. Check the status column to see if there are errors." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -577,16 +543,13 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "Sometimes the model has transient errors. Check the status column to see if there are errors." 
+ ] }, { - "id": "e53c7a0b", "cell_type": "code", - "source": [ - "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", - "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", - "flattened.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -611,16 +574,16 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", + "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", + "flattened.shape" + ] }, { - "id": "3629f4af", "cell_type": "code", - "source": [ - "# Show transcribed lyrics.\n", - "flattened[\"Transcription\"].struct.field(\"content\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -640,19 +603,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Show transcribed lyrics.\n", + "flattened[\"Transcription\"].struct.field(\"content\")" + ] }, { - "id": "09ef6c3d", "cell_type": "code", - "source": [ - "# Find all instrumentatal songs\n", - "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", - "print(instrumental.shape)\n", - "song = instrumental.peek(1)\n", - "song" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -675,22 +634,18 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Find all instrumentatal songs\n", + "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", + "print(instrumental.shape)\n", + "song = instrumental.peek(1)\n", + "song" + ] }, { - "id": "cf15986a", "cell_type": "code", - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", 
- "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -715,19 +670,20 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] - }, - { - "id": "778d0ac3", - "cell_type": "markdown", + "outputs": [], "source": [ - "## Creating a searchable index\n", + "import gcsfs\n", + "import IPython.display\n", "\n", - "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", "\n", - "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" - ], + "IPython.display.Audio(song_bytes)" + ] + }, + { + "cell_type": "markdown", "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -742,16 +698,17 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Creating a searchable index\n", + "\n", + "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "\n", + "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" + ] }, { - "id": "de7e4e11", "cell_type": "code", - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -771,21 +728,16 @@ }, "trusted": true }, - "execution_count": null, - "outputs": 
[] + "outputs": [], + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" + ] }, { - "id": "4acfb495", "cell_type": "code", - "source": [ - "df_to_index = (\n", - " flattened\n", - " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", - " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", - ")\n", - "embedding = text_model.predict(df_to_index)\n", - "embedding.peek(1)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -805,18 +757,20 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "df_to_index = (\n", + " flattened\n", + " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", + " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", + ")\n", + "embedding = text_model.predict(df_to_index)\n", + "embedding.peek(1)" + ] }, { - "id": "a49d1dde", "cell_type": "code", - "source": [ - "# Check the status column to look for errors.\n", - "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", - "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", - "embedding.shape" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -841,15 +795,16 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Check the status column to look for errors.\n", + "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", + "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", + "embedding.shape" + ] }, { - "id": "15a5bfd3", "cell_type": "markdown", - "source": [ - "We're now ready to save this to a table." 
- ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -861,15 +816,13 @@ } } }, - "execution_count": null + "source": [ + "We're now ready to save this to a table." + ] }, { - "id": "8b49384c", "cell_type": "code", - "source": [ - "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", - "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -889,20 +842,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", + "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" + ] }, { - "id": "810c77d5", "cell_type": "markdown", - "source": [ - "## Searching the database\n", - "\n", - "To search by semantics, we:\n", - "\n", - "1. Turn our search string into an embedding using the same model as our index.\n", - "2. Find the closest matches to the search string." - ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -917,17 +864,18 @@ "slide_type": "slide" } }, - "execution_count": null + "source": [ + "## Searching the database\n", + "\n", + "To search by semantics, we:\n", + "\n", + "1. Turn our search string into an embedding using the same model as our index.\n", + "2. Find the closest matches to the search string." 
+ ] }, { - "id": "fb63ad94", "cell_type": "code", - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "df_written = bpd.read_gbq(embedding_table_id)\n", - "df_written.peek(1)" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -950,22 +898,17 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "df_written = bpd.read_gbq(embedding_table_id)\n", + "df_written.peek(1)" + ] }, { - "id": "f19c88d3", "cell_type": "code", - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "search_string = \"walking home\"\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", - "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", - "search_embedding = text_model.predict(search_df)\n", - "search_embedding" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -985,24 +928,21 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "search_string = \"walking home\"\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", + "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", + "search_embedding = text_model.predict(search_df)\n", + "search_embedding" + ] }, { - "id": "06f0312e", "cell_type": "code", - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "vector_search_results = bbq.vector_search(\n", - " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", - " column_to_search=\"ml_generate_embedding_result\",\n", - " query=search_embedding,\n", - " distance_type=\"COSINE\",\n", - " query_column_to_search=\"ml_generate_embedding_result\",\n", - " top_k=5,\n", - ")" - ], + "execution_count": null, "metadata": { 
"@deathbeds/jupyterlab-fonts": { "styles": { @@ -1027,15 +967,23 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "vector_search_results = bbq.vector_search(\n", + " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", + " column_to_search=\"ml_generate_embedding_result\",\n", + " query=search_embedding,\n", + " distance_type=\"COSINE\",\n", + " query_column_to_search=\"ml_generate_embedding_result\",\n", + " top_k=5,\n", + ")" + ] }, { - "id": "fae3fcae", "cell_type": "code", - "source": [ - "vector_search_results.dtypes" - ], + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-08-14T16:05:50.566930Z", @@ -1046,16 +994,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "vector_search_results.dtypes" + ] }, { - "id": "38423dde", "cell_type": "code", - "source": [ - "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", - "results" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -1078,15 +1024,15 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", + "results" + ] }, { - "id": "37a1dfbd", "cell_type": "code", - "source": [ - "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" - ], + "execution_count": null, "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { @@ -1106,22 +1052,14 @@ }, "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" + ] }, { - "id": "a4748e0f", "cell_type": 
"code", - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ], + "execution_count": null, "metadata": { "editable": true, "execution": { @@ -1138,18 +1076,26 @@ "tags": [], "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ] }, { - "id": "ff22e7eb", "cell_type": "code", - "source": [], + "execution_count": null, "metadata": { "trusted": true }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [] } ], "metadata": { @@ -1186,6 +1132,6 @@ "version": "3.11.13" } }, - "nbformat_minor": 4, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 4 } From 983ef098fe9e013c398d8bcc16e9605ab7ef8c6a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:30:08 +0000 Subject: [PATCH 21/26] remove to_blob usage --- ...with-bigframes-over-national-jukebox.ipynb | 689 +++++++++--------- 1 file changed, 352 insertions(+), 337 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index 4faff4b8e768..fe68d0107bfd 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb +++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -1,23 +1,8 @@ { "cells": [ { + "id": "f4ece66a", "cell_type": "markdown", - "metadata": { - "@deathbeds/jupyterlab-fonts": { - "styles": { - "": { - 
"body[data-jp-deck-mode='presenting'] &": { - "zoom": "194%" - } - } - } - }, - "editable": true, - "slideshow": { - "slide_type": "subslide" - }, - "tags": [] - }, "source": [ "# Creating a searchable index of the National Jukebox\n", "\n", @@ -35,42 +20,42 @@ "To follow along, you'll need a Google Cloud project\n", "\n", "* Go to https://cloud.google.com/free to start a free trial." - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "z-index": "0", - "zoom": "216%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "194%" } } } }, + "editable": true, "slideshow": { - "slide_type": "slide" - } + "slide_type": "subslide" + }, + "tags": [] }, + "execution_count": null + }, + { + "id": "bc01a1d3", + "cell_type": "markdown", "source": [ "The National Jukebox is a project of the USA Library of Congress to provide access to thousands of acoustic sound recordings from the very earliest days of the commercial record industry.\n", "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\"recording" - ] - }, - { - "cell_type": "markdown", + "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "0", - "zoom": "181%" + "zoom": "216%" } } } @@ -79,11 +64,16 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "4fc7c468", + "cell_type": "markdown", "source": [ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\"audio\n", + "\u003cimg 
src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02Hzz
zeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr
0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0GdNEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoO
W0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cROXPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5Z
VL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzYmUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7
F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/
Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4l
lvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBrJ+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKam
H4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8w
mT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xK
G4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMngXzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" 
/\u003e\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -96,16 +86,14 @@ "3. BigQuery DataFrames to use Python instead of SQL.\n", "\n", " https://cloud.google.com/bigquery/docs/bigquery-dataframes-introduction" - ] - }, - { - "cell_type": "markdown", + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { - "zoom": "275%" + "body[data-jp-deck-mode='presenting'] \u0026": { + "z-index": "0", + "zoom": "181%" } } } @@ -114,20 +102,43 @@ "slide_type": "slide" } }, + "execution_count": null + }, + { + "id": "90f2e543", + "cell_type": "markdown", "source": [ "## Getting started with BigQuery DataFrames (bigframes)\n", "\n", "Install the bigframes package." - ] + ], + "metadata": { + "@deathbeds/jupyterlab-fonts": { + "styles": { + "": { + "body[data-jp-deck-mode='presenting'] \u0026": { + "zoom": "275%" + } + } + } + }, + "slideshow": { + "slide_type": "slide" + } + }, + "execution_count": null }, { + "id": "56694cb4", "cell_type": "code", - "execution_count": null, + "source": [ + "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "214%" } } @@ -142,18 +153,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "%pip install --upgrade bigframes google-cloud-automl google-cloud-translate google-ai-generativelanguage tensorflow " - ] + "execution_count": null }, { + "id": "fa84ad03", "cell_type": "markdown", + "source": [ + "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "\n", + "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. 
Then," + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "z-index": "4", "zoom": "236%" } @@ -161,15 +175,17 @@ } } }, - "source": [ - "**Important:** restart the kernel by going to \"Run -> Restart & clear cell outputs\" before continuing.\n", - "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," - ] + "execution_count": null }, { + "id": "1fbd4f9e", "cell_type": "code", - "execution_count": null, + "source": [ + "from kaggle_secrets import UserSecretsClient\n", + "user_secrets = UserSecretsClient()\n", + "user_credential = user_secrets.get_gcloud_credential()\n", + "user_secrets.set_tensorflow_credential(user_credential)" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:53:08.494636Z", @@ -180,22 +196,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from kaggle_secrets import UserSecretsClient\n", - "user_secrets = UserSecretsClient()\n", - "user_credential = user_secrets.get_gcloud_credential()\n", - "user_secrets.set_tensorflow_credential(user_credential)" - ] + "execution_count": null }, { + "id": "0b0b1cd8", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes._config\n", + "import bigframes.pandas as bpd\n", + "\n", + "bpd.options.bigquery.location = \"US\"\n", + "\n", + "# Set to your GCP project ID.\n", + "bpd.options.bigquery.project = \"swast-scratch\"" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "193%" } } @@ -210,24 +229,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes._config\n", - "import bigframes.pandas as bpd\n", - "\n", - "bpd.options.bigquery.location = \"US\"\n", - "\n", - "# Set to your GCP project ID.\n", - 
"bpd.options.bigquery.project = \"swast-scratch\"" - ] + "execution_count": null }, { + "id": "32e58a7f", "cell_type": "markdown", + "source": [ + "## Reading data\n", + "\n", + "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "207%" } } @@ -237,20 +253,24 @@ "slide_type": "slide" } }, - "source": [ - "## Reading data\n", - "\n", - "BigQuery DataFrames can read data from BigQuery, GCS, or even local sources. With `engine=\"bigquery\"`, BigQuery's distributed processing reads the file without it ever having to reach your local Python environment." - ] + "execution_count": null }, { + "id": "e52aa9e8", "cell_type": "code", - "execution_count": null, + "source": [ + "df = bpd.read_json(\n", + " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", + " engine=\"bigquery\",\n", + " orient=\"records\",\n", + " lines=True,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "225%" } } @@ -265,24 +285,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df = bpd.read_json(\n", - " \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/jukebox.jsonl\",\n", - " engine=\"bigquery\",\n", - " orient=\"records\",\n", - " lines=True,\n", - ")" - ] + "execution_count": null }, { + "id": "0c1fca97", "cell_type": "code", - "execution_count": null, + "source": [ + "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", + "df.peek()" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "122%" } } @@ -300,20 +316,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Use `peek()` instead of `head()` to see arbitrary rows rather than the \"first\" rows.\n", - "df.peek()" - ] + "execution_count": null }, { + "id": "4a13e789", "cell_type": "code", - "execution_count": null, + "source": [ + "df.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "134%" } } @@ -328,14 +343,17 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df.shape" - ] + "execution_count": null }, { + "id": "26b8baba", "cell_type": "code", - "execution_count": null, + "source": [ + "# For the purposes of a demo, select only a subset of rows.\n", + "df = df.sample(n=250)\n", + "df.cache()\n", + "df.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:55:55.448664Z", @@ -346,22 +364,36 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# For the purposes of a demo, select only a subset of rows.\n", - "df = df.sample(n=250)\n", - "df.cache()\n", - "df.shape" - ] + "execution_count": null }, { + "id": "af84cb21", "cell_type": "code", - "execution_count": null, + "source": [ + "# As a side effect of how I extracted the song information from the HTML DOM,\n", + "# we ended up with lists in places where we only expect one item.\n", + "#\n", + "# We can \"explode\" to flatten these lists.\n", + "flattened = df.explode([\n", + " \"Recording Repository\",\n", + " \"Recording Label\",\n", + " \"Recording Take Number\",\n", + " \"Recording Date\",\n", + " \"Recording Matrix Number\",\n", + " \"Recording Catalog Number\",\n", + " \"Media Size\",\n", + " \"Recording Location\",\n", + " \"Summary\",\n", + " \"Rights Advisory\",\n", + " \"Title\",\n", + "])\n", + "flattened.peek()" + ], "metadata": { 
"@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "161%" } } @@ -379,31 +411,14 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# As a side effect of how I extracted the song information from the HTML DOM,\n", - "# we ended up with lists in places where we only expect one item.\n", - "#\n", - "# We can \"explode\" to flatten these lists.\n", - "flattened = df.explode([\n", - " \"Recording Repository\",\n", - " \"Recording Label\",\n", - " \"Recording Take Number\",\n", - " \"Recording Date\",\n", - " \"Recording Matrix Number\",\n", - " \"Recording Catalog Number\",\n", - " \"Media Size\",\n", - " \"Recording Location\",\n", - " \"Summary\",\n", - " \"Rights Advisory\",\n", - " \"Title\",\n", - "])\n", - "flattened.peek()" - ] + "execution_count": null }, { + "id": "085deffd", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened.shape" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T15:56:06.546531Z", @@ -414,18 +429,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "flattened.shape" - ] + "execution_count": null }, { + "id": "f8e653ee", "cell_type": "markdown", + "source": [ + "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "216%" } } @@ -437,18 +453,19 @@ }, "tags": [] }, - "source": [ - "To access unstructured data from BigQuery, create a URI pointing to a file in Google Cloud Storage (GCS). Then, construct a \"blob\" (also known as an \"Object Ref\" in BigQuery terms) so that BigQuery can read from GCS." 
- ] + "execution_count": null }, { + "id": "dbd1a844", "cell_type": "code", - "execution_count": null, + "source": [ + "flattened = flattened.assign(**{\n \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n})\nflattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "211%" } } @@ -468,23 +485,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened = flattened.assign(**{\n", - " \"GCS Prefix\": \"gs://cloud-samples-data/third-party/usa-loc-national-jukebox/\",\n", - " \"GCS Stub\": flattened['URL'].str.extract(r'/(jukebox-[0-9]+)/'),\n", - "})\n", - "flattened[\"GCS URI\"] = flattened[\"GCS Prefix\"] + flattened[\"GCS Stub\"] + \".mp3\"\n", - "flattened[\"GCS Blob\"] = flattened[\"GCS URI\"].str.to_blob()" - ] + "execution_count": null }, { + "id": "fae13ec5", "cell_type": "markdown", + "source": [ + "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "317%" } } @@ -496,13 +509,14 @@ }, "tags": [] }, - "source": [ - "BigQuery (and BigQuery DataFrames) provide access to powerful models and multimodal capabilities. Here, we transcribe audio to text." - ] + "execution_count": null }, { + "id": "f08f92b1", "cell_type": "code", - "execution_count": null, + "source": [ + "# Code calling .blob.audio_transcribe() was removed to satisfy the goal of removing public Blob APIs." 
+ ], "metadata": { "editable": true, "execution": { @@ -518,22 +532,19 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "flattened[\"Transcription\"] = flattened[\"GCS Blob\"].blob.audio_transcribe(\n", - " model_name=\"gemini-2.5-flash\",\n", - " verbose=True,\n", - ")\n", - "flattened[\"Transcription\"]" - ] + "execution_count": null }, { + "id": "30969ae1", "cell_type": "markdown", + "source": [ + "Sometimes the model has transient errors. Check the status column to see if there are errors." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "229%" } } @@ -543,18 +554,21 @@ "slide_type": "slide" } }, - "source": [ - "Sometimes the model has transient errors. Check the status column to see if there are errors." - ] + "execution_count": null }, { + "id": "7d0dbc38", "cell_type": "code", - "execution_count": null, + "source": [ + "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", + "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", + "flattened.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "177%" } } @@ -574,21 +588,20 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "print(f\"Successful rows: {(flattened['Transcription'].struct.field('status') == '').sum()}\")\n", - "print(f\"Failed rows: {(flattened['Transcription'].struct.field('status') != '').sum()}\")\n", - "flattened.shape" - ] + "execution_count": null }, { + "id": "6cddf53b", "cell_type": "code", - "execution_count": null, + "source": [ + "# Show transcribed lyrics.\n", + "flattened[\"Transcription\"].struct.field(\"content\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "141%" } } @@ -603,20 +616,23 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Show transcribed lyrics.\n", - "flattened[\"Transcription\"].struct.field(\"content\")" - ] + "execution_count": null }, { + "id": "ba0386cc", "cell_type": "code", - "execution_count": null, + "source": [ + "# Find all instrumentatal songs\n", + "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", + "print(instrumental.shape)\n", + "song = instrumental.peek(1)\n", + "song" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -634,23 +650,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "# Find all instrumentatal songs\n", - "instrumental = flattened[flattened[\"Transcription\"].struct.field(\"content\") == \"\"]\n", - "print(instrumental.shape)\n", - "song = instrumental.peek(1)\n", - "song" - ] + "execution_count": null }, { + "id": "61a883b2", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "152%" } } @@ -670,25 +689,23 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(song[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + 
"execution_count": null }, { + "id": "e8a25c46", "cell_type": "markdown", + "source": [ + "## Creating a searchable index\n", + "\n", + "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", + "\n", + "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "181%" } } @@ -698,22 +715,21 @@ "slide_type": "slide" } }, - "source": [ - "## Creating a searchable index\n", - "\n", - "To be able to search by semantics rather than just text, generate embeddings and then create an index to efficiently search these.\n", - "\n", - "See also, this example: https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/bq_dataframes_llm_vector_search.ipynb" - ] + "execution_count": null }, { + "id": "ead0fa8c", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "163%" } } @@ -728,21 +744,25 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")" - ] + "execution_count": null }, { + "id": "5ed7776d", "cell_type": "code", - "execution_count": null, + "source": [ + "df_to_index = (\n", + " flattened\n", + " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", + " 
[flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", + ")\n", + "embedding = text_model.predict(df_to_index)\n", + "embedding.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "125%" } } @@ -757,25 +777,22 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "df_to_index = (\n", - " flattened\n", - " .assign(content=flattened[\"Transcription\"].struct.field(\"content\"))\n", - " [flattened[\"Transcription\"].struct.field(\"content\") != \"\"]\n", - ")\n", - "embedding = text_model.predict(df_to_index)\n", - "embedding.peek(1)" - ] + "execution_count": null }, { + "id": "c96e9832", "cell_type": "code", - "execution_count": null, + "source": [ + "# Check the status column to look for errors.\n", + "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", + "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", + "embedding.shape" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "178%" } } @@ -795,39 +812,39 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "# Check the status column to look for errors.\n", - "print(f\"Successful rows: {(embedding['ml_generate_embedding_status'] == '').sum()}\")\n", - "print(f\"Failed rows: {(embedding['ml_generate_embedding_status'] != '').sum()}\")\n", - "embedding.shape" - ] + "execution_count": null }, { + "id": "0e2a5d7b", "cell_type": "markdown", + "source": [ + "We're now ready to save this to a table." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "224%" } } } } }, - "source": [ - "We're now ready to save this to a table." 
- ] + "execution_count": null }, { + "id": "51819a0c", "cell_type": "code", - "execution_count": null, + "source": [ + "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", + "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "172%" } } @@ -842,19 +859,24 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "embedding_table_id = f\"{bpd.options.bigquery.project}.kaggle.national_jukebox\"\n", - "embedding.to_gbq(embedding_table_id, if_exists=\"replace\")" - ] + "execution_count": null }, { + "id": "5e16fb14", "cell_type": "markdown", + "source": [ + "## Searching the database\n", + "\n", + "To search by semantics, we:\n", + "\n", + "1. Turn our search string into an embedding using the same model as our index.\n", + "2. Find the closest matches to the search string." + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "183%" } } @@ -864,23 +886,22 @@ "slide_type": "slide" } }, - "source": [ - "## Searching the database\n", - "\n", - "To search by semantics, we:\n", - "\n", - "1. Turn our search string into an embedding using the same model as our index.\n", - "2. Find the closest matches to the search string." 
- ] + "execution_count": null }, { + "id": "1bad3317", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "df_written = bpd.read_gbq(embedding_table_id)\n", + "df_written.peek(1)" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "92%" } } @@ -898,22 +919,26 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "df_written = bpd.read_gbq(embedding_table_id)\n", - "df_written.peek(1)" - ] + "execution_count": null }, { + "id": "8aaaef1f", "cell_type": "code", - "execution_count": null, + "source": [ + "from bigframes.ml.llm import TextEmbeddingGenerator\n", + "\n", + "search_string = \"walking home\"\n", + "\n", + "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", + "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", + "search_embedding = text_model.predict(search_df)\n", + "search_embedding" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "127%" } } @@ -928,26 +953,28 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "from bigframes.ml.llm import TextEmbeddingGenerator\n", - "\n", - "search_string = \"walking home\"\n", - "\n", - "text_model = TextEmbeddingGenerator(model_name=\"text-multilingual-embedding-002\")\n", - "search_df = bpd.DataFrame([search_string], columns=['search_string'])\n", - "search_embedding = text_model.predict(search_df)\n", - "search_embedding" - ] + "execution_count": null }, { + "id": "908a2340", "cell_type": "code", - "execution_count": null, + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "vector_search_results = bbq.vector_search(\n", + " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", 
+ " column_to_search=\"ml_generate_embedding_result\",\n", + " query=search_embedding,\n", + " distance_type=\"COSINE\",\n", + " query_column_to_search=\"ml_generate_embedding_result\",\n", + " top_k=5,\n", + ")" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "175%" } } @@ -967,23 +994,14 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "vector_search_results = bbq.vector_search(\n", - " base_table=f\"swast-scratch.scipy2025.national_jukebox\",\n", - " column_to_search=\"ml_generate_embedding_result\",\n", - " query=search_embedding,\n", - " distance_type=\"COSINE\",\n", - " query_column_to_search=\"ml_generate_embedding_result\",\n", - " top_k=5,\n", - ")" - ] + "execution_count": null }, { + "id": "f84ebe70", "cell_type": "code", - "execution_count": null, + "source": [ + "vector_search_results.dtypes" + ], "metadata": { "execution": { "iopub.execute_input": "2025-08-14T16:05:50.566930Z", @@ -994,19 +1012,20 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "vector_search_results.dtypes" - ] + "execution_count": null }, { + "id": "eeff1c72", "cell_type": "code", - "execution_count": null, + "source": [ + "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", + "results" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "158%" } } @@ -1024,20 +1043,19 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "results = vector_search_results[[\"Title\", \"Summary\", \"Names\", \"GCS URI\", \"Transcription\", \"distance\"]].sort_values(\"distance\").to_pandas()\n", - "results" - ] + "execution_count": null }, { + "id": "7ec53675", "cell_type": 
"code", - "execution_count": null, + "source": [ + "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" + ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] &": { + "body[data-jp-deck-mode='presenting'] \u0026": { "zoom": "138%" } } @@ -1052,14 +1070,21 @@ }, "trusted": true }, - "outputs": [], - "source": [ - "print(results[\"Transcription\"].struct.field(\"content\").iloc[0])" - ] + "execution_count": null }, { + "id": "a96552fb", "cell_type": "code", - "execution_count": null, + "source": [ + "import gcsfs\n", + "import IPython.display\n", + "\n", + "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", + "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", + " song_bytes = song_file.read()\n", + "\n", + "IPython.display.Audio(song_bytes)" + ], "metadata": { "editable": true, "execution": { @@ -1076,26 +1101,16 @@ "tags": [], "trusted": true }, - "outputs": [], - "source": [ - "import gcsfs\n", - "import IPython.display\n", - "\n", - "fs = gcsfs.GCSFileSystem(project='bigframes-dev')\n", - "with fs.open(results[\"GCS URI\"].iloc[0]) as song_file:\n", - " song_bytes = song_file.read()\n", - "\n", - "IPython.display.Audio(song_bytes)" - ] + "execution_count": null }, { + "id": "72af7c7f", "cell_type": "code", - "execution_count": null, + "source": [], "metadata": { "trusted": true }, - "outputs": [], - "source": [] + "execution_count": null } ], "metadata": { @@ -1132,6 +1147,6 @@ "version": "3.11.13" } }, - "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 4, + "nbformat": 4 } From 25a9fd5b147639bc50db7668fb84374e449e6dff Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 00:30:37 +0000 Subject: [PATCH 22/26] update notebooks and apis --- .../bigframes/bigframes/pandas/__init__.py | 2 + packages/bigframes/bigframes/pandas/io/api.py | 13 + .../generative_ai/ai_movie_poster.ipynb | 1368 +++++++++-------- .../multimodal/multimodal_dataframe.ipynb | 1259 
+++++++-------- 4 files changed, 1343 insertions(+), 1299 deletions(-) diff --git a/packages/bigframes/bigframes/pandas/__init__.py b/packages/bigframes/bigframes/pandas/__init__.py index 11938a887785..34ec3037e92f 100644 --- a/packages/bigframes/bigframes/pandas/__init__.py +++ b/packages/bigframes/bigframes/pandas/__init__.py @@ -98,6 +98,7 @@ from bigframes.pandas import api from bigframes.pandas.core.api import to_timedelta from bigframes.pandas.io.api import ( + _from_glob_path, _read_gbq_colab, read_arrow, read_avro, @@ -436,6 +437,7 @@ def reset_session(): pass _functions = [ + _from_glob_path, clean_up_by_session_id, concat, crosstab, diff --git a/packages/bigframes/bigframes/pandas/io/api.py b/packages/bigframes/bigframes/pandas/io/api.py index e2737fdbbd1a..29a50381acc9 100644 --- a/packages/bigframes/bigframes/pandas/io/api.py +++ b/packages/bigframes/bigframes/pandas/io/api.py @@ -620,6 +620,19 @@ def read_gbq_function( read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) +def _from_glob_path( + path: str, *, connection: Optional[str] = None, name: Optional[str] = None +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session._from_glob_path, + path=path, + connection=connection, + name=name, + ) + + +_from_glob_path.__doc__ = inspect.getdoc(bigframes.session.Session._from_glob_path) + _default_location_lock = threading.Lock() diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index b25e2b556e65..8a19830358de 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -1,732 +1,752 @@ { - "cells": [ + "cells": [ + { + "id": "7add2e44", + "cell_type": "code", + "source": [ + "# Copyright 2026 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the 
\"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ], + "metadata": { + "id": "XZpKUoHjXw3_" + }, + "execution_count": 1 + }, + { + "id": "ee509844", + "cell_type": "markdown", + "source": [ + "# Analyzing movie posters with BigQuery Dataframe AI functions" + ], + "metadata": { + "id": "SEKzWP6jW9Oj" + }, + "execution_count": null + }, + { + "id": "81b8de8d", + "cell_type": "markdown", + "source": [ + "\u003ctable align=\"left\"\u003e\n", + "\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " View on GitHub\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca 
href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " Open in BQ Studio\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e" + ], + "metadata": {}, + "execution_count": null + }, + { + "id": "256b6c02", + "cell_type": "markdown", + "source": [ + "BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old\n", + "movie posters. These posters are images stored in a public Google Cloud Storage bucket: `gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters`" + ], + "metadata": { + "id": "c9CCKXG5XTb-" + }, + "execution_count": null + }, + { + "id": "3f71d3cb", + "cell_type": "markdown", + "source": [ + "## Set up" + ], + "metadata": { + "id": "CUJDa_7MPbL9" + }, + "execution_count": null + }, + { + "id": "547145f5", + "cell_type": "markdown", + "source": [ + "Before you begin, you need to\n", + "\n", + "* Set up your permissions for generative AI functions with [these instructions](https://docs.cloud.google.com/bigquery/docs/permissions-for-ai-functions)\n", + "* Set up your Cloud Resource connection by following [these instructions](https://docs.cloud.google.com/bigquery/docs/create-cloud-resource-connection)\n", + "\n", + "Once you have the permissions set up, import the `bigframes.pandas` package, and\n", + "set your cloud project ID." 
+ ], + "metadata": { + "id": "D3iYtBSkYpCK" + }, + "execution_count": null + }, + { + "id": "d9cd6da8", + "cell_type": "code", + "source": [ + "import bigframes.pandas as bpd\n", + "\n", + "MY_RPOJECT_ID = \"bigframes-dev\" # @param {type:\"string\"}\n", + "\n", + "bpd.options.bigquery.project = MY_RPOJECT_ID" + ], + "metadata": { + "id": "6nqoRHYbPAx3" + }, + "execution_count": null + }, + { + "id": "015a63c1", + "cell_type": "markdown", + "source": [ + "## Load data" + ], + "metadata": { + "id": "2XHcNHtvPhNW" + }, + "execution_count": null + }, + { + "id": "254561e0", + "cell_type": "markdown", + "source": [ + "First, you load the data from the GCS bucket to a BigQuery Dataframe with the `from_glob_path` method:" + ], + "metadata": { + "id": "eS-9A7DijfoQ" + }, + "execution_count": null + }, + { + "id": "47acbbfe", + "cell_type": "code", + "source": [ + "# Replace with your own connection name.\nMY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n\nimport bigframes.pandas as bpd\nsession = bpd.get_global_session()\n\nmovies = session._from_glob_path(\n \"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\",\n connection = MY_CONNECTION,\n name='poster')\nmovies.head(1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "ZNPzFjCyPap0", + "outputId": "346d20b2-d615-4094-d24e-2d40e5c90ee2" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "XZpKUoHjXw3_" - }, - "outputs": [], - "source": [ - "# Copyright 2026 Google LLC\n", - "#\n", - "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# 
distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " _global_session = bigframes.session.connect(\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "SEKzWP6jW9Oj" - }, - "source": [ - "# Analyzing movie posters with BigQuery Dataframe AI functions" + "data": { + "text/html": [ + "\n", + " Query processed 0 Bytes in a moment of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", - " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", - " Open in BQ Studio\n", - " \n", - "
" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "c9CCKXG5XTb-" - }, - "source": [ - "BigQuery Dataframe provides a Pythonic way to use AI functions directly with your dataframes. In this notebook, you will use these functions to analyze old\n", - "movie posters. These posters are images stored in a public Google Cloud Storage bucket: `gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters`" + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in a minute of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "CUJDa_7MPbL9" - }, - "source": [ - "## Set up" + "data": { + "text/html": [ + "\n", + " Query processed 1.2 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "D3iYtBSkYpCK" - }, - "source": [ - "Before you begin, you need to\n", - "\n", - "* Set up your permissions for generative AI functions with [these instructions](https://docs.cloud.google.com/bigquery/docs/permissions-for-ai-functions)\n", - "* Set up your Cloud Resource connection by following [these instructions](https://docs.cloud.google.com/bigquery/docs/create-cloud-resource-connection)\n", - "\n", - "Once you have the permissions set up, import the `bigframes.pandas` package, and\n", - "set your cloud project ID." 
+ "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200041Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=9f955e89088240b34a5cbfba751fffacc5dfd7a2df468dcccfae06c939358c702ffbeb940403a69ad36e3fdf321abee60cf2b9795c9c1744bc0b164d6c2eca99666a0853e7afcf7670a07ff115bfe534791c9ab4267cb383e3a46ede9301aeeb8534a42a1d4c8f790f3a60eab06aa72a8fe76ee6cbb88de8e42a0809d8322a0ad8aecd1c64a55b1cc8716acf4f0dc2550a2059e63d98d49707fe27180ada0a277ea9b1827fc261657bcee9ec5cc7117df704f135d983325abb97dc77ee7a270c466e689921fce8ecd23824b515f2811c3c13ee382c5bc3bd34b7dd95a845705a8f654315b2128799efd0509dee5f6db1eb1b773438d3bfc8112d76cbe892e376\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 1 columns in total]" + ], + "text/plain": [ + " poster\n", + "0 
{\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0...\n", + "\n", + "[1 rows x 1 columns]" ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "f1096d2f", + "cell_type": "markdown", + "source": [ + "## Extract titles from posters" + ], + "metadata": { + "id": "EfkdDH08QnYw" + }, + "execution_count": null + }, + { + "id": "bb30d47c", + "cell_type": "code", + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "movies['title'] = bbq.ai.generate(\n", + " (\"What is the movie title for this poster? Name only\", movies['poster']),\n", + " endpoint='gemini-2.5-pro'\n", + ").struct.field(\"result\")\n", + "movies.head(1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "6CoZZ5tSQm1r", + "outputId": "1b3915ce-eb83-4be9-b1c1-d9a326dc9408" + }, + "execution_count": 4, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6nqoRHYbPAx3" - }, - "outputs": [], - "source": [ - "import bigframes.pandas as bpd\n", - "\n", - "MY_RPOJECT_ID = \"bigframes-dev\" # @param {type:\"string\"}\n", - "\n", - "bpd.options.bigquery.project = MY_RPOJECT_ID" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "2XHcNHtvPhNW" - }, - "source": [ - "## Load data" + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in 2 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "eS-9A7DijfoQ" - }, - "source": [ - "First, you load the data from the GCS bucket to a BigQuery Dataframe with the `from_glob_path` method:" + "data": { + "text/html": [ + "\n", + " Query processed 1.2 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "ZNPzFjCyPap0", - "outputId": "346d20b2-d615-4094-d24e-2d40e5c90ee2" - }, - "outputs": [ 
- { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/core/global_session.py:113: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", - " _global_session = bigframes.session.connect(\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in a minute of slot time. [Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.2 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
poster
0
\n", - "

1 rows × 1 columns

\n", - "
[1 rows x 1 columns in total]" - ], - "text/plain": [ - " poster\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0...\n", - "\n", - "[1 rows x 1 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200057Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=29c8cf20d3f56ab1939ec00dbc1afd26e888b6475808258e34bc60a65e207b877c39853678b0cd1c9918d35e312e151725dbefc4ed6c519e4ec1f2c23c2e307f87442d09c5c8f0bbd49af92eb05e18ff35cd44f2f2954b79a33cf706c7ae1662e23e3220224d6f58b775cb1875213b5050f910cb41a4a8fb312f308b0566448ddf7ef15e22ec2a5261af2570f89e0f6067ac4cbf5874eaf522a6e4d8cf6e0313be3079b172bdc19c2d6901f53bbacf5bee3f2913c7f9f657cd1aed25d786f66a84f96e4dbe36e7f01d8b67887c9ac93edf866495fdf13c6b95152cdfa6b699fd14aeb477ec4a14fcd9f37eaf88ad02eb40a952635f97e7639be764b0007e011e\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], - "source": [ - "# Replace with your own connection name.\n", - "MY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n", - "\n", - "movies = bpd.from_glob_path(\n", - " \"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\",\n", - " connection = MY_CONNECTION,\n", - " name='poster')\n", - "movies.head(1)" + "text/plain": [ + " poster title\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... 
Der Student von Prag\n", + "\n", + "[1 rows x 2 columns]" ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "eb9eb261", + "cell_type": "markdown", + "source": [ + "Notice that `ai.generate()` has a `struct` return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, `\"result\"` will be the default name. You can access LLM response content with the struct accessor (e.g. `my_response.struct.field(\"result\")`)." + ], + "metadata": { + "id": "cFQHQ9S2lr6t" + }, + "execution_count": null + }, + { + "id": "ea29eb21", + "cell_type": "markdown", + "source": [ + "## Get movie release year\n", + "\n", + "In the example below, you will use `ai.generate_int()` to find the release year for each movie poster:" + ], + "metadata": { + "id": "R8kkUhgoS5Xz" + }, + "execution_count": null + }, + { + "id": "bf426247", + "cell_type": "code", + "source": [ + "movies['year'] = bbq.ai.generate_int(\n", + " (\"What is the release year for this movie?\", movies['title']),\n", + " endpoint='gemini-2.5-pro'\n", + ").struct.field(\"result\")\n", + "\n", + "movies.head(1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 976 }, + "id": "cKZdHq0XS1iW", + "outputId": "72cbad57-4518-4e1e-97bb-333d424dba73" + }, + "execution_count": 5, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "EfkdDH08QnYw" - }, - "source": [ - "## Extract titles from posters" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", +
"/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] }, { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "6CoZZ5tSQm1r", - "outputId": "1b3915ce-eb83-4be9-b1c1-d9a326dc9408" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in 2 minutes of slot time. 
[Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.2 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitle
0Der Student von Prag
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" - ], - "text/plain": [ - " poster title\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Der Student von Prag\n", - "\n", - "[1 rows x 2 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", + " " ], - "source": [ - "import bigframes.bigquery as bbq\n", - "\n", - "movies['title'] = bbq.ai.generate(\n", - " (\"What is the movie title for this poster? Name only\", movies['poster']),\n", - " endpoint='gemini-2.5-pro'\n", - ").struct.field(\"result\")\n", - "movies.head(1)" + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "cFQHQ9S2lr6t" - }, - "source": [ - "Notice that `ai.generate()` has a `struct` return type, which holds not only the LLM response, but also the status. If you do not provide a field name for your answer, `\"result\"` will be the default name. You can access LLM response content with the struct accessor (e.g. `my_response.struct.filed(\"result\")`);." 
+ "data": { + "text/html": [ + "\n", + " Query processed 1.3 kB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "\u003cIPython.core.display.HTML object\u003e" ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "cell_type": "markdown", - "metadata": { - "id": "R8kkUhgoS5Xz" - }, - "source": [ - "## Get movie release year\n", - "\n", - "In the example below, you will use `ai.generate_int()` to find the release year for each movie poster:" + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200120Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=96035b9c90093c9636f0b406e5ca9daf52bb1019bde4d52e779f3ce7371e6df0430b3f2e991869065e113327a7698e7ce5ad7b4db8781aa65adea890b80976c97b93b3f9deac5002a1e27b4bd2c1df9250ff4167f150c88be2067f70d45b7c94fd6d69f36a90b5a3ad1a3d500e3cc89a4fe4a67157cbea164d5ce34506dd1d2353eedb1c663eb1a4578c8ff1f9af2ab21a7065de4ec3ff1af44e764a3215874e564e6beeb502739468a80a02c79dcc71f7518435686270d855007e01653659804b5f50ab9c43c4627f28625e07572a4b0f30de49397f9f0445571cdacb695747bdb17614addcf33a90036aa48d025baa8a4d6bd5000d0106a788c2c23f1292c8\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003ctd\u003e1913\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" + ], + "text/plain": [ + " poster title \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... 
Der Student von Prag \n", + "\n", + " year \n", + "0 1913 \n", + "\n", + "[1 rows x 3 columns]" ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "8bf12352", + "cell_type": "code", + "source": [ + "movies.dtypes" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 250 + }, + "id": "yqRiNRY8_8fs", + "outputId": "efa60107-6883-4f5c-8e40-43c7287ea7fb" + }, + "execution_count": 6, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] }, { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 976 - }, - "id": "cKZdHq0XS1iW", - "outputId": "72cbad57-4518-4e1e-97bb-333d424dba73" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in 4 minutes of slot time. 
[Job bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.3 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
0Der Student von Prag1913
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" - ], - "text/plain": [ - " poster title \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Der Student von Prag \n", - "\n", - " year \n", - "0 1913 \n", - "\n", - "[1 rows x 3 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003ctd\u003estruct\u0026lt;uri: string, version: string, authorize...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003ctd\u003estring[pyarrow]\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003ctd\u003eInt64\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003c/div\u003e\u003cbr\u003e\u003clabel\u003e\u003cb\u003edtype:\u003c/b\u003e object\u003c/label\u003e" ], - "source": [ - "movies['year'] = bbq.ai.generate_int(\n", - " (\"What is the release year for this movie?\", movies['title']),\n", - " endpoint='gemini-2.5-pro'\n", - ").struct.field(\"result\")\n", - "\n", - "movies.head(1)" + "text/plain": [ + "poster struct\u003curi: string, version: string, authorize...\n", + "title string[pyarrow]\n", + "year Int64\n", + "dtype: object" ] + }, + "execution_count": 
6, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "id": "a7b7bbf1", + "cell_type": "markdown", + "source": [ + "## Filter movie by production country\n", + "\n", + "In the next example, you will use `ai.if_()` to find the movies that were produced in the USA." + ], + "metadata": { + "id": "0WwbiMtdTXt5" + }, + "execution_count": null + }, + { + "id": "0a1dec99", + "cell_type": "code", + "source": [ + "us_movies = movies[bbq.ai.if_(\n", + " (\"The movie \", movies['title'], \" was made in US\")\n", + ")]\n", + "us_movies.head(1)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "xTE8dj3LThy6", + "outputId": "941e04d8-9f24-4309-a59e-35e8740c9c54" + }, + "execution_count": 7, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/lib/python3.12/dist-packages/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] }, { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 250 - }, - "id": "yqRiNRY8_8fs", - "outputId": "efa60107-6883-4f5c-8e40-43c7287ea7fb" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.12/dist-packages/bigframes/dtypes.py:1010: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", - "

" - ], - "text/plain": [ - "poster structJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " Query processed 1.2 kB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
8Shoulder Arms1918
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" - ], - "text/plain": [ - " poster title year\n", - "8 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... Shoulder Arms 1918\n", - "\n", - "[1 rows x 3 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e8\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fshoulder_arms.jpeg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200210Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653082560296\u0026X-Goog-Signature=64c1fb48cc9830dd4153bca15d05d8703c770e12a4df99abf4cab9dec02d13c66adf4d1223ffda9a30763ad2b286086dfc8cc9b8d20875b29d0c1639983c3ba08a02364bf49361b4a24c3a6830def8d6d3561eeb04d01604b5bae86e48457dc368fee538d0beea2228fdf5e94b5862e1097f58545d7449fa5df0e93fb9c3c0a32943ca9970911f183adf71a7e13e9275efd41c1f69b8f8453b853a30cbb5e8859d72b95ca653204b5ae8f96a12d88d59e988349f74e3f6db6ef277c066d92a28c50335d494beead9a3c0c796c97ca48c497328ae7ad278161c28743193233b28ac0fcafab2431179f7f6321345d8a67e6af39d7339697a5892f0441a266262ab\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eShoulder Arms\u003c/td\u003e\n", + " \u003ctd\u003e1918\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" ], - "source": [ - "us_movies = movies[bbq.ai.if_(\n", - " (\"The movie \", movies['title'], \" was made in US\")\n", - ")]\n", - "us_movies.head(1)" + "text/plain": [ + " poster title year\n", + "8 {\"access_urls\":{\"expiry_time\":\"2026-03-27T02:0... 
Shoulder Arms 1918\n", + "\n", + "[1 rows x 3 columns]" ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat_minor": 0, + "nbformat": 4 } diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index 8f3241259d5f..ebc2cb6bcd30 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -1,10 +1,8 @@ { "cells": [ { + "id": "9edad7a6", "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ -19,41 +17,45 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ] + ], + "metadata": {}, + "execution_count": 1 }, { + "id": "816ab253", "cell_type": "markdown", - "metadata": { - "id": "YOrUAvz6DMw-" - }, "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
\n" - ] + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e\n" + ], + "metadata": { + "id": "YOrUAvz6DMw-" + }, + "execution_count": null }, { + "id": "77d821d4", "cell_type": "markdown", - "metadata": {}, "source": [ "This notebook is introducing BigFrames Multimodal features:\n", "1. Create Multimodal DataFrame\n", @@ -63,44 +65,42 @@ "5. PDF chunking function\n", "6. Transcribe audio\n", "7. Extract EXIF metadata from images" - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "75ab1c13", "cell_type": "markdown", + "source": [ + "## Setup" + ], "metadata": { "id": "PEAJQQ6AFg-n" }, - "source": [ - "## Setup" - ] + "execution_count": null }, { + "id": "750954c4", "cell_type": "markdown", - "metadata": {}, "source": [ - "Install the latest bigframes package if bigframes version < 2.4.0" - ] + "Install the latest bigframes package if bigframes version \u003c 2.4.0" + ], + "metadata": {}, + "execution_count": null }, { + "id": "2a6fafb1", "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], "source": [ "# !pip install bigframes --upgrade" - ] + ], + "metadata": {}, + "execution_count": 2 }, { + "id": "df561d04", "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bGyhLnfEeB0X", - "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" - }, - "outputs": [], "source": [ "PROJECT = \"bigframes-dev\" # replace with your project. 
\n", "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", @@ -126,13 +126,19 @@ "\n", "import bigframes.pandas as bpd\n", "import bigframes.bigquery as bbq" - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bGyhLnfEeB0X", + "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" + }, + "execution_count": 3 }, { + "id": "35bd6e6e", "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], "source": [ "import bigframes.bigquery as bbq\n", "\n", @@ -172,21 +178,27 @@ "\n", "def get_updated(series):\n", " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" - ] + ], + "metadata": {}, + "execution_count": 4 }, { + "id": "be9ce892", "cell_type": "markdown", + "source": [ + "import bigframes.pandas as bpd\\nsession = bpd.get_global_session()\\ndf_image = session._from_glob_path(\\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\",\\n connection=FULL_CONNECTION_ID,\\n name=\"image\"\\n)" + ], "metadata": { "id": "ifKOq7VZGtZy" }, - "source": [ - "### 1. Create Multimodal DataFrame\n", - "There are several ways to create Multimodal DataFrame. The easiest way is from the wildcard paths." 
- ] + "execution_count": null }, { + "id": "871d02f4", "cell_type": "code", - "execution_count": 5, + "source": [ + "# Create blob columns from wildcard path.\\ndf_image = session._from_glob_path(\\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\\n)" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -194,17 +206,16 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "outputs": [], - "source": [ - "# Create blob columns from wildcard path.\n", - "df_image = bpd.from_glob_path(\n", - " \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\n", - ")" - ] + "execution_count": 5 }, { + "id": "2e0436b0", "cell_type": "code", - "execution_count": 6, + "source": [ + "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "df_image" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -213,6 +224,7 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, + "execution_count": 6, "outputs": [ { "name": "stderr", @@ -229,8 +241,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
image
0
1
2
3
4
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=060fd285eaaa0d1b8888e9e3c648c0c9452df5d33b6c6d7ddea1d5fc3e4bf1f243ff0719ed4e3faaa3ea29e60da198daf1168dfd0efd64f9612f7c032753cfa2842ef88469a49ab23cf261e2b5da052224a33eedddd7c4699d584ec85704b18010ce8f4216f200d3cc8d0301b7aeb6bef37ae9e83a832ea38703f19b5b85e2e34f772420f5436afeb874487b3fb0ae4a17fb8f277a3d233a6d5e6e0d063e37e651061bf4ae33b8ec8b879c5db93e3ce97513054822d1867e28de4f03585da2edd8f4f51e177a4e3d37f5e3ca2f5be0990dd9f58135ec9223da3af7bcd1e67a8d279282d4d245eb7faff094903a82cf7a8b866fe848230a6668b6eaef6a683292\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=34826bd5824786f809b740fd64e154a9feb43b467250c64b9b6686134133e18aabd7213472cb00ae4ef59e5567b8897828aae6ff22e29c4a5d28214fcd2a6f486e69d548d0e482707c4dcd67894feb716ee68e1863e02e36d7e0c6c008b1b989c2a798feb2c4bc3638c6c80069165b2bf51c6028ea2f0a09c1555981b8935435ec1c596975b77942f603e65414328f3f1d180f772015452bfc97e96ffb9a0a016a4dc365a4531d0e37e491f4066df87f9a8f2374d30d5f62d639f10252c471570b9e24d88a9f8816007099fc39e6f846c68ea5a4069ecc785e67101f664da4999037fcbbd93e00b1b85a31528492e8610d75af96a1c8a879865dd36da2b15465\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=6476c6b9b8b23f4ac79973d8088424c6d8018857d0d8c1bf586057a7bd2f4cf00bfc53c79757b35401f05dadee9bc1aeeaed7b669659afa16696062db7d9da45e5fe17a0aaaa33c2394632a8dae6ca3c7f705ce0e7179e44fa245dc34080d87bdd0f41638c2840eba87b178dc43db16ca1a621224a1d991960eb821a99dc34aab25ed7e8457f161bd09fa9efc0eb0642709e3fba6ba412ff68ffe698592c235054ae0d08fd27909150beaf69b93dee3496d2f9254c2f801890fe072442fa2ffe389eeb689b8eb4daf08b4701a535ac6606c7de856761d008da479138abc3e941e0ab682f19fe86cd3f56df63f788c92824aed76fafaae0a546fa796266f26e2e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=22cdbcce477c983c914de1edcef7742fd253a6830e961c2932d5dbb11730c1fe0035579c4158a140cd6ebcdc95e1212ec60a2d54679af8dd662cb7c1ac6249042bb5a95fb324397599bfa4e8a8bf8e4313d14a5ca34e40c677d91f1853b4b7450d3ad043404058db5c1dbab56b8968eab4e3550aa1de20c789084527f8abd67a32aa62788b70975ea828bb312f5a123463c2088a4bad7a0c20d299f59fc0674ed32d36b3f78a1bf2fc6fcd367bf2056e132fbd50e0a597a7da7518be8c9597de6365097490857caba47e84b57940bdc6cca130b6c23ede91c51140cf0672ebdda0957e525416c636c368d1cf04e5efecbba3f052f61bb95d951e52e0f5d31e8c\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=335aafc718f8a89dc2b5d2e75ff750ac302cfacf4238ad91c2a4b140f59dd666d6520fafe885b82706453c2e820c82f0461488ae01e3210a20c555a9ac1242ddd54e17a92d7873211a4dcd69a7fca76c16ad9cd754f6245a8b9f047e9ef8bc428ec243fbde7af59a2b308968a165662e50d4a08740d196d02182d99650e79673e167164dc2869a434159ba3a15c68ddc9e17f5a7234c478ac4ae55a9686740ef260e6c1ab834ca3df361161c8d689acc72b143a6a3345640b2b94aadd1070d3e90a6572d63ae74cf803304b798ea4df61e5f4494f078f565f0d59f57bd6eee0618936a16617455d785ced3ac467b964b5eb9049749fe4cf8f2bf2c72ed72fc79\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -291,37 +303,46 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", - "df_image = df_image.head(5)\n", - "df_image" ] }, { + "id": "429b0117", "cell_type": "markdown", + "source": [ + "### 2. Combine unstructured data with structured data" + ], "metadata": { "id": "b6RRZb3qPi_T" }, - "source": [ - "### 2. Combine unstructured data with structured data" - ] + "execution_count": null }, { + "id": "991fa065", "cell_type": "markdown", + "source": [ + "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." 
+ ], "metadata": { "id": "4YJCdmLtR-qu" }, - "source": [ - "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." - ] + "execution_count": null }, { + "id": "08722ec5", "cell_type": "code", - "execution_count": 7, + "source": [ + "# Combine unstructured data with structured data\n", + "df_image = df_image.head(5)\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", + "df_image" + ], "metadata": { "id": "YYYVn7NDH0Me" }, + "execution_count": 7, "outputs": [ { "name": "stderr", @@ -338,8 +359,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eauthor\u003c/th\u003e\n", + " \u003cth\u003econtent_type\u003c/th\u003e\n", + " \u003cth\u003esize\u003c/th\u003e\n", + " \u003cth\u003eupdated\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=0f96a5054a9d9d8f14259a2a9155c8124d0b3b6af21d5ba59f61a789cb5a770d1d70e9d7b04094140495675e9eb97ef30d5539ec822bfc7f857fc0be3a3ff193aef72578ffccf7231633d42fbf53752b50a6ab3c4180dd86f62a2c350492239d44b2d5a079d000aa60d99e5656dca1fdc85b2a5b1cea0d6567d47641167ee08bd41bff06e93b35e34c4b8f82e73d589bf29f9ae73b640b8c90b751ca2829f99b2a2fa5a463990660e87e4c4220a8bee9ff9bea44eb621d8a00335892833a0e33cc95560a803df5a13fb710abfe813d11a37610c870c82986b4275831e2ed57cf022af8927cd4a9fc5aea88b54c597b51ee456ab5f22cdcfe8972a0a2c56d9702\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591240\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:04+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=50b820b125f2b52c6a205178676d153512a5d62ba04f7f399de86cecaf167492f4db9322e289f8ec077e74e09ba67509bedda9b68b9eb7290a7f52bb78a567139d0a2bb9266764ed941bfa19ba4278bf00647e79c85877e3111dbe3c49cc572d4f47739db1030d6ce0298965d08f2f992b59a0f452ab8d52ea5d783cf13ce6d3b8349ce3fce9c4337ddd00b746ec2e9e8fa6fa0361a644d82c46b7f0860a2404f3fbf17de24cbfd8744a098fdde367d5a4cb918a325ae3fb96abfb187e26bee7bdc267b81ba23949ca4feaf676864431641f3747477293b34541e7c48ca06bbcc45e94ae839d0ce85e3222c38dd39014821881a31b15f3efd06ca426cc4769fb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1182951\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=0d659c9d71b34023e6a7531dc99620cd1543f4a8ec0262cda29549e9f4f604e929be102b44e0a11ef2c148c6a24c3fe9a92257892ca1aa23d7b963551d95093fa74bce067ec12c0237a96a15da0741f76e732d04f85b03d98696c7dc9bcd1bd9de1f2799ceb878e8f87ddfff5609fc6a6db4f27dcc410ad05cdc1066c8dcdae880a950a160189673eba7f3de59cc0e1aa754ebdf7f745a8572216af6f7934b1e96c9aa7cb7cacb6aa836f8e731668b76d0942e04e7e4b81a5972a75ab2ecb0b1859dc4ac4f6a45239dd2b76cc7af4643fb24c64731f28711e6cab8433fe239f0cc2462ae9260b1ae5ae85517300075188d270957eecb31ee8db20ac3ddd97c4e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1520884\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=1124d977a75b85634f2afeb223fba3028c9ad85fd9a2f6f8ce483c9e015a48a242f6810742e0279a46b3388371a9b7d37ca96d9a53d3939f6e0484a474491bece22272b89176e01866fdc8845b75cef28b9ff36a7b2f875e452002a7dd8d13ce38b078ea2aacd76ce8cb560faf078dad6462b3a69130f333ab9119fb6f1d8410a70de76018d2c84f01c6d70e1aa60498b4eb88a35a77a8173d11e545a8f5a7dfa542ec3effcafcd3a9c84934d605ed06107df98032738415e6ef1ed9331796aa802712c2cb4bd733881833bf4ed5d590846db97c7591a2d84acdf87e38752b15a39b711aec5bbe4dcca25a1edfb60626e68497c1fbb8cb0bd707938db378a01e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1235401\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=2f60becdf01864a381c2283fbceaaea023a1011554a61221b5be902cdd08b102d60d0da0275ed491b3396baeef517b8e2336eeb3e3b07da0398cdf4b190ac0ea667e4cd1d1d19e41046824d55ffcef47a2db3aeb4e82da71a655264f14ad5ee553329aa9b32c8c2200f3b66c9a9bb5aa8e5b91795e8d6b6129935f46522fb8dab9ce3a2ba5af019c2410f709472791730ab9ebdf9f901a5bfaf4dcc2c78e07c79743d35eceac59999d841adb60ce15313a70526d98b83e90f2240800c5b96b1b9a032d530fb15bec86425afca0c6fcc1d35d1560ef996cae5411feb67addd1b726026f3d097318b0577a84dab72cae328bb186fc8c97001ff720a43e6fc27610\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591923\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -431,140 +452,29 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Combine unstructured data with structured data\n", - "df_image = df_image.head(5)\n", - "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", - "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", - "df_image[\"size\"] = get_size(df_image[\"image\"])\n", - "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "df_image" ] }, { + "id": "f90826f6", "cell_type": "markdown", - "metadata": {}, 
"source": [ "### 3. Conduct image transformations" - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "e24c9f8c", "cell_type": "markdown", - "metadata": {}, "source": [ "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "db665049", "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "HhCb8jRsLe9B", - "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageblurred
0
1
2
3
4
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" - ], - "text/plain": [ - " image \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - " blurred \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -577,7 +487,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -638,23 +548,137 @@ " image_blur, 20, 20\n", ")\n", "df_image[[\"image\", \"blurred\"]]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + }, + "execution_count": 8, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + 
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "\u003cdiv\u003e\n", + "\u003cstyle scoped\u003e\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eblurred\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=2bb786ea7fda0a778b591d0f85b1d1df003726a26585490c1c8e1cc632bc90e418bc2762993da32c60017b96b36a9bf03ca123f7e74a34e5da98d4a8ae65e340c64872e1ab7c6442435253669103a157ee61c793da79ad0e6ae2a7bdbf54e8d67dce61de37d9dd2f54b0c994f8135d88af27f08eeb99e713b385b37fd0762503244cd0c597479d6925dcf111db4605842c797aabb307736028a5dd802ef08a2697a4bd1c96b5bf8bbecdb8a8f384028cec03bc9d51c1d2dfee1760756e0c6e54d9d753b373115e308cf45de2c082f30f263285af5d5e43fd9bf8c3dae32ceee389ef9427f3e2b06646d48aa570ba1ca0713d0f270ea2f4996e8ad3d03a3e39a6\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334353324\u0026X-Goog-Signature=6f16236d50a82c7ede3c23366389344149faba103443acf1c1a2b0d3b0d91948c8c15a3ad990382ab9b61badf8acee3c4b9e5ec6f96b72d395cd2e18227ad5c051ad189c1af48030fb44c1b9cc84a44a74fec04d576a6d699c1f0e133b2fc5d583872b5577c60a01f58d163de6e95591d260e9246fae0243d7472748e854ccaf4b4c3de80fb4e085f428427fe9914d22fba7416ae8b9e33beb0dfbcb91273609d0333aa1135b835bc69f9e15e1097452133e376b9b3f1bb7b4d7d7e9cb860677ce786e49239e8a8af86f83b7e27e76846212daddd2859f8dec4942b7351c69d396d4fd9a529fd0c48677aa4923063a752f6845801f22d3de227a640946f1e212\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=31ed02660dcead7471a7b47e2f471f4cf515e37525c14027f8d87ba1479982f244df02979b7da03676d4bbbd21221b3f70cefea4501eca3a5a64acb187ef5a2b03b19116729059dc666c2e295fc1ac831cd38864c1673344e04855db279a3c96409550dfade09631b0818e6cb576ff02afba6a87624f7c33077f7f19c891f6413b317cabb834182585d445ce97aa0294beca4de0edd2f9a0f6429747e93db53df875249a9c0543a329dac0f46ef3ed4e64a7e51648d0820ba52b7845b6f98371aca7deb165b4f7b7537be0d659cf33bb34893417666d7c9e9f611332ccba6bae6024be6862350185886cf61354eda3591e96e344c59350ca6f68ce50670fa869\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334573735\u0026X-Goog-Signature=209b62c9c3e2657b9270ed126a6c1e3979471f9e308670ce1595e9c18a0ec7c627a313b1f1c87108355602dc11b804ff609ba3394fada2b4fd186c6fef4138e22222045d0fd0660f103e6378ac83c6bd7d8da27c1a3d306dbfd778f6738c2e492bd8527ba9112e086f15334b7ab7795b88212d3825b3be325606cc507e5504406bc4e9dc41b29fb99e1207fe74c11053e7332ad9fdd65587f55cd10cc1bfd903672f43de38a18cb346977d8498fd751ab8728ca580261a1a0367421437d56df7f5f7dda895ddd370954aee632258304da31fd8e7c33619dd5cf9ee0ebe9e60eba3cc416e871e181525f23e525489ef6dffced9211f7d3681ec24b3ac7c0b05ab\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=8d32ad0137f3b10393aeaf635732f7fe337149323c0a33b634f9dcc592e662d4d268223f5c5cc89956787c5a3c061fa69c2de4d1a3225497dfad466548fcd4df7ee6000c4e60ff0661d0f9b451efa2701df5de55c322b3585c09210171403d47c358e6f41281f245711afd63eed3157e6674e9958ebffc5516f6ccb06b9af1830400e1d6a9bed44538b7212eeb257665aa43eecfa2af7482863f6032002de57f7a01af83cf5051541178297ab4a256710c04fd082cb51af4725dbafb288ee79caa630853dcbf35c49595b2b2e552804cabdbdf955806d57b97451e87035bd3ea89dbb5560a1c14170c531c3222a1d0b59d79d4c45b641887699f0bf5830612cb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334233994\u0026X-Goog-Signature=6eed60a846158365a0c54d4c96516bb8c4011b497da203479fa9066bc2b7c2c7c683aa351d5249e05df64b8f86549f251150327013e41231ee06f22e9a51e1061c577d7b2da4704b80f684776702da13be6122ad6652463f210c53d9149da9e32a253fde4851723cbb616f18374a8f9b7b5cd36a82e66a2e5677ae51b906e6c4649ea27fcbbcba3ef24bf163a4b0f469c10eca8da464f58161ead8371bdb5b0d452ee713b0ec498ca9aaefd521a1d8240d82885a19af58ddefb167146ab9d411b5d0712457b126104b5f42f57ab4ede2da5ae9af74e708840b507a5538571804c60e9c166516e0118aa555a13169401e21f0a5ec303f3b801d6a972ef94426ee\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=472d88289a3910bdde93dd468e6ae9e66c0821fb0d72a356ef887771baa2a80f356b0ac67183873c0e89c87afb3080b3a78122fa7c3e37ad8b54d8aa216e9e04cb6f2af4f200784beba8de4eea7cf3a39faa8d800b7cc6bbf03df55beaeb69727d413266e2e59fb900bc25b6859d7c14db3c22aa6a0aeaa71c331ae0a2bd27c9e2c246fe931a2562210411491bc83bc34e7bef60901189949348eee909e64c90e3aab7b03d7c274ee84c03cca6dd624169f40323a785bcc1a8357810f738b45a637c09267530fbddc413a75622d793876c74c9128b11d8b4ea2fe2b959b34b96e06a9c384f72522ad357557d0a3b1c8b5792c2e94631139eae178fa660a5e8b2\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334678692\u0026X-Goog-Signature=239a77d1c63c4074b2ddc3dd21a5bc77de8ea993fdd911a199b6cca207fef9286307a8321d91d02cffda17060a217f50b2a1d8017bbcb29511339a1187b38332a39795fb1b2c754988948be8849a371fd66a7698f8e35a0f1a3430b6837ede73c37c90da49a062586828f19595d4246e4eabf3e3d629f251b066fb20e0f34ddad80599f5946aa76e0694b859f7d575dc781a850d56e9846d456d285b0023e90862154008154fa13ef1f95096a6161e6a1b314b82ca23bc44fc1b4b7799a9f936dca09ab09830446089d8defe11a2c0e066aa8a65204d9027140f314001c59187593290ae22c4092ee5d0293a6e5da1e1da3d42c32c0aced7db7ad4f774575deb\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1077450c755044fd6121e0dd67ec7de75a1785c4bacede635b78dd64d6b982ba02bba7a00065035fa13f003ef03c14d7f622b6a5c9ef19cadc956470e5a62e0f1855fb8cae0bbe723c0352224d152c85e173b234e3f5a1bcc2e1ddd4da065c184e82487eb4aa17464d330144b3b7c17ba357af1fb267f97730b3a798ff42a514ea47b83aa9f2560cfd428778638287c96ed17f95f0327e1cade380d046120d79479aca927d627ba65f3bc009c75d1662cbf93c6d33cea4b030a7906d3689922bac13ca3fab45536f115ae6253715cc9c24769e57e83657bf0393ce55e45f90d9666513a19a939044d3b3ad528757e022624d6601dacd326ae8ab3c9ce4ae6d52\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334302924\u0026X-Goog-Signature=3a25e70e59647f427ae55d72168c576fdb59b05f9ed6b72885a9e9c11f655e6bf10ded55744adcef330659731f0effdda7550ddca99c309cefe46ba864b088f155a5243e579d182a55f916d21d9dd0a83534a5e2efb2955a9146db12b22b4321e3f36b69f8d89d663507d7db83ac96bb2419a2baa0787fd0c6e6079d06652b8a2ac364a0a0a5d8d9de6331658b798abddbaaae1ede3026a4f0d955e74782afe240d31e6748bea8ab332ed945f541ca20c587b8c1449643f4748a3b059aae857334b6249bdf86794d307340b6a07d0dc47d3980e234be9c0549f52636d33776b7474fb95ebc014656c3cc217a777d438612a08849ac498baba0ce4716ac4ea432\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" + ], + "text/plain": [ + " image \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + " blurred \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... 
\n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } ] }, { + "id": "11fcc6ec", "cell_type": "markdown", + "source": [ + "### 4. Use LLM models to ask questions and generate embeddings on images" + ], "metadata": { "id": "Euk5saeVVdTP" }, - "source": [ - "### 4. Use LLM models to ask questions and generate embeddings on images" - ] + "execution_count": null }, { + "id": "793b2f45", "cell_type": "code", - "execution_count": 9, + "source": [ + "from bigframes.ml import llm\n", + "gemini = llm.GeminiTextGenerator()" + ], "metadata": { "id": "mRUGfcaFVW-3" }, + "execution_count": 9, "outputs": [ { "name": "stderr", @@ -666,15 +690,16 @@ " return method(*args, **kwargs)\n" ] } - ], - "source": [ - "from bigframes.ml import llm\n", - "gemini = llm.GeminiTextGenerator()" ] }, { + "id": "13d7cb93", "cell_type": "code", - "execution_count": 10, + "source": [ + "# Ask the same question on the images\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", + "answer[[\"ml_generate_text_llm_result\", \"image\"]]" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -683,6 +708,7 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, + "execution_count": 10, "outputs": [ { "name": "stderr", @@ -705,8 +731,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of K9 Guard Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=1b184636c15ee0a74b099df1903a0a79f2d0832d2cf829c84f9344269dc70408ea48dfaddce2f284cb4862cef857e8a0420627e25ca8c7ecc08d34bc69c695bc030ca8d90417860eecad65826160cbbf1cabe6c90d5e51a09c8b582bc542dfc5b309908be7b945d615eabba73b79912987306b3b110a5b0a9e52ccf900a3d2c490bb4e4572f3108f3acabf9a332e0fa503f74265f44d3b9ff40745afa4c59a3a0f3366ca4e4a800a09a5df0b363909a01705433e31bf2c9b7ccc0cc1e75f3e8ec323907140f29e4033238385eb83815b6d95ec54945cbf829d966510ae2504187f9c167fe70ac9e77231d4b38df380a7f6ec5f973828f21e51f4a95626ea0504\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe item is K9 Guard Dog Hot Spot Spray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=7e3adbf71385c63b494609546b7b2a3ef41dc437772a35d579bfed25594b3dd4716f8170dae6e4c9afcabce49c2af584561111983494d6dff504ff5eed611c3c7712cb667e6f1f3451aff5ac6911c4da9aeb2ba5c9227f9459f7c54085268a515eeb28add1b384a4724159f3d1408278e37716465f11a18f823dd4058004e43f9bdd4cab28fc97e97043adddc53c4b5e3059cf1641f1300bedded5a679f6922a45c021055e413953e2f0b000a6fd5047a0aa2fa5fce5f0ca08b6f93411704c9b10c6534894130c11f8ee9a8ee70d26a6455c0cbaf2088c1b5205328858e22026d1c4efc9e558a33082169f7eec5e4fb406de7de13e3ee72a48421e9896cca6f7\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eThe image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry \u0026 Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. 
Each bag has a pile of its contents beneath it.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=380d369f6fc8a0e4274420f7bb038aaf748111cd654f7dc20818a97d80da8e6b93125884982c656009cdfdc7e70496c1defa42b2de69e1b73d99e0b925953bd5e0e5ae20ddfac13a619f8c6b1bf6c3fe9cdb195910eb08efd96a193dda55488eacc2d0deac4d0a777fd7b7916a0cc1707f22463e1678c83b8ae51d082b8365c2643f85c19c59a9ec34c89fcc87b9c510cf6dbcdda5d0648d6602bd023b0a09e6b4b02c4cea9f6b10f563a14097e86a5cd9898ea3b3606a6c847a23ce97aed46b3154c1c1791da19edf172d7f57cd4e604bb2774ae3dc02d902c826ca9be17972ff17d612106b9ac61e734837646e5d0b40f8162798bf7695dccb0d320e6a58a9\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a cat tree.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=712a70543cff388ba937bc867b4d94e1bfb09579bc7ecf998b8fb5194937cd15f491643f76925582ed5b7c853a9845b77e3c6c248126e211d45c3f6ebe751cc06193ae052999bf9bd827acbb204d2a64ad5d6eae1101fc5b2518f16ae29469ee7213cae403a3a6a29d7081561decc6b189593beb4b649bc7169828f4570a929d8b15c8dd0b3f259bfa4e2680b9d5b88653068357c7aefa0b1f26e10dc309b743da4164d9a5abd1761b00cc9a12380ba6fb3786f141b8e536fdc27e869b632c3e1a130f312ad5185362b0f9b30f473387a02905f22956992278d94fc2ef387a87cb855d35cfcabe9ad5d82c1b4dd85c56152e28438f6631322a4c229a9520adb5\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a bag of bird seed. 
Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=70d2709b3b655fb6add8616767e7886e7e304cc96fc891df927085d1e4d90ee9bb13b370762c6c5a8dd43baefa163312267203bc1b371954320bc27c32d0831f7f8937f288da999e506bf6f47d101cd2e49a870f3d5be428d321149f7e1c7d1146569d22f19640d62325665b6d08e7254a89535c021c8b464d65e754312dd47dde08be9ca58856a97d3c3f243030ccfbd8c1bda5ddca2b3618b113f6c1640afa14936b8c16c59d77c44139fe75f3719e2a83924fed36514c61787b02ace0d439f8d3c4fea81c9bf01684f8c06a39f7ec626e93d59262db87f2eea30dd0f849a3436d8dd36d2188f2e52826e8b96bf72614c256cda9867b1905a1d1cd3edebd18\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -780,20 +806,11 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Ask the same question on the images\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", - "answer[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { + "id": "68857305", "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "IG3J3HsKhyBY" - }, - "outputs": [], "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\n", @@ -803,11 +820,19 @@ " \"is it for pets?\",\n", " \"what is the weight of the product?\",\n", "]" - ] + ], + "metadata": { + "id": 
"IG3J3HsKhyBY" + }, + "execution_count": 11 }, { + "id": "829afc69", "cell_type": "code", - "execution_count": 12, + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -816,6 +841,7 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, + "execution_count": 12, "outputs": [ { "name": "stderr", @@ -838,8 +864,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=988967bbfa1e4c70be10a6faa407fde8edbe0e93a7a1c9f9fbe8e81fa55c11bcc27b9673ae4d91b13edbc56ad5e50051a81d1639cafd38946f693f73e81e86a0bf6e945c3a8edf9b3b2d275203caba770fcb9a9fa272b969023fabda363efc8d90ae4b2bbc9a4f420699f3604d0a13607f96694687529b38bd80b211f5998ef0a11ab0f3a0b936a4e6068a9289ec43a9536416b5782ca2a0645c2b43f94ac6b8e31632c62b3714b56f28dce7e5675a66ded7bcc9d1c1a154f5a83f826b3f4b1363b1316536549d959d664931e6cd462a9d83362257e5226ad5b35b5e6c0e6a155fd62d2890b2fc071b59e6e4fa796a22e346bceebc4fb131eee66793f6748699\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. 
The background is a solid gray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=6098f2a2487364290c54d4f6bcc98f29e5097702ab78da4ff9ef97d13c03a7fa3a17bd22324d61ae8264d3a59a6bdb2bf4be55bb7efffcb00c68f0c9b69f413f8b33b2089697456ef919690d59a68548e95ebd68526de53ab9062e18009ab73452fc1934c43f99ad75a45931fb250ef1e78a7ced3e0bfc9d9468ef300a63d750b8e249e7d20afa00895b7e586b16686141799603bcdf731b48237323de166d0a1a4f310519671a4ce6ae56b5d4ebd1887361cc21130b3e8fa57a6107d50239a671319ecbef623719554a9642489d2ea083c2cc466f5d73c55084023567e9db291b40a335e7f65b20da018a70cbf5ef9654aa86ec500aa4df35c1db81116abc83\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eHere are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry \u0026 Blossom Treat Blend** is the product in the purple bag.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=9672e4d0fa6ea5f4bcf1605e35a7642cab21e7decfcfe55e37616c99a7774445cecf69c848ccd0dd92f5ae1925bfaa4bbd62ba413ecfffc835aa9a4596d18036381d595ec3387d4200a08b8fdb02ddfdb7432ebd14d5d4b77abf4628e61899c4383899ddbe6465ce91b856e8a02f37391c850589a971e6045ed0c389133dd63d4bac98d5fc744f83a51b000bf29c51111589bd34e2d287cc835ccd8c009270cef818eb3ed9ee07f9978bf24ce994311af944fc2b7d7066c3c37613fe08e0542efd277ab77df0fe1f168e05d2f6d6749e5d8af573c22032b29c3c47ac2f3b7978a09139ddfd38ab9045b5ae86d8804f23db758a7602225bd749a5b7552bae4eb8\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eYes, it is for pets. 
It appears to be a cat tree or scratching post.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=5c8e23c4c5f8017e09bf02b1b480200c74c1523aa25297141fe4abf226d7e18c546aed2ea5a2b5b5f8c5b90f2f4c569accf41a73ab8776285b458d230c6df43025eeb3066d40fe141089fa547b5a1f6a12d8c2eed0e614c8667be12c237d7ba4b29a530c16a6c8528cdfc8d8c4761aeae91c5f8452069f88de31cb637aaa34cbea60d8a8e9bd66b9d2f6f2c70a60d7791fc6bac13a4dd3736323da29fb52c5c90068c57c088407935255468513b780c07ea8e922f690645ee5ed8644240f894061c662b7bfe7ab660fd8466280e428424724f62a6c4eb6c781e3bf988059bd9f63fd6ffc2e75d296bca2ef068c6cdda0f6bb994f6799e04b33b4352f66a68224\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe image shows that the weight of the product is 15 oz/ 257g.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=3be42f1f88c163e019f7496107a2f7d69b90a5482dd157a5d05e427fa117dd334a834dd9c72d0c84126e4f2c9e35903521ca8e81645afc945d8dbe47671f6b2f73ccbf494df6e5ad8e19c04ab4c6020859519c663b6fd57b19512fa94562106d01073b9122011ca602279a712fd761dc4ffaed3a8d7b76abcf1f42c2c1bce33228cff0e0454107cb84039a8981a2fb689191c43b7edf54e19354ff2e8c5deeb3a07944285b15db8a4fe474744f52852f8048c377708e2b3b85ee639b952e9292bc8d60d9b1a37c84ab83398ef295b8ba6b3c8e3a500714fb70e91b5fa7f19301fa4ce1bb1041f60a8ce84b3d863d5ba66f14614ed27689cd4daeb293b493172b\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -913,15 +939,17 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { + "id": "e75df430", "cell_type": "code", - "execution_count": 13, + "source": [ + "# Generate embeddings.\n", + "embed_model = llm.MultimodalEmbeddingGenerator()\n", + "embeddings = embed_model.predict(df_image[\"image\"])\n", + "embeddings" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -930,6 +958,7 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, + "execution_count": 13, "outputs": [ { "name": "stderr", @@ -954,8 +983,8 @@ { "data": { 
"text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_result\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_status\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_start_sec\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_end_sec\u003c/th\u003e\n", + " \u003cth\u003econtent\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00638822 0.01666385 0.00451817 ... -0.02...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00973976 0.02148137 0.0024429 ... 0.00...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.01195884 0.02139394 0.05968047 ... -0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e[-0.02621161 0.02797648 0.04416926 ... 
-0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.05918628 0.0125137 0.01907336 ... 0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1034,18 +1063,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", @@ -1061,39 +1090,24 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Generate embeddings.\n", - "embed_model = llm.MultimodalEmbeddingGenerator()\n", - "embeddings = embed_model.predict(df_image[\"image\"])\n", - "embeddings" ] }, { + "id": "23892b0e", "cell_type": "markdown", - "metadata": { - "id": "iRUi8AjG7cIf" - }, "source": [ "### 5. 
PDF extraction and chunking function\n", "\n", "This section demonstrates how to extract text and chunk text from PDF files using custom BigQuery Python UDFs and the `pypdf` library." - ] + ], + "metadata": { + "id": "iRUi8AjG7cIf" + }, + "execution_count": null }, { + "id": "136a18b8", "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" - ] - } - ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1106,7 +1120,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -> str:\n", + "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1133,7 +1147,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1151,7 +1165,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) >= chunk_size:\n", + " while len(curr_chunk) \u003e= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1162,18 +1176,34 @@ " if curr_chunk:\n", " all_text_chunks.append(curr_chunk)\n", " return all_text_chunks" + ], + "metadata": {}, + "execution_count": 14, + "outputs": [ + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } ] }, { + "id": "234a5f86", "cell_type": "code", - "execution_count": 15, + "source": [ + "df_pdf = session._from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\\n\\n# Generate a JSON string containing the runtime information (including signed read URLs)\\naccess_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\\n\\n# Apply PDF extraction\\ndf_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\\n\\n# Apply PDF chunking\\ndf_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\\n\\ndf_pdf[[\"extracted_text\", \"chunked\"]]" + ], "metadata": {}, + "execution_count": 15, "outputs": [ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eextracted_text\u003c/th\u003e\n", + " \u003cth\u003echunked\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eCritterCuisine Pro 5000 - Automatic Pet Feeder...\u003c/td\u003e\n", + " \u003ctd\u003e[\"CritterCuisine Pro 5000 - Automatic Pet Feed...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1219,37 +1249,29 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\n", - "\n", - "# Generate a JSON string containing the runtime information (including signed read URLs)\n", - "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", - "\n", - "# Apply PDF extraction\n", - "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", - "\n", - "# Apply PDF chunking\n", - "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", - "\n", - "df_pdf[[\"extracted_text\", \"chunked\"]]" ] }, { + "id": "d80effbe", "cell_type": "code", - "execution_count": 16, + "source": [ + "# Explode the chunks to see each chunk as a separate row\n", + "chunked = df_pdf[\"chunked\"].explode()\n", + "chunked" + ], "metadata": {}, + "execution_count": 16, "outputs": [ { "data": { "text/html": [ - "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
+       "\u003cpre\u003e0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...
" + "0 continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1266,60 +1288,29 @@ "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Explode the chunks to see each chunk as a separate row\n", - "chunked = df_pdf[\"chunked\"].explode()\n", - "chunked" ] }, { + "id": "118cf1c7", "cell_type": "markdown", - "metadata": {}, "source": [ "### 6. Audio transcribe" - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "1794c54f", "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], "source": [ - "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n", - "df = bpd.from_glob_path(audio_gcs_path, name=\"audio\")" - ] + "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\\ndf = session._from_glob_path(audio_gcs_path, name=\"audio\")" + ], + "metadata": {}, + "execution_count": 17 }, { + "id": "c9f9d484", "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "
0    Now, as all books, not primarily intended as p...
" - ], - "text/plain": [ - "0 Now, as all books, not primarily intended as p...\n", - "Name: transcribed_content, dtype: string" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -1345,28 +1336,39 @@ "\n", "transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n", "transcribed_series" - ] - }, - { - "cell_type": "code", - "execution_count": 19, + ], "metadata": {}, + "execution_count": null, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, { "data": { "text/html": [ - "
0    {'status': '', 'content': 'Now, as all books, ...
" + "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" ], "text/plain": [ - "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct[pyarrow]" + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } - ], + ] + }, + { + "id": "7209a62a", + "cell_type": "code", "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -1381,36 +1383,47 @@ "# Package as a struct for consistent display\n", "transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n", "transcribed_series_verbose" + ], + "metadata": {}, + "execution_count": 19, + "outputs": [ + { + "data": { + "text/html": [ + "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" + ], + "text/plain": [ + "0 {'status': '', 'content': 'Now, as all books, ...\n", + "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } ] }, { + "id": "c8351cc3", "cell_type": "markdown", - "metadata": {}, "source": [ "### 7. Extract EXIF metadata from images" - ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "e59670b9", "cell_type": "markdown", - "metadata": {}, "source": [ "This section demonstrates how to extract EXIF metadata from images using a custom BigQuery Python UDF and the `Pillow` library." 
- ] + ], + "metadata": {}, + "execution_count": null }, { + "id": "fda362f4", "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" - ] - } - ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1426,7 +1439,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -> str:\n", + "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1443,12 +1456,28 @@ " tag_name = ExifTags.TAGS.get(tag, tag)\n", " exif_dict[tag_name] = value\n", " return json.dumps(exif_dict)" + ], + "metadata": {}, + "execution_count": 20, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } ] }, { + "id": "40bb6bc9", "cell_type": "code", - "execution_count": 21, + "source": [ + "# Create a Multimodal DataFrame from the sample image URIs\\nexif_image_df = session._from_glob_path(\\n \"gs://bigframes_blob_test/images_exif/*\",\\n name=\"blob_col\",\\n)\\n\\n# Generate a JSON string containing the runtime information (including signed read URLs)\\n# This allows the UDF to download the images from Google Cloud Storage\\naccess_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\\n\\n# Apply the BigQuery Python UDF to the runtime JSON strings\\n# We cast to string to ensure the input matches the UDF's signature\\nexif_json = access_urls.astype(str).apply(extract_exif)\\n\\n# 
Parse the resulting JSON strings back into a structured JSON type for easier access\\nexif_data = bbq.parse_json(exif_json)\\n\\nexif_data" + ], "metadata": {}, + "execution_count": 21, "outputs": [ { "name": "stderr", @@ -1462,37 +1491,17 @@ { "data": { "text/html": [ - "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" ], "text/plain": [ "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension>[pyarrow]" + "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } - ], - "source": [ - "# Create a Multimodal DataFrame from the sample image URIs\n", - "exif_image_df = bpd.from_glob_path(\n", - " \"gs://bigframes_blob_test/images_exif/*\",\n", - " name=\"blob_col\",\n", - ")\n", - "\n", - "# Generate a JSON string containing the runtime information (including signed read URLs)\n", - "# This allows the UDF to download the images from Google Cloud Storage\n", - "access_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\n", - "\n", - "# Apply the BigQuery Python UDF to the runtime JSON strings\n", - "# We cast to string to ensure the input matches the UDF's signature\n", - "exif_json = access_urls.astype(str).apply(extract_exif)\n", - "\n", - "# Parse the resulting JSON strings back into a structured JSON type for easier access\n", - "exif_data = bbq.parse_json(exif_json)\n", - "\n", - "exif_data" ] } ], @@ -1518,6 +1527,6 @@ "version": "3.13.0" } }, - "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 0, + "nbformat": 4 } From 991691afbed9adb419be1eae6ecee24254f04855 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 04:58:18 +0000 Subject: [PATCH 23/26] fix notebook outputs field --- ...with-bigframes-over-national-jukebox.ipynb | 155 ++++++++++-------- 1 file changed, 91 insertions(+), 64 deletions(-) diff --git a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb index fe68d0107bfd..ac10f68f639a 100644 --- a/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb 
+++ b/packages/bigframes/notebooks/kaggle/vector-search-with-bigframes-over-national-jukebox.ipynb @@ -25,7 +25,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "194%" } } @@ -47,13 +47,13 @@ "\n", "* Learn more at https://www.loc.gov/collections/national-jukebox/about-this-collection/\n", "\n", - "\u003cimg src=\"https://www.loc.gov/static/collections/national-jukebox/images/acoustic-session.jpg\" alt=\"recording 100+ years ago\" width=\"400px\" /\u003e" + "\"recording" ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "216%" } @@ -73,7 +73,7 @@ "\n", "To search the National Jukebox, we combine powerful features of BigQuery:\n", "\n", - "\u003cimg src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAALEAAAFdCAYAAABM2IyIAAAAAXNSR0IArs4c6QAAIABJREFUeF7tnQfYHUXZ/p+3p4ckpJBgQj4JUkLxjxQFCTUEBRSET4SACAIWOirlE+kgICJIU0GwUVR6UVQg+An4RRNAipKQUBNCSOEl9e3/65mdZ/aZZ2fL++aEc3Z3znWF8+6ePXvOzPzOzT33zM7W9fT09IB/+BrIcQ3UeYhz3Hr+q6sa8BB7EHJfAx7i3DehL4CH2DOQ+xooHcRr166FDz5YAaNGjcx94/kCBDVQKojfeOMNOOjgQ2DlylVw4w3XwV577ZnKwezZz8IXDvlvddxr8+emHu8P+PBrIJcQr1q1Co796vGqtr52wvGw++5TnDXX2toKJ3ztG+q1r3/tBFi5ciWceNIpavuEE46Ds878TmqNe4hTq6jqB+QSYqy1Qw75IsyaPRv2228a3HD9j50Veeedd8HZ53wXGhoa4J//+D9oaWlWUL+76F246abrYeLEiakN4CFOraKqH5BbiG+77ZdwwYUXQb9+/eDZ2f9Qz/IxffqX4amnn1a24eaf/aRPle0h7lO1fahvyi3E7733Huy08y6AA47XXnM1HHDA/lbF8devueZqOFC8HlfLy5e/D+idJ07cBIYOHQpZIEbbMn/+a7DxxuNg5Mj0DiPamnnz5sOGG46AcePGfagNXsQPyy3E2BiktPtN2xduuOE6q31uvfUXcOFFF8OAAQNg9qyZ0NLSAl1dXXDE9KPUcZdcfBF89KP/Zd7z8sv/hksuuQyefuYZs2/PPXaHk08+CT5/0BfUPtmxe+qpp+Hqq69RtoYem222GZxx+qkwdeo+EV6effY5+P7lV8DMmf8wr40ZMwaOOeZoOO6rxxaRrw+lTLmGmDyvy1Ic/IVDAaE56KDPww+vulJVJkK86aTN1d/333c3bLPNNurvl156WaUWHR0danv8+I/AsmXLVUdw8uSt4MUXX4pAfO+998HpZ3xb7a+rq4PJkydDc1OTAfqySy+Gww77omlEBP3II4+GNWvWqH0IOyp+
W1ub2j7uuK/COWef+aE0etE+JNcQr169Grbdbnvo7OwEbhkWLlwIu+waJBa33XoLTJmyWyLERxxxlFJg7Ohd86OrYOutt1bAP/DAg3DW2f8D7e3tFsQI4ic/9WlAG/H5zx0I5577PzB8+HB1zA033gRXXnkV9O/fH/4x8xkYOHCgAnf3PfaCxYvfgz123x0uuuh8ZSPwvLfffqfy9vi45eafwp577lE0xtZ7eXINMdYORm2PP/4ETNt3Ktx44/Wqwq6/4Ub4wQ9+CCNGjID/+/tTKp2IU+J3310MO39yF/X6XXfeDjvuuINV6ZdfcSXcdNNPLYgffvgRFdWNHj0K/vrkE9Dc3Gze093dDftMnaY8MqYmmJ489NDDcNLJp8LgwYPh78/8TVkc/jjttDPgvvsfsMqw3lu+QB+Qe4jvu+9+OO30b0FjYyO88K9nVUoxbb/94ZVXXoGjjz4Kzvveuaa5XHbiz3/+Cxx/wtdVJ+65Z/8ZaVq0ARjn4YM8MXrnm2/5uVLfSZM2jbxnzpw5gB1EzKExj7744kvhlp/fCvvuOxVu0j80/iaCHH8Uf3/mqQLh9eEUJfcQo6dES4HPV199FXx8u21h9z32VrV33713w7bbBr43Tonv+u3v4KyzzoFNN/0o/PlPf4zU+oIFC2DXT+9uQXzGt74D99xzb2oLnXbqKXDyyScCHX/EEV+Ciy+6MPK+f86aBYceepjy1vPnzUk9rz/AroHcQ4zFwf9Vo5rtv/9nYdKmm8LVP7pGdc6enPG4VVqXEv/x0T/B17/+zV4pMSnrkdOPgAsvPD+VKa/EqVW0TgcUAmICcciQIfCRj2ys0gZSQV47Lojnzn0Vpu67nzrst3fdATvs8AmrQq+44gdw403BQAnZiQcefAhOOeU02HzzzeEPjzwYaQBU6UmbTYLJW22l1NV74nViNPXNhYAY0wm0FJhW0GPGjMdgwvjxqUqMB6D9wLhrk002gWuvvRq2njxZpRMPPvQwnHnm2ZF0Audu4EALPp9/3rnw5S8H2TM+fvObO+C7535PKftTf3vSpBO77Lqb8sm77fZpwPht7NixkXQCs27MvP2jdzVQCIixyNynbrfdtnDvPb+P1ERcTkxKSW+YMGECLF26VOXE+DcCzpUY/6aMGv/ebNIkGDZ8mIKaMuULLjgPjjpyuvkOPFem97z51luAU0PVD2n3KXDrz2/uXev5o1UNFAbiJ2bMgGOOOU4VChMJTCbkIw5iPA7nYlz5g6ssNd9ppx3hzO98G3DgREKM23/5y2Nw7Y+vhxdeeMF8FMZ5x3zlaDjnnLMin4/zOK699rrIiN306Yer2XgUBXo2e1cDhYG4d8V2H7169Rp4/vnn1YsbbTRG2YssD7QJ8+a9Ch0dnbDxxhsrX5708HMnstRq9mM8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq0BD3GNNoz/WtlrwEOcva78kTVaAx7iGm0Y/7Wy14CHOHtd+SNrtAY8xDXaMP5rZa8BD3H2uvJH1mgNeIhrtGH818peAx7i7HXlj6zRGvAQ12jD+K+VvQY8xNnryh9ZozXgIa7RhvFfK3sNeIiz15U/skZrwENcow3jv1b2GvAQZ68rf2SN1oCHuEYbxn+t7DXgIc5eV/7IGq2BYkLc3Q3Q2Q7Q1YnLw0NPVyfUQQ9Aj+Mf7sfj8YGv8+PU37RfHRBsB//Rx9N2XbBdF76sjqHtOvyDPWjbeq7DZeeDg9Sz3sa/Xdt8f309QH0DAK7F3NAI0NgMgPtK8CgWxN1dAO1tAF0dAtju+G0FMYNXMYnHC6gltBbMBCvuNNRqmglKoolvS2gFvASzE+L6AOzIP9pfH8Dc3BLAXeBHcSBGeDvaNKwEbQK8UpWlUtO2pdCkxlqJCQyl4PxBMDvIkQqs1FZLtnmOUV6pzApghBbhZ/AasPW+ppYA
5oI+igFx25rAOqAtUHDGPUs7wSCPQJygxAZaaSuSKHEoMkFLIBvLoKF0KrGEltSYK7Pj76ZmgObovf6KwHX+IUaAO8k+aChjYSaIBbzcTpBCO5VYQMsthaJBK7DyxtxWOLwwV2AOawRoZhksJZZ2QoBbrz01+mJS7MYmgJb+ReDWKkO+IUb70NEer8Cqw6Y7bpZ9kBBTx468sKOjxz0xKXFEkeMtcVDrZB303wSveZYdObIJpMx1AASntAwEqgteYzvqAluB9qJAj/xCjJ24NasC+6CU12EhSGEjyswUWXbsnEqs4Y5LJxTMskNHlOj9znSC++GEVCJTx04rrkokEHbhkfl2/4GF6uzlF+K1q4WN6A5SBgUz79Dp/ZYix9kJhxJbUJNloBiN2Yc0ZTMJm/DGsREb974uJSZoHbaBK6+EGrcbmgD62bchS/v6tfx6PiFGZV29IoRVKrGB2dXJo1yYd/JkisHzYg0u+V8eRBhbwYQ4rrVFTBzmwfoNHOY05VVKS8mE7NgxReYwY3rB1XjA4MLkyPmEuH0tAP6TNiETzDFgx0Vu5IUJXpkPG6/cWzvhgJescuLgRlyHLoudwGP0cdjBK0jslk+I16wMOnRkGwhmAzFLKeJshBXFuVILbRksBaYRO24nxAhdqhLH2QmXIouOnTWwIXPhGAW2vDFTY4zc+g+qZZeQ+bvlE+KVrQDdLBe2FBk9sOjk9SZy48PM3C7wv00HT9ezHOugqM08a7vBm8XYhzh4WZoRl0RY3pfsBYObWw4FM1NiHM0bNDQzKLV8YC4h7mldinfMCTtxlhLz/aKjlxi5sbkQ6txim/XpgjkWTIGtdEI2d1w6wSCliI18s4JTn0dFZno4Wnpc3JYpBNkF2i/hNfsbAIYEN1XP+yOXEEPr0gBgA2+XPVIX8cYOZaZojeZJ8Mk+ul8Xwuoa5CDI9cSfNBJS0wkClSmzCh44xKA7YxngtWAWnTrq5A0dkfatc/F6PiFe/h5T4ThFZlGby17w2WsqT9ZQkgJbSky2gc1is1IKamueF9M+1uHjCYXLTrigpeNIkc1gBk8nuB92eWMGMVfmYSNzAWnal8wpxIuFEhOwIiuWubFz0CMGXt6hc9kHV9SWWtvaQrCniNLyyUBkK6xnbTUi8ZnwxAQrtxsGYK3kw0alfeNcvJ5PiJctsgc2ZDqRqaOXoLwuZTYemCcUwhsnNXmSndDjFWZY2igyAcs9Mf+b58VCgZ1emBRZPw8fnQtI075kPiFeuijBE3Ov7FJoitMYxARtqq3g0Mq4jSgn+yDshGvY2dWRc6kvnsrYCAm0wz5IxeUDHRzuEWPS+MjF6/mEeMlCPWdCd+ioI5fW0VOvx9kHlkiYdEKDqeYDJSiwnE8cN53YTL1kkRuHVtkMbhfStkmJmT9OtBHCG284NheQpn3JfEL8HkKsVdYCmE/FdCmyBpErL4/TrL9dkVsKzKZ/FzMV00CsX5f+1yhuAry8g2fyY51aGJ8sbIOJ4sR+D3Ha72M9vr74bXv2WgRk2cHjcyNA+2lSZIcCp9oKllYYr2z9wQrvyIld8JIPjjxrz6EGLiTcpMS035FCWMrMX28AGDluPTbSh3fqfCqxgpiUVj6zqZlWXqyvpeN2QqUXDo+cqsiuwQ7WaC47wTt2FsT6fbFJhIA3Von1RCDueQ3A2m5IoEcl30j9w8Nw3T4pnxC/+6ZW0y7x7ABbzTcWsJptrcZqW3plsc8MhiSM5FlXdtCFHiwc5h05itnkYEYczOrCZam8cptshbQTQoGVvWgAGP2RdaOnRt6dY4iTlJh3+BjAFqwId4yloI5cbGoR442NJxZuQm2yoeTMgxo6lTA2gqZd0n7HtrIdGtI4L0xpxejxNYLhun2NfEL8zhsJdkLCzdWWdeyM8jpshjUtUyi04pdUWnhjDjEbqDNNJOFVbGdMIwhOOj5u2+zX6uv0xPq1MRPWjZ4aeXdOIX5dRGykvPJZq3CifZAQx0Gd
NEFI0KsuFBV2wmUllD1gKm2lDVk6csxOWFDrETm1r0FPoNfPaCNIqT3EVfwZLnwtXYm5D6a/LfvgsBNxHb2kwRBSZl4dfEhadujkAIdrbgSHOZPysgEQSjFcCiz3jZ1YxUas3EfnU4ldEMuYLQIxRWtpyptkLxwDJU6IWTxhTfpxzEpLmx9hRWtxyss8chLE5IUJZg9x5X5JvT7TgvnxSmxgZtEZjdQZRRbZMHX4kpRYDorovp3xx6oQjgtHXbPV8FBrnjDzxk6vG6e0rv3UsXPEalyJ8e+x/9Xrqq/FN+RTiSMQu6I2DrHwxnHQcsixtWSa4cqPae4xLTJILFNrR7xwlo5cQpTGIVdX53NopSLzlAK9MOvseYir/Ht8e55WYtaR43bCshIynWB2IU2ZKaWQOfJGYwCGDGGrYlJK4YgnSIk7OgDeWgDQ1sZG3jjQ+u9M9sFhKywbwWHWlyQZFWYdvY0/WuWGrMzH51OJM0Es4OUJhQWvA2pSXNcgyWaTACb2MZpaswbgr0+r5WZNtGZNck9QVpUoZFFe/mOgK0D44AdLKcZ5iCvzM+rLWd5+Nbg0yagvKrLeloCaETvZsUtSZD3bzdgONvttzykAzc0ArR+EShy5UNRRqEEDAJqaAJ59AWDBQh17OaCMswcKYrIOvYGZKTGqsYrYNNQbb9qX2q+59+RUiTXEBlwGMR9mlhBK+xCnyNZQtYB/6l4ADfUAjz0JsHqNPVxt/DC7TInsxKd2ABi1IcCL/wZ4/c1AiVuaAIYNA1iyxFZmAywNI2toJcSWP5YKzLyyshI8J/YQV/+X+NZcpryowGkQp3Ts4uBWMLOhaTxu370DiP8yA2CVhJjNOTYdO92z22UHgNEjAf71cgBx/xaAnT8BsGgxwKvzQ2U2oIoOW0SJEUSpyDFe2SgwdfQ0xB+ZVP22rMA3yKcSOyFG4BBmVE5uB9gEIJlKpCkzh5iOJYj/PCNUYhoMIS/tWm9i1x1DiN9ZBLDLTgCDBgLMnccgTuiwWRBzwJlXjvwA9OIruJ/bCOrkeYgr8BPq6ynemhPEX1yBZT5sjdKhWsd09HB/0uietBzTUIkbAP6ESrw6OvtNDn7QYMauOwGMGQnw7zkAG48FGKxX35mDEM9jfpdsgV5QME6ZY/e73ieiNeWJGwA8xH0lsALvM0qs82GnnUjqyDmUWqYXcSN+09ATC4j5e+XqQATxbjsFSow/NlRCeqAS4z+ljq6cN8HrJh7PzmcNcrCOnYe4AjD29RRvzknwxL2EN1N6waDfTyvxo0+ESiwHScyKQGwq5W47B0osH6jECuI0WB2Qx3X0XCrtshPjN+trC9TU+3LqidFOiCmXSXYiLRfuzesGYrITDs9NF46auAwAkiBGOxHXceuVnYjz1N5O1NSvTn0ZrsQqZkNbQVBnzYN70+FjnloqsctT44+C5g4ThFO0Ei9ZBrAhWwNtfSuxNUlezDH2SlxFtl0dO2ttNhaNOVVWvB5JKRLSjVglFh1ErB6e96InHjMK4LmXgnx4C/2/cvLEdKylvEkeuTce2itxFWmN+ehqKLGK7noA+uyJGcSvvQGw9RYAm04E8Eq8znzl1BPP1VkwtxF8nQl+GZK2DRFFTojcIqN+TLkT0wnHslYynXj+ZQCEGBV328nBHVBNx44PLSdFbWwkL5Nn5krMruzw6cQ6/4D6foI85sSf1hEbjtghxNTpGzoYYOVKnxP3nQZcq1quwbQOZ/uw3spH7Mz8CRp+7m3EljDC5xp2nrIrwMB1uPPQ32cBLH7PTiMieW+l82LHiB1mxxv7YecPC9no5yDE1vzh9T13gnX0hg4F2HpLgCGDe1f+zk6AV18DeOVVu8OXNqmdT8E0E360nejN3AnXBCBvJ3rXhhU92kzF5Fc3V3AqpjWPWE7Z5GtVyAVWxA1pKKFQz67lWdPWjxCvWxDLKzqS0grpif0story2KeTVXM+MV+vImlVeSpY0tXNHOwsV3T4+cRO
XPLpiV1XdnBv7Jp9Fju8zDxx3Dxi60qPhMVU6Do765J9TbELZrIKfHkq54Wi4pq71HnEPL0Qk+KVrdBK7K/s6JOGVuZNmS5PojnErhWAXIMZYt5w6gpBLpipeI5J8XIRwcjaEmLuhDXiFzM53tkhlLbCX2NXGegqfRbn1c5svWJrBhrCxoaNXYrsSiEs2+BY1xjLxK9+VtuOgqat/EOX7kfshPDQciRP3R3Jtd6EnGvMcmFa+YdmtY3zl+xXGs3s5/PrToRTNxPTDbE6pl93Ijtj6/3IpGWsIounxFzNLC87SrUPGZZ65W4i0rHTEYXTG6ctnp0hxbBgZjekkeDytSc28stYrXdWYz+gTxC7lq9yrU8sFl3BL6EuP9JDz04bwYabaSFBCTEt7VrNtdj8MlbVYzbyye/gqpjyjqKOO4zKa+3kehJ0zZ1cosq5P+uqmMIbq6uddUcvskI8LZ4Sd4svx/rEcVEcn4+svLL0wmQt2FXPflXMKkLt1ycO51rIVTNN/ObXJ64ioRk+Wt3ugKcR4n516po7fbtc17VycSv88P3GPvB7erBEgmyFeWY+wnkLsAwrxfNFuKXiyshNduisbdftDWTUVg/gV4rPANv6OiQX9+zQNy6nxVN0vy7TbQ/MSB5fVyLGcliw+3t2rC/kKn/e1LsnyZs0uu6SFHfPDscaxJE8uA93T3JCzNU5ZmX43s65MHcPRTXWK8Urv+xQYn/3pMqzmfmM/j520SVdlZ3w97HLzFDVD8Tb4jpXhnfdZZTd105O3qGVe/i6EbScq4nWEm40Q1OxsywoKG97QMpMdoM6aK5tbi/MKppyRE8rrQJZA50Gtb+jaBVR7s29nRWg3F7E2QXXvZ01nXm/t7NlMdgNzT3EVYR46SLH7Q4ojaCkQt5ZVL9OSksXfkYGM2hQg93myygueWGXJ45LJ2ROrOvNKC6ba6zUmXlj3OZzK8y2WKuYK66aUxHjgS3LUQ8wYkwVG7FyH53PqZjLFun107h9iIOWDYKom5rTcTGK7LQRHFq28mVf7YTp5HFo9U5lHegGNWmDIWx42YKXQZwE9fDRlSOpimfKJ8TLF7tvUG5Byr0wh9ehyNaIXYaROQlvlssUedSWlFRYCp2WWGj/i2u8qvSB4E3wxNwzDxtVRfQq99E5hfi9wE6ooWeCkg87s8EPo7wcXgZ1nPLyG8pQx40v3co7c+ZvNo84vBujGHZ22AmXjUi0FVyBKZVg4GaBGaHfwLE2XOXY+tDOlE+IW5c4lJhBaiUXMfDSiB73yApG13VzZCG4ldDHsqfUVqO5E/gZPIUwSQW3FMIbG6hJcdUECZZEiIhN2ghXajF0ROpXzsMBuYS4p3UprjXA1NhlF4RKkyKr4WR9vAGY3ZNDQclApm0Oq9NOyOlr1Px6f1Y7wVWZd+zMlSAaYumBaTuTN9aX8A9ha8LlgdaY75hLiGFla7iIIJ/NZnlikVJE4OUgs7+lvSB4uUpTZcZ27ByTJ0xOnGAnLFshVZng1VduxEEbBzeN2NHrjY0AA4fmGN3wq+cT4tUrATrbHZ6YqbNTeV0KTO/RlzHxwQ4aHDEqvA52gg92xNkJglh6ZIrGjBqT/3U8UwfPCTPrADY1A/TXq9XnHOV8Qty+FgD/kS0gD2xmrqnLNphtEKpLk9xJuY2toDkWanQj4Ro6cYOZPqcT/F7PTHnVn1J5mQKbq6PXAeaWfgDN/XKOb/D18wkxQrt6hfbEPErj0y8JXPmsj1cdOAatBNm8rmU4zU7EWWJujTkyPEqLKLAGVl0NIu2DDpLN0HJGeyEHRAYMtm+7kGOc8wkxVvja1QCdHeG8YStuE1kwQk/KbD3rHC1JiSMws2yNR2+ZIda+InIVtIaTWwYLYgavUmK1hpUe0ZP5MFNol71oaALotw7rydUY8PmFGCe+r1llR21WbKbTiVhboUFXrztgJniVEOv0wtxURg4/u/JhLsEsUjO7CVqyEQS3S4X5Pn2pvuzY
mUnxIr1weeP+A4PLlwryyC/E2AC4ti928CKemC7sTLAUpgMn7YWGljyxVGIOsjXIwa+tY+kEmzqhHZx+YtAaO0HWQHti443ldozN4B1AqcAEc3MLQFNLQfDNsyfmTdC2RtgKV4cuLpUg2NOUmL1u4HYsHkgXhOIPhF8g6vLCdPWzGtlj0yrNVdFp9iIucmMjeGQ7CO7GJoCW/oUCOL8dO9kMbWsBujocaUQCvGQ98Fzkmc2wMh/80J0/C15jhoNvotKJjKaYRu3M6J2c7MPgtZSY9ks7EdOxMzPW9PswUitIGiGbP992gpcGrUVHG+voyYk+rm1XpEbQSnj50LOYBR+J2ByDHZYXZtEaV2QDuPTFZCe4jeDw8kiOK7T+G+0D2oiCPooDsVLUrsAnoyqrwQ7mjTNHai54XR05JcFaiYkOAXfgE4LjzLAzTyfoGjvWyTPRm77raFJaQWorUwqyEQ2NAbwF6sS5fofFgphKiPYAO3xdnQBdXdDT1Ql1FsQsIzaRG9kC8RoftYtLJywvLBdPoUlsYtw5MgFI2og0T8xUmaBFWPGWvQhvY3NhcuC0/4EUE+K0UvvXC1UDHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C+MhLme7F6rUHuJCNWc5C1NTEM+d90Y5W8GXep1qoKYgXqeS+DeXtgY8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xKVt+uIU3ENcnLYsbUk8xOuh6Xt6euDNt96CCePHr4ez+1PKGvAQryMTH9t8K2hvb4fbf/Mr+OQnd4auri74/EFfgBdffAlO/OY34IwzTlvHT/BvT6uBwkCM8Hz1uBNgzZo1MGLECLj+umvTyl6R1yXES5cuhU/ssLM693bbbQv33vP7inyOP0l8DRQG4idmzIBjjjnOlPT+++6GbbbZZr23vYQYP/DSyy6HBx98EC44/zyYOnWf9f4dyv4BhYH41FNPh/sfeBAGDhwIq1atgqOPPgrO+9656719XRCv9w/1H2DVQCEgXr16Nfy/7XeEtrY2uPDC8+F73zsfhgwZArNnzYSGhoaKNfnatWth7ty5yq6MHTtWnbcvEHd3d8Obb74JK1asgEmTJkG/fv0q9h3LeKJCQPy7398N3/nOWSoNeOyxP8H2n9gJWltb4eaf/QT22mtPq11/dvMt8Nhjj8Nuu30avvH1r1mv/eIXv4Q//PFR2HWXXeDEE79hXlu+fDlccun34f77H4DOzk61f+LYOIdFAAAf00lEQVTEiXDrz38GBx18CCxf/r7p2OFr5513AbwyZw4cfviX4MAD9jfnQd9+
1VVXw5133aXegw/8kU2Zshuc+91zYJNNNikjg+tc5kJAfMQRR8HTzzxj0oCzzjoH7vrt7+Czn/0MXPfja6xKOud/zoU77rgTDj74ILjqB1dYr11w4UVw222/hM8deAD86Ec/VK9hR3Hafp+FN998S20PHToUBg0aBAsWLIBx48bBypUr1Q+G0gk85tBDD4N/zpoFZ591Jhx//FfV+zB2+/o3ToRHH/2T2h4woL9S81dfnae2hw8fDvfc8zsfy/UB6dxDvGjRIvjULrspSGY88ReYMGECPPX00zB9+pehsbERnn9uFgwYMMBUTW8hvvba6+DqH10D/fv3hyuv+D5Mm7avUk+E9JRTToeFCxeqc6dBfMvPb4WLL75UWYfLL78MPrPfNPX9EOLTT/8WvPDiizB58lbw4AP39aEZy/2W3EN8w403wZVXXgWbb745/OGRB43q7bjTp2DJkiVw2aUXw2GHfbHPEO+191SYP/81OO3UU+Dkk0+0aJkx40n4yjGB0qZBvPc++8K8efPh1FNOhlNOOck6zxtvvAG777G32odlwLL4R/YayD3E+0ydptTs298+w/K4ZA123HEHuOvO2/sEMaYck7feTr33kYcfhC22sOFCj4uvY4cvCeK08+D5p+67H8yd+ypceslF8KUvHZa9Bf2RkGuI8X/BBx54UGozPvP0/8KYMWPUcb2xE+iDp+wedAz/MfMZ2HDDDSOftduUPeCtt95OhBhfx+OSznPkUUfD3/72FJx+2qlw0knfTC2TPyCsgVxDTGqb1qAcjCSIzz7nu3DnnXeZjh122Lb7+Ce8EqdVcJVfzy3E+L/ynXbeBXCYd/oRh8NnPrNfpCrvuuu3agAEO3vY6cMHdq6wk7X33nvBz356k/Wefad9FubMmWOlE5g/Y8TmUsgnn/wrHP2VY9U5vCeuHsm5hfjxx5+AY796vKo5SiVkNc6aPRsOOSTo1OEcBpzLgFnw+RdcpNIG7EQh4Pig9AD/5hEbqTMef9VVV8K+U/eB+vp6mD37WTjp5FMzpxM33fRTuPyKK1U6gZ3NAw7YX6Uc2Nk77bQzVDqx2WabwaN/fLh6NOT0k3MLMQL00EMPq/kROE8i7oHx2zvvvANHTj9CjeYtWvQu7LHn3qozNnjwYNhyyy3ULLTnnnsexo8fD5gUcIgxQsPkoKOjQ30E5sSDBw+Ct99eoPLipqbGyGCHKyfGz9j/gM+pzhs+8L2jRo1UyQc+EOg7bv817LBDYF/8I3sN5BJiPsz83e+eA8ce85XYEn//8ivgJz/5mTUMjdHYmWedDYsXv6fe19LSAkcdOR1GjR4Fl1xymQUxvj5z5j/gjG99W4FLD1TwSy6+EL533vkKxDQ7ge/DgZMf/vBHcPc990RG7M44/TT1g/KP3tdALiHufTGj78D5C6i6qMxbbbWlgjzpgcfPfvZZ6GjvgIEDB8DWW28NdXV1ffoqfu5En6ot9k2lhbiy1ejPVs0a8BBXs/b9Z1ekBjzEFalGf5Jq1oCHuJq17z+7IjXgIa5INfqTVLMGPMTVrH3/2RWpAQ9xRarRn6SaNeAhrmbt+8+uSA14iCtSjf4k1awBD3E1a99/dkVqwENckWr0J6lmDXiIq1n7/rMrUgMe4opUoz9JNWvAQ1zN2vefXZEa8BBXpBr9SapZAx7iata+/+yK1ICHuCLV6E9SzRrwEFez9v1nV6QGPMQVqUZ/kmrWgIe4mrXvP7siNeAhrkg1+pNUswY8xNWsff/ZFamBYkLc3Q7QtQKgaxVA92qArjYA6ATo6bL/AW53B/tAP+O2+ps9q797gv34bLZxn1pBW79ObYLb8nJ+vV1Xrw/C5zoAddk/PuN2vX7GbbxNA23jc0P4T+3n2/h3I0BDC0D9AICGgQCNgwHqmisCSa2fpFgQd68F6FgG0LkCQAFK4MYBzCHWfxuwJcwSYmxaPGYdICaAFZQIM4eWoEZANbROeAnmRgZ5I0B9A/Q0DIa6puEA
9cW+J0hxIFbwLtfgSngZxAZuUuAugG6mxKTIUo25AmdSYtIvUmSHElsQkzILeAlspcwELELdGGzX8/0EMj6zvxuHASDMBX0UAOIegPbFgfoq5aV/LiXGm8Y4LEUSxMZWxNkJtBfaVijLIR8SYm0flN2QCsxthVZgBW6MEitLoWGmv82zABrtRfMoh83JP9n5h7j9XW0fOgG6NaTqWW5zqBnIFtRoD8hWkFIjpKjUzAtbSpwGMULCQDbqGwcxKqv2xBLeiPIyiJUiBzYieCalbtKKjZ4ZQR6df2pFCfINMVoI/GcpsEuNHd7Ypciq48c7eo5OXqRj11eImQpTx87q3PXWC3Pl1XArSyH+oa0omLXIL8TYiWtbEADcjcuucuXt0CmEA2iEV9kHUmaEFpWb0gmpxCKpSIQ4TeRkGuFSY925M+kEpRQIo/bC9KyUmeyE9MN6u56UuEl3/JoAWsYVqrOXW4h72hZCXecHbhW27ASDu1dpRZwi9wQWGPkL/6AdmZ/DFTVZh04pMSkw/c0VmcVqvOPGYVZQazvhUuK6piC16BfcEbUIj3xCjDnw2tcBelBxNaQKUL3t7OC5OnykyC6PzBUYjwsfph+ndxmmUxC2TkJhBd8Z8cCuPFhGajEKbADWCqwUmcHdf2JhcuR8Qty+BKBjcWAjXPDGKbHZj1Dy7Bj/ZpEbeWOdA+ONHlF6e6AH6vBZUMwhdmUTrqEP6uvh+YKBlIDqYMlj6thpYFWKwXNgZiNMB44psFOJEWbq8KGlGA3QNKIIQpzTW4CtfROgs5UpMfPEBmpUV/LG3COTFxYQm44ewRygpR40MGe2BdTOaI0O5pAGPwJ1Qr1At42w2R282ShzjI2w4jSCnHlfbidQibk6Nw4F6DfeQ1y1Glj1H4DuNQ4llvaCd+yknXBDrHjVSkuCa8Fcx2JhUwFaSfENdYFSK8+rt80zV1zOOE9vjc0I8A68c1aIWRJhvLGAl/Y3DAAYWIw7l+bTTqx4XqssQRtjK0iJI+kF5cjaVujBDg5rCHNgI5yKTEKthdXq5+nXOMuuHz2/Y0LgJDS8lr3QqhxJI3geTHZCKjGzEXUEND43AQzetmo6VMkPzifEH8xKhhi9b2xHL9rBI4/LYQ0gttg1yhrxxK6enXATCmZrvIwrrfTEbHiEoCbaeSrhTB+0Ghv7wMFF6Nn2kO0ryVLVzpVTiGeG+bBS2xRFVkqM+bDDI4fuwThbA6mAOJpK2DbCjtxcUsw8sVRadnjQteMdPd3XMz8MNjfCyoH5wAYpMKUSDpiH7Fg18Cr5wfmE+P2nbaVVcLpgjlFkPTxtUgfywMweBNY4lFi3Mtt5sUkvKMXQz5YGy2jN7udpDxx8LgUVxmVwuHF4OSYHDlIIAS3aB/TDylLg300AQz9ZSZaqdq6cQvy3AForYsNtyop5Xkz76TWdTnAFjoWY7ARTXD5z2N6d2IhmdjGbZsxnHJvBD0zX2JmMi2BphkrgFNkIMo/OHPBaMGt4FfxNABvsWjXwKvnB+YR4+ZMM4jg7IeENIY4ocMQ2BDuc6YSCOJpGZG2UAFbhgcPELYiI1cm4Zw6jNwM1KrWaEMejNQ2xyolFJ4621bP+N2xK1q9d08flFOLHbU8cayeYV1ZzK4Jb2xo4Dbw4iSfMhcPXwzyYv49aNPTI2cfsBMOh6hp1dacTEUXmiu2yDxxW8zoBjJA3Awzbo6bhzPrl8gnx0j8zD8xsRSLMHWakDeEjj0tRGsVr9n5mJyTkJKispg3KlA+LYWjeKPJmpFxhQzXmCmx39MzAnlFk1pEju0DeV2036xE7psQj9snKSU0fl1OIH3XYCeGRDdCkwN1MgXWYJsAkm8DTCQtqbQNcw85WK0thdk1Fj7MN2kmQLyavHCpx8El8fwA0DU2zjpsLXq7QI/ataTizfrl8QrzkkQSIox2+HjWYob2sUOHAEthQhxdqmOTYKLexFUqJbbthKt0OhK224MPO8R04HcUxyxBr
J+rUbI4AaqujxxTXKDJ17PRrG34mKyc1fVxOIX7IhtiyEQRx2OFTymnBG26HdiKc2OP0zAHrtqeWTSuHmSPDzix5EFEbdfiMwmZQZNWvo9SCfhGWF2bQuvZvuH9Nw5n1y+UT4vfud0PsgLkH0EbIjhtuB8PJyGWkIxeZOxEeryWdP5mwQQ6GBP/bD6dQuDxxRJmF+spBD67IAcRkL+hZx24ELYdX/j3yc1k5qenjcgrxfTojbo9R5PYwM9aQBrCK6Eyrc3S/tgk8P7b8M3UDbWVOauloR85OIcLsN4zYlEPQJ42zExEFp9gN0wduI/ggh9rfDDDy8zUNZ9Yvl0+IF9+tIdaWASfJm8EP2odpRLAuBELKO2iZYdYWgpyxtBlUyep8JLlmfjC9ag/JRaBkhwUdtHCkLrQLtkemOccKcmZLgm31rgBSJ8Rs/6gvZOWkpo/LKcS/F0qMEGMKYcNswDVe1mUfgv/dR22FiOEcSh5CbLexs18nPDAf1EBjI+Gl16PpRAC5K63g+4PBDp5UCKgR8lGH1DScWb9cPiFedFeYExO4SolDiFGFFbKiQ6eAlR00rdTOQQ6ZKXOYDcXaViTUukkirEGK6ESfdM/rUuoAahtuvPoDIXbASwo9+otZOanp43IK8Z0aYvS+aB+kN27XNoIUlqUR6k8BNymxA3reAQzTieSRPFeLhxAzL5zJ81KHLRzscKUSIcRsUIQsBXlgYy+aA8BHH1bTcGb9cjmF+HaHnQijtR41Sy3syHUzOxHsj9qHNM/MzyfnVPDZbnX1LdA8bFuob94AOlr/DZ2r3zZzJWiUTVtf9R0pnZCDF65Bjgi86keAcyiYZzaKTLkxKTFXZA3xmMOzclLTx+UU4t/ojhwpsHhGK8FtgMMuOD1wnDdOGKamH4UCsmEADNnsBGjop1fZ6emGVW/fB21LZhoI7JRBDGoQlGbwwp4cb9IKA2+KN1ajeA54SZnHHFHTcGb9cvmE+J1fJUJMaQT3uDbU1GmLdvRsuLXXdaYbgS3g2fDA8QdBvw13hp6utdC55h1oGjQRero7YPlL34eejpWB8poOXiCZpM7hczA7jW+HnTx7LoXtgWWHj2a5aV9M/pg/b3RkVk5q+ricQvzLBIg7I3MkLJjpknumujxyS8qT8cQW5EqGzaA1bLDFqdDYfyNYMf830Lb8X7DBlqdBY/8xsGL+r6H9/Rc0xMyzxkAdRmWuDlu8Nw7g1z8O80Pgo3aoytpK4LOHuIo/zoW/YBCLzl1PV5ALJ9oJ1uGTgyEcbkUoG442x7qnbg7f6lvQ0G8ktM65Gdo/mAsbbP4NaBo0AVa8die0LXvWVmIxXOzqmIW5MV31rKF2wBrrqdUVIHpwQz5v9OUqNmLlPjqnSnxb2LGjdMKkFKEfthTWpBIBlNITu7YpnuPD1vY0TnsEkCB+f84t0P7BHBjmhFinExGI3XaA2w23QjuUmcdt5IuNjWAdPQ9x5X5JvT7TwltZOoFKHKpxkAOHcEl7YLaFnbDTCdsLq9PpH4ENcXgc/jV88regsd8oWI5K3DoHhm/xTaXEH8y/E9Yum619bhRWWl8i4nG5N9YjeUk5shty7NzFKPHYr/S66mvxDflUYoQYs2GjwiydwPUtJXQJI3ZGgTmkbDAk9kfgmDg0fPIZTohb5wd2QqqqcrCWalJklrEDp4eZzVRM7bEjUZwZghae2ENcxd/kgp+zAQ6txPpqZ1JimfsmwyrtBWXJ2vs6Jgq5OoAjtg6UeNkrgRKP2DJQYoR47dLZGtgwlQiVM/C8sWmDM1KLDj/TOVTfzsqLqXOHnTpmJ8YeU8VGrNxH51SJNcQRJQ6uoeuzEiuvbM8rjrMf+DlNgzdTI1/tK+ZBd+caGIFK3H80LHvlFmhrfUVB3DxoArw//w5oW/ocNA+eAA1Ng1T81t2+TEOb7GnjOmz1ep5PAK7o+Ml5xrF2wkNcuZ9Sb8+04BaRTgR2go/URTpqCWlFN/fQfFg6
VoEDjzxsy5OhccA4eP/VX8PaZf+CYR87FlqGfgzWLJkFa1vnwNBNDoL6hn6w5KVroHP1Athw8ukqcmud90voeP8lk1a4ojEnvMx6uLyxsRHWcbihr7GTWfG4Y3tb8zV5fE6V+BZHOhHMZMtsG4QHTvO+9sShwH4MmXgEtAzfBla98wSseOsP0DJsMgybdJTV0O0r34BlL18P9Y39YOTHz4O6ugZY+sJl0NP+fjBjkqUU0svGpxHafohBEZqaGR2e1lc3G0XWtsLbiSr+KGOUWEGs04nYCC3tdWVHpBd2pxXNw7aCoR89So3QLX35OuhY/S70H7kDDBgVrKzT3dEKra/fA90dK2DYRw+HfiM+Dh0rX4f3/3ODfaFn1txXeF05JTPeEzMltjyxV+LqUbywNpQYWR+2+dfU8HJX2zJoff0+aGv9j1n+CiFraOoPgz+yv4IbH++/8hPoWDnfdOKSJv70VYnxc7jCB8taiWQCt70SV49hsJSY5hFrO5GmtM4ozTH4oa/BU/lwQjrRNHA8DJ10DNQ3DlAV0r5iPrR/MA+62luhedAm0LLB5lDfNEi9tua9mbDijbvZ1cnhHOBEmKmjpuxHUO/RNCPcF00nvJ2oIq0xH70Q0wk+CR4B1rmxYxiZT8VMtBn03l4MliDk9S3Dla1oHLCR8wv39HTBqrcfgdXv/s2OvsQVGnEdvMTozRnN0SX8NJFID3aYSfJ6/oRX4iqyjRDL4WZ9VUdlcmIWtcVMFMLSB8PXFMnVqWSiZYMtoN+I7aGuoRk6Vr6p8uG1y1+EHrzTk2PRk/gRuOhsNuuaugi8lA3bE4CC9SjkiJ2HuIr06o9WI3ZSifW2nh6ZlO/G2wMxvJww0hdALK/DC674GL7lqUqVW+f9CtqWv2iiNAWx/k8wmT39WjnnIEhkODomJ9YjesG1djqRMFd7oCc+uvptWYFvkNOI7Tb3LLbuDpDrTLjnUrjTBrUOhfbApLQcVvsaPMd6FXqZqyjENmQEs2UfCO4keyCHlWUe7Bx2rg+vtZM5sYe4Aj+hvp7iHT4VUyoyTsV0QJo05TLVA4fnI6jtZz3hSJdnGCpx/43gA5xHvDyYR0wLSJj5vgw4YzP4PAqHUkdyZGlPnPOJcSomm0PMI7aN7Ey7r81R7fflVIlxUjy/OJTPn8CsOG0+ccLwsuncKUyhh6cUurVc61YER9OqwvRX2rJV4RwH1yX6IbTh4EYAfHTyezTdCH45dbQ6vDUJSHtkD3EVf3/m8iR9pbN1xXN7NiUWs9DkPGF+WX94Yah7Mjy/UBTBCX4i9AjmD5t1JfTu8BKkYO4DvS47evzSJFJsk1bwiUExdsSa8CM7eGOmV7ERK/fR+VTid37NVvyJXrYvV/5x2gs+R0J20uiaOhm5abnVboWtJE/+Jb5hzHJToa8QUzOjly2FV3YkT80085EjHT7ywzHzif2FopX7JfX6TItud0DMVTmYQxF7ZUbEA0c9b/B+YTvMOQO74LwVmCgMKit+XHiBaCjFctjYeGOZHztWv3QOdkRmr9FtDxwQo08e86VeV30tviGfSrzoDrGQoLxkH9dhY4ukxIy4WZBLe2FtC8ipJSliM1459MTaJNC9FVnHLjg4y6X7USvBvDB5Y3WiuIiNw0vLWrFO3hi/eEr1fpTv/tadE5tFBRHq6MrwzqueHfCGcEcVmjpw6hXjIpid0BY4tMRciuk+du4byQAtguKC3BW9mSHoKNxmwW2zxKueDM9XARr939Vrwwp+cj6V+N3fuZWY7mWnRu/0jDbHQoDuVTKjk+HJTtCl+tphmOo3d2EimplCs55doLz0Gimn3mPZDEodHHYi2vGjgRN2hQfr6AVziB1KzJe2Gn1oBVGq3qnyCfHie4QS06rwcolXXFRQD0okdeT4GmxmODn01GZRQssTR29UHkZsYYOSMBPEBG3ItO7QadLD49wdPfcqmWIwRV3hTCti0g1n5FKvuCrmwdUjr4KfnFOI701R
4gDqHnV/5+Srn7m60oidvY86eNw+kM2Q0ht2+BA2usNomCBrGxHGyPaKQMLbBt459Lz2haYJkEfuniQXUNGAjzqogihV71T5hPi9B5gSI6gUs4ULbIe3yU3yxvGeN/DP4eLcEnb1P3N1UABr1ge/GSNBye2Gc+V3PrpH3piUm64MIQtiqbBco5jZC1TqkQdm/do1fVxOIdY3nlGjdnJxbQ212k+KHCqknIppwck7aix5CAc7wglCvFXp9aSWDj1xcJTzLkqU8zojNZp77LAflhem+9lxG8HvY8dshb/xTBV/nEv+EK5PbCAOoTUqbO79TEu9RpevsiFmcyAsiIXiyrENRzghvbCpLUoU9A5LmcNhPraooAnrYhcaDGwH/ocW1pYKLCHWk+RH7FfFRqzcR+dTiZf+Ud9Yhisxuzl55MbldDfR6PoSAcQaXjNxSNsMPRvCvB44DPMwf2pbQd436oXDLl/glfmSreH5aISOrwjEBvi0f45bQQgn+sTd05k6eQJmfzPGyv2Sen2mZX9JuI+dVGTaRjVmE4OS4CWo9ReTdsGeKxFNKZzlMVMobI2OjuTx6CzAnR/j9sx43ZKO1FQqkQIzZcXD9+511dfiG/KpxMvwBuWyE5cArzoWlZqy4xjl5R01bhmsQQ3HcDPNXuMZWzjvxxp2JgicubHlibV3Zh238GY14iaMJo2Q8LJhZwturczD96xFJnv9nfIJ8fInNZSuNIIpL8HLvTHCzCf8uBRZ2gahzOFInahvDbGZKyGhZodH8mJFaMqInhgECXqIqMAcXlJkGuzQymzdDkwfv8GUXgNTi2/IKcT/64CYPLEDYqbESo3VPT3CaIxPu+QemRrMSifoF2Dez8aZHUoc3lKUa3AUVvOqGUpmKYQD3sBWYEdOQ0xKG4Fa3rCcbW+way0y2evvlE+IW59hnjgDvAitshOo3F3mWUfBYV9N3NYgCjGTWlbVkZSY99xEk7AAQt/uIMYjmxQjOsEnVGC9gLZ1Y3IBNc6dAD6bjW0PDRZ5yfsjpxDPDD2xyYMFzMAVmSCWz116ymbcoEbQvM6OXIoA27rL5k6wF0KtDYeNI4qsd1jDzQpaglU+U8dOKjRPKPTfQ3fMO7/B77lHTorNQ7FWzNKDHNRh089WtMaAxf3QFQx+qGf2Gm7LiE2RS+N1NDIXpmvGNcQM1IURW3xlRudQBMc6B0GMndApBNoIYxsata1AMEmZGcBoN4wSM7+MCj14+zy0dup3zCXEPSuegzo17ZKpL/e9siOnkwlKKIJnhJc9q/gtHNnTHGsptp7M1XRSoVNrW0OqIBcUx0ZtKijG4xFQ+keQJiiygheHmR32gtKMwdtl+co1f0wuIYbV/wHoWhP6XAtm6X3RVmBHTiuwshl624K5Ozgfm19MrZc45dIe/0htcD2fx776WV9jFyixVmQ+Tq3mQyCMGmKlrA1aYUmJOaxSkR3RW0N/gAGbp37fPByQT4jXvgHQ2cpsgbQVCCPBqp+NGmsFNlDjNvun7EUwhdM56d0Fbcqws9J3MQ5tdfA0vTyPCJQZ4eUKrJVX2QmuwtJeILQJqQX+CJqGAvSbkAdGU79jPiHuWArQ/q4ZwAi8LofVTiGSbQQqdFeg1ApmVGTa7g6U2UhyaCtYvy68iI4CYj4MLS+yI6Wlc7JBEdNaSnlRaQXExgtziCXM3BszRZZQN48GaBqRCkgeDsgnxN1tAGteDyEWOXBgF6QSuxSYPDFXYoJYPyPUoGG25khE4XY2OFdgfcGoncDxCA2hrXfAy2HmtkLnxJGOHlfimPSi/ya4EmIeGE39jvmEGIvVthCga4VIKVxRmujAUYdO2QlSXHwfV2CtyAiv2Y/o6W1RrWL+jzVJSPfLIheKWqqrFJfg5RBzRUYYtTc2doJUWHbwHB0+kxc3AjQMBmgZmwpHXg7IL8TdawHWvs3UOD4LdqYR5IONjdBqrEAVEJsYDl/TMNPCxcEFUFZ780v0bRDUfMng
XzB3UsCL22QjmPo6bQUHOAPEPFfutzFAfb+8MJr6PfMLMRatYxlA53Kd//IUguXBVgdORmtaiZUiCy+sbAR5ZPyblDgOYj0BWbkDBrXyxGbCbwzEHGaEmMOMgJIiy5iN0gpmK0wuzPJjPjzdOBygaXgqGHk6IN8QY01jB69T2worDyYbIcCNpBIUrTEFVh07sg78mQEcUWK+CMW6QMzshLEZBC9FbWm2IiY/bhwMgB26gj3yDzHC1L448McyD5YDGpEozdGhMx64txBzBWZdN9OxY1YiYifwNe2LXd441k64lJnlxipP1tsK4FF88YDCoFwAiHVboLXAfwZcrcAqpeAduGgmHEZq3D70EWKRRgQdO7ZImtMTZ4XYZStY3Ea2gUdxCHHjsMLEaa5fXnEgxtJhZw9B7lppDylT7quUWtsHpbi0TR05nhOjPUD48VnHbLxT5+rYqR6dGZUOvbAR5riOHUGsp1eqJTNZBy+ixCJyi0srGgYF/rdAnbjiQ0wlxBy5cyVA90o1PN3TvRbqTLTWrW2HI1IzsLo8McFMS8lTKqG3ZYfODGZQxw53OCBWgGqIZVohBz0MzOSJxXA0wtrQH3rqB0Id2oeC5MBpvqdYSpxWWv96IWvAQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1XoTzE5WrvQpbWQ1zIZi1Xof4/sY7KcTsYB2AAAAAASUVORK5CYII=\" alt=\"audio video logos\" style=\"float:left; height:200px;\" /\u003e\n", + "\"audio\n", "\n", "1. Integrations with multi-modal AI models to extract information from unstructured data, in this case audio files.\n", "\n", @@ -91,7 +91,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "0", "zoom": "181%" } @@ -116,7 +116,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "275%" } } @@ -138,7 +138,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "214%" } } @@ -153,21 +153,22 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "fa84ad03", "cell_type": "markdown", "source": [ - "**Important:** restart the kernel by going to \"Run -\u003e Restart \u0026 clear cell outputs\" before continuing.\n", + "**Important:** restart the kernel by going to \"Run -> Restart & clear 
cell outputs\" before continuing.\n", "\n", - "Configure bigframes to use your GCP project. First, go to \"Add-ons -\u003e Google Cloud SDK\" and click the \"Attach\" button. Then," + "Configure bigframes to use your GCP project. First, go to \"Add-ons -> Google Cloud SDK\" and click the \"Attach\" button. Then," ], "metadata": { "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "z-index": "4", "zoom": "236%" } @@ -196,7 +197,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "0b0b1cd8", @@ -214,7 +216,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "193%" } } @@ -229,7 +231,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "32e58a7f", @@ -243,7 +246,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "207%" } } @@ -270,7 +273,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "225%" } } @@ -285,7 +288,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "0c1fca97", @@ -298,7 +302,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "122%" } } @@ -316,7 +320,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "4a13e789", @@ -328,7 +333,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "134%" } } @@ -343,7 
+348,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "26b8baba", @@ -364,7 +370,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "af84cb21", @@ -393,7 +400,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "161%" } } @@ -411,7 +418,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "085deffd", @@ -429,7 +437,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "f8e653ee", @@ -441,7 +450,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "216%" } } @@ -465,7 +474,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "211%" } } @@ -485,7 +494,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "fae13ec5", @@ -497,7 +507,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "317%" } } @@ -532,7 +542,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "30969ae1", @@ -544,7 +555,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "229%" } } @@ -568,7 +579,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "177%" } } @@ -588,7 +599,8 @@ "tags": [], "trusted": true }, - 
"execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "6cddf53b", @@ -601,7 +613,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "141%" } } @@ -616,7 +628,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "ba0386cc", @@ -632,7 +645,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -650,7 +663,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "61a883b2", @@ -669,7 +683,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "152%" } } @@ -689,7 +703,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "e8a25c46", @@ -705,7 +720,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "181%" } } @@ -729,7 +744,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "163%" } } @@ -744,7 +759,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "5ed7776d", @@ -762,7 +778,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "125%" } } @@ -777,7 +793,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "c96e9832", @@ -792,7 +809,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - 
"body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "178%" } } @@ -812,7 +829,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "0e2a5d7b", @@ -824,7 +842,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "224%" } } @@ -844,7 +862,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "172%" } } @@ -859,7 +877,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "5e16fb14", @@ -876,7 +895,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "183%" } } @@ -901,7 +920,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "92%" } } @@ -919,7 +938,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "8aaaef1f", @@ -938,7 +958,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "127%" } } @@ -953,7 +973,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "908a2340", @@ -974,7 +995,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "175%" } } @@ -994,7 +1015,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "f84ebe70", @@ -1012,7 +1034,8 @@ }, "trusted": true }, - "execution_count": null + 
"execution_count": null, + "outputs": [] }, { "id": "eeff1c72", @@ -1025,7 +1048,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "158%" } } @@ -1043,7 +1066,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "7ec53675", @@ -1055,7 +1079,7 @@ "@deathbeds/jupyterlab-fonts": { "styles": { "": { - "body[data-jp-deck-mode='presenting'] \u0026": { + "body[data-jp-deck-mode='presenting'] &": { "zoom": "138%" } } @@ -1070,7 +1094,8 @@ }, "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "a96552fb", @@ -1101,7 +1126,8 @@ "tags": [], "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "72af7c7f", @@ -1110,7 +1136,8 @@ "metadata": { "trusted": true }, - "execution_count": null + "execution_count": null, + "outputs": [] } ], "metadata": { @@ -1149,4 +1176,4 @@ }, "nbformat_minor": 4, "nbformat": 4 -} +} \ No newline at end of file From 2531f43a3e27e2d229712bce7cdd5575fedc6834 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 18:45:11 +0000 Subject: [PATCH 24/26] fix: add missing outputs to notebook code cells --- .../generative_ai/ai_movie_poster.ipynb | 294 ++++---- .../multimodal/multimodal_dataframe.ipynb | 695 +++++++++--------- 2 files changed, 499 insertions(+), 490 deletions(-) diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index 8a19830358de..8df81706af47 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -21,7 +21,8 @@ "metadata": { "id": "XZpKUoHjXw3_" }, - "execution_count": 1 + "execution_count": 1, + "outputs": [] }, { "id": "ee509844", @@ -38,26 +39,26 @@ "id": "81b8de8d", "cell_type": 
"markdown", "source": [ - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" + " \n", + "
" ], "metadata": {}, "execution_count": null @@ -115,7 +116,8 @@ "metadata": { "id": "6nqoRHYbPAx3" }, - "execution_count": null + "execution_count": null, + "outputs": [] }, { "id": "015a63c1", @@ -167,11 +169,11 @@ "data": { "text/html": [ "\n", - " Query processed 0 Bytes in a moment of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", + " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -193,11 +195,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in a minute of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", + " Query processed 1.3 kB in a minute of slot time. [Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -211,7 +213,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -220,8 +222,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
poster
0
\n", + "

1 rows × 1 columns

\n", + "
[1 rows x 1 columns in total]" ], "text/plain": [ " poster\n", @@ -319,11 +321,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 2 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", + " Query processed 1.3 kB in 2 minutes of slot time. [Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -337,7 +339,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -346,8 +348,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitle
0Der Student von Prag
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" ], "text/plain": [ " poster title\n", @@ -453,11 +455,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", + " Query processed 1.3 kB in 4 minutes of slot time. [Job bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -471,7 +473,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -480,8 +482,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
0Der Student von Prag1913
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" ], "text/plain": [ " poster title \\\n", @@ -560,8 +562,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", + "

" ], "text/plain": [ - "poster struct\u003curi: string, version: string, authorize...\n", + "poster structJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -677,7 +679,7 @@ " " ], "text/plain": [ - "\u003cIPython.core.display.HTML object\u003e" + "" ] }, "metadata": {}, @@ -686,8 +688,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postertitleyear
8Shoulder Arms1918
\n", + "

1 rows × 3 columns

\n", + "
[1 rows x 3 columns in total]" ], "text/plain": [ " poster title year\n", diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index ebc2cb6bcd30..9f36cfbf4f72 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -19,7 +19,8 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": 1 + "execution_count": 1, + "outputs": [] }, { "id": "816ab253", @@ -27,26 +28,26 @@ "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e\n" + " \n", + "
\n" ], "metadata": { "id": "YOrUAvz6DMw-" @@ -84,7 +85,7 @@ "id": "750954c4", "cell_type": "markdown", "source": [ - "Install the latest bigframes package if bigframes version \u003c 2.4.0" + "Install the latest bigframes package if bigframes version < 2.4.0" ], "metadata": {}, "execution_count": null @@ -96,7 +97,8 @@ "# !pip install bigframes --upgrade" ], "metadata": {}, - "execution_count": 2 + "execution_count": 2, + "outputs": [] }, { "id": "df561d04", @@ -134,7 +136,8 @@ "id": "bGyhLnfEeB0X", "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" }, - "execution_count": 3 + "execution_count": 3, + "outputs": [] }, { "id": "35bd6e6e", @@ -180,7 +183,8 @@ " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ], "metadata": {}, - "execution_count": 4 + "execution_count": 4, + "outputs": [] }, { "id": "be9ce892", @@ -206,7 +210,8 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "execution_count": 5 + "execution_count": 5, + "outputs": [] }, { "id": "2e0436b0", @@ -241,8 +246,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0
1
2
3
4
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -359,8 +364,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -487,7 +492,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -580,8 +585,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageblurred
0
1
2
3
4
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " image \\\n", @@ -731,8 +736,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -824,7 +829,8 @@ "metadata": { "id": "IG3J3HsKhyBY" }, - "execution_count": 11 + "execution_count": 11, + "outputs": [] }, { "id": "829afc69", @@ -864,8 +870,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -983,8 +989,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1063,18 +1069,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", @@ -1120,7 +1126,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", + "def pdf_extract(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1147,7 +1153,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1165,7 +1171,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) \u003e= chunk_size:\n", + " while len(curr_chunk) >= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1202,8 +1208,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1265,13 +1271,13 @@ { "data": { "text/html": [ - "\u003cpre\u003e0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e"
+       "0    continues,\\ncontact customer support.\\nE2: Foo...
" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1306,7 +1312,8 @@ "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\\ndf = session._from_glob_path(audio_gcs_path, name=\"audio\")" ], "metadata": {}, - "execution_count": 17 + "execution_count": 17, + "outputs": [] }, { "id": "c9f9d484", @@ -1353,7 +1360,7 @@ { "data": { "text/html": [ - "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" + "
0    Now, as all books, not primarily intended as p...
" ], "text/plain": [ "0 Now, as all books, not primarily intended as p...\n", @@ -1390,11 +1397,11 @@ { "data": { "text/html": [ - "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" + "
0    {'status': '', 'content': 'Now, as all books, ...
" ], "text/plain": [ "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" + "Name: transcription_results, dtype: struct[pyarrow]" ] }, "execution_count": 19, @@ -1439,7 +1446,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", + "def extract_exif(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1491,11 +1498,11 @@ { "data": { "text/html": [ - "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" + "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" ], "text/plain": [ "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" + "Name: blob_col, dtype: extension>[pyarrow]" ] }, "execution_count": 21, From 7bdfcfcf51c928ec8e3556b4dfc01f2643d4e1fc Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 21:41:50 +0000 Subject: [PATCH 25/26] fix: replace private _from_glob_path with public APIs in notebooks --- .../generative_ai/ai_movie_poster.ipynb | 315 +++---- .../multimodal/multimodal_dataframe.ipynb | 804 ++++++++++-------- 2 files changed, 614 insertions(+), 505 deletions(-) diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index 8df81706af47..c2889ad4f92e 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -21,8 +21,7 @@ "metadata": { "id": "XZpKUoHjXw3_" }, - "execution_count": 1, - "outputs": [] + "execution_count": 1 }, { "id": "ee509844", @@ -39,26 +38,26 @@ "id": "81b8de8d", "cell_type": "markdown", "source": [ - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/generative_ai/ai_movie_poster.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
" + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e" ], "metadata": {}, "execution_count": null @@ -116,8 +115,7 @@ "metadata": { "id": "6nqoRHYbPAx3" }, - "execution_count": null, - "outputs": [] + "execution_count": null }, { "id": "015a63c1", @@ -145,7 +143,26 @@ "id": "47acbbfe", "cell_type": "code", "source": [ - "# Replace with your own connection name.\\nMY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\\n\\nimport bigframes.pandas as bpd\\nsession = bpd.get_global_session()\\n\\nmovies = session._from_glob_path(\\n \"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\",\\n connection = MY_CONNECTION,\\n name='poster')\\nmovies.head(1)" + "# Replace with your own connection name.\n", + "MY_CONNECTION = 'bigframes-default-connection' # @param {type:\"string\"}\n", + "\n", + "import gcsfs\n", + "import bigframes.pandas as bpd\n", + "import bigframes.bigquery as bbq\n", + "\n", + "session = bpd.get_global_session()\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\")\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "movies = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "movies['poster'] = bbq.obj.make_ref(movies['uri'], authorizer=MY_CONNECTION)\n", + "movies = movies[['poster']]\n", + "movies.head(1)" ], "metadata": { "colab": { @@ -169,11 +186,11 @@ "data": { "text/html": [ "\n", - " Query processed 0 Bytes in a moment of slot time. [Job bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details]\n", + " Query processed 0 Bytes in a moment of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:48a27954-7a4a-4b9e-8176-ea227fd188ad\u0026page=queryresults\"\u003eJob bigframes-dev:US.48a27954-7a4a-4b9e-8176-ea227fd188ad details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -195,11 +212,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in a minute of slot time. [Job bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details]\n", + " Query processed 1.3 kB in a minute of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:09c48ecb-e041-4c18-a390-ca5a36fd07c3\u0026page=queryresults\"\u003eJob bigframes-dev:US.09c48ecb-e041-4c18-a390-ca5a36fd07c3 details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -213,7 +230,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -222,8 +239,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
poster
0
\n", - "

1 rows × 1 columns

\n", - "
[1 rows x 1 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200041Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=9f955e89088240b34a5cbfba751fffacc5dfd7a2df468dcccfae06c939358c702ffbeb940403a69ad36e3fdf321abee60cf2b9795c9c1744bc0b164d6c2eca99666a0853e7afcf7670a07ff115bfe534791c9ab4267cb383e3a46ede9301aeeb8534a42a1d4c8f790f3a60eab06aa72a8fe76ee6cbb88de8e42a0809d8322a0ad8aecd1c64a55b1cc8716acf4f0dc2550a2059e63d98d49707fe27180ada0a277ea9b1827fc261657bcee9ec5cc7117df704f135d983325abb97dc77ee7a270c466e689921fce8ecd23824b515f2811c3c13ee382c5bc3bd34b7dd95a845705a8f654315b2128799efd0509dee5f6db1eb1b773438d3bfc8112d76cbe892e376\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 1 columns in total]" ], "text/plain": [ " poster\n", @@ -321,11 +338,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 2 minutes of slot time. [Job bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details]\n", + " Query processed 1.3 kB in 2 minutes of slot time. 
[\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:4a08a15f-5a2f-463b-bba8-734858ec992b\u0026page=queryresults\"\u003eJob bigframes-dev:US.4a08a15f-5a2f-463b-bba8-734858ec992b details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -339,7 +356,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -348,8 +365,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitle
0Der Student von Prag
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200057Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=29c8cf20d3f56ab1939ec00dbc1afd26e888b6475808258e34bc60a65e207b877c39853678b0cd1c9918d35e312e151725dbefc4ed6c519e4ec1f2c23c2e307f87442d09c5c8f0bbd49af92eb05e18ff35cd44f2f2954b79a33cf706c7ae1662e23e3220224d6f58b775cb1875213b5050f910cb41a4a8fb312f308b0566448ddf7ef15e22ec2a5261af2570f89e0f6067ac4cbf5874eaf522a6e4d8cf6e0313be3079b172bdc19c2d6901f53bbacf5bee3f2913c7f9f657cd1aed25d786f66a84f96e4dbe36e7f01d8b67887c9ac93edf866495fdf13c6b95152cdfa6b699fd14aeb477ec4a14fcd9f37eaf88ad02eb40a952635f97e7639be764b0007e011e\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], "text/plain": [ " poster title\n", @@ -455,11 +472,11 @@ "data": { "text/html": [ "\n", - " Query processed 1.3 kB in 4 minutes of slot time. 
[Job bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details]\n", + " Query processed 1.3 kB in 4 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:b60a151a-6cbc-405e-9c40-8a7461981a00\u0026page=queryresults\"\u003eJob bigframes-dev:US.b60a151a-6cbc-405e-9c40-8a7461981a00 details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -473,7 +490,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -482,8 +499,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
0Der Student von Prag1913
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fder_student_von_prag.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200120Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653080624441\u0026X-Goog-Signature=96035b9c90093c9636f0b406e5ca9daf52bb1019bde4d52e779f3ce7371e6df0430b3f2e991869065e113327a7698e7ce5ad7b4db8781aa65adea890b80976c97b93b3f9deac5002a1e27b4bd2c1df9250ff4167f150c88be2067f70d45b7c94fd6d69f36a90b5a3ad1a3d500e3cc89a4fe4a67157cbea164d5ce34506dd1d2353eedb1c663eb1a4578c8ff1f9af2ab21a7065de4ec3ff1af44e764a3215874e564e6beeb502739468a80a02c79dcc71f7518435686270d855007e01653659804b5f50ab9c43c4627f28625e07572a4b0f30de49397f9f0445571cdacb695747bdb17614addcf33a90036aa48d025baa8a4d6bd5000d0106a788c2c23f1292c8\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eDer Student von Prag\u003c/td\u003e\n", + " \u003ctd\u003e1913\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" ], "text/plain": [ " poster title \\\n", @@ -562,8 +579,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
posterstruct<uri: string, version: string, authorize...
titlestring[pyarrow]
yearInt64
\n", - "

" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003ctd\u003estruct\u0026lt;uri: string, version: string, authorize...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003ctd\u003estring[pyarrow]\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003ctd\u003eInt64\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003c/div\u003e\u003cbr\u003e\u003clabel\u003e\u003cb\u003edtype:\u003c/b\u003e object\u003c/label\u003e" ], "text/plain": [ - "poster structJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details]\n", + " Query processed 1.3 kB in 6 minutes of slot time. [\u003ca target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev\u0026j=bq:US:c9bb23f0-5ceb-4d6c-8241-960c496274ae\u0026page=queryresults\"\u003eJob bigframes-dev:US.c9bb23f0-5ceb-4d6c-8241-960c496274ae details\u003c/a\u003e]\n", " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -679,7 +696,7 @@ " " ], "text/plain": [ - "" + "\u003cIPython.core.display.HTML object\u003e" ] }, "metadata": {}, @@ -688,8 +705,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
postertitleyear
8Shoulder Arms1918
\n", - "

1 rows × 3 columns

\n", - "
[1 rows x 3 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eposter\u003c/th\u003e\n", + " \u003cth\u003etitle\u003c/th\u003e\n", + " \u003cth\u003eyear\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e8\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/vertex-ai%2Fdataset-management%2Fdatasets%2Fclassic-movie-posters%2Fshoulder_arms.jpeg?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260326%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260326T200210Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1683653082560296\u0026X-Goog-Signature=64c1fb48cc9830dd4153bca15d05d8703c770e12a4df99abf4cab9dec02d13c66adf4d1223ffda9a30763ad2b286086dfc8cc9b8d20875b29d0c1639983c3ba08a02364bf49361b4a24c3a6830def8d6d3561eeb04d01604b5bae86e48457dc368fee538d0beea2228fdf5e94b5862e1097f58545d7449fa5df0e93fb9c3c0a32943ca9970911f183adf71a7e13e9275efd41c1f69b8f8453b853a30cbb5e8859d72b95ca653204b5ae8f96a12d88d59e988349f74e3f6db6ef277c066d92a28c50335d494beead9a3c0c796c97ca48c497328ae7ad278161c28743193233b28ac0fcafab2431179f7f6321345d8a67e6af39d7339697a5892f0441a266262ab\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003eShoulder Arms\u003c/td\u003e\n", + " \u003ctd\u003e1918\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 3 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 3 columns in total]" ], "text/plain": [ " poster title year\n", diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb 
index 9f36cfbf4f72..be81936f7eee 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -19,8 +19,7 @@ "# limitations under the License." ], "metadata": {}, - "execution_count": 1, - "outputs": [] + "execution_count": 1 }, { "id": "816ab253", @@ -28,26 +27,26 @@ "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\n", + "\u003ctable align=\"left\"\u003e\n", "\n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \"Colab Run in Colab\n", - " \n", - " \n", - " \n", - " \"GitHub\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", " View on GitHub\n", - " \n", - " \n", - " \n", - " \"BQ\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", + " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", " Open in BQ Studio\n", - " \n", - "
\n" + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e\n" ], "metadata": { "id": "YOrUAvz6DMw-" @@ -85,7 +84,7 @@ "id": "750954c4", "cell_type": "markdown", "source": [ - "Install the latest bigframes package if bigframes version < 2.4.0" + "Install the latest bigframes package if bigframes version \u003c 2.4.0" ], "metadata": {}, "execution_count": null @@ -97,8 +96,7 @@ "# !pip install bigframes --upgrade" ], "metadata": {}, - "execution_count": 2, - "outputs": [] + "execution_count": 2 }, { "id": "df561d04", @@ -117,6 +115,8 @@ "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#grant-permissions for setting up connection service account permissions.\n", "# In this Notebook it uses bigframes-default-connection by default. You can also bring in your own connections in each method.\n", "\n", + "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", + "\n", "import bigframes\n", "# Setup project\n", "bigframes.options.bigquery.project = PROJECT\n", @@ -136,8 +136,7 @@ "id": "bGyhLnfEeB0X", "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" }, - "execution_count": 3, - "outputs": [] + "execution_count": 3 }, { "id": "35bd6e6e", @@ -183,14 +182,30 @@ " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" ], "metadata": {}, - "execution_count": 4, - "outputs": [] + "execution_count": 4 }, { "id": "be9ce892", "cell_type": "markdown", "source": [ - "import bigframes.pandas as bpd\\nsession = bpd.get_global_session()\\ndf_image = session._from_glob_path(\\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\",\\n connection=FULL_CONNECTION_ID,\\n name=\"image\"\\n)" + "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. 
You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", + "\n", + "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference.\n", + "\n", + "```python\n", + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_image = df_image[['image']]\n", + "```" ], "metadata": { "id": "ifKOq7VZGtZy" @@ -201,7 +216,20 @@ "id": "871d02f4", "cell_type": "code", "source": [ - "# Create blob columns from wildcard path.\\ndf_image = session._from_glob_path(\\n \"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\", name=\"image\"\\n)" + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# List files using gcsfs (public bucket)\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", + "\n", + "# Read the URIs into a BigQuery DataFrame using UNNEST\n", + "# We take the first 5 for this example\n", + "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_image = df_image[['image']]" ], "metadata": { "colab": { @@ -210,8 +238,7 @@ "id": "fx6YcZJbeYru", "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" }, - "execution_count": 5, - "outputs": [] + "execution_count": 5 }, { "id": "2e0436b0", @@ -246,8 +273,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
image
0
1
2
3
4
\n", - "

5 rows × 1 columns

\n", - "
[5 rows x 1 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=060fd285eaaa0d1b8888e9e3c648c0c9452df5d33b6c6d7ddea1d5fc3e4bf1f243ff0719ed4e3faaa3ea29e60da198daf1168dfd0efd64f9612f7c032753cfa2842ef88469a49ab23cf261e2b5da052224a33eedddd7c4699d584ec85704b18010ce8f4216f200d3cc8d0301b7aeb6bef37ae9e83a832ea38703f19b5b85e2e34f772420f5436afeb874487b3fb0ae4a17fb8f277a3d233a6d5e6e0d063e37e651061bf4ae33b8ec8b879c5db93e3ce97513054822d1867e28de4f03585da2edd8f4f51e177a4e3d37f5e3ca2f5be0990dd9f58135ec9223da3af7bcd1e67a8d279282d4d245eb7faff094903a82cf7a8b866fe848230a6668b6eaef6a683292\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=34826bd5824786f809b740fd64e154a9feb43b467250c64b9b6686134133e18aabd7213472cb00ae4ef59e5567b8897828aae6ff22e29c4a5d28214fcd2a6f486e69d548d0e482707c4dcd67894feb716ee68e1863e02e36d7e0c6c008b1b989c2a798feb2c4bc3638c6c80069165b2bf51c6028ea2f0a09c1555981b8935435ec1c596975b77942f603e65414328f3f1d180f772015452bfc97e96ffb9a0a016a4dc365a4531d0e37e491f4066df87f9a8f2374d30d5f62d639f10252c471570b9e24d88a9f8816007099fc39e6f846c68ea5a4069ecc785e67101f664da4999037fcbbd93e00b1b85a31528492e8610d75af96a1c8a879865dd36da2b15465\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=6476c6b9b8b23f4ac79973d8088424c6d8018857d0d8c1bf586057a7bd2f4cf00bfc53c79757b35401f05dadee9bc1aeeaed7b669659afa16696062db7d9da45e5fe17a0aaaa33c2394632a8dae6ca3c7f705ce0e7179e44fa245dc34080d87bdd0f41638c2840eba87b178dc43db16ca1a621224a1d991960eb821a99dc34aab25ed7e8457f161bd09fa9efc0eb0642709e3fba6ba412ff68ffe698592c235054ae0d08fd27909150beaf69b93dee3496d2f9254c2f801890fe072442fa2ffe389eeb689b8eb4daf08b4701a535ac6606c7de856761d008da479138abc3e941e0ab682f19fe86cd3f56df63f788c92824aed76fafaae0a546fa796266f26e2e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=22cdbcce477c983c914de1edcef7742fd253a6830e961c2932d5dbb11730c1fe0035579c4158a140cd6ebcdc95e1212ec60a2d54679af8dd662cb7c1ac6249042bb5a95fb324397599bfa4e8a8bf8e4313d14a5ca34e40c677d91f1853b4b7450d3ad043404058db5c1dbab56b8968eab4e3550aa1de20c789084527f8abd67a32aa62788b70975ea828bb312f5a123463c2088a4bad7a0c20d299f59fc0674ed32d36b3f78a1bf2fc6fcd367bf2056e132fbd50e0a597a7da7518be8c9597de6365097490857caba47e84b57940bdc6cca130b6c23ede91c51140cf0672ebdda0957e525416c636c368d1cf04e5efecbba3f052f61bb95d951e52e0f5d31e8c\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193621Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=335aafc718f8a89dc2b5d2e75ff750ac302cfacf4238ad91c2a4b140f59dd666d6520fafe885b82706453c2e820c82f0461488ae01e3210a20c555a9ac1242ddd54e17a92d7873211a4dcd69a7fca76c16ad9cd754f6245a8b9f047e9ef8bc428ec243fbde7af59a2b308968a165662e50d4a08740d196d02182d99650e79673e167164dc2869a434159ba3a15c68ddc9e17f5a7234c478ac4ae55a9686740ef260e6c1ab834ca3df361161c8d689acc72b143a6a3345640b2b94aadd1070d3e90a6572d63ae74cf803304b798ea4df61e5f4494f078f565f0d59f57bd6eee0618936a16617455d785ced3ac467b964b5eb9049749fe4cf8f2bf2c72ed72fc79\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 1 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -364,8 +391,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eauthor\u003c/th\u003e\n", + " \u003cth\u003econtent_type\u003c/th\u003e\n", + " \u003cth\u003esize\u003c/th\u003e\n", + " \u003cth\u003eupdated\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=0f96a5054a9d9d8f14259a2a9155c8124d0b3b6af21d5ba59f61a789cb5a770d1d70e9d7b04094140495675e9eb97ef30d5539ec822bfc7f857fc0be3a3ff193aef72578ffccf7231633d42fbf53752b50a6ab3c4180dd86f62a2c350492239d44b2d5a079d000aa60d99e5656dca1fdc85b2a5b1cea0d6567d47641167ee08bd41bff06e93b35e34c4b8f82e73d589bf29f9ae73b640b8c90b751ca2829f99b2a2fa5a463990660e87e4c4220a8bee9ff9bea44eb621d8a00335892833a0e33cc95560a803df5a13fb710abfe813d11a37610c870c82986b4275831e2ed57cf022af8927cd4a9fc5aea88b54c597b51ee456ab5f22cdcfe8972a0a2c56d9702\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591240\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:04+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=50b820b125f2b52c6a205178676d153512a5d62ba04f7f399de86cecaf167492f4db9322e289f8ec077e74e09ba67509bedda9b68b9eb7290a7f52bb78a567139d0a2bb9266764ed941bfa19ba4278bf00647e79c85877e3111dbe3c49cc572d4f47739db1030d6ce0298965d08f2f992b59a0f452ab8d52ea5d783cf13ce6d3b8349ce3fce9c4337ddd00b746ec2e9e8fa6fa0361a644d82c46b7f0860a2404f3fbf17de24cbfd8744a098fdde367d5a4cb918a325ae3fb96abfb187e26bee7bdc267b81ba23949ca4feaf676864431641f3747477293b34541e7c48ca06bbcc45e94ae839d0ce85e3222c38dd39014821881a31b15f3efd06ca426cc4769fb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1182951\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:02+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=0d659c9d71b34023e6a7531dc99620cd1543f4a8ec0262cda29549e9f4f604e929be102b44e0a11ef2c148c6a24c3fe9a92257892ca1aa23d7b963551d95093fa74bce067ec12c0237a96a15da0741f76e732d04f85b03d98696c7dc9bcd1bd9de1f2799ceb878e8f87ddfff5609fc6a6db4f27dcc410ad05cdc1066c8dcdae880a950a160189673eba7f3de59cc0e1aa754ebdf7f745a8572216af6f7934b1e96c9aa7cb7cacb6aa836f8e731668b76d0942e04e7e4b81a5972a75ab2ecb0b1859dc4ac4f6a45239dd2b76cc7af4643fb24c64731f28711e6cab8433fe239f0cc2462ae9260b1ae5ae85517300075188d270957eecb31ee8db20ac3ddd97c4e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1520884\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:55+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=1124d977a75b85634f2afeb223fba3028c9ad85fd9a2f6f8ce483c9e015a48a242f6810742e0279a46b3388371a9b7d37ca96d9a53d3939f6e0484a474491bece22272b89176e01866fdc8845b75cef28b9ff36a7b2f875e452002a7dd8d13ce38b078ea2aacd76ce8cb560faf078dad6462b3a69130f333ab9119fb6f1d8410a70de76018d2c84f01c6d70e1aa60498b4eb88a35a77a8173d11e545a8f5a7dfa542ec3effcafcd3a9c84934d605ed06107df98032738415e6ef1ed9331796aa802712c2cb4bd733881833bf4ed5d590846db97c7591a2d84acdf87e38752b15a39b711aec5bbe4dcca25a1edfb60626e68497c1fbb8cb0bd707938db378a01e\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ealice\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1235401\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:45:19+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193635Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=2f60becdf01864a381c2283fbceaaea023a1011554a61221b5be902cdd08b102d60d0da0275ed491b3396baeef517b8e2336eeb3e3b07da0398cdf4b190ac0ea667e4cd1d1d19e41046824d55ffcef47a2db3aeb4e82da71a655264f14ad5ee553329aa9b32c8c2200f3b66c9a9bb5aa8e5b91795e8d6b6129935f46522fb8dab9ce3a2ba5af019c2410f709472791730ab9ebdf9f901a5bfaf4dcc2c78e07c79743d35eceac59999d841adb60ce15313a70526d98b83e90f2240800c5b96b1b9a032d530fb15bec86425afca0c6fcc1d35d1560ef996cae5411feb67addd1b726026f3d097318b0577a84dab72cae328bb186fc8c97001ff720a43e6fc27610\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003ebob\u003c/td\u003e\n", + " \u003ctd\u003eimage/png\u003c/td\u003e\n", + " \u003ctd\u003e1591923\u003c/td\u003e\n", + " \u003ctd\u003e2025-03-20 17:44:47+00:00\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -492,7 +519,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -585,8 +612,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
imageblurred
0
1
2
3
4
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003cth\u003eblurred\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=2bb786ea7fda0a778b591d0f85b1d1df003726a26585490c1c8e1cc632bc90e418bc2762993da32c60017b96b36a9bf03ca123f7e74a34e5da98d4a8ae65e340c64872e1ab7c6442435253669103a157ee61c793da79ad0e6ae2a7bdbf54e8d67dce61de37d9dd2f54b0c994f8135d88af27f08eeb99e713b385b37fd0762503244cd0c597479d6925dcf111db4605842c797aabb307736028a5dd802ef08a2697a4bd1c96b5bf8bbecdb8a8f384028cec03bc9d51c1d2dfee1760756e0c6e54d9d753b373115e308cf45de2c082f30f263285af5d5e43fd9bf8c3dae32ceee389ef9427f3e2b06646d48aa570ba1ca0713d0f270ea2f4996e8ad3d03a3e39a6\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334353324\u0026X-Goog-Signature=6f16236d50a82c7ede3c23366389344149faba103443acf1c1a2b0d3b0d91948c8c15a3ad990382ab9b61badf8acee3c4b9e5ec6f96b72d395cd2e18227ad5c051ad189c1af48030fb44c1b9cc84a44a74fec04d576a6d699c1f0e133b2fc5d583872b5577c60a01f58d163de6e95591d260e9246fae0243d7472748e854ccaf4b4c3de80fb4e085f428427fe9914d22fba7416ae8b9e33beb0dfbcb91273609d0333aa1135b835bc69f9e15e1097452133e376b9b3f1bb7b4d7d7e9cb860677ce786e49239e8a8af86f83b7e27e76846212daddd2859f8dec4942b7351c69d396d4fd9a529fd0c48677aa4923063a752f6845801f22d3de227a640946f1e212\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=31ed02660dcead7471a7b47e2f471f4cf515e37525c14027f8d87ba1479982f244df02979b7da03676d4bbbd21221b3f70cefea4501eca3a5a64acb187ef5a2b03b19116729059dc666c2e295fc1ac831cd38864c1673344e04855db279a3c96409550dfade09631b0818e6cb576ff02afba6a87624f7c33077f7f19c891f6413b317cabb834182585d445ce97aa0294beca4de0edd2f9a0f6429747e93db53df875249a9c0543a329dac0f46ef3ed4e64a7e51648d0820ba52b7845b6f98371aca7deb165b4f7b7537be0d659cf33bb34893417666d7c9e9f611332ccba6bae6024be6862350185886cf61354eda3591e96e344c59350ca6f68ce50670fa869\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334573735\u0026X-Goog-Signature=209b62c9c3e2657b9270ed126a6c1e3979471f9e308670ce1595e9c18a0ec7c627a313b1f1c87108355602dc11b804ff609ba3394fada2b4fd186c6fef4138e22222045d0fd0660f103e6378ac83c6bd7d8da27c1a3d306dbfd778f6738c2e492bd8527ba9112e086f15334b7ab7795b88212d3825b3be325606cc507e5504406bc4e9dc41b29fb99e1207fe74c11053e7332ad9fdd65587f55cd10cc1bfd903672f43de38a18cb346977d8498fd751ab8728ca580261a1a0367421437d56df7f5f7dda895ddd370954aee632258304da31fd8e7c33619dd5cf9ee0ebe9e60eba3cc416e871e181525f23e525489ef6dffced9211f7d3681ec24b3ac7c0b05ab\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=8d32ad0137f3b10393aeaf635732f7fe337149323c0a33b634f9dcc592e662d4d268223f5c5cc89956787c5a3c061fa69c2de4d1a3225497dfad466548fcd4df7ee6000c4e60ff0661d0f9b451efa2701df5de55c322b3585c09210171403d47c358e6f41281f245711afd63eed3157e6674e9958ebffc5516f6ccb06b9af1830400e1d6a9bed44538b7212eeb257665aa43eecfa2af7482863f6032002de57f7a01af83cf5051541178297ab4a256710c04fd082cb51af4725dbafb288ee79caa630853dcbf35c49595b2b2e552804cabdbdf955806d57b97451e87035bd3ea89dbb5560a1c14170c531c3222a1d0b59d79d4c45b641887699f0bf5830612cb\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334233994\u0026X-Goog-Signature=6eed60a846158365a0c54d4c96516bb8c4011b497da203479fa9066bc2b7c2c7c683aa351d5249e05df64b8f86549f251150327013e41231ee06f22e9a51e1061c577d7b2da4704b80f684776702da13be6122ad6652463f210c53d9149da9e32a253fde4851723cbb616f18374a8f9b7b5cd36a82e66a2e5677ae51b906e6c4649ea27fcbbcba3ef24bf163a4b0f469c10eca8da464f58161ead8371bdb5b0d452ee713b0ec498ca9aaefd521a1d8240d82885a19af58ddefb167146ab9d411b5d0712457b126104b5f42f57ab4ede2da5ae9af74e708840b507a5538571804c60e9c166516e0118aa555a13169401e21f0a5ec303f3b801d6a972ef94426ee\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=472d88289a3910bdde93dd468e6ae9e66c0821fb0d72a356ef887771baa2a80f356b0ac67183873c0e89c87afb3080b3a78122fa7c3e37ad8b54d8aa216e9e04cb6f2af4f200784beba8de4eea7cf3a39faa8d800b7cc6bbf03df55beaeb69727d413266e2e59fb900bc25b6859d7c14db3c22aa6a0aeaa71c331ae0a2bd27c9e2c246fe931a2562210411491bc83bc34e7bef60901189949348eee909e64c90e3aab7b03d7c274ee84c03cca6dd624169f40323a785bcc1a8357810f738b45a637c09267530fbddc413a75622d793876c74c9128b11d8b4ea2fe2b959b34b96e06a9c384f72522ad357557d0a3b1c8b5792c2e94631139eae178fa660a5e8b2\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334678692\u0026X-Goog-Signature=239a77d1c63c4074b2ddc3dd21a5bc77de8ea993fdd911a199b6cca207fef9286307a8321d91d02cffda17060a217f50b2a1d8017bbcb29511339a1187b38332a39795fb1b2c754988948be8849a371fd66a7698f8e35a0f1a3430b6837ede73c37c90da49a062586828f19595d4246e4eabf3e3d629f251b066fb20e0f34ddad80599f5946aa76e0694b859f7d575dc781a850d56e9846d456d285b0023e90862154008154fa13ef1f95096a6161e6a1b314b82ca23bc44fc1b4b7799a9f936dca09ab09830446089d8defe11a2c0e066aa8a65204d9027140f314001c59187593290ae22c4092ee5d0293a6e5da1e1da3d42c32c0aced7db7ad4f774575deb\" 
width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1077450c755044fd6121e0dd67ec7de75a1785c4bacede635b78dd64d6b982ba02bba7a00065035fa13f003ef03c14d7f622b6a5c9ef19cadc956470e5a62e0f1855fb8cae0bbe723c0352224d152c85e173b234e3f5a1bcc2e1ddd4da065c184e82487eb4aa17464d330144b3b7c17ba357af1fb267f97730b3a798ff42a514ea47b83aa9f2560cfd428778638287c96ed17f95f0327e1cade380d046120d79479aca927d627ba65f3bc009c75d1662cbf93c6d33cea4b030a7906d3689922bac13ca3fab45536f115ae6253715cc9c24769e57e83657bf0393ce55e45f90d9666513a19a939044d3b3ad528757e022624d6601dacd326ae8ab3c9ce4ae6d52\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334302924\u0026X-Goog-Signature=3a25e70e59647f427ae55d72168c576fdb59b05f9ed6b72885a9e9c11f655e6bf10ded55744adcef330659731f0effdda7550ddca99c309cefe46ba864b088f155a5243e579d182a55f916d21d9dd0a83534a5e2efb2955a9146db12b22b4321e3f36b69f8d89d663507d7db83ac96bb2419a2baa0787fd0c6e6079d06652b8a2ac364a0a0a5d8d9de6331658b798abddbaaae1ede3026a4f0d955e74782afe240d31e6748bea8ab332ed945f541ca20c587b8c1449643f4748a3b059aae857334b6249bdf86794d307340b6a07d0dc47d3980e234be9c0549f52636d33776b7474fb95ebc014656c3cc217a777d438612a08849ac498baba0ce4716ac4ea432\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " image \\\n", @@ -736,8 +763,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of K9 Guard Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=1b184636c15ee0a74b099df1903a0a79f2d0832d2cf829c84f9344269dc70408ea48dfaddce2f284cb4862cef857e8a0420627e25ca8c7ecc08d34bc69c695bc030ca8d90417860eecad65826160cbbf1cabe6c90d5e51a09c8b582bc542dfc5b309908be7b945d615eabba73b79912987306b3b110a5b0a9e52ccf900a3d2c490bb4e4572f3108f3acabf9a332e0fa503f74265f44d3b9ff40745afa4c59a3a0f3366ca4e4a800a09a5df0b363909a01705433e31bf2c9b7ccc0cc1e75f3e8ec323907140f29e4033238385eb83815b6d95ec54945cbf829d966510ae2504187f9c167fe70ac9e77231d4b38df380a7f6ec5f973828f21e51f4a95626ea0504\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe item is K9 Guard Dog Hot Spot Spray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=7e3adbf71385c63b494609546b7b2a3ef41dc437772a35d579bfed25594b3dd4716f8170dae6e4c9afcabce49c2af584561111983494d6dff504ff5eed611c3c7712cb667e6f1f3451aff5ac6911c4da9aeb2ba5c9227f9459f7c54085268a515eeb28add1b384a4724159f3d1408278e37716465f11a18f823dd4058004e43f9bdd4cab28fc97e97043adddc53c4b5e3059cf1641f1300bedded5a679f6922a45c021055e413953e2f0b000a6fd5047a0aa2fa5fce5f0ca08b6f93411704c9b10c6534894130c11f8ee9a8ee70d26a6455c0cbaf2088c1b5205328858e22026d1c4efc9e558a33082169f7eec5e4fb406de7de13e3ee72a48421e9896cca6f7\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eThe image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry \u0026 Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. 
Each bag has a pile of its contents beneath it.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=380d369f6fc8a0e4274420f7bb038aaf748111cd654f7dc20818a97d80da8e6b93125884982c656009cdfdc7e70496c1defa42b2de69e1b73d99e0b925953bd5e0e5ae20ddfac13a619f8c6b1bf6c3fe9cdb195910eb08efd96a193dda55488eacc2d0deac4d0a777fd7b7916a0cc1707f22463e1678c83b8ae51d082b8365c2643f85c19c59a9ec34c89fcc87b9c510cf6dbcdda5d0648d6602bd023b0a09e6b4b02c4cea9f6b10f563a14097e86a5cd9898ea3b3606a6c847a23ce97aed46b3154c1c1791da19edf172d7f57cd4e604bb2774ae3dc02d902c826ca9be17972ff17d612106b9ac61e734837646e5d0b40f8162798bf7695dccb0d320e6a58a9\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a cat tree.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=712a70543cff388ba937bc867b4d94e1bfb09579bc7ecf998b8fb5194937cd15f491643f76925582ed5b7c853a9845b77e3c6c248126e211d45c3f6ebe751cc06193ae052999bf9bd827acbb204d2a64ad5d6eae1101fc5b2518f16ae29469ee7213cae403a3a6a29d7081561decc6b189593beb4b649bc7169828f4570a929d8b15c8dd0b3f259bfa4e2680b9d5b88653068357c7aefa0b1f26e10dc309b743da4164d9a5abd1761b00cc9a12380ba6fb3786f141b8e536fdc27e869b632c3e1a130f312ad5185362b0f9b30f473387a02905f22956992278d94fc2ef387a87cb855d35cfcabe9ad5d82c1b4dd85c56152e28438f6631322a4c229a9520adb5\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a bag of bird seed. 
Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194139Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=70d2709b3b655fb6add8616767e7886e7e304cc96fc891df927085d1e4d90ee9bb13b370762c6c5a8dd43baefa163312267203bc1b371954320bc27c32d0831f7f8937f288da999e506bf6f47d101cd2e49a870f3d5be428d321149f7e1c7d1146569d22f19640d62325665b6d08e7254a89535c021c8b464d65e754312dd47dde08be9ca58856a97d3c3f243030ccfbd8c1bda5ddca2b3618b113f6c1640afa14936b8c16c59d77c44139fe75f3719e2a83924fed36514c61787b02ace0d439f8d3c4fea81c9bf01684f8c06a39f7ec626e93d59262db87f2eea30dd0f849a3436d8dd36d2188f2e52826e8b96bf72614c256cda9867b1905a1d1cd3edebd18\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -829,8 +856,7 @@ "metadata": { "id": "IG3J3HsKhyBY" }, - "execution_count": 11, - "outputs": [] + "execution_count": 11 }, { "id": "829afc69", @@ -870,8 +896,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", - "

5 rows × 2 columns

\n", - "
[5 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_text_llm_result\u003c/th\u003e\n", + " \u003cth\u003eimage\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eThe item is a container of Dog Paw Balm.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=988967bbfa1e4c70be10a6faa407fde8edbe0e93a7a1c9f9fbe8e81fa55c11bcc27b9673ae4d91b13edbc56ad5e50051a81d1639cafd38946f693f73e81e86a0bf6e945c3a8edf9b3b2d275203caba770fcb9a9fa272b969023fabda363efc8d90ae4b2bbc9a4f420699f3604d0a13607f96694687529b38bd80b211f5998ef0a11ab0f3a0b936a4e6068a9289ec43a9536416b5782ca2a0645c2b43f94ac6b8e31632c62b3714b56f28dce7e5675a66ded7bcc9d1c1a154f5a83f826b3f4b1363b1316536549d959d664931e6cd462a9d83362257e5226ad5b35b5e6c0e6a155fd62d2890b2fc071b59e6e4fa796a22e346bceebc4fb131eee66793f6748699\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003eThe picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. 
The background is a solid gray.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=6098f2a2487364290c54d4f6bcc98f29e5097702ab78da4ff9ef97d13c03a7fa3a17bd22324d61ae8264d3a59a6bdb2bf4be55bb7efffcb00c68f0c9b69f413f8b33b2089697456ef919690d59a68548e95ebd68526de53ab9062e18009ab73452fc1934c43f99ad75a45931fb250ef1e78a7ced3e0bfc9d9468ef300a63d750b8e249e7d20afa00895b7e586b16686141799603bcdf731b48237323de166d0a1a4f310519671a4ce6ae56b5d4ebd1887361cc21130b3e8fa57a6107d50239a671319ecbef623719554a9642489d2ea083c2cc466f5d73c55084023567e9db291b40a335e7f65b20da018a70cbf5ef9654aa86ec500aa4df35c1db81116abc83\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003eHere are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry \u0026 Blossom Treat Blend** is the product in the purple bag.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=9672e4d0fa6ea5f4bcf1605e35a7642cab21e7decfcfe55e37616c99a7774445cecf69c848ccd0dd92f5ae1925bfaa4bbd62ba413ecfffc835aa9a4596d18036381d595ec3387d4200a08b8fdb02ddfdb7432ebd14d5d4b77abf4628e61899c4383899ddbe6465ce91b856e8a02f37391c850589a971e6045ed0c389133dd63d4bac98d5fc744f83a51b000bf29c51111589bd34e2d287cc835ccd8c009270cef818eb3ed9ee07f9978bf24ce994311af944fc2b7d7066c3c37613fe08e0542efd277ab77df0fe1f168e05d2f6d6749e5d8af573c22032b29c3c47ac2f3b7978a09139ddfd38ab9045b5ae86d8804f23db758a7602225bd749a5b7552bae4eb8\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003eYes, it is for pets. 
It appears to be a cat tree or scratching post.\\n\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=5c8e23c4c5f8017e09bf02b1b480200c74c1523aa25297141fe4abf226d7e18c546aed2ea5a2b5b5f8c5b90f2f4c569accf41a73ab8776285b458d230c6df43025eeb3066d40fe141089fa547b5a1f6a12d8c2eed0e614c8667be12c237d7ba4b29a530c16a6c8528cdfc8d8c4761aeae91c5f8452069f88de31cb637aaa34cbea60d8a8e9bd66b9d2f6f2c70a60d7791fc6bac13a4dd3736323da29fb52c5c90068c57c088407935255468513b780c07ea8e922f690645ee5ed8644240f894061c662b7bfe7ab660fd8466280e428424724f62a6c4eb6c781e3bf988059bd9f63fd6ffc2e75d296bca2ef068c6cdda0f6bb994f6799e04b33b4352f66a68224\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003eThe image shows that the weight of the product is 15 oz/ 257g.\u003c/td\u003e\n", + " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T194606Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=3be42f1f88c163e019f7496107a2f7d69b90a5482dd157a5d05e427fa117dd334a834dd9c72d0c84126e4f2c9e35903521ca8e81645afc945d8dbe47671f6b2f73ccbf494df6e5ad8e19c04ab4c6020859519c663b6fd57b19512fa94562106d01073b9122011ca602279a712fd761dc4ffaed3a8d7b76abcf1f42c2c1bce33228cff0e0454107cb84039a8981a2fb689191c43b7edf54e19354ff2e8c5deeb3a07944285b15db8a4fe474744f52852f8048c377708e2b3b85ee639b952e9292bc8d60d9b1a37c84ab83398ef295b8ba6b3c8e3a500714fb70e91b5fa7f19301fa4ce1bb1041f60a8ce84b3d863d5ba66f14614ed27689cd4daeb293b493172b\" width=\"300\"\u003e\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -989,8 +1015,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", - "

5 rows × 5 columns

\n", - "
[5 rows x 5 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_result\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_status\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_start_sec\u003c/th\u003e\n", + " \u003cth\u003eml_generate_embedding_end_sec\u003c/th\u003e\n", + " \u003cth\u003econtent\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00638822 0.01666385 0.00451817 ... -0.02...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e1\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.00973976 0.02148137 0.0024429 ... 0.00...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e2\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.01195884 0.02139394 0.05968047 ... -0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e3\u003c/th\u003e\n", + " \u003ctd\u003e[-0.02621161 0.02797648 0.04416926 ... 
-0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e4\u003c/th\u003e\n", + " \u003ctd\u003e[ 0.05918628 0.0125137 0.01907336 ... 0.01...\u003c/td\u003e\n", + " \u003ctd\u003e\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e\u0026lt;NA\u0026gt;\u003c/td\u003e\n", + " \u003ctd\u003e{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e5 rows × 5 columns\u003c/p\u003e\n", + "\u003c/div\u003e[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1069,18 +1095,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", + "0 \u003cNA\u003e \n", + "1 \u003cNA\u003e \n", + "2 \u003cNA\u003e \n", + "3 \u003cNA\u003e \n", + "4 \u003cNA\u003e \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... 
\n", @@ -1126,7 +1152,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -> str:\n", + "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1153,7 +1179,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1171,7 +1197,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) >= chunk_size:\n", + " while len(curr_chunk) \u003e= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1200,7 +1226,30 @@ "id": "234a5f86", "cell_type": "code", "source": [ - "df_pdf = session._from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")\\n\\n# Generate a JSON string containing the runtime information (including signed read URLs)\\naccess_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\\n\\n# Apply PDF extraction\\ndf_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\\n\\n# Apply PDF chunking\\ndf_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\\n\\ndf_pdf[[\"extracted_text\", \"chunked\"]]" + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + 
"# Create the object reference column\n", + "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_pdf = df_pdf[['pdf']]\n", + "\n", + "# Generate a JSON string containing the runtime information (including signed read URLs)\n", + "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", + "\n", + "# Apply PDF extraction\n", + "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", + "\n", + "# Apply PDF chunking\n", + "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", + "\n", + "df_pdf[[\"extracted_text\", \"chunked\"]]" ], "metadata": {}, "execution_count": 15, @@ -1208,8 +1257,8 @@ { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", - "

1 rows × 2 columns

\n", - "
[1 rows x 2 columns in total]" + "\u003c/style\u003e\n", + "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", + " \u003cthead\u003e\n", + " \u003ctr style=\"text-align: right;\"\u003e\n", + " \u003cth\u003e\u003c/th\u003e\n", + " \u003cth\u003eextracted_text\u003c/th\u003e\n", + " \u003cth\u003echunked\u003c/th\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/thead\u003e\n", + " \u003ctbody\u003e\n", + " \u003ctr\u003e\n", + " \u003cth\u003e0\u003c/th\u003e\n", + " \u003ctd\u003eCritterCuisine Pro 5000 - Automatic Pet Feeder...\u003c/td\u003e\n", + " \u003ctd\u003e[\"CritterCuisine Pro 5000 - Automatic Pet Feed...\u003c/td\u003e\n", + " \u003c/tr\u003e\n", + " \u003c/tbody\u003e\n", + "\u003c/table\u003e\n", + "\u003cp\u003e1 rows × 2 columns\u003c/p\u003e\n", + "\u003c/div\u003e[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1271,13 +1320,13 @@ { "data": { "text/html": [ - "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
+       "\u003cpre\u003e0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...
" + "0 continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1309,11 +1358,29 @@ "id": "1794c54f", "cell_type": "code", "source": [ - "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\\ndf = session._from_glob_path(audio_gcs_path, name=\"audio\")" + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "audio_gcs_path = \"gs://bigframes_blob_test/audio/*\"\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem()\n", + "uris = fs.glob(audio_gcs_path)\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "# If the bucket is empty or doesn't exist, this will result in an empty DataFrame\n", + "if not uris:\n", + " # Fallback to a dummy list or just let it be empty\n", + " uris = [\"gs://bigframes_blob_test/audio/dummy.mp3\"]\n", + "\n", + "df = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df['audio'] = bbq.obj.make_ref(df['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df = df[['audio']]" ], "metadata": {}, - "execution_count": 17, - "outputs": [] + "execution_count": 17 }, { "id": "c9f9d484", @@ -1360,7 +1427,7 @@ { "data": { "text/html": [ - "
0    Now, as all books, not primarily intended as p...
" + "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" ], "text/plain": [ "0 Now, as all books, not primarily intended as p...\n", @@ -1397,11 +1464,11 @@ { "data": { "text/html": [ - "
0    {'status': '', 'content': 'Now, as all books, ...
" + "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" ], "text/plain": [ "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct[pyarrow]" + "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" ] }, "execution_count": 19, @@ -1446,7 +1513,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -> str:\n", + "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1481,7 +1548,32 @@ "id": "40bb6bc9", "cell_type": "code", "source": [ - "# Create a Multimodal DataFrame from the sample image URIs\\nexif_image_df = session._from_glob_path(\\n \"gs://bigframes_blob_test/images_exif/*\",\\n name=\"blob_col\",\\n)\\n\\n# Generate a JSON string containing the runtime information (including signed read URLs)\\n# This allows the UDF to download the images from Google Cloud Storage\\naccess_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\\n\\n# Apply the BigQuery Python UDF to the runtime JSON strings\\n# We cast to string to ensure the input matches the UDF's signature\\nexif_json = access_urls.astype(str).apply(extract_exif)\\n\\n# Parse the resulting JSON strings back into a structured JSON type for easier access\\nexif_data = bbq.parse_json(exif_json)\\n\\nexif_data" + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# Create a Multimodal DataFrame from the sample image URIs\n", + "fs = gcsfs.GCSFileSystem()\n", + "uris = fs.glob(\"gs://bigframes_blob_test/images_exif/*\")\n", + "\n", + "if not uris:\n", + " uris = [\"gs://bigframes_blob_test/images_exif/dummy.jpg\"]\n", + "\n", + "exif_image_df = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "exif_image_df['blob_col'] = bbq.obj.make_ref(exif_image_df['uri'], authorizer=FULL_CONNECTION_ID)\n", + "exif_image_df 
= exif_image_df[['blob_col']]\n", + "\n", + "# Generate a JSON string containing the runtime information (including signed read URLs)\n", + "# This allows the UDF to download the images from Google Cloud Storage\n", + "access_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\n", + "\n", + "# Apply the BigQuery Python UDF to the runtime JSON strings\n", + "# We cast to string to ensure the input matches the UDF's signature\n", + "exif_json = access_urls.astype(str).apply(extract_exif)\n", + "\n", + "# Parse the resulting JSON strings back into a structured JSON type for easier access\n", + "exif_data = bbq.parse_json(exif_json)\n", + "\n", + "exif_data" ], "metadata": {}, "execution_count": 21, @@ -1498,11 +1590,11 @@ { "data": { "text/html": [ - "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" ], "text/plain": [ "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension>[pyarrow]" + "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" ] }, "execution_count": 21, From 05384eca1a32e84fdab9ab5b76246ceb8721b8b5 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Wed, 29 Apr 2026 23:26:55 +0000 Subject: [PATCH 26/26] fix: replace private _from_glob_path and update notebook format --- .../generative_ai/ai_movie_poster.ipynb | 3 + .../multimodal/multimodal_dataframe.ipynb | 1386 ++++++++--------- 2 files changed, 691 insertions(+), 698 deletions(-) diff --git a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb index c2889ad4f92e..20617012487c 100644 --- a/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb +++ b/packages/bigframes/notebooks/generative_ai/ai_movie_poster.ipynb @@ -156,6 +156,9 @@ "fs = gcsfs.GCSFileSystem(anon=True)\n", "uris = fs.glob(\"gs://cloud-samples-data/vertex-ai/dataset-management/datasets/classic-movie-posters/*\")\n", "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", "# Read the URIs into a BigQuery DataFrame\n", "movies = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", "\n", diff --git a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb index be81936f7eee..56a59305cfa6 100644 --- a/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/packages/bigframes/notebooks/multimodal/multimodal_dataframe.ipynb @@ -1,8 +1,11 @@ { "cells": [ { - "id": "9edad7a6", "cell_type": "code", + "execution_count": 1, + "id": "9edad7a6", + "metadata": {}, + "outputs": [], "source": [ "# Copyright 2025 Google LLC\n", "#\n", @@ 
-17,45 +20,43 @@ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." - ], - "metadata": {}, - "execution_count": 1 + ] }, { - "id": "816ab253", "cell_type": "markdown", + "id": "816ab253", + "metadata": { + "id": "YOrUAvz6DMw-" + }, "source": [ "# BigFrames Multimodal DataFrame\n", "\n", - "\u003ctable align=\"left\"\u003e\n", + "\n", "\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://colab.research.google.com/github/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/colab-logo.png\" alt=\"Colab logo\"\u003e Run in Colab\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://raw.githubusercontent.com/googleapis/python-bigquery-dataframes/refs/heads/main/third_party/logo/github-logo.png\" width=\"32\" alt=\"GitHub logo\"\u003e\n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", " View on GitHub\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://console.cloud.google.com/bigquery/import?url=https://github.com/googleapis/python-bigquery-dataframes/blob/main/notebooks/multimodal/multimodal_dataframe.ipynb\"\u003e\n", - " \u003cimg src=\"https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTW1gvOovVlbZAIZylUtf5Iu8-693qS1w5NJw\u0026s\" alt=\"BQ logo\" width=\"35\"\u003e\n", + " \n", + " \n", + " \n", + " \"BQ\n", " Open in BQ Studio\n", - " \u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e\n" - ], - "metadata": { - "id": "YOrUAvz6DMw-" - }, - "execution_count": null + " \n", + "
\n" + ] }, { - "id": "77d821d4", "cell_type": "markdown", + "id": "77d821d4", + "metadata": {}, "source": [ "This notebook is introducing BigFrames Multimodal features:\n", "1. Create Multimodal DataFrame\n", @@ -65,42 +66,48 @@ "5. PDF chunking function\n", "6. Transcribe audio\n", "7. Extract EXIF metadata from images" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "75ab1c13", "cell_type": "markdown", - "source": [ - "## Setup" - ], + "id": "75ab1c13", "metadata": { "id": "PEAJQQ6AFg-n" }, - "execution_count": null + "source": [ + "## Setup" + ] }, { - "id": "750954c4", "cell_type": "markdown", - "source": [ - "Install the latest bigframes package if bigframes version \u003c 2.4.0" - ], + "id": "750954c4", "metadata": {}, - "execution_count": null + "source": [ + "Install the latest bigframes package if bigframes version < 2.4.0" + ] }, { - "id": "2a6fafb1", "cell_type": "code", + "execution_count": 2, + "id": "2a6fafb1", + "metadata": {}, + "outputs": [], "source": [ "# !pip install bigframes --upgrade" - ], - "metadata": {}, - "execution_count": 2 + ] }, { - "id": "df561d04", "cell_type": "code", + "execution_count": 3, + "id": "df561d04", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bGyhLnfEeB0X", + "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" + }, + "outputs": [], "source": [ "PROJECT = \"bigframes-dev\" # replace with your project. 
\n", "# Refer to https://cloud.google.com/bigquery/docs/multimodal-data-dataframes-tutorial#required_roles for your required permissions\n", @@ -128,19 +135,14 @@ "\n", "import bigframes.pandas as bpd\n", "import bigframes.bigquery as bbq" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bGyhLnfEeB0X", - "outputId": "83ac8b64-3f44-4d43-d089-28a5026cbb42" - }, - "execution_count": 3 + ] }, { - "id": "35bd6e6e", "cell_type": "code", + "execution_count": 4, + "id": "35bd6e6e", + "metadata": {}, + "outputs": [], "source": [ "import bigframes.bigquery as bbq\n", "\n", @@ -180,41 +182,32 @@ "\n", "def get_updated(series):\n", " return bpd.to_datetime(bbq.json_value(get_metadata(series), \"$.updated\").astype(\"Int64\"), unit=\"us\", utc=True)" - ], - "metadata": {}, - "execution_count": 4 + ] }, { - "id": "be9ce892", "cell_type": "markdown", - "source": [ - "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. 
You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", - "\n", - "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference.\n", - "\n", - "```python\n", - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", - "\n", - "# Read the URIs into a BigQuery DataFrame\n", - "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df_image = df_image[['image']]\n", - "```" - ], + "id": "be9ce892", "metadata": { "id": "ifKOq7VZGtZy" }, - "execution_count": null + "source": [ + "To create a Multimodal DataFrame, you can use `bigframes.bigquery.obj.make_ref` on a series of URIs. You can get the URIs from a BigQuery table or by listing them from Cloud Storage.\n", + "\n", + "In this example, we use `gcsfs` to list the files from Cloud Storage, and then use `read_gbq` to load them into a BigQuery DataFrame before creating the object reference." 
+ ] }, { - "id": "871d02f4", "cell_type": "code", + "execution_count": 5, + "id": "871d02f4", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fx6YcZJbeYru", + "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" + }, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -223,6 +216,9 @@ "fs = gcsfs.GCSFileSystem(anon=True)\n", "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/images/*\")\n", "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", "# Read the URIs into a BigQuery DataFrame using UNNEST\n", "# We take the first 5 for this example\n", "df_image = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", @@ -230,24 +226,12 @@ "# Create the object reference column\n", "df_image['image'] = bbq.obj.make_ref(df_image['uri'], authorizer=FULL_CONNECTION_ID)\n", "df_image = df_image[['image']]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fx6YcZJbeYru", - "outputId": "d707954a-0dd0-4c50-b7bf-36b140cf76cf" - }, - "execution_count": 5 + ] }, { - "id": "2e0436b0", "cell_type": "code", - "source": [ - "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", - "df_image = df_image.head(5)\n", - "df_image" - ], + "execution_count": 6, + "id": "2e0436b0", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -256,7 +240,6 @@ "id": "HhCb8jRsLe9B", "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" }, - "execution_count": 6, "outputs": [ { "name": "stderr", @@ -273,8 +256,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
image
0
1
2
3
4
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" ], "text/plain": [ " image\n", @@ -335,46 +318,40 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Take only the 5 images to deal with. Preview the content of the Mutimodal DataFrame\n", + "df_image = df_image.head(5)\n", + "df_image" ] }, { - "id": "429b0117", "cell_type": "markdown", - "source": [ - "### 2. Combine unstructured data with structured data" - ], + "id": "429b0117", "metadata": { "id": "b6RRZb3qPi_T" }, - "execution_count": null + "source": [ + "### 2. Combine unstructured data with structured data" + ] }, { - "id": "991fa065", "cell_type": "markdown", - "source": [ - "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." - ], + "id": "991fa065", "metadata": { "id": "4YJCdmLtR-qu" }, - "execution_count": null + "source": [ + "Now you can put more information into the table to describe the files. Such as author info from inputs, or other metadata from the gcs object itself." + ] }, { - "id": "08722ec5", "cell_type": "code", - "source": [ - "# Combine unstructured data with structured data\n", - "df_image = df_image.head(5)\n", - "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", - "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", - "df_image[\"size\"] = get_size(df_image[\"image\"])\n", - "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", - "df_image" - ], + "execution_count": 7, + "id": "08722ec5", "metadata": { "id": "YYYVn7NDH0Me" }, - "execution_count": 7, "outputs": [ { "name": "stderr", @@ -391,8 +368,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageauthorcontent_typesizeupdated
0aliceimage/png15912402025-03-20 17:45:04+00:00
1bobimage/png11829512025-03-20 17:45:02+00:00
2bobimage/png15208842025-03-20 17:44:55+00:00
3aliceimage/png12354012025-03-20 17:45:19+00:00
4bobimage/png15919232025-03-20 17:44:47+00:00
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " image author content_type \\\n", @@ -484,29 +461,143 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Combine unstructured data with structured data\n", + "df_image = df_image.head(5)\n", + "df_image[\"author\"] = [\"alice\", \"bob\", \"bob\", \"alice\", \"bob\"] # type: ignore\n", + "df_image[\"content_type\"] = get_content_type(df_image[\"image\"])\n", + "df_image[\"size\"] = get_size(df_image[\"image\"])\n", + "df_image[\"updated\"] = get_updated(df_image[\"image\"])\n", + "df_image" ] }, { - "id": "f90826f6", "cell_type": "markdown", + "id": "f90826f6", + "metadata": {}, "source": [ "### 3. Conduct image transformations" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "e24c9f8c", "cell_type": "markdown", + "id": "e24c9f8c", + "metadata": {}, "source": [ "This section demonstrates how to perform image transformations like blur, resize, and normalize using custom BigQuery Python UDFs and the `opencv-python` library." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "db665049", "cell_type": "code", + "execution_count": 8, + "id": "db665049", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 + }, + "id": "HhCb8jRsLe9B", + "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", + " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", + " return prop(*args, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imageblurred
0
1
2
3
4
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" + ], + "text/plain": [ + " image \\\n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + " blurred \n", + "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -519,7 +610,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"opencv-python\", \"numpy\", \"requests\"],\n", ")\n", - "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -\u003e str:\n", + "def image_blur(src_rt: str, dst_rt: str, kx: int, ky: int) -> str:\n", " import json\n", " import cv2 as cv\n", " import numpy as np\n", @@ -580,137 +671,25 @@ " image_blur, 20, 20\n", ")\n", "df_image[[\"image\", \"blurred\"]]" - ], + ] + }, + { + "cell_type": "markdown", + "id": "11fcc6ec", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "HhCb8jRsLe9B", - "outputId": "03081cf9-3a22-42c9-b38f-649f592fdada" + "id": "Euk5saeVVdTP" }, - "execution_count": 8, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return 
global_session.with_default_session(\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:4655: FunctionAxisOnePreviewWarning: DataFrame.apply with parameter axis=1 scenario is in preview.\n", - " warnings.warn(msg, category=bfe.FunctionAxisOnePreviewWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/logging/log_adapter.py:229: ApiDeprecationWarning: The blob accessor is deprecated and will be removed in a future release. 
Use bigframes.bigquery.obj functions instead.\n", - " return prop(*args, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "\u003c/style\u003e\n", - "\u003ctable border=\"1\" class=\"dataframe\"\u003e\n", - " \u003cthead\u003e\n", - " \u003ctr style=\"text-align: right;\"\u003e\n", - " \u003cth\u003e\u003c/th\u003e\n", - " \u003cth\u003eimage\u003c/th\u003e\n", - " \u003cth\u003eblurred\u003c/th\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/thead\u003e\n", - " \u003ctbody\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e0\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492703986347\u0026X-Goog-Signature=2bb786ea7fda0a778b591d0f85b1d1df003726a26585490c1c8e1cc632bc90e418bc2762993da32c60017b96b36a9bf03ca123f7e74a34e5da98d4a8ae65e340c64872e1ab7c6442435253669103a157ee61c793da79ad0e6ae2a7bdbf54e8d67dce61de37d9dd2f54b0c994f8135d88af27f08eeb99e713b385b37fd0762503244cd0c597479d6925dcf111db4605842c797aabb307736028a5dd802ef08a2697a4bd1c96b5bf8bbecdb8a8f384028cec03bc9d51c1d2dfee1760756e0c6e54d9d753b373115e308cf45de2c082f30f263285af5d5e43fd9bf8c3dae32ceee389ef9427f3e2b06646d48aa570ba1ca0713d0f270ea2f4996e8ad3d03a3e39a6\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-paw-balm.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334353324\u0026X-Goog-Signature=6f16236d50a82c7ede3c23366389344149faba103443acf1c1a2b0d3b0d91948c8c15a3ad990382ab9b61badf8acee3c4b9e5ec6f96b72d395cd2e18227ad5c051ad189c1af48030fb44c1b9cc84a44a74fec04d576a6d699c1f0e133b2fc5d583872b5577c60a01f58d163de6e95591d260e9246fae0243d7472748e854ccaf4b4c3de80fb4e085f428427fe9914d22fba7416ae8b9e33beb0dfbcb91273609d0333aa1135b835bc69f9e15e1097452133e376b9b3f1bb7b4d7d7e9cb860677ce786e49239e8a8af86f83b7e27e76846212daddd2859f8dec4942b7351c69d396d4fd9a529fd0c48677aa4923063a752f6845801f22d3de227a640946f1e212\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e1\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492702954473\u0026X-Goog-Signature=31ed02660dcead7471a7b47e2f471f4cf515e37525c14027f8d87ba1479982f244df02979b7da03676d4bbbd21221b3f70cefea4501eca3a5a64acb187ef5a2b03b19116729059dc666c2e295fc1ac831cd38864c1673344e04855db279a3c96409550dfade09631b0818e6cb576ff02afba6a87624f7c33077f7f19c891f6413b317cabb834182585d445ce97aa0294beca4de0edd2f9a0f6429747e93db53df875249a9c0543a329dac0f46ef3ed4e64a7e51648d0820ba52b7845b6f98371aca7deb165b4f7b7537be0d659cf33bb34893417666d7c9e9f611332ccba6bae6024be6862350185886cf61354eda3591e96e344c59350ca6f68ce50670fa869\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fk9-guard-dog-hot-spot-spray.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334573735\u0026X-Goog-Signature=209b62c9c3e2657b9270ed126a6c1e3979471f9e308670ce1595e9c18a0ec7c627a313b1f1c87108355602dc11b804ff609ba3394fada2b4fd186c6fef4138e22222045d0fd0660f103e6378ac83c6bd7d8da27c1a3d306dbfd778f6738c2e492bd8527ba9112e086f15334b7ab7795b88212d3825b3be325606cc507e5504406bc4e9dc41b29fb99e1207fe74c11053e7332ad9fdd65587f55cd10cc1bfd903672f43de38a18cb346977d8498fd751ab8728ca580261a1a0367421437d56df7f5f7dda895ddd370954aee632258304da31fd8e7c33619dd5cf9ee0ebe9e60eba3cc416e871e181525f23e525489ef6dffced9211f7d3681ec24b3ac7c0b05ab\" 
width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e2\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492694988945\u0026X-Goog-Signature=8d32ad0137f3b10393aeaf635732f7fe337149323c0a33b634f9dcc592e662d4d268223f5c5cc89956787c5a3c061fa69c2de4d1a3225497dfad466548fcd4df7ee6000c4e60ff0661d0f9b451efa2701df5de55c322b3585c09210171403d47c358e6f41281f245711afd63eed3157e6674e9958ebffc5516f6ccb06b9af1830400e1d6a9bed44538b7212eeb257665aa43eecfa2af7482863f6032002de57f7a01af83cf5051541178297ab4a256710c04fd082cb51af4725dbafb288ee79caa630853dcbf35c49595b2b2e552804cabdbdf955806d57b97451e87035bd3ea89dbb5560a1c14170c531c3222a1d0b59d79d4c45b641887699f0bf5830612cb\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Ffluffy-buns-chinchilla-food-variety-pack.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334233994\u0026X-Goog-Signature=6eed60a846158365a0c54d4c96516bb8c4011b497da203479fa9066bc2b7c2c7c683aa351d5249e05df64b8f86549f251150327013e41231ee06f22e9a51e1061c577d7b2da4704b80f684776702da13be6122ad6652463f210c53d9149da9e32a253fde4851723cbb616f18374a8f9b7b5cd36a82e66a2e5677ae51b906e6c4649ea27fcbbcba3ef24bf163a4b0f469c10eca8da464f58161ead8371bdb5b0d452ee713b0ec498ca9aaefd521a1d8240d82885a19af58ddefb167146ab9d411b5d0712457b126104b5f42f57ab4ede2da5ae9af74e708840b507a5538571804c60e9c166516e0118aa555a13169401e21f0a5ec303f3b801d6a972ef94426ee\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e3\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492719670724\u0026X-Goog-Signature=472d88289a3910bdde93dd468e6ae9e66c0821fb0d72a356ef887771baa2a80f356b0ac67183873c0e89c87afb3080b3a78122fa7c3e37ad8b54d8aa216e9e04cb6f2af4f200784beba8de4eea7cf3a39faa8d800b7cc6bbf03df55beaeb69727d413266e2e59fb900bc25b6859d7c14db3c22aa6a0aeaa71c331ae0a2bd27c9e2c246fe931a2562210411491bc83bc34e7bef60901189949348eee909e64c90e3aab7b03d7c274ee84c03cca6dd624169f40323a785bcc1a8357810f738b45a637c09267530fbddc413a75622d793876c74c9128b11d8b4ea2fe2b959b34b96e06a9c384f72522ad357557d0a3b1c8b5792c2e94631139eae178fa660a5e8b2\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fpurrfect-perch-cat-scratcher.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334678692\u0026X-Goog-Signature=239a77d1c63c4074b2ddc3dd21a5bc77de8ea993fdd911a199b6cca207fef9286307a8321d91d02cffda17060a217f50b2a1d8017bbcb29511339a1187b38332a39795fb1b2c754988948be8849a371fd66a7698f8e35a0f1a3430b6837ede73c37c90da49a062586828f19595d4246e4eabf3e3d629f251b066fb20e0f34ddad80599f5946aa76e0694b859f7d575dc781a850d56e9846d456d285b0023e90862154008154fa13ef1f95096a6161e6a1b314b82ca23bc44fc1b4b7799a9f936dca09ab09830446089d8defe11a2c0e066aa8a65204d9027140f314001c59187593290ae22c4092ee5d0293a6e5da1e1da3d42c32c0aced7db7ad4f774575deb\" 
width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003ctr\u003e\n", - " \u003cth\u003e4\u003c/th\u003e\n", - " \u003ctd\u003e\u003cimg src=\"https://storage.googleapis.com/cloud-samples-data/bigquery%2Ftutorials%2Fcymbal-pets%2Fimages%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1742492687196980\u0026X-Goog-Signature=1077450c755044fd6121e0dd67ec7de75a1785c4bacede635b78dd64d6b982ba02bba7a00065035fa13f003ef03c14d7f622b6a5c9ef19cadc956470e5a62e0f1855fb8cae0bbe723c0352224d152c85e173b234e3f5a1bcc2e1ddd4da065c184e82487eb4aa17464d330144b3b7c17ba357af1fb267f97730b3a798ff42a514ea47b83aa9f2560cfd428778638287c96ed17f95f0327e1cade380d046120d79479aca927d627ba65f3bc009c75d1662cbf93c6d33cea4b030a7906d3689922bac13ca3fab45536f115ae6253715cc9c24769e57e83657bf0393ce55e45f90d9666513a19a939044d3b3ad528757e022624d6601dacd326ae8ab3c9ce4ae6d52\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003ctd\u003e\u003cimg 
src=\"https://storage.googleapis.com/bigframes_blob_test/image_blur_transformed%2Fchirpy-seed-deluxe-bird-food.png?X-Goog-Algorithm=GOOG4-RSA-SHA256\u0026X-Goog-Credential=bqcx-1084210331973-pcbl%40gcp-sa-bigquery-condel.iam.gserviceaccount.com%2F20260220%2Fauto%2Fstorage%2Fgoog4_request\u0026X-Goog-Date=20260220T193653Z\u0026X-Goog-Expires=21600\u0026X-Goog-SignedHeaders=host\u0026generation=1771616334302924\u0026X-Goog-Signature=3a25e70e59647f427ae55d72168c576fdb59b05f9ed6b72885a9e9c11f655e6bf10ded55744adcef330659731f0effdda7550ddca99c309cefe46ba864b088f155a5243e579d182a55f916d21d9dd0a83534a5e2efb2955a9146db12b22b4321e3f36b69f8d89d663507d7db83ac96bb2419a2baa0787fd0c6e6079d06652b8a2ac364a0a0a5d8d9de6331658b798abddbaaae1ede3026a4f0d955e74782afe240d31e6748bea8ab332ed945f541ca20c587b8c1449643f4748a3b059aae857334b6249bdf86794d307340b6a07d0dc47d3980e234be9c0549f52636d33776b7474fb95ebc014656c3cc217a777d438612a08849ac498baba0ce4716ac4ea432\" width=\"300\"\u003e\u003c/td\u003e\n", - " \u003c/tr\u003e\n", - " \u003c/tbody\u003e\n", - "\u003c/table\u003e\n", - "\u003cp\u003e5 rows × 2 columns\u003c/p\u003e\n", - "\u003c/div\u003e[5 rows x 2 columns in total]" - ], - "text/plain": [ - " image \\\n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "\n", - " blurred \n", - "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "1 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "2 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "3 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... \n", - "4 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:3... 
\n", - "\n", - "[5 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ] - }, - { - "id": "11fcc6ec", - "cell_type": "markdown", "source": [ "### 4. Use LLM models to ask questions and generate embeddings on images" - ], - "metadata": { - "id": "Euk5saeVVdTP" - }, - "execution_count": null + ] }, { - "id": "793b2f45", "cell_type": "code", - "source": [ - "from bigframes.ml import llm\n", - "gemini = llm.GeminiTextGenerator()" - ], + "execution_count": 9, + "id": "793b2f45", "metadata": { "id": "mRUGfcaFVW-3" }, - "execution_count": 9, "outputs": [ { "name": "stderr", @@ -722,16 +701,16 @@ " return method(*args, **kwargs)\n" ] } + ], + "source": [ + "from bigframes.ml import llm\n", + "gemini = llm.GeminiTextGenerator()" ] }, { - "id": "13d7cb93", "cell_type": "code", - "source": [ - "# Ask the same question on the images\n", - "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", - "answer[[\"ml_generate_text_llm_result\", \"image\"]]" - ], + "execution_count": 10, + "id": "13d7cb93", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -740,7 +719,6 @@ "id": "DNFP7CbjWdR9", "outputId": "3f90a062-0abc-4bce-f53c-db57b06a14b9" }, - "execution_count": 10, "outputs": [ { "name": "stderr", @@ -763,8 +741,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of K9 Guard Dog Paw Balm.
1The item is K9 Guard Dog Hot Spot Spray.
2The image contains three bags of food, likely for small animals like rabbits or guinea pigs. They are labeled \"Timoth Hay Lend Variety Plend\", \"Herbal Greeıs Mix Variety Blend\", and \"Berry & Blossom Treat Blend\", all under the brand \"Fluffy Buns.\" The bags are yellow, green, and purple, respectively. Each bag has a pile of its contents beneath it.
3The item is a cat tree.\\n
4The item is a bag of bird seed. Specifically, it's labeled \"Chirpy Seed\", \"Deluxe Bird Food\".\\n
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -838,11 +816,21 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Ask the same question on the images\n", + "answer = gemini.predict(df_image, prompt=[\"what item is it?\", df_image[\"image\"]])\n", + "answer[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { - "id": "68857305", "cell_type": "code", + "execution_count": 11, + "id": "68857305", + "metadata": { + "id": "IG3J3HsKhyBY" + }, + "outputs": [], "source": [ "# Ask different questions\n", "df_image[\"question\"] = [\n", @@ -852,19 +840,12 @@ " \"is it for pets?\",\n", " \"what is the weight of the product?\",\n", "]" - ], - "metadata": { - "id": "IG3J3HsKhyBY" - }, - "execution_count": 11 + ] }, { - "id": "829afc69", "cell_type": "code", - "source": [ - "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", - "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" - ], + "execution_count": 12, + "id": "829afc69", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -873,7 +854,6 @@ "id": "qKOb765IiVuD", "outputId": "731bafad-ea29-463f-c8c1-cb7acfd70e5d" }, - "execution_count": 12, "outputs": [ { "name": "stderr", @@ -896,8 +876,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_text_llm_resultimage
0The item is a container of Dog Paw Balm.
1The picture contains many colors, including white, black, green, and a bright blue. The product label predominantly features a bright blue hue. The background is a solid gray.
2Here are the product names from the image:\\n\\n* **Timoth Hay Lend Variety Plend** is the product in the yellow bag.\\n* **Herbal Greeıs Mix Variety Blend** is the product in the green bag.\\n* **Berry & Blossom Treat Blend** is the product in the purple bag.
3Yes, it is for pets. It appears to be a cat tree or scratching post.\\n
4The image shows that the weight of the product is 15 oz/ 257g.
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" ], "text/plain": [ " ml_generate_text_llm_result \\\n", @@ -971,17 +951,16 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "answer_alt = gemini.predict(df_image, prompt=[df_image[\"question\"], df_image[\"image\"]])\n", + "answer_alt[[\"ml_generate_text_llm_result\", \"image\"]]" ] }, { - "id": "e75df430", "cell_type": "code", - "source": [ - "# Generate embeddings.\n", - "embed_model = llm.MultimodalEmbeddingGenerator()\n", - "embeddings = embed_model.predict(df_image[\"image\"])\n", - "embeddings" - ], + "execution_count": 13, + "id": "e75df430", "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -990,7 +969,6 @@ "id": "KATVv2CO5RT1", "outputId": "6ec01f27-70b6-4f69-c545-e5e3c879480c" }, - "execution_count": 13, "outputs": [ { "name": "stderr", @@ -1015,8 +993,8 @@ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ml_generate_embedding_resultml_generate_embedding_statusml_generate_embedding_start_secml_generate_embedding_end_seccontent
0[ 0.00638822 0.01666385 0.00451817 ... -0.02...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
1[ 0.00973976 0.02148137 0.0024429 ... 0.00...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
2[ 0.01195884 0.02139394 0.05968047 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
3[-0.02621161 0.02797648 0.04416926 ... -0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
4[ 0.05918628 0.0125137 0.01907336 ... 0.01...<NA><NA>{\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4...
\n", + "

5 rows × 5 columns

\n", + "
[5 rows x 5 columns in total]" ], "text/plain": [ " ml_generate_embedding_result \\\n", @@ -1095,18 +1073,18 @@ "4 [ 0.05918628 0.0125137 0.01907336 ... 0.01... \n", "\n", " ml_generate_embedding_status ml_generate_embedding_start_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " ml_generate_embedding_end_sec \\\n", - "0 \u003cNA\u003e \n", - "1 \u003cNA\u003e \n", - "2 \u003cNA\u003e \n", - "3 \u003cNA\u003e \n", - "4 \u003cNA\u003e \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", "\n", " content \n", "0 {\"access_urls\":{\"expiry_time\":\"2026-02-21T01:4... \n", @@ -1122,24 +1100,41 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Generate embeddings.\n", + "embed_model = llm.MultimodalEmbeddingGenerator()\n", + "embeddings = embed_model.predict(df_image[\"image\"])\n", + "embeddings" ] }, { - "id": "23892b0e", "cell_type": "markdown", + "id": "23892b0e", + "metadata": { + "id": "iRUi8AjG7cIf" + }, "source": [ "### 5. PDF extraction and chunking function\n", "\n", "This section demonstrates how to extract text and chunk text from PDF files using custom BigQuery Python UDFs and the `pypdf` library." 
- ], - "metadata": { - "id": "iRUi8AjG7cIf" - }, - "execution_count": null + ] }, { - "id": "136a18b8", "cell_type": "code", + "execution_count": 14, + "id": "136a18b8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1152,7 +1147,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_extract(src_obj_ref_rt: str) -\u003e str:\n", + "def pdf_extract(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1179,7 +1174,7 @@ " bigquery_connection=FULL_CONNECTION_ID,\n", " packages=[\"pypdf\", \"requests\", \"cryptography\"],\n", ")\n", - "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -\u003e list[str]:\n", + "def pdf_chunk(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> list[str]:\n", " import io\n", " import json\n", " from pypdf import PdfReader\n", @@ -1197,7 +1192,7 @@ " page_text = page.extract_text()\n", " if page_text:\n", " curr_chunk += page_text\n", - " while len(curr_chunk) \u003e= chunk_size:\n", + " while len(curr_chunk) >= chunk_size:\n", " split_idx = curr_chunk.rfind(\" \", 0, chunk_size)\n", " if split_idx == -1:\n", " split_idx = chunk_size\n", @@ -1208,57 +1203,19 @@ " if curr_chunk:\n", " all_text_chunks.append(curr_chunk)\n", " return all_text_chunks" - ], - "metadata": {}, - "execution_count": 14, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in 
preview.\n", - " return global_session.with_default_session(\n" - ] - } ] }, { - "id": "234a5f86", "cell_type": "code", - "source": [ - "import gcsfs\n", - "import bigframes.bigquery as bbq\n", - "\n", - "# List files using gcsfs\n", - "fs = gcsfs.GCSFileSystem(anon=True)\n", - "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", - "\n", - "# Read the URIs into a BigQuery DataFrame\n", - "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", - "\n", - "# Create the object reference column\n", - "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", - "df_pdf = df_pdf[['pdf']]\n", - "\n", - "# Generate a JSON string containing the runtime information (including signed read URLs)\n", - "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", - "\n", - "# Apply PDF extraction\n", - "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", - "\n", - "# Apply PDF chunking\n", - "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", - "\n", - "df_pdf[[\"extracted_text\", \"chunked\"]]" - ], - "metadata": {}, "execution_count": 15, + "id": "234a5f86", + "metadata": {}, "outputs": [ { "data": { "text/html": [ - "\u003cdiv\u003e\n", - "\u003cstyle scoped\u003e\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
extracted_textchunked
0CritterCuisine Pro 5000 - Automatic Pet Feeder...[\"CritterCuisine Pro 5000 - Automatic Pet Feed...
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" ], "text/plain": [ " extracted_text \\\n", @@ -1304,29 +1261,53 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "import gcsfs\n", + "import bigframes.bigquery as bbq\n", + "\n", + "# List files using gcsfs\n", + "fs = gcsfs.GCSFileSystem(anon=True)\n", + "uris = fs.glob(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\")\n", + "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", + "# Read the URIs into a BigQuery DataFrame\n", + "df_pdf = bpd.read_gbq(f\"SELECT uri FROM UNNEST({uris[:5]}) as uri\")\n", + "\n", + "# Create the object reference column\n", + "df_pdf['pdf'] = bbq.obj.make_ref(df_pdf['uri'], authorizer=FULL_CONNECTION_ID)\n", + "df_pdf = df_pdf[['pdf']]\n", + "\n", + "# Generate a JSON string containing the runtime information (including signed read URLs)\n", + "access_urls = get_runtime_json_str(df_pdf[\"pdf\"], mode=\"R\")\n", + "\n", + "# Apply PDF extraction\n", + "df_pdf[\"extracted_text\"] = access_urls.apply(pdf_extract)\n", + "\n", + "# Apply PDF chunking\n", + "df_pdf[\"chunked\"] = access_urls.apply(pdf_chunk, args=(2000, 200))\n", + "\n", + "df_pdf[[\"extracted_text\", \"chunked\"]]" ] }, { - "id": "d80effbe", "cell_type": "code", - "source": [ - "# Explode the chunks to see each chunk as a separate row\n", - "chunked = df_pdf[\"chunked\"].explode()\n", - "chunked" - ], - "metadata": {}, "execution_count": 16, + "id": "d80effbe", + "metadata": {}, "outputs": [ { "data": { "text/html": [ - "\u003cpre\u003e0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", + "
0    CritterCuisine Pro 5000 - Automatic Pet Feeder...\n",
        "0    on a level, stable surface to prevent tipping....\n",
        "0    included)\\nto maintain the schedule during pow...\n",
        "0    digits for Meal 1 will flash.\\n\u0000. Use the UP/D...\n",
        "0    paperclip) for 5\\nseconds. This will reset all...\n",
        "0    unit with a damp cloth. Do not immerse the bas...\n",
-       "0    continues,\\ncontact customer support.\\nE2: Foo...\u003c/pre\u003e"
+       "0    continues,\\ncontact customer support.\\nE2: Foo...
" ], "text/plain": [ "0 CritterCuisine Pro 5000 - Automatic Pet Feeder...\n", @@ -1343,20 +1324,27 @@ "metadata": {}, "output_type": "execute_result" } + ], + "source": [ + "# Explode the chunks to see each chunk as a separate row\n", + "chunked = df_pdf[\"chunked\"].explode()\n", + "chunked" ] }, { - "id": "118cf1c7", "cell_type": "markdown", + "id": "118cf1c7", + "metadata": {}, "source": [ "### 6. Audio transcribe" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "1794c54f", "cell_type": "code", + "execution_count": 17, + "id": "1794c54f", + "metadata": {}, + "outputs": [], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -1367,6 +1355,9 @@ "fs = gcsfs.GCSFileSystem()\n", "uris = fs.glob(audio_gcs_path)\n", "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", "# Read the URIs into a BigQuery DataFrame\n", "# If the bucket is empty or doesn't exist, this will result in an empty DataFrame\n", "if not uris:\n", @@ -1378,13 +1369,39 @@ "# Create the object reference column\n", "df['audio'] = bbq.obj.make_ref(df['uri'], authorizer=FULL_CONNECTION_ID)\n", "df = df[['audio']]" - ], - "metadata": {}, - "execution_count": 17 + ] }, { - "id": "c9f9d484", "cell_type": "code", + "execution_count": null, + "id": "c9f9d484", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "
0    Now, as all books, not primarily intended as p...
" + ], + "text/plain": [ + "0 Now, as all books, not primarily intended as p...\n", + "Name: transcribed_content, dtype: string" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", @@ -1410,39 +1427,29 @@ "\n", "transcribed_series = transcribed_results.struct.field(\"result\").rename(\"transcribed_content\")\n", "transcribed_series" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "7209a62a", "metadata": {}, - "execution_count": null, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:990: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, { "data": { "text/html": [ - "\u003cpre\u003e0 Now, as all books, not primarily intended as p...\u003c/pre\u003e" + "
0    {'status': '', 'content': 'Now, as all books, ...
" ], "text/plain": [ - "0 Now, as all books, not primarily intended as p...\n", - "Name: transcribed_content, dtype: string" + "0 {'status': '', 'content': 'Now, as all books, ...\n", + "Name: transcription_results, dtype: struct[pyarrow]" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } - ] - }, - { - "id": "7209a62a", - "cell_type": "code", + ], "source": [ "# To get verbose results (including status), we can extract both fields from the result struct.\n", "transcribed_content_series = transcribed_results.struct.field(\"result\")\n", @@ -1457,47 +1464,39 @@ "# Package as a struct for consistent display\n", "transcribed_series_verbose = bbq.struct(transcribed_series_verbose).rename(\"transcription_results\")\n", "transcribed_series_verbose" - ], - "metadata": {}, - "execution_count": 19, - "outputs": [ - { - "data": { - "text/html": [ - "\u003cpre\u003e0 {'status': '', 'content': 'Now, as all books, ...\u003c/pre\u003e" - ], - "text/plain": [ - "0 {'status': '', 'content': 'Now, as all books, ...\n", - "Name: transcription_results, dtype: struct\u003cstatus: string, content: string\u003e[pyarrow]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } ] }, { - "id": "c8351cc3", "cell_type": "markdown", + "id": "c8351cc3", + "metadata": {}, "source": [ "### 7. Extract EXIF metadata from images" - ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "e59670b9", "cell_type": "markdown", + "id": "e59670b9", + "metadata": {}, "source": [ "This section demonstrates how to extract EXIF metadata from images using a custom BigQuery Python UDF and the `Pillow` library." 
- ], - "metadata": {}, - "execution_count": null + ] }, { - "id": "fda362f4", "cell_type": "code", + "execution_count": 20, + "id": "fda362f4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", + " return global_session.with_default_session(\n" + ] + } + ], "source": [ "# Construct the canonical connection ID\n", "FULL_CONNECTION_ID = f\"{PROJECT}.{LOCATION}.bigframes-default-connection\"\n", @@ -1513,7 +1512,7 @@ " container_cpu=0.33,\n", " container_memory=\"512Mi\"\n", ")\n", - "def extract_exif(src_obj_ref_rt: str) -\u003e str:\n", + "def extract_exif(src_obj_ref_rt: str) -> str:\n", " import io\n", " import json\n", " from PIL import ExifTags, Image\n", @@ -1530,23 +1529,38 @@ " tag_name = ExifTags.TAGS.get(tag, tag)\n", " exif_dict[tag_name] = value\n", " return json.dumps(exif_dict)" - ], + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "40bb6bc9", "metadata": {}, - "execution_count": 20, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:151: PreviewWarning: udf is in preview.\n", - " return global_session.with_default_session(\n" + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", + "change in future versions.\n", + " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" ] + }, + { + "data": { + "text/html": [ + "
0    {\"ExifOffset\":47,\"Make\":\"MyCamera\"}
" + ], + "text/plain": [ + "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", + "Name: blob_col, dtype: extension>[pyarrow]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } - ] - }, - { - "id": "40bb6bc9", - "cell_type": "code", + ], "source": [ "import gcsfs\n", "import bigframes.bigquery as bbq\n", @@ -1555,6 +1569,9 @@ "fs = gcsfs.GCSFileSystem()\n", "uris = fs.glob(\"gs://bigframes_blob_test/images_exif/*\")\n", "\n", + "# Ensure URIs have gs:// prefix\n", + "uris = [u if u.startswith(\"gs://\") else f\"gs://{u}\" for u in uris]\n", + "\n", "if not uris:\n", " uris = [\"gs://bigframes_blob_test/images_exif/dummy.jpg\"]\n", "\n", @@ -1574,33 +1591,6 @@ "exif_data = bbq.parse_json(exif_json)\n", "\n", "exif_data" - ], - "metadata": {}, - "execution_count": 21, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/utils.py:228: PreviewWarning: The JSON-related API `parse_json` is in preview. Its behavior may\n", - "change in future versions.\n", - " warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "\u003cpre\u003e0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\u003c/pre\u003e" - ], - "text/plain": [ - "0 {\"ExifOffset\":47,\"Make\":\"MyCamera\"}\n", - "Name: blob_col, dtype: extension\u003cdbjson\u003cJSONArrowType\u003e\u003e[pyarrow]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } ] } ], @@ -1626,6 +1616,6 @@ "version": "3.13.0" } }, - "nbformat_minor": 0, - "nbformat": 4 + "nbformat": 4, + "nbformat_minor": 0 }