From afd0b3984e5aefbf59bf82859384770af2c366d2 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 12 May 2026 17:55:18 -0700 Subject: [PATCH 01/17] Introduce python memory profiling pipeline options. --- .../apache_beam/options/pipeline_options.py | 52 +++++++++++++++++++ .../options/pipeline_options_test.py | 17 ++++++ .../options/pipeline_options_validator.py | 3 ++ 3 files changed, 72 insertions(+) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index a5b66ce28ac5..1d527f7bd751 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1658,6 +1658,58 @@ def _add_argparse_args(cls, parser): default=1.0, help='A number between 0 and 1 indicating the ratio ' 'of bundles that should be profiled.') + parser.add_argument( + '--profiler_agent', + default=None, + help=( + 'Specifies the profiling agent to launch the SDK worker harness ' + 'with (e.g., "memray", "tcmalloc", or a custom wrapper script/binary).')) + parser.add_argument( + '--profiler_extra_arg', + '--profiler_extra_args', + dest='profiler_extra_args', + action=_CommaSeparatedListAction, + default=None, + help='Comma-separated list of extra arguments to pass to the profiler agent.') + parser.add_argument( + '--profiler_extra_env_var', + '--profiler_extra_env_vars', + dest='profiler_extra_env_vars', + action=_CommaSeparatedListAction, + default=None, + help=( + 'Comma-separated list of environment variables required by the profiler agent ' + 'in format "KEY1=VAL1,KEY2=VAL2".')) + parser.add_argument( + '--profile_temp_location', + default=None, + help=( + 'Directory path on the worker where local profiles are saved. ' + 'Defaults to ${semi_persist_dir}/profiles if not specified.')) + parser.add_argument( + '--profile_sync_period_sec', + type=int, + default=0, + help=( + 'Frequency (in seconds) at which the local profiles are synced to GCS. ' + 'Defaults to 0 (sync disabled).')) + parser.add_argument( + '--profiler_stop_after_min', + type=int, + default=0, + help=( + 'Time limit (in minutes) for profiling a single process. When exceeded, ' + 'the worker process is restarted without the profiler.')) + + def validate(self, validator): + errors = [] + if self.profiler_agent: + if self.profile_cpu or self.profile_memory: + errors.append( + '--profiler_agent is mutually exclusive with --profile_cpu ' + 'and --profile_memory.') + return errors + class SetupOptions(PipelineOptions): diff --git a/sdks/python/apache_beam/options/pipeline_options_test.py b/sdks/python/apache_beam/options/pipeline_options_test.py index 901f56b99cb6..6e83de612ad6 100644 --- a/sdks/python/apache_beam/options/pipeline_options_test.py +++ b/sdks/python/apache_beam/options/pipeline_options_test.py @@ -669,6 +669,23 @@ def test_unknown_option_prefix(self): options = PipelineOptions(['--type_check_strictness', 'blahblah']) options.view_as(TypeOptions) + def test_profiling_agent_is_exclusive_with_legacy_profiling_options(self): + options = PipelineOptions(['--profiler_agent=memray']) + validator = PipelineOptionsValidator(options, None) + self.assertEqual(validator.validate(), []) + + options = PipelineOptions(['--profiler_agent=memray', '--profile_cpu']) + validator = PipelineOptionsValidator(options, None) + errors = validator.validate() + self.assertTrue(any('--profiler_agent is mutually exclusive' in err for err in errors)) + + options = PipelineOptions(['--profiler_agent=memray', '--profile_memory']) + validator = PipelineOptionsValidator(options, None) + errors = validator.validate() + self.assertTrue(any('--profiler_agent is mutually exclusive' in err for err in errors)) + + + def test_add_experiment(self): options = PipelineOptions([]) options.view_as(DebugOptions).add_experiment('new_experiment') diff --git a/sdks/python/apache_beam/options/pipeline_options_validator.py b/sdks/python/apache_beam/options/pipeline_options_validator.py index 0217363bc9b8..426461a48fcd 100644 --- a/sdks/python/apache_beam/options/pipeline_options_validator.py +++ b/sdks/python/apache_beam/options/pipeline_options_validator.py @@ -30,6 +30,7 @@ from apache_beam.options.pipeline_options import DebugOptions from apache_beam.options.pipeline_options import GoogleCloudOptions from apache_beam.options.pipeline_options import PortableOptions +from apache_beam.options.pipeline_options import ProfilingOptions from apache_beam.options.pipeline_options import SetupOptions from apache_beam.options.pipeline_options import StandardOptions from apache_beam.options.pipeline_options import TestOptions @@ -55,6 +56,7 @@ class PipelineOptionsValidator(object): DebugOptions, GoogleCloudOptions, PortableOptions, + ProfilingOptions, SetupOptions, StandardOptions, TestOptions, @@ -62,6 +64,7 @@ class PipelineOptionsValidator(object): WorkerOptions ] + # Mutually exclusive options for different types of portable environments. REQUIRED_ENVIRONMENT_OPTIONS = { 'DOCKER': [], From fc5e51625e89b99542b2efeb55ffd942ffc759c5 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 12 May 2026 18:09:41 -0700 Subject: [PATCH 02/17] Add memray and google-pprof dependencies --- sdks/python/container/Dockerfile | 3 +++ sdks/python/container/base_image_requirements_manual.txt | 2 ++ 2 files changed, 5 insertions(+) diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index b0e0b94e7086..fa3414cd332e 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -45,7 +45,10 @@ RUN \ ccache \ # Required for using Beam Python SDK on ARM machines. libgeos-dev \ + # Required for memory profiling with tcmalloc. + google-perftools \ && \ + rm -rf /var/lib/apt/lists/* && \ pip install --upgrade pip setuptools wheel && \ diff --git a/sdks/python/container/base_image_requirements_manual.txt b/sdks/python/container/base_image_requirements_manual.txt index 2a6a11415ee9..a78d993461c0 100644 --- a/sdks/python/container/base_image_requirements_manual.txt +++ b/sdks/python/container/base_image_requirements_manual.txt @@ -38,6 +38,8 @@ google-cloud-profiler;python_version<="3.12" # tests google-api-python-client;python_version<="3.13" guppy3 +# Explicitly pin memray to use a version compatible with postprocessing logic in Beam. +memray==1.19.3 mmh3 # Optimizes execution of some Beam codepaths. TODO: Make it Beam's dependency. nltk # Commonly used for natural language processing. google-crc32c From dc5b25af31998e42426f40c0c1428e300b0cd77c Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Wed, 13 May 2026 19:00:24 -0700 Subject: [PATCH 03/17] Parse profiler options in boot.go --- sdks/python/container/boot.go | 88 +++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index 82d11dc89cb2..71f3262c5cd7 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -133,7 +133,14 @@ type PipelineOptionsData struct { } type OptionsData struct { - Experiments []string `json:"experiments"` + Experiments []string `json:"experiments"` + ProfilerAgent string `json:"profiler_agent"` + ProfilerExtraArgs []string `json:"profiler_extra_args"` + ProfilerExtraEnvVars []string `json:"profiler_extra_env_vars"` + ProfileLocation string `json:"profile_location"` + ProfileTempLocation string `json:"profile_temp_location"` + ProfileSyncPeriodSec int `json:"profile_sync_period_sec"` + ProfilerStopAfterMin int `json:"profiler_stop_after_min"` } func getExperiments(options string) []string { @@ -198,6 +205,30 @@ func launchSDKProcess() error { logger.Printf(ctx, "Build isolation disabled when installing packages with pip") } + var opts PipelineOptionsData + if err := json.Unmarshal([]byte(options), &opts); err != nil { + logger.Warnf(ctx, "Failed to unmarshal pipeline options for profiling config: %v", err) + } + + profileTempLocation := opts.Options.ProfileTempLocation + if profileTempLocation == "" { + profileTempLocation = filepath.Join(*semiPersistDir, "profiles") + } + + if opts.Options.ProfilerAgent != "" { + logger.Printf(ctx, "Worker will be configured with profiler agent enabled.") + logger.Printf(ctx, "ProfilerAgent: %v", opts.Options.ProfilerAgent) + logger.Printf(ctx, "ProfilerExtraArgs: %v", opts.Options.ProfilerExtraArgs) + logger.Printf(ctx, "ProfilerExtraEnvVars: %v", opts.Options.ProfilerExtraEnvVars) + logger.Printf(ctx, "ProfileLocation: %v", opts.Options.ProfileLocation) + logger.Printf(ctx, "ProfileTempLocation: %v", profileTempLocation) + logger.Printf(ctx, "ProfileSyncPeriodSec: %v", opts.Options.ProfileSyncPeriodSec) + logger.Printf(ctx, "ProfilerStopAfterMin: %v", opts.Options.ProfilerStopAfterMin) + if err := os.MkdirAll(profileTempLocation, 0755); err != nil { + logger.Warnf(ctx, "Failed to create ProfileTempLocation: %v", err) + } + } + // (2) Retrieve and install the staged packages. // // No log.Fatalf() from here on, otherwise deferred cleanups will not be called! @@ -314,11 +345,6 @@ func launchSDKProcess() error { childPids.mu.Unlock() }() - args := []string{ - "-m", - sdkHarnessEntrypoint, - } - var wg sync.WaitGroup wg.Add(len(workerIds)) for _, workerId := range workerIds { @@ -333,8 +359,54 @@ func launchSDKProcess() error { childPids.mu.Unlock() return } - logger.Printf(ctx, "Executing Python (worker %v): python %v", workerId, strings.Join(args, " ")) - cmd := StartCommandEnv(map[string]string{"WORKER_ID": workerId}, os.Stdin, bufLogger, bufLogger, "python", args...) + + currentProg := "python" + currentArgs := []string{"-m", sdkHarnessEntrypoint} + currentEnv := map[string]string{"WORKER_ID": workerId} + + if opts.Options.ProfilerAgent != "" { + if opts.Options.ProfilerAgent == "memray" { + timeSuffix := time.Now().Format("20060102150405") + memrayFile := filepath.Join(profileTempLocation, fmt.Sprintf("memray-%s-%s.bin", workerId, timeSuffix)) + currentArgs = []string{"-m", "memray", "run"} + currentArgs = append(currentArgs, opts.Options.ProfilerExtraArgs...) + currentArgs = append(currentArgs, "-o", memrayFile, "-m", sdkHarnessEntrypoint) + } else if opts.Options.ProfilerAgent == "tcmalloc" { + tcmallocHeapPath := filepath.Join(profileTempLocation, fmt.Sprintf("tcmalloc-%s", workerId)) + existingPreload := os.Getenv("LD_PRELOAD") + if existingPreload != "" { + currentEnv["LD_PRELOAD"] = existingPreload + ":/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4" + } else { + currentEnv["LD_PRELOAD"] = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4" + } + currentEnv["HEAPPROFILE"] = tcmallocHeapPath + } else { + currentProg = opts.Options.ProfilerAgent + currentArgs = append(append([]string{}, opts.Options.ProfilerExtraArgs...), "python", "-m", sdkHarnessEntrypoint) + } + + for _, envVar := range opts.Options.ProfilerExtraEnvVars { + parts := strings.SplitN(envVar, "=", 2) + if len(parts) == 2 { + currentEnv[parts[0]] = parts[1] + } else { + logger.Errorf(ctx, "Failed to parse profiler extra environment variable: %v. Expected format KEY=VALUE", envVar) + } + } + } + + var envStr string + if len(currentEnv) > 0 { + var envStrings []string + for k, v := range currentEnv { + envStrings = append(envStrings, k+"="+v) + } + slices.Sort(envStrings) + envStr = strings.Join(envStrings, ", ") + } + + logger.Printf(ctx, "Executing Python (%v): %v %v", envStr, currentProg, strings.Join(currentArgs, " ")) + cmd := StartCommandEnv(currentEnv, os.Stdin, bufLogger, bufLogger, currentProg, currentArgs...) childPids.v = append(childPids.v, cmd.Process.Pid) childPids.mu.Unlock() From 645702010ae25b837dceef65842c686634ee490a Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Fri, 5 Jun 2026 12:01:41 -0700 Subject: [PATCH 04/17] Upload profiles to GCS. --- sdks/python/container/boot.go | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index 71f3262c5cd7..ded4a4cc4f80 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -141,6 +141,7 @@ type OptionsData struct { ProfileTempLocation string `json:"profile_temp_location"` ProfileSyncPeriodSec int `json:"profile_sync_period_sec"` ProfilerStopAfterMin int `json:"profiler_stop_after_min"` + JobId string `json:"jobId,omitempty"` } func getExperiments(options string) []string { @@ -227,6 +228,32 @@ func launchSDKProcess() error { if err := os.MkdirAll(profileTempLocation, 0755); err != nil { logger.Warnf(ctx, "Failed to create ProfileTempLocation: %v", err) } + + if strings.HasPrefix(opts.Options.ProfileLocation, "gs://") { + if _, err := exec.LookPath("gcloud"); err != nil { + logger.Errorf(ctx, "gcloud is not available, profiles will not be synced.") + } else { + jobId := opts.Options.JobId + if jobId == "" { + jobId = "BEAM_JOB" + } + hostname, _ := os.Hostname() + if hostname == "" { + hostname = "default-worker" + } + baseGcsDest := strings.TrimSuffix(opts.Options.ProfileLocation, "/") + gcsDestPath := fmt.Sprintf("%s/%s/%s", baseGcsDest, jobId, hostname) + + if opts.Options.ProfileSyncPeriodSec > 0 { + ticker := time.NewTicker(time.Duration(opts.Options.ProfileSyncPeriodSec) * time.Second) + go func() { + for range ticker.C { + syncProfilesToGCS(ctx, logger, profileTempLocation, gcsDestPath) + } + }() + } + } + } } // (2) Retrieve and install the staged packages. @@ -604,3 +631,20 @@ func logSubmissionEnvDependencies(ctx context.Context, bufLogger *tools.Buffered bufLogger.Printf(ctx, "%s", string(content)) return nil } + +// syncProfilesToGCS uploads newly created local memory profiles to the designated GCS target path using gcloud storage. +func syncProfilesToGCS(ctx context.Context, logger *tools.Logger, localDir, gcsDest string) { + entries, err := os.ReadDir(localDir) + if err != nil || len(entries) == 0 { + return + } + + logger.Printf(ctx, "Syncing profiles from %s to %s", localDir, gcsDest) + + cmd := exec.Command("gcloud", "storage", "rsync", localDir, gcsDest) + if err := cmd.Run(); err != nil { + logger.Warnf(ctx, "Failed to sync profiles to GCS: %v", err) + } else { + logger.Printf(ctx, "Successfully synced profiles to GCS.") + } +} From 3c28f1285c09e9690598f48ee8c8af9446d55963 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Fri, 5 Jun 2026 15:31:28 -0700 Subject: [PATCH 05/17] Disable the profiler after a timeout reached. --- sdks/python/container/boot.go | 52 +++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index ded4a4cc4f80..7276f46fa73d 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -32,6 +32,7 @@ import ( "slices" "strings" "sync" + "sync/atomic" "syscall" "time" @@ -380,6 +381,16 @@ func launchSDKProcess() error { bufLogger := tools.NewBufferedLogger(logger) errorCount := 0 + jobId := opts.Options.JobId + if jobId == "" { + jobId = "BEAM_JOB" + } + hostname, _ := os.Hostname() + if hostname == "" { + hostname = "default-worker" + } + stopProfilingSentinel := filepath.Join(profileTempLocation, fmt.Sprintf(".profiler_completed_%s_%s", jobId, hostname)) + for { childPids.mu.Lock() if childPids.canceled { @@ -391,7 +402,12 @@ func launchSDKProcess() error { currentArgs := []string{"-m", sdkHarnessEntrypoint} currentEnv := map[string]string{"WORKER_ID": workerId} - if opts.Options.ProfilerAgent != "" { + enableProfiler := opts.Options.ProfilerAgent != "" + if _, err := os.Stat(stopProfilingSentinel); err == nil { + enableProfiler = false + } + + if enableProfiler { if opts.Options.ProfilerAgent == "memray" { timeSuffix := time.Now().Format("20060102150405") memrayFile := filepath.Join(profileTempLocation, fmt.Sprintf("memray-%s-%s.bin", workerId, timeSuffix)) @@ -437,7 +453,39 @@ func launchSDKProcess() error { childPids.v = append(childPids.v, cmd.Process.Pid) childPids.mu.Unlock() - if err := cmd.Wait(); err != nil { + var timer *time.Timer + var profilingTimedOut atomic.Bool + + if enableProfiler && opts.Options.ProfilerStopAfterMin > 0 { + duration := time.Duration(opts.Options.ProfilerStopAfterMin) * time.Minute + timer = time.AfterFunc(duration, func() { + childPids.mu.Lock() + defer childPids.mu.Unlock() + if cmd.Process != nil { + logger.Printf(ctx, "Profiling timeout of %d minutes reached. Sending SIGINT to worker %s", + opts.Options.ProfilerStopAfterMin, workerId) + profilingTimedOut.Store(true) + syscall.Kill(-cmd.Process.Pid, syscall.SIGINT) + } + }) + } + + err := cmd.Wait() + if timer != nil { + timer.Stop() + } + + if err != nil { + if profilingTimedOut.Load() { + if f, err := os.Create(stopProfilingSentinel); err == nil { + f.Close() + } + bufLogger.FlushAtDebug(ctx) + logger.Printf(ctx, "Python worker %v terminated after profiling timeout. Restarting without profiler.", workerId) + // Error is not counted toward error budget. + continue + } + // Retry on fatal errors, like OOMs and segfaults, not just // DoFns throwing exceptions. errorCount += 1 From 2aa2d1fa0676532e6820cf19bf587212316d5e63 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Fri, 5 Jun 2026 16:48:38 -0700 Subject: [PATCH 06/17] Support --profiler_stop_after_crash --- .../python/apache_beam/options/pipeline_options.py | 7 +++++++ .../apache_beam/options/pipeline_options_test.py | 2 -- sdks/python/container/boot.go | 14 +++++++++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index 1d527f7bd751..fee696660da7 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1700,6 +1700,13 @@ def _add_argparse_args(cls, parser): help=( 'Time limit (in minutes) for profiling a single process. When exceeded, ' 'the worker process is restarted without the profiler.')) + parser.add_argument( + '--profiler_stop_after_crash', + action='store_true', + default=False, + help=( + 'If True, the profiling agent won\'t be re-enabled after a worker ' + 'process crash.')) def validate(self, validator): errors = [] diff --git a/sdks/python/apache_beam/options/pipeline_options_test.py b/sdks/python/apache_beam/options/pipeline_options_test.py index 6e83de612ad6..33dfc87b29d3 100644 --- a/sdks/python/apache_beam/options/pipeline_options_test.py +++ b/sdks/python/apache_beam/options/pipeline_options_test.py @@ -684,8 +684,6 @@ def test_profiling_agent_is_exclusive_with_legacy_profiling_options(self): errors = validator.validate() self.assertTrue(any('--profiler_agent is mutually exclusive' in err for err in errors)) - - def test_add_experiment(self): options = PipelineOptions([]) options.view_as(DebugOptions).add_experiment('new_experiment') diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index 7276f46fa73d..433f409e41db 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -141,8 +141,9 @@ type OptionsData struct { ProfileLocation string `json:"profile_location"` ProfileTempLocation string `json:"profile_temp_location"` ProfileSyncPeriodSec int `json:"profile_sync_period_sec"` - ProfilerStopAfterMin int `json:"profiler_stop_after_min"` - JobId string `json:"jobId,omitempty"` + ProfilerStopAfterMin int `json:"profiler_stop_after_min"` + ProfilerStopAfterCrash bool `json:"profiler_stop_after_crash"` + JobId string `json:"jobId,omitempty"` } func getExperiments(options string) []string { @@ -389,7 +390,7 @@ func launchSDKProcess() error { if hostname == "" { hostname = "default-worker" } - stopProfilingSentinel := filepath.Join(profileTempLocation, fmt.Sprintf(".profiler_completed_%s_%s", jobId, hostname)) + stopProfilingSentinel := filepath.Join(profileTempLocation, fmt.Sprintf(".profiler_disengaged_%s_%s", jobId, hostname)) for { childPids.mu.Lock() @@ -486,6 +487,13 @@ func launchSDKProcess() error { continue } + if enableProfiler && opts.Options.ProfilerStopAfterCrash { + if f, err := os.Create(stopProfilingSentinel); err == nil { + f.Close() + } + logger.Printf(ctx, "Python worker %v crashed. Disabling profiler on subsequent restarts because --profiler_stop_after_crash is enabled.", workerId) + } + // Retry on fatal errors, like OOMs and segfaults, not just // DoFns throwing exceptions. errorCount += 1 From ba4e33133e7f9b3e45b86bd265b760d5eec2e600 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Mon, 8 Jun 2026 12:33:33 -0700 Subject: [PATCH 07/17] Add on-the-worker postprocessing support. --- .../apache_beam/options/pipeline_options.py | 7 ++ sdks/python/container/boot.go | 88 +++++++++++++++++-- 2 files changed, 88 insertions(+), 7 deletions(-) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index fee696660da7..2c345b518f85 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1707,6 +1707,13 @@ def _add_argparse_args(cls, parser): help=( 'If True, the profiling agent won\'t be re-enabled after a worker ' 'process crash.')) + parser.add_argument( + '--profile_postprocess_interval_sec', + type=int, + default=300, + help=( + 'Frequency (in seconds) at which the local profiles are post-processed ' + 'on-the-fly. Defaults to 300 (5 minutes). Set to 0 to disable.')) def validate(self, validator): errors = [] diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index 433f409e41db..4fc15874e3ab 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -134,15 +134,16 @@ type PipelineOptionsData struct { } type OptionsData struct { - Experiments []string `json:"experiments"` - ProfilerAgent string `json:"profiler_agent"` - ProfilerExtraArgs []string `json:"profiler_extra_args"` - ProfilerExtraEnvVars []string `json:"profiler_extra_env_vars"` - ProfileLocation string `json:"profile_location"` - ProfileTempLocation string `json:"profile_temp_location"` - ProfileSyncPeriodSec int `json:"profile_sync_period_sec"` + Experiments []string `json:"experiments"` + ProfilerAgent string `json:"profiler_agent"` + ProfilerExtraArgs []string `json:"profiler_extra_args"` + ProfilerExtraEnvVars []string `json:"profiler_extra_env_vars"` + ProfileLocation string `json:"profile_location"` + ProfileTempLocation string `json:"profile_temp_location"` + ProfileSyncPeriodSec int `json:"profile_sync_period_sec"` ProfilerStopAfterMin int `json:"profiler_stop_after_min"` ProfilerStopAfterCrash bool `json:"profiler_stop_after_crash"` + ProfilePostprocessIntervalSec int `json:"profile_postprocess_interval_sec"` JobId string `json:"jobId,omitempty"` } @@ -256,6 +257,10 @@ func launchSDKProcess() error { } } } + + if opts.Options.ProfilerAgent == "memray" { + go postProcessProfilesLoop(ctx, logger, profileTempLocation, opts.Options.ProfilePostprocessIntervalSec) + } } // (2) Retrieve and install the staged packages. @@ -704,3 +709,72 @@ func syncProfilesToGCS(ctx context.Context, logger *tools.Logger, localDir, gcsD logger.Printf(ctx, "Successfully synced profiles to GCS.") } } + +// postProcessProfilesLoop runs a background loop that periodically triggers profile post-processing if enabled. +func postProcessProfilesLoop(ctx context.Context, logger *tools.Logger, profilesDir string, intervalSec int) { + if intervalSec <= 0 { + return + } + + for { + runPostProcessingSweep(ctx, logger, profilesDir, intervalSec) + + select { + case <-ctx.Done(): + return + case <-time.After(time.Duration(intervalSec) * time.Second): + // Block until the sleep completes before starting the next sweep + } + } +} + +// runPostProcessingSweep scans the profiles directory and launches concurrent postprocessing for newly updated profiles. +func runPostProcessingSweep(ctx context.Context, logger *tools.Logger, profilesDir string, intervalSec int) { + files, err := os.ReadDir(profilesDir) + if err != nil { + return + } + + var wg sync.WaitGroup + for _, file := range files { + name := file.Name() + if !strings.HasSuffix(name, ".bin") || strings.HasPrefix(name, ".") { + continue + } + + binPath := filepath.Join(profilesDir, name) + binInfo, err := os.Stat(binPath) + if err != nil || binInfo.Size() == 0 { + continue + } + + htmlPath := strings.TrimSuffix(binPath, ".bin") + ".html" + htmlInfo, err := os.Stat(htmlPath) + + shouldProcess := false + if os.IsNotExist(err) { + shouldProcess = true + } else if err == nil { + // Don't regenerate when there were no updates to the profile. + if binInfo.ModTime().After(htmlInfo.ModTime().Add(time.Duration(intervalSec) * time.Second)) { + shouldProcess = true + } + } + + if shouldProcess { + wg.Add(1) + go func(bin, html, filename string) { + defer wg.Done() + + cmd := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "-o", html, bin) + if err := cmd.Run(); err != nil { + logger.Warnf(ctx, "Failed to generate flamegraph for %s: %v", filename, err) + } else { + logger.Printf(ctx, "Successfully generated/updated flamegraph: %s", filepath.Base(html)) + } + }(binPath, htmlPath, name) + } + } + + wg.Wait() +} From ed7201038e6e0e3235b536ab44141d866b1dabb1 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Mon, 8 Jun 2026 12:49:00 -0700 Subject: [PATCH 08/17] Generate also the --leaks flamegraph. --- sdks/python/container/boot.go | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index 4fc15874e3ab..d4827c080c79 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -763,16 +763,27 @@ func runPostProcessingSweep(ctx context.Context, logger *tools.Logger, profilesD if shouldProcess { wg.Add(1) - go func(bin, html, filename string) { + go func(bin string) { defer wg.Done() - cmd := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "-o", html, bin) - if err := cmd.Run(); err != nil { - logger.Warnf(ctx, "Failed to generate flamegraph for %s: %v", filename, err) - } else { - logger.Printf(ctx, "Successfully generated/updated flamegraph: %s", filepath.Base(html)) + html := strings.TrimSuffix(bin, ".bin") + ".html" + filename := filepath.Base(bin) + + // 1. Peak Flamegraph + cmd1 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "-o", html, bin) + if err := cmd1.Run(); err != nil { + logger.Warnf(ctx, "Failed to generate peak flamegraph for %s: %v", filename, err) } - }(binPath, htmlPath, name) + + // 2. Leaks Flamegraph + leaksHtml := strings.TrimSuffix(bin, ".bin") + "_leaks.html" + cmd2 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "--leaks", "-o", leaksHtml, bin) + if err := cmd2.Run(); err != nil { + logger.Warnf(ctx, "Failed to generate leaks flamegraph for %s: %v", filename, err) + } + + logger.Printf(ctx, "Successfully updated flamegraphs for %s (Peak & Leaks)", filename) + }(binPath) } } From 19811c26bab0de5f87eb13e46221f8b847114489 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Mon, 8 Jun 2026 13:14:42 -0700 Subject: [PATCH 09/17] Profiler options: use seconds, update defaults. --- .../apache_beam/options/pipeline_options.py | 16 +++++++------- sdks/python/container/boot.go | 22 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index 2c345b518f85..f4cf37b48fc1 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1687,18 +1687,18 @@ def _add_argparse_args(cls, parser): 'Directory path on the worker where local profiles are saved. ' 'Defaults to ${semi_persist_dir}/profiles if not specified.')) parser.add_argument( - '--profile_sync_period_sec', + '--profile_upload_interval_sec', type=int, - default=0, + default=300, help=( - 'Frequency (in seconds) at which the local profiles are synced to GCS. ' - 'Defaults to 0 (sync disabled).')) + 'Frequency (in seconds) at which the local profiles are uploaded to GCS. ' + 'Defaults to 300 (5 min).')) parser.add_argument( - '--profiler_stop_after_min', + '--profiler_stop_after_sec', type=int, default=0, help=( - 'Time limit (in minutes) for profiling a single process. When exceeded, ' + 'Time limit (in seconds) for profiling a single process. When exceeded, ' 'the worker process is restarted without the profiler.')) parser.add_argument( '--profiler_stop_after_crash', @@ -1710,10 +1710,10 @@ def _add_argparse_args(cls, parser): parser.add_argument( '--profile_postprocess_interval_sec', type=int, - default=300, + default=600, help=( 'Frequency (in seconds) at which the local profiles are post-processed ' - 'on-the-fly. Defaults to 300 (5 minutes). Set to 0 to disable.')) + 'on-the-fly. Defaults to 600 (10 minutes). Set to 0 to disable.')) def validate(self, validator): errors = [] diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index d4827c080c79..4cdd2e6406e2 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -140,8 +140,8 @@ type OptionsData struct { ProfilerExtraEnvVars []string `json:"profiler_extra_env_vars"` ProfileLocation string `json:"profile_location"` ProfileTempLocation string `json:"profile_temp_location"` - ProfileSyncPeriodSec int `json:"profile_sync_period_sec"` - ProfilerStopAfterMin int `json:"profiler_stop_after_min"` + ProfileUploadIntervalSec int `json:"profile_upload_interval_sec"` + ProfilerStopAfterSec int `json:"profiler_stop_after_sec"` ProfilerStopAfterCrash bool `json:"profiler_stop_after_crash"` ProfilePostprocessIntervalSec int `json:"profile_postprocess_interval_sec"` JobId string `json:"jobId,omitempty"` @@ -226,15 +226,15 @@ func launchSDKProcess() error { logger.Printf(ctx, "ProfilerExtraEnvVars: %v", opts.Options.ProfilerExtraEnvVars) logger.Printf(ctx, "ProfileLocation: %v", opts.Options.ProfileLocation) logger.Printf(ctx, "ProfileTempLocation: %v", profileTempLocation) - logger.Printf(ctx, "ProfileSyncPeriodSec: %v", opts.Options.ProfileSyncPeriodSec) - logger.Printf(ctx, "ProfilerStopAfterMin: %v", opts.Options.ProfilerStopAfterMin) + logger.Printf(ctx, "ProfileUploadIntervalSec: %v", opts.Options.ProfileUploadIntervalSec) + logger.Printf(ctx, "ProfilerStopAfterSec: %v", opts.Options.ProfilerStopAfterSec) if err := os.MkdirAll(profileTempLocation, 0755); err != nil { logger.Warnf(ctx, "Failed to create ProfileTempLocation: %v", err) } if strings.HasPrefix(opts.Options.ProfileLocation, "gs://") { if _, err := exec.LookPath("gcloud"); err != nil { - logger.Errorf(ctx, "gcloud is not available, profiles will not be synced.") + logger.Errorf(ctx, "gcloud is not available, profiles will not be uploaded.") } else { jobId := opts.Options.JobId if jobId == "" { @@ -247,8 +247,8 @@ func launchSDKProcess() error { baseGcsDest := strings.TrimSuffix(opts.Options.ProfileLocation, "/") gcsDestPath := fmt.Sprintf("%s/%s/%s", baseGcsDest, jobId, hostname) - if opts.Options.ProfileSyncPeriodSec > 0 { - ticker := time.NewTicker(time.Duration(opts.Options.ProfileSyncPeriodSec) * time.Second) + if opts.Options.ProfileUploadIntervalSec > 0 { + ticker := time.NewTicker(time.Duration(opts.Options.ProfileUploadIntervalSec) * time.Second) go func() { for range ticker.C { syncProfilesToGCS(ctx, logger, profileTempLocation, gcsDestPath) @@ -462,14 +462,14 @@ func launchSDKProcess() error { var timer *time.Timer var profilingTimedOut atomic.Bool - if enableProfiler && opts.Options.ProfilerStopAfterMin > 0 { - duration := time.Duration(opts.Options.ProfilerStopAfterMin) * time.Minute + if enableProfiler && opts.Options.ProfilerStopAfterSec > 0 { + duration := time.Duration(opts.Options.ProfilerStopAfterSec) * time.Second timer = time.AfterFunc(duration, func() { childPids.mu.Lock() defer childPids.mu.Unlock() if cmd.Process != nil { - logger.Printf(ctx, "Profiling timeout of %d minutes reached. Sending SIGINT to worker %s", - opts.Options.ProfilerStopAfterMin, workerId) + logger.Printf(ctx, "Profiling timeout of %d seconds reached. Sending SIGINT to worker %s", + opts.Options.ProfilerStopAfterSec, workerId) profilingTimedOut.Store(true) syscall.Kill(-cmd.Process.Pid, syscall.SIGINT) } From 569db3b595c6fb0aeff53cb73a914da1c1200274 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Mon, 8 Jun 2026 13:46:32 -0700 Subject: [PATCH 10/17] Set a default profile location from temp location. --- .../apache_beam/options/pipeline_options.py | 10 ++++++++ .../options/pipeline_options_test.py | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index f4cf37b48fc1..4e10c1205fc3 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1722,6 +1722,16 @@ def validate(self, validator): errors.append( '--profiler_agent is mutually exclusive with --profile_cpu ' 'and --profile_memory.') + + if self.profile_location and self.profile_location.lower() == 'none': + self.profile_location = None + elif not self.profile_location: + temp_location = self.view_as(GoogleCloudOptions).temp_location + if temp_location: + self.profile_location = temp_location.rstrip('/') + '/profiles' + _LOGGER.info( + 'Setting --profile_location to %s since profiling is enabled.', + self.profile_location) return errors diff --git a/sdks/python/apache_beam/options/pipeline_options_test.py b/sdks/python/apache_beam/options/pipeline_options_test.py index 33dfc87b29d3..2e17fbb254ca 100644 --- a/sdks/python/apache_beam/options/pipeline_options_test.py +++ b/sdks/python/apache_beam/options/pipeline_options_test.py @@ -684,6 +684,30 @@ def test_profiling_agent_is_exclusive_with_legacy_profiling_options(self): errors = validator.validate() self.assertTrue(any('--profiler_agent is mutually exclusive' in err for err in errors)) + def test_profile_location_defaulting_and_opt_out(self): + options = PipelineOptions(['--profiler_agent=memray', '--temp_location=gs://bucket/temp']) + validator = PipelineOptionsValidator(options, None) + self.assertEqual(validator.validate(), []) + self.assertEqual(options.view_as(ProfilingOptions).profile_location, 'gs://bucket/temp/profiles') + + options = PipelineOptions([ + '--profiler_agent=memray', + '--temp_location=gs://bucket/temp', + '--profile_location=gs://other-bucket/custom_profiles' + ]) + validator = PipelineOptionsValidator(options, None) + self.assertEqual(validator.validate(), []) + self.assertEqual(options.view_as(ProfilingOptions).profile_location, 'gs://other-bucket/custom_profiles') + + options = PipelineOptions([ + '--profiler_agent=memray', + '--temp_location=gs://bucket/temp', + '--profile_location=nOnE' + ]) + validator = PipelineOptionsValidator(options, None) + self.assertEqual(validator.validate(), []) + self.assertIsNone(options.view_as(ProfilingOptions).profile_location) + def test_add_experiment(self): options = PipelineOptions([]) options.view_as(DebugOptions).add_experiment('new_experiment') From d1f3b01046918b972abed05d0d211ca41d063697 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Mon, 8 Jun 2026 17:31:45 -0700 Subject: [PATCH 11/17] Refactor: move profiling pieces into a separate file. --- sdks/python/container/boot.go | 211 ++------------------ sdks/python/container/profiler.go | 314 ++++++++++++++++++++++++++++++ 2 files changed, 327 insertions(+), 198 deletions(-) create mode 100644 sdks/python/container/profiler.go diff --git a/sdks/python/container/boot.go b/sdks/python/container/boot.go index 4cdd2e6406e2..6184e854e944 100644 --- a/sdks/python/container/boot.go +++ b/sdks/python/container/boot.go @@ -214,54 +214,8 @@ func launchSDKProcess() error { logger.Warnf(ctx, "Failed to unmarshal pipeline options for profiling config: %v", err) } - profileTempLocation := opts.Options.ProfileTempLocation - if profileTempLocation == "" { - profileTempLocation = filepath.Join(*semiPersistDir, "profiles") - } - - if opts.Options.ProfilerAgent != "" { - logger.Printf(ctx, "Worker will be configured with profiler agent enabled.") - logger.Printf(ctx, "ProfilerAgent: %v", opts.Options.ProfilerAgent) - logger.Printf(ctx, "ProfilerExtraArgs: %v", opts.Options.ProfilerExtraArgs) - logger.Printf(ctx, "ProfilerExtraEnvVars: %v", opts.Options.ProfilerExtraEnvVars) - logger.Printf(ctx, "ProfileLocation: %v", opts.Options.ProfileLocation) - logger.Printf(ctx, "ProfileTempLocation: %v", profileTempLocation) - logger.Printf(ctx, "ProfileUploadIntervalSec: %v", opts.Options.ProfileUploadIntervalSec) - logger.Printf(ctx, "ProfilerStopAfterSec: %v", opts.Options.ProfilerStopAfterSec) - if err := os.MkdirAll(profileTempLocation, 0755); err != nil { - logger.Warnf(ctx, "Failed to create ProfileTempLocation: %v", err) - } - - if strings.HasPrefix(opts.Options.ProfileLocation, "gs://") { - if _, err := exec.LookPath("gcloud"); err != nil { - logger.Errorf(ctx, "gcloud is not available, profiles will not be uploaded.") - } else { - jobId := opts.Options.JobId - if jobId == "" { - jobId = "BEAM_JOB" - } - hostname, _ := os.Hostname() - if hostname == "" { - hostname = "default-worker" - } - baseGcsDest := strings.TrimSuffix(opts.Options.ProfileLocation, "/") - gcsDestPath := fmt.Sprintf("%s/%s/%s", baseGcsDest, jobId, hostname) - - if opts.Options.ProfileUploadIntervalSec > 0 { - ticker := time.NewTicker(time.Duration(opts.Options.ProfileUploadIntervalSec) * time.Second) - go func() { - for range ticker.C { - syncProfilesToGCS(ctx, logger, profileTempLocation, gcsDestPath) - } - }() - } - } - } - - if opts.Options.ProfilerAgent == "memray" { - go postProcessProfilesLoop(ctx, logger, profileTempLocation, opts.Options.ProfilePostprocessIntervalSec) - } - } + ctx = setupProfilerConfig(ctx, logger, &opts) + startProfilerBackgroundTasks(ctx, logger) // (2) Retrieve and install the staged packages. // @@ -387,16 +341,6 @@ func launchSDKProcess() error { bufLogger := tools.NewBufferedLogger(logger) errorCount := 0 - jobId := opts.Options.JobId - if jobId == "" { - jobId = "BEAM_JOB" - } - hostname, _ := os.Hostname() - if hostname == "" { - hostname = "default-worker" - } - stopProfilingSentinel := filepath.Join(profileTempLocation, fmt.Sprintf(".profiler_disengaged_%s_%s", jobId, hostname)) - for { childPids.mu.Lock() if childPids.canceled { @@ -408,41 +352,10 @@ func launchSDKProcess() error { currentArgs := []string{"-m", sdkHarnessEntrypoint} currentEnv := map[string]string{"WORKER_ID": workerId} - enableProfiler := opts.Options.ProfilerAgent != "" - if _, err := os.Stat(stopProfilingSentinel); err == nil { - enableProfiler = false - } - - if enableProfiler { - if opts.Options.ProfilerAgent == "memray" { - timeSuffix := time.Now().Format("20060102150405") - memrayFile := filepath.Join(profileTempLocation, fmt.Sprintf("memray-%s-%s.bin", workerId, timeSuffix)) - currentArgs = []string{"-m", "memray", "run"} - currentArgs = append(currentArgs, opts.Options.ProfilerExtraArgs...) - currentArgs = append(currentArgs, "-o", memrayFile, "-m", sdkHarnessEntrypoint) - } else if opts.Options.ProfilerAgent == "tcmalloc" { - tcmallocHeapPath := filepath.Join(profileTempLocation, fmt.Sprintf("tcmalloc-%s", workerId)) - existingPreload := os.Getenv("LD_PRELOAD") - if existingPreload != "" { - currentEnv["LD_PRELOAD"] = existingPreload + ":/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4" - } else { - currentEnv["LD_PRELOAD"] = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4" - } - currentEnv["HEAPPROFILE"] = tcmallocHeapPath - } else { - currentProg = opts.Options.ProfilerAgent - currentArgs = append(append([]string{}, opts.Options.ProfilerExtraArgs...), "python", "-m", sdkHarnessEntrypoint) - } - - for _, envVar := range opts.Options.ProfilerExtraEnvVars { - parts := strings.SplitN(envVar, "=", 2) - if len(parts) == 2 { - currentEnv[parts[0]] = parts[1] - } else { - logger.Errorf(ctx, "Failed to parse profiler extra environment variable: %v. Expected format KEY=VALUE", envVar) - } - } - } + profilingActive := false + currentProg, currentArgs, currentEnv, profilingActive = maybeWithProfiler( + ctx, logger, workerId, currentProg, currentArgs, currentEnv, + ) var envStr string if len(currentEnv) > 0 { @@ -462,14 +375,15 @@ func launchSDKProcess() error { var timer *time.Timer var profilingTimedOut atomic.Bool - if enableProfiler && opts.Options.ProfilerStopAfterSec > 0 { - duration := time.Duration(opts.Options.ProfilerStopAfterSec) * time.Second + pcfg := getProfilerConfig(ctx) + if profilingActive && pcfg != nil && pcfg.StopAfterSec > 0 { + duration := time.Duration(pcfg.StopAfterSec) * time.Second timer = time.AfterFunc(duration, func() { childPids.mu.Lock() defer childPids.mu.Unlock() if cmd.Process != nil { logger.Printf(ctx, "Profiling timeout of %d seconds reached. Sending SIGINT to worker %s", - opts.Options.ProfilerStopAfterSec, workerId) + pcfg.StopAfterSec, workerId) profilingTimedOut.Store(true) syscall.Kill(-cmd.Process.Pid, syscall.SIGINT) } @@ -483,19 +397,15 @@ func launchSDKProcess() error { if err != nil { if profilingTimedOut.Load() { - if f, err := os.Create(stopProfilingSentinel); err == nil { - f.Close() - } + stopProfiling(ctx) bufLogger.FlushAtDebug(ctx) logger.Printf(ctx, "Python worker %v terminated after profiling timeout. Restarting without profiler.", workerId) // Error is not counted toward error budget. continue } - if enableProfiler && opts.Options.ProfilerStopAfterCrash { - if f, err := os.Create(stopProfilingSentinel); err == nil { - f.Close() - } + if profilingActive && pcfg != nil && pcfg.StopAfterCrash { + stopProfiling(ctx) logger.Printf(ctx, "Python worker %v crashed. Disabling profiler on subsequent restarts because --profiler_stop_after_crash is enabled.", workerId) } @@ -693,99 +603,4 @@ func logSubmissionEnvDependencies(ctx context.Context, bufLogger *tools.Buffered return nil } -// syncProfilesToGCS uploads newly created local memory profiles to the designated GCS target path using gcloud storage. -func syncProfilesToGCS(ctx context.Context, logger *tools.Logger, localDir, gcsDest string) { - entries, err := os.ReadDir(localDir) - if err != nil || len(entries) == 0 { - return - } - - logger.Printf(ctx, "Syncing profiles from %s to %s", localDir, gcsDest) - - cmd := exec.Command("gcloud", "storage", "rsync", localDir, gcsDest) - if err := cmd.Run(); err != nil { - logger.Warnf(ctx, "Failed to sync profiles to GCS: %v", err) - } else { - logger.Printf(ctx, "Successfully synced profiles to GCS.") - } -} -// postProcessProfilesLoop runs a background loop that periodically triggers profile post-processing if enabled. -func postProcessProfilesLoop(ctx context.Context, logger *tools.Logger, profilesDir string, intervalSec int) { - if intervalSec <= 0 { - return - } - - for { - runPostProcessingSweep(ctx, logger, profilesDir, intervalSec) - - select { - case <-ctx.Done(): - return - case <-time.After(time.Duration(intervalSec) * time.Second): - // Block until the sleep completes before starting the next sweep - } - } -} - -// runPostProcessingSweep scans the profiles directory and launches concurrent postprocessing for newly updated profiles. -func runPostProcessingSweep(ctx context.Context, logger *tools.Logger, profilesDir string, intervalSec int) { - files, err := os.ReadDir(profilesDir) - if err != nil { - return - } - - var wg sync.WaitGroup - for _, file := range files { - name := file.Name() - if !strings.HasSuffix(name, ".bin") || strings.HasPrefix(name, ".") { - continue - } - - binPath := filepath.Join(profilesDir, name) - binInfo, err := os.Stat(binPath) - if err != nil || binInfo.Size() == 0 { - continue - } - - htmlPath := strings.TrimSuffix(binPath, ".bin") + ".html" - htmlInfo, err := os.Stat(htmlPath) - - shouldProcess := false - if os.IsNotExist(err) { - shouldProcess = true - } else if err == nil { - // Don't regenerate when there were no updates to the profile. - if binInfo.ModTime().After(htmlInfo.ModTime().Add(time.Duration(intervalSec) * time.Second)) { - shouldProcess = true - } - } - - if shouldProcess { - wg.Add(1) - go func(bin string) { - defer wg.Done() - - html := strings.TrimSuffix(bin, ".bin") + ".html" - filename := filepath.Base(bin) - - // 1. Peak Flamegraph - cmd1 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "-o", html, bin) - if err := cmd1.Run(); err != nil { - logger.Warnf(ctx, "Failed to generate peak flamegraph for %s: %v", filename, err) - } - - // 2. Leaks Flamegraph - leaksHtml := strings.TrimSuffix(bin, ".bin") + "_leaks.html" - cmd2 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "--leaks", "-o", leaksHtml, bin) - if err := cmd2.Run(); err != nil { - logger.Warnf(ctx, "Failed to generate leaks flamegraph for %s: %v", filename, err) - } - - logger.Printf(ctx, "Successfully updated flamegraphs for %s (Peak & Leaks)", filename) - }(binPath) - } - } - - wg.Wait() -} diff --git a/sdks/python/container/profiler.go b/sdks/python/container/profiler.go new file mode 100644 index 000000000000..f588255065f4 --- /dev/null +++ b/sdks/python/container/profiler.go @@ -0,0 +1,314 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/apache/beam/sdks/v2/go/container/tools" +) + +type profilerConfigKeyType int + +const profilerConfigKey profilerConfigKeyType = iota + +// ProfilerConfig holds all pre-computed profiling parameters. +type ProfilerConfig struct { + Enabled bool + Agent string + ExtraArgs []string + ExtraEnvVars []string + Location string + TempLocation string + StopSentinelPath string + GcsDestPath string + UploadIntervalSec int + StopAfterSec int + StopAfterCrash bool + PostprocessIntervalSec int +} + +// setupProfilerConfig parses PipelineOptionsData and stores a resolved ProfilerConfig in the context. +func setupProfilerConfig(ctx context.Context, logger *tools.Logger, opts *PipelineOptionsData) context.Context { + agent := opts.Options.ProfilerAgent + if agent == "" { + return ctx + } + + tempLocation := opts.Options.ProfileTempLocation + if tempLocation == "" { + tempLocation = filepath.Join(*semiPersistDir, "profiles") + } + + jobId := opts.Options.JobId + if jobId == "" { + jobId = "BEAM_JOB" + } + hostname, _ := os.Hostname() + if hostname == "" { + hostname = "default-worker" + } + sentinelPath := filepath.Join(tempLocation, fmt.Sprintf(".profiler_disengaged_%s_%s", jobId, hostname)) + + var gcsDestPath string + if strings.HasPrefix(opts.Options.ProfileLocation, "gs://") { + baseGcsDest := strings.TrimSuffix(opts.Options.ProfileLocation, "/") + gcsDestPath = fmt.Sprintf("%s/%s/%s", baseGcsDest, jobId, hostname) + } + + config := &ProfilerConfig{ + Enabled: true, + Agent: agent, + ExtraArgs: opts.Options.ProfilerExtraArgs, + ExtraEnvVars: opts.Options.ProfilerExtraEnvVars, + Location: opts.Options.ProfileLocation, + TempLocation: tempLocation, + StopSentinelPath: sentinelPath, + GcsDestPath: gcsDestPath, + UploadIntervalSec: opts.Options.ProfileUploadIntervalSec, + StopAfterSec: opts.Options.ProfilerStopAfterSec, + StopAfterCrash: opts.Options.ProfilerStopAfterCrash, + PostprocessIntervalSec: opts.Options.ProfilePostprocessIntervalSec, + } + + return context.WithValue(ctx, profilerConfigKey, config) +} + +// getProfilerConfig extracts the ProfilerConfig from the context. +func getProfilerConfig(ctx context.Context) *ProfilerConfig { + if cfg, ok := ctx.Value(profilerConfigKey).(*ProfilerConfig); ok { + return cfg + } + return nil +} + +// startProfilerBackgroundTasks initializes profiling locations and runs background tasks (GCS sync, post-processing loops) if profiling is enabled. +func startProfilerBackgroundTasks(ctx context.Context, logger *tools.Logger) { + pcfg := getProfilerConfig(ctx) + if pcfg == nil { + return + } + + logger.Printf(ctx, "Worker will be configured with profiler agent enabled.") + logger.Printf(ctx, "ProfilerAgent: %v", pcfg.Agent) + logger.Printf(ctx, "ProfilerExtraArgs: %v", pcfg.ExtraArgs) + logger.Printf(ctx, "ProfilerExtraEnvVars: %v", pcfg.ExtraEnvVars) + logger.Printf(ctx, "ProfileLocation: %v", pcfg.Location) + logger.Printf(ctx, "ProfileTempLocation: %v", pcfg.TempLocation) + logger.Printf(ctx, "ProfileUploadIntervalSec: %v", pcfg.UploadIntervalSec) + logger.Printf(ctx, "ProfilerStopAfterSec: %v", pcfg.StopAfterSec) + logger.Printf(ctx, "ProfilerStopAfterCrash: %v", pcfg.StopAfterCrash) + logger.Printf(ctx, "ProfilePostprocessIntervalSec: %v", pcfg.PostprocessIntervalSec) + if err := os.MkdirAll(pcfg.TempLocation, 0755); err != nil { + logger.Warnf(ctx, "Failed to create ProfileTempLocation: %v", err) + } + + if pcfg.GcsDestPath != "" { + if _, err := exec.LookPath("gcloud"); err != nil { + logger.Errorf(ctx, "gcloud is not available, profiles will not be uploaded.") + } else { + if pcfg.UploadIntervalSec > 0 { + ticker := time.NewTicker(time.Duration(pcfg.UploadIntervalSec) * time.Second) + go func() { + for range ticker.C { + syncProfilesToGCS(ctx, logger, pcfg.TempLocation, pcfg.GcsDestPath) + } + }() + } + } + } + + if pcfg.Agent == "memray" { + go postProcessProfilesLoop(ctx, logger, pcfg.TempLocation, pcfg.PostprocessIntervalSec) + } +} + +// maybeWithProfiler builds the execution arguments and environment variables if profiling is enabled and active. +func maybeWithProfiler( + ctx context.Context, + logger *tools.Logger, + workerId string, + currentProg string, + currentArgs []string, + currentEnv map[string]string, +) (string, []string, map[string]string, bool) { + pcfg := getProfilerConfig(ctx) + if pcfg == nil { + return currentProg, currentArgs, currentEnv, false + } + + if _, err := os.Stat(pcfg.StopSentinelPath); err == nil { + return currentProg, currentArgs, currentEnv, false + } + + prog := currentProg + var args []string + // Copy env + env := make(map[string]string) + for k, v := range currentEnv { + env[k] = v + } + + if pcfg.Agent == "memray" { + timeSuffix := time.Now().Format("20060102150405") + memrayFile := filepath.Join(pcfg.TempLocation, fmt.Sprintf("memray-%s-%s.bin", workerId, timeSuffix)) + args = []string{"-m", "memray", "run"} + args = append(args, pcfg.ExtraArgs...) + args = append(args, "-o", memrayFile, "-m", sdkHarnessEntrypoint) + } else if pcfg.Agent == "tcmalloc" { + tcmallocHeapPath := filepath.Join(pcfg.TempLocation, fmt.Sprintf("tcmalloc-%s", workerId)) + existingPreload := os.Getenv("LD_PRELOAD") + if existingPreload != "" { + env["LD_PRELOAD"] = existingPreload + ":/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4" + } else { + env["LD_PRELOAD"] = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4" + } + env["HEAPPROFILE"] = tcmallocHeapPath + args = currentArgs + } else { + prog = pcfg.Agent + args = append(append([]string{}, pcfg.ExtraArgs...), currentProg) + args = append(args, currentArgs...) + } + + for _, envVar := range pcfg.ExtraEnvVars { + parts := strings.SplitN(envVar, "=", 2) + if len(parts) == 2 { + env[parts[0]] = parts[1] + } else { + logger.Errorf(ctx, "Failed to parse profiler extra environment variable: %v. Expected format KEY=VALUE", envVar) + } + } + + return prog, args, env, true +} + +// stopProfiling creates a dummy file at StopSentinelPath to signal that profiling should stop. +func stopProfiling(ctx context.Context) error { + pcfg := getProfilerConfig(ctx) + if pcfg == nil { + return nil + } + f, err := os.Create(pcfg.StopSentinelPath) + if err == nil { + f.Close() + } + return err +} + +// syncProfilesToGCS uploads newly created local memory profiles to the designated GCS target path using gcloud storage. +func syncProfilesToGCS(ctx context.Context, logger *tools.Logger, localDir, gcsDest string) { + entries, err := os.ReadDir(localDir) + if err != nil || len(entries) == 0 { + return + } + + logger.Printf(ctx, "Syncing profiles from %s to %s", localDir, gcsDest) + + cmd := exec.Command("gcloud", "storage", "rsync", localDir, gcsDest) + if err := cmd.Run(); err != nil { + logger.Warnf(ctx, "Failed to sync profiles to GCS: %v", err) + } else { + logger.Printf(ctx, "Successfully synced profiles to GCS.") + } +} + +// postProcessProfilesLoop runs a background loop that periodically triggers profile post-processing if enabled. +func postProcessProfilesLoop(ctx context.Context, logger *tools.Logger, profilesDir string, intervalSec int) { + if intervalSec <= 0 { + return + } + + for { + runPostProcessingSweep(ctx, logger, profilesDir, intervalSec) + + select { + case <-ctx.Done(): + return + case <-time.After(time.Duration(intervalSec) * time.Second): + // Block until the sleep completes before starting the next sweep + } + } +} + +// runPostProcessingSweep scans the profiles directory and launches concurrent postprocessing for newly updated profiles. +func runPostProcessingSweep(ctx context.Context, logger *tools.Logger, profilesDir string, intervalSec int) { + files, err := os.ReadDir(profilesDir) + if err != nil { + return + } + + var wg sync.WaitGroup + for _, file := range files { + name := file.Name() + if !strings.HasSuffix(name, ".bin") || strings.HasPrefix(name, ".") { + continue + } + + binPath := filepath.Join(profilesDir, name) + binInfo, err := os.Stat(binPath) + if err != nil || binInfo.Size() == 0 { + continue + } + + htmlPath := strings.TrimSuffix(binPath, ".bin") + ".html" + htmlInfo, err := os.Stat(htmlPath) + + shouldProcess := false + if os.IsNotExist(err) { + shouldProcess = true + } else if err == nil { + // Don't regenerate when there were no updates to the profile. + if binInfo.ModTime().After(htmlInfo.ModTime().Add(time.Duration(intervalSec) * time.Second)) { + shouldProcess = true + } + } + + if shouldProcess { + wg.Add(1) + go func(bin string) { + defer wg.Done() + + html := strings.TrimSuffix(bin, ".bin") + ".html" + filename := filepath.Base(bin) + + // 1. Peak Flamegraph + cmd1 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "-o", html, bin) + if err := cmd1.Run(); err != nil { + logger.Warnf(ctx, "Failed to generate peak flamegraph for %s: %v", filename, err) + } + + // 2. Leaks Flamegraph + leaksHtml := strings.TrimSuffix(bin, ".bin") + "_leaks.html" + cmd2 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "--leaks", "-o", leaksHtml, bin) + if err := cmd2.Run(); err != nil { + logger.Warnf(ctx, "Failed to generate leaks flamegraph for %s: %v", filename, err) + } + + logger.Printf(ctx, "Successfully updated flamegraphs for %s (Peak & Leaks)", filename) + }(binPath) + } + } + + wg.Wait() +} From b8168d678f6ecbb85fc61eed04ddb6a46a40195a Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 9 Jun 2026 16:50:13 -0700 Subject: [PATCH 12/17] Post-process profiles sequentially. --- sdks/python/container/profiler.go | 45 +++++++++++++------------------ 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/sdks/python/container/profiler.go b/sdks/python/container/profiler.go index f588255065f4..6f2b5548264c 100644 --- a/sdks/python/container/profiler.go +++ b/sdks/python/container/profiler.go @@ -22,7 +22,6 @@ import ( "os/exec" "path/filepath" "strings" - "sync" "time" "github.com/apache/beam/sdks/v2/go/container/tools" @@ -251,14 +250,13 @@ func postProcessProfilesLoop(ctx context.Context, logger *tools.Logger, profiles } } -// runPostProcessingSweep scans the profiles directory and launches concurrent postprocessing for newly updated profiles. +// runPostProcessingSweep scans the profiles directory and launches sequential postprocessing for newly updated profiles. func runPostProcessingSweep(ctx context.Context, logger *tools.Logger, profilesDir string, intervalSec int) { files, err := os.ReadDir(profilesDir) if err != nil { return } - var wg sync.WaitGroup for _, file := range files { name := file.Name() if !strings.HasSuffix(name, ".bin") || strings.HasPrefix(name, ".") { @@ -285,30 +283,23 @@ func runPostProcessingSweep(ctx context.Context, logger *tools.Logger, profilesD } if shouldProcess { - wg.Add(1) - go func(bin string) { - defer wg.Done() - - html := strings.TrimSuffix(bin, ".bin") + ".html" - filename := filepath.Base(bin) - - // 1. Peak Flamegraph - cmd1 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "-o", html, bin) - if err := cmd1.Run(); err != nil { - logger.Warnf(ctx, "Failed to generate peak flamegraph for %s: %v", filename, err) - } - - // 2. Leaks Flamegraph - leaksHtml := strings.TrimSuffix(bin, ".bin") + "_leaks.html" - cmd2 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "--leaks", "-o", leaksHtml, bin) - if err := cmd2.Run(); err != nil { - logger.Warnf(ctx, "Failed to generate leaks flamegraph for %s: %v", filename, err) - } - - logger.Printf(ctx, "Successfully updated flamegraphs for %s (Peak & Leaks)", filename) - }(binPath) + html := strings.TrimSuffix(binPath, ".bin") + ".html" + filename := filepath.Base(binPath) + + // 1. Peak Flamegraph + cmd1 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "-o", html, binPath) + if err := cmd1.Run(); err != nil { + logger.Warnf(ctx, "Failed to generate peak flamegraph for %s: %v", filename, err) + } + + // 2. Leaks Flamegraph + leaksHtml := strings.TrimSuffix(binPath, ".bin") + "_leaks.html" + cmd2 := exec.CommandContext(ctx, "python", "-m", "memray", "flamegraph", "-f", "--leaks", "-o", leaksHtml, binPath) + if err := cmd2.Run(); err != nil { + logger.Warnf(ctx, "Failed to generate leaks flamegraph for %s: %v", filename, err) + } + + logger.Printf(ctx, "Successfully updated flamegraphs for %s (Peak & Leaks)", filename) } } - - wg.Wait() } From 7c33d74df1deed6bd3b4e63b0d29394c214a5a57 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 9 Jun 2026 17:46:30 -0700 Subject: [PATCH 13/17] Simplify tcmalloc preloading to be compatible with ARM versions. --- sdks/python/container/profiler.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/container/profiler.go b/sdks/python/container/profiler.go index 6f2b5548264c..302ee6cd4339 100644 --- a/sdks/python/container/profiler.go +++ b/sdks/python/container/profiler.go @@ -178,9 +178,9 @@ func maybeWithProfiler( tcmallocHeapPath := filepath.Join(pcfg.TempLocation, fmt.Sprintf("tcmalloc-%s", workerId)) existingPreload := os.Getenv("LD_PRELOAD") if existingPreload != "" { - env["LD_PRELOAD"] = existingPreload + ":/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4" + env["LD_PRELOAD"] = existingPreload + ":libtcmalloc.so.4" } else { - env["LD_PRELOAD"] = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4" + env["LD_PRELOAD"] = "libtcmalloc.so.4" } env["HEAPPROFILE"] = tcmallocHeapPath args = currentArgs From a662b080916bae73a841961efef8b2b1f4e12de3 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 9 Jun 2026 18:02:41 -0700 Subject: [PATCH 14/17] Use consistent mechanisms for periodic invocations of background tasks. --- sdks/python/container/profiler.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sdks/python/container/profiler.go b/sdks/python/container/profiler.go index 302ee6cd4339..806c7264ad39 100644 --- a/sdks/python/container/profiler.go +++ b/sdks/python/container/profiler.go @@ -127,10 +127,15 @@ func startProfilerBackgroundTasks(ctx context.Context, logger *tools.Logger) { logger.Errorf(ctx, "gcloud is not available, profiles will not be uploaded.") } else { if pcfg.UploadIntervalSec > 0 { - ticker := time.NewTicker(time.Duration(pcfg.UploadIntervalSec) * time.Second) go func() { - for range ticker.C { - syncProfilesToGCS(ctx, logger, pcfg.TempLocation, pcfg.GcsDestPath) + for { + select { + case <-ctx.Done(): + return + case <-time.After(time.Duration(pcfg.UploadIntervalSec) * time.Second): + // TODO(tvalentyn): Consider a periodic cleanup as well to save local disk space. + syncProfilesToGCS(ctx, logger, pcfg.TempLocation, pcfg.GcsDestPath) + } } }() } @@ -224,7 +229,7 @@ func syncProfilesToGCS(ctx context.Context, logger *tools.Logger, localDir, gcsD logger.Printf(ctx, "Syncing profiles from %s to %s", localDir, gcsDest) - cmd := exec.Command("gcloud", "storage", "rsync", localDir, gcsDest) + cmd := exec.CommandContext(ctx, "gcloud", "storage", "rsync", localDir, gcsDest) if err := cmd.Run(); err != nil { logger.Warnf(ctx, "Failed to sync profiles to GCS: %v", err) } else { From b29ee47ddbc84f3d91b984b2c731ad03cfe122fe Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 9 Jun 2026 18:13:50 -0700 Subject: [PATCH 15/17] Formatting --- .../apache_beam/options/pipeline_options.py | 8 +++++--- .../options/pipeline_options_test.py | 17 ++++++++++++----- .../options/pipeline_options_validator.py | 1 - 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index 4e10c1205fc3..ec7a0a34baa9 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -1663,14 +1663,17 @@ def _add_argparse_args(cls, parser): default=None, help=( 'Specifies the profiling agent to launch the SDK worker harness ' - 'with (e.g., "memray", "tcmalloc", or a custom wrapper script/binary).')) + 'with (e.g., "memray", "tcmalloc", or a custom wrapper script/binary).' + )) parser.add_argument( '--profiler_extra_arg', '--profiler_extra_args', dest='profiler_extra_args', action=_CommaSeparatedListAction, default=None, - help='Comma-separated list of extra arguments to pass to the profiler agent.') + help= + 'Comma-separated list of extra arguments to pass to the profiler agent.' + ) parser.add_argument( '--profiler_extra_env_var', '--profiler_extra_env_vars', @@ -1735,7 +1738,6 @@ def validate(self, validator): return errors - class SetupOptions(PipelineOptions): @classmethod def _add_argparse_args(cls, parser): diff --git a/sdks/python/apache_beam/options/pipeline_options_test.py b/sdks/python/apache_beam/options/pipeline_options_test.py index 2e17fbb254ca..d9337fd5a33b 100644 --- a/sdks/python/apache_beam/options/pipeline_options_test.py +++ b/sdks/python/apache_beam/options/pipeline_options_test.py @@ -677,18 +677,23 @@ def test_profiling_agent_is_exclusive_with_legacy_profiling_options(self): options = PipelineOptions(['--profiler_agent=memray', '--profile_cpu']) validator = PipelineOptionsValidator(options, None) errors = validator.validate() - self.assertTrue(any('--profiler_agent is mutually exclusive' in err for err in errors)) + self.assertTrue( + any('--profiler_agent is mutually exclusive' in err for err in errors)) options = PipelineOptions(['--profiler_agent=memray', '--profile_memory']) validator = PipelineOptionsValidator(options, None) errors = validator.validate() - self.assertTrue(any('--profiler_agent is mutually exclusive' in err for err in errors)) + self.assertTrue( + any('--profiler_agent is mutually exclusive' in err for err in errors)) def test_profile_location_defaulting_and_opt_out(self): - options = PipelineOptions(['--profiler_agent=memray', '--temp_location=gs://bucket/temp']) + options = PipelineOptions( + ['--profiler_agent=memray', '--temp_location=gs://bucket/temp']) validator = PipelineOptionsValidator(options, None) self.assertEqual(validator.validate(), []) - self.assertEqual(options.view_as(ProfilingOptions).profile_location, 'gs://bucket/temp/profiles') + self.assertEqual( + options.view_as(ProfilingOptions).profile_location, + 'gs://bucket/temp/profiles') options = PipelineOptions([ '--profiler_agent=memray', @@ -697,7 +702,9 @@ def test_profile_location_defaulting_and_opt_out(self): ]) validator = PipelineOptionsValidator(options, None) self.assertEqual(validator.validate(), []) - self.assertEqual(options.view_as(ProfilingOptions).profile_location, 'gs://other-bucket/custom_profiles') + self.assertEqual( + options.view_as(ProfilingOptions).profile_location, + 'gs://other-bucket/custom_profiles') options = PipelineOptions([ '--profiler_agent=memray', diff --git a/sdks/python/apache_beam/options/pipeline_options_validator.py b/sdks/python/apache_beam/options/pipeline_options_validator.py index 426461a48fcd..29253329908e 100644 --- a/sdks/python/apache_beam/options/pipeline_options_validator.py +++ b/sdks/python/apache_beam/options/pipeline_options_validator.py @@ -64,7 +64,6 @@ class PipelineOptionsValidator(object): WorkerOptions ] - # Mutually exclusive options for different types of portable environments. REQUIRED_ENVIRONMENT_OPTIONS = { 'DOCKER': [], From e5aa7ee0d2ffd01d7a4ed774f177a648fdfc4677 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 9 Jun 2026 18:59:28 -0700 Subject: [PATCH 16/17] Regenerate dependencies. --- .../ml/py310/base_image_requirements.txt | 44 ++++++++------ .../ml/py310/gpu_image_requirements.txt | 60 ++++++++++--------- .../ml/py311/base_image_requirements.txt | 44 ++++++++------ .../ml/py311/gpu_image_requirements.txt | 60 ++++++++++--------- .../ml/py312/base_image_requirements.txt | 44 ++++++++------ .../ml/py312/gpu_image_requirements.txt | 60 ++++++++++--------- .../ml/py313/base_image_requirements.txt | 44 ++++++++------ .../py310/base_image_requirements.txt | 41 ++++++++----- .../py311/base_image_requirements.txt | 41 ++++++++----- .../py312/base_image_requirements.txt | 41 ++++++++----- .../py313/base_image_requirements.txt | 41 ++++++++----- .../py314/base_image_requirements.txt | 41 ++++++++----- 12 files changed, 324 insertions(+), 237 deletions(-) diff --git a/sdks/python/container/ml/py310/base_image_requirements.txt b/sdks/python/container/ml/py310/base_image_requirements.txt index 33b5587dc86b..74419bff9cd9 100644 --- a/sdks/python/container/ml/py310/base_image_requirements.txt +++ b/sdks/python/container/ml/py310/base_image_requirements.txt @@ -24,7 +24,7 @@ absl-py==2.4.0 aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 @@ -34,7 +34,7 @@ async-timeout==5.0.1 attrs==26.1.0 backports.tarfile==1.2.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b7 bs4==0.0.2 build==1.5.0 @@ -57,43 +57,43 @@ exceptiongroup==1.3.1 execnet==2.1.2 fastavro==1.12.2 fasteners==0.20 -filelock==3.29.0 +filelock==3.29.1 flatbuffers==25.12.19 freezegun==1.5.5 frozenlist==1.8.0 fsspec==2026.4.0 future==1.0.0 gast==0.7.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 +google-genai==2.8.0 google-pasta==0.2.0 -google-resumable-media==2.9.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -105,7 +105,7 @@ guppy3==3.1.7 h11==0.16.0 h2==4.3.0 h5py==3.14.0 -hf-xet==1.5.0 +hf-xet==1.5.1 hpack==4.1.0 httpcore==1.0.9 httplib2==0.31.2 @@ -129,9 +129,12 @@ keras==3.12.2 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 libclang==18.1.1 +linkify-it-py==2.1.0 markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 mdurl==0.1.2 +memray==1.19.3 ml_dtypes==0.5.4 mmh3==5.2.1 mock==5.2.0 @@ -158,6 +161,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -195,7 +199,7 @@ requests-mock==1.12.1 rich==15.0.0 rpds-py==0.30.0 rsa==4.9.1 -safetensors==0.7.0 +safetensors==0.8.0 scikit-learn==1.7.2 scipy==1.15.3 scramp==1.4.8 @@ -214,15 +218,17 @@ tensorflow==2.21.0 tensorflow-cpu-aws==2.21.0;platform_machine=="aarch64" termcolor==3.3.0 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 tokenizers==0.21.4 tomli==2.4.1 torch==2.8.0+cpu -tqdm==4.67.3 +tqdm==4.68.2 transformers==4.55.4 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 virtualenv-clone==0.5.7 diff --git a/sdks/python/container/ml/py310/gpu_image_requirements.txt b/sdks/python/container/ml/py310/gpu_image_requirements.txt index d909ca3142ee..249a14d8f791 100644 --- a/sdks/python/container/ml/py310/gpu_image_requirements.txt +++ b/sdks/python/container/ml/py310/gpu_image_requirements.txt @@ -24,7 +24,7 @@ absl-py==2.4.0 aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-doc==0.0.4 annotated-types==0.7.0 @@ -36,13 +36,13 @@ async-timeout==5.0.1 attrs==26.1.0 backports.tarfile==1.2.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b7 blake3==1.0.8 bs4==0.0.2 build==1.5.0 cachetools==6.2.6 -cbor2==6.1.1 +cbor2==6.1.2 certifi==2026.5.20 cffi==2.0.0 charset-normalizer==3.4.7 @@ -76,7 +76,7 @@ fastapi-cloud-cli==0.19.0 fastar==0.11.0 fastavro==1.12.2 fasteners==0.20 -filelock==3.29.0 +filelock==3.29.1 flatbuffers==25.12.19 freezegun==1.5.5 frozenlist==1.8.0 @@ -84,36 +84,36 @@ fsspec==2026.4.0 future==1.0.0 gast==0.7.0 gguf==0.19.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 +google-genai==2.8.0 google-pasta==0.2.0 -google-resumable-media==2.9.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -125,7 +125,7 @@ guppy3==3.1.7 h11==0.16.0 h2==4.3.0 h5py==3.16.0 -hf-xet==1.5.0 +hf-xet==1.5.1 hpack==4.1.0 httpcore==1.0.9 httplib2==0.31.2 @@ -153,14 +153,17 @@ keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 lark==1.2.2 libclang==18.1.1 +linkify-it-py==2.1.0 llguidance==0.7.30 llvmlite==0.44.0 lm-format-enforcer==0.10.12 Markdown==3.10.2 markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 mdurl==0.1.2 -mistral_common==1.11.2 +memray==1.19.3 +mistral_common==1.11.3 ml_dtypes==0.5.4 mmh3==5.2.1 mock==5.2.0 @@ -225,6 +228,7 @@ partial-json-parser==0.2.1.1.post7 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 prometheus-fastapi-instrumentator==8.0.0 @@ -259,7 +263,7 @@ pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.2.2 python-json-logger==4.1.0 -python-multipart==0.0.30 +python-multipart==0.0.32 python-tds==1.17.1 pytz==2026.2 PyYAML==6.0.3 @@ -271,24 +275,24 @@ regex==2026.5.9 requests==2.34.2 requests-mock==1.12.1 rich==15.0.0 -rich-toolkit==0.19.10 +rich-toolkit==0.20.1 rignore==0.7.6 rpds-py==0.30.0 rsa==4.9.1 -safetensors==0.7.0 +safetensors==0.8.0 scikit-learn==1.7.2 scipy==1.15.3 scramp==1.4.8 SecretStorage==3.5.0 sentencepiece==0.2.1 -sentry-sdk==2.61.1 +sentry-sdk==2.62.0 setproctitle==1.3.7 setuptools==81.0.0 shellingham==1.5.4 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -soundfile==0.13.1 +soundfile==0.14.0 soupsieve==2.8.4 soxr==1.1.0 SQLAlchemy==2.0.50 @@ -303,6 +307,7 @@ tensorflow==2.20.0 tensorflow-cpu-aws==2.20.0;platform_machine=="aarch64" termcolor==3.3.0 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 tiktoken==0.13.0 tokenizers==0.21.4 @@ -310,16 +315,17 @@ tomli==2.4.1 torch==2.7.1 torchaudio==2.7.1 torchvision==0.22.1 -tqdm==4.67.3 +tqdm==4.68.2 transformers==4.55.4 triton==3.3.1 -typer==0.26.6 +typer==0.26.7 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 -uvicorn==0.48.0 +uvicorn==0.49.0 uvloop==0.22.1 virtualenv-clone==0.5.7 vllm==0.10.1.1 diff --git a/sdks/python/container/ml/py311/base_image_requirements.txt b/sdks/python/container/ml/py311/base_image_requirements.txt index a72c2814fd2e..85ae711b2324 100644 --- a/sdks/python/container/ml/py311/base_image_requirements.txt +++ b/sdks/python/container/ml/py311/base_image_requirements.txt @@ -24,7 +24,7 @@ absl-py==2.4.0 aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 @@ -33,7 +33,7 @@ astunparse==1.6.3 attrs==26.1.0 backports.tarfile==1.2.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 bs4==0.0.2 build==1.5.0 @@ -55,43 +55,43 @@ envoy_data_plane==1.0.3 execnet==2.1.2 fastavro==1.12.2 fasteners==0.20 -filelock==3.29.0 +filelock==3.29.1 flatbuffers==25.12.19 freezegun==1.5.5 frozenlist==1.8.0 fsspec==2026.4.0 future==1.0.0 gast==0.7.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 +google-genai==2.8.0 google-pasta==0.2.0 -google-resumable-media==2.9.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -104,7 +104,7 @@ guppy3==3.1.7 h11==0.16.0 h2==4.3.0 h5py==3.14.0 -hf-xet==1.5.0 +hf-xet==1.5.1 hpack==4.1.0 httpcore==1.0.9 httplib2==0.31.2 @@ -128,9 +128,12 @@ keras==3.14.1 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 libclang==18.1.1 +linkify-it-py==2.1.0 markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 mdurl==0.1.2 +memray==1.19.3 ml_dtypes==0.5.4 mmh3==5.2.1 mock==5.2.0 @@ -157,6 +160,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -194,7 +198,7 @@ requests-mock==1.12.1 rich==15.0.0 rpds-py==2026.5.1 rsa==4.9.1 -safetensors==0.7.0 +safetensors==0.8.0 scikit-learn==1.7.2 scipy==1.17.1 scramp==1.4.8 @@ -213,14 +217,16 @@ tensorflow==2.21.0 tensorflow-cpu-aws==2.21.0;platform_machine=="aarch64" termcolor==3.3.0 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 tokenizers==0.21.4 torch==2.8.0+cpu -tqdm==4.67.3 +tqdm==4.68.2 transformers==4.55.4 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 virtualenv-clone==0.5.7 diff --git a/sdks/python/container/ml/py311/gpu_image_requirements.txt b/sdks/python/container/ml/py311/gpu_image_requirements.txt index 49e997701548..2aa43f9c8fd3 100644 --- a/sdks/python/container/ml/py311/gpu_image_requirements.txt +++ b/sdks/python/container/ml/py311/gpu_image_requirements.txt @@ -24,7 +24,7 @@ absl-py==2.4.0 aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-doc==0.0.4 annotated-types==0.7.0 @@ -35,13 +35,13 @@ astunparse==1.6.3 attrs==26.1.0 backports.tarfile==1.2.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 blake3==1.0.8 bs4==0.0.2 build==1.5.0 cachetools==6.2.6 -cbor2==6.1.1 +cbor2==6.1.2 certifi==2026.5.20 cffi==2.0.0 charset-normalizer==3.4.7 @@ -74,7 +74,7 @@ fastapi-cloud-cli==0.19.0 fastar==0.11.0 fastavro==1.12.2 fasteners==0.20 -filelock==3.29.0 +filelock==3.29.1 flatbuffers==25.12.19 freezegun==1.5.5 frozenlist==1.8.0 @@ -82,36 +82,36 @@ fsspec==2026.4.0 future==1.0.0 gast==0.7.0 gguf==0.19.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 +google-genai==2.8.0 google-pasta==0.2.0 -google-resumable-media==2.9.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -124,7 +124,7 @@ guppy3==3.1.7 h11==0.16.0 h2==4.3.0 h5py==3.16.0 -hf-xet==1.5.0 +hf-xet==1.5.1 hpack==4.1.0 httpcore==1.0.9 httplib2==0.31.2 @@ -152,14 +152,17 @@ keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 lark==1.2.2 libclang==18.1.1 +linkify-it-py==2.1.0 llguidance==0.7.30 llvmlite==0.44.0 lm-format-enforcer==0.10.12 Markdown==3.10.2 markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 mdurl==0.1.2 -mistral_common==1.11.2 +memray==1.19.3 +mistral_common==1.11.3 ml_dtypes==0.5.4 mmh3==5.2.1 mock==5.2.0 @@ -224,6 +227,7 @@ partial-json-parser==0.2.1.1.post7 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 prometheus-fastapi-instrumentator==8.0.0 @@ -258,7 +262,7 @@ pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.2.2 python-json-logger==4.1.0 -python-multipart==0.0.30 +python-multipart==0.0.32 python-tds==1.17.1 pytz==2026.2 PyYAML==6.0.3 @@ -270,24 +274,24 @@ regex==2026.5.9 requests==2.34.2 requests-mock==1.12.1 rich==15.0.0 -rich-toolkit==0.19.10 +rich-toolkit==0.20.1 rignore==0.7.6 rpds-py==2026.5.1 rsa==4.9.1 -safetensors==0.7.0 +safetensors==0.8.0 scikit-learn==1.7.2 scipy==1.17.1 scramp==1.4.8 SecretStorage==3.5.0 sentencepiece==0.2.1 -sentry-sdk==2.61.1 +sentry-sdk==2.62.0 setproctitle==1.3.7 setuptools==81.0.0 shellingham==1.5.4 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -soundfile==0.13.1 +soundfile==0.14.0 soupsieve==2.8.4 soxr==1.1.0 SQLAlchemy==2.0.50 @@ -302,22 +306,24 @@ tensorflow==2.20.0 tensorflow-cpu-aws==2.20.0;platform_machine=="aarch64" termcolor==3.3.0 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 tiktoken==0.13.0 tokenizers==0.21.4 torch==2.7.1 torchaudio==2.7.1 torchvision==0.22.1 -tqdm==4.67.3 +tqdm==4.68.2 transformers==4.55.4 triton==3.3.1 -typer==0.26.6 +typer==0.26.7 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 -uvicorn==0.48.0 +uvicorn==0.49.0 uvloop==0.22.1 virtualenv-clone==0.5.7 vllm==0.10.1.1 diff --git a/sdks/python/container/ml/py312/base_image_requirements.txt b/sdks/python/container/ml/py312/base_image_requirements.txt index ad3c161a2c69..2b8283180794 100644 --- a/sdks/python/container/ml/py312/base_image_requirements.txt +++ b/sdks/python/container/ml/py312/base_image_requirements.txt @@ -24,7 +24,7 @@ absl-py==2.4.0 aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 @@ -32,7 +32,7 @@ asn1crypto==1.5.1 astunparse==1.6.3 attrs==26.1.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 bs4==0.0.2 build==1.5.0 @@ -54,43 +54,43 @@ envoy_data_plane==1.0.3 execnet==2.1.2 fastavro==1.12.2 fasteners==0.20 -filelock==3.29.0 +filelock==3.29.1 flatbuffers==25.12.19 freezegun==1.5.5 frozenlist==1.8.0 fsspec==2026.4.0 future==1.0.0 gast==0.7.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 +google-genai==2.8.0 google-pasta==0.2.0 -google-resumable-media==2.9.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -103,7 +103,7 @@ guppy3==3.1.7 h11==0.16.0 h2==4.3.0 h5py==3.14.0 -hf-xet==1.5.0 +hf-xet==1.5.1 hpack==4.1.0 httpcore==1.0.9 httplib2==0.31.2 @@ -126,9 +126,12 @@ keras==3.14.1 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 libclang==18.1.1 +linkify-it-py==2.1.0 markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 mdurl==0.1.2 +memray==1.19.3 ml_dtypes==0.5.4 mmh3==5.2.1 mock==5.2.0 @@ -155,6 +158,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -192,7 +196,7 @@ requests-mock==1.12.1 rich==15.0.0 rpds-py==2026.5.1 rsa==4.9.1 -safetensors==0.7.0 +safetensors==0.8.0 scikit-learn==1.7.2 scipy==1.17.1 scramp==1.4.8 @@ -211,14 +215,16 @@ tensorflow==2.21.0 tensorflow-cpu-aws==2.21.0;platform_machine=="aarch64" termcolor==3.3.0 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 tokenizers==0.21.4 torch==2.8.0+cpu -tqdm==4.67.3 +tqdm==4.68.2 transformers==4.55.4 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 virtualenv-clone==0.5.7 diff --git a/sdks/python/container/ml/py312/gpu_image_requirements.txt b/sdks/python/container/ml/py312/gpu_image_requirements.txt index d5cfda651ff4..c659def93a8c 100644 --- a/sdks/python/container/ml/py312/gpu_image_requirements.txt +++ b/sdks/python/container/ml/py312/gpu_image_requirements.txt @@ -24,7 +24,7 @@ absl-py==2.4.0 aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-doc==0.0.4 annotated-types==0.7.0 @@ -34,13 +34,13 @@ astor==0.8.1 astunparse==1.6.3 attrs==26.1.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 blake3==1.0.8 bs4==0.0.2 build==1.5.0 cachetools==6.2.6 -cbor2==6.1.1 +cbor2==6.1.2 certifi==2026.5.20 cffi==2.0.0 charset-normalizer==3.4.7 @@ -73,7 +73,7 @@ fastapi-cloud-cli==0.19.0 fastar==0.11.0 fastavro==1.12.2 fasteners==0.20 -filelock==3.29.0 +filelock==3.29.1 flatbuffers==25.12.19 freezegun==1.5.5 frozenlist==1.8.0 @@ -81,36 +81,36 @@ fsspec==2026.4.0 future==1.0.0 gast==0.7.0 gguf==0.19.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 +google-genai==2.8.0 google-pasta==0.2.0 -google-resumable-media==2.9.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -123,7 +123,7 @@ guppy3==3.1.7 h11==0.16.0 h2==4.3.0 h5py==3.16.0 -hf-xet==1.5.0 +hf-xet==1.5.1 hpack==4.1.0 httpcore==1.0.9 httplib2==0.31.2 @@ -150,14 +150,17 @@ keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 lark==1.2.2 libclang==18.1.1 +linkify-it-py==2.1.0 llguidance==0.7.30 llvmlite==0.44.0 lm-format-enforcer==0.10.12 Markdown==3.10.2 markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 mdurl==0.1.2 -mistral_common==1.11.2 +memray==1.19.3 +mistral_common==1.11.3 ml_dtypes==0.5.4 mmh3==5.2.1 mock==5.2.0 @@ -222,6 +225,7 @@ partial-json-parser==0.2.1.1.post7 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 prometheus-fastapi-instrumentator==8.0.0 @@ -256,7 +260,7 @@ pytest-xdist==3.8.0 python-dateutil==2.9.0.post0 python-dotenv==1.2.2 python-json-logger==4.1.0 -python-multipart==0.0.30 +python-multipart==0.0.32 python-tds==1.17.1 pytz==2026.2 PyYAML==6.0.3 @@ -268,24 +272,24 @@ regex==2026.5.9 requests==2.34.2 requests-mock==1.12.1 rich==15.0.0 -rich-toolkit==0.19.10 +rich-toolkit==0.20.1 rignore==0.7.6 rpds-py==2026.5.1 rsa==4.9.1 -safetensors==0.7.0 +safetensors==0.8.0 scikit-learn==1.7.2 scipy==1.17.1 scramp==1.4.8 SecretStorage==3.5.0 sentencepiece==0.2.1 -sentry-sdk==2.61.1 +sentry-sdk==2.62.0 setproctitle==1.3.7 setuptools==79.0.1 shellingham==1.5.4 six==1.17.0 sniffio==1.3.1 sortedcontainers==2.4.0 -soundfile==0.13.1 +soundfile==0.14.0 soupsieve==2.8.4 soxr==1.1.0 SQLAlchemy==2.0.50 @@ -300,22 +304,24 @@ tensorflow==2.20.0 tensorflow-cpu-aws==2.20.0;platform_machine=="aarch64" termcolor==3.3.0 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 tiktoken==0.13.0 tokenizers==0.21.4 torch==2.7.1 torchaudio==2.7.1 torchvision==0.22.1 -tqdm==4.67.3 +tqdm==4.68.2 transformers==4.55.4 triton==3.3.1 -typer==0.26.6 +typer==0.26.7 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 -uvicorn==0.48.0 +uvicorn==0.49.0 uvloop==0.22.1 virtualenv-clone==0.5.7 vllm==0.10.1.1 diff --git a/sdks/python/container/ml/py313/base_image_requirements.txt b/sdks/python/container/ml/py313/base_image_requirements.txt index 1d129a09fedf..b98bef893b89 100644 --- a/sdks/python/container/ml/py313/base_image_requirements.txt +++ b/sdks/python/container/ml/py313/base_image_requirements.txt @@ -24,7 +24,7 @@ absl-py==2.4.0 aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 @@ -32,7 +32,7 @@ asn1crypto==1.5.1 astunparse==1.6.3 attrs==26.1.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 bs4==0.0.2 build==1.5.0 @@ -54,42 +54,42 @@ envoy_data_plane==1.0.3 execnet==2.1.2 fastavro==1.12.2 fasteners==0.20 -filelock==3.29.0 +filelock==3.29.1 flatbuffers==25.12.19 freezegun==1.5.5 frozenlist==1.8.0 fsspec==2026.4.0 future==1.0.0 gast==0.7.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.35 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 -google-cloud-pubsub==2.38.0 +google-cloud-monitoring==2.31.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 +google-genai==2.8.0 google-pasta==0.2.0 -google-resumable-media==2.9.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -102,7 +102,7 @@ guppy3==3.1.7 h11==0.16.0 h2==4.3.0 h5py==3.14.0 -hf-xet==1.5.0 +hf-xet==1.5.1 hpack==4.1.0 httpcore==1.0.9 httplib2==0.31.2 @@ -125,9 +125,12 @@ keras==3.14.1 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 libclang==18.1.1 +linkify-it-py==2.1.0 markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 mdurl==0.1.2 +memray==1.19.3 ml_dtypes==0.5.4 mmh3==5.2.1 mock==5.2.0 @@ -154,6 +157,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -191,7 +195,7 @@ requests-mock==1.12.1 rich==15.0.0 rpds-py==2026.5.1 rsa==4.9.1 -safetensors==0.7.0 +safetensors==0.8.0 scikit-learn==1.7.2 scipy==1.17.1 scramp==1.4.8 @@ -210,14 +214,16 @@ tensorflow==2.21.0 tensorflow-cpu-aws==2.21.0;platform_machine=="aarch64" termcolor==3.3.0 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 tokenizers==0.21.4 torch==2.8.0+cpu -tqdm==4.67.3 +tqdm==4.68.2 transformers==4.55.4 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 virtualenv-clone==0.5.7 diff --git a/sdks/python/container/py310/base_image_requirements.txt b/sdks/python/container/py310/base_image_requirements.txt index c44d8ef0bf5c..f3a840c1809e 100644 --- a/sdks/python/container/py310/base_image_requirements.txt +++ b/sdks/python/container/py310/base_image_requirements.txt @@ -23,7 +23,7 @@ aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 @@ -32,7 +32,7 @@ async-timeout==5.0.1 attrs==26.1.0 backports.tarfile==1.2.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b7 bs4==0.0.2 build==1.5.0 @@ -58,35 +58,35 @@ fasteners==0.20 freezegun==1.5.5 frozenlist==1.8.0 future==1.0.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 -google-resumable-media==2.9.0 +google-genai==2.8.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -117,7 +117,12 @@ jsonschema==4.26.0 jsonschema-specifications==2025.9.1 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 +linkify-it-py==2.1.0 +markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 +mdurl==0.1.2 +memray==1.19.3 mmh3==5.2.1 mock==5.2.0 more-itertools==11.1.0 @@ -138,6 +143,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -172,6 +178,7 @@ referencing==0.37.0 regex==2026.5.9 requests==2.34.2 requests-mock==1.12.1 +rich==15.0.0 rpds-py==0.30.0 rsa==4.9.1 scikit-learn==1.7.2 @@ -188,12 +195,14 @@ sqlalchemy_pytds==1.0.2 sqlparse==0.5.5 tenacity==9.1.4 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 tomli==2.4.1 -tqdm==4.67.3 +tqdm==4.68.2 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 virtualenv-clone==0.5.7 diff --git a/sdks/python/container/py311/base_image_requirements.txt b/sdks/python/container/py311/base_image_requirements.txt index 790825a8b2af..68f5e5469390 100644 --- a/sdks/python/container/py311/base_image_requirements.txt +++ b/sdks/python/container/py311/base_image_requirements.txt @@ -23,7 +23,7 @@ aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 @@ -31,7 +31,7 @@ asn1crypto==1.5.1 attrs==26.1.0 backports.tarfile==1.2.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 bs4==0.0.2 build==1.5.0 @@ -56,35 +56,35 @@ fasteners==0.20 freezegun==1.5.5 frozenlist==1.8.0 future==1.0.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 -google-resumable-media==2.9.0 +google-genai==2.8.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -116,7 +116,12 @@ jsonschema==4.26.0 jsonschema-specifications==2025.9.1 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 +linkify-it-py==2.1.0 +markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 +mdurl==0.1.2 +memray==1.19.3 mmh3==5.2.1 mock==5.2.0 more-itertools==11.1.0 @@ -137,6 +142,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -171,6 +177,7 @@ referencing==0.37.0 regex==2026.5.9 requests==2.34.2 requests-mock==1.12.1 +rich==15.0.0 rpds-py==2026.5.1 rsa==4.9.1 scikit-learn==1.7.2 @@ -187,11 +194,13 @@ sqlalchemy_pytds==1.0.2 sqlparse==0.5.5 tenacity==9.1.4 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 -tqdm==4.67.3 +tqdm==4.68.2 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 virtualenv-clone==0.5.7 diff --git a/sdks/python/container/py312/base_image_requirements.txt b/sdks/python/container/py312/base_image_requirements.txt index 151705811fdd..3f4194f20ea4 100644 --- a/sdks/python/container/py312/base_image_requirements.txt +++ b/sdks/python/container/py312/base_image_requirements.txt @@ -23,14 +23,14 @@ aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 asn1crypto==1.5.1 attrs==26.1.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 bs4==0.0.2 build==1.5.0 @@ -55,35 +55,35 @@ fasteners==0.20 freezegun==1.5.5 frozenlist==1.8.0 future==1.0.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.31 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 +google-cloud-monitoring==2.31.0 google-cloud-profiler==4.1.0 -google-cloud-pubsub==2.38.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 -google-resumable-media==2.9.0 +google-genai==2.8.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -114,7 +114,12 @@ jsonschema==4.26.0 jsonschema-specifications==2025.9.1 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 +linkify-it-py==2.1.0 +markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 +mdurl==0.1.2 +memray==1.19.3 mmh3==5.2.1 mock==5.2.0 more-itertools==11.1.0 @@ -135,6 +140,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -169,6 +175,7 @@ referencing==0.37.0 regex==2026.5.9 requests==2.34.2 requests-mock==1.12.1 +rich==15.0.0 rpds-py==2026.5.1 rsa==4.9.1 scikit-learn==1.7.2 @@ -185,11 +192,13 @@ sqlalchemy_pytds==1.0.2 sqlparse==0.5.5 tenacity==9.1.4 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 -tqdm==4.67.3 +tqdm==4.68.2 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 virtualenv-clone==0.5.7 diff --git a/sdks/python/container/py313/base_image_requirements.txt b/sdks/python/container/py313/base_image_requirements.txt index 68ad2a9530c2..31ef940c1e20 100644 --- a/sdks/python/container/py313/base_image_requirements.txt +++ b/sdks/python/container/py313/base_image_requirements.txt @@ -23,14 +23,14 @@ aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 asn1crypto==1.5.1 attrs==26.1.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 bs4==0.0.2 build==1.5.0 @@ -55,34 +55,34 @@ fasteners==0.20 freezegun==1.5.5 frozenlist==1.8.0 future==1.0.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-api-python-client==2.197.0 google-apitools==0.5.35 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 -google-cloud-pubsub==2.38.0 +google-cloud-monitoring==2.31.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 -google-resumable-media==2.9.0 +google-genai==2.8.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -113,7 +113,12 @@ jsonschema==4.26.0 jsonschema-specifications==2025.9.1 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 +linkify-it-py==2.1.0 +markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 +mdurl==0.1.2 +memray==1.19.3 mmh3==5.2.1 mock==5.2.0 more-itertools==11.1.0 @@ -134,6 +139,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -168,6 +174,7 @@ referencing==0.37.0 regex==2026.5.9 requests==2.34.2 requests-mock==1.12.1 +rich==15.0.0 rpds-py==2026.5.1 rsa==4.9.1 scikit-learn==1.7.2 @@ -184,11 +191,13 @@ sqlalchemy_pytds==1.0.2 sqlparse==0.5.5 tenacity==9.1.4 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 -tqdm==4.67.3 +tqdm==4.68.2 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 uritemplate==4.2.0 urllib3==2.7.0 virtualenv-clone==0.5.7 diff --git a/sdks/python/container/py314/base_image_requirements.txt b/sdks/python/container/py314/base_image_requirements.txt index 7b9225c5e68d..2c7fd33996c2 100644 --- a/sdks/python/container/py314/base_image_requirements.txt +++ b/sdks/python/container/py314/base_image_requirements.txt @@ -23,14 +23,14 @@ aiofiles==25.1.0 aiohappyeyeballs==2.6.2 -aiohttp==3.14.0 +aiohttp==3.14.1 aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.13.0 asn1crypto==1.5.1 attrs==26.1.0 beartype==0.22.9 -beautifulsoup4==4.14.3 +beautifulsoup4==4.15.0 betterproto==2.0.0b6 bs4==0.0.2 build==1.5.0 @@ -55,33 +55,33 @@ fasteners==0.20 freezegun==1.5.5 frozenlist==1.8.0 future==1.0.0 -google-api-core==2.30.3 +google-api-core==2.31.0 google-apitools==0.5.35 google-auth==2.53.0 google-auth-httplib2==0.2.1 -google-cloud-aiplatform==1.154.0 +google-cloud-aiplatform==1.157.0 google-cloud-bigquery==3.41.0 -google-cloud-bigquery-storage==2.38.0 +google-cloud-bigquery-storage==2.39.0 google-cloud-bigtable==2.38.0 -google-cloud-build==3.36.0 +google-cloud-build==3.37.0 google-cloud-core==2.6.0 google-cloud-dataflow-client==0.13.0 -google-cloud-datastore==2.24.0 -google-cloud-dlp==3.36.0 +google-cloud-datastore==2.25.0 +google-cloud-dlp==3.37.0 google-cloud-kms==3.13.0 google-cloud-language==2.20.0 -google-cloud-monitoring==2.30.0 -google-cloud-pubsub==2.38.0 +google-cloud-monitoring==2.31.0 +google-cloud-pubsub==2.39.0 google-cloud-recommendations-ai==0.10.18 google-cloud-resource-manager==1.17.0 -google-cloud-secret-manager==2.28.0 -google-cloud-spanner==3.66.0 -google-cloud-storage==3.10.1 +google-cloud-secret-manager==2.29.0 +google-cloud-spanner==3.67.0 +google-cloud-storage==3.11.0 google-cloud-videointelligence==2.19.0 google-cloud-vision==3.14.0 google-crc32c==1.8.0 -google-genai==2.7.0 -google-resumable-media==2.9.0 +google-genai==2.8.0 +google-resumable-media==2.10.0 googleapis-common-protos==1.75.0 greenlet==3.5.1 grpc-google-iam-v1==0.14.4 @@ -112,7 +112,12 @@ jsonschema==4.26.0 jsonschema-specifications==2025.9.1 keyring==25.7.0 keyrings.google-artifactregistry-auth==1.1.2 +linkify-it-py==2.1.0 +markdown-it-py==4.2.0 MarkupSafe==3.0.3 +mdit-py-plugins==0.6.1 +mdurl==0.1.2 +memray==1.19.3 mmh3==5.2.1 mock==5.2.0 more-itertools==11.1.0 @@ -133,6 +138,7 @@ parameterized==0.9.0 pg8000==1.31.5 pillow==12.2.0 pip==26.1.2 +platformdirs==4.10.0 pluggy==1.6.0 portalocker==3.2.0 propcache==0.5.2 @@ -167,6 +173,7 @@ referencing==0.37.0 regex==2026.5.9 requests==2.34.2 requests-mock==1.12.1 +rich==15.0.0 rpds-py==2026.5.1 rsa==4.9.1 scikit-learn==1.7.2 @@ -183,11 +190,13 @@ sqlalchemy_pytds==1.0.2 sqlparse==0.5.5 tenacity==9.1.4 testcontainers==4.14.2 +textual==8.2.7 threadpoolctl==3.6.0 -tqdm==4.67.3 +tqdm==4.68.2 typing-inspection==0.4.2 typing_extensions==4.15.0 tzdata==2026.2 +uc-micro-py==2.0.0 urllib3==2.7.0 virtualenv-clone==0.5.7 websockets==16.0 From 24c4bb3c799c2b5fa14e02444774651f295abff0 Mon Sep 17 00:00:00 2001 From: Valentyn Tymofieiev Date: Tue, 9 Jun 2026 19:08:50 -0700 Subject: [PATCH 17/17] Update changes.md --- CHANGES.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ac8215f35494..1d3d7bdc414f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -59,22 +59,21 @@ ## Highlights -* New highly anticipated feature X added to Python SDK ([#X](https://github.com/apache/beam/issues/X)). -* New highly anticipated feature Y added to Java SDK ([#Y](https://github.com/apache/beam/issues/Y)). +* Python SDK now supports memory profiling with Memray ([#38853](https://github.com/apache/beam/issues/38853)). * (Python) Added [Qdrant](https://qdrant.tech/) VectorDatabaseWriteConfig implementation ([#38141](https://github.com/apache/beam/issues/38141)). + ## I/Os * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). ## New Features / Improvements -* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * (Java) Enabled state tag encoding v2 by default for new Dataflow Streaming Engine jobs. It can be disabled by passing `--experiments=disable_streaming_engine_state_tag_encoding_v2` or `--updateCompatibilityVersion=2.74.0` pipeline option. Note that the tag encoding version cannot change during a job update. Jobs using tag encoding v2 (enabled by default for new jobs on 2.75.0+) cannot be downgraded to Beam versions prior to 2.73.0, as only versions 2.73.0 and later support tag encoding v2. ([#38705](https://github.com/apache/beam/issues/38705)). +* (Python) Added instrumentation to support off-the-shelf profiling agents when launching Python SDK Harness ([#38853](https://github.com/apache/beam/issues/38853)). ## Breaking Changes -* X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). * (Python) Typehints of dataclass fields are honored during type inferences. To restore the behavior of fallback-to-any, use pipeline option `--exclude_infer_dataclass_field_type` ([#38797](https://github.com/apache/beam/issues/38797)). However fixing forward is recommended. @@ -85,7 +84,6 @@ ## Bugfixes -* Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * Fixed IcebergIO writing manifest column bounds padded with trailing `0x00` bytes, which broke equality predicate pushdown in some query engines (Java) ([#38580](https://github.com/apache/beam/issues/38580)). ## Security Fixes