diff --git a/internal/builder/vm/lcow/kernel_args.go b/internal/builder/vm/lcow/kernel_args.go index 21222761e1..de3e974b5b 100644 --- a/internal/builder/vm/lcow/kernel_args.go +++ b/internal/builder/vm/lcow/kernel_args.go @@ -26,6 +26,7 @@ func buildKernelArgs( kernelDirect bool, hasConsole bool, rootFsFile string, + liveMigrationAllowed bool, ) (string, error) { log.G(ctx).WithField("rootFsFile", rootFsFile).Debug("buildKernelArgs: starting kernel arguments construction") @@ -80,7 +81,7 @@ func buildKernelArgs( args = append(args, "brd.rd_nr=0", "pmtmr=0") // 8. Init arguments (passed after "--" separator) - initArgs := buildInitArgs(ctx, opts, writableOverlayDirs, disableTimeSyncService, processDumpLocation, rootFsFile, hasConsole) + initArgs := buildInitArgs(ctx, opts, writableOverlayDirs, disableTimeSyncService, processDumpLocation, rootFsFile, hasConsole, liveMigrationAllowed) args = append(args, "--", initArgs) result := strings.Join(args, " ") @@ -149,6 +150,7 @@ func buildInitArgs( processDumpLocation string, rootFsFile string, hasConsole bool, + liveMigrationAllowed bool, ) string { log.G(ctx).WithFields(logrus.Fields{ "rootFsFile": rootFsFile, @@ -158,7 +160,7 @@ func buildInitArgs( entropyArgs := fmt.Sprintf("-e %d", vmutils.LinuxEntropyVsockPort) // Build GCS execution command - gcsCmd := buildGCSCommand(opts, disableTimeSyncService, processDumpLocation) + gcsCmd := buildGCSCommand(opts, disableTimeSyncService, processDumpLocation, liveMigrationAllowed) // Construct init arguments var initArgsList []string @@ -192,14 +194,8 @@ func buildGCSCommand( opts *runhcsoptions.Options, disableTimeSyncService bool, processDumpLocation string, + liveMigrationAllowed bool, ) string { - // Start with vsockexec wrapper - var cmdParts []string - cmdParts = append(cmdParts, "/bin/vsockexec") - - // Add logging vsock port - cmdParts = append(cmdParts, fmt.Sprintf("-e %d", vmutils.LinuxLogVsockPort)) - // Determine log level logLevel := "info" if opts != nil && 
opts.LogLevel != "" { @@ -227,8 +223,15 @@ func buildGCSCommand( gcsParts = append(gcsParts, "-core-dump-location", processDumpLocation) } - // Combine vsockexec and GCS command - cmdParts = append(cmdParts, strings.Join(gcsParts, " ")) + gcsCmd := strings.Join(gcsParts, " ") + + // Live-migratable pods skip vsockexec: the host does not run a log + // listener, so connect on LinuxLogVsockPort would stall init. + if liveMigrationAllowed { + return gcsCmd + } - return strings.Join(cmdParts, " ") + // vsockexec `-e <port>` wires gcs's stderr to LinuxLogVsockPort, which + // the host listener reads and republishes. + return fmt.Sprintf("/bin/vsockexec -e %d %s", vmutils.LinuxLogVsockPort, gcsCmd) } diff --git a/internal/builder/vm/lcow/sandbox_options.go b/internal/builder/vm/lcow/sandbox_options.go index b6cc22f94b..4f51823f6b 100644 --- a/internal/builder/vm/lcow/sandbox_options.go +++ b/internal/builder/vm/lcow/sandbox_options.go @@ -25,6 +25,11 @@ type SandboxOptions struct { // ConfidentialConfig carries confidential computing fields that are not // part of the HCS document but are needed for confidential VM setup. ConfidentialConfig *ConfidentialConfig + + // LiveMigrationAllowed is a sandbox-scoped gate indicating that the sandbox is + // intended to be live-migratable. When true, the platform log listener will + // not be initialized. 
+ LiveMigrationAllowed bool } // ConfidentialConfig carries confidential computing configuration that is not diff --git a/internal/builder/vm/lcow/specs.go b/internal/builder/vm/lcow/specs.go index 5a2d44c7bf..bd9dd60eea 100644 --- a/internal/builder/vm/lcow/specs.go +++ b/internal/builder/vm/lcow/specs.go @@ -209,6 +209,7 @@ func BuildSandboxConfig( bootOptions.LinuxKernelDirect != nil, // isKernelDirectBoot comPorts != nil, // hasConsole filepath.Base(rootFsFullPath), + sandboxOptions.LiveMigrationAllowed, ) if err != nil { return nil, nil, fmt.Errorf("failed to build kernel args: %w", err) @@ -320,6 +321,7 @@ func parseSandboxOptions(ctx context.Context, platform string, annotations map[s FullyPhysicallyBacked: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.FullyPhysicallyBacked, false), PolicyBasedRouting: oci.ParseAnnotationsBool(ctx, annotations, iannotations.NetworkingPolicyBasedRouting, false), NoWritableFileShares: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.DisableWritableFileShares, false), + LiveMigrationAllowed: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.LiveMigrationAllowed, false), } // Determine if this is a confidential VM early, as it affects boot options parsing diff --git a/internal/builder/vm/lcow/specs_test.go b/internal/builder/vm/lcow/specs_test.go index 2fb3036231..d9a288ce0c 100644 --- a/internal/builder/vm/lcow/specs_test.go +++ b/internal/builder/vm/lcow/specs_test.go @@ -2057,3 +2057,158 @@ func TestBuildSandboxConfig_CPUClamping(t *testing.T) { t.Errorf("expected processor count to be clamped to host count %d, got %d", hostCount, actualCount) } } + +// TestBuildSandboxConfig_LiveMigration validates the wiring for the +// io.microsoft.migration.allowed sandbox annotation. 
The annotation is parsed +// into SandboxOptions.LiveMigrationAllowed and threaded down into the kernel +// command line: live-migratable sandboxes must skip the /bin/vsockexec wrapper +// (which would otherwise stall init waiting for a host log listener that the +// LM-enabled host does not run), while non-LM sandboxes must continue to use +// vsockexec so that GCS stderr is forwarded over LinuxLogVsockPort. +func TestBuildSandboxConfig_LiveMigration(t *testing.T) { + ctx := context.Background() + + validBootFilesPath := newBootFilesPath(t) + defaultOpts := defaultSandboxOpts(validBootFilesPath) + + // Pre-format the vsockexec prefix once so the assertions are obviously + // driven by the same constant the production code uses. + vsockexecPrefix := fmt.Sprintf("/bin/vsockexec -e %d", vmutils.LinuxLogVsockPort) + + tests := []specTestCase{ + { + name: "live migration disabled by default", + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected LiveMigrationAllowed=false by default, got true") + } + kernelArgs := getKernelArgs(doc) + if !strings.Contains(kernelArgs, vsockexecPrefix) { + t.Errorf("expected vsockexec wrapper %q in kernel args (LM disabled), got %q", vsockexecPrefix, kernelArgs) + } + if !strings.Contains(kernelArgs, "/bin/gcs") { + t.Errorf("expected /bin/gcs in kernel args, got %q", kernelArgs) + } + }, + }, + { + name: "live migration explicitly disabled", + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationAllowed: "false", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected LiveMigrationAllowed=false when annotation=\"false\", got true") + } + kernelArgs := getKernelArgs(doc) + if !strings.Contains(kernelArgs, vsockexecPrefix) { + t.Errorf("expected vsockexec wrapper %q in kernel args, 
got %q", vsockexecPrefix, kernelArgs) + } + }, + }, + { + name: "live migration enabled drops vsockexec wrapper", + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationAllowed: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if !sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected LiveMigrationAllowed=true when annotation=\"true\", got false") + } + kernelArgs := getKernelArgs(doc) + // The vsockexec wrapper must not appear at all when LM is on: + // neither the prefix nor the binary path on its own. + if strings.Contains(kernelArgs, "vsockexec") { + t.Errorf("expected no vsockexec in kernel args when LM enabled, got %q", kernelArgs) + } + if strings.Contains(kernelArgs, fmt.Sprintf("-e %d", vmutils.LinuxLogVsockPort)) { + t.Errorf("expected no log vsock port (%d) wiring when LM enabled, got %q", vmutils.LinuxLogVsockPort, kernelArgs) + } + // /bin/gcs must still be invoked - just without the wrapper. + if !strings.Contains(kernelArgs, "/bin/gcs") { + t.Errorf("expected /bin/gcs in kernel args even when LM enabled, got %q", kernelArgs) + } + }, + }, + { + name: "live migration combined with debug log level", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + LogLevel: "debug", + }, + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationAllowed: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if !sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected LiveMigrationAllowed=true, got false") + } + kernelArgs := getKernelArgs(doc) + // Other GCS flags must still be threaded through the command + // even when the vsockexec wrapper is removed. 
+ if !strings.Contains(kernelArgs, "-loglevel debug") { + t.Errorf("expected -loglevel debug in kernel args when LM enabled, got %q", kernelArgs) + } + if strings.Contains(kernelArgs, "vsockexec") { + t.Errorf("expected no vsockexec when LM enabled, got %q", kernelArgs) + } + }, + }, + { + name: "live migration with disable time sync still drops vsockexec", + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationAllowed: "true", + shimannotations.DisableLCOWTimeSyncService: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if !sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected LiveMigrationAllowed=true, got false") + } + kernelArgs := getKernelArgs(doc) + if !strings.Contains(kernelArgs, "-disable-time-sync") { + t.Errorf("expected -disable-time-sync flag in kernel args, got %q", kernelArgs) + } + if strings.Contains(kernelArgs, "vsockexec") { + t.Errorf("expected no vsockexec when LM enabled, got %q", kernelArgs) + } + }, + }, + { + name: "live migration invalid annotation value falls back to default (false)", + spec: &vm.Spec{ + Annotations: map[string]string{ + // ParseAnnotationsBool returns the default value (false) on + // unparseable input, so the sandbox should behave like the + // default-disabled case rather than failing the build. 
+ shimannotations.LiveMigrationAllowed: "not-a-bool", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if sandboxOpts.LiveMigrationAllowed { + t.Errorf("expected LiveMigrationAllowed=false on invalid annotation value, got true") + } + kernelArgs := getKernelArgs(doc) + if !strings.Contains(kernelArgs, vsockexecPrefix) { + t.Errorf("expected vsockexec wrapper %q in kernel args, got %q", vsockexecPrefix, kernelArgs) + } + }, + }, + } + + runTestCases(t, ctx, defaultOpts, tests) +} diff --git a/internal/controller/vm/vm_lcow.go b/internal/controller/vm/vm_lcow.go index 72b8edbf87..abb26f49f1 100644 --- a/internal/controller/vm/vm_lcow.go +++ b/internal/controller/vm/vm_lcow.go @@ -12,6 +12,7 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/device/plan9" "github.com/Microsoft/hcsshim/internal/controller/network" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/vm/vmmanager" "github.com/Microsoft/hcsshim/internal/vm/vmutils" @@ -150,6 +151,13 @@ func (c *Controller) setupEntropyListener(ctx context.Context, group *errgroup.G // running inside the Linux VM. The logs are parsed and // forwarded to the host's logging system for monitoring and debugging. func (c *Controller) setupLoggingListener(ctx context.Context, group *errgroup.Group) { + // For live-migratable sandboxes, we skip the logging socket. + if c.sandboxOptions != nil && c.sandboxOptions.LiveMigrationAllowed { + log.G(ctx).Info("skipping GCS log listener: pod is live-migratable") + close(c.logOutputDone) + return + } + group.Go(func() error { // The GCS will connect to this port to stream log output. logConn, err := winio.ListenHvsock(&winio.HvsockAddr{