From 219eb6a8e918f0e71f89b563a2df50843078bffc Mon Sep 17 00:00:00 2001 From: gaohuatao Date: Mon, 19 Apr 2021 20:09:08 +0800 Subject: [PATCH] Adapt kata shimv2 Signed-off-by: gaohuatao --- ...t-reap-agent-process-blocked-problem.patch | 78 ------------------- agent/series.conf | 1 - kata-containers.spec | 8 +- ...-set-timeout-to-wait-in-stop-process.patch | 54 +++++++++++++ runtime/series.conf | 1 + 5 files changed, 62 insertions(+), 80 deletions(-) delete mode 100644 agent/patches/0011-agent-fix-agent-reap-agent-process-blocked-problem.patch create mode 100644 runtime/patches/0071-runtime-set-timeout-to-wait-in-stop-process.patch diff --git a/agent/patches/0011-agent-fix-agent-reap-agent-process-blocked-problem.patch b/agent/patches/0011-agent-fix-agent-reap-agent-process-blocked-problem.patch deleted file mode 100644 index 272e0b5..0000000 --- a/agent/patches/0011-agent-fix-agent-reap-agent-process-blocked-problem.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 3ac1232a2e3fbfc0465473e5d81cde41847c4252 Mon Sep 17 00:00:00 2001 -From: jiangpengfei -Date: Wed, 19 Aug 2020 11:47:37 +0800 -Subject: [PATCH 11/16] agent: fix agent reap agent process blocked problem - -reason: add container waitProcess() timeout when -container process status is D/T. 
- -Signed-off-by: jiangpengfei ---- - grpc.go | 43 +++++++++++++++++++++++++++++++++---------- - 1 file changed, 33 insertions(+), 10 deletions(-) - -diff --git a/grpc.go b/grpc.go -index de2cae7..3dd088e 100644 ---- a/grpc.go -+++ b/grpc.go -@@ -49,6 +49,11 @@ const ( - libcontainerPath = "/run/libcontainer" - ) - -+// keep waitProcessTimeout value same as value in kata-runtime wait WaitProcessRequest response -+const ( -+ waitProcessTimeOut = 10 -+) -+ - var ( - sysfsCPUOnlinePath = "/sys/devices/system/cpu" - sysfsMemOnlinePath = "/sys/devices/system/memory" -@@ -996,17 +1001,35 @@ func (a *agentGRPC) WaitProcess(ctx context.Context, req *pb.WaitProcessRequest) - ctr.deleteProcess(proc.id) - }) - -- // Using helper function wait() to deal with the subreaper. -- libContProcess := (*reaperLibcontainerProcess)(&(proc.process)) -- exitCode, err := a.sandbox.subreaper.wait(proc.exitCodeCh, libContProcess) -- if err != nil { -- return &pb.WaitProcessResponse{}, err -+ done := make(chan error) -+ var exitCode int = 0 -+ go func() { -+ // Using helper function wait() to deal with the subreaper. -+ libContProcess := (*reaperLibcontainerProcess)(&(proc.process)) -+ var err error -+ exitCode, err = a.sandbox.subreaper.wait(proc.exitCodeCh, libContProcess) -+ if err != nil { -+ done <- err -+ close(done) -+ return -+ } -+ // refill the exitCodeCh with the exitcode which can be read out -+ // by another WaitProcess(). Since this channel isn't be closed, -+ // here the refill will always success and it will be free by GC -+ // once the process exits. 
-+ proc.exitCodeCh <- exitCode -+ -+ close(done) -+ }() -+ -+ select { -+ case err := <-done: -+ if err != nil { -+ return &pb.WaitProcessResponse{}, err -+ } -+ case <-time.After(time.Duration(waitProcessTimeOut) * time.Second): -+ return &pb.WaitProcessResponse{}, grpcStatus.Errorf(codes.DeadlineExceeded, "agent wait reap container process timeout reached after %ds", waitProcessTimeOut) - } -- //refill the exitCodeCh with the exitcode which can be read out -- //by another WaitProcess(). Since this channel isn't be closed, -- //here the refill will always success and it will be free by GC -- //once the process exits. -- proc.exitCodeCh <- exitCode - - return &pb.WaitProcessResponse{ - Status: int32(exitCode), --- -2.14.3 (Apple Git-98) - diff --git a/agent/series.conf b/agent/series.conf index 6f69da3..0c2b848 100644 --- a/agent/series.conf +++ b/agent/series.conf @@ -8,7 +8,6 @@ 0008-agent-support-get-root-bus-path-dynamically.patch 0009-storage-add-pkg-storage-for-mount.patch 0010-storage-mount-nfs-and-gpath-in-agent.patch -0011-agent-fix-agent-reap-agent-process-blocked-problem.patch 0012-network-support-set-dns-without-nameserver.patch 0013-agent-support-setting-multi-queues-of-interface.patch 0014-agent-fix-init-hugepages-failed-problem.patch diff --git a/kata-containers.spec b/kata-containers.spec index 7fd3010..f9f3d5d 100644 --- a/kata-containers.spec +++ b/kata-containers.spec @@ -2,7 +2,7 @@ %global debug_package %{nil} %define VERSION v1.11.1 -%define RELEASE 13 +%define RELEASE 14 Name: kata-containers Version: %{VERSION} @@ -91,6 +91,12 @@ install -p -m 640 -D ./runtime/cli/config/configuration-qemu.toml %{buildroot}/u %changelog +* Sun Apr 25 2021 gaohuatao - 1.11.1-14 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC: add a timeout when waiting for the container process in the runtime + * Tue Mar 23 2021 jikui - 1.11.1-13 - Type:bugfix - ID:NA - SUG:NA diff --git a/runtime/patches/0071-runtime-set-timeout-to-wait-in-stop-process.patch 
b/runtime/patches/0071-runtime-set-timeout-to-wait-in-stop-process.patch new file mode 100644 index 0000000..bf4efd2 --- /dev/null +++ b/runtime/patches/0071-runtime-set-timeout-to-wait-in-stop-process.patch @@ -0,0 +1,54 @@ +From 578e4ee59350844a5a7122f31c3a4d10762e40a5 Mon Sep 17 00:00:00 2001 +From: gaohuatao +Date: Mon, 19 Apr 2021 20:01:44 +0800 +Subject: [PATCH] runtime: set timeout to wait in stop process + +Signed-off-by: gaohuatao +--- + virtcontainers/container.go | 23 ++++++++++++++++++----- + 1 file changed, 18 insertions(+), 5 deletions(-) + +diff --git a/virtcontainers/container.go b/virtcontainers/container.go +index 34f6ec6..50788c8 100644 +--- a/virtcontainers/container.go ++++ b/virtcontainers/container.go +@@ -61,6 +61,8 @@ var safeCopyFiles = map[string]struct{}{ + // #define FLOPPY_MAJOR 2 + const floppyMajor = int64(2) + ++const waitProcessTimeOutDefault = 10 ++ + // Process gathers data related to a container process. + type Process struct { + // Token is the process execution context ID. It must be +@@ -1174,11 +1176,22 @@ func (c *Container) stop(force bool) error { + c.Logger().Errorf("send signal to container failed: %v", err) + } + +- // Since the agent has supported the MultiWaitProcess, it's better to +- // wait the process here to make sure the process has exited before to +- // issue stopContainer, otherwise the RemoveContainerRequest in it will +- // get failed if the process hasn't exited. +- c.sandbox.agent.waitProcess(c, c.id) ++ done := make(chan error) ++ go func() { ++ // Since the agent has supported the MultiWaitProcess, it's better to ++ // wait the process here to make sure the process has exited before to ++ // issue stopContainer, otherwise the RemoveContainerRequest in it will ++ // get failed if the process hasn't exited. 
++ c.sandbox.agent.waitProcess(c, c.id) ++ close(done) ++ }() ++ ++ select { ++ case <-done: ++ c.Logger().Debug("Wait container process success") ++ case <-time.After(time.Duration(waitProcessTimeOutDefault) * time.Second): ++ return fmt.Errorf("Wait container %v process timeout reached after %ds", c.id, waitProcessTimeOutDefault) ++ } + + defer func() { + // Save device and drive data. +-- +2.20.1 + diff --git a/runtime/series.conf b/runtime/series.conf index 3d6f95c..739fd5a 100644 --- a/runtime/series.conf +++ b/runtime/series.conf @@ -68,3 +68,4 @@ 0068-kata-runtime-modify-make-flags.patch 0069-kata-runtime-add-linkmode-to-resolve-build-error.patch 0070-kata-runtime-remove-ctty-to-resolve-build-failed.patch +0071-runtime-set-timeout-to-wait-in-stop-process.patch -- Gitee