From 0d79479484ecdda5d8ce90b29a9b9d6879aa6b80 Mon Sep 17 00:00:00 2001 From: Rowan Bohde Date: Thu, 23 May 2024 11:19:08 -0500 Subject: [PATCH] feat: add Runner.ShutdownTimeout config option This controls the amount of time the runner will wait for running jobs to finish before cancelling them. Defaults to 0s in order to maintain backwards compatibility with previous behavior. --- internal/app/cmd/daemon.go | 11 +++++++++-- internal/app/poll/poller.go | 17 +++++++++++++++-- internal/pkg/config/config.example.yaml | 3 +++ internal/pkg/config/config.go | 19 ++++++++++--------- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/internal/app/cmd/daemon.go b/internal/app/cmd/daemon.go index fc47fd2..8f2fb0b 100644 --- a/internal/app/cmd/daemon.go +++ b/internal/app/cmd/daemon.go @@ -125,9 +125,16 @@ func runDaemon(ctx context.Context, configFile *string) func(cmd *cobra.Command, go poller.Poll() <-ctx.Done() - log.Infof("runner: %s gracefully shutting down", resp.Msg.Runner.Name) + log.Infof("runner: %s shutdown initiated, waiting %s for running jobs to complete before shutting down", resp.Msg.Runner.Name, cfg.Runner.ShutdownTimeout) - return poller.Shutdown(context.Background()) + ctx, cancel := context.WithTimeout(context.Background(), cfg.Runner.ShutdownTimeout) + defer cancel() + + err = poller.Shutdown(ctx) + if err != nil { + log.Warnf("runner: %s cancelled in progress jobs during shutdown", resp.Msg.Runner.Name) + } + return nil } } diff --git a/internal/app/poll/poller.go b/internal/app/poll/poller.go index 31d5a5d..088f3f6 100644 --- a/internal/app/poll/poller.go +++ b/internal/app/poll/poller.go @@ -74,14 +74,27 @@ func (p *Poller) Shutdown(ctx context.Context) error { p.shutdownPolling() select { - // gracefully shutdown + // graceful shutdown completed successfully case <-p.done: return nil - // Our timeout for shutting down ran out + // our timeout for shutting down ran out case <-ctx.Done(): + // when both the timeout fires and the graceful 
shutdown + // completed successfully, this branch of the select may + // fire. Do a non-blocking check here against the graceful + // shutdown status to avoid sending an error if we don't need to. + _, ok := <-p.done + if !ok { + return nil + } + // force a shutdown of all running jobs p.shutdownJobs() + + // wait for running jobs to report their status to Gitea + _, _ = <-p.done + return ctx.Err() } } diff --git a/internal/pkg/config/config.example.yaml b/internal/pkg/config/config.example.yaml index 648db96..22e1055 100644 --- a/internal/pkg/config/config.example.yaml +++ b/internal/pkg/config/config.example.yaml @@ -23,6 +23,9 @@ runner: # Please note that the Gitea instance also has a timeout (3h by default) for the job. # So the job could be stopped by the Gitea instance if it's timeout is shorter than this. timeout: 3h + # The timeout for the runner to wait for running jobs to finish when shutting down. + # Any running jobs that haven't finished after this timeout will be cancelled. + shutdown_timeout: 0s # Whether skip verifying the TLS certificate of the Gitea instance. insecure: false # The timeout for fetching the job from the Gitea instance. diff --git a/internal/pkg/config/config.go b/internal/pkg/config/config.go index f4b1078..afc34b9 100644 --- a/internal/pkg/config/config.go +++ b/internal/pkg/config/config.go @@ -21,15 +21,16 @@ type Log struct { // Runner represents the configuration for the runner. type Runner struct { - File string `yaml:"file"` // File specifies the file path for the runner. - Capacity int `yaml:"capacity"` // Capacity specifies the capacity of the runner. - Envs map[string]string `yaml:"envs"` // Envs stores environment variables for the runner. - EnvFile string `yaml:"env_file"` // EnvFile specifies the path to the file containing environment variables for the runner. - Timeout time.Duration `yaml:"timeout"` // Timeout specifies the duration for runner timeout. 
- Insecure bool `yaml:"insecure"` // Insecure indicates whether the runner operates in an insecure mode. - FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources. - FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources. - Labels []string `yaml:"labels"` // Labels specify the labels of the runner. Labels are declared on each startup + File string `yaml:"file"` // File specifies the file path for the runner. + Capacity int `yaml:"capacity"` // Capacity specifies the capacity of the runner. + Envs map[string]string `yaml:"envs"` // Envs stores environment variables for the runner. + EnvFile string `yaml:"env_file"` // EnvFile specifies the path to the file containing environment variables for the runner. + Timeout time.Duration `yaml:"timeout"` // Timeout specifies the duration for runner timeout. + ShutdownTimeout time.Duration `yaml:"shutdown_timeout"` // ShutdownTimeout specifies the duration to wait for running jobs to complete during a shutdown of the runner. + Insecure bool `yaml:"insecure"` // Insecure indicates whether the runner operates in an insecure mode. + FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources. + FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources. + Labels []string `yaml:"labels"` // Labels specify the labels of the runner. Labels are declared on each startup } // Cache represents the configuration for caching.