diff --git a/internal/app/cmd/daemon.go b/internal/app/cmd/daemon.go index fc47fd2..8f2fb0b 100644 --- a/internal/app/cmd/daemon.go +++ b/internal/app/cmd/daemon.go @@ -125,9 +125,16 @@ func runDaemon(ctx context.Context, configFile *string) func(cmd *cobra.Command, go poller.Poll() <-ctx.Done() - log.Infof("runner: %s gracefully shutting down", resp.Msg.Runner.Name) + log.Infof("runner: %s shutdown initiated, waiting %s for running jobs to complete before shutting down", resp.Msg.Runner.Name, cfg.Runner.ShutdownTimeout) - return poller.Shutdown(context.Background()) + ctx, cancel := context.WithTimeout(context.Background(), cfg.Runner.ShutdownTimeout) + defer cancel() + + err = poller.Shutdown(ctx) + if err != nil { + log.Warnf("runner: %s cancelled in progress jobs during shutdown", resp.Msg.Runner.Name) + } + return nil } } diff --git a/internal/app/poll/poller.go b/internal/app/poll/poller.go index 31d5a5d..088f3f6 100644 --- a/internal/app/poll/poller.go +++ b/internal/app/poll/poller.go @@ -74,14 +74,28 @@ func (p *Poller) Shutdown(ctx context.Context) error { p.shutdownPolling() select { - // gracefully shutdown + // graceful shutdown completed successfully case <-p.done: return nil - // Our timeout for shutting down ran out + // our timeout for shutting down ran out case <-ctx.Done(): + // when both the timeout fires and the graceful shutdown + // completed successfully, this branch of the select may + // fire. Do a non-blocking check here against the graceful + // shutdown status to avoid sending an error if we don't need to. 
+ select { + case <-p.done: + return nil + default: // graceful shutdown has not completed; continue to the forced shutdown below + } + // force a shutdown of all running jobs p.shutdownJobs() + + // wait for running jobs to report their status to Gitea + <-p.done + return ctx.Err() } } diff --git a/internal/pkg/config/config.example.yaml b/internal/pkg/config/config.example.yaml index 648db96..22e1055 100644 --- a/internal/pkg/config/config.example.yaml +++ b/internal/pkg/config/config.example.yaml @@ -23,6 +23,9 @@ runner: # Please note that the Gitea instance also has a timeout (3h by default) for the job. # So the job could be stopped by the Gitea instance if it's timeout is shorter than this. timeout: 3h + # The timeout for the runner to wait for running jobs to finish when shutting down. + # Any running jobs that haven't finished after this timeout will be cancelled. + shutdown_timeout: 0s # Whether skip verifying the TLS certificate of the Gitea instance. insecure: false # The timeout for fetching the job from the Gitea instance. diff --git a/internal/pkg/config/config.go b/internal/pkg/config/config.go index f4b1078..afc34b9 100644 --- a/internal/pkg/config/config.go +++ b/internal/pkg/config/config.go @@ -21,15 +21,16 @@ type Log struct { // Runner represents the configuration for the runner. type Runner struct { - File string `yaml:"file"` // File specifies the file path for the runner. - Capacity int `yaml:"capacity"` // Capacity specifies the capacity of the runner. - Envs map[string]string `yaml:"envs"` // Envs stores environment variables for the runner. - EnvFile string `yaml:"env_file"` // EnvFile specifies the path to the file containing environment variables for the runner. - Timeout time.Duration `yaml:"timeout"` // Timeout specifies the duration for runner timeout. - Insecure bool `yaml:"insecure"` // Insecure indicates whether the runner operates in an insecure mode. - FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources. 
- FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources. - Labels []string `yaml:"labels"` // Labels specify the labels of the runner. Labels are declared on each startup + File string `yaml:"file"` // File specifies the file path for the runner. + Capacity int `yaml:"capacity"` // Capacity specifies the capacity of the runner. + Envs map[string]string `yaml:"envs"` // Envs stores environment variables for the runner. + EnvFile string `yaml:"env_file"` // EnvFile specifies the path to the file containing environment variables for the runner. + Timeout time.Duration `yaml:"timeout"` // Timeout specifies the duration for runner timeout. + ShutdownTimeout time.Duration `yaml:"shutdown_timeout"` // ShutdownTimeout specifies the duration to wait for running jobs to complete during a shutdown of the runner. + Insecure bool `yaml:"insecure"` // Insecure indicates whether the runner operates in an insecure mode. + FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources. + FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources. + Labels []string `yaml:"labels"` // Labels specify the labels of the runner. Labels are declared on each startup } // Cache represents the configuration for caching.