Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b4b8d095a4 | ||
|
|
9310973f36 | ||
|
|
9cdd580dcf | ||
|
|
df9990a42e | ||
|
|
2a2fd706b5 | ||
|
|
b49c01546f | ||
|
|
374127eba5 | ||
|
|
793df2edbd |
@@ -125,7 +125,7 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
|
||||
args := append(os.Args, "--no-reap")
|
||||
err := reaper.ForkReap(
|
||||
reaper.WithExecArgs(args...),
|
||||
reaper.WithCatchSignals(InterruptSignals...),
|
||||
reaper.WithCatchSignals(StopSignals...),
|
||||
)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "agent process reaper unable to fork", slog.Error(err))
|
||||
@@ -144,7 +144,7 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
|
||||
// Note that we don't want to handle these signals in the
|
||||
// process that runs as PID 1, that's why we do this after
|
||||
// the reaper forked.
|
||||
ctx, stopNotify := inv.SignalNotifyContext(ctx, InterruptSignals...)
|
||||
ctx, stopNotify := inv.SignalNotifyContext(ctx, StopSignals...)
|
||||
defer stopNotify()
|
||||
|
||||
// DumpHandler does signal handling, so we call it after the
|
||||
|
||||
@@ -890,7 +890,7 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
||||
Handler: func(inv *clibase.Invocation) (err error) {
|
||||
ctx := inv.Context()
|
||||
|
||||
notifyCtx, stop := signal.NotifyContext(ctx, InterruptSignals...) // Checked later.
|
||||
notifyCtx, stop := signal.NotifyContext(ctx, StopSignals...) // Checked later.
|
||||
defer stop()
|
||||
ctx = notifyCtx
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ fi
|
||||
Handler: func(inv *clibase.Invocation) error {
|
||||
ctx := inv.Context()
|
||||
|
||||
ctx, stop := inv.SignalNotifyContext(ctx, InterruptSignals...)
|
||||
ctx, stop := inv.SignalNotifyContext(ctx, StopSignals...)
|
||||
defer stop()
|
||||
|
||||
client, err := r.createAgentClient()
|
||||
|
||||
@@ -25,7 +25,7 @@ func (r *RootCmd) gitAskpass() *clibase.Cmd {
|
||||
Handler: func(inv *clibase.Invocation) error {
|
||||
ctx := inv.Context()
|
||||
|
||||
ctx, stop := inv.SignalNotifyContext(ctx, InterruptSignals...)
|
||||
ctx, stop := inv.SignalNotifyContext(ctx, StopSignals...)
|
||||
defer stop()
|
||||
|
||||
user, host, err := gitauth.ParseAskpass(inv.Args[0])
|
||||
|
||||
@@ -29,7 +29,7 @@ func (r *RootCmd) gitssh() *clibase.Cmd {
|
||||
|
||||
// Catch interrupt signals to ensure the temporary private
|
||||
// key file is cleaned up on most cases.
|
||||
ctx, stop := inv.SignalNotifyContext(ctx, InterruptSignals...)
|
||||
ctx, stop := inv.SignalNotifyContext(ctx, StopSignals...)
|
||||
defer stop()
|
||||
|
||||
// Early check so errors are reported immediately.
|
||||
|
||||
@@ -337,7 +337,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
|
||||
|
||||
// Register signals early on so that graceful shutdown can't
|
||||
// be interrupted by additional signals. Note that we avoid
|
||||
// shadowing cancel() (from above) here because notifyStop()
|
||||
// shadowing cancel() (from above) here because stopCancel()
|
||||
// restores default behavior for the signals. This protects
|
||||
// the shutdown sequence from abruptly terminating things
|
||||
// like: database migrations, provisioner work, workspace
|
||||
@@ -345,8 +345,10 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
|
||||
//
|
||||
// To get out of a graceful shutdown, the user can send
|
||||
// SIGQUIT with ctrl+\ or SIGKILL with `kill -9`.
|
||||
notifyCtx, notifyStop := inv.SignalNotifyContext(ctx, InterruptSignals...)
|
||||
defer notifyStop()
|
||||
stopCtx, stopCancel := signalNotifyContext(ctx, inv, StopSignalsNoInterrupt...)
|
||||
defer stopCancel()
|
||||
interruptCtx, interruptCancel := signalNotifyContext(ctx, inv, InterruptSignals...)
|
||||
defer interruptCancel()
|
||||
|
||||
cacheDir := vals.CacheDir.String()
|
||||
err = os.MkdirAll(cacheDir, 0o700)
|
||||
@@ -1028,13 +1030,18 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
|
||||
hangDetector.Start()
|
||||
defer hangDetector.Close()
|
||||
|
||||
waitForProvisionerJobs := false
|
||||
// Currently there is no way to ask the server to shut
|
||||
// itself down, so any exit signal will result in a non-zero
|
||||
// exit of the server.
|
||||
var exitErr error
|
||||
select {
|
||||
case <-notifyCtx.Done():
|
||||
exitErr = notifyCtx.Err()
|
||||
case <-stopCtx.Done():
|
||||
exitErr = stopCtx.Err()
|
||||
waitForProvisionerJobs = true
|
||||
_, _ = io.WriteString(inv.Stdout, cliui.Bold("Stop caught, waiting for provisioner jobs to complete and gracefully exiting. Use ctrl+\\ to force quit"))
|
||||
case <-interruptCtx.Done():
|
||||
exitErr = interruptCtx.Err()
|
||||
_, _ = io.WriteString(inv.Stdout, cliui.Bold("Interrupt caught, gracefully exiting. Use ctrl+\\ to force quit"))
|
||||
case <-tunnelDone:
|
||||
exitErr = xerrors.New("dev tunnel closed unexpectedly")
|
||||
@@ -1082,7 +1089,16 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
|
||||
defer wg.Done()
|
||||
|
||||
r.Verbosef(inv, "Shutting down provisioner daemon %d...", id)
|
||||
err := shutdownWithTimeout(provisionerDaemon.Shutdown, 5*time.Second)
|
||||
timeout := 5 * time.Second
|
||||
if waitForProvisionerJobs {
|
||||
// It can last for a long time...
|
||||
timeout = 30 * time.Minute
|
||||
}
|
||||
|
||||
err := shutdownWithTimeout(func(ctx context.Context) error {
|
||||
// We only want to cancel active jobs if we aren't exiting gracefully.
|
||||
return provisionerDaemon.Shutdown(ctx, !waitForProvisionerJobs)
|
||||
}, timeout)
|
||||
if err != nil {
|
||||
cliui.Errorf(inv.Stderr, "Failed to shut down provisioner daemon %d: %s\n", id, err)
|
||||
return
|
||||
@@ -2512,3 +2528,12 @@ func escapePostgresURLUserInfo(v string) (string, error) {
|
||||
|
||||
return v, nil
|
||||
}
|
||||
|
||||
func signalNotifyContext(ctx context.Context, inv *clibase.Invocation, sig ...os.Signal) (context.Context, context.CancelFunc) {
|
||||
// On Windows, some of our signal functions lack support.
|
||||
// If we pass in no signals, we should just return the context as-is.
|
||||
if len(sig) == 0 {
|
||||
return context.WithCancel(ctx)
|
||||
}
|
||||
return inv.SignalNotifyContext(ctx, sig...)
|
||||
}
|
||||
|
||||
@@ -47,7 +47,7 @@ func (r *RootCmd) newCreateAdminUserCommand() *clibase.Cmd {
|
||||
logger = logger.Leveled(slog.LevelDebug)
|
||||
}
|
||||
|
||||
ctx, cancel := inv.SignalNotifyContext(ctx, InterruptSignals...)
|
||||
ctx, cancel := inv.SignalNotifyContext(ctx, StopSignals...)
|
||||
defer cancel()
|
||||
|
||||
if newUserDBURL == "" {
|
||||
|
||||
@@ -21,6 +21,7 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -1605,7 +1606,7 @@ func TestServer_Production(t *testing.T) {
|
||||
}
|
||||
|
||||
//nolint:tparallel,paralleltest // This test cannot be run in parallel due to signal handling.
|
||||
func TestServer_Shutdown(t *testing.T) {
|
||||
func TestServer_InterruptShutdown(t *testing.T) {
|
||||
t.Skip("This test issues an interrupt signal which will propagate to the test runner.")
|
||||
|
||||
if runtime.GOOS == "windows" {
|
||||
@@ -1638,6 +1639,46 @@ func TestServer_Shutdown(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestServer_GracefulShutdown(t *testing.T) {
|
||||
t.Parallel()
|
||||
if runtime.GOOS == "windows" {
|
||||
// Sending interrupt signal isn't supported on Windows!
|
||||
t.SkipNow()
|
||||
}
|
||||
ctx, cancelFunc := context.WithCancel(context.Background())
|
||||
defer cancelFunc()
|
||||
|
||||
root, cfg := clitest.New(t,
|
||||
"server",
|
||||
"--in-memory",
|
||||
"--http-address", ":0",
|
||||
"--access-url", "http://example.com",
|
||||
"--provisioner-daemons", "1",
|
||||
"--cache-dir", t.TempDir(),
|
||||
)
|
||||
var stopFunc context.CancelFunc
|
||||
root = root.WithTestSignalNotifyContext(t, func(parent context.Context, signals ...os.Signal) (context.Context, context.CancelFunc) {
|
||||
if !reflect.DeepEqual(cli.StopSignalsNoInterrupt, signals) {
|
||||
return context.WithCancel(ctx)
|
||||
}
|
||||
var ctx context.Context
|
||||
ctx, stopFunc = context.WithCancel(parent)
|
||||
return ctx, stopFunc
|
||||
})
|
||||
serverErr := make(chan error, 1)
|
||||
pty := ptytest.New(t).Attach(root)
|
||||
go func() {
|
||||
serverErr <- root.WithContext(ctx).Run()
|
||||
}()
|
||||
_ = waitAccessURL(t, cfg)
|
||||
// It's fair to assume `stopFunc` isn't nil here, because the server
|
||||
// has started and access URL is propagated.
|
||||
stopFunc()
|
||||
pty.ExpectMatch("waiting for provisioner jobs to complete")
|
||||
err := <-serverErr
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func BenchmarkServerHelp(b *testing.B) {
|
||||
// server --help is a good proxy for measuring the
|
||||
// constant overhead of each command.
|
||||
|
||||
@@ -7,8 +7,23 @@ import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
var InterruptSignals = []os.Signal{
|
||||
// StopSignals is the list of signals that are used for handling
|
||||
// shutdown behavior.
|
||||
var StopSignals = []os.Signal{
|
||||
os.Interrupt,
|
||||
syscall.SIGTERM,
|
||||
syscall.SIGHUP,
|
||||
}
|
||||
|
||||
// StopSignals is the list of signals that are used for handling
|
||||
// graceful shutdown behavior.
|
||||
var StopSignalsNoInterrupt = []os.Signal{
|
||||
syscall.SIGTERM,
|
||||
syscall.SIGHUP,
|
||||
}
|
||||
|
||||
// InterruptSignals is the list of signals that are used for handling
|
||||
// immediate shutdown behavior.
|
||||
var InterruptSignals = []os.Signal{
|
||||
os.Interrupt,
|
||||
}
|
||||
|
||||
@@ -6,4 +6,12 @@ import (
|
||||
"os"
|
||||
)
|
||||
|
||||
var InterruptSignals = []os.Signal{os.Interrupt}
|
||||
var StopSignals = []os.Signal{
|
||||
os.Interrupt,
|
||||
}
|
||||
|
||||
var StopSignalsNoInterrupt = []os.Signal{}
|
||||
|
||||
var InterruptSignals = []os.Signal{
|
||||
os.Interrupt,
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
|
||||
// session can persist for up to 72 hours, since we set a long
|
||||
// timeout on the Agent side of the connection. In particular,
|
||||
// OpenSSH sends SIGHUP to terminate a proxy command.
|
||||
ctx, stop := inv.SignalNotifyContext(inv.Context(), InterruptSignals...)
|
||||
ctx, stop := inv.SignalNotifyContext(inv.Context(), StopSignals...)
|
||||
defer stop()
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
@@ -498,7 +498,7 @@ func (c *provisionerdCloser) Close() error {
|
||||
c.closed = true
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
shutdownErr := c.d.Shutdown(ctx)
|
||||
shutdownErr := c.d.Shutdown(ctx, true)
|
||||
closeErr := c.d.Close()
|
||||
if shutdownErr != nil {
|
||||
return shutdownErr
|
||||
|
||||
@@ -51,7 +51,7 @@ SELECT
|
||||
template_versions.archived,
|
||||
COALESCE(visible_users.avatar_url, ''::text) AS created_by_avatar_url,
|
||||
COALESCE(visible_users.username, ''::text) AS created_by_username
|
||||
FROM (public.template_versions
|
||||
FROM (template_versions
|
||||
LEFT JOIN visible_users ON (template_versions.created_by = visible_users.id));
|
||||
|
||||
COMMENT ON VIEW template_version_with_user IS 'Joins in the username + avatar url of the created by user.';
|
||||
|
||||
@@ -53,7 +53,7 @@ SELECT
|
||||
template_versions.archived,
|
||||
COALESCE(visible_users.avatar_url, ''::text) AS created_by_avatar_url,
|
||||
COALESCE(visible_users.username, ''::text) AS created_by_username
|
||||
FROM (public.template_versions
|
||||
FROM (template_versions
|
||||
LEFT JOIN visible_users ON (template_versions.created_by = visible_users.id));
|
||||
|
||||
COMMENT ON VIEW template_version_with_user IS 'Joins in the username + avatar url of the created by user.';
|
||||
|
||||
29
docs/changelogs/v2.9.1.md
Normal file
29
docs/changelogs/v2.9.1.md
Normal file
@@ -0,0 +1,29 @@
|
||||
## Changelog
|
||||
|
||||
### Features
|
||||
|
||||
- Add separate signals for shutdown handling on Coder server and provisionerd.
|
||||
(#12358) (@kylecarbs)
|
||||
|
||||
> `SIGTERM` will stop accepting new provisioner jobs and wait running jobs to
|
||||
> finish before shutting down.
|
||||
>
|
||||
> `SIGINT` (existing behavior) will cancel in-flight jobs then shut down.
|
||||
|
||||
### Bug fixes
|
||||
|
||||
- Fixed an issue where single-replica workspace proxy deployments may enter an
|
||||
unhealthy state due to replica management errors. (#12641) (@deansheather)
|
||||
|
||||
- Fixed an issue preventing upgrade to `v2.9.0` due to a migration that hard a
|
||||
hardcoded schema name. (#12620) (@95gabor)
|
||||
|
||||
Compare: [`v2.9.0...v2.9.1`](https://github.com/coder/coder/compare/v2.9.0...v2.9.1)
|
||||
|
||||
## Container image
|
||||
|
||||
- `docker pull ghcr.io/coder/coder:v2.9.1`
|
||||
|
||||
## Install/upgrade
|
||||
|
||||
Refer to our docs to [install](https://coder.com/docs/v2/latest/install) or [upgrade](https://coder.com/docs/v2/latest/admin/upgrade) Coder, or use a release asset below.
|
||||
20
docs/changelogs/v2.9.2.md
Normal file
20
docs/changelogs/v2.9.2.md
Normal file
@@ -0,0 +1,20 @@
|
||||
## Changelog
|
||||
|
||||
### Bug fixes
|
||||
|
||||
- Fix issue causing Coder's external auth to open duplicate windows
|
||||
(#12729) (@aslilac)
|
||||
|
||||
- Fix: remove unnecessary href from external
|
||||
auth flow (#12586) (@emryk)
|
||||
|
||||
|
||||
Compare: [`v2.9.1...v2.9.2`](https://github.com/coder/coder/compare/v2.9.1...v2.9.2)
|
||||
|
||||
## Container image
|
||||
|
||||
- `docker pull ghcr.io/coder/coder:v2.9.2`
|
||||
|
||||
## Install/upgrade
|
||||
|
||||
Refer to our docs to [install](https://coder.com/docs/v2/latest/install) or [upgrade](https://coder.com/docs/v2/latest/admin/upgrade) Coder, or use a release asset below.
|
||||
@@ -88,8 +88,10 @@ func (r *RootCmd) provisionerDaemonStart() *clibase.Cmd {
|
||||
ctx, cancel := context.WithCancel(inv.Context())
|
||||
defer cancel()
|
||||
|
||||
notifyCtx, notifyStop := inv.SignalNotifyContext(ctx, agpl.InterruptSignals...)
|
||||
defer notifyStop()
|
||||
stopCtx, stopCancel := inv.SignalNotifyContext(ctx, agpl.StopSignalsNoInterrupt...)
|
||||
defer stopCancel()
|
||||
interruptCtx, interruptCancel := inv.SignalNotifyContext(ctx, agpl.InterruptSignals...)
|
||||
defer interruptCancel()
|
||||
|
||||
tags, err := agpl.ParseProvisionerTags(rawTags)
|
||||
if err != nil {
|
||||
@@ -212,10 +214,17 @@ func (r *RootCmd) provisionerDaemonStart() *clibase.Cmd {
|
||||
Metrics: metrics,
|
||||
})
|
||||
|
||||
waitForProvisionerJobs := false
|
||||
var exitErr error
|
||||
select {
|
||||
case <-notifyCtx.Done():
|
||||
exitErr = notifyCtx.Err()
|
||||
case <-stopCtx.Done():
|
||||
exitErr = stopCtx.Err()
|
||||
_, _ = fmt.Fprintln(inv.Stdout, cliui.Bold(
|
||||
"Stop caught, waiting for provisioner jobs to complete and gracefully exiting. Use ctrl+\\ to force quit",
|
||||
))
|
||||
waitForProvisionerJobs = true
|
||||
case <-interruptCtx.Done():
|
||||
exitErr = interruptCtx.Err()
|
||||
_, _ = fmt.Fprintln(inv.Stdout, cliui.Bold(
|
||||
"Interrupt caught, gracefully exiting. Use ctrl+\\ to force quit",
|
||||
))
|
||||
@@ -225,7 +234,7 @@ func (r *RootCmd) provisionerDaemonStart() *clibase.Cmd {
|
||||
cliui.Errorf(inv.Stderr, "Unexpected error, shutting down server: %s\n", exitErr)
|
||||
}
|
||||
|
||||
err = srv.Shutdown(ctx)
|
||||
err = srv.Shutdown(ctx, waitForProvisionerJobs)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("shutdown: %w", err)
|
||||
}
|
||||
|
||||
@@ -142,7 +142,7 @@ func (r *RootCmd) proxyServer() *clibase.Cmd {
|
||||
//
|
||||
// To get out of a graceful shutdown, the user can send
|
||||
// SIGQUIT with ctrl+\ or SIGKILL with `kill -9`.
|
||||
notifyCtx, notifyStop := inv.SignalNotifyContext(ctx, cli.InterruptSignals...)
|
||||
notifyCtx, notifyStop := inv.SignalNotifyContext(ctx, cli.StopSignals...)
|
||||
defer notifyStop()
|
||||
|
||||
// Clean up idle connections at the end, e.g.
|
||||
|
||||
@@ -441,7 +441,7 @@ func TestProvisionerDaemonServe(t *testing.T) {
|
||||
build := coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
|
||||
require.Equal(t, codersdk.WorkspaceStatusRunning, build.Status)
|
||||
|
||||
err = pd.Shutdown(ctx)
|
||||
err = pd.Shutdown(ctx, false)
|
||||
require.NoError(t, err)
|
||||
err = terraformServer.Close()
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -449,8 +449,21 @@ func (s *Server) handleRegister(res wsproxysdk.RegisterWorkspaceProxyResponse) e
|
||||
}
|
||||
|
||||
func (s *Server) pingSiblingReplicas(replicas []codersdk.Replica) {
|
||||
ctx, cancel := context.WithTimeout(s.ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
errStr := pingSiblingReplicas(ctx, s.Logger, &s.replicaPingSingleflight, s.derpMeshTLSConfig, replicas)
|
||||
s.replicaErrMut.Lock()
|
||||
s.replicaErr = errStr
|
||||
s.replicaErrMut.Unlock()
|
||||
if s.Options.ReplicaErrCallback != nil {
|
||||
s.Options.ReplicaErrCallback(replicas, s.replicaErr)
|
||||
}
|
||||
}
|
||||
|
||||
func pingSiblingReplicas(ctx context.Context, logger slog.Logger, sf *singleflight.Group, derpMeshTLSConfig *tls.Config, replicas []codersdk.Replica) string {
|
||||
if len(replicas) == 0 {
|
||||
return
|
||||
return ""
|
||||
}
|
||||
|
||||
// Avoid pinging multiple times at once if the list hasn't changed.
|
||||
@@ -462,18 +475,11 @@ func (s *Server) pingSiblingReplicas(replicas []codersdk.Replica) {
|
||||
singleflightStr := strings.Join(relayURLs, " ") // URLs can't contain spaces.
|
||||
|
||||
//nolint:dogsled
|
||||
_, _, _ = s.replicaPingSingleflight.Do(singleflightStr, func() (any, error) {
|
||||
const (
|
||||
perReplicaTimeout = 3 * time.Second
|
||||
fullTimeout = 10 * time.Second
|
||||
)
|
||||
ctx, cancel := context.WithTimeout(s.ctx, fullTimeout)
|
||||
defer cancel()
|
||||
|
||||
errStrInterface, _, _ := sf.Do(singleflightStr, func() (any, error) {
|
||||
client := http.Client{
|
||||
Timeout: perReplicaTimeout,
|
||||
Timeout: 3 * time.Second,
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: s.derpMeshTLSConfig,
|
||||
TLSClientConfig: derpMeshTLSConfig,
|
||||
DisableKeepAlives: true,
|
||||
},
|
||||
}
|
||||
@@ -485,7 +491,7 @@ func (s *Server) pingSiblingReplicas(replicas []codersdk.Replica) {
|
||||
err := replicasync.PingPeerReplica(ctx, client, peer.RelayAddress)
|
||||
if err != nil {
|
||||
errs <- xerrors.Errorf("ping sibling replica %s (%s): %w", peer.Hostname, peer.RelayAddress, err)
|
||||
s.Logger.Warn(ctx, "failed to ping sibling replica, this could happen if the replica has shutdown",
|
||||
logger.Warn(ctx, "failed to ping sibling replica, this could happen if the replica has shutdown",
|
||||
slog.F("replica_hostname", peer.Hostname),
|
||||
slog.F("replica_relay_address", peer.RelayAddress),
|
||||
slog.Error(err),
|
||||
@@ -504,20 +510,14 @@ func (s *Server) pingSiblingReplicas(replicas []codersdk.Replica) {
|
||||
}
|
||||
}
|
||||
|
||||
s.replicaErrMut.Lock()
|
||||
defer s.replicaErrMut.Unlock()
|
||||
s.replicaErr = ""
|
||||
if len(replicaErrs) > 0 {
|
||||
s.replicaErr = fmt.Sprintf("Failed to dial peers: %s", strings.Join(replicaErrs, ", "))
|
||||
if len(replicaErrs) == 0 {
|
||||
return "", nil
|
||||
}
|
||||
if s.Options.ReplicaErrCallback != nil {
|
||||
s.Options.ReplicaErrCallback(replicas, s.replicaErr)
|
||||
}
|
||||
|
||||
//nolint:nilnil // we don't actually use the return value of the
|
||||
// singleflight here
|
||||
return nil, nil
|
||||
return fmt.Sprintf("Failed to dial peers: %s", strings.Join(replicaErrs, ", ")), nil
|
||||
})
|
||||
|
||||
//nolint:forcetypeassert
|
||||
return errStrInterface.(string)
|
||||
}
|
||||
|
||||
func (s *Server) handleRegisterFailure(err error) {
|
||||
@@ -590,7 +590,8 @@ func (s *Server) healthReport(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
s.replicaErrMut.Lock()
|
||||
if s.replicaErr != "" {
|
||||
report.Errors = append(report.Errors, "High availability networking: it appears you are running more than one replica of the proxy, but the replicas are unable to establish a mesh for networking: "+s.replicaErr)
|
||||
report.Warnings = append(report.Warnings,
|
||||
"High availability networking: it appears you are running more than one replica of the proxy, but the replicas are unable to establish a mesh for networking: "+s.replicaErr)
|
||||
}
|
||||
s.replicaErrMut.Unlock()
|
||||
|
||||
|
||||
@@ -563,7 +563,7 @@ func TestWorkspaceProxyDERPMeshProbe(t *testing.T) {
|
||||
return proxyRes
|
||||
}
|
||||
|
||||
registerBrokenProxy := func(ctx context.Context, t *testing.T, primaryAccessURL *url.URL, accessURL, token string) {
|
||||
registerBrokenProxy := func(ctx context.Context, t *testing.T, primaryAccessURL *url.URL, accessURL, token string) uuid.UUID {
|
||||
t.Helper()
|
||||
// Create a HTTP server that always replies with 500.
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
@@ -574,21 +574,23 @@ func TestWorkspaceProxyDERPMeshProbe(t *testing.T) {
|
||||
// Register a proxy.
|
||||
wsproxyClient := wsproxysdk.New(primaryAccessURL)
|
||||
wsproxyClient.SetSessionToken(token)
|
||||
|
||||
hostname, err := cryptorand.String(6)
|
||||
require.NoError(t, err)
|
||||
replicaID := uuid.New()
|
||||
_, err = wsproxyClient.RegisterWorkspaceProxy(ctx, wsproxysdk.RegisterWorkspaceProxyRequest{
|
||||
AccessURL: accessURL,
|
||||
WildcardHostname: "",
|
||||
DerpEnabled: true,
|
||||
DerpOnly: false,
|
||||
ReplicaID: uuid.New(),
|
||||
ReplicaID: replicaID,
|
||||
ReplicaHostname: hostname,
|
||||
ReplicaError: "",
|
||||
ReplicaRelayAddress: srv.URL,
|
||||
Version: buildinfo.Version(),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
return replicaID
|
||||
}
|
||||
|
||||
t.Run("ProbeOK", func(t *testing.T) {
|
||||
@@ -781,8 +783,120 @@ func TestWorkspaceProxyDERPMeshProbe(t *testing.T) {
|
||||
resp.Body.Close()
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Len(t, respJSON.Errors, 1, "proxy is healthy")
|
||||
require.Contains(t, respJSON.Errors[0], "High availability networking")
|
||||
require.Len(t, respJSON.Warnings, 1, "proxy is healthy")
|
||||
require.Contains(t, respJSON.Warnings[0], "High availability networking")
|
||||
})
|
||||
|
||||
// This test catches a regression we detected on dogfood which caused
|
||||
// proxies to remain unhealthy after a mesh failure if they dropped to zero
|
||||
// siblings after the failure.
|
||||
t.Run("HealthyZero", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
deploymentValues := coderdtest.DeploymentValues(t)
|
||||
deploymentValues.Experiments = []string{
|
||||
"*",
|
||||
}
|
||||
|
||||
client, closer, api, _ := coderdenttest.NewWithAPI(t, &coderdenttest.Options{
|
||||
Options: &coderdtest.Options{
|
||||
DeploymentValues: deploymentValues,
|
||||
AppHostname: "*.primary.test.coder.com",
|
||||
IncludeProvisionerDaemon: true,
|
||||
RealIPConfig: &httpmw.RealIPConfig{
|
||||
TrustedOrigins: []*net.IPNet{{
|
||||
IP: net.ParseIP("127.0.0.1"),
|
||||
Mask: net.CIDRMask(8, 32),
|
||||
}},
|
||||
TrustedHeaders: []string{
|
||||
"CF-Connecting-IP",
|
||||
},
|
||||
},
|
||||
},
|
||||
LicenseOptions: &coderdenttest.LicenseOptions{
|
||||
Features: license.Features{
|
||||
codersdk.FeatureWorkspaceProxy: 1,
|
||||
},
|
||||
},
|
||||
})
|
||||
t.Cleanup(func() {
|
||||
_ = closer.Close()
|
||||
})
|
||||
|
||||
proxyURL, err := url.Parse("https://proxy2.test.coder.com")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create 1 real proxy replica.
|
||||
replicaPingErr := make(chan string, 4)
|
||||
proxy := coderdenttest.NewWorkspaceProxyReplica(t, api, client, &coderdenttest.ProxyOptions{
|
||||
Name: "proxy-2",
|
||||
ProxyURL: proxyURL,
|
||||
ReplicaPingCallback: func(replicas []codersdk.Replica, err string) {
|
||||
replicaPingErr <- err
|
||||
},
|
||||
})
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitLong)
|
||||
otherReplicaID := registerBrokenProxy(ctx, t, api.AccessURL, proxyURL.String(), proxy.Options.ProxySessionToken)
|
||||
|
||||
// Force the proxy to re-register immediately.
|
||||
err = proxy.RegisterNow()
|
||||
require.NoError(t, err, "failed to force proxy to re-register")
|
||||
|
||||
// Wait for the ping to fail.
|
||||
for {
|
||||
replicaErr := testutil.RequireRecvCtx(ctx, t, replicaPingErr)
|
||||
t.Log("replica ping error:", replicaErr)
|
||||
if replicaErr != "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// GET /healthz-report
|
||||
u := proxy.ServerURL.ResolveReference(&url.URL{Path: "/healthz-report"})
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
|
||||
require.NoError(t, err)
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
require.NoError(t, err)
|
||||
var respJSON codersdk.ProxyHealthReport
|
||||
err = json.NewDecoder(resp.Body).Decode(&respJSON)
|
||||
resp.Body.Close()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, respJSON.Warnings, 1, "proxy is healthy")
|
||||
require.Contains(t, respJSON.Warnings[0], "High availability networking")
|
||||
|
||||
// Deregister the other replica.
|
||||
wsproxyClient := wsproxysdk.New(api.AccessURL)
|
||||
wsproxyClient.SetSessionToken(proxy.Options.ProxySessionToken)
|
||||
err = wsproxyClient.DeregisterWorkspaceProxy(ctx, wsproxysdk.DeregisterWorkspaceProxyRequest{
|
||||
ReplicaID: otherReplicaID,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Force the proxy to re-register immediately.
|
||||
err = proxy.RegisterNow()
|
||||
require.NoError(t, err, "failed to force proxy to re-register")
|
||||
|
||||
// Wait for the ping to be skipped.
|
||||
for {
|
||||
replicaErr := testutil.RequireRecvCtx(ctx, t, replicaPingErr)
|
||||
t.Log("replica ping error:", replicaErr)
|
||||
// Should be empty because there are no more peers. This was where
|
||||
// the regression was.
|
||||
if replicaErr == "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// GET /healthz-report
|
||||
req, err = http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
|
||||
require.NoError(t, err)
|
||||
resp, err = http.DefaultClient.Do(req)
|
||||
require.NoError(t, err)
|
||||
err = json.NewDecoder(resp.Body).Decode(&respJSON)
|
||||
resp.Body.Close()
|
||||
require.NoError(t, err)
|
||||
require.Len(t, respJSON.Warnings, 0, "proxy is unhealthy")
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -474,15 +474,18 @@ func (p *Server) isClosed() bool {
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown triggers a graceful exit of each registered provisioner.
|
||||
func (p *Server) Shutdown(ctx context.Context) error {
|
||||
// Shutdown gracefully exists with the option to cancel the active job.
|
||||
// If false, it will wait for the job to complete.
|
||||
//
|
||||
//nolint:revive
|
||||
func (p *Server) Shutdown(ctx context.Context, cancelActiveJob bool) error {
|
||||
p.mutex.Lock()
|
||||
p.opts.Logger.Info(ctx, "attempting graceful shutdown")
|
||||
if !p.shuttingDownB {
|
||||
close(p.shuttingDownCh)
|
||||
p.shuttingDownB = true
|
||||
}
|
||||
if p.activeJob != nil {
|
||||
if cancelActiveJob && p.activeJob != nil {
|
||||
p.activeJob.Cancel()
|
||||
}
|
||||
p.mutex.Unlock()
|
||||
|
||||
@@ -671,7 +671,7 @@ func TestProvisionerd(t *testing.T) {
|
||||
}),
|
||||
})
|
||||
require.Condition(t, closedWithin(updateChan, testutil.WaitShort))
|
||||
err := server.Shutdown(context.Background())
|
||||
err := server.Shutdown(context.Background(), true)
|
||||
require.NoError(t, err)
|
||||
require.Condition(t, closedWithin(completeChan, testutil.WaitShort))
|
||||
require.NoError(t, server.Close())
|
||||
@@ -762,7 +762,7 @@ func TestProvisionerd(t *testing.T) {
|
||||
require.Condition(t, closedWithin(completeChan, testutil.WaitShort))
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
require.NoError(t, server.Shutdown(ctx))
|
||||
require.NoError(t, server.Shutdown(ctx, true))
|
||||
require.NoError(t, server.Close())
|
||||
})
|
||||
|
||||
@@ -853,7 +853,7 @@ func TestProvisionerd(t *testing.T) {
|
||||
require.Condition(t, closedWithin(completeChan, testutil.WaitShort))
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
require.NoError(t, server.Shutdown(ctx))
|
||||
require.NoError(t, server.Shutdown(ctx, true))
|
||||
require.NoError(t, server.Close())
|
||||
})
|
||||
|
||||
@@ -944,7 +944,7 @@ func TestProvisionerd(t *testing.T) {
|
||||
t.Log("completeChan closed")
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
require.NoError(t, server.Shutdown(ctx))
|
||||
require.NoError(t, server.Shutdown(ctx, true))
|
||||
require.NoError(t, server.Close())
|
||||
})
|
||||
|
||||
@@ -1039,7 +1039,7 @@ func TestProvisionerd(t *testing.T) {
|
||||
require.Condition(t, closedWithin(completeChan, testutil.WaitShort))
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
require.NoError(t, server.Shutdown(ctx))
|
||||
require.NoError(t, server.Shutdown(ctx, true))
|
||||
require.NoError(t, server.Close())
|
||||
assert.Equal(t, ops[len(ops)-1], "CompleteJob")
|
||||
assert.Contains(t, ops[0:len(ops)-1], "Log: Cleaning Up | ")
|
||||
@@ -1076,7 +1076,7 @@ func createProvisionerd(t *testing.T, dialer provisionerd.Dialer, connector prov
|
||||
t.Cleanup(func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
_ = server.Shutdown(ctx)
|
||||
_ = server.Shutdown(ctx, true)
|
||||
_ = server.Close()
|
||||
})
|
||||
return server
|
||||
|
||||
@@ -233,6 +233,46 @@ describe("CreateWorkspacePage", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("optional external auth is optional", async () => {
|
||||
jest
|
||||
.spyOn(API, "getWorkspaceQuota")
|
||||
.mockResolvedValueOnce(MockWorkspaceQuota);
|
||||
jest
|
||||
.spyOn(API, "getUsers")
|
||||
.mockResolvedValueOnce({ users: [MockUser], count: 1 });
|
||||
jest.spyOn(API, "createWorkspace").mockResolvedValueOnce(MockWorkspace);
|
||||
jest
|
||||
.spyOn(API, "getTemplateVersionExternalAuth")
|
||||
.mockResolvedValue([
|
||||
{ ...MockTemplateVersionExternalAuthGithub, optional: true },
|
||||
]);
|
||||
|
||||
renderCreateWorkspacePage();
|
||||
await waitForLoaderToBeRemoved();
|
||||
|
||||
const nameField = await screen.findByLabelText(nameLabelText);
|
||||
// have to use fireEvent b/c userEvent isn't cleaning up properly between tests
|
||||
fireEvent.change(nameField, {
|
||||
target: { value: "test" },
|
||||
});
|
||||
|
||||
// Ensure we're not logged in
|
||||
await screen.findByText("Login with GitHub");
|
||||
|
||||
const submitButton = screen.getByText(createWorkspaceText);
|
||||
await userEvent.click(submitButton);
|
||||
|
||||
await waitFor(() =>
|
||||
expect(API.createWorkspace).toBeCalledWith(
|
||||
MockUser.organization_ids[0],
|
||||
MockUser.id,
|
||||
expect.objectContaining({
|
||||
...MockWorkspaceRequest,
|
||||
}),
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
it("auto create a workspace if uses mode=auto", async () => {
|
||||
const param = "first_parameter";
|
||||
const paramValue = "It works!";
|
||||
|
||||
@@ -105,6 +105,11 @@ const CreateWorkspacePage: FC = () => {
|
||||
userParametersQuery.data ? userParametersQuery.data : [],
|
||||
);
|
||||
|
||||
const hasAllRequiredExternalAuth = Boolean(
|
||||
!isLoadingExternalAuth &&
|
||||
externalAuth?.every((auth) => auth.optional || auth.authenticated),
|
||||
);
|
||||
|
||||
const autoCreationStartedRef = useRef(false);
|
||||
const automateWorkspaceCreation = useEffectEvent(async () => {
|
||||
if (autoCreationStartedRef.current) {
|
||||
@@ -143,14 +148,15 @@ const CreateWorkspacePage: FC = () => {
|
||||
</Helmet>
|
||||
{loadFormDataError && <ErrorAlert error={loadFormDataError} />}
|
||||
{isLoadingFormData ||
|
||||
isLoadingExternalAuth ||
|
||||
autoCreateWorkspaceMutation.isLoading ? (
|
||||
isLoadingExternalAuth ||
|
||||
autoCreateWorkspaceMutation.isLoading ? (
|
||||
<Loader />
|
||||
) : (
|
||||
<CreateWorkspacePageView
|
||||
mode={mode}
|
||||
defaultName={defaultName}
|
||||
defaultOwner={me}
|
||||
hasAllRequiredExternalAuth={hasAllRequiredExternalAuth}
|
||||
autofillParameters={autofillParameters}
|
||||
error={createWorkspaceMutation.error}
|
||||
resetMutation={createWorkspaceMutation.reset}
|
||||
@@ -198,10 +204,10 @@ const useExternalAuth = (versionId: string | undefined) => {
|
||||
const { data: externalAuth, isLoading: isLoadingExternalAuth } = useQuery(
|
||||
versionId
|
||||
? {
|
||||
...templateVersionExternalAuth(versionId),
|
||||
refetchInterval:
|
||||
externalAuthPollingState === "polling" ? 1000 : false,
|
||||
}
|
||||
...templateVersionExternalAuth(versionId),
|
||||
refetchInterval:
|
||||
externalAuthPollingState === "polling" ? 1000 : false,
|
||||
}
|
||||
: { enabled: false },
|
||||
);
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ const meta: Meta<typeof CreateWorkspacePageView> = {
|
||||
template: MockTemplate,
|
||||
parameters: [],
|
||||
externalAuth: [],
|
||||
hasAllRequiredExternalAuth: true,
|
||||
mode: "form",
|
||||
permissions: {
|
||||
createWorkspaceForUser: true,
|
||||
@@ -134,6 +135,7 @@ export const ExternalAuth: Story = {
|
||||
optional: true,
|
||||
},
|
||||
],
|
||||
hasAllRequiredExternalAuth: false,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -159,6 +161,7 @@ export const ExternalAuthError: Story = {
|
||||
optional: true,
|
||||
},
|
||||
],
|
||||
hasAllRequiredExternalAuth: false,
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ export interface CreateWorkspacePageViewProps {
|
||||
externalAuth: TypesGen.TemplateVersionExternalAuth[];
|
||||
externalAuthPollingState: ExternalAuthPollingState;
|
||||
startPollingExternalAuth: () => void;
|
||||
hasAllRequiredExternalAuth: boolean;
|
||||
parameters: TypesGen.TemplateVersionParameter[];
|
||||
autofillParameters: AutofillBuildParameter[];
|
||||
permissions: CreateWSPermissions;
|
||||
@@ -82,6 +83,7 @@ export const CreateWorkspacePageView: FC<CreateWorkspacePageViewProps> = ({
|
||||
externalAuth,
|
||||
externalAuthPollingState,
|
||||
startPollingExternalAuth,
|
||||
hasAllRequiredExternalAuth,
|
||||
parameters,
|
||||
autofillParameters,
|
||||
permissions,
|
||||
@@ -92,7 +94,6 @@ export const CreateWorkspacePageView: FC<CreateWorkspacePageViewProps> = ({
|
||||
const [owner, setOwner] = useState(defaultOwner);
|
||||
const [searchParams] = useSearchParams();
|
||||
const disabledParamsList = searchParams?.get("disable_params")?.split(",");
|
||||
const requiresExternalAuth = externalAuth.some((auth) => !auth.authenticated);
|
||||
const [suggestedName, setSuggestedName] = useState(() =>
|
||||
generateWorkspaceName(),
|
||||
);
|
||||
@@ -117,7 +118,7 @@ export const CreateWorkspacePageView: FC<CreateWorkspacePageViewProps> = ({
|
||||
}),
|
||||
enableReinitialize: true,
|
||||
onSubmit: (request) => {
|
||||
if (requiresExternalAuth) {
|
||||
if (!hasAllRequiredExternalAuth) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -144,10 +145,6 @@ export const CreateWorkspacePageView: FC<CreateWorkspacePageViewProps> = ({
|
||||
[autofillParameters],
|
||||
);
|
||||
|
||||
const hasAllRequiredExternalAuth = externalAuth.every(
|
||||
(auth) => auth.optional || auth.authenticated,
|
||||
);
|
||||
|
||||
return (
|
||||
<Margins size="medium">
|
||||
<PageHeader actions={<Button onClick={onCancel}>Cancel</Button>}>
|
||||
|
||||
@@ -29,7 +29,6 @@ export const ExternalAuthButton: FC<ExternalAuthButtonProps> = ({
|
||||
<LoadingButton
|
||||
fullWidth
|
||||
loading={isLoading}
|
||||
href={auth.authenticate_url}
|
||||
variant="contained"
|
||||
size="xlarge"
|
||||
startIcon={
|
||||
|
||||
Reference in New Issue
Block a user