feat(aibridged): add overload protection with rate limiting and concurrency control

This adds configurable overload protection to the AI Bridge daemon to prevent the server from being overwhelmed during periods of high load.

New configuration options:
- CODER_AIBRIDGE_MAX_CONCURRENCY: Maximum number of concurrent AI Bridge requests (0 to disable)
- CODER_AIBRIDGE_RATE_LIMIT: Maximum number of requests per rate window (0 to disable)
- CODER_AIBRIDGE_RATE_WINDOW: Duration of the rate limiting window (default: 1m)

When limits are exceeded:
- Concurrency limit: Returns HTTP 503 Service Unavailable
- Rate limit: Returns HTTP 429 Too Many Requests

The overload protection middleware wraps the aibridged HTTP handler and provides:
- Concurrency limiting using an atomic counter
- Rate limiting using the go-chi/httprate library, keyed by client IP

Both protections are optional and disabled by default (0 values).

Fixes coder/internal#1153
2025-12-08 11:17:58 +00:00
17 changed files with 539 additions and 14 deletions
--- a/cli/testdata/coder_server_--help.golden
+++ b/cli/testdata/coder_server_--help.golden
@@ -118,12 +118,23 @@ AI BRIDGE OPTIONS:
          requests (requires the "oauth2" and "mcp-server-http" experiments to
          be enabled).

+      --aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
+          Maximum number of concurrent AI Bridge requests. Set to 0 to disable
+          (unlimited).
+
      --aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
          The base URL of the OpenAI API.

      --aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
          The key to authenticate against the OpenAI API.

+      --aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
+          Maximum number of AI Bridge requests per rate window. Set to 0 to
+          disable rate limiting.
+
+      --aibridge-rate-window duration, $CODER_AIBRIDGE_RATE_WINDOW (default: 1m)
+          Duration of the rate limiting window for AI Bridge requests.
+
 CLIENT OPTIONS: 
 These options change the behavior of how clients interact with the Coder.
 Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.
--- a/cli/testdata/server-config.yaml.golden
+++ b/cli/testdata/server-config.yaml.golden
@@ -742,6 +742,17 @@ aibridge:
  # (token, prompt, tool use).
  # (default: 60d, type: duration)
  retention: 1440h0m0s
+  # Maximum number of concurrent AI Bridge requests. Set to 0 to disable
+  # (unlimited).
+  # (default: 0, type: int)
+  max_concurrency: 0
+  # Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate
+  # limiting.
+  # (default: 0, type: int)
+  rate_limit: 0
+  # Duration of the rate limiting window for AI Bridge requests.
+  # (default: 1m, type: duration)
+  rate_window: 1m0s
 # Configure data retention policies for various database tables. Retention
 # policies automatically purge old data to reduce database size and improve
 # performance. Setting a retention duration to 0 disables automatic purging for
--- a/coderd/apidoc/docs.go
+++ b/coderd/apidoc/docs.go
@@ -11877,9 +11877,19 @@ const docTemplate = `{
                "inject_coder_mcp_tools": {
                    "type": "boolean"
                },
+                "max_concurrency": {
+                    "description": "Overload protection settings.",
+                    "type": "integer"
+                },
                "openai": {
                    "$ref": "#/definitions/codersdk.AIBridgeOpenAIConfig"
                },
+                "rate_limit": {
+                    "type": "integer"
+                },
+                "rate_window": {
+                    "type": "integer"
+                },
                "retention": {
                    "type": "integer"
                }
--- a/coderd/apidoc/swagger.json
+++ b/coderd/apidoc/swagger.json
@@ -10543,9 +10543,19 @@
 				"inject_coder_mcp_tools": {
 					"type": "boolean"
 				},
+				"max_concurrency": {
+					"description": "Overload protection settings.",
+					"type": "integer"
+				},
 				"openai": {
 					"$ref": "#/definitions/codersdk.AIBridgeOpenAIConfig"
 				},
+				"rate_limit": {
+					"type": "integer"
+				},
+				"rate_window": {
+					"type": "integer"
+				},
 				"retention": {
 					"type": "integer"
 				}
--- a/codersdk/deployment.go
+++ b/codersdk/deployment.go
@@ -3391,6 +3391,37 @@ Write out the current server config as YAML to stdout.`,
 			YAML:        "retention",
 			Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
 		},
+		{
+			Name:        "AI Bridge Max Concurrency",
+			Description: "Maximum number of concurrent AI Bridge requests. Set to 0 to disable (unlimited).",
+			Flag:        "aibridge-max-concurrency",
+			Env:         "CODER_AIBRIDGE_MAX_CONCURRENCY",
+			Value:       &c.AI.BridgeConfig.MaxConcurrency,
+			Default:     "0",
+			Group:       &deploymentGroupAIBridge,
+			YAML:        "max_concurrency",
+		},
+		{
+			Name:        "AI Bridge Rate Limit",
+			Description: "Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate limiting.",
+			Flag:        "aibridge-rate-limit",
+			Env:         "CODER_AIBRIDGE_RATE_LIMIT",
+			Value:       &c.AI.BridgeConfig.RateLimit,
+			Default:     "0",
+			Group:       &deploymentGroupAIBridge,
+			YAML:        "rate_limit",
+		},
+		{
+			Name:        "AI Bridge Rate Window",
+			Description: "Duration of the rate limiting window for AI Bridge requests.",
+			Flag:        "aibridge-rate-window",
+			Env:         "CODER_AIBRIDGE_RATE_WINDOW",
+			Value:       &c.AI.BridgeConfig.RateWindow,
+			Default:     "1m",
+			Group:       &deploymentGroupAIBridge,
+			YAML:        "rate_window",
+			Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
+		},
 		// Retention settings
 		{
 			Name:        "Audit Logs Retention",
@@ -3461,6 +3492,10 @@ type AIBridgeConfig struct {
 	Bedrock             AIBridgeBedrockConfig   `json:"bedrock" typescript:",notnull"`
 	InjectCoderMCPTools serpent.Bool            `json:"inject_coder_mcp_tools" typescript:",notnull"`
 	Retention           serpent.Duration        `json:"retention" typescript:",notnull"`
+	// Overload protection settings.
+	MaxConcurrency serpent.Int64    `json:"max_concurrency" typescript:",notnull"`
+	RateLimit      serpent.Int64    `json:"rate_limit" typescript:",notnull"`
+	RateWindow     serpent.Duration `json:"rate_window" typescript:",notnull"`
 }

 type AIBridgeOpenAIConfig struct {
--- a/docs/reference/api/general.md
+++ b/docs/reference/api/general.md
@@ -176,10 +176,13 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \
        },
        "enabled": true,
        "inject_coder_mcp_tools": true,
+        "max_concurrency": 0,
        "openai": {
          "base_url": "string",
          "key": "string"
        },
+        "rate_limit": 0,
+        "rate_window": 0,
        "retention": 0
      }
    },
--- a/docs/reference/api/schemas.md
+++ b/docs/reference/api/schemas.md
@@ -390,24 +390,30 @@
  },
  "enabled": true,
  "inject_coder_mcp_tools": true,
+  "max_concurrency": 0,
  "openai": {
    "base_url": "string",
    "key": "string"
  },
+  "rate_limit": 0,
+  "rate_window": 0,
  "retention": 0
 }
 ```

 ### Properties

-| Name                     | Type                                                                 | Required | Restrictions | Description |
-|--------------------------|----------------------------------------------------------------------|----------|--------------|-------------|
-| `anthropic`              | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false    |              |             |
-| `bedrock`                | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig)     | false    |              |             |
-| `enabled`                | boolean                                                              | false    |              |             |
-| `inject_coder_mcp_tools` | boolean                                                              | false    |              |             |
-| `openai`                 | [codersdk.AIBridgeOpenAIConfig](#codersdkaibridgeopenaiconfig)       | false    |              |             |
-| `retention`              | integer                                                              | false    |              |             |
+| Name                     | Type                                                                 | Required | Restrictions | Description                   |
+|--------------------------|----------------------------------------------------------------------|----------|--------------|-------------------------------|
+| `anthropic`              | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false    |              |                               |
+| `bedrock`                | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig)     | false    |              |                               |
+| `enabled`                | boolean                                                              | false    |              |                               |
+| `inject_coder_mcp_tools` | boolean                                                              | false    |              |                               |
+| `max_concurrency`        | integer                                                              | false    |              | Overload protection settings. |
+| `openai`                 | [codersdk.AIBridgeOpenAIConfig](#codersdkaibridgeopenaiconfig)       | false    |              |                               |
+| `rate_limit`             | integer                                                              | false    |              |                               |
+| `rate_window`            | integer                                                              | false    |              |                               |
+| `retention`              | integer                                                              | false    |              |                               |

 ## codersdk.AIBridgeInterception

@@ -700,10 +706,13 @@
    },
    "enabled": true,
    "inject_coder_mcp_tools": true,
+    "max_concurrency": 0,
    "openai": {
      "base_url": "string",
      "key": "string"
    },
+    "rate_limit": 0,
+    "rate_window": 0,
    "retention": 0
  }
 }
@@ -2860,10 +2869,13 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
        },
        "enabled": true,
        "inject_coder_mcp_tools": true,
+        "max_concurrency": 0,
        "openai": {
          "base_url": "string",
          "key": "string"
        },
+        "rate_limit": 0,
+        "rate_window": 0,
        "retention": 0
      }
    },
@@ -3382,10 +3394,13 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
      },
      "enabled": true,
      "inject_coder_mcp_tools": true,
+      "max_concurrency": 0,
      "openai": {
        "base_url": "string",
        "key": "string"
      },
+      "rate_limit": 0,
+      "rate_window": 0,
      "retention": 0
    }
  },
--- a/docs/reference/cli/server.md
+++ b/docs/reference/cli/server.md
@@ -1771,6 +1771,39 @@ Whether to inject Coder's MCP tools into intercepted AI Bridge requests (require

 Length of time to retain data such as interceptions and all related records (token, prompt, tool use).

+### --aibridge-max-concurrency
+
+|             |                                              |
+|-------------|----------------------------------------------|
+| Type        | <code>int</code>                             |
+| Environment | <code>$CODER_AIBRIDGE_MAX_CONCURRENCY</code> |
+| YAML        | <code>aibridge.max_concurrency</code>        |
+| Default     | <code>0</code>                               |
+
+Maximum number of concurrent AI Bridge requests. Set to 0 to disable (unlimited).
+
+### --aibridge-rate-limit
+
+|             |                                         |
+|-------------|-----------------------------------------|
+| Type        | <code>int</code>                        |
+| Environment | <code>$CODER_AIBRIDGE_RATE_LIMIT</code> |
+| YAML        | <code>aibridge.rate_limit</code>        |
+| Default     | <code>0</code>                          |
+
+Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate limiting.
+
+### --aibridge-rate-window
+
+|             |                                          |
+|-------------|------------------------------------------|
+| Type        | <code>duration</code>                    |
+| Environment | <code>$CODER_AIBRIDGE_RATE_WINDOW</code> |
+| YAML        | <code>aibridge.rate_window</code>        |
+| Default     | <code>1m</code>                          |
+
+Duration of the rate limiting window for AI Bridge requests.
+
 ### --audit-logs-retention

 |             |                                          |
--- a/enterprise/aibridged/aibridged.go
+++ b/enterprise/aibridged/aibridged.go
@@ -33,6 +33,9 @@ type Server struct {
 	// A pool of [aibridge.RequestBridge] instances, which service incoming requests.
 	requestBridgePool Pooler

+	// overloadProtection provides rate limiting and concurrency control.
+	overloadProtection *OverloadProtection
+
 	logger slog.Logger
 	tracer trace.Tracer
 	wg     sync.WaitGroup
@@ -50,7 +53,7 @@ type Server struct {
 	shutdownOnce sync.Once
 }

-func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger, tracer trace.Tracer) (*Server, error) {
+func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger, tracer trace.Tracer, overloadCfg *OverloadConfig) (*Server, error) {
 	if rpcDialer == nil {
 		return nil, xerrors.Errorf("nil rpcDialer given")
 	}
@@ -68,6 +71,16 @@ func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger,
 		requestBridgePool: pool,
 	}

+	// Initialize overload protection if configured.
+	if overloadCfg != nil {
+		daemon.overloadProtection = NewOverloadProtection(*overloadCfg, logger)
+		logger.Info(ctx, "overload protection enabled",
+			slog.F("max_concurrency", overloadCfg.MaxConcurrency),
+			slog.F("rate_limit", overloadCfg.RateLimit),
+			slog.F("rate_window", overloadCfg.RateWindow),
+		)
+	}
+
 	daemon.wg.Add(1)
 	go daemon.connect()

--- a/enterprise/aibridged/aibridged_integration_test.go
+++ b/enterprise/aibridged/aibridged_integration_test.go
@@ -189,7 +189,7 @@ func TestIntegration(t *testing.T) {
 	// Given: aibridged is started.
 	srv, err := aibridged.New(t.Context(), pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
 		return aiBridgeClient, nil
-	}, logger, tracer)
+	}, logger, tracer, nil)
 	require.NoError(t, err, "create new aibridged")
 	t.Cleanup(func() {
 		_ = srv.Shutdown(ctx)
@@ -382,7 +382,7 @@ func TestIntegrationWithMetrics(t *testing.T) {
 	// Given: aibridged is started.
 	srv, err := aibridged.New(ctx, pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
 		return aiBridgeClient, nil
-	}, logger, testTracer)
+	}, logger, testTracer, nil)
 	require.NoError(t, err, "create new aibridged")
 	t.Cleanup(func() {
 		_ = srv.Shutdown(ctx)
--- a/enterprise/aibridged/aibridged_test.go
+++ b/enterprise/aibridged/aibridged_test.go
@@ -41,7 +41,7 @@ func newTestServer(t *testing.T) (*aibridged.Server, *mock.MockDRPCClient, *mock
 		pool,
 		func(ctx context.Context) (aibridged.DRPCClient, error) {
 			return client, nil
-		}, logger, testTracer)
+		}, logger, testTracer, nil)
 	require.NoError(t, err, "create new aibridged")
 	t.Cleanup(func() {
 		srv.Shutdown(context.Background())
@@ -309,7 +309,7 @@ func TestRouting(t *testing.T) {
 			// Given: aibridged is started.
 			srv, err := aibridged.New(t.Context(), pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
 				return client, nil
-			}, logger, testTracer)
+			}, logger, testTracer, nil)
 			require.NoError(t, err, "create new aibridged")
 			t.Cleanup(func() {
 				_ = srv.Shutdown(testutil.Context(t, testutil.WaitShort))
--- a/enterprise/aibridged/http.go
+++ b/enterprise/aibridged/http.go
@@ -19,8 +19,19 @@ var (
 	ErrConnect               = xerrors.New("could not connect to coderd")
 	ErrUnauthorized          = xerrors.New("unauthorized")
 	ErrAcquireRequestHandler = xerrors.New("failed to acquire request handler")
+	ErrOverloaded            = xerrors.New("server is overloaded")
 )

+// Handler returns an http.Handler for the server: s wrapped by the configured
+// overload protection (rate limiting and concurrency control), or s itself if none is set.
+func (s *Server) Handler() http.Handler {
+	var handler http.Handler = s
+	if s.overloadProtection != nil {
+		handler = s.overloadProtection.WrapHandler(handler)
+	}
+	return handler
+}
+
 // ServeHTTP is the entrypoint for requests which will be intercepted by AI Bridge.
 // This function will validate that the given API key may be used to perform the request.
 //
--- a/enterprise/aibridged/overload.go
+++ b/enterprise/aibridged/overload.go
@@ -0,0 +1,119 @@
+package aibridged
+
+import (
+	"net/http"
+	"sync/atomic"
+	"time"
+
+	"github.com/go-chi/httprate"
+
+	"cdr.dev/slog"
+	"github.com/coder/coder/v2/coderd/httpapi"
+	"github.com/coder/coder/v2/codersdk"
+)
+
+// OverloadConfig configures overload protection for the AI Bridge server.
+type OverloadConfig struct {
+	// MaxConcurrency is the maximum number of concurrent requests allowed.
+	// Set to 0 (or any non-positive value) to disable concurrency limiting.
+	MaxConcurrency int64
+
+	// RateLimit is the maximum number of requests per RateWindow.
+	// Set to 0 (or any non-positive value) to disable rate limiting.
+	RateLimit int64
+
+	// RateWindow is the duration of the rate limiting window. It must be positive for rate limiting to take effect.
+	RateWindow time.Duration
+}
+
+// OverloadProtection provides middleware for protecting the AI Bridge server
+// from overload conditions.
+type OverloadProtection struct {
+	config OverloadConfig
+	logger slog.Logger
+
+	// currentConcurrency counts the requests currently inside the concurrency middleware.
+	currentConcurrency atomic.Int64
+
+	// rateLimiter is the rate limiting middleware; nil when rate limiting is disabled.
+	rateLimiter func(http.Handler) http.Handler
+}
+
+// NewOverloadProtection creates an OverloadProtection from config, using a logger named "overload".
+func NewOverloadProtection(config OverloadConfig, logger slog.Logger) *OverloadProtection {
+	op := &OverloadProtection{
+		config: config,
+		logger: logger.Named("overload"),
+	}
+
+	// Build the rate limiter only when both RateLimit and RateWindow are positive; limits are keyed per client IP (httprate.KeyByIP).
+	if config.RateLimit > 0 && config.RateWindow > 0 {
+		op.rateLimiter = httprate.Limit(
+			int(config.RateLimit),
+			config.RateWindow,
+			httprate.WithKeyFuncs(httprate.KeyByIP),
+			httprate.WithLimitHandler(func(w http.ResponseWriter, r *http.Request) {
+				httpapi.Write(r.Context(), w, http.StatusTooManyRequests, codersdk.Response{
+					Message: "AI Bridge rate limit exceeded. Please try again later.",
+				})
+			}),
+		)
+	}
+
+	return op
+}
+
+// ConcurrencyMiddleware returns a middleware that rejects requests with HTTP 503
+// once more than MaxConcurrency are in flight. Returns nil if MaxConcurrency <= 0.
+func (op *OverloadProtection) ConcurrencyMiddleware() func(http.Handler) http.Handler {
+	if op.config.MaxConcurrency <= 0 {
+		return nil
+	}
+
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			current := op.currentConcurrency.Add(1)
+			defer op.currentConcurrency.Add(-1)
+
+			if current > op.config.MaxConcurrency {
+				op.logger.Warn(r.Context(), "ai bridge concurrency limit exceeded",
+					slog.F("current", current),
+					slog.F("max", op.config.MaxConcurrency),
+				)
+				httpapi.Write(r.Context(), w, http.StatusServiceUnavailable, codersdk.Response{
+					Message: "AI Bridge is currently at capacity. Please try again later.",
+				})
+				return
+			}
+
+			next.ServeHTTP(w, r)
+		})
+	}
+}
+
+// RateLimitMiddleware returns the rate limiting middleware stored on op.
+// Returns nil if rate limiting is disabled.
+func (op *OverloadProtection) RateLimitMiddleware() func(http.Handler) http.Handler {
+	return op.rateLimiter
+}
+
+// CurrentConcurrency returns the number of requests currently inside the concurrency middleware.
+func (op *OverloadProtection) CurrentConcurrency() int64 {
+	return op.currentConcurrency.Load()
+}
+
+// WrapHandler wraps the given handler with all enabled overload protection
+// middleware. Rate limiting is outermost, so it runs before concurrency limiting.
+func (op *OverloadProtection) WrapHandler(handler http.Handler) http.Handler {
+	// The last wrapper applied runs first, so wrap with concurrency limiting here.
+	if concurrencyMW := op.ConcurrencyMiddleware(); concurrencyMW != nil {
+		handler = concurrencyMW(handler)
+	}
+
+	// Rate limiting is applied last so that it is checked first on each request.
+	if op.rateLimiter != nil {
+		handler = op.rateLimiter(handler)
+	}
+
+	return handler
+}
--- a/enterprise/aibridged/overload_test.go
+++ b/enterprise/aibridged/overload_test.go
@@ -0,0 +1,226 @@
+package aibridged_test
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"cdr.dev/slog"
+	"cdr.dev/slog/sloggers/slogtest"
+	"github.com/coder/coder/v2/enterprise/aibridged"
+	"github.com/coder/coder/v2/testutil"
+)
+
+func TestOverloadProtection_ConcurrencyLimit(t *testing.T) {
+	t.Parallel()
+
+	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
+
+	t.Run("allows_requests_within_limit", func(t *testing.T) {
+		t.Parallel()
+
+		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
+			MaxConcurrency: 5,
+		}, logger)
+
+		var handlerCalls atomic.Int32
+		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			handlerCalls.Add(1)
+			w.WriteHeader(http.StatusOK)
+		})
+
+		wrapped := op.WrapHandler(handler)
+
+		// Make 5 requests in sequence (never more than one in flight) - all should succeed.
+		for i := 0; i < 5; i++ {
+			req := httptest.NewRequest(http.MethodGet, "/", nil)
+			rec := httptest.NewRecorder()
+			wrapped.ServeHTTP(rec, req)
+			assert.Equal(t, http.StatusOK, rec.Code)
+		}
+
+		assert.Equal(t, int32(5), handlerCalls.Load())
+	})
+
+	t.Run("rejects_requests_over_limit", func(t *testing.T) {
+		t.Parallel()
+
+		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
+			MaxConcurrency: 2,
+		}, logger)
+
+		// Create a handler that blocks until we release it.
+		blocked := make(chan struct{})
+		var handlerCalls atomic.Int32
+		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			handlerCalls.Add(1)
+			<-blocked
+			w.WriteHeader(http.StatusOK)
+		})
+
+		wrapped := op.WrapHandler(handler)
+
+		// Start 2 requests that will block.
+		var wg sync.WaitGroup
+		for i := 0; i < 2; i++ {
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				req := httptest.NewRequest(http.MethodGet, "/", nil)
+				rec := httptest.NewRecorder()
+				wrapped.ServeHTTP(rec, req)
+			}()
+		}
+
+		// Wait until both handlers are running, so both concurrency slots are held.
+		require.Eventually(t, func() bool {
+			return handlerCalls.Load() == 2
+		}, testutil.WaitShort, testutil.IntervalFast)
+
+		// Make a third request - it should be rejected with 503 since both slots are taken.
+		req := httptest.NewRequest(http.MethodGet, "/", nil)
+		rec := httptest.NewRecorder()
+		wrapped.ServeHTTP(rec, req)
+		assert.Equal(t, http.StatusServiceUnavailable, rec.Code)
+
+		// The rejected request has already released its count, leaving only the 2 blocked requests.
+		assert.Equal(t, int64(2), op.CurrentConcurrency())
+
+		// Unblock the handlers.
+		close(blocked)
+		wg.Wait()
+
+		// Verify concurrency is back to 0.
+		assert.Equal(t, int64(0), op.CurrentConcurrency())
+	})
+
+	t.Run("disabled_when_zero", func(t *testing.T) {
+		t.Parallel()
+
+		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
+			MaxConcurrency: 0, // Disabled.
+		}, logger)
+
+		assert.Nil(t, op.ConcurrencyMiddleware())
+	})
+}
+
+func TestOverloadProtection_RateLimit(t *testing.T) {
+	t.Parallel()
+
+	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
+
+	t.Run("allows_requests_within_limit", func(t *testing.T) {
+		t.Parallel()
+
+		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
+			RateLimit:  5,
+			RateWindow: time.Minute,
+		}, logger)
+
+		var handlerCalls atomic.Int32
+		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			handlerCalls.Add(1)
+			w.WriteHeader(http.StatusOK)
+		})
+
+		wrapped := op.WrapHandler(handler)
+
+		// Make 5 requests (the full budget for the window) - all should succeed.
+		for i := 0; i < 5; i++ {
+			req := httptest.NewRequest(http.MethodGet, "/", nil)
+			rec := httptest.NewRecorder()
+			wrapped.ServeHTTP(rec, req)
+			assert.Equal(t, http.StatusOK, rec.Code)
+		}
+
+		assert.Equal(t, int32(5), handlerCalls.Load())
+	})
+
+	t.Run("rejects_requests_over_limit", func(t *testing.T) {
+		t.Parallel()
+
+		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
+			RateLimit:  2,
+			RateWindow: time.Minute,
+		}, logger)
+
+		var handlerCalls atomic.Int32
+		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			handlerCalls.Add(1)
+			w.WriteHeader(http.StatusOK)
+		})
+
+		wrapped := op.WrapHandler(handler)
+
+		// Make 3 requests (all from the same httptest RemoteAddr, so one rate limit key) - first 2 should succeed, 3rd should be rate limited.
+		for i := 0; i < 3; i++ {
+			req := httptest.NewRequest(http.MethodGet, "/", nil)
+			rec := httptest.NewRecorder()
+			wrapped.ServeHTTP(rec, req)
+
+			if i < 2 {
+				assert.Equal(t, http.StatusOK, rec.Code)
+			} else {
+				assert.Equal(t, http.StatusTooManyRequests, rec.Code)
+			}
+		}
+
+		assert.Equal(t, int32(2), handlerCalls.Load())
+	})
+
+	t.Run("disabled_when_zero", func(t *testing.T) {
+		t.Parallel()
+
+		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
+			RateLimit: 0, // Disabled.
+		}, logger)
+
+		assert.Nil(t, op.RateLimitMiddleware())
+	})
+}
+
+func TestOverloadProtection_Combined(t *testing.T) {
+	t.Parallel()
+
+	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
+
+	t.Run("both_limits_applied", func(t *testing.T) {
+		t.Parallel()
+
+		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
+			MaxConcurrency: 10,
+			RateLimit:      3,
+			RateWindow:     time.Minute,
+		}, logger)
+
+		var handlerCalls atomic.Int32
+		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			handlerCalls.Add(1)
+			w.WriteHeader(http.StatusOK)
+		})
+
+		wrapped := op.WrapHandler(handler)
+
+		// Make 4 sequential requests - the concurrency limit (10) is never reached, so the first 3 succeed and the 4th is rate limited.
+		for i := 0; i < 4; i++ {
+			req := httptest.NewRequest(http.MethodGet, "/", nil)
+			rec := httptest.NewRecorder()
+			wrapped.ServeHTTP(rec, req)
+
+			if i < 3 {
+				assert.Equal(t, http.StatusOK, rec.Code)
+			} else {
+				assert.Equal(t, http.StatusTooManyRequests, rec.Code)
+			}
+		}
+
+		assert.Equal(t, int32(3), handlerCalls.Load())
+	})
+}
--- a/enterprise/cli/aibridged.go
+++ b/enterprise/cli/aibridged.go
@@ -44,10 +44,21 @@ func newAIBridgeDaemon(coderAPI *coderd.API) (*aibridged.Server, error) {
 		return nil, xerrors.Errorf("create request pool: %w", err)
 	}

+	// Configure overload protection if any limits are set.
+	var overloadCfg *aibridged.OverloadConfig
+	bridgeCfg := coderAPI.DeploymentValues.AI.BridgeConfig
+	if bridgeCfg.MaxConcurrency.Value() > 0 || bridgeCfg.RateLimit.Value() > 0 {
+		overloadCfg = &aibridged.OverloadConfig{
+			MaxConcurrency: bridgeCfg.MaxConcurrency.Value(),
+			RateLimit:      bridgeCfg.RateLimit.Value(),
+			RateWindow:     bridgeCfg.RateWindow.Value(),
+		}
+	}
+
 	// Create daemon.
 	srv, err := aibridged.New(ctx, pool, func(dialCtx context.Context) (aibridged.DRPCClient, error) {
 		return coderAPI.CreateInMemoryAIBridgeServer(dialCtx)
-	}, logger, tracer)
+	}, logger, tracer, overloadCfg)
 	if err != nil {
 		return nil, xerrors.Errorf("start in-memory aibridge daemon: %w", err)
 	}
--- a/enterprise/cli/testdata/coder_server_--help.golden
+++ b/enterprise/cli/testdata/coder_server_--help.golden
@@ -119,12 +119,23 @@ AI BRIDGE OPTIONS:
          requests (requires the "oauth2" and "mcp-server-http" experiments to
          be enabled).

+      --aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
+          Maximum number of concurrent AI Bridge requests. Set to 0 to disable
+          (unlimited).
+
      --aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
          The base URL of the OpenAI API.

      --aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
          The key to authenticate against the OpenAI API.

+      --aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
+          Maximum number of AI Bridge requests per rate window. Set to 0 to
+          disable rate limiting.
+
+      --aibridge-rate-window duration, $CODER_AIBRIDGE_RATE_WINDOW (default: 1m)
+          Duration of the rate limiting window for AI Bridge requests.
+
 CLIENT OPTIONS: 
 These options change the behavior of how clients interact with the Coder.
 Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.
--- a/site/src/api/typesGenerated.ts
+++ b/site/src/api/typesGenerated.ts
@@ -33,6 +33,12 @@ export interface AIBridgeConfig {
 	readonly bedrock: AIBridgeBedrockConfig;
 	readonly inject_coder_mcp_tools: boolean;
 	readonly retention: number;
+	/**
+	 * Overload protection settings.
+	 */
+	readonly max_concurrency: number;
+	readonly rate_limit: number;
+	readonly rate_window: number;
 }

 // From codersdk/aibridge.go