Compare commits

...

1 Commits

Author SHA1 Message Date
Paweł Banaszewski
d4c7cb2021 feat(aibridged): add overload protection with rate limiting and concurrency control
This adds configurable overload protection to the AI Bridge daemon to prevent
the server from being overwhelmed during periods of high load.

New configuration options:
- CODER_AIBRIDGE_MAX_CONCURRENCY: Maximum number of concurrent AI Bridge requests (0 to disable)
- CODER_AIBRIDGE_RATE_LIMIT: Maximum number of requests per rate window (0 to disable)
- CODER_AIBRIDGE_RATE_WINDOW: Duration of the rate limiting window (default: 1m)

When limits are exceeded:
- Concurrency limit: Returns HTTP 503 Service Unavailable
- Rate limit: Returns HTTP 429 Too Many Requests

The overload protection middleware wraps the aibridged HTTP handler and provides:
- Concurrency limiting using an atomic counter
- Rate limiting using the go-chi/httprate library

Both protections are optional and disabled by default (0 values).

Fixes coder/internal#1153
2025-12-08 11:17:58 +00:00
17 changed files with 539 additions and 14 deletions

View File

@@ -118,12 +118,23 @@ AI BRIDGE OPTIONS:
requests (requires the "oauth2" and "mcp-server-http" experiments to
be enabled).
--aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
Maximum number of concurrent AI Bridge requests. Set to 0 to disable
(unlimited).
--aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
The base URL of the OpenAI API.
--aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
The key to authenticate against the OpenAI API.
--aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
Maximum number of AI Bridge requests per rate window. Set to 0 to
disable rate limiting.
--aibridge-rate-window duration, $CODER_AIBRIDGE_RATE_WINDOW (default: 1m)
Duration of the rate limiting window for AI Bridge requests.
CLIENT OPTIONS:
These options change the behavior of how clients interact with the Coder.
Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.

View File

@@ -742,6 +742,17 @@ aibridge:
# (token, prompt, tool use).
# (default: 60d, type: duration)
retention: 1440h0m0s
# Maximum number of concurrent AI Bridge requests. Set to 0 to disable
# (unlimited).
# (default: 0, type: int)
max_concurrency: 0
# Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate
# limiting.
# (default: 0, type: int)
rate_limit: 0
# Duration of the rate limiting window for AI Bridge requests.
# (default: 1m, type: duration)
rate_window: 1m0s
# Configure data retention policies for various database tables. Retention
# policies automatically purge old data to reduce database size and improve
# performance. Setting a retention duration to 0 disables automatic purging for

10
coderd/apidoc/docs.go generated
View File

@@ -11877,9 +11877,19 @@ const docTemplate = `{
"inject_coder_mcp_tools": {
"type": "boolean"
},
"max_concurrency": {
"description": "Overload protection settings.",
"type": "integer"
},
"openai": {
"$ref": "#/definitions/codersdk.AIBridgeOpenAIConfig"
},
"rate_limit": {
"type": "integer"
},
"rate_window": {
"type": "integer"
},
"retention": {
"type": "integer"
}

View File

@@ -10543,9 +10543,19 @@
"inject_coder_mcp_tools": {
"type": "boolean"
},
"max_concurrency": {
"description": "Overload protection settings.",
"type": "integer"
},
"openai": {
"$ref": "#/definitions/codersdk.AIBridgeOpenAIConfig"
},
"rate_limit": {
"type": "integer"
},
"rate_window": {
"type": "integer"
},
"retention": {
"type": "integer"
}

View File

@@ -3391,6 +3391,37 @@ Write out the current server config as YAML to stdout.`,
YAML: "retention",
Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
},
{
Name: "AI Bridge Max Concurrency",
Description: "Maximum number of concurrent AI Bridge requests. Set to 0 to disable (unlimited).",
Flag: "aibridge-max-concurrency",
Env: "CODER_AIBRIDGE_MAX_CONCURRENCY",
Value: &c.AI.BridgeConfig.MaxConcurrency,
Default: "0",
Group: &deploymentGroupAIBridge,
YAML: "max_concurrency",
},
{
Name: "AI Bridge Rate Limit",
Description: "Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate limiting.",
Flag: "aibridge-rate-limit",
Env: "CODER_AIBRIDGE_RATE_LIMIT",
Value: &c.AI.BridgeConfig.RateLimit,
Default: "0",
Group: &deploymentGroupAIBridge,
YAML: "rate_limit",
},
{
Name: "AI Bridge Rate Window",
Description: "Duration of the rate limiting window for AI Bridge requests.",
Flag: "aibridge-rate-window",
Env: "CODER_AIBRIDGE_RATE_WINDOW",
Value: &c.AI.BridgeConfig.RateWindow,
Default: "1m",
Group: &deploymentGroupAIBridge,
YAML: "rate_window",
Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
},
// Retention settings
{
Name: "Audit Logs Retention",
@@ -3461,6 +3492,10 @@ type AIBridgeConfig struct {
Bedrock AIBridgeBedrockConfig `json:"bedrock" typescript:",notnull"`
InjectCoderMCPTools serpent.Bool `json:"inject_coder_mcp_tools" typescript:",notnull"`
Retention serpent.Duration `json:"retention" typescript:",notnull"`
// Overload protection settings.
MaxConcurrency serpent.Int64 `json:"max_concurrency" typescript:",notnull"`
RateLimit serpent.Int64 `json:"rate_limit" typescript:",notnull"`
RateWindow serpent.Duration `json:"rate_window" typescript:",notnull"`
}
type AIBridgeOpenAIConfig struct {

View File

@@ -176,10 +176,13 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"rate_window": 0,
"retention": 0
}
},

View File

@@ -390,24 +390,30 @@
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"rate_window": 0,
"retention": 0
}
```
### Properties
| Name | Type | Required | Restrictions | Description |
|--------------------------|----------------------------------------------------------------------|----------|--------------|-------------|
| `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | |
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | |
| `enabled` | boolean | false | | |
| `inject_coder_mcp_tools` | boolean | false | | |
| `openai` | [codersdk.AIBridgeOpenAIConfig](#codersdkaibridgeopenaiconfig) | false | | |
| `retention` | integer | false | | |
| Name | Type | Required | Restrictions | Description |
|--------------------------|----------------------------------------------------------------------|----------|--------------|-------------------------------|
| `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | |
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | |
| `enabled` | boolean | false | | |
| `inject_coder_mcp_tools` | boolean | false | | |
| `max_concurrency` | integer | false | | Overload protection settings. |
| `openai` | [codersdk.AIBridgeOpenAIConfig](#codersdkaibridgeopenaiconfig) | false | | |
| `rate_limit` | integer | false | | |
| `rate_window` | integer | false | | |
| `retention` | integer | false | | |
## codersdk.AIBridgeInterception
@@ -700,10 +706,13 @@
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"rate_window": 0,
"retention": 0
}
}
@@ -2860,10 +2869,13 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"rate_window": 0,
"retention": 0
}
},
@@ -3382,10 +3394,13 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
},
"enabled": true,
"inject_coder_mcp_tools": true,
"max_concurrency": 0,
"openai": {
"base_url": "string",
"key": "string"
},
"rate_limit": 0,
"rate_window": 0,
"retention": 0
}
},

View File

@@ -1771,6 +1771,39 @@ Whether to inject Coder's MCP tools into intercepted AI Bridge requests (require
Length of time to retain data such as interceptions and all related records (token, prompt, tool use).
### --aibridge-max-concurrency
| | |
|-------------|----------------------------------------------|
| Type | <code>int</code> |
| Environment | <code>$CODER_AIBRIDGE_MAX_CONCURRENCY</code> |
| YAML | <code>aibridge.max_concurrency</code> |
| Default | <code>0</code> |
Maximum number of concurrent AI Bridge requests. Set to 0 to disable (unlimited).
### --aibridge-rate-limit
| | |
|-------------|-----------------------------------------|
| Type | <code>int</code> |
| Environment | <code>$CODER_AIBRIDGE_RATE_LIMIT</code> |
| YAML | <code>aibridge.rate_limit</code> |
| Default | <code>0</code> |
Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate limiting.
### --aibridge-rate-window
| | |
|-------------|------------------------------------------|
| Type | <code>duration</code> |
| Environment | <code>$CODER_AIBRIDGE_RATE_WINDOW</code> |
| YAML | <code>aibridge.rate_window</code> |
| Default | <code>1m</code> |
Duration of the rate limiting window for AI Bridge requests.
### --audit-logs-retention
| | |

View File

@@ -33,6 +33,9 @@ type Server struct {
// A pool of [aibridge.RequestBridge] instances, which service incoming requests.
requestBridgePool Pooler
// overloadProtection provides rate limiting and concurrency control.
overloadProtection *OverloadProtection
logger slog.Logger
tracer trace.Tracer
wg sync.WaitGroup
@@ -50,7 +53,7 @@ type Server struct {
shutdownOnce sync.Once
}
func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger, tracer trace.Tracer) (*Server, error) {
func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger, tracer trace.Tracer, overloadCfg *OverloadConfig) (*Server, error) {
if rpcDialer == nil {
return nil, xerrors.Errorf("nil rpcDialer given")
}
@@ -68,6 +71,16 @@ func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger,
requestBridgePool: pool,
}
// Initialize overload protection if configured.
if overloadCfg != nil {
daemon.overloadProtection = NewOverloadProtection(*overloadCfg, logger)
logger.Info(ctx, "overload protection enabled",
slog.F("max_concurrency", overloadCfg.MaxConcurrency),
slog.F("rate_limit", overloadCfg.RateLimit),
slog.F("rate_window", overloadCfg.RateWindow),
)
}
daemon.wg.Add(1)
go daemon.connect()

View File

@@ -189,7 +189,7 @@ func TestIntegration(t *testing.T) {
// Given: aibridged is started.
srv, err := aibridged.New(t.Context(), pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
return aiBridgeClient, nil
}, logger, tracer)
}, logger, tracer, nil)
require.NoError(t, err, "create new aibridged")
t.Cleanup(func() {
_ = srv.Shutdown(ctx)
@@ -382,7 +382,7 @@ func TestIntegrationWithMetrics(t *testing.T) {
// Given: aibridged is started.
srv, err := aibridged.New(ctx, pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
return aiBridgeClient, nil
}, logger, testTracer)
}, logger, testTracer, nil)
require.NoError(t, err, "create new aibridged")
t.Cleanup(func() {
_ = srv.Shutdown(ctx)

View File

@@ -41,7 +41,7 @@ func newTestServer(t *testing.T) (*aibridged.Server, *mock.MockDRPCClient, *mock
pool,
func(ctx context.Context) (aibridged.DRPCClient, error) {
return client, nil
}, logger, testTracer)
}, logger, testTracer, nil)
require.NoError(t, err, "create new aibridged")
t.Cleanup(func() {
srv.Shutdown(context.Background())
@@ -309,7 +309,7 @@ func TestRouting(t *testing.T) {
// Given: aibridged is started.
srv, err := aibridged.New(t.Context(), pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
return client, nil
}, logger, testTracer)
}, logger, testTracer, nil)
require.NoError(t, err, "create new aibridged")
t.Cleanup(func() {
_ = srv.Shutdown(testutil.Context(t, testutil.WaitShort))

View File

@@ -19,8 +19,19 @@ var (
ErrConnect = xerrors.New("could not connect to coderd")
ErrUnauthorized = xerrors.New("unauthorized")
ErrAcquireRequestHandler = xerrors.New("failed to acquire request handler")
ErrOverloaded = xerrors.New("server is overloaded")
)
// Handler returns an http.Handler that wraps the server with any configured
// overload protection (rate limiting and concurrency control).
//
// When no overload protection was configured at construction time, the
// server itself is returned unwrapped.
func (s *Server) Handler() http.Handler {
	if s.overloadProtection == nil {
		return s
	}
	return s.overloadProtection.WrapHandler(s)
}
// ServeHTTP is the entrypoint for requests which will be intercepted by AI Bridge.
// This function will validate that the given API key may be used to perform the request.
//

View File

@@ -0,0 +1,119 @@
package aibridged
import (
"net/http"
"sync/atomic"
"time"
"github.com/go-chi/httprate"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/codersdk"
)
// OverloadConfig configures overload protection for the AI Bridge server.
// Both protections are independent and optional: a zero value for either
// limit disables that protection.
type OverloadConfig struct {
	// MaxConcurrency is the maximum number of concurrent requests allowed.
	// Set to 0 to disable concurrency limiting.
	MaxConcurrency int64
	// RateLimit is the maximum number of requests per RateWindow.
	// Set to 0 to disable rate limiting.
	RateLimit int64
	// RateWindow is the duration of the rate limiting window. Rate limiting
	// only takes effect when both RateLimit and RateWindow are positive.
	RateWindow time.Duration
}
// OverloadProtection provides middleware for protecting the AI Bridge server
// from overload conditions. It bundles two independent protections: a
// concurrency cap (responds with HTTP 503 when exceeded) and a request rate
// limit (responds with HTTP 429 when exceeded).
type OverloadProtection struct {
	// config holds the limits this instance was constructed with; it is not
	// mutated after construction.
	config OverloadConfig
	logger slog.Logger

	// currentConcurrency tracks the number of in-flight requests. It is
	// incremented on entry and decremented (via defer) on exit by the
	// concurrency middleware.
	currentConcurrency atomic.Int64
	// rateLimiter is the rate limiting middleware; nil when rate limiting
	// is disabled (RateLimit or RateWindow not positive).
	rateLimiter func(http.Handler) http.Handler
}
// NewOverloadProtection creates a new OverloadProtection instance.
//
// The rate limiter is only wired up when both RateLimit and RateWindow are
// positive; otherwise it stays nil and rate limiting is a no-op.
func NewOverloadProtection(config OverloadConfig, logger slog.Logger) *OverloadProtection {
	p := &OverloadProtection{
		config: config,
		logger: logger.Named("overload"),
	}

	if config.RateLimit <= 0 || config.RateWindow <= 0 {
		// Rate limiting disabled; p.rateLimiter remains nil.
		return p
	}

	// Reject over-limit requests with 429 and a machine-readable body.
	limitExceeded := func(w http.ResponseWriter, r *http.Request) {
		httpapi.Write(r.Context(), w, http.StatusTooManyRequests, codersdk.Response{
			Message: "AI Bridge rate limit exceeded. Please try again later.",
		})
	}
	p.rateLimiter = httprate.Limit(
		int(config.RateLimit),
		config.RateWindow,
		httprate.WithKeyFuncs(httprate.KeyByIP),
		httprate.WithLimitHandler(limitExceeded),
	)
	return p
}
// ConcurrencyMiddleware returns a middleware that limits concurrent requests.
// Returns nil if concurrency limiting is disabled.
//
// The counter is incremented before the limit check and decremented on exit,
// so even rejected requests are briefly counted; this keeps the check a
// single atomic add with no lock.
func (op *OverloadProtection) ConcurrencyMiddleware() func(http.Handler) http.Handler {
	limit := op.config.MaxConcurrency
	if limit <= 0 {
		return nil
	}
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			inFlight := op.currentConcurrency.Add(1)
			defer op.currentConcurrency.Add(-1)

			if inFlight <= limit {
				next.ServeHTTP(w, r)
				return
			}

			op.logger.Warn(r.Context(), "ai bridge concurrency limit exceeded",
				slog.F("current", inFlight),
				slog.F("max", limit),
			)
			httpapi.Write(r.Context(), w, http.StatusServiceUnavailable, codersdk.Response{
				Message: "AI Bridge is currently at capacity. Please try again later.",
			})
		})
	}
}
// RateLimitMiddleware returns a middleware that limits the rate of requests.
// Returns nil if rate limiting is disabled (the limiter is only constructed
// when both RateLimit and RateWindow are positive).
func (op *OverloadProtection) RateLimitMiddleware() func(http.Handler) http.Handler {
	return op.rateLimiter
}
// CurrentConcurrency returns the current number of concurrent requests.
// This is a point-in-time snapshot of the atomic counter maintained by the
// concurrency middleware; it is safe to call from any goroutine.
func (op *OverloadProtection) CurrentConcurrency() int64 {
	return op.currentConcurrency.Load()
}
// WrapHandler wraps the given handler with all enabled overload protection
// middleware.
//
// The outermost middleware executes first, so the rate limiter is applied
// LAST in wrapping order: the cheaper rate-limit check then runs before the
// concurrency check, and a rate-limited request never touches the
// concurrency counter. (The previous ordering wrapped the rate limiter
// innermost, which contradicted its own "rate limiting first" comment and
// let rejected requests transiently consume a concurrency slot.)
func (op *OverloadProtection) WrapHandler(handler http.Handler) http.Handler {
	// Concurrency limiting sits closest to the wrapped handler.
	if concurrencyMW := op.ConcurrencyMiddleware(); concurrencyMW != nil {
		handler = concurrencyMW(handler)
	}
	// Rate limiting wraps everything so it executes first (cheaper check).
	if op.rateLimiter != nil {
		handler = op.rateLimiter(handler)
	}
	return handler
}

View File

@@ -0,0 +1,226 @@
package aibridged_test
import (
"net/http"
"net/http/httptest"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"cdr.dev/slog"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/enterprise/aibridged"
"github.com/coder/coder/v2/testutil"
)
// TestOverloadProtection_ConcurrencyLimit exercises the concurrency cap:
// requests within the limit pass through, requests over it get 503, and the
// middleware is disabled entirely when MaxConcurrency is 0.
func TestOverloadProtection_ConcurrencyLimit(t *testing.T) {
	t.Parallel()
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)

	t.Run("allows_requests_within_limit", func(t *testing.T) {
		t.Parallel()
		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
			MaxConcurrency: 5,
		}, logger)
		var handlerCalls atomic.Int32
		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			handlerCalls.Add(1)
			w.WriteHeader(http.StatusOK)
		})
		wrapped := op.WrapHandler(handler)
		// Make 5 requests in sequence - all should succeed.
		// Sequential requests never overlap, so in-flight concurrency is at
		// most 1; this verifies there are no false rejections below the cap.
		for i := 0; i < 5; i++ {
			req := httptest.NewRequest(http.MethodGet, "/", nil)
			rec := httptest.NewRecorder()
			wrapped.ServeHTTP(rec, req)
			assert.Equal(t, http.StatusOK, rec.Code)
		}
		assert.Equal(t, int32(5), handlerCalls.Load())
	})

	t.Run("rejects_requests_over_limit", func(t *testing.T) {
		t.Parallel()
		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
			MaxConcurrency: 2,
		}, logger)
		// Create a handler that blocks until we release it.
		// Blocking keeps the first two requests in flight so the third
		// genuinely exceeds the concurrency cap.
		blocked := make(chan struct{})
		var handlerCalls atomic.Int32
		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			handlerCalls.Add(1)
			<-blocked
			w.WriteHeader(http.StatusOK)
		})
		wrapped := op.WrapHandler(handler)
		// Start 2 requests that will block.
		var wg sync.WaitGroup
		for i := 0; i < 2; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				req := httptest.NewRequest(http.MethodGet, "/", nil)
				rec := httptest.NewRecorder()
				wrapped.ServeHTTP(rec, req)
			}()
		}
		// Wait for the handlers to be called.
		// Polling the call counter avoids a race between goroutine startup
		// and the over-limit request below.
		require.Eventually(t, func() bool {
			return handlerCalls.Load() == 2
		}, testutil.WaitShort, testutil.IntervalFast)
		// Make a third request - it should be rejected.
		req := httptest.NewRequest(http.MethodGet, "/", nil)
		rec := httptest.NewRecorder()
		wrapped.ServeHTTP(rec, req)
		assert.Equal(t, http.StatusServiceUnavailable, rec.Code)
		// Verify current concurrency is 2.
		assert.Equal(t, int64(2), op.CurrentConcurrency())
		// Unblock the handlers.
		close(blocked)
		wg.Wait()
		// Verify concurrency is back to 0.
		assert.Equal(t, int64(0), op.CurrentConcurrency())
	})

	t.Run("disabled_when_zero", func(t *testing.T) {
		t.Parallel()
		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
			MaxConcurrency: 0, // Disabled.
		}, logger)
		assert.Nil(t, op.ConcurrencyMiddleware())
	})
}
// TestOverloadProtection_RateLimit exercises the request rate limiter:
// requests within the window's budget pass, requests beyond it get 429, and
// the middleware is absent entirely when RateLimit is 0.
func TestOverloadProtection_RateLimit(t *testing.T) {
	t.Parallel()
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)

	// send issues a single GET through the given handler and returns the
	// recorded status code.
	send := func(h http.Handler) int {
		rec := httptest.NewRecorder()
		h.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
		return rec.Code
	}

	t.Run("allows_requests_within_limit", func(t *testing.T) {
		t.Parallel()
		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
			RateLimit:  5,
			RateWindow: time.Minute,
		}, logger)

		var served atomic.Int32
		wrapped := op.WrapHandler(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			served.Add(1)
			w.WriteHeader(http.StatusOK)
		}))

		// All 5 requests fit within the per-window budget.
		for i := 0; i < 5; i++ {
			assert.Equal(t, http.StatusOK, send(wrapped))
		}
		assert.Equal(t, int32(5), served.Load())
	})

	t.Run("rejects_requests_over_limit", func(t *testing.T) {
		t.Parallel()
		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
			RateLimit:  2,
			RateWindow: time.Minute,
		}, logger)

		var served atomic.Int32
		wrapped := op.WrapHandler(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			served.Add(1)
			w.WriteHeader(http.StatusOK)
		}))

		// The first 2 requests pass; the 3rd exceeds the window's budget
		// and must be rejected without reaching the inner handler.
		for i := 0; i < 3; i++ {
			want := http.StatusOK
			if i >= 2 {
				want = http.StatusTooManyRequests
			}
			assert.Equal(t, want, send(wrapped))
		}
		assert.Equal(t, int32(2), served.Load())
	})

	t.Run("disabled_when_zero", func(t *testing.T) {
		t.Parallel()
		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
			RateLimit: 0, // Disabled.
		}, logger)
		assert.Nil(t, op.RateLimitMiddleware())
	})
}
// TestOverloadProtection_Combined verifies that both protections can be
// active on one handler at the same time; with a generous concurrency cap,
// the tighter rate limit is the one that rejects.
func TestOverloadProtection_Combined(t *testing.T) {
	t.Parallel()
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)

	t.Run("both_limits_applied", func(t *testing.T) {
		t.Parallel()
		op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
			MaxConcurrency: 10,
			RateLimit:      3,
			RateWindow:     time.Minute,
		}, logger)

		var served atomic.Int32
		wrapped := op.WrapHandler(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			served.Add(1)
			w.WriteHeader(http.StatusOK)
		}))

		// The first 3 requests fit the rate limit; the 4th must be rate
		// limited (concurrency stays at 1 for sequential requests, well
		// under the cap of 10).
		for i := 0; i < 4; i++ {
			rec := httptest.NewRecorder()
			wrapped.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))

			want := http.StatusOK
			if i >= 3 {
				want = http.StatusTooManyRequests
			}
			assert.Equal(t, want, rec.Code)
		}
		assert.Equal(t, int32(3), served.Load())
	})
}

View File

@@ -44,10 +44,21 @@ func newAIBridgeDaemon(coderAPI *coderd.API) (*aibridged.Server, error) {
return nil, xerrors.Errorf("create request pool: %w", err)
}
// Configure overload protection if any limits are set.
var overloadCfg *aibridged.OverloadConfig
bridgeCfg := coderAPI.DeploymentValues.AI.BridgeConfig
if bridgeCfg.MaxConcurrency.Value() > 0 || bridgeCfg.RateLimit.Value() > 0 {
overloadCfg = &aibridged.OverloadConfig{
MaxConcurrency: bridgeCfg.MaxConcurrency.Value(),
RateLimit: bridgeCfg.RateLimit.Value(),
RateWindow: bridgeCfg.RateWindow.Value(),
}
}
// Create daemon.
srv, err := aibridged.New(ctx, pool, func(dialCtx context.Context) (aibridged.DRPCClient, error) {
return coderAPI.CreateInMemoryAIBridgeServer(dialCtx)
}, logger, tracer)
}, logger, tracer, overloadCfg)
if err != nil {
return nil, xerrors.Errorf("start in-memory aibridge daemon: %w", err)
}

View File

@@ -119,12 +119,23 @@ AI BRIDGE OPTIONS:
requests (requires the "oauth2" and "mcp-server-http" experiments to
be enabled).
--aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
Maximum number of concurrent AI Bridge requests. Set to 0 to disable
(unlimited).
--aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
The base URL of the OpenAI API.
--aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
The key to authenticate against the OpenAI API.
--aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
Maximum number of AI Bridge requests per rate window. Set to 0 to
disable rate limiting.
--aibridge-rate-window duration, $CODER_AIBRIDGE_RATE_WINDOW (default: 1m)
Duration of the rate limiting window for AI Bridge requests.
CLIENT OPTIONS:
These options change the behavior of how clients interact with the Coder.
Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.

View File

@@ -33,6 +33,12 @@ export interface AIBridgeConfig {
readonly bedrock: AIBridgeBedrockConfig;
readonly inject_coder_mcp_tools: boolean;
readonly retention: number;
/**
* Overload protection settings.
*/
readonly max_concurrency: number;
readonly rate_limit: number;
readonly rate_window: number;
}
// From codersdk/aibridge.go