Compare commits
1 Commit
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d4c7cb2021 |
11
cli/testdata/coder_server_--help.golden
vendored
11
cli/testdata/coder_server_--help.golden
vendored
@@ -118,12 +118,23 @@ AI BRIDGE OPTIONS:
|
||||
requests (requires the "oauth2" and "mcp-server-http" experiments to
|
||||
be enabled).
|
||||
|
||||
--aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
|
||||
Maximum number of concurrent AI Bridge requests. Set to 0 to disable
|
||||
(unlimited).
|
||||
|
||||
--aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
|
||||
The base URL of the OpenAI API.
|
||||
|
||||
--aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
|
||||
The key to authenticate against the OpenAI API.
|
||||
|
||||
--aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
|
||||
Maximum number of AI Bridge requests per rate window. Set to 0 to
|
||||
disable rate limiting.
|
||||
|
||||
--aibridge-rate-window duration, $CODER_AIBRIDGE_RATE_WINDOW (default: 1m)
|
||||
Duration of the rate limiting window for AI Bridge requests.
|
||||
|
||||
CLIENT OPTIONS:
|
||||
These options change the behavior of how clients interact with the Coder.
|
||||
Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.
|
||||
|
||||
11
cli/testdata/server-config.yaml.golden
vendored
11
cli/testdata/server-config.yaml.golden
vendored
@@ -742,6 +742,17 @@ aibridge:
|
||||
# (token, prompt, tool use).
|
||||
# (default: 60d, type: duration)
|
||||
retention: 1440h0m0s
|
||||
# Maximum number of concurrent AI Bridge requests. Set to 0 to disable
|
||||
# (unlimited).
|
||||
# (default: 0, type: int)
|
||||
max_concurrency: 0
|
||||
# Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate
|
||||
# limiting.
|
||||
# (default: 0, type: int)
|
||||
rate_limit: 0
|
||||
# Duration of the rate limiting window for AI Bridge requests.
|
||||
# (default: 1m, type: duration)
|
||||
rate_window: 1m0s
|
||||
# Configure data retention policies for various database tables. Retention
|
||||
# policies automatically purge old data to reduce database size and improve
|
||||
# performance. Setting a retention duration to 0 disables automatic purging for
|
||||
|
||||
10
coderd/apidoc/docs.go
generated
10
coderd/apidoc/docs.go
generated
@@ -11877,9 +11877,19 @@ const docTemplate = `{
|
||||
"inject_coder_mcp_tools": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"max_concurrency": {
|
||||
"description": "Overload protection settings.",
|
||||
"type": "integer"
|
||||
},
|
||||
"openai": {
|
||||
"$ref": "#/definitions/codersdk.AIBridgeOpenAIConfig"
|
||||
},
|
||||
"rate_limit": {
|
||||
"type": "integer"
|
||||
},
|
||||
"rate_window": {
|
||||
"type": "integer"
|
||||
},
|
||||
"retention": {
|
||||
"type": "integer"
|
||||
}
|
||||
|
||||
10
coderd/apidoc/swagger.json
generated
10
coderd/apidoc/swagger.json
generated
@@ -10543,9 +10543,19 @@
|
||||
"inject_coder_mcp_tools": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"max_concurrency": {
|
||||
"description": "Overload protection settings.",
|
||||
"type": "integer"
|
||||
},
|
||||
"openai": {
|
||||
"$ref": "#/definitions/codersdk.AIBridgeOpenAIConfig"
|
||||
},
|
||||
"rate_limit": {
|
||||
"type": "integer"
|
||||
},
|
||||
"rate_window": {
|
||||
"type": "integer"
|
||||
},
|
||||
"retention": {
|
||||
"type": "integer"
|
||||
}
|
||||
|
||||
@@ -3391,6 +3391,37 @@ Write out the current server config as YAML to stdout.`,
|
||||
YAML: "retention",
|
||||
Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
|
||||
},
|
||||
{
|
||||
Name: "AI Bridge Max Concurrency",
|
||||
Description: "Maximum number of concurrent AI Bridge requests. Set to 0 to disable (unlimited).",
|
||||
Flag: "aibridge-max-concurrency",
|
||||
Env: "CODER_AIBRIDGE_MAX_CONCURRENCY",
|
||||
Value: &c.AI.BridgeConfig.MaxConcurrency,
|
||||
Default: "0",
|
||||
Group: &deploymentGroupAIBridge,
|
||||
YAML: "max_concurrency",
|
||||
},
|
||||
{
|
||||
Name: "AI Bridge Rate Limit",
|
||||
Description: "Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate limiting.",
|
||||
Flag: "aibridge-rate-limit",
|
||||
Env: "CODER_AIBRIDGE_RATE_LIMIT",
|
||||
Value: &c.AI.BridgeConfig.RateLimit,
|
||||
Default: "0",
|
||||
Group: &deploymentGroupAIBridge,
|
||||
YAML: "rate_limit",
|
||||
},
|
||||
{
|
||||
Name: "AI Bridge Rate Window",
|
||||
Description: "Duration of the rate limiting window for AI Bridge requests.",
|
||||
Flag: "aibridge-rate-window",
|
||||
Env: "CODER_AIBRIDGE_RATE_WINDOW",
|
||||
Value: &c.AI.BridgeConfig.RateWindow,
|
||||
Default: "1m",
|
||||
Group: &deploymentGroupAIBridge,
|
||||
YAML: "rate_window",
|
||||
Annotations: serpent.Annotations{}.Mark(annotationFormatDuration, "true"),
|
||||
},
|
||||
// Retention settings
|
||||
{
|
||||
Name: "Audit Logs Retention",
|
||||
@@ -3461,6 +3492,10 @@ type AIBridgeConfig struct {
|
||||
Bedrock AIBridgeBedrockConfig `json:"bedrock" typescript:",notnull"`
|
||||
InjectCoderMCPTools serpent.Bool `json:"inject_coder_mcp_tools" typescript:",notnull"`
|
||||
Retention serpent.Duration `json:"retention" typescript:",notnull"`
|
||||
// Overload protection settings.
|
||||
MaxConcurrency serpent.Int64 `json:"max_concurrency" typescript:",notnull"`
|
||||
RateLimit serpent.Int64 `json:"rate_limit" typescript:",notnull"`
|
||||
RateWindow serpent.Duration `json:"rate_window" typescript:",notnull"`
|
||||
}
|
||||
|
||||
type AIBridgeOpenAIConfig struct {
|
||||
|
||||
3
docs/reference/api/general.md
generated
3
docs/reference/api/general.md
generated
@@ -176,10 +176,13 @@ curl -X GET http://coder-server:8080/api/v2/deployment/config \
|
||||
},
|
||||
"enabled": true,
|
||||
"inject_coder_mcp_tools": true,
|
||||
"max_concurrency": 0,
|
||||
"openai": {
|
||||
"base_url": "string",
|
||||
"key": "string"
|
||||
},
|
||||
"rate_limit": 0,
|
||||
"rate_window": 0,
|
||||
"retention": 0
|
||||
}
|
||||
},
|
||||
|
||||
31
docs/reference/api/schemas.md
generated
31
docs/reference/api/schemas.md
generated
@@ -390,24 +390,30 @@
|
||||
},
|
||||
"enabled": true,
|
||||
"inject_coder_mcp_tools": true,
|
||||
"max_concurrency": 0,
|
||||
"openai": {
|
||||
"base_url": "string",
|
||||
"key": "string"
|
||||
},
|
||||
"rate_limit": 0,
|
||||
"rate_window": 0,
|
||||
"retention": 0
|
||||
}
|
||||
```
|
||||
|
||||
### Properties
|
||||
|
||||
| Name | Type | Required | Restrictions | Description |
|
||||
|--------------------------|----------------------------------------------------------------------|----------|--------------|-------------|
|
||||
| `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | |
|
||||
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | |
|
||||
| `enabled` | boolean | false | | |
|
||||
| `inject_coder_mcp_tools` | boolean | false | | |
|
||||
| `openai` | [codersdk.AIBridgeOpenAIConfig](#codersdkaibridgeopenaiconfig) | false | | |
|
||||
| `retention` | integer | false | | |
|
||||
| Name | Type | Required | Restrictions | Description |
|
||||
|--------------------------|----------------------------------------------------------------------|----------|--------------|-------------------------------|
|
||||
| `anthropic` | [codersdk.AIBridgeAnthropicConfig](#codersdkaibridgeanthropicconfig) | false | | |
|
||||
| `bedrock` | [codersdk.AIBridgeBedrockConfig](#codersdkaibridgebedrockconfig) | false | | |
|
||||
| `enabled` | boolean | false | | |
|
||||
| `inject_coder_mcp_tools` | boolean | false | | |
|
||||
| `max_concurrency` | integer | false | | Overload protection settings. |
|
||||
| `openai` | [codersdk.AIBridgeOpenAIConfig](#codersdkaibridgeopenaiconfig) | false | | |
|
||||
| `rate_limit` | integer | false | | |
|
||||
| `rate_window` | integer | false | | |
|
||||
| `retention` | integer | false | | |
|
||||
|
||||
## codersdk.AIBridgeInterception
|
||||
|
||||
@@ -700,10 +706,13 @@
|
||||
},
|
||||
"enabled": true,
|
||||
"inject_coder_mcp_tools": true,
|
||||
"max_concurrency": 0,
|
||||
"openai": {
|
||||
"base_url": "string",
|
||||
"key": "string"
|
||||
},
|
||||
"rate_limit": 0,
|
||||
"rate_window": 0,
|
||||
"retention": 0
|
||||
}
|
||||
}
|
||||
@@ -2860,10 +2869,13 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
|
||||
},
|
||||
"enabled": true,
|
||||
"inject_coder_mcp_tools": true,
|
||||
"max_concurrency": 0,
|
||||
"openai": {
|
||||
"base_url": "string",
|
||||
"key": "string"
|
||||
},
|
||||
"rate_limit": 0,
|
||||
"rate_window": 0,
|
||||
"retention": 0
|
||||
}
|
||||
},
|
||||
@@ -3382,10 +3394,13 @@ CreateWorkspaceRequest provides options for creating a new workspace. Only one o
|
||||
},
|
||||
"enabled": true,
|
||||
"inject_coder_mcp_tools": true,
|
||||
"max_concurrency": 0,
|
||||
"openai": {
|
||||
"base_url": "string",
|
||||
"key": "string"
|
||||
},
|
||||
"rate_limit": 0,
|
||||
"rate_window": 0,
|
||||
"retention": 0
|
||||
}
|
||||
},
|
||||
|
||||
33
docs/reference/cli/server.md
generated
33
docs/reference/cli/server.md
generated
@@ -1771,6 +1771,39 @@ Whether to inject Coder's MCP tools into intercepted AI Bridge requests (require
|
||||
|
||||
Length of time to retain data such as interceptions and all related records (token, prompt, tool use).
|
||||
|
||||
### --aibridge-max-concurrency
|
||||
|
||||
| | |
|
||||
|-------------|----------------------------------------------|
|
||||
| Type | <code>int</code> |
|
||||
| Environment | <code>$CODER_AIBRIDGE_MAX_CONCURRENCY</code> |
|
||||
| YAML | <code>aibridge.max_concurrency</code> |
|
||||
| Default | <code>0</code> |
|
||||
|
||||
Maximum number of concurrent AI Bridge requests. Set to 0 to disable (unlimited).
|
||||
|
||||
### --aibridge-rate-limit
|
||||
|
||||
| | |
|
||||
|-------------|-----------------------------------------|
|
||||
| Type | <code>int</code> |
|
||||
| Environment | <code>$CODER_AIBRIDGE_RATE_LIMIT</code> |
|
||||
| YAML | <code>aibridge.rate_limit</code> |
|
||||
| Default | <code>0</code> |
|
||||
|
||||
Maximum number of AI Bridge requests per rate window. Set to 0 to disable rate limiting.
|
||||
|
||||
### --aibridge-rate-window
|
||||
|
||||
| | |
|
||||
|-------------|------------------------------------------|
|
||||
| Type | <code>duration</code> |
|
||||
| Environment | <code>$CODER_AIBRIDGE_RATE_WINDOW</code> |
|
||||
| YAML | <code>aibridge.rate_window</code> |
|
||||
| Default | <code>1m</code> |
|
||||
|
||||
Duration of the rate limiting window for AI Bridge requests.
|
||||
|
||||
### --audit-logs-retention
|
||||
|
||||
| | |
|
||||
|
||||
@@ -33,6 +33,9 @@ type Server struct {
|
||||
// A pool of [aibridge.RequestBridge] instances, which service incoming requests.
|
||||
requestBridgePool Pooler
|
||||
|
||||
// overloadProtection provides rate limiting and concurrency control.
|
||||
overloadProtection *OverloadProtection
|
||||
|
||||
logger slog.Logger
|
||||
tracer trace.Tracer
|
||||
wg sync.WaitGroup
|
||||
@@ -50,7 +53,7 @@ type Server struct {
|
||||
shutdownOnce sync.Once
|
||||
}
|
||||
|
||||
func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger, tracer trace.Tracer) (*Server, error) {
|
||||
func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger, tracer trace.Tracer, overloadCfg *OverloadConfig) (*Server, error) {
|
||||
if rpcDialer == nil {
|
||||
return nil, xerrors.Errorf("nil rpcDialer given")
|
||||
}
|
||||
@@ -68,6 +71,16 @@ func New(ctx context.Context, pool Pooler, rpcDialer Dialer, logger slog.Logger,
|
||||
requestBridgePool: pool,
|
||||
}
|
||||
|
||||
// Initialize overload protection if configured.
|
||||
if overloadCfg != nil {
|
||||
daemon.overloadProtection = NewOverloadProtection(*overloadCfg, logger)
|
||||
logger.Info(ctx, "overload protection enabled",
|
||||
slog.F("max_concurrency", overloadCfg.MaxConcurrency),
|
||||
slog.F("rate_limit", overloadCfg.RateLimit),
|
||||
slog.F("rate_window", overloadCfg.RateWindow),
|
||||
)
|
||||
}
|
||||
|
||||
daemon.wg.Add(1)
|
||||
go daemon.connect()
|
||||
|
||||
|
||||
@@ -189,7 +189,7 @@ func TestIntegration(t *testing.T) {
|
||||
// Given: aibridged is started.
|
||||
srv, err := aibridged.New(t.Context(), pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
|
||||
return aiBridgeClient, nil
|
||||
}, logger, tracer)
|
||||
}, logger, tracer, nil)
|
||||
require.NoError(t, err, "create new aibridged")
|
||||
t.Cleanup(func() {
|
||||
_ = srv.Shutdown(ctx)
|
||||
@@ -382,7 +382,7 @@ func TestIntegrationWithMetrics(t *testing.T) {
|
||||
// Given: aibridged is started.
|
||||
srv, err := aibridged.New(ctx, pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
|
||||
return aiBridgeClient, nil
|
||||
}, logger, testTracer)
|
||||
}, logger, testTracer, nil)
|
||||
require.NoError(t, err, "create new aibridged")
|
||||
t.Cleanup(func() {
|
||||
_ = srv.Shutdown(ctx)
|
||||
|
||||
@@ -41,7 +41,7 @@ func newTestServer(t *testing.T) (*aibridged.Server, *mock.MockDRPCClient, *mock
|
||||
pool,
|
||||
func(ctx context.Context) (aibridged.DRPCClient, error) {
|
||||
return client, nil
|
||||
}, logger, testTracer)
|
||||
}, logger, testTracer, nil)
|
||||
require.NoError(t, err, "create new aibridged")
|
||||
t.Cleanup(func() {
|
||||
srv.Shutdown(context.Background())
|
||||
@@ -309,7 +309,7 @@ func TestRouting(t *testing.T) {
|
||||
// Given: aibridged is started.
|
||||
srv, err := aibridged.New(t.Context(), pool, func(ctx context.Context) (aibridged.DRPCClient, error) {
|
||||
return client, nil
|
||||
}, logger, testTracer)
|
||||
}, logger, testTracer, nil)
|
||||
require.NoError(t, err, "create new aibridged")
|
||||
t.Cleanup(func() {
|
||||
_ = srv.Shutdown(testutil.Context(t, testutil.WaitShort))
|
||||
|
||||
@@ -19,8 +19,19 @@ var (
|
||||
ErrConnect = xerrors.New("could not connect to coderd")
|
||||
ErrUnauthorized = xerrors.New("unauthorized")
|
||||
ErrAcquireRequestHandler = xerrors.New("failed to acquire request handler")
|
||||
ErrOverloaded = xerrors.New("server is overloaded")
|
||||
)
|
||||
|
||||
// Handler returns an http.Handler that wraps the server with any configured
|
||||
// overload protection (rate limiting and concurrency control).
|
||||
func (s *Server) Handler() http.Handler {
|
||||
var handler http.Handler = s
|
||||
if s.overloadProtection != nil {
|
||||
handler = s.overloadProtection.WrapHandler(handler)
|
||||
}
|
||||
return handler
|
||||
}
|
||||
|
||||
// ServeHTTP is the entrypoint for requests which will be intercepted by AI Bridge.
|
||||
// This function will validate that the given API key may be used to perform the request.
|
||||
//
|
||||
|
||||
119
enterprise/aibridged/overload.go
Normal file
119
enterprise/aibridged/overload.go
Normal file
@@ -0,0 +1,119 @@
|
||||
package aibridged
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/httprate"
|
||||
|
||||
"cdr.dev/slog"
|
||||
"github.com/coder/coder/v2/coderd/httpapi"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
)
|
||||
|
||||
// OverloadConfig holds the tunables for AI Bridge overload protection.
type OverloadConfig struct {
	// MaxConcurrency caps how many requests may be in flight at once.
	// A value of 0 turns concurrency limiting off.
	MaxConcurrency int64

	// RateLimit caps how many requests are accepted within one RateWindow.
	// A value of 0 turns rate limiting off.
	RateLimit int64

	// RateWindow is the length of the window over which RateLimit is counted.
	RateWindow time.Duration
}
|
||||
|
||||
// OverloadProtection provides middleware for protecting the AI Bridge server
|
||||
// from overload conditions.
|
||||
type OverloadProtection struct {
|
||||
config OverloadConfig
|
||||
logger slog.Logger
|
||||
|
||||
// concurrencyLimiter tracks the number of concurrent requests.
|
||||
currentConcurrency atomic.Int64
|
||||
|
||||
// rateLimiter is the rate limiting middleware.
|
||||
rateLimiter func(http.Handler) http.Handler
|
||||
}
|
||||
|
||||
// NewOverloadProtection creates a new OverloadProtection instance.
|
||||
func NewOverloadProtection(config OverloadConfig, logger slog.Logger) *OverloadProtection {
|
||||
op := &OverloadProtection{
|
||||
config: config,
|
||||
logger: logger.Named("overload"),
|
||||
}
|
||||
|
||||
// Initialize rate limiter if configured.
|
||||
if config.RateLimit > 0 && config.RateWindow > 0 {
|
||||
op.rateLimiter = httprate.Limit(
|
||||
int(config.RateLimit),
|
||||
config.RateWindow,
|
||||
httprate.WithKeyFuncs(httprate.KeyByIP),
|
||||
httprate.WithLimitHandler(func(w http.ResponseWriter, r *http.Request) {
|
||||
httpapi.Write(r.Context(), w, http.StatusTooManyRequests, codersdk.Response{
|
||||
Message: "AI Bridge rate limit exceeded. Please try again later.",
|
||||
})
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
return op
|
||||
}
|
||||
|
||||
// ConcurrencyMiddleware returns a middleware that limits concurrent requests.
|
||||
// Returns nil if concurrency limiting is disabled.
|
||||
func (op *OverloadProtection) ConcurrencyMiddleware() func(http.Handler) http.Handler {
|
||||
if op.config.MaxConcurrency <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
current := op.currentConcurrency.Add(1)
|
||||
defer op.currentConcurrency.Add(-1)
|
||||
|
||||
if current > op.config.MaxConcurrency {
|
||||
op.logger.Warn(r.Context(), "ai bridge concurrency limit exceeded",
|
||||
slog.F("current", current),
|
||||
slog.F("max", op.config.MaxConcurrency),
|
||||
)
|
||||
httpapi.Write(r.Context(), w, http.StatusServiceUnavailable, codersdk.Response{
|
||||
Message: "AI Bridge is currently at capacity. Please try again later.",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
next.ServeHTTP(w, r)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// RateLimitMiddleware returns a middleware that limits the rate of requests.
|
||||
// Returns nil if rate limiting is disabled.
|
||||
func (op *OverloadProtection) RateLimitMiddleware() func(http.Handler) http.Handler {
|
||||
return op.rateLimiter
|
||||
}
|
||||
|
||||
// CurrentConcurrency returns the current number of concurrent requests.
|
||||
func (op *OverloadProtection) CurrentConcurrency() int64 {
|
||||
return op.currentConcurrency.Load()
|
||||
}
|
||||
|
||||
// WrapHandler wraps the given handler with all enabled overload protection
|
||||
// middleware.
|
||||
func (op *OverloadProtection) WrapHandler(handler http.Handler) http.Handler {
|
||||
// Apply rate limiting first (cheaper check).
|
||||
if op.rateLimiter != nil {
|
||||
handler = op.rateLimiter(handler)
|
||||
}
|
||||
|
||||
// Then apply concurrency limiting.
|
||||
if concurrencyMW := op.ConcurrencyMiddleware(); concurrencyMW != nil {
|
||||
handler = concurrencyMW(handler)
|
||||
}
|
||||
|
||||
return handler
|
||||
}
|
||||
226
enterprise/aibridged/overload_test.go
Normal file
226
enterprise/aibridged/overload_test.go
Normal file
@@ -0,0 +1,226 @@
|
||||
package aibridged_test
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"cdr.dev/slog"
|
||||
"cdr.dev/slog/sloggers/slogtest"
|
||||
"github.com/coder/coder/v2/enterprise/aibridged"
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
)
|
||||
|
||||
func TestOverloadProtection_ConcurrencyLimit(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
|
||||
|
||||
t.Run("allows_requests_within_limit", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
|
||||
MaxConcurrency: 5,
|
||||
}, logger)
|
||||
|
||||
var handlerCalls atomic.Int32
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
handlerCalls.Add(1)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
wrapped := op.WrapHandler(handler)
|
||||
|
||||
// Make 5 requests in sequence - all should succeed.
|
||||
for i := 0; i < 5; i++ {
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
wrapped.ServeHTTP(rec, req)
|
||||
assert.Equal(t, http.StatusOK, rec.Code)
|
||||
}
|
||||
|
||||
assert.Equal(t, int32(5), handlerCalls.Load())
|
||||
})
|
||||
|
||||
t.Run("rejects_requests_over_limit", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
|
||||
MaxConcurrency: 2,
|
||||
}, logger)
|
||||
|
||||
// Create a handler that blocks until we release it.
|
||||
blocked := make(chan struct{})
|
||||
var handlerCalls atomic.Int32
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
handlerCalls.Add(1)
|
||||
<-blocked
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
wrapped := op.WrapHandler(handler)
|
||||
|
||||
// Start 2 requests that will block.
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < 2; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
wrapped.ServeHTTP(rec, req)
|
||||
}()
|
||||
}
|
||||
|
||||
// Wait for the handlers to be called.
|
||||
require.Eventually(t, func() bool {
|
||||
return handlerCalls.Load() == 2
|
||||
}, testutil.WaitShort, testutil.IntervalFast)
|
||||
|
||||
// Make a third request - it should be rejected.
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
wrapped.ServeHTTP(rec, req)
|
||||
assert.Equal(t, http.StatusServiceUnavailable, rec.Code)
|
||||
|
||||
// Verify current concurrency is 2.
|
||||
assert.Equal(t, int64(2), op.CurrentConcurrency())
|
||||
|
||||
// Unblock the handlers.
|
||||
close(blocked)
|
||||
wg.Wait()
|
||||
|
||||
// Verify concurrency is back to 0.
|
||||
assert.Equal(t, int64(0), op.CurrentConcurrency())
|
||||
})
|
||||
|
||||
t.Run("disabled_when_zero", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
|
||||
MaxConcurrency: 0, // Disabled.
|
||||
}, logger)
|
||||
|
||||
assert.Nil(t, op.ConcurrencyMiddleware())
|
||||
})
|
||||
}
|
||||
|
||||
func TestOverloadProtection_RateLimit(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
|
||||
|
||||
t.Run("allows_requests_within_limit", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
|
||||
RateLimit: 5,
|
||||
RateWindow: time.Minute,
|
||||
}, logger)
|
||||
|
||||
var handlerCalls atomic.Int32
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
handlerCalls.Add(1)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
wrapped := op.WrapHandler(handler)
|
||||
|
||||
// Make 5 requests - all should succeed.
|
||||
for i := 0; i < 5; i++ {
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
wrapped.ServeHTTP(rec, req)
|
||||
assert.Equal(t, http.StatusOK, rec.Code)
|
||||
}
|
||||
|
||||
assert.Equal(t, int32(5), handlerCalls.Load())
|
||||
})
|
||||
|
||||
t.Run("rejects_requests_over_limit", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
|
||||
RateLimit: 2,
|
||||
RateWindow: time.Minute,
|
||||
}, logger)
|
||||
|
||||
var handlerCalls atomic.Int32
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
handlerCalls.Add(1)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
wrapped := op.WrapHandler(handler)
|
||||
|
||||
// Make 3 requests - first 2 should succeed, 3rd should be rate limited.
|
||||
for i := 0; i < 3; i++ {
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
wrapped.ServeHTTP(rec, req)
|
||||
|
||||
if i < 2 {
|
||||
assert.Equal(t, http.StatusOK, rec.Code)
|
||||
} else {
|
||||
assert.Equal(t, http.StatusTooManyRequests, rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
assert.Equal(t, int32(2), handlerCalls.Load())
|
||||
})
|
||||
|
||||
t.Run("disabled_when_zero", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
|
||||
RateLimit: 0, // Disabled.
|
||||
}, logger)
|
||||
|
||||
assert.Nil(t, op.RateLimitMiddleware())
|
||||
})
|
||||
}
|
||||
|
||||
func TestOverloadProtection_Combined(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
|
||||
|
||||
t.Run("both_limits_applied", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
op := aibridged.NewOverloadProtection(aibridged.OverloadConfig{
|
||||
MaxConcurrency: 10,
|
||||
RateLimit: 3,
|
||||
RateWindow: time.Minute,
|
||||
}, logger)
|
||||
|
||||
var handlerCalls atomic.Int32
|
||||
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
handlerCalls.Add(1)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
})
|
||||
|
||||
wrapped := op.WrapHandler(handler)
|
||||
|
||||
// Make 4 requests - first 3 should succeed, 4th should be rate limited.
|
||||
for i := 0; i < 4; i++ {
|
||||
req := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
wrapped.ServeHTTP(rec, req)
|
||||
|
||||
if i < 3 {
|
||||
assert.Equal(t, http.StatusOK, rec.Code)
|
||||
} else {
|
||||
assert.Equal(t, http.StatusTooManyRequests, rec.Code)
|
||||
}
|
||||
}
|
||||
|
||||
assert.Equal(t, int32(3), handlerCalls.Load())
|
||||
})
|
||||
}
|
||||
@@ -44,10 +44,21 @@ func newAIBridgeDaemon(coderAPI *coderd.API) (*aibridged.Server, error) {
|
||||
return nil, xerrors.Errorf("create request pool: %w", err)
|
||||
}
|
||||
|
||||
// Configure overload protection if any limits are set.
|
||||
var overloadCfg *aibridged.OverloadConfig
|
||||
bridgeCfg := coderAPI.DeploymentValues.AI.BridgeConfig
|
||||
if bridgeCfg.MaxConcurrency.Value() > 0 || bridgeCfg.RateLimit.Value() > 0 {
|
||||
overloadCfg = &aibridged.OverloadConfig{
|
||||
MaxConcurrency: bridgeCfg.MaxConcurrency.Value(),
|
||||
RateLimit: bridgeCfg.RateLimit.Value(),
|
||||
RateWindow: bridgeCfg.RateWindow.Value(),
|
||||
}
|
||||
}
|
||||
|
||||
// Create daemon.
|
||||
srv, err := aibridged.New(ctx, pool, func(dialCtx context.Context) (aibridged.DRPCClient, error) {
|
||||
return coderAPI.CreateInMemoryAIBridgeServer(dialCtx)
|
||||
}, logger, tracer)
|
||||
}, logger, tracer, overloadCfg)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("start in-memory aibridge daemon: %w", err)
|
||||
}
|
||||
|
||||
@@ -119,12 +119,23 @@ AI BRIDGE OPTIONS:
|
||||
requests (requires the "oauth2" and "mcp-server-http" experiments to
|
||||
be enabled).
|
||||
|
||||
--aibridge-max-concurrency int, $CODER_AIBRIDGE_MAX_CONCURRENCY (default: 0)
|
||||
Maximum number of concurrent AI Bridge requests. Set to 0 to disable
|
||||
(unlimited).
|
||||
|
||||
--aibridge-openai-base-url string, $CODER_AIBRIDGE_OPENAI_BASE_URL (default: https://api.openai.com/v1/)
|
||||
The base URL of the OpenAI API.
|
||||
|
||||
--aibridge-openai-key string, $CODER_AIBRIDGE_OPENAI_KEY
|
||||
The key to authenticate against the OpenAI API.
|
||||
|
||||
--aibridge-rate-limit int, $CODER_AIBRIDGE_RATE_LIMIT (default: 0)
|
||||
Maximum number of AI Bridge requests per rate window. Set to 0 to
|
||||
disable rate limiting.
|
||||
|
||||
--aibridge-rate-window duration, $CODER_AIBRIDGE_RATE_WINDOW (default: 1m)
|
||||
Duration of the rate limiting window for AI Bridge requests.
|
||||
|
||||
CLIENT OPTIONS:
|
||||
These options change the behavior of how clients interact with the Coder.
|
||||
Clients include the Coder CLI, Coder Desktop, IDE extensions, and the web UI.
|
||||
|
||||
6
site/src/api/typesGenerated.ts
generated
6
site/src/api/typesGenerated.ts
generated
@@ -33,6 +33,12 @@ export interface AIBridgeConfig {
|
||||
readonly bedrock: AIBridgeBedrockConfig;
|
||||
readonly inject_coder_mcp_tools: boolean;
|
||||
readonly retention: number;
|
||||
/**
|
||||
* Overload protection settings.
|
||||
*/
|
||||
readonly max_concurrency: number;
|
||||
readonly rate_limit: number;
|
||||
readonly rate_window: number;
|
||||
}
|
||||
|
||||
// From codersdk/aibridge.go
|
||||
|
||||
Reference in New Issue
Block a user