Compare commits

...

3 Commits

Author SHA1 Message Date
Susana Cardoso Ferreira 34470a18e5 config updates for prebuilds traces 2026-02-12 11:47:14 +00:00
Sas Swart 02e46abc89 add local obs 2026-02-12 10:18:05 +00:00
Sas Swart f5ad1a1743 fix: preserve the stream property for chat/completions calls in aibridge 2026-02-04 15:36:53 +00:00
17 changed files with 2517 additions and 8 deletions
+2 -2
View File
@@ -1051,10 +1051,10 @@ func (c *StoreReconciler) provision(
// Strip trace context - provisionerd is a separate service and should
// start its own trace rather than continuing the prebuilds trace.
buildCtx := trace.ContextWithSpan(ctx, tracing.NoopSpan)
//buildCtx := trace.ContextWithSpan(ctx, tracing.NoopSpan)
_, provisionerJob, _, err := builder.Build(
buildCtx,
ctx,
db,
c.fileCache,
func(_ policy.Action, _ rbac.Objecter) bool {
+2 -2
View File
@@ -473,7 +473,7 @@ require (
github.com/anthropics/anthropic-sdk-go v1.19.0
github.com/brianvoe/gofakeit/v7 v7.14.0
github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225
github.com/coder/aibridge v1.0.1
github.com/coder/aibridge v1.0.2-0.20260204153447-fc99cd835341
github.com/coder/aisdk-go v0.0.9
github.com/coder/boundary v0.6.1
github.com/coder/preview v1.0.4
@@ -596,4 +596,4 @@ tool (
replace github.com/anthropics/anthropic-sdk-go v1.19.0 => github.com/dannykopping/anthropic-sdk-go v0.0.0-20251230111224-88a4315810bd
// https://github.com/openai/openai-go/pull/602
replace github.com/openai/openai-go/v3 => github.com/SasSwart/openai-go/v3 v3.0.0-20260202093810-72af3b857f95
replace github.com/openai/openai-go/v3 => github.com/SasSwart/openai-go/v3 v3.0.0-20260204134041-fb987b42a728
+4 -4
View File
@@ -693,8 +693,8 @@ github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/ProtonMail/go-crypto v1.3.0 h1:ILq8+Sf5If5DCpHQp4PbZdS1J7HDFRXz/+xKBiRGFrw=
github.com/ProtonMail/go-crypto v1.3.0/go.mod h1:9whxjD8Rbs29b4XWbB8irEcE8KHMqaR2e7GWU1R+/PE=
github.com/SasSwart/openai-go/v3 v3.0.0-20260202093810-72af3b857f95 h1:HVJp3FanNaeFAlwg0/lkdkSnwFemHnwwjXBM8KRj540=
github.com/SasSwart/openai-go/v3 v3.0.0-20260202093810-72af3b857f95/go.mod h1:cdufnVK14cWcT9qA1rRtrXx4FTRsgbDPW7Ia7SS5cZo=
github.com/SasSwart/openai-go/v3 v3.0.0-20260204134041-fb987b42a728 h1:FOjd3xOH+arcrtz1e5P6WZ/VtRD5KQHHRg4kc4BZers=
github.com/SasSwart/openai-go/v3 v3.0.0-20260204134041-fb987b42a728/go.mod h1:cdufnVK14cWcT9qA1rRtrXx4FTRsgbDPW7Ia7SS5cZo=
github.com/SherClockHolmes/webpush-go v1.4.0 h1:ocnzNKWN23T9nvHi6IfyrQjkIc0oJWv1B1pULsf9i3s=
github.com/SherClockHolmes/webpush-go v1.4.0/go.mod h1:XSq8pKX11vNV8MJEMwjrlTkxhAj1zKfxmyhdV7Pd6UA=
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
@@ -927,8 +927,8 @@ github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f h1:Y8xYupdHxryycyPlc9Y
github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f/go.mod h1:HlzOvOjVBOfTGSRXRyY0OiCS/3J1akRGQQpRO/7zyF4=
github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225 h1:tRIViZ5JRmzdOEo5wUWngaGEFBG8OaE1o2GIHN5ujJ8=
github.com/coder/agentapi-sdk-go v0.0.0-20250505131810-560d1d88d225/go.mod h1:rNLVpYgEVeu1Zk29K64z6Od8RBP9DwqCu9OfCzh8MR4=
github.com/coder/aibridge v1.0.1 h1:l6MgNVLvyu9EFp/Q00OItymTlGVK16XXT/KfSuDmxBM=
github.com/coder/aibridge v1.0.1/go.mod h1:M1aoiK6qmybTjD2nzcTCRPXzA/I0Ned+MAxUmz4Ju+k=
github.com/coder/aibridge v1.0.2-0.20260204153447-fc99cd835341 h1:auhVmi16oMocN9a+FJ4SlmP3dgHitFXgaZLWZ1fPDMY=
github.com/coder/aibridge v1.0.2-0.20260204153447-fc99cd835341/go.mod h1:c7Of2xfAksZUrPWN180Eh60fiKgzs7dyOjniTjft6AE=
github.com/coder/aisdk-go v0.0.9 h1:Vzo/k2qwVGLTR10ESDeP2Ecek1SdPfZlEjtTfMveiVo=
github.com/coder/aisdk-go v0.0.9/go.mod h1:KF6/Vkono0FJJOtWtveh5j7yfNrSctVTpwgweYWSp5M=
github.com/coder/boundary v0.6.1 h1:hLnrincIFA8Wak5SrH/xQDIIhkKQpnHVotLwC585z7g=
+1
View File
@@ -0,0 +1 @@
.env
+86
View File
@@ -0,0 +1,86 @@
// Grafana Alloy configuration to scrape pprof from develop.sh and forward to Pyroscope
// The develop.sh server exposes pprof at /api/v2/debug/pprof/ instead of /debug/pprof/
pyroscope.scrape "coderd" {
targets = [
{
"__address__" = "host.docker.internal:3000",
"service_name" = "coderd",
},
]
authorization {
credentials = sys.env("CODER_AUTH_TOKEN")
type = "Bearer"
}
forward_to = [pyroscope.write.local.receiver]
profiling_config {
profile.process_cpu {
enabled = true
delta = true
path = "/api/v2/debug/pprof/profile"
}
profile.memory {
enabled = true
path = "/api/v2/debug/pprof/allocs"
}
profile.goroutine {
enabled = true
path = "/api/v2/debug/pprof/goroutine"
}
profile.block {
enabled = false
path = "/api/v2/debug/pprof/block"
}
profile.mutex {
enabled = false
path = "/api/v2/debug/pprof/mutex"
}
}
delta_profiling_duration = "2s"
scrape_interval = "3s"
scrape_timeout = "10s"
}
pyroscope.scrape "llmmock" {
targets = []
forward_to = [pyroscope.write.local.receiver]
profiling_config {
profile.process_cpu {
enabled = true
delta = true
path = "/debug/pprof/profile"
}
profile.memory {
enabled = true
path = "/debug/pprof/allocs"
}
profile.goroutine {
enabled = true
path = "/debug/pprof/goroutine"
}
profile.block {
enabled = false
path = "/debug/pprof/block"
}
profile.mutex {
enabled = false
path = "/debug/pprof/mutex"
}
}
delta_profiling_duration = "2s"
scrape_interval = "3s"
scrape_timeout = "10s"
}
pyroscope.write "local" {
endpoint {
url = "http://pyroscope:4040/"
}
}
+134
View File
@@ -0,0 +1,134 @@
version: '3.8'
services:
prometheus:
image: prom/prometheus:latest
container_name: prometheus
ports:
- "9090:9090"
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
- prometheus-data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
- '--storage.tsdb.retention.time=30d'
extra_hosts:
- "host.docker.internal:host-gateway"
networks:
- observability
restart: unless-stopped
grafana:
image: grafana/grafana:latest
container_name: grafana
ports:
- "3100:3000"
volumes:
- grafana-data:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_USERS_ALLOW_SIGN_UP=false
- GF_INSTALL_PLUGINS=pyroscope-datasource,pyroscope-panel
networks:
- observability
depends_on:
- prometheus
- pyroscope
- tempo
- loki
restart: unless-stopped
pyroscope:
image: grafana/pyroscope:latest
container_name: pyroscope
ports:
- "4040:4040"
volumes:
- ./pyroscope/pyroscope.yml:/etc/pyroscope/pyroscope.yml
- pyroscope-data:/var/lib/pyroscope
command:
- server
- --config=/etc/pyroscope/pyroscope.yml
networks:
- observability
restart: unless-stopped
grafana-alloy:
image: grafana/alloy:latest
container_name: grafana-alloy
volumes:
- ./alloy/config.alloy:/etc/alloy/config.alloy:ro
environment:
- CODER_AUTH_TOKEN=${CODER_AUTH_TOKEN}
command:
- run
- --server.http.listen-addr=0.0.0.0:12345
- --storage.path=/var/lib/alloy/data
- /etc/alloy/config.alloy
extra_hosts:
- "host.docker.internal:host-gateway"
networks:
- observability
depends_on:
- pyroscope
restart: unless-stopped
tempo:
image: grafana/tempo:2.6.1
container_name: tempo
ports:
- "3200:3200" # Tempo HTTP
- "4317:4317" # OTLP gRPC
- "4318:4318" # OTLP HTTP
volumes:
- ./tempo/tempo.yml:/etc/tempo/tempo.yml
command:
- -config.file=/etc/tempo/tempo.yml
- -target=all
networks:
- observability
restart: unless-stopped
loki:
image: grafana/loki:latest
container_name: loki
ports:
- "3101:3100"
volumes:
- ./loki/loki.yml:/etc/loki/local-config.yaml
- loki-data:/loki
command:
- -config.file=/etc/loki/local-config.yaml
networks:
- observability
restart: unless-stopped
promtail:
image: grafana/promtail:latest
container_name: promtail
volumes:
- ./promtail/promtail.yml:/etc/promtail/config.yml:ro
- ./logs:/var/log/coder:ro
command:
- -config.file=/etc/promtail/config.yml
networks:
- observability
depends_on:
- loki
restart: unless-stopped
volumes:
prometheus-data:
grafana-data:
pyroscope-data:
loki-data:
networks:
observability:
driver: bridge
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,13 @@
apiVersion: 1
providers:
- name: 'Default'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards
foldersFromFilesStructure: true
@@ -0,0 +1,31 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
editable: true
jsonData:
timeInterval: "15s"
- name: Pyroscope
type: pyroscope-datasource
uid: pyroscope
access: proxy
url: http://pyroscope:4040
editable: true
jsonData:
keepNestedSets: true
minStep: "15s"
- name: Loki
type: loki
uid: loki
access: proxy
url: http://loki:3100
editable: true
jsonData:
maxLines: 1000
@@ -0,0 +1,38 @@
apiVersion: 1
datasources:
- name: Tempo
uid: tempo
type: tempo
access: proxy
url: http://tempo:3200
editable: true
jsonData:
httpMethod: GET
nodeGraph:
enabled: true
serviceMap:
datasourceUid: prometheus
tracesToMetrics:
datasourceUid: prometheus
tags:
- key: service.name
value: service
- key: job
queries:
- name: P90 latency (spanmetrics)
query: sum(rate(tempo_spanmetrics_latency_bucket[$__interval])) by (le, service)
# Optional: traces->logs (if you have Loki wired up)
# tracesToLogs:
# datasourceUid: loki
# tags:
# - job
# - instance
# mappedTags:
# - key: service.name
# value: service
# spanStartTimeShift: '1h'
# spanEndTimeShift: '1h'
+1
View File
@@ -0,0 +1 @@
*.log
+26
View File
@@ -0,0 +1,26 @@
auth_enabled: false
server:
http_listen_port: 3100
grpc_listen_port: 9096
common:
instance_addr: 127.0.0.1
path_prefix: /loki
storage:
filesystem:
chunks_directory: /loki/chunks
rules_directory: /loki/rules
replication_factor: 1
ring:
kvstore:
store: inmemory
schema_config:
configs:
- from: 2020-10-24
store: tsdb
object_store: filesystem
schema: v13
index:
period: 24h
@@ -0,0 +1,40 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
cluster: 'local-observability'
environment: 'development'
# Alertmanager configuration (optional, can be added later)
# alerting:
# alertmanagers:
# - static_configs:
# - targets: []
# Load rules once and periodically evaluate them (optional)
# rule_files:
# - "alert_rules.yml"
scrape_configs:
# Scrape Prometheus itself
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# Coder server from develop.sh
# This scrapes metrics from a running ./scripts/develop.sh server
# Requires: Start develop.sh with --prometheus-enable flag or set CODER_PROMETHEUS_ENABLE=true
- job_name: 'coderd-develop'
static_configs:
- targets: ['host.docker.internal:2118']
labels:
service: 'coderd'
environment: 'development'
instance: 'develop-sh'
scrape_interval: 8s
scrape_timeout: 5s
metrics_path: '/api/v2/metrics'
- job_name: 'tempo-metrics-generator'
static_configs:
- targets: ['tempo:3200']
+36
View File
@@ -0,0 +1,36 @@
server:
http_listen_port: 9080
grpc_listen_port: 0
positions:
filename: /tmp/positions.yaml
clients:
- url: http://loki:3100/loki/api/v1/push
scrape_configs:
- job_name: coder-logs
static_configs:
- targets:
- localhost
labels:
job: coder-logs
__path__: /var/log/coder/*
pipeline_stages:
- regex:
expression: '^(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d+Z|\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})'
- labels:
filename:
- timestamp:
source: timestamp
format: RFC3339Nano
fallback_formats:
- "2006-01-02 15:04:05"
- output:
source: message
relabel_configs:
- source_labels:
- __path__
target_label: filename
regex: '.*/([^/]+)$'
replacement: '${1}'
+12
View File
@@ -0,0 +1,12 @@
# Pyroscope agent configuration
# This configures the agent to scrape pprof endpoints
# Server address to forward profiles to
server-address: http://pyroscope:4040
# Scrape configuration
# Note: The Pyroscope agent may need to be configured differently
# depending on the version. This is a basic configuration.
# Logging
log-level: info
@@ -0,0 +1,31 @@
# Pyroscope server configuration
# This configures Pyroscope to collect profiling data from Go processes
# Storage configuration
storage:
# Path where Pyroscope will store data
path: /var/lib/pyroscope
# Retention period (30 days = 720 hours)
retention: 720h
# Server configuration
server:
# HTTP API address
api-bind-address: :4040
# Base URL for the UI (adjust if behind a proxy)
base-url: http://localhost:4040
# Ingest configuration
# Pyroscope can ingest from:
# 1. HTTP endpoint (pprof format) - configure scrape targets below
# 2. Direct push from Go applications using pyroscope client
ingestion:
# Maximum number of samples per second
max_ingestion_rate: 10000
# Logging
log-level: info
# Note: Pyroscope server mode doesn't natively support scraping pprof endpoints.
# Grafana Alloy is used to scrape pprof endpoints and forward to Pyroscope.
# See README.md for configuration details.
+69
View File
@@ -0,0 +1,69 @@
server:
http_listen_port: 3200
distributor:
receivers:
otlp:
protocols:
http:
endpoint: "0.0.0.0:4318"
grpc:
endpoint: "0.0.0.0:4317"
ingester:
# Maximum time a trace may sit idle before it is flushed to a block (optional).
trace_idle_period: 10s
# Upper bound on how long a block stays open before it is cut.
max_block_duration: 5m
lifecycler:
ring:
kvstore:
store: memberlist # use in-memory memberlist ring (good for single-binary/docker)
replication_factor: 1 # single node, so 1 is fine
heartbeat_period: 5s # must be set to a value greater than 0
metrics_generator:
# WAL for *metrics* generated from traces
storage:
path: /tmp/tempo/generator/wal
# WAL for *traces* used by local-blocks (needed for TraceQL metrics)
# See MetricSummary/local-blocks notes.
traces_storage:
path: /tmp/tempo/generator/traces
processor:
# Prometheus span metrics (RED style metrics)
span_metrics: {}
# Service graph metrics (for service map / node graph)
service_graphs: {}
# Local blocks enable TraceQL metrics API (/api/metrics/...)
local_blocks:
# Persist blocks so you can query a longer window than just in-memory
flush_to_storage: true
#compactor:
# compaction:
# Reasonable default for local development; adjust as needed.
#block_retention: 24h
storage:
trace:
backend: local
local:
path: /tmp/tempo/traces
overrides:
defaults:
# Enable metrics-generator processors for the (default) tenant
# Note: processor names use dashes in this list, while the matching
# config blocks in the metrics_generator section use underscores.
metrics_generator:
processors:
- span-metrics
- service-graphs
- local-blocks