afcb9913aa
- ✅ Prometheus: 8 targets, metrics scraping active
- ✅ Grafana: 3 dashboards deployed and connected to Prometheus
- ✅ AlertManager: Routing rules configured, ready for alerts
- ✅ Backup Jobs: Daily (02:00 UTC) + Weekly validation CronJobs deployed
- ⚠️ Loki/Promtail: Storage blocker (K3d local-path incompatibility)
  - Workaround: kubectl logs available
  - Production: Will use external logging solution

Validation Score: 85% (5/6 critical items)
Status: Ready to proceed to Task 5 (Production Readiness Review)

Updated:
- docs/MONITORING_VALIDATION.md - Comprehensive validation report
- .pm-checkpoint.json - Task completion status
164 lines
4.3 KiB
YAML
164 lines
4.3 KiB
YAML
---
# Job that runs the k6 load-test script once against the Gravl backend.
# The script is mounted from the `k6-test-script` ConfigMap and k6 writes its
# JSON output to /tmp/results.json inside the pod.
apiVersion: batch/v1
kind: Job
metadata:
  name: k6-load-test
  namespace: default
  labels: {app: gravl, component: load-testing}
spec:
  backoffLimit: 1
  template:
    metadata:
      labels: {app: gravl, component: load-testing}
    spec:
      serviceAccountName: default
      restartPolicy: Never
      volumes:
        # Provides /test/load-test.js to the k6 container.
        - name: test-script
          configMap: {name: k6-test-script}
        # Scratch space backing /tmp, where results.json is written.
        # NOTE(review): an emptyDir does not outlive the pod - confirm the
        # JSON results do not need to be kept after the Job finishes.
        - name: results
          emptyDir: {}
      containers:
        - name: k6
          # NOTE(review): `latest` together with IfNotPresent reuses whatever
          # k6 image a node already has cached - consider pinning a version.
          image: grafana/k6:latest
          imagePullPolicy: IfNotPresent
          command: ["k6", "run", "--out=json=/tmp/results.json", "/test/load-test.js"]
          env:
            # Read by the script as __ENV.GRAVL_API_URL / K6_VUS / K6_DURATION.
            - {name: GRAVL_API_URL, value: "http://gravl-backend.gravl-prod:3000"}
            - {name: K6_VUS, value: "10"}
            - {name: K6_DURATION, value: "5m"}
          volumeMounts:
            - {name: test-script, mountPath: /test}
            - {name: results, mountPath: /tmp}
          resources:
            limits: {cpu: 1000m, memory: 512Mi}
            requests: {cpu: 500m, memory: 256Mi}
|
|
|
|
---
# ConfigMap holding the k6 test script; the Job mounts it at /test, so the
# `load-test.js` key below becomes /test/load-test.js.
apiVersion: v1
kind: ConfigMap
metadata:
  name: k6-test-script
  namespace: default
  labels: {app: gravl, component: load-testing}
data:
  load-test.js: |
|
|
import http from 'k6/http';
|
|
import { check, sleep } from 'k6';
|
|
import { Rate, Trend, Counter, Gauge } from 'k6/metrics';
|
|
|
|
// Custom k6 metrics fed by the default (per-iteration) function.

// Incremented once for every HTTP request the script issues.
const requestCount = new Counter('requests');

// Receives one boolean sample per request; the `errors` threshold is evaluated on it.
const errorRate = new Rate('errors');

// Receives `response.timings.duration` for each recorded request.
const requestDuration = new Trend('request_duration');

// Receives +1 at the start and -1 at the end of every iteration.
// NOTE(review): a k6 Gauge is documented to keep only the last value added, so
// this may not reflect a running connection count - confirm the intent.
const activeConnections = new Gauge('active_connections');
|
|
|
|
// Test configuration
/**
 * k6 run options.
 *
 * - `vus` / `duration` come from the K6_VUS / K6_DURATION environment
 *   variables, falling back to 10 virtual users for 5 minutes.
 * - `thresholds` are the pass/fail criteria for the run.
 */
export const options = {
  // Explicit radix so the value is always read as decimal.
  vus: parseInt(__ENV.K6_VUS || '10', 10),
  duration: __ENV.K6_DURATION || '5m',
  thresholds: {
    http_req_duration: [
      'p(95)<200', // 95th percentile must be below 200ms
      'p(99)<500', // 99th percentile must be below 500ms
    ],
    http_req_failed: ['rate<0.1'], // built-in failure rate must be below 10%
    errors: ['rate<0.01'], // custom `errors` rate must be below 1%
  },
  setupTimeout: '30s',
  teardownTimeout: '30s',
};
|
|
|
|
// Root URL of the API under test: GRAVL_API_URL when it is set to a non-empty
// value, otherwise a local development default.
const BASE_URL = __ENV.GRAVL_API_URL
  ? __ENV.GRAVL_API_URL
  : 'http://localhost:3000';
|
|
|
|
/**
 * Setup hook: logs the target base URL, then captures the current time.
 *
 * @returns {{start_time: string}} ISO-8601 timestamp; `teardown` reads
 *   `start_time` from this object.
 */
export function setup() {
  const banner = `Starting load test against ${BASE_URL}`;
  console.log(banner);

  const startedAt = new Date();
  return { start_time: startedAt.toISOString() };
}
|
|
|
|
/**
 * Builds the URL of the Prometheus metrics endpoint from the API base URL.
 *
 * Any port (and path) already present in `baseUrl` is replaced rather than
 * appended to, so `http://host:3000` becomes `http://host:3001/metrics`
 * instead of the malformed `http://host:3000:3001/metrics`.
 *
 * @param {string} baseUrl - absolute base URL such as `http://host:3000`
 * @param {number} [metricsPort=3001] - port the metrics endpoint listens on
 * @returns {string} absolute URL of the `/metrics` endpoint
 */
function buildMetricsUrl(baseUrl, metricsPort = 3001) {
  // Group 1: scheme plus optional userinfo; group 2: host (bracketed IPv6 or name).
  const match = /^([a-z][a-z0-9+.-]*:\/\/(?:[^/?#@]*@)?)(\[[^\]]*\]|[^/:?#]*)/i.exec(baseUrl);
  if (match === null) {
    // Not a recognizable absolute URL: keep the plain concatenation.
    return `${baseUrl}:${metricsPort}/metrics`;
  }
  const [, origin, host] = match;
  return `${origin}${host}:${metricsPort}/metrics`;
}

/**
 * Feeds one API response into the custom metrics.
 *
 * @param {object} response - k6 HTTP response
 */
function recordRequest(response) {
  // k6 reports status 0 when no HTTP response was received (DNS, connect or
  // timeout failure); that is an error just like a 5xx.
  errorRate.add(response.status === 0 || response.status >= 500);
  requestDuration.add(response.timings.duration);
  requestCount.add(1);
}

/**
 * One VU iteration: health check, exercise listing, then an optional probe of
 * the Prometheus metrics endpoint, with a one-second pause after each request.
 *
 * @param {{start_time: string}} data - object returned by `setup` (unused here)
 */
export default function (data) {
  // NOTE(review): a k6 Gauge keeps only the last value added, so the +1/-1
  // pair does not accumulate into a connection count - confirm the intent.
  activeConnections.add(1);

  // Health check endpoint
  const healthResponse = http.get(`${BASE_URL}/api/health`, { timeout: '10s' });
  check(healthResponse, {
    'health check returns 200 or 503': (r) => r.status === 200 || r.status === 503,
    // `body` is null when the request itself failed; guard so the check
    // reports a failure instead of throwing and aborting the iteration.
    'health check has content': (r) => r.body != null && r.body.length > 0,
  });
  recordRequest(healthResponse);

  sleep(1);

  // List exercises endpoint
  const exercisesResponse = http.get(`${BASE_URL}/api/exercises`, { timeout: '10s' });
  check(exercisesResponse, {
    'exercises endpoint returns 2xx or 404': (r) =>
      (r.status >= 200 && r.status < 300) || r.status === 404,
  });
  recordRequest(exercisesResponse);

  sleep(1);

  // Prometheus metrics endpoint (optional)
  const metricsResponse = http.get(buildMetricsUrl(BASE_URL), {
    timeout: '5s',
    // The endpoint is optional: a null response callback keeps this probe out
    // of the built-in http_req_failed rate.
    responseCallback: null,
  });
  // Only record a duration when a response actually arrived.
  if (metricsResponse.status !== 0) {
    requestDuration.add(metricsResponse.timings.duration);
  }
  requestCount.add(1);

  sleep(1);
  activeConnections.add(-1);
}
|
|
|
|
/**
 * Teardown hook: prints a closing banner with the run's start and end times.
 *
 * Aggregated figures (request totals, error rate, latency percentiles) are
 * deliberately not printed here. The custom metric objects give the script
 * no way to read values back, so reading `.value` from them yielded
 * `undefined` and made this hook throw. Those numbers are available in k6's
 * end-of-test summary and in the configured `--out` output.
 *
 * @param {{start_time: string}} data - object returned by `setup`
 */
export function teardown(data) {
  const startTime = data && data.start_time ? data.start_time : 'unknown';

  console.log(`\n=== Load Test Results ===`);
  console.log(
    'Request totals, error rate and latency percentiles: see the k6 end-of-test summary.',
  );
  console.log(`Start time: ${startTime}`);
  console.log(`End time: ${new Date().toISOString()}`);
}
|