227 lines
5.8 KiB
Markdown
227 lines
5.8 KiB
Markdown
---
|
|
title: Implement Health Checks for Microservices
|
|
impact: MEDIUM-HIGH
|
|
impactDescription: Health checks enable orchestrators to manage service lifecycle
|
|
tags: microservices, health-checks, terminus, kubernetes
|
|
---
|
|
|
|
## Implement Health Checks for Microservices
|
|
|
|
Implement liveness and readiness probes using `@nestjs/terminus`. Liveness checks determine if the service should be restarted. Readiness checks determine if the service can accept traffic. Proper health checks enable Kubernetes and load balancers to route traffic correctly.
|
|
|
|
**Incorrect (simple ping that doesn't check dependencies):**
|
|
|
|
```typescript
|
|
// Simple ping that doesn't check dependencies
|
|
@Controller('health')
|
|
export class HealthController {
|
|
@Get()
|
|
check(): string {
|
|
return 'OK'; // Service might be unhealthy but returns OK
|
|
}
|
|
}
|
|
|
|
// Health check that blocks on slow dependencies
|
|
@Controller('health')
|
|
export class HealthController {
|
|
@Get()
|
|
async check(): Promise<string> {
|
|
// If database is slow, health check times out
|
|
await this.userRepo.findOne({ where: { id: '1' } });
|
|
await this.redis.ping();
|
|
await this.externalApi.healthCheck();
|
|
return 'OK';
|
|
}
|
|
}
|
|
```
|
|
|
|
**Correct (use @nestjs/terminus for comprehensive health checks):**
|
|
|
|
```typescript
|
|
// Use @nestjs/terminus for comprehensive health checks
|
|
import {
|
|
HealthCheckService,
|
|
HttpHealthIndicator,
|
|
TypeOrmHealthIndicator,
|
|
HealthCheck,
|
|
DiskHealthIndicator,
|
|
MemoryHealthIndicator,
|
|
} from '@nestjs/terminus';
|
|
|
|
@Controller('health')
|
|
export class HealthController {
|
|
constructor(
|
|
private health: HealthCheckService,
|
|
private http: HttpHealthIndicator,
|
|
private db: TypeOrmHealthIndicator,
|
|
private disk: DiskHealthIndicator,
|
|
private memory: MemoryHealthIndicator,
|
|
) {}
|
|
|
|
// Liveness probe - is the service alive?
|
|
@Get('live')
|
|
@HealthCheck()
|
|
liveness() {
|
|
return this.health.check([
|
|
// Basic checks only
|
|
() => this.memory.checkHeap('memory_heap', 200 * 1024 * 1024), // 200MB
|
|
]);
|
|
}
|
|
|
|
// Readiness probe - can the service handle traffic?
|
|
@Get('ready')
|
|
@HealthCheck()
|
|
readiness() {
|
|
return this.health.check([
|
|
() => this.db.pingCheck('database'),
|
|
() =>
|
|
this.http.pingCheck('redis', 'http://redis:6379', { timeout: 1000 }),
|
|
() =>
|
|
this.disk.checkStorage('disk', { path: '/', thresholdPercent: 0.9 }),
|
|
]);
|
|
}
|
|
|
|
// Deep health check for debugging
|
|
@Get('deep')
|
|
@HealthCheck()
|
|
deepCheck() {
|
|
return this.health.check([
|
|
() => this.db.pingCheck('database'),
|
|
() => this.memory.checkHeap('memory_heap', 200 * 1024 * 1024),
|
|
() => this.memory.checkRSS('memory_rss', 300 * 1024 * 1024),
|
|
() =>
|
|
this.disk.checkStorage('disk', { path: '/', thresholdPercent: 0.9 }),
|
|
() =>
|
|
this.http.pingCheck('external-api', 'https://api.example.com/health'),
|
|
]);
|
|
}
|
|
}
|
|
|
|
// Custom indicator for business-specific health
|
|
@Injectable()
|
|
export class QueueHealthIndicator extends HealthIndicator {
|
|
constructor(private queueService: QueueService) {
|
|
super();
|
|
}
|
|
|
|
async isHealthy(key: string): Promise<HealthIndicatorResult> {
|
|
const queueStats = await this.queueService.getStats();
|
|
|
|
const isHealthy = queueStats.failedCount < 100;
|
|
const result = this.getStatus(key, isHealthy, {
|
|
waiting: queueStats.waitingCount,
|
|
active: queueStats.activeCount,
|
|
failed: queueStats.failedCount,
|
|
});
|
|
|
|
if (!isHealthy) {
|
|
throw new HealthCheckError('Queue unhealthy', result);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
}
|
|
|
|
// Redis health indicator
|
|
@Injectable()
|
|
export class RedisHealthIndicator extends HealthIndicator {
|
|
constructor(@InjectRedis() private redis: Redis) {
|
|
super();
|
|
}
|
|
|
|
async isHealthy(key: string): Promise<HealthIndicatorResult> {
|
|
try {
|
|
const pong = await this.redis.ping();
|
|
return this.getStatus(key, pong === 'PONG');
|
|
} catch (error) {
|
|
throw new HealthCheckError('Redis check failed', this.getStatus(key, false));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Use custom indicators
|
|
@Get('ready')
|
|
@HealthCheck()
|
|
readiness() {
|
|
return this.health.check([
|
|
() => this.db.pingCheck('database'),
|
|
() => this.redis.isHealthy('redis'),
|
|
() => this.queue.isHealthy('job-queue'),
|
|
]);
|
|
}
|
|
|
|
// Graceful shutdown handling
|
|
@Injectable()
|
|
export class GracefulShutdownService implements OnApplicationShutdown {
|
|
private isShuttingDown = false;
|
|
|
|
isShutdown(): boolean {
|
|
return this.isShuttingDown;
|
|
}
|
|
|
|
async onApplicationShutdown(signal: string): Promise<void> {
|
|
this.isShuttingDown = true;
|
|
console.log(`Shutting down on ${signal}`);
|
|
|
|
// Wait for in-flight requests
|
|
await new Promise((resolve) => setTimeout(resolve, 5000));
|
|
}
|
|
}
|
|
|
|
// Health check respects shutdown state
|
|
@Get('ready')
|
|
@HealthCheck()
|
|
readiness() {
|
|
if (this.shutdownService.isShutdown()) {
|
|
throw new ServiceUnavailableException('Shutting down');
|
|
}
|
|
|
|
return this.health.check([
|
|
() => this.db.pingCheck('database'),
|
|
]);
|
|
}
|
|
```
|
|
|
|
### Kubernetes Configuration
|
|
|
|
```yaml
|
|
# Kubernetes deployment with probes
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
metadata:
|
|
name: api-service
|
|
spec:
|
|
template:
|
|
spec:
|
|
containers:
|
|
- name: api
|
|
image: api-service:latest
|
|
ports:
|
|
- containerPort: 3000
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /health/live
|
|
port: 3000
|
|
initialDelaySeconds: 30
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
failureThreshold: 3
|
|
readinessProbe:
|
|
httpGet:
|
|
path: /health/ready
|
|
port: 3000
|
|
initialDelaySeconds: 5
|
|
periodSeconds: 5
|
|
timeoutSeconds: 3
|
|
failureThreshold: 3
|
|
startupProbe:
|
|
httpGet:
|
|
path: /health/live
|
|
port: 3000
|
|
initialDelaySeconds: 0
|
|
periodSeconds: 5
|
|
failureThreshold: 30
|
|
```
|
|
|
|
Reference: [NestJS Terminus](https://docs.nestjs.com/recipes/terminus)
|