/
/
/
1#!/bin/bash
2# ==============================================================================
3# DNS Stack Health Check Script
4# ==============================================================================
5#
6# Description: Comprehensive health check for Pi-hole + Unbound DNS stack
7# Usage: ./dns-stack-health-check.sh [full|quick|pihole|unbound|network]
8#
9# This script is automatically generated by Ansible - DO NOT EDIT MANUALLY
10# Template: dns-stack-health-check.sh.j2
11#
12# ==============================================================================
13
# Strict mode: stop on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -euo pipefail

# Configuration
# The placeholders below are filled in by Ansible when the template is
# rendered. They are quoted so every assignment stays a single shell word even
# if a rendered value is empty or contains whitespace.
readonly PIHOLE_CONTAINER="{{ pihole_container_name }}"
readonly UNBOUND_CONTAINER="{{ unbound_container_name }}"
readonly PIHOLE_WEB_PORT="{{ pihole_web_port }}"
readonly PIHOLE_DNS_PORT="{{ pihole_dns_port }}"
readonly UNBOUND_PORT="{{ unbound_port }}"
readonly LOG_FILE="/var/log/dns-stack-health.log"

# Health check thresholds
readonly MAX_RESTARTS=10      # restart count above this is reported as CRITICAL
readonly MAX_MEMORY_MB=512    # per-container memory ceiling, in megabytes
readonly MAX_CPU_PERCENT=80   # per-container CPU ceiling, in percent
readonly DNS_TIMEOUT=2        # seconds allowed for the TCP port probe

# Exit codes (also listed in the usage text)
readonly SUCCESS=0
readonly WARNING=1
readonly CRITICAL=2
# shellcheck disable=SC2034 # documented in usage; kept for callers of this file
readonly UNKNOWN=3
35
#######################################
# Print a timestamped message and append the same line to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
# Outputs:   "YYYY-MM-DD HH:MM:SS - message" on stdout and in LOG_FILE
# Returns:   0, including when LOG_FILE cannot be written
#######################################
log() {
  local line
  line="$(date '+%Y-%m-%d %H:%M:%S') - ${1-}"

  printf '%s\n' "${line}"

  # Appending to the log file is best-effort. With errexit and pipefail active,
  # a failed write would otherwise abort the health check and surface as a
  # bogus status code, so the failure is deliberately ignored here.
  { printf '%s\n' "${line}" >>"${LOG_FILE}"; } 2>/dev/null || true
}
40
# Check whether `docker inspect` can find the given name.
# Arguments: $1 - container name
# Returns:   the exit status of `docker inspect` (0 when the lookup succeeds);
#            all command output is discarded.
container_exists() {
  local name="$1"

  docker inspect "${name}" &>/dev/null
}
45
#######################################
# Check whether the given container is currently running.
# Arguments: $1 - container name
# Returns:   0 when docker reports the running state as "true",
#            non-zero otherwise (including when docker inspect fails)
#######################################
container_running() {
  local name="$1"
  local state

  # The Go template is wrapped in Jinja raw markers: this file is rendered by
  # Ansible, and Docker's double-brace syntax would otherwise be parsed as a
  # Jinja expression and break rendering.
  state=$(docker inspect -f '{% raw %}{{.State.Running}}{% endraw %}' "${name}" 2>/dev/null) || return 1

  [[ "${state}" == "true" ]]
}
50
#######################################
# Compare a container's restart count against MAX_RESTARTS.
# Globals:   MAX_RESTARTS, SUCCESS, WARNING, CRITICAL (read)
# Arguments: $1 - container name
# Outputs:   one status line through log
# Returns:   CRITICAL when the count exceeds MAX_RESTARTS, WARNING when it is
#            above zero, SUCCESS otherwise
#######################################
check_restart_count() {
  local container="$1"
  local restarts

  # The Go template is wrapped in Jinja raw markers so Ansible emits it
  # verbatim instead of trying to evaluate it. The declaration is kept apart
  # from the assignment so a docker failure is not masked by `local`.
  restarts=$(docker inspect -f '{% raw %}{{.RestartCount}}{% endraw %}' "${container}" 2>/dev/null) || restarts=0

  # A failed or malformed lookup counts as zero restarts, as before; forcing
  # base 10 keeps values with leading zeros from being read as octal.
  if [[ "${restarts}" =~ ^[0-9]+$ ]]; then
    restarts=$((10#${restarts}))
  else
    restarts=0
  fi

  if (( restarts > MAX_RESTARTS )); then
    log "CRITICAL: ${container} restart count (${restarts}) exceeds threshold (${MAX_RESTARTS})"
    return "${CRITICAL}"
  elif (( restarts > 0 )); then
    log "WARNING: ${container} has restarted ${restarts} time(s)"
    return "${WARNING}"
  else
    log "OK: ${container} restart count: ${restarts}"
    return "${SUCCESS}"
  fi
}
67
# Convert a docker memory figure such as "45.2MiB" or "1.5GiB" to whole MiB.
# Prints the integer on stdout; exits non-zero when the value cannot be parsed.
# Decimal suffixes (kB, MB, GB) are treated like their binary counterparts,
# which is close enough for a threshold comparison.
_mem_to_mib() {
  awk -v value="$1" '
    BEGIN {
      if (!match(value, /^[0-9]+([.][0-9]+)?/)) exit 1
      amount = substr(value, 1, RLENGTH) + 0
      unit = tolower(substr(value, RLENGTH + 1))
      if (unit == "b") factor = 1 / 1048576
      else if (unit == "kib" || unit == "kb") factor = 1 / 1024
      else if (unit == "mib" || unit == "mb") factor = 1
      else if (unit == "gib" || unit == "gb") factor = 1024
      else if (unit == "tib" || unit == "tb") factor = 1048576
      else exit 1
      printf "%d\n", amount * factor
    }'
}

#######################################
# Compare a container's memory and CPU usage against the configured limits.
# Globals:   MAX_MEMORY_MB, MAX_CPU_PERCENT, SUCCESS, WARNING, CRITICAL (read)
# Arguments: $1 - container name
# Outputs:   one status line through log
# Returns:   CRITICAL when memory or CPU exceeds its limit, WARNING when usage
#            could not be read, SUCCESS otherwise
#######################################
check_resources() {
  local container="$1"
  local stats mem_field cpu_field memory_mb cpu_percent

  # The Go template is wrapped in Jinja raw markers so Ansible emits it
  # verbatim. Expected output looks like "45.2MiB / 1.9GiB|0.15%".
  stats=$(docker stats --no-stream \
    --format '{% raw %}{{.MemUsage}}|{{.CPUPerc}}{% endraw %}' \
    "${container}" 2>/dev/null) || stats=""
  stats=${stats%%$'\n'*}

  mem_field=${stats%%|*}
  cpu_field=${stats#*|}

  # Only the "used" figure before " / " matters; the part after it is the limit.
  memory_mb=$(_mem_to_mib "${mem_field%% /*}") || memory_mb=""

  cpu_percent=${cpu_field%\%}
  [[ "${cpu_percent}" =~ ^[0-9]+([.][0-9]+)?$ ]] || cpu_percent=""

  # Memory check
  if [[ -n "${memory_mb}" ]] && (( memory_mb > MAX_MEMORY_MB )); then
    log "CRITICAL: ${container} memory usage (${memory_mb}MB) exceeds threshold (${MAX_MEMORY_MB}MB)"
    return "${CRITICAL}"
  fi

  # CPU check (awk compares the fractional value independent of shell locale)
  if [[ -n "${cpu_percent}" ]] \
    && awk -v cpu="${cpu_percent}" -v max="${MAX_CPU_PERCENT}" 'BEGIN { exit !(cpu + 0 > max + 0) }'; then
    log "CRITICAL: ${container} CPU usage (${cpu_percent}%) exceeds threshold (${MAX_CPU_PERCENT}%)"
    return "${CRITICAL}"
  fi

  # Nothing measurable must not be reported as "within limits".
  if [[ -z "${memory_mb}" || -z "${cpu_percent}" ]]; then
    log "WARNING: ${container} resource usage unavailable"
    return "${WARNING}"
  fi

  log "OK: ${container} resource usage within limits"
  return "${SUCCESS}"
}
91
#######################################
# Check that the Pi-hole admin page answers over HTTP on localhost.
# Globals:   PIHOLE_WEB_PORT, SUCCESS, CRITICAL (read)
#            HTTP_TIMEOUT (read, optional) - request limit in seconds, default 5
# Outputs:   one status line through log
# Returns:   SUCCESS when curl fetches the page, CRITICAL otherwise
#######################################
check_pihole_web() {
  local url="http://localhost:${PIHOLE_WEB_PORT}/admin/"

  # --max-time bounds the request so an unresponsive web server cannot hang
  # the health check indefinitely.
  if curl -s -f --max-time "${HTTP_TIMEOUT:-5}" "${url}" >/dev/null; then
    log "OK: Pi-hole Web UI is accessible"
    return "${SUCCESS}"
  else
    log "CRITICAL: Pi-hole Web UI is not accessible"
    return "${CRITICAL}"
  fi
}
102
#######################################
# Probe a TCP port on localhost with nc.
# Globals:   DNS_TIMEOUT, SUCCESS, CRITICAL (read)
# Arguments: $1 - port number
#            $2 - service label used in the log message
# Outputs:   one status line through log
# Returns:   SUCCESS when the port accepts a connection, CRITICAL otherwise
#######################################
check_dns_port() {
  local port="$1"
  local service="$2"

  # Expansions are quoted so an empty or odd value cannot change the number
  # of arguments nc receives.
  if nc -z -w "${DNS_TIMEOUT}" localhost "${port}" >/dev/null 2>&1; then
    log "OK: ${service} port ${port} is open"
    return "${SUCCESS}"
  else
    log "CRITICAL: ${service} port ${port} is not accessible"
    return "${CRITICAL}"
  fi
}
116
#######################################
# Resolve a fixed list of well-known domains through the local DNS service.
# Globals:   SUCCESS, WARNING, CRITICAL (read)
#            PIHOLE_DNS_PORT (read, optional) - port to query, default 53
#            DNS_TIMEOUT (read, optional) - per-try timeout in seconds, default 2
# Outputs:   one status line per domain through log
# Returns:   CRITICAL when every lookup fails, WARNING when some fail,
#            SUCCESS when all succeed
#######################################
test_dns_resolution() {
  local -a test_domains=(
    "google.com"
    "github.com"
    "microsoft.com"
    "example.com"
  )
  local domain answer
  local total=0
  local failures=0
  local port="${PIHOLE_DNS_PORT:-53}"
  local timeout="${DNS_TIMEOUT:-2}"

  for domain in "${test_domains[@]}"; do
    # The total is counted by hand: the array-length expansion contains the
    # character pair that opens a Jinja comment, which breaks rendering of
    # this template.
    total=$((total + 1))

    # dig exits 0 whenever the server replies, even with SERVFAIL or an empty
    # answer, so the lookup only counts as successful when a record came
    # back. Lines starting with ";" are dig diagnostics, not answers.
    answer=$(dig +short "+time=${timeout}" +tries=2 -p "${port}" \
      @localhost "${domain}" 2>/dev/null) || answer=""

    if [[ -n "${answer}" && "${answer}" != ";"* ]]; then
      log "OK: DNS resolution successful for ${domain}"
    else
      log "CRITICAL: DNS resolution failed for ${domain}"
      failures=$((failures + 1))
    fi
  done

  if (( failures == total )); then
    return "${CRITICAL}"
  elif (( failures > 0 )); then
    return "${WARNING}"
  else
    return "${SUCCESS}"
  fi
}
145
# Scan the last hour of a container's logs for error-like lines.
# Arguments: $1 - container name
# Counts lines (stdout and stderr combined) that match, case-insensitively,
# error, fail, exception or critical.
# Returns:   SUCCESS for no matches, WARNING for one to five matches,
#            CRITICAL for more than five.
check_logs() {
  local container="$1"
  local matches

  # grep -c exits non-zero when it counts zero lines; "|| true" keeps that
  # from being treated as a failure while the printed count is still captured.
  matches=$(docker logs "${container}" --since 1h 2>&1 \
    | grep -c -i -E "(error|fail|exception|critical)" || true)

  if [[ ${matches} -le 0 ]]; then
    log "OK: No recent error messages in ${container} logs"
    return ${SUCCESS}
  fi

  if [[ ${matches} -le 5 ]]; then
    log "WARNING: Found ${matches} error messages in ${container} logs (last hour)"
    return ${WARNING}
  fi

  log "CRITICAL: Found ${matches} error messages in ${container} logs (last hour)"
  return ${CRITICAL}
}
162
#######################################
# Read Pi-hole's status from its admin API and report whether blocking is on.
# Globals:   PIHOLE_WEB_PORT, SUCCESS, WARNING, CRITICAL (read)
#            HTTP_TIMEOUT (read, optional) - request limit in seconds, default 5
# Outputs:   one status line through log
# Returns:   SUCCESS when the reported status is "enabled", WARNING when the
#            API could not be fetched, CRITICAL for any other status
#######################################
check_pihole_metrics() {
  local url="http://localhost:${PIHOLE_WEB_PORT}/admin/api.php"
  local metrics status domains_blocked

  # A failed or empty fetch is reported as WARNING. Substituting a dummy
  # payload on failure would make this branch unreachable and turn a fetch
  # problem into a misleading CRITICAL status.
  if ! metrics=$(curl -s --max-time "${HTTP_TIMEOUT:-5}" "${url}" 2>/dev/null) \
    || [[ -z "${metrics}" ]]; then
    log "WARNING: Could not fetch Pi-hole metrics"
    return "${WARNING}"
  fi

  # Missing keys and unparsable payloads fall back to neutral defaults.
  status=$(jq -r '.status // "unknown"' <<<"${metrics}" 2>/dev/null) || status="unknown"
  domains_blocked=$(jq -r '.domains_being_blocked // 0' <<<"${metrics}" 2>/dev/null) \
    || domains_blocked="0"

  if [[ "${status}" == "enabled" ]]; then
    log "OK: Pi-hole status: ${status}, domains blocked: ${domains_blocked}"
    return "${SUCCESS}"
  else
    log "CRITICAL: Pi-hole status: ${status}"
    return "${CRITICAL}"
  fi
}
183
# Probe Unbound by running `unbound-control status` inside its container.
# Returns: SUCCESS when the command exits zero, CRITICAL otherwise.
# The command's own output is discarded; only a log line is emitted.
check_unbound_metrics() {
  if ! docker exec "${UNBOUND_CONTAINER}" unbound-control status &>/dev/null; then
    log "CRITICAL: Unbound control interface not responsive"
    return ${CRITICAL}
  fi

  log "OK: Unbound control interface responsive"
  return ${SUCCESS}
}
194
# Run one check for full_health_check and keep the worst status seen so far.
# Relies on bash dynamic scoping: overall_status is the caller's local.
# Arguments: the check command followed by its arguments
# Returns:   always 0, so a failing check never trips errexit
_full_check_step() {
  local rc=0

  "$@" || rc=$?
  if (( rc > overall_status )); then
    overall_status=${rc}
  fi
}

#######################################
# Perform the full health check: container state, restarts, resources,
# web UI, DNS ports, DNS resolution, recent logs and service metrics.
# Globals:   PIHOLE_CONTAINER, UNBOUND_CONTAINER, PIHOLE_DNS_PORT,
#            UNBOUND_PORT, SUCCESS, WARNING, CRITICAL (read)
# Outputs:   status lines and a final summary through log
# Returns:   CRITICAL immediately when a container is missing or stopped;
#            otherwise the highest status code returned by any check
#######################################
full_health_check() {
  local overall_status=${SUCCESS}
  local container

  log "Starting full DNS stack health check"

  # Check container existence and running state
  for container in "${PIHOLE_CONTAINER}" "${UNBOUND_CONTAINER}"; do
    if ! container_exists "${container}"; then
      log "CRITICAL: Container does not exist: ${container}"
      return "${CRITICAL}"
    fi

    if ! container_running "${container}"; then
      log "CRITICAL: Container is not running: ${container}"
      return "${CRITICAL}"
    fi
  done

  # Run all checks. Each result is folded in with "worst wins", so a later
  # WARNING can no longer overwrite an earlier CRITICAL.
  _full_check_step check_restart_count "${PIHOLE_CONTAINER}"
  _full_check_step check_restart_count "${UNBOUND_CONTAINER}"
  _full_check_step check_resources "${PIHOLE_CONTAINER}"
  _full_check_step check_resources "${UNBOUND_CONTAINER}"
  _full_check_step check_pihole_web
  _full_check_step check_dns_port "${PIHOLE_DNS_PORT}" "Pi-hole DNS"
  _full_check_step check_dns_port "${UNBOUND_PORT}" "Unbound DNS"
  _full_check_step test_dns_resolution
  _full_check_step check_logs "${PIHOLE_CONTAINER}"
  _full_check_step check_logs "${UNBOUND_CONTAINER}"
  _full_check_step check_pihole_metrics
  _full_check_step check_unbound_metrics

  # Summary
  case "${overall_status}" in
    "${SUCCESS}")
      log "HEALTH CHECK SUMMARY: DNS stack operational"
      ;;
    "${WARNING}")
      log "HEALTH CHECK SUMMARY: DNS stack operational with warnings"
      ;;
    "${CRITICAL}")
      log "HEALTH CHECK SUMMARY: Critical issues detected in DNS stack"
      ;;
    *)
      log "HEALTH CHECK SUMMARY: DNS stack status unknown (code ${overall_status})"
      ;;
  esac

  return "${overall_status}"
}
243
#######################################
# Quick health check: containers running, web UI reachable, Pi-hole DNS port
# open and DNS resolution working.
# Globals:   PIHOLE_CONTAINER, UNBOUND_CONTAINER, PIHOLE_DNS_PORT,
#            SUCCESS, CRITICAL (read)
# Outputs:   status lines and a summary through log
# Returns:   CRITICAL when a container, the web UI or the DNS port check
#            fails; otherwise the status returned by test_dns_resolution
#######################################
quick_health_check() {
  local container
  local rc=0

  log "Starting quick DNS stack health check"

  for container in "${PIHOLE_CONTAINER}" "${UNBOUND_CONTAINER}"; do
    if ! container_exists "${container}" || ! container_running "${container}"; then
      log "CRITICAL: Container not running: ${container}"
      return "${CRITICAL}"
    fi
  done

  check_pihole_web || return "${CRITICAL}"
  check_dns_port "${PIHOLE_DNS_PORT}" "Pi-hole DNS" || return "${CRITICAL}"

  # Propagate the resolver's own status, so a partial failure stays a WARNING
  # here exactly as it does in the full check instead of becoming CRITICAL.
  test_dns_resolution || rc=$?
  if (( rc >= CRITICAL )); then
    return "${rc}"
  fi
  if (( rc != SUCCESS )); then
    log "QUICK CHECK SUMMARY: DNS stack operational with warnings"
    return "${rc}"
  fi

  log "QUICK CHECK SUMMARY: DNS stack appears operational"
  return "${SUCCESS}"
}
270
# Print the help text (modes, exit codes, examples) to stdout.
# The script name shown in the text is taken from $0.
usage() {
  local script="$0"

  cat <<EOF
DNS Stack Health Check Script

Usage: ${script} [mode]

Modes:
 full Comprehensive health check (default)
 quick Basic service availability check
 pihole Pi-hole specific checks only
 unbound Unbound specific checks only
 network Network connectivity checks only
 help Show this help message

Exit Codes:
 0 - Success (all checks passed)
 1 - Warning (non-critical issues)
 2 - Critical (service impaired)
 3 - Unknown (check could not complete)

Examples:
 ${script} full
 ${script} quick
 ${script} pihole
 ${script} unbound
EOF
}
299
# Run one check for main and keep the worst status seen so far.
# Relies on bash dynamic scoping: overall_status is the caller's local.
# Arguments: the check command followed by its arguments
# Returns:   always 0, so a failing check never trips errexit mid-sequence
_mode_check_step() {
  local rc=0

  "$@" || rc=$?
  if (( rc > overall_status )); then
    overall_status=${rc}
  fi
}

#######################################
# Main execution: dispatch on the requested mode.
# Globals:   PIHOLE_CONTAINER, UNBOUND_CONTAINER, PIHOLE_DNS_PORT,
#            UNBOUND_PORT, SUCCESS, UNKNOWN (read)
# Arguments: $1 - mode: full (default), quick, pihole, unbound, network, help
# Outputs:   check results through log; usage text for help or unknown modes
# Returns:   the highest status code returned by the checks that ran;
#            SUCCESS for help; UNKNOWN for an unrecognised mode
#######################################
main() {
  local mode="${1:-full}"
  local overall_status=${SUCCESS}

  # Every check runs through _mode_check_step. Calling the checks bare would
  # let the first non-zero result abort the script under errexit and skip the
  # remaining checks, or report only the status of the last one.
  case "${mode}" in
    full)
      _mode_check_step full_health_check
      ;;
    quick)
      _mode_check_step quick_health_check
      ;;
    pihole)
      _mode_check_step check_restart_count "${PIHOLE_CONTAINER}"
      _mode_check_step check_resources "${PIHOLE_CONTAINER}"
      _mode_check_step check_pihole_web
      _mode_check_step check_dns_port "${PIHOLE_DNS_PORT}" "Pi-hole DNS"
      _mode_check_step check_pihole_metrics
      _mode_check_step check_logs "${PIHOLE_CONTAINER}"
      ;;
    unbound)
      _mode_check_step check_restart_count "${UNBOUND_CONTAINER}"
      _mode_check_step check_resources "${UNBOUND_CONTAINER}"
      _mode_check_step check_dns_port "${UNBOUND_PORT}" "Unbound DNS"
      _mode_check_step check_unbound_metrics
      _mode_check_step check_logs "${UNBOUND_CONTAINER}"
      ;;
    network)
      _mode_check_step check_pihole_web
      _mode_check_step check_dns_port "${PIHOLE_DNS_PORT}" "Pi-hole DNS"
      _mode_check_step check_dns_port "${UNBOUND_PORT}" "Unbound DNS"
      _mode_check_step test_dns_resolution
      ;;
    help | -h | --help)
      usage
      ;;
    *)
      # A mistyped mode must not look like a passing health check.
      printf 'Unknown mode: %s\n' "${mode}" >&2
      usage >&2
      overall_status=${UNKNOWN}
      ;;
  esac

  return "${overall_status}"
}
338
339# Run main function with all arguments
340main "$@"
341
342# Capture and return the exit code
343exit $?