From 494d9e77035970439b5ca4bb0653ec5f62629b04 Mon Sep 17 00:00:00 2001
From: admin
Date: Wed, 7 Jan 2026 08:49:36 +0000
Subject: [PATCH] Add apps/cluster-health-dashboard/Jenkinsfile

---
 apps/cluster-health-dashboard/Jenkinsfile | 839 ++++++++++++++++++++++
 1 file changed, 839 insertions(+)
 create mode 100644 apps/cluster-health-dashboard/Jenkinsfile

diff --git a/apps/cluster-health-dashboard/Jenkinsfile b/apps/cluster-health-dashboard/Jenkinsfile
new file mode 100644
index 0000000..6344c34
--- /dev/null
+++ b/apps/cluster-health-dashboard/Jenkinsfile
@@ -0,0 +1,839 @@
+pipeline {
+    agent any
+
+    environment {
+        DASHBOARD_NAME = 'cluster-health-dashboard'
+        // Workspace-relative so archiveArtifacts and publishHTML can reach it
+        OUTPUT_DIR = 'dashboard-reports'
+        PROMETHEUS_URL = 'http://prometheus-server.monitoring.svc.cluster.local'
+        GRAFANA_URL = 'http://grafana.monitoring.svc.cluster.local'
+
+        // Notification
+        TELEGRAM_BOT_TOKEN = credentials('telegram-bot-token')
+        TELEGRAM_CHAT_ID = credentials('telegram-chat-id')
+
+        // Pricing (adjust to your actual costs)
+        CPU_PRICE_PER_HOUR = '0.04'        // $0.04 per vCPU hour
+        MEMORY_PRICE_PER_GB_HOUR = '0.005' // $0.005 per GB hour
+    }
+
+    triggers {
+        // Run daily at 8 AM on weekdays
+        cron('0 8 * * 1-5')
+    }
+
+    parameters {
+        choice(
+            name: 'REPORT_PERIOD',
+            choices: ['24h', '7d', '30d'],
+            description: 'Time period for metrics'
+        )
+        booleanParam(
+            name: 'SEND_EMAIL',
+            defaultValue: true,
+            description: 'Send report via email'
+        )
+        booleanParam(
+            name: 'SEND_TELEGRAM',
+            defaultValue: true,
+            description: 'Send summary to Telegram'
+        )
+    }
+
+    stages {
+        stage('Initialize') {
+            steps {
+                script {
+                    echo "🚀 Starting Cluster Health Dashboard generation..."
+                    sh """
+                        mkdir -p ${OUTPUT_DIR}
+                        rm -f ${OUTPUT_DIR}/*
+                    """
+
+                    if (params.SEND_TELEGRAM) {
+                        sendTelegramNotification(
+                            "📊 Cluster Health Report\n\n" +
+                            "Generating dashboard for period: ${params.REPORT_PERIOD}"
+                        )
+                    }
+                }
+            }
+        }
+
+        stage('Collect Cluster Info') {
+            steps {
+                script {
+                    echo "📋 Collecting cluster information..."
+
+                    // Get cluster version ("--short" was removed in kubectl 1.28,
+                    // so parse the plain "Server Version:" line; "grep ." makes the
+                    // fallback fire when the pipeline produces no output)
+                    env.CLUSTER_VERSION = sh(
+                        script: 'kubectl version 2>/dev/null | grep "Server Version" | cut -d" " -f3 | grep . || echo "unknown"',
+                        returnStdout: true
+                    ).trim()
+
+                    // Get node count
+                    env.NODE_COUNT = sh(
+                        script: 'kubectl get nodes --no-headers 2>/dev/null | wc -l || echo "0"',
+                        returnStdout: true
+                    ).trim()
+
+                    // Get namespace count
+                    env.NAMESPACE_COUNT = sh(
+                        script: 'kubectl get namespaces --no-headers 2>/dev/null | wc -l || echo "0"',
+                        returnStdout: true
+                    ).trim()
+
+                    // Get total pod count
+                    env.POD_COUNT = sh(
+                        script: 'kubectl get pods --all-namespaces --no-headers 2>/dev/null | wc -l || echo "0"',
+                        returnStdout: true
+                    ).trim()
+
+                    echo "Cluster version: ${env.CLUSTER_VERSION}"
+                    echo "Nodes: ${env.NODE_COUNT}"
+                    echo "Namespaces: ${env.NAMESPACE_COUNT}"
+                    echo "Pods: ${env.POD_COUNT}"
+                }
+            }
+        }
+
+        stage('Query Prometheus Metrics') {
+            steps {
+                script {
+                    echo "📈 Querying Prometheus for metrics..."
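+
+                    // These are instant queries evaluated at report time; the [5m]
+                    // window only smooths the rate() calculations. REPORT_PERIOD is
+                    // recorded in the report metadata, not substituted into queries.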
+
+                    // CPU Usage
+                    env.AVG_CPU_USAGE = queryPrometheus(
+                        "avg(rate(container_cpu_usage_seconds_total{container!=''}[5m])) * 100"
+                    )
+
+                    // Memory Usage
+                    env.TOTAL_MEMORY_USAGE_GB = queryPrometheus(
+                        "sum(container_memory_usage_bytes{container!=''}) / 1024 / 1024 / 1024"
+                    )
+
+                    // Network RX
+                    env.NETWORK_RX_MB = queryPrometheus(
+                        "sum(rate(container_network_receive_bytes_total[5m])) / 1024 / 1024"
+                    )
+
+                    // Network TX
+                    env.NETWORK_TX_MB = queryPrometheus(
+                        "sum(rate(container_network_transmit_bytes_total[5m])) / 1024 / 1024"
+                    )
+
+                    // Pod restart count
+                    env.TOTAL_RESTARTS = queryPrometheus(
+                        "sum(kube_pod_container_status_restarts_total)"
+                    )
+
+                    echo "Metrics collected successfully"
+                }
+            }
+        }
+
+        stage('Analyze Node Resources') {
+            steps {
+                script {
+                    echo "💻 Analyzing node resources..."
+
+                    try {
+                        // Get CPU values
+                        def cpuValues = sh(
+                            script: """kubectl get nodes -o jsonpath='{range .items[*]}{.status.capacity.cpu}{"\\n"}{end}' 2>/dev/null || echo "0" """,
+                            returnStdout: true
+                        ).trim().split('\n')
+
+                        // Get Memory values
+                        def memValues = sh(
+                            script: """kubectl get nodes -o jsonpath='{range .items[*]}{.status.capacity.memory}{"\\n"}{end}' 2>/dev/null || echo "0Ki" """,
+                            returnStdout: true
+                        ).trim().split('\n')
+
+                        def totalCPU = 0
+                        def totalMemoryGB = 0
+
+                        cpuValues.each { cpu ->
+                            if (cpu?.trim() && cpu != "0") {
+                                totalCPU += cpu.toInteger()
+                            }
+                        }
+
+                        memValues.each { mem ->
+                            if (mem?.trim() && mem != "0Ki") {
+                                // Node capacity is reported in Ki, e.g. "16384000Ki";
+                                // strip the unit and convert (16384000Ki is about 15.6 GB)
+                                def memKi = mem.replaceAll('[^0-9]', '')
+                                if (memKi) {
+                                    def memKiLong = memKi.toLong()
+                                    totalMemoryGB += (memKiLong / 1024 / 1024)
+                                }
+                            }
+                        }
+
+                        env.TOTAL_CPU_CORES = totalCPU.toString()
+                        env.TOTAL_MEMORY_GB = totalMemoryGB.toString()
+
+                        echo "Total CPU cores: ${env.TOTAL_CPU_CORES}"
+                        echo "Total Memory: ${env.TOTAL_MEMORY_GB} GB"
+                    } catch (Exception e) {
+                        echo "⚠️ Failed to analyze node resources: ${e.message}"
+                        env.TOTAL_CPU_CORES = "0"
+                        env.TOTAL_MEMORY_GB = "0"
+                    }
+                }
+            }
+        }
+
+        stage('Analyze Pod Status') {
+            steps {
+                script {
+                    echo "📦 Analyzing pod status across namespaces..."
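+
+                    // kubectl prints one pod phase per line (Running, Pending,
+                    // Failed, Succeeded, ...); only the phases reported on in the
+                    // dashboard are tallied below.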
+
+                    try {
+                        // Get pod phases
+                        def podPhases = sh(
+                            script: """kubectl get pods --all-namespaces -o jsonpath='{range .items[*]}{.status.phase}{"\\n"}{end}' 2>/dev/null || echo "" """,
+                            returnStdout: true
+                        ).trim()
+
+                        def running = 0
+                        def pending = 0
+                        def failed = 0
+
+                        if (podPhases) {
+                            podPhases.split('\n').each { phase ->
+                                def p = phase.toLowerCase().trim()
+                                if (p == 'running') running++
+                                else if (p == 'pending') pending++
+                                else if (p == 'failed') failed++
+                            }
+                        }
+
+                        env.PODS_RUNNING = running.toString()
+                        env.PODS_PENDING = pending.toString()
+                        env.PODS_FAILED = failed.toString()
+
+                        // Get namespace stats
+                        def namespaces = sh(
+                            script: """kubectl get namespaces -o jsonpath='{range .items[*]}{.metadata.name}{"\\n"}{end}' 2>/dev/null || echo "" """,
+                            returnStdout: true
+                        ).trim().split('\n')
+
+                        def namespaceStats = [:]
+
+                        namespaces.each { ns ->
+                            if (ns?.trim()) {
+                                def podCount = sh(
+                                    script: """kubectl get pods -n ${ns} --no-headers 2>/dev/null | wc -l || echo "0" """,
+                                    returnStdout: true
+                                ).trim().toInteger()
+
+                                if (podCount > 0) {
+                                    // Count actual containers rather than assuming two per pod
+                                    def containerCount = sh(
+                                        script: """kubectl get pods -n ${ns} -o jsonpath='{range .items[*]}{.spec.containers[*].name}{" "}{end}' 2>/dev/null | wc -w || echo "0" """,
+                                        returnStdout: true
+                                    ).trim().toInteger()
+
+                                    namespaceStats[ns] = [pods: podCount, containers: containerCount]
+                                }
+                            }
+                        }
+
+                        // Save namespace stats as string
+                        env.NAMESPACE_STATS = namespaceStats.collect { k, v -> "${k}:${v.pods}:${v.containers}" }.join(',')
+
+                        echo "Pods running: ${env.PODS_RUNNING}"
+                        echo "Pods pending: ${env.PODS_PENDING}"
+                        echo "Pods failed: ${env.PODS_FAILED}"
+                    } catch (Exception e) {
+                        echo "⚠️ Failed to analyze pods: ${e.message}"
+                        env.PODS_RUNNING = "0"
+                        env.PODS_PENDING = "0"
+                        env.PODS_FAILED = "0"
+                        env.NAMESPACE_STATS = ""
+                    }
+                }
+            }
+        }
+
+        stage('Calculate Costs') {
+            steps {
+                script {
+                    echo "💰 Calculating resource costs..."
+
+                    try {
+                        // Example: 8 cores * 720 h * $0.04 = $230.40/month;
+                        // 32 GB * 720 h * $0.005 = $115.20/month
+                        def cpuHours = env.TOTAL_CPU_CORES.toFloat() * 24 * 30 // Monthly
+                        def memoryGBHours = env.TOTAL_MEMORY_GB.toFloat() * 24 * 30
+
+                        def cpuCost = cpuHours * env.CPU_PRICE_PER_HOUR.toFloat()
+                        def memoryCost = memoryGBHours * env.MEMORY_PRICE_PER_GB_HOUR.toFloat()
+                        def totalCost = cpuCost + memoryCost
+
+                        env.MONTHLY_CPU_COST = String.format('%.2f', cpuCost)
+                        env.MONTHLY_MEMORY_COST = String.format('%.2f', memoryCost)
+                        env.MONTHLY_TOTAL_COST = String.format('%.2f', totalCost)
+
+                        echo "Estimated monthly costs:"
+                        echo "  CPU: \$${env.MONTHLY_CPU_COST}"
+                        echo "  Memory: \$${env.MONTHLY_MEMORY_COST}"
+                        echo "  Total: \$${env.MONTHLY_TOTAL_COST}"
+                    } catch (Exception e) {
+                        echo "⚠️ Failed to calculate costs: ${e.message}"
+                        env.MONTHLY_CPU_COST = "0.00"
+                        env.MONTHLY_MEMORY_COST = "0.00"
+                        env.MONTHLY_TOTAL_COST = "0.00"
+                    }
+                }
+            }
+        }
+
+        stage('Check for Issues') {
+            steps {
+                script {
+                    echo "🔍 Checking for potential issues..."
+
+                    def issues = []
+
+                    // High restart count
+                    if (env.TOTAL_RESTARTS.toFloat() > 10) {
+                        issues << "⚠️ High pod restart count: ${env.TOTAL_RESTARTS}"
+                    }
+
+                    // Failed pods
+                    if (env.PODS_FAILED.toInteger() > 0) {
+                        issues << "❌ ${env.PODS_FAILED} pods in Failed state"
+                    }
+
+                    // Pending pods
+                    if (env.PODS_PENDING.toInteger() > 5) {
+                        issues << "⚠️ ${env.PODS_PENDING} pods in Pending state (possible resource constraints)"
+                    }
+
+                    // High CPU usage
+                    if (env.AVG_CPU_USAGE.toFloat() > 80) {
+                        issues << "🔥 High CPU usage: ${env.AVG_CPU_USAGE}%"
+                    }
+
+                    env.ISSUES = issues.size() > 0 ? issues.join('\n') : "✅ No issues detected"
+
+                    if (issues.size() > 0) {
+                        echo "Found ${issues.size()} issues:"
+                        issues.each { echo it }
+                    } else {
+                        echo "✅ No critical issues found"
+                    }
+                }
+            }
+        }
+
+        stage('Generate HTML Dashboard') {
+            steps {
+                script {
+                    echo "🎨 Generating HTML dashboard..."
+
+                    def namespaceTable = generateNamespaceTable(env.NAMESPACE_STATS)
+
+                    def html = generateDashboardHTML(
+                        namespaceTable: namespaceTable
+                    )
+
+                    writeFile file: "${OUTPUT_DIR}/dashboard.html", text: html
+
+                    echo "✅ Dashboard generated: ${OUTPUT_DIR}/dashboard.html"
+                }
+            }
+        }
+
+        stage('Generate JSON Report') {
+            steps {
+                script {
+                    echo "📄 Generating JSON report..."
+
+                    def report = """
+{
+    "generated_at": "${new Date().format('yyyy-MM-dd HH:mm:ss')}",
+    "period": "${params.REPORT_PERIOD}",
+    "cluster": {
+        "version": "${env.CLUSTER_VERSION}",
+        "nodes": ${env.NODE_COUNT},
+        "namespaces": ${env.NAMESPACE_COUNT},
+        "total_pods": ${env.POD_COUNT}
+    },
+    "resources": {
+        "total_cpu_cores": ${env.TOTAL_CPU_CORES},
+        "total_memory_gb": ${env.TOTAL_MEMORY_GB},
+        "avg_cpu_usage_percent": ${env.AVG_CPU_USAGE},
+        "total_memory_usage_gb": ${env.TOTAL_MEMORY_USAGE_GB}
+    },
+    "pods": {
+        "running": ${env.PODS_RUNNING},
+        "pending": ${env.PODS_PENDING},
+        "failed": ${env.PODS_FAILED}
+    },
+    "costs": {
+        "monthly_cpu_usd": ${env.MONTHLY_CPU_COST},
+        "monthly_memory_usd": ${env.MONTHLY_MEMORY_COST},
+        "monthly_total_usd": ${env.MONTHLY_TOTAL_COST}
+    },
+    "issues": "${env.ISSUES.replaceAll('"', '\\\\"').replaceAll('\n', '\\\\n')}"
+}
+                    """
+
+                    writeFile file: "${OUTPUT_DIR}/report.json", text: report
+
+                    echo "✅ JSON report generated"
+                }
+            }
+        }
+    }
+
+    post {
+        success {
+            script {
+                echo "✅ Dashboard generation completed successfully!"
+
+                // Archive artifacts (OUTPUT_DIR is workspace-relative, see environment)
+                archiveArtifacts artifacts: "${OUTPUT_DIR}/*", fingerprint: true
+
+                // Publish HTML report
+                publishHTML([
+                    allowMissing: false,
+                    alwaysLinkToLastBuild: true,
+                    keepAll: true,
+                    reportDir: OUTPUT_DIR,
+                    reportFiles: 'dashboard.html',
+                    reportName: 'Cluster Health Dashboard',
+                    reportTitles: 'Cluster Health Dashboard'
+                ])
+
+                // Send Telegram summary
+                if (params.SEND_TELEGRAM) {
+                    def message = """
+📊 Cluster Health Report
+
+━━━━━━━━━━━━━━━━━━━━━━
+📋 Cluster Info
+Version: ${env.CLUSTER_VERSION}
+Nodes: ${env.NODE_COUNT}
+Namespaces: ${env.NAMESPACE_COUNT}
+Total Pods: ${env.POD_COUNT}
+
+━━━━━━━━━━━━━━━━━━━━━━
+💻 Resources
+CPU Cores: ${env.TOTAL_CPU_CORES}
+Memory: ${env.TOTAL_MEMORY_GB} GB
+Avg CPU Usage: ${env.AVG_CPU_USAGE}%
+
+━━━━━━━━━━━━━━━━━━━━━━
+📦 Pod Status
+Running: ${env.PODS_RUNNING} ✅
+Pending: ${env.PODS_PENDING} ⏳
+Failed: ${env.PODS_FAILED} ❌
+
+━━━━━━━━━━━━━━━━━━━━━━
+💰 Estimated Monthly Costs
+CPU: \$${env.MONTHLY_CPU_COST}
+Memory: \$${env.MONTHLY_MEMORY_COST}
+Total: \$${env.MONTHLY_TOTAL_COST}
+
+━━━━━━━━━━━━━━━━━━━━━━
+🔍 Issues
+${env.ISSUES}
+
+<a href="${env.BUILD_URL}Cluster_20Health_20Dashboard/">View Full Dashboard</a>
+                    """
+
+                    sendTelegramNotification(message)
+                }
+
+                echo "\n📊 Dashboard URL: ${env.BUILD_URL}Cluster_20Health_20Dashboard/"
+            }
+        }
+
+        failure {
+            script {
+                echo "❌ Dashboard generation failed!"
+
+                if (params.SEND_TELEGRAM) {
+                    sendTelegramNotification(
+                        "❌ Cluster Health Report Failed\n\n" +
+                        "<a href=\"${env.BUILD_URL}console\">View Console Output</a>"
+                    )
+                }
+            }
+        }
+
+        always {
+            script {
+                echo "🧹 Cleanup completed"
+            }
+        }
+    }
+}
+
+// Helper function to query Prometheus (no jq required)
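+// A Prometheus instant query responds with JSON of the form:
+//   {"status":"success","data":{"resultType":"vector",
+//    "result":[{"metric":{},"value":[1736237376.123,"42.5"]}]}}
+// so the metric value is the quoted scalar inside "value":[<timestamp>,"<value>"].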
+def queryPrometheus(query) {
+    try {
+        def response = sh(
+            script: """
+                curl -s '${PROMETHEUS_URL}/api/v1/query?query=${URLEncoder.encode(query, "UTF-8")}'
+            """,
+            returnStdout: true
+        ).trim()
+
+        // Extract the value with grep alone. The timestamp is fractional,
+        // so the PCRE must match [\\d.]+ rather than \\d+ before the value.
+        def value = sh(
+            script: """
+                echo '${response}' | grep -oP '"value":\\[[\\d.]+,"\\K[^"]+' || echo "0"
+            """,
+            returnStdout: true
+        ).trim()
+
+        return value ?: "0"
+    } catch (Exception e) {
+        echo "⚠️ Failed to query Prometheus: ${e.message}"
+        return "0"
+    }
+}
+
+// Helper function to send Telegram notification
+def sendTelegramNotification(message) {
+    try {
+        // --data-urlencode keeps newlines and emoji in the message intact
+        sh """
+            curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
+                -d chat_id="${TELEGRAM_CHAT_ID}" \
+                -d parse_mode="HTML" \
+                -d disable_web_page_preview=true \
+                --data-urlencode text="${message}"
+        """
+    } catch (Exception e) {
+        echo "⚠️ Failed to send Telegram notification: ${e.message}"
+    }
+}
+
+// Helper function to generate namespace table HTML
+def generateNamespaceTable(namespaceStatsStr) {
+    def rows = ""
+
+    if (namespaceStatsStr) {
+        def stats = namespaceStatsStr.split(',')
+        stats.each { stat ->
+            def parts = stat.split(':')
+            if (parts.size() >= 3) {
+                rows += """
+                    <tr>
+                        <td>${parts[0]}</td>
+                        <td>${parts[1]}</td>
+                        <td>${parts[2]}</td>
+                    </tr>
+                """
+            }
+        }
+    }
+
+    if (!rows) {
+        rows = "<tr><td colspan='3'>No data available</td></tr>"
+    }
+
+    return rows
+}
+
+// Helper function to generate complete HTML dashboard
+def generateDashboardHTML(args) {
+    def namespaceTable = args.namespaceTable
+
+    return """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Cluster Health Dashboard</title>
+    <style>
+        body { font-family: sans-serif; background: #f5f7fa; margin: 0; padding: 20px; }
+        .header { text-align: center; margin-bottom: 20px; }
+        .grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 16px; margin-bottom: 16px; }
+        .card { background: #fff; border-radius: 8px; padding: 16px; margin-bottom: 16px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
+        .metric { display: flex; justify-content: space-between; padding: 6px 0; }
+        .metric .value { font-weight: bold; }
+        .issue { padding: 8px; border-radius: 4px; margin: 4px 0; background: #fff3cd; }
+        .issue.ok { background: #d4edda; }
+        table { width: 100%; border-collapse: collapse; }
+        th, td { text-align: left; padding: 8px; border-bottom: 1px solid #e0e0e0; }
+        .footer { text-align: center; color: #888; margin-top: 20px; }
+    </style>
+</head>
+<body>
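+    <!-- Layout: header, metric-card grid, issues list, namespace table, footer -->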
+    <div class="header">
+        <h1>☸️ Kubernetes Cluster Health Dashboard</h1>
+        <p>Generated: ${new Date().format('yyyy-MM-dd HH:mm:ss')} | Period: ${params.REPORT_PERIOD}</p>
+    </div>
+
+    <div class="grid">
+        <div class="card">
+            <h2>📋 Cluster Information</h2>
+            <div class="metric">
+                <span>Kubernetes Version</span>
+                <span class="value">${env.CLUSTER_VERSION}</span>
+            </div>
+            <div class="metric">
+                <span>Nodes</span>
+                <span class="value">${env.NODE_COUNT}</span>
+            </div>
+            <div class="metric">
+                <span>Namespaces</span>
+                <span class="value">${env.NAMESPACE_COUNT}</span>
+            </div>
+            <div class="metric">
+                <span>Total Pods</span>
+                <span class="value">${env.POD_COUNT}</span>
+            </div>
+        </div>
+
+        <div class="card">
+            <h2>💻 Resource Capacity</h2>
+            <div class="metric">
+                <span>Total CPU Cores</span>
+                <span class="value">${env.TOTAL_CPU_CORES}</span>
+            </div>
+            <div class="metric">
+                <span>Total Memory</span>
+                <span class="value">${env.TOTAL_MEMORY_GB} GB</span>
+            </div>
+            <div class="metric">
+                <span>Avg CPU Usage</span>
+                <span class="value">${env.AVG_CPU_USAGE}%</span>
+            </div>
+        </div>
+
+        <div class="card">
+            <h2>📦 Pod Status</h2>
+            <div class="metric">
+                <span>Running</span>
+                <span class="value">${env.PODS_RUNNING}</span>
+            </div>
+            <div class="metric">
+                <span>Pending</span>
+                <span class="value">${env.PODS_PENDING}</span>
+            </div>
+            <div class="metric">
+                <span>Failed</span>
+                <span class="value">${env.PODS_FAILED}</span>
+            </div>
+            <div class="metric">
+                <span>Total Restarts</span>
+                <span class="value">${env.TOTAL_RESTARTS}</span>
+            </div>
+        </div>
+
+        <div class="card">
+            <h2>💰 Estimated Monthly Costs</h2>
+            <div class="metric">
+                <span>CPU Cost</span>
+                <span class="value">\$${env.MONTHLY_CPU_COST}</span>
+            </div>
+            <div class="metric">
+                <span>Memory Cost</span>
+                <span class="value">\$${env.MONTHLY_MEMORY_COST}</span>
+            </div>
+            <div class="metric">
+                <span>Total Cost</span>
+                <span class="value">\$${env.MONTHLY_TOTAL_COST}</span>
+            </div>
+            <p>💡 Based on: CPU \$${env.CPU_PRICE_PER_HOUR}/core/hour, Memory \$${env.MEMORY_PRICE_PER_GB_HOUR}/GB/hour</p>
+        </div>
+    </div>
+
+    <div class="card">
+        <h2>🔍 Health Checks &amp; Issues</h2>
+        ${env.ISSUES.contains('✅') ?
+            '<div class="issue ok">' + env.ISSUES + '</div>' :
+            env.ISSUES.split('\n').collect { "<div class='issue'>${it}</div>" }.join('')
+        }
+    </div>
+
+    <div class="card">
+        <h2>📊 Resources by Namespace</h2>
+        <table>
+            <thead>
+                <tr>
+                    <th>Namespace</th>
+                    <th>Pods</th>
+                    <th>Containers</th>
+                </tr>
+            </thead>
+            <tbody>
+                ${namespaceTable}
+            </tbody>
+        </table>
+    </div>
+
+    <div class="footer">
+        Generated by Jenkins CI/CD Pipeline • Build #${env.BUILD_NUMBER}
+    </div>
+</body>
+</html>
+    """
+}