The Metrics & Monitoring module provides comprehensive system monitoring, custom metrics collection, and health checking capabilities. It enables real-time observability of both system resources and application performance.
-- Query host resource metrics and log a one-line summary for each.

-- Get current CPU usage
local cpu_usage = metrics.system_cpu()
log.info("CPU Usage: " .. string.format("%.1f%%", cpu_usage))

-- Get memory information (fields read below: percent, used_mb, total_mb)
local memory_info = metrics.system_memory()
log.info("Memory: " .. string.format(
    "%.1f%% (%.0f/%.0f MB)",
    memory_info.percent,
    memory_info.used_mb,
    memory_info.total_mb
))

-- Get disk usage (fields read below: percent, used_gb, total_gb)
local disk_info = metrics.system_disk("/")
log.info("Disk: " .. string.format(
    "%.1f%% (%.1f/%.1f GB)",
    disk_info.percent,
    disk_info.used_gb,
    disk_info.total_gb
))

-- Check specific disk path
local var_disk = metrics.system_disk("/var")
log.info("Var disk usage: " .. string.format("%.1f%%", var_disk.percent))
-- Set simple gauge values
metrics.gauge("cpu_temperature", 65.4)
metrics.gauge("active_connections", 142)
metrics.gauge("queue_size", 23)

-- Set gauge with tags.
-- The value is read from the system memory metric so the example does not
-- depend on an undefined `memory_percent` variable.
local memory_percent = metrics.system_memory().percent
metrics.gauge("memory_usage", memory_percent, {
    server = "web-01",
    environment = "production",
    region = "us-east-1",
})

-- Update deployment status
metrics.gauge("deployment_progress", 75.5, {
    app = "frontend",
    version = "v2.1.0",
})
-- Each call records one observation under the given metric name; the third
-- argument is a table of tags attached to that observation.

-- Record response times
metrics.histogram("response_time_ms", 245.6, {
    endpoint = "/api/users",
    method = "GET",
})

-- Record payload sizes
metrics.histogram("payload_size_bytes", 1024, {
    content_type = "application/json",
})

-- Record batch sizes
metrics.histogram("batch_size", 150, {
    operation = "bulk_insert",
    table = "user_events",
})
-- metrics.timer(name, fn, tags) runs `fn` and returns a duration, which is
-- logged below in milliseconds.

-- Time function execution automatically
local duration = metrics.timer("database_query", function()
    -- Simulate database query
    local result = exec.run("sleep 0.1")
    return result
end, { query_type = "select", table = "users" })

log.info("Database query took: " .. string.format("%.2f ms", duration))

-- Time complex operations
local processing_time = metrics.timer("data_processing", function()
    -- Process large dataset.
    -- Named `values` rather than `data` so it does not shadow the `data`
    -- helper module used elsewhere in these examples.
    local values = {}
    for i = 1, 100000 do
        values[i] = math.sqrt(i) * 2.5
    end
    return #values
end, { operation = "mathematical_computation", size = "large" })

log.info("Data processing completed in: " .. string.format("%.2f ms", processing_time))
-- Create health check function

--- Score application health from database, disk and memory checks.
-- Starts at 100 and subtracts a penalty per failed check (database 20,
-- disk 30, memory 25). The score is published as the
-- "application_health_score" gauge, and an "application_health" warning
-- alert listing the issues is raised when the score is below 70.
-- @treturn boolean true when the final score is at least 70
function check_application_health()
    local health_score = 100
    local issues = {}

    -- Check database connectivity
    -- NOTE(review): pg_isready normally prints a status line even when the
    -- server accepts connections, so comparing the result to "" may flag a
    -- healthy database. Confirm what exec.run returns (output string vs.
    -- result table / exit code) before relying on this check.
    local db_result = exec.run("pg_isready -h localhost -p 5432")
    if db_result ~= "" then
        health_score = health_score - 20
        issues[#issues + 1] = "Database connection failed"
    end

    -- Check disk space
    local disk = metrics.system_disk("/")
    if disk.percent > 90 then
        health_score = health_score - 30
        issues[#issues + 1] = "Disk space critical: " .. string.format("%.1f%%", disk.percent)
    end

    -- Check memory usage
    local memory = metrics.system_memory()
    if memory.percent > 85 then
        health_score = health_score - 25
        issues[#issues + 1] = "Memory usage high: " .. string.format("%.1f%%", memory.percent)
    end

    -- Record health score
    metrics.gauge("application_health_score", health_score)

    if health_score < 70 then
        metrics.alert("application_health", {
            level = "warning",
            message = "Application health degraded: " .. table.concat(issues, ", "),
            score = health_score,
        })
    end

    return health_score >= 70
end

-- Use in tasks
ModernDSLs = {
    health_monitoring = {
        tasks = {
            health_check = {
                -- Returns (ok, message) for the task runner.
                command = function()
                    local healthy = check_application_health()
                    return healthy, healthy and "System healthy" or "System health issues detected"
                end,
            },
        },
    },
}
-- Get specific custom metric
local cpu_metric = metrics.get_custom("cpu_temperature")
if cpu_metric then
    log.info("CPU Temperature metric: " .. data.to_json(cpu_metric))
end

-- List all custom metrics.
-- The result is iterated with ipairs, so it is treated as a sequence of
-- metric names.
local all_metrics = metrics.list_custom()
log.info("Total custom metrics: " .. #all_metrics)
for i, metric_name in ipairs(all_metrics) do
    log.info(" " .. i .. ". " .. metric_name)
end
-- Workflow definition: times ten calls to a health endpoint, records each
-- duration in a histogram, and raises a counter and alert for slow calls.
ModernDSLs = {
    performance_monitoring = {
        tasks = {
            monitor_api_performance = {
                -- Returns (ok, message) for the task runner.
                command = function()
                    -- Durations above this many milliseconds count as slow (1 second).
                    local slow_threshold_ms = 1000
                    local endpoint = "health"

                    -- Start monitoring session
                    log.info("Starting API performance monitoring...")

                    -- Simulate API calls and measure performance
                    for i = 1, 10 do
                        local api_time = metrics.timer("api_call_" .. i, function()
                            -- Simulate API call
                            exec.run("curl -s -o /dev/null -w '%{time_total}' https://api.example.com/health")
                        end, { endpoint = endpoint, call_number = tostring(i) })

                        -- Record response time
                        metrics.histogram("api_response_time", api_time, { endpoint = endpoint })

                        -- Check if response time is acceptable
                        if api_time > slow_threshold_ms then
                            metrics.counter("slow_api_calls", 1, { endpoint = endpoint })
                            metrics.alert("slow_api_response", {
                                level = "warning",
                                message = string.format("Slow API response: %.2f ms", api_time),
                                response_time = api_time,
                                threshold = slow_threshold_ms,
                            })
                        end

                        -- Brief delay between calls
                        exec.run("sleep 0.1")
                    end

                    -- Get summary statistics
                    local system_health = metrics.health_status()
                    log.info("System health after API tests: " .. system_health.overall)

                    return true, "API performance monitoring completed"
                end,
            },
        },
    },
}