Monitoring & Observability
DEPRECATED
This page is deprecated. For the latest documentation, please visit DeepWiki.
This guide covers monitoring MAIF applications in production, including health checks, metrics collection, and observability patterns.
Overview
MAIF includes built-in components for:
- Health Checks: Verify system health
- Rate Limiting: Control request throughput
- Metrics: Collect and aggregate metrics
- Cost Tracking: Monitor resource usage
Built-in Components
Health Checker
MAIF provides a HealthChecker class for monitoring system health:
python
from maif import HealthChecker
# Create a health checker
health_checker = HealthChecker()
# Perform health check
status = health_checker.check()
print(f"Health status: {status}")
# Check specific components
components = health_checker.check_components()
for component, status in components.items():
    print(f"{component}: {status}")
Rate Limiter
Control request throughput with the built-in rate limiter:
python
from maif import RateLimiter
# Create a rate limiter (100 requests per second)
rate_limiter = RateLimiter(requests_per_second=100)
def handle_request(request):
    if rate_limiter.allow():
        # Process the request
        process(request)
    else:
        # Rate limited
        return "Too many requests", 429
Metrics Aggregator
Collect and aggregate metrics:
python
from maif import MetricsAggregator
# Create metrics aggregator
metrics = MetricsAggregator()
# Record metrics
metrics.record("requests_total", 1)
metrics.record("response_time_ms", 45.2)
metrics.record("artifacts_created", 1)
# Get aggregated metrics
summary = metrics.get_summary()
print(f"Total requests: {summary['requests_total']}")
print(f"Avg response time: {summary['response_time_ms_avg']}ms")
Cost Tracker
Track resource usage and costs:
python
from maif import CostTracker
# Create cost tracker
cost_tracker = CostTracker()
# Track operations
cost_tracker.record_operation("embedding_generation", tokens=1000)
cost_tracker.record_operation("storage_write", bytes=1024000)
cost_tracker.record_operation("api_call", count=1)
# Get cost summary
costs = cost_tracker.get_summary()
print(f"Total cost: ${costs['total']:.4f}")
MAIF File Monitoring
Monitor Artifact Integrity
python
from maif_api import load_maif
import os
def check_artifact_health(artifact_path: str) -> dict:
    """Check health of a MAIF artifact.

    Args:
        artifact_path: Filesystem path of the artifact to inspect.

    Returns:
        A dict with the keys ``path``, ``exists``, ``readable``,
        ``integrity`` and ``size_bytes``. ``content_count`` is added when
        loading, verifying and listing the content all succeed; ``error``
        holds the exception text when any of those steps raises.
    """
    status = {
        "path": artifact_path,
        "exists": os.path.exists(artifact_path),
        "readable": False,
        "integrity": False,
        "size_bytes": 0,
    }
    if not status["exists"]:
        return status

    try:
        # Kept inside the try so a file removed after the existence check is
        # reported through "error" instead of raising out of the health check.
        status["size_bytes"] = os.path.getsize(artifact_path)
        artifact = load_maif(artifact_path)
        status["readable"] = True
        status["integrity"] = artifact.verify()
        status["content_count"] = len(artifact.get_content_list())
    except Exception as e:
        # Broad on purpose: a health probe reports failures, it does not raise.
        status["error"] = str(e)
    return status
# Check artifact health
health = check_artifact_health("my_artifact.maif")
print(f"Artifact healthy: {health['integrity']}")
Monitor Multiple Artifacts
python
import os
from maif_api import load_maif
class ArtifactMonitor:
    """Monitor a directory of MAIF artifacts."""

    def __init__(self, directory: str):
        """Remember the directory whose ``*.maif`` files will be checked."""
        self.directory = directory
        # Initialised empty; no method of this class reads or updates it.
        self.last_check = {}

    def scan(self) -> list:
        """Scan and check all artifacts.

        Returns:
            One status dict (as built by ``_check_file``) for every entry
            in ``self.directory`` whose name ends with ``.maif``, ordered
            by filename.
        """
        # os.listdir returns entries in arbitrary order; sorting keeps
        # successive scans comparable.
        return [
            self._check_file(os.path.join(self.directory, filename))
            for filename in sorted(os.listdir(self.directory))
            if filename.endswith('.maif')
        ]

    def _check_file(self, path: str) -> dict:
        """Check a single artifact file.

        Returns:
            A dict with ``path``, ``name``, ``healthy``, ``size_bytes`` and
            ``modified``; ``blocks`` is added when the artifact loads and
            verifies without raising, ``error`` when it does not.
        """
        status = {
            "path": path,
            "name": os.path.basename(path),
            "healthy": False,
            "size_bytes": os.path.getsize(path),
            "modified": os.path.getmtime(path),
        }
        try:
            artifact = load_maif(path)
            status["healthy"] = artifact.verify()
            status["blocks"] = len(artifact.get_content_list())
        except Exception as e:
            # A failing artifact is recorded as unhealthy; the scan goes on.
            status["error"] = str(e)
        return status

    def get_unhealthy(self) -> list:
        """Get list of unhealthy artifacts (runs a fresh ``scan``)."""
        return [r for r in self.scan() if not r.get("healthy", False)]
# Usage
monitor = ArtifactMonitor("./artifacts")
unhealthy = monitor.get_unhealthy()
if unhealthy:
    print(f"Warning: {len(unhealthy)} unhealthy artifacts found")
Integration with Standard Tools
Python Logging
python
import logging
from maif_api import create_maif
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('maif_app')
def create_monitored_artifact(name: str, content: str):
    """Create artifact with logging."""
    logger.info(f"Creating artifact: {name}")
    try:
        artifact = create_maif(name)
        artifact.add_text(content)
        artifact.save(f"{name}.maif")
        logger.info(f"Artifact created successfully: {name}")
        return True
    except Exception as e:
        logger.error(f"Failed to create artifact {name}: {e}")
        return False
Prometheus Metrics
Export metrics to Prometheus:
python
# requirements: prometheus_client
from prometheus_client import Counter, Histogram, start_http_server
from maif_api import create_maif, load_maif
import time
# Define metrics
ARTIFACTS_CREATED = Counter(
'maif_artifacts_created_total',
'Total number of artifacts created',
['agent_id']
)
ARTIFACT_OPERATIONS = Histogram(
'maif_operation_duration_seconds',
'Duration of MAIF operations',
['operation']
)
ARTIFACT_SIZE = Histogram(
'maif_artifact_size_bytes',
'Size of MAIF artifacts'
)
def create_artifact_with_metrics(agent_id: str, content: str, path: str):
    """Create artifact with Prometheus metrics.

    Creates an artifact for ``agent_id``, adds ``content`` as text, saves it
    to ``path`` and then records the creation count, the elapsed time and
    the size of the saved file.

    Args:
        agent_id: Passed to ``create_maif`` and used as the ``agent_id``
            label of the creation counter.
        content: Text added to the artifact.
        path: Destination file; also the file whose size is observed.

    Returns:
        The artifact object returned by ``create_maif``.
    """
    # Function-local, as in the original snippet.
    import os

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments.
    start = time.perf_counter()
    artifact = create_maif(agent_id)
    artifact.add_text(content)
    artifact.save(path)

    # Record metrics
    duration = time.perf_counter() - start
    ARTIFACTS_CREATED.labels(agent_id=agent_id).inc()
    ARTIFACT_OPERATIONS.labels(operation='create').observe(duration)
    ARTIFACT_SIZE.observe(os.path.getsize(path))
    return artifact
# Start metrics server
start_http_server(8000)
print("Metrics available at http://localhost:8000/metrics")
Structured JSON Logging
python
import json
import logging
from datetime import datetime
class JSONFormatter(logging.Formatter):
    """JSON log formatter for MAIF."""

    # Optional attributes copied into the output when present on the record
    # (for example when supplied through ``extra=`` on a logging call).
    _EXTRA_FIELDS = ('artifact_id', 'operation', 'duration_ms')

    def format(self, record):
        """Render ``record`` as a single JSON object string.

        The object always has ``timestamp``, ``level``, ``logger`` and
        ``message``; ``artifact_id``, ``operation`` and ``duration_ms`` are
        included only when the record carries them.
        """
        # Function-local so the block does not depend on a file-level import.
        from datetime import timezone

        # Use the record's own creation time (not the moment of formatting)
        # and make it timezone-aware; datetime.utcnow() is deprecated.
        timestamp = datetime.fromtimestamp(record.created, tz=timezone.utc)
        log_entry = {
            "timestamp": timestamp.isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        # Add extra fields
        for field in self._EXTRA_FIELDS:
            if hasattr(record, field):
                log_entry[field] = getattr(record, field)
        return json.dumps(log_entry)
# Configure JSON logging
handler = logging.StreamHandler()
handler.setFormatter(JSONFormatter())
logger = logging.getLogger('maif_json')
logger.addHandler(handler)
logger.setLevel(logging.INFO)
# Log with extra context
logger.info(
"Artifact created",
extra={
'artifact_id': 'artifact_123',
'operation': 'create',
'duration_ms': 45.2
}
)
Health Check Endpoint
Create a simple health check endpoint:
python
from http.server import HTTPServer, BaseHTTPRequestHandler
import json
from maif_api import load_maif
import os
class HealthHandler(BaseHTTPRequestHandler):
    """HTTP handler for health checks."""

    def do_GET(self):
        """Answer ``GET /health`` with the health report as JSON.

        Responds 200 when the report is healthy and 503 otherwise. Any
        other path receives a 404 instead of no response at all.
        """
        # Compare only the path component so a query string does not
        # prevent the match.
        if self.path.split('?', 1)[0] != '/health':
            self.send_error(404)
            return

        status = self.check_health()
        body = json.dumps(status).encode()
        self.send_response(200 if status['healthy'] else 503)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def check_health(self) -> dict:
        """Check overall system health.

        Returns:
            ``{'healthy': bool, 'checks': {...}}`` where ``checks`` always
            has ``artifacts_dir`` and has ``sample_artifact`` only when
            ``health_check.maif`` exists in the artifacts directory.
            ``healthy`` is true when every recorded check is truthy.
        """
        status = {
            'healthy': True,
            'checks': {},
        }

        # Check if artifacts directory exists
        artifacts_dir = os.environ.get('ARTIFACTS_DIR', './artifacts')
        status['checks']['artifacts_dir'] = os.path.isdir(artifacts_dir)

        # Check sample artifact if it exists
        sample_path = os.path.join(artifacts_dir, 'health_check.maif')
        if os.path.exists(sample_path):
            try:
                artifact = load_maif(sample_path)
                status['checks']['sample_artifact'] = artifact.verify()
            except Exception:
                # Not a bare except: KeyboardInterrupt/SystemExit must still
                # be able to stop the server.
                status['checks']['sample_artifact'] = False

        # Overall health
        status['healthy'] = all(status['checks'].values())
        return status
# Run health check server
def run_health_server(port: int = 8080):
    """Serve ``HealthHandler`` on all interfaces at ``port`` until stopped.

    Blocks in ``serve_forever()``; the listening socket is closed when the
    loop exits, including on an exception such as KeyboardInterrupt.
    """
    # The context manager closes the server socket on the way out.
    with HTTPServer(('', port), HealthHandler) as server:
        print(f"Health check server running on port {port}")
        server.serve_forever()
# run_health_server() # Uncomment to run
Monitoring Patterns
Operation Timing
python
import time
from contextlib import contextmanager
@contextmanager
def timed_operation(name: str, logger=None):
    """Context manager for timing operations.

    Measures how long the ``with`` body runs and reports it in
    milliseconds, whether the body finishes or raises. An exception from
    the body still propagates to the caller.

    Args:
        name: Label placed at the start of the report message.
        logger: Object with an ``info(message)`` method. When omitted the
            message is printed instead.
    """
    # perf_counter is monotonic, so the result cannot go negative or jump
    # when the system clock is adjusted.
    start = time.perf_counter()
    outcome = "failed after"
    try:
        yield
        # Reached only when the body did not raise.
        outcome = "completed in"
    finally:
        duration = (time.perf_counter() - start) * 1000  # ms
        message = f"{name} {outcome} {duration:.2f}ms"
        if logger is not None:
            logger.info(message)
        else:
            print(message)
# Usage
with timed_operation("artifact_creation"):
    artifact = create_maif("timed-artifact")
    artifact.add_text("Some content")
    artifact.save("timed.maif")
Error Tracking
python
from collections import defaultdict
from datetime import datetime
class ErrorTracker:
    """Track and aggregate errors."""

    def __init__(self):
        # error_type -> list of recorded entries, in insertion order.
        self.errors = defaultdict(list)

    def record(self, error_type: str, message: str, context: dict = None):
        """Append one error entry under ``error_type``.

        Each entry stores the current local time as an ISO-8601 string, the
        message, and ``context`` (an empty dict when none is given).
        """
        self.errors[error_type].append({
            "timestamp": datetime.now().isoformat(),
            "message": message,
            "context": context or {},
        })

    def get_summary(self) -> dict:
        """Return a mapping of error type to number of recorded entries."""
        return {
            error_type: len(errors)
            for error_type, errors in self.errors.items()
        }

    def get_recent(self, error_type: str = None, limit: int = 10) -> list:
        """Return up to ``limit`` of the most recent entries, oldest first.

        Args:
            error_type: When given, only entries of this type are returned;
                otherwise entries of all types are merged and ordered by
                timestamp.
            limit: Maximum number of entries; zero or less yields ``[]``.
        """
        # lst[-0:] is the whole list, so a non-positive limit must be
        # handled before slicing.
        if limit <= 0:
            return []
        if error_type:
            # .get avoids inserting an empty list for unknown types, which
            # would otherwise appear in get_summary() as a zero count.
            return self.errors.get(error_type, [])[-limit:]
        all_errors = []
        for errors in self.errors.values():
            all_errors.extend(errors)
        return sorted(all_errors, key=lambda x: x['timestamp'])[-limit:]
# Usage
tracker = ErrorTracker()
try:
    artifact = load_maif("nonexistent.maif")
except Exception as e:
    tracker.record("load_error", str(e), {"path": "nonexistent.maif"})
print(tracker.get_summary())
Best Practices
- Monitor artifact integrity - Regular integrity checks prevent data corruption
- Use structured logging - JSON logs are easier to analyze
- Track operation timing - Identify performance bottlenecks
- Set up alerts - Get notified of failures immediately
- Keep metrics lightweight - Don't impact application performance
Next Steps
- Performance → - Optimize based on monitoring data
- Architecture → - System design patterns
- API Reference → - Complete documentation