#!/bin/bash
###############################################################################
# PostgreSQL Backup Script for Gravl
# 
# Performs full database backups to AWS S3 with compression and retention
# Supports both manual and automated (CronJob) execution
#
# Usage: ./backup.sh [OPTIONS]
# 
# Options:
#   --full              Perform full backup (default)
#   --incremental       Perform incremental backup (WAL only)
#   --region REGION     AWS region (default: eu-north-1)
#   --bucket BUCKET     S3 bucket name (default: gravl-backups-REGION)
#   --db-pod POD        Database pod name (default: gravl-db-0)
#   --namespace NS      Kubernetes namespace (default: gravl-prod)
#   --dry-run           Show what would be done without doing it
#   --debug             Enable debug output
#   --help              Show this help message
#
# Example:
#   ./backup.sh --full --region eu-north-1
#   ./backup.sh --full --dry-run
#
###############################################################################

set -euo pipefail

# Color codes for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
BACKUP_TYPE="full"
AWS_REGION="eu-north-1"
S3_BUCKET=""
DB_POD="gravl-db-0"
DB_NAMESPACE="gravl-prod"
DB_USER="gravl_admin"
DB_NAME="gravl"
DRY_RUN=false
DEBUG=false
BACKUP_DIR="/tmp/gravl-backup-$$"
RETENTION_DAYS=30
COMPRESSION_LEVEL=6

# Logging functions
log_info() {
    echo -e "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} INFO: $*" >&2
}

log_success() {
    echo -e "${GREEN}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} SUCCESS: $*" >&2
}

log_warn() {
    echo -e "${YELLOW}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} WARN: $*" >&2
}

log_error() {
    echo -e "${RED}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} ERROR: $*" >&2
}

log_debug() {
    if [ "$DEBUG" = true ]; then
        echo -e "${BLUE}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} DEBUG: $*" >&2
    fi
}

# Print help
print_help() {
    cat << 'HELP'
PostgreSQL Backup Script for Gravl

Usage: ./backup.sh [OPTIONS]

Options:
  --full              Perform full backup (default)
  --incremental       Perform incremental backup (WAL only)
  --region REGION     AWS region (default: eu-north-1)
  --bucket BUCKET     S3 bucket name
  --db-pod POD        Database pod name (default: gravl-db-0)
  --namespace NS      Kubernetes namespace (default: gravl-prod)
  --dry-run           Show what would be done without doing it
  --debug             Enable debug output
  --help              Show this help message

Examples:
  ./backup.sh --full --region eu-north-1
  ./backup.sh --full --dry-run
  ./backup.sh --incremental --bucket my-backup-bucket

HELP
}

# Parse command line arguments
parse_args() {
    while [[ $# -gt 0 ]]; do
        case $1 in
            --full)
                BACKUP_TYPE="full"
                shift
                ;;
            --incremental)
                BACKUP_TYPE="incremental"
                shift
                ;;
            --region)
                AWS_REGION="$2"
                shift 2
                ;;
            --bucket)
                S3_BUCKET="$2"
                shift 2
                ;;
            --db-pod)
                DB_POD="$2"
                shift 2
                ;;
            --namespace)
                DB_NAMESPACE="$2"
                shift 2
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --debug)
                DEBUG=true
                shift
                ;;
            --help)
                print_help
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                print_help
                exit 1
                ;;
        esac
    done
}

# Validate prerequisites
validate_prerequisites() {
    log_info "Validating prerequisites..."
    
    # Check for required commands
    for cmd in kubectl aws gzip date; do
        if ! command -v $cmd &> /dev/null; then
            log_error "Required command not found: $cmd"
            exit 1
        fi
    done
    log_debug "✓ All required commands found"
    
    # Check kubectl context
    if ! kubectl cluster-info &> /dev/null; then
        log_error "Cannot connect to Kubernetes cluster"
        exit 1
    fi
    log_debug "✓ Kubernetes cluster accessible"
    
    # Check database pod
    if ! kubectl get pod $DB_POD -n $DB_NAMESPACE &> /dev/null; then
        log_error "Database pod not found: $DB_POD in namespace $DB_NAMESPACE"
        exit 1
    fi
    log_debug "✓ Database pod found"
    
    # Set S3 bucket if not provided
    if [ -z "$S3_BUCKET" ]; then
        S3_BUCKET="gravl-backups-${AWS_REGION}"
        log_debug "Using default bucket: $S3_BUCKET"
    fi
    
    # Check AWS credentials
    if ! aws s3 ls "s3://${S3_BUCKET}" --region "$AWS_REGION" &> /dev/null; then
        log_error "Cannot access S3 bucket: s3://$S3_BUCKET in region $AWS_REGION"
        exit 1
    fi
    log_debug "✓ S3 bucket accessible"
}

# Perform full backup
backup_full() {
    local backup_date=$(date +%Y-%m-%d)
    local backup_time=$(date +%H%M%S)
    local backup_file="${DB_NAME}_${backup_date}.sql.gz"
    local backup_path="$BACKUP_DIR/$backup_file"
    local manifest_file="${backup_file}.manifest.json"
    
    log_info "Starting full backup: $backup_file"
    log_debug "Backup path: $backup_path"
    
    # Create backup directory
    mkdir -p "$BACKUP_DIR"
    
    # Perform backup
    log_info "Executing pg_dump from pod $DB_POD..."
    if ! kubectl exec -it "$DB_POD" -n "$DB_NAMESPACE" -- \
        pg_dump -h localhost -U "$DB_USER" -d "$DB_NAME" --no-password 2>/dev/null | \
        gzip -"$COMPRESSION_LEVEL" > "$backup_path"; then
        log_error "Backup failed"
        rm -rf "$BACKUP_DIR"
        exit 1
    fi
    
    if [ ! -f "$backup_path" ]; then
        log_error "Backup file was not created"
        exit 1
    fi
    
    # Calculate file size and checksum
    local backup_size=$(stat -f%z "$backup_path" 2>/dev/null || stat -c%s "$backup_path")
    local backup_checksum=$(sha256sum "$backup_path" | awk '{print $1}')
    
    log_success "Backup created: $backup_file ($(numfmt --to=iec-i --suffix=B $backup_size 2>/dev/null || echo "$backup_size bytes"))"
    log_debug "SHA256: $backup_checksum"
    
    # Create manifest
    log_info "Creating backup manifest..."
    cat > "$BACKUP_DIR/$manifest_file" << MANIFEST
{
  "backup_id": "${backup_file%.*}",
  "timestamp": "$(date -Iseconds)",
  "size_bytes": $backup_size,
  "size_human": "$(numfmt --to=iec-i --suffix=B $backup_size 2>/dev/null || echo "$backup_size bytes")",
  "checksum_sha256": "$backup_checksum",
  "backup_type": "full",
  "database": "$DB_NAME",
  "status": "success",
  "expiry": "$(date -u -d "+${RETENTION_DAYS} days" -Iseconds 2>/dev/null || date -u -v+${RETENTION_DAYS}d -Iseconds)"
}
MANIFEST
    
    # Upload to S3
    upload_to_s3 "$backup_path" "daily-backups/$backup_file"
    upload_to_s3 "$BACKUP_DIR/$manifest_file" "daily-backups/$manifest_file"
    
    # Cleanup
    rm -rf "$BACKUP_DIR"
}

# Upload file to S3
upload_to_s3() {
    local file_path="$1"
    local s3_key="$2"
    local s3_uri="s3://${S3_BUCKET}/${s3_key}"
    
    log_info "Uploading to S3: $s3_uri"
    
    if [ "$DRY_RUN" = true ]; then
        log_debug "[DRY RUN] Would upload: $file_path -> $s3_uri"
        return 0
    fi
    
    if ! aws s3 cp "$file_path" "$s3_uri" \
        --region "$AWS_REGION" \
        --storage-class STANDARD_IA \
        --sse AES256 \
        --metadata "backup-date=$(date +%Y-%m-%d),hostname=$(hostname)"; then
        log_error "S3 upload failed: $s3_uri"
        exit 1
    fi
    
    log_success "Uploaded: $s3_key"
}

# Cleanup old backups
cleanup_old_backups() {
    log_info "Cleaning up backups older than $RETENTION_DAYS days..."
    
    if [ "$DRY_RUN" = true ]; then
        log_debug "[DRY RUN] Would clean up old backups"
        return 0
    fi
    
    # List and delete old backups
    local cutoff_date=$(date -u -d "-$RETENTION_DAYS days" +%Y-%m-%d 2>/dev/null || \
                        date -u -v-${RETENTION_DAYS}d +%Y-%m-%d)
    
    log_debug "Cutoff date: $cutoff_date"
    
    # Note: This is simplified. A production system should use more sophisticated cleanup.
    log_info "Old backup cleanup configured for S3 lifecycle policies"
}

# Generate and upload metrics
upload_metrics() {
    log_info "Recording backup metrics..."
    
    # These would be pushed to Prometheus/monitoring system
    # For now, just log the completion
    
    log_success "Backup metrics recorded"
}

# Main execution
main() {
    log_info "=========================================="
    log_info "Gravl PostgreSQL Backup Script"
    log_info "=========================================="
    log_info "Backup Type:  $BACKUP_TYPE"
    log_info "Region:       $AWS_REGION"
    log_info "Bucket:       $S3_BUCKET"
    log_info "Pod:          $DB_POD"
    log_info "Namespace:    $DB_NAMESPACE"
    log_info "Retention:    $RETENTION_DAYS days"
    log_info "Dry Run:      $DRY_RUN"
    log_info "=========================================="
    log_info ""
    
    parse_args "$@"
    validate_prerequisites
    
    case "$BACKUP_TYPE" in
        full)
            backup_full
            ;;
        incremental)
            log_info "Incremental backup: WAL archiving is continuous (see PostgreSQL WAL config)"
            ;;
        *)
            log_error "Unknown backup type: $BACKUP_TYPE"
            exit 1
            ;;
    esac
    
    cleanup_old_backups
    upload_metrics
    
    log_info "=========================================="
    log_success "Backup completed successfully!"
    log_info "=========================================="
}

# Run main
main "$@"