#!/bin/bash
###############################################################################
# PostgreSQL Restore Script for Gravl
#
# Downloads a gzip-compressed SQL dump from S3, copies it into the target
# database pod and replays it with psql. Optionally runs validation queries
# after the restore, and always writes a JSON restore report to /tmp.
#
# NOTE: --pitr-time is accepted and recorded in the report, but this script
# only performs a full restore; it does not replay WAL up to that timestamp.
#
# Usage: ./restore.sh [OPTIONS]
#
# Options:
#   --backup-file FILE    Backup file to restore (required)
#   --target-pod POD      Target pod for restore (default: gravl-db-0)
#   --namespace NS        Kubernetes namespace (default: gravl-prod)
#   --pitr-time TIME      Point-in-time recovery timestamp (optional)
#   --region REGION       AWS region (default: eu-north-1)
#   --bucket BUCKET       S3 bucket name
#   --validate            Run validation queries after restore
#   --dry-run             Show what would be done without doing it
#   --debug               Enable debug output
#   --help                Show this help message
#
# Example:
#   ./restore.sh --backup-file gravl_2026-03-04.sql.gz
#   ./restore.sh --backup-file gravl_2026-03-04.sql.gz --pitr-time "2026-03-04 10:30:00 UTC"
#   ./restore.sh --backup-file gravl_2026-03-04.sql.gz --validate
#
###############################################################################

set -euo pipefail

# Color codes for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration (overridable through command line options where noted)
BACKUP_FILE=""
TARGET_POD="gravl-db-0"
DB_NAMESPACE="gravl-prod"
# Container inside the pod that holds the database. The dump is copied into
# this container, so every kubectl exec must target the same one.
DB_CONTAINER="postgresql"
DB_USER="gravl_admin"
DB_NAME="gravl"
PITR_TIME=""
AWS_REGION="eu-north-1"
S3_BUCKET=""
VALIDATE=false
DRY_RUN=false
DEBUG=false
# Created with mktemp in download_backup; stays empty in dry-run mode.
TEMP_DIR=""

#######################################
# Write one timestamped, colored log line to stderr.
# Arguments: $1 - color escape, $2 - level label, remaining - message
#######################################
_log() {
  local color=$1 level=$2
  shift 2
  printf '%b[%s]%b %s: %s\n' \
    "$color" "$(date '+%Y-%m-%d %H:%M:%S')" "$NC" "$level" "$*" >&2
}

# Logging functions
log_info() { _log "$BLUE" "INFO" "$@"; }
log_success() { _log "$GREEN" "SUCCESS" "$@"; }
log_warn() { _log "$YELLOW" "WARN" "$@"; }
log_error() { _log "$RED" "ERROR" "$@"; }

# Debug lines are only emitted when --debug was given.
log_debug() {
  if [[ "$DEBUG" == true ]]; then
    _log "$BLUE" "DEBUG" "$@"
  fi
}

#######################################
# Remove the local download directory. Installed as an EXIT trap so it runs
# on every exit path, including failures.
# Globals: TEMP_DIR (read)
#######################################
cleanup() {
  if [[ -n "$TEMP_DIR" && -d "$TEMP_DIR" ]]; then
    rm -rf -- "$TEMP_DIR"
  fi
}
trap cleanup EXIT

# Print help
print_help() {
  cat << 'HELP'
PostgreSQL Restore Script for Gravl

Usage: ./restore.sh [OPTIONS]

Options:
  --backup-file FILE    Backup file to restore (required)
  --target-pod POD      Target pod for restore (default: gravl-db-0)
  --namespace NS        Kubernetes namespace (default: gravl-prod)
  --pitr-time TIME      Point-in-time recovery timestamp (optional)
  --region REGION       AWS region (default: eu-north-1)
  --bucket BUCKET       S3 bucket name
  --validate            Run validation queries after restore
  --dry-run             Show what would be done without doing it
  --debug               Enable debug output
  --help                Show this help message

Examples:
  ./restore.sh --backup-file gravl_2026-03-04.sql.gz
  ./restore.sh --backup-file gravl_2026-03-04.sql.gz --pitr-time "2026-03-04 10:30:00 UTC"
  ./restore.sh --backup-file gravl_2026-03-04.sql.gz --validate
  ./restore.sh --backup-file gravl_2026-03-04.sql.gz --dry-run
HELP
}

#######################################
# Abort with a readable message when an option is missing its value.
# Without this check, "$2" trips `set -u` with an "unbound variable" error.
# Arguments: $1 - option name, $2 - number of arguments left (option included)
#######################################
require_value() {
  if (( $2 < 2 )); then
    log_error "Option $1 requires a value"
    exit 1
  fi
}

#######################################
# Parse command line arguments into the configuration globals.
# Exits 0 after --help and 1 on an unknown option or a missing value.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --backup-file)
        require_value "$1" "$#"
        BACKUP_FILE="$2"
        shift 2
        ;;
      --target-pod)
        require_value "$1" "$#"
        TARGET_POD="$2"
        shift 2
        ;;
      --namespace)
        require_value "$1" "$#"
        DB_NAMESPACE="$2"
        shift 2
        ;;
      --pitr-time)
        require_value "$1" "$#"
        PITR_TIME="$2"
        shift 2
        ;;
      --region)
        require_value "$1" "$#"
        AWS_REGION="$2"
        shift 2
        ;;
      --bucket)
        require_value "$1" "$#"
        S3_BUCKET="$2"
        shift 2
        ;;
      --validate)
        VALIDATE=true
        shift
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --debug)
        DEBUG=true
        shift
        ;;
      --help)
        print_help
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        print_help
        exit 1
        ;;
    esac
  done
}

#######################################
# Check required arguments, local tools, cluster access, the target pod and
# the S3 bucket. Exits 1 on the first failed check.
# Globals: S3_BUCKET (defaulted to gravl-backups-<region> when empty)
#######################################
validate_prerequisites() {
  local cmd

  log_info "Validating prerequisites..."

  if [[ -z "$BACKUP_FILE" ]]; then
    log_error "Backup file is required (--backup-file)"
    print_help
    exit 1
  fi

  # Check for required commands
  for cmd in kubectl aws gunzip; do
    if ! command -v "$cmd" &> /dev/null; then
      log_error "Required command not found: $cmd"
      exit 1
    fi
  done
  log_debug "✓ All required commands found"

  # Check kubectl context
  if ! kubectl cluster-info &> /dev/null; then
    log_error "Cannot connect to Kubernetes cluster"
    exit 1
  fi
  log_debug "✓ Kubernetes cluster accessible"

  # Check target pod
  if ! kubectl get pod "$TARGET_POD" -n "$DB_NAMESPACE" &> /dev/null; then
    log_error "Target pod not found: $TARGET_POD in namespace $DB_NAMESPACE"
    exit 1
  fi
  log_debug "✓ Target pod found"

  # Set S3 bucket if not provided
  if [[ -z "$S3_BUCKET" ]]; then
    S3_BUCKET="gravl-backups-${AWS_REGION}"
    log_debug "Using default bucket: $S3_BUCKET"
  fi

  # Check AWS credentials
  if ! aws s3 ls "s3://${S3_BUCKET}" --region "$AWS_REGION" &> /dev/null; then
    log_error "Cannot access S3 bucket: s3://$S3_BUCKET in region $AWS_REGION"
    exit 1
  fi
  log_debug "✓ S3 bucket accessible"
}

#######################################
# Download the backup from s3://<bucket>/daily-backups/ into a fresh
# temporary directory.
# Globals: TEMP_DIR (written)
#######################################
download_backup() {
  local s3_uri="s3://${S3_BUCKET}/daily-backups/${BACKUP_FILE}"
  local local_path file_size human_size

  log_info "Downloading backup from S3: $s3_uri"

  if [[ "$DRY_RUN" == true ]]; then
    log_debug "[DRY RUN] Would download: $s3_uri"
    return 0
  fi

  # mktemp gives an unpredictable, private directory; the EXIT trap removes it.
  if ! TEMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/gravl-restore-XXXXXX"); then
    log_error "Failed to create temporary directory"
    exit 1
  fi
  local_path="$TEMP_DIR/$BACKUP_FILE"

  if ! aws s3 cp "$s3_uri" "$local_path" --region "$AWS_REGION"; then
    log_error "Failed to download backup from S3"
    exit 1
  fi

  if [[ ! -f "$local_path" ]]; then
    log_error "Backup file was not downloaded"
    exit 1
  fi

  # wc -c is portable across GNU and BSD userlands; strip the padding some
  # implementations add around the number.
  file_size=$(wc -c < "$local_path" | tr -d '[:space:]')
  human_size=$(numfmt --to=iec-i --suffix=B "$file_size" 2>/dev/null) \
    || human_size="$file_size bytes"
  log_success "Backup downloaded ($human_size)"
}

# Verify that the downloaded backup is a valid gzip stream.
verify_backup() {
  log_info "Verifying backup integrity..."

  if [[ "$DRY_RUN" == true ]]; then
    log_debug "[DRY RUN] Would verify backup"
    return 0
  fi

  # Check if backup file is valid gzip
  if ! gunzip -t "$TEMP_DIR/$BACKUP_FILE" &> /dev/null; then
    log_error "Backup file is corrupted or not valid gzip"
    exit 1
  fi

  log_success "Backup integrity verified"
}

#######################################
# Copy the dump into the database container and replay it with psql.
# The copy inside the pod is removed whether or not the restore succeeds.
# Exits 1 when the copy or the restore fails.
#######################################
restore_full() {
  local remote_path="/tmp/$BACKUP_FILE"
  local restore_status=0

  log_info "Starting full database restore to pod: $TARGET_POD"

  if [[ "$DRY_RUN" == true ]]; then
    log_debug "[DRY RUN] Would restore backup to $TARGET_POD"
    return 0
  fi

  # Copy backup to pod
  log_info "Copying backup to pod..."
  if ! kubectl cp "$TEMP_DIR/$BACKUP_FILE" \
    "$DB_NAMESPACE/$TARGET_POD:$remote_path" \
    --container "$DB_CONTAINER"; then
    log_error "Failed to copy backup to pod"
    exit 1
  fi
  log_debug "✓ Backup copied to pod"

  # Execute restore in pod.
  # - Values are passed as positional parameters instead of being spliced
  #   into the command string, so odd characters cannot alter the command.
  # - gunzip -t runs first because the remote sh may lack pipefail, which
  #   would let a truncated copy go unnoticed in the pipeline below.
  # - ON_ERROR_STOP makes psql exit non-zero on the first SQL error.
  # - Only stdout is discarded; psql errors stay visible on stderr.
  log_info "Executing restore in pod..."
  kubectl exec "$TARGET_POD" -n "$DB_NAMESPACE" -c "$DB_CONTAINER" -- \
    sh -c 'gunzip -t "$1" && gunzip -c "$1" | psql -v ON_ERROR_STOP=1 -U "$2" -d "$3" > /dev/null' \
    sh "$remote_path" "$DB_USER" "$DB_NAME" || restore_status=$?

  # Cleanup backup file from pod (best effort, also after a failed restore)
  kubectl exec "$TARGET_POD" -n "$DB_NAMESPACE" -c "$DB_CONTAINER" -- \
    rm -f "$remote_path" 2> /dev/null || true

  if (( restore_status != 0 )); then
    log_error "Restore failed during SQL execution"
    exit 1
  fi

  log_success "Database restore completed"
}

#######################################
# Run a single-value query in the database container.
# -t -A makes psql print the bare value without header or column padding.
# Arguments: $1 - SQL text
# Outputs:   query result on stdout
# Returns:   exit status of kubectl/psql
#######################################
run_scalar_query() {
  kubectl exec "$TARGET_POD" -n "$DB_NAMESPACE" -c "$DB_CONTAINER" -- \
    psql -U "$DB_USER" -d "$DB_NAME" -t -A -c "$1"
}

#######################################
# Run post-restore sanity queries when --validate was given.
# Problems are reported as warnings and do not abort the script.
#######################################
validate_data() {
  local table_count="" sample_check=""

  if [[ "$VALIDATE" == false ]]; then
    return 0
  fi

  log_info "Running data integrity validation..."

  if [[ "$DRY_RUN" == true ]]; then
    log_debug "[DRY RUN] Would run validation queries"
    return 0
  fi

  # Table count check
  log_info "Checking table counts..."
  table_count=$(run_scalar_query \
    "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = 'public'") \
    || table_count=""
  table_count=${table_count//[[:space:]]/}

  if [[ ! "$table_count" =~ ^[0-9]+$ ]]; then
    log_warn "Unable to determine table count"
  elif (( table_count == 0 )); then
    log_warn "No tables found in restored database"
  else
    log_success "✓ Found $table_count tables"
  fi

  # Sample data check
  log_info "Checking sample data..."
  sample_check=$(run_scalar_query \
    "SELECT 'OK' WHERE EXISTS (SELECT 1 FROM pg_tables WHERE schemaname = 'public' LIMIT 1)") \
    || sample_check=""
  sample_check=${sample_check//[[:space:]]/}

  if [[ "$sample_check" == "OK" ]]; then
    log_success "✓ Data validation passed"
  else
    log_warn "Unable to verify sample data"
  fi
}

#######################################
# Write a JSON summary of this run to /tmp and print it to stdout.
# The dry_run field lets consumers tell a rehearsal from a real restore.
#######################################
generate_report() {
  local report_file
  report_file="/tmp/restore_report_$(date +%Y%m%d_%H%M%S).json"

  log_info "Generating restore report: $report_file"

  cat > "$report_file" << REPORT
{
  "restore_id": "restore_$(date +%s)",
  "timestamp": "$(date -Iseconds)",
  "backup_file": "$BACKUP_FILE",
  "target_pod": "$TARGET_POD",
  "namespace": "$DB_NAMESPACE",
  "pitr_time": "${PITR_TIME:-none}",
  "status": "success",
  "validated": $VALIDATE,
  "dry_run": $DRY_RUN
}
REPORT

  log_success "Restore report: $report_file"
  cat "$report_file"
}

#######################################
# Main execution: parse options, print the effective configuration, then run
# the restore pipeline step by step.
#######################################
main() {
  # Parse first so the banner shows the values actually in effect.
  parse_args "$@"

  log_info "=========================================="
  log_info "Gravl PostgreSQL Restore Script"
  log_info "=========================================="
  log_info "Backup File: $BACKUP_FILE"
  log_info "Target Pod: $TARGET_POD"
  log_info "Namespace: $DB_NAMESPACE"
  log_info "Region: $AWS_REGION"
  # The default bucket is only assigned during validation; show it here too.
  log_info "Bucket: ${S3_BUCKET:-gravl-backups-${AWS_REGION}}"
  log_info "Validate: $VALIDATE"
  log_info "Dry Run: $DRY_RUN"
  log_info "=========================================="
  log_info ""

  if [[ -n "$PITR_TIME" ]]; then
    log_warn "--pitr-time is only recorded in the report; this script performs a full restore and does not replay WAL"
  fi

  validate_prerequisites
  download_backup
  verify_backup
  restore_full
  validate_data
  generate_report

  # The local download directory is removed by the EXIT trap.

  log_info "=========================================="
  log_success "Restore completed successfully!"
  log_info "=========================================="
}

# Run main
main "$@"