#!/usr/local/bin/bash
#/ Usage: ghe-backup-mssql
#/ 
#/
#/ Note: This command typically isn't called directly. It's invoked by
#/ ghe-backup.
set -e

# Bring in the backup configuration
# shellcheck source=share/github-backup-utils/ghe-backup-config
. "$( dirname "${BASH_SOURCE[0]}" )/ghe-backup-config"

# Set up remote host and root backup snapshot directory based on config
backup_dir="$GHE_SNAPSHOT_DIR/mssql"
last_mssql=
backup_command=
backup_type=
full_expire=
diff_expire=
tran_expire=

# Check if the export tool is available in this version
export_tool_available() {
  if [ -z "$GHE_TEST_REMOTE_VERSION" ]; then
    ghe-ssh "$GHE_HOSTNAME" "test -e /usr/local/bin/ghe-export-mssql"
  else
    # Always return available for test
    return 0
  fi
}

if ! export_tool_available; then
  ghe_verbose "ghe-export-mssql is not available"
  exit
fi

add_minute() {
  # Expect date string in the format of yyyymmddTHHMMSS
  # Here parse date differently depending on GNU Linux vs BSD MacOS
  if date -v -1d > /dev/null 2>&1; then
    echo "$(date -v +$2M -ujf'%Y%m%dT%H%M%S' $1 +%Y%m%dT%H%M%S)"
  else
    dt=$1
    echo "$(date -u '+%Y%m%dT%H%M%S' -d "${dt:0:8} ${dt:9:2}:${dt:11:2}:${dt:13:2} $2 minutes")"
  fi
}

find_timestamp() {
  filename="${1##*/}"
  IFS='@' read -ra parts <<< "$filename"
  datetime_part=${parts[1]:0:15}
  echo $datetime_part
}

actions_dbs() {
  all_dbs=$(echo 'set -o pipefail; ghe-mssql-console -y -n -q "SET NOCOUNT ON; SELECT name FROM sys.databases"' | ghe-ssh "$GHE_HOSTNAME" /bin/bash)
  for db in $all_dbs; do
    if [[ ! "$db" =~ ^(master|tempdb|model|msdb)$ ]] && [[ "$db" =~ ^[a-zA-Z0-9_-]+$ ]]; then
      echo "$db"
    fi
  done
}

ensure_same_dbs() {
  locals=()
  while read -r file; do
    filename=$(basename "$file")
    locals+=("$filename")
  done < <(find "$1" \( -name "*.bak" -o -name "*.diff" -o -name "*.log" \))

  for remote in $(actions_dbs); do
    remaining_locals=()
    for local in "${locals[@]}"; do
      if ! [[ "$local" == "$remote"* ]]; then
        remaining_locals+=("$local")
      fi
    done
    locals=("${remaining_locals[@]}")
  done

  if [[ "${#locals[@]}" -ne 0 ]]; then
    ghe_verbose "Warning: Found following ${#locals[@]} backup files that can't be traced back to the specified GHES host."
    ghe_verbose "Warning: Did you recently reconfigure the GHES host? Move or delete these backup files if no longer needed."
    for local in "${locals[@]}"; do
      ghe_verbose "$1/$local"
    done

    exit 1
  fi
}

run_query() {
  echo "set -o pipefail; ghe-mssql-console -y -n -q \"SET NOCOUNT ON; $1\"" | ghe-ssh "$GHE_HOSTNAME" /bin/bash | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
}

get_latest_backup_file() {
  backups_dir=$1
  db=$2
  ext=$3

  latest_full_backup=$(find "$backups_dir" -type f -name "$db*.$ext" | egrep '[0-9]{8}T[0-9]{6}' | sort | tail -n 1)
  latest_full_backup_file="${latest_full_backup##*/}"
  echo "$latest_full_backup_file"
}

get_backup_val() {
  db=$1
  filename=$2
  column=$3
  run_query "
  SELECT s.$column
  FROM msdb.dbo.backupset s
  JOIN msdb.dbo.backupmediafamily f
  ON s.media_set_id = f.media_set_id
  WHERE s.database_name = '$db' AND f.physical_device_name LIKE '%$filename'"
}

get_backup_checkpoint_lsn() {
  get_backup_val "$1" "$2" "checkpoint_lsn"
}

get_backup_last_lsn() {
  get_backup_val "$1" "$2" "last_lsn"
}

get_next_log_backup_starting_lsn() {
  db=$1
  # last_log_backup_lsn: The starting log sequence number of the next log backup
  # https://docs.microsoft.com/en-us/sql/relational-databases/system-catalog-views/sys-database-recovery-status-transact-sql
  run_query "
    SELECT last_log_backup_lsn
    FROM sys.database_recovery_status drs 
    JOIN sys.databases db on drs.database_id = db.database_id 
    WHERE db.name = '$db'"
}

get_next_diff_backup_base_lsn() {
  db=$1
  # differential_base_lsn: Base for differential backups. Data extents changed after this LSN will be included in a differential backup.
  # https://docs.microsoft.com/en-us/sql/relational-databases/system-catalog-views/sys-master-files-transact-sql
  run_query "
    SELECT differential_base_lsn
    FROM sys.master_files mf
    WHERE mf.name = '$db'"
}

last_mssql=$GHE_DATA_DIR/current/mssql

if [ ! -d $last_mssql ] \
  || [ -z "$(find $last_mssql -type f -name '*.bak' | head -n 1)" ]; then
  ghe_verbose "Taking first full backup"
  backup_type="full"
else
  ensure_same_dbs "$last_mssql"

  # Check schedule to determine backup type
  IFS=',' read -ra cadence <<< "$GHE_MSSQL_BACKUP_CADENCE"

  current=$(date -u +%Y%m%d%H%M%S)

  full=$(find "$last_mssql" -type f -name "*.bak" | head -n 1)
  full=$(find_timestamp $full)
  full_expire=$(add_minute $full ${cadence[0]})
  full_expire="${full_expire//T}"

  diff=$(find "$last_mssql" -type f -name "*.diff" | head -n 1)
  if [ -f "$diff" ]; then
    diff=$(find_timestamp $diff)
    diff_expire=$(add_minute $diff ${cadence[1]})
    diff_expire="${diff_expire//T}"
  else
    diff_expire=$(add_minute $full ${cadence[1]})
    diff_expire="${diff_expire//T}"
  fi

  tran=$(find "$last_mssql" -type f -name "*.log" | egrep '[0-9]{8}T[0-9]{6}' | sort | tail -1)
  tran=$(find_timestamp $tran)
  tran_expire=$(add_minute $tran ${cadence[2]})
  tran_expire="${tran_expire//T}"

  ghe_verbose "current $current, full expire $full_expire, \
diff expire $diff_expire, tran expire $tran_expire"

  # Determine the type of backup to take based on expiry time
  if [ $current -gt $full_expire ]; then
    backup_type='full'
  elif [ $current -gt $diff_expire ]; then
    backup_type='diff'
  elif [ $current -gt $tran_expire ]; then
    backup_type='transaction'
  fi

  # Upgrade to a full backup if the diff/transaction backup might not be restorable due to other backup mechanisms interfering
  # with the transaction LSN chain or differential base LSN.
  if [ "$backup_type" == 'diff' ] || [ "$backup_type" == 'transaction' ]; then
    ghe_verbose "Checking for conflicting backups to ensure a $backup_type backup is sufficient"

    for db in $(actions_dbs); do
      # Ensure that a diff backup will be based on the full backup file we have (rather than one another backup mechanism took)
      if [ "$backup_type" == 'diff' ]; then
        full_backup_file=$(get_latest_backup_file "$last_mssql" "$db" "bak")
        if [[ "$full_backup_file" == "" ]]; then
          ghe_verbose "Taking a full backup instead of a diff backup because for $db a full backup file wasn't found"
          backup_type="full"
          break
        fi

        full_backup_file_checkpoint_lsn=$(get_backup_checkpoint_lsn "$db" "$full_backup_file")
        if [[ "$full_backup_file_checkpoint_lsn" = "NULL" ]] || [[ "$full_backup_file_checkpoint_lsn" == "" ]]; then
          ghe_verbose "Taking a full backup instead of a diff backup because for $db the checkpoint LSN for $full_backup_file couldn't be determined"
          backup_type="full"
          break
        fi

        next_diff_backup_base_lsn=$(get_next_diff_backup_base_lsn "$db")
        if [[ "$next_diff_backup_base_lsn" = "NULL" ]] || [[ "$next_diff_backup_base_lsn" == "" ]]; then
          ghe_verbose "Taking a full backup instead of a $backup_type backup because for $db the base LSN for the next diff backup couldn't be determined"
          backup_type="full"
          break
        fi

        # The base of the diff backup we're about to take must exactly match the checkpoint LSN of the full backup file we have
        if [[ "$next_diff_backup_base_lsn" -ne "$full_backup_file_checkpoint_lsn" ]]; then
          ghe_verbose "Taking a full backup instead of a $backup_type backup because for $db the diff would have base LSN $next_diff_backup_base_lsn yet our full backup has checkpoint LSN $full_backup_file_checkpoint_lsn"
          backup_type="full"
          break
        fi
      fi

      # Ensure that a transaction log backup will immediately follow the previous one
      latest_log_backup_file=$(get_latest_backup_file "$last_mssql" "$db" "log")
      if [[ "$latest_log_backup_file" == "" ]]; then
        ghe_verbose "Taking a full backup instead of a $backup_type backup because for $db a previous transaction log backup wasn't found"
        backup_type="full"
        break
      fi

      latest_log_backup_last_lsn=$(get_backup_last_lsn "$db" "$latest_log_backup_file")
      if [[ "$latest_log_backup_last_lsn" = "NULL" ]] || [[ "$latest_log_backup_last_lsn" == "" ]]; then
        ghe_verbose "Taking a full backup instead of a $backup_type backup because for $db the LSN range for $latest_log_backup_file couldn't be determined"
        backup_type="full"
        break
      fi

      next_log_backup_starting_lsn=$(get_next_log_backup_starting_lsn "$db")
      if [[ "$next_log_backup_starting_lsn" = "NULL" ]] || [[ "$next_log_backup_starting_lsn" == "" ]]; then
        ghe_verbose "Taking a full backup instead of a $backup_type backup because for $db the starting LSN for the next log backup couldn't be determined"
        backup_type="full"
        break
      fi

      # The starting LSN of the backup we're about to take must be equal to (or before) the last LSN from the last backup,
      # otherwise there'll be a gap and the logfiles won't be restorable
      if [[ "$next_log_backup_starting_lsn" -gt "$latest_log_backup_last_lsn" ]]; then
        ghe_verbose "Taking a full backup instead of a $backup_type backup because for $db a gap would exist between the last backup ending at LSN $latest_log_backup_last_lsn and next backup starting at $next_log_backup_starting_lsn"
        backup_type="full"
        break
      fi
    done
  fi  
fi

# Make sure root backup dir exists if this is the first run
mkdir -p "$backup_dir"

# Use hard links to "copy" over previous applicable backups to the new snapshot folder to save disk space and time
if [ -d $last_mssql ]; then
  for p in $last_mssql/*
  do
    [[ -e "$p" ]] || break

    filename="${p##*/}"
    extension="${filename##*.}"
    transfer=

    # Copy full backups unless we're taking a new full backup
    if [ $extension = "bak" ] && [ "$backup_type" != 'full' ]; then
      transfer=1
    fi

    # Copy diff backups unless we're taking a new full or diff backup
    if [ $extension = "diff" ] && [ "$backup_type" != 'full' ] && [ "$backup_type" != 'diff' ]; then
      transfer=1
    fi

    # Copy transaction log backups unless we're taking a new full or diff backup
    if [ $extension = "log" ] && [ "$backup_type" != 'full' ] && [ "$backup_type" != 'diff' ]; then
      transfer=1
    fi

    if [ -n "$transfer" ]; then
      ghe_verbose "Creating hard link to $filename"
      ln $last_mssql/$filename $backup_dir/$filename
    fi
  done
fi

if [ -n "$backup_type" ]; then
  ghe_verbose "Taking $backup_type backup"

  backup_command='ghe-export-mssql'
  if [ "$backup_type" = "diff" ]; then
    backup_command='ghe-export-mssql -d'
  elif [ "$backup_type" = "transaction" ]; then
    backup_command='ghe-export-mssql -t'
  fi

  bm_start "$(basename $0)"
  ghe-ssh "$GHE_HOSTNAME" -- "$backup_command" || failures="$failures mssql"
  bm_end "$(basename $0)"

  # Configure the backup cadence on the appliance, which is used for diagnostics
  ghe-ssh "$GHE_HOSTNAME" "ghe-config mssql.backup.cadence $GHE_MSSQL_BACKUP_CADENCE"

  # Transfer backup files from appliance to backup host
  appliance_dir="$GHE_REMOTE_DATA_DIR/user/mssql/backups"
  backups=$(echo "set -o pipefail; if sudo test -d \"$appliance_dir\"; then \
    sudo ls \"$appliance_dir\"; fi" | ghe-ssh "$GHE_HOSTNAME" /bin/bash)
  for b in $backups
  do
    ghe_verbose "Transferring to backup host $b"
    ghe-ssh "$GHE_HOSTNAME" "sudo cat $appliance_dir/$b" > $backup_dir/$b
  done
fi
