#!/usr/bin/env python2

"""
repl-to-bakset.py

Update the backups on the active bakset (backup set) zpool.

I keep multiple zpools named like bakset1, bakset2, ..., only one of which is
online at any time.  The others are offsite, and swapped with the onsite zpool
every month or so.

Periodically, the backed-up datasets are snapshotted and replicated to the
active bakset.
"""

import subprocess
import sys
import time


datasets = ['pool/bu', 'ssdpool/jails']         # source datasets replicated (with children) to the bakset


def run_or_print(cmd):
    """Echo *cmd*, run it through the shell, and exit the script on failure.

    shell=True is required because callers pass pipelines
    ("zfs send ... | zfs receive ...").  A non-zero exit status aborts
    the whole script with that status via sys.exit().
    """
    # Single-argument parenthesized print behaves identically under
    # Python 2 and 3, and matches the print() calls used elsewhere in
    # this file (the original mixed both forms).
    print('Executing: %s' % cmd)
    status = subprocess.call(cmd, shell=True, stderr=subprocess.STDOUT)
    if status:
        print("'%s' failed with status %d" % (cmd, status))
        sys.exit(status)


# Locate the single online backup-set zpool that will receive the
# replication.  Exactly one bakset* pool must be imported; anything
# else (none, or several) is a setup error and we bail out.
pool_names = subprocess.check_output('zpool list -Ho name'.split()).split('\n')
candidates = [name for name in pool_names if name.startswith('bakset')]
if not candidates:
    print('No backup set zpool found')
    sys.exit(1)
if len(candidates) != 1:
    print('Too many backup set zpools found: %s' % ', '.join(candidates))
    sys.exit(1)
bakset = candidates[0]

# TODO - make sure everything will work before starting the actual replication.
# For each root dataset and each of its children:
# * If a matching dataset is not found in the backup set, then a non-incremental
#   replication will be required.  That will apply to the dataset in question
#   and any of its children
# * Find the most recent @bakset snapshot on the source dataset (the one
#   that was "made for" the backup set).
# * List the snapshots at the corresponding dataset on the backup set.  The last
#   snapshot should match the one found in the previous step.
# * If a match is found, then an incremental replication using that as a base
#   should work fine, and the replication for that dataset can proceed.
# * If a match isn't found, something went wrong - probably, the necessary base
#   snapshot got removed from the source dataset.  In that case, look for the
#   most recent snapshot on both source and target datasets that match.
# * If a match is found, then use that as the incremental base, even if it isn't
#   a @bakset... snapshot.  Just don't destroy it in the source dataset later.
#   If the new base isn't the last one in the backup set dataset, then any later
#   snapshots will first have to be destroyed.
# * If a match isn't found, then a full stream will have to be sent, for a
#   non-incremental replication.  This is a fatal error - the dataset will need
#   destroying on the backup set side, and that's probably best done in the
#   FreeNAS gui (I'm not sure if it's reflected in the FreeNAS DB anywhere).
# * Oh yeah - if a backup set dataset isn't found to match the source side, then
#   a non-incremental replication will need to be done, and can be done
#   automatically.
# * While analyzing the replication, build up the full list of cmds to be run.
#   That way, all the snapshots can be done before any of the send/receives, and
#   all the destroys can wait until all the send/receives are successful.

# Timestamped snapshot suffix shared by every dataset in this run,
# e.g. "@bakset1-20240131.0415".
new_snapshot_base = '@%s-%s' % (bakset, time.strftime('%Y%m%d.%H%M'))

for dataset in datasets:
    # Snapshots directly on this dataset (-d 1 excludes children).
    list_cmd = 'zfs list -Ho name -d 1 -t snapshot ' + dataset
    all_snapshots = subprocess.check_output(list_cmd.split()).split('\n')
    # Keep only snapshots previously made for this bakset; these are
    # candidates for an incremental base.
    prior = [s for s in all_snapshots
             if s[len(dataset):].startswith('@' + bakset)]

    run_or_print('zfs snapshot -r %s%s' % (dataset, new_snapshot_base))

    if prior:
        # Incremental replication from the newest prior bakset
        # snapshot; it is destroyed on the source once the send
        # succeeds, so only the new snapshot remains as the next base.
        base_snapshot = prior[-1]
        base_name = base_snapshot.split('@', 1)[1]
        run_or_print('zfs send -e -R -I %s %s%s | zfs receive -v -F -d %s' %
                     (base_name, dataset, new_snapshot_base, bakset))
        run_or_print('zfs destroy -r -v %s' % base_snapshot)
    else:
        # No prior bakset snapshot exists; a full (non-incremental)
        # stream is required.
        run_or_print('zfs send -e -R %s%s | zfs receive -v -F -d %s' %
                     (dataset, new_snapshot_base, bakset))
