Rsync Python script

A Python script that wraps rsync to keep rolling backups for the latest n days, deleting the older ones. The approach is convenient because rsync uses hard links to build incremental backups that each appear as a full backup.

Of course it needs to be run under cron.

For the actual rsync call that is used in this script see http://www.sanitarium.net/golug/rsync_backups_2010.html

#!/usr/bin/python

import datetime
import os
import subprocess

# --- Configuration ---------------------------------------------------------
basedir = '/home/lobianco/temp/testrsync/base/'        # tree to back up
bkpsdir = '/home/lobianco/temp/testrsync/bkps/rsync/'  # where the snapshots live
excludefile = '/home/lobianco/temp/testrsync/base/excludes.txt'
bkpsdays = 30          # snapshots this many days old (or older) are deleted
bkpsbase = 'bkps-www'  # snapshot directories are named <bkpsbase>.<Y>-<M>-<D>

# Staging directory rsync writes into; it is renamed to the dated snapshot
# only once the transfer has succeeded.
stagingdir = bkpsdir+bkpsbase+".incomplete"

# rsync exit statuses accepted as a usable snapshot: 0 (success) and
# 24 (some source files vanished while the transfer was running).
RSYNC_OK_CODES = (0, 24)


def stamp(moment):
    """Format a datetime the way this script's log lines do (Y.M.D H:M, unpadded)."""
    return (str(moment.year)+"."+str(moment.month)+"."+str(moment.day)
            + " "+str(moment.hour)+":"+str(moment.minute))


def snapshot_name(date):
    """Return the snapshot directory name for a (year, month, day) tuple."""
    return bkpsbase+"."+str(date[0])+"-"+str(date[1])+"-"+str(date[2])


def parse_snapshot_date(name):
    """Return the (year, month, day) encoded in a snapshot directory name.

    Returns None when `name` is not a snapshot of this series: it does not
    start with `bkpsbase`, it is the staging directory, or its date part is
    malformed. Malformed names are reported on stdout and otherwise ignored.
    """
    parts = name.split('.')
    if parts[0] != bkpsbase:
        return None
    if name == bkpsbase+".incomplete":
        # Left over from an interrupted run; recreated from scratch later.
        return None
    if len(parts) != 2:
        print("**** ERROR: There is a problem with a directory starting with bkpsbase but not having the date elements.. ("+name+")")
        return None
    de = parts[1].split('-')  # date elements
    if len(de) != 3:
        print("**** ERROR: There is a problem with a directory having a subset of the date elements.. ("+name+")")
        return None
    try:
        return tuple(int(element) for element in de)
    except ValueError:
        print("**** ERROR: There is a problem with a directory having non-numeric date elements.. ("+name+")")
        return None


def classify_snapshots(names, cutoff):
    """Split directory names into expired snapshots and the newest survivor.

    names  -- directory entries found in `bkpsdir`
    cutoff -- (year, month, day); snapshots dated on or before it are expired

    Returns (expired, latest): `expired` is the list of names to delete and
    `latest` is the (year, month, day) of the newest snapshot that is kept,
    or None when none is kept. Only surviving snapshots are candidates for
    `latest`, so it never refers to a directory that is about to be removed.
    """
    expired = []
    latest = None
    for name in names:
        date = parse_snapshot_date(name)
        if date is None:
            continue
        if date <= cutoff:  # tuples compare year, then month, then day
            expired.append(name)
        elif latest is None or date > latest:
            latest = date
    return expired, latest


def build_rsync_command(link_date):
    """Return the rsync argument list that fills the staging directory.

    link_date -- (year, month, day) of the snapshot to hard-link unchanged
                 files against, or None for a full (first) backup.
    """
    command = [
        "rsync", "--archive", "--one-file-system", "--hard-links",
        "--human-readable", "--inplace", "--numeric-ids", "--delete",
        "--delete-excluded", "--exclude-from="+excludefile,
    ]
    if link_date is not None:
        command.append("--link-dest="+bkpsdir+snapshot_name(link_date)+"/")
    # command.extend(["--verbose", "--progress", "--itemize-changes"])
    command.extend([basedir, stagingdir+"/"])
    return command


def main():
    """Prune old snapshots, then create today's snapshot. Returns an exit status."""
    started = datetime.datetime.now()
    oldest = started - datetime.timedelta(days=bkpsdays)
    cutoff = (oldest.year, oldest.month, oldest.day)
    todaydir = bkpsdir+snapshot_name((started.year, started.month, started.day))

    print("** INFO: Starting rsync backup of "+basedir+" to "+bkpsdir+" - "+stamp(started))

    # If a complete backup for today already exists it is removed, as an
    # incremental backup cannot be linked against itself.
    subprocess.call(["rm", "-rf", todaydir])

    # On the very first run the backup directory may not exist yet.
    names = os.listdir(bkpsdir) if os.path.isdir(bkpsdir) else []
    expired, latest = classify_snapshots(names, cutoff)
    for name in expired:
        subprocess.call(["rm", "-rf", bkpsdir+name])

    # Start from an empty staging directory.
    if (subprocess.call(["rm", "-rf", stagingdir]) != 0
            or subprocess.call(["mkdir", "-p", stagingdir]) != 0):
        print("**** ERROR: Could not prepare the staging directory ("+stagingdir+")")
        return 1

    if latest is None:  # nothing to hard-link against
        print("** INFO: Performing first backup of the serie..")

    status = subprocess.call(build_rsync_command(latest))
    if status not in RSYNC_OK_CODES:
        # Never publish a partial tree as a dated snapshot: later runs would
        # hard-link against it and silently inherit the gaps.
        print("**** ERROR: rsync exited with status "+str(status)+", leaving "+stagingdir+" unpublished")
        return 1

    if subprocess.call(["mv", stagingdir, todaydir]) != 0:
        print("**** ERROR: Could not rename "+stagingdir+" to "+todaydir)
        return 1

    print("** INFO: rsync backup completed - "+stamp(datetime.datetime.now()))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())