A Python script that wraps rsync to keep rolling backups for the latest n days, deleting the older ones. This approach is nice because rsync uses hard links to build incremental backups that appear as full backups.
Of course it needs to be run under cron.
For the actual rsync call that is used in this script see http://www.sanitarium.net/golug/rsync_backups_2010.html
#!/usr/bin/python
import datetime
import os
import shutil
import subprocess
import sys
# --- Configuration -----------------------------------------------------------
# Directory to back up.
basedir = '/home/lobianco/temp/testrsync/base/'
# Directory that holds the dated backup directories.
bkpsdir = '/home/lobianco/temp/testrsync/bkps/rsync/'
# File with rsync exclude patterns (passed to --exclude-from).
excludefile = '/home/lobianco/temp/testrsync/base/excludes.txt'
# Backups dated this many days ago (or earlier) are deleted.
bkpsdays = 30
# Backup directories are named "<bkpsbase>.<year>-<month>-<day>".
bkpsbase = 'bkps-www'

# rsync exit statuses after which the snapshot is still finalised:
# 0 = success, 24 = "partial transfer due to vanished source files"
# (see the rsync man page, EXIT VALUES).
RSYNC_OK_STATUSES = (0, 24)


def format_timestamp(moment):
    """Return *moment* as "Y.M.D H:M" (unpadded), as used in the log lines."""
    return "%d.%d.%d %d:%d" % (moment.year, moment.month, moment.day,
                               moment.hour, moment.minute)


def backup_dir_name(prefix, day):
    """Return the directory name of the backup taken on *day*.

    The numbers are deliberately not zero-padded so that the names stay
    compatible with backups created by earlier versions of this script.
    """
    return "%s.%d-%d-%d" % (prefix, day.year, day.month, day.day)


def parse_backup_stamp(dirname, prefix):
    """Parse "<prefix>.<Y>-<M>-<D>" into a ``(year, month, day)`` int tuple.

    Returns ``None`` when *dirname* does not belong to *prefix* at all.
    Raises ``ValueError`` (whose message is a ready-to-print log line) when
    the name starts with *prefix* but the date part is malformed.
    """
    parts = dirname.split('.')
    if parts[0] != prefix:
        return None
    if len(parts) != 2:
        raise ValueError("**** ERROR: There is a problem with a directory starting with bkpsbase but not having the date elements.. ("+dirname+")")
    elements = parts[1].split('-')
    if len(elements) != 3:
        raise ValueError("**** ERROR: There is a problem with a directory having a subset of the date elements.. ("+dirname+")")
    try:
        return tuple(int(element) for element in elements)
    except ValueError:
        # e.g. "bkps-www.a-b-c": report it instead of aborting the whole run.
        raise ValueError("**** ERROR: There is a problem with a directory having non-numeric date elements.. ("+dirname+")")


def remove_tree(path):
    """Delete *path* (directory tree, file or symlink), like ``rm -rf``.

    A missing *path* is not an error. Returns ``True`` when the removal
    succeeded (or there was nothing to remove), ``False`` after printing an
    error when it failed.
    """
    try:
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        elif os.path.lexists(path):
            os.remove(path)
    except OSError as err:
        print("**** ERROR: Could not remove "+path+" ("+str(err)+")")
        return False
    return True


def prune_old_backups(backups_dir, prefix, cutoff):
    """Delete the backups of *prefix* dated on or before *cutoff*.

    *cutoff* is a ``(year, month, day)`` tuple. Entries that do not start
    with *prefix* are ignored; malformed ones are reported and left alone.

    Returns the backups that are still on disk afterwards as a list of
    ``(stamp, dirname)`` pairs sorted oldest first, so the last item is the
    most recent backup that can really be hard-linked against.
    """
    survivors = []
    for dirname in os.listdir(backups_dir):
        try:
            stamp = parse_backup_stamp(dirname, prefix)
        except ValueError as err:
            print(str(err))
            continue
        if stamp is None:
            continue
        if stamp <= cutoff:
            # Too old. Whether or not the removal fully succeeds, this
            # directory must not be used as a link target any more.
            remove_tree(os.path.join(backups_dir, dirname))
        else:
            survivors.append((stamp, dirname))
    survivors.sort()
    return survivors


def build_rsync_command(source, destination, exclude_from, link_dest=None):
    """Return the rsync argument list for one snapshot.

    When *link_dest* is given, it is passed as --link-dest so that rsync can
    hard-link unchanged files against that previous backup.
    """
    command = [
        "rsync", "--archive", "--one-file-system", "--hard-links",
        "--human-readable", "--inplace", "--numeric-ids", "--delete",
        "--delete-excluded", "--exclude-from=" + exclude_from,
    ]
    if link_dest is not None:
        command.append("--link-dest=" + link_dest)
    # For debugging add: "--verbose", "--progress", "--itemize-changes"
    command.extend([source, destination])
    return command


def main():
    """Run one backup cycle; return the process exit status (0 on success).

    Steps: drop today's backup if it already exists, prune backups older
    than ``bkpsdays``, rsync into "<bkpsbase>.incomplete" (hard-linking
    against the newest surviving backup, if any) and, only when rsync
    succeeded, rename the result to today's dated directory.

    Raises ``OSError`` when ``bkpsdir`` cannot be listed or the staging
    directory cannot be created.
    """
    now = datetime.datetime.now()
    past = now - datetime.timedelta(days=bkpsdays)
    print("** INFO: Starting rsync backup of "+basedir+" to "+bkpsdir+" - "+format_timestamp(now))

    # If a complete backup for today already exists it is removed, as an
    # incremental backup cannot be created against itself.
    today_dir = os.path.join(bkpsdir, backup_dir_name(bkpsbase, now))
    if not remove_tree(today_dir):
        return 1

    # Remove the old backups; what survives is what we can link against.
    survivors = prune_old_backups(
        bkpsdir, bkpsbase, (past.year, past.month, past.day))

    # Start from an empty staging directory.
    incomplete_dir = os.path.join(bkpsdir, bkpsbase + ".incomplete")
    if not remove_tree(incomplete_dir):
        return 1
    os.makedirs(incomplete_dir)

    if survivors:
        # Not the first one: hard link to the most recent existing backup,
        # using its real on-disk name.
        link_dest = os.path.join(bkpsdir, survivors[-1][1]) + "/"
    else:
        print("** INFO: Performing first backup of the serie..")
        link_dest = None

    command = build_rsync_command(
        basedir, incomplete_dir + "/", excludefile, link_dest)
    try:
        status = subprocess.call(command)
    except OSError as err:
        print("**** ERROR: Could not run rsync ("+str(err)+")")
        return 1
    if status not in RSYNC_OK_STATUSES:
        # Never promote a failed transfer to a dated (i.e. "good") backup.
        print("**** ERROR: rsync failed with exit status "+str(status)+"; leaving "+incomplete_dir+" in place")
        return 1

    os.rename(incomplete_dir, today_dir)
    print("** INFO: rsync backup completed - "+format_timestamp(datetime.datetime.now()))
    return 0


if __name__ == "__main__":
    sys.exit(main())