Because you can only have one media root in Django, I often need to create symlinks to the media files of other applications. So I have updated the command to use os.walk (rather than os.path.walk), and it now follows symlinks.
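The relevant change is just the directory walk itself. Here is a minimal sketch of the new behaviour (the media_root path is illustrative only, and followlinks requires Python 2.6+):

import os

media_root = '/path/to/media'  # illustrative path only; substitute your MEDIA_ROOT

# os.walk(followlinks=True) yields (dirpath, dirnames, filenames) tuples and,
# unlike the old os.path.walk, will descend into directories that are symlinks.
for dirpath, dirnames, filenames in os.walk(media_root, followlinks=True):
    for name in filenames:
        print os.path.join(dirpath, name)

The full, updated command is below: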
"""
Sync Media to S3
================
Django command that scans all files in your settings.MEDIA_ROOT folder and
uploads them to S3 with the same directory structure.
This command can optionally do the following (both are off by default):
* gzip compress any CSS and Javascript files it finds and add the appropriate
'Content-Encoding' header.
* set a far future 'Expires' header for optimal caching.
Note: This script requires the Python boto library and valid Amazon Web
Services API keys.
Required settings.py variables:
AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''
AWS_BUCKET_NAME = ''
Command options are:
-p PREFIX, --prefix=PREFIX
The prefix to prepend to the path on S3.
--gzip Enables gzipping CSS and Javascript files.
--expires Enables setting a far future expires header.
--force Skip the file mtime check to force upload of all
files.
--filter-list Override default directory and file exclusion
filters. (enter as a comma-separated list)
TODO:
* Use fnmatch (or regex) to allow more complex FILTER_LIST rules.
"""
import datetime
import email
import mimetypes
import optparse
import os
import sys
import time
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
# Make sure boto is available
try:
import boto
import boto.exception
except ImportError:
    raise ImportError("The boto Python library is not installed.")
class Command(BaseCommand):
# Extra variables to avoid passing these around
AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''
AWS_BUCKET_NAME = ''
DIRECTORY = ''
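    # Directory and file names skipped during the walk; can be overridden via
    # settings.FILTER_LIST or the --filter-list option.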
FILTER_LIST = ['.DS_Store', '.svn', '.hg', '.git', 'Thumbs.db']
GZIP_CONTENT_TYPES = (
'text/css',
'application/javascript',
'application/x-javascript'
)
upload_count = 0
skip_count = 0
option_list = BaseCommand.option_list + (
optparse.make_option('-p', '--prefix',
dest='prefix', default='',
help="The prefix to prepend to the path on S3."),
optparse.make_option('-d', '--dir',
dest='dir', default=settings.MEDIA_ROOT,
help="The root directory to use instead of your MEDIA_ROOT"),
optparse.make_option('--gzip',
action='store_true', dest='gzip', default=False,
help="Enables gzipping CSS and Javascript files."),
optparse.make_option('--expires',
action='store_true', dest='expires', default=False,
help="Enables setting a far future expires header."),
optparse.make_option('--force',
action='store_true', dest='force', default=False,
help="Skip the file mtime check to force upload of all files."),
optparse.make_option('--filter-list', dest='filter_list',
action='store', default='',
help="Override default directory and file exclusion filters. (enter as comma seperated line)"),
)
    help = 'Syncs the complete MEDIA_ROOT structure and files to S3 into the bucket named by settings.AWS_BUCKET_NAME.'
can_import_settings = True
def handle(self, *args, **options):
# Check for AWS keys in settings
if not hasattr(settings, 'AWS_ACCESS_KEY_ID') or \
not hasattr(settings, 'AWS_SECRET_ACCESS_KEY'):
            raise CommandError('Missing AWS keys from settings file. Please '
                'supply both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.')
else:
self.AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
self.AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
if not hasattr(settings, 'AWS_BUCKET_NAME'):
raise CommandError('Missing bucket name from settings file. Please' +
' add the AWS_BUCKET_NAME to your settings file.')
else:
if not settings.AWS_BUCKET_NAME:
raise CommandError('AWS_BUCKET_NAME cannot be empty.')
self.AWS_BUCKET_NAME = settings.AWS_BUCKET_NAME
if not hasattr(settings, 'MEDIA_ROOT'):
raise CommandError('MEDIA_ROOT must be set in your settings.')
else:
if not settings.MEDIA_ROOT:
raise CommandError('MEDIA_ROOT must be set in your settings.')
self.verbosity = int(options.get('verbosity'))
self.prefix = options.get('prefix')
self.do_gzip = options.get('gzip')
self.do_expires = options.get('expires')
self.do_force = options.get('force')
self.DIRECTORY = options.get('dir')
self.FILTER_LIST = getattr(settings, 'FILTER_LIST', self.FILTER_LIST)
filter_list = options.get('filter_list')
if filter_list:
# command line option overrides default filter_list and
# settings.filter_list
self.FILTER_LIST = filter_list.split(',')
# Now call the syncing method to walk the MEDIA_ROOT directory and
# upload all files found.
self.sync_s3()
print
print "%d files uploaded." % (self.upload_count)
print "%d files skipped." % (self.skip_count)
def sync_s3(self):
"""
Walks the media directory and syncs files to S3
"""
bucket, key = self.open_s3()
#os.path.walk(self.DIRECTORY, self.upload_s3,
# (bucket, key, self.AWS_BUCKET_NAME, self.DIRECTORY))
self.upload_s3(bucket, key, self.AWS_BUCKET_NAME, self.DIRECTORY)
def compress_string(self, s):
"""Gzip a given string."""
import cStringIO, gzip
zbuf = cStringIO.StringIO()
zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
zfile.write(s)
zfile.close()
return zbuf.getvalue()
def open_s3(self):
"""
Opens connection to S3 returning bucket and key
"""
conn = boto.connect_s3(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY)
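        # If the bucket does not exist yet, get_bucket raises S3ResponseError
        # and we create it instead.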
try:
bucket = conn.get_bucket(self.AWS_BUCKET_NAME)
except boto.exception.S3ResponseError:
bucket = conn.create_bucket(self.AWS_BUCKET_NAME)
return bucket, boto.s3.key.Key(bucket)
#def upload_s3(self, arg, dirname, names):
def upload_s3(self, bucket, key, bucket_name, root_dir):
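        # os.walk(followlinks=True) descends into symlinked directories,
        # which the old os.path.walk-based version did not.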
        for dirname, dirs, names in os.walk(root_dir, followlinks=True):
            # FILTER_LIST holds bare names ('.svn', 'Thumbs.db', ...), so
            # compare against the directory's base name, and prune filtered
            # subdirectories so os.walk never descends into them.
            if os.path.basename(dirname) not in self.FILTER_LIST:
                dirs[:] = [d for d in dirs if d not in self.FILTER_LIST]
                for file in names:
                    if file not in self.FILTER_LIST:
headers = {}
filename = "%s/%s" % (dirname, file)
                        # Strip any leading '/' so keys do not start with a
                        # slash when root_dir has no trailing slash.
                        file_key = filename[len(root_dir):].lstrip('/')
if self.prefix:
file_key = '%s/%s' % (self.prefix, file_key)
# Check if file on S3 is older than local file, if so, upload
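                        # Both timestamps end up in UTC: boto reports
                        # Last-Modified in GMT and utcfromtimestamp() converts
                        # the local mtime.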
if not self.do_force:
s3_key = bucket.get_key(file_key)
if s3_key:
s3_datetime = datetime.datetime(*time.strptime(
s3_key.last_modified, '%a, %d %b %Y %H:%M:%S %Z')[0:6])
local_datetime = datetime.datetime.utcfromtimestamp(
os.stat(filename).st_mtime)
if local_datetime < s3_datetime:
self.skip_count += 1
if self.verbosity > 1:
print "File %s hasn't been modified since last " \
"being uploaded" % (file_key)
continue
# File is newer, let's process and upload
if self.verbosity > 0:
print "Uploading %s..." % (file_key)
content_type = mimetypes.guess_type(filename)[0]
if content_type:
headers['Content-Type'] = content_type
file_obj = open(filename, 'rb')
file_size = os.fstat(file_obj.fileno()).st_size
filedata = file_obj.read()
if self.do_gzip:
# Gzipping only if file is large enough (>1K is recommended)
# and only if file is a common text type (not a binary file)
if file_size > 1024 and content_type in self.GZIP_CONTENT_TYPES:
filedata = self.compress_string(filedata)
headers['Content-Encoding'] = 'gzip'
if self.verbosity > 1:
print "\tgzipped: %dk to %dk" % \
(file_size/1024, len(filedata)/1024)
if self.do_expires:
# HTTP/1.0
headers['Expires'] = '%s GMT' % (email.Utils.formatdate(
time.mktime((datetime.datetime.now() +
datetime.timedelta(days=365*2)).timetuple())))
# HTTP/1.1
                            headers['Cache-Control'] = 'max-age=%d' % (3600 * 24 * 365 * 2)
if self.verbosity > 1:
print "\texpires: %s" % (headers['Expires'])
print "\tcache-control: %s" % (headers['Cache-Control'])
try:
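                            # Reuse the single Key object returned by open_s3,
                            # pointing it at each new key name before uploading.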
key.name = file_key
key.set_contents_from_string(filedata, headers, replace=True)
key.set_acl('public-read')
                        except boto.exception.S3CreateError, e:
print "Failed: %s" % e
except Exception, e:
print e
raise
else:
self.upload_count += 1
file_obj.close()
# Backwards compatibility for Django r9110
if not [opt for opt in Command.option_list if opt.dest == 'verbosity']:
Command.option_list += (
optparse.make_option('-v', '--verbosity',
dest='verbosity', default=1, action='count',
help="Verbose mode. Multiple -v options increase the verbosity."),
)
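For reference, assuming the module is saved as a management command such as yourapp/management/commands/sync_media_s3.py (the command name comes from the file name, so that path is only an example), a typical run would be:

./manage.py sync_media_s3 --gzip --expires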