# autocastr.py
# Download your podcasts and separate them into CD-R sized folders to allow for quick burning.
# Why? Because I'm cheap -- now you can listen to podcasts in your car without aftermarket sound systems or crappy MP3 player docks; just use your CD player!
# External dependency, too lazy to parse RSS feeds myself.
# Only a missing module is turned into the friendly hint; any other failure
# while importing feedparser (or a Ctrl-C) propagates unchanged instead of
# being masked by a bare ``except``.
try:
    import feedparser
except ImportError:
    raise ImportError("You need feedparser! Use pip or easy_install to get it")

import glob
import json
import os
import re
import shutil
import sys
import time
import urllib2
# All configuration lives in an external JSON file
with open('settings.json', 'r') as fp:
    settings = json.load(fp)

# Size threshold (in bytes) past which a new folder is started;
# settings['folder_size'] is multiplied by 1024 * 1024 (i.e. it is in MiB)
DIR_LIMIT = settings['folder_size'] * (1024 * 1024)

# Maximum number of old episodes to retrieve per feed
MAX_BACKLOG = settings['max_backlog']

# Path template for the numbered folders; '%s' is filled with the folder number
FOLDER_FORMAT = os.path.join('podcasts', 'cd-%s')

# Case-insensitive matcher for audio MIME types
AUDIO_MIME = re.compile('audio/.*', re.IGNORECASE)
# Feeds already known to settings, plus a list of just their URLs
tracked_feeds = settings['feeds']
tracked_urls = [f['url'] for f in tracked_feeds]

# The feeds we want to track are stored in a text file, one URL per line.
# splitlines() copes with both \n and \r\n endings, and blank lines are
# dropped so an empty file does not register a feed with an empty URL.
with open('feeds.txt', 'r') as fp:
    feeds = [line.strip() for line in fp.read().splitlines() if line.strip()]

# Add any new feeds to settings. tracked_urls is updated as we go so a URL
# listed twice in feeds.txt is only added once.
for feed in feeds:
    if feed not in tracked_urls:
        tracked_feeds.append({'url': feed, 'last_checked': 0})
        tracked_urls.append(feed)

# Remove any feeds that we don't want to download anymore. The list is
# rebuilt in place (slice assignment) rather than popped by index: indices
# taken from a snapshot go stale after the first removal, which would drop
# the wrong feed or raise IndexError. In-place assignment also keeps
# tracked_feeds and settings['feeds'] referring to the same list.
wanted_urls = set(feeds)
tracked_feeds[:] = [f for f in tracked_feeds if f['url'] in wanted_urls]
tracked_urls[:] = [f['url'] for f in tracked_feeds]
# Report the newest folder in the podcasts directory, i.e. the highest number
def get_latest_folder():
    """Return the highest folder number among paths matching FOLDER_FORMAT.

    Every non-digit character is stripped from each matched path and the
    remainder is parsed as an int.

    Raises:
        ValueError: if nothing matches the pattern, or a matched path
            contains no digits at all.
    """
    folder_numbers = []
    for path in glob.glob(FOLDER_FORMAT % '*'):
        digits_only = re.sub(r'[^\d]', '', path)
        folder_numbers.append(int(digits_only))
    return max(folder_numbers)
# Helper that wipes every previously downloaded podcast
def clean_root():
    """Delete the 'podcasts' directory tree and recreate it empty.

    Errors during removal are ignored (e.g. the directory not existing yet);
    errors from creating the directory are not.
    """
    root = 'podcasts'
    # rmtree removes the whole tree in one call, no manual directory walk
    shutil.rmtree(root, ignore_errors=True)
    os.mkdir(root)
# Strips filename of any strange characters so the OS won't complain when we try to save it
def clean_filename(value):
    """Return an ASCII-only, lowercase, dash-separated version of *value*.

    Steps: decompose accented characters (NFKD) and drop anything that is not
    ASCII, remove every character that is not a word character, whitespace
    or '-', trim and lowercase, then collapse each run of dashes/whitespace
    into a single '-'.

    Args:
        value: the text to clean; a text string, or a byte string which is
            decoded as UTF-8 (undecodable bytes are dropped).

    Returns:
        The cleaned text string (may be empty if nothing survives).
    """
    # Local import: unicodedata is not part of the file's import block.
    import unicodedata

    # unicodedata.normalize only accepts text; decode byte strings first
    # instead of raising TypeError.
    if isinstance(value, bytes):
        value = value.decode('utf-8', 'ignore')

    # encode() drops the non-ASCII remainder; decode() turns the result back
    # into text without relying on the Python-2-only ``unicode`` builtin.
    ascii_text = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    stripped = re.sub(r'[^\w\s-]', '', ascii_text).strip().lower()
    return re.sub(r'[-\s]+', '-', stripped)
# Stores the mp3 in the newest folder; if adding it would push that folder
# past the size limit, the next numbered folder is created and used instead
def add_file(data, name, filesize):
    """Write *data* as '<cleaned name>.mp3' into the appropriate folder.

    Args:
        data: the raw bytes to write.
        name: human-readable name; passed through clean_filename().
        filesize: size in bytes used for the limit check (the caller-supplied
            value, not len(data)).
    """
    safe_name = clean_filename(name)
    current = get_latest_folder()
    target_dir = FOLDER_FORMAT % current

    # Total up what is already stored in the newest folder
    used = 0
    for entry in os.listdir(target_dir):
        used += os.path.getsize(os.path.join(target_dir, entry))

    # Roll over to a fresh folder when this file would exceed the limit
    if used + filesize > DIR_LIMIT:
        target_dir = FOLDER_FORMAT % (current + 1)
        os.mkdir(target_dir)

    destination = os.path.join(target_dir, safe_name) + '.mp3'
    with open(destination, 'wb') as fp:
        fp.write(data)


default_path = FOLDER_FORMAT % '0'

# Create the first folder on disk if it is not there yet
if not os.path.exists(default_path):
    os.mkdir(default_path)
# Loop over the feeds, downloading up to MAX_BACKLOG new audio episodes each
for feed in settings['feeds']:
    d = feedparser.parse(feed['url'])

    # A feed that failed to download or parse may have no title; fall back to
    # its URL instead of raising KeyError and aborting the whole run.
    feed_title = d['feed'].get('title', feed['url'])
    # Single-argument print(...) behaves the same as a Python 2 print statement
    print('Checking for new episodes of %s' % feed_title)
    ep_count = 0

    # Entries without a parsed date can be neither ordered nor compared
    # against last_checked, so they are skipped.
    dated_entries = [e for e in d['entries'] if e.get('updated_parsed')]

    # Some podcast feeds don't put the newest episodes at the top, so sort
    # newest-first ourselves.
    sorted_entries = sorted(dated_entries,
                            key=lambda e: e['updated_parsed'],
                            reverse=True)

    for episode in sorted_entries:
        if ep_count >= MAX_BACKLOG:
            break
        if time.mktime(episode['updated_parsed']) <= feed['last_checked']:
            continue

        # Only the first enclosure is considered; skip episodes without one
        enclosures = episode.get('enclosures') or []
        if not enclosures:
            continue
        enclosure = enclosures[0]
        mime_type = enclosure.get('type') or ''
        href = enclosure.get('href')
        if not href or AUDIO_MIME.search(mime_type) is None:
            continue

        print('Downloading %s - %s\n' % (episode['title'], href))

        # Always close the HTTP response, even if the read fails
        response = urllib2.urlopen(href)
        try:
            data = response.read()
        finally:
            response.close()

        filename = '%s - %s' % (feed_title, episode['title'])

        # The declared enclosure length is often missing or not a number;
        # fall back to the size of what was actually downloaded.
        try:
            filesize = int(enclosure.get('length'))
        except (TypeError, ValueError):
            filesize = len(data)

        add_file(data, filename, filesize)
        ep_count += 1

    # Remember the time of the latest entry so we don't bother checking stuff
    # multiple times. Left untouched when the feed had no dated entries
    # (max() of an empty list would raise).
    if sorted_entries:
        feed['last_checked'] = time.mktime(sorted_entries[0]['updated_parsed'])
# Persist the updated settings once, after every feed has been processed
# (this does not need to happen inside the feed loop)
with open('settings.json', 'w') as fp:
    fp.write(json.dumps(settings, indent=4))

print('Done.')
sys.exit()