【python】download google music

#!/usr/bin/env python3
# gmd.py

"""Google Music Downloader(GMD)
--Version: 1.5
--Update: 2011-06-12
--Author: kev++
--License: GNU GPL

Usage: gmd.py [options] AlbumId1 [AlbumId2..]

Options:
  -h, --help                  show this help message and exit
  -r RANGES, --ranges=RANGES  album index ranges
  -p PATH, --path=PATH        local path
  -b, --beep                  beep when finish
  -y, --yes                   say yes to redownload
  --nolog                     don't log

Quick-Start Tutorial:
Open your Firefox, type url:

http://www.google.cn/music/artist?id=Ae2300d8b0232c06c
                                     =================
Ae2300d8b0232c06c is AlbumId
=================    =======
Type $ python gmd.py Ae2300d8b0232c06c in cmd/bash shell
       ===============================
Hit Enter key
    =====
Everything should be OK if you're lucky

Real-World Example:
C:>python gmd.py --ranges :2,7:8,11: B17b0e7e724dd4e79
-----------Songs-----------
| 1: A Force De Solitude |
| 2: C'Est Parce Que Je T|
| 7: Jamais Personne |
| 8: Je Laime |
| 11: Marie |
| 12: Si Un Jour |
---------------------------
1: A Force De Solitude
✓ 100.0% of 4.63MB
2: C'Est Parce Que Je T'aime
X file exists
7: Jamais Personne
↓ 14.1% of 3.21MB

Known Issues:
Downloading a lot of songs in a short period gets your IP blocked (an image verification code is then required).
Some downloads use this base URL instead: http://g.top100.cn/16667639/html/download.html?id=<SongId>
"""

import sys, re, urllib.request, os, string, datetime, optparse
import xml.etree.ElementTree as ET
from functools import reduce

def downloadAlbum(albumId, ranges=':', path=None, islog=True):
    """Download the selected tracks of one album.

    Parameters:
        albumId -- album identifier, passed on to parseAlbum()
        ranges  -- track selection string such as ':2,7:8,11:'
                   (the default ':' selects every track)
        path    -- target directory; a false value lets makeDir() choose one
        islog   -- when true, log() is called once all tracks were attempted

    Returns the value of report() for the album.
    """
    albumData = parseAlbum(albumId, ranges)
    printSummary(albumData)

    targetDir = makeDir(albumData['info'], path)
    for track in albumData['songs']:
        downloadSong(track, targetDir)

    if islog:
        log(albumData, targetDir)

    return report(albumData)

def makeDir(info, path):
    """Return the download directory, creating it when it does not exist.

    Parameters:
        info -- album metadata; 'artist' and 'name' are read only when
                `path` is empty
        path -- explicit target directory, or a false value to use the
                default location ~/Music/<artist>/<name>

    Returns the directory path.
    Raises KeyError when `path` is empty and `info` lacks 'artist' or 'name'.
    """
    dirPath = path
    if not dirPath:
        # Sanitise each component on its own: stripping after the components
        # were joined would let a '/' inside an artist or album name
        # (e.g. "AC/DC") introduce an extra directory level.
        illegal = str.maketrans('', '', r'\/:*?<>|"')
        artist = str(info['artist']).translate(illegal)
        name = str(info['name']).translate(illegal)
        # default download to '$HOME'
        dirPath = os.path.join(os.path.expanduser('~'), 'Music', artist, name)
    # exist_ok avoids the race between a separate exists() test and makedirs().
    os.makedirs(dirPath, exist_ok=True)
    return dirPath

def parseAlbum(albumId, ranges):
    """Fetch an album's XML description and build the download plan.

    Parameters:
        albumId -- album identifier inserted into the album XML URL
        ranges  -- track selection string understood by parseranges()

    Returns a dict with two keys:
        'info'  -- the 'id', 'name', 'artist' and 'releaseDate' texts found
                   under the <info> element (only tags that are present),
                   plus 'total', the number of songs in the album
        'songs' -- one dict per <song> with 'id', 'name', 'artist', 'track'
                   (1-based position), 'download' (bool, selected by
                   `ranges`) and 'url' (result of getSongUrl() for selected
                   songs, None otherwise)
    """
    albumXmlUrl = 'http://www.google.cn/music/album?id={0}&output=xml'.format(albumId)
    # Close the HTTP response as soon as the document has been parsed.
    with urllib.request.urlopen(albumXmlUrl) as resp:
        xmlTree = ET.parse(resp)

    infoTags = ('id', 'name', 'artist', 'releaseDate')
    info = {node.tag: node.text for node in xmlTree.find('.//info') if node.tag in infoTags}
    # a list of dict
    songs = [{tag: node.findtext(tag) for tag in ('id', 'name', 'artist')}
             for node in xmlTree.findall('.//song')]
    info['total'] = len(songs)

    # A set gives constant-time membership tests inside the loop.
    selected = set(parseranges(ranges, len(songs)))
    # Characters that must not appear in a file name; built once, used per song.
    illegal = str.maketrans('', '', r'\/:*?<>|"')
    for index, song in enumerate(songs):
        song['track'] = index + 1
        song['name'] = song['name'].translate(illegal)
        song['download'] = index in selected
        # Only selected songs cost a request for their download URL.
        song['url'] = getSongUrl(song['id']) if song['download'] else None
    return dict(info=info, songs=songs)

def getSongUrl(songId):
    """Look up the download URL of one song.

    Fetches the song's download page and searches it for a quoted
    '/music/top100/url?q=http://file...' link.

    Parameters:
        songId -- song identifier inserted into the download page URL

    Returns the absolute URL as a string, or None when the page contains
    no such link.
    """
    pageUrl = 'http://www.google.cn/music/top100/musicdownload?id={0}'.format(songId)
    # The context manager closes the HTTP response once the page is read.
    with urllib.request.urlopen(pageUrl) as resp:
        html = resp.read().decode('utf-8').replace('&quot;', '"')
    found = re.search(r'"/(music/top100/url\?q=http://file[^"]+)"', html, re.I)
    if found is None:
        return None
    return 'http://www.google.cn/{0}'.format(found.group(1)).replace('&amp;', '&')

def printSummary(album):
    """Print a framed list of the songs selected for download.

    Each selected song gets one row with its track number and its name
    padded or cut to 20 characters; an 'X' marks songs without a URL.
    Songs whose 'download' flag is false are not listed.
    """
    rowFormat = '| {0}{1:>3}: {2:<20.20}|'
    print(' -----------Songs----------- ')
    for entry in album['songs']:
        if not entry['download']:
            continue
        marker = ' ' if entry['url'] else 'X'
        print(rowFormat.format(marker, entry['track'], entry['name']))
    print(' --------------------------- ')

def downloadSong(song, path):
    """Download one song into `path` as '<name>.mp3' and print its status.

    Parameters:
        song -- song dict with 'download', 'url', 'track' and 'name'
        path -- directory the file is written to

    Nothing happens when song['download'] is false. An existing file or an
    empty URL is reported and skipped. Any error is printed instead of
    raised, and song['url'] is reset to None so that report() counts the
    track as failed.
    """
    if not song['download']:
        return

    try:
        print(' {0}{1:>3}: {2}'.format(' ' if song['url'] else 'X', song['track'], song['name']))
        fileName = os.path.join(path, song['name'] + '.mp3')
        if os.path.exists(fileName):
            print(' X file exists')
        elif not song['url']:
            print(' X url is empty')
        else:
            try:
                downloadFile(song['url'], fileName)
            except BaseException:
                # Remove the partial file: left in place it would make the
                # next attempt print "file exists" and count the truncated
                # track as a success. BaseException also covers Ctrl-C.
                if os.path.exists(fileName):
                    os.remove(fileName)
                raise
            print('')
    except Exception as e:
        print(' X cannot download. Error msg:', e)
        song['url'] = None

def downloadFile(url, fileName):
    """Download `url` to `fileName`, printing a one-line progress indicator.

    The data is first written to '<fileName>.part' and moved into place only
    after the transfer completed, so a failed download never leaves a
    truncated file under the final name.

    Parameters:
        url      -- address to fetch
        fileName -- destination path

    Raises whatever urlopen() or the file operations raise, and OSError when
    fewer bytes arrive than the Content-Length header announced.
    """
    # A hand-written read loop is used because
    # `urllib.request.urlretrieve' is VERY SLOW
    blockSize = 8192
    partName = fileName + '.part'

    def showProgress(done, total):
        # Without a Content-Length the total is unknown; a huge placeholder
        # keeps the percentage near zero instead of dividing by zero.
        if total == 0:
            total = 1e10
        percent = done * 100 / total
        print('\t↓ {0:5.1f}% of {1:.2f}MB\r'.format(min(100, percent), total/1024/1024), end='')
        sys.stdout.flush()

    # Connect before touching the disk, and close the response when done.
    with urllib.request.urlopen(url) as resp:
        headers = resp.info()
        size = int(headers['Content-Length']) if 'Content-Length' in headers else 0
        received = 0
        try:
            with open(partName, 'wb') as out:
                while True:
                    buf = resp.read(blockSize)
                    if not buf:
                        break
                    out.write(buf)
                    # Count real bytes: a read may return less than blockSize.
                    received += len(buf)
                    showProgress(received, size)
            if size and received < size:
                raise OSError('incomplete download: got {0} of {1} bytes'.format(received, size))
            os.replace(partName, fileName)
        except BaseException:
            # Do not leave the temporary file behind on any failure.
            if os.path.exists(partName):
                os.remove(partName)
            raise

def log(album, path):
    """Write a summary of the album and its songs to '<path>/log.xml'.

    Parameters:
        album -- dict with 'info' (album metadata) and 'songs' (list of song
                 dicts holding 'id', 'track', 'name', 'url' and 'download')
        path  -- directory that receives log.xml

    The file has an <album> root with one child per metadata field, a
    <downloadDate> holding the current local time, and a <songs> list with
    one <song> per entry. Every value is stored as its str() form, so a
    missing value appears as the text 'None'.
    """
    info = album['info']
    root = ET.Element('album')
    for key in ['id', 'name', 'artist', 'releaseDate', 'total']:
        # .get(): 'info' only holds the tags the album XML contained, so an
        # optional field such as releaseDate may be absent.
        ET.SubElement(root, key).text = str(info.get(key))

    ET.SubElement(root, 'downloadDate').text = str(datetime.datetime.now())

    songList = ET.SubElement(root, 'songs')
    for song in album['songs']:
        songNode = ET.SubElement(songList, 'song')
        for key in ['id', 'track', 'name', 'url', 'download']:
            ET.SubElement(songNode, key).text = str(song[key])

    logName = os.path.join(path, 'log.xml')
    ET.ElementTree(root).write(logName, encoding='utf-8')

def parseranges(ranges, n):
    """Translate a 1-based range string into a sorted list of 0-based indexes.

    Example: parseranges(':2,4:6,9:', 10) -> [0, 1, 3, 4, 5, 8, 9]

    Parameters:
        ranges -- comma-separated items, each either a single number 'a' or
                  a span 'a:b' (both ends inclusive). An omitted start means
                  1, an omitted end means n, and an empty item selects
                  everything.
        n      -- number of available entries

    Returns the selected indexes without duplicates, in ascending order.
    Indexes outside 0..n-1 are dropped.
    Raises ValueError for an item with more than one ':' or a non-numeric
    bound.
    """
    def bounds(item):
        # Return the inclusive 1-based (first, last) pair described by item.
        pieces = item.split(':')
        if len(pieces) == 1:
            first = last = pieces[0]
        elif len(pieces) == 2:
            first, last = pieces
        else:
            raise ValueError('malformed range: {0!r}'.format(item))
        return (int(first) if first else 1, int(last) if last else n)

    selected = set()
    for item in ranges.split(','):
        first, last = bounds(item)
        # Clip to the valid index range so that input such as '0' or a
        # number beyond n cannot produce -1 or a non-existent index.
        selected.update(range(max(first - 1, 0), min(last, n)))
    return sorted(selected)

def report(album):
    """Describe the failed tracks of an album as a range string.

    A track counts as failed when it was selected ('download' is true) but
    its 'url' is empty. Consecutive track numbers are folded into 'a:b'
    spans and the pieces are joined with commas, e.g. '1:2,5,7:9'.

    Returns '' when no selected track failed.
    """
    failed = sorted([int(song['track']) for song in album['songs']
                     if song['download'] and not song['url']])

    # Collect [first, last] pairs of consecutive track numbers.
    runs = []
    for number in failed:
        if runs and number == runs[-1][1] + 1:
            runs[-1][1] = number
        else:
            runs.append([number, number])

    pieces = []
    for first, last in runs:
        if first == last:
            pieces.append("{0}".format(first))
        else:
            pieces.append("{0}:{1}".format(first, last))
    return ",".join(pieces)

if __name__=='__main__':
    # Command-line entry point: validate the options, then download every
    # album named on the command line, offering to retry failed tracks.
    parser = optparse.OptionParser(usage='usage: %prog [options] AlbumId1 [AlbumId2..]')
    parser.add_option('-r', '--ranges', dest='ranges', type=str, default=':', help='album index ranges')
    parser.add_option('-p', '--path', dest='path', default=None, help='local path')
    parser.add_option('-b', '--beep', dest='beep', action='store_true', default=False, help='beep when finish')
    parser.add_option('-y', '--yes', dest='yes', action='store_true', default=False, help='say yes to redownload')
    parser.add_option('--nolog', dest='islog', action='store_false', default=True, help='don\'t log')
    (opts, args) = parser.parse_args()

    # parser.error() prints the message and exits, so no else branch is needed.
    if not re.match(r'^(\d*(:\d*)?)(,(\d*(:\d*)?))*$', opts.ranges):
        parser.error('option [ranges] format wrong')

    PATH = opts.path
    BEEP = opts.beep
    YES = opts.yes
    ISLOG = opts.islog

    if not args:
        parser.error('argument [AlbumId] cannot be empty')

    for arg in args:
        if not re.match(r'^\w{17}$', arg):
            parser.error('argument [AlbumId] format is wrong')
        ALBUMID = arg

        # Start every album from the ranges given on the command line. The
        # retry loop below overwrites RANGES with the failed tracks, which
        # must not leak into the next album.
        RANGES = opts.ranges

        # NOTE(review): with --yes this loop repeats for as long as any
        # track keeps failing; consider a retry limit.
        while True:
            # downloadAlbum() returns the failed tracks in range notation.
            RANGES = downloadAlbum(ALBUMID, RANGES, PATH, ISLOG)
            if BEEP:
                print('\a')
            if not RANGES:
                break
            if YES:
                print('\nAuto redownload AlbumId:"{0}" Tracks:"{1}"!'.format(ALBUMID, RANGES))
                continue
            ans = input('\nRedownload AlbumId:"{0}" Tracks:"{1}"?(Y/N): '.format(ALBUMID, RANGES))
            if ans.lower().strip() != 'y':
                print('You can also add "-r {0}" options to redownload later!'.format(RANGES))
                break

作者: K++   发布时间: 2011-06-12