-
This commit is contained in:
497
Kodi/Lenovo/addons/metadata.generic.albums/lib/scraper.py
Normal file
497
Kodi/Lenovo/addons/metadata.generic.albums/lib/scraper.py
Normal file
@@ -0,0 +1,497 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import _strptime # https://bugs.python.org/issue7980
|
||||
from socket import timeout
|
||||
from threading import Thread
|
||||
from urllib.error import HTTPError, URLError
|
||||
import xbmc
|
||||
import xbmcaddon
|
||||
import xbmcgui
|
||||
import xbmcplugin
|
||||
from .allmusic import allmusic_albumfind
|
||||
from .allmusic import allmusic_albumdetails
|
||||
from .discogs import discogs_albumfind
|
||||
from .discogs import discogs_albummain
|
||||
from .discogs import discogs_albumdetails
|
||||
from .fanarttv import fanarttv_albumart
|
||||
from .musicbrainz import musicbrainz_albumfind
|
||||
from .musicbrainz import musicbrainz_albumdetails
|
||||
from .musicbrainz import musicbrainz_albumlinks
|
||||
from .musicbrainz import musicbrainz_albumart
|
||||
from .nfo import nfo_geturl
|
||||
from .theaudiodb import theaudiodb_albumdetails
|
||||
from .wikipedia import wikipedia_albumdetails
|
||||
from .utils import *
|
||||
|
||||
# add-on id, name and version as reported by kodi; the id prefixes every log
# line and the name / version make up the User-Agent header of our requests
ADDONID, ADDONNAME, ADDONVERSION = (
    xbmcaddon.Addon().getAddonInfo(field) for field in ('id', 'name', 'version')
)
|
||||
|
||||
|
||||
def log(txt):
    """Write ``txt`` to the kodi log at debug level, prefixed with the add-on id."""
    xbmc.log(msg='%s: %s' % (ADDONID, txt), level=xbmc.LOGDEBUG)
|
||||
|
||||
def get_data(url, jsonformat, retry=True):
    """Download ``url`` and return its content.

    url        -- address to fetch
    jsonformat -- when true the response is decoded with json.loads,
                  otherwise the raw bytes are returned
    retry      -- when true, a 503 or 429 response is retried once after
                  a one second pause

    Returns the decoded json / raw bytes, or None when the request fails,
    times out, keeps being rate limited or does not contain valid json.
    """
    # pace the calls to the sites that enforce an api limit
    if url.startswith('https://musicbrainz.org/'):
        api_timeout('musicbrainztime')
    elif url.startswith('https://api.discogs.com/'):
        api_timeout('discogstime')
    headers = {'User-Agent': '%s/%s ( http://kodi.tv )' % (ADDONNAME, ADDONVERSION)}
    req = urllib.request.Request(url, headers=headers)
    try:
        # the context manager closes the connection once the body is read
        with urllib.request.urlopen(req, timeout=5) as resp:
            respdata = resp.read()
    except HTTPError as e:
        # HTTPError is a subclass of URLError, so it has to be handled first.
        # urlopen raises it for 503 / 429 as well, which makes this the only
        # place where the api limit responses can be detected.
        if e.code in (503, 429):
            if e.code == 503:
                log('exceeding musicbrainz api limit')
            else:
                log('exceeding discogs api limit')
            if retry:
                xbmc.sleep(1000)
                # hand the outcome of the second attempt back to the caller
                return get_data(url, jsonformat, retry=False)
            return None
        log('HTTPError: %s - %s' % (e.reason, url))
        return None
    except URLError as e:
        log('URLError: %s - %s' % (e.reason, url))
        return None
    except socket.timeout as e:
        log('socket: %s - %s' % (e, url))
        return None
    if not jsonformat:
        return respdata
    try:
        return json.loads(respdata)
    except ValueError as e:
        # treat a malformed response like any other failed request
        log('invalid json: %s - %s' % (e, url))
        return None
|
||||
|
||||
def api_timeout(scraper):
    """Make sure at least one second passed since the previous call for ``scraper``.

    The time of the last call (in milliseconds) is kept in the window
    property named ``scraper`` on window 10000. If less than a second went
    by we sleep for the remainder, then the property is set to the current time.
    """
    now = round(time.time() * 1000)
    lastcall = xbmcgui.Window(10000).getProperty(scraper)
    # the property is empty as long as this scraper has not been called yet
    elapsed = now - int(lastcall) if lastcall else None
    if elapsed is not None and elapsed < 1000:
        xbmc.sleep(1000 - elapsed)
    # store the time after the (optional) sleep, not the time we started at
    xbmcgui.Window(10000).setProperty(scraper, str(round(time.time() * 1000)))
|
||||
|
||||
|
||||
class Scraper:
    """Album scraper, all the work is done while the instance is created.

    Depending on ``action`` the constructor resolves a musicbrainz id,
    searches for an album, collects the album details from the various
    sites or picks up a musicbrainz url from a nfo file. The outcome is
    handed to kodi through xbmcplugin, using the handle in sys.argv[1].
    """

    # sites in the order their results are merged, least accurate first:
    # (site, add its 'thumb' to the thumb list, add its 'extras' to the extra art list)
    _MERGE_ORDER = (
        ('discogs', True, False),
        ('wikipedia', False, False),
        ('allmusic', True, False),
        ('theaudiodb', True, True),
        ('musicbrainz', False, False),
        ('coverarchive', True, True),
        # prefer artwork from fanarttv
        ('fanarttv', True, True),
    )

    # result keys that map one-on-one to a listitem property
    _DETAIL_PROPERTIES = (
        ('genre', 'album.genre'),
        ('styles', 'album.styles'),
        ('moods', 'album.moods'),
        ('themes', 'album.themes'),
        ('description', 'album.review'),
        ('releasedate', 'album.releasedate'),
        ('originaldate', 'album.originaldate'),
        ('releasestatus', 'album.releasestatus'),
        ('artist_description', 'album.artist_description'),
        ('label', 'album.label'),
        ('type', 'album.type'),
        ('compilation', 'album.compilation'),
        ('year', 'album.year'),
        ('rating', 'album.rating'),
        ('votes', 'album.votes'),
    )

    def __init__(self, action, key, artist, album, url, nfo, settings):
        """Run the requested scraper action.

        action   -- 'resolveid', 'find', 'getdetails' or 'NfoUrl',
                    any other value only closes the directory
        key      -- musicbrainz album id (used by 'resolveid')
        artist   -- artist name (used by 'find')
        album    -- album title (used by 'find')
        url      -- json string as created by return_search (used by 'getdetails')
        nfo      -- nfo data that is passed to nfo_geturl (used by 'NfoUrl')
        settings -- json string with the path settings, see parse_settings
        """
        # parse path settings
        self.parse_settings(settings)
        # this is just for backward compatibility with xml based scrapers https://github.com/xbmc/xbmc/pull/11632
        if action == 'resolveid':
            self.return_resolved(self.resolve_mbid(key))
        # search for artist name / album title matches
        elif action == 'find':
            # try musicbrainz first, fallback to discogs
            result = self.find_album(artist, album, 'musicbrainz')
            if not result:
                result = self.find_album(artist, album, 'discogs')
            if result:
                self.return_search(result)
        # return info id's
        elif action == 'getdetails':
            self._scrape_details(url)
        # extract the mbalbumid from the provided musicbrainz url
        elif action == 'NfoUrl':
            # check if there is a musicbrainz url in the nfo file
            mbalbumid = nfo_geturl(nfo)
            if mbalbumid:
                self.return_nfourl(self.resolve_mbid(mbalbumid))
        xbmcplugin.endOfDirectory(int(sys.argv[1]))

    def _scrape_details(self, url):
        """Collect the album details from all sites and return them to kodi.

        ``url`` is the json string of a search result. It may hold the keys
        'artist', 'album', 'mbalbumid', 'mbreleasegroupid' and 'dcalbumid'.
        """
        details = {}
        links = {}
        ids = json.loads(url)
        artist = ids.get('artist')
        album = ids.get('album')
        mbalbumid = ids.get('mbalbumid')
        mbreleasegroupid = ids.get('mbreleasegroupid')
        dcid = ids.get('dcalbumid')
        threads = []
        # we have musicbrainz album id
        if mbalbumid:
            if mbreleasegroupid:
                scrapers = [[mbalbumid, 'musicbrainz'], [mbreleasegroupid, 'theaudiodb'], [mbreleasegroupid, 'fanarttv'], [mbreleasegroupid, 'coverarchive']]
            # get the mbreleasegroupid, artist and album if we don't have them
            elif self.get_details(mbalbumid, 'musicbrainz', details):
                mbdata = details['musicbrainz']
                mbreleasegroupid = mbdata['mbreleasegroupid']
                artist = mbdata['artist_description']
                album = mbdata['album']
                scrapers = [[mbreleasegroupid, 'theaudiodb'], [mbreleasegroupid, 'fanarttv'], [mbreleasegroupid, 'coverarchive']]
            else:
                # the lookup failed, the musicbrainz scraper gets another go in a thread
                scrapers = [[mbalbumid, 'musicbrainz']]
            # get musicbrainz links to other metadata sites
            lthread = Thread(target=self.get_links, args=(mbreleasegroupid, links))
            lthread.start()
            for param, site in scrapers:
                self._start_scraper(threads, param, site, details)
            # wait for the musicbrainz links to return
            lthread.join()
            for param, site in self._linked_scrapers(links, artist, album):
                self._start_scraper(threads, param, site, details)
        # we have a discogs id
        else:
            self._start_scraper(threads, {'url': dcid}, 'discogs', details)
        for thread in threads:
            thread.join()
        result = self.compile_results(details)
        if result:
            self.return_details(result)

    def _start_scraper(self, threads, param, site, details):
        """Run get_details for ``site`` in a new thread and add the thread to ``threads``."""
        thread = Thread(target=self.get_details, args=(param, site, details))
        threads.append(thread)
        thread.start()

    def _linked_scrapers(self, links, artist, album):
        """Return the [param, site] pairs to scrape based on the musicbrainz links.

        An empty list is returned if no musicbrainz links were found.
        """
        extrascrapers = []
        if 'musicbrainz' not in links:
            return extrascrapers
        mblinks = links['musicbrainz']
        # searching by name is only done if the user explicitly enabled it
        byname = self.inaccurate and artist and album
        # scrape allmusic if we have an url provided by musicbrainz
        if 'allmusic' in mblinks:
            extrascrapers.append([{'url': mblinks['allmusic']}, 'allmusic'])
        # only scrape allmusic by artistname and albumtitle if explicitly enabled
        elif byname:
            extrascrapers.append([{'artist': artist, 'album': album}, 'allmusic'])
        # scrape discogs if we have an url provided by musicbrainz
        if 'discogs' in mblinks:
            extrascrapers.append([{'masterurl': mblinks['discogs']}, 'discogs'])
        # only scrape discogs by artistname and albumtitle if explicitly enabled
        elif byname:
            extrascrapers.append([{'artist': artist, 'album': album}, 'discogs'])
        # scrape wikipedia if we have an url provided by musicbrainz
        if 'wikipedia' in mblinks:
            extrascrapers.append([mblinks['wikipedia'], 'wikipedia'])
        elif 'wikidata' in mblinks:
            extrascrapers.append([mblinks['wikidata'], 'wikidata'])
        return extrascrapers

    def parse_settings(self, data):
        """Store the path settings from the json string ``data`` on the instance.

        All of the keys 'review', 'genre', 'lang', 'mood', 'rating', 'style',
        'theme' and 'inaccurate' have to be present, a KeyError is raised otherwise.
        """
        settings = json.loads(data)
        # note: path settings are taken from the db, they may not reflect the current settings.xml file
        self.review = settings['review']
        self.genre = settings['genre']
        self.lang = settings['lang']
        self.mood = settings['mood']
        self.rating = settings['rating']
        self.style = settings['style']
        self.theme = settings['theme']
        self.inaccurate = settings['inaccurate']

    def resolve_mbid(self, mbalbumid):
        """Return a search result stub that only holds the musicbrainz album id."""
        return {
            'artist_description': '',
            'album': '',
            'mbalbumid': mbalbumid,
            'mbreleasegroupid': '',
        }

    def find_album(self, artist, album, site):
        """Search ``site`` ('musicbrainz' or 'discogs') for the album.

        Returns whatever the site specific find function makes of the
        response, or None if the request did not return any data.
        """
        # musicbrainz
        if site == 'musicbrainz':
            url = MUSICBRAINZURL % (MUSICBRAINZSEARCH % (urllib.parse.quote_plus(album), urllib.parse.quote_plus(artist), urllib.parse.quote_plus(artist)))
            scraper = musicbrainz_albumfind
        # discogs
        elif site == 'discogs':
            url = DISCOGSURL % (DISCOGSSEARCH % (urllib.parse.quote_plus(album), urllib.parse.quote_plus(artist), DISCOGSKEY, DISCOGSSECRET))
            scraper = discogs_albumfind
        result = get_data(url, True)
        if not result:
            return None
        return scraper(result, artist, album)

    def get_links(self, param, links):
        """Fetch the musicbrainz links for ``param`` and store them in links['musicbrainz'].

        ``links`` is returned, it is left untouched if the request failed.
        """
        result = get_data(MUSICBRAINZURL % (MUSICBRAINZLINKS % param), True)
        if result:
            links['musicbrainz'] = musicbrainz_albumlinks(result)
            return links

    def get_details(self, param, site, details):
        """Scrape the album details from ``site`` and store them in details[site].

        param   -- the id to look up. For 'discogs' and 'allmusic' this is a
                   dict with either an 'url' key (discogs also accepts
                   'masterurl') or the 'artist' and 'album' to search for.
                   The resolved url is added to this dict.
        site    -- 'theaudiodb', 'musicbrainz', 'fanarttv', 'coverarchive',
                   'discogs', 'wikipedia', 'wikidata' or 'allmusic'.
                   Results for 'wikidata' are stored under 'wikipedia'.
        details -- dict that receives the result

        Returns ``details`` if the site returned anything useful, None otherwise.
        """
        # allmusic is the only site that does not return json
        jsonformat = True
        # theaudiodb
        if site == 'theaudiodb':
            url = AUDIODBURL % (AUDIODBKEY, AUDIODBDETAILS % param)
            albumscraper = theaudiodb_albumdetails
        # musicbrainz
        elif site == 'musicbrainz':
            url = MUSICBRAINZURL % (MUSICBRAINZDETAILS % param)
            albumscraper = musicbrainz_albumdetails
        # fanarttv
        elif site == 'fanarttv':
            url = FANARTVURL % (param, FANARTVKEY)
            albumscraper = fanarttv_albumart
        # coverarchive
        elif site == 'coverarchive':
            url = MUSICBRAINZART % (param)
            albumscraper = musicbrainz_albumart
        # discogs
        elif site == 'discogs':
            # musicbrainz provides a link to the master release, but we need the main release
            if 'masterurl' in param:
                masterdata = get_data(DISCOGSURL % (DISCOGSMASTER % (param['masterurl'], DISCOGSKEY, DISCOGSSECRET)), True)
                if not masterdata:
                    return None
                mainurl = discogs_albummain(masterdata)
                if not mainurl:
                    return None
                param['url'] = mainurl
            # search by artistname and albumtitle if we do not have an url
            if 'url' not in param:
                url = DISCOGSURL % (DISCOGSSEARCH % (urllib.parse.quote_plus(param['album']), urllib.parse.quote_plus(param['artist']), DISCOGSKEY, DISCOGSSECRET))
                albumresult = get_data(url, jsonformat)
                if not albumresult:
                    return None
                albums = discogs_albumfind(albumresult, param['artist'], param['album'])
                if not albums:
                    return None
                # go with the most relevant match
                albums = sorted(albums, key=lambda k: k['relevance'], reverse=True)
                param['url'] = albums[0]['dcalbumid']
            url = DISCOGSURL % (DISCOGSDETAILS % (param['url'], DISCOGSKEY, DISCOGSSECRET))
            albumscraper = discogs_albumdetails
        # wikipedia
        elif site == 'wikipedia':
            url = WIKIPEDIAURL % param
            albumscraper = wikipedia_albumdetails
        elif site == 'wikidata':
            # resolve wikidata to wikipedia url
            result = get_data(WIKIDATAURL % param, jsonformat)
            try:
                album = result['entities'][param]['sitelinks']['enwiki']['url'].rsplit('/', 1)[1]
            except (KeyError, IndexError, TypeError, AttributeError):
                # the request failed or there is no english wikipedia page
                return None
            site = 'wikipedia'
            url = WIKIPEDIAURL % album
            albumscraper = wikipedia_albumdetails
        # allmusic
        elif site == 'allmusic':
            jsonformat = False
            # search by artistname and albumtitle if we do not have an url
            if 'url' not in param:
                url = ALLMUSICURL % (ALLMUSICSEARCH % (urllib.parse.quote_plus(param['artist']), urllib.parse.quote_plus(param['album'])))
                albumresult = get_data(url, jsonformat)
                if not albumresult:
                    return None
                albums = allmusic_albumfind(albumresult, param['artist'], param['album'])
                if not albums:
                    return None
                param['url'] = albums[0]['url']
            url = ALLMUSICDETAILS % param['url']
            albumscraper = allmusic_albumdetails
        result = get_data(url, jsonformat)
        if not result:
            return None
        albumresults = albumscraper(result)
        if not albumresults:
            return None
        details[site] = albumresults
        return details

    def compile_results(self, details):
        """Merge the results of all sites into one dict and apply the user preferences.

        ``details`` maps a site name to the dict of values scraped from it.
        Empty values are skipped and a value from a more accurate site
        replaces the one from a less accurate site. The 'thumb' key of the
        result lists the artwork of all sites, most accurate site first,
        followed by the extra art.
        """
        result = {}
        thumbs = []
        extras = []
        # merge metadata results, start with the least accurate sources
        for site, has_thumbs, has_extras in self._MERGE_ORDER:
            if site not in details:
                continue
            for k, v in details[site].items():
                if not v:
                    continue
                result[k] = v
                if has_thumbs and k == 'thumb':
                    thumbs.append(v)
                if has_extras and k == 'extras':
                    extras.append(v)
        # use musicbrainz artist as it provides the mbartistid (used for resolveid in the artist scraper)
        if 'musicbrainz' in details:
            result['artist'] = details['musicbrainz']['artist']
        # provide artwork from all scrapers for getthumb option
        if result:
            # thumb list from most accurate sources first
            thumbnails = [item for thumblist in reversed(thumbs) for item in thumblist]
            # extra art from most accurate sources first, added to the end of the thumb list
            thumbnails.extend(item for extralist in reversed(extras) for item in extralist)
            if thumbnails:
                result['thumb'] = thumbnails
        return self.user_prefs(details, result)

    def user_prefs(self, details, result):
        """Replace values in ``result`` by those of the sites the user prefers.

        The preferred sites come from the path settings (see parse_settings).
        A preference is ignored if that site has no value for it.
        Returns ``result``.
        """
        # user preferences
        lang = 'description' + self.lang
        if self.review == 'theaudiodb' and 'theaudiodb' in details:
            # theaudiodb has a description per language, fallback to english
            if lang in details['theaudiodb']:
                result['description'] = details['theaudiodb'][lang]
            elif 'descriptionEN' in details['theaudiodb']:
                result['description'] = details['theaudiodb']['descriptionEN']
        elif (self.review in details) and ('description' in details[self.review]):
            result['description'] = details[self.review]['description']
        preferences = ((self.genre, 'genre'), (self.style, 'styles'), (self.mood, 'moods'), (self.theme, 'themes'))
        for site, key in preferences:
            if (site in details) and (key in details[site]):
                result[key] = details[site][key]
        if (self.rating in details) and ('rating' in details[self.rating]):
            result['rating'] = details[self.rating]['rating']
            # not every result is guaranteed to hold a vote count
            if 'votes' in details[self.rating]:
                result['votes'] = details[self.rating]['votes']
        return result

    def return_search(self, data):
        """Return the list of search results ``data`` to kodi.

        The ids kodi needs to get the details later on are passed along as
        a json string in the url of each item.
        """
        items = []
        for item in data:
            listitem = xbmcgui.ListItem(item['album'], offscreen=True)
            listitem.setArt({'thumb': item['thumb']})
            listitem.setProperty('album.artist', item['artist_description'])
            listitem.setProperty('album.year', item.get('year', ''))
            listitem.setProperty('album.type', item.get('type', ''))
            listitem.setProperty('album.releasestatus', item.get('releasestatus', ''))
            listitem.setProperty('album.label', item.get('label', ''))
            listitem.setProperty('relevance', item['relevance'])
            url = {'artist': item['artist_description'], 'album': item['album']}
            if 'mbalbumid' in item:
                url['mbalbumid'] = item['mbalbumid']
                url['mbreleasegroupid'] = item['mbreleasegroupid']
            if 'dcalbumid' in item:
                url['dcalbumid'] = item['dcalbumid']
            items.append((json.dumps(url), listitem, True))
        if items:
            xbmcplugin.addDirectoryItems(handle=int(sys.argv[1]), items=items)

    def return_nfourl(self, item):
        """Return the id found in the nfo file to kodi, ``item`` is passed as a json url."""
        listitem = xbmcgui.ListItem(offscreen=True)
        xbmcplugin.addDirectoryItem(handle=int(sys.argv[1]), url=json.dumps(item), listitem=listitem, isFolder=True)

    def return_resolved(self, item):
        """Return the resolved musicbrainz id to kodi, ``item`` is passed as a json path."""
        listitem = xbmcgui.ListItem(path=json.dumps(item), offscreen=True)
        xbmcplugin.setResolvedUrl(handle=int(sys.argv[1]), succeeded=True, listitem=listitem)

    def return_details(self, item):
        """Return the album details in ``item`` to kodi as listitem properties.

        Nothing is returned to kodi if ``item`` does not hold an 'album' key.
        """
        if 'album' not in item:
            return
        listitem = xbmcgui.ListItem(item['album'], offscreen=True)
        if 'mbalbumid' in item:
            listitem.setProperty('album.musicbrainzid', item['mbalbumid'])
            listitem.setProperty('album.releaseid', item['mbalbumid'])
        if 'mbreleasegroupid' in item:
            listitem.setProperty('album.releasegroupid', item['mbreleasegroupid'])
        if 'scrapedmbid' in item:
            listitem.setProperty('album.scrapedmbid', item['scrapedmbid'])
        if 'artist' in item:
            listitem.setProperty('album.artists', str(len(item['artist'])))
            # the numbered properties start at 1
            for count, artist in enumerate(item['artist'], 1):
                listitem.setProperty('album.artist%i.name' % count, artist['artist'])
                listitem.setProperty('album.artist%i.musicbrainzid' % count, artist.get('mbartistid', ''))
                listitem.setProperty('album.artist%i.sortname' % count, artist.get('artistsort', ''))
        for key, prop in self._DETAIL_PROPERTIES:
            if key in item:
                listitem.setProperty(prop, item[key])
        if 'thumb' in item:
            listitem.setProperty('album.thumbs', str(len(item['thumb'])))
            for count, thumb in enumerate(item['thumb'], 1):
                listitem.setProperty('album.thumb%i.url' % count, thumb['image'])
                listitem.setProperty('album.thumb%i.aspect' % count, thumb['aspect'])
                listitem.setProperty('album.thumb%i.preview' % count, thumb['preview'])
        xbmcplugin.setResolvedUrl(handle=int(sys.argv[1]), succeeded=True, listitem=listitem)
|
||||
Reference in New Issue
Block a user