DevOps/Kodi/Lenovo/addons/metadata.themoviedb.org.python/python/lib/tmdbscraper/tmdb.py

from datetime import datetime, timedelta
from . import tmdbapi

class TMDBMovieScraper(object):
    """Search TMDB for movies and assemble their details via ``tmdbapi``.

    Arguments:
        url_settings: settings object handed to ``_load_base_urls`` to read
            and store the image base URLs.
        language: language passed to the detail requests.
        certification_country: country code whose certification is reported
            as ``mpaa``; compared upper-cased with ``iso_3166_1``.
        search_language: language passed to search requests; an empty string
            means "use ``language``".
    """

    def __init__(self, url_settings, language, certification_country, search_language=""):
        self.url_settings = url_settings
        self.language = language
        self.certification_country = certification_country
        if(search_language == ""):
            self.search_language = language
        else:
            self.search_language = search_language
        # Image base URLs are loaded on first access of `urls`.
        self._urls = None

    @property
    def urls(self):
        """Return the image base URLs, loading them on first use."""
        if not self._urls:
            self._urls = _load_base_urls(self.url_settings)
        return self._urls

    @staticmethod
    def _is_best_match(item, title, year=None):
        """Tell whether a search result matches the query exactly.

        A result matches when its title equals `title` ignoring case and,
        if `year` is given, its release date starts with that year.
        """
        if (item.get('title') or '').lower() != title.lower():
            return False
        if not year:
            return True
        return (item.get('release_date') or '').startswith(str(year))

    def search(self, title, year=None):
        """Search for a movie by title, or by a TMDB/IMDB id given as title.

        Returns the list of result dictionaries with exact matches first
        (most popular first) and poster/backdrop paths turned into preview
        URLs, or the response dictionary itself when it contains 'error'.
        """

        def is_best(item):
            return self._is_best_match(item, title, year)

        search_media_id = _parse_media_id(title)
        if search_media_id:
            if search_media_id['type'] == 'tmdb':
                result = _get_movie(search_media_id['id'], None, True)
                if 'error' in result:
                    return result
                result = [result]
            else:
                result = tmdbapi.find_movie_by_external_id(search_media_id['id'], language=self.search_language)
                if 'error' in result:
                    return result
                # A missing or null 'movie_results' must not break the loops below.
                result = result.get('movie_results') or []
        else:
            response = tmdbapi.search_movie(query=title, year=year, language=self.search_language)
            if 'error' in response:
                return response
            result = response['results']
            # get second page if available and if first page doesn't contain an `is_best` result with popularity > 5
            if response['total_pages'] > 1:
                has_popular_best = any(
                    is_best(item) and (item.get('popularity') or 0) > 5 for item in result)
                if not has_popular_best:
                    # Same language as page 1, so both pages belong to one search.
                    response = tmdbapi.search_movie(query=title, year=year,
                        language=self.search_language, page=2)
                    if 'error' not in response:
                        result += response['results']
        urls = self.urls

        if result:
            # move all `is_best` results at the beginning of the list, sort them by popularity (if found):
            bests_first = sorted((item for item in result if is_best(item)),
                key=lambda k: k.get('popularity') or 0, reverse=True)
            result = bests_first + [item for item in result if not is_best(item)]

        for item in result:
            if item.get('poster_path'):
                item['poster_path'] = urls['preview'] + item['poster_path']
            if item.get('backdrop_path'):
                item['backdrop_path'] = urls['preview'] + item['backdrop_path']
        return result

    def get_details(self, uniqueids):
        """Return the assembled details for the movie named by `uniqueids`.

        `uniqueids` is a dictionary that may hold a 'tmdb' and/or an 'imdb'
        id; the IMDB id is only used (to look up the TMDB id) when no TMDB id
        is present. Returns None when the movie cannot be identified or
        loaded, a dictionary containing 'error' when a request failed, and
        otherwise the dictionary built by `_assemble_details`.
        """
        media_id = uniqueids.get('tmdb')
        if not media_id:
            imdb_id = uniqueids.get('imdb')
            if not imdb_id:
                return None

            find_results = tmdbapi.find_movie_by_external_id(imdb_id)
            if 'error' in find_results:
                return find_results
            if find_results.get('movie_results'):
                movie = find_results['movie_results'][0]
                media_id = movie['id']
            if not media_id:
                return None

        details = self._gather_details(media_id)
        if not details:
            return None
        if details.get('error'):
            return details
        return self._assemble_details(**details)

    def _gather_details(self, media_id):
        """Fetch the movie and its collection, localized and unlocalized.

        Returns a dictionary with the keyword arguments of
        `_assemble_details`, or the failed/empty response of a movie request.
        """
        movie = _get_movie(media_id, self.language)
        if not movie or movie.get('error'):
            return movie

        # don't specify language to get English text for fallback
        movie_fallback = _get_movie(media_id)
        if not movie_fallback or movie_fallback.get('error'):
            # Report the failure instead of crashing on the missing 'images'.
            return movie_fallback
        if 'images' in movie_fallback:
            movie['images'] = movie_fallback['images']

        # `_get_moviecollection` returns None for a missing id, so no extra
        # guard is needed for movies that belong to no collection.
        collection_id = (movie.get('belongs_to_collection') or {}).get('id')
        collection = _get_moviecollection(collection_id, self.language)
        collection_fallback = _get_moviecollection(collection_id)
        if collection and collection_fallback and 'images' in collection_fallback:
            collection['images'] = collection_fallback['images']

        return {'movie': movie, 'movie_fallback': movie_fallback, 'collection': collection,
            'collection_fallback': collection_fallback}

    def _assemble_details(self, movie, movie_fallback, collection, collection_fallback):
        """Build the result dictionary from the responses of `_gather_details`.

        The result has the keys 'info', 'ratings', 'uniqueids', 'cast',
        'available_art' and '_info'.
        """
        info = {
            'title': movie['title'],
            'originaltitle': movie['original_title'],
            'plot': movie.get('overview') or movie_fallback.get('overview'),
            'tagline': movie.get('tagline') or movie_fallback.get('tagline'),
            'studio': _get_names(movie['production_companies']),
            'genre': _get_names(movie['genres']),
            'country': _get_names(movie['production_countries']),
            'credits': _get_cast_members(movie['casts'], 'crew', 'Writing', ['Screenplay', 'Writer', 'Author']),
            'director': _get_cast_members(movie['casts'], 'crew', 'Directing', ['Director']),
            'premiered': movie['release_date'],
            'tag': _get_names(movie['keywords']['keywords'])
        }

        releases = movie.get('releases') or {}
        if 'countries' in releases:
            certcountry = (self.certification_country or '').upper()
            # The first non-empty certification of the configured country wins.
            for country in releases['countries']:
                if country['iso_3166_1'] == certcountry and country['certification']:
                    info['mpaa'] = country['certification']
                    break

        trailer = _parse_trailer(movie.get('trailers', {}), movie_fallback.get('trailers', {}))
        if trailer:
            info['trailer'] = trailer
        if collection:
            # The unlocalized collection request may have returned nothing.
            fallback = collection_fallback or {}
            info['set'] = collection.get('name') or fallback.get('name')
            info['setoverview'] = collection.get('overview') or fallback.get('overview')
        if movie.get('runtime'):
            # 'duration' is the 'runtime' value multiplied by 60.
            info['duration'] = movie['runtime'] * 60

        ratings = {'themoviedb': {'rating': float(movie['vote_average']), 'votes': int(movie['vote_count'])}}
        uniqueids = {'tmdb': str(movie['id']), 'imdb': movie['imdb_id']}
        original_url = self.urls['original']
        cast = [{
                'name': actor['name'],
                'role': actor['character'],
                'thumbnail': original_url + actor['profile_path']
                    if actor.get('profile_path') else "",
                'order': actor['order']
            }
            for actor in movie['casts'].get('cast', [])
        ]
        available_art = _parse_artwork(movie, collection, self.urls, self.language)

        _info = {'set_tmdbid': movie['belongs_to_collection'].get('id')
            if movie.get('belongs_to_collection') else None}

        return {'info': info, 'ratings': ratings, 'uniqueids': uniqueids, 'cast': cast,
            'available_art': available_art, '_info': _info}

def _parse_media_id(title):
    if title.startswith('tt') and title[2:].isdigit():
        return {'type': 'imdb', 'id':title} # IMDB ID works alone because it is clear
    title = title.lower()
    if title.startswith('tmdb/') and title[5:].isdigit(): # TMDB ID
        return {'type': 'tmdb', 'id':title[5:]}
    elif title.startswith('imdb/tt') and title[7:].isdigit(): # IMDB ID with prefix to match
        return {'type': 'imdb', 'id':title[5:]}
    return None

def _get_movie(mid, language=None, search=False):
    """Request movie `mid` through ``tmdbapi.get_movie``.

    Nothing extra is appended for a search lookup. Otherwise a request with
    a language appends trailers, images, releases, casts and keywords, and a
    request without one appends only trailers and images.
    """
    if search:
        extras = None
    elif language is not None:
        extras = 'trailers,images,releases,casts,keywords'
    else:
        extras = 'trailers,images'
    return tmdbapi.get_movie(mid, language=language, append_to_response=extras)

def _get_moviecollection(collection_id, language=None):
    if not collection_id:
        return None
    details = 'images'
    return tmdbapi.get_collection(collection_id, language=language, append_to_response=details)

def _parse_artwork(movie, collection, urlbases, language):
    if language:
        # Image languages don't have regional variants
        language = language.split('-')[0]
    posters = []
    landscape = []
    logos = []
    fanart = []

    if 'images' in movie:
        posters = _build_image_list_with_fallback(movie['images']['posters'], urlbases, language)
        landscape = _build_image_list_with_fallback(movie['images']['backdrops'], urlbases, language)
        logos = _build_image_list_with_fallback(movie['images']['logos'], urlbases, language)
        fanart = _build_fanart_list(movie['images']['backdrops'], urlbases)

    setposters = []
    setlandscape = []
    setfanart = []
    if collection and 'images' in collection:
        setposters = _build_image_list_with_fallback(collection['images']['posters'], urlbases, language)
        setlandscape = _build_image_list_with_fallback(collection['images']['backdrops'], urlbases, language)
        setfanart = _build_fanart_list(collection['images']['backdrops'], urlbases)

    return {'poster': posters, 'landscape': landscape, 'fanart': fanart,
        'set.poster': setposters, 'set.landscape': setlandscape, 'set.fanart': setfanart, 'clearlogo': logos}

def _build_image_list_with_fallback(imagelist, urlbases, language, language_fallback='en'):
    """Build an image list that prefers `language`, then `language_fallback`.

    Images in `language` come first, followed by images in
    `language_fallback` when that is a different language. If neither
    yields anything, every image is returned regardless of language.
    """
    language_passes = [[language]]
    if language != language_fallback:
        # Add backup images
        language_passes.append([language_fallback])

    images = []
    for wanted in language_passes:
        images.extend(_build_image_list(imagelist, urlbases, wanted))

    if images:
        return images
    # Add any images if nothing set so far
    return _build_image_list(imagelist, urlbases)

def _build_fanart_list(imagelist, urlbases):
    """Build the fanart list from the images tagged 'xx' or with no language."""
    # Presumably these tags mark artwork without text; confirm against the TMDB docs.
    untagged = ['xx', None]
    return _build_image_list(imagelist, urlbases, untagged)

def _build_image_list(imagelist, urlbases, languages=[]):
    result = []
    for img in imagelist:
        if languages and img['iso_639_1'] not in languages:
            continue
        if img['file_path'].endswith('.svg'):
            continue
        result.append({
            'url': urlbases['original'] + img['file_path'],
            'preview': urlbases['preview'] + img['file_path'],
            'lang': img['iso_639_1']
        })
    return result

def _get_date_numeric(datetime_):
    return (datetime_ - datetime(1970, 1, 1)).total_seconds()

def _load_base_urls(url_settings):
    """Return the image base URLs, refreshing the stored ones when needed.

    The 'original' and 'preview' URLs are read from `url_settings`. They are
    requested again from ``tmdbapi.get_configuration`` and written back when
    one of them is missing or when the 'lastUpdated' setting is missing,
    unreadable or older than 30 days. If the refresh yields no usable
    configuration, the stored values are returned unchanged.
    """
    urls = {
        'original': url_settings.getSettingString('originalUrl'),
        'preview': url_settings.getSettingString('previewUrl'),
    }
    last_updated = url_settings.getSettingString('lastUpdated')
    try:
        updated_at = float(last_updated) if last_updated else None
    except ValueError:
        # A corrupt timestamp just forces a refresh.
        updated_at = None

    stale_before = _get_date_numeric(datetime.now() - timedelta(days=30))
    needs_refresh = (not urls['original'] or not urls['preview']
        or updated_at is None or updated_at < stale_before)

    if needs_refresh:
        conf = tmdbapi.get_configuration()
        # A response without 'images' (e.g. an error dictionary) is truthy
        # but unusable, so check for the key instead of only for truthiness.
        if conf and 'images' in conf:
            urls['original'] = conf['images']['secure_base_url'] + 'original'
            urls['preview'] = conf['images']['secure_base_url'] + 'w780'
            url_settings.setSetting('originalUrl', urls['original'])
            url_settings.setSetting('previewUrl', urls['preview'])
            url_settings.setSetting('lastUpdated', str(_get_date_numeric(datetime.now())))
    return urls

def _parse_trailer(trailers, fallback):
    if trailers.get('youtube'):
        return 'plugin://plugin.video.youtube/play/?video_id='+trailers['youtube'][0]['source']
    if fallback.get('youtube'):
        return 'plugin://plugin.video.youtube/play/?video_id='+fallback['youtube'][0]['source']
    return None

def _get_names(items):
    return [item['name'] for item in items] if items else []

def _get_cast_members(casts, casttype, department, jobs):
    result = []
    if casttype in casts:
        for cast in casts[casttype]:
            if cast['department'] == department and cast['job'] in jobs and cast['name'] not in result:
                result.append(cast['name'])
    return result