#!/usr/local/bin/python2.7
# -*- coding: utf-8 -*-

"""
yle-dl - rtmpdump frontend for Yle Areena, Elävä Arkisto and YleX Areena

Copyright (C) 2010-2014 Antti Ajanki <antti.ajanki@iki.fi>

This script extracts RTMP stream information from Yle Areena
(http://areena.yle.fi), YleX Areena (http://ylex.yle.fi/ylex-areena),
Elävä Arkisto (http://yle.fi/elavaarkisto/index.html) web pages and
calls rtmpdump with correct parameters.
"""

import sys
import urllib
import urllib2
import re
import subprocess
import os
import os.path
import platform
import signal
import urlparse
import htmlentitydefs
import json
import string
import xml.dom.minidom
import time
import codecs
import base64
import ctypes
import ctypes.util
from operator import itemgetter
from Crypto.Cipher import AES

# Program version; shown in the splash screen and sent in the User-Agent.
version = '2.2.1'

# Flash player URL reported to the Areena RTMP server as swfUrl.
AREENA_NG_SWF = 'http://areena.yle.fi/static/player/1.2.8/flowplayer/flowplayer.commercial-3.2.7-encrypted.swf'
# HTTP headers sent with every page request made by download_page().
AREENA_NG_HTTP_HEADERS = {'User-Agent': 'yle-dl/' + version.split(' ')[0]}

# Flash player URL and fixed rtmpdump options for Elävä Arkisto streams.
ARKISTO_SWF = 'http://yle.fi/elavaarkisto/flowplayer/flowplayer.commercial-3.2.7.swf?0.7134730119723827'
RTMPDUMP_OPTIONS_ARKISTO = ['-s', ARKISTO_SWF, '-m', '60']

# Fixed rtmpdump options for YleX streams.
RTMPDUMP_OPTIONS_YLEX = ['-m', '60']

# URL schemes accepted as RTMP by AreenaStreamUrlUtils.rtmpurlparse().
RTMP_SCHEMES = ['rtmp', 'rtmpe', 'rtmps', 'rtmpt', 'rtmpte', 'rtmpts']

# list of all options that require an argument
ARGOPTS = ('--rtmp', '-r', '--host', '-n', '--port', '-c', '--socks',
           '-S', '--swfUrl', '-s', '--tcUrl', '-t', '--pageUrl', '-p',
           '--app', '-a', '--swfhash', '-w', '--swfsize', '-x', '--swfVfy',
           '-W', '--swfAge', '-X', '--auth', '-u', '--conn', '-C',
           '--flashVer', '-f', '--subscribe', '-d', '--flv', '-o',
           '--timeout', '-m', '--start', '-A', '--stop', '-B', '--token',
           '-T', '--skip', '-k')

# rtmpdump exit codes
RD_SUCCESS = 0
RD_FAILED = 1
RD_INCOMPLETE = 2

# When true, extra diagnostics (URLs, raw responses, command lines) are logged.
debug = False
# Characters that sane_filename() replaces with '_'. The active set is
# `excludechars`; it starts out as the Linux set.
excludechars_linux = '*/|'
excludechars_windows = '\"*/:<>?|'
excludechars = excludechars_linux
# Path of the rtmpdump executable; None here, presumably assigned during
# start-up before any download is attempted (not visible in this part).
rtmpdump_binary = None

# Handle to the C library, used by sigterm_when_parent_dies() to call
# prctl(). Falsy when no C library could be located.
libcname = ctypes.util.find_library('c')
libc = libcname and ctypes.CDLL(libcname)

def log(msg):
    """Write msg followed by a newline to stderr and flush the stream.

    The message is encoded with the encoding declared by stderr (UTF-8
    when none is declared); characters that cannot be represented are
    written as backslash escapes.
    """
    encoding = sys.stderr.encoding or 'UTF-8'
    encoded_msg = msg.encode(encoding, 'backslashreplace')
    for chunk in (encoded_msg, '\n'):
        sys.stderr.write(chunk)
    sys.stderr.flush()

def splashscreen():
    """Log the program banner: name, version, copyright and license."""
    banner = (
        u'yle-dl %s: Download media files from Yle Areena and Elävä Arkisto' % version,
        u'Copyright (C) 2009-2014 Antti Ajanki <antti.ajanki@iki.fi>, license: GPLv2',
    )
    for line in banner:
        log(line)

def usage():
    """Log the usage message, then let rtmpdump print its own help."""
    splashscreen()
    log(u'')
    log(u'%s [yle-dl or rtmpdump options] URL' % sys.argv[0])

    # Everything after the command synopsis is static text.
    remaining_lines = (
        u'',
        u'yle-dl options:',
        u'',
        u'--latestepisode         Download the latest episode',
        u"--showurl               Print librtmp-compatible URL, don't download",
        u"--showtitle             Print stream title, don't download",
        u"--showepisodepage       Print web page for each episode",
        u'--vfat                  Create Windows-compatible filenames',
        u'--sublang lang          Download subtitles, lang = fin, swe, smi, none or all',
        u'--hardsubs              Download stream with hard subs if available',
        u'--maxbitrate br         Maximum bitrate stream to download, integer in kB/s',
        u'                        or "best" or "worst"',
        u'--rtmpdump path         Set path to rtmpdump binary',
        u'--destdir dir           Save files to dir',
        u'',
        u'rtmpdump options:',
        u'',
    )
    for line in remaining_lines:
        log(line)

    # The rtmpdump part of the help comes from the binary itself.
    subprocess.call([rtmpdump_binary, '--help'])

def download_page(url):
    """Return the contents of the page at url as a unicode string.

    A URL without a scheme is treated as http:// and any #fragment is
    removed before the request is made. The character set is taken
    from the Content-Type header, then from a <meta ... charset="...">
    tag in the body, and finally defaults to iso-8859-1. Undecodable
    bytes are replaced rather than raising.

    Returns None, after logging the reason, if the page can't be read
    or the URL is invalid.
    """
    if url.find('://') == -1:
        url = 'http://' + url
    if '#' in url:
        url = url[:url.find('#')]

    request = urllib2.Request(url, headers=AREENA_NG_HTTP_HEADERS)
    try:
        urlreader = urllib2.urlopen(request)
        try:
            content = urlreader.read()
            charset = urlreader.info().getparam('charset')
        finally:
            # Release the connection even when reading fails.
            urlreader.close()
    except urllib2.URLError as exc:
        log(u"Can't read %s: %s" % (url, exc))
        return None
    except ValueError:
        log(u'Invalid URL: ' + url)
        return None

    if not charset:
        metacharset = re.search(r'<meta [^>]*?charset="(.*?)"', content)
        if metacharset:
            charset = metacharset.group(1)
    if not charset:
        charset = 'iso-8859-1'

    try:
        return unicode(content, charset, 'replace')
    except (LookupError, ValueError):
        # The server or page announced a charset Python doesn't know;
        # iso-8859-1 can decode any byte sequence.
        return unicode(content, 'iso-8859-1', 'replace')

def encode_url_utf8(url):
    """Encode the path component of url to percent-encoded UTF8."""
    (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)

    path = path.encode('UTF8')

    # Assume that the path is already encoded if there seems to be
    # percent encoded entities.
    if re.search(r'%[0-9A-Fa-f]{2}', path) is None:
        path = urllib.quote(path, '/+')

    return urlparse.urlunparse((scheme, netloc, path, params, query, fragment))

def decode_html_entity(entity):
    if not entity:
        return u''

    try:
        x = htmlentitydefs.entitydefs[entity]
    except KeyError:
        x = entity

    if x.startswith('&#') and x[-1] == ';':
        x = x[1:-1]

    if x[0] == '#':
        try:
            return unichr(int(x[1:]))
        except (ValueError, OverflowError):
            return u'?'
    else:
        return unicode(x, 'iso-8859-1', 'ignore')

def replace_entitydefs(content):
    """Return content with every '&...;' sequence replaced by the result
    of decode_html_entity() for the text between '&' and ';'."""
    def decode_match(match):
        return decode_html_entity(match.group(1))

    return re.sub(r'&(.*?);', decode_match, content)

def int_or_else(x, default):
    """Return int(x), or default when x can't be converted.

    Values of an unsuitable type (such as None) yield default as well,
    just like strings that don't contain a number.
    """
    try:
        return int(x)
    except (TypeError, ValueError):
        return default

def sane_filename(name):
    """Return name made usable as a file name.

    Leading and trailing spaces and dots are removed and every
    character listed in the module-level excludechars is replaced by
    an underscore. Returns 'ylevideo' if nothing is left.
    """
    if isinstance(name, unicode):
        # unicode.translate() expects a mapping of code points.
        table = dict.fromkeys([ord(c) for c in excludechars], ord(u'_'))
    else:
        # str.translate() expects a 256-character table.
        table = string.maketrans(excludechars, '_'*len(excludechars))

    cleaned = name.strip(' .').translate(table)
    return cleaned or 'ylevideo'

def execute_rtmpdump(args):
    """Start rtmpdump process with argument list args and wait until
    completion.

    args is the complete command line, binary first. Returns the exit
    code of the process. Returns RD_INCOMPLETE if the download is
    interrupted from the keyboard or the process can't be started.
    """
    if debug:
        log('Executing:')
        log(' '.join(args))

    enc = sys.getfilesystemencoding()
    encoded_args = [x.encode(enc, 'replace') for x in args]

    # Stays None until Popen succeeds, so that the interrupt handler
    # knows whether there is a child to stop.
    rtmpdump_process = None
    try:
        if platform.system() == 'Windows':
            rtmpdump_process = subprocess.Popen(encoded_args)
        else:
            rtmpdump_process = subprocess.Popen(encoded_args,
                preexec_fn=sigterm_when_parent_dies)
        return rtmpdump_process.wait()
    except KeyboardInterrupt:
        if rtmpdump_process is not None:
            try:
                os.kill(rtmpdump_process.pid, signal.SIGINT)
                rtmpdump_process.wait()
            except OSError:
                # The rtmpdump process died before we killed it.
                pass
        return RD_INCOMPLETE
    except OSError as exc:
        # unicode() can't decode the exception object itself; convert
        # it to a byte string first.
        log(u'Execution failed: ' + unicode(str(exc), 'UTF-8', 'replace'))
        return RD_INCOMPLETE

def sigterm_when_parent_dies():
    """Call prctl(PR_SET_PDEATHSIG, SIGTERM) in the current process.

    Used as the preexec_fn of the rtmpdump child process. Does nothing
    when the C library is not loaded or has no prctl function.
    """
    PR_SET_PDEATHSIG = 1

    try:
        prctl = libc.prctl
    except AttributeError:
        # libc is None or libc does not contain prctl
        return

    prctl(PR_SET_PDEATHSIG, signal.SIGTERM)

def downloader_factory(url):
    """Return a downloader object that can handle url.

    The downloader class is chosen from the beginning of the URL.
    Returns None if the URL does not belong to any supported service.
    """
    if url.startswith(('http://www.yle.fi/elavaarkisto/',
                       'http://yle.fi/elavaarkisto/',
                       'http://yle.fi/arkivet/',
                       'http://svenska.yle.fi/arkivet/')):
        return ElavaArkistoDownloader()
    elif re.match(r'^http://(www\.)?yle\.fi/radio/[a-zA-Z0-9]+/suora/?$', url) or \
            url == 'http://ylex.yle.fi/radio-popup':
        # Live radio has to be tested before the generic ylex.yle.fi
        # prefix; otherwise the radio-popup URL could never get here.
        return AreenaLiveRadioDownloader()
    elif url.startswith('http://ylex.yle.fi/'):
        return YleXDownloader()
    elif url.startswith(('http://areena.yle.fi/tv/suora/',
                         'http://arenan.yle.fi/tv/direkt/')):
        return AreenaLiveDownloader()
    elif url.startswith(('http://areena.yle.fi/',
                         'http://arenan.yle.fi/',
                         'http://yle.fi/')):
        return AreenaNGDownloader()
    else:
        return None

def get_output_filename(args_in):
    """Return the value given to the last -o or --flv option in args_in.

    Returns None if there is no such option or if it is not followed
    by a non-empty value.
    """
    output_flags = ('-o', '--flv')
    following = None
    # Walk backwards so that `following` is always the argument that
    # comes right after the one being examined.
    for arg in reversed(list(args_in)):
        if arg in output_flags:
            return following or None
        following = arg
    return None

def is_resume_job(args):
    """Return True if args contains the --resume or -e option."""
    resume_flags = ('--resume', '-e')
    return any(flag in args for flag in resume_flags)

def bitrate_from_arg(arg):
    """Convert the value of the --maxbitrate option to an integer.

    'best' maps to sys.maxint and 'worst' to 0; anything else must be
    an integer literal. An invalid value is logged and treated like
    'best'.
    """
    if arg == 'best':
        return sys.maxint
    elif arg == 'worst':
        return 0

    try:
        return int(arg)
    except ValueError:
        log(u'Invalid bitrate %s, defaulting to best' % arg)
        return sys.maxint

def next_available_filename(proposed):
    """Return proposed, or a numbered variant of it that is not taken.

    While the candidate name exists on disk, '-1', '-2', ... is
    inserted in front of the extension of the proposed name and the
    occupied name is logged.
    """
    enc = sys.getfilesystemencoding()
    stem, ext = os.path.splitext(proposed)

    candidate = proposed
    counter = 0
    while os.path.exists(candidate.encode(enc, 'replace')):
        log(u'%s exists, trying an alternative name' % candidate)
        counter += 1
        candidate = stem + '-' + str(counter) + ext

    return candidate

def which(program):
    """Search for program on $PATH and return the full path if found.

    If program contains a directory component it is checked as such
    and $PATH is not consulted. When $PATH is not set, os.defpath is
    searched instead. Returns None if no executable file is found.
    """
    # Adapted from
    # http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    fpath, fname = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        search_path = os.environ.get("PATH", os.defpath)
        for path in search_path.split(os.pathsep):
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file

    return None

def parse_rtmp_single_component_app(rtmpurl):
    """Extract single path-component app and playpath from rtmpurl.

    Returns a tuple (app_only_rtmpurl, playpath, ext):

    - app_only_rtmpurl is the URL up to and including the first path
      component ('' if rtmpurl has no scheme),
    - playpath is the rest of the URL, prefixed with 'mp4:' for .mp4
      files, or prefixed with 'mp3:' and stripped of its extension for
      .mp3 files,
    - ext is the suggested file extension ('.flv' for .mp4 streams).

    A URL that has nothing after the app component is returned whole
    as app_only_rtmpurl, with an empty playpath.
    """
    # YLE server requires that app is the first path component
    # only. By default librtmp would take the first two
    # components (app/appInstance).
    #
    # This also means that we can't rely on librtmp's playpath
    # parser and have to duplicate the logic here.
    if rtmpurl.find('://') != -1:
        # scheme: + '' + host + app are the first four '/'-separated
        # fields; whatever follows the fourth slash is the playpath.
        fields = rtmpurl.split('/', 4)
        if len(fields) == 5:
            app_only_rtmpurl = '/'.join(fields[:4])
            playpath = fields[4]
        else:
            app_only_rtmpurl = rtmpurl
            playpath = ''
    else:
        playpath = rtmpurl
        app_only_rtmpurl = ''

    ext = os.path.splitext(playpath)[1]
    if ext == '.mp4':
        playpath = 'mp4:' + playpath
        ext = '.flv'
    elif ext == '.mp3':
        playpath = 'mp3:' + playpath[:-4]

    return (app_only_rtmpurl, playpath, ext)

def log_output_file(outputfile, done=False):
    """Log the name of the output file.

    The message says that the stream was saved when done is true and
    merely names the output file otherwise. Nothing is logged when
    there is no output file name or when it is '-'.
    """
    if not outputfile or outputfile == '-':
        return

    prefix = u'Stream saved to ' if done else u'Output file: '
    log(prefix + outputfile)


class StreamFilters:
    """Criteria for choosing between the available versions of a stream.

    Attributes:
      latest_only -- process only the most recent episode
      sublang     -- wanted subtitle language
      hardsubs    -- prefer streams with hard subtitles
      maxbitrate  -- bitrate limit used when selecting a stream
    """
    def __init__(self, latest_only, sublang, hardsubs, maxbitrate):
        (self.latest_only, self.sublang,
         self.hardsubs, self.maxbitrate) = (latest_only, sublang,
                                            hardsubs, maxbitrate)

    def keep_lowest_bitrate(self):
        """Return True if the bitrate limit is zero or negative."""
        limit = self.maxbitrate
        return limit <= 0


### Areena (new) ###

class AreenaNGDownloader:
    """Downloader for Areena program, series and search pages.

    Every entry point loads the JSON playlist that belongs to a page
    URL and then downloads, or prints information about, each clip in
    the playlist. The methods return rtmpdump-style exit codes (RD_*).
    """
    # Operation selectors understood by process_episodes().
    OP_DOWNLOAD = 1
    OP_PRINT_DOWNLOAD_URL = 2
    OP_PRINT_EPISODE_PAGE_URL = 3

    def download_episodes(self, url, filters, rtmpdumpargs, destdir):
        """Extract all episodes (or just the latest episode if
        filters.latest_only is True) from url and download them."""
        return self.process_episodes(url, filters, rtmpdumpargs, destdir,
                                     self.OP_DOWNLOAD)

    def print_urls(self, url, print_episode_url, filters):
        """Extract episodes from url and print their
        librtmp-compatible URLs on stdout.

        The episode web page URLs are printed instead when
        print_episode_url is true."""
        optype = (self.OP_PRINT_EPISODE_PAGE_URL if (print_episode_url)
            else self.OP_PRINT_DOWNLOAD_URL)
        return self.process_episodes(url, filters, [], None, optype)

    def print_titles(self, url, filters):
        """Print the title of every clip found at url on stdout."""
        playlist = self.get_playlist(url, filters.latest_only)
        if not playlist:
            return RD_FAILED

        enc = sys.getfilesystemencoding()
        for clip in playlist:
            title = self.get_clip_title(clip)
            sys.stdout.write(title.encode(enc, 'replace') + '\n')

        return RD_SUCCESS

    def process_episodes(self, url, filters, rtmpdumpargs, destdir, optype):
        """Apply the operation optype (one of the OP_* constants) to
        every clip found at url.

        Returns RD_SUCCESS if every clip succeeded, otherwise the
        status of the last clip that failed. Returns RD_FAILED if no
        playlist could be loaded.
        """
        playlist = self.get_playlist(url, filters.latest_only)
        if not playlist:
            return RD_FAILED

        overall_status = RD_SUCCESS
        for clip in playlist:

            # Areena's "all episodes" page does not include subtitle information, only
            # the single episode pages do. So read the episode numbers from the "all
            # episodes" page and then download the episodes individually.
            if (optype == self.OP_DOWNLOAD and 'id' in clip and
                clip['id'] not in url and 'subtitles' not in clip):
                # Keep `url` itself intact: the remaining clips still
                # have to be compared against the original page URL.
                episode_url = "http://areena.yle.fi/tv/" + clip['id']
                # Diagnostics go to stderr like all other messages.
                log(episode_url)

                res = self.download_episodes(episode_url, filters,
                                             rtmpdumpargs, destdir)
            else:
                res = self.process_single_episode(clip, url, filters,
                                                  rtmpdumpargs, destdir, optype)
            if res != RD_SUCCESS:
                overall_status = res

        return overall_status

    def process_single_episode(self, clip, pageurl, filters, rtmpdumpargs,
                               destdir, optype):
        """Construct clip parameters and either print the requested
        URL or start a rtmpdump process, depending on optype."""
        streamurl = AreenaStreamUrl(clip, pageurl, filters)
        if not streamurl.is_valid():
            log(u'Unsupported stream at %s: %s' %
                (pageurl, streamurl.get_error_message()))
            return RD_FAILED

        enc = sys.getfilesystemencoding()
        if optype == self.OP_PRINT_DOWNLOAD_URL:
            sys.stdout.write(streamurl.to_url().encode(enc, 'replace') + '\n')
            return RD_SUCCESS
        elif optype == self.OP_PRINT_EPISODE_PAGE_URL:
            sys.stdout.write(
                streamurl.to_episode_url().encode(enc, 'replace') + '\n')
            return RD_SUCCESS

        # A valid stream without rtmpdump arguments is a direct URL,
        # which can be printed but not downloaded.
        if not streamurl.to_rtmpdump_args():
            log(u'Downloading the stream at %s is not yet supported.' % pageurl)
            log(u'Try --showurl')
            return RD_FAILED

        fullargs = self.build_full_rtmpdumpargs(clip, streamurl, destdir, rtmpdumpargs)
        outputfile = get_output_filename(fullargs)
        self.download_subtitles(clip, filters, outputfile)
        return self.save_rtmp_stream_to_file(fullargs, outputfile)

    def save_rtmp_stream_to_file(self, rtmpdumpargs, outputfile):
        """Run rtmpdump with rtmpdumpargs, logging the output file name
        before the download and again after a successful download.

        Returns the rtmpdump exit code."""
        log_output_file(outputfile)
        retcode = execute_rtmpdump(rtmpdumpargs)
        if retcode != RD_SUCCESS:
            return retcode

        log_output_file(outputfile, True)

        return retcode

    def build_full_rtmpdumpargs(self, clip, streamurl, destdir, rtmpdumpargs):
        """Return the complete rtmpdump command line for a clip.

        Unless the user gave -o or --flv, an output file name is
        derived from the clip title and placed in destdir. An unused
        name is picked unless the job resumes an earlier download.
        """
        outputparam = []
        if '-o' not in rtmpdumpargs and '--flv' not in rtmpdumpargs:
            filename = self.get_clip_filename(clip)
            if destdir:
                filename = os.path.join(destdir, filename)

            if not is_resume_job(rtmpdumpargs):
                filename = next_available_filename(filename)
            outputparam = ['-o', filename]

        args = [rtmpdump_binary]
        args += streamurl.to_rtmpdump_args()
        args += outputparam
        args += rtmpdumpargs
        return args

    def get_clip_title(self, clip):
        """Return a title for clip.

        Live radio clips are named after the channel and the current
        time, other clips after their title and broadcast (or
        publication) date. Clips without either get a name based on
        the current time.
        """
        if 'channel' in clip:
            # Live radio broadcast
            curtime = time.strftime('-%Y-%m-%d-%H:%M:%S')
            title = clip['channel'].get('name', 'yle-radio') + curtime

        elif 'title' in clip:
            # Video or radio stream
            title = clip['title']
            date = None
            broadcasted = clip.get('broadcasted', None)
            if broadcasted:
                date = broadcasted.get('date', None)
            if not date:
                date = clip.get('published', None)
            if date:
                title += '-' + date.replace('/', '-').replace(' ', '-')

        else:
            title = time.strftime('areena-%Y-%m-%d-%H:%M:%S')

        return title

    def get_clip_filename(self, clip):
        """Return the default output file name for clip."""
        return sane_filename(self.get_clip_title(clip)) + '.flv'

    def get_playlist(self, url, latest_episode):
        """Return the list of clip metadata dictionaries found at url.

        If latest_episode is true, only the clip with the most recent
        date is returned. Returns None if the metadata can't be loaded.
        """
        (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
        episodeurl = urlparse.urlunparse((scheme, netloc, path + '.json', params, query, ''))
        fulldata = self.load_metadata(episodeurl)
        if fulldata is None:
            return None

        playlist = []
        if 'contentType' in fulldata or 'channel' in fulldata:
            # A single program or a live radio channel
            playlist = [fulldata]
        elif 'search' in fulldata:
            playlist = fulldata['search'].get('results', [])
        elif 'availableEpisodes' in fulldata or \
                'availableClips' in fulldata:
            # A series page
            playlist = self.get_full_series_playlist(url)

        if latest_episode:
            playlist = sorted(playlist, key=self.get_media_time)[-1:]

        return playlist

    def get_full_series_playlist(self, url):
        """Return the clips of every program type listed for the series
        page at url."""
        playlist = []
        (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
        program_types = ['ohjelmat', 'muut']
        for ptype in program_types:
            size_query = 'from=0&to=1000&sisalto=%s' % ptype
            if query:
                # Keep the parameters of the page URL separate from ours.
                full_query = query + '&' + size_query
            else:
                full_query = size_query
            searchurl = urlparse.urlunparse((scheme, netloc, path + '.json',
                                             params, full_query, ''))
            fulldata = self.load_metadata(searchurl)
            if fulldata is not None:
                playlist.extend(fulldata.get('search', {}).get('results', []))
        return playlist

    def load_metadata(self, url):
        """Download url and parse it as JSON.

        Returns the parsed data, or None if the download fails or the
        content is not valid JSON."""
        jsonstr = download_page(url)
        if not jsonstr:
            return None

        if debug:
            log(url)
            log(jsonstr)

        try:
            metadata = json.loads(jsonstr)
        except ValueError:
            log(u'Invalid JSON file at ' +  url)
            return None

        return metadata

    def download_subtitles(self, clip, filters, videofilename):
        """Download the subtitles of clip next to videofilename.

        Subtitles in the language filters.sublang (every language if
        it is 'all') are saved as <video name>.<lang>.srt. Nothing is
        downloaded when hard subtitles were requested or when the
        video file name is not known.
        """
        media = clip.get('media') or {}
        if 'subtitles' not in media or filters.hardsubs:
            return

        if not videofilename:
            # Without a video file name there is nothing to derive the
            # subtitle file names from.
            log(u'Output file name is not known, not downloading subtitles')
            return

        preferred_lang = filters.sublang
        basename = os.path.splitext(videofilename)[0]
        enc = sys.getfilesystemencoding()
        for sub in media['subtitles'] or []:
            lang = sub.get('lang', '')
            if lang != preferred_lang and preferred_lang != 'all':
                continue

            url = sub.get('url', None)
            if not url:
                continue

            try:
                subtitlefile = basename + '.' + lang + '.srt'
                urllib.urlretrieve(url, subtitlefile.encode(enc, 'replace'))
                self.add_BOM(subtitlefile)
                log(u'Subtitles saved to ' + subtitlefile)
                if preferred_lang != 'all':
                    return
            except IOError as exc:
                log(u'Failed to download subtitles at %s: %s' % (url, exc))

    def add_BOM(self, filename):
        """Add byte-order mark into a file.

        Assumes (but does not check!) that the file is UTF-8 encoded.
        The file is left untouched if it already starts with a BOM.
        """
        enc = sys.getfilesystemencoding()
        encoded_filename = filename.encode(enc, 'replace')

        # Binary mode keeps the bytes (line endings in particular)
        # exactly as they were downloaded.
        with open(encoded_filename, 'rb') as f:
            content = f.read()
        if content.startswith(codecs.BOM_UTF8):
            return

        with open(encoded_filename, 'wb') as f:
            f.write(codecs.BOM_UTF8)
            f.write(content)

    def parse_yle_date(self, yledate):
        """Convert strings like 2012-06-16T18:45:00 into a struct_time.

        Returns None if parsing fails.
        """
        try:
            return time.strptime(yledate, '%Y-%m-%dT%H:%M:%S')
        except (ValueError, TypeError):
            return None

    def get_media_time(self, media):
        """Extract date (as struct_time) from media metadata.

        The broadcast date is preferred over the publication date.
        Returns the epoch if neither can be parsed."""
        broadcasted = media.get('broadcasted', {}) or {}
        return self.parse_yle_date(broadcasted.get('date', None)) or \
            self.parse_yle_date(media.get('published', None)) or \
            time.gmtime(0)


### Areena stream URL ###


class AreenaStreamUrlUtils:
    """Helpers for turning a PAPI stream description into RTMP
    connection parameters."""

    # Extracted from
    # http://areena.yle.fi/static/player/1.2.8/flowplayer/flowplayer.commercial-3.2.7-encrypted.swf
    AES_KEY = 'hjsadf89hk123ghk'

    def rtmp_parameters_from_papi(self, papiurl, pageurl, islive, filters):
        """Download the stream description at papiurl and return the
        RTMP parameters of the stream selected by filters.

        Returns a dictionary with the keys rtmp, app, playpath, tcUrl,
        pageUrl and swfUrl (and live, if islive is true). Returns None,
        after logging the reason, if any step fails.
        """
        papi = download_page(papiurl)
        if not papi:
            log('Failed to download papi')
            return None

        if papi.startswith('<media>'):
            # The description was delivered as plain XML.
            papi_decoded = papi
        else:
            papi_decoded = self.decode_papi(papi)
            if papi_decoded is None:
                log('Failed to decode papi')
                return None

        if debug:
            log(papi_decoded)

        try:
            papi_xml = xml.dom.minidom.parseString(papi_decoded)
        except Exception as exc:
            # Best effort: whatever goes wrong while parsing, the
            # stream is reported as unavailable.
            log(self._exception_text(exc))
            return None

        streams = self.streams_from_papi(papi_xml)
        if not streams:
            log('No streams found in papi')
            return None

        withsubs = self.filter_by_hard_subtitles(streams, filters)
        best = self.select_quality(withsubs, filters)
        if not best:
            log('No streams matching the bitrate limit')
            return None

        rtmp_connect = best.connect
        rtmp_stream = best.stream
        if debug:
            log('Selected stream')
            log(str(best))

        try:
            scheme, edgefcs, rtmppath = self.rtmpurlparse(rtmp_connect)
        except ValueError as exc:
            log(self._exception_text(exc))
            return None

        # The ident document of the edge server names the IP address
        # that is used in tcUrl.
        ident = download_page('http://%s/fcs/ident' % edgefcs)
        if ident is None:
            log('Failed to read ident')
            return None

        if debug:
            log(ident)

        try:
            identxml = xml.dom.minidom.parseString(ident)
        except Exception as exc:
            log(self._exception_text(exc))
            return None

        nodelist = identxml.getElementsByTagName('ip')
        if len(nodelist) < 1 or len(nodelist[0].childNodes) < 1:
            log('No <ip> node!')
            return None
        rtmp_ip = nodelist[0].firstChild.nodeValue

        # Split "app?auth" and put _fcs_vhost in front of the original
        # query parameters.
        app_without_fcsvhost = rtmppath.lstrip('/')
        app_fields = app_without_fcsvhost.split('?', 1)
        baseapp = app_fields[0]
        if len(app_fields) > 1:
            auth = app_fields[1]
        else:
            auth = ''
        app = '%s?_fcs_vhost=%s&%s' % (baseapp, edgefcs, auth)
        rtmpbase = '%s://%s/%s' % (scheme, edgefcs, baseapp)
        tcurl = '%s://%s/%s' % (scheme, rtmp_ip, app)

        rtmpparams = {'rtmp': rtmpbase,
                      'app': app,
                      'playpath': rtmp_stream,
                      'tcUrl': tcurl,
                      'pageUrl': pageurl,
                      'swfUrl': AREENA_NG_SWF}
        if islive:
            rtmpparams['live'] = '1'

        return rtmpparams

    def _exception_text(self, exc):
        """Return a unicode description of exc suitable for logging."""
        try:
            return unicode(str(exc), 'utf-8', 'replace')
        except UnicodeError:
            # str() fails on non-ASCII unicode messages; repr() is
            # always plain ASCII.
            return unicode(repr(exc), 'utf-8', 'replace')

    def decode_papi(self, papi):
        """Decrypt a base64-encoded, AES-encrypted stream description.

        The first 16 decoded bytes are the initialization vector, the
        rest is the ciphertext. Returns the decrypted byte string, or
        None if papi is not valid base64 or is too short to contain
        the initialization vector.
        """
        try:
            bytestring = base64.b64decode(str(papi))
        except (TypeError, ValueError):
            return None

        iv = bytestring[:16]
        if len(iv) < 16:
            return None

        # Pad the ciphertext to a whole number of 16 byte segments for
        # decryption and cut the padding off from the result.
        ciphertext = bytestring[16:]
        padlen = 16 - (len(ciphertext) % 16)
        ciphertext = ciphertext + '\0'*padlen

        decrypter = AES.new(self.AES_KEY, AES.MODE_CFB, iv, segment_size=16*8)
        return decrypter.decrypt(ciphertext)[:-padlen]

    def rtmpurlparse(self, url):
        """Split an RTMP URL into (scheme, server, app_and_playpath).

        Raises ValueError if url has no scheme, the scheme is not one
        of RTMP_SCHEMES, or there is no path after the server name.
        """
        if '://' not in url:
            raise ValueError("Invalid RTMP URL")

        scheme, rest = url.split('://', 1)
        if scheme not in RTMP_SCHEMES:
            raise ValueError("Invalid RTMP URL")

        if '/' not in rest:
            raise ValueError("Invalid RTMP URL")

        server, app_and_playpath = rest.split('/', 1)
        return (scheme, server, app_and_playpath)

    def rtmp_parameters_to_url(self, params):
        """Return params as a single string: the value of 'rtmp'
        followed by space-separated key=value pairs."""
        components = [params['rtmp']]
        for key, value in params.items():
            if key != 'rtmp':
                components.append('%s=%s' % (key, value))
        return ' '.join(components)

    def rtmp_parameters_to_rtmpdump_args(self, params):
        """Return params as a list of rtmpdump command line options.

        The 'live' key becomes the bare --live flag; every other key
        becomes --key=value."""
        args = []
        for key, value in params.items():
            if key == 'live':
                args.append('--live')
            else:
                args.append('--%s=%s' % (key, value))
        return args

    def streams_from_papi(self, papi):
        """Return a PAPIStream for every onlineAsset element of the
        parsed document papi that has both a connect and a stream
        URL."""
        streams = []
        assets = papi.getElementsByTagName('onlineAsset')
        for asset in assets:
            urls = asset.getElementsByTagName('url')
            if not urls:
                continue

            connect = self.extract_child_content(urls[0], 'connect')
            stream = self.extract_child_content(urls[0], 'stream')
            if connect and stream:
                videoBR = self.extract_child_content(asset, 'videoBitrate') or ''
                audioBR = self.extract_child_content(asset, 'audioBitrate') or ''
                subtitles = self.extract_child_content(asset, 'hardSubtitles') or ''
                streams.append(PAPIStream(connect, stream, videoBR,
                                          audioBR, subtitles))
        return streams

    def extract_child_content(self, node, childName):
        """Return the value of the first node inside the first
        childName element below node, or None if there is no such
        element or it is empty."""
        child = node.getElementsByTagName(childName)
        if child.length > 0 and child[0].firstChild:
            return child[0].firstChild.nodeValue
        else:
            return None

    def filter_by_hard_subtitles(self, streams, filters):
        """Return the streams that match the hard subtitle preferences
        in filters.

        With hard subtitles requested, streams subtitled in
        filters.sublang are kept (all streams if the language is
        'all'). Otherwise streams without hard subtitles are kept. If
        nothing matches, all streams are returned.
        """
        if filters.hardsubs and filters.sublang == 'all':
            filtered = streams
        elif filters.hardsubs and filters.sublang != 'none':
            filtered = [s for s in streams if s.hardSubtitles == filters.sublang]
        else:
            filtered = [s for s in streams if not s.hardSubtitles]
        return filtered or streams

    def select_quality(self, streams, filters):
        """Return the stream with the bitrate that suits filters best.

        That is the lowest bitrate stream if
        filters.keep_lowest_bitrate() is true, and otherwise the
        highest bitrate stream below filters.maxbitrate (None if there
        is no such stream).
        """
        if filters.keep_lowest_bitrate():
            # lowest quality stream
            return sorted(streams, key=lambda x: x.bitrate())[0]
        else:
            # highest quality stream below maxbitrate
            below_limit = [x for x in streams if x.bitrate() < filters.maxbitrate]
            if below_limit:
                return sorted(below_limit, key=lambda x: x.bitrate())[-1]
            else:
                return None


class AreenaStreamUrl(AreenaStreamUrlUtils):
    """Location of the stream of a single Areena clip.

    The stream is described either by RTMP parameters (rtmp_params)
    or by a direct media URL (direct_url). If neither could be
    determined the object is invalid and get_error_message() gives
    the reason.
    """
    def __init__(self, clip, pageurl, filters):
        """Resolve the stream of clip (a metadata dictionary).

        pageurl is the web page the clip was found on and filters
        selects between the alternative versions of the stream.
        """
        self.rtmp_params = None
        self.direct_url = None
        self.error = None
        self.episodeurl = pageurl

        if clip:
            if 'channel' in clip:
                self._initialize_liveradio_parameters(clip, pageurl, filters)
            else:
                self._initialize_tv_stream(clip, pageurl, filters)
            # Clips without type and id have no page URL of their own;
            # keep pointing to the page they were found on.
            self.episodeurl = self._create_pageurl(clip) or pageurl

    def is_valid(self):
        """Return a true value if the stream location is known."""
        return self.rtmp_params or self.direct_url

    def get_error_message(self):
        """Return the reason why the stream is not valid, or None if
        it is valid."""
        if self.is_valid():
            return None
        else:
            return self.error or 'No parameters'

    def to_url(self):
        """Return the stream as a librtmp-style URL or a direct media
        URL, or '' if the stream is not valid."""
        if self.rtmp_params:
            return self.rtmp_parameters_to_url(self.rtmp_params)
        elif self.direct_url:
            return self.direct_url
        else:
            return ''

    def to_rtmpdump_args(self):
        """Return the rtmpdump options for the stream. The list is
        empty if the stream has no RTMP parameters."""
        if self.rtmp_params:
            return self.rtmp_parameters_to_rtmpdump_args(self.rtmp_params)
        else:
            return []

    def to_episode_url(self):
        """Return the URL of the web page of the clip."""
        return self.episodeurl

    def _initialize_liveradio_parameters(self, clip, pageurl, filters):
        """Resolve the RTMP parameters of a live radio channel."""
        channel = clip.get('channel', {})
        lang = channel.get('lang', 'fi')
        radioid = channel.get('id', None)
        if not radioid:
            self.error = 'id missing'
            return

        papiurl = 'http://papi.yle.fi/ng/radio/rtmp/%s/%s' % (radioid, lang)
        self.rtmp_params = self.rtmp_parameters_from_papi(papiurl, pageurl, True, filters)

    def _initialize_tv_stream(self, clip, pageurl, filters):
        """Resolve the stream of a clip from its 'media' metadata."""
        # Search results don't have the media item so we have to
        # download clip metadata from the source.
        if 'media' not in clip:
            clip = self._get_metadata(clip)
            if not clip:
                self.error = 'Failed to load clip metadata'
                return

        media = clip.get('media') or {}
        if media.get('id'):
            self._parse_rtmp_url(media, pageurl, filters)
        elif media.get('mediaUrl'):
            self._parse_direct_media_url(media)
        elif media.get('downloadUrl'):
            self._parse_direct_download_url(media)
        else:
            self.error = 'No id, mediaUrl or downloadUrl'

    def _parse_rtmp_url(self, media, pageurl, filters):
        """Resolve RTMP parameters through the PAPI address of the
        media id (the live or the on-demand one, as appropriate)."""
        if media.get('live', False):
            islive = True
            papiurl = 'http://papi.yle.fi/ng/live/rtmp/' + media['id'] + '/fin'
        else:
            islive = False
            papiurl = 'http://papi.yle.fi/ng/mod/rtmp/' + media['id']

        self.rtmp_params = \
          self.rtmp_parameters_from_papi(papiurl, pageurl, islive, filters)

    def _parse_direct_media_url(self, media):
        """Use the mediaUrl of media as the direct URL."""
        self.direct_url = media.get('mediaUrl')

    def _parse_direct_download_url(self, media):
        """Use the downloadUrl of media as the direct URL."""
        self.direct_url = media.get('downloadUrl')

    def _get_metadata(self, clip):
        """Download the full metadata of clip from its own page.

        Returns the parsed JSON, or None if the page URL can't be
        constructed, the download fails or the content is not valid
        JSON."""
        metadata_page = self._create_pageurl(clip)
        if not metadata_page:
            return None

        jsonurl = metadata_page + '.json'
        jsonstr = download_page(jsonurl)
        if not jsonstr:
            return None

        try:
            clipjson = json.loads(jsonstr)
        except ValueError:
            log(u'Invalid JSON file at ' +  jsonurl)
            return None

        return clipjson

    def _create_pageurl(self, media):
        """Return the Areena page URL of a clip, or '' if the clip has
        no type or no id. Clips of type 'audio' live under /radio/,
        everything else under /tv/."""
        if 'type' not in media or 'id' not in media:
            return ''

        if media['type'] == 'audio':
            urltype = 'radio'
        else:
            urltype = 'tv'

        return 'http://areena.yle.fi/%s/%s' % (urltype, media['id'])


class PAPIStream:
    """One stream alternative listed in a PAPI stream description.

    The bitrates are stored as integers; a value that is not a number
    is stored as 0.
    """
    def __init__(self, connect, stream, videoBitrate, audioBitrate, hardSubtitles):
        self.connect, self.stream = connect, stream
        self.videoBitrate = int_or_else(videoBitrate, 0)
        self.audioBitrate = int_or_else(audioBitrate, 0)
        self.hardSubtitles = hardSubtitles

    def __str__(self):
        """Return the attributes of the stream as a JSON object."""
        summary = {
            'connect': self.connect,
            'stream': self.stream,
            'videoBitrate': self.videoBitrate,
            'audioBitrate': self.audioBitrate,
            'hardSubtitles': self.hardSubtitles}
        return json.dumps(summary)

    def bitrate(self):
        """Return the combined video and audio bitrate."""
        total = self.videoBitrate + self.audioBitrate
        return total


### Areena live TV ###
#
# This is for the real live streams
# (http://areena.yle.fi/tv/suora/...). The old-style discrete live
# broadcasts (http://areena.yle.fi/tv/...) are still handled by
# AreenaNGDownloader.


class AreenaLiveDownloader:
    def download_episodes(self, url, filters, rtmpdumpargs, destdir):
        streamurl = AreenaLiveStreamUrl(url, filters)
        if not streamurl.is_valid():
            return RD_FAILED

        outputparam = []
        if '-o' not in rtmpdumpargs and '--flv' not in rtmpdumpargs:
            filename = self.get_live_stream_title(url) + '.flv'
            if destdir:
                filename = os.path.join(destdir, filename)

            if not is_resume_job(rtmpdumpargs):
                filename = next_available_filename(filename)
            outputparam = ['-o', filename]

        args = [rtmpdump_binary]
        args += streamurl.to_rtmpdump_args()
        args += outputparam
        args += rtmpdumpargs

        outputfile = get_output_filename(args)
        log_output_file(outputfile)

        retcode = execute_rtmpdump(args)
        if retcode != RD_SUCCESS:
            return retcode

        log_output_file(outputfile, True)

        return retcode

    def print_urls(self, url, print_episode_url, filters):
        """Extract episodes from url and print their
        librtmp-compatible URLs on stdout."""
        printableurl = (url if print_episode_url
                        else AreenaLiveStreamUrl(url, filters).to_url())
        enc = sys.getfilesystemencoding()
        print printableurl.encode(enc, 'replace')
        return RD_SUCCESS

    def print_titles(self, url, filters):
        enc = sys.getfilesystemencoding()
        print self.get_live_stream_title(url).encode(enc, 'replace')
        return RD_SUCCESS

    def get_live_stream_title(self, url):
        title = AreenaLiveStreamUrl.extract_live_channel_from_url(url) or 'yleTV'
        title += time.strftime('-%Y-%m-%d-%H:%M:%S')
        return title


### Areena live stream URL ###


class AreenaLiveStreamUrl(AreenaStreamUrlUtils):
    """RTMP parameters of an Areena live TV page.

    The parameters are resolved once, in the constructor; is_valid()
    tells whether that produced anything usable.
    """

    def __init__(self, pageurl, filters):
        self.rtmp_params = self._get_live_rtmp_parameters(pageurl, filters)

    def is_valid(self):
        """Return True if stream parameters were found."""
        if self.rtmp_params:
            return True
        return False

    def to_url(self):
        """Return the stream parameters converted by rtmp_parameters_to_url()."""
        params = self.rtmp_params
        return self.rtmp_parameters_to_url(params)

    def to_rtmpdump_args(self):
        """Return the stream parameters converted by rtmp_parameters_to_rtmpdump_args()."""
        params = self.rtmp_params
        return self.rtmp_parameters_to_rtmpdump_args(params)

    @staticmethod
    def extract_live_channel_from_url(url):
        """Return the part of url after the live TV prefix, or None.

        Everything following .../tv/suora/ (or .../tv/direkt/ on the
        arenan host) is returned, including any query string.
        """
        found = re.search(r'http://(?:areena.yle.fi/tv/suora|arenan.yle.fi/tv/direkt)/(.+)', url)
        return found.group(1) if found else None

    def _get_live_rtmp_parameters(self, url, filters):
        """Resolve the RTMP parameters of the live page at url via PAPI.

        Returns None when url is not a live TV page address.
        """
        channel = AreenaLiveStreamUrl.extract_live_channel_from_url(url)
        if channel is None:
            return None

        # The fem channel has language-specific media ids; every other
        # channel maps to 'yle-' followed by the channel name.
        if channel == 'fem':
            media_id = 'yle-fem-fi'
        elif channel == 'fem?kieli=sv':
            media_id = 'yle-fem-sv'
        else:
            media_id = 'yle-' + channel

        papiurl = 'http://papi.yle.fi/ng/live/rtmp/' + media_id + '/fin'
        return self.rtmp_parameters_from_papi(papiurl, url, True, filters)


### Areena live radio ###


class AreenaLiveRadioDownloader(AreenaStreamUrlUtils):
    def download_episodes(self, url, filters, rtmpdumpargs, destdir):
        streamurl = AreenaLiveRadioStreamUrl(url, filters)
        if not streamurl.is_valid():
            return RD_FAILED

        outputparam = []
        if '-o' not in rtmpdumpargs and '--flv' not in rtmpdumpargs:
            filename = self.get_live_stream_title(url) + '.flv'
            if destdir:
                filename = os.path.join(destdir, filename)

            if not is_resume_job(rtmpdumpargs):
                filename = next_available_filename(filename)
            outputparam = ['-o', filename]

        args = [rtmpdump_binary]
        args += streamurl.to_rtmpdump_args()
        args += outputparam
        args += rtmpdumpargs

        outputfile = get_output_filename(args)
        log_output_file(outputfile)

        retcode = execute_rtmpdump(args)
        if retcode != RD_SUCCESS:
            return retcode

        log_output_file(outputfile, True)

        return retcode

    def print_urls(self, url, print_episode_url, filters):
        printableurl = (url if print_episode_url
                        else AreenaLiveRadioStreamUrl(url, filters).to_url())
        enc = sys.getfilesystemencoding()
        print printableurl.encode(enc, 'replace')
        return RD_SUCCESS

    def print_titles(self, url, filters):
        enc = sys.getfilesystemencoding()
        print self.get_live_stream_title(url).encode(enc, 'replace')
        return RD_SUCCESS

    def get_live_stream_title(self, pageurl):
        m = re.match(r'http://(?:www\.)?yle\.fi/radio/([a-zA-Z0-9]+)/suora/?', pageurl)
        title = m.group(1) if m else 'yleradio'
        title += time.strftime('-%Y-%m-%d-%H:%M:%S')
        return title


class AreenaLiveRadioStreamUrl(AreenaStreamUrlUtils):
    """RTMP parameters of a live radio page.

    The parameters are resolved once, in the constructor; is_valid()
    tells whether that produced anything usable.
    """

    def __init__(self, pageurl, filters):
        self.rtmp_params = self._get_radio_rtmpurl(pageurl, filters)

    def is_valid(self):
        """Return True if stream parameters were found."""
        if self.rtmp_params:
            return True
        return False

    def to_url(self):
        """Return the stream parameters converted by rtmp_parameters_to_url()."""
        params = self.rtmp_params
        return self.rtmp_parameters_to_url(params)

    def to_rtmpdump_args(self):
        """Return the stream parameters converted by rtmp_parameters_to_rtmpdump_args()."""
        params = self.rtmp_params
        return self.rtmp_parameters_to_rtmpdump_args(params)

    def _get_radio_rtmpurl(self, pageurl, filters):
        """Resolve the RTMP parameters of the radio page via PAPI.

        The numeric stream id is scraped from the page HTML. Returns
        None when the page cannot be downloaded or contains no id.
        """
        html = download_page(pageurl)
        if not html:
            return None

        # Two page layouts are recognized; the first pattern wins when
        # both are present.
        found = (re.search(r'"id": "/([0-9]+)"', html) or
                 re.search(r'id="live-channel".+data-id="([0-9]+)"', html))
        if not found:
            return None

        papiurl = 'http://papi.yle.fi/ng/radio/rtmp/%s/fi' % found.group(1)
        return self.rtmp_parameters_from_papi(papiurl, pageurl, True, filters)


### Elava Arkisto ###


class ElavaArkistoDownloader(AreenaStreamUrlUtils):

    def extract_playlist(self, mediajson, pageurl, filters):
        pagedata = json.loads(mediajson)
        if not pagedata.has_key('media'):
            return []

        if debug:
            log('media in pagedata:')
            log(str(pagedata['media']))

        clips = []
        for mediaitem in pagedata['media']:
            title = mediaitem.get('title', 'elavaarkisto')
            filename = sane_filename(title)

            downloadURL = mediaitem.get('downloadURL', None)

            rtmpurl, playpath, ext = self.arkisto_rtmp(mediaitem, filters)

            if not rtmpurl:
                rtmpurl, playpath, ext = \
                  self.mediakanta_rtmp(mediaitem, pageurl, filters)

            if debug:
                log('clip rtmp url: ' + rtmpurl)

            if not rtmpurl:
                continue

            clips.append({'rtmp': rtmpurl,
                          'playpath': playpath,
                          'downloadURL': downloadURL,
                          'title': title,
                          'filename': filename + ext})

        return clips

    def arkisto_rtmp(self, mediaitem, filters):
        streams = self.extract_streams_and_bitrates(mediaitem)
        if not streams:
            return (None, None, None)

        if filters.keep_lowest_bitrate():
            selected = sorted(streams, key=itemgetter(0))[0]
        else:
            filtered = [s for s in streams if s[0] <= filters.maxbitrate]
            if not filtered:
                log('No streams matching the bitrate limit')
                return (None, None, None)
            selected = sorted(streams, key=itemgetter(0))[-1]

        if debug:
            log('Selected by bitrate filter: ' + str(selected))
            
        return parse_rtmp_single_component_app(selected[1])

    def extract_streams_and_bitrates(self, mediaitem):
        streams = []
        for clip in mediaitem.get('urls', {}).get('domestic', []):
            rate = float(clip.get('bitrate', 0))
            url = clip.get('url', '')
            streams.append((rate, url))
        return streams

    def mediakanta_rtmp(self, mediaitem, pageurl, filters):
        if mediaitem.get('mediakantaId', '') != '':
            papiurl = 'http://papi.yle.fi/ea/mod/rtmp/' + mediaitem['mediakantaId']
            rtmp_params = self.rtmp_parameters_from_papi(papiurl, pageurl, False, filters)
            playpath = rtmp_params['playpath']
            ext = os.path.splitext(playpath)[1] or '.mp3'
            return (rtmp_params['rtmp'], playpath, ext)
        else:
            return (None, None, None)

    def download_single_episode(self, rtmpurl, playpath, downloadURL,
                                filename, rtmpdumpargs, pageurl):
        if downloadURL:
            log('Downloading from HTTP server...')
            log_output_file(filename)

            enc = sys.getfilesystemencoding()
            try:
                urllib.urlretrieve(downloadURL, filename.encode(enc))
            except IOError, exc:
                log(u'Download failed: ' + str(exc))
                return RD_FAILED

            log_output_file(filename, True)
            return RD_SUCCESS
        else:
            args = [rtmpdump_binary]
            args += RTMPDUMP_OPTIONS_ARKISTO
            args += ['-r', rtmpurl,
                     '-y', playpath,
                     '-p', pageurl,
                     '-o', filename]
            args += rtmpdumpargs

            outputfile = get_output_filename(args)
            log_output_file(outputfile)

            retcode = execute_rtmpdump(args)
            if retcode != RD_SUCCESS:
                return retcode

            log_output_file(outputfile, True)

            return retcode

    def print_librtmp_url(self, rtmpurl, playpath, pageurl, downloadURL):
        """Print a librtmp-compatible Elava Arkisto URL to stdout."""
        if downloadURL:
            print downloadURL
        else:
            print '%s playpath=%s swfUrl=%s pageUrl=%s' % \
                (rtmpurl, playpath, ARKISTO_SWF, pageurl)
        return RD_SUCCESS

    def get_playlist(self, url, filters):
        (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)

        if '.' in path:
            path = path.rsplit('.', 1)[0]
        path = path + '.json'
        jsonurl = urlparse.urlunparse((scheme, netloc, path, '', '', ''))

        mediajson = download_page(jsonurl)
        if mediajson is None:
            return None

        # Yle server sends UTF-8 but doesn't set charset in
        # Content-type header. This will workaround the problem.
        mediajson = mediajson.encode('iso-8859-1').decode('utf-8')

        playlist = self.extract_playlist(mediajson, url, filters)
        if len(playlist) == 0:
            log(u"Can't find streams at %s." % url)
            return None

        if filters.latest_only:
            playlist = playlist[:1]

        return playlist

    def download_episodes(self, url, filters, rtmpdumpargs, destdir):
        """Download playlist from Elava Arkisto page at url and
        download all clips using rtmpdump."""
        playlist = self.get_playlist(url, filters)
        if playlist is None:
            return RD_FAILED

        overall_status = RD_SUCCESS
        for clip in playlist:
            filename = clip['filename']
            if destdir:
                filename = os.path.join(destdir, filename)

            if not is_resume_job(rtmpdumpargs):
                filename = next_available_filename(filename)

            status = self.download_single_episode(clip['rtmp'],
                                                  clip['playpath'],
                                                  clip['downloadURL'],
                                                  filename,
                                                  rtmpdumpargs, url)
            if status != RD_SUCCESS:
                overall_status = status

        return overall_status

    def print_urls(self, url, print_episode_url, filters):
        """Download playlist from Elava Arkisto page at url and print
        a librtmp-compatible URL for each clip."""
        playlist = self.get_playlist(url, filters)
        if playlist is None:
            return RD_FAILED

        if print_episode_url:
            print url
        else:
            for clip in playlist:
                self.print_librtmp_url(clip['rtmp'], clip['playpath'],
                                       url, clip['downloadURL'])

        return RD_SUCCESS

    def print_titles(self, url, filters):
        playlist = self.get_playlist(url, filters)
        if playlist is None:
            return RD_FAILED

        enc = sys.getfilesystemencoding()
        for clip in playlist:
            print clip['title'].encode(enc, 'replace')

        return RD_SUCCESS
        


### YleX Areena ###


class YleXDownloader(AreenaNGDownloader):
    def download_episodes(self, url, filters, argv, destdir):
        """Download a stream from the given YleX Areena url using
        rtmpdump."""
        html = download_page(url)
        if not html:
            return RD_FAILED

        streamurl = YleXStreamUrl(html, url, filters)
        if not streamurl.is_valid():
            return RD_FAILED

        outputoptions = []
        if not '-o' in argv and not '--flv' in argv:
            filename = sane_filename(self.stream_title(html)) + '.flv'
            if destdir:
                filename = os.path.join(destdir, filename)

            if not is_resume_job(argv):
                filename = next_available_filename(filename)

            outputoptions = ['-o', filename]

        args = [rtmpdump_binary]
        args += streamurl.to_rtmpdump_args()
        args += RTMPDUMP_OPTIONS_YLEX
        args += outputoptions
        args += argv

        outputfile = get_output_filename(args)
        log_output_file(outputfile)

        retcode = execute_rtmpdump(args)
        if retcode != RD_SUCCESS:
            return retcode

        log_output_file(outputfile, True)

        return retcode

    def print_urls(self, url, print_episode_url, filters):
        """Print a librtmp-compatible YleX Areena URL to stdout."""
        html = download_page(url)
        if not html:
            return RD_FAILED

        streamurl = YleXStreamUrl(html, url, filters)
        if not streamurl.is_valid():
            return RD_FAILED

        enc = sys.getfilesystemencoding()
        printableurl = None
        if print_episode_url:
            printableurl = streamurl.to_episode_url()
        else:
            printableurl = streamurl.to_url()
        print printableurl.encode(enc, 'replace')
        return RD_SUCCESS

    def print_titles(self, url, filters):
        enc = sys.getfilesystemencoding()
        print self.stream_title(download_page(url)).encode(enc, 'replace')
        return RD_SUCCESS

    def stream_title(self, html):
        if html:
            match = re.search(r'<meta +?property="og:title" +?content="(.+?)" *?/>', html)
            if match:
                return replace_entitydefs(match.group(1))

        return time.strftime('YleX-%Y-%m-%d-%H:%M:%S')


### YleX Areena stream URL ###


class YleXStreamUrl(AreenaStreamUrlUtils):
    """RTMP parameters of a YleX Areena page.

    The parameters are resolved once, in the constructor, from the
    already downloaded page html; is_valid() tells whether that
    produced anything usable.
    """

    def __init__(self, html, url, filters):
        self.rtmp_params = self._parse_rtmp_params(html, url, filters)
        self.episode_url = url

    def is_valid(self):
        """Return True if stream parameters were found."""
        if self.rtmp_params:
            return True
        return False

    def to_url(self):
        """Return the stream parameters converted by rtmp_parameters_to_url()."""
        params = self.rtmp_params
        return self.rtmp_parameters_to_url(params)

    def to_episode_url(self):
        """Return the page address this object was created from."""
        return self.episode_url

    def to_rtmpdump_args(self):
        """Return the stream parameters converted by rtmp_parameters_to_rtmpdump_args()."""
        params = self.rtmp_params
        return self.rtmp_parameters_to_rtmpdump_args(params)

    def _parse_rtmp_params(self, html, pageurl, filters):
        """Resolve the RTMP parameters of the page via PAPI.

        The clip id is the hexadecimal run that follows a '/' and
        precedes a '_' in the page's og:image address. Returns None
        when the page has no og:image tag or the address has no such
        id.
        """
        image = re.search(r'<meta +?property="og:image" +?content="(.+?)" *?/>', html)
        clipid = image and re.search(r'/([a-fA-F0-9]+)_', image.group(1))
        if not clipid:
            return None

        papiurl = 'http://papi.yle.fi/ng/mod/rtmp/%s' % clipid.group(1)
        return self.rtmp_parameters_from_papi(papiurl, pageurl, False, filters)


### main program ###


def main():
    """Parse the command line and run the requested action.

    Options understood by this script are consumed here; every other
    option (together with its argument, for the options listed in
    ARGOPTS) is forwarded to rtmpdump. The last argument that does
    not start with '-' is taken as the page URL.

    The function always ends the process through sys.exit(): with
    status 1 after printing the usage text or when rtmpdump cannot be
    located, with RD_FAILED for an unsupported URL, and otherwise
    with the status returned by the selected downloader.
    """
    global debug
    global rtmpdump_binary
    global excludechars
    latest_episode = False
    url_only = False
    title_only = False
    print_episode_url = False
    sublang = 'all'
    hardsubs = False
    bitratearg = sys.maxint
    show_usage = False
    url = None
    destdir = None

    # Is sys.getfilesystemencoding() the correct encoding for
    # sys.argv?
    encoding = sys.getfilesystemencoding()
    argv = [unicode(x, encoding, 'ignore') for x in sys.argv[1:]]
    rtmpdumpargs = []
    while argv:
        arg = argv.pop(0)
        if not arg.startswith('-'):
            url = arg
        elif arg in ['--verbose', '-V', '--debug', '-z']:
            # Verbosity applies to this script and to rtmpdump alike.
            debug = True
            rtmpdumpargs.append(arg)
        elif arg in ['--help', '-h']:
            show_usage = True
        elif arg in ['--latestepisode']:
            latest_episode = True
        elif arg == '--showurl':
            url_only = True
        elif arg == '--showtitle':
            title_only = True
        elif arg == '--showepisodepage':
            url_only = True
            print_episode_url = True
        elif arg == '--vfat':
            excludechars = excludechars_windows
        elif arg == '--sublang':
            if argv:
                sublang = argv.pop(0)
        elif arg == '--hardsubs':
            hardsubs = True
        elif arg == '--maxbitrate':
            if argv:
                bitratearg = argv.pop(0)
        elif arg == '--rtmpdump':
            if argv:
                rtmpdump_binary = argv.pop(0)
        elif arg == '--destdir':
            if argv:
                destdir = argv.pop(0)
        else:
            # Unknown options belong to rtmpdump; keep an option and
            # its argument together.
            rtmpdumpargs.append(arg)
            if arg in ARGOPTS and argv:
                rtmpdumpargs.append(argv.pop(0))

    if not rtmpdump_binary:
        if sys.platform == 'win32':
            rtmpdump_binary = which('rtmpdump.exe')
        else:
            rtmpdump_binary = which('rtmpdump')

    if show_usage or url is None:
        usage()
        sys.exit(1)

    if debug or not (url_only or title_only):
        splashscreen()

    if not rtmpdump_binary:
        log(u'Error: rtmpdump not found on path, use --rtmpdump for setting the location')
        sys.exit(1)

    url = encode_url_utf8(url)
    dl = downloader_factory(url)
    if not dl:
        log(u'Unsupported URL %s.' % url)
        log(u'Is this really a Yle video page?')
        # The script calls main() without using its return value, so
        # the failure has to be reported through sys.exit() for the
        # process to end with a non-zero status.
        sys.exit(RD_FAILED)

    maxbitrate = bitrate_from_arg(bitratearg)
    sfilt = StreamFilters(latest_episode, sublang, hardsubs, maxbitrate)
    if url_only:
        sys.exit(dl.print_urls(url, print_episode_url, sfilt))
    elif title_only:
        sys.exit(dl.print_titles(url, sfilt))
    else:
        sys.exit(dl.download_episodes(url, sfilt, rtmpdumpargs, destdir))


# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
