#! /usr/bin/env python

from __future__ import print_function
import logging, os, sys
import argparse

## Base locations used as the default sources when downloading PDF set tarballs and the
## index file: a local CVMFS filesystem path and an HTTP URL. File names are appended
## directly to these strings, which is why both end with a slash.
CVMFSBASE = "/cvmfs/sft.cern.ch/lcg/external/lhapdfsets/current/"
URLBASE = "http://lhapdfsets.web.cern.ch/lhapdfsets/current/"
## Name of the PDF set index file, both at the sources and in the local list directory
INDEX_FILENAME = "pdfsets.index"


class SetInfo(object):
    """Stores PDF metadata: name, version, ID code.

    Comparison is by set name only: two SetInfo objects are equal when their
    names are equal, and a SetInfo also compares equal to any other value that
    equals its name (e.g. a plain string). The ID code and version play no
    part in equality or hashing.
    """

    def __init__(self, name, id_code, version):
        self.name = name
        self.id_code = id_code
        self.version = version

    def __eq__(self, other):
        if isinstance(other, SetInfo):
            return self.name == other.name
        return self.name == other

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # Defining __eq__ without __hash__ makes instances unhashable on Python 3.
        # Hash on the name so objects that compare equal (including the bare
        # name itself) also hash equally and can be used in sets / as dict keys.
        return hash(self.name)

    def __repr__(self):
        return self.name


def get_reference_list(filepath):
    """Reads reference file and returns list of SetInfo objects.

    The reference file is space-delimited, with columns:
    id_code name [version]

    Rows with two columns (the format marked "<= 6.0.5" below) carry no
    version and get version None; rows with three columns carry an integer
    version. The ID code must be an integer.

    Returns an empty list if the file cannot be opened, or if any row is
    malformed (wrong number of columns, non-integer ID code/version, or a CSV
    parsing error). Both conditions are reported through logging.error rather
    than raised.
    """
    import csv

    database = []
    try:
        # The with-block guarantees the file is closed on every exit path
        with open(filepath, "r") as csv_file:
            logging.debug("Reading %s", filepath)
            reader = csv.reader(csv_file, delimiter=" ", skipinitialspace=True, strict=True)
            try:
                for row in reader:
                    # <= 6.0.5
                    if len(row) == 2:
                        id_code, name, version = int(row[0]), str(row[1]), None
                    # >= 6.1.0
                    elif len(row) == 3:
                        id_code, name, version = int(row[0]), str(row[1]), int(row[2])
                    else:
                        raise ValueError("expected 2 or 3 columns, found %d" % len(row))
                    database.append(SetInfo(name, id_code, version))
            except (ValueError, csv.Error):
                # A single bad row invalidates the whole index: discard what was read so far
                logging.error("Corrupted file on line %d: %s", reader.line_num, filepath)
                return []
    except IOError:
        logging.error("Could not open %s", filepath)
    return database


def get_installed_list(_=None):
    """Build SetInfo records for every PDF set the lhapdf module reports as available.

    The single optional argument is accepted but ignored. Each record holds
    the set name together with the lhapdfID and dataversion attributes of the
    object returned by lhapdf.getPDFSet for that name.
    """
    import lhapdf

    def describe(setname):
        # Look the set up once and pull out the two metadata fields we keep
        pdfset = lhapdf.getPDFSet(setname)
        return SetInfo(setname, pdfset.lhapdfID, pdfset.dataversion)

    return [describe(setname) for setname in lhapdf.availablePDFSets()]


# TODO: Move this into the Python module to allow Python-scripted downloading?
def download_url(source, dest_dir, dryrun=False):
    """Download a file from a URL or POSIX path source to the destination directory.

    The destination directory is created if it does not exist. Sources that
    start with "/" or "file://" are copied from the local filesystem; anything
    else is opened as a URL. The file keeps the base name of the source.

    Returns True only when the file was actually copied or downloaded.
    Returns False on failure, and also after a dry run (dryrun=True), which
    only logs the file name and, when known, its size.
    """
    if not os.path.isdir(os.path.abspath(dest_dir)):
        logging.info("Creating directory %s" % dest_dir)
        os.makedirs(dest_dir)
    dest_filepath = os.path.join(dest_dir, os.path.basename(source))

    ## Decide whether to copy or download
    if source.startswith("/") or source.startswith("file://"):  # POSIX
        if source.startswith("file://"):
            source = source[len("file://"):]
        return _copy_local_file(source, dest_filepath, dryrun)
    return _fetch_remote_file(source, dest_filepath, dryrun)


def _report_dryrun(filename, file_size):
    """Log what a dry run would have fetched; the size is omitted when it is zero/unknown."""
    if file_size:
        logging.info("%s [%s]" % (filename, convertBytes(file_size)))
    else:
        logging.info("%s" % filename)


def _copy_local_file(source, dest_filepath, dryrun):
    """Copy a local file to dest_filepath; returns True only if a copy was made."""
    logging.debug("Downloading from %s" % source)
    logging.debug("Downloading to %s" % dest_filepath)
    try:
        file_size = os.stat(source).st_size
        if dryrun:
            _report_dryrun(os.path.basename(source), file_size)
            return False
        import shutil
        shutil.copy(source, dest_filepath)
    except Exception:
        # Best effort: an unavailable source just means the caller tries the next
        # one. Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        logging.debug("Unable to download %s" % source)
        return False
    return True


def _content_length(response):
    """Return the Content-Length reported by an open URL response, or 0 if absent/unparseable."""
    try:
        content_length = response.info().get("Content-Length", 0)
        if isinstance(content_length, list):
            content_length = content_length[0] if content_length else 0
        return int(content_length)
    except (TypeError, ValueError):
        return 0


def _fetch_remote_file(url, dest_filepath, dryrun):
    """Stream a URL to dest_filepath with a progress line; returns True only on a completed download."""
    try:
        import urllib.request as urllib
    except ImportError:
        import urllib2 as urllib

    try:
        u = urllib.urlopen(url)
    except (urllib.URLError, ValueError):
        # ValueError is what urlopen raises for a string that is not a URL at all
        logging.debug("Unable to download %s" % url)
        return False

    try:
        file_size = _content_length(u)
        logging.debug("Downloading from %s" % url)
        logging.debug("Downloading to %s" % dest_filepath)
        if dryrun:
            _report_dryrun(os.path.basename(url), file_size)
            return False

        try:
            dest_file = open(dest_filepath, "wb")
        except IOError:
            logging.error("Could not write to %s" % dest_filepath)
            return False

        try:
            file_size_dl = 0
            buffer_size = 8192
            while True:
                chunk = u.read(buffer_size)
                if not chunk:
                    break

                file_size_dl += len(chunk)
                dest_file.write(chunk)

                # Carriage return (chr(13)) rewinds to the line start so the status overwrites itself
                status = chr(13) + "%s: " % os.path.basename(url)
                status += r"%s" % convertBytes(file_size_dl).rjust(10)
                if file_size:
                    status += r"[%3.1f%%]" % (file_size_dl * 100. / file_size)
                sys.stdout.write(status + " ")
        except urllib.URLError as err:
            logging.error("Error during download: %s", getattr(err, "reason", err))
            return False
        except KeyboardInterrupt:
            logging.error("Download halted by user")
            return False
        finally:
            dest_file.close()
            print("")
    finally:
        # Always release the connection, including on dry runs and failures
        u.close()

    return True


def extract_tarball(tar_filename, dest_dir, keep_tarball):
    """Extracts a gzipped tarball located in dest_dir into dest_dir.

    tar_filename is the tarball's file name relative to dest_dir. Failures to
    extract, or to remove the tarball afterwards, are logged as errors rather
    than raised. Unless keep_tarball is true, the tarball is deleted after the
    extraction attempt, whether or not the extraction succeeded.
    """
    import tarfile

    tarpath = os.path.join(dest_dir, tar_filename)
    try:
        tar_file = tarfile.open(tarpath, "r:gz")
        try:
            if hasattr(tarfile, "data_filter"):
                # The archive was downloaded, so refuse members that would escape
                # dest_dir (absolute paths, "..", unsafe links) where the
                # interpreter offers extraction filters.
                tar_file.extractall(dest_dir, filter="data")
            else:
                tar_file.extractall(dest_dir)
        finally:
            tar_file.close()
    except Exception:
        # Best effort by design: a broken archive must not abort the remaining
        # installs. KeyboardInterrupt/SystemExit are deliberately not caught.
        logging.error("Unable to extract %s to %s" % (tar_filename, dest_dir))
        logging.debug("Extraction failure details", exc_info=True)
    if not keep_tarball:
        try:
            os.remove(tarpath)
        except OSError:
            logging.error("Unable to remove %s after expansion" % tar_filename)


def convertBytes(size, nDecimalPoints=1):
    """Format a byte count as a human-readable string such as "1.5 KB".

    The value is scaled by powers of 1024 and rounded to nDecimalPoints
    decimal places. The largest unit is GB, so bigger sizes are expressed as a
    large number of GB. Zero or negative sizes, and values that round to zero,
    give "0 B".
    """
    units = ("B", "KB", "MB", "GB")
    if size <= 0:
        # log() of a non-positive size is undefined; there is nothing to scale
        return "0 B"
    value = float(size)
    i = 0
    # Repeated division avoids floating-point log() edge cases and stops at
    # the last known unit instead of indexing past the end of the tuple
    while value >= 1024 and i < len(units) - 1:
        value /= 1024.0
        i += 1
    s = round(value, nDecimalPoints)
    if s > 0:
        return "%s %s" % (s, units[i])
    return "0 B"


def download_file(sources, filename, dest_dir, dryrun=False):
    """Try each source in turn until download_url reports success for source + filename.

    Returns True as soon as one attempt succeeds. If every attempt fails (or
    there are no sources), logs an error listing the locations tried and
    returns False.
    """
    attempted = []
    for base in sources:
        candidate = base + filename
        if not download_url(candidate, dest_dir, dryrun):
            # Remember the failed location and move on to the next source
            attempted.append(candidate)
            continue
        return True
    logging.error("Unable to download from any of %s" % attempted)
    return False


def globfilt(pdfs, patterns):
    """Unix-style pattern matching of arguments.

    Returns the entries of pdfs that match at least one of the fnmatch-style
    patterns, in their original order. Each entry appears at most once per
    occurrence in pdfs, however many patterns it matches. When no patterns are
    given, pdfs is returned unchanged (the same object, not a copy).
    """
    if not patterns:
        return pdfs
    from fnmatch import fnmatch
    return [pdf for pdf in pdfs if any(fnmatch(pdf, pattern) for pattern in patterns)]


class CommandHandler(object):
    """\
A program for managing LHAPDF parton distribution function data files.

The main sub-commands that can be used are:
  - list|ls:     list available PDF sets, optionally filtered and/or categorised by status
  - show:        show metadata details of specified PDF sets
  - update:      download and install a new PDF set index file
  - install|get: download and install new PDF set data files
  - upgrade:     download and install newer replacement PDF set data files where available
"""
    ## NB. the class docstring above doubles as the top-level --help description,
    ## and each sub-command method's docstring as that sub-command's description.

    def __init__(self):
        """Parse the command line and immediately run the requested sub-command.

        Global options are parsed here; everything argparse does not recognise
        is handed to the sub-command method, which parses it with its own parser.
        Exits with status 2 if the command name is not a public sub-command.
        """

        ## Load settings from Python module
        try:
            import lhapdf
            DATADIR = lhapdf.paths()[0]
            VERSION = lhapdf.__version__
        except ImportError:
            DATADIR = None
            VERSION = None

        default_sources = [CVMFSBASE, URLBASE]

        ## Parse the command line
        ap = argparse.ArgumentParser(description=self.__doc__, formatter_class=argparse.RawTextHelpFormatter)
        ap.add_argument("COMMAND", metavar="COMMAND [suboptions]", help="Subcommand to run")
        ap.add_argument("--listdir", default=DATADIR, dest="LISTDIR",
                        help="Directory containing the pdfsets.index list file [default: %(default)s]")
        ap.add_argument("--pdfdir", default=DATADIR, dest="PDFDIR",
                        help="Directory for installation of PDF set data [default: %(default)s]")
        ap.add_argument("--source", default=list(default_sources), action="append", dest="SOURCES",
                        help="Prepend a path or URL to be used as a source of data files [default: %(default)s]")
        ap.add_argument("-q", "--quiet", help="Suppress normal messages", dest="VERBOSITY", action="store_const",
                        const=logging.ERROR, default=logging.INFO)
        ap.add_argument("-v", "--verbose", help="Output debug messages", dest="VERBOSITY", action="store_const",
                        const=logging.DEBUG, default=logging.INFO)
        if VERSION:
            ap.add_argument("--version", action="version", version=VERSION)
        self.mainargs, otherargs = ap.parse_known_args()

        ## Apply verbosity settings
        logging.basicConfig(format="%(message)s", level=self.mainargs.VERBOSITY)

        ## Re-order the sources list since argparse doesn't have a "prepend" action!
        self.mainargs.SOURCES = self._prepend_user_sources(self.mainargs.SOURCES, len(default_sources))

        ## Check for a command: only public, callable attributes count as sub-commands
        command = self.mainargs.COMMAND
        handler = None if command.startswith("_") else getattr(self, command, None)
        if not callable(handler):
            print("Unrecognized command")
            ap.print_help()
            sys.exit(2)

        ## Use dispatch pattern to invoke method with same name:
        handler(otherargs)


    @staticmethod
    def _prepend_user_sources(sources, ndefaults):
        """Move the first ndefaults entries of sources behind the remaining ones.

        argparse's "append" action places user-supplied values after the
        defaults; this puts the user-supplied ones first, keeping the relative
        order within each group. Returns a new list.
        """
        return sources[ndefaults:] + sources[:ndefaults]


    def _scanpdfs(self):
        """Populate self.master_list and self.installed, both dicts of SetInfo keyed by set name.

        Every master_list entry additionally gets an 'installed' flag and an
        'outdated' flag; the latter is only true when both versions are known
        and the installed version is lower than the index version.
        Both dicts stay empty if either search directory is unknown.
        """
        self.master_list, self.installed = {}, {}

        ## Return empty lists if relevant search directories are not known
        if self.mainargs.LISTDIR is None or self.mainargs.PDFDIR is None:
            return

        ## List and install commands require us to build lists of reference and installed PDFs
        indexpath = os.path.join(self.mainargs.LISTDIR, INDEX_FILENAME)
        logging.debug("Index file = " + indexpath)
        for pdf in get_reference_list(indexpath):
            self.master_list[pdf.name] = pdf
        for pdf in get_installed_list(self.mainargs.PDFDIR):
            self.installed[pdf.name] = pdf

        ## Check installation status of all PDFs
        for name, ref in self.master_list.items():
            local = self.installed.get(name)
            ref.installed = local is not None
            if local is None or local.version is None or ref.version is None:
                ref.outdated = False
            else:
                ref.outdated = local.version < ref.version


    def list(self, otherargs):
        """List all standard PDF sets, or search using a Unix-style pattern.
        (by default lists all sets available for download; use --installed or --outdated to explore those installed on the current system)"""
        ap = argparse.ArgumentParser(description=self.list.__doc__, usage="%(prog)s list [options] [pattern...]")
        ap.add_argument("PATTERNS", nargs="*", help="patterns to match PDF sets against")
        ag = ap.add_mutually_exclusive_group()
        ag.add_argument("--installed", dest="INSTALLED", action="store_true", help="list installed PDF sets")
        ag.add_argument("--outdated", dest="OUTDATED", action="store_true",
                        help="list installed, but outdated, PDF sets")
        ap.add_argument("--codes", dest="CODES", action="store_true", help="additionally show ID codes")
        subargs = ap.parse_args(otherargs)

        ## Scan the current PDF collection
        self._scanpdfs()

        ## Filter PDFs on optional patterns (matched against name or ID code) and status
        pdfs = [pdf for pdf in self.master_list
                if globfilt([pdf, str(self.master_list[pdf].id_code)], subargs.PATTERNS)]
        if subargs.INSTALLED:
            pdfs = [pdf for pdf in pdfs if self.master_list[pdf].installed]
        if subargs.OUTDATED:
            pdfs = [pdf for pdf in pdfs if self.master_list[pdf].outdated]

        ## Display
        # TODO: (optional) ordering by LHAPDF ID code
        for pdf in sorted(pdfs):
            if subargs.CODES:
                print("%d  %s" % (self.master_list[pdf].id_code, pdf))
            else:
                print(pdf)
        sys.exit(0)


    def ls(self, otherargs):
        """Alias for the list sub-command."""
        self.list(otherargs)


    def show(self, otherargs):
        """Show details for installed PDF sets matching Unix-style patterns."""
        ap = argparse.ArgumentParser(description=self.show.__doc__, usage="%(prog)s show [options] pattern...")
        ap.add_argument("PATTERNS", nargs="+", help="patterns to match PDF sets against")
        subargs = ap.parse_args(otherargs)

        ## Scan the current PDF collection
        self._scanpdfs()

        ## Filter PDFs on patterns, matched against name or ID code
        # NOTE(review): candidates come from the index (master list), not from the
        # installed sets, so a match that is not installed is still handed to
        # lhapdf.getPDFSet below -- confirm that this is the intended behaviour.
        pdfs = [pdf for pdf in self.master_list
                if globfilt([pdf, str(self.master_list[pdf].id_code)], subargs.PATTERNS)]

        ## Display
        strs = []
        if pdfs:
            import lhapdf
        for pdf in sorted(pdfs):
            ps = lhapdf.getPDFSet(pdf)
            out = ""
            out += ps.name + "\n" + "=" * len(pdf) + "\n"
            out += "LHAPDF ID: {:d}\n".format(ps.lhapdfID)
            out += "Version: {:d}\n".format(ps.dataversion)
            out += ps.description + "\n"
            out += "Number of members: {:d}\n".format(ps.size)
            out += "Error type: {:s}\n".format(ps.errorType)
            strs.append(out)

        print("\n\n".join(strs))
        sys.exit(0)


    def update(self, otherargs):
        """Update the list of available PDF sets."""
        ap = argparse.ArgumentParser(description=self.update.__doc__, usage="%(prog)s update")
        ## No sub-options are defined: parsing only rejects unexpected arguments
        ap.parse_args(otherargs)
        if self.mainargs.LISTDIR is not None:
            download_file(self.mainargs.SOURCES, INDEX_FILENAME, self.mainargs.LISTDIR)
        else:
            print("PDF index file location not known: can't update")
        sys.exit(0)


    def install(self, otherargs):
        """Download and unpack a list of PDFs, or those matching a Unix-style pattern."""
        ap = argparse.ArgumentParser(description=self.install.__doc__, usage="%(prog)s install [options] pattern...")
        ap.add_argument("PATTERNS", nargs="*", help="patterns to match PDF sets against")
        ap.add_argument("--dryrun", dest="DRYRUN", action="store_true", help="Do not download sets")
        ap.add_argument("--upgrade", dest="UPGRADE", action="store_true", help="Force reinstall (used to upgrade)")
        ap.add_argument("--keep", dest="KEEP_TARBALLS", action="store_true", help="Keep the downloaded tarballs")
        subargs = ap.parse_args(otherargs)

        if self.mainargs.PDFDIR is None:
            print("PDF data file location not known: can't upgrade or install")
            return

        ## Scan the current PDF collection
        self._scanpdfs()

        ## Filter PDFs on optional patterns
        pdfs = globfilt(self.master_list.keys(), subargs.PATTERNS)

        if not pdfs:
            logging.warning("No PDFs known matching patterns: %s" % ", ".join(subargs.PATTERNS))

        ## De-duplicate so a set matched by several patterns is fetched only once
        for pdf in sorted(set(pdfs)):
            if pdf in self.installed and not subargs.UPGRADE:
                logging.warning("PDF already installed: %s (use --upgrade to force install)" % pdf)
                continue

            if self.master_list[pdf].version == -1:
                logging.warning("PDF %s is unvalidated. You need to download this manually" % pdf)

            tar_filename = pdf + ".tar.gz"
            if download_file(self.mainargs.SOURCES, tar_filename, self.mainargs.PDFDIR, dryrun=subargs.DRYRUN):
                extract_tarball(tar_filename, self.mainargs.PDFDIR, subargs.KEEP_TARBALLS)


    def get(self, otherargs):
        """Alias for the install sub-command."""
        self.install(otherargs)


    def upgrade(self, otherargs):
        """Reinstall all PDF sets considered outdated by the local reference list"""
        ap = argparse.ArgumentParser(description=self.upgrade.__doc__, usage="%(prog)s upgrade")
        ap.add_argument("PATTERNS", nargs="*", help="patterns to match PDF sets against")
        ap.add_argument("--dryrun", dest="DRYRUN", action="store_true", help="Do not download sets")
        ap.add_argument("--keep", dest="KEEP_TARBALLS", action="store_true", help="Keep the downloaded tarballs")
        subargs = ap.parse_args(otherargs)

        if self.mainargs.PDFDIR is None:
            print("PDF data file location not known: can't upgrade or install")
            return

        ## Scan the current PDF collection
        self._scanpdfs()

        ## Get the PDFs in need of an update
        outdated_pdfs = [pdf for pdf in self.master_list if self.master_list[pdf].outdated]

        ## Filter PDFs on optional patterns
        upgrade_pdfs = globfilt(outdated_pdfs, subargs.PATTERNS)

        ## De-duplicate so a set matched by several patterns is fetched only once
        for pdf in sorted(set(upgrade_pdfs)):
            tar_filename = pdf + ".tar.gz"
            if download_file(self.mainargs.SOURCES, tar_filename, self.mainargs.PDFDIR, dryrun=subargs.DRYRUN):
                extract_tarball(tar_filename, self.mainargs.PDFDIR, subargs.KEEP_TARBALLS)


if __name__ == "__main__":
    ## Constructing the handler parses the command line and runs the requested sub-command
    CommandHandler()