#! /usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2005 Duke University
# Copyright 2007 Red Hat
import os
import re
import time
import types
import urlparse
urlparse.uses_fragment.append("media")
import urllib

import Errors
from urlgrabber.grabber import URLGrabber
from urlgrabber.grabber import default_grabber
from urlgrabber.progress import format_number
import urlgrabber.mirror
from urlgrabber.grabber import URLGrabError
import repoMDObject
import packageSack
from repos import Repository
import parser
import sqlitecachec
import sqlitesack
from yum import config
from yum import misc
from yum import comps
from yum import _
from constants import *
import metalink

import logging
import logginglevels

import warnings

import glob
import shutil
import stat
import errno
import tempfile

# This is unused now, probably nothing uses it but it was global/public.
skip_old_DBMD_check = False

try:
    import xattr
    if not hasattr(xattr, 'get') or not hasattr(xattr, 'set'):
        xattr = None # This is a "newer" API.
except ImportError:
    xattr = None

#  The problem we are trying to solve here is that:
#
# 1. We rarely want to be downloading MD/pkgs/etc.
# 2. We want to check those files are valid (match checksums) when we do
#    download them.
# 3. We _really_ don't want to checksum all the files every time we
#    run (100s of MBs).
# 4. We can continue to download files from bad mirrors, or retry files due to
#    C-c etc.
#
# ...we used to solve this by just checking the file size, and assuming the
# files had been downloaded and checksumed as correct if that matched. But that
# was error prone on bad mirrors, so now we store the checksum in an
# xattr ... this does mean that if you can't store xattrs (Eg. NFS) you will
# rechecksum everything constantly.

def _xattr_get_chksum(filename, chktype):
    if not xattr:
        return None

    try:
        ret = xattr.get(filename, 'user.yum.checksum.' + chktype)
    except: # Documented to be "EnvironmentError", but make sure
        return None

    return ret

def _xattr_set_chksum(filename, chktype, chksum):
    if not xattr:
        return None

    try:
        xattr.set(filename, 'user.yum.checksum.' + chktype, chksum)
    except:
        return False # Data too long. = IOError ... ignore everything.

    return True


warnings.simplefilter("ignore", Errors.YumFutureDeprecationWarning)

logger = logging.getLogger("yum.Repos")
verbose_logger = logging.getLogger("yum.verbose.Repos")

class YumPackageSack(packageSack.PackageSack):
    """imports/handles package objects from an mdcache dict object"""
    def __init__(self, packageClass):
        packageSack.PackageSack.__init__(self)
        self.pc = packageClass
        self.added = {}

    def __del__(self):
        try:
            self.close()
        except Errors.RepoError, e:
            verbose_logger.debug("Exception %s %s in %s ignored" % (repr(e), str(e), self.__del__))

    def close(self):
        self.added = {}

    def addDict(self, repo, datatype, dataobj, callback=None):
        if repo in self.added:
            if datatype in self.added[repo]:
                return

        total = len(dataobj)
        if datatype == 'metadata':
            current = 0
            for pkgid in dataobj:
                current += 1
                if callback: callback.progressbar(current, total, repo)
                pkgdict = dataobj[pkgid]
                po = self.pc(repo, pkgdict)
                po.id = pkgid
                self._addToDictAsList(self.pkgsByID, pkgid, po)
                self.addPackage(po)

            if repo not in self.added:
                self.added[repo] = []
            self.added[repo].append('metadata')
            # indexes will need to be rebuilt
            self.indexesBuilt = 0

        elif datatype in ['filelists', 'otherdata']:
            if repo in self.added:
                if 'metadata' not in self.added[repo]:
                    raise Errors.RepoError, '%s md for %s imported before primary' \
                           % (datatype, repo.ui_id)
            current = 0
            for pkgid in dataobj:
                current += 1
                if callback: callback.progressbar(current, total, repo)
                pkgdict = dataobj[pkgid]
                if pkgid in self.pkgsByID:
                    for po in self.pkgsByID[pkgid]:
                        po.importFromDict(pkgdict)

            self.added[repo].append(datatype)
            # indexes will need to be rebuilt
            self.indexesBuilt = 0
        else:
            # umm, wtf?
            pass

    def _retrieve_async(self, repo, data):
        """ Just schedule the metadata downloads """

        for item in data:
            if item in self.added.get(repo, []):
                continue
            if item == 'metadata':
                mydbtype = 'primary_db'
            elif item == 'filelists':
                mydbtype = 'filelists_db'
            elif item == 'otherdata':
                mydbtype = 'other_db'
            else:
                continue

            if self._check_db_version(repo, mydbtype):
                if not self._check_uncompressed_db_gen(repo, mydbtype):
                    # NOTE: No failfunc.
                    repo._retrieveMD(mydbtype, async=True, failfunc=None)

    def populate(self, repo, mdtype='metadata', callback=None, cacheonly=0):
        if mdtype == 'all':
            data = ['metadata', 'filelists', 'otherdata']
        else:
            data = [ mdtype ]

        if not hasattr(repo, 'cacheHandler'):
            repo.cacheHandler = sqlitecachec.RepodataParserSqlite(
                storedir=repo.cachedir,
                repoid=repo.id,
                callback=callback,
                )
        for item in data:
            if repo in self.added:
                if item in self.added[repo]:
                    continue

            if item == 'metadata':
                mydbtype = 'primary_db'
                mymdtype = 'primary'
                repo_get_function = repo.getPrimaryXML
                repo_cache_function = repo.cacheHandler.getPrimary

            elif item == 'filelists':
                mydbtype = 'filelists_db'
                mymdtype = 'filelists'
                repo_get_function = repo.getFileListsXML
                repo_cache_function = repo.cacheHandler.getFilelists

            elif item == 'otherdata':
                mydbtype = 'other_db'
                mymdtype = 'other'
                repo_get_function = repo.getOtherXML
                repo_cache_function = repo.cacheHandler.getOtherdata

            else:
                continue

            if self._check_db_version(repo, mydbtype):
                #  Use gen decompression on DB files. Keeps exactly what we
                # downloaded in the download dir.

                # Backwards compat. ... try the old uncompressed version first.
                db_un_fn = self._check_uncompressed_db(repo, mydbtype)
                if not db_un_fn:
                    db_un_fn = self._check_uncompressed_db_gen(repo, mydbtype)

                if not db_un_fn:
                    db_fn = repo._retrieveMD(mydbtype)
                    if db_fn:
                        # unlink the decompressed file, we know it's not valid
                        misc.unlink_f(repo.cachedir +'/gen/%s.sqlite' % mydbtype)
                        db_un_fn = self._check_uncompressed_db_gen(repo,
                                                                   mydbtype)
                    if not db_un_fn: # Shouldn't happen?
                        raise Errors.RepoError, '%s: Check uncompressed DB failed' % repo

                dobj = repo.cacheHandler.open_database(db_un_fn)

            else:
                repo._xml2sqlite_local = True
                # Download...
                xml = repo_get_function()

                #  Use generated dir. and handle compression types metadata
                # parser doesn't understand.
                gen = mymdtype + '.xml'
                ret = misc.repo_gen_decompress(xml, gen, cached=repo.cache)
                if not ret:
                    raise Errors.RepoError, '%s: Decompress DB failed' % repo
                xml = ret
                # Convert XML => .sqlite
                xmldata = repo.repoXML.getData(mymdtype)
                (ctype, csum) = xmldata.checksum
                dobj = repo_cache_function(xml, csum)

            if not cacheonly:
                self.addDict(repo, item, dobj, callback)
            del dobj


        # get rid of all this stuff we don't need now
        del repo.cacheHandler

    def _check_uncompressed_db_gen(self, repo, mdtype):
        """return file name of db in gen/ dir if good, None if not"""

        mydbdata         = repo.repoXML.getData(mdtype)
        (r_base, remote) = mydbdata.location
        fname            = os.path.basename(remote)
        compressed_fn    = repo.cachedir + '/' + fname
        db_un_fn         = mdtype + '.sqlite'

        if not repo._checkMD(compressed_fn, mdtype, data=mydbdata,
                             check_can_fail=True):
            return None

        ret = misc.repo_gen_decompress(compressed_fn, db_un_fn,
                                       cached=repo.cache)
        if ret:
            return self._check_uncompressed_db_fn(repo, mdtype, ret)
        return None

    def _check_uncompressed_db(self, repo, mdtype):
        """return file name of uncompressed db is good, None if not"""
        mydbdata = repo.repoXML.getData(mdtype)
        (r_base, remote) = mydbdata.location
        fname = os.path.basename(remote)
        compressed_fn = repo.cachedir + '/' + fname
        db_un_fn = misc.decompress(compressed_fn, fn_only=True)

        return self._check_uncompressed_db_fn(repo, mdtype, db_un_fn)

    def _check_uncompressed_db_fn(self, repo, mdtype, db_un_fn):
        result = None

        if os.path.exists(db_un_fn):


            try:
                repo.checkMD(db_un_fn, mdtype, openchecksum=True)
            except URLGrabError:
                if not repo.cache:
                    misc.unlink_f(db_un_fn)
            else:
                result = db_un_fn

        return result

    def _check_db_version(self, repo, mdtype):
        return repo._check_db_version(mdtype)

class YumRepository(Repository, config.RepoConf):
    """
    This is an actual repository object

    Configuration attributes are pulled in from config.RepoConf.
    """

    def __init__(self, repoid):
        config.RepoConf.__init__(self)
        Repository.__init__(self, repoid)

        self.repofile = None
        self.mirrorurls = []
        self._urls = []
        self.enablegroups = 0
        self.groupsfilename = 'yumgroups.xml' # something some freaks might
                                              # eventually want
        self.repoMDFile = 'repodata/repomd.xml'
        self._repoXML = None
        self._oldRepoMDData = {}
        self.cache = 0
        self._retry_no_cache = False
        self.mirrorlistparsed = 0
        self.yumvar = {} # empty dict of yumvariables for $string replacement
        self._proxy_dict = {}
        self.metadata_cookie_fn = 'cachecookie'
        self._metadataCurrent = None
        self._metalink = None
        self.groups_added = False
        self.http_headers = {}
        self.repo_config_age = 0 # if we're a repo not from a file then the
                                 # config is very, very old
        # throw in some stubs for things that will be set by the config class
        self.basecachedir = ""
        self.base_persistdir = ""
        self.cost = 1000
        self.copy_local = 0
        # holder for stuff we've grabbed
        self.retrieved = { 'primary':0, 'filelists':0, 'other':0, 'group':0,
                           'updateinfo':0, 'prestodelta':0}
        self._preloaded_repomd = False

        # callbacks
        self.callback = None  # for the grabber
        self.multi_callback = None
        self.failure_obj = None
        self.mirror_failure_obj = None
        self.interrupt_callback = None
        self._callbacks_changed = False

        # callback function for handling media
        self.mediafunc = None

        # callbacks for gpg key importing and confirmation
        self.gpg_import_func = None
        self.gpgca_import_func = None
        self.confirm_func = None

        #  The reason we want to turn this off are things like repoids
        # called "tmp" in repoquery --repofrompath and/or new1/old1 in repodiff.
        self.timestamp_check = True

        self._sack = None

        self._grabfunc = None
        self._grab = None
        self._async = False

    def __cmp__(self, other):
        """ Sort yum repos. by cost, and then by alphanumeric on their id. """
        if other is None:
            return 1
        if hasattr(other, 'cost'):
            ocost = other.cost
        else:
            ocost = 1000
        ret = cmp(self.cost, ocost)
        if ret:
            return ret
        return cmp(self.id, other.id)

    def _getSack(self):
        # FIXME: Note that having the repo hold the sack, which holds "repos"
        # is not only confusing but creates a circular dep.
        #  Atm. we don't leak memory because RepoStorage.close() is called,
        # which calls repo.close() which calls sack.close() which removes the
        # repos from the sack ... thus. breaking the cycle.
        if self._sack is None:
            self._sack = sqlitesack.YumSqlitePackageSack(
                sqlitesack.YumAvailablePackageSqlite)
        return self._sack
    sack = property(_getSack)

    def _ui_id(self):
        """ Show self.id, but include any $releasever/$basearch/etc. data. """
        if hasattr(self, '__cached_ui_id'):
            return getattr(self, '__cached_ui_id')

        val = config._readRawRepoFile(self)
        if not val:
            val = ''
        else:
            ini, section_id = val
            ini = ini[section_id]
            if 'metalink' in ini:
                val = ini['metalink']
            elif 'mirrorlist' in ini:
                val = ini['mirrorlist']
            elif 'baseurl' in ini:
                val = ini['baseurl']
            else:
                val = ''
        ret = self.id

        for var in self.ui_repoid_vars:
            if '$'+var in val:
                ret += '/'
                ret += str(self.yumvar[var])

        setattr(self, '__cached_ui_id', ret)
        return ret
    ui_id = property(_ui_id)

    def close(self):
        if self._sack is not None:
            self.sack.close()
        Repository.close(self)

    def _resetSack(self):
        self._sack = None

    def __getProxyDict(self):
        self.doProxyDict()
        if self._proxy_dict:
            return self._proxy_dict
        return None

    # consistent access to how proxy information should look (and ensuring
    # that it's actually determined for the repo)
    proxy_dict = property(__getProxyDict)

    def getPackageSack(self):
        """Returns the instance of this repository's package sack."""
        return self.sack


    def ready(self):
        """Returns true if this repository is setup and ready for use."""
        if hasattr(self, 'metadata_cookie'):
            return self.repoXML is not None
        return False


    def getGroupLocation(self):
        """Returns the location of the group."""
        if 'group_gz' in self.repoXML.fileTypes():
            thisdata = self.repoXML.getData('group_gz')
        else:
            thisdata = self.repoXML.getData('group')
        return thisdata.location

    def __str__(self):
        # Note: You might expect this to be .ui_id, except people got used to
        # the fact that str(repo) == repo.id and used the former instead of
        # the later when they wanted just the .id. So we have to live with it
        # and use .ui_id explicitly.
        return self.id

    def _checksum(self, sumtype, file, CHUNK=2**16, checksum_can_fail=False,
                  datasize=None):
        """takes filename, hand back Checksum of it
           sumtype = md5 or sha
           filename = /path/to/file
           CHUNK=65536 by default"""
        try:
            return misc.checksum(sumtype, file, CHUNK, datasize)
        except (Errors.MiscError, EnvironmentError), e:
            if checksum_can_fail:
                return None
            msg = 'Error opening file for checksum: %s' % e
            if isinstance(e, Errors.FIPSNonCompliantError):
                msg = str(e)
            raise Errors.RepoError(msg)

    def dump(self):
        output = '[%s]\n' % self.id
        # we exclude all vars which start with _ or are in this list:
        excluded_vars = ('mediafunc', 'sack', 'metalink_data', 'grab', 
                         'grabfunc', 'repoXML', 'cfg', 'retrieved',
                        'mirrorlistparsed', 'gpg_import_func', 
                        'gpgca_import_func', 'failure_obj',
                        'callback', 'confirm_func', 'groups_added', 
                        'interrupt_callback', 'id', 'mirror_failure_obj',
                        'repo_config_age', 'groupsfilename', 'copy_local', 
                        'basecachedir', 'http_headers', 'metadata_cookie',
                        'metadata_cookie_fn', 'quick_enable_disable',
                        'repoMDFile', 'timestamp_check', 'urls', 'mirrorurls',
                        'yumvar', 'repofile', 'multi_callback')
        for attr in dir(self):
            if attr.startswith('_'):
                continue
            if attr in excluded_vars:
                continue
            if isinstance(getattr(self, attr), types.MethodType):
                continue
            res = getattr(self, attr)
            if not res and type(res) not in (type(False), type(0)):
                res = ''
            if type(res) == types.ListType:
                res = ',\n   '.join(res)
            output = output + '%s = %s\n' % (attr, res)

        return output

    def enablePersistent(self):
        """Persistently enables this repository."""
        self.enable()
        try:
            config.writeRawRepoFile(self,only=['enabled'])
        except IOError, e:
            if e.errno == errno.EACCES:
                logger.warning(e)
            else:
                raise IOError, str(e)

    def disablePersistent(self):
        """Persistently disables this repository."""
        self.disable()
        try:
            config.writeRawRepoFile(self,only=['enabled'])
        except IOError, e:
            if e.errno == errno.EACCES:
                logger.warning(e)
            else:
                raise IOError, str(e)

    def check(self):
        """self-check the repo information  - if we don't have enough to move
           on then raise a repo error"""
        if len(self._urls) < 1 and not self.mediaid:
            raise Errors.RepoError, \
             'Cannot find a valid baseurl for repo: %s' % self.ui_id

    def doProxyDict(self):
        if self._proxy_dict:
            return

        self._proxy_dict = {} # zap it
        proxy_string = None
        empty = (None, '_none_', '')
        if self.proxy in empty:  # got 'proxy=_none_'
            proxy_string = ''   # this disables default proxies
        elif self.proxy:
            proxy_string = '%s' % self.proxy
            if self.proxy_username not in empty:

                auth = urllib.quote(self.proxy_username)
                if self.proxy_password not in empty:
                    auth += ':' + urllib.quote(self.proxy_password)

                proto, rest = re.match('(\w+://)(.+)', proxy_string).groups()
                proxy_string = '%s%s@%s' % (proto, auth, rest)

        if proxy_string is not None:
            self._proxy_dict['http'] = proxy_string
            self._proxy_dict['https'] = proxy_string
            self._proxy_dict['ftp'] = proxy_string

    def __headersListFromDict(self, cache=True):
        """Convert our dict of headers to a list of 2-tuples for urlgrabber."""
        headers = []

        for key in self.http_headers:
            headers.append((key, self.http_headers[key]))
        if not (cache or 'Pragma' in self.http_headers):
            headers.append(('Pragma', 'no-cache'))

        return headers

    def setupGrab(self):
        warnings.warn('setupGrab() will go away in a future version of Yum.\n',
                Errors.YumFutureDeprecationWarning, stacklevel=2)
        self._setupGrab()

    def _setupGrab(self):
        """sets up the grabber functions with the already stocked in urls for
           the mirror groups"""

        if self.failovermethod == 'roundrobin':
            mgclass = urlgrabber.mirror.MGRandomOrder
        else:
            mgclass = urlgrabber.mirror.MirrorGroup

        ugopts = self._default_grabopts()
        self._grabfunc = URLGrabber(progress_obj=self.callback,
                                    multi_progress_obj=self.multi_callback,
                                    failure_callback=self.failure_obj,
                                    interrupt_callback=self.interrupt_callback,
                                    copy_local=self.copy_local,
                                    reget='simple',
                                    **ugopts)
        def add_mc(url):
            host = urlparse.urlsplit(url).netloc.split('@')[-1]
            mc = self.metalink_data._host2mc.get(host)
            if mc:
                url = {
                    'mirror': misc.to_utf8(url),
                    'kwargs': {
                        'max_connections': mc.max_connections,
                        'preference': mc.preference,
                        'private': mc.private,
                    },
                }
            return url
        urls = self.urls
        if self.metalink:
            urls = map(add_mc, urls)

        def mirror_failure(obj):
            action = {}

            # timeout, refused connect, and HTTP 503 may retry
            e = obj.exception
            if e.errno == 12 or \
               e.errno == 14 and getattr(e, 'code', 0) in (7, 503):
                tries = getattr(obj, 'tries', self.retries)
                if tries <= self.retries - len(self.urls):
                    # don't remove this mirror yet
                    action['remove'] = False
            elif e.errno == -3:
                # unsupported checksum type, fail now
                action['fail'] = True

            # No known user of this callback, but just in case...
            cb = self.mirror_failure_obj
            if cb:
                fun, arg, karg = callable(cb) and (cb, (), {}) or cb
                action.update(fun(obj, *arg, **karg))

            return action

        self._grab = mgclass(self._grabfunc, urls,
                             failure_callback=mirror_failure)

    def _default_grabopts(self, cache=True):
        opts = { 'keepalive': self.keepalive,
                 'bandwidth': self.bandwidth,
                 'retry': self.retries,
                 'throttle': self.throttle,
                 'timeout': self.timeout,
                 'minrate': self.minrate,
                 'ip_resolve': self.ip_resolve,
                 'http_headers': tuple(self.__headersListFromDict(cache=cache)),
                 'ssl_verify_peer': self.sslverify,
                 'ssl_verify_host': self.sslverify,
                 'ssl_ca_cert': self.sslcacert,
                 'ssl_cert': self.sslclientcert,
                 'ssl_key': self.sslclientkey,
                 'user_agent': default_grabber.opts.user_agent,
                 'username': self.username,
                 'password': self.password,
                 'ftp_disable_epsv': self.ftp_disable_epsv,
                 }
        if self.proxy == 'libproxy':
            opts['libproxy'] = True
        else:
            opts['proxies'] = self.proxy_dict
        return opts

    def _getgrabfunc(self):
        if not self._grabfunc or self._callbacks_changed:
            self._setupGrab()
            self._callbacks_changed = False
        return self._grabfunc

    def _getgrab(self):
        if not self._grab or self._callbacks_changed:
            self._setupGrab()
            self._callbacks_changed = False
        return self._grab

    grabfunc = property(lambda self: self._getgrabfunc())
    grab = property(lambda self: self._getgrab())

    def _dirSetupMkdir_p(self, dpath):
        """make the necessary directory path, if possible, raise on failure"""
        if os.path.exists(dpath) and os.path.isdir(dpath):
            return

        try:
            os.makedirs(dpath, mode=0755)
        except OSError, e:
            msg = "%s: %s %s: %s" % ("Error making cache directory",
                                     dpath, "error was", e)
            raise Errors.RepoError, msg

    def dirSetup(self):
        """make the necessary dirs, if possible, raise on failure"""

        cachedir = os.path.join(self.basecachedir, self.id)
        persistdir = os.path.join(self.base_persistdir, self.id)
        pkgdir = os.path.join(cachedir, 'packages')
        hdrdir = os.path.join(cachedir, 'headers')
        self.setAttribute('_dir_setup_cachedir', cachedir)
        self.setAttribute('_dir_setup_pkgdir', pkgdir)
        self.setAttribute('_dir_setup_hdrdir', hdrdir)
        self.setAttribute('_dir_setup_persistdir', persistdir)
        ext=''
        if os.geteuid() != 0:
            ext = '-ro'
        self.setAttribute('_dir_setup_gpgdir', persistdir + '/gpgdir' + ext)
        self.setAttribute('_dir_setup_gpgcadir', persistdir + '/gpgcadir' + ext)

        cookie = self.cachedir + '/' + self.metadata_cookie_fn
        self.setAttribute('_dir_setup_metadata_cookie', cookie)

        for dir in [self.cachedir, self.cachedir + '/gen', self.pkgdir]:
            self._dirSetupMkdir_p(dir)

        # persistdir is really root-only but try the make anyway and just
        # catch the exception
        for dir in [self.persistdir]:
            try:
                self._dirSetupMkdir_p(dir)
            except Errors.RepoError, e:
                pass
                
        # if we're using a cachedir that's not the system one, copy over these
        # basic items from the system one
        if self._preload_md_from_system_cache('repomd.xml'):
            self._preloaded_repomd = True
        self._preload_md_from_system_cache('cachecookie')
        self._preload_md_from_system_cache('mirrorlist.txt')
        self._preload_md_from_system_cache('metalink.xml')

    def _dirGetAttr(self, attr):
        """ Make the directory attributes call .dirSetup() if needed. """
        attr = '_dir_setup_' + attr
        if not hasattr(self, attr):
            self.dirSetup()
        return getattr(self, attr)
    def _dirSetAttr(self, attr, val):
        """ Make the directory attributes call .dirSetup() if needed. """
        attr = '_dir_setup_' + attr
        if not hasattr(self, attr):
            self.dirSetup()

        if attr == '_dir_setup_pkgdir':
            if not hasattr(self, '_old_pkgdirs'):
                self._old_pkgdirs = []
            self._old_pkgdirs.append(getattr(self, attr))

        ret = setattr(self, attr, val)
        if attr in ('_dir_setup_pkgdir', ):
            self._dirSetupMkdir_p(val)
        return ret
    cachedir = property(lambda self: self._dirGetAttr('cachedir'))
    persistdir = property(lambda self: self._dirGetAttr('persistdir'))

    pkgdir   = property(lambda self: self._dirGetAttr('pkgdir'),
                        lambda self, x: self._dirSetAttr('pkgdir', x))
    hdrdir   = property(lambda self: self._dirGetAttr('hdrdir'),
                        lambda self, x: self._dirSetAttr('hdrdir', x))
    gpgdir   = property(lambda self: self._dirGetAttr('gpgdir'),
                        lambda self, x: self._dirSetAttr('gpgdir', x))
    gpgcadir   = property(lambda self: self._dirGetAttr('gpgcadir'),  
                        lambda self, x: self._dirSetAttr('gpgcadir', x))
    metadata_cookie = property(lambda self: self._dirGetAttr('metadata_cookie'))

    def baseurlSetup(self):
        warnings.warn('baseurlSetup() will go away in a future version of Yum.\n',
                Errors.YumFutureDeprecationWarning, stacklevel=2)
        self._baseurlSetup()

    def _hack_mirrorlist_for_anaconda(self):
        #  Anaconda doesn't like having mirrorlist and metalink, so we allow
        # mirrorlist to act like metalink. Except we'd really like to know which
        # we have without parsing it ... and want to store it in the right
        # place etc.
        #  So here is #1 hack: see if the metalin kis unset and the mirrorlist
        # URL contains the string "metalink", if it does we copy it over.
        if self.metalink:
            return
        if not self.mirrorlist:
            return
        if self.mirrorlist.find("metalink") == -1:
            return
        self.metalink = self.mirrorlist

    def _baseurlSetup(self):
        """go through the baseurls and mirrorlists and populate self.urls
           with valid ones, run  self.check() at the end to make sure it worked"""

        self.baseurl = self._replace_and_check_url(self.baseurl)
        # FIXME: We put all the mirrors in .baseurl as well as
        # .urls for backward compat. (see bottom of func). So we'll save this
        # out for repolist -v ... or anything else wants to know the baseurl
        self._orig_baseurl = self.baseurl

        mirrorurls = []
        self._hack_mirrorlist_for_anaconda()
        if self.metalink and not self.mirrorlistparsed:
            # FIXME: This is kind of lying to API callers
            mirrorurls.extend(list(self.metalink_data.urls()))
            self.mirrorlistparsed = True
        if self.mirrorlist and not self.mirrorlistparsed:
            mirrorurls.extend(self._getMirrorList())
            self.mirrorlistparsed = True

        self.mirrorurls = self._replace_and_check_url(mirrorurls)
        self._urls = self.baseurl + self.mirrorurls
        # if our mirrorlist is just screwed then make sure we unlink a mirrorlist cache
        if len(self._urls) < 1:
            if hasattr(self, 'mirrorlist_file') and os.path.exists(self.mirrorlist_file):
                if not self.cache:
                    try:
                        misc.unlink_f(self.mirrorlist_file)
                    except (IOError, OSError), e:
                        logger.error('Could not delete bad mirrorlist file: %s - %s' % (self.mirrorlist_file, e))
                    else:
                        logger.warning('removing mirrorlist with no valid mirrors: %s' % self.mirrorlist_file)
        # store them all back in baseurl for compat purposes
        self.baseurl = self._urls
        self.check()

    def _replace_and_check_url(self, url_list):
        goodurls = []
        skipped = None
        for url in url_list:
            # obvious bogons get ignored b/c, we could get more interesting checks but <shrug>
            if url in ['', None]:
                continue
            url = parser.varReplace(url, self.yumvar)
            if url[-1] != '/':
                url= url + '/'
            try:
                # This started throwing ValueErrors, BZ 666826
                (s,b,p,q,f,o) = urlparse.urlparse(url)
            except (ValueError, IndexError, KeyError), e:
                s = 'blah'

            if s not in ['http', 'ftp', 'file', 'https']:
                skipped = url
                continue
            else:
                goodurls.append(url)

        if skipped is not None:
            # Caller cleans up for us.
            if goodurls:
                logger.warning('YumRepo Warning: Some mirror URLs are not using ftp, http[s] or file.\n Eg. %s' % misc.to_utf8(skipped))
            else: # And raises in this case
                logger.error('YumRepo Error: All mirror URLs are not using ftp, http[s] or file.\n Eg. %s' % misc.to_utf8(skipped))
        return goodurls

    def _geturls(self):
        if not self._urls:
            self._baseurlSetup()
        return self._urls

    urls = property(fget=lambda self: self._geturls(),
                    fset=lambda self, value: setattr(self, "_urls", value),
                    fdel=lambda self: setattr(self, "_urls", None))

    def _getMetalink(self):
        if not self._metalink:
            self.metalink_filename = self.cachedir + '/' + 'metalink.xml'
            local = self.metalink_filename + '.tmp'
            if not self._metalinkCurrent():
                url = misc.to_utf8(self.metalink)
                ugopts = self._default_grabopts(cache=self.http_caching=='all')
                try:
                    ug = URLGrabber(progress_obj = self.callback, **ugopts)
                    result = ug.urlgrab(url, local, text="%s/metalink" % self.ui_id)

                except URLGrabError, e:
                    if not os.path.exists(self.metalink_filename):
                        msg = ("Cannot retrieve metalink for repository: %s. "
                               "Please verify its path and try again" % self.ui_id )
                        raise Errors.RepoError, msg
                    #  Now, we have an old usable metalink, so we can't move to
                    # a newer repomd.xml ... or checksums won't match.
                    logger.error("Could not get metalink %s error was\n%s: %s" % (url, e.args[0], misc.to_unicode(e.args[1])))
                    self._metadataCurrent = True

            if not self._metadataCurrent:
                try:
                    self._metalink = metalink.MetaLinkRepoMD(result)
                    shutil.move(result, self.metalink_filename)
                except metalink.MetaLinkRepoErrorParseFail, e:
                    # Downloaded file failed to parse, revert (dito. above):
                    logger.error("Could not parse metalink %s error was \n%s" % (url, e))
                    self._metadataCurrent = True
                    misc.unlink_f(result)

            if self._metadataCurrent:
                self._metalink = metalink.MetaLinkRepoMD(self.metalink_filename)

        return self._metalink

    metalink_data = property(fget=lambda self: self._getMetalink(),
                             fset=lambda self, value: setattr(self, "_metalink",
                                                              value),
                             fdel=lambda self: setattr(self, "_metalink", None))

    def _all_urls_are_files(self, url):
        if url:
            return url.startswith("/") or url.startswith("file:")

        if not self.urls: # WTF ... but whatever.
            return False

        # Not an explicit url ... so make sure all mirrors/etc. are file://
        for url in self.urls:
            if not self._all_urls_are_files(url):
                return False
        return True

    def _getFile(self, url=None, relative=None, local=None, start=None, end=None,
            copy_local=None, checkfunc=None, text=None, reget='simple', 
            cache=True, size=None, **kwargs):
        """retrieve file from the mirrorgroup for the repo
           relative to local, optionally get range from
           start to end, also optionally retrieve from a specific baseurl"""

        # if local or relative is None: raise an exception b/c that shouldn't happen
        # if url is not None - then do a grab from the complete url - not through
        # the mirror, raise errors as need be
        # if url is None do a grab via the mirror group/grab for the repo
        # return the path to the local file

        # if copylocal isn't specified pickup the repo-defined attr
        if copy_local is None:
            copy_local = self.copy_local

        if local is None or relative is None:
            raise Errors.RepoError, \
                  "get request for Repo %s, gave no source or dest" % self.ui_id

        if self.cache == 1:
            if os.path.exists(local): # FIXME - we should figure out a way
                return local          # to run the checkfunc from here

            else: # ain't there - raise
                raise Errors.RepoError, \
                    "Caching enabled but no local cache of %s from %s" % (local,

                           self.ui_id)

        if url:
            (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url)

        if self.mediaid and self.mediafunc:
            discnum = 1
            if url:
                if scheme == "media" and fragid:
                    discnum = int(fragid)
            try:
                # FIXME: we need to figure out what really matters to
                # pass to the media grabber function here
                result = self.mediafunc(local = local, checkfunc = checkfunc, relative = relative, text = text, copy_local = copy_local, url = url, mediaid = self.mediaid, name = self.name, discnum = discnum, range = (start, end))
                return result
            except Errors.MediaError, e:
                verbose_logger.log(logginglevels.DEBUG_2, "Error getting package from media; falling back to url %s" %(e,))

        if size and (copy_local or not self._all_urls_are_files(url)):
            dirstat = os.statvfs(os.path.dirname(local))
            avail = dirstat.f_bavail * dirstat.f_bsize
            if avail < long(size):
                raise Errors.RepoError, _('''\
Insufficient space in download directory %s
    * free   %s
    * needed %s'''
                ) % (os.path.dirname(local), format_number(avail), format_number(long(size)))

        if url and scheme != "media":
            ugopts = self._default_grabopts(cache=cache)
            ug = URLGrabber(progress_obj = self.callback,
                            copy_local = copy_local,
                            reget = reget,
                            failure_callback = self.failure_obj,
                            interrupt_callback=self.interrupt_callback,
                            checkfunc=checkfunc,
                            size=size,
                            retry_no_cache=self._retry_no_cache,
                            **ugopts)

            remote = url + '/' + relative

            try:
                result = ug.urlgrab(misc.to_utf8(remote), local,
                                    text=misc.to_utf8(text),
                                    range=(start, end),
                                    )
            except URLGrabError, e:
                self._del_dl_file(local, size)
                errstr = "failed to retrieve %s from %s\nerror was %s" % (relative, self, e)
                e = Errors.RepoError(errstr)
                e.repo = self
                raise e

        else:
            headers = tuple(self.__headersListFromDict(cache=cache))
            try:
                result = self.grab.urlgrab(misc.to_utf8(relative), local,
                                           text = misc.to_utf8(text),
                                           range = (start, end),
                                           copy_local=copy_local,
                                           reget = reget,
                                           checkfunc=checkfunc,
                                           http_headers=headers,
                                           size=size,
                                           retry_no_cache=self._retry_no_cache,
                                           **kwargs
                                           )
            except URLGrabError, e:
                self._del_dl_file(local, size)
                errstr = "failure: %s from %s: %s" % (relative, self, e)
                errors = getattr(e, 'errors', None)
                e = Errors.NoMoreMirrorsRepoError(errstr, errors)
                e.repo = self
                raise e

        return result
    __get = _getFile

    def getPackage(self, package, checkfunc=None, text=None, cache=True, **kwargs):
        remote = package.relativepath
        local = package.localPkg()
        basepath = package.basepath

        if self._preload_pkg_from_system_cache(package):
            if package.verifyLocalPkg():
                return local
            misc.unlink_f(local)

        if checkfunc is None:
            def checkfunc(obj):
                if not package.verifyLocalPkg():
                    misc.unlink_f(local)
                    raise URLGrabError(-1, _('Package does not match intended download.'))

        # We would normally pass this to _getFile() directly but that could
        # break backward compatibility with plugins that override _getFile()
        # (BZ 1360532).
        self._retry_no_cache = self.http_caching == 'lazy:packages'
        try:
            ret = self._getFile(url=basepath,
                            relative=remote,
                            local=local,
                            checkfunc=checkfunc,
                            text=text,
                            cache=cache,
                            size=package.size,
                            **kwargs
                            )
        finally:
            self._retry_no_cache = False

        if not kwargs.get('async') and not package.verifyLocalPkg():
            # Don't return as "success" when bad.
            msg = "Downloaded package %s, from %s, but it was invalid."
            msg = msg % (package, package.repo.id)
            raise Errors.RepoError, msg

        return ret

    def getHeader(self, package, checkfunc = None, reget = 'simple',
            cache = True):

        remote = package.relativepath
        local =  package.localHdr()
        start = package.hdrstart
        end = package.hdrend
        size = end-start
        basepath = package.basepath
        # yes, I know, don't ask
        if not os.path.exists(self.hdrdir):
            os.makedirs(self.hdrdir)

        return self._getFile(url=basepath, relative=remote, local=local, start=start,
                        reget=None, end=end, checkfunc=checkfunc, copy_local=1,
                        cache=cache, size=size,
                        )

    def metadataCurrent(self):
        """Check if there is a metadata_cookie and check its age. If the
        age of the cookie is less than metadata_expire time then return true
        else return False. This result is cached, so that metalink/repomd.xml
        are synchronized."""
        if self._metadataCurrent is not None:
            return self._metadataCurrent

        mC_def = self.withinCacheAge(self.metadata_cookie, self.metadata_expire)
        if not mC_def: # Normal path...
            return mC_def

        # Edge cases, both repomd.xml and metalink (if used). Must exist.
        repomdfn = self.cachedir + '/' + 'repomd.xml'
        if not os.path.exists(repomdfn):
            return False

        self._hack_mirrorlist_for_anaconda()
        mlfn = self.cachedir + '/' + 'metalink.xml'
        if self.metalink and not os.path.exists(mlfn):
            return False

        self._metadataCurrent = True
        return True

    #  The metalink _shouldn't_ be newer than the repomd.xml or the checksums
    # will be off, but we only really care when we are downloading the
    # repomd.xml ... so keep it in mind that they can be off on disk.
    #  Also see _getMetalink()
    def _metalinkCurrent(self):
        if self._metadataCurrent is not None:
            return self._metadataCurrent

        if self.cache and not os.path.exists(self.metalink_filename):
            raise Errors.RepoError, 'Cannot find metalink.xml file for %s' %self

        if self.cache:
            self._metadataCurrent = True
        elif not os.path.exists(self.metalink_filename):
            self._metadataCurrent = False
        elif self.withinCacheAge(self.metadata_cookie, self.metadata_expire):
            self._metadataCurrent = True
        else:
            self._metadataCurrent = False
        return self._metadataCurrent

    def _matchExpireFilter(self):
        """Return whether cache_req matches metadata_expire_filter."""
        # Never/write means we just skip this...
        if (hasattr(self, '_metadata_cache_req') and
                self._metadata_cache_req.startswith("read-only:") and
                self.metadata_expire_filter.startswith("read-only:")):

            cache_filt = self.metadata_expire_filter[len("read-only:"):]
            cache_req  = self._metadata_cache_req[len("read-only:"):]

            if cache_filt == 'future':
                assert cache_req in ('past', 'present', 'future')
                return True
            if cache_filt == 'present':
                if cache_req in ('past', 'present'):
                    return True
            if cache_filt == 'past':
                if cache_req == 'past':
                    return True
        return False

    def withinCacheAge(self, myfile, expiration_time, expire_req_filter=True):
        """check if any file is older than a certain amount of time. Used for
           the cachecookie and the mirrorlist
           return True if w/i the expiration time limit
           false if the time limit has expired

           Additionally compare the file to age of the newest .repo or yum.conf
           file. If any of them are newer then invalidate the cache
           """

        if expire_req_filter and self._matchExpireFilter():
            expiration_time = -1

        # -1 is special and should never get refreshed
        if expiration_time == -1 and os.path.exists(myfile):
            return True
        val = False
        if os.path.exists(myfile):
            cookie_info = os.stat(myfile)
            if cookie_info[8] + expiration_time > time.time():
                val = True
            # WE ARE FROM THE FUTURE!!!!
            elif cookie_info[8] > time.time():
                val = False

            if not self.check_config_file_age:
                return val

            # make sure none of our config files for this repo are newer than
            # us
            if cookie_info[8] < int(self.repo_config_age):
                val = False

        return val

    def setMetadataCookie(self):
        """if possible, set touch the metadata_cookie file"""

        check = self.metadata_cookie
        if not os.path.exists(self.metadata_cookie):
            check = self.cachedir

        if os.access(check, os.W_OK):
            fo = open(self.metadata_cookie, 'w+')
            fo.close()
            del fo

    def setup(self, cache, mediafunc = None, gpg_import_func=None, confirm_func=None, gpgca_import_func=None):
        try:
            self.cache = cache
            self.mediafunc = mediafunc
            self.gpg_import_func = gpg_import_func
            self.gpgca_import_func = gpgca_import_func
            self.confirm_func = confirm_func
        except Errors.RepoError, e:
            raise
        if not self.mediafunc and self.mediaid and not self.mirrorlist and not self.baseurl:
            verbose_logger.log(logginglevels.DEBUG_2, "Disabling media repo for non-media-aware frontend")
            self.enabled = False
            self.skip_if_unavailable = True

    def _cachingRepoXML(self, local):
        """ Should we cache the current repomd.xml """
        if self.cache and not os.path.exists(local):
            raise Errors.RepoError, 'Cannot find repomd.xml file for %s' % self.ui_id
        if self.cache or self.metadataCurrent():
            return True
        return False

    def _getFileRepoXML(self, local, text=None, grab_can_fail=None):
        """ Call _getFile() for the repomd.xml file. """
        checkfunc = (self._checkRepoXML, (), {})
        if grab_can_fail is None:
            grab_can_fail = 'old_repo_XML' in self._oldRepoMDData
        tfname = ''
        try:
            # This is named so that "yum clean metadata" picks it up
            tfname = tempfile.mktemp(prefix='repomd', suffix="tmp.xml",
                                     dir=os.path.dirname(local))
            result = self._getFile(relative=self.repoMDFile,
                                   local=tfname,
                                   copy_local=1,
                                   text=text,
                                   reget=None,
                                   checkfunc=checkfunc,
                                   cache=self.http_caching == 'all',
                                   size=102400) # setting max size as 100K

        except URLGrabError, e:
            misc.unlink_f(tfname)
            if grab_can_fail:
                return None
            raise Errors.RepoError, 'Error downloading file %s: %s' % (local, e)
        except Errors.RepoError:
            misc.unlink_f(tfname)
            if grab_can_fail:
                return None
            raise

        # This should always work...
        try:
            os.rename(result, local)
        except:
            # But in case it doesn't...
            misc.unlink_f(tfname)
            if grab_can_fail:
                return None
            raise Errors.RepoError, 'Error renaming file %s to %s' % (result,
                                                                      local)
        return local

    def _parseRepoXML(self, local, parse_can_fail=None):
        """ Parse the repomd.xml file. """
        try:
            return repoMDObject.RepoMD(self.id, local)
        except Errors.RepoMDError, e:
            if parse_can_fail is None:
                parse_can_fail = 'old_repo_XML' in self._oldRepoMDData
            if parse_can_fail:
                return None
            raise Errors.RepoError, 'Error importing repomd.xml from %s: %s' % (self.ui_id, e)

    def _saveOldRepoXML(self, local):
        """ If we have an older repomd.xml file available, save it out. """
        # Cleanup old trash...
        for fname in glob.glob(self.cachedir + "/*.old.tmp"):
            misc.unlink_f(fname)

        if os.path.exists(local):
            old_local = local + '.old.tmp' # locked, so this is ok
            shutil.copy2(local, old_local)
            xml = self._parseRepoXML(old_local, True)
            if xml is None:
                return None
            self._oldRepoMDData = {'old_repo_XML' : xml, 'local' : local,
                                   'old_local' : old_local, 'new_MD_files' : []}
            return xml
        return None

    def _revertOldRepoXML(self):
        """ If we have older data available, revert to it. """

        #  If we can't do a timestamp check, then we can be looking at a
        # completely different repo. from last time ... ergo. we can't revert.
        #  We still want the old data, so we don't download twice. So we
        # pretend everything is good until the revert.
        if not self.timestamp_check:
            raise Errors.RepoError, "Can't download or revert repomd.xml for %s" % self.ui_id

        if 'old_repo_XML' not in self._oldRepoMDData:
            self._oldRepoMDData = {}
            return

        # Unique names mean the rename doesn't work anymore.
        for fname in self._oldRepoMDData['new_MD_files']:
            misc.unlink_f(fname)

        old_data = self._oldRepoMDData
        self._oldRepoMDData = {}

        if 'old_local' in old_data:
            os.rename(old_data['old_local'], old_data['local'])

        self._repoXML = old_data['old_repo_XML']

        if 'old_MD_files' not in old_data:
            return
        for revert in old_data['old_MD_files']:
            os.rename(revert + '.old.tmp', revert)

    def _doneOldRepoXML(self):
        """ Done with old data, delete it. """
        old_data = self._oldRepoMDData
        self._oldRepoMDData = {}

        if 'old_local' in old_data:
            misc.unlink_f(old_data['old_local'])

        if 'old_MD_files' not in old_data:
            return
        for revert in old_data['old_MD_files']:
            misc.unlink_f(revert + '.old.tmp')

    def _get_mdtype_data(self, mdtype, repoXML=None):
        if repoXML is None:
            repoXML = self.repoXML

        if mdtype == 'group' and 'group_gz' in repoXML.fileTypes():
            mdtype = 'group_gz'
        if (mdtype in ['other', 'filelists', 'primary'] and
            self._check_db_version(mdtype + '_db', repoXML=repoXML)):
            mdtype += '_db'

        return (mdtype, repoXML.repoData.get(mdtype))

    def _get_mdtype_fname(self, data, compressed=False):
        (r_base, remote) = data.location
        local = self.cachedir + '/' + os.path.basename(remote)

        if compressed: # DB file, we need the uncompressed version
            local = misc.decompress(local, fn_only=True)
        return local

    def _groupCheckDataMDNewer(self):
        """ We check the timestamps, if any of the timestamps for the
            "new" data is older than what we have ... we revert. """

        if 'old_repo_XML' not in self._oldRepoMDData:
            return True
        old_repo_XML = self._oldRepoMDData['old_repo_XML']

        if (self.timestamp_check and
            old_repo_XML.timestamp > self.repoXML.timestamp):
            logger.warning("Not using downloaded %s/repomd.xml because it is "
                           "older than what we have:\n"
                           "  Current   : %s\n  Downloaded: %s" %
                           (self.id,
                            time.ctime(old_repo_XML.timestamp),
                            time.ctime(self.repoXML.timestamp)))
            return False
        return True

    @staticmethod
    def _checkRepoXMLMetalink(repoXML, repomd):
        """ Check parsed repomd.xml against metalink.repomd data. """
        if repoXML.timestamp != repomd.timestamp:
            return False
        if repoXML.length != repomd.size:
            return False

        done = False
        for checksum in repoXML.checksums:
            if checksum not in repomd.chksums:
                continue

            if repoXML.checksums[checksum] != repomd.chksums[checksum]:
                return False

            #  All checksums should be trusted, but if we have more than one
            # then we might as well check them all ... paranoia is good.
            done = True

        return done

    def _checkRepoMetalink(self, repoXML=None, metalink_data=None):
        """ Check the repomd.xml against the metalink data, if we have it. """

        if repoXML is None:
            repoXML = self._repoXML
        if metalink_data is None:
            metalink_data = self.metalink_data

        if self._checkRepoXMLMetalink(repoXML, metalink_data.repomd):
            return True

        # FIXME: We probably want to skip to the first mirror which has the
        # latest repomd.xml, but say "if we can't find one, use the newest old
        # repomd.xml" ... alas. that's not so easy to do in urlgrabber atm.
        for repomd in self.metalink_data.old_repomds:
            if self._checkRepoXMLMetalink(repoXML, repomd):
                verbose_logger.log(logginglevels.DEBUG_2,
                                   "Using older repomd.xml\n"
                                   "  Latest: %s\n"
                                   "  Using: %s" %
                                   (time.ctime(metalink_data.repomd.timestamp),
                                    time.ctime(repomd.timestamp)))
                return True
        return False

    def _latestRepoXML(self, local):
        """ Save the Old Repo XML, and if it exists check to see if it's the
            latest available given the metalink data. """

        oxml = self._saveOldRepoXML(local)
        if not oxml: # No old repomd.xml data
            return False

        self._hack_mirrorlist_for_anaconda()
        if not self.metalink: # Nothing to check it against
            return False

        # Get the latest metalink, and the latest repomd data from it
        repomd = self.metalink_data.repomd

        if self.timestamp_check and oxml.timestamp > repomd.timestamp:
            #  We have something "newer" than the latest, and have timestamp
            # checking which will kill anything passing the metalink check.
            return True

        # Do we have the latest repomd already
        return self._checkRepoXMLMetalink(oxml, repomd)

    def _commonLoadRepoXML(self, text, mdtypes=None):
        """ Common LoadRepoXML for instant and group, returns False if you
            should just return. """
        local  = self.cachedir + '/repomd.xml'
        if self._repoXML is not None:
            return False

        if self._cachingRepoXML(local):
            caching = True
            result = local
        else:
            caching = False
            if self._latestRepoXML(local):
                result = local
                old_data = self._oldRepoMDData
                self._repoXML = old_data['old_repo_XML']
            else:
                result = self._getFileRepoXML(local, text)
                if result is None:
                    if (self.skip_if_unavailable and hasattr(self, '_metadata_cache_req')
                        and self._metadata_cache_req in ('write', 'read-only:future')):
                        # Since skip_if_unavailable=True, we can just disable this repo
                        raise Errors.RepoError, "Can't download repomd.xml for %s" % self.ui_id

                    # Ignore this as we have a copy
                    self._revertOldRepoXML()
                    return False

            # if we have a 'fresh' repomd.xml then update the cookie
            self.setMetadataCookie()

        if self._repoXML is None:
            self._repoXML = self._parseRepoXML(result)
        if self._repoXML is None:
            self._revertOldRepoXML()
            return False

        if caching:
            return False # Skip any work.

        if not self._groupCheckDataMDNewer():
            self._revertOldRepoXML()
            return False
        return True

    def _check_db_version(self, mdtype, repoXML=None):
        if self.mddownloadpolicy == 'xml':
            return False

        if repoXML is None:
            repoXML = self.repoXML
        if mdtype in repoXML.repoData:
            if DBVERSION == repoXML.repoData[mdtype].dbversion:
                return True
        return False

    # mmdtype is unused, but in theory was == primary
    # dbmtype == primary_db etc.
    def _groupCheckDataMDValid(self, data, dbmdtype, mmdtype, file_check=False):
        """ Check that we already have this data, and that it's valid. Given
            the DB mdtype and the main mdtype (no _db suffix). """

        if data is None:
            return None

        if not file_check:
            compressed = False
            local = self._get_mdtype_fname(data)
        else:
            compressed = False
            local = self._get_mdtype_fname(data)
            if not os.path.exists(local):
                local = misc.decompress(local, fn_only=True)
                compressed = True
        #  If we can, make a copy of the system-wide-cache version of this file,
        # note that we often don't get here. So we also do this in
        # YumPackageSack.populate ... and we look for the uncompressed versions
        # in retrieveMD.
        self._preload_md_from_system_cache(os.path.basename(local))
        if not self._checkMD(local, dbmdtype, openchecksum=compressed,
                             data=data, check_can_fail=True):
            return None

        return local

    def _commonRetrieveDataMD(self, mdtypes=None):
        """ Retrieve any listed mdtypes, and revert if there was a failure.
            Also put any of the non-valid mdtype files from the old_repo_XML
            into the delete list, this means metadata can change filename
            without us leaking it. """

        downloading = self._commonRetrieveDataMD_list(mdtypes)
        for (ndata, nmdtype) in downloading:
            if not self._retrieveMD(nmdtype, retrieve_can_fail=True):
                self._revertOldRepoXML()
                return False
        self._commonRetrieveDataMD_done(downloading)
        return True

    def _commonRetrieveDataMD_list(self, mdtypes):
        """ Return a list of metadata to be retrieved """

        def _mdtype_eq(omdtype, odata, nmdtype, ndata):
            """ Check if two returns from _get_mdtype_data() are equal. """
            if ndata is None:
                return False
            if omdtype != nmdtype:
                return False
            if odata.checksum != ndata.checksum:
                return False
            #  If we turn --unique-md-filenames on without chaning the data,
            # then we'll get different filenames, but the same checksum.
            #  Atm. just say they are different, to make sure we delete the
            # old files.
            orname = os.path.basename(odata.location[1])
            nrname = os.path.basename(ndata.location[1])
            if orname != nrname:
                return False
            return True

        all_mdtypes = self.retrieved.keys()
        # Add in any extra stuff we don't know about.
        for mdtype in self.repoXML.fileTypes():
            if mdtype in all_mdtypes:
                continue
            if mdtype in ('primary_db', 'filelists_db', 'other_db', 'group_gz'):
                continue
            all_mdtypes.append(mdtype)

        if mdtypes is None:
            mdtypes = all_mdtypes

        reverts = []
        if 'old_repo_XML' not in self._oldRepoMDData:
            old_repo_XML = None
        else:
            old_repo_XML = self._oldRepoMDData['old_repo_XML']
            self._oldRepoMDData['old_MD_files'] = reverts

        # Inited twice atm. ... sue me
        newmdfiles = self._oldRepoMDData['new_MD_files'] = []
        downloading = []
        for mdtype in all_mdtypes:
            (nmdtype, ndata) = self._get_mdtype_data(mdtype)

            if old_repo_XML:
                (omdtype, odata) = self._get_mdtype_data(mdtype,
                                                         repoXML=old_repo_XML)
                local = self._groupCheckDataMDValid(odata, omdtype,mdtype,True)
                if local:
                    if _mdtype_eq(omdtype, odata, nmdtype, ndata):
                        continue # If they are the same do nothing

                    # Move this version, we _may_ get a new one.
                    # We delete it on success, revert it back on failure.
                    # We don't copy as we know it's bad due to above test.
                    os.rename(local, local + '.old.tmp')
                    reverts.append(local)

                    #  This is the super easy way. We just to see if a generated
                    # file is there for all files, but it should always work.
                    #  And anyone who is giving us MD with blah and blah.sqlite
                    # which are different types, can play a game I like to call
                    # "come here, ouch".
                    gen_local = local + '.sqlite'
                    if os.path.exists(gen_local):
                        os.rename(gen_local, gen_local + '.old.tmp')
                        reverts.append(gen_local)

            if ndata is None: # Doesn't exist in this repo
                continue

            if mdtype not in mdtypes:
                continue

            # No old repomd data, but we might still have uncompressed MD
            if self._groupCheckDataMDValid(ndata, nmdtype, mdtype):
                continue
            downloading.append((ndata, nmdtype))
            newmdfiles.append(self._get_mdtype_fname(ndata, False))
        return downloading

    def _commonRetrieveDataMD_done(self, downloading):
        """ Uncompress the downloaded metadata """

        for (ndata, nmdtype) in downloading:
            local = self._get_mdtype_fname(ndata, False)
        self._doneOldRepoXML()

    def _groupLoadRepoXML(self, text=None, mdtypes=None):
        """ Retrieve the new repomd.xml from the repository, then check it
            and parse it. If it fails we revert to the old version and pretend
            that is fine. If the new repomd.xml requires new version of files
            that we have, like updateinfo.xml, we download those too and if any
            of those fail, we again revert everything and pretend old data is
            good. """

        if self._commonLoadRepoXML(text):
            self._commonRetrieveDataMD(mdtypes)

    def _mdpolicy2mdtypes(self):
        md_groups = {'instant'       : ['__None__'],
                     'group:primary' : ['primary'],
                     'group:small'   : ["primary", "updateinfo", "group", "pkgtags"],
                     'group:main'    : ["primary", "updateinfo", "group", "pkgtags",
                                        "filelists", "prestodelta"]}
        mdtypes = set()
        if type(self.mdpolicy) in types.StringTypes:
            mdtypes.update(md_groups.get(self.mdpolicy, [self.mdpolicy]))
        else:
            for mdpolicy in self.mdpolicy:
                mdtypes.update(md_groups.get(mdpolicy, [mdpolicy]))

        if not mdtypes or 'group:all' in mdtypes:
            mdtypes = None
        else:
            mdtypes.discard("__None__")
            mdtypes = sorted(list(mdtypes))
        return mdtypes

    def _loadRepoXML(self, text=None):
        """retrieve/check/read in repomd.xml from the repository"""
        try:
            return self._groupLoadRepoXML(text, self._mdpolicy2mdtypes())
        except KeyboardInterrupt:
            self._revertOldRepoXML() # Undo metadata cookie?
            raise
        raise Errors.RepoError, 'Bad loadRepoXML policy (for %s): %s' % (self.ui_id, self.mdpolicy)

    def _getRepoXML(self):
        if self._repoXML:
            return self._repoXML
        self._loadRepoXML(text=self.ui_id)
        return self._repoXML


    repoXML = property(fget=lambda self: self._getRepoXML(),
                       fset=lambda self, val: setattr(self, "_repoXML", val),
                       fdel=lambda self: setattr(self, "_repoXML", None))

    def _checkRepoXML(self, fo):
        if type(fo) is types.InstanceType:
            filepath = fo.filename
        else:
            filepath = fo

        if self.repo_gpgcheck and not self._override_sigchecks:

            if misc.gpgme is None:
                raise URLGrabError(-1, 'pygpgme is not working so repomd.xml can not be verified for %s' % (self))

            sigfile = self.cachedir + '/repomd.xml.asc'
            try:
                result = self._getFile(relative='repodata/repomd.xml.asc',
                                       copy_local=1,
                                       local = sigfile,
                                       text='%s/signature' % self.ui_id,
                                       reget=None,
                                       checkfunc=None,
                                       cache=self.http_caching == 'all',
                                       size=102400)
            except URLGrabError, e:
                raise URLGrabError(-1, 'Error finding signature for repomd.xml for %s: %s' % (self, e))
            valid = misc.valid_detached_sig(result, filepath, self.gpgdir)
            if not valid and self.gpg_import_func:
                try:
                    self.gpg_import_func(self, self.confirm_func)
                except Errors.YumBaseError, e:
                    raise URLGrabError(-1, 'Gpg Keys not imported, cannot verify repomd.xml for repo %s' % (self))
                valid = misc.valid_detached_sig(result, filepath, self.gpgdir)

            if not valid:
                raise URLGrabError(-1, 'repomd.xml signature could not be verified for %s' % (self))

        try:
            repoXML = repoMDObject.RepoMD(self.id, filepath)
        except Errors.RepoMDError, e:
            raise URLGrabError(-1, 'Error importing repomd.xml for %s: %s' % (self, e))

        self._hack_mirrorlist_for_anaconda()
        if self.metalink and not self._checkRepoMetalink(repoXML):
            raise URLGrabError(-1, 'repomd.xml does not match metalink for %s' %
                               self)

    def _del_dl_file(self, local, size):
        """ Delete a downloaded file if it's the correct size. """

        sd = misc.stat_f(local)
        if not sd: # File doesn't exist...
            return

        if size and sd.st_size < size:
            return # Still more to get...

        # Is the correct size, or too big ... delete it so we'll try again.
        misc.unlink_f(local)

    def checkMD(self, fn, mdtype, openchecksum=False):
        """check the metadata type against its checksum"""
        return self._checkMD(fn, mdtype, openchecksum)

    def _checkMD(self, fn, mdtype, openchecksum=False,
                 data=None, check_can_fail=False):
        """ Internal function, use .checkMD() from outside yum. """

        thisdata = data # So the argument name is nicer
        if thisdata is None:
            thisdata = self.repoXML.getData(mdtype)

        # Note openchecksum means do it after you've uncompressed the data.
        if openchecksum:
            (r_ctype, r_csum) = thisdata.openchecksum # get the remote checksum
            size = thisdata.opensize
        else:
            (r_ctype, r_csum) = thisdata.checksum # get the remote checksum
            size = thisdata.size

        if type(fn) == types.InstanceType: # this is an urlgrabber check
            file = fn.filename
        else:
            file = fn

        if size is not None:
            size = int(size)

        l_csum = _xattr_get_chksum(file, r_ctype)
        if l_csum:
            fsize = misc.stat_f(file)
            if fsize is not None: # We just got an xattr, so it should be there
                if size is None and l_csum == r_csum and fsize.st_size > 0:
                    return 1
                if size == fsize.st_size and l_csum == r_csum:
                    return 1
            # Anything goes wrong, run the checksums as normal...

        try: # get the local checksum
            l_csum = self._checksum(r_ctype, file, datasize=size)
        except Errors.RepoError, e:
            if check_can_fail:
                return None
            raise URLGrabError(-3, 'Error performing checksum: %s' % e)

        if l_csum == r_csum:
            _xattr_set_chksum(file, r_ctype, l_csum)
            return 1
        else:
            if check_can_fail:
                return None
            raise URLGrabError(-1, 'Metadata file does not match checksum')

    def retrieveMD(self, mdtype):
        """base function to retrieve metadata files from the remote url
           returns the path to the local metadata file of a 'mdtype'
           mdtype can be 'primary', 'filelists', 'other' or 'group'."""
        return self._retrieveMD(mdtype)

    def _retrieveMD(self, mdtype, retrieve_can_fail=False, **kwargs):
        """ Internal function, use .retrieveMD() from outside yum. """
        #  Note that this can raise Errors.RepoMDError if mdtype doesn't exist
        # for this repo.
        # FIXME - maybe retrieveMD should call decompress() after we've checked
        # the checksum by default? since we're never acting on compressed MD
        thisdata = self.repoXML.getData(mdtype)

        (r_base, remote) = thisdata.location
        fname = os.path.basename(remote)
        local = self.cachedir + '/' + fname

        if self.retrieved.get(mdtype):
            # got it, move along
            return local

        # Having preloaded the repomd means we should first try preloading this
        # file as well (forcing it this way is only needed when dealing with
        # simple filenames).
        if self._preloaded_repomd:
            misc.unlink_f(local)

        if (os.path.exists(local) or
            self._preload_md_from_system_cache(os.path.basename(local))):
            if self._checkMD(local, mdtype, check_can_fail=True):
                self.retrieved[mdtype] = 1
                return local # it's the same return the local one

        if self.cache == 1:
            if retrieve_can_fail:
                return None
            if os.path.exists(local):
                msg = "Caching enabled and local cache: %s does not match checksum" % local
            else:
                msg = "Caching enabled but no local cache of %s from %s" % (local, self.ui_id)
            raise Errors.RepoError, msg

        # Given the file already exists, is it a partial download of thisdata
        # that we can try to reget?  With unique filenames, that's always.
        # With simple filenames, use the old expected checksum to verify
        # (assuming the existing file or part represents the old data but it
        # usually does).
        partial = True
        orepomd = self._oldRepoMDData.get('old_repo_XML')
        if orepomd is not None:
            odata = orepomd.repoData.get(mdtype)
            if odata is not None:
                ofname = os.path.basename(odata.location[1])
                partial = (fname != ofname or
                           thisdata.checksum == odata.checksum)

        try:
            def checkfunc(obj):
                try:
                    self.checkMD(obj, mdtype)
                except URLGrabError:
                    #  Don't share MD among mirrors, in theory we could use:
                    #     self._del_dl_file(local, int(thisdata.size))
                    # ...but this is safer.
                    misc.unlink_f(obj.filename)
                    raise
                self.retrieved[mdtype] = 1
            text = "%s/%s" % (self.ui_id, mdtype)
            if thisdata.size is None or not partial:
                reget = None
            else:
                reget = 'simple'
                self._del_dl_file(local, int(thisdata.size))
            local = self._getFile(relative=remote,
                                  local=local, 
                                  copy_local=1,
                                  reget=reget,
                                  checkfunc=checkfunc, 
                                  text=text,
                                  cache=self.http_caching == 'all',
                                  size=thisdata.size,
                                  **kwargs)
        except Errors.RepoError:
            if retrieve_can_fail:
                return None
            raise
        except URLGrabError, e:
            if retrieve_can_fail:
                return None
            raise Errors.RepoError, \
                "Could not retrieve %s matching remote checksum from %s" % (local, self.ui_id)
        else:
            return local


    def getPrimaryXML(self):
        """this gets you the path to the primary.xml file, retrieving it if we
           need a new one"""

        return self.retrieveMD('primary')


    def getFileListsXML(self):
        """this gets you the path to the filelists.xml file, retrieving it if we
           need a new one"""

        return self.retrieveMD('filelists')

    def getOtherXML(self):
        return self.retrieveMD('other')

    def getGroups(self):
        """gets groups and returns group file path for the repository, if there
           is none or retrieve/decompress fails, it returns None"""
        if 'group_gz' in self.repoXML.fileTypes():
            fn = self._retrieveMD('group_gz', retrieve_can_fail=True)
            if fn:
                try:
                    fn = misc.repo_gen_decompress(fn, 'comps.xml', cached=self.cache)
                except IOError, e:
                    logger.warning(e)
                    fn = None
            return fn
        return self._retrieveMD('group', retrieve_can_fail=True)

    def setCallback(self, callback, multi_callback=None):
        self.callback = callback
        self.multi_callback = multi_callback
        self._callbacks_changed = True

    def setFailureObj(self, failure_obj):
        self.failure_obj = failure_obj
        self._callbacks_changed = True

    def setMirrorFailureObj(self, failure_obj):
        self.mirror_failure_obj = failure_obj
        self._callbacks_changed = True

    def setInterruptCallback(self, callback):
        self.interrupt_callback = callback
        self._callbacks_changed = True

    def _readMirrorList(self, fo, url=None):
        """ read the mirror list from the specified file object """
        returnlist = []

        content = []
        if fo is not None:
            try:
                content = fo.readlines()
            except Exception, e:
                if url is None: # Shouldn't happen
                    url = "<unknown>"
                logger.error("Could not read mirrorlist %s, error was \n%s" % (url, e))
                content = []
            for line in content:
                if not re.match('\w+://\S+\s*$', line):
                    continue
                mirror = line.rstrip() # no more trailing \n's
                mirror = mirror.replace('$ARCH', '$BASEARCH')
                returnlist.append(mirror)

        return (returnlist, content)

    def _getMirrorList(self):
        """retrieve an up2date-style mirrorlist file from our mirrorlist url,
           also save the file to the local repo dir and use that if cache expiry
           not expired

           we also s/$ARCH/$BASEARCH/ and move along
           return the baseurls from the mirrorlist file
           """
        self.mirrorlist_file = self.cachedir + '/' + 'mirrorlist.txt'
        fo = None

        cacheok = False
        if self.withinCacheAge(self.mirrorlist_file, self.mirrorlist_expire,
                               expire_req_filter=False):
            cacheok = True
            fo = open(self.mirrorlist_file, 'r')
            url = 'file://' + self.mirrorlist_file # just to keep self._readMirrorList(fo,url) happy
        else:
            url = self.mirrorlist
            scheme = urlparse.urlparse(url)[0]
            if scheme == '':
                url = 'file://' + url
            ugopts = self._default_grabopts()
            try:
                fo = urlgrabber.grabber.urlopen(url, **ugopts)
            except URLGrabError, e:
                logger.error("Could not retrieve mirrorlist %s error was\n%s: %s" % (url, e.args[0], misc.to_unicode(e.args[1])))
                fo = None

        (returnlist, content) = self._readMirrorList(fo, url)

        if returnlist:
            if not self.cache and not cacheok:
                output = open(self.mirrorlist_file, 'w')
                for line in content:
                    output.write(line)
                output.close()
        elif not cacheok and os.path.exists(self.mirrorlist_file):
            # New mirror file failed, so use the old one (better than nothing)
            os.utime(self.mirrorlist_file, None)
            return self._readMirrorList(open(self.mirrorlist_file, 'r'))[0]

        return returnlist

    def _preload_file(self, fn, destfn):
        """attempts to copy the file, if possible"""
        # don't copy it if the copy in our users dir is newer or equal
        if not os.path.exists(fn):
            return False
        if os.path.exists(destfn):
            if os.stat(fn)[stat.ST_CTIME] <= os.stat(destfn)[stat.ST_CTIME]:
                return False
        try:
            # IOError is the main culprit, with mode=600. But ignore everything.
            shutil.copy2(fn, destfn)
        except:
            return False
        return True

    def _preload_file_from_system_cache(self, filename, subdir='',
                                        destfn=None):
        """attempts to copy the file from the system-wide cache,
           if possible"""
        if not hasattr(self, 'old_base_cache_dir'):
            return False
        if self.old_base_cache_dir == "":
            return False

        glob_repo_cache_dir=os.path.join(self.old_base_cache_dir, self.id)
        if not os.path.exists(glob_repo_cache_dir):
            return False
        if os.path.normpath(glob_repo_cache_dir) == os.path.normpath(self.cachedir):
            return False

        # Try to copy whatever file it is
        fn = glob_repo_cache_dir   + '/' + subdir + os.path.basename(filename)
        if destfn is None:
            destfn = self.cachedir + '/' + subdir + os.path.basename(filename)
        return self._preload_file(fn, destfn)

    def _preload_md_from_system_cache(self, filename):
        """attempts to copy the metadata file from the system-wide cache,
           if possible"""
        return self._preload_file_from_system_cache(filename)
    
    def _preload_pkg_from_system_cache(self, pkg):
        """attempts to copy the package from the system-wide cache,
           if possible"""
        pname  = os.path.basename(pkg.localPkg())
        destfn = os.path.join(self.pkgdir, pname)
        if self._preload_file_from_system_cache(pkg.localPkg(),
                                                subdir='packages/',
                                                destfn=destfn):
            return True

        if not hasattr(self, '_old_pkgdirs'):
            return False
        for opkgdir in self._old_pkgdirs:
            if self._preload_file(os.path.join(opkgdir, pname), destfn):
                return True
        return False

    def _verify_md(self):
        problems = []
        print 'verifying md'
        try:
            md_types = self.repoXML.fileTypes()
        except Errors.RepoError, e:
            prb = RepoVerifyProblem(1, "failed to load repomd.xml", str(e))
            problems.append(prb)
            return problems

        for md_type in md_types:
            print 'verifying %s' % md_type
            try:
                self.retrieveMD(md_type)
            except Errors.RepoError, e:
                msg = "%s metadata missing or does not match checksum" % md_type
                prb = RepoVerifyProblem(2, msg, str(e))
                problems.append(prb)

        return problems

    def _verify_comps(self):
        print 'verifying comps'
        problems = []
        # grab the comps for this repo
        # run the xmllint on it
        # chuck it into a comps object
        # make sure it parses

        grpfile = self.getGroups()

        # open it up as a file object so iterparse can cope with our compressed file
        if grpfile is not None:
            grpfile = misc.decompress(grpfile)
        try:
            c = comps.Comps()
            c.add(grpfile)
        except (Errors.GroupsError, Errors.CompsException), e:
            msg = "comps file failed to add"
            prb = RepoVerifyProblem(REPO_PROBLEM_COMPS, msg, str(e))
            problems.add(prb)
        else:
            if c.compscount == 0:
                msg = "no groups in comps"
                prb = RepoVerifyProblem(REPO_PROBLEM_COMPS, msg, "")
                problems.add(prb)

        return problems

    def _verify_packages(self):
        return []

    def verify(self, items=['repodata', 'comps']):
        """download/verify the specified items
           @items = ['repodata', 'comps'] can include: repodata, comps, packages
        """
        problems = []
        if 'repodata' in items:
            problems.extend(self._verify_md())
        if 'comps' in items:        
            if self.enablegroups:
                problems.extend(self._verify_comps())
        if 'packages' in items:
            problems.extend(self._verify_packages())
        # what else can we verify?

        return problems


def getMirrorList(mirrorlist, pdict = None):
    warnings.warn('getMirrorList() will go away in a future version of Yum.\n',
            Errors.YumFutureDeprecationWarning, stacklevel=2)
    """retrieve an up2date-style mirrorlist file from a url,
       we also s/$ARCH/$BASEARCH/ and move along
       returns a list of the urls from that file"""

    returnlist = []
    if hasattr(urlgrabber.grabber, 'urlopen'):
        urlresolver = urlgrabber.grabber
    else:
        import urllib
        urlresolver = urllib

    scheme = urlparse.urlparse(mirrorlist)[0]
    if scheme == '':
        url = 'file://' + mirrorlist
    else:
        url = mirrorlist

    try:
        fo = urlresolver.urlopen(url, proxies=pdict)
    except URLGrabError, e:
        print "Could not retrieve mirrorlist %s error was\n%s: %s" % (url, e.args[0], misc.to_unicode(e.args[1]))
        fo = None

    if fo is not None:
        content = fo.readlines()
        for line in content:
            if re.match('\s*(#|$)', line):
                continue
            mirror = line.rstrip() # no more trailing \n's
            mirror = mirror.replace('$ARCH', '$BASEARCH')
            returnlist.append(mirror)

    return returnlist

class RepoVerifyProblem:
    """ Holder for each "problem" we find with a repo.verify(). """
    
    def __init__(self, type, msg, details, fake=False):
        self.type           = type
        self.message        = msg
        self.details        = details
        self.fake           = fake