#!/usr/bin/python -t

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2005 Duke University
#
# Seth Vidal
# Luke Macken

"""
Update metadata (updateinfo.xml) parsing.
"""

import sys

from yum.i18n import utf8_text_wrap, to_utf8, to_unicode, _
from yum.yumRepo import YumRepository
from yum.packages import FakeRepository
from yum.misc import to_xml, decompress, repo_gen_decompress
from yum.misc import cElementTree_iterparse as iterparse
import Errors
import logginglevels

import rpmUtils.miscutils
from rpmUtils.arch import ArchStorage


def safe_iterparse(filename, logger=None):
    """ Works like iterparse, but hides XML errors (prints a warning).

    Yields the (event, elem) pairs produced by iterparse().  If the parser
    raises SyntaxError the iteration simply stops after reporting the
    problem, either through logger.critical() or, when no logger was given,
    on stderr.
    """
    try:
        for event, elem in iterparse(filename):
            yield event, elem
    except SyntaxError: # Bad XML
        if logger:
            logger.critical(_("Updateinfo file is not valid XML: %s"), filename)
        else:
            print >> sys.stderr, "Updateinfo file is not valid XML:", filename


class UpdateNoticeException(Exception):
    """ An exception thrown for bad UpdateNotice data. """
    pass


class UpdateNotice(object):
    """
    A single update notice (for instance, a security fix).

    The notice data lives in a plain dict (see get_metadata()) and can be
    read or written with item access, ie: un['update_id'].
    """

    # Child elements of <update> whose text is stored under their own tag name.
    _TEXT_TAGS = ('pushcount', 'description', 'rights', 'severity',
                  'summary', 'solution', 'title', 'release')
    # Child elements of <update> whose "date" attribute is stored.
    _DATE_TAGS = ('issued', 'updated')
    # Fields compared by __eq__ ('status' is special cased there).
    _EQ_FIELDS = ('type', 'update_id', 'status', 'rights', 'severity',
                  'release', 'issued', 'updated', 'version', 'pushcount',
                  'from', 'title', 'summary', 'description', 'solution')
    # (label, metadata key) of the wrapped free-text sections of text().
    _WRAPPED_SECTIONS = (("    Summary", 'summary'),
                         ("Description", 'description'),
                         ("   Solution", 'solution'),
                         ("     Rights", 'rights'),
                         ("   Severity", 'severity'))

    def __init__(self, elem=None, repoid=None, vlogger=None):
        """ Create a notice, optionally parsing it from an <update> element.

        elem    -- element to parse, or None for an empty notice.
        repoid  -- id of the repo. the notice came from (used in messages).
        vlogger -- verbose logger used to report why two notices differ.

        Raises UpdateNoticeException if elem is given but is not valid.
        """
        self._md = {
            'from'             : '',
            'type'             : '',
            'title'            : '',
            'release'          : '',
            'status'           : '',
            'version'          : '',
            'pushcount'        : '',
            'update_id'        : '',
            'issued'           : '',
            'updated'          : '',
            'description'      : '',
            'rights'           : '',
            'severity'         : '',
            'summary'          : '',
            'solution'         : '',
            'references'       : [],
            'pkglist'          : [],
            'reboot_suggested' : False
        }
        self._repoid = repoid
        self._vlogger = vlogger

        # Don't truth-test the element: an element without children is
        # "false", and would silently be left unparsed.
        if elem is not None:
            self._parse(elem)

    def __getitem__(self, item):
        """ Allows scriptable metadata access (ie: un['update_id']).

        An integer index returns the Nth metadata key (in sorted order).
        Empty strings and unknown keys are both returned as None.
        """
        if type(item) is int:
            return sorted(self._md)[item]
        ret = self._md.get(item)
        if ret == '':
            ret = None
        return ret

    def __contains__(self, item):
        """ Allows quick tests for foo in blah. """
        return item in self._md

    def __setitem__(self, item, val):
        self._md[item] = val

    def __eq__(self, other):
        """ Test if other holds "the same data" as this notice.

        Only the scalar fields are compared, so the packages and references
        can be different (see UpdateMetadata.add_notice).  As a side effect
        a stable/testing status mismatch is rewritten to 'stable' on both.
        """
        def _rid(un):
            if hasattr(un, '_repoid') and un._repoid is not None:
                return un._repoid
            else:
                return '<unknown>'

        def _log_failure(data):
            """Log the mismatched data similarly to conflict markers in git."""
            if self._vlogger is None:
                return

            msg = _('Duplicate of %s differs in some fields:\n')
            msg %= other._md['update_id']
            msg += '<<<<<<< %s:%s\n' % (_rid(other), data)
            msg += '%r\n=======\n%r\n' % (other._md[data], self._md[data])
            msg += '>>>>>>> %s:%s' % (_rid(self), data)
            # --verbose mode enables this
            self._vlogger.log(logginglevels.DEBUG_3, msg)

        if not other or not hasattr(other, '_md'):
            return False

        for data in self._EQ_FIELDS:
            if data == 'status': # FIXME: See below...
                continue
            if self._md[data] != other._md[data]:
                _log_failure(data)
                return False

        # FIXME: Massive hack, Fedora is really broken and gives status=stable
        # and status=testing for updateinfo notices, just depending on which
        # repo. they come from.
        data = 'status'
        if self._md[data] != other._md[data]:
            if self._md[data] not in ('stable', 'testing'):
                _log_failure(data)
                return False
            if other._md[data] not in ('stable', 'testing'):
                _log_failure(data)
                return False
            # They are both really "stable" ...
            self._md[data] = 'stable'
            other._md[data] = 'stable'

        return True

    def __ne__(self, other):
        return not (self == other)

    # Notices are kept in sets, and are mutable, so hash on identity.  This
    # spells out what we get by default anyway, next to __eq__.
    __hash__ = object.__hash__

    @staticmethod
    def _listing(label, entries):
        """ Format "label : entry" with one entry per (continuation) line.

        The result has no trailing newline or whitespace; and with no
        entries it is just the bare label.
        """
        text = label + " :"
        text += "".join([" %s\n\t    :" % entry for entry in entries])
        # Drop the ':' we added last, and then the indent before it.
        return text[:-1].rstrip()

    def text(self, skip_data=('files', 'summary', 'rights', 'solution')):
        """ Return the notice as human readable text.

        skip_data -- names of the sections to leave out, any of: bugs, cves,
                     summary, description, solution, rights, severity, files.
        """
        head = """
===============================================================================
  %(title)s
===============================================================================
  Update ID : %(update_id)s
    Release : %(release)s
       Type : %(type)s
     Status : %(status)s
     Issued : %(issued)s
""" % self._md

        if self._md['updated'] and self._md['updated'] != self._md['issued']:
            # This needs the newline, like every other field, or the next
            # field ends up on the same line.
            head += "    Updated : %s\n" % self._md['updated']

        references = self._md['references']

        # Add our bugzilla references
        bzs = [ref for ref in references if ref['type'] == 'bugzilla']
        if bzs and 'bugs' not in skip_data:
            entries = []
            for bz in bzs:
                # Parsed references always have a 'title' key, it's just None
                # when there wasn't one ... so test the value, not the key.
                title = bz.get('title')
                if title:
                    entries.append("%s - %s" % (bz['id'], title))
                else:
                    entries.append("%s" % bz['id'])
            head += self._listing("       Bugs", entries) + '\n'

        # Add our CVE references
        cves = [ref for ref in references if ref['type'] == 'cve']
        if cves and 'cves' not in skip_data:
            entries = [cve['id'] for cve in cves]
            head += self._listing("       CVEs", entries) + '\n'

        for label, key in self._WRAPPED_SECTIONS:
            if not self._md[key] or key in skip_data:
                continue
            data = utf8_text_wrap(self._md[key], width=64,
                                  subsequent_indent=' ' * 12 + ': ')
            head += "%s : %s\n" % (label, '\n'.join(data))

        if 'files' in skip_data:
            return head[:-1] # chop the last '\n'

        # Get a list of arches we care about:
        #XXX ARCH CHANGE - what happens here if we set the arch - we need to
        # pass this in, perhaps
        arches = set(rpmUtils.arch.getArchList())

        filenames = []
        for pkg in self._md['pkglist']:
            for pkgfile in pkg['packages']:
                if pkgfile['arch'] not in arches:
                    continue
                filenames.append(pkgfile['filename'])
        head += self._listing("      Files", filenames)

        return head

    def __str__(self):
        return to_utf8(self.text())

    def __unicode__(self):
        return to_unicode(self.text())

    def get_metadata(self):
        """ Return the metadata dict. """
        return self._md

    def _parse(self, elem):
        """
        Parse an update element.

        The from, type, status and version attributes are stored, as is the
        data of the id, pushcount, issued, updated, references, description,
        rights, severity, summary, solution, pkglist, title and release
        children.  Any other child is ignored.

        Raises UpdateNoticeException if elem is not an update element, or if
        it has an empty id.
        """
        if elem.tag != 'update':
            raise UpdateNoticeException('No update element found')

        for attrib in ('from', 'type', 'status', 'version'):
            self._md[attrib] = elem.attrib.get(attrib)

        for child in elem:
            tag = child.tag
            if tag == 'id':
                if not child.text:
                    raise UpdateNoticeException("No id element found")
                self._md['update_id'] = child.text
            elif tag in self._TEXT_TAGS:
                self._md[tag] = child.text
            elif tag in self._DATE_TAGS:
                self._md[tag] = child.attrib.get('date')
            elif tag == 'references':
                self._parse_references(child)
            elif tag == 'pkglist':
                self._parse_pkglist(child)

    def _parse_references(self, elem):
        """
        Parse the update references.

        Each reference child is stored as a dict of its id, href, type and
        title attributes (None for any that are missing).

        Raises UpdateNoticeException for any child that isn't a reference.
        """
        for reference in elem:
            if reference.tag != 'reference':
                raise UpdateNoticeException('No reference element found')
            data = {}
            for refattrib in ('id', 'href', 'type', 'title'):
                data[refattrib] = reference.attrib.get(refattrib)
            self._md['references'].append(data)

    def _parse_pkglist(self, elem):
        """
        Parse the package list.

        Each child (collection) is stored as a dict with a 'packages' list,
        plus 'short' and 'name' keys when the collection has a short
        attribute and a name child respectively.
        """
        for collection in elem:
            data = { 'packages' : [] }
            if 'short' in collection.attrib:
                data['short'] = collection.attrib.get('short')
            for item in collection:
                if item.tag == 'name':
                    data['name'] = item.text
                elif item.tag == 'package':
                    data['packages'].append(self._parse_package(item))
            self._md['pkglist'].append(data)

    def _parse_package(self, elem):
        """
        Parse an individual package.

        Returns a dict of the arch, epoch, name, version, release and src
        attributes, plus 'filename' and 'sum' (a (type, text) tuple) when
        those children are there.  A reboot_suggested child is recorded on
        the notice itself.
        """
        package = {}
        for pkgfield in ('arch', 'epoch', 'name', 'version', 'release', 'src'):
            package[pkgfield] = elem.attrib.get(pkgfield)

        #  Bad epoch and arch data is the most common (missed) screwups.
        # Deal with bad epoch data.
        if not package['epoch'] or package['epoch'][0] not in '0123456789':
            package['epoch'] = None

        for child in elem:
            if child.tag == 'filename':
                package['filename'] = child.text
            elif child.tag == 'sum':
                package['sum'] = (child.attrib.get('type'), child.text)
            elif child.tag == 'reboot_suggested':
                self._md['reboot_suggested'] = True
        return package

    def xml(self):
        """Generate the xml for this update notice object"""
        md = self._md

        def attr(value):
            return to_xml(value, attrib=True)

        msg = """
<update from="%s" status="%s" type="%s" version="%s">
  <id>%s</id>
  <title>%s</title>
  <release>%s</release>
  <issued date="%s"/>
  <description>%s</description>\n""" % (to_xml(md['from']),
                to_xml(md['status']), to_xml(md['type']),
                to_xml(md['version']), to_xml(md['update_id']),
                to_xml(md['title']), to_xml(md['release']),
                attr(md['issued']),
                to_xml(md['description']))

        if md['updated']:
            # include the updated date in the generated xml
            msg += """  <updated date="%s"/>\n""" % (attr(md['updated']))
        if md['summary']:
            msg += """  <summary>%s</summary>\n""" % (to_xml(md['summary']))
        if md['solution']:
            msg += """  <solution>%s</solution>\n""" % (to_xml(md['solution']))
        if md['rights']:
            msg += """  <rights>%s</rights>\n""" % (to_xml(md['rights']))
        if md['severity']:
            msg += """  <severity>%s</severity>\n""" % (to_xml(md['severity']))

        if md['references']:
            msg += """  <references>\n"""
            for ref in md['references']:
                if ref['title']:
                    msg += """    <reference href="%s" id="%s" title="%s" type="%s"/>\n""" % (
                        attr(ref['href']), attr(ref['id']),
                        attr(ref['title']), attr(ref['type']))
                else:
                    msg += """    <reference href="%s" id="%s"  type="%s"/>\n""" % (
                        attr(ref['href']), attr(ref['id']),
                        attr(ref['type']))
            msg += """  </references>\n"""

        if md['pkglist']:
            msg += """  <pkglist>\n"""
            for coll in md['pkglist']:
                # 'short', 'name' and 'filename' are only there if they were
                # in the data we parsed, so don't assume they are.
                msg += """    <collection short="%s">\n      <name>%s</name>\n""" % (
                    attr(coll.get('short') or ''),
                    to_xml(coll.get('name') or ''))

                for pkg in coll['packages']:
                    msg += """      <package arch="%s" name="%s" release="%s" src="%s" version="%s" epoch="%s">
        <filename>%s</filename>
      </package>\n""" % (attr(pkg['arch']),
                         attr(pkg['name']),
                         attr(pkg['release']),
                         attr(pkg['src']),
                         attr(pkg['version']),
                         attr(pkg['epoch'] or '0'),
                         to_xml(pkg.get('filename') or ''))
                msg += """    </collection>\n"""
            msg += """  </pkglist>\n"""
        msg += """</update>\n"""
        return msg


def _rpm_tup_vercmp(tup1, tup2):
    """ Compare two "std." tuples, (n, a, e, v, r). """
    return rpmUtils.miscutils.compareEVR((tup1[2], tup1[3], tup1[4]),
                                         (tup2[2], tup2[3], tup2[4]))


class UpdateMetadata(object):
    """
    The root update metadata object.
    """

    def __init__(self, repos=None, logger=None, vlogger=None):
        """ Create the metadata object, and parse the given repos.

        repos   -- iterable of objects to pass to add(), if any.
        logger  -- logger for XML errors (stderr is used if there is none).
        vlogger -- verbose logger for broken/duplicate notice messages.
        """
        self._notices = {}
        self._cache = {}    # a pkg nvr => notice cache for quick lookups
        self._no_cache = {} # a pkg name only => notice list
        self._repos = []    # list of repo ids that we've parsed
        self._logger = logger
        self._vlogger = vlogger
        # Have everything setup before we start to parse any repos.
        self.arch_storage = ArchStorage()
        self.archlist = self.arch_storage.archlist

        for repo in repos or []:
            try: # attempt to grab the updateinfo.xml.gz from the repodata
                self.add(repo)
            except Errors.RepoMDError:
                continue # No metadata found for this repo

    def get_notices(self, name=None):
        """ Return all notices, or just those for the package called name. """
        if name is None:
            return self._notices.values()
        return self._no_cache.get(name) or []

    notices = property(get_notices)

    def get_notice(self, nvr):
        """
        Retrieve an update notice for a given (name, version, release) string
        or tuple.  Returns None if there isn't one.
        """
        if isinstance(nvr, (list, tuple)):
            nvr = '-'.join(nvr)
        return self._cache.get(nvr) or None

    #  The problem with the above "get_notice" is that not everyone updates
    # daily. So if you are at pkg-1, pkg-2 has a security notice, and pkg-3
    # has a BZ fix notice. All you can see is the BZ notice for the new "pkg-3"
    # with the above.
    #  So now instead you lookup based on the _installed_ pkg.pkgtup, and get
    # two notices, in order: [(pkgtup-3, notice), (pkgtup-2, notice)]
    # the reason for the sorting order is that the first match will give you
    # the minimum pkg you need to move to.
    def get_applicable_notices(self, pkgtup):
        """
        Retrieve any update notices which are newer than a
        given std. pkgtup (name, arch, epoch, version, release) tuple.
        Returns: list of (pkgtup, notice) that are newer than the given pkgtup,
                 in the order of newest pkgtups first.
        """
        oldpkgtup = pkgtup
        name = oldpkgtup[0]
        arch = oldpkgtup[1]
        ret = []
        other_arch_list = [] # Matches on name, but for another arch
        notices = set()      # Notices which have an exact name/arch match
        for notice in self.get_notices(name):
            for upkg in notice['pkglist']:
                for pkg in upkg['packages']:
                    other_arch = False
                    if pkg['name'] != name or pkg['arch'] != arch:
                        if (notice not in notices and pkg['name'] == name and
                            pkg['arch'] in self.archlist):
                            other_arch = True
                        else:
                            continue
                    pkgtup = (pkg['name'], pkg['arch'], pkg['epoch'] or '0',
                              pkg['version'], pkg['release'])
                    if _rpm_tup_vercmp(pkgtup, oldpkgtup) <= 0:
                        continue
                    if other_arch:
                        other_arch_list.append((pkgtup, notice))
                    else:
                        ret.append((pkgtup, notice))
                        notices.add(notice)
        # Only use the other arch matches for notices without an exact one.
        for pkgtup, notice in other_arch_list:
            if notice not in notices:
                ret.append((pkgtup, notice))
        ret.sort(cmp=_rpm_tup_vercmp, key=lambda x: x[0], reverse=True)
        return ret

    def add_notice(self, un):
        """ Add an UpdateNotice object. This should be fully populated with
            data, esp. update_id and pkglist/packages.

            Returns False if the notice has no update_id, or if it differs
            from a notice we already have with the same update_id. """
        if not un or not un["update_id"]:
            return False

        #  This is "special", the main thing we want to deal with here is
        # having one errata that has multiple packages in it rpmA and rpmB, but
        # the packages are in repos. repoA and repoB. So instead of doing a
        # single errata pointing to both rpmA and rpmB and put the same thing
        # in both repodata (which is legal, and works fine) people want to have
        # just the packages from repoA in the repodata for repoA and vice versa.
        if un['update_id'] in self._notices:
            oun = self._notices[un['update_id']]
            if oun != un:
                return False

            # Ok, main parts of errata are the same, so now merge references:
            seen = set()
            for ref in oun['references']:
                seen.add(ref['id'])
            for ref in un['references']:
                if ref['id'] in seen:
                    continue
                seen.add(ref['id'])
                oun['references'].append(ref)

            # ...and pkglist (this assumes that a pkglist name XYZ is the
            # same).  A collection doesn't need to have a name, so use .get().
            seen = set()
            for pkg in oun['pkglist']:
                seen.add(pkg.get('name'))
            for pkg in un['pkglist']:
                if pkg.get('name') in seen:
                    continue
                seen.add(pkg.get('name'))
                oun['pkglist'].append(pkg)
            un = oun

        self._notices[un['update_id']] = un
        for pkg in un['pkglist']:
            for filedata in pkg['packages']:
                self._cache['%s-%s-%s' % (filedata['name'],
                                          filedata['version'],
                                          filedata['release'])] = un
                no = self._no_cache.setdefault(filedata['name'], set())
                no.add(un)
        return True

    def add(self, obj, mdtype='updateinfo'):
        """ Parse a metadata from a given YumRepository, file, or filename.

        Raises UpdateNoticeException if obj is empty or the repository has no
        such metadata, and Errors.RepoMDError for a FakeRepository.  Adding
        a repository that has already been parsed does nothing.
        """
        if not obj:
            raise UpdateNoticeException

        repoid = None
        opened_here = False # Only close what we opened ourselves
        if isinstance(obj, (type(''), type(u''))):
            unfile = decompress(obj)
            infile = open(unfile, 'rt')
            opened_here = True
        elif isinstance(obj, YumRepository):
            if obj.id in self._repos:
                return # Already parsed, and there is nothing to read now
            repoid = obj.id
            self._repos.append(obj.id)
            md = obj.retrieveMD(mdtype)
            if not md:
                raise UpdateNoticeException()
            unfile = repo_gen_decompress(md, 'updateinfo.xml')
            infile = open(unfile, 'rt')
            opened_here = True
        elif isinstance(obj, FakeRepository):
            raise Errors.RepoMDError("No updateinfo for local pkg")
        else:   # obj is a file object
            infile = obj

        try:
            self._add_notices_from(infile, repoid)
        finally:
            if opened_here:
                infile.close()

    def _add_notices_from(self, infile, repoid):
        """ Parse every update element in infile, and add it as a notice. """
        def _rid(repoid, fmt=_(' (from %s)')):
            if not repoid:
                return ''
            return fmt % repoid

        have_dup = False
        for event, elem in safe_iterparse(infile, logger=self._logger):
            if elem.tag != 'update':
                continue

            try:
                un = UpdateNotice(elem, repoid, self._vlogger)
            except UpdateNoticeException:
                msg = _("An update notice%s is broken, skipping.") % _rid(repoid)
                if self._vlogger:
                    self._vlogger.log(logginglevels.DEBUG_1, "%s", msg)
                else:
                    print >> sys.stderr, msg
                continue

            if self.add_notice(un):
                continue

            msg = _("Update notice %s%s is broken, or a bad duplicate, skipping.") % (un['update_id'], _rid(repoid))
            if not have_dup:
                # Only ask for the report on the first problem we see.
                msg += _('\nYou should report this problem to the owner of the %srepository.') % _rid(repoid, "%s ")
                msg += _('\nTo help pinpoint the issue, please attach the output of "yum updateinfo --verbose" to the report.')
            have_dup = True
            if self._vlogger:
                self._vlogger.warn("%s", msg)
            else:
                print >> sys.stderr, msg

    def __unicode__(self):
        ret = u''
        for notice in self.notices:
            ret += unicode(notice)
        return ret

    def __str__(self):
        return to_utf8(self.__unicode__())

    def xml(self, fileobj=None):
        """ Generate the xml for all of the notices.

        If fileobj is given the xml is written to it, and None is returned.
        Otherwise the xml is returned as a string.
        """
        msg = """<?xml version="1.0"?>\n<updates>"""
        end = """</updates>\n"""

        if fileobj:
            fileobj.write(msg)
            for notice in self._notices.values():
                fileobj.write(notice.xml())
            fileobj.write(end)
            return

        for notice in self._notices.values():
            msg += notice.xml()
        msg += end
        return msg


def main():
    """ update_md test function. """
    import yum.misc

    yum.misc.setup_locale()
    def usage():
        print >> sys.stderr, "Usage: %s <update metadata> ..." % sys.argv[0]
        sys.exit(1)

    if len(sys.argv) < 2:
        usage()

    try:
        print(sys.argv[1])
        um = UpdateMetadata()
        for srcfile in sys.argv[1:]:
            um.add(srcfile)
        print(unicode(um))
    except IOError:
        print >> sys.stderr, "%s: No such file:\'%s\'" % (sys.argv[0],
                                                          sys.argv[1:])
        usage()

if __name__ == '__main__':
    main()