#!/usr/bin/python # # Version: 0.3.3 # # A plugin for the Yellowdog Updater Modified which sorts each repo's # mirrorlist by connection speed prior to download. # # To install this plugin, just drop it into /usr/lib/yum-plugins, and # make sure you have 'plugins=1' in your /etc/yum.conf. You also need to # create the following configuration file, if not installed through an RPM: # # /etc/yum/pluginconf.d/fastestmirror.conf: # [main] # enabled=1 # verbose=1 # socket_timeout=3 # hostfilepath=timedhosts # maxhostfileage=10 # maxthreads=15 # #exclude=.gov, facebook # #include_only=.nl,.de,.uk,.ie # #prefer=your.favourite.mirror # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # (C) Copyright 2005 Luke Macken # """ B{FastestMirror} is a Yum plugin which sorts each repository's mirrorlist according to connection speed prior to download. """ import os import sys import time import socket import string import urlparse import datetime import threading import re from yum.plugins import TYPE_CORE requires_api_version = '2.5' plugin_type = (TYPE_CORE,) verbose = False always_print_best_host = True socket_timeout = 3 timedhosts = {} hostfilepath = '' maxhostfileage = 10 loadcache = False maxthreads = 15 exclude = None include_only = None prefer = None downgrade_ftp = True done_sock_timeout = False done_repos = set() def init_hook(conduit): """ This function initiliazes the variables required for running B{fastestmirror} module. The variables are initiliazed from the main section of the plugin file. There are no parameteres for this function. It uses global variables to communicate with other functions. This function refers: - L{get_hostfile_age} @param verbose : Verbosity of output. @type verbose : Boolean @param socket_timeout : The default timeout for a socket connection. @type socket_timeout : Integer @param hostfilepath : Absolute path to the plugin's cache file. @type hostfilepath : String @param maxhostfileage : Maximum age of the plugin's cache file. @type maxhostfileage : Integer @param loadcache : Fastest Mirrors to be loaded from plugin's cache or not. @type loadcache : Boolean """ global verbose, socket_timeout, hostfilepath, maxhostfileage, loadcache global maxthreads, exclude, include_only, prefer, downgrade_ftp, always_print_best_host if hasattr(conduit, 'registerPackageName'): conduit.registerPackageName("yum-plugin-fastestmirror") verbose = conduit.confBool('main', 'verbose', default=False) always_print_best_host = conduit.confBool('main', 'always_print_best_host', default=True) socket_timeout = conduit.confInt('main', 'socket_timeout', default=3) hostfilepath = conduit.confString('main', 'hostfilepath', default='timedhosts') maxhostfileage = conduit.confInt('main', 'maxhostfileage', default=10) maxthreads = conduit.confInt('main', 'maxthreads', default=10) exclude = conduit.confString('main', 'exclude', default=None) include_only = conduit.confString('main', 'include_only', default=None) prefer = conduit.confString('main', 'prefer', default='no.prefer.mirror') downgrade_ftp = conduit.confBool('main', 'downgrade_ftp', default=True) def clean_hook(conduit): """ This function cleans the plugin cache file if exists. The function is called when C{yum [options] clean [plugins | all ]} is executed. """ global hostfilepath if hostfilepath and hostfilepath[0] != '/': hostfilepath = conduit._base.conf.cachedir + '/' + hostfilepath if os.path.exists(hostfilepath): conduit.info(2, "Cleaning up list of fastest mirrors") try: os.unlink(hostfilepath) except Exception, e: conduit.info(2, "Cleanup failed: %s" % e) # Get the hostname from a url, stripping away any usernames/passwords host = lambda mirror: mirror.split('/')[2].split('@')[-1] def _can_write_results(fname): if not os.path.exists(fname): try: hostfile = file(hostfilepath, 'w') return True except: return False return os.access(fname, os.W_OK) def _len_non_ftp(urls): ''' Count the number of urls, which aren't ftp. ''' num = 0 for url in urls: if url.startswith("ftp:"): continue num += 1 return num def prereposetup_hook(conduit): """ This function is called after Yum has initiliazed all the repository information. If cache file exists, this function will load the mirror speeds from the file, else it will determine the fastest mirrors afresh and write them back to the cache file. There are no parameteres for this function. It uses global variables to communicate with other functions. This function refers: - L{read_timedhosts()} - L{FastestMirror.get_mirrorlist()} - L{write_timedhosts()} @param loadcache : Fastest Mirrors to be loaded from plugin's cache file or not. @type loadcache : Boolean """ global loadcache, exclude, include_only, prefer, hostfilepath if hostfilepath and hostfilepath[0] != '/': hostfilepath = conduit._base.conf.cachedir + '/' + hostfilepath # If the file hostfilepath exists and is newer than the maxhostfileage, # then load the cache. if os.path.exists(hostfilepath) and get_hostfile_age() < maxhostfileage: loadcache = True opts, commands = conduit.getCmdLine() if conduit._base.conf.cache or not _can_write_results(hostfilepath): return if done_repos: conduit.info(2, "Checking for new repos for mirrors") elif loadcache: conduit.info(2, "Loading mirror speeds from cached hostfile") read_timedhosts() else: conduit.info(2, "Determining fastest mirrors") repomirrors = {} repos = conduit.getRepos() # First do all of the URLs as one big list, this way we get as much # parallelism as possible (if we need to do the network tests). all_urls = [] for repo in repos.listEnabled(): if repo.id in done_repos: continue if downgrade_ftp and _len_non_ftp(repo.urls) == 1: continue if len(repo.urls) == 1: continue all_urls.extend(repo.urls) all_urls = FastestMirror(all_urls).get_mirrorlist() # This should now just be looking up the cached times. for repo in repos.listEnabled(): if repo.id in done_repos: continue if downgrade_ftp and _len_non_ftp(repo.urls) == 1: repo.urls = sorted(repo.urls, reverse=True) # ftp comes before http continue if len(repo.urls) == 1: continue if str(repo) not in repomirrors: repomirrors[str(repo)] = FastestMirror(repo.urls).get_mirrorlist() if include_only: def includeCheck(mirror): if filter(lambda exp: re.search(exp, host(mirror)), include_only.replace(',', ' ').split()): conduit.info(2, "Including mirror: %s" % host(mirror)) return True return False repomirrors[str(repo)] = filter(includeCheck,repomirrors[str(repo)]) else: if exclude: def excludeCheck(mirror): if filter(lambda exp: re.search(exp, host(mirror)), exclude.replace(',', ' ').split()): conduit.info(2, "Excluding mirror: %s" % host(mirror)) return False return True repomirrors[str(repo)] = filter(excludeCheck,repomirrors[str(repo)]) repo.urls = repomirrors[str(repo)] if len(repo.urls): lvl = 3 if always_print_best_host: lvl = 2 conduit.info(lvl, " * %s: %s" % (str(repo), host(repo.urls[0]))) repo.failovermethod = 'priority' repo.check() repo.setupGrab() done_repos.add(repo.id) if done_sock_timeout: socket.setdefaulttimeout(None) if not loadcache: write_timedhosts() def read_timedhosts(): """ This function reads the time and hostname from the plugin's cache file and store them in C{timedhosts}. There are no parameteres for this function. It uses global variables to communicate with other functions. This function is referred by: - L{prereposetup_hook()} @param timedhosts : A list of time intervals to reach different hosts corresponding to the mirrors. The index of the list are hostnames. C{timedhosts[host] = time}. @type timedhosts : List """ global timedhosts try: hostfile = file(hostfilepath) for line in hostfile.readlines(): host, time = line.split() timedhosts[host] = float(time) hostfile.close() except IOError: pass def write_timedhosts(): """ This function writes the plugin's cache file with the entries in the C{timedhosts} list. There are no parameteres for this function. It uses global variables to communicate with other functions. This function is referred by: - L{prereposetup_hook()} @param timedhosts : A list of time intervals to reach different hosts corresponding to the mirrors. The index of the list are hostnames. C{timedhosts[host] = time}. @type timedhosts : List """ global timedhosts try: hostfile = file(hostfilepath, 'w') for host in timedhosts: hostfile.write('%s %s\n' % (host, timedhosts[host])) hostfile.close() except IOError: pass def get_hostfile_age(): """ This function returns the current age of the plugin's cache file. There are no parameteres for this function. It uses global variables to communicate with other functions. This function is referred by: - L{init_hook()} @param hostfilepath : Absolute path to the plugin's cache file. @type hostfilepath : String @rtype: Integer @return: The age of the plugin's cache file. """ global hostfilepath timestamp = datetime.datetime.fromtimestamp(os.path.getmtime(hostfilepath)) return (datetime.datetime.now() - timestamp).days class FastestMirror: """ This is the helper class of B{fastestmirror} module. This class does all the processing of the response time calculation for all the mirrors of all the enabled Yum repositories. """ def __init__(self, mirrorlist): """ This is the initiliazer function of the B{L{FastestMirror}} class. @param mirrorlist : A list of mirrors for an enabled repository. @type mirrorlist : List """ self.mirrorlist = mirrorlist self.results = {} self.threads = [] # If we don't spawn any threads, we don't need locking... def _init_lock(self): if not hasattr(self, '_results_lock'): self._results_lock = threading.Lock() global done_sock_timeout done_sock_timeout = True socket.setdefaulttimeout(socket_timeout) def _acquire_lock(self): if hasattr(self, '_results_lock'): self._results_lock.acquire() def _release_lock(self): if hasattr(self, '_results_lock'): self._results_lock.release() def get_mirrorlist(self): """ This function pings/polls all the mirrors in the list C{FastestMirror.mirrorlist} and returns the sorted list of mirrors according to the increasing response time of the mirrors. This function refers: - L{FastestMirror._poll_mirrors()} This function is referred by: - L{prereposetup_hook()} - L{main()} @rtype: List @return: The list of mirrors sorted according to the increasing response time. """ self._poll_mirrors() if not downgrade_ftp: mirrors = [(v, k) for k, v in self.results.items()] else: # False comes before True mirrors = [(k.startswith("ftp"), v, k) for k, v in self.results.items()] mirrors.sort() return [x[-1] for x in mirrors] def _poll_mirrors(self): """ This function uses L{PollThread} class to ping/poll individual mirror in parallel. This function refers: - L{PollThread.run()} This function is referred by: - L{FastestMirror.get_mirrorlist()} """ global maxthreads for mirror in self.mirrorlist: if len(self.threads) > maxthreads: if self.threads[0].isAlive(): self.threads[0].join() del self.threads[0] if mirror.startswith("file:"): mhost = "127.0.0.1" else: mhost = host(mirror) if mhost in timedhosts: result = timedhosts[mhost] if verbose: print "%s already timed: %s" % (mhost, result) self._add_result(mirror, mhost, result) elif mhost in ("127.0.0.1", "::1", "localhost", prefer): self._add_result(mirror, mhost, 0) else: # No cached info. so spawn a thread and find the info. out self._init_lock() pollThread = PollThread(self, mirror) pollThread.start() self.threads.append(pollThread) while len(self.threads) > 0: if self.threads[0].isAlive(): self.threads[0].join() del self.threads[0] def _add_result(self, mirror, host, time): """ This function is called by L{PollThread.run()} to add details of a mirror in C{FastestMirror.results} dictionary. This function is referred by: - L{PollThread.run()} @param mirror : The mirror that was polled for response time. @type mirror : String @param host : The hostname of the mirror. @type host : String @param time : The response time of the mirror. @type time : Integer @param timedhosts : A list of time intervals to reach different hosts corresponding to the mirrors. The index of the list are hostnames. @type timedhosts : List """ global timedhosts self._acquire_lock() if verbose: print " * %s : %f secs" % (host, time) self.results[mirror] = time timedhosts[host] = time self._release_lock() class PollThread(threading.Thread): """ B{PollThread} class implements C{threading.Thread} class. This class provides the functionalities to ping/poll the mirrors in parallel. """ def __init__(self, parent, mirror): """ It is initiliazer function for B{L{PollThread}} class. This function initiliazes the service ports for different webservices. @param parent : The parent class. @type parent : Class @param mirror : The mirror of a repository. @type mirror : String """ threading.Thread.__init__(self) self.parent = parent self.mirror = mirror self.host = host(mirror) uService = urlparse.urlparse(mirror)[0] if uService == "http": self.port = 80 elif uService == "https": self.port = 443 elif uService == "ftp": self.port = 21 elif uService == "file": self.host = "127.0.0.1" else: self.port = -2 def run(self): """ The C{threading.Thread.run()} function is being overridden here. This function pings/polls a mirror and add the details of that mirror to the C{FastestMirror.results} dictionary. The response time of any mirror is '99999999999' if any exception occurs during polling. This function refers: - L{FastestMirror._add_result()} This function is referred by: - L{FastestMirror._poll_mirrors()} """ try: if self.host in timedhosts: result = timedhosts[self.host] if verbose: print "%s already timed: %s" % (self.host, result) else: if self.host in ("127.0.0.1", "::1", "localhost", prefer): result = 0 else: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) uPort = string.find(self.host,":") if uPort > 0: self.port = int(self.host[uPort+1:]) self.host = self.host[:uPort] time_before = time.time() sock.connect((self.host, self.port)) result = time.time() - time_before sock.close() self.parent._add_result(self.mirror, self.host, result) except: if verbose: print " * %s : dead" % self.host self.parent._add_result(self.mirror, self.host, 99999999999) def main(): """ This is the main function for B{fastestmirror} module. This function explains the usage of B{fastestmirror} module. Also parses the command line arguments. This function refers: - L{FastestMirror.get_mirrorlist()} """ global verbose verbose = True if len(sys.argv) == 1: print "Usage: %s [mirror2] ... [mirrorN]" % sys.argv[0] sys.exit(-1) mirrorlist = sys.argv[1:] print "Result: " + str(FastestMirror(mirrorlist).get_mirrorlist()) if __name__ == '__main__': main()