Efc@sWdZddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl Z ddl Z ddlZyddlmZWn!ek rddlmZnXyddlZWnek reZnXeZddlmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ddlm%Z%m&Z&m'Z'm(Z(e j)d Z*da,de j-ddeddZ.dZ/d e0fd YZ1d e1efd YZ2ej3d Z4dZ5ddDdYZ6ddEdYZ7dZ8ddFdYZ9de9fdYZ:de9fdYZ;de9fdYZ<dZ=de9fdYZ>ddGd YZ?d!e?fd"YZ@d#dHd$YZAd%eAe9fd&YZBd'eAe9fd(YZCd)ZDd*dId+YZEd,e9eEfd-YZFd.e9eEfd/YZGd0e9fd1YZHd2eHfd3YZIeJed4rd5eHfd6YZKnd7e9fd8YZLd9e9fd:YZMd;ZNd<ZOd=ZPd>e9fd?YZQd@e9fdAYZRdBeRfdCYZSdS(Js! An extensible library for opening URLs using a variety of protocols The simplest way to use this module is to call the urlopen function, which accepts a string containing a URL or a Request object (described below). It opens the URL and returns the results as file-like object; the returned object has some extra methods described below. The OpenerDirector manages a collection of Handler objects that do all the actual work. Each Handler implements a particular protocol or option. The OpenerDirector is a composite object that invokes the Handlers needed to open the requested URL. For example, the HTTPHandler performs HTTP GET and POST requests and deals with non-error returns. The HTTPRedirectHandler automatically deals with HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler deals with digest authentication. urlopen(url, data=None) -- Basic usage is the same as original urllib. pass the url and optionally data to post to an HTTP URL, and get a file-like object back. One difference is that you can also pass a Request instance instead of URL. Raises a URLError (subclass of IOError); for HTTP errors, raises an HTTPError, which can also be treated as a valid response. build_opener -- Function that creates a new OpenerDirector instance. Will install the default handlers. Accepts one or more Handlers as arguments, either instances or Handler classes that it will instantiate. If one of the argument is a subclass of the default handler, the argument will be installed instead of the default. install_opener -- Installs a new opener as the default opener. objects of interest: OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages the Handler classes, while dealing with requests and responses. Request -- An object that encapsulates the state of a request. The state can be as simple as the URL. It can also include extra HTTP headers, e.g. a User-Agent. BaseHandler -- exceptions: URLError -- A subclass of IOError, individual protocols have their own specific subclass. HTTPError -- Also a valid HTTP response, so you can treat an HTTP error as an exceptional event or valid response. internals: BaseHandler and parent _call_chain conventions Example usage: import urllib2 # set up authentication info authinfo = urllib2.HTTPBasicAuthHandler() authinfo.add_password(realm='PDQ Application', uri='https://mahler:8092/site-updates.py', user='klem', passwd='geheim$parole') proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"}) # build a new opener that adds authentication and caching FTP handlers opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler) # install it urllib2.install_opener(opener) f = urllib2.urlopen('http://www.python.org/') iN(tStringIO(tunwraptunquotet splittypet splithosttquotet addinfourlt splitporttsplittagttoBytest splitattrt ftpwrappert splitusert splitpasswdt splitvalue(t localhostt url2pathnamet getproxiest proxy_bypassic Cs|s|s|r|dk r-tdntsBtdntjdtjd|d|}td|dt}t|}nF|rtd|}t|}n"t dkrta }nt }|j |||S(NsDYou can't pass both context and any of cafile, capath, and cadefaultsSSL support not availablet cert_reqstcafiletcapathtcontexttcheck_hostname( tNonet ValueErrort _have_ssltsslt_create_stdlib_contextt CERT_REQUIREDt HTTPSHandlertTruet build_openert_openertopen( turltdatattimeoutRRt cadefaultRt https_handlertopener((s/usr/lib64/python2.7/urllib2.pyturlopens$    cCs |adS(N(R!(R(((s/usr/lib64/python2.7/urllib2.pytinstall_openerstURLErrorcBseZdZdZRS(cCs|f|_||_dS(N(targstreason(tselfR-((s/usr/lib64/python2.7/urllib2.pyt__init__s cCs d|jS(Ns(R-(R.((s/usr/lib64/python2.7/urllib2.pyt__str__s(t__name__t __module__R/R0(((s/usr/lib64/python2.7/urllib2.pyR+s t HTTPErrorcBsAeZdZejZdZdZedZdZ RS(sBRaised when HTTP error occurs, but also acts like non-error returncCsV||_||_||_||_||_|dk rR|j||||ndS(N(tcodetmsgthdrstfptfilenameRt_HTTPError__super_init(R.R#R4R5R6R7((s/usr/lib64/python2.7/urllib2.pyR/s      cCsd|j|jfS(NsHTTP Error %s: %s(R4R5(R.((s/usr/lib64/python2.7/urllib2.pyR0scCs|jS(N(R5(R.((s/usr/lib64/python2.7/urllib2.pyR-scCs|jS(N(R6(R.((s/usr/lib64/python2.7/urllib2.pytinfos( R1R2t__doc__RR/R9R0tpropertyR-R:(((s/usr/lib64/python2.7/urllib2.pyR3s   s:\d+$cCs_|j}tj|d}|dkr@|jdd}ntjd|d}|jS(sReturn request-host, as defined by RFC 2965. Variation from RFC: returned value is lowercased, for convenient comparison. ittHost(t get_full_urlturlparset get_headert _cut_port_retsubtlower(trequestR#thost((s/usr/lib64/python2.7/urllib2.pyt request_hosts   tRequestcBseZdidedZdZdZdZdZdZ dZ dZ dZ d Z d Zd Zd Zd ZdZdZdZddZdZRS(cCst||_t|j\|_|_d|_d|_d|_d|_||_ i|_ x*|j D]\}}|j ||qmWi|_ |dkrt|}n||_||_dS(N(Rt_Request__originalRt_Request__fragmentRttypeRFtportt _tunnel_hostR$theaderstitemst add_headertunredirected_hdrsRGtorigin_req_hostt unverifiable(R.R#R$RNRRRStkeytvalue((s/usr/lib64/python2.7/urllib2.pyR/s         cCs^|d dkrQ|d}ttd|rQt|d|t||Snt|dS(Ni t _Request__r_tget_(thasattrRHtgetattrtAttributeError(R.tattrtname((s/usr/lib64/python2.7/urllib2.pyt __getattr__s  cCs|jrdSdSdS(NtPOSTtGET(thas_data(R.((s/usr/lib64/python2.7/urllib2.pyt get_methods cCs ||_dS(N(R$(R.R$((s/usr/lib64/python2.7/urllib2.pytadd_data scCs |jdk S(N(R$R(R.((s/usr/lib64/python2.7/urllib2.pyR` scCs|jS(N(R$(R.((s/usr/lib64/python2.7/urllib2.pytget_datascCs(|jrd|j|jfS|jSdS(Ns%s#%s(RJRI(R.((s/usr/lib64/python2.7/urllib2.pyR?s cCsV|jdkrOt|j\|_|_|jdkrOtd|jqOn|jS(Nsunknown url type: %s(RKRRRIt_Request__r_typeR(R.((s/usr/lib64/python2.7/urllib2.pytget_types cCsR|jdkrKt|j\|_|_|jrKt|j|_qKn|jS(N(RFRRRdt_Request__r_hostR(R.((s/usr/lib64/python2.7/urllib2.pytget_host s  cCs|jS(N(Rf(R.((s/usr/lib64/python2.7/urllib2.pyt get_selector'scCsJ|jdkr(|j r(|j|_n||_|j|_||_dS(Nthttps(RKRMRFRIRf(R.RFRK((s/usr/lib64/python2.7/urllib2.pyt set_proxy*s   cCs|j|jkS(N(RfRI(R.((s/usr/lib64/python2.7/urllib2.pyt has_proxy3scCs|jS(N(RR(R.((s/usr/lib64/python2.7/urllib2.pytget_origin_req_host6scCs|jS(N(RS(R.((s/usr/lib64/python2.7/urllib2.pytis_unverifiable9scCs||j|jLs RNRRRSN(i-i.i/i3(sGETR(i-i.i/( RatreplaceRRNRORHRlRR3R?( R.RR7R4R5RNtnewurltmt newheaders((s/usr/lib64/python2.7/urllib2.pyR8s   c Csd|kr"|jdd}n&d|krD|jdd}ndStj|}|jsyt|}d|d>> _parse_proxy('file:/ftp.example.com/') Traceback (most recent call last): ValueError: proxy URL with no authority: 'file:/ftp.example.com/' The first three items of the returned tuple may be None. Examples of authority parsing: >>> _parse_proxy('proxy.example.com') (None, None, None, 'proxy.example.com') >>> _parse_proxy('proxy.example.com:3128') (None, None, None, 'proxy.example.com:3128') The authority component may optionally include userinfo (assumed to be username:password): >>> _parse_proxy('joe:password@proxy.example.com') (None, 'joe', 'password', 'proxy.example.com') >>> _parse_proxy('joe:password@proxy.example.com:3128') (None, 'joe', 'password', 'proxy.example.com:3128') Same examples, but with URLs instead: >>> _parse_proxy('http://proxy.example.com/') ('http', None, None, 'proxy.example.com') >>> _parse_proxy('http://proxy.example.com:3128/') ('http', None, None, 'proxy.example.com:3128') >>> _parse_proxy('http://joe:password@proxy.example.com/') ('http', 'joe', 'password', 'proxy.example.com') >>> _parse_proxy('http://joe:password@proxy.example.com:3128') ('http', 'joe', 'password', 'proxy.example.com:3128') Everything after the authority is ignored: >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') ('ftp', 'joe', 'password', 'proxy.example.com') Test for no trailing '/' case: >>> _parse_proxy('http://joe:password@proxy.example.com') ('http', 'joe', 'password', 'proxy.example.com') Rs//sproxy URL with no authority: %riiN(RRRRRR R ( tproxytschemetr_schemet authoritytendtuserinfothostporttusertpassword((s/usr/lib64/python2.7/urllib2.pyt _parse_proxys2      RcBs#eZdZddZdZRS(idcCse|dkrt}n||_x=|jD]/\}}t|d||||jdq.WdS(Ns%s_opencSs||||S(N((trRRKR((s/usr/lib64/python2.7/urllib2.pyts(RRtproxiesROtsetattrR(R.RRKR#((s/usr/lib64/python2.7/urllib2.pyR/s     c Cs|j}t|\}}}}|dkr9|}n|j|jr_t|jr_dS|r|rdt|t|f} tj| j } |j dd| nt|}|j ||||ks|dkrdS|j j |d|jSdS(Ns%s:%ssProxy-authorizationsBasic RiR%(ReRRRgRFRRtbase64t b64encodetstripRPRjRR"R%( R.RRRKt orig_typet proxy_typeRRRt user_passtcreds((s/usr/lib64/python2.7/urllib2.pyRs       N(R1R2RRR/R(((s/usr/lib64/python2.7/urllib2.pyRs tHTTPPasswordMgrcBs8eZdZdZdZedZdZRS(cCs i|_dS(N(tpasswd(R.((s/usr/lib64/python2.7/urllib2.pyR/scCst|tr|g}n||jkr:i|j|(((s/usr/lib64/python2.7/urllib2.pyR=scCs}tjjdr;td}|j|}|j|Sgt|D]}ttj dd^qH}dj |SdS(sReturn n random bytes.s /dev/urandomiiR=N( tosRtexistsR"RRtrangetchrtrandomt randrangetjoin(tntftsRtL((s/usr/lib64/python2.7/urllib2.pyt randombytess  1tAbstractDigestAuthHandlercBsSeZddZdZdZdZdZdZdZ dZ RS( cCsO|dkrt}n||_|jj|_d|_d|_d|_dS(Ni(RRR RR#t nonce_countt last_nonce(R.R ((s/usr/lib64/python2.7/urllib2.pyR/s     cCs d|_dS(Ni(R#(R.((s/usr/lib64/python2.7/urllib2.pyR%scCs|j|d}|jdkrBt|jdd|dn|jd7_|r|jd}|jdkr|j||SndS(Niisdigest auth failediitdigest(RsRR#R3R?tsplitRDtretry_http_digest_auth(R.R3RFRRNR0R((s/usr/lib64/python2.7/urllib2.pyR2scCs|jdd\}}tt|}|j||}|rd|}|jj|jd|krndS|j|j||j j |d|j }|SdS(NRis Digest %sR%( ROtparse_keqv_listtparse_http_listtget_authorizationRNRsR3RRpRR"R%(R.RR6ttokent challengetchaltauth_valtresp((s/usr/lib64/python2.7/urllib2.pyRPs cCs<tjd|j|tjtdfj}|d S(Ns %s:%s:%s:%sii(thashlibtsha1RLttimetctimeRJt hexdigest(R.tnoncetdig((s/usr/lib64/python2.7/urllib2.pyt get_cnoncescCsdyK|d}|d}|jd}|jdd}|jdd}Wntk r_dSX|j|\}} |dkrdS|jj||j\} } | dkrdS|jr|j|j |} nd} d| || f} d|j |j f}|d kr||j kr?|j d 7_ nd |_ ||_ d |j }|j|}d ||||||f}| || |}nD|dkr| || d|||f}ntd |d| |||j |f}|r|d|7}n| r5|d| 7}n|d|7}|r`|d||f7}n|S(NR R^tqopt algorithmtMD5topaques%s:%s:%ss%s:%sR6is%08xs%s:%s:%s:%s:%ssqop '%s' is not supported.s>username="%s", realm="%s", nonce="%s", uri="%s", response="%s"s , opaque="%s"s , digest="%s"s, algorithm="%s"s, qop=auth, nc=%s, cnonce="%s"(RsRtKeyErrortget_algorithm_implsR RR?R`tget_entity_digestRcRaRhRMRLR`R+(R.RRVR R^RaRbRdtHtKDRR4tentdigtA1tA2tncvaluetcnoncetnoncebittrespdigR((s/usr/lib64/python2.7/urllib2.pyRSsV    !        ( csU|j}|dkr$dn|dkr<dnfd}|fS(NRccSstj|jS(N(RYtmd5R](tx((s/usr/lib64/python2.7/urllib2.pyRAstSHAcSstj|jS(N(RYRZR](Rr((s/usr/lib64/python2.7/urllib2.pyRCscsd||fS(Ns%s:%s((RHtd(Rh(s/usr/lib64/python2.7/urllib2.pyREs(tupper(R.RbRi((Rhs/usr/lib64/python2.7/urllib2.pyRf<s     cCsdS(N(R(R.R$RV((s/usr/lib64/python2.7/urllib2.pyRgHsN( R1R2RR/R%R2RPR`RSRfRg(((s/usr/lib64/python2.7/urllib2.pyRKs   = tHTTPDigestAuthHandlercBs#eZdZdZdZdZRS(sAn authentication protocol defined by RFC 2069 Digest authentication improves on basic authentication because it does not transmit passwords in the clear. R;icCs?tj|jd}|jd|||}|j|S(Niswww-authenticate(R@R?R2R%(R.RR7R4R5RNRFtretry((s/usr/lib64/python2.7/urllib2.pyR<Ws   (R1R2R;R3RR<(((s/usr/lib64/python2.7/urllib2.pyRvMstProxyDigestAuthHandlercBseZdZdZdZRS(sProxy-AuthorizationicCs2|j}|jd|||}|j|S(Nsproxy-authenticate(RgR2R%(R.RR7R4R5RNRFRw((s/usr/lib64/python2.7/urllib2.pyR>ds    (R1R2R3RR>(((s/usr/lib64/python2.7/urllib2.pyRx_stAbstractHTTPHandlercBs/eZddZdZdZdZRS(icCs ||_dS(N(t _debuglevel(R.t debuglevel((s/usr/lib64/python2.7/urllib2.pyR/mscCs ||_dS(N(Rz(R.tlevel((s/usr/lib64/python2.7/urllib2.pytset_http_debuglevelpsc Cs:|j}|s!tdn|jr|j}|jds[|jddn|jds|jddt|qn|}|jrt|j \}}t |\}}n|jds|jd|nxH|j j D]:\}} |j }|j|s|j|| qqW|S(Ns no host givens Content-types!application/x-www-form-urlencodedsContent-lengths%dR>(RgR+R`RcRrRpRRkRRhRRR{Rn( R.RERFR$tsel_hostRtseltsel_pathR\RU((s/usr/lib64/python2.7/urllib2.pyt do_request_ss.      c  s|j}|s!tdn||d|j|}|j|jt|jjtfd|jj Ddds Rt Connectioncss'|]\}}|j|fVqdS(N(ttitle(RR\Ro((s/usr/lib64/python2.7/urllib2.pys ssProxy-AuthorizationRNt buffering(RgR+R%tset_debuglevelRzRRQRvRNRORMt set_tunnelRERaRhR$RRRt getresponseRRRtrecvt _fileobjectRR5R?tstatusR4R-( R.t http_classRthttp_conn_argsRFRttunnel_headerstproxy_auth_hdrterrRR7RX((RNs/usr/lib64/python2.7/urllib2.pyRs@ ,    )     (R1R2R/R}RR(((s/usr/lib64/python2.7/urllib2.pyRyks   RcBseZdZejZRS(cCs|jtj|S(N(RRtHTTPConnection(R.R((s/usr/lib64/python2.7/urllib2.pyt http_opens(R1R2RRyRt http_request(((s/usr/lib64/python2.7/urllib2.pyRs RRcBs,eZddddZdZejZRS(icCs&tj||||_||_dS(N(RyR/t_contextt_check_hostname(R.R{RR((s/usr/lib64/python2.7/urllib2.pyR/s cCs%|jtj|d|jd|jS(NRR(RRtHTTPSConnectionRR(R.R((s/usr/lib64/python2.7/urllib2.pyt https_opensN(R1R2RR/RRyRt https_request(((s/usr/lib64/python2.7/urllib2.pyRs tHTTPCookieProcessorcBs2eZddZdZdZeZeZRS(cCs4ddl}|dkr'|j}n||_dS(Ni(t cookielibRt CookieJart cookiejar(R.RR((s/usr/lib64/python2.7/urllib2.pyR/s  cCs|jj||S(N(Rtadd_cookie_header(R.RE((s/usr/lib64/python2.7/urllib2.pyRscCs|jj|||S(N(Rtextract_cookies(R.RER((s/usr/lib64/python2.7/urllib2.pyRsN(R1R2RR/RRRR(((s/usr/lib64/python2.7/urllib2.pyRs    RcBseZdZRS(cCs |j}td|dS(Nsunknown url type: %s(ReR+(R.RRK((s/usr/lib64/python2.7/urllib2.pyRs (R1R2R(((s/usr/lib64/python2.7/urllib2.pyRscCsmi}x`|D]X}|jdd\}}|ddkr[|ddkr[|dd!}n|||Parse list of key=value strings where keys are not duplicated.t=iiR&i(RO(tltparsedteltRR((s/usr/lib64/python2.7/urllib2.pyRQs  cCsg}d}t}}x|D]}|r?||7}t}qn|r|dkr]t}qn|dkrrt}n||7}qn|dkr|j|d}qn|dkrt}n||7}qW|r|j|ng|D]}|j^qS(spParse lists as described by RFC 2068 Section 2. In particular, parse comma-separated lists where the elements of the list may include quoted-strings. A quoted-string could contain a comma. A non-quoted string could have quotes in the middle. Neither commas nor quotes count if they are escaped. Only double-quotes count, not single-quotes. R=s\R&t,(RxRRR(RHtrestparttescapeRtcur((s/usr/lib64/python2.7/urllib2.pyRRs4            cCs-ytj|SWntjk r(dSXdS(N(Rt gethostbynametgaierrorR(RF((s/usr/lib64/python2.7/urllib2.pyt_safe_gethostbyname:sRcBs)eZdZdZdZdZRS(cCsq|j}|d dkr`|dd!dkr`|jr`|jdkr`d|_|jj|S|j|SdS(Nis//iRRtftp(RhRFRKRR"topen_local_file(R.RR#((s/usr/lib64/python2.7/urllib2.pyt file_openBs  , cCs|tjdkruy7ttjddtjtjdt_Wqutjk rqtjdft_quXntjS(NRi( RtnamesRR Rtgethostbyname_ext gethostnameRR(R.((s/usr/lib64/python2.7/urllib2.pyt get_namesMs$cCs[ddl}ddl}|j}|j}t|}ytj|}|j}|jj |j dt } |j |d} t jtd| pd|| f} |rt|\}} n| s| r(t||jkr(|rd||} n d|} tt|d| | SWntk rJ}t|nXtddS( Nitusegmtis6Content-type: %s Content-length: %d Last-modified: %s s text/plainsfile://trbsfile not on local host(t email.utilst mimetypesRgRhRR?tstattst_sizetutilst formatdatetst_mtimeRt guess_typet mimetoolstMessageRRRRRR"tOSErrorR+(R.RtemailRRFR8t localfiletstatstsizetmodifiedtmtypeRNRLtorigurlR5((s/usr/lib64/python2.7/urllib2.pyRXs0        N(R1R2RRRRR(((s/usr/lib64/python2.7/urllib2.pyR@s  RcBseZdZdZRS(cCsddl}ddl}|j}|s9tdnt|\}}|dkrc|j}n t|}t|\}}|rt |\}}nd}t |}|pd}|pd}yt j |}Wn"t j k r}t|nXt|j\} } | jd} tt | } | d | d} } | rg| d rg| d} ny/|j||||| |j} | rdpd}xM| D]E}t|\}}|jd kr|dkr|j}qqW| j| |\}}d}|j|jd}|r;|d|7}n|dk rd|dkrd|d|7}nt|}tj|}t|||jSWn0|jk r}td|tj dnXdS(Nisftp error: no host givenR=RiiR9tDRKtatARRtsContent-type: %s sContent-length: %d s ftp error: %si(RRRR9RtR(!tftplibRRgR+RRtFTP_PORTRR R RRRRR RhROtmapt connect_ftpR%RRDRutretrfileRR?RRRRt all_errorstsystexc_info(R.RRRRFRLRR R5RtattrstdirstfiletfwRKR[RUR7tretrlenRNRtsf((s/usr/lib64/python2.7/urllib2.pytftp_openus\          !   c Cs%t||||||dt}|S(Nt persistent(R Rx(R.RR RFRLRR%R((s/usr/lib64/python2.7/urllib2.pyRs (R1R2RR(((s/usr/lib64/python2.7/urllib2.pyRts 5tCacheFTPHandlercBs>eZdZdZdZdZdZdZRS(cCs1i|_i|_d|_d|_d|_dS(Nii<i(tcacheR%tsoonesttdelayt max_conns(R.((s/usr/lib64/python2.7/urllib2.pyR/s     cCs ||_dS(N(R(R.tt((s/usr/lib64/python2.7/urllib2.pyt setTimeoutscCs ||_dS(N(R(R.R((s/usr/lib64/python2.7/urllib2.pyt setMaxConnsscCs|||dj||f}||jkrJtj|j|j|Ls                 ^"     r 'i H-@ ?   n  + 4<