E D R , A S I H C RSS

웹에 요청할 때 Agent 바꾸는 방법

# -*- coding: euc-kr -*-
import re
import time
import urllib
import urllib2
import ClientCookie
import thread
from threading import *
import re
MAX_CALL_COUNT = 20
## @brief  Class that retrieves each definition result from a site's search results.
#
# As written, it downloads one fixed MSDN page while sending a custom
# User-Agent header and returns the raw response body.
# @author 남상협
# @date 2007-02-14
class TextExtractor:
    ## @brief URL of the page downloaded by getTextFromMSDN().
    MSDN_URL = 'http://msdn2.microsoft.com/zh-cn/library/ms130214.aspx'
    ## @brief User-Agent header value attached to every request.
    USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

    ## @brief Builds the opener used for all requests and stores it in self.urlOpener.
    #
    # The opener is assembled from ClientCookie's cookie processor (backed by
    # a fresh CookieJar) plus its Seekable, HTTPEquiv, HTTPRefresh and
    # HTTPReferer processors.
    def __init__(self):
        self.urlOpener = ClientCookie.build_opener(
            ClientCookie.HTTPCookieProcessor(ClientCookie.CookieJar()),
            ClientCookie.SeekableProcessor,
            ClientCookie.HTTPEquivProcessor,
            ClientCookie.HTTPRefreshProcessor,
            ClientCookie.HTTPRefererProcessor)

    ## @brief Downloads MSDN_URL with the custom User-Agent header.
    #
    # @return The response body as returned by read(), or "" when opening,
    #         reading or closing the response raises; in that case the
    #         exception is printed and not propagated (best-effort fetch).
    def getTextFromMSDN(self):
        req = urllib2.Request(self.MSDN_URL)
        # Override the library's default agent string with a browser-like one.
        req.add_header('User-Agent', self.USER_AGENT)
        try:
            f = self.urlOpener.open(req)
            try:
                data = f.read()
            finally:
                # Close the response even when read() fails part-way.
                f.close()
        except Exception as e:
            print(e)
            data = ""
        return data
if __name__ == "__main__":
    # Script entry point: fetch the page and save its body to result.txt in
    # the current working directory (an empty file is written when the fetch
    # fails, because getTextFromMSDN() returns "" on error).
    extractor = TextExtractor()
    data = extractor.getTextFromMSDN()
    # open() replaces the deprecated file() builtin; the with-block closes the
    # handle even if write() raises.
    with open("result.txt", "w") as f:
        f.write(data)
Valid XHTML 1.0! Valid CSS! powered by MoniWiki
last modified 2021-02-07 05:30:27
Processing time 0.0095 sec