# -*- coding: euc-kr -*-
import re
import time
import urllib
import urllib2
import ClientCookie
import thread
from threading import *
import re

MAX_CALL_COUNT = 20


## @brief Class that retrieves each definition result from a site's search
#         results.
# @author Nam Sang-hyeop
# @date 2007-02-14
class TextExtractor:
    ## @brief Build the URL opener shared by all requests of this instance.
    #
    # The opener is assembled from ClientCookie's cookie processor (backed by
    # a fresh CookieJar) plus its seekable, http-equiv, refresh and referer
    # processors.
    def __init__(self):
        self.urlOpener = ClientCookie.build_opener(
            ClientCookie.HTTPCookieProcessor(ClientCookie.CookieJar()),
            ClientCookie.SeekableProcessor,
            ClientCookie.HTTPEquivProcessor,
            ClientCookie.HTTPRefreshProcessor,
            ClientCookie.HTTPRefererProcessor)

    ## @brief Download a fixed MSDN library page and return its raw body.
    #
    # @return The response body as read from the opener, or an empty string
    #         when the request fails for any reason (the error is printed,
    #         not raised, so callers always get a string back).
    def getTextFromMSDN(self):
        theUrl = 'http://msdn2.microsoft.com/zh-cn/library/ms130214.aspx'
        # NOTE: an earlier revision queried Google's "define:" search here:
        # theUrl = 'http://www.google.co.kr/search?hl=ko&q=define:"'+word+'"'
        req = urllib2.Request(theUrl)
        # Present a browser-like User-Agent with the request.
        req.add_header('User-Agent',
                       'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
        try:
            f = self.urlOpener.open(req)
            try:
                data = f.read()
            finally:
                # Release the connection even when read() fails midway.
                f.close()
        except Exception as e:
            # Best-effort fetch: report the problem and fall back to "".
            print(e)
            data = ""
        return data


if __name__ == "__main__":
    # Fetch the page once and dump it to result.txt in the working directory.
    extractor = TextExtractor()
    data = extractor.getTextFromMSDN()
    # The with-block closes the file even if the write raises.
    with open("result.txt", "w") as out:
        out.write(data)