# (wiki export residue) No older revisions available
# -*- coding: euc-kr -*-
import re
import time
import urllib
import urllib2
import ClientCookie
import thread
from threading import *
import re
# NOTE(review): not referenced anywhere in the visible code; presumably an
# upper bound on the number of calls/requests -- TODO confirm or remove.
MAX_CALL_COUNT = 20
## @brief Class that fetches each definition result from a site's search results
# @author 남상협
# @date 2007-02-14
class TextExtractor:
    """Download the raw content of a fixed MSDN library page.

    Every request is sent through a single ClientCookie opener that is
    built once per instance in __init__ and stored as ``self.urlOpener``.
    """

    # Page requested by getTextFromMSDN().
    MSDN_URL = 'http://msdn2.microsoft.com/zh-cn/library/ms130214.aspx'
    # User-Agent header value sent with the request.
    USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

    def __init__(self):
        """Build the opener used for all requests made by this instance.

        The opener is given a cookie processor backed by a fresh CookieJar
        plus the Seekable, HTTPEquiv, HTTPRefresh and HTTPReferer
        processors from ClientCookie.
        """
        cookieJar = ClientCookie.CookieJar()
        self.urlOpener = ClientCookie.build_opener(
            ClientCookie.HTTPCookieProcessor(cookieJar),
            ClientCookie.SeekableProcessor,
            ClientCookie.HTTPEquivProcessor,
            ClientCookie.HTTPRefreshProcessor,
            ClientCookie.HTTPRefererProcessor)

    def getTextFromMSDN(self):
        """Fetch MSDN_URL and return the response body.

        Returns:
            Whatever ``read()`` on the response yields, or the empty string
            ``""`` if opening or reading raised any exception. In that case
            the exception is printed to standard output (best-effort fetch;
            the error is deliberately not propagated).
        """
        req = urllib2.Request(self.MSDN_URL)
        req.add_header('User-Agent', self.USER_AGENT)
        try:
            response = self.urlOpener.open(req)
            # Close the response even when read() fails, so the underlying
            # connection is not leaked on a partial/aborted download.
            try:
                data = response.read()
            finally:
                response.close()
        except Exception as e:
            print(e)
            data = ""
        return data
if __name__ == "__main__":
    # Script entry point: download the MSDN page and store its content in
    # result.txt in the current working directory.
    pageText = TextExtractor().getTextFromMSDN()
    # open() replaces the Python 2-only file() builtin, and the with-block
    # guarantees the file is closed even if write() raises.
    with open("result.txt", "w") as out:
        out.write(pageText)