~cpp
import os
import time
import re
import urllib
# First call to time.clock(); main() calls it again when it reports the
# elapsed time at the end of the run.
time.clock()

print("""이 프로그램은 www.replays.co.kr의 스타크래프트 리플레이를
모아주는 프로그램 입니다.""")

# Interactive input: the gamer name to look for and the base save directory.
name = raw_input("검색하고 싶은 게이머의 이름을 입력하세요 : ")
inputDir = raw_input("""저장 하고 싶은 경로를 지정하세요.(예>c:\\\\replay\\\\) : """)

# Shared module-level settings read by main() and saveRep().  No ``global``
# declarations are needed here: names bound at module scope are already
# module globals.
keyGamer = saveDirName = name  # search key doubles as the sub-directory name
keyRace = ''                   # not read by main() or saveRep()
savedNum = 0                   # download counter, updated by saveRep()
defaultDir = inputDir          # e.g. 'D:\\Unzip\\star\\'
def main():
    """Crawl the replays.co.kr best-replay board and save matching replays.

    Steps:
      1. Fetch the board front page (a copy is dumped to ``temp.html`` in the
         current directory) and read the newest post number from the line
         that carries the ``[1]`` link.
      2. Request listing pages, stepping the post number down by ``itemNum``
         each time until it reaches zero.
      3. Hand every listing line that mentions ``keyGamer`` to ``saveRep``.
      4. Print a summary using ``saveDirName``, ``savedNum`` and the clock.

    Reads the module globals ``keyGamer``, ``saveDirName`` and ``savedNum``.
    A failure while saving one replay is reported and the crawl continues;
    Ctrl-C is no longer swallowed.
    """
    url = 'http://www.replays.co.kr/technote/main.cgi?board=bestreplay_pds/'
    print('%s replay.' % keyGamer)
    print('going to that page...')
    board = urllib.urlopen(url)
    try:
        lines = board.readlines()
    finally:
        board.close()
    print('reading page....')
    with open('temp.html', 'w') as dump:
        dump.writelines(lines)

    # Find the newest post number.  Matching a compiled pattern against a
    # string does not raise, so no exception handler is needed here.
    newest_pattern = re.compile(r'(^<TABLE.*<a.*number=)(.*)&view=2.*\[1\].*')
    print('pattern searching...')
    replayNum = 0
    for line in lines:
        found = newest_pattern.match(line)
        if found:
            replayNum = found.group(2)
            print(replayNum)

    # Walk the listing pages looking for the gamer's name.
    itemNum = 15  # step between requests; presumably the entries per page
    # The name is user input, so escape it: gamer IDs often contain regex
    # metacharacters such as '[' or '.'.  Compiled once, outside the loop.
    entry_pattern = re.compile(
        r'.*<a.*<a.*".(.*)".*' + re.escape(keyGamer) + '.*')
    page_numbers = range(int(replayNum), 0, -itemNum)
    for page_no, number in enumerate(page_numbers, 1):
        print('★★★ %s %d번째 페이지 탐색중....' % (keyGamer, page_no))
        beReadingUrl = 'http://www.replays.co.kr/technote/main.cgi?board=bestreplay_pds&number=%d&view=2&howmanytext=' % number
        listing = urllib.urlopen(beReadingUrl)
        try:
            lines = listing.readlines()
        finally:
            listing.close()
        for line in lines:
            found = entry_pattern.match(line)
            if not found:
                continue
            choicedRepUrl = 'http://www.replays.co.kr/technote' + found.group(1)
            try:
                saveRep(choicedRepUrl)
            except Exception as err:
                # Best effort: report the failure and move on to the next
                # entry.  KeyboardInterrupt/SystemExit are not caught.
                print('except: %s' % (err,))

    for _ in range(4):
        print('.')
    print('☆☆☆☆☆☆☆☆완료!!!☆☆☆☆☆☆☆☆☆')
    print('%s 하위디렉토리에 총 %d 개의 리플레이를 저장하였습니다.' % (saveDirName, savedNum))
    print('경과시간 : 약 %d 분' % (int(time.clock() / 60) + 1))
def saveRep(url):
    """Download the ``.rep`` attachment(s) linked from one replay post page.

    Args:
        url: Address of the post page to scan for download links.

    Each line of the page that starts with an ``<a href=`` download link
    whose ``filename=`` value ends in ``.rep`` is fetched and written to
    ``<defaultDir>/<saveDirName>/<file name>``; the directory is created on
    demand.  The module-level ``savedNum`` counter is increased only after a
    file has actually been written, so it reflects the number of saved
    replays.

    Errors while fetching the post page itself propagate to the caller.
    Errors for an individual download are reported and skipped.
    """
    global savedNum
    page = urllib.urlopen(url)
    try:
        lines = page.readlines()
    finally:
        page.close()

    # The dot before "rep" is escaped so that only a real ".rep" suffix
    # matches.
    link_pattern = re.compile(r'^<a href=".(.*filename=(.*\.rep).*)".*')
    # os.path.join copes with a base directory given with or without a
    # trailing separator and is not tied to the Windows backslash.
    target_dir = os.path.join(defaultDir, saveDirName)

    for line in lines:
        found = link_pattern.match(line)
        if not found:
            continue
        downUrl = 'http://www.replays.co.kr/technote' + found.group(1)
        # The name comes from the remote page: keep only its final path
        # component so it cannot point outside the target directory.
        fileName = os.path.basename(found.group(2))
        ordinal = savedNum + 1
        try:
            remote = urllib.urlopen(downUrl)
            try:
                print(' %d 번째 rep화일저장중 : %s ' % (ordinal, fileName))
                if not os.path.isdir(target_dir):
                    os.makedirs(target_dir)
                with open(os.path.join(target_dir, fileName), 'wb') as fp:
                    fp.write(remote.read())
            finally:
                remote.close()
        except Exception as err:
            # Best effort: report the failure and try the next link.
            print('except: %s' % (err,))
        else:
            savedNum = ordinal
# Start the crawl only when this file is executed as a script.
if "__main__" == __name__:
    main()