# Unit tests for the WikiPageGather class.
#
# NOTE(review): testGetPageNamesFromPage and testGetWikiPage read the real
# "FrontPage" file from the directory WikiPageGather uses by default, so they
# only pass on a machine that has that wiki data in place.
import unittest

from WikiPageGather import WikiPageGather


class WikiPageGatherTestCase(unittest.TestCase):
    """Exercise every public method of WikiPageGather."""

    def setUp(self):
        # A fresh gatherer per test so SetPage() state never leaks between tests.
        self.pageGather = WikiPageGather()

    def tearDown(self):
        self.pageGather = None

    def testConvertWikiPageNameToMoinFileName(self):
        # Non-alphanumeric bytes are escaped as "_xx"; the expected value is
        # the EUC-KR byte sequence of the Korean page name.
        self.assertEqual(
            self.pageGather.WikiPageNameToMoinFileName('''한글테스트'''),
            '''_c7_d1_b1_db_c5_d7_bd_ba_c6_ae''')
        # Plain ASCII letters and digits pass through untouched.
        self.assertEqual(
            self.pageGather.WikiPageNameToMoinFileName("FrontPage"),
            "FrontPage")

    def testGetPageNamesFromPage(self):
        self.pageGather.SetPage("FrontPage")
        self.assertEqual(
            self.pageGather.GetPageNamesFromPage(),
            ["LearningHowToLearn", "ActiveX", "Python", "XPInstalled",
             "TestFirstProgramming", "한글테스트", "PrevFrontPage"])

    def testGetPageNamesFromString(self):
        # Only words made of two or more capitalised parts count as page names.
        strings = "Test First In TestFirstIn TesF TestFi guuweo StringIn"
        self.assertEqual(
            self.pageGather.GetPageNamesFromString(strings),
            ["TestFirstIn", "TestFi", "StringIn"])
        # ["..."] links are page names too, with the brackets and quotes removed.
        strings = '''["Testing"] ["Testing The Program"] higu TestFirst twet'''
        self.assertEqual(
            self.pageGather.GetPageNamesFromString(strings),
            ["Testing", "Testing The Program", "TestFirst"])

    def testIsHeadTagLine(self):
        strings = "== testing =="
        self.assertEqual(self.pageGather.IsHeadTagLine(strings), 1)
        strings = "tese ewfe ewfw"
        self.assertEqual(self.pageGather.IsHeadTagLine(strings), 0)

    def testRemoveHeadLine(self):
        strings = '''=== ExtremeProgramming ===\ntesting.. -_-a\n== TestFirst Programmin ==\nfwe\n'''
        self.assertEqual(
            self.pageGather.RemoveHeadLine(strings),
            "testing.. -_-a\nfwe\n")

    def testGetWikiPage(self):
        # The Korean link was stored as mojibake of its EUC-KR bytes; it is
        # written here as the same page name the other tests use.
        self.assertEqual(
            self.pageGather.GetWikiPage("FrontPage"),
            '=== Reading ===\n' +
            '["LearningHowToLearn"]\n\n\n=== C++ ===\n["ActiveX"]\n\n' +
            '[[Include(ActiveX,Test,1)]]\n\n=== Python ===\n["Python"]\n\n' +
            '=== ExtremeProgramming ===\n * ["XPInstalled"]\n * TestFirstProgramming\n' +
            ' * ["한글테스트"]\n\n----\n["PrevFrontPage"]\n\n----\n')


# The suite still runs as soon as this module is executed, as before.
# TestLoader collects methods whose names start with "test" by default.
suite = unittest.TestLoader().loadTestsFromTestCase(WikiPageGatherTestCase)
runner = unittest.TextTestRunner()
runner.run(suite)
# WikiPageGather: list the wiki page names that a MoinMoin page links to.
#
# Written in syntax that is valid on both Python 2.7 and Python 3.
import io
import re
import string
import urllib


class WikiPageGather:
    """Read a wiki page from disk and extract the page names it mentions."""

    # Directory holding the raw page files. It must end with a path separator
    # because file names are appended to it by plain string concatenation.
    DEFAULT_PAGE_PATH = "f:\\web\\wiki-moinmoin\\data\\text\\"

    # Encoding of page names and page files. The "_c7_d1..." file name that
    # the Korean page name maps to is its EUC-KR byte sequence.
    PAGE_ENCODING = "euc-kr"

    # Bytes that are kept verbatim in a file name; everything else is escaped.
    _SAFE_CHARS = frozenset(string.ascii_letters + string.digits)

    # Group 1: a WikiWord (two or more capitalised parts).
    # Group 3: a bracketed link of the form ["free text"].
    _PAGE_NAME_RE = re.compile(r'''(([A-Z][a-z0-9]+){2,})|(\[".*?"\])''')

    def __init__(self, pagePath=None):
        """Create a gatherer.

        pagePath -- directory of the page files, including the trailing
                    separator; defaults to DEFAULT_PAGE_PATH.
        """
        if pagePath is None:
            pagePath = self.DEFAULT_PAGE_PATH
        self.pagePath = pagePath
        self.pagename = ''

    def WikiPageNameToMoinFileName(self, pagename):
        """Return the on-disk file name for *pagename*.

        ASCII letters and digits are kept; every other byte becomes "_xx"
        (two lowercase hex digits). Text is first encoded with PAGE_ENCODING,
        so a name that cannot be encoded raises UnicodeEncodeError. A byte
        string is escaped as given.
        """
        if not isinstance(pagename, bytes):
            pagename = pagename.encode(self.PAGE_ENCODING)
        parts = []
        # bytearray yields integers on both Python 2 and Python 3.
        for byte in bytearray(pagename):
            char = chr(byte)
            if char in self._SAFE_CHARS:
                parts.append(char)
            else:
                parts.append('_%02x' % byte)
        return ''.join(parts)

    def SetPage(self, pagename):
        """Remember *pagename* as the page GetPageNamesFromPage() will read."""
        self.pagename = pagename

    def GetWikiPage(self, pagename):
        """Return the full text of the page named *pagename*.

        The file is decoded with PAGE_ENCODING. Errors from opening or
        reading the file propagate to the caller.
        """
        fullpathname = self.pagePath + self.WikiPageNameToMoinFileName(pagename)
        # "with" closes the file even when reading fails.
        with io.open(fullpathname, 'r', encoding=self.PAGE_ENCODING) as pagefile:
            return pagefile.read()

    def RemoveHeadLine(self, lines):
        """Return *lines* without heading lines and without empty lines.

        *lines* is one string; it is split on "\\n" and every kept line is
        written back followed by "\\n".
        """
        kept = [line for line in lines.split("\n")
                if line != '' and not self.IsHeadTagLine(line)]
        return ''.join(line + "\n" for line in kept)

    def IsHeadTagLine(self, strings):
        """Return 1 if *strings* starts with "=", otherwise 0.

        An empty string is not a heading and yields 0.
        """
        if strings.startswith('='):
            return 1
        return 0

    def GetPageNamesFromPage(self):
        """Return the page names found in the page chosen with SetPage()."""
        page = self.RemoveHeadLine(self.GetWikiPage(self.pagename))
        return self.GetPageNamesFromString(page)

    def GetPageNamesFromString(self, strings):
        """Return the page names mentioned in *strings*, in order of appearance.

        A page name is either a WikiWord or a ["..."] link; for a link the
        surrounding brackets and quotes are removed.
        """
        pagenamelist = []
        for wikiword, _, bracketed in self._PAGE_NAME_RE.findall(strings):
            if wikiword != '':
                pagenamelist.append(wikiword)
            elif bracketed != '':
                realname = bracketed.replace('''["''', '')
                realname = realname.replace('''"]''', '')
                pagenamelist.append(realname)
        return pagenamelist

    def main(self):
        """Print each page name linked from FrontPage with its file name."""
        self.SetPage("FrontPage")
        for pagename in self.GetPageNamesFromPage():
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("pagename : " + pagename)
            print("filename : " + self.WikiPageNameToMoinFileName(pagename))


if __name__ == "__main__":
    pageGather = WikiPageGather()
    pageGather.main()
~cpp pagename : LearningHowToLearn filename : LearningHowToLearn pagename : ActiveX filename : ActiveX pagename : Python filename : Python pagename : XPInstalled filename : XPInstalled pagename : TestFirstProgramming filename : TestFirstProgramming pagename : 한글테스트 filename : _c7_d1_b1_db_c5_d7_bd_ba_c6_ae pagename : PrevFrontPage filename : PrevFrontPage