어느 정도 진행되면 올리는 소스
버전 정보
1월 21일
분석기(Parser)가 사전을 보고 동사를 찾는다.
VP를 찾는다.
VP를 찾는다.
1월 20일
단어를 어근에 따라 저장하는 사전이 있다. 그 사전에서 으뜸꼴(기본형)을 검색해서 동사를 찾아낸다.
어근이 변하는 동사는 조합형으로 변환해 으뜸꼴을 찾는다.
어근이 변하는 동사는 조합형으로 변환해 으뜸꼴을 찾는다.
소스
# EazyWord.py
# coding: euc-kr
"""A word root plus the case tags attached to it."""


class EazyWord:
    """Holds one word root and the list of case tags added for it."""

    def __init__(self):
        # Initialise the root too, so getRoot() on a fresh word returns None
        # instead of raising AttributeError.
        self.root = None
        self.cases = []

    def setRoot(self, aRoot):
        """Store *aRoot* as the root of this word."""
        self.root = aRoot

    def addCase(self, aCase):
        """Append the case tag *aCase* (for example 'AOV')."""
        self.cases.append(aCase)

    def getCases(self):
        """Return the case tags added so far as a tuple, in insertion order."""
        return tuple(self.cases)

    def getRoot(self):
        """Return the root given to setRoot(), or None if none was set."""
        return self.root
# EazyWordTest.py
# NOTE: the original file declared euc-kr as its source encoding. The Korean
# literal below is plain Unicode text here, so the declaration is omitted;
# put it back on the first line if the file is re-saved as EUC-KR.
"""Unit tests for EazyWord."""

import unittest

from EazyWord import EazyWord


class EazyWordTestCase(unittest.TestCase):
    """Checks that case tags added to a word are reported back."""

    def setUp(self):
        self.word = EazyWord()
        self.word.setRoot('먹')
        self.word.addCase('AOV')

    def testAddCase(self):
        # assertEqual rather than the assertEquals alias: the alias is
        # deprecated and no longer exists in unittest from Python 3.12 on.
        self.assertEqual(('AOV',), self.word.getCases())


if __name__ == '__main__':
    unittest.main()
# EazyDic.py
# coding: euc-kr
"""Dictionary that maps word roots to their case tags."""


class EazyDic:
    """Stores words keyed by root; the value is the word's tuple of cases."""

    def __init__(self):
        # root -> value returned by the word's getCases()
        self.words = {}

    def addWord(self, aWord):
        """Register *aWord* under its root.

        *aWord* must provide getRoot() and getCases(). A word added later
        with the same root replaces the earlier entry.
        """
        self.words[aWord.getRoot()] = aWord.getCases()

    def getRoots(self):
        """Return a list of all registered roots.

        A new list is returned on every call (not a live dict view), so the
        result compares equal to plain lists and does not change when words
        are added afterwards.
        """
        return list(self.words)
# EazyDicTest.py
# NOTE: the original file declared euc-kr as its source encoding. The Korean
# literals below are plain Unicode text here, so the declaration is omitted;
# put it back on the first line if the file is re-saved as EUC-KR.
"""Unit tests for EazyDic."""

import unittest

from EazyWord import EazyWord
from EazyDic import EazyDic


class EazyDicTestCase(unittest.TestCase):
    """Checks that words added to the dictionary are listed by root."""

    def setUp(self):
        # The original fixture also built an EazyParser here, but that name
        # was never imported (NameError) and the dictionary tests do not use
        # a parser, so it is dropped.
        self.dic = EazyDic()

        self.muk = EazyWord()
        self.muk.setRoot('먹')
        self.muk.addCase('AOV')

        self.ga = EazyWord()
        self.ga.setRoot('가')
        self.ga.addCase('ALV')

    def testAddWord(self):
        # Exercises the fixture: the original class defined no test method,
        # so the suite built from it was empty.
        self.dic.addWord(self.muk)
        self.dic.addWord(self.ga)
        self.assertEqual(['가', '먹'], sorted(self.dic.getRoots()))


if __name__ == '__main__':
    unittest.main()
# EazyParser.py
# NOTE: the original file declared euc-kr as its source encoding. The Korean
# literal below is plain Unicode text here, so the declaration is omitted;
# put it back on the first line if the file is re-saved as EUC-KR.
"""Toy Korean sentence parser: finds the NP, the VP and the verb's base form."""

import string
import unicodedata

try:
    import hangul
except ImportError:
    # Third-party package not installed: _disjoint() falls back to the
    # standard library instead of making this module unimportable.
    hangul = None


def u(x):
    """Return *x* as Unicode text, decoding byte strings as EUC-KR.

    Text that is already Unicode is returned unchanged, so the helper works
    for Python 2 byte-string literals as well as for Python 3 str.
    """
    if isinstance(x, bytes):
        return x.decode("euc-kr")
    return x


def _disjoint(text):
    """Break the Hangul syllables of *text* apart into individual jamo.

    Uses hangul.disjoint when the package is available (presumably it
    returns the jamo sequence of its argument -- confirm against the
    package). Otherwise Unicode NFD normalisation is used, which decomposes
    precomposed Hangul syllables into conjoining jamo.
    """
    if hangul is not None:
        return hangul.disjoint(text)
    return unicodedata.normalize("NFD", text)


class EazyParser:
    """Looks words up in a dictionary object that provides getRoots()."""

    def __init__(self):
        # No dictionary until updateDictionary() is called; lookups then
        # simply find nothing instead of raising AttributeError.
        self.dic = None

    def updateDictionary(self, aDict):
        """Use *aDict* (any object with a getRoots() method) for lookups."""
        self.dic = aDict

    def findFundermentalForm(self, aWord):
        """Return the base form (root + '다') of the verb in *aWord*.

        Both the word and each dictionary root are decomposed into jamo
        before comparing, so a root still matches when a final consonant has
        been attached to its last syllable (e.g. root '가' inside '간다').
        Returns None when no root matches or no dictionary is set.
        """
        if self.dic is None:
            return None
        johabWord = _disjoint(u(aWord))
        for root in self.dic.getRoots():
            if _disjoint(u(root)) in johabWord:
                return root + '다'
        return None

    def findVerb(self, aSentence):
        """Return the base form of the verb in the sentence's last token.

        Punctuation is stripped from the token first. Returns None for an
        empty or whitespace-only sentence, or when no root matches.
        """
        tokens = aSentence.split()
        if not tokens:
            return None
        return self.findFundermentalForm(self.removeMark(tokens[-1]))

    def removeMark(self, aPhrase):
        """Return *aPhrase* with every ASCII punctuation character removed."""
        for mark in string.punctuation:
            aPhrase = aPhrase.replace(mark, '')
        return aPhrase

    def findVP(self, aSentence):
        """Return the sentence without its first space-separated token.

        The first token is what findNP() returns; the remaining tokens are
        joined back with single spaces.
        """
        # ' '.join replaces string.join, which does not exist on Python 3.
        return ' '.join(aSentence.split(' ')[1:])

    def findNP(self, aSentence):
        """Return the first space-separated token of *aSentence*."""
        return aSentence.split(' ')[0]
# EazyParserTest.py
# NOTE: the original file declared euc-kr as its source encoding. The Korean
# literals below are plain Unicode text here, so the declaration is omitted;
# put it back on the first line if the file is re-saved as EUC-KR.
"""Unit tests for EazyParser."""

import unittest

from EazyWord import EazyWord
from EazyDic import EazyDic
from EazyParser import EazyParser


class EazyParserTestCase(unittest.TestCase):
    """Verb, NP and VP extraction against a small two-word vocabulary."""

    def setUp(self):
        self.parser = EazyParser()
        self.dic = EazyDic()

        # Only the root '먹' is registered up front.
        self.muk = EazyWord()
        self.muk.setRoot('먹')
        self.muk.addCase('AOV')
        self.dic.addWord(self.muk)
        self.parser.updateDictionary(self.dic)

        # '가' is prepared but NOT added here; tests add it when needed.
        self.ga = EazyWord()
        self.ga.setRoot('가')
        self.ga.addCase('ALV')

    # assertEqual is used throughout instead of the assertEquals alias,
    # which is deprecated and no longer exists from Python 3.12 on.

    def testFindFundermentalForm(self):
        self.assertEqual('먹다', self.parser.findFundermentalForm('먹는다'))
        self.dic.addWord(self.ga)
        self.parser.updateDictionary(self.dic)
        self.assertEqual('가다', self.parser.findFundermentalForm('간다'))

    def testFindVerb(self):
        self.assertEqual('먹다', self.parser.findVerb('나는 밥을 먹는다.'))
        # '가' is not in the dictionary in this test, so nothing is found.
        self.assertEqual(None, self.parser.findVerb('나는 학교에 간다.'))

    def testFindVP(self):
        self.assertEqual('밥을 먹는다.', self.parser.findVP('나는 밥을 먹는다.'))

    def testFindNP(self):
        self.assertEqual('밥을', self.parser.findNP('밥을 내가 먹는다.'))


if __name__ == '__main__':
    unittest.main()
# AllTest.py
"""Run the EazyParser, EazyWord and EazyDic test cases as one suite."""

import unittest

# Explicit imports instead of `from ... import *`, so the three test modules
# cannot silently shadow each other's names.
from EazyParserTest import EazyParserTestCase
from EazyWordTest import EazyWordTestCase
from EazyDicTest import EazyDicTestCase

if __name__ == '__main__':
    # TestLoader.loadTestsFromTestCase collects the methods whose names start
    # with 'test', as makeSuite(case, 'test') did; unittest.makeSuite itself
    # is deprecated and removed in Python 3.13.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    for case in (EazyParserTestCase, EazyWordTestCase, EazyDicTestCase):
        suite.addTest(loader.loadTestsFromTestCase(case))
    runner = unittest.TextTestRunner()
    runner.run(suite)