데블스캠프2011/둘째날/Machine-Learning/NaiveBayesClassifier/김수경 (rev. 1.1)
Java ¶
- 중간에 기숙사 오리엔테이션에 갔다 왔는데도 시간 내에 짠 건 좋음.
- 코드가 미친 듯이 더러운 건 안 좋음.
Runner.java ¶
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package us.linfl.ml;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Command-line driver for the document classifier.
 *
 * <p>Loads the economy and politics index files into a {@link WordsTable},
 * classifies every line of {@code politics.txt} with {@link NaiveBayes}, and
 * prints each verdict followed by the fraction of lines classified as
 * {@code POLITICS}.
 *
 * @author Linflus
 */
public class Runner {

    /**
     * Runs the training and classification pass.
     *
     * <p>All input files are read from a hard-coded directory. An
     * {@link IOException} raised while reading the test file is logged at
     * {@code SEVERE} level and ends the run.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        String path = "C:\\Users\\Linflus\\CAU\\ZeroPage\\2011\\Devils Camp\\trunk\\DocumentClassification\\";
        try {
            // Build the per-category word counts from the two index files.
            WordsTable words = new WordsTable();
            words.setFile(path + "index.economy.db");
            words.readFile(WordsTable.Type.ECONOMY);
            words.setFile(path + "index.politics.db");
            words.readFile(WordsTable.Type.POLITICS);

            BufferedReader reader = new BufferedReader(new FileReader(path + "politics.txt"));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Classify the line that was just read. Calling readLine() a
                    // second time here would skip every other line and hand null
                    // to the classifier when the file has an odd number of lines.
                    NaiveBayes nb = new NaiveBayes(line, words);
                    System.out.println(nb.classify());
                }
            } finally {
                // Release the file handle even if classification fails midway.
                reader.close();
            }

            // Share of lines labelled POLITICS; prints NaN if no line was classified.
            System.out.println((float) NaiveBayes.p / (NaiveBayes.e + NaiveBayes.p));
        } catch (IOException ex) {
            Logger.getLogger(Runner.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
NaiveBayes.java ¶
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package us.linfl.ml;
/**
 * Two-class (economy vs. politics) log-odds classifier for one document.
 *
 * <p>The score is the log ratio of the economy and politics document counts
 * plus, for every token of the document that occurs in both word tables, the
 * log ratio of its economy and politics counts. A score above zero yields
 * {@code ECONOMY}; anything else yields {@code POLITICS}.
 *
 * @author Linflus
 */
public class NaiveBayes {

    /** Number of {@link #classify()} calls, across all instances, that returned ECONOMY. */
    public static int e = 0;
    /** Number of {@link #classify()} calls, across all instances, that returned POLITICS. */
    public static int p = 0;

    /** Word and document counts used for scoring. */
    WordsTable table;
    /** Whitespace-separated tokens of the document being classified. */
    String[] words;

    /**
     * Splits the document into tokens on runs of whitespace.
     *
     * @param document text to classify; must not be null
     * @param table    word and document counts to score against
     * @throws NullPointerException if {@code document} is null
     */
    public NaiveBayes(String document, WordsTable table) {
        if (document == null) {
            // Fail with a clear message instead of an anonymous NPE inside split().
            throw new NullPointerException("document must not be null");
        }
        words = document.split("\\s+");
        this.table = table;
    }

    /**
     * Computes the log-odds contributed by the document counts alone.
     *
     * @return {@code log(ecoN / poliN)}, or 0 when either count is zero
     *         (including the case where the table is empty, which would
     *         otherwise produce NaN)
     */
    public double calcDocProb() {
        if (table.ecoN <= 0 || table.poliN <= 0) {
            return 0;
        }
        // pd / (1 - pd) with pd = ecoN / (ecoN + poliN) reduces to ecoN / poliN;
        // computing it directly in double avoids float rounding.
        return Math.log((double) table.ecoN / table.poliN);
    }

    /**
     * Classifies the document and increments the matching static counter.
     *
     * <p>Tokens missing from either word table are ignored, as are tokens
     * whose stored count is not positive in either table.
     *
     * @return {@code ECONOMY} when the total log-odds is greater than zero,
     *         otherwise {@code POLITICS}
     */
    public WordsTable.Type classify() {
        double wordLogOdds = 0;
        for (String word : words) {
            Integer ecoCount = table.ecoWords.get(word);
            Integer poliCount = table.poliWords.get(word);
            if (ecoCount == null || poliCount == null) {
                continue; // only words seen in both categories carry evidence
            }
            if (ecoCount <= 0 || poliCount <= 0) {
                continue; // a zero count would make the log ratio infinite
            }
            wordLogOdds += Math.log((double) ecoCount / poliCount);
        }
        if (calcDocProb() + wordLogOdds > 0) {
            e++;
            return WordsTable.Type.ECONOMY;
        }
        p++;
        return WordsTable.Type.POLITICS;
    }
}
WordsTable.java ¶
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package us.linfl.ml;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Per-category word frequency table built from line-oriented text files.
 *
 * <p>Each line handed to the table counts as one document of the given
 * {@link Type}; its whitespace-separated tokens are tallied in that type's
 * word map.
 *
 * @author Linflus
 */
public class WordsTable {

    /** Path of the file that the next {@link #readFile(Type)} call will read. */
    String filename;
    /** Not read or written by any code in this class. */
    Type filetype;
    /** Occurrence count of each word seen in ECONOMY lines. */
    Map<String, Integer> ecoWords = new HashMap<String, Integer>();
    /** Occurrence count of each word seen in POLITICS lines. */
    Map<String, Integer> poliWords = new HashMap<String, Integer>();
    /** Number of lines parsed as ECONOMY and as POLITICS, respectively. */
    int ecoN = 0, poliN = 0;

    /**
     * Selects the file to be read by the next {@link #readFile(Type)} call.
     *
     * @param filename path of the file
     */
    public void setFile(String filename) {
        this.filename = filename;
    }

    /**
     * Looks up how often a word occurred in each category.
     *
     * @param word the word to look up
     * @return a two-element array: index 0 is the economy count, index 1 the
     *         politics count; a word that was never seen counts as 0
     */
    public int[] getCount(String word) {
        return new int[] { countOf(ecoWords, word), countOf(poliWords, word) };
    }

    /** Returns the stored count for {@code word}, or 0 when the map has no entry for it. */
    private static int countOf(Map<String, Integer> counts, String word) {
        Integer n = counts.get(word);
        return n == null ? 0 : n.intValue();
    }

    /**
     * Reads the file chosen with {@link #setFile(String)} and adds every line
     * to the counts of the given category.
     *
     * <p>An {@link IOException} is logged at {@code SEVERE} level rather than
     * propagated; lines read before the failure stay counted. The file is
     * decoded with the platform default charset.
     *
     * @param filetype category to which the file's lines belong
     * @throws IllegalArgumentException if {@code filetype} is null
     */
    public void readFile(Type filetype) {
        if (filetype == null) {
            throw new IllegalArgumentException("filetype must not be null");
        }
        try {
            BufferedReader reader = new BufferedReader(new FileReader(filename));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    parse(line, filetype);
                }
            } finally {
                // Always release the file handle, even when parsing fails.
                reader.close();
            }
        } catch (IOException ex) {
            Logger.getLogger(WordsTable.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Counts one line as a document of the given category and tallies its words.
     *
     * @param document one line of text
     * @param filetype category of the line
     * @throws IllegalArgumentException if {@code filetype} is not a known category
     */
    private void parse(String document, Type filetype) {
        Map<String, Integer> typedWords;
        if (filetype == Type.ECONOMY) {
            typedWords = ecoWords;
            ecoN++;
        } else if (filetype == Type.POLITICS) {
            typedWords = poliWords;
            poliN++;
        } else {
            throw new IllegalArgumentException("Unknown document type: " + filetype);
        }
        for (String word : document.split("\\s+")) {
            if (word.length() == 0) {
                // split() yields an empty token for blank lines and for leading
                // whitespace; it is not a word.
                continue;
            }
            Integer seen = typedWords.get(word);
            typedWords.put(word, seen == null ? 1 : seen + 1);
        }
    }

    /** Document categories known to the table. */
    public enum Type {
        ECONOMY, POLITICS
    }
}