U E D R , A S I H C RSS

데블스캠프2011/둘째날/Machine-Learning/Naive Bayes Classifier/김수경

Java

  • 중간에 기숙사 오리엔테이션 갔다왔는데 시간 내에 짠 건 좋음.
  • 코드가 미친듯이 더러운 건 안 좋음... 그냥 안 좋은 정도가 아니다.

Runner.java

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

package us.linfl.ml;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Command-line driver for the document classifier: trains a {@link WordsTable}
 * from the economy and politics index files, then classifies every line of
 * {@code politics.txt} and prints the share of lines labelled POLITICS.
 *
 * @author Linflus
 */
public class Runner {

    /** Data directory used when none is given on the command line. */
    private static final String DEFAULT_PATH =
            "C:\\Users\\Linflus\\CAU\\ZeroPage\\2011\\Devils Camp\\trunk\\DocumentClassification\\";

    /**
     * Trains the word table, classifies each line of the test file and prints
     * one label per line followed by the POLITICS ratio.
     *
     * @param args the command line arguments; an optional first argument
     *             overrides the directory that holds {@code index.economy.db},
     *             {@code index.politics.db} and {@code politics.txt}
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        WordsTable words = new WordsTable();
        words.setFile(resolve(path, "index.economy.db"));
        words.readFile(WordsTable.Type.ECONOMY);
        words.setFile(resolve(path, "index.politics.db"));
        words.readFile(WordsTable.Type.POLITICS);

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(resolve(path, "politics.txt")));
            String line;
            // Classify the line that was just read. Reading again inside the
            // loop body would skip every other document and hand null to the
            // classifier at end of file.
            while ((line = reader.readLine()) != null) {
                NaiveBayes nb = new NaiveBayes(line, words);
                System.out.println(nb.classify());
            }

            System.out.println((float) NaiveBayes.p / (NaiveBayes.e + NaiveBayes.p));
        } catch (IOException ex) {
            Logger.getLogger(Runner.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ex) {
                    Logger.getLogger(Runner.class.getName()).log(Level.WARNING, null, ex);
                }
            }
        }
    }

    /**
     * Joins a directory and a file name, inserting the platform separator only
     * when the directory does not already end with one.
     */
    private static String resolve(String dir, String name) {
        if (dir.length() == 0 || dir.endsWith("/") || dir.endsWith("\\")) {
            return dir + name;
        }
        return dir + java.io.File.separator + name;
    }

}

NaiveBayes.java

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package us.linfl.ml;

/**
 * Two-class (economy vs. politics) log-odds classifier for one document.
 *
 * <p>The score is the log prior odds of ECONOMY plus, for every document word
 * that occurs in both training vocabularies, the log of its economy count over
 * its politics count. A positive score means ECONOMY; anything else, including
 * a tie, means POLITICS. Words seen in only one class are ignored, and no
 * smoothing is applied.
 *
 * @author Linflus
 */
public class NaiveBayes {
    /** Running number of documents classified as ECONOMY, shared by all instances. */
    public static int e = 0;
    /** Running number of documents classified as POLITICS, shared by all instances. */
    public static int p = 0;
    WordsTable table;
    String[] words;

    /**
     * @param document whitespace-separated text to classify; must not be null
     * @param table    trained word counts for both classes
     * @throws IllegalArgumentException if {@code document} is null
     */
    public NaiveBayes(String document, WordsTable table) {
        if (document == null) {
            throw new IllegalArgumentException("document must not be null");
        }
        words = document.split("\\s+");
        this.table = table;
    }

    /**
     * Computes the log prior odds of ECONOMY from the training document counts.
     *
     * @return {@code log(ecoN / poliN)}, or 0 when either class has no
     *         training documents (which also covers an untrained table, where
     *         the ratio would otherwise be NaN)
     */
    public double calcDocProb() {
        if (table.ecoN == 0 || table.poliN == 0) {
            return 0;
        }
        // log(pd / (1 - pd)) with pd = ecoN / (ecoN + poliN) reduces to this.
        return Math.log((double) table.ecoN / table.poliN);
    }

    /**
     * Classifies the document and increments the matching static counter.
     *
     * @return ECONOMY when the total log-odds score is strictly positive,
     *         POLITICS otherwise
     */
    public WordsTable.Type classify() {
        double pw = 0;
        for (int i = 0; i < words.length; i++) {
            String key = words[i];
            if (key.length() == 0) {
                // split() yields "" for blank or leading-whitespace input; not a word.
                continue;
            }
            Integer eco = table.ecoWords.get(key);
            Integer poli = table.poliWords.get(key);
            if (eco == null || poli == null) {
                continue; // only words seen in both classes contribute
            }
            if (eco.intValue() <= 0 || poli.intValue() <= 0) {
                continue; // keep the logarithm finite
            }
            pw += Math.log((double) eco.intValue() / poli.intValue());
        }

        if (calcDocProb() + pw > 0) {
            e++;
            return WordsTable.Type.ECONOMY;
        } else {
            p++;
            return WordsTable.Type.POLITICS;
        }
    }
}

WordsTable.java

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

package us.linfl.ml;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Per-class word-frequency tables built from training files in which every
 * line is one document of whitespace-separated words.
 *
 * <p>Fields are package-private because {@code NaiveBayes} reads them directly.
 *
 * @author Linflus
 */
public class WordsTable {
    /** Path of the file the next {@link #readFile(Type)} call will read. */
    String filename;
    Type filetype;
    /** Word -> occurrence count over all ECONOMY training documents. */
    Map<String, Integer> ecoWords = new HashMap<String, Integer>();
    /** Word -> occurrence count over all POLITICS training documents. */
    Map<String, Integer> poliWords = new HashMap<String, Integer>();
    /** Number of training documents (lines) read per class. */
    int ecoN = 0, poliN = 0;

    /**
     * Selects the file that the next {@link #readFile(Type)} call reads.
     *
     * @param filename path of the training file
     */
    public void setFile(String filename) {
        this.filename = filename;
    }

    /**
     * Looks up how often a word occurred in each class.
     *
     * @param word the word to look up
     * @return a two-element array {@code {economyCount, politicsCount}};
     *         a word that was never seen in a class counts as 0 there
     */
    public int[] getCount(String word) {
        return new int[] { countIn(ecoWords, word), countIn(poliWords, word) };
    }

    /** Returns the stored count for {@code word}, or 0 when it is absent. */
    private static int countIn(Map<String, Integer> counts, String word) {
        Integer n = counts.get(word);
        return n == null ? 0 : n.intValue();
    }

    /**
     * Reads the file chosen with {@link #setFile(String)} and adds each line
     * as one training document of the given class. I/O failures are logged
     * and reading stops; lines already read stay counted.
     *
     * @param filetype class label for every document in the file
     * @throws IllegalArgumentException if {@code filetype} is null
     * @throws IllegalStateException    if no file has been set
     */
    public void readFile(Type filetype) {
        if (filetype == null) {
            throw new IllegalArgumentException("filetype must not be null");
        }
        if (filename == null) {
            throw new IllegalStateException("setFile must be called before readFile");
        }
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(filename));
            String line;
            while ((line = reader.readLine()) != null) {
                parse(line, filetype);
            }
        } catch (IOException ex) {
            Logger.getLogger(WordsTable.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ex) {
                    Logger.getLogger(WordsTable.class.getName()).log(Level.WARNING, null, ex);
                }
            }
        }
    }

    /** Counts one document: bumps the class's document total and its word counts. */
    private void parse(String document, Type filetype) {
        Map<String, Integer> typedWords;
        if (filetype == Type.ECONOMY) {
            typedWords = ecoWords;
            ecoN++;
        } else {
            typedWords = poliWords;
            poliN++;
        }

        String[] words = document.split("\\s+");
        for (int i = 0; i < words.length; i++) {
            String key = words[i];
            if (key.length() == 0) {
                // split() yields "" for blank or leading-whitespace lines; not a word.
                continue;
            }
            Integer seen = typedWords.get(key);
            typedWords.put(key, seen == null ? 1 : seen.intValue() + 1);
        }
    }

    /** Document classes the table distinguishes. */
    public enum Type {
        ECONOMY, POLITICS
    }

}
Valid XHTML 1.0! Valid CSS! powered by MoniWiki
last modified 2011-06-28 13:46:04
Processing time 0.0147 sec