U E D R , A S I H C RSS

데블스캠프2013/셋째날/머신러닝

Machine Learning

  • 강사 : 김태진
  • 데이터 및 강의자료 링크
  • 셋째날 네번째 세션
  • C++, C#, Java, Python.. 다양하네요. ㅋㅋ

정종록, 조영준, 원준연 팀

  • 코드를 올려주세요.

//맘에안든다......
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;

namespace ZPDC2013HELL
{
    /// <summary>
    /// One news article: its word-count vector and the index of its category.
    /// </summary>
    public struct News
    {
        public int[] words;
        public int category;
    }

    /// <summary>
    /// 1-nearest-neighbour classifier: every test article receives the category of the
    /// training article with the smallest sum of absolute word-count differences.
    /// </summary>
    class Program
    {
        const int SIZEBIG = 8165;
        const int SIZESMALL = 20;
        const int TRAIN_COUNT = 11293;
        const int TEST_COUNT = 7528;

        static readonly char[] Separator = new char[] { ',' };

        /// <summary>Creates <paramref name="count"/> articles, each with a zeroed word vector.</summary>
        static News[] CreateNews(int count)
        {
            News[] news = new News[count];
            for (int i = 0; i < count; i++)
            {
                news[i].words = new int[SIZEBIG];
            }
            return news;
        }

        /// <summary>
        /// Fills the word vectors of <paramref name="target"/> from a comma-separated file,
        /// one article per line. Stops at end of file or when the array is full, so an
        /// empty or over-long file can no longer throw.
        /// </summary>
        static void LoadWords(string path, News[] target)
        {
            using (StreamReader reader = new StreamReader(path))
            {
                string line;
                int row = 0;
                while (row < target.Length && (line = reader.ReadLine()) != null)
                {
                    string[] fields = line.Split(Separator);
                    for (int i = 0; i < SIZEBIG; i++)
                    {
                        target[row].words[i] = Convert.ToInt32(fields[i]);
                    }
                    row++;
                }
            }
        }

        /// <summary>
        /// Reads the class file (one row of SIZESMALL comma-separated flags per article)
        /// and stores the column holding "1" as the article's category.
        /// </summary>
        static void LoadCategories(string path, News[] target)
        {
            using (StreamReader reader = new StreamReader(path))
            {
                string line;
                int row = 0;
                while (row < target.Length && (line = reader.ReadLine()) != null)
                {
                    string[] fields = line.Split(Separator);
                    for (int i = 0; i < SIZESMALL; i++)
                    {
                        if (fields[i] == "1") target[row].category = i;
                    }
                    row++;
                }
            }
        }

        /// <summary>Sum of absolute differences between two word vectors.</summary>
        static int Distance(int[] a, int[] b)
        {
            int total = 0;
            for (int k = 0; k < SIZEBIG; k++)
            {
                total += Math.Abs(a[k] - b[k]);
            }
            return total;
        }

        static void Main(string[] args)
        {
            News[] sampleNews = CreateNews(TRAIN_COUNT);
            News[] testNews = CreateNews(TEST_COUNT);

            LoadWords(@"C:\ZPDC2013\train_data11293x8165", sampleNews);
            LoadCategories(@"C:\ZPDC2013\train_class11293x20", sampleNews);
            LoadWords(@"C:\ZPDC2013\test_data7528x8165", testNews);

            for (int i = 0; i < TEST_COUNT; i++)
            {
                int idx = 0;
                // int.MaxValue instead of 10000: the nearest neighbour is always found,
                // however large the smallest distance happens to be.
                int min = int.MaxValue;
                for (int j = 0; j < TRAIN_COUNT; j++)
                {
                    int diff = Distance(testNews[i].words, sampleNews[j].words);
                    if (diff < min)
                    {
                        idx = j;
                        min = diff;
                    }
                }
                testNews[i].category = sampleNews[idx].category;
                Console.WriteLine("{0} : {1}", i, testNews[i].category);
            }

            for (int i = 0; i < TEST_COUNT; i++)
            {
                Console.WriteLine(testNews[i].category);
            }

            Console.WriteLine("END");
            Console.ReadKey();
        }
    }
}

김민재, 송정규, 김도현 팀

  • 코드를 올려주세요.


서민관, 박희정 팀

  • 코드를 올려주세요.
MachineLearning.py
def compare(firstData, secondData):
    """Return the sum of absolute differences between two CSV rows.

    Both arguments are comma-separated strings of integers. The comparison
    walks the fields of ``firstData``; ``secondData`` must have at least as
    many fields (extra fields in ``secondData`` are ignored).
    """
    left = list(map(int, firstData.split(',')))
    right = list(map(int, secondData.split(',')))
    total = 0
    for position, value in enumerate(left):
        total += abs(value - right[position])
    return total

# Load every row of the three data files as raw comma-separated text lines.
# `with` closes each file as soon as it has been read.
with open('DataSet/train_data11293x8165') as trainFile:
    trainData = trainFile.readlines()
with open('DataSet/train_class11293x20') as classFile:
    trainClass = classFile.readlines()
with open('DataSet/test_data7528x8165') as testFile:
    testData = testFile.readlines()
testClass = list()
print('load DataSet finished')

# 1-nearest-neighbour: each test row receives the class line of the training
# row with the smallest compare() distance.
for i in range(len(testData)):
    # Reset per test row. Starting from None (instead of the old 10000 cap)
    # means the closest row always wins and no index leaks from the previous
    # test row.
    similiarIndex = 0
    leastDiffValue = None
    for j in range(len(trainData)):
        diffValue = compare(testData[i], trainData[j])
        if leastDiffValue is None or diffValue < leastDiffValue:
            leastDiffValue = diffValue
            similiarIndex = j
    print('similiar index :  %d' % similiarIndex)
    testClass.append(trainClass[similiarIndex])

# The result file must be opened for writing; the default mode is read-only.
with open("test_class", "w") as f:
    for i in testClass:
        f.write(i)

MachineLearning.cpp
#include <iostream>
#include <fstream>
#include <vector>
#include <sstream>
#include <istream>

using namespace std;

// Appends the pieces of `s` separated by `delim` to `elems` and returns `elems`.
// Existing contents of `elems` are kept; an empty piece between two adjacent
// delimiters is appended as an empty string.
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream source(s);
    for (std::string piece; std::getline(source, piece, delim); ) {
        elems.push_back(piece);
    }
    return elems;
}

// Convenience overload: returns the pieces of `s` separated by `delim` in a
// new vector.
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> pieces;
    // The three-argument overload fills `pieces` in place; the reference it
    // returns is not needed here.
    (void)split(s, delim, pieces);
    return pieces;
}

// Returns the sum of absolute differences between two comma-separated rows of
// integers.
//
// The fields of `firstData` drive the comparison. A field missing from
// `secondData` counts as 0 (previously this indexed past the end of a vector);
// extra fields in `secondData` are ignored, as before.
//
// Both rows are taken by const reference and tokenised on the fly, so a call
// no longer copies the two strings or builds two vectors of substrings.
int compare(const std::string &firstData, const std::string &secondData) {
	std::istringstream firstStream(firstData);
	std::istringstream secondStream(secondData);
	std::string firstItem;
	std::string secondItem;
	int diffValue = 0;

	while ( std::getline(firstStream, firstItem, ',') ) {
		int secondValue = 0;
		if ( std::getline(secondStream, secondItem, ',') ) {
			secondValue = atoi(secondItem.c_str());
		}
		diffValue += abs(atoi(firstItem.c_str()) - secondValue);
	}

	return diffValue;
}

int main(int argc, char *argv[]) {
	ifstream trainData;
	trainData.open("train_data11293x8165");
	vector<string> trainDataList = vector<string>();
	ifstream trainClass;
	trainClass.open("train_class11293x20");
	vector<string> trainClassList = vector<string>();
	ifstream testData;
	testData.open("test_data7528x8165");
	vector<string> testDataList = vector<string>();
	vector<string> testClass = vector<string>();
	int leastDiffValue = 10000;
	int similiarIndex = 0;

	cout << "File Open Finished" << endl;

	string line;

	int idx = 0;
	while ( trainData.good() ) {
		getline(trainData, line);
		trainDataList.insert(trainDataList.end(), line);
		cout << "index : " << idx++ << endl;
		cout << "data[0] : " << line[0] << endl;
	}
	while ( trainClass.good() ) {
		getline(trainClass, line);
		trainClassList.insert(trainClassList.end(), line);
	}
	while ( testData.good() ) {
		getline(testData, line);
		testDataList.insert(testDataList.end(), line);
	}

	cout << "File load Finished" << endl;

	for ( int i = 0; i < testDataList.size(); i++ ) {
		for ( int j = 0; j < trainDataList.size(); j++ ) {
			int diffValue = compare(testDataList[i], trainDataList[j]);

			if ( diffValue < leastDiffValue ) {
				leastDiffValue = diffValue;
				similiarIndex = j;
			}
		}

		leastDiffValue = 10000;
		cout << "similiar index : " << similiarIndex << endl;
		testClass.insert(testClass.end(), trainClassList[similiarIndex]);
	}

	ofstream outputFile;
	outputFile.open("Test_Class");
	for ( int i = 0; i < testClass.size(); i++ ) {
		outputFile << testClass[i] << endl;
	}
	outputFile.close();
	
	trainData.close();
	trainClass.close();
	testData.close();

	return 0;
}

김해천, 김남규 팀

  • 코드를 올려주세요.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

// Reads `count` comma-separated integers from `fp` into `out`.
// "%d," stores the number and then consumes a following comma when there is
// one, so the same format works for the last field of a line as well.
// Returns 1 when all `count` values were read, 0 otherwise.
static int Read_Row(FILE *fp, int *out, int count){
	for(int i=0;i<count;i++){
		if(fscanf(fp, "%d,", &out[i])!=1)
			return 0;
	}
	return 1;
}

// Nearest-centroid classifier.
// Training: averages the word counts of all training rows of each label.
// Testing: assigns each test row the label whose average vector has the
// smallest sum of absolute differences, and writes one one-hot row per test
// row to the result file.
// Returns 0 on success, 1 when a file cannot be opened or nothing was trained.
int main(){

	const int Label_Num=20;
	const int Word_Num=8165;
	const int News_Num=11293;
	const int Test_Num=7528;

	// static: Words_Sum alone is about 650 KB, too much to rely on the stack.
	// Static storage is zero-initialised.
	static int Labels[Label_Num];
	static int Labels_Number[Label_Num];
	static int Words[Word_Num];
	static float Words_Sum[Label_Num][Word_Num];

	FILE * LABEL, *WORD;

	// Read-only access is enough ("r+" also demanded write permission).
	LABEL = fopen("train_class11293x20","r");
	WORD = fopen("train_data11293x8165","r");
	if(LABEL==NULL || WORD==NULL){
		fprintf(stderr,"cannot open the training files\n");
		if(LABEL!=NULL) fclose(LABEL);
		if(WORD!=NULL) fclose(WORD);
		return 1;
	}

	for(int k=0;k<News_Num;k++){
		// A label row has Label_Num fields and a data row Word_Num fields.
		// The old loop read Word_Num-1 labels into the 20-element array and
		// stored the last word at index 19.
		if(!Read_Row(LABEL,Labels,Label_Num) || !Read_Row(WORD,Words,Word_Num)){
			fprintf(stderr,"training data ended early or is malformed at row %d\n",k);
			break;
		}

		int Label_turn_num=-1;
		for(int i=0;i<Label_Num;i++){
			if(Labels[i]==1){
				Label_turn_num=i;
				break;
			}
		}
		if(Label_turn_num<0)
			continue;	// row without a label: nothing to accumulate

		Labels_Number[Label_turn_num]++;
		for(int i=0;i<Word_Num;i++){
			Words_Sum[Label_turn_num][i]+=Words[i];
		}
	}
	fclose(LABEL);
	fclose(WORD);

	// Turn the per-label sums into averages; labels without rows are skipped
	// so they never divide by zero.
	int Trained_Labels=0;
	for(int k=0; k<Label_Num; k++){
		if(Labels_Number[k]==0)
			continue;
		Trained_Labels++;
		for(int i=0; i<Word_Num; i++){
			Words_Sum[k][i]/=Labels_Number[k];
		}
	}
	if(Trained_Labels==0){
		fprintf(stderr,"no labelled training rows were read\n");
		return 1;
	}

	static int Test_Word_Num[Word_Num];

	FILE * TEST, *PRINT;

	TEST = fopen("test_data7528x8165","r");
	PRINT = fopen("test_class7528x8165","w");
	if(TEST==NULL || PRINT==NULL){
		fprintf(stderr,"cannot open the test or result file\n");
		if(TEST!=NULL) fclose(TEST);
		if(PRINT!=NULL) fclose(PRINT);
		return 1;
	}

	for(int j=0;j<Test_Num;j++){

		// Read every value on this line.
		if(!Read_Row(TEST,Test_Word_Num,Word_Num)){
			fprintf(stderr,"test data ended early or is malformed at row %d\n",j);
			break;
		}

		// Pick the label with the smallest distance. `min` is the label index
		// and `minnum` its distance; the old code compared the index against
		// the distance, so label 0 was always chosen.
		int min=-1;
		double minnum=0;

		for(int i=0;i<Label_Num;i++){
			if(Labels_Number[i]==0)
				continue;

			double Test_Sum_Subb=0;
			for(int k=0;k<Word_Num;k++){
				// fabs keeps the fractional part; integer abs() truncated it.
				Test_Sum_Subb += fabs(Words_Sum[i][k]-Test_Word_Num[k]);
			}

			if(min<0 || Test_Sum_Subb<minnum){
				min=i;
				minnum=Test_Sum_Subb;
			}
		}

		// One-hot output row: 1 in the chosen label's column, 0 elsewhere.
		for(int i=0;i<Label_Num;i++){
			if(min==i)
				fprintf(PRINT,"%d,",1);
			else
				fprintf(PRINT,"%d,",0);
		}
		fprintf(PRINT,"\n");
	}

	fclose(TEST);
	fclose(PRINT);


	return 0;
}

안혁준, 남근우 팀

  • 코드를 올려주세요.

#include <iostream>
#include <fstream>
#include <cstdio>
#include <cstdlib>

using namespace std;

// Bundles a 2-D int table with its dimensions so it can be passed as a single
// argument. Plain aggregate: it has no destructor, so whoever allocated `data`
// is responsible for releasing it.
struct DArray {
	int ** data;	// row pointers; a cell is addressed as data[r][c]
	int col;	// number of columns to use in each row
	int row;	// number of rows
};

int ** allocArr(int row, int col);
void deallocArr(int ** target, int row);
void readFile(int ** target, const char * filename, int row, int col);
void findClass(DArray train_data, DArray train_class, DArray test_data);

// Loads the training data, training classes and test data, then prints the
// class of the nearest training row for every test row (see findClass).
// Returns 0 on success, 1 when a table could not be allocated.
int main(){
	const int TRAIN_ROWS = 11293;
	const int TEST_ROWS = 7528;
	const int FEATURES = 8165;
	const int CLASSES = 20;

	//alloc
	int ** train_data = allocArr(TRAIN_ROWS, FEATURES);
	int ** train_class = allocArr(TRAIN_ROWS, CLASSES);
	int ** test_data = allocArr(TEST_ROWS, FEATURES);
	if(train_data == NULL || train_class == NULL || test_data == NULL){
		fprintf(stderr, "out of memory while allocating the data tables\n");
		return 1;
	}

	readFile(train_data, "DataSet/train_data11293x8165", TRAIN_ROWS, FEATURES);
	readFile(train_class, "DataSet/train_class11293x20", TRAIN_ROWS, CLASSES);
	readFile(test_data, "DataSet/test_data7528x8165", TEST_ROWS, FEATURES);

	DArray train_d;
	DArray train_c;
	DArray test_d;

	// The column counts come from the same constants used for allocation and
	// reading. They were hard-coded as 8163 before, which silently left the
	// last two features out of every comparison.
	train_d.data = train_data;
	train_d.row = TRAIN_ROWS;
	train_d.col = FEATURES;
	train_c.data = train_class;
	train_c.row = TRAIN_ROWS;
	train_c.col = CLASSES;
	test_d.data = test_data;
	test_d.row = TEST_ROWS;
	test_d.col = FEATURES;

	findClass(train_d, train_c, test_d);

	//dealloc
	deallocArr(train_data, TRAIN_ROWS);
	deallocArr(train_class, TRAIN_ROWS);
	deallocArr(test_data, TEST_ROWS);

	return 0;
}
// Allocates a row x col table of ints, every cell initialised to 0.
// Returns NULL when a dimension is not positive or when memory runs out; in
// the latter case everything allocated so far is released first.
// The caller frees each row and then the row-pointer array.
int ** allocArr(int row, int col){
	if(row <= 0 || col <= 0)
		return NULL;

	// calloc: the row pointers start out NULL and the cells start out 0, so a
	// partially read file never leaves indeterminate values behind.
	int ** table = (int**)calloc(row, sizeof(int*));
	if(table == NULL)
		return NULL;

	for(int i = 0; i < row; i++){
		table[i] = (int*)calloc(col, sizeof(int));
		if(table[i] == NULL){
			for(int j = 0; j < i; j++){
				free(table[j]);
			}
			free(table);
			return NULL;
		}
	}
	return table;
}
// Frees a table of `row` separately allocated rows, then the row-pointer
// array itself. A NULL `target` is accepted and ignored; NULL rows are fine
// too because free(NULL) is a no-op.
void deallocArr(int ** target, int row){
	if(target == NULL)
		return;

	for(int i = 0; i < row; i++){
		free(target[i]);
	}
	free(target);
}
// Fills target[0..row-1][0..col-1] with the comma-separated integers stored in
// `filename`, in row-major order.
// When the file cannot be opened, or ends / stops parsing early, a message is
// written to stderr and the remaining cells keep their previous contents.
void readFile(int ** target, const char * filename, int row, int col){
	FILE * file = fopen(filename, "r");
	if(file == NULL){
		fprintf(stderr, "readFile: cannot open %s\n", filename);
		return;
	}

	for(int i = 0; i < row; i++){
		for(int j = 0; j < col; j++){
			// "%d," stores the number and then consumes a comma if one
			// follows, so the last field of a line needs no special case.
			if(fscanf(file, "%d,", &target[i][j]) != 1){
				fprintf(stderr, "readFile: %s ended or is malformed at row %d, column %d\n", filename, i, j);
				fclose(file);
				return;
			}
		}
	}
	fclose(file);
}
// For every test row, finds the training row with the smallest sum of absolute
// differences (over train_data.col columns) and prints the index of each
// non-zero column of that training row's class row, one per line.
void findClass(DArray train_data, DArray train_class, DArray test_data){
	for(int i = 0; i < test_data.row; i++){
		// min_index < 0 means "no candidate yet". The first training row is
		// therefore always accepted; the old fixed cap of 1000 could leave
		// min_index at -1 and index train_class.data[-1] below.
		int min_index = -1;
		int min = 0;
		for(int j = 0; j < train_data.row; j++){
			int sum = 0;
			for(int k = 0; k < train_data.col; k++){
				int v = test_data.data[i][k] - train_data.data[j][k];
				v = v > 0 ? v : -v;
				sum += v;
			}

			if(min_index < 0 || sum < min){
				min_index = j;
				min = sum;
			}
		}

		if(min_index < 0)
			continue;	// no training rows at all: nothing to report

		// Report which class the nearest training row belongs to.
		for(int j = 0; j < train_class.col; j++){
			if(train_class.data[min_index][j]){
				printf("%d\n", j);
			}
		}
	}
}

고한종, 임지훈 팀

  • 코드를 올려주세요.
JAVA로 짬.
import java.io.File;
import java.io.FileInputStream;
import java.io.PrintWriter;
import java.util.Scanner;
import java.util.StringTokenizer;

/**
 * Classifies each test row by similarity to the training rows: the score of a
 * training row is the number of comma-separated fields that are textually equal
 * to the test row's field at the same position, and the class line of the
 * best-scoring training row is written to the result file.
 */
public class Main {
	/** Maximum number of training rows that are scored and looked up. */
	private static final int TRAIN_ROWS = 11293;

	public static void main(String[] args) throws Exception {
		// The class file is small; read it once instead of re-scanning it for
		// every test row.
		String[] classLines = new String[TRAIN_ROWS];
		int classCount = 0;
		try (Scanner trainClassReader = new Scanner(new FileInputStream(new File(
				"DataSet/train_class11293x20")))) {
			while (classCount < classLines.length && trainClassReader.hasNextLine()) {
				classLines[classCount++] = trainClassReader.nextLine();
			}
		}

		short[] saveMatrix = new short[TRAIN_ROWS];

		// try-with-resources closes every stream even when an exception escapes.
		try (PrintWriter testClass = new PrintWriter("DataSet/test_class7528x20");
				Scanner testReader = new Scanner(new FileInputStream(new File(
						"DataSet/test_data7528x8165")))) {
			while (testReader.hasNextLine()) {
				String oneSubject = testReader.nextLine();

				// Score every training row against this test row. rowsRead
				// bounds both the array write and the search below, so a
				// training file with more or fewer rows than expected neither
				// overruns the array nor leaves scores from the previous test
				// row in play.
				int rowsRead = 0;
				try (Scanner trainReader = new Scanner(new FileInputStream(new File(
						"DataSet/train_data11293x8165")))) {
					while (rowsRead < saveMatrix.length && trainReader.hasNextLine()) {
						saveMatrix[rowsRead++] = countMatches(trainReader.nextLine(), oneSubject);
					}
				}

				// "<=" keeps the original tie-break: the last best row wins.
				short big = -1;
				int who = -1;
				for (int j = 0; j < rowsRead; j++) {
					if (big <= saveMatrix[j]) {
						big = saveMatrix[j];
						who = j;
					}
				}
				if (who < 0 || who >= classCount) {
					throw new IllegalStateException(
							"no class line for best training row " + who);
				}

				testClass.println(classLines[who]);
			}
		}
	}

	/**
	 * Counts the positions at which the two comma-separated rows hold the same
	 * token. Counting stops at the end of the shorter row.
	 */
	private static short countMatches(String oneCompare, String oneSubject) {
		short counter = 0;
		StringTokenizer oct = new StringTokenizer(oneCompare, ",");
		StringTokenizer ost = new StringTokenizer(oneSubject, ",");
		while (oct.hasMoreTokens() && ost.hasMoreTokens()) {
			if (oct.nextToken().equals(ost.nextToken())) {
				counter++;
			}
		}
		return counter;
	}
}

장혁수, 이예나 팀

  • 코드를 올려주세요.

#include <iostream>
#include <ppl.h>

using namespace std;

#define TRAIN_SIZE	11293
#define TEST_SIZE	7528
#define DATA_SIZE	8165

// Builds a two-plane table and stores it in *arr:
//   (*arr)[0][i] -> DATA_SIZE ints for row i
//   (*arr)[1][i] -> a single int for row i
// for i in [0, size). Nothing is initialised and allocation failures are not
// detected.
void mallocArray(int ****arr, int size) {
	int ***planes = (int***) malloc(sizeof(int**) * 2);
	planes[0] = (int**) malloc(sizeof(int*) * size);
	planes[1] = (int**) malloc(sizeof(int*) * size);

	for(int row=0; row<size; row++) {
		planes[0][row] = (int*) malloc(sizeof(int) * DATA_SIZE);
		planes[1][row] = (int*) malloc(sizeof(int));
	}

	*arr = planes;
}

void readFile(int ***arr, int ***arr2) {
	FILE *pFile;
	char buff[20000];
	char *token;

	pFile = fopen("train_data11293x8165.csv","rt");
	for(int i=0, j=0; fgets(buff, 20000, pFile) > 0; i++, j=0) {
		token = strtok(buff, ",");
		if(token != NULL) {
			arr[0][i][j++] = atoi(token);
			while(token != NULL) {
				token = strtok(NULL, ",");
				if(token)
					arr[0][i][j++] = atoi(token);
			}
		}
	}
	fclose(pFile);

	pFile = fopen("train_class11293x20.csv","rt");
	for(int i=0, j=1; fgets(buff, 20000, pFile) > 0; i++, j=0) {
		token = strtok(buff, ",");
		if(token != NULL) {
			if(atoi(token) == 1)
				arr[1][i][0] = j;
			else
				j++;
			while(token != NULL) {
				token = strtok(NULL, ",");
				if(token)
					if(atoi(token) == 1)
						arr[1][i][0] = j;
					else
						j++;
			}
		}
	}
	fclose(pFile);

	pFile = fopen("test_data7528x8165.csv","rt");
	for(int i=0, j=0; fgets(buff, 20000, pFile) > 0; i++, j=0) {
		token = strtok(buff, ",");
		if(token != NULL) {
			arr2[0][i][j++] = atoi(token);
			while(token != NULL) {
				token = strtok(NULL, ",");
				if(token)
					arr2[0][i][j++] = atoi(token);
			}
		}
	}
	fclose(pFile);
}

// Sum of absolute differences over the first DATA_SIZE elements of the two
// rows.
int compare(int *test, int *train) {
	int distance = 0;
	int *const testEnd = test + DATA_SIZE;

	while(test != testEnd) {
		distance += abs(*test - *train);
		++test;
		++train;
	}

	return distance;
}

// Releases a table built by mallocArray(): every row of both planes, the two
// planes, then the top-level array.
static void freeArray(int ***arr, int size) {
	if(arr == NULL)
		return;
	for(int plane=0; plane<2; plane++) {
		for(int i=0; i<size; i++)
			free(arr[plane][i]);
		free(arr[plane]);
	}
	free(arr);
}

// Classifies every test row with 1-nearest-neighbour (rows are processed in
// parallel) and writes one class per line to result.txt.
int main() {
	int ***train = NULL;
	int ***test = NULL;

	mallocArray(&train, TRAIN_SIZE);
	mallocArray(&test, TEST_SIZE);

	readFile(train, test);

	Concurrency::parallel_for(0, TEST_SIZE, [&](int i) {
		// All search state is local to the iteration. It used to live outside
		// the lambda and was captured by reference, so concurrent iterations
		// overwrote each other's running minimum (a data race).
		int min = 0;
		int min_index = -1;
		for(int j=0; j<TRAIN_SIZE; j++) {
			const int result = compare(test[0][i], train[0][j]);
			if(min_index < 0 || result < min) {
				min = result;
				min_index = j;
			}
		}
		// Store the class of the nearest training row. Previously the row
		// index itself was stored and the classes loaded by readFile() were
		// never used.
		test[1][i][0] = train[1][min_index][0];
	}
	);

	FILE *outFile;
	outFile = fopen("result.txt", "w");
	if(outFile == NULL) {
		fprintf(stderr, "cannot open result.txt for writing\n");
		freeArray(train, TRAIN_SIZE);
		freeArray(test, TEST_SIZE);
		return 1;
	}
	for(int i=0; i<TEST_SIZE; i++) {
		// One value per line; without a separator the digits ran together.
		fprintf(outFile, "%d\n", test[1][i][0]);
	}
	fclose(outFile);

	freeArray(train, TRAIN_SIZE);
	freeArray(test, TEST_SIZE);

	return 0;
}

박성현, 송바위샘 팀

  • 코드를 올려주세요.

#include <stdio.h>
#include <math.h>

int getClosestIndex(int sum, double compare[]);

// Classifies news rows by their total word count.
// Training: sums all word counts of each training row and averages those sums
// per class. Testing: sums each test row and writes the 1-based index of the
// class whose average is closest (see getClosestIndex) to result.txt.
// Returns 0 on success, 1 when a file cannot be opened.
int main(void) {

	FILE* train_data = fopen("train_data11293x8165", "rb");
	FILE* train_class = fopen("train_class11293x20", "rb");
	if(train_data == NULL || train_class == NULL) {
		fprintf(stderr, "cannot open the training files\n");
		if(train_data != NULL) fclose(train_data);
		if(train_class != NULL) fclose(train_class);
		return 1;
	}

	double avr[20] = {0,};
	const int SIZE = 11293;
	int sum[SIZE] = {0,};
	int count[20] = {0,};
	long long total[20] = {0,};

	int d = 0;

	// `ok` turns false at the first value that cannot be read; from then on no
	// further value is consumed, so a stale `d` is never added twice.
	bool ok = true;
	for(int i=0; ok && i<SIZE; ++i) {

		for(int k=0; ok && k<8165; ++k) {
			ok = (fscanf(train_data, "%d,", &d) == 1);
			if(ok)
				sum[i] += d;
		}

		for(int k=0; ok && k<20; ++k) {
			ok = (fscanf(train_class, "%d,", &d) == 1);
			if(ok && d == 1) {
				total[k] += sum[i];
				count[k]++;
			}
		}
		if(ok)
			printf("%d...\n", i);
	}
	if(!ok)
		fprintf(stderr, "training data ended early or is malformed\n");

	for(int i=0; i<20; i++) {
		if( count[i] != 0 ) {
			avr[i] = total[i] / (double)count[i];
		}
		printf("avr[%d] : %lf\n", i, avr[i]);
	}

	fclose(train_data);
	fclose(train_class);

	FILE* test_data = fopen("test_data7528x8165", "rb");
	FILE* result = fopen("result.txt", "w");
	if(test_data == NULL || result == NULL) {
		fprintf(stderr, "cannot open the test or result file\n");
		if(test_data != NULL) fclose(test_data);
		if(result != NULL) fclose(result);
		return 1;
	}
	int new_sum[7528] = {0,};

	ok = true;
	for(int i=0; ok && i<7528; ++i) {

		for(int k=0; ok && k<8165; ++k) {
			// Read from test_data. The old code read from train_data here,
			// a handle that had already been closed above.
			ok = (fscanf(test_data, "%d,", &d) == 1);
			if(ok)
				new_sum[i] += d;
		}
		if(!ok) {
			fprintf(stderr, "test data ended early or is malformed at row %d\n", i);
			break;
		}
		printf("%d...%d\n", i, new_sum[i]);

		int idx = getClosestIndex(new_sum[i], avr);
		fprintf(result, "%d\n", idx+1);
	}
	fclose(test_data);
	fclose(result);

	return 0;
}

// Returns the index (0..19) of the element of `compare` closest to `sum`.
// `compare` must hold at least 20 values; on a tie the lowest index wins.
int getClosestIndex(int sum, double compare[]) {

	int idx = 0;
	// fabs is the floating-point absolute value. A bare abs() can bind to the
	// integer version, which truncates the distance before it is compared.
	double min = fabs((double)sum - compare[0]);

	for(int i=1; i<20; i++) {
		const double gap = fabs((double)sum - compare[i]);
		if( min > gap ) {
			min = gap;
			idx = i;
		}
	}

	return idx;
}

백주협, 지영민, 엄기용 팀

  • 코드를 올려주세요.

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <string>
#include <math.h>

using namespace std;

double freq[20][9000];

int _tmain(int argc, _TCHAR* argv[])
{
	ifstream ifs;
	ifstream iifs;
	ofstream ofs;
	string category[20];
	string dic[9000];
	int cnt=0,ccnt,loading=0,old_cnt=0,oldindex=0;
	int rank[20][8165];
	int cat,i,j,k,itemp;
	ifs.open("comp_names8165x1.txt");

	for(cnt=0;cnt<8165;cnt++)
		ifs >> dic[cnt];

	ifs.close();

	ifs.open("label_names20x1.txt");
	for(cnt=0;cnt<20;cnt++)
		ifs >> category[cnt];

	ifs.close();

	ifs.open("train_class11293x20.txt");
	iifs.open("train_data11293x8165.txt");

	char tmp[20000]={0,},seps[]   = ",";
	char *token,*context;	

	for(i=0;i<11293;i++)
	{
		ifs >> tmp;
		
		cnt=0;
		while(1)
		{
			if(tmp[cnt] == '1') break;
			cnt++;
		}

		cnt/=2;
		iifs >> tmp;

		ccnt=0;
		token = strtok_s( tmp, seps ,&context);
		while( token != NULL )
		{
			freq[cnt][ccnt]+=atoi(token);
			ccnt++;
			token = strtok_s( NULL, seps ,&context);
		}
		if(cnt != old_cnt)
		{
			for(k=0;k<8156;k++)
				freq[old_cnt][k]/=(i-oldindex);
			oldindex=i;
			old_cnt=cnt;
		}
	}

	for(k=0;k<8156;k++)
		freq[cnt][k]/=(i-oldindex);

	iifs.close();
	ifs.close();	
	
	for(cat=0;cat<20;cat++)
	{
		for(j=0;j<8165;j++)
			rank[cat][j]=j;
		
		for(i=0;i<8164;i++)
		{
			for(j=i+1;j<8165;j++)
			{
				if(freq[cat][rank[cat][i]] < freq[cat][rank[cat][j]])
				{
					itemp=rank[cat][i];
					rank[cat][i]=rank[cat][j];
					rank[cat][j]=itemp;
				}
			}
		}
	}	
	
	ifs.open("test_data7528x8165.txt");
	ofs.open("result.txt");

	double a[8165];
	double sel[21];
	int mincat;
	sel[20]=99999;

	for(int ii=0;ii<7528;ii++)
	{
		ifs >> tmp;
		ccnt=0;
		token = strtok_s( tmp, seps ,&context);

		while( token != NULL )
		{
			a[ccnt]=atoi(token);
			ccnt++;
			token = strtok_s( NULL, seps ,&context);
		}
		for(i=0;i<20;i++)
		{
			sel[i]=0;
			for(j=0;j<500;j++)
			{
				sel[i]+=abs(a[rank[i][j]]-freq[i][rank[i][j]]);
			}
		}
		mincat=20;
		for(i=0;i<20;i++)
		{
			if(sel[i] <sel[mincat])
				mincat=i;
		}
		ofs << category[mincat] << endl;		
	}
	
	ifs.close();
	ofs.close();

	return 0;
}

김윤환 팀

  • 코드를 올려주세요.




C++ Class

//DoubleArray.h
#include <cstring>

// Owns a row x col table of ints, zero-initialised on construction and
// released by the destructor.
class DoubleArray{
	public :
		// Allocates `row` rows of `col` ints each, all set to 0.
		DoubleArray(int row, int col);
		~DoubleArray();
		// Row pointers of the table; a cell is addressed as data()[r][c].
		int ** data();
		// Dimensions given at construction. Const member functions, so they
		// can be called through a const reference.
		int rowSize() const;
		int colSize() const;
	private :
		// Copying is disabled (declared, never defined): a copy would share
		// data_ with the original and the two destructors would free it twice.
		DoubleArray(const DoubleArray &);
		DoubleArray & operator=(const DoubleArray &);

		int ** data_;
		int row_size;
		int col_size;
};

// The definitions are `inline` because they live in a header: including it
// from more than one source file must not produce duplicate symbols.
inline DoubleArray::DoubleArray(int row, int col) : row_size(row), col_size(col){
	data_ = new int* [row];
	for(int i = 0; i < row; i++){
		data_[i] = new int[col]();	// () value-initialises every cell to 0
	}
}
inline DoubleArray::~DoubleArray(){
	for(int i = 0; i < row_size; i++){
		delete [] data_[i];
	}
	delete [] data_;
}
inline int ** DoubleArray::data(){
	return data_;
}
inline int DoubleArray::rowSize() const{
	return row_size;
}
inline int DoubleArray::colSize() const{
	return col_size;
}

//main.cpp
#include <iostream>
#include <fstream>
#include <cstdio>
#include <cstdlib>

#include "DoubleArray.h"

// Largest value of a 32-bit signed int. Defined only when no standard header
// has provided it already, and without a trailing semicolon so the macro can
// be used inside expressions.
#ifndef INT_MAX
#define INT_MAX 0x7fffffff
#endif

using namespace std;

void readFile(DoubleArray & target, const char * filename);
void findClass(DoubleArray & train_data, DoubleArray & train_class, DoubleArray & test_data);

// Loads the three data tables and prints the predicted class index of every
// test row (see findClass).
int main(){
	// The unused 1 MB stack buffer that used to be declared here is gone; it
	// served no purpose and could exhaust a small default stack.
	DoubleArray train_data(11293, 8165), train_class(11293, 20), test_data(7528, 8165);

	cout<<"read Train Data...."<<endl;
	readFile(train_data, "DataSet/train_data11293x8165");
	cout<<"read Train class...."<<endl;
	readFile(train_class, "DataSet/train_class11293x20");
	cout<<"read Test Data...."<<endl;
	readFile(test_data, "DataSet/test_data7528x8165");

	cout<<"find Class..."<<endl;
	findClass(train_data, train_class, test_data);
	cout<<"end of find class"<<endl;

	return 0;
}

// Fills `target` (rowSize() x colSize()) with the comma-separated integers
// stored in `filename`, in row-major order.
// When the file cannot be opened, or ends / stops parsing early, a message is
// written to stderr and the remaining cells keep their current contents.
void readFile(DoubleArray & target, const char * filename){
	FILE * file = fopen(filename, "r");
	if(file == NULL){
		fprintf(stderr, "readFile: cannot open %s\n", filename);
		return;
	}

	int ** cells = target.data();
	const int rows = target.rowSize();
	const int cols = target.colSize();

	for(int i = 0; i < rows; i++){
		for(int j = 0; j < cols; j++){
			// "%d," stores the number and then consumes a comma if one
			// follows, so the last field of a line needs no special case.
			if(fscanf(file, "%d,", &cells[i][j]) != 1){
				fprintf(stderr, "readFile: %s ended or is malformed at row %d, column %d\n", filename, i, j);
				fclose(file);
				return;
			}
		}
	}
	fclose(file);
}
// Returns the first column of row `index` in `train_class` whose value is 1,
// or -1 when the row contains no 1.
int getClass(int index, DoubleArray & train_class){
	int * flags = train_class.data()[index];
	const int columns = train_class.colSize();

	int found = -1;
	for(int col = 0; col < columns && found < 0; ++col){
		if(flags[col] == 1)
			found = col;
	}
	return found;
}
// Nearest-centroid classification.
// Training: adds up the word counts of all training rows of each class.
// Running: for every test row, prints (comma-separated, on one line) the index
// of the class whose mean vector has the smallest sum of absolute differences
// from the row; -1 is printed when no class received any training row.
void findClass(DoubleArray & train_data, DoubleArray & train_class, DoubleArray & test_data){

	cout<<"training..."<<endl;
	int count[20] = {0};
	// Per-class column SUMS. They are deliberately not divided by the row
	// count: the table holds ints, and integer division truncated every mean
	// (any mean below 1 became 0).
	DoubleArray trained_data(20, 8165);
	int ** sums = trained_data.data();

	// Never read beyond the columns that a source table actually has.
	int train_cols = trained_data.colSize();
	if(train_data.colSize() < train_cols)
		train_cols = train_data.colSize();

	for(int i = 0; i < train_data.rowSize(); i++){
		int index = getClass(i, train_class);
		if(index < 0 || index >= trained_data.rowSize()){
			cerr<<"error occur!"<<endl;
			continue;
		}

		int * row = train_data.data()[i];
		for(int j = 0; j < train_cols; j++)
			sums[index][j] += row[j];
		count[index]++;
	}


	cout<<"running..."<<endl;
	int test_cols = trained_data.colSize();
	if(test_data.colSize() < test_cols)
		test_cols = test_data.colSize();

	for(int i = 0; i < test_data.rowSize(); i++){
		int * row = test_data.data()[i];
		int min_index = -1;
		double min = 0;
		for(int j = 0; j < trained_data.rowSize(); j++){
			if(count[j] == 0)
				continue;	// class without training rows: no centroid, and no division by zero

			// sum_k |row[k]*n - sums[j][k]| / n  ==  sum_k |row[k] - mean[j][k]|
			// This is the exact distance to the class mean, computed in
			// integers and divided once at the end.
			long long scaled = 0;
			for(int k = 0; k < test_cols; k++){
				long long v = (long long)row[k] * count[j] - sums[j][k];
				v = v > 0 ? v : -v;
				scaled += v;
			}
			const double distance = (double)scaled / count[j];

			// min_index < 0 accepts the first candidate, so no "largest int"
			// sentinel is needed.
			if(min_index < 0 || distance < min){
				min_index = j;
				min = distance;
			}
		}
		cout<<min_index<<",";
	}
	cout<<endl;
}
//계속 작성중



데블스캠프2013/셋째날
Valid XHTML 1.0! Valid CSS! powered by MoniWiki
last modified 2021-02-07 05:29:16
Processing time 0.0285 sec