정종록, 조영준, 원준연 팀 ¶
- 코드를 올려주세요.
// Original author's note: "I don't like this......"
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;

namespace ZPDC2013HELL
{
    /// <summary>One news article: its per-word counts and its category index.</summary>
    public struct News
    {
        public int[] words;
        public int category;
    }

    /// <summary>
    /// Nearest-neighbour news classifier: every test article receives the
    /// category of the training article whose word-count vector has the
    /// smallest sum of absolute differences to it.
    /// </summary>
    class Program
    {
        const int SIZEBIG = 8165;      // word-count columns per article
        const int SIZESMALL = 20;      // one-hot category columns
        const int TRAINCOUNT = 11293;  // training articles
        const int TESTCOUNT = 7528;    // test articles

        static readonly char[] Separators = new char[] { ',' };

        /// <summary>Allocates <paramref name="count"/> articles with zeroed word vectors.</summary>
        static News[] CreateNews(int count)
        {
            News[] news = new News[count];
            for (int i = 0; i < count; i++)
            {
                news[i].words = new int[SIZEBIG];
            }
            return news;
        }

        /// <summary>
        /// Fills the word vectors of <paramref name="target"/> from a CSV file,
        /// one article per line. Blank lines are skipped and rows beyond the
        /// capacity of <paramref name="target"/> are ignored.
        /// </summary>
        static void LoadWords(string path, News[] target)
        {
            using (StreamReader reader = new StreamReader(path))
            {
                int row = 0;
                string line;
                while (row < target.Length && (line = reader.ReadLine()) != null)
                {
                    if (line.Length == 0) continue;
                    string[] fields = line.Split(Separators);
                    int columns = Math.Min(SIZEBIG, fields.Length);
                    for (int i = 0; i < columns; i++)
                    {
                        target[row].words[i] = Convert.ToInt32(fields[i]);
                    }
                    row++;
                }
            }
        }

        /// <summary>
        /// Reads one-hot category rows and stores, for each article, the index
        /// of the column that holds "1" (the last such column if several do).
        /// </summary>
        static void LoadCategories(string path, News[] target)
        {
            using (StreamReader reader = new StreamReader(path))
            {
                int row = 0;
                string line;
                while (row < target.Length && (line = reader.ReadLine()) != null)
                {
                    if (line.Length == 0) continue;
                    string[] fields = line.Split(Separators);
                    int columns = Math.Min(SIZESMALL, fields.Length);
                    for (int i = 0; i < columns; i++)
                    {
                        if (fields[i] == "1") target[row].category = i;
                    }
                    row++;
                }
            }
        }

        /// <summary>Sum of absolute element-wise differences between two word vectors.</summary>
        static int Distance(int[] a, int[] b)
        {
            int diff = 0;
            for (int k = 0; k < SIZEBIG; k++)
            {
                diff += Math.Abs(a[k] - b[k]);
            }
            return diff;
        }

        static void Main(string[] args)
        {
            News[] sampleNews = CreateNews(TRAINCOUNT);
            News[] testNews = CreateNews(TESTCOUNT);

            LoadWords(@"C:\ZPDC2013\train_data11293x8165", sampleNews);
            LoadCategories(@"C:\ZPDC2013\train_class11293x20", sampleNews);
            LoadWords(@"C:\ZPDC2013\test_data7528x8165", testNews);

            for (int i = 0; i < TESTCOUNT; i++)
            {
                int idx = 0;
                // Start from the largest int so the first candidate always wins;
                // a fixed cut-off would silently pick sample 0 for distant articles.
                int min = int.MaxValue;
                for (int j = 0; j < TRAINCOUNT; j++)
                {
                    int diff = Distance(testNews[i].words, sampleNews[j].words);
                    if (diff < min)
                    {
                        idx = j;
                        min = diff;
                    }
                }
                testNews[i].category = sampleNews[idx].category;
                Console.WriteLine("{0} : {1}", i, testNews[i].category);
            }

            for (int i = 0; i < TESTCOUNT; i++)
            {
                Console.WriteLine(testNews[i].category);
            }
            Console.WriteLine("END");
            Console.ReadKey();
        }
    }
}
서민관, 박희정 팀 ¶
- 코드를 올려주세요.
def _parse(line):
    """Convert one comma-separated line into a list of ints."""
    return [int(value) for value in line.split(',')]


def _l1_distance(firstDataList, secondDataList):
    """Sum of absolute differences over the positions both lists share."""
    return sum(abs(a - b) for a, b in zip(firstDataList, secondDataList))


def compare(firstData, secondData):
    """Return the L1 distance between two comma-separated rows of integers.

    firstData, secondData: strings such as '0,3,1'; surrounding whitespace
    (including a trailing newline) is tolerated by int().
    """
    return _l1_distance(_parse(firstData), _parse(secondData))


def _read_rows(path):
    """Read all non-blank lines of a file, keeping their line endings."""
    with open(path) as source:
        return [line for line in source if line.strip()]


def main():
    """Label every test row with the class row of its nearest training row."""
    trainData = _read_rows('DataSet/train_data11293x8165')
    trainClass = _read_rows('DataSet/train_class11293x20')
    testData = _read_rows('DataSet/test_data7528x8165')
    testClass = list()
    print('load DataSet finished')

    for testLine in testData:
        # The test row is the same for the whole inner loop: parse it once.
        testVector = _parse(testLine)
        similiarIndex = 0
        # None means "no candidate yet", so the first training row always
        # becomes the initial best instead of relying on a fixed threshold.
        leastDiffValue = None
        for j, trainLine in enumerate(trainData):
            diffValue = _l1_distance(testVector, _parse(trainLine))
            if leastDiffValue is None or diffValue < leastDiffValue:
                leastDiffValue = diffValue
                similiarIndex = j
        print('similiar index :  %d' % similiarIndex)
        testClass.append(trainClass[similiarIndex])

    # The result file must be opened for writing.
    with open("test_class", "w") as f:
        for i in testClass:
            f.write(i)


if __name__ == '__main__':
    main()
MachineLearning.cpp
#include <climits>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <istream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// Splits `s` on `delim`, appending every piece to `elems`; returns `elems`.
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) {
        elems.push_back(item);
    }
    return elems;
}

// Splits `s` on `delim` and returns the pieces as a new vector.
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    split(s, delim, elems);
    return elems;
}

// Converts one comma-separated line into integers (atoi semantics per field).
static vector<int> parseRow(const string &line) {
    const vector<string> fields = split(line, ',');
    vector<int> values;
    values.reserve(fields.size());
    for (size_t i = 0; i < fields.size(); i++) {
        values.push_back(atoi(fields[i].c_str()));
    }
    return values;
}

// Sum of absolute differences over the positions both rows have in common.
static int l1Distance(const vector<int> &first, const vector<int> &second) {
    const size_t count = first.size() < second.size() ? first.size() : second.size();
    int diffValue = 0;
    for (size_t i = 0; i < count; i++) {
        diffValue += abs(first[i] - second[i]);
    }
    return diffValue;
}

// L1 distance between two comma-separated rows of integers.
int compare(string firstData, string secondData) {
    return l1Distance(parseRow(firstData), parseRow(secondData));
}

// Nearest-neighbour classification: each test row is written out with the
// class row of the training row that has the smallest L1 distance to it.
// Returns 0 on success and 1 when an input or output file is unusable.
int main(int argc, char *argv[]) {
    ifstream trainData("train_data11293x8165");
    ifstream trainClass("train_class11293x20");
    ifstream testData("test_data7528x8165");
    if (!trainData || !trainClass || !testData) {
        cerr << "Failed to open one of the data set files" << endl;
        return 1;
    }
    cout << "File Open Finished" << endl;

    string line;

    // Training rows are parsed once up front so the comparison loop below
    // works on integers instead of re-splitting text for every pair.
    // Looping on getline() itself avoids storing a phantom empty row at EOF.
    vector<vector<int> > trainRows;
    int idx = 0;
    while (getline(trainData, line)) {
        if (line.empty()) continue;
        cout << "index : " << idx++ << endl;
        cout << "data[0] : " << line[0] << endl;
        trainRows.push_back(parseRow(line));
    }

    vector<string> trainClassList;
    while (getline(trainClass, line)) {
        if (line.empty()) continue;
        trainClassList.push_back(line);
    }

    vector<vector<int> > testRows;
    while (getline(testData, line)) {
        if (line.empty()) continue;
        testRows.push_back(parseRow(line));
    }
    cout << "File load Finished" << endl;

    // Every training row needs a class row, otherwise the lookup below
    // would run past the end of trainClassList.
    if (trainRows.empty() || trainClassList.size() < trainRows.size()) {
        cerr << "Training data and class files do not match" << endl;
        return 1;
    }

    vector<string> testClass;
    testClass.reserve(testRows.size());
    for (size_t i = 0; i < testRows.size(); i++) {
        // INT_MAX guarantees the first training row becomes the initial best.
        int leastDiffValue = INT_MAX;
        size_t similiarIndex = 0;
        for (size_t j = 0; j < trainRows.size(); j++) {
            const int diffValue = l1Distance(testRows[i], trainRows[j]);
            if (diffValue < leastDiffValue) {
                leastDiffValue = diffValue;
                similiarIndex = j;
            }
        }
        cout << "similiar index : " << similiarIndex << endl;
        testClass.push_back(trainClassList[similiarIndex]);
    }

    ofstream outputFile("Test_Class");
    if (!outputFile) {
        cerr << "Failed to open Test_Class for writing" << endl;
        return 1;
    }
    for (size_t i = 0; i < testClass.size(); i++) {
        outputFile << testClass[i] << '\n';
    }
    outputFile.close();

    trainData.close();
    trainClass.close();
    testData.close();
    return 0;
}
김해천, 김남규 팀 ¶
- 코드를 올려주세요.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* Reads one integer followed by an optional comma; aborts the program with a
 * message naming `what` when the file ends early or holds a non-number. */
static int Read_Value(FILE *file, const char *what){
    int value = 0;
    if(fscanf(file, "%d,", &value) != 1){
        fprintf(stderr, "unexpected end of data while reading %s\n", what);
        exit(EXIT_FAILURE);
    }
    return value;
}

/* Opens `name` with `mode`; aborts the program with perror() on failure. */
static FILE *Open_File(const char *name, const char *mode){
    FILE *file = fopen(name, mode);
    if(file == NULL){
        perror(name);
        exit(EXIT_FAILURE);
    }
    return file;
}

/*
 * Nearest-centroid classifier.
 * Training: averages the word counts of all training rows per label.
 * Testing: for each test row, picks the label whose average vector has the
 * smallest sum of absolute differences and writes a one-hot row ("0,"/"1,"
 * per label, then a newline) to the output file.
 */
int main(){
    const int Label_Num=20;
    const int Word_Num=8165;
    const int News_Num=11293;
    const int Test_Num=7528;

    int Labels[Label_Num]={0,};
    int Labels_Number[Label_Num]={0,};
    /* static: ~650 KB is too much to place on the stack safely, and static
     * storage is zero-initialised. */
    static float Words_Sum[Label_Num][Word_Num];

    FILE *LABEL = Open_File("train_class11293x20","r");
    FILE *WORD = Open_File("train_data11293x8165","r");

    for(int k=0;k<News_Num;k++){
        /* -1 marks "no label column was set" for this row. */
        int Label_turn_num=-1;
        for(int i=0;i<Label_Num;i++){
            Labels[i]=Read_Value(LABEL,"train_class11293x20");
            if(Labels[i]==1 && Label_turn_num<0){
                Label_turn_num=i;
            }
        }
        if(Label_turn_num>=0){
            Labels_Number[Label_turn_num]++;
        }
        /* The word row is always consumed so both files stay in step. */
        for(int i=0;i<Word_Num;i++){
            int word=Read_Value(WORD,"train_data11293x8165");
            if(Label_turn_num>=0){
                Words_Sum[Label_turn_num][i]+=word;
            }
        }
    }
    fclose(LABEL);
    fclose(WORD);

    /* Turn per-label sums into averages; labels without samples are left at
     * zero and excluded from the comparison below. */
    for(int k=0;k<Label_Num;k++){
        if(Labels_Number[k]==0) continue;
        for(int i=0;i<Word_Num;i++){
            Words_Sum[k][i]/=Labels_Number[k];
        }
    }

    static int Test_Word_Num[Word_Num];
    float Test_Sum_Subb[Label_Num]={0,};
    FILE *TEST = Open_File("test_data7528x8165","r");
    FILE *PRINT = Open_File("test_class7528x8165","w");

    for(int j=0;j<Test_Num;j++){
        for(int i=0;i<Word_Num;i++){
            Test_Word_Num[i]=Read_Value(TEST,"test_data7528x8165");
        }/* the whole row has been read */

        for(int i=0;i<Label_Num;i++){
            Test_Sum_Subb[i]=0;
            for(int k=0;k<Word_Num;k++){
                /* fabsf: the integer abs() would truncate the fractional
                 * part of the averaged counts. */
                Test_Sum_Subb[i]+=fabsf(Words_Sum[i][k]-(float)Test_Word_Num[k]);
            }
        }

        /* Classification: min is the index of the closest label, minnum its
         * distance. min stays -1 only when no label has training samples. */
        int min=-1;
        float minnum=0;
        for(int i=0;i<Label_Num;i++){
            if(Labels_Number[i]==0) continue;
            if(min<0 || Test_Sum_Subb[i]<minnum){
                min=i;
                minnum=Test_Sum_Subb[i];
            }
        }

        /* Write the result as a one-hot row. */
        for(int i=0;i<Label_Num;i++){
            if(min==i) fprintf(PRINT,"%d,",1);
            else fprintf(PRINT,"%d,",0);
        }
        fprintf(PRINT,"\n");
    }
    fclose(TEST);
    fclose(PRINT);
    return 0;
}
안혁준, 남근우 팀 ¶
- 코드를 올려주세요.
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>

using namespace std;

// A non-owning view of a row-major table of ints.
struct DArray {
    int ** data;
    int col;
    int row;
};

int ** allocArr(int row, int col);
void deallocArr(int ** target, int row);
void readFile(int ** target, const char * filename, int row, int col);
void findClass(DArray train_data, DArray train_class, DArray test_data);

// Loads the three data sets, prints the class of every test row and releases
// the tables again.
int main(){
    const int train_rows = 11293;
    const int test_rows = 7528;
    const int word_cols = 8165;
    const int class_cols = 20;

    //alloc
    int ** train_data = allocArr(train_rows, word_cols);
    int ** train_class = allocArr(train_rows, class_cols);
    int ** test_data = allocArr(test_rows, word_cols);
    if(train_data == NULL || train_class == NULL || test_data == NULL){
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    readFile(train_data, "DataSet/train_data11293x8165", train_rows, word_cols);
    readFile(train_class, "DataSet/train_class11293x20", train_rows, class_cols);
    readFile(test_data, "DataSet/test_data7528x8165", test_rows, word_cols);

    DArray train_d;
    DArray train_c;
    DArray test_d;

    // The views carry the full column count so every word takes part in the
    // distance computation.
    train_d.data = train_data;
    train_d.row = train_rows;
    train_d.col = word_cols;

    train_c.data = train_class;
    train_c.row = train_rows;
    train_c.col = class_cols;

    test_d.data = test_data;
    test_d.row = test_rows;
    test_d.col = word_cols;

    findClass(train_d, train_c, test_d);

    //dealloc
    deallocArr(train_data, train_rows);
    deallocArr(train_class, train_rows);
    deallocArr(test_data, test_rows);
    return 0;
}

// Allocates a zero-filled row x col table. Returns NULL (after releasing any
// partially built table) when memory runs out.
int ** allocArr(int row, int col){
    int ** table = (int**)calloc(row, sizeof(int*));
    if(table == NULL) return NULL;
    for(int i = 0; i < row; i++){
        table[i] = (int*)calloc(col, sizeof(int));
        if(table[i] == NULL){
            deallocArr(table, i);
            return NULL;
        }
    }
    return table;
}

// Frees the first `row` rows of `target` and then the row-pointer array.
void deallocArr(int ** target, int row){
    for(int i = 0; i < row; i++){
        free(target[i]);
    }
    free(target);
}

// Fills target[0..row)[0..col) from a comma-separated text file. Terminates
// the program when the file cannot be opened; stops early (leaving the
// remaining cells untouched) when the file holds fewer numbers than expected.
void readFile(int ** target, const char * filename, int row, int col){
    FILE * file = fopen(filename, "r");
    if(file == NULL){
        perror(filename);
        exit(EXIT_FAILURE);
    }
    for(int i = 0; i < row; i++){
        for(int j = 0; j < col; j++){
            // "%d," also matches the last value of a line: the comma is simply
            // not consumed and the newline is skipped by the next %d.
            if(fscanf(file, "%d,", &target[i][j]) != 1){
                fprintf(stderr, "%s: data ended at row %d, column %d\n", filename, i, j);
                fclose(file);
                return;
            }
        }
    }
    fclose(file);
}

// For each test row, finds the training row with the smallest sum of absolute
// differences and prints (one per line) every class column that is set for it.
void findClass(DArray train_data, DArray train_class, DArray test_data){
    for(int i = 0; i < test_data.row; i++){
        int min_index = -1;
        // INT_MAX lets the first training row become the initial best.
        int min = INT_MAX;
        for(int j = 0; j < train_data.row; j++){
            int sum = 0;
            for(int k = 0; k < train_data.col; k++){
                int v = test_data.data[i][k] - train_data.data[j][k];
                sum += v > 0 ? v : -v;
            }
            if(sum < min){
                min_index = j;
                min = sum;
            }
        }
        // No training rows at all: nothing to report for this test row.
        if(min_index < 0) continue;

        // Report which class the nearest row belongs to.
        for(int j = 0; j < train_class.col; j++){
            if(train_class.data[min_index][j]){
                printf("%d\n", j);
            }
        }
    }
}
고한종, 임지훈 팀 ¶
- 코드를 올려주세요.
import java.io.File;
import java.io.FileInputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.StringTokenizer;

/**
 * Labels every test row with the class row of the training row that shares
 * the most equal comma-separated fields with it.
 */
public class Main {
    private static final String TRAIN_DATA = "DataSet/train_data11293x8165";
    private static final String TRAIN_CLASS = "DataSet/train_class11293x20";
    private static final String TEST_DATA = "DataSet/test_data7528x8165";
    private static final String TEST_CLASS = "DataSet/test_class7528x20";

    public static void main(String[] args) throws Exception {
        // The class file is small, so it is loaded once instead of being
        // re-scanned for every test row.
        List<String> trainClasses = readLines(TRAIN_CLASS);

        try (PrintWriter testClass = new PrintWriter(TEST_CLASS);
             Scanner testReader = new Scanner(new FileInputStream(new File(TEST_DATA)))) {
            while (testReader.hasNextLine()) {
                String oneSubject = testReader.nextLine();
                int who = findMostSimilar(oneSubject);
                if (who < 0 || who >= trainClasses.size()) {
                    throw new IllegalStateException(
                            "No class row for training row " + who);
                }
                testClass.println(trainClasses.get(who));
            }
        }
    }

    /** Reads every line of a text file into memory. */
    private static List<String> readLines(String path) throws Exception {
        List<String> lines = new ArrayList<String>();
        try (Scanner reader = new Scanner(new FileInputStream(new File(path)))) {
            while (reader.hasNextLine()) {
                lines.add(reader.nextLine());
            }
        }
        return lines;
    }

    /**
     * Streams the training data and returns the index of the row with the
     * most fields equal to those of {@code oneSubject}. Ties go to the later
     * row. Returns -1 when the training file has no rows.
     */
    private static int findMostSimilar(String oneSubject) throws Exception {
        int big = -1;
        int who = -1;
        try (Scanner trainReader = new Scanner(new FileInputStream(new File(TRAIN_DATA)))) {
            for (int i = 0; trainReader.hasNextLine(); i++) {
                int counter = countEqualFields(trainReader.nextLine(), oneSubject);
                if (big <= counter) {
                    big = counter;
                    who = i;
                }
            }
        }
        return who;
    }

    /** Counts the positions at which both rows hold the same field text. */
    private static int countEqualFields(String oneCompare, String oneSubject) {
        StringTokenizer oct = new StringTokenizer(oneCompare, ",");
        StringTokenizer ost = new StringTokenizer(oneSubject, ",");
        int counter = 0;
        // Stop at the shorter row so a ragged line cannot throw.
        while (oct.hasMoreTokens() && ost.hasMoreTokens()) {
            if (oct.nextToken().equals(ost.nextToken())) {
                counter++;
            }
        }
        return counter;
    }
}
장혁수, 이예나 팀 ¶
- 코드를 올려주세요.
#include <iostream> #include <ppl.h> using namespace std; #define TRAIN_SIZE 11293 #define TEST_SIZE 7528 #define DATA_SIZE 8165 void mallocArray(int ****arr, int size) { (*arr) = (int***) malloc(sizeof(int**) * 2); for(int i=0; i<2; i++) (*arr)[i] = (int**) malloc (sizeof(int*) * size); for(int i=0; i<size; i++) (*arr)[0][i] = (int*) malloc (sizeof(int) * DATA_SIZE); for(int i=0; i<size; i++) (*arr)[1][i] = (int*) malloc (sizeof(int)); } void readFile(int ***arr, int ***arr2) { FILE *pFile; char buff[20000]; char *token; pFile = fopen("train_data11293x8165.csv","rt"); for(int i=0, j=0; fgets(buff, 20000, pFile) > 0; i++, j=0) { token = strtok(buff, ","); if(token != NULL) { arr[0][i][j++] = atoi(token); while(token != NULL) { token = strtok(NULL, ","); if(token) arr[0][i][j++] = atoi(token); } } } fclose(pFile); pFile = fopen("train_class11293x20.csv","rt"); for(int i=0, j=1; fgets(buff, 20000, pFile) > 0; i++, j=0) { token = strtok(buff, ","); if(token != NULL) { if(atoi(token) == 1) arr[1][i][0] = j; else j++; while(token != NULL) { token = strtok(NULL, ","); if(token) if(atoi(token) == 1) arr[1][i][0] = j; else j++; } } } fclose(pFile); pFile = fopen("test_data7528x8165.csv","rt"); for(int i=0, j=0; fgets(buff, 20000, pFile) > 0; i++, j=0) { token = strtok(buff, ","); if(token != NULL) { arr2[0][i][j++] = atoi(token); while(token != NULL) { token = strtok(NULL, ","); if(token) arr2[0][i][j++] = atoi(token); } } } fclose(pFile); } int compare(int *test, int *train) { int result = 0; for(int i=0; i<DATA_SIZE; i++) { result += abs(test[i] - train[i]); } return result; } int main() { int ***train = NULL; int ***test = NULL; mallocArray(&train, TRAIN_SIZE); mallocArray(&test, TEST_SIZE); readFile(train, test); int min_index = 0; int result = 0; Concurrency::parallel_for(0, TEST_SIZE, [&](int i) { //for(int i=0; i<TEST_SIZE; i++) { for(int j=0, min=99999; j<TRAIN_SIZE; j++) { result = compare(test[0][i], train[0][j]); if(result < min) { min = result; min_index = j; } 
} test[1][i][0] = min_index; } ); FILE *outFile; outFile = fopen("result.txt", "w"); for(int i=0; i<TEST_SIZE; i++) { fprintf(outFile, "%d", test[1][i][0]); } return 0; }
박성현, 송바위샘 팀 ¶
- 코드를 올려주세요.
#include <stdio.h>
#include <math.h>

int getClosestIndex(int sum, double compare[]);

/* Adds up the next `count` comma-separated integers of `file` into *total.
 * Returns 1 on success and 0 when the data ends early or is not numeric. */
static int sumRow(FILE* file, int count, int* total)
{
	int d = 0;
	*total = 0;
	for(int k=0; k<count; ++k)
	{
		if( fscanf(file, "%d,", &d) != 1 )
		{
			return 0;
		}
		*total += d;
	}
	return 1;
}

/*
 * Classifies articles by their total word count: training computes the
 * average total per class, and each test article is assigned the class whose
 * average is closest. Results are written to result.txt as 1-based class
 * numbers, one per line.
 */
int main(void)
{
	const int SIZE = 11293;
	const int TEST_SIZE = 7528;
	const int WORDS = 8165;
	const int CLASSES = 20;

	FILE* train_data = fopen("train_data11293x8165", "rb");
	FILE* train_class = fopen("train_class11293x20", "rb");
	if( train_data == NULL || train_class == NULL )
	{
		perror("training files");
		if( train_data != NULL ) fclose(train_data);
		if( train_class != NULL ) fclose(train_class);
		return 1;
	}

	double avr[20] = {0,};
	int count[20] = {0,};
	long long total[20] = {0,};
	int d = 0;

	for(int i=0; i<SIZE; ++i)
	{
		int sum = 0;
		if( !sumRow(train_data, WORDS, &sum) )
		{
			fprintf(stderr, "training data ended at row %d\n", i);
			break;
		}

		for(int k=0; k<CLASSES; ++k)
		{
			if( fscanf(train_class, "%d,", &d) != 1 )
			{
				d = 0;
			}
			if(d == 1)
			{
				total[k] += sum;
				count[k]++;
			}
		}

		printf("%d...\n", i);
	}

	for(int i=0; i<CLASSES; i++)
	{
		if( count[i] != 0 )
		{
			avr[i] = total[i] / (double)count[i];
		}
		printf("avr[%d] : %lf\n", i, avr[i]);
	}

	fclose(train_data);
	fclose(train_class);

	FILE* test_data = fopen("test_data7528x8165", "rb");
	FILE* result = fopen("result.txt", "w");
	if( test_data == NULL || result == NULL )
	{
		perror("test files");
		if( test_data != NULL ) fclose(test_data);
		if( result != NULL ) fclose(result);
		return 1;
	}

	for(int i=0; i<TEST_SIZE; ++i)
	{
		int new_sum = 0;
		/* The rows come from the test file; the training file is closed. */
		if( !sumRow(test_data, WORDS, &new_sum) )
		{
			fprintf(stderr, "test data ended at row %d\n", i);
			break;
		}

		printf("%d...%d\n", i, new_sum);
		int idx = getClosestIndex(new_sum, avr);
		fprintf(result, "%d\n", idx+1);
	}

	fclose(test_data);
	fclose(result);

	return 0;
}

/* Returns the index (0..19) of the entry of `compare` closest to `sum`;
 * the lowest index wins on ties. `compare` must hold 20 values. */
int getClosestIndex(int sum, double compare[])
{
	int idx = 0;
	/* fabs keeps the fractional part that an integer abs() would discard. */
	double min = fabs((double)sum - compare[0]);
	for(int i=1; i<20; i++)
	{
		const double distance = fabs((double)sum - compare[i]);
		if( min > distance )
		{
			min = distance;
			idx = i;
		}
	}

	return idx;
}
백주협, 지영민, 엄기용 팀 ¶
- 코드를 올려주세요.
#include "stdafx.h"
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <math.h>

using namespace std;

static const int CATEGORY_COUNT = 20;
static const int WORD_COUNT = 8165;
static const int TRAIN_COUNT = 11293;
static const int TEST_COUNT = 7528;
// Number of highest-average words per category used when scoring a test row.
static const int TOP_WORDS = 500;

// freq[c][w]: average count of word w over the training rows of category c.
double freq[20][9000];

// wordRank[c]: word indices of category c ordered by descending freq[c][w].
// Kept at file scope because a table of this size does not belong on the stack.
static int wordRank[CATEGORY_COUNT][WORD_COUNT];

// Orders word indices by descending average count; equal counts fall back to
// ascending index so the ranking is deterministic.
struct MoreFrequent
{
	const double* row;
	explicit MoreFrequent(const double* r) : row(r) {}
	bool operator()(int a, int b) const
	{
		if(row[a] != row[b]) return row[a] > row[b];
		return a < b;
	}
};

// Parses up to maxCount comma-separated integers from text into out and
// returns how many were stored. Parsing stops at the first non-numeric field.
static int parseCsvInts(const string& text, int* out, int maxCount)
{
	const char* p = text.c_str();
	int n = 0;
	while(n < maxCount)
	{
		while(*p == ',') ++p;
		if(*p == '\0') break;
		char* end = 0;
		const long value = strtol(p, &end, 10);
		if(end == p) break;
		out[n++] = (int)value;
		p = end;
	}
	return n;
}

// Nearest-centroid classifier restricted to each category's most frequent
// words: trains per-category average word counts, ranks the words of every
// category, then writes the name of the closest category for every test row
// to result.txt.
int _tmain(int argc, _TCHAR* argv[])
{
	string category[CATEGORY_COUNT];
	string line;
	vector<int> fields(WORD_COUNT);

	// Separate stream objects per file: none of them inherits a leftover
	// error state from an earlier file.
	ifstream labelNames("label_names20x1.txt");
	for(int c=0;c<CATEGORY_COUNT;c++)
		labelNames >> category[c];
	labelNames.close();

	ifstream trainClass("train_class11293x20.txt");
	ifstream trainData("train_data11293x8165.txt");
	int samples[CATEGORY_COUNT] = {0};
	for(int i=0;i<TRAIN_COUNT;i++)
	{
		if(!(trainClass >> line)) break;
		const int labelCount = parseCsvInts(line, &fields[0], CATEGORY_COUNT);
		int cat = -1;
		for(int c=0;c<labelCount;c++)
		{
			if(fields[c] == 1) { cat = c; break; }
		}

		// The data row is consumed even for an unlabeled sample so the two
		// files stay aligned.
		if(!(trainData >> line)) break;
		if(cat < 0) continue;

		const int wordCount = parseCsvInts(line, &fields[0], WORD_COUNT);
		for(int w=0;w<wordCount;w++)
			freq[cat][w] += fields[w];
		samples[cat]++;
	}
	trainData.close();
	trainClass.close();

	// Per-category sample counts make the averages independent of the order
	// in which the categories appear in the training file.
	for(int cat=0;cat<CATEGORY_COUNT;cat++)
	{
		if(samples[cat] == 0) continue;
		for(int w=0;w<WORD_COUNT;w++)
			freq[cat][w] /= samples[cat];
	}

	for(int cat=0;cat<CATEGORY_COUNT;cat++)
	{
		for(int w=0;w<WORD_COUNT;w++)
			wordRank[cat][w] = w;
		sort(wordRank[cat], wordRank[cat] + WORD_COUNT, MoreFrequent(freq[cat]));
	}

	ifstream testData("test_data7528x8165.txt");
	ofstream ofs("result.txt");
	vector<double> a(WORD_COUNT);

	for(int ii=0;ii<TEST_COUNT;ii++)
	{
		if(!(testData >> line)) break;
		const int wordCount = parseCsvInts(line, &fields[0], WORD_COUNT);
		// Columns missing from a short row count as zero instead of keeping
		// the previous row's values.
		for(int w=0;w<WORD_COUNT;w++)
			a[w] = (w < wordCount) ? fields[w] : 0;

		int mincat = -1;
		double best = 0;
		for(int cat=0;cat<CATEGORY_COUNT;cat++)
		{
			// A category without training rows has no meaningful centroid.
			if(samples[cat] == 0) continue;
			double distance = 0;
			for(int j=0;j<TOP_WORDS;j++)
			{
				const int w = wordRank[cat][j];
				distance += fabs(a[w] - freq[cat][w]);
			}
			if(mincat < 0 || distance < best)
			{
				best = distance;
				mincat = cat;
			}
		}

		if(mincat >= 0)
			ofs << category[mincat] << endl;
		else
			ofs << endl;
	}
	testData.close();
	ofs.close();
	return 0;
}
C++ Class ¶
//DoubleArray.h
#ifndef DOUBLE_ARRAY_H
#define DOUBLE_ARRAY_H

#include <cstddef>
#include <cstring>
#include <stdexcept>

// A fixed-size, zero-initialised two-dimensional table of ints, addressed as
// data()[row][col]. The table owns its storage and releases it on
// destruction; it cannot be copied, because a copy would share (and later
// free a second time) the same storage.
class DoubleArray{
public :
	// Creates a row x col table filled with zeros.
	// Throws std::invalid_argument when either dimension is negative.
	DoubleArray(int row, int col);
	~DoubleArray();

	// Row pointers: data()[r] points at the col ints of row r.
	int ** data();
	int rowSize() const;
	int colSize() const;
private :
	// Declared but never defined: copying is disabled.
	DoubleArray(const DoubleArray &);
	DoubleArray & operator=(const DoubleArray &);

	int ** data_;   // one pointer per row, each pointing into cells_
	int * cells_;   // all cells, stored row after row
	int row_size;
	int col_size;
};

// The member functions are inline so the header can be included from more
// than one source file without duplicate-definition link errors.
inline DoubleArray::DoubleArray(int row, int col)
	: data_(0), cells_(0), row_size(row), col_size(col){
	if(row < 0 || col < 0){
		throw std::invalid_argument("DoubleArray: negative dimension");
	}
	const std::size_t cols = static_cast<std::size_t>(col);
	// The trailing () value-initialises every cell to 0.
	cells_ = new int[static_cast<std::size_t>(row) * cols]();
	try{
		data_ = new int* [row];
	}catch(...){
		delete [] cells_;
		throw;
	}
	for(int i = 0; i < row; i++){
		data_[i] = cells_ + static_cast<std::size_t>(i) * cols;
	}
}

inline DoubleArray::~DoubleArray(){
	delete [] data_;
	delete [] cells_;
}

inline int ** DoubleArray::data(){
	return data_;
}

inline int DoubleArray::rowSize() const{
	return row_size;
}

inline int DoubleArray::colSize() const{
	return col_size;
}

#endif // DOUBLE_ARRAY_H
//main.cpp #include <iostream> #include <fstream> #include <cstdio> #include <cstdlib> #include "DoubleArray.h" #define INT_MAX 0x7fffffff; using namespace std; void readFile(DoubleArray & target, const char * filename); void findClass(DoubleArray & train_data, DoubleArray & train_class, DoubleArray & test_data); int main(){ char buf[1024*1024]; DoubleArray train_data(11293, 8165), train_class(11293, 20), test_data(7528, 8165); cout<<"read Train Data...."<<endl; readFile(train_data, "DataSet/train_data11293x8165"); cout<<"read Train class...."<<endl; readFile(train_class, "DataSet/train_class11293x20"); cout<<"read Test Data...."<<endl; readFile(test_data, "DataSet/test_data7528x8165"); cout<<"find Class..."<<endl; findClass(train_data, train_class, test_data); cout<<"end of find class"<<endl; } void readFile(DoubleArray & target, const char * filename){ FILE * file = fopen(filename, "r"); for(int i = 0; i < target.rowSize(); i++){ for( int j = 0; j < target.colSize(); j++){ if(j < target.colSize() - 1) fscanf(file, "%d,", &(target.data()[i][j])); else fscanf(file, "%d", &(target.data()[i][j])); } } fclose(file); } int getClass(int index, DoubleArray & train_class){ int * classData = train_class.data()[index]; for(int i = 0; i < train_class.colSize(); i++){ if(classData[i] == 1) return i; } return -1; } void findClass(DoubleArray & train_data, DoubleArray & train_class, DoubleArray & test_data){ cout<<"training..."<<endl; int count[20] = {0}; DoubleArray trained_data(20, 8165); for(int i = 0; i < train_data.rowSize(); i++){ int index = getClass(i, train_class); if(index == -1){ cerr<<"error occur!"<<endl; continue; } for(int j = 0; j < trained_data.colSize();j++) trained_data.data()[index][j] += train_data.data()[i][j]; count[index]++; } for(int i = 0; i < trained_data.rowSize(); i++){ for(int j = 0; j < trained_data.colSize(); j++){ trained_data.data()[i][j] /= count[i]; } } cout<<"running..."<<endl; for(int i = 0; i < test_data.rowSize(); i++){ int min_index = 
-1; int min = INT_MAX; for(int j = 0; j < trained_data.rowSize(); j++){ int sum = 0; for(int k =0; k < trained_data.colSize(); k++){ int v = test_data.data()[i][k] - trained_data.data()[j][k]; v = v > 0 ? v : -v; sum += v; } if(sum < min){ min_index = j; min = sum; } } cout<<min_index<<","; } cout<<endl; } //계속 작성중