#include<stdio.h>
#include<Windows.h>
#include<map>
#include<vector>
#include<string>
#include <set>
#include <time.h>
#define MAX_RESULT 256
using namespace std;
/************************************************************************/
/* 获得directory 下的所有文件名的map */
/************************************************************************/
void enumFiles(const char *directory,map<int ,string> &result)
{
WIN32_FIND_DATA findFileData;
HANDLE hFind;
//map<int,string> result;
char pattern[MAX_PATH];
int i=0;
//开始查找
strcpy(pattern,directory);
strcat(pattern,"\*");
hFind =FindFirstFile(pattern,&findFileData);
if(hFind==INVALID_HANDLE_VALUE)
{
return ;
}
else
{
do
{
string tmp(findFileData.cFileName);
if (tmp.find_first_of('.')==string::npos)
{
result.insert(make_pair(i++,tmp));
}
} while (FindNextFile(hFind,&findFileData)!=0);
}
//查找结束
}
// Shuffles, in place, the file names stored under keys 0..n-1 of `names`
// using a Fisher-Yates shuffle driven by rand(). The caller seeds rand().
static void shuffleFileNames(std::map<int, std::string> &names)
{
    const int total = static_cast<int>(names.size());
    for (int i = total - 1; i > 0; --i)
    {
        // Pick a partner in [0, i]; this keeps every permutation equally
        // likely (up to the quality of rand()), unlike swapping with any slot.
        const int swapIndex = rand() % (i + 1);
        if (swapIndex == i)
        {
            continue;
        }
        std::map<int, std::string>::iterator first = names.find(i);
        std::map<int, std::string>::iterator second = names.find(swapIndex);
        if (first == names.end() || second == names.end())
        {
            continue; // keys are expected to be dense; skip defensively
        }
        first->second.swap(second->second);
    }
}

// Reads "<fileNo>\t<label>" records from `path` into `labels` until the
// first record that does not parse (normally end of file).
// Returns false when the file cannot be opened.
static bool readLabels(const std::string &path, std::map<int, int> &labels)
{
    FILE *rfp = fopen(path.c_str(), "r");
    if (rfp == NULL)
    {
        return false;
    }
    int fileNo = 0;
    int label = 0;
    // Stop as soon as fscanf fails to read both fields, instead of assuming
    // a fixed number of records and inserting stale values.
    while (fscanf(rfp, "%d\t%d", &fileNo, &label) == 2)
    {
        labels.insert(std::make_pair(fileNo, label));
    }
    fclose(rfp);
    return true;
}

// Splits the files in <baseDir>\train into three equally sized random
// partitions: trainA, trainB and trainC. Each partition directory receives
// copies of its files, and a matching train<X>.doc.label file is written
// with the "<fileNo>\t<label>" lines taken from train.doc.label.
// Files left over when the count is not divisible by three are not copied.
// Returns 0 on success and -1 on the first error.
int main(int argc,char *argv[])
{
    (void)argc;
    (void)argv;

    const std::string baseDir("E:\\研究生课程\\下学期\\机器学习\\作业\\2_MLKD-Project2-Release\\MLKD-Project2-Release\\tc");
    const std::string sourceDir = baseDir + "\\train\\";

    // 1. Collect the names of all training files.
    std::map<int, std::string> result;
    enumFiles(sourceDir.c_str(), result);
    if (result.empty())
    {
        // enumFiles fails silently; without this check three empty
        // partitions would be created.
        printf("no files found in train directory!\n");
        return -1;
    }

    // 2. Randomise the order of the file names.
    srand(static_cast<unsigned int>(time(NULL)));
    shuffleFileNames(result);

    // 3. Load the labels of all training documents.
    std::map<int, int> trainLabels;
    if (!readLabels(baseDir + "\\train.doc.label", trainLabels))
    {
        printf("open train.doc.label error!\n");
        return -1;
    }

    // 4. Copy each slice of the shuffled list into its own directory and
    //    write the labels of the copied files next to it.
    const int part = 3;
    const int size = static_cast<int>(result.size()) / part;
    char suffix = 'A';
    for (int i = 0; i < part; i++, suffix++)
    {
        std::string desDir = baseDir + "\\train";
        desDir.push_back(suffix);
        const std::string fLabel = desDir + ".doc.label";

        // Create the directory before touching the label file, so that an
        // already existing partition is not left with a truncated label file.
        if (!CreateDirectory(desDir.c_str(), NULL))
        {
            printf("create directory error!\n");
            return -1;
        }

        FILE *fLabels = fopen(fLabel.c_str(), "w+");
        if (fLabels == NULL)
        {
            printf("write to train.doc.label error!\n");
            return -1;
        }

        // Copy `size` files into this partition.
        for (int j = 0; j < size; j++)
        {
            const int index = j + size * i;
            std::map<int, std::string>::const_iterator entry = result.find(index);
            if (entry == result.end())
            {
                printf("copying error!\n");
                fclose(fLabels);
                return -1;
            }

            const std::string sourceFile = sourceDir + entry->second;
            const std::string desFile = desDir + "\\" + entry->second;
            if (!CopyFile(sourceFile.c_str(), desFile.c_str(), FALSE))
            {
                printf("copying error!\n");
                fclose(fLabels);
                return -1;
            }
            printf("copying %d \n", index + 1);

            // The file name is the document number used as key in the label file.
            const int fileNo = atoi(entry->second.c_str());
            std::map<int, int>::const_iterator labelPos = trainLabels.find(fileNo);
            if (labelPos == trainLabels.end())
            {
                // Dereferencing end() would be undefined behaviour.
                printf("no label found for file %d!\n", fileNo);
                fclose(fLabels);
                return -1;
            }
            fprintf(fLabels, "%d\t%d\n", labelPos->first, labelPos->second);
        }
        fclose(fLabels);
    }
    return 0;
}