java 数据流相关
直接代码吧,很清楚:
package mahout;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.IOUtils;
/**
 * One-off utilities for preparing Mahout experiment data:
 * splitting a large local dataset file and downloading a ratings dataset.
 */
public class GetLibData {

    /**
     * Copies the first 100000 lines of the (very large) links-simple-sorted
     * dataset into a smaller part file so it can be handled comfortably.
     *
     * @param args unused
     * @throws Exception on any I/O failure reading the source or writing the part file
     */
    public static void main(String[] args) throws Exception {
        File src = new File("D:\\hadoop相关\\数据集\\links-simple-sorted\\links-simple-sorted.txt");
        // try-with-resources guarantees both streams close even if an I/O error
        // occurs mid-copy (the original closed them manually and leaked on exception;
        // it also closed the underlying FileInputStream before the reader wrapping it).
        // Charset is pinned to UTF-8 instead of the platform default — the dataset
        // is plain link data; NOTE(review): confirm the source file's encoding.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(src), StandardCharsets.UTF_8));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(new File("data/wike-part100000")),
                             StandardCharsets.UTF_8))) {
            String line;
            int count = 0;
            // "< 100000" copies exactly 100000 lines, matching the "part100000"
            // output name; the original "< 100001" wrote one extra line.
            while ((line = reader.readLine()) != null && count < 100000) {
                writer.write(line);
                writer.write("\n");
                count++;
            }
        }
    }

    /**
     * Downloads the ratings dataset over HTTP into {@code data/test.dat}.
     *
     * @throws MalformedURLException if the hard-coded URL is invalid
     * @throws FileNotFoundException if the output file cannot be created
     * @throws IOException on any other network or disk failure
     */
    private static void test1() throws MalformedURLException, IOException,
            FileNotFoundException {
        URL libUrl = new URL("http://www.occamslab.com/petricek/data/ratings.dat");
        // try-with-resources replaces the original manual close() calls,
        // which leaked both streams if the copy threw mid-transfer.
        try (InputStream in = libUrl.openStream();
             FileOutputStream fos = new FileOutputStream(new File("data/test.dat"))) {
            // Hadoop helper: copies in -> fos with a 4096-byte buffer.
            IOUtils.copyBytes(in, fos, 4096);
        }
    }
}
当然数据集可以自己制造。
浙公网安备 33010602011771号