Loading

Java_IO流实验

实验题目链接:Java第09次实验(IO流)

0. 字节流与二进制文件

我的代码

package experiment.io;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Round-trips one {@code Student} record through a binary file using
 * {@link DataOutputStream} and {@link DataInputStream}.
 */
public class Experiment {
	/** Binary file the record is written to and then read back from. */
	private static final String DATA_FILE = "student.data";

	/**
	 * Writes one student to the data file, reads it back into a new object and prints it.
	 *
	 * @param args unused
	 * @throws FileNotFoundException if the data file cannot be opened for writing or reading
	 */
	public static void main(String[] args) throws FileNotFoundException {
		try {
			writeStudent(new Student(2018211, "张三", 15, 3));
			Student newSt = readStudent();
			System.out.println(newSt.toString());
		} catch (FileNotFoundException e) {
			// Keep the declared contract: a file that cannot be opened is reported to the caller.
			throw e;
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Writes the student's id, name, age and grade, in that order, to the data file.
	 * The stream is closed (and therefore flushed) before this method returns, so the
	 * data is completely on disk before anything tries to read it.
	 */
	private static void writeStudent(Student st) throws IOException {
		try (FileOutputStream outFile = new FileOutputStream(DATA_FILE);
				DataOutputStream dataOutput = new DataOutputStream(outFile)) {
			dataOutput.writeInt(st.getId());
			dataOutput.writeUTF(st.getName());
			dataOutput.writeInt(st.getAge());
			dataOutput.writeDouble(st.getGrade());
		}
	}

	/**
	 * Reads id, name, age and grade from the data file, in the order they were written,
	 * into a freshly created student.
	 */
	private static Student readStudent() throws IOException {
		try (FileInputStream inFile = new FileInputStream(DATA_FILE);
				DataInputStream dataInput = new DataInputStream(inFile)) {
			Student newSt = new Student();
			newSt.setId(dataInput.readInt());
			newSt.setName(dataInput.readUTF());
			newSt.setAge(dataInput.readInt());
			newSt.setGrade(dataInput.readDouble());
			return newSt;
		}
	}
}

我的总结

  • 二进制文件与文本文件的区别:
    • 二进制文件可以存储char/int/long等各种类型的值,存储的是这些值在内存中的二进制表示,不同类型占用的字节数不同(如int占4字节,long占8字节);而文本文件存储的是字符,每个字符按某种编码方式转换成字节后写入,如ASCII码中每个字符占1字节。
    • 二进制文件是将内存中的数据以二进制形式原样放到文件中,读取时也不需要经过转换就可以直接放到内存中,读写速度快,但是如果不用专门的程序按相同的格式解析,其可读性差;而文本文件会有一个编码方式,如ASCII码,写入时先将内存中的数据转化为对应编码,再将编码写入文件,读取时需要解码,再将对应字符读出,读写速度较慢,但是可读性好。
  • try...catch...finally注意事项 :
    • 即使try或catch语句中有return语句,也要执行完finally中的语句,程序才可能结束;finally常用于关闭资源。
    • 执行一次try块最多只会执行一个catch块(第一个与异常类型匹配的catch块)。
  • 使用try-with-resources关闭资源:可以简化关闭资源的步骤,直接将初始化资源的代码写在try后的括号中即可,资源会在try块结束时自动关闭。

1. 字符流与文本文件:使用 PrintWriter(写),BufferedReader(读)

我的代码

package experiment.io;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

/**
 * Text-file experiment: reads {@code Student} records from a UTF-8 text file with
 * {@link BufferedReader} and writes records to a text file with {@link PrintWriter}.
 */
public class ExperimentFirst {
	// Not referenced by the static methods below; kept so code that touches it still compiles.
	List<Student> students = new ArrayList<Student>();

	/**
	 * Reads whitespace-separated student records ({@code id name age grade}, one per line)
	 * from the given UTF-8 file and prints every record that could be parsed.
	 * A line with fewer than four fields, or with a non-numeric id, age or grade, is
	 * reported on standard output and skipped.
	 *
	 * @param fileName path of the text file to read
	 * @throws FileNotFoundException if the file cannot be opened
	 * @throws UnsupportedEncodingException if the UTF-8 charset is not available
	 */
	public static void ListreadStudents(String fileName) throws FileNotFoundException, UnsupportedEncodingException {
		List<Student> students = new ArrayList<Student>();
		// Decoding explicitly as UTF-8 avoids garbled Chinese text on platforms with another default charset.
		try (FileInputStream file = new FileInputStream(fileName);
				InputStreamReader in = new InputStreamReader(file, "UTF-8");
				BufferedReader buf = new BufferedReader(in)) {
			String s;
			while ((s = buf.readLine()) != null) {
				String[] item = s.split("\\s+");
				try {
					// The grade is parsed as a double so that values such as "99.5",
					// which main() writes, can be read back.
					students.add(new Student(Integer.parseInt(item[0]), item[1], Integer.parseInt(item[2]),
							Double.parseDouble(item[3])));
				} catch (ArrayIndexOutOfBoundsException | NumberFormatException e) {
					// Too few fields on the line, or a field that is not a number: report and skip the line.
					System.out.println(e);
				}
			}
			for (Student e : students) {
				System.out.println(e.toString());
			}
		} catch (FileNotFoundException | UnsupportedEncodingException e) {
			// These two are part of the declared contract, so hand them to the caller.
			throw e;
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads a file name from standard input and prints the students stored in that file,
	 * then asks for a second file name and writes three sample students to it, one
	 * space-separated record per line, encoded as UTF-8.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try (Scanner sc = new Scanner(System.in)) {
			String fileName = sc.nextLine();
			try {
				ExperimentFirst.ListreadStudents(fileName);
			} catch (FileNotFoundException | UnsupportedEncodingException e) {
				e.printStackTrace();
			}

			// Sample records written to the text file with PrintWriter.
			List<Student> stuList = new ArrayList<>();
			stuList.add(new Student(1, "x", 18, 99.5));
			stuList.add(new Student(2, "x", 19, 100.0));
			stuList.add(new Student(3, "x", 20, 59.5));

			System.out.println("请输入要写入数据的文件名:");
			fileName = sc.nextLine();
			// try-with-resources closes (and flushes) the writer, and never touches it when
			// the file could not be opened. UTF-8 matches the charset used for reading.
			try (PrintWriter printWriter = new PrintWriter(fileName, "UTF-8")) {
				for (Student e : stuList) {
					printWriter.write(String.valueOf(e.getId()) + " ");
					printWriter.write(e.getName() + " ");
					printWriter.write(String.valueOf(e.getAge()) + " ");
					printWriter.write(String.valueOf(e.getGrade()) + "\n");
				}
			} catch (FileNotFoundException | UnsupportedEncodingException e) {
				e.printStackTrace();
			}
		}
	}
}

我的总结

  • 中文乱码问题可以通过InputStreamReader解决:在构造InputStreamReader时传入编码方式(如"UTF-8"),这样就不会像FileReader那样使用平台默认编码来解码,从而解决中文乱码问题。
  • 在解决错误行问题时,对应的错误情况应用相应的catch块抓取即可解决。比如每行只有3个数据的错误,即说明它会出现数组越界的问题,增加ArrayIndexOutOfBoundsException的catch块即可。

2. 缓冲流(结合使用JUnit进行测试)

我的代码

代码1:使用PrintWriter往文件中写数据

package experiment.io;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.util.Random;

/**
 * Fills "student.txt" with ten million pseudo-random integers in the range 0..10,
 * one per line.
 */
public class ExperimentSecond {
	public static void main(String[] args) {
		final int total = 1000_0000;
		// Fixed seed, so every run produces the same sequence of numbers.
		final Random generator = new Random(100);
		try (PrintWriter writer = new PrintWriter(new FileOutputStream("student.txt"))) {
			int written = 0;
			while (written < total) {
				writer.println(generator.nextInt(11));
				written++;
			}
		} catch (FileNotFoundException notFound) {
			notFound.printStackTrace();
		}
	}
}

代码2:使用JUnit测试 BufferedReader 与 Scanner 的读取效率

package experiment.io;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Scanner;

import org.junit.jupiter.api.Test;

/**
 * Reads "student.txt" line by line with two different readers and, for each one,
 * prints the number of lines, the sum of the integers and their average.
 */
class ReadTest {

	/** Counts and sums the integer lines of the file using a {@link BufferedReader}. */
	@Test
	void testBufferedReader() throws IOException {
		int lines = 0;
		int total = 0;

		try (FileInputStream source = new FileInputStream("student.txt");
				BufferedReader reader = new BufferedReader(new InputStreamReader(source))) {
			for (String line = reader.readLine(); line != null; line = reader.readLine()) {
				lines++;
				total += Integer.parseInt(line);
			}
		}
		System.out.printf("testBufferedReader: count = %d, sum = %d, avg = %.5f\n", lines, total, total * 1.0 / lines);
	}

	/** Counts and sums the integer lines of the file using a {@link Scanner}. */
	@Test
	void testScanner() throws FileNotFoundException {
		int lines = 0;
		int total = 0;

		try (Scanner scanner = new Scanner(new FileInputStream("student.txt"))) {
			while (scanner.hasNextLine()) {
				lines++;
				total += Integer.parseInt(scanner.nextLine());
			}
		}
		System.out.printf("testScanner: count = %d, sum = %d, avg = %.5f\n", lines, total, total * 1.0 / lines);
	}
}

我的总结

  • 在JUnit中测试代码时,要在被测试的方法上加上@Test注解,否则该方法不会被当作测试方法运行。
  • 格式化输出的format方法基本上和printf一致。
  • 测试代码效率时,应设置基本相同的代码,以确保测试变量的单一性。
  • Scanner的方法中,hasNextXXX应该与nextXXX对应使用,否则会提示错误信息。

3. 字节流之对象流

我的代码

package experiment.io;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * Object-stream experiment: stores a list of {@code Student} objects in a file as one
 * serialized array and reads it back.
 */
public class ExperimentThird {
	/**
	 * Serializes the given students to "student.txt" as a single {@code Student[]}
	 * using {@link ObjectOutputStream}.
	 *
	 * @param stuList the students to write; every element must be a {@code Student}
	 * @throws IOException if the file cannot be opened or written
	 * @throws ClassCastException if an element is not a {@code Student}; the file is
	 *         not touched in that case
	 */
	public static void writeStudent(List<Object> stuList) throws IOException {
		// Convert first: a bad element must not leave an opened, truncated file behind.
		Student[] students = new Student[stuList.size()];
		for (int i = 0; i < students.length; i++) {
			students[i] = (Student) stuList.get(i);
		}
		try (FileOutputStream file = new FileOutputStream("student.txt");
				ObjectOutputStream out = new ObjectOutputStream(file)) {
			out.writeObject(students);
		}
	}

	/**
	 * Reads a serialized {@code Student[]} from the given file using
	 * {@link ObjectInputStream} and returns its elements as a list.
	 * Any I/O error other than a missing file is printed, and an empty list is returned.
	 *
	 * @param fileName the file to read
	 * @return a list with the students read from the file; empty if nothing could be read
	 * @throws FileNotFoundException if the file cannot be opened
	 * @throws ClassNotFoundException if the class of the serialized object cannot be found
	 */
	public static List<Object> readStudents(String fileName) throws FileNotFoundException, ClassNotFoundException {
		List<Object> newStuList = new ArrayList<>();
		Student[] students = null;
		// Both streams are resources, so the file is closed even if the stream header is invalid.
		try (FileInputStream file = new FileInputStream(fileName);
				ObjectInputStream in = new ObjectInputStream(file)) {
			students = (Student[]) in.readObject();
		} catch (FileNotFoundException e) {
			// Part of the declared contract: let the caller deal with a missing file.
			throw e;
		} catch (IOException e) {
			e.printStackTrace();
		}
		// students is still null when reading failed (or a null reference was stored).
		if (students != null) {
			for (Student st : students) {
				newStuList.add(st);
			}
		}
		return newStuList;
	}

	/**
	 * Writes three sample students to "student.txt", reads them back and prints them.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException {
		List<Object> stuList = new ArrayList<>();
		stuList.add(new Student(1, "a", 18, 10));
		stuList.add(new Student(2, "b", 19, 11));
		stuList.add(new Student(3, "c", 20, 12));
		ExperimentThird.writeStudent(stuList);
		List<Object> newStuList = ExperimentThird.readStudents("student.txt");
		for (Object e : newStuList) {
			System.out.println(e);
		}
	}
}

我的总结

  • 使用ObjectInputStream和ObjectOutputStream读写文件时,读写的对象对应的那个类应该进行序列化,即实现Serializable接口。上面代码中的Student类就实现了该接口。
  • 序列化的作用是把内存中对象的状态转换成字节序列保存下来(例如写入文件),之后可以通过反序列化把保存的对象状态再读出来。序列化时只保存对象的状态(字段的值),不保存对象的方法。
  • 在读写序列化后的对象时,可以通过writeObject和readObject的方法读取一个数组,如例子中的Student[] students 数组。

5. 文件操作

我的代码

package experiment.io;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Scanner;

/**
 * Recursively searches a directory tree for entries whose name contains a keyword.
 *
 * @version 1.00 05 Sep 1997
 * @author Gary Cornell
 */
public class ExperimentFifth {
	/**
	 * Prints the canonical path of every file or directory below {@code dir} whose name
	 * contains {@code fileName}, descending into sub-directories.
	 * A path that is not a directory, does not exist or cannot be listed is reported on
	 * standard error and skipped.
	 *
	 * @param dir      directory to start the search in
	 * @param fileName text that a file or directory name must contain to be printed
	 */
	public static void findFile(Path dir,String fileName) {
		File pathName = new File(dir.toString());
		// list() returns null instead of throwing when the path is not a directory or cannot be read.
		String[] fileNames = pathName.list();
		if (fileNames == null) {
			System.err.println("无法读取目录:" + pathName.getPath());
			return;
		}

		for (String name : fileNames) {
			File f = new File(pathName.getPath(), name);
			if (name.contains(fileName)) {
				try {
					System.out.println(f.getCanonicalPath());
				} catch (IOException e) {
					// One unresolvable path must not stop the rest of the search.
					e.printStackTrace();
				}
			}
			if (f.isDirectory()) {
				findFile(f.toPath(), fileName);
			}
		}
	}

	/**
	 * Asks for a root path and a keyword on standard input and prints all matching paths.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try (Scanner sc = new Scanner(System.in)) {
			System.out.println("输入根路径信息:");
			String path = sc.nextLine();
			Path dir = Paths.get(path);
			System.out.println("输入要查找的文件名:");
			String fileName = sc.nextLine();
			ExperimentFifth.findFile(dir, fileName);
		}
	}
}

在"G:/eclipse-workspace"根路径下找到的所有包含“Experiment”关键字的文件路径如下:

输入根路径信息:
G:\\eclipse-workspace
输入要查找的文件名:
Experiment
G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\Experiment (1).launch
G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\Experiment.launch
G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentFifth.launch
G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentFirst.launch
G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentSecond.launch
G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentSixth.launch
G:\eclipse-workspace\.metadata\.plugins\org.eclipse.debug.core\.launches\ExperimentThird.launch
G:\eclipse-workspace\IO\bin\experiment\io\Experiment.class
G:\eclipse-workspace\IO\bin\experiment\io\ExperimentFifth.class
G:\eclipse-workspace\IO\bin\experiment\io\ExperimentFirst.class
G:\eclipse-workspace\IO\bin\experiment\io\ExperimentSecond.class
G:\eclipse-workspace\IO\bin\experiment\io\ExperimentSixth.class
G:\eclipse-workspace\IO\bin\experiment\io\ExperimentThird.class
G:\eclipse-workspace\IO\src\experiment\io\Experiment.java
G:\eclipse-workspace\IO\src\experiment\io\ExperimentFifth.java
G:\eclipse-workspace\IO\src\experiment\io\ExperimentFirst.java
G:\eclipse-workspace\IO\src\experiment\io\ExperimentSecond.java
G:\eclipse-workspace\IO\src\experiment\io\ExperimentSixth.java
G:\eclipse-workspace\IO\src\experiment\io\ExperimentThird.java
G:\eclipse-workspace\javaLearn\bin\ExperimentSix
G:\eclipse-workspace\javaLearn\src\ExperimentSix

我的总结

  • 该查找路径的方法是通过递归实现的:首先获得一个目录下的所有文件和子目录的名称,存入数组中,再遍历数组中的每一项;如果名称包含fileName关键字,则输出其路径;如果该项是一个目录,则递归地继续在该目录下查找,以此类推。
  • 这里要注意的一点是:通过File中的list()方法可以获得目录下所有文件和子目录的名称(字符串数组)。

6. 正则表达式

我的代码

package experiment.io;

import java.io.*;
import java.net.*;
import java.util.regex.*;

/**
 * This program reads a web page and prints every substring that matches a regular
 * expression. As configured it matches {@code img} tags whose {@code src} ends in
 * jpg or gif. Start the program as <br>
 * java ExperimentSixth URL
 * 
 * @version 1.01 2004-06-04
 * @author Cay Horstmann
 */
public class ExperimentSixth {
	/**
	 * Reads the page at the URL given as first argument (or a default page when no
	 * argument is given) and prints each match of the image pattern on its own line.
	 * A locally saved copy of a page can be used by passing a {@code file:} URL.
	 *
	 * @param args optional: the URL of the page to scan
	 */
	public static void main(String[] args) {
		// get URL string from command line or use default
		String urlString = args.length > 0 ? args[0] : "http://cec.jmu.edu.cn/index.jsp";

		// read contents into string builder; try-with-resources closes the stream afterwards
		StringBuilder input = new StringBuilder();
		// NOTE(review): the page is decoded with the platform default charset, as before —
		// confirm the page's real encoding if non-ASCII matches matter.
		try (InputStream raw = new URL(urlString).openStream();
				Reader in = new BufferedReader(new InputStreamReader(raw))) {
			int ch;
			while ((ch = in.read()) != -1) {
				input.append((char) ch);
			}
		} catch (IOException e) {
			e.printStackTrace();
			return;
		}

		// Alternative expressions from the exercise; pass one of them to Pattern.compile to try it.
		String patternString = "<a\\s+href\\s*=\\s*(\"[^\"]*\"|[^\\s>]*)\\s*>"; // <a href=...> tags
		String patternImgString = "[+-]?[0-9]+"; // optionally signed runs of digits
		String patternChineseString = "[\u4e00-\u9fa5]"; // single Chinese characters
		String patternPictureString = "img(.*?)(src=)(.*?)(jpg|gif)\"";// image tags (jpg or gif)

		try {
			// search for all occurrences of pattern
			Pattern pattern = Pattern.compile(patternPictureString, Pattern.CASE_INSENSITIVE);
			Matcher matcher = pattern.matcher(input);
			while (matcher.find()) {
				System.out.println(matcher.group());
			}
		} catch (PatternSyntaxException e) {
			e.printStackTrace();
		}
	}
}

匹配到的“集美大学计算机工程学院”网站下的图片信息:

IMG src="images/jimei12.jpg"
IMG src="images/1_dh_01.gif"
IMG src="images/1_dh_03.gif"
IMG src="images/1_dhs_01.gif"
IMG src="images/1_dhs_03.gif"
IMG src="images/1_body_01.gif"
IMG border="0" src="images/1_more.gif"
IMG src="images/1_huandeng_01.gif"
imgdiv" style="padding:0px;border:0px;"><a id="u_u2_url" target="_blank"><img id="u_u2_pic" border=0  src="/system/resource/images/space.gif"
IMG border="0" src="images/1_more.gif"
IMG src="images/1_ad001.gif"
IMG src="images/1_ico001.gif"
IMG border="0" src="images/1_more.gif"
IMG src="images/1_title_index3.gif"
IMG src="images/1_ico001.gif"
IMG border="0" src="images/1_more.gif"
IMG src="images/1_title_index3.gif"
IMG src="images/1_ico001.gif"
IMG border="0" src="images/1_more.gif"
IMG src="images/1_title_index3.gif"
IMG src="images/1_ico001.gif"
IMG border="0" src="images/1_more.gif"
IMG src="images/1_title_index3.gif"
IMG border="0" src="images/1_more.gif"
IMG border="0" src="images/1_more.gif"
IMG src="images/1_list_body_bg02.gif"
IMG src="images/1_yqlj_1.gif"
IMG src="images/1_yqlj_3.gif"


我的总结

img(.*?)(src=)(.*?)(jpg|gif)\"

  • 如上代码为匹配图片的正则表达式。首先img匹配以img或IMG开头的数据(因为编译时使用了Pattern.CASE_INSENSITIVE,匹配不区分大小写);(.*?)表示任意字符尽可能少地匹配;(src=)匹配一次src=;接下来的(.*?)含义同上;(jpg|gif)匹配一次jpg或gif;\" 即匹配一次双引号。
[+-]?[0-9]+

  • 上述代码匹配所有数字字符串。[+-]?表示匹配0次或1次+、-中的一个;[0-9]+表示匹配1次或多次数字字符。
posted @ 2019-12-04 14:02  August_丶  阅读(453)  评论(0编辑  收藏  举报