帮同学做一个作业,写了两个程序,完成的功能类似LINUX中的head和tail命令,可以获取一个文件中间的几行。这两个程序实现的是同一个功能,但是第一个程序用的是一个类似LIBC中fgetc的函数,到文件里一个一个字符地取;而第二个程序则是用了行缓存,通过INIT_BUFF_SIZE和INC_BUFF_SIZE来控制缓存的初始大小和扩展大小。程序旨在说明问题,如果代码上有什么丑陋的地方,请包涵。
未做buffer的程序代码:
做了buffer的程序代码:
最后用了一个shell脚本来测试两个的运行时间,比较其优劣(其中的BigFile.txt是一个很大的文件):
用这个脚本跑下来,前者要比后者慢一倍。可见buffer的好处。如果调高buffer的size,效果将更明显。
未做buffer的程序代码:
1#include <stdio.h>
2#include <stdlib.h>
3#include <sys/types.h>
4#include <sys/stat.h>
5#include <string.h>
6#include <unistd.h>
7#include <fcntl.h>
8
9#define CHAR_BUFFER 1
10#define BUFFER_SIZE 1024
11#define LINE_BUFFER 128
12#define MAX_FILE_LINE_NUM 1000000
13
14int rw_ptr; // read-write pointer
15int lineNum; // number of lines in the file
16
17int position_rw_pntr(int fd, int num_lines);
18char* get_next_line(int fd);
19int get_next_char(int fd);
20
21int main(int argc, char *argv[])
22{
23 int midLines;
24 int i = 0;
25 char* lineString;
26 int succ;
27 int fd;
28 if (argc != 3) {
29 printf("Usage: lab2 <lines> <file>\n");
30 return -1;
31 }
32
33 midLines = atoi(argv[1]);
34 if ((fd = open(argv[2], O_RDONLY)) == -1)
35 {
36 perror("Open file error");
37 return EXIT_FAILURE;
38 }
39
40 if (position_rw_pntr(fd, midLines) == -1)
41 {
42 perror("Position_rw_pntr Error");
43 return EXIT_FAILURE;
44 }
45
46 while (i != midLines && i != lineNum)
47 {
48 lineString = get_next_line(fd);
49 printf("%s\n", lineString);
50 i++;
51 }
52
53 free(lineString);
54 close(fd);
55
56 return EXIT_SUCCESS;
57}
58
59int position_rw_pntr(int fd, int num_lines)
60{
61 int start;
62 int i, n;
63 char buf[BUFFER_SIZE];
64 int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));
65 int byteNum = 0;
66
67 if (lseek(fd, 0, SEEK_SET) == -1)
68 {
69 return -1;
70 }
71
72 lineNum = 0;
73 lineCount[lineNum] = 0;
74 while ((n = read(fd, buf, BUFFER_SIZE)) != 0 )
75 {
76 for (i = 0; i < n; i++)
77 {
78 byteNum++;
79 if (buf[i] == '\n')
80 {
81 lineCount[++lineNum] = byteNum;
82 }
83 }
84 }
85
86 if (lineNum < num_lines)
87 {
88 rw_ptr = 0;
89 }
90 else
91 {
92 if (lineNum % 2)
93 {
94 rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];
95 }
96 else
97 {
98 rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];
99 }
100 }
101
102 return 1;
103}
104
105char* get_next_line(int fd)
106{
107 int n, i;
108 char byteChar;
109 char* buf;
110
111 if (lseek(fd, rw_ptr, SEEK_SET) == -1)
112 {
113 return NULL;
114 }
115
116 buf = (char *)malloc(LINE_BUFFER * sizeof(char));
117
118 for (i = 0; i < LINE_BUFFER; i++)
119 {
120 byteChar = (char)get_next_char(fd);
121 if (byteChar == EOF || byteChar == '\n')
122 {
123 buf[i] = '\0';
124 break;
125 }
126 else
127 {
128 buf[i] = byteChar;
129 }
130 }
131
132 return buf;
133}
134
135int get_next_char(int fd)
136{
137 char charBuf[1];
138 if (lseek(fd, rw_ptr, SEEK_SET) == -1)
139 {
140 return EOF;
141 }
142
143 rw_ptr++;
144 if (read(fd, charBuf, 1) == 0)
145 {
146 return EOF;
147 }
148 return charBuf[0];
149}
150
2#include <stdlib.h>
3#include <sys/types.h>
4#include <sys/stat.h>
5#include <string.h>
6#include <unistd.h>
7#include <fcntl.h>
8
9#define CHAR_BUFFER 1
10#define BUFFER_SIZE 1024
11#define LINE_BUFFER 128
12#define MAX_FILE_LINE_NUM 1000000
13
14int rw_ptr; // read-write pointer
15int lineNum; // number of lines in the file
16
17int position_rw_pntr(int fd, int num_lines);
18char* get_next_line(int fd);
19int get_next_char(int fd);
20
21int main(int argc, char *argv[])
22{
23 int midLines;
24 int i = 0;
25 char* lineString;
26 int succ;
27 int fd;
28 if (argc != 3) {
29 printf("Usage: lab2 <lines> <file>\n");
30 return -1;
31 }
32
33 midLines = atoi(argv[1]);
34 if ((fd = open(argv[2], O_RDONLY)) == -1)
35 {
36 perror("Open file error");
37 return EXIT_FAILURE;
38 }
39
40 if (position_rw_pntr(fd, midLines) == -1)
41 {
42 perror("Position_rw_pntr Error");
43 return EXIT_FAILURE;
44 }
45
46 while (i != midLines && i != lineNum)
47 {
48 lineString = get_next_line(fd);
49 printf("%s\n", lineString);
50 i++;
51 }
52
53 free(lineString);
54 close(fd);
55
56 return EXIT_SUCCESS;
57}
58
59int position_rw_pntr(int fd, int num_lines)
60{
61 int start;
62 int i, n;
63 char buf[BUFFER_SIZE];
64 int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));
65 int byteNum = 0;
66
67 if (lseek(fd, 0, SEEK_SET) == -1)
68 {
69 return -1;
70 }
71
72 lineNum = 0;
73 lineCount[lineNum] = 0;
74 while ((n = read(fd, buf, BUFFER_SIZE)) != 0 )
75 {
76 for (i = 0; i < n; i++)
77 {
78 byteNum++;
79 if (buf[i] == '\n')
80 {
81 lineCount[++lineNum] = byteNum;
82 }
83 }
84 }
85
86 if (lineNum < num_lines)
87 {
88 rw_ptr = 0;
89 }
90 else
91 {
92 if (lineNum % 2)
93 {
94 rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];
95 }
96 else
97 {
98 rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];
99 }
100 }
101
102 return 1;
103}
104
105char* get_next_line(int fd)
106{
107 int n, i;
108 char byteChar;
109 char* buf;
110
111 if (lseek(fd, rw_ptr, SEEK_SET) == -1)
112 {
113 return NULL;
114 }
115
116 buf = (char *)malloc(LINE_BUFFER * sizeof(char));
117
118 for (i = 0; i < LINE_BUFFER; i++)
119 {
120 byteChar = (char)get_next_char(fd);
121 if (byteChar == EOF || byteChar == '\n')
122 {
123 buf[i] = '\0';
124 break;
125 }
126 else
127 {
128 buf[i] = byteChar;
129 }
130 }
131
132 return buf;
133}
134
135int get_next_char(int fd)
136{
137 char charBuf[1];
138 if (lseek(fd, rw_ptr, SEEK_SET) == -1)
139 {
140 return EOF;
141 }
142
143 rw_ptr++;
144 if (read(fd, charBuf, 1) == 0)
145 {
146 return EOF;
147 }
148 return charBuf[0];
149}
150
做了buffer的程序代码:
1#include <stdio.h>
2#include <stdlib.h>
3#include <sys/types.h>
4#include <sys/stat.h>
5#include <string.h>
6#include <unistd.h>
7#include <fcntl.h>
8
9#define BUFFER_SIZE 1024
10#define INIT_BUFF_SIZE 64
11#define INC_BUFF_SIZE 8
12#define LINE_BUFFER 128
13#define MAX_FILE_LINE_NUM 1000000
14
15int rw_ptr; // read-write pointer
16int lineNum; // number of lines in the file
17char lineBuf[INIT_BUFF_SIZE];
18int linePtr;
19int curBufSize;
20
21int position_rw_pntr(int fd, int num_lines);
22char* get_next_line(int fd);
23int get_next_char(int fd);
24
25int main(int argc, char *argv[])
26{
27 int midLines;
28 int i = 0;
29 char* lineString;
30 int succ;
31 int fd;
32 if (argc != 3) {
33 printf("Usage: lab2 <lines> <file>\n");
34 return -1;
35 }
36
37 midLines = atoi(argv[1]);
38 if ((fd = open(argv[2], O_RDONLY)) == -1)
39 {
40 perror("Open file error");
41 return EXIT_FAILURE;
42 }
43
44 if (position_rw_pntr(fd, midLines) == -1)
45 {
46 perror("Position_rw_pntr Error");
47 return EXIT_FAILURE;
48 }
49
50 while (i != midLines && i != lineNum)
51 {
52 lineString = get_next_line(fd);
53 printf("%s\n", lineString);
54 i++;
55 }
56
57 free(lineString);
58 close(fd);
59
60 return EXIT_SUCCESS;
61}
62
63int position_rw_pntr(int fd, int num_lines)
64{
65 int start;
66 int i, n;
67 char buf[BUFFER_SIZE];
68 int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));
69 int byteNum = 0;
70
71 if (lseek(fd, 0, SEEK_SET) == -1)
72 {
73 return -1;
74 }
75
76 lineNum = 0;
77 lineCount[lineNum] = 0;
78 while ((n = read(fd, buf, BUFFER_SIZE)) != 0 )
79 {
80 for (i = 0; i < n; i++)
81 {
82 byteNum++;
83 if (buf[i] == '\n')
84 {
85 lineCount[++lineNum] = byteNum;
86 }
87 }
88 }
89
90 if (lineNum < num_lines)
91 {
92 rw_ptr = 0;
93 }
94 else
95 {
96 if (lineNum % 2)
97 {
98 rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];
99 }
100 else
101 {
102 rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];
103 }
104 }
105
106 return 1;
107}
108
109char* get_next_line(int fd)
110{
111 int n, i;
112 char byteChar;
113 char* buf;
114
115 if (lseek(fd, rw_ptr, SEEK_SET) == -1)
116 {
117 return NULL;
118 }
119
120 linePtr = 0;
121 buf = (char *)malloc(LINE_BUFFER * sizeof(char));
122
123 for (i = 0; i < LINE_BUFFER; i++)
124 {
125 byteChar = (char)get_next_char(fd);
126 if (byteChar == EOF || byteChar == '\n')
127 {
128 buf[i] = '\0';
129 break;
130 }
131 else
132 {
133 buf[i] = byteChar;
134 }
135 }
136
137 return buf;
138}
139
140int get_next_char(int fd)
141{
142 int n;
143 if (lseek(fd, rw_ptr, SEEK_SET) == -1)
144 {
145 return EOF;
146 }
147
148 if (linePtr == 0)
149 {
150 if ((curBufSize = read(fd, lineBuf, INIT_BUFF_SIZE)) == 0)
151 {
152 return EOF;
153 }
154 linePtr = 0;
155 }
156
157 if (linePtr < curBufSize)
158 {
159 rw_ptr++;
160 return lineBuf[linePtr++];
161 }
162 else
163 {
164 if ((curBufSize = read(fd, lineBuf, INC_BUFF_SIZE)) == 0)
165 {
166 return EOF;
167 }
168 else
169 {
170 rw_ptr++;
171 linePtr = 0;
172 return lineBuf[linePtr++];
173 }
174 }
175}
176
177
2#include <stdlib.h>
3#include <sys/types.h>
4#include <sys/stat.h>
5#include <string.h>
6#include <unistd.h>
7#include <fcntl.h>
8
9#define BUFFER_SIZE 1024
10#define INIT_BUFF_SIZE 64
11#define INC_BUFF_SIZE 8
12#define LINE_BUFFER 128
13#define MAX_FILE_LINE_NUM 1000000
14
15int rw_ptr; // read-write pointer
16int lineNum; // number of lines in the file
17char lineBuf[INIT_BUFF_SIZE];
18int linePtr;
19int curBufSize;
20
21int position_rw_pntr(int fd, int num_lines);
22char* get_next_line(int fd);
23int get_next_char(int fd);
24
25int main(int argc, char *argv[])
26{
27 int midLines;
28 int i = 0;
29 char* lineString;
30 int succ;
31 int fd;
32 if (argc != 3) {
33 printf("Usage: lab2 <lines> <file>\n");
34 return -1;
35 }
36
37 midLines = atoi(argv[1]);
38 if ((fd = open(argv[2], O_RDONLY)) == -1)
39 {
40 perror("Open file error");
41 return EXIT_FAILURE;
42 }
43
44 if (position_rw_pntr(fd, midLines) == -1)
45 {
46 perror("Position_rw_pntr Error");
47 return EXIT_FAILURE;
48 }
49
50 while (i != midLines && i != lineNum)
51 {
52 lineString = get_next_line(fd);
53 printf("%s\n", lineString);
54 i++;
55 }
56
57 free(lineString);
58 close(fd);
59
60 return EXIT_SUCCESS;
61}
62
63int position_rw_pntr(int fd, int num_lines)
64{
65 int start;
66 int i, n;
67 char buf[BUFFER_SIZE];
68 int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));
69 int byteNum = 0;
70
71 if (lseek(fd, 0, SEEK_SET) == -1)
72 {
73 return -1;
74 }
75
76 lineNum = 0;
77 lineCount[lineNum] = 0;
78 while ((n = read(fd, buf, BUFFER_SIZE)) != 0 )
79 {
80 for (i = 0; i < n; i++)
81 {
82 byteNum++;
83 if (buf[i] == '\n')
84 {
85 lineCount[++lineNum] = byteNum;
86 }
87 }
88 }
89
90 if (lineNum < num_lines)
91 {
92 rw_ptr = 0;
93 }
94 else
95 {
96 if (lineNum % 2)
97 {
98 rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];
99 }
100 else
101 {
102 rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];
103 }
104 }
105
106 return 1;
107}
108
109char* get_next_line(int fd)
110{
111 int n, i;
112 char byteChar;
113 char* buf;
114
115 if (lseek(fd, rw_ptr, SEEK_SET) == -1)
116 {
117 return NULL;
118 }
119
120 linePtr = 0;
121 buf = (char *)malloc(LINE_BUFFER * sizeof(char));
122
123 for (i = 0; i < LINE_BUFFER; i++)
124 {
125 byteChar = (char)get_next_char(fd);
126 if (byteChar == EOF || byteChar == '\n')
127 {
128 buf[i] = '\0';
129 break;
130 }
131 else
132 {
133 buf[i] = byteChar;
134 }
135 }
136
137 return buf;
138}
139
140int get_next_char(int fd)
141{
142 int n;
143 if (lseek(fd, rw_ptr, SEEK_SET) == -1)
144 {
145 return EOF;
146 }
147
148 if (linePtr == 0)
149 {
150 if ((curBufSize = read(fd, lineBuf, INIT_BUFF_SIZE)) == 0)
151 {
152 return EOF;
153 }
154 linePtr = 0;
155 }
156
157 if (linePtr < curBufSize)
158 {
159 rw_ptr++;
160 return lineBuf[linePtr++];
161 }
162 else
163 {
164 if ((curBufSize = read(fd, lineBuf, INC_BUFF_SIZE)) == 0)
165 {
166 return EOF;
167 }
168 else
169 {
170 rw_ptr++;
171 linePtr = 0;
172 return lineBuf[linePtr++];
173 }
174 }
175}
176
177
最后用了一个shell脚本来测试两个的运行时间,比较其优劣(其中的BigFile.txt是一个很大的文件):
#!/bin/bash
# Time the unbuffered (part1) and buffered (part2) builds of lab2 on the same
# large input, printing a wall-clock timestamp before and after each run.
# The part1/part2 pair is run twice, as in the original listing.

# Run one build and report its start/finish time.
#   $1 - directory / output-file stem of the build to run (part1 or part2)
run_test() {
    local name=$1

    # `date +%T` prints HH:MM:SS directly, so we do not depend on the time
    # being the fourth field of the locale-specific default `date` output,
    # and we do not clobber the positional parameters with `set`.
    echo "start test $name at $(date +%T)"
    "../$name/lab2.exe" 300000 BigFile.txt > "$name.bt"
    echo "finish test $name at $(date +%T)"
}

for round in 1 2; do
    run_test part1
    run_test part2
done
用这个脚本跑下来,前者要比后者慢一倍。可见buffer的好处。如果调高buffer的size,效果将更明显。