帮同学做一个作业,写了两个程序,完成的功能类似 Linux 中的 head 和 tail 命令,可以获取一个文件中间的几行。这两个程序实现的是同一个功能,但是第一个用的是一个类似 libc 中 fgetc 的函数,到文件里一个一个字符地取;而第二个程序则用了行缓存,通过 INIT_BUFF_SIZE 和 INC_BUFF_SIZE 来控制缓存大小和扩展大小。程序旨在说明问题,如果代码上有什么丑陋的地方,请包涵。
未做buffer的程序代码:
做了buffer的程序代码:
最后用了一个shell脚本来测试两个的运行时间,比较其优劣(其中的BigFile.txt是一个很大的文件):
用这个脚本跑下来,前者要比后者慢一倍。可见 buffer 的好处。如果调高 buffer 的 size,效果将更明显。
未做buffer的程序代码:
1
#include <stdio.h>
2
#include <stdlib.h>
3
#include <sys/types.h>
4
#include <sys/stat.h>
5
#include <string.h>
6
#include <unistd.h>
7
#include <fcntl.h>
8
9
/* Sizes shared by the functions of the unbuffered program. */
#define CHAR_BUFFER 1             /* not referenced by the functions in this listing */
#define BUFFER_SIZE 1024          /* bytes per read() while scanning for line starts */
#define LINE_BUFFER 128           /* line buffer size used by get_next_line() */
#define MAX_FILE_LINE_NUM 1000000 /* entries allocated for the line-offset table */

int rw_ptr;  /* byte offset in the file of the next character to read */
int lineNum; /* number of '\n' characters counted in the file */

int position_rw_pntr(int fd, int num_lines);
char* get_next_line(int fd);
int get_next_char(int fd);
20
21
int main(int argc, char *argv[])
22
{
23
int midLines;
24
int i = 0;
25
char* lineString;
26
int succ;
27
int fd;
28
if (argc != 3) {
29
printf("Usage: lab2 <lines> <file>\n");
30
return -1;
31
}
32
33
midLines = atoi(argv[1]);
34
if ((fd = open(argv[2], O_RDONLY)) == -1)
35
{
36
perror("Open file error");
37
return EXIT_FAILURE;
38
}
39
40
if (position_rw_pntr(fd, midLines) == -1)
41
{
42
perror("Position_rw_pntr Error");
43
return EXIT_FAILURE;
44
}
45
46
while (i != midLines && i != lineNum)
47
{
48
lineString = get_next_line(fd);
49
printf("%s\n", lineString);
50
i++;
51
}
52
53
free(lineString);
54
close(fd);
55
56
return EXIT_SUCCESS;
57
}
58
59
int position_rw_pntr(int fd, int num_lines)
60
{
61
int start;
62
int i, n;
63
char buf[BUFFER_SIZE];
64
int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));
65
int byteNum = 0;
66
67
if (lseek(fd, 0, SEEK_SET) == -1)
68
{
69
return -1;
70
}
71
72
lineNum = 0;
73
lineCount[lineNum] = 0;
74
while ((n = read(fd, buf, BUFFER_SIZE)) != 0 )
75
{
76
for (i = 0; i < n; i++)
77
{
78
byteNum++;
79
if (buf[i] == '\n')
80
{
81
lineCount[++lineNum] = byteNum;
82
}
83
}
84
}
85
86
if (lineNum < num_lines)
87
{
88
rw_ptr = 0;
89
}
90
else
91
{
92
if (lineNum % 2)
93
{
94
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];
95
}
96
else
97
{
98
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];
99
}
100
}
101
102
return 1;
103
}
104
105
char* get_next_line(int fd)
106
{
107
int n, i;
108
char byteChar;
109
char* buf;
110
111
if (lseek(fd, rw_ptr, SEEK_SET) == -1)
112
{
113
return NULL;
114
}
115
116
buf = (char *)malloc(LINE_BUFFER * sizeof(char));
117
118
for (i = 0; i < LINE_BUFFER; i++)
119
{
120
byteChar = (char)get_next_char(fd);
121
if (byteChar == EOF || byteChar == '\n')
122
{
123
buf[i] = '\0';
124
break;
125
}
126
else
127
{
128
buf[i] = byteChar;
129
}
130
}
131
132
return buf;
133
}
134
135
int get_next_char(int fd)
136
{
137
char charBuf[1];
138
if (lseek(fd, rw_ptr, SEEK_SET) == -1)
139
{
140
return EOF;
141
}
142
143
rw_ptr++;
144
if (read(fd, charBuf, 1) == 0)
145
{
146
return EOF;
147
}
148
return charBuf[0];
149
}
150
#include <stdio.h>2
#include <stdlib.h>3
#include <sys/types.h>4
#include <sys/stat.h>5
#include <string.h>6
#include <unistd.h>7
#include <fcntl.h>8

9
/* Sizes shared by the functions of the unbuffered program. */
#define CHAR_BUFFER 1             /* not referenced by the functions in this listing */
#define BUFFER_SIZE 1024          /* bytes per read() while scanning for line starts */
#define LINE_BUFFER 128           /* line buffer size used by get_next_line() */
#define MAX_FILE_LINE_NUM 1000000 /* entries allocated for the line-offset table */

int rw_ptr;  /* byte offset in the file of the next character to read */
int lineNum; /* number of '\n' characters counted in the file */

int position_rw_pntr(int fd, int num_lines);
char* get_next_line(int fd);
int get_next_char(int fd);

21
int main(int argc, char *argv[]) 22
{23
int midLines;24
int i = 0;25
char* lineString;26
int succ;27
int fd;28
if (argc != 3) {29
printf("Usage: lab2 <lines> <file>\n");30
return -1;31
}32
33
midLines = atoi(argv[1]);34
if ((fd = open(argv[2], O_RDONLY)) == -1) 35
{36
perror("Open file error");37
return EXIT_FAILURE;38
}39

40
if (position_rw_pntr(fd, midLines) == -1)41
{42
perror("Position_rw_pntr Error");43
return EXIT_FAILURE;44
}45
46
while (i != midLines && i != lineNum) 47
{48
lineString = get_next_line(fd);49
printf("%s\n", lineString);50
i++;51
}52

53
free(lineString);54
close(fd);55
56
return EXIT_SUCCESS;57
}58

59
int position_rw_pntr(int fd, int num_lines) 60
{61
int start;62
int i, n;63
char buf[BUFFER_SIZE];64
int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));65
int byteNum = 0;66

67
if (lseek(fd, 0, SEEK_SET) == -1) 68
{69
return -1;70
}71
72
lineNum = 0;73
lineCount[lineNum] = 0;74
while ((n = read(fd, buf, BUFFER_SIZE)) != 0 ) 75
{76
for (i = 0; i < n; i++) 77
{78
byteNum++;79
if (buf[i] == '\n') 80
{81
lineCount[++lineNum] = byteNum;82
}83
}84
}85

86
if (lineNum < num_lines)87
{88
rw_ptr = 0;89
}90
else91
{ 92
if (lineNum % 2)93
{94
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];95
}96
else97
{98
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];99
}100
}101
102
return 1;103
}104

105
char* get_next_line(int fd) 106
{107
int n, i;108
char byteChar;109
char* buf;110

111
if (lseek(fd, rw_ptr, SEEK_SET) == -1) 112
{113
return NULL;114
}115
116
buf = (char *)malloc(LINE_BUFFER * sizeof(char));117

118
for (i = 0; i < LINE_BUFFER; i++) 119
{120
byteChar = (char)get_next_char(fd);121
if (byteChar == EOF || byteChar == '\n')122
{123
buf[i] = '\0';124
break;125
}126
else127
{128
buf[i] = byteChar;129
}130
}131

132
return buf;133
}134

135
int get_next_char(int fd)136
{137
char charBuf[1];138
if (lseek(fd, rw_ptr, SEEK_SET) == -1) 139
{140
return EOF;141
}142
143
rw_ptr++;144
if (read(fd, charBuf, 1) == 0) 145
{146
return EOF;147
}148
return charBuf[0];149
}150

做了buffer的程序代码:
1
#include <stdio.h>
2
#include <stdlib.h>
3
#include <sys/types.h>
4
#include <sys/stat.h>
5
#include <string.h>
6
#include <unistd.h>
7
#include <fcntl.h>
8
9
/* Sizes shared by the functions of the buffered program. */
#define BUFFER_SIZE 1024          /* bytes per read() while scanning for line starts */
#define INIT_BUFF_SIZE 64         /* bytes requested by the first read of each line */
#define INC_BUFF_SIZE 8           /* bytes requested by each later refill */
#define LINE_BUFFER 128           /* line buffer size used by get_next_line() */
#define MAX_FILE_LINE_NUM 1000000 /* entries allocated for the line-offset table */

int rw_ptr;                   /* byte offset in the file of the next character */
int lineNum;                  /* number of '\n' characters counted in the file */
char lineBuf[INIT_BUFF_SIZE]; /* read-ahead buffer filled by get_next_char() */
int linePtr;                  /* index of the next unread byte in lineBuf */
int curBufSize;               /* result of the last read() into lineBuf */

int position_rw_pntr(int fd, int num_lines);
char* get_next_line(int fd);
int get_next_char(int fd);
24
25
int main(int argc, char *argv[])
26
{
27
int midLines;
28
int i = 0;
29
char* lineString;
30
int succ;
31
int fd;
32
if (argc != 3) {
33
printf("Usage: lab2 <lines> <file>\n");
34
return -1;
35
}
36
37
midLines = atoi(argv[1]);
38
if ((fd = open(argv[2], O_RDONLY)) == -1)
39
{
40
perror("Open file error");
41
return EXIT_FAILURE;
42
}
43
44
if (position_rw_pntr(fd, midLines) == -1)
45
{
46
perror("Position_rw_pntr Error");
47
return EXIT_FAILURE;
48
}
49
50
while (i != midLines && i != lineNum)
51
{
52
lineString = get_next_line(fd);
53
printf("%s\n", lineString);
54
i++;
55
}
56
57
free(lineString);
58
close(fd);
59
60
return EXIT_SUCCESS;
61
}
62
63
int position_rw_pntr(int fd, int num_lines)
64
{
65
int start;
66
int i, n;
67
char buf[BUFFER_SIZE];
68
int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));
69
int byteNum = 0;
70
71
if (lseek(fd, 0, SEEK_SET) == -1)
72
{
73
return -1;
74
}
75
76
lineNum = 0;
77
lineCount[lineNum] = 0;
78
while ((n = read(fd, buf, BUFFER_SIZE)) != 0 )
79
{
80
for (i = 0; i < n; i++)
81
{
82
byteNum++;
83
if (buf[i] == '\n')
84
{
85
lineCount[++lineNum] = byteNum;
86
}
87
}
88
}
89
90
if (lineNum < num_lines)
91
{
92
rw_ptr = 0;
93
}
94
else
95
{
96
if (lineNum % 2)
97
{
98
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];
99
}
100
else
101
{
102
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];
103
}
104
}
105
106
return 1;
107
}
108
109
char* get_next_line(int fd)
110
{
111
int n, i;
112
char byteChar;
113
char* buf;
114
115
if (lseek(fd, rw_ptr, SEEK_SET) == -1)
116
{
117
return NULL;
118
}
119
120
linePtr = 0;
121
buf = (char *)malloc(LINE_BUFFER * sizeof(char));
122
123
for (i = 0; i < LINE_BUFFER; i++)
124
{
125
byteChar = (char)get_next_char(fd);
126
if (byteChar == EOF || byteChar == '\n')
127
{
128
buf[i] = '\0';
129
break;
130
}
131
else
132
{
133
buf[i] = byteChar;
134
}
135
}
136
137
return buf;
138
}
139
140
int get_next_char(int fd)
141
{
142
int n;
143
if (lseek(fd, rw_ptr, SEEK_SET) == -1)
144
{
145
return EOF;
146
}
147
148
if (linePtr == 0)
149
{
150
if ((curBufSize = read(fd, lineBuf, INIT_BUFF_SIZE)) == 0)
151
{
152
return EOF;
153
}
154
linePtr = 0;
155
}
156
157
if (linePtr < curBufSize)
158
{
159
rw_ptr++;
160
return lineBuf[linePtr++];
161
}
162
else
163
{
164
if ((curBufSize = read(fd, lineBuf, INC_BUFF_SIZE)) == 0)
165
{
166
return EOF;
167
}
168
else
169
{
170
rw_ptr++;
171
linePtr = 0;
172
return lineBuf[linePtr++];
173
}
174
}
175
}
176
177
#include <stdio.h>2
#include <stdlib.h>3
#include <sys/types.h>4
#include <sys/stat.h>5
#include <string.h>6
#include <unistd.h>7
#include <fcntl.h>8

9
/* Sizes shared by the functions of the buffered program. */
#define BUFFER_SIZE 1024          /* bytes per read() while scanning for line starts */
#define INIT_BUFF_SIZE 64         /* bytes requested by the first read of each line */
#define INC_BUFF_SIZE 8           /* bytes requested by each later refill */
#define LINE_BUFFER 128           /* line buffer size used by get_next_line() */
#define MAX_FILE_LINE_NUM 1000000 /* entries allocated for the line-offset table */

int rw_ptr;                   /* byte offset in the file of the next character */
int lineNum;                  /* number of '\n' characters counted in the file */
char lineBuf[INIT_BUFF_SIZE]; /* read-ahead buffer filled by get_next_char() */
int linePtr;                  /* index of the next unread byte in lineBuf */
int curBufSize;               /* result of the last read() into lineBuf */

int position_rw_pntr(int fd, int num_lines);
char* get_next_line(int fd);
int get_next_char(int fd);

25
int main(int argc, char *argv[]) 26
{27
int midLines;28
int i = 0;29
char* lineString;30
int succ;31
int fd;32
if (argc != 3) {33
printf("Usage: lab2 <lines> <file>\n");34
return -1;35
}36

37
midLines = atoi(argv[1]);38
if ((fd = open(argv[2], O_RDONLY)) == -1) 39
{40
perror("Open file error");41
return EXIT_FAILURE;42
}43

44
if (position_rw_pntr(fd, midLines) == -1)45
{46
perror("Position_rw_pntr Error");47
return EXIT_FAILURE;48
}49

50
while (i != midLines && i != lineNum) 51
{52
lineString = get_next_line(fd);53
printf("%s\n", lineString);54
i++;55
}56

57
free(lineString);58
close(fd);59

60
return EXIT_SUCCESS;61
}62

63
int position_rw_pntr(int fd, int num_lines) 64
{65
int start;66
int i, n;67
char buf[BUFFER_SIZE];68
int* lineCount = (int *)malloc(MAX_FILE_LINE_NUM * sizeof(int));69
int byteNum = 0;70

71
if (lseek(fd, 0, SEEK_SET) == -1) 72
{73
return -1;74
}75

76
lineNum = 0;77
lineCount[lineNum] = 0;78
while ((n = read(fd, buf, BUFFER_SIZE)) != 0 ) 79
{80
for (i = 0; i < n; i++) 81
{82
byteNum++;83
if (buf[i] == '\n') 84
{85
lineCount[++lineNum] = byteNum;86
}87
}88
}89

90
if (lineNum < num_lines)91
{92
rw_ptr = 0;93
}94
else95
{ 96
if (lineNum % 2)97
{98
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines + 1) % 2 - 1];99
}100
else101
{102
rw_ptr = lineCount[(lineNum - num_lines) / 2 + (lineNum - num_lines) % 2 - 1];103
}104
}105

106
return 1;107
}108

109
char* get_next_line(int fd) 110
{111
int n, i;112
char byteChar;113
char* buf;114

115
if (lseek(fd, rw_ptr, SEEK_SET) == -1) 116
{117
return NULL;118
}119

120
linePtr = 0;121
buf = (char *)malloc(LINE_BUFFER * sizeof(char));122
123
for (i = 0; i < LINE_BUFFER; i++) 124
{125
byteChar = (char)get_next_char(fd);126
if (byteChar == EOF || byteChar == '\n')127
{128
buf[i] = '\0';129
break;130
}131
else132
{133
buf[i] = byteChar;134
}135
}136

137
return buf;138
}139

140
int get_next_char(int fd)141
{142
int n;143
if (lseek(fd, rw_ptr, SEEK_SET) == -1) 144
{145
return EOF;146
}147

148
if (linePtr == 0)149
{150
if ((curBufSize = read(fd, lineBuf, INIT_BUFF_SIZE)) == 0)151
{152
return EOF;153
}154
linePtr = 0;155
}156
157
if (linePtr < curBufSize)158
{159
rw_ptr++;160
return lineBuf[linePtr++];161
}162
else163
{164
if ((curBufSize = read(fd, lineBuf, INC_BUFF_SIZE)) == 0)165
{166
return EOF;167
}168
else169
{170
rw_ptr++;171
linePtr = 0;172
return lineBuf[linePtr++];173
}174
}175
}176

177

最后用了一个shell脚本来测试两个的运行时间,比较其优劣(其中的BigFile.txt是一个很大的文件):
#!/bin/bash
# Runs the unbuffered (part1) and buffered (part2) builds twice each on
# BigFile.txt and prints the wall-clock time before and after every run.

# Prints the current time as HH:MM:SS. Using an explicit format avoids
# depending on which field of the locale-specific `date` output holds
# the time, and leaves the positional parameters untouched.
now() {
    date +%T
}

# run_test LABEL BINARY OUTFILE
# Runs BINARY on BigFile.txt, redirecting its output to OUTFILE.
run_test() {
    local label=$1
    local binary=$2
    local outfile=$3

    echo "start test $label at $(now)"
    "$binary" 300000 BigFile.txt > "$outfile"
    echo "finish test $label at $(now)"
}

run_test part1 ../part1/lab2.exe part1.bt
run_test part2 ../part2/lab2.exe part2.bt
run_test part1 ../part1/lab2.exe part1.bt
run_test part2 ../part2/lab2.exe part2.bt
用这个脚本跑下来,前者要比后者慢一倍。可见 buffer 的好处。如果调高 buffer 的 size,效果将更明显。



浙公网安备 33010602011771号