CSAPP实验5: cachelab

理论上后面还有一个perflab....

Part A

之前寒假的时候beginend说过cachelab很难,但是感觉做下来还行?也可能是他把malloclab记错成了cachelab也说不定(

不管了。Part A就是要按照书上cache memory的组织结构写一个简单的缓存模拟器,即给定若干读和写,来判断每次对内存的操作是否命中缓存。事实上开一个三维数组就好了(如果不实现Blocks甚至只需要二维)

不过这一次还是学到很多东西的,这里罗列一下

  1. getopt()函数,这个函数来自unistd.h或直接是getopt.h,两者选哪一个取决于C的标准。这个函数实现了从argc, argv中一个一个取出参数的功能,并且提供了[必选参数/可选参数/单独参数]三类参数的提取,很好用。如果有类似--debug这样的长参数,可能要用上getopt_long()之类的函数
  2. strtok()函数,这个最早见到是在PA lab里面。可以理解为split()函数
  3. sscanf()、atoi(),这两个可以把字符串转成数字。根据数字的进制选择用不用sscanf(例如十六进制就用sscanf的%x)。事实上还有类似的atof()、itoa()、sprintf()这样的函数。多看官方文档~
  4. calloc()类似于malloc(),区别在于calloc()会初始化分配的内存为0,常用于数组(回想一下数组的默认初始化)

个人觉得库函数还是很好用的,至少比自己写要精炼得多了。看来还是要多研究研究别人造过的轮子啊

难点大概就在于getopt()和如何优雅地取出对应的位,还有就是对M操作的处理。这些都不算太难,写就完了。注意LRU策略指的是:最后一次访问时间最早(也就是最久没有被访问)的行最先被替换。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <stdbool.h>

#include "cachelab.h"

/*
 * Option string handed to getopt() in main().
 * -h and -v are plain flags; -s, -E, -b and -t each require an argument.
 * A double colon after a letter would declare a GNU-style optional argument,
 * which makes getopt treat the rest of a clustered word (e.g. "-vs4") as the
 * flag's value instead of as further options, so single letters are used.
 */
#define CMD_ARGS "hvs:E:b:t:"
/* Largest 32-bit signed value; update() starts its minimum search from it. */
#define INF 0x7FFFFFFF

/* Wide integer type used for addresses parsed from the trace. */
typedef long long LL;

/*
 * State of a single cache line. Note the naming: this type is called Set,
 * while the two-dimensional array of these records below is called Line.
 */
typedef struct {
	/* Buffer of B ints allocated by init(); the visible code never reads or writes its contents. */
	int *block;
	/* sign: tag stored in this line; last: cur_time of the most recent access (used for LRU). */
	int sign, last;
	/* Valid flag: false until update() first fills the line. */
	bool used;
} Set;

/* Cache storage, indexed as Line[set index][line index within the set]. */
Set **Line;

/*
 * Cache geometry taken from the command line in main():
 * s = set-index bits, E = lines per set, b = block-offset bits,
 * S = 1 << s (number of sets), B = 1 << b (block size).
 */
int s, E, b, S, B;
/* Running totals that process() passes to printSummary(). */
int hit_cnt, miss_cnt, evict_cnt;

/* NOTE(review): main() declares a local with the same name, so this global looks unused in the visible code. */
char *filename;

/* Verbose flag, set by -v; output() and process() print per-access details when it is true. */
bool debug = false;

/* Write the string s to stdout, but only while verbose mode (debug) is on. */
void output(char *s) {
	if (!debug) {
		return;
	}
	printf("%s", s);
}

/* Count one cache hit and, in verbose mode, print the "hit " marker. */
void hit() {
	hit_cnt += 1;
	output("hit ");
}

/* Count one cache miss and, in verbose mode, print the "miss " marker. */
void miss() {
	miss_cnt += 1;
	output("miss ");
}

/* Count one eviction and, in verbose mode, print the "eviction " marker. */
void evict() {
	evict_cnt += 1;
	output("eviction ");
}

/*
 * Open `filename` for reading and return the stream.
 * If the file cannot be opened, print an error line and terminate the
 * program with exit status -1, so the caller never sees NULL.
 */
FILE *openFile(char *filename) {
	FILE *stream = fopen(filename, "r");
	if (stream != NULL) {
		return stream;
	}

	puts("Error: file not found");
	exit(-1);
}

void update(int cur_time, int set_no, int block_offset, int sign, int wsiz) {
	int rec = -1, rec_last = INF;
	for (int i = 0; i < E; ++ i) {
		Set *tmp_line = &Line[set_no][i];
		if (!tmp_line->used) {
			rec = i;
			break;
		}
		if (tmp_line->last < rec_last) {
			rec_last = tmp_line->last;
			rec = i;
		}
	}
	Line[set_no][rec].last = cur_time;
	Line[set_no][rec].sign = sign;

	if (Line[set_no][rec].used) {
		evict();
	} else {
		Line[set_no][rec].used = true;
	}
}

/*
 * Simulate one access to the block with tag `sign` in set `set_no`.
 *
 * If a valid line of the set holds the tag, its timestamp is refreshed, a hit
 * is recorded and true is returned. Otherwise a miss is recorded, update()
 * installs the tag, and false is returned.
 *
 * block_offset and wsiz are only forwarded to update().
 */
bool load(int cur_time, int set_no, int block_offset, int sign, int wsiz) {
	int way = 0;
	while (way < E) {
		Set *candidate = &Line[set_no][way];
		++ way;
		if (!candidate->used || candidate->sign != sign) {
			continue;
		}
		candidate->last = cur_time;
		hit();
		return true;
	}

	miss();
	update(cur_time, set_no, block_offset, sign, wsiz);
	return false;
}

/*
 * Replay every data access of the trace `fin` against the simulated cache,
 * then print the hit/miss/eviction totals with printSummary().
 *
 * A handled line looks like " <op> <hex address>,<size>". Lines whose first
 * character is 'I' are skipped, as are blank or malformed lines. An 'M'
 * access touches the cache twice (two load() calls with the same timestamp).
 *
 * The stream is read to its end but not closed; it stays owned by the caller.
 */
void process(FILE *fin) {
	/* Fixed stack buffer: nothing to leak. A line longer than this is read in pieces. */
	char line[128];
	int cur_time = 0;

	/*
	 * Let fgets() itself end the loop. Testing feof() after the read would
	 * discard a final line that is not terminated by a newline.
	 */
	while (fgets(line, sizeof line, fin) != NULL) {
		if (line[0] == 'I') continue;

		/* Cut at the line ending only if there is one; never touch real data. */
		line[strcspn(line, "\r\n")] = '\0';

		char *cmd = line;
		while (*cmd == ' ') cmd ++;

		char *type = strtok(cmd, " ,");
		char *addr = strtok(NULL, " ,");
		char *wsiz = strtok(NULL, " ,");
		/* strtok() returns NULL for missing fields; skip such lines instead of dereferencing NULL. */
		if (type == NULL || addr == NULL || wsiz == NULL) continue;

		/* %llx requires an unsigned long long destination. */
		unsigned long long addr_n;
		if (sscanf(addr, "%llx", &addr_n) != 1) continue;
		int wsiz_n = atoi(wsiz);

		cur_time ++;

		if (debug) printf("%s %llx,%s ", type, addr_n, wsiz);

		/* Split the address into block offset (low b bits), set index (next s bits) and tag (the rest). */
		int block_offset = (int)(addr_n & (unsigned long long)(B - 1));
		int set_no = (int)((addr_n >> b) & (unsigned long long)(S - 1));
		/* NOTE(review): load() takes the tag as int, so tags wider than an int are truncated here. */
		int sign = (int)(addr_n >> (s + b));

		load(cur_time, set_no, block_offset, sign, wsiz_n);
		if (type[0] == 'M') {
			load(cur_time, set_no, block_offset, sign, wsiz_n);
		}
		output("\n");
	}
	printSummary(hit_cnt, miss_cnt, evict_cnt);
}

/*
 * Allocate `count` zero-filled objects of `size` bytes each.
 * Returns NULL when count is not positive; prints an error and exits with
 * status -1 when the allocation itself fails.
 */
static void *zeroed_array(int count, size_t size) {
	if (count <= 0) {
		return NULL;
	}
	void *mem = calloc((size_t)count, size);
	if (mem == NULL) {
		fputs("Error: out of memory\n", stderr);
		exit(-1);
	}
	return mem;
}

/*
 * Build the cache described by the globals S, E and B: S sets of E lines,
 * each line owning a buffer of B ints. Every line starts out zeroed and
 * marked unused. Allocation failure terminates the program instead of
 * leaving NULL pointers behind for later dereference.
 */
void init() {
	Line = zeroed_array(S, sizeof *Line);
	for (int i = 0; i < S; ++ i) {
		Line[i] = zeroed_array(E, sizeof *Line[i]);
		for (int j = 0; j < E; ++ j) {
			Line[i][j].block = zeroed_array(B, sizeof *Line[i][j].block);
			Line[i][j].used = false;
		}
	}
}

int main(int argc, char *const *argv) {
	char *filename;
	for (int opt; ~(opt = getopt(argc, argv, CMD_ARGS)); ) {
		switch (opt) {
			case 's': {
				s = atoi(optarg);
				S = 1 << s;
				break;
			}
			case 'E': {
				E = atoi(optarg);
				break;
			}
			case 'b': {
				b = atoi(optarg);
				B = 1 << b;
				break;
			}
			case 't': {
				filename = optarg;
				break;
			}
			case 'v': {
				debug = true;
			}
		}
	}

	init();

	process(openFile(filename));
	return 0;
}

Part B

写死我了...

一个最naive的优化就是视频中说的blocking,通过恰当分块就可以实现高效利用cache
对于32x32的问题,答案很简单就是分成8x8的块,61x67的也类似,难的在于64x64

难点在于:一次访存会加载连续8个元素,而访问行数大于4时就会出现thrashing,因此用8x8的block会thrashing,用4x4的block则利用不充分
最后是看了别人的解析才会做的,具体可以看这篇https://www.cnblogs.com/liqiuhao/p/8026100.html
大概意思就是把8x8分成四个4x4,每次用4x8的方式移动,这样是坠吼的

/* 
 * transpose_submit - This is the solution transpose function that you
 *     will be graded on for Part B of the assignment. Do not change
 *     the description string "Transpose submission", as the driver
 *     searches for that string to identify the transpose function to
 *     be graded. 
 *
 * Writes the transpose of the N x M matrix A into the M x N matrix B
 * (B[j][i] = A[i][j]). Three strategies are used:
 *   - M == 64, N a multiple of 8: 8x8 tiles handled as four 4x4 quadrants,
 *     using the upper-right quadrant of the B tile as temporary storage.
 *   - M == 32, N a multiple of 8: 8x8 tiles, one full row of A per step.
 *   - anything else (including 61 columns): bounds-checked 13x8 tiles, so
 *     every shape produces a complete, in-bounds transpose.
 *
 * NOTE(review): the Cache Lab handout is understood to cap the number of int
 * locals per transpose function at 12; this version uses t0..t7 plus four
 * loop indices. Verify against the handout.
 */
char transpose_submit_desc[] = "Transpose submission";
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
{
    int t0, t1, t2, t3, t4, t5, t6, t7;
    if (M == 64 && N % 8 == 0) {
        for (int si = 0; si < N; si += 8) {
            for (int sj = 0; sj < M; sj += 8) {
                /*
                 * Step 1: upper four rows of the A tile. The left half lands
                 * in its final place (upper-left of the B tile); the right
                 * half is parked, column order reversed, in the upper-right
                 * quadrant of the B tile.
                 */
                for (int i = si; i < si + 4; ++ i) {
                    t0 = A[i][sj];
                    t1 = A[i][sj + 1];
                    t2 = A[i][sj + 2];
                    t3 = A[i][sj + 3];
                    t4 = A[i][sj + 4];
                    t5 = A[i][sj + 5];
                    t6 = A[i][sj + 6];
                    t7 = A[i][sj + 7];

                    B[sj][i] = t0;
                    B[sj + 1][i] = t1;
                    B[sj + 2][i] = t2;
                    B[sj + 3][i] = t3;
                    B[sj][i + 4] = t7;
                    B[sj + 1][i + 4] = t6;
                    B[sj + 2][i + 4] = t5;
                    B[sj + 3][i + 4] = t4;
                }
                /*
                 * Step 2: lower four rows of the A tile, one column pair per
                 * iteration. The parked values move down to the lower-left
                 * quadrant of B before their slots are overwritten with the
                 * real upper-right data; the lower-right is filled last.
                 */
                for (int j = 0; j < 4; ++ j) {
                    t0 = A[si + 4][sj + j + 4];
                    t1 = A[si + 5][sj + j + 4];
                    t2 = A[si + 6][sj + j + 4];
                    t3 = A[si + 7][sj + j + 4];

                    t4 = A[si + 4][sj + 3 - j];
                    t5 = A[si + 5][sj + 3 - j];
                    t6 = A[si + 6][sj + 3 - j];
                    t7 = A[si + 7][sj + 3 - j];

                    B[sj + j + 4][si] = B[sj + 3 - j][si + 4];
                    B[sj + j + 4][si + 1] = B[sj + 3 - j][si + 5];
                    B[sj + j + 4][si + 2] = B[sj + 3 - j][si + 6];
                    B[sj + j + 4][si + 3] = B[sj + 3 - j][si + 7];

                    B[sj + 3 - j][si + 4] = t4;
                    B[sj + 3 - j][si + 5] = t5;
                    B[sj + 3 - j][si + 6] = t6;
                    B[sj + 3 - j][si + 7] = t7;
                    B[sj + j + 4][si + 4] = t0;
                    B[sj + j + 4][si + 5] = t1;
                    B[sj + j + 4][si + 6] = t2;
                    B[sj + j + 4][si + 7] = t3;
                }
            }
        }
    } else if (M == 32 && N % 8 == 0) {
        for (int si = 0; si < N; si += 8) {
            for (int sj = 0; sj < M; sj += 8) {
                /* Read a whole 8-element row of the tile before writing any of it to B. */
                for (int i = si; i < si + 8; ++ i) {
                    t0 = A[i][sj];
                    t1 = A[i][sj + 1];
                    t2 = A[i][sj + 2];
                    t3 = A[i][sj + 3];
                    t4 = A[i][sj + 4];
                    t5 = A[i][sj + 5];
                    t6 = A[i][sj + 6];
                    t7 = A[i][sj + 7];

                    B[sj][i] = t0;
                    B[sj + 1][i] = t1;
                    B[sj + 2][i] = t2;
                    B[sj + 3][i] = t3;
                    B[sj + 4][i] = t4;
                    B[sj + 5][i] = t5;
                    B[sj + 6][i] = t6;
                    B[sj + 7][i] = t7;
                }
            }
        }
    } else {
        /* General case: 13-row by 8-column tiles, clipped at the matrix edges. */
        for (int si = 0; si < N; si += 13) {
            for (int sj = 0; sj < M; sj += 8) {
                for (int i = si; i < si + 13 && i < N; ++ i) {
                    for (int j = sj; j < sj + 8 && j < M; ++ j) {
                        t0 = A[i][j];
                        B[j][i] = t0;
                    }
                }
            }
        }
    }
}
posted @ 2021-03-07 10:59  jjppp  阅读(829)  评论(0编辑  收藏  举报