Part A 实现csim.c
概念
- 冷不命中
- 冲突不命中
- 容量不命中
- 写命中:直写(write-through)和写回(write-back)
- 写不命中:写分配(write-allocate)和非写分配(no-write-allocate)
工具
valgrind
csim.c
#include "cachelab.h"
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <limits.h>
#include <getopt.h>
#include <string.h>
#ifndef OVERFLOW
#define OVERFLOW 2
#endif
/* Command-line state. All of these have static storage, so they start at 0. */
int h; /* set to 1 when -h is given */
int v; /* set to 1 when -v is given */
int s; /* number of set index bits */
int E; /* lines per set (associativity) */
int b; /* number of block offset bits */
int S; /* number of sets, 2^s */

/* Counters handed to printSummary() at the end of the run. */
int hits;
int misses;
int evictions;

/* Path of the trace file given with -t. */
char t[100];
/* One line of the simulated cache: bookkeeping only, no block data. */
typedef struct
{
    int valid;     /* non-zero once the line holds a block */
    int tag;       /* tag bits of the block held by this line */
    int timestamp; /* age counter for LRU: the largest value is evicted first */
} cache_line;

/* A set is an array of lines; the cache is an array of sets. */
typedef cache_line *cache_set;
typedef cache_set *cache;

/* The simulated cache; NULL until init_cache() allocates it. */
cache _cache = NULL;
/*
 * Print the command-line help text to stdout.
 *
 * Every option line ends with '\n'; without it the adjacent string literals
 * are concatenated and the whole option list comes out as one line.
 */
void printUsage(void)
{
    printf("Usage: ./csim-ref [-hv] -s <num> -E <num> -b <num> -t <file>\n"
           "Options: \n"
           " -h: Optional help flag that prints usage info\n"
           " -v: Optional verbose flag that displays trace info\n"
           " -s <s>: Number of set index bits (S = 2^s is the number of sets)\n"
           " -E <E>: Associativity (number of lines per set)\n"
           " -b <b>: Number of block bits (B = 2^b is the block size)\n"
           " -t <tracefile>: Name of the valgrind trace to replay\n"
           "Examples: \n"
           "./csim-ref -s 4 -E 1 -b 4 -t traces/yi.trace\n");
}
void init_cache()
{
_cache = (cache)malloc(sizeof(cache_set) * S);
if (_cache == NULL)
{
exit(OVERFLOW);
}
for (int i = 0; i < S; i++)
{
cache_set set = (cache_set)malloc(sizeof(cache_line) * E);
if (!set)
{
exit(OVERFLOW);
}
_cache[i] = set;
for (int j = 0; j < E; j++)
{
set[j].valid = 0;
set[j].tag = -1;
set[j].timestamp = -1;
}
}
}
/*
 * Simulate one access to `address` and update hits / misses / evictions.
 *
 * The address is split as  [ tag | s set-index bits | b block-offset bits ].
 * Lookup order within the selected set:
 *   1. a valid line with a matching tag      -> hit
 *   2. otherwise the first invalid line      -> miss, line is filled
 *   3. otherwise the line with the largest
 *      timestamp (least recently used)       -> miss + eviction
 * In all three cases the touched line's timestamp is reset to 0.
 *
 * Requires 0 <= s, 0 <= b and b + s < 32, because the address is a 32-bit
 * unsigned value and larger shift counts are undefined.
 * NOTE(review): trace addresses wider than 32 bits cannot be represented
 * by this parameter type - confirm the traces in use fit.
 */
void update(unsigned int address)
{
    /*
     * (1u << s) - 1 keeps the low s bits. The previous mask,
     * -1u >> (64 - s), shifted a 32-bit value by 32 or more bits, which is
     * undefined behaviour.
     */
    const unsigned int set_mask = (1u << s) - 1u;
    const int setAddr = (int)((address >> b) & set_mask);
    const int tagAddr = (int)(address >> (b + s));
    int empty_index = -1; /* first invalid line seen, if any */
    int victim = 0;       /* first line with the largest timestamp */

    for (int i = 0; i < E; i++)
    {
        if (_cache[setAddr][i].valid == 0)
        {
            if (empty_index < 0)
            {
                empty_index = i;
            }
            continue;
        }
        /* Only valid lines may hit; do not rely on the -1 tag sentinel. */
        if (_cache[setAddr][i].tag == tagAddr)
        {
            _cache[setAddr][i].timestamp = 0;
            hits++;
            return;
        }
        if (_cache[setAddr][i].timestamp > _cache[setAddr][victim].timestamp)
        {
            victim = i;
        }
    }

    misses++;
    if (empty_index >= 0)
    {
        /* Cold miss: fill the free line, nothing is evicted. */
        victim = empty_index;
    }
    else
    {
        /* Set is full: replace the least recently used line. */
        evictions++;
    }
    _cache[setAddr][victim].valid = 1;
    _cache[setAddr][victim].tag = tagAddr;
    _cache[setAddr][victim].timestamp = 0;
}
/*
 * Age the cache by one step: add 1 to the timestamp of every line whose
 * valid field is 1. Lines that are not valid are left untouched.
 */
void update_stamp()
{
    int set_idx = 0;
    while (set_idx < S)
    {
        int line_idx = 0;
        while (line_idx < E)
        {
            if (_cache[set_idx][line_idx].valid == 1)
                _cache[set_idx][line_idx].timestamp += 1;
            line_idx++;
        }
        set_idx++;
    }
}
/*
 * Replay the trace file named by the global `t` against the cache, then
 * release the cache.
 *
 * Each record has the form "<op> <hex address>,<size>":
 *   L (load) and S (store) touch the cache once,
 *   M (modify) is a load followed by a store, so it touches it twice,
 *   any other operation (e.g. I, instruction fetch) is ignored.
 * After every record all valid lines are aged by update_stamp().
 *
 * Exits with status 1 if the file cannot be opened. Parsing stops at the
 * first record that does not match the expected form.
 */
void parse_trace()
{
    FILE *fp = fopen(t, "r");
    if (fp == NULL)
    {
        perror(t);
        exit(1);
    }
    char operation;
    unsigned int address;
    int size;
    /*
     * The format used to be "%xu,%d": the stray 'u' is a literal that never
     * matches, so fscanf stopped after the address and the remaining
     * ",<size>" was then consumed as a bogus extra record. Require all three
     * fields instead.
     * NOTE(review): %x stores into a 32-bit unsigned int; wider addresses in
     * the trace will not fit - confirm against the traces in use.
     */
    while (fscanf(fp, " %c %x,%d", &operation, &address, &size) == 3)
    {
        switch (operation)
        {
        case 'M':
            update(address);
            /* fallthrough: a modify is a load followed by a store */
        case 'L':
        case 'S':
            update(address);
            break;
        default:
            /* instruction fetches and unknown operations are not simulated */
            break;
        }
        update_stamp();
    }
    fclose(fp);
    for (int i = 0; i < S; i++)
    {
        free(_cache[i]);
    }
    free(_cache);
    _cache = NULL; /* avoid leaving a dangling global pointer */
}
/*
 * Entry point of the cache simulator.
 *
 * Parses -h, -v, -s <s>, -E <E>, -b <b> and -t <tracefile>, builds the cache,
 * replays the trace and prints the hit / miss / eviction counts.
 *
 * Returns 0 on success (or after -h), -1 on invalid arguments; exits with
 * status -1 if the trace file cannot be opened.
 */
int main(int argc, char *argv[])
{
    h = 0;
    v = 0;
    hits = misses = evictions = 0;
    int opt; /* return value of getopt */
    while (-1 != (opt = (getopt(argc, argv, "hvs:E:b:t:"))))
    {
        switch (opt)
        {
        case 'h':
            /* Help was requested: show it and stop instead of simulating. */
            h = 1;
            printUsage();
            return 0;
        case 'v':
            /* Verbose only sets the flag; it must not print the usage text. */
            v = 1;
            break;
        case 's':
            s = atoi(optarg);
            break;
        case 'E':
            E = atoi(optarg);
            break;
        case 'b':
            b = atoi(optarg);
            break;
        case 't':
            /* t is a fixed-size buffer: reject names that do not fit. */
            if (strlen(optarg) >= sizeof t)
            {
                printf("trace file name too long\n");
                return -1;
            }
            strcpy(t, optarg);
            break;
        default:
            /* Unknown option or missing argument. */
            printUsage();
            return -1;
        }
    }
    /*
     * Reject missing or non-positive parameters. b + s must also stay below
     * 32 because addresses are handled as 32-bit values and 1 << s must not
     * overflow an int. Written as s >= 32 - b so the sum cannot overflow.
     */
    if (s <= 0 || E <= 0 || b <= 0 || b >= 32 || s >= 32 - b)
    {
        printUsage();
        return -1;
    }
    S = 1 << s; /* S = 2^s */
    /* Fail early with a message if the trace file cannot be read. */
    FILE *fp = fopen(t, "r");
    if (fp == NULL)
    {
        printf("open error");
        exit(-1);
    }
    fclose(fp); /* parse_trace() opens the file itself */
    init_cache();
    parse_trace();
    printSummary(hits, misses, evictions);
    return 0;
}
Part B
分块技术
32 × 32 矩阵(M = 32,N = 32)
/*
 * Transpose the N x M matrix A into the M x N matrix B, working in 8 x 8
 * tiles (blocking).
 *
 * M: number of columns of A (rows of B)
 * N: number of rows of A (columns of B)
 *
 * Both M and N must be multiples of 8; no remainder handling is done.
 *
 * The row index of A is bounded by N and the column index by M. The
 * previous version had the two bounds swapped, which is only correct when
 * M == N.
 */
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
{
    int i, j, k, v1, v2, v3, v4, v5, v6, v7, v8;
    int b = 8; /* tile edge length */
    for (i = 0; i < N; i += b) /* first row of the tile in A */
    {
        for (j = 0; j < M; j += b) /* first column of the tile in A */
        {
            for (k = i; k < i + b; k++)
            {
                /*
                 * Loop unrolled: load the eight elements of this row segment
                 * into locals before storing anything to B (presumably so
                 * that the stores cannot evict the A line mid-row - confirm
                 * with the cache simulator).
                 */
                v1 = A[k][j];
                v2 = A[k][j + 1];
                v3 = A[k][j + 2];
                v4 = A[k][j + 3];
                v5 = A[k][j + 4];
                v6 = A[k][j + 5];
                v7 = A[k][j + 6];
                v8 = A[k][j + 7];
                B[j][k] = v1;
                B[j + 1][k] = v2;
                B[j + 2][k] = v3;
                B[j + 3][k] = v4;
                B[j + 4][k] = v5;
                B[j + 5][k] = v6;
                B[j + 6][k] = v7;
                B[j + 7][k] = v8;
            }
        }
    }
}
64 × 64 矩阵(M = 64,N = 64)
/*
 * 64 x 64 case: transpose A into B one 8 x 8 block at a time, treating each
 * block as four 4 x 4 quadrants:
 *     1 = top-left, 2 = top-right, 3 = bottom-left, 4 = bottom-right (of A).
 * NOTE(review): this is a fragment - A, B, M and N come from an enclosing
 * function that is not shown here; the loops assume M and N are multiples
 * of 8.
 */
for (int i = 0; i < N; i += 8)
{
for (int j = 0; j < M; j += 8)
{
/* Pass 1: the top four rows of the A block (quadrants 1 and 2). */
for (int k = i; k < i + 4; ++k)
{
/* Read one full 8-element row of A: quadrant 1 (first four) and quadrant 2
 * (last four). */
int temp_value0 = A[k][j];
int temp_value1 = A[k][j+1];
int temp_value2 = A[k][j+2];
int temp_value3 = A[k][j+3];
int temp_value4 = A[k][j+4];
int temp_value5 = A[k][j+5];
int temp_value6 = A[k][j+6];
int temp_value7 = A[k][j+7];
/* Quadrant 1 goes transposed straight to its final place in B. */
B[j][k] = temp_value0;
B[j+1][k] = temp_value1;
B[j+2][k] = temp_value2;
B[j+3][k] = temp_value3;
/* Quadrant 2 is parked in the right half of the same four B rows, in
 * reverse order: A column j+7 lands in B row j, ..., A column j+4 in
 * B row j+3. */
B[j][k+4] = temp_value7;
B[j+1][k+4] = temp_value6;
B[j+2][k+4] = temp_value5;
B[j+3][k+4] = temp_value4;
}
/* Pass 2: the bottom four rows of the A block (quadrants 3 and 4), plus
 * moving the parked quadrant-2 data to its final place. */
for (int l = 0; l < 4; ++l)
{
/* Read column-wise: column j+3-l of quadrant 3 and column j+4+l of
 * quadrant 4 (rows i+4 .. i+7 of A). */
int temp_value0 = A[i+4][j+3-l];
int temp_value1 = A[i+5][j+3-l];
int temp_value2 = A[i+6][j+3-l];
int temp_value3 = A[i+7][j+3-l];
int temp_value4 = A[i+4][j+4+l];
int temp_value5 = A[i+5][j+4+l];
int temp_value6 = A[i+6][j+4+l];
int temp_value7 = A[i+7][j+4+l];
/* Move the parked quadrant-2 values, one B row at a time starting from row
 * j+3 and going upward, into the left half of B row j+4+l. Because of the
 * reversed parking order, B row j+3-l holds exactly A column j+4+l. */
B[j+4+l][i] = B[j+3-l][i+4];
B[j+4+l][i+1] = B[j+3-l][i+5];
B[j+4+l][i+2] = B[j+3-l][i+6];
B[j+4+l][i+3] = B[j+3-l][i+7];
/* Store quadrants 3 and 4 into their final positions; the quadrant-3 column
 * overwrites the slot that was just vacated. */
B[j+3-l][i+4] = temp_value0;
B[j+3-l][i+5] = temp_value1;
B[j+3-l][i+6] = temp_value2;
B[j+3-l][i+7] = temp_value3;
B[j+4+l][i+4] = temp_value4;
B[j+4+l][i+5] = temp_value5;
B[j+4+l][i+6] = temp_value6;
B[j+4+l][i+7] = temp_value7;
}
}
}
61 × 67 矩阵(M = 61,N = 67)
/*
 * Irregular case (dimensions not multiples of 8): transpose the N x M
 * matrix A into the M x N matrix B.
 *
 * The matrix is split into three disjoint regions so that every element is
 * copied exactly once:
 *   - main region:  rows 0..n-1, columns 0..m-1, in strips of 8 columns
 *   - right strip:  all rows,    columns m..M-1
 *   - bottom strip: rows n..N-1, columns 0..m-1
 * where n and m are N and M rounded down to a multiple of 8.
 *
 * The previous version copied the bottom-right corner (rows n..N-1,
 * columns m..M-1) three times; the redundant passes have been removed.
 * NOTE(review): this is a fragment - A, B, M and N come from an enclosing
 * function that is not shown here.
 */
int i, j, v1, v2, v3, v4, v5, v6, v7, v8;
int n = N / 8 * 8;
int m = M / 8 * 8;

/* Main region: for each strip of 8 columns, walk down the rows and move one
 * 8-element row segment at a time. */
for (j = 0; j < m; j += 8)
    for (i = 0; i < n; ++i)
    {
        v1 = A[i][j];
        v2 = A[i][j + 1];
        v3 = A[i][j + 2];
        v4 = A[i][j + 3];
        v5 = A[i][j + 4];
        v6 = A[i][j + 5];
        v7 = A[i][j + 6];
        v8 = A[i][j + 7];
        B[j][i] = v1;
        B[j + 1][i] = v2;
        B[j + 2][i] = v3;
        B[j + 3][i] = v4;
        B[j + 4][i] = v5;
        B[j + 5][i] = v6;
        B[j + 6][i] = v7;
        B[j + 7][i] = v8;
    }

/* Right strip: leftover columns for every row (this includes the corner). */
for (i = 0; i < N; ++i)
    for (j = m; j < M; ++j)
    {
        v1 = A[i][j];
        B[j][i] = v1;
    }

/* Bottom strip: leftover rows, limited to the columns not handled above. */
for (i = n; i < N; ++i)
    for (j = 0; j < m; ++j)
    {
        v1 = A[i][j];
        B[j][i] = v1;
    }