Cachelab实验

853 阅读3分钟

Part A 实现csim.c

概念

  • 冷不命中
  • 冲突不命中
  • 容量不命中
  • 写命中 > 直写和写回
  • 写不命中 > 写分配和非写分配

工具

valgrind

csim.c

#include "cachelab.h"
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <limits.h>
#include <getopt.h>
#include <string.h>

#ifndef OVERFLOW
#define OVERFLOW 2
#endif

/*
 * Command-line state filled in by main():
 *   h, v - set to 1 when the -h / -v flags are seen
 *   s, E, b - integer arguments of -s, -E and -b
 *   S - number of sets, computed in main() as 1 << s
 */
int h, v, s, E, b, S;

/* Access counters incremented by update() and reported through printSummary(). */
int hits,
    misses,
    evictions;

/* Trace file path copied from the -t argument; opened with fopen() in main() and parse_trace(). */
char t[100];

/*
 * Types for the simulated cache.
 *   cache_line - one line: valid flag, tag and an age counter
 *   cache_set  - pointer to cache_line (used as an array of E lines)
 *   cache      - pointer to cache_set (used as an array of S sets)
 */
typedef struct
{
    int valid;     /* 0 after init_cache(); set to 1 when update() fills the line */
    int tag;       /* tag bits of the cached address; -1 after init_cache() */
    int timestamp; /* reset to 0 by update() on access, incremented by update_stamp() */
} cache_line, *cache_set, **cache; // structures of the cache simulator

cache _cache = NULL;

/*
 * Print the command-line usage summary to stdout.
 *
 * Each option is printed on its own line; the adjacent string literals are
 * concatenated by the compiler, so every one needs its own trailing newline.
 */
void printUsage(void)
{
    printf("Usage: ./csim-ref [-hv] -s <num> -E <num> -b <num> -t <file>\n"
           "Options: \n"
           "   -h: Optional help flag that prints usage info\n"
           "   -v: Optional verbose flag that displays trace info\n"
           "   -s <s>: Number of set index bits (S = 2^s is the number of sets)\n"
           "   -E <E>: Associativity (number of lines per set)\n"
           "   -b <b>: Number of block bits (B = 2^b is the block size)\n"
           "   -t <tracefile>: Name of the valgrind trace to replay\n"
           "Examples: \n"
           "./csim-ref -s 4 -E 1 -b 4 -t traces/yi.trace\n");
}

void init_cache()
{
    _cache = (cache)malloc(sizeof(cache_set) * S);
    if (_cache == NULL)
    {
        exit(OVERFLOW);
    }
    for (int i = 0; i < S; i++)
    {
        cache_set set = (cache_set)malloc(sizeof(cache_line) * E);
        if (!set)
        {
            exit(OVERFLOW);
        }
        _cache[i] = set;
        for (int j = 0; j < E; j++)
        {
            set[j].valid = 0;
            set[j].tag = -1;
            set[j].timestamp = -1;
        }
    }
}

void update(unsigned int address)
{
    int setAddr = (address >> b) & (-1u >> (64 - s));
    int tagAddr = address >> (b + s);
    int max_stamp = INT_MIN;
    int max_stamp_index = -1;

    // 查看有没有命中
    for (int i = 0; i < E; i++)
    {
        if (tagAddr == _cache[setAddr][i].tag)
        {
            _cache[setAddr][i].timestamp = 0;
            hits++;
            return;
        }
    }

    // 查看有没有空行
    for (int i = 0; i < E; i++)
    {
        if (_cache[setAddr][i].valid == 0)
        {
            _cache[setAddr][i].valid = 1;
            _cache[setAddr][i].tag = tagAddr;
            _cache[setAddr][i].timestamp = 0;
            misses++;
            return;
        }
    }

    // LRU替换
    evictions++;
    misses++;

    for (int i = 0; i < E; i++)
    {
        if (_cache[setAddr][i].timestamp > max_stamp)
        {
            max_stamp = _cache[setAddr][i].timestamp;
            max_stamp_index = i;
        }
    }

    _cache[setAddr][max_stamp_index].tag = tagAddr;
    _cache[setAddr][max_stamp_index].timestamp = 0;
    return;
}

void update_stamp()
{
    for (int i = 0; i < S; i++)
    {
        for (int j = 0; j < E; j++)
        {
            if(_cache[i][j].valid == 1){
                _cache[i][j].timestamp++;
            }   
        }
    }
}

/*
 * Replay the trace file named by the global `t` against the simulated cache,
 * then release the cache memory.
 *
 * Each record has the form " <op> <hex address>,<size>":
 *   L (load) and S (store) perform one cache access,
 *   M (modify) performs two (a load followed by a store),
 *   any other operation (e.g. I, instruction fetch) is ignored.
 * After every record all valid lines are aged via update_stamp().
 *
 * Exits with status 1 if the trace file cannot be opened. Parsing stops at
 * end of file or at the first record that does not match the format.
 */
void parse_trace()
{
    FILE *fp = fopen(t, "r");
    if (fp == NULL)
    {
        exit(1);
    }

    char operation;
    unsigned long long address; /* wide enough for 64-bit trace addresses */
    int size;

    /*
     * The previous format "%xu,%d" contained a stray literal 'u', so every
     * record failed after the address and the ",size" tail was then consumed
     * as a second, bogus record. Require all three fields to match.
     */
    while (fscanf(fp, " %c %llx,%d", &operation, &address, &size) == 3)
    {
        switch (operation)
        {
        case 'L':
        case 'S':
            /* update() takes a 32-bit address; higher bits are dropped here. */
            update((unsigned int)address);
            break;
        case 'M':
            /* Modify = load + store on the same address. */
            update((unsigned int)address);
            update((unsigned int)address);
            break;
        default:
            /* Instruction fetches and unknown operations do not touch the cache. */
            break;
        }
        update_stamp();
    }
    fclose(fp);

    /* The simulation is over: free every set, then the set table itself. */
    for (int i = 0; i < S; i++)
    {
        free(_cache[i]);
    }

    free(_cache);
    _cache = NULL;
}

/*
 * Entry point of the cache simulator.
 *
 * Parses -h, -v, -s <s>, -E <E>, -b <b> and -t <tracefile>, builds the cache,
 * replays the trace and reports hits, misses and evictions via printSummary().
 *
 * Returns 0 on success (or after -h), -1 on invalid arguments; exits with -1
 * if the trace file cannot be opened.
 */
int main(int argc, char *argv[])
{
    h = 0;
    v = 0;
    hits = misses = evictions = 0;
    int opt; /* return value of getopt */
    while (-1 != (opt = (getopt(argc, argv, "hvs:E:b:t:"))))
    {
        switch (opt)
        {
        case 'h':
            h = 1;
            printUsage();
            return 0;
        case 'v':
            /* Verbose flag only; it must not print the usage text. */
            v = 1;
            break;
        case 's':
            s = atoi(optarg);
            break;
        case 'E':
            E = atoi(optarg);
            break;
        case 'b':
            b = atoi(optarg);
            break;
        case 't':
        {
            /* Bounded copy: t is a fixed 100-byte buffer. */
            int len = snprintf(t, sizeof t, "%s", optarg);
            if (len < 0 || (size_t)len >= sizeof t)
            {
                printf("trace file name too long\n");
                return -1;
            }
            break;
        }
        default:
            printUsage();
            return -1;
        }
    }
    if (s <= 0 || E <= 0 || b <= 0) /* reject missing or non-positive parameters */
        return -1;

    /*
     * Addresses are handled as unsigned int, and both 1 << s and
     * address >> (b + s) are undefined once the shift count reaches the
     * width of the type, so bound s and b before using them.
     */
    const int addr_bits = (int)(sizeof(unsigned int) * CHAR_BIT);
    if (s >= addr_bits - 1 || b >= addr_bits || s + b >= addr_bits)
        return -1;
    S = 1 << s; /* S = 2^s */

    /* Probe the trace file up front so a bad path gets a message. */
    FILE *fp = fopen(t, "r");
    if (fp == NULL)
    {
        printf("open error");
        exit(-1);
    }
    fclose(fp); /* parse_trace() opens the file itself */

    init_cache();
    parse_trace();
    printSummary(hits, misses, evictions);
    return 0;
}

Part B

分块技术

32 × 32 矩阵(M = 32,N = 32)

/*
 * Transpose the N x M matrix A into the M x N matrix B using 8 x 8 blocking.
 *
 * M: number of columns of A (rows of B)
 * N: number of rows of A (columns of B)
 *
 * Both M and N must be multiples of 8; no remainder handling is done.
 * Each row of a block is first read completely into eight locals and only
 * then written to B, so the reads of a row are finished before any write.
 */
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
{
    int i, j, k, v1, v2, v3, v4, v5, v6, v7, v8;
    int b = 8; /* block edge length */

    /*
     * i/k walk the rows of A (bounded by N), j walks its columns (bounded
     * by M). The bounds used to be swapped, which only worked when N == M.
     */
    for (i = 0; i < N; i += b)
    {
        for (j = 0; j < M; j += b)
        {
            for (k = i; k < i + b; k++)
            {
                /* Unrolled read of one block row of A. */
                v1 = A[k][j];
                v2 = A[k][j + 1];
                v3 = A[k][j + 2];
                v4 = A[k][j + 3];
                v5 = A[k][j + 4];
                v6 = A[k][j + 5];
                v7 = A[k][j + 6];
                v8 = A[k][j + 7];

                /* Store it as one block column of B. */
                B[j][k] = v1;
                B[j + 1][k] = v2;
                B[j + 2][k] = v3;
                B[j + 3][k] = v4;
                B[j + 4][k] = v5;
                B[j + 5][k] = v6;
                B[j + 6][k] = v7;
                B[j + 7][k] = v8;
            }
        }
    }
}

64 × 64 矩阵(M = 64,N = 64)

/*
 * Transpose A into B in 8 x 8 blocks, treating each block as four 4 x 4
 * quadrants. (i, j) is the top-left corner of the current block in A;
 * the matching block in B has its top-left corner at (j, i).
 * NOTE(review): the loops assume N and M are multiples of 8 - confirm at the call site.
 */
for (int i = 0; i < N; i += 8)
{
    for (int j = 0; j < M; j += 8)
    {
        /* Phase 1: process the top four rows of the A block. */
        for (int k = i; k < i + 4; ++k)
        {
        /* Read one full 8-element row of the A block. */
            int temp_value0 = A[k][j];
            int temp_value1 = A[k][j+1];
            int temp_value2 = A[k][j+2];
            int temp_value3 = A[k][j+3];
            int temp_value4 = A[k][j+4];
            int temp_value5 = A[k][j+5];
            int temp_value6 = A[k][j+6];
            int temp_value7 = A[k][j+7];
          
            /* Left half of the row goes to its final (transposed) place in B. */
            B[j][k] = temp_value0;
            B[j+1][k] = temp_value1;
            B[j+2][k] = temp_value2;
            B[j+3][k] = temp_value3;
          /* Right half is parked in rows j..j+3, columns k+4 of B, in reverse order. */
            B[j][k+4] = temp_value7;
            B[j+1][k+4] = temp_value6;
            B[j+2][k+4] = temp_value5;
            B[j+3][k+4] = temp_value4;
        }
        /* Phase 2: process the bottom four rows of the A block, one column pair per step. */
         for (int l = 0; l < 4; ++l)
        {
           /* Read column j+3-l and column j+4+l of A rows i+4..i+7. */
            int temp_value0 = A[i+4][j+3-l];
            int temp_value1 = A[i+5][j+3-l];
            int temp_value2 = A[i+6][j+3-l];
            int temp_value3 = A[i+7][j+3-l];
            int temp_value4 = A[i+4][j+4+l];
            int temp_value5 = A[i+5][j+4+l];
            int temp_value6 = A[i+6][j+4+l];
            int temp_value7 = A[i+7][j+4+l];
           
           /* Move the values parked in B row j+3-l to their final row j+4+l. */
            B[j+4+l][i] = B[j+3-l][i+4];
            B[j+4+l][i+1] = B[j+3-l][i+5];
            B[j+4+l][i+2] = B[j+3-l][i+6];
            B[j+4+l][i+3] = B[j+3-l][i+7];
           /* Store the two columns just read from A into their final places in B. */
            B[j+3-l][i+4] = temp_value0;
            B[j+3-l][i+5] = temp_value1;
            B[j+3-l][i+6] = temp_value2;
            B[j+3-l][i+7] = temp_value3;
            B[j+4+l][i+4] = temp_value4;
            B[j+4+l][i+5] = temp_value5;
            B[j+4+l][i+6] = temp_value6;
            B[j+4+l][i+7] = temp_value7;
        } 
    }
}

61 × 67 矩阵(M = 61,N = 67)

/*
 * Transpose the N x M matrix A into B when M and N are not multiples of 8.
 *
 * n and m are N and M rounded down to a multiple of 8. The matrix is split
 * into three disjoint regions so that every element is copied exactly once:
 *   1. rows 0..n-1, columns 0..m-1 : eight columns at a time
 *   2. rows 0..N-1, columns m..M-1 : leftover columns on the right
 *   3. rows n..N-1, columns 0..m-1 : leftover rows at the bottom
 * (The earlier version copied the bottom-right corner three times and the
 * bottom rows overlapped the right-hand columns.)
 */
int i, j, v1, v2, v3, v4, v5, v6, v7, v8;
    int n = N / 8 * 8;
    int m = M / 8 * 8;

    /* Region 1: read eight consecutive elements of a row, then store them. */
    for (j = 0; j < m; j += 8)
        for (i = 0; i < n; ++i)
        {
            v1 = A[i][j];
            v2 = A[i][j + 1];
            v3 = A[i][j + 2];
            v4 = A[i][j + 3];
            v5 = A[i][j + 4];
            v6 = A[i][j + 5];
            v7 = A[i][j + 6];
            v8 = A[i][j + 7];

            B[j][i] = v1;
            B[j + 1][i] = v2;
            B[j + 2][i] = v3;
            B[j + 3][i] = v4;
            B[j + 4][i] = v5;
            B[j + 5][i] = v6;
            B[j + 6][i] = v7;
            B[j + 7][i] = v8;
        }

    /* Region 2: columns m..M-1 for every row (includes the bottom-right corner). */
    for (i = 0; i < N; ++i)
        for (j = m; j < M; ++j)
        {
            v1 = A[i][j];
            B[j][i] = v1;
        }

    /* Region 3: rows n..N-1, limited to columns not already handled by region 2. */
    for (i = n; i < N; ++i)
        for (j = 0; j < m; ++j)
        {
            v1 = A[i][j];
            B[j][i] = v1;
        }