官方文档
实验环境
- ubuntu 18.04
- apt install -y tk8.5-dev tcl8.5-dev
- apt install build-essential
- 注释ssim.c第845和846行
Part A
part A实验考察对Y86汇编指令的掌握程度。
sum.ys
# sum.ys: iteratively sum the values stored in a linked list.
# Execution begins at address 0
        .pos 0
        irmovq stack, %rsp          # initialise the stack pointer
        call main                   # run the program
        halt                        # stop once main returns

# Sample linked list: each node is a value quad followed by a next-pointer quad
        .align 8
ele1:
        .quad 0x00a
        .quad ele2
ele2:
        .quad 0x0b0
        .quad ele3
ele3:
        .quad 0xc00
        .quad 0                     # zero next pointer ends the list

# main: call sum_list on the sample list
main:
        irmovq ele1, %rdi           # %rdi = address of the first node
        call sum_list
        ret

# sum_list: add up the value field of every node in a list
#   In:  %rdi = address of the first node (0 for an empty list)
#   Out: %rax = sum of the node values
#   Overwrites %rdi, %r9 and the condition codes
sum_list:
        irmovq $0, %rax             # sum = 0
        jmp sum_check               # test the pointer before the first pass
sum_next:
        mrmovq (%rdi), %r9          # %r9 = value of the current node
        addq %r9, %rax              # sum += value
        mrmovq 8(%rdi), %rdi        # follow the next pointer
sum_check:
        andq %rdi, %rdi             # sets ZF when the pointer is zero
        jne sum_next                # keep going while a node remains
        ret

# The stack starts at 0x100 and grows toward lower addresses
        .pos 0x100
stack:
rsum.ys
# rsum.ys: recursively sum the values stored in a linked list.
# Execution begins at address 0
        .pos 0
        irmovq stack, %rsp          # initialise the stack pointer
        call main                   # run the program
        halt                        # stop here: when main returns the stack is
                                    # empty again, so a ret at this point would
                                    # pop a word that is not a return address

# Sample linked list: each node is a value quad followed by a next-pointer quad
        .align 8
ele1:
        .quad 0x00a
        .quad ele2
ele2:
        .quad 0x0b0
        .quad ele3
ele3:
        .quad 0xc00
        .quad 0                     # zero next pointer ends the list

# main: call rsum_list on the sample list
main:
        irmovq ele1, %rdi           # %rdi = address of the first node
        call rsum_list
        ret

# rsum_list: recursively add up the value field of every node in a list
#   In:  %rdi = address of the first node (0 for an empty list)
#   Out: %rax = sum of the node values
#   Preserves %r8 (saved and restored on the stack)
#   Overwrites %rdi and the condition codes
rsum_list:
        pushq %r8                   # %r8 holds this node's value across the
                                    # recursive call, so save the caller's copy
        irmovq $0, %rax             # result for the empty list
        andq %rdi, %rdi             # sets ZF when the pointer is zero
        je rsum_done                # empty list: return 0
        mrmovq (%rdi), %r8          # %r8 = value of the current node
        mrmovq 8(%rdi), %rdi        # %rdi = rest of the list
        call rsum_list              # %rax = sum of the rest
        addq %r8, %rax              # add this node's value
rsum_done:
        popq %r8                    # restore the caller's %r8
        ret

# The stack starts here and grows toward lower addresses
        .pos 0x100
stack:
copy.ys
# copy.ys: copy a block of quads and return the XOR of the copied values.
# Execution begins at address 0
        .pos 0
        irmovq stack, %rsp          # initialise the stack pointer
        call main                   # run the program
        halt                        # stop here: when main returns the stack is
                                    # empty again, so a ret at this point would
                                    # pop a word that is not a return address

        .align 8
# Source block
src:
        .quad 0x00a
        .quad 0x0b0
        .quad 0xc00
# Destination block
dest:
        .quad 0x111
        .quad 0x222
        .quad 0x333

# main: copy the three quads at src to dest
main:
        irmovq src, %rdi            # %rdi = source address
        irmovq dest, %rsi           # %rsi = destination address
        irmovq $3, %rdx             # %rdx = number of quads to copy
        call copy
        ret

# copy: copy %rdx quads from (%rdi) to (%rsi)
#   In:  %rdi = source address
#        %rsi = destination address
#        %rdx = number of quads; a value <= 0 copies nothing
#   Out: %rax = XOR of all copied quads (0 when nothing is copied)
#   Overwrites %rdi, %rsi, %rdx, %r8, %r12, %r13 and the condition codes
copy:
        irmovq $8, %r12             # constant: bytes per quad
        irmovq $1, %r13             # constant: count decrement
        irmovq $0, %rax             # checksum = 0
        jmp copy_check              # test the count before the first pass
copy_next:
        mrmovq (%rdi), %r8          # %r8 = next source quad
        addq %r12, %rdi             # advance the source pointer
        rmmovq %r8, (%rsi)          # store it to the destination
        addq %r12, %rsi             # advance the destination pointer
        xorq %r8, %rax              # fold the quad into the checksum
        subq %r13, %rdx             # one fewer quad to go
copy_check:
        andq %rdx, %rdx             # set the condition codes from the count
        jg copy_next                # loop only while the count is positive
        ret

# The stack starts here and grows toward lower addresses
        .pos 0x100
stack:
Part B
Part B 考察 HCL(硬件控制语言)的使用:为 SEQ 处理器添加 iaddq 指令。按 CTRL + F 搜索 IIADDQ 即可查看添加位置。
################ Fetch Stage ###################################

# Instruction code: use INOP when instruction memory reports an error
word icode = [
	imem_error : INOP;
	1          : imem_icode;	# Normal case: value from instruction memory
];

# Instruction function: use FNONE when instruction memory reports an error
word ifun = [
	imem_error : FNONE;
	1          : imem_ifun;		# Normal case: value from instruction memory
];

# Instruction codes accepted as valid; IIADDQ is included for iaddq
bool instr_valid = icode in
	{ IHALT, INOP, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
	  IOPQ, IIADDQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };

# Instructions that need a register-id byte
bool need_regids =
	icode in { IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
		   IOPQ, IIADDQ, IPUSHQ, IPOPQ };

# Instructions that need a constant word
bool need_valC =
	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ, IJXX, ICALL };
################ Decode Stage ###################################

## Register used as the A source
word srcA = [
	icode in { IOPQ, IRRMOVQ, IRMMOVQ, IPUSHQ } : rA;
	icode in { IRET, IPOPQ }                    : RRSP;
	1                                           : RNONE;	# No A source
];

## Register used as the B source; iaddq reads rB like an OPq instruction
word srcB = [
	icode in { IOPQ, IIADDQ, IRMMOVQ, IMRMOVQ } : rB;
	icode in { ICALL, IRET, IPUSHQ, IPOPQ }     : RRSP;
	1                                           : RNONE;	# No B source
];

## Register written through the E port; iaddq writes its result to rB
word dstE = [
	icode == IRRMOVQ && Cnd                 : rB;	# Written only when Cnd holds
	icode in { IOPQ, IIADDQ, IIRMOVQ }      : rB;
	icode in { ICALL, IRET, IPUSHQ, IPOPQ } : RRSP;
	1                                       : RNONE;	# No E-port write
];

## Register written through the M port
word dstM = [
	icode in { IPOPQ, IMRMOVQ } : rA;
	1                           : RNONE;	# No M-port write
];
################ Execute Stage ###################################

## ALU input A; iaddq supplies its constant valC
word aluA = [
	icode in { IOPQ, IRRMOVQ }                    : valA;
	icode in { IIADDQ, IIRMOVQ, IRMMOVQ, IMRMOVQ } : valC;
	icode in { IPUSHQ, ICALL }                    : -8;
	icode in { IPOPQ, IRET }                      : 8;
	# Remaining instructions do not use the ALU
];

## ALU input B; iaddq supplies the register value valB
word aluB = [
	icode in { IOPQ, IIADDQ, IRMMOVQ, IMRMOVQ,
		   ICALL, IRET, IPUSHQ, IPOPQ } : valB;
	icode in { IIRMOVQ, IRRMOVQ }           : 0;
	# Remaining instructions do not use the ALU
];

## ALU function: taken from ifun for OPq, ALUADD for everything else
word alufun = [
	icode in { IOPQ } : ifun;
	1                 : ALUADD;
];

## Instructions that update the condition codes
bool set_cc = icode in { IIADDQ, IOPQ };
################ Memory Stage ###################################

## Instructions that read data memory
bool mem_read = icode in { IRET, IPOPQ, IMRMOVQ };

## Instructions that write data memory
bool mem_write = icode in { ICALL, IPUSHQ, IRMMOVQ };

## Memory address
word mem_addr = [
	icode in { IMRMOVQ, IRMMOVQ, IPUSHQ, ICALL } : valE;
	icode in { IRET, IPOPQ }                     : valA;
	# Remaining instructions do not need an address
];

## Data written to memory
word mem_data = [
	icode in { IPUSHQ, IRMMOVQ } : valA;	# Register value
	icode in { ICALL }           : valP;	# Return PC
	# Otherwise nothing is written
];

## Instruction status; the cases are tested in this order
word Stat = [
	imem_error || dmem_error : SADR;
	!instr_valid             : SINS;
	icode == IHALT           : SHLT;
	1                        : SAOK;
];
################ Program Counter Update ############################

## Address of the next instruction to fetch
word new_pc = [
	# Taken branch: jump to the instruction constant
	icode == IJXX && Cnd : valC;
	# Call: jump to the instruction constant
	icode in { ICALL }   : valC;
	# Return: use the value read from the stack
	icode in { IRET }    : valM;
	# Everything else: fall through to the incremented PC
	1                    : valP;
];
#/* $end seq-all-hcl */
Part C
考察对流水线(pipeline)的理解。
在 pipe-full.hcl 中添加 iaddq 相关逻辑。
注意 set_cc 处不要忘了添加 iaddq。
################ Fetch Stage ###################################

## Address to fetch from; the cases are tested in this order
word f_pc = [
	# Mispredicted branch: fetch at the incremented PC carried in M_valA
	M_icode == IJXX && !M_Cnd : M_valA;
	# A ret has reached write-back: fetch at the value it read
	W_icode in { IRET }       : W_valM;
	# Otherwise use the predicted PC
	1                         : F_predPC;
];

## Instruction code: use INOP when instruction memory reports an error
word f_icode = [
	imem_error : INOP;
	1          : imem_icode;
];

## Instruction function: use FNONE when instruction memory reports an error
word f_ifun = [
	imem_error : FNONE;
	1          : imem_ifun;
];

## Instruction codes accepted as valid; IIADDQ is included for iaddq
bool instr_valid = f_icode in
	{ IHALT, INOP, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
	  IOPQ, IIADDQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ };

## Status code of the fetched instruction; the cases are tested in this order
word f_stat = [
	imem_error       : SADR;
	!instr_valid     : SINS;
	f_icode == IHALT : SHLT;
	1                : SAOK;
];

## Instructions that need a register-id byte
bool need_regids =
	f_icode in { IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
		     IOPQ, IIADDQ, IPUSHQ, IPOPQ };

## Instructions that need a constant word
bool need_valC =
	f_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ, IJXX, ICALL };

## Predicted next PC: the constant for jumps and calls, otherwise f_valP
word f_predPC = [
	f_icode in { ICALL, IJXX } : f_valC;
	1                          : f_valP;
];
################ Decode Stage ######################################

## Register used as the A source
word d_srcA = [
	D_icode in { IOPQ, IRRMOVQ, IRMMOVQ, IPUSHQ } : D_rA;
	D_icode in { IRET, IPOPQ }                    : RRSP;
	1                                             : RNONE;	# No A source
];

## Register used as the B source; iaddq reads rB like an OPq instruction
word d_srcB = [
	D_icode in { IOPQ, IIADDQ, IRMMOVQ, IMRMOVQ } : D_rB;
	D_icode in { ICALL, IRET, IPUSHQ, IPOPQ }     : RRSP;
	1                                             : RNONE;	# No B source
];

## Register written through the E port; iaddq writes its result to rB
word d_dstE = [
	D_icode in { IOPQ, IIADDQ, IRRMOVQ, IIRMOVQ } : D_rB;
	D_icode in { ICALL, IRET, IPUSHQ, IPOPQ }     : RRSP;
	1                                             : RNONE;	# No E-port write
];

## Register written through the M port
word d_dstM = [
	D_icode in { IPOPQ, IMRMOVQ } : D_rA;
	1                             : RNONE;	# No M-port write
];

## Value for A, with forwarding into the decode stage.
## The cases are tested in order, so an earlier match takes priority.
word d_valA = [
	D_icode in { IJXX, ICALL } : D_valP;	# Incremented PC
	d_srcA == e_dstE           : e_valE;	# valE from execute
	d_srcA == M_dstM           : m_valM;	# valM from memory
	d_srcA == M_dstE           : M_valE;	# valE from memory
	d_srcA == W_dstM           : W_valM;	# valM from write back
	d_srcA == W_dstE           : W_valE;	# valE from write back
	1                          : d_rvalA;	# Value read from the register file
];

## Value for B, with the same forwarding priority as d_valA
word d_valB = [
	d_srcB == e_dstE : e_valE;	# valE from execute
	d_srcB == M_dstM : m_valM;	# valM from memory
	d_srcB == M_dstE : M_valE;	# valE from memory
	d_srcB == W_dstM : W_valM;	# valM from write back
	d_srcB == W_dstE : W_valE;	# valE from write back
	1                : d_rvalB;	# Value read from the register file
];
################ Execute Stage #####################################

## ALU input A; iaddq supplies its constant E_valC
word aluA = [
	E_icode in { IOPQ, IRRMOVQ }                    : E_valA;
	E_icode in { IIADDQ, IIRMOVQ, IRMMOVQ, IMRMOVQ } : E_valC;
	E_icode in { IPUSHQ, ICALL }                    : -8;
	E_icode in { IPOPQ, IRET }                      : 8;
	# Remaining instructions do not use the ALU
];

## ALU input B; iaddq supplies the register value E_valB
word aluB = [
	E_icode in { IOPQ, IIADDQ, IRMMOVQ, IMRMOVQ,
		     ICALL, IRET, IPUSHQ, IPOPQ } : E_valB;
	E_icode in { IIRMOVQ, IRRMOVQ }           : 0;
	# Remaining instructions do not use the ALU
];

## ALU function: taken from E_ifun for OPq, ALUADD for everything else
word alufun = [
	E_icode in { IOPQ } : E_ifun;
	1                   : ALUADD;
];

## Update the condition codes for OPq and iaddq, but only while neither
## m_stat nor W_stat is SADR, SINS or SHLT
bool set_cc =
	E_icode in { IIADDQ, IOPQ } &&
	!m_stat in { SADR, SINS, SHLT } &&
	!W_stat in { SADR, SINS, SHLT };

## valA passes through the execute stage unchanged
word e_valA = E_valA;

## Destination becomes RNONE for a conditional move whose condition fails
word e_dstE = [
	E_icode == IRRMOVQ && !e_Cnd : RNONE;
	1                            : E_dstE;
];
################ Memory Stage ######################################

## Memory address
word mem_addr = [
	M_icode in { IMRMOVQ, IRMMOVQ, IPUSHQ, ICALL } : M_valE;
	M_icode in { IRET, IPOPQ }                     : M_valA;
	# Remaining instructions do not need an address
];

## Instructions that read data memory
bool mem_read = M_icode in { IRET, IPOPQ, IMRMOVQ };

## Instructions that write data memory
bool mem_write = M_icode in { ICALL, IPUSHQ, IRMMOVQ };

#/* $begin pipe-m_stat-hcl */
## Status leaving the memory stage: SADR on a data-memory error,
## otherwise the status carried in M_stat
word m_stat = [
	dmem_error : SADR;
	1          : M_stat;
];
#/* $end pipe-m_stat-hcl */

## E port: register ID and value come straight from the W register
word w_dstE = W_dstE;
word w_valE = W_valE;

## M port: register ID and value come straight from the W register
word w_dstM = W_dstM;
word w_valM = W_valM;

## Processor status: SBUB in W_stat is reported as SAOK
word Stat = [
	W_stat == SBUB : SAOK;
	1              : W_stat;
];
################ Pipeline Register Control #########################
# Each pipeline register (F, D, E, M, W) has a stall signal and a bubble
# signal defined below. The load/use condition
#   E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }
# appears in F_stall, D_stall, D_bubble (negated) and E_bubble.
# Should I stall or inject a bubble into Pipeline Register F?
# At most one of these can be true.
# F is never bubbled.
bool F_bubble = 0;
bool F_stall =
# Conditions for a load/use hazard
E_icode in { IMRMOVQ, IPOPQ } &&
E_dstM in { d_srcA, d_srcB } ||
# Stalling at fetch while ret passes through pipeline
IRET in { D_icode, E_icode, M_icode };
# Should I stall or inject a bubble into Pipeline Register D?
# At most one of these can be true.
bool D_stall =
# Conditions for a load/use hazard
E_icode in { IMRMOVQ, IPOPQ } &&
E_dstM in { d_srcA, d_srcB };
bool D_bubble =
# Mispredicted branch
(E_icode == IJXX && !e_Cnd) ||
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
# (the negated load/use term keeps this ret case from overlapping D_stall)
!(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_srcB }) &&
IRET in { D_icode, E_icode, M_icode };
# Should I stall or inject a bubble into Pipeline Register E?
# At most one of these can be true.
# E is never stalled.
bool E_stall = 0;
bool E_bubble =
# Mispredicted branch
(E_icode == IJXX && !e_Cnd) ||
# Conditions for a load/use hazard
E_icode in { IMRMOVQ, IPOPQ } &&
E_dstM in { d_srcA, d_srcB};
# Should I stall or inject a bubble into Pipeline Register M?
# At most one of these can be true.
# M is never stalled.
bool M_stall = 0;
# Start injecting bubbles as soon as exception passes through memory stage
# (that is, when m_stat or W_stat is SADR, SINS or SHLT)
bool M_bubble = m_stat in { SADR, SINS, SHLT } || W_stat in { SADR, SINS, SHLT };
# Should I stall or inject a bubble into Pipeline Register W?
# W stalls while W_stat is SADR, SINS or SHLT; it is never bubbled.
bool W_stall = W_stat in { SADR, SINS, SHLT };
bool W_bubble = 0;
#/* $end pipe-all-hcl */
ncopy.ys
50分。5层循环展开。另外n % 5余数小于等于2的n较多。
# You can modify this portion
# Excerpt of the copy routine: the enclosing function label and the Done
# target are outside this listing.
# Register use visible here: quads are loaded from offsets of %rdi and stored
# to the same offsets of %rsi; %rax is incremented once per stored value that
# is greater than zero; %rdx is the loop counter.
# NOTE(review): presumably %rdi = src, %rsi = dst and %rdx = element count on
# entry - confirm against the part of the file not shown here.
# Loop header
xorq %rax,%rax # count = 0;
# Bias the counter by -4 so the sign test below means "at most 4 left"
iaddq $-4, %rdx
jle EQ0
# Main loop: each pass handles five quads
Npos0:
# Issue all five loads before the first store
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
mrmovq 16(%rdi), %r12
mrmovq 24(%rdi), %r13
mrmovq 32(%rdi), %r14
rmmovq %r10, (%rsi)
andq %r10, %r10 # val <= 0?
jle Npos1
iaddq $1, %rax
Npos1:
rmmovq %r11, 8(%rsi)
andq %r11, %r11 # val <= 0?
jle Npos2
iaddq $1, %rax
Npos2:
rmmovq %r12, 16(%rsi)
andq %r12, %r12 # val <= 0?
jle Npos3
iaddq $1, %rax
Npos3:
rmmovq %r13, 24(%rsi)
andq %r13, %r13 # val <= 0?
jle Npos4
iaddq $1, %rax
Npos4:
rmmovq %r14, 32(%rsi)
andq %r14, %r14 # val <= 0?
jle Tail
iaddq $1, %rax
Tail:
# Advance both pointers by five quads (40 bytes) and count five off %rdx;
# repeat while the biased counter is still positive
iaddq $40, %rsi
iaddq $40, %rdi
iaddq $-5, %rdx
jg Npos0
# Remainder: undo the -4 bias, then handle up to four leftover quads
EQ0:
iaddq $4, %rdx
jle Done
# The second quad is loaded here, before the counter check at EQ1 decides
# whether it is stored
mrmovq (%rdi), %r10
mrmovq 8(%rdi), %r11
rmmovq %r10, (%rsi)
andq %r10, %r10
jle EQ1
iaddq $1, %rax
EQ1:
iaddq $-1, %rdx
jle Done
rmmovq %r11, 8(%rsi)
andq %r11, %r11
jle EQ2
iaddq $1, %rax
EQ2:
iaddq $-1, %rdx
jle Done
mrmovq 16(%rdi), %r12
rmmovq %r12, 16(%rsi)
andq %r12, %r12
jle EQ3
iaddq $1, %rax
EQ3:
iaddq $-1, %rdx
jle Done
mrmovq 24(%rdi), %r13
rmmovq %r13, 24(%rsi)
andq %r13, %r13
jle Done
iaddq $1, %rax