📄 cpuarmcore.pas
字号:
CMP_OPCODE: begin
// MRS Rd, SPSR (transfer SPSR contents to a register)
// cond 00 0 10P0 0 1111 [Rd] 0000 0000 0000
if cpuCurrentOpcode and $F0FFF = $F0000 then begin
if SPSR <> 0 then regs[Rd] := regs[SPSR];
end else
UndefinedState('MRS Rd, SPSR found that does not decode properly');
end;
CMN_OPCODE: begin
Rm := cpuCurrentOpcode and $F;
if cpuCurrentOpcode and $FFFF0 = $9F000 then begin
// MSR SPSR, Rm cond 00 0 10P1 0 1001 1111 0000 0000 [Rm], P=1
if Rm = R15 then UndefinedState('MSR SPSR, r15');
if SPSR <> 0 then regs[SPSR] := regs[Rm];
end else if cpuCurrentOpcode and $FF000 = $8F000 then begin
// MSR SPSR, Rm cond 00 0 10P1 0 1000 1111 0000 0000 [Rm]
// MSR SPSR, operand2 cond 00 1 10P1 0 1000 1111 Rotate Immedia
if SPSR <> 0 then regs[SPSR] := (operand2 and $F0000000) or (regs[SPSR] and $0FFFFFFF);
end else
UndefinedState('MSR (CMN-type) found that does not decode properly');
// keep pc from changing
Rd := 0;
end;
// Back to standard ALU ops
ORR_OPCODE: regs[Rd] := operand1 or operand2;
MOV_OPCODE: regs[Rd] := operand2;
BIC_OPCODE: regs[Rd] := operand1 and not operand2;
MVN_OPCODE: regs[Rd] := not operand2;
end;
if Rd = R15 then FlushPipeARM;
end;
// Since I do the pipelining a little differently than the ARM7tdmi
// does in hardware, I update R15 outside of this func regardless
// of the operand 2 format, so I have to fix R15 back up if its
// further along than normal (register specified shift)
if rsShift then Dec(regs[R15], 4);
end;
//////////////////////////////////////////////////////////////////////
// Multiply and Multiply-Accumulate (MUL, MLA)
procedure Multiply;
// Executes the ARM MUL / MLA instruction held in cpuCurrentOpcode.
//   MUL: Rd := Rm * Rs
//   MLA: Rd := Rm * Rs + Rn
// Charges the internal (I) cycles to quota and, when the S bit is set,
// updates the negative and zero flags from the 32 bit result.
// The architecture forbids Rd = Rm and any use of R15; neither
// restriction is checked here.
const
  SET_FLAGS_BIT  = 1 shl 20;
  ACCUMULATE_BIT = 1 shl 21;
var
  destReg, factorReg, multiplierReg: byte;
  arrayCycles: uint32;
  accumulate: boolean;
begin
  // Decode the register fields and the A (accumulate) bit
  factorReg     := cpuCurrentOpcode and $F;            // Rm
  multiplierReg := (cpuCurrentOpcode shr 8) and $F;    // Rs
  destReg       := (cpuCurrentOpcode shr 16) and $F;   // Rd
  accumulate    := cpuCurrentOpcode and ACCUMULATE_BIT <> 0;

  // Number of multiplier array cycles, determined by the value in Rs:
  //   1 if bits [31:8]  of Rs are all 0 or all 1
  //   2 if bits [31:16] of Rs are all 0 or all 1
  //   3 if bits [31:24] of Rs are all 0 or all 1
  //   4 in all other cases
  // Inverting a value with bit 31 set turns the "all 1" test into the
  // same "all 0" test used for non-negative values.
  arrayCycles := regs[multiplierReg];
  if arrayCycles shr 31 <> 0 then arrayCycles := not arrayCycles;
  if arrayCycles shr 8 = 0 then arrayCycles := 1
  else if arrayCycles shr 16 = 0 then arrayCycles := 2
  else if arrayCycles shr 24 = 0 then arrayCycles := 3
  else arrayCycles := 4;

  if accumulate then begin
    // MLA costs one internal cycle more than MUL; Rn is bits 15..12
    Dec(quota, (arrayCycles + 1) * cycI);
    regs[destReg] := regs[factorReg] * regs[multiplierReg] + regs[(cpuCurrentOpcode shr 12) and $F];
  end else begin
    Dec(quota, arrayCycles * cycI);
    regs[destReg] := regs[factorReg] * regs[multiplierReg];
  end;

  // With the S bit set, N and Z follow the result. C is architecturally
  // meaningless after a multiply and is left untouched here.
  if cpuCurrentOpcode and SET_FLAGS_BIT <> 0 then begin
    negative := regs[destReg] shr 31 <> 0;
    zero := regs[destReg] = 0;
  end;
end;
//////////////////////////////////////////////////////////////////////
// Herein lies the only ASM used in the MappyVM core, and out of
// laziness rather than need for speed. IA32 already provides us with
// a nice 32x32->64 multiply, why reinvent the wheel.
procedure LongMultiply;
// Executes the ARM long multiply instruction held in cpuCurrentOpcode
// (UMULL, SMULL, UMLAL, SMLAL):
//   MULL: (RdHi,RdLo) := Rm * Rs
//   MLAL: (RdHi,RdLo) := Rm * Rs + (RdHi,RdLo)
// Charges the internal (I) cycles to quota and, when the S bit is set,
// updates the negative and zero flags from the 64 bit result.
// The architecture forbids R15 as an operand or destination and requires
// RdHi, RdLo and Rm to be distinct; neither rule is checked here.
const
  DO_ACCUMULATE = 1 shl 21;
  IS_SIGNED     = 1 shl 22;
  SET_FLAGS     = 1 shl 20;
var
  signed, accumulate: boolean;
  hiIndex, loIndex: byte;
  Rm, Rs, RdHi, RdLo, m: uint32;
begin
  // Decode the variant bits and the destination register numbers
  signed     := cpuCurrentOpcode and IS_SIGNED <> 0;
  accumulate := cpuCurrentOpcode and DO_ACCUMULATE <> 0;
  hiIndex    := (cpuCurrentOpcode shr 16) and $F;
  loIndex    := (cpuCurrentOpcode shr 12) and $F;

  // Fetch the operand VALUES (not register numbers) into locals so the
  // inline assembler below can address them by name
  Rm := regs[cpuCurrentOpcode and $F];
  Rs := regs[(cpuCurrentOpcode shr 8) and $F];

  // Internal cycle count: MULL takes (array cycles + 1), MLAL one more.
  // Array cycles depend on the value of Rs:
  //   1 if bits [31:8]  are all 0 (signed forms: or all 1)
  //   2 if bits [31:16] are all 0 (signed forms: or all 1)
  //   3 if bits [31:24] are all 0 (signed forms: or all 1)
  //   4 otherwise
  // Only the signed forms invert a negative Rs, because the unsigned
  // forms accept all-zero upper bits only.
  m := Rs;
  if signed and (Rs shr 31 <> 0) then m := not m;
  if m and $FFFFFF00 = 0 then m := 2
  else if m and $FFFF0000 = 0 then m := 3
  else if m and $FF000000 = 0 then m := 4
  else m := 5;

  // Seed the 64 bit accumulator: the current (RdHi,RdLo) pair for the
  // accumulate forms, zero for the plain multiply forms. Adding the
  // product to zero gives the product itself, so one add/adc sequence
  // serves both variants.
  if accumulate then begin
    Inc(m);
    RdHi := regs[hiIndex];
    RdLo := regs[loIndex];
  end else begin
    RdHi := 0;
    RdLo := 0;
  end;

  // Convert the m count into actual cycles
  Dec(quota, m*cycI);

  // IA32 leaves the 32x32->64 product in EDX:EAX; fold it into the
  // accumulator with carry propagation from the low to the high word
  if signed then asm
    mov eax,[Rm]
    imul dword ptr [Rs]
    add [RdLo],eax
    adc [RdHi],edx
  end else asm
    mov eax,[Rm]
    mul dword ptr [Rs]
    add [RdLo],eax
    adc [RdHi],edx
  end;

  // Write the result pair back; RdLo is written last
  regs[hiIndex] := RdHi;
  regs[loIndex] := RdLo;

  // With the S bit set, N is bit 63 of the result and Z is set only when
  // all 64 bits are zero. C and V are architecturally meaningless and
  // are left untouched here.
  if cpuCurrentOpcode and SET_FLAGS <> 0 then begin
    zero := (RdLo = 0) and (RdHi = 0);
    negative := RdHi shr 31 <> 0;
  end;
end;
//////////////////////////////////////////////////////////////////////
procedure SingleDataTransfer;
// Executes the ARM single data transfer instruction (LDR, STR, LDRB,
// STRB) held in cpuCurrentOpcode.
//
// The transfer address is the base register Rn plus or minus an offset,
// which is either a 12 bit immediate or a shifted register Rm. The
// offset is applied before the access (pre-indexed, optional base
// write-back via the W bit) or after it (post-indexed, base always
// written back).
//
// When a load targets the base register itself (Rd = Rn), the loaded
// value takes precedence over the base write-back in both addressing
// modes.
const
  LOAD_BIT = 1 shl 20;
  WRITEBACK_BIT = 1 shl 21;
  BYTE_BIT = 1 shl 22;
  UP_BIT = 1 shl 23;
  PRE_INCREMENT_BIT = 1 shl 24;
  IMMEDIATE_BIT = 1 shl 25;
var
  Rn, Rd, Rm: byte;
  index, operand2: uint32;
  preIncrement, isLoad: boolean;
begin
  // Parse the operands
  Rn := (cpuCurrentOpcode shr 16) and $F;
  Rd := (cpuCurrentOpcode shr 12) and $F;
  Rm := cpuCurrentOpcode and $F;
  index := regs[Rn];
  isLoad := cpuCurrentOpcode and LOAD_BIT <> 0;

  // For this instruction class the I bit is inverted relative to the
  // data processing instructions: I = 0 selects the 12 bit immediate,
  // I = 1 selects register Rm shifted by an immediate amount (shift
  // type in bits 6..5, shift amount in bits 11..7).
  if cpuCurrentOpcode and IMMEDIATE_BIT = 0 then
    operand2 := cpuCurrentOpcode and $FFF
  else
    operand2 := BarrelShifter(regs[Rm], (cpuCurrentOpcode shr 5) and $3, (cpuCurrentOpcode shr 7) and $1F);

  // U = 0 means the offset is subtracted; negate it once so every later
  // step can simply add (modulo 2^32)
  if cpuCurrentOpcode and UP_BIT = 0 then operand2 := -operand2;

  preIncrement := cpuCurrentOpcode and PRE_INCREMENT_BIT <> 0;
  if preIncrement then begin
    Inc(index, operand2);
    // The modified base is written back only if W is 1. This happens
    // before the transfer, so a load into Rn overrides it.
    // Write-back must not be specified if R15 is the base register.
    if cpuCurrentOpcode and WRITEBACK_BIT <> 0 then regs[Rn] := index;
  end;

  if isLoad then begin
    // Load from memory
    if cpuCurrentOpcode and BYTE_BIT = 0 then
      regs[Rd] := memLoadWord(index)
    else
      regs[Rd] := memReadByte(index);
    // LDR instructions have one trailing I cycle, unless the PC is
    // modified, in which case the I cycle is in the middle.
    Dec(quota, cycI);
    if Rd = R15 then FlushPipeARM;
  end else begin
    // When R15 is the source register, the stored value will be the
    // address of the instruction plus 12. R15 is bumped by 4 for the
    // duration of the store and then restored.
    Inc(regs[R15], 4);
    // Store to memory
    if cpuCurrentOpcode and BYTE_BIT = 0 then
      memWriteWord(index, regs[Rd])
    else
      memWriteByte(index, regs[Rd]);
    Dec(regs[R15], 4);
  end;

  // Post-indexed addressing always writes the modified base back.
  // It is skipped when a load has just targeted the base register:
  // writing back would destroy the loaded value and, for Rd = Rn = R15,
  // would change the PC after the pipeline was already flushed. This
  // matches the pre-indexed path, where the loaded value also wins.
  if not preIncrement then begin
    if not (isLoad and (Rd = Rn)) then regs[Rn] := index + operand2;
  end;
end;
//////////////////////////////////////////////////////////////////////
procedure HalfwordXfer;
const
LOAD_BIT = 1 shl 20;
WRITEBACK_BIT = 1 shl 21;
IMMEDIATE_BIT = 1 shl 22;
UP_BIT = 1 shl 23;
PRE_INCREMENT_BIT = 1 shl 24;
var
Rn, Rd, Rm: byte;
index, offset: uint32;
preIncrement: boolean;
begin
// Halfword and Signed Data Transfer (LDRH/STRH/LDRSB/LDRSH)
// These instructions are used to load or store halfwords of data
// and also load sign-extended bytes or halfwords. The address used
// in the transfer is calculated by via a combination of a base
// register and adding or subtracting an offset. The result of this
// calculation may be written back into the base register as well.
// R15 should not be specified as the register offset (Rm).
// Write-back should not be specified if R15 is specified as the
// base register (Rn) or if post-indexing is used.
// Parse the operands
Rm := cpuCurrentOpcode and $F;
Rd := (cpuCurrentOpcode shr 12) and $F;
Rn := (cpuCurrentOpcode shr 16) and $F;
index := regs[Rn];
// The offset from the base may be either a 8-bit unsigned binary
// immediate value, or a second register. The 8-bit offset is
// formed by concatenating bits 11 to 8 and bits 3 to 0 of the
// instruction word, such that bit 11 becomes the MSB and bit 0
// becomes the LSB. The offset may be added to (U=1) or subtracted
// from (U=0) the base register Rn, and this may be performed either
// before (pre-indexed, P=1) or after (post-indexed, P=0) the base
// register is used as the transfer address.
if cpuCurrentOpcode and IMMEDIATE_BIT <> 0 then
offset := (cpuCurrentOpcode shr 4) and $F0 + Rm
else
offset := regs[Rm];
// Are we indexing forwards or backwards
if cpuCurrentOpcode and UP_BIT = 0 then offset := -offset;
// Check the P bit and pre-increment if neccesary
preIncrement := cpuCurrentOpcode and PRE_INCREMENT_BIT <> 0;
if preIncrement then begin
Inc(index, offset);
if cpuCurrentOpcode and WRITEBACK_BIT <> 0 then regs[Rn] := index;
end;
// Halfword loads and stores on an address with A0=1 are
// 'unpredictable', so don't do anything special ATM
// Check the L bit and see if its a load or store
if cpuCurrentOpcode and LOAD_BIT <> 0 then begin
// Load halfword, or signed byte/halfword from memory
case ((cpuCurrentOpcode shr 5) and $3) of
1: regs[Rd] := memReadHalfWord(index);
2: begin
// The LDRSB instruction loads the selected byte into the
// destination register and sign extends it to 32 bits.
regs[Rd] := memReadByte(index);
if regs[Rd] shr 7 <> 0 then regs[Rd] := $FFFFFF00 or regs[Rd];
end;
3: begin
// The LDRSH instruction loads the selected halfword into the
// destination register and sign extends it to 32 bits
regs[Rd] := memReadHalfWord(index);
if regs[Rd] shr 15 <> 0 then regs[Rd] := $FFFF0000 or regs[Rd];
end;
end;
// LDR(H,SH,SB) instructions have one trailing I cycle, unless
// the PC is modified, in which case the I cycle is in the middle.
Dec(quota, cycI);
if Rd = R15 then FlushPipeARM;
end else begin
// Store halfword to memory
// When R15 is the source register Rd, the address will be the
// address of the instruction plus 12
if Rd = R15 then
memWriteHalfWord(index, regs[R15] + 4)
else
memWriteHalfWord(index, regs[Rd]);
end;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -