SpinalHDL / VexRiscv

A FPGA friendly 32 bit RISC-V CPU implementation
MIT License
2.52k stars 420 forks source link

Instructions to save/restore register to stack is taking 2 clock each #387

Closed ztachip closed 7 months ago

ztachip commented 10 months ago

I wrote a simple task-switching routine that saves and restores registers:

// Save registers
sw s0,4(sp)
sw s1,8(sp)
sw s2,12(sp)
sw s3,16(sp)
:
:
// Restore registers
lw s0,4(sp)
lw s1,8(sp)
lw s2,12(sp)
:
:

My stack fits entirely in data cache

But it seems that each of the instructions above takes 2 clocks. I have enabled all bypasses in the HazardSimplePlugin. Shouldn't the stack save/restore instructions above take just 1 clock each?

Thanks

Dolu1990 commented 10 months ago

Hi, What memory bus / Soc are you using ?

ztachip commented 10 months ago

I am using AXI for bus.

package vexriscv.VexRiscvForSim

import spinal.core._
import spinal.lib._
import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig}
import spinal.lib.bus.amba3.apb._
import spinal.lib.bus.amba4.axi._
import spinal.lib.bus.misc.SizeMapping
import spinal.lib.io.{InOutWrapper, TriStateArray}
import spinal.lib.misc.{InterruptCtrl, Prescaler, Timer}
import spinal.lib.soc.pinsec.{PinsecTimerCtrl, PinsecTimerCtrlExternal}
import vexriscv.plugin._
import vexriscv.{VexRiscv, VexRiscvConfig, plugin}
import spinal.lib.com.spi.ddr._
import spinal.lib.bus.simple._
import scala.collection.mutable.ArrayBuffer

/** Build-time settings for the simulation SoC.
  *
  * @param coreFrequency frequency value carried by this configuration
  * @param cpuPlugins    plugins for the VexRiscv core; a mutable buffer, so individual
  *                      entries can be replaced after construction
  */
case class RiscvConfig(
  coreFrequency : HertzNumber,
  cpuPlugins    : ArrayBuffer[Plugin[VexRiscv]]
)

/** Ready-made [[RiscvConfig]] presets. */
object RiscvConfig {

  /** Default preset; identical to `default(bigEndian = false)`. */
  def default : RiscvConfig = default(false)

  /** Builds the default core: cached instruction and data buses, CSR support,
    * full hazard bypassing, hardware multiply/divide and early branches.
    *
    * @param bigEndian accepted for interface compatibility; nothing in this preset reads it
    * @return a fresh configuration (a new plugin buffer on every call)
    */
  def default(bigEndian : Boolean = false) : RiscvConfig = RiscvConfig(
    coreFrequency = 166 MHz,
    cpuPlugins = ArrayBuffer(
      new IBusCachedPlugin(
        resetVector = 0x00004000L,
        prediction = STATIC,
        relaxedPcCalculation = true,
        config = InstructionCacheConfig(
          // 8 KiB. The pasted literal had lost its `*` and read 40962,
          // which is not a power-of-two cache size.
          cacheSize = 4096 * 2,
          bytePerLine = 32,
          wayCount = 2,
          addressWidth = 32,
          cpuDataWidth = 32,
          memDataWidth = 32,
          catchIllegalAccess = true,
          catchAccessFault = true,
          asyncTagMemory = false,
          twoCycleRam = true,
          twoCycleCache = true
        )
      ),
      new DBusCachedPlugin(
        config = new DataCacheConfig(
          // 8 KiB, 2 ways (same `4096*2` repair as the instruction cache).
          cacheSize = 4096 * 2,
          bytePerLine = 32,
          wayCount = 2,
          addressWidth = 32,
          cpuDataWidth = 32,
          memDataWidth = 32,
          catchAccessError = true,
          catchIllegal = true,
          catchUnaligned = true,
          withLrSc = true,
          withAmo = true
        ),
        // NOTE(review): null is passed straight through to the plugin; presumably it
        // means "no translator port" -- confirm against DBusCachedPlugin.
        memoryTranslatorPortConfig = null
      ),
      new CsrPlugin(CsrPluginConfig.smallest(mtvecInit = 0x80000020L)),
      new DecoderSimplePlugin(
        catchIllegalInstruction = true
      ),
      new StaticMemoryTranslatorPlugin(
        // Addresses with bit 31 set are classified as the I/O range.
        ioRange      = _(31 downto 31) === 0x1
      ),
      new RegFilePlugin(
        regFileReadyKind = plugin.ASYNC,
        zeroBoot = false
      ),
      new IntAluPlugin,
      new SrcPlugin(
        separatedAddSub = false,
        executeInsertion = true
      ),
      new FullBarrelShifterPlugin,
      new HazardSimplePlugin(
        bypassExecute           = true,
        bypassMemory            = true,
        bypassWriteBack         = true,
        bypassWriteBackBuffer   = true,
        pessimisticUseSrc       = false,
        pessimisticWriteRegFile = false,
        pessimisticAddressMatch = false
      ),
      new MulPlugin,
      new DivPlugin,
      new BranchPlugin(
        earlyBranch = true,
        catchAddressMisaligned = true
      ),
      new YamlPlugin("cpu0.yaml")
    )
  )

  /** Variant of [[default]] whose HazardSimplePlugin is replaced by one with
    * every bypass path enabled.
    *
    * @return the default configuration with the hazard plugin swapped in place
    */
  def fast : RiscvConfig = {
    val config = default

    // Replace HazardSimplePlugin to get datapath bypass
    val hazardIndex = config.cpuPlugins.indexWhere(_.isInstanceOf[HazardSimplePlugin])
    config.cpuPlugins(hazardIndex) = new HazardSimplePlugin(
      bypassExecute         = true,
      bypassMemory          = true,
      bypassWriteBack       = true,
      bypassWriteBackBuffer = true
    )
    config
  }
}

/** Simulation top level: a VexRiscv core with its instruction and data buses
  * exposed as AXI4 masters, plus a small power-on reset generator.
  *
  * @param config core frequency and CPU plugin list used to build the design
  */
case class VexRiscvForSim(config : RiscvConfig) extends Component {
  import config._

  val io = new Bundle {
    // Clocks / reset
    val asyncReset = in Bool()
    val mainClk    = in Bool()

    // CPU buses (32-bit address, 32-bit data, 1-bit id)
    val iBus = master(Axi4ReadOnly(Axi4Config(addressWidth=32,dataWidth=32,idWidth=1).toFullConfig()))
    val dBus = master(Axi4(Axi4Config(addressWidth=32,dataWidth=32,idWidth=1).toFullConfig()))
  }

  // Domain for the reset generator itself: clocked by mainClk, BOOT reset kind.
  val resetCtrlClockDomain = ClockDomain(
    clock  = io.mainClk,
    config = ClockDomainConfig(
      resetKind = BOOT
    )
  )

  val resetCtrl = new ClockingArea(resetCtrlClockDomain) {
    val mainClkResetUnbuffered = False

    // 6-bit counter: the unbuffered reset is driven high for as long as the counter
    // has not reached all-ones. It starts from 0, so a reset is also produced at boot.
    val systemClkResetCounter = Reg(UInt(6 bits)) init(0)
    when(systemClkResetCounter =/= U(systemClkResetCounter.range -> true)) {
      systemClkResetCounter := systemClkResetCounter + 1
      mainClkResetUnbuffered := True
    }

    // The synchronised external reset restarts the count (this later assignment wins).
    when(BufferCC(io.asyncReset)) {
      systemClkResetCounter := 0
    }

    // Registered copies of the reset used by the rest of the design
    val mainClkReset = RegNext(mainClkResetUnbuffered)
    val systemReset  = RegNext(mainClkResetUnbuffered)
  }

  val systemClockDomain = ClockDomain(
    clock     = io.mainClk,
    reset     = resetCtrl.systemReset,
    frequency = FixedFrequency(coreFrequency)
  )

  val system = new ClockingArea(systemClockDomain) {

    // True when a DBusSimplePlugin in the plugin list is configured big-endian.
    val bigEndianDBus = config.cpuPlugins.exists {
      case simpleDBus : DBusSimplePlugin => simpleDBus.bigEndian
      case _                             => false
    }

    // Instantiate the CPU
    val cpu = new VexRiscv(
      config = VexRiscvConfig(
        plugins = cpuPlugins
      )
    )

    // Walk the CPU's plugins to hook their ports up to this component
    val timerInterrupt    = False
    val externalInterrupt = False
    var iBus : Axi4ReadOnly = null
    var dBus : Axi4 = null
    cpu.plugins.foreach {
      case iCache : IBusCachedPlugin =>
        iBus = iCache.iBus.toAxi4ReadOnly().toFullConfig()
      case dCache : DBusCachedPlugin =>
        dBus = dCache.dBus.toAxi4Shared().toAxi4().toFullConfig()
      case csr : CsrPlugin =>
        // Both interrupt inputs are tied to the constant False signals above.
        csr.externalInterrupt := externalInterrupt
        csr.timerInterrupt := timerInterrupt
      case _ =>
    }

    io.iBus <> iBus
    io.dBus <> dBus
  }
}

/** Command-line entry point: passes a [[VexRiscvForSim]] built from the default
  * configuration to `SpinalVhdl`.
  */
object VexRiscvForSim {
  // Explicit `: Unit =` replaces the deprecated procedure syntax `def main(...) { ... }`.
  def main(args: Array[String]): Unit = {
    SpinalVhdl(VexRiscvForSim(RiscvConfig.default.copy()))
  }
}

Dolu1990 commented 10 months ago

Were you accessing the cached memory region, or the uncached one? Otherwise, at this stage the best thing is to check the simulation to see what is happening. In VexRiscv there are a few "lastStage" signals that help figure out what is committing.

ztachip commented 10 months ago

Outside of VexRiscv, I instantiated some logic to split iBus/dBus between an internal RAM block (16K) and external memory (256M) based on the memory address region. During the test, which continuously saves to and restores from the stack, I see VexRiscv flushing out the stack content, but I see no reads. I just noticed that the total number of clocks for the operation is twice what it should be. Do you have any internal signals I can tap to see if VexRiscv is stalled on some condition? Thanks, Vuong


From: Dolu1990 @.> Sent: January 22, 2024 5:05 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)

where you accessing the cached memory region ? or the uncached one ? Else at that stage the best is to check the simulation to see what is happening. In VexRiscv there is a few "lastStage" signals to help figuring out what is commiting.

— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1903648972, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFXK3VEDZONRCACYZGLYPY2V7AVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMBTGY2DQOJXGI. You are receiving this because you authored the thread.

ztachip commented 10 months ago

To make sure all data accesses fit in the data cache, my stack is only 2K, and my data cache is 8K, 2-way.

Dolu1990 commented 10 months ago

I see VexRiscv flushing out the stack content but I see no read.

Its cache is write-through, so that is normal. Maybe the SoC memory system can't keep up with that bandwidth?

Do you have any internal signals I can tap to see if VexRiscv is stalled on some condition?

Yes, on every stage, there is a xxx_arbitration_xxx which contains signals.

But i would say, first check if the memory buses are stuck some cycles.

ztachip commented 9 months ago

There are no external memory cycles. The program is simply pushing and popping register values to/from a stack that fits entirely in the cache.

But can we expect code below to take just one clock per instruction?

sw s0,4(sp)
sw s1,8(sp)
sw s2,12(sp)
sw s3,16(sp)
:
:
// Restore registers
lw s0,4(sp)
lw s1,8(sp)
lw s2,12(sp)
:
:

Dolu1990 commented 9 months ago

But can we expect code below to take just one clock per instruction?

Yes, it should as far as i know.

Can you share a wave file?

ztachip commented 9 months ago

Sure, are there any VexRiscv internal signals you like me to show on wave file?


From: Dolu1990 @.> Sent: February 7, 2024 11:00 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)

But can we expect code below to take just one clock per instruction?

Yes, it should as far as i know.

Can you share a wave file?

— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1932359214, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFUQ4OUGMNCFFZBXKZLYSOQIRAVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMZSGM2TSMRRGQ. You are receiving this because you authored the thread.Message ID: @.***>

Dolu1990 commented 9 months ago

All of them ^^

ztachip commented 9 months ago

I assume you want a wave file from Verilator? My code includes VHDL, so unfortunately Verilator does not work for it, but I can try to create a test program with Verilog + Verilator. Does this work for you? Thanks


From: Dolu1990 @.> Sent: February 12, 2024 3:52 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)

All of them ^^

— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1938255826, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFXRL4PTC6LY3QNRJILYTHJ3RAVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMZYGI2TKOBSGY. You are receiving this because you authored the thread.Message ID: @.***>

Dolu1990 commented 9 months ago

It doesn't need to be a wave from Verilator; either a VCD or an FST file is fine.