Closed ztachip closed 7 months ago
Hi, what memory bus / SoC are you using?
I am using AXI for bus.
package vexriscv.VexRiscvForSim
import spinal.core._
import spinal.lib._
import vexriscv.ip.{DataCacheConfig, InstructionCacheConfig}
import spinal.lib.bus.amba3.apb._
import spinal.lib.bus.amba4.axi._
import spinal.lib.bus.misc.SizeMapping
import spinal.lib.io.{InOutWrapper, TriStateArray}
import spinal.lib.misc.{InterruptCtrl, Prescaler, Timer}
import spinal.lib.soc.pinsec.{PinsecTimerCtrl, PinsecTimerCtrlExternal}
import vexriscv.plugin._
import vexriscv.{VexRiscv, VexRiscvConfig, plugin}
import spinal.lib.com.spi.ddr._
import spinal.lib.bus.simple._
import scala.collection.mutable.ArrayBuffer
/**
 * Parameters used to build a [[VexRiscvForSim]] toplevel.
 *
 * @param coreFrequency frequency declared for the CPU clock domain
 * @param cpuPlugins    mutable list of VexRiscv plugins the CPU is assembled from
 */
case class RiscvConfig(
  coreFrequency : HertzNumber,
  cpuPlugins    : ArrayBuffer[Plugin[VexRiscv]]
)
/**
 * Preset configurations for [[RiscvConfig]].
 */
object RiscvConfig{
  /** Default preset; same as `default(false)`. */
  def default : RiscvConfig = default(false)

  /**
   * Builds the default preset: 166 MHz core, cached instruction and data
   * buses (8 KiB, 2 ways, 32-byte lines each), MUL/DIV and full bypassing.
   *
   * @param bigEndian currently not used by this preset; kept so existing
   *                  callers keep compiling
   */
  def default(bigEndian : Boolean = false) : RiscvConfig = RiscvConfig(
    coreFrequency = 166 MHz,
    cpuPlugins = ArrayBuffer(
      new IBusCachedPlugin(
        resetVector = 0x00004000L,
        prediction = STATIC,
        relaxedPcCalculation = true,
        config = InstructionCacheConfig(
          // 8 KiB. The original text read "40962": the '*' of "4096*2" was lost.
          cacheSize = 4096*2,
          bytePerLine = 32,
          wayCount = 2,
          addressWidth = 32,
          cpuDataWidth = 32,
          memDataWidth = 32,
          catchIllegalAccess = true,
          catchAccessFault = true,
          asyncTagMemory = false,
          twoCycleRam = true,
          twoCycleCache = true
        )
      ),
      new DBusCachedPlugin(
        config = new DataCacheConfig(
          // 8 KiB, same correction as for the instruction cache above.
          cacheSize = 4096*2,
          bytePerLine = 32,
          wayCount = 2,
          addressWidth = 32,
          cpuDataWidth = 32,
          memDataWidth = 32,
          catchAccessError = true,
          catchIllegal = true,
          catchUnaligned = true,
          withLrSc = true,
          withAmo = true
        ),
        memoryTranslatorPortConfig = null
      ),
      new CsrPlugin(CsrPluginConfig.smallest(mtvecInit = 0x80000020L)),
      new DecoderSimplePlugin(
        catchIllegalInstruction = true
      ),
      new StaticMemoryTranslatorPlugin(
        // Addresses with bit 31 set match the ioRange predicate.
        ioRange = _(31 downto 31) === 0x1
      ),
      new RegFilePlugin(
        regFileReadyKind = plugin.ASYNC,
        zeroBoot = false
      ),
      new IntAluPlugin,
      new SrcPlugin(
        separatedAddSub = false,
        executeInsertion = true
      ),
      new FullBarrelShifterPlugin,
      new HazardSimplePlugin(
        bypassExecute = true,
        bypassMemory = true,
        bypassWriteBack = true,
        bypassWriteBackBuffer = true,
        pessimisticUseSrc = false,
        pessimisticWriteRegFile = false,
        pessimisticAddressMatch = false
      ),
      new MulPlugin,
      new DivPlugin,
      new BranchPlugin(
        earlyBranch = true,
        catchAddressMisaligned = true
      ),
      new YamlPlugin("cpu0.yaml")
    )
  )

  /**
   * Variant of [[default]] whose HazardSimplePlugin is replaced by one with
   * all four bypass paths enabled and the remaining options left at the
   * plugin's own defaults.
   */
  def fast : RiscvConfig = {
    val config = default

    //Replace HazardSimplePlugin to get datapath bypass
    config.cpuPlugins(config.cpuPlugins.indexWhere(_.isInstanceOf[HazardSimplePlugin])) = new HazardSimplePlugin(
      bypassExecute = true,
      bypassMemory = true,
      bypassWriteBack = true,
      bypassWriteBackBuffer = true
    )
    config
  }
}
/**
 * Simulation toplevel wrapping a single VexRiscv core.
 *
 * The component takes one clock and one asynchronous reset input, derives
 * the system reset from them, and exposes the CPU instruction and data
 * buses as AXI4 masters (32-bit address, 32-bit data, 1-bit id).
 *
 * @param config core frequency and the plugin list used to build the CPU;
 *               it must contain an IBusCachedPlugin and a DBusCachedPlugin
 */
case class VexRiscvForSim(config : RiscvConfig) extends Component{
  import config._

  val io = new Bundle {
    //Clocks / reset
    val asyncReset = in Bool()
    val mainClk = in Bool()

    //CPU memory buses
    val iBus = master(Axi4ReadOnly(Axi4Config(addressWidth=32,dataWidth=32,idWidth=1).toFullConfig()))
    val dBus = master(Axi4(Axi4Config(addressWidth=32,dataWidth=32,idWidth=1).toFullConfig()))
  }

  //Clock domain of the reset controller itself: no reset input, BOOT reset kind
  val resetCtrlClockDomain = ClockDomain(
    clock = io.mainClk,
    config = ClockDomainConfig(
      resetKind = BOOT
    )
  )

  val resetCtrl = new ClockingArea(resetCtrlClockDomain) {
    val mainClkResetUnbuffered = False

    //Implement a counter that keeps the reset asserted until the 6-bit counter saturates
    // Also this counter will automatically do a reset when the system boots.
    val systemClkResetCounter = Reg(UInt(6 bits)) init(0)
    when(systemClkResetCounter =/= U(systemClkResetCounter.range -> true)){
      systemClkResetCounter := systemClkResetCounter + 1
      mainClkResetUnbuffered := True
    }
    //Must stay after the increment above: the last assignment wins, so a
    //synchronized asyncReset restarts the counter.
    when(BufferCC(io.asyncReset)){
      systemClkResetCounter := 0
    }

    //Create all reset used later in the design
    val mainClkReset = RegNext(mainClkResetUnbuffered)
    val systemReset = RegNext(mainClkResetUnbuffered)
  }

  val systemClockDomain = ClockDomain(
    clock = io.mainClk,
    reset = resetCtrl.systemReset,
    frequency = FixedFrequency(coreFrequency)
  )

  val system = new ClockingArea(systemClockDomain) {
    //True when a DBusSimplePlugin configured as big endian is present
    //(not used further in this component)
    val bigEndianDBus = config.cpuPlugins.exists(_ match{
      case plugin : DBusSimplePlugin => plugin.bigEndian
      case _ => false
    })

    //Instantiate the CPU
    val cpu = new VexRiscv(
      config = VexRiscvConfig(
        plugins = cpuPlugins
      )
    )

    //Check the plugins used to instantiate the CPU to connect them to the SoC
    //Both interrupt sources are tied low
    val timerInterrupt = False
    val externalInterrupt = False
    var iBus : Axi4ReadOnly = null
    var dBus : Axi4 = null
    for(plugin <- cpu.plugins) plugin match{
      case plugin : IBusCachedPlugin =>
        iBus = plugin.iBus.toAxi4ReadOnly().toFullConfig()
      case plugin : DBusCachedPlugin =>
        dBus = plugin.dBus.toAxi4Shared().toAxi4().toFullConfig()
      case plugin : CsrPlugin => {
        plugin.externalInterrupt := externalInterrupt
        plugin.timerInterrupt := timerInterrupt
      }
      case _ =>
    }

    //Fail with an explicit message instead of a NullPointerException when
    //the plugin list lacks one of the cached bus plugins
    require(iBus != null, "VexRiscvForSim: cpuPlugins contains no IBusCachedPlugin, io.iBus cannot be connected")
    require(dBus != null, "VexRiscvForSim: cpuPlugins contains no DBusCachedPlugin, io.dBus cannot be connected")

    io.iBus <> iBus
    io.dBus <> dBus
  }
}
/** Entry point: generates VHDL for [[VexRiscvForSim]] using the default configuration. */
object VexRiscvForSim {
  def main(args: Array[String]): Unit = {
    SpinalVhdl {
      val simConfig = RiscvConfig.default.copy()
      VexRiscvForSim(simConfig)
    }
  }
}
Were you accessing the cached memory region, or the uncached one? Otherwise, at this stage the best approach is to check the simulation to see what is happening. In VexRiscv there are a few "lastStage" signals that help figure out what is committing.
Outside of VexRiscv, I instantiated some logic to split iBus/dBus between an internal RAM block (16K) and external memory (256M) based on the memory address region. During a test that continuously saves and restores the stack, I see VexRiscv flushing out the stack content, but I see no reads. I just noticed that the total number of clocks for the operation is twice what it should be. Do you have any internal signals I can tap to see if VexRiscv is stalled on some condition? Thanks, Vuong
From: Dolu1990 @.> Sent: January 22, 2024 5:05 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)
Were you accessing the cached memory region, or the uncached one? Otherwise, at this stage the best approach is to check the simulation to see what is happening. In VexRiscv there are a few "lastStage" signals that help figure out what is committing.
— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1903648972, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFXK3VEDZONRCACYZGLYPY2V7AVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMBTGY2DQOJXGI. You are receiving this because you authored the thread.
To make sure all data accesses fit in the DataCache, my stack is only 2K, and my DataCache is 8K/2-way.
I see VexRiscv flushing out the stack content but I see no read.
Its cache is write-through, so it is normal. Maybe the SoC memory system can't follow that bandwidth ?
Do you have any internal signals I can tap to see if VexRiscv is stalled on some condition?
Yes, on every stage there is an xxx_arbitration_xxx group of signals that shows this.
But I would say, first check whether the memory buses are stuck for some cycles.
There are no external memory cycles. The program is simply pushing and popping register values to/from a stack that fits entirely in the cache.
But can we expect code below to take just one clock per instruction?
sw s0,4(sp) sw s1,8(sp) sw s2,12(sp) sw s3,16(sp) : : // Restore registers lw s0,4(sp) lw s1,8(sp) lw s2,12(sp) : :
But can we expect code below to take just one clock per instruction?
Yes, it should as far as i know.
Can you share a wave file?
Sure, are there any VexRiscv internal signals you like me to show on wave file?
From: Dolu1990 @.> Sent: February 7, 2024 11:00 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)
But can we expect code below to take just one clock per instruction?
Yes, it should as far as i know.
Can you share a wave file?
— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1932359214, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFUQ4OUGMNCFFZBXKZLYSOQIRAVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMZSGM2TSMRRGQ. You are receiving this because you authored the thread.Message ID: @.***>
All of them ^^
I assume you want wavefile from Verilator? My code has VHDL so Verilator does not work for it unfortunately, but I can try to create a test program with Verilog+Verilator Does this work for you? Thanks
From: Dolu1990 @.> Sent: February 12, 2024 3:52 AM To: SpinalHDL/VexRiscv @.> Cc: ztachip @.>; Author @.> Subject: Re: [SpinalHDL/VexRiscv] Instructions to save/restore register to stack is taking 2 clock each (Issue #387)
All of them ^^
— Reply to this email directly, view it on GitHubhttps://github.com/SpinalHDL/VexRiscv/issues/387#issuecomment-1938255826, or unsubscribehttps://github.com/notifications/unsubscribe-auth/ACSDUFXRL4PTC6LY3QNRJILYTHJ3RAVCNFSM6AAAAABCEFCC26VHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSMZYGI2TKOBSGY. You are receiving this because you authored the thread.Message ID: @.***>
It doesn't need to be a wave from Verilator; either a VCD or an FST file is fine.
I wrote a simple task switching that save registers and restore registers // Save registers sw s0,4(sp) sw s1,8(sp) sw s2,12(sp) sw s3,16(sp) : : // Restore registers lw s0,4(sp) lw s1,8(sp) lw s2,12(sp) : :
My stack fits entirely in data cache
But it seems to take 2 clocks for each instruction above. I have enabled all bypasses in the HazardSimplePlugin. Shouldn't the stack save/restore instructions above take just 1 clock each?
Thanks