ARF1939261764 opened 6 months ago
I have a hacky fix after which the core no longer locks up when issuing multiple RoCC instructions that result in MMIO stores (TL Put, I think) to the same address. However, I'm not sure whether it's sound, as I suspect it causes problems with fromhost/tohost on FireSim (the process waits at https://github.com/ucb-bar/libgloss-htif/blob/39234a16247ab1fa234821b251f1f1870c3de343/misc/htif.c#L26 indefinitely). Interestingly, it works fine on Verilator.
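For context, the trigger is roughly one uncached store per custom instruction. Below is a minimal sketch of such a RoCC client, written from memory against the AccumulatorExample-era LazyRoCC API; the MmioStoreExample name is mine and some field names/tie-offs may need adjusting for your tree:

```scala
import chisel3._
import chisel3.util._
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.tile._
import freechips.rocketchip.rocket._

// One 8-byte store per custom instruction: address in rs1, data in rs2.
// Pointing rs1 at an MMIO region (e.g. 0x60010000) and issuing a few
// instructions back-to-back should reproduce the lockup described above.
class MmioStoreExample(opcodes: OpcodeSet)(implicit p: Parameters)
    extends LazyRoCC(opcodes) {
  override lazy val module = new MmioStoreExampleModuleImp(this)
}

class MmioStoreExampleModuleImp(outer: MmioStoreExample)
    extends LazyRoCCModuleImp(outer) {
  val cmd = Queue(io.cmd)
  cmd.ready := io.mem.req.ready           // retire cmd once the store is accepted
  io.mem.req.valid := cmd.valid
  io.mem.req.bits := DontCare             // tie off fields this sketch ignores
  io.mem.req.bits.addr := cmd.bits.rs1    // store address from rs1
  io.mem.req.bits.tag := 0.U
  io.mem.req.bits.cmd := M_XWR            // store -> uncached TL Put on MMIO
  io.mem.req.bits.size := log2Ceil(8).U   // 8-byte access
  io.mem.req.bits.signed := false.B
  io.mem.req.bits.data := cmd.bits.rs2    // store data from rs2
  io.mem.req.bits.phys := false.B
  io.mem.req.bits.dprv := cmd.bits.status.dprv // request privilege
  io.busy := cmd.valid
  io.resp.valid := false.B                // assumes instructions issued with xd = 0
  io.resp.bits := DontCare
  io.interrupt := false.B
}
```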
The fix roughly looks like:

```diff
diff --git a/src/main/scala/rocket/DCache.scala b/src/main/scala/rocket/DCache.scala
index 0467d149a..e6af58ff2 100644
--- a/src/main/scala/rocket/DCache.scala
+++ b/src/main/scala/rocket/DCache.scala
@@ -622,7 +622,9 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// grant
val (d_first, d_last, d_done, d_address_inc) = edge.addr_inc(tl_out.d)
- val (d_opc, grantIsUncached, grantIsUncachedData) = {
+ // FIXME: grantIsUncachedAccess is a misleading name
+ // usingDataScratchpad still uses uncachedGrantOpcodesSansData
+ val (d_opc, grantIsUncached, grantIsUncachedAccess) = {
val uncachedGrantOpcodesSansData = Seq(AccessAck, HintAck)
val uncachedGrantOpcodesWithData = Seq(AccessAckData)
val uncachedGrantOpcodes = uncachedGrantOpcodesWithData ++ uncachedGrantOpcodesSansData
@@ -634,7 +636,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val data = DecodeLogic(opc, uncachedGrantOpcodesWithData, uncachedGrantOpcodesSansData)
(opc, true.B, data)
} else {
- (whole_opc, whole_opc.isOneOf(uncachedGrantOpcodes), whole_opc.isOneOf(uncachedGrantOpcodesWithData))
+ (whole_opc, whole_opc.isOneOf(uncachedGrantOpcodes), whole_opc.isOneOf(Seq(AccessAckData, AccessAck)))
}
}
tl_d_data_encoded := encodeData(tl_out.d.bits.data, tl_out.d.bits.corrupt && !io.ptw.customCSRs.suppressCorruptOnGrantData && !grantIsUncached)
@@ -665,11 +667,12 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
f := false.B
}
}
- when (grantIsUncachedData) {
+ when (grantIsUncachedAccess) {
if (!cacheParams.separateUncachedResp) {
if (!cacheParams.pipelineWayMux)
s1_data_way := 1.U << nWays
- s2_req.cmd := M_XRD
+ // Still respond to cache clients for AccessAck in response to Put
+ s2_req.cmd := Mux(tl_out.d.bits.opcode === AccessAckData, M_XRD, M_XWR)
s2_req.size := uncachedResp.size
s2_req.signed := uncachedResp.signed
s2_req.tag := uncachedResp.tag
@@ -726,7 +729,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
// don't accept uncached grants if there's a structural hazard on s2_data...
val blockUncachedGrant = Reg(Bool())
blockUncachedGrant := dataArb.io.out.valid
- when (grantIsUncachedData && (blockUncachedGrant || s1_valid)) {
+ when (grantIsUncachedAccess && (blockUncachedGrant || s1_valid)) {
tl_out.d.ready := false.B
// ...but insert bubble to guarantee grant's eventual forward progress
when (tl_out.d.valid) {
@@ -923,7 +926,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
val s2_uncached_data_word = RegEnable(s1_uncached_data_word, io.cpu.replay_next)
val doUncachedResp = RegNext(io.cpu.replay_next)
io.cpu.resp.valid := (s2_valid_hit_pre_data_ecc || doUncachedResp) && !s2_data_error
- io.cpu.replay_next := tl_out.d.fire() && grantIsUncachedData && !cacheParams.separateUncachedResp.B
+ io.cpu.replay_next := tl_out.d.fire() && grantIsUncachedAccess && !cacheParams.separateUncachedResp.B
when (doUncachedResp) {
assert(!s2_valid_hit)
io.cpu.resp.bits.replay := true.B
@@ -931,13 +934,13 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) {
}
io.cpu.uncached_resp.map { resp =>
- resp.valid := tl_out.d.valid && grantIsUncachedData
+ resp.valid := tl_out.d.valid && grantIsUncachedAccess
resp.bits.tag := uncachedResp.tag
resp.bits.size := uncachedResp.size
resp.bits.signed := uncachedResp.signed
resp.bits.data := new LoadGen(uncachedResp.size, uncachedResp.signed, uncachedResp.addr, s1_uncached_data_word, false.B, wordBytes).data
resp.bits.data_raw := s1_uncached_data_word
- when (grantIsUncachedData && !resp.ready) {
+ when (grantIsUncachedAccess && !resp.ready) {
tl_out.d.ready := false.B
}
}
```
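For anyone else reading the patch, my understanding of the TileLink detail it leans on (paraphrased, not quoted from the spec or from TLMessages): an uncached Get is answered with AccessAckData, while an uncached Put is answered with AccessAck, which carries no data. The stock decode only flagged AccessAckData, so Put acknowledgements were never turned into core responses. Roughly:

```scala
import chisel3._

// D-channel opcode values for the TL-UL responses involved, per the
// TileLink spec; names mirror rocket-chip's TLMessages.
object TLDOpcodesSketch {
  val AccessAck     = 0.U(3.W) // answers PutFullData/PutPartialData, no data
  val AccessAckData = 1.U(3.W) // answers Get (and atomics), carries data
  val HintAck       = 2.U(3.W) // answers Intent

  // What the patch effectively does: accept both ack kinds as uncached
  // grants, and pick the replayed command by whether data came back.
  def isUncachedAccess(dOpc: UInt): Bool =
    dOpc === AccessAck || dOpc === AccessAckData
  def replayCmd(dOpc: UInt, xrd: UInt, xwr: UInt): UInt =
    Mux(dOpc === AccessAckData, xrd, xwr) // M_XRD for loads, M_XWR for stores
}
```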
@jerryz123 any thoughts on this?
Type of issue: bug report
Impact: RoCC memory interface
I used the Chipyard BlackBoxExample configuration, along with the code above, to produce the waveform in the image. My goal is to write 1024 consecutive 8-byte values to address 0x60010000, incrementing the address with each write. But on the third write, the rocc_mem_req_ready signal went low and never came back up. I found that this depends on the address being written: for example, I had no problem doing the same thing to address 0x80010000. The waveform is shown in the figure below. The difference between the two addresses is that 0x60010000 is mapped to the mmio axi interface, while 0x80010000 is mapped to the mem axi interface.

When accessing address 0x60010000, each write request is placed into SimpleHellaCacheIF's retransmission queue. inflight indicates which requests have not yet received a resp; a value of 3 means two requests are waiting for a resp (two bits are set to 1). But according to what I have read online, RoCC memory write operations do not get a response, so I don't know whether this is a bug.

Because read operations do get a response, read requests to 0x60010000 complete normally.
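My mental model of why rocc_mem_req_ready stays low, as a simplified sketch (paraphrased from my reading of SimpleHellaCacheIF, not the actual source; the module and port names here are made up):

```scala
import chisel3._
import chisel3.util._

// Each accepted request sets an inflight bit indexed by its tag; only a
// returning resp with the matching tag clears it. If MMIO store acks are
// never forwarded back as responses, the bits accumulate and new requests
// are eventually refused.
class InflightSketch(nSlots: Int) extends Module {
  val io = IO(new Bundle {
    val reqFire  = Input(Bool())                   // request accepted this cycle
    val reqTag   = Input(UInt(log2Ceil(nSlots).W))
    val respFire = Input(Bool())                   // response returned this cycle
    val respTag  = Input(UInt(log2Ceil(nSlots).W))
    val reqReady = Output(Bool())
  })
  val inflight = RegInit(0.U(nSlots.W))
  val set = Mux(io.reqFire,  UIntToOH(io.reqTag,  nSlots), 0.U(nSlots.W))
  val clr = Mux(io.respFire, UIntToOH(io.respTag, nSlots), 0.U(nSlots.W))
  inflight := (inflight | set) & ~clr
  // inflight === "b011".U would be the "value of 3, two requests pending"
  // state visible in the waveform above.
  io.reqReady := !inflight.andR
}
```

If that model is right, the store's inflight bit can only clear once the Put's AccessAck is forwarded back as a response, which is what the patch above appears to make happen.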
Please tell us about your environment: Rocket 1.6
What is the use case for changing the behavior? Store operations can be performed continuously.