clash-lang / clash-compiler

Haskell to VHDL/Verilog/SystemVerilog compiler
https://clash-lang.org/
Other
1.44k stars 153 forks source link

ROM contents are not reduced to NF during synthesis #467

Open gergoerdi opened 5 years ago

gergoerdi commented 5 years ago

CLaSH version: 949a05a1d8b1e4b3aabf22424

I managed to minimize my program into the following, with no external dependencies:

{-# LANGUAGE RecordWildCards, TupleSections #-}
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE GeneralizedNewtypeDeriving, DerivingStrategies #-}
module CHIP8 where

import Clash.Prelude
import Data.Char
import qualified Data.List as L
import Data.Word
import Data.Maybe (fromMaybe)
import Control.Monad.State hiding (state)
import Control.Monad.RWS

{-# NOINLINE topEntity #-}
{-# ANN topEntity
  (Synthesize
    { t_name   = "CHIP8"
    , t_inputs =
          [ PortName "CLK_25MHZ"
          , PortName "RESET"
          ]
    , t_output = PortName "VGA_VSYNC"
    }) #-}
topEntity
    :: Clock System Source
    -> Reset System Asynchronous
    -> Signal System Bit
topEntity = exposeClockReset board
  where
    board = fromMaybe low . cpuOutFBWrite <$> cpuOut
      where
        cpuIn = do
            cpuInMem <- rom hexDigits (cpuOutMemAddr <$> cpuOut)
            cpuInVBlank <- pure False
            pure CPUIn{..}

        cpuOut = mealyState (runCPU defaultOut cpu) initState cpuIn

hexDigits :: Vec (1 * 8) Word8
hexDigits = concat . map (pad . fmap lineToByte) $
    ("****" :>
     "*  *" :>
     "*  *" :>
     "*  *" :>
     "****" :>
     Nil) :>
    Nil
  where
    pad = (++ repeat 0)

lineToByte :: String -> Word8
lineToByte s = L.foldl push 0 s `shiftL` 4
  where
    push :: Word8 -> Char -> Word8
    push x c = x `shiftL` 1 + if isSpace c then 0 else 1

data CPUIn = CPUIn
    { cpuInMem :: Word8
    , cpuInVBlank :: Bool
    }

data CPUState = CPUState
    { pc :: Unsigned 8
    }
    deriving (Generic, Undefined)

initState :: CPUState
initState = CPUState
    { pc = 0x0200
    }

data CPUOut = CPUOut
    { cpuOutMemAddr :: Unsigned 8
    , cpuOutMemWrite :: Maybe Word8
    , cpuOutFBWrite :: Maybe Bit
    }

defaultOut :: CPUState -> CPUOut
defaultOut CPUState{..} = CPUOut{..}
  where
    cpuOutMemAddr = pc
    cpuOutMemWrite = mzero
    cpuOutFBWrite = mzero

cpu :: CPU CPUIn CPUState CPUOut ()
cpu = pure ()

newtype CPU i s o a = CPU{ unCPU :: RWS i (Endo o) s a }
    deriving newtype (Functor, Applicative, Monad, MonadState s)

runCPU :: (s -> o) -> CPU i s o () -> (i -> State s o)
runCPU mkDef cpu inp = do
    s <- get
    let (s', f) = execRWS (unCPU cpu) inp s
    put s'
    def <- gets mkDef
    return $ appEndo f def

mealyState :: (HiddenClockReset domain gated synchronous, Undefined s)
           => (i -> State s o) -> s -> (Signal domain i -> Signal domain o)
mealyState f = mealy step
  where
    step s x = let (y, s') = runState (f x) s
               in (s', y)

The error message is 3000+ lines long, and starts with:

Loading dependencies took 2.505077142s
Parsing and compiling primitives took 0.508476818s
Compiling: CHIP8.topEntity
Clash.Normalize(176): Expr belonging to bndr: #CHIP8.topEntity_go3584
(:: GHC.Types.[] GHC.Types.Char -> GHC.Word.Word8 -> GHC.Word.Word8) 
remains recursive after normalization:
gergoerdi commented 5 years ago

Although I am not seeing anything non-well-founded about it, the problematic part is this:

    cpuIn = do
        cpuInMem <- rom hexDigits (cpuOutMemAddr <$> cpuOut)
        cpuInVBlank <- pure False
        pure CPUIn{..}

    cpuOut = mealyState (runCPU defaultOut cpu) initState cpuIn

If I replace that rom with a RAM, like this:

        cpuInMem <- unpack <$> blockRamFile d256 "image.rom" (cpuOutMemAddr <$> cpuOut) (pure Nothing)

then CLaSH manages to synthesize HDL without further issues.

leonschoorl commented 5 years ago

Problem is you're using Data.List.foldl (inside of lineToByte), that's the recursive function it can't translate.

You could

gergoerdi commented 5 years ago

Thanks, that makes sense!

But in that case, maybe there's an improvement request hiding here: I think rom would be more intuitive if it reduced the ROM contents to normal form at compile time. I didn't expect any computational aspect of hexDigits to survive to the generated VHDL.

gergoerdi commented 5 years ago

I am going to go with the TH solution for now, since that is closest to my intention (to have this ROM in the generated VHDL just be a bunch of bytes).

However, if I try your third solution, i.e. rewriting the ROM contents to not use Data.List, then I get the following CLaSH error:

Applied 19205 transformations

src-clash/CHIP8.hs:58:1: error:
    Clash.Netlist.BlackBox(150): Can't match template for "Clash.Explicit.ROM.rom#" :

-- rom begin
~GENSYM[~COMPNAME_rom][0] : block
  signal ~GENSYM[ROM][1] : ~TYP[2];
  signal ~GENSYM[rd][2]  : integer range 0 to ~LIT[0]-1;~IF ~ISGATED[1] ~THEN
  signal ~GENSYM[clk][3] : std_logic;
  signal ~GENSYM[ce][4]  : boolean;~ELSE ~FI
begin
  ~SYM[1] <= ~LIT[2];

  ~SYM[2] <= to_integer(~ARG[3])
  -- pragma translate_off
                mod ~LIT[0]
  -- pragma translate_on
                ;
  ~IF ~ISGATED[1] ~THEN
  (~SYM[3],~SYM[4]) <= ~ARG[1];
  ~GENSYM[romSync][5] : process (~SYM[3])
  begin
    if (rising_edge(~SYM[3])) then
      if ~SYM[4] then~IF ~VIVADO ~THEN
        ~RESULT <= ~FROMBV[~SYM[1](~SYM[2])][~TYPO]
        -- pragma translate_off
        after 1 ps
        -- pragma translate_on
        ;~ELSE
        ~RESULT <= ~SYM[1](~SYM[2])
        -- pragma translate_off
        after 1 ps
        -- pragma translate_on
        ;~FI
      end if;
    end if;
  end process;~ELSE
  ~SYM[5] : process (~ARG[1])
  begin
    if (rising_edge(~ARG[1])) then~IF ~VIVADO ~THEN
      ~RESULT <= ~FROMBV[~SYM[1](~SYM[2])][~TYPO]
      -- pragma translate_off
      after 1 ps
      -- pragma translate_on
      ;~ELSE
      ~RESULT <= ~SYM[1](~SYM[2])
      -- pragma translate_off
      after 1 ps
      -- pragma translate_on
      ;~FI
    end if;
  end process;~FI
end block;
-- rom end

with context:

Context {bbResult = (Identifier "t" Nothing,Unsigned 8), bbInputs = [(Literal (Just (Unsigned 32,32)) (NumLit 128),Unsigned 32,True),(Identifier "CLK_25MHZ" Nothing,Clock "CLK_25MHZ" 39721 Source,False),(Identifier "\\#t_app_arg\\" Nothing,Vector 128 (Unsigned 8),False),(Identifier "result_13" Nothing,Signed 32,False)], bbFunctions = fromList [], bbQsysIncName = [], bbLevel = 0, bbCompName = "CHIP8"}

    NB: The source location of the error is not exact, only indicative, as it is acquired after optimisations.
    The actual location of the error can be in a function that is inlined.
    To prevent inlining of those functions, annotate them with a NOINLINE pragma.
   |
58 | topEntity = exposeClockReset board
   | ^^^^^^^^^
)

(I'll see if I can trigger this with just the minimal version above, since this error is coming from my real, large source)