haskell-numerics / random-fu

A suite of Haskell libraries for representing, manipulating, and sampling random variables
42 stars 21 forks source link

Why no MonadRandom for MWC? #26

Closed idontgetoutmuch closed 8 years ago

idontgetoutmuch commented 9 years ago

I had a quick go at this and notwithstanding http://dilbert.com/strips/comic/2001-10-25/, it performs poorly. I am guessing creating a seed is expensive.

{-# OPTIONS_GHC -Wall                      #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing   #-}
{-# OPTIONS_GHC -fno-warn-type-defaults    #-}

import Data.Random.Source.MWC
import Data.Random
import Control.Monad.ST
import Data.Random.Source

import Control.Monad.Primitive
import Control.Monad.State

type GenST s = Gen (PrimState (ST s))

asGenST :: (GenST s -> ST s a) -> (GenST s -> ST s a)
asGenST = id

testUniform :: MonadRandom m => Int -> m [Double]
testUniform n = replicateM (fromIntegral n) (sample stdUniform)

n :: Int
n = 10^5

us :: [Double]
us = runST (testUniform n)

mean :: Double
mean = sum us / fromIntegral n

main :: IO ()
main = do
  print mean

instance MonadRandom (ST s) where
  getRandomDouble = create >>= (asGenST getRandomDoubleFrom)
~/Dropbox/Private/Multinomial $ ./TestUniformMWC +RTS -s
0.623246317772896
   2,810,590,152 bytes allocated in the heap
      29,658,360 bytes copied during GC
       5,220,784 bytes maximum residency (6 sample(s))
       1,741,368 bytes maximum slop
              16 MB total memory in use (0 MB lost due to fragmentation)

                                    Tot time (elapsed)  Avg pause  Max pause
  Gen  0      5448 colls,     0 par    0.07s    0.08s     0.0000s    0.0008s
  Gen  1         6 colls,     0 par    0.01s    0.01s     0.0022s    0.0054s

  INIT    time    0.00s  (  0.00s elapsed)
  MUT     time    1.01s  (  1.02s elapsed)
  GC      time    0.08s  (  0.09s elapsed)
  EXIT    time    0.00s  (  0.00s elapsed)
  Total   time    1.10s  (  1.12s elapsed)

  %GC     time       7.6%  (8.3% elapsed)

  Alloc rate    2,771,708,623 bytes per MUT second

  Productivity  92.4% of total user, 90.9% of total elapsed
idontgetoutmuch commented 9 years ago

Maybe this

{-# LANGUAGE TemplateHaskell   #-}
{-# LANGUAGE GADTs             #-}
{-# LANGUAGE FlexibleInstances #-}

import Data.Random
import Data.Random.Source
import qualified System.Random.MWC as MWC
import Control.Monad.Reader
import Control.Monad.Primitive

$(monadRandom [d|
  instance (PrimMonad m, s ~ PrimState m) => MonadRandom (ReaderT (MWC.Gen s) m) where
    getRandomWord16 = ask >>= lift . MWC.uniform
    getRandomWord32 = ask >>= lift . MWC.uniform
    getRandomWord64 = ask >>= lift . MWC.uniform
  |])

testUniform :: MonadRandom m => Int -> m [Double]
testUniform n = replicateM (fromIntegral n) (sample stdUniform)

n :: Int
n = 10^7

main :: IO ()
main = do
    seed <- MWC.create
    xs <- runReaderT (testUniform n) seed
    print (sum xs / fromIntegral n)

but look at the performance :-(

./RandomFuMWC +RTS -s
0.5000432391067587
   3,286,220,896 bytes allocated in the heap
   2,427,475,880 bytes copied during GC
     600,186,048 bytes maximum residency (12 sample(s))
     100,510,656 bytes maximum slop
            1249 MB total memory in use (0 MB lost due to fragmentation)

                                    Tot time (elapsed)  Avg pause  Max pause
  Gen  0      5942 colls,     0 par    1.06s    1.13s     0.0002s    0.0013s
  Gen  1        12 colls,     0 par    0.82s    1.23s     0.1024s    0.5787s

  INIT    time    0.00s  (  0.00s elapsed)
  MUT     time    1.47s  (  1.39s elapsed)
  GC      time    1.87s  (  2.36s elapsed)
  EXIT    time    0.01s  (  0.09s elapsed)
  Total   time    3.35s  (  3.84s elapsed)

  %GC     time      56.0%  (61.3% elapsed)

  Alloc rate    2,242,365,923 bytes per MUT second

  Productivity  44.0% of total user, 38.3% of total elapsed
yongqli commented 9 years ago

Any idea why this is so slow?

idontgetoutmuch commented 9 years ago

Sadly I haven't had time to investigate. I'll see if I can dig out my notes and post what I have done here.

yongqli commented 9 years ago

Interestingly, making GHC inline everything with

-O2 -fllvm -rtsopts -threaded -fexcess-precision -j6 +RTS -N12 -RTS 
-fsimpl-tick-factor=1000 -funfolding-use-threshold=1000 
-funfolding-creation-threshold=1000

reduces run time from 3.16s to 1.96s, making it faster than State PureMT.

let x = flip evalState (pureMT 0) $ testUniform n