fastmachinelearning / hls4ml

Machine learning on FPGAs using HLS
https://fastmachinelearning.org/hls4ml
Apache License 2.0

ERROR: [XFORM 203-504] Stop unrolling loop 'MultLoop' #903

Closed: behnamarefy closed this issue 8 months ago

behnamarefy commented 8 months ago

Hello, I'm working on a CNN and tried to implement it using hls4ml 0.7.1 and Vivado 2019.2, but when trying different reuse factors I get this error:

ERROR: [XFORM 203-504] Stop unrolling loop 'MultLoop' (firmware/nnet_utils/nnet_dense_resource.h:52) in function 'nnet::conv_2d_cl<nnet::array<ap_fixed<22, 6, (ap_q_mode)5, (ap_o_mode)3, 0>, 256u>, nnet::array<ap_fixed<22, 6, (ap_q_mode)5, (ap_o_mode)3, 0>, 128u>, config7>' because it may cause large runtime and excessive memory usage due to increase in code size. Please avoid unrolling the loop or form sub-functions for code in the loop body.

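For context, the failing instance is the second convolution (256 input channels, 128 filters, 2x8 kernel). A minimal back-of-the-envelope sketch, assuming the Resource-strategy multiplier loop is unrolled by roughly the layer's weight count divided by the reuse factor (which is consistent with the factor of 131072 reported in the synthesis log further down):

n_chan_in, n_filt = 256, 128      # from the conv_2d_cl template arguments in the error
kernel_h, kernel_w = 2, 8         # kernel_size used in the model below
reuse_factor = 4                  # ReuseFactor set in the hls4ml config below

n_weights = kernel_h * kernel_w * n_chan_in * n_filt  # 524288 multiplications
mult_unroll = n_weights // reuse_factor               # 131072, the unroll factor HLS reports

print(n_weights, mult_unroll)
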
Here is my code:

nb_epoch = 200     # number of epochs to train on
batch_size = 1024  # training batch size
input_shape=[2,128]

filters_per_conv_layer = [256, 128, 64 ]
neurons_per_dense_layer = [ 64]

x = x_in = Input(input_shape + [1])

for i, f in enumerate(filters_per_conv_layer):
    print(('Adding convolutional block {} with N={} filters').format(i, f))
    x = Conv2D(
        int(f),
        kernel_size=(2, 8),
        strides=(1, 1),
        padding='same',
        kernel_initializer='lecun_uniform',
        # kernel_regularizer=l1(0.0001),
        # use_bias=False,
        name='conv_{}'.format(i),
    )(x)
    x = BatchNormalization(name='bn_conv_{}'.format(i))(x)
    x = Activation('relu', name='conv_act_%i' % i)(x)
    x = MaxPooling2D(pool_size=(1, 2), name='pool_{}'.format(i))(x)
x = Flatten()(x)

for i, n in enumerate(neurons_per_dense_layer):
    print(('Adding dense block {} with N={} neurons').format(i, n))
    x = Dense(n, kernel_initializer='lecun_uniform',  name='dense_%i' % i)(x)
    x = BatchNormalization(name='bn_dense_{}'.format(i))(x)
    x = Activation('relu', name='dense_act_%i' % i)(x)
x = Dense(11, name='output_dense')(x)
x_out = Activation('softmax', name='output_softmax')(x)

model = Model(inputs=x_in, outputs=x_out)
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

model.summary()

I pruned this network and then load the pruned model; here is the whole code:

import matplotlib.pyplot as plt
import numpy as np
import time
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
from tensorflow_model_optimization.sparsity.keras import strip_pruning
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper

from qkeras.utils import _add_supported_quantized_objects
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.regularizers import l1
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import MaxPool2D
import os

from tensorflow.keras.models import Model
os.environ['PATH'] = '/tools/Xilinx/Vivado/2019.2/bin:' + os.environ['PATH']
co = {}
_add_supported_quantized_objects(co)
co['PruneLowMagnitude'] = pruning_wrapper.PruneLowMagnitude

model_pruned = tf.keras.models.load_model('pruned_behnam2.h5', custom_objects=co)
model_pruned = strip_pruning(model_pruned)
LOSS = tf.keras.losses.CategoricalCrossentropy()
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=3e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=True)

model_pruned.compile(loss=LOSS, optimizer=OPTIMIZER, metrics=["accuracy"])

model_keras = model = tf.keras.models.load_model('behnam2.h5')
model_keras.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

model_keras.summary()
model_pruned.summary()
for layer in model_pruned.layers:
    if layer.__class__.__name__ in ['Conv2D', 'Dense']:
        w = layer.get_weights()[0]
        layersize = np.prod(w.shape)
        print("{}: {}".format(layer.name, layersize))  # 0 = weights, 1 = biases
        if layersize > 4096:  # flag layers with a very large number of weights
            print("Layer {} is too large ({}), are you sure you want to train?".format(layer.name, layersize))

conv_0: 4096
conv_1: 524288
Layer conv_1 is too large (524288), are you sure you want to train?
conv_2: 131072
Layer conv_2 is too large (131072), are you sure you want to train?
dense_0: 131072
Layer dense_0 is too large (131072), are you sure you want to train?
output_dense: 704

import hls4ml
import plotting

# First, the baseline model
hls_config = hls4ml.utils.config_from_keras_model(model_pruned, granularity='name')

# Set the precision and reuse factor for the full model
hls_config['Model']['Precision'] = 'ap_fixed<22,6>'
hls_config['Model']['ReuseFactor'] = 4
hls_config['Model']['Strategy'] = 'resource'
# Create an entry for each layer; here you can, for instance, change the strategy
# for an individual layer or increase the reuse factor for large layers.
# In this case we use the Resource strategy and a reuse factor of 4 for all layers.
for Layer in hls_config['LayerName'].keys():
    hls_config['LayerName'][Layer]['Strategy'] = 'resource'
    hls_config['LayerName'][Layer]['ReuseFactor'] = 4
    hls_config['LayerName'][Layer]['Precision'] = 'ap_fixed<22,6>'
# The 'Stable' softmax implementation gives the best numerical performance for high-accuracy models; the default implementation is faster but numerically less stable.
hls_config['LayerName']['output_softmax']['Strategy'] = 'Stable'
hls_config['LayerName']['dense_0']['ReuseFactor'] = 64
plotting.print_dict(hls_config)
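Since the MultLoop unroll scales with the layer's weight count divided by its ReuseFactor, the largest layers need much larger per-layer reuse factors before HLS will fully unroll the loop. A hedged sketch of per-layer overrides for this model (dense_0 is already raised to 64 above; the conv values here are illustrative only, chosen as exact divisors of each layer's multiplication count, not a verified fix):

# Illustrative per-layer ReuseFactor overrides (assumption: example values only,
# each an exact divisor of the layer's weight count).
hls_config['LayerName']['conv_1']['ReuseFactor'] = 512   # 524288 / 512 = 1024 parallel multipliers
hls_config['LayerName']['conv_2']['ReuseFactor'] = 128   # 131072 / 128 = 1024 parallel multipliers
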

cfg = hls4ml.converters.create_config(backend='Vivado')
cfg['IOType'] = 'io_stream'  # Must set this if using CNNs!
cfg['HLSConfig'] = hls_config
cfg['KerasModel'] = model
cfg['OutputDir'] = 'pruned_cnn/'
cfg['XilinxPart'] = 'xcu250-figd2104-2L-e'

hls_model = hls4ml.converters.keras_to_hls(cfg)
hls_model.compile()

hls_model.build(csim=False, synth=True, vsynth=False)
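As an aside, before the long synthesis step above (which is where the error appears), the compiled C emulation can be compared against Keras in software; a minimal sketch, assuming a small placeholder input (X_test here is random dummy data with the model's (2, 128, 1) input shape, not real test data):

import numpy as np

X_test = np.random.rand(16, 2, 128, 1).astype(np.float32)  # placeholder; substitute real test data

y_keras = model_pruned.predict(X_test)
y_hls = hls_model.predict(np.ascontiguousarray(X_test))

print('max |keras - hls4ml| =', np.abs(y_keras - y_hls).max())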


INFO: [HLS 200-489] Unrolling loop 'InitAccum' (firmware/nnet_utils/nnet_dense_resource.h:37) in function 'nnet::conv_2d_cl<nnet::array<ap_fixed<22, 6, (ap_q_mode)5, (ap_o_mode)3, 0>, 256u>, nnet::array<ap_fixed<22, 6, (ap_q_mode)5, (ap_o_mode)3, 0>, 128u>, config7>' completely with a factor of 128.
INFO: [HLS 200-489] Unrolling loop 'MultLoop' (firmware/nnet_utils/nnet_dense_resource.h:52) in function 'nnet::conv_2d_cl<nnet::array<ap_fixed<22, 6, (ap_q_mode)5, (ap_o_mode)3, 0>, 256u>, nnet::array<ap_fixed<22, 6, (ap_q_mode)5, (ap_o_mode)3, 0>, 128u>, config7>' completely with a factor of 131072.
ERROR: [XFORM 203-504] Stop unrolling loop 'MultLoop' (firmware/nnet_utils/nnet_dense_resource.h:52) in function 'nnet::conv_2d_cl<nnet::array<ap_fixed<22, 6, (ap_q_mode)5, (ap_o_mode)3, 0>, 256u>, nnet::array<ap_fixed<22, 6, (ap_q_mode)5, (ap_o_mode)3, 0>, 128u>, config7>' because it may cause large runtime and excessive memory usage due to increase in code size. Please avoid unrolling the loop or form sub-functions for code in the loop body.
ERROR: [HLS 200-70] Pre-synthesis failed.
command 'ap_source' returned error code
    while executing
"source build_prj.tcl"
    ("uplevel" body line 1)
    invoked from within
"uplevel \#0 [list source $arg] "

INFO: [Common 17-206] Exiting vivado_hls at Mon Oct 30 13:35:42 2023...
CSynthesis report not found.
Vivado synthesis report not found.
Cosim report not found.
Timing report not found.
vloncar commented 8 months ago

Duplicate of #904, where there's some discussion.