Open hhbyyh opened 5 years ago
TorchScript module from the Python API: memory increases during the first 3 iterations.
import os
import psutil
import torch
import torchvision
# Reproduction script: trace a pretrained ResNet-101 once, then run repeated
# forward passes while printing this process's resident set size.
torch.set_num_threads(1)

# Build the model first and the example input second, matching the original
# left-to-right argument evaluation order.
resnet = torchvision.models.resnet101(pretrained=True)
example_input = torch.rand(1, 3, 224, 224)
traced_script_module = torch.jit.trace(resnet, example_input)

for _ in range(100):
    # Fresh random input of shape (32, 3, 224, 224) for every pass.
    batch = torch.rand(32, 3, 224, 224)
    predictions = traced_script_module(batch)
    print(predictions)

    # RSS of the current process, printed in units of 1e9 bytes.
    this_process = psutil.Process(os.getpid())
    rss_bytes = this_process.memory_info().rss
    print(rss_bytes / 1e9, "G memory")
output:
tensor([[-2.2903e+00, -1.6855e+00, 3.8102e-02, ..., -9.4584e-01,
2.3668e+00, -3.5562e-01],
[ 4.3640e-01, -3.0016e-01, -3.0599e-01, ..., 1.1968e+00,
1.4018e+00, -7.9118e-01],
[-1.7140e+00, 2.7694e-03, -4.4360e-01, ..., -1.3820e+00,
2.4074e+00, -9.4551e-01],
...,
[ 1.1089e+00, 6.9741e-01, -4.8336e-01, ..., -7.7349e-01,
1.4667e+00, 1.4881e+00],
[-1.8032e+00, -1.1997e+00, -6.4645e-01, ..., -2.4350e-01,
4.4409e-01, 3.7147e+00],
[-1.3564e+00, 3.7601e-01, -1.5289e+00, ..., 2.5337e-01,
3.0679e+00, 8.1390e-01]], grad_fn=<DifferentiableGraphBackward>)
5.928787968 G memory
tensor([[-1.3370, -2.2020, -3.9834, ..., -0.8153, 1.4579, 0.1649],
[-1.4750, -1.0122, 0.2388, ..., -0.0842, 2.4480, 0.6924],
[-2.3939, -1.4854, 1.9183, ..., 0.0355, 0.3399, 1.6879],
...,
[-0.2749, 1.6111, -0.7473, ..., 0.5937, 1.2901, -0.4303],
[-0.7707, -1.3135, -0.2625, ..., 0.1321, 0.9864, -0.5495],
[-0.2005, -1.6247, 1.1925, ..., 0.5141, 0.9492, -1.1067]],
grad_fn=<DifferentiableGraphBackward>)
8.865099776 G memory
tensor([[-1.2737, -0.9936, -0.1794, ..., -0.9375, 0.8533, 0.8077],
[-0.8139, -0.5009, -0.2187, ..., -0.7565, 1.1524, -0.5535],
[ 0.6839, -0.2231, 0.2724, ..., 0.4830, 1.4113, -0.2519],
...,
[ 0.0329, 2.2376, 1.1090, ..., 0.0342, 1.0230, 0.8690],
[ 0.4129, -0.4162, 0.0832, ..., 2.2181, 2.3926, 0.4473],
[-1.2382, 0.4778, -0.7004, ..., 1.0786, 0.9393, 1.3353]],
grad_fn=<DifferentiableGraphBackward>)
8.954937344 G memory
tensor([[ 2.9310e+00, 1.7859e+00, -1.0819e+00, ..., -6.1685e-03,
-5.7516e-01, -4.9102e-01],
[ 1.6096e-03, -1.1933e+00, 4.7245e-02, ..., -1.3567e-01,
-3.8962e-01, 3.3101e-01],
[ 1.2337e+00, 2.9382e+00, -9.1299e-01, ..., -2.5608e-01,
1.6244e+00, -1.1455e+00],
...,
[ 8.9977e-02, -1.9731e+00, -7.0138e-01, ..., -1.8564e-01,
2.4577e+00, 1.2462e-01],
[-1.1238e+00, -9.2590e-02, -9.8799e-01, ..., -9.1702e-01,
5.3392e-01, 1.5519e+00],
[-1.9508e+00, -1.3581e+00, -2.5739e+00, ..., -1.8627e+00,
2.8877e+00, 1.7870e+00]], grad_fn=<DifferentiableGraphBackward>)
10.509094912 G memory
tensor([[-2.6716, -1.2049, -1.4170, ..., -2.0941, 2.4951, 2.7314],
[-1.6459, -0.7796, -2.1277, ..., -0.2482, 2.4392, -0.2142],
[ 0.0723, 0.1851, -1.0794, ..., -0.1078, 0.6993, 0.5189],
...,
[-1.1382, 0.8323, -2.3813, ..., 0.2738, 1.2351, -0.9120],
[ 0.4644, -1.4338, 2.6241, ..., -0.0459, 1.0317, 2.3192],
[-0.4867, -0.5519, -0.2944, ..., -0.8683, 2.9462, 1.0247]],
grad_fn=<DifferentiableGraphBackward>)
10.521800704 G memory
TorchScript from C++:
#include <cstring>
#include <stdio.h>
#include <stdlib.h>
#include <cstdio>
#include <cstdint>
#include <cstdlib>
#include <cassert>
#include <stdexcept>
#include <sstream>
#include <string>
#include <iostream>
#include <torch/script.h>
#include <memory>
#include <torch/torch.h>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>
#include <typeinfo>
auto main() -> int {
auto p_model_path = "/home/yuhao/PycharmProjects/pytorch_test/pts/resNet50.pt";
std::shared_ptr<torch::jit::script::Module> model_ptr = torch::jit::load(p_model_path);
for (int ii = 0; ii < 500; ii++) {
auto x = torch::rand({64, 3, 224, 224});
std::vector<torch::jit::IValue> modelInputs;
modelInputs.push_back(x);
auto output = model_ptr->forward(modelInputs);
std::cout << ii << "\n";
}
}
Memory consumption consistently bounces between 1.5g and 5g on every iteration.
TorchNet python inference:
from optparse import OptionParser
import torch
from torchvision import datasets, models, transforms
from zoo.common.nncontext import init_nncontext
from zoo.feature.common import *
from zoo.feature.image import *
from zoo.pipeline.api.net.torch_net import TorchNet
from bigdl.nn.layer import Model
from pyspark.sql.functions import col, udf
from pyspark.sql.types import StringType
from zoo.common.nncontext import *
from zoo.feature.image import *
from zoo.pipeline.nnframes import *
def inference(image_path, sc):
    """Run a TorchNet-wrapped ResNet-18 over the images read from *image_path*.

    Args:
        image_path: Location passed to ``NNImageReader.readImages``.
        sc: Context object passed to ``NNImageReader.readImages``
            (the caller obtains it from ``init_nncontext``).

    Returns:
        The DataFrame produced by ``NNClassifierModel.transform`` with an
        extra ``"name"`` column holding element 0 of each ``"image"`` value
        (presumably the image origin/URI -- confirm against the image schema).
    """
    # Pretrained torchvision ResNet-18 in eval mode, converted to a TorchNet
    # using a [1, 3, 224, 224] input shape.
    torch_model = models.resnet18(pretrained=True).eval()
    net = TorchNet.from_pytorch(torch_model, [1, 3, 224, 224])

    image_df = NNImageReader.readImages(
        image_path, sc, resizeH=300, resizeW=300, image_codec=1)

    # Preprocessing chain applied to the "image" column before prediction.
    preprocessing_steps = [
        RowToImageFeature(),
        ImageResize(256, 256),
        ImageCenterCrop(224, 224),
        ImageChannelNormalize(123.0, 117.0, 104.0, 255.0, 255.0, 255.0),
        ImageMatToTensor(),
        ImageFeatureToTensor(),
    ]
    transformer = ChainedPreprocessing(preprocessing_steps)

    # Disabled alternative kept from the original:
    # Model.loadModel('/home/yuhao/workspace/model/bigdl_vgg-16_imagenet_0.4.0.model')
    classifier_model = (
        NNClassifierModel(net, transformer)
        .setFeaturesCol("image")
        .setBatchSize(4)
    )

    # UDF extracting element 0 of the image value as a string.
    first_field = udf(lambda row: row[0], StringType())
    scored_df = classifier_model.transform(image_df)
    return scored_df.withColumn("name", first_field(col("image")))
if __name__ == "__main__":
    # Script entry point: expects exactly one command-line argument, the
    # image path, runs inference on it and prints the first 20 predictions
    # ordered by name.

    # Imported explicitly because no direct `import sys` is visible among
    # this script's imports; without it `sys` would only be available if a
    # wildcard import happened to re-export it.
    import sys

    if len(sys.argv) != 2:
        # Only the image path is consumed below, so the usage message lists
        # just that one parameter (the original also listed <modelPath>,
        # which the length check above would reject).
        print("Need parameters: <imagePath>")
        sys.exit(-1)

    # Local single-core Spark master with 20g of driver memory.
    sparkConf = init_spark_conf().setAppName("testNNClassifer").setMaster('local[1]').set('spark.driver.memory', '20g')
    sc = init_nncontext(sparkConf)

    image_path = sys.argv[1]
    predictionDF = inference(image_path, sc)
    # show(20, False): print 20 rows without truncating column values.
    predictionDF.select("name", "prediction").orderBy("name").show(20, False)
Memory usage increases from 2.4g to 7.4g in about 50 batches, and stays stable after that.
Use this issue to track progress on memory optimization for TorchNet.