cwacek / python-jsonschema-objects

Automatic Python binding generation from JSON Schemas
MIT License
364 stars 96 forks source link

Memory leak and not constant response time #210

Closed SebastienAndreo closed 3 years ago

SebastienAndreo commented 3 years ago

The memory consumption and the execution time increase if the builder is created at each iteration

Using the master/latest version

Example Schema and code

from memory_profiler import profile
import python_jsonschema_objects as pjs
import json
import gc
import time

schema_str = """
{
  "title": "Person",
  "type": "object",
  "properties": {
    "firstName": {
      "type": "string",
      "description": "The person's first name."
    },
    "lastName": {
      "type": "string",
      "description": "The person's last name."
    },
    "age": {
      "description": "Age in years which must be equal to or greater than zero.",
      "type": "integer",
      "minimum": 0
    }
  }
}
"""

instance_str = """
{
  "firstName": "John",
  "lastName": "Doe",
  "age": 21
}
"""
obj_array = []

# Builder created as a Singleton
#builder = pjs.ObjectBuilder(json.loads(schema_str))
#ns = builder.build_classes()

@profile
def test(n):
    t = time.time()
    for i in range(1, n):
        # Builder created at each iteration
        builder = pjs.ObjectBuilder(json.loads(schema_str))
        ns = builder.build_classes()
        a = ns.Person.from_json(instance_str)
        obj_array.append(a)
    print('sec:', time.time()-t)

test(100)
gc.collect()
test(100)
gc.collect()
test(100)
gc.collect()
test(100)
gc.collect()
test(100)
gc.collect()
test(100)
gc.collect()
test(100)
gc.collect()
test(100)
gc.collect()
test(100)
gc.collect()
test(100)
Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43     25.0 MiB     25.0 MiB           1   @profile
    44                                         def test(n):
    45     25.0 MiB      0.0 MiB           1       t = time.time()
    46     29.7 MiB      0.0 MiB         100       for i in range(1, n):
    47     29.7 MiB      1.1 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48     29.7 MiB      0.5 MiB          99           ns = builder.build_classes()
    49     29.7 MiB      3.1 MiB          99           a = ns.Person.from_json(instance_str)
    50     29.7 MiB      0.0 MiB          99           obj_array.append(a)
    51     29.7 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 1.3166437149047852
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43     29.7 MiB     29.7 MiB           1   @profile
    44                                         def test(n):
    45     29.7 MiB      0.0 MiB           1       t = time.time()
    46     42.0 MiB      0.0 MiB         100       for i in range(1, n):
    47     41.7 MiB      0.8 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48     41.7 MiB      0.3 MiB          99           ns = builder.build_classes()
    49     42.0 MiB     11.2 MiB          99           a = ns.Person.from_json(instance_str)
    50     42.0 MiB      0.0 MiB          99           obj_array.append(a)
    51     42.0 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 1.4638645648956299
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43     42.0 MiB     42.0 MiB           1   @profile
    44                                         def test(n):
    45     42.0 MiB      0.0 MiB           1       t = time.time()
    46     59.1 MiB      0.0 MiB         100       for i in range(1, n):
    47     58.9 MiB      1.2 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48     58.9 MiB      0.8 MiB          99           ns = builder.build_classes()
    49     59.1 MiB     15.2 MiB          99           a = ns.Person.from_json(instance_str)
    50     59.1 MiB      0.0 MiB          99           obj_array.append(a)
    51     59.1 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 1.66902494430542
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43     59.1 MiB     59.1 MiB           1   @profile
    44                                         def test(n):
    45     59.1 MiB      0.0 MiB           1       t = time.time()
    46     81.3 MiB      0.0 MiB         100       for i in range(1, n):
    47     81.1 MiB      1.2 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48     81.1 MiB      0.0 MiB          99           ns = builder.build_classes()
    49     81.3 MiB     21.0 MiB          99           a = ns.Person.from_json(instance_str)
    50     81.3 MiB      0.0 MiB          99           obj_array.append(a)
    51     81.3 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 1.8282761573791504
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43     81.3 MiB     81.3 MiB           1   @profile
    44                                         def test(n):
    45     81.3 MiB      0.0 MiB           1       t = time.time()
    46    117.2 MiB      0.0 MiB         100       for i in range(1, n):
    47    116.7 MiB      1.0 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48    116.7 MiB      0.4 MiB          99           ns = builder.build_classes()
    49    117.2 MiB     34.5 MiB          99           a = ns.Person.from_json(instance_str)
    50    117.2 MiB      0.0 MiB          99           obj_array.append(a)
    51    117.2 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 2.0199222564697266
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43    117.2 MiB    117.2 MiB           1   @profile
    44                                         def test(n):
    45    117.2 MiB      0.0 MiB           1       t = time.time()
    46    158.7 MiB      0.0 MiB         100       for i in range(1, n):
    47    158.2 MiB      1.8 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48    158.2 MiB      0.6 MiB          99           ns = builder.build_classes()
    49    158.7 MiB     39.2 MiB          99           a = ns.Person.from_json(instance_str)
    50    158.7 MiB      0.0 MiB          99           obj_array.append(a)
    51    158.7 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 2.4503257274627686
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43    158.7 MiB    158.7 MiB           1   @profile
    44                                         def test(n):
    45    158.7 MiB      0.0 MiB           1       t = time.time()
    46    205.2 MiB      0.0 MiB         100       for i in range(1, n):
    47    205.0 MiB      1.9 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48    205.0 MiB      0.0 MiB          99           ns = builder.build_classes()
    49    205.2 MiB     44.6 MiB          99           a = ns.Person.from_json(instance_str)
    50    205.2 MiB      0.0 MiB          99           obj_array.append(a)
    51    205.2 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 2.3949639797210693
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43    205.2 MiB    205.2 MiB           1   @profile
    44                                         def test(n):
    45    205.2 MiB      0.0 MiB           1       t = time.time()
    46    257.1 MiB      0.0 MiB         100       for i in range(1, n):
    47    256.6 MiB      1.8 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48    256.6 MiB      0.4 MiB          99           ns = builder.build_classes()
    49    257.1 MiB     49.7 MiB          99           a = ns.Person.from_json(instance_str)
    50    257.1 MiB      0.0 MiB          99           obj_array.append(a)
    51    257.1 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 2.855459213256836
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43    257.1 MiB    257.1 MiB           1   @profile
    44                                         def test(n):
    45    257.1 MiB      0.0 MiB           1       t = time.time()
    46    313.9 MiB      0.0 MiB         100       for i in range(1, n):
    47    313.1 MiB      1.0 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48    313.1 MiB      0.0 MiB          99           ns = builder.build_classes()
    49    313.9 MiB     55.7 MiB          99           a = ns.Person.from_json(instance_str)
    50    313.9 MiB      0.1 MiB          99           obj_array.append(a)
    51    313.9 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 2.741384506225586
Filename: /home/seb/memory.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    43    313.9 MiB    313.9 MiB           1   @profile
    44                                         def test(n):
    45    313.9 MiB      0.0 MiB           1       t = time.time()
    46    375.9 MiB      0.0 MiB         100       for i in range(1, n):
    47    375.3 MiB      1.4 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    48    375.3 MiB      1.2 MiB          99           ns = builder.build_classes()
    49    375.9 MiB     59.3 MiB          99           a = ns.Person.from_json(instance_str)
    50    375.9 MiB      0.0 MiB          99           obj_array.append(a)
    51    375.9 MiB      0.0 MiB           1       print('sec:', time.time()-t)

Expected behavior the memory pressure created by the instantiation of a Person Class should stay constant and the execution time also.

cwacek commented 3 years ago

I don't agree. You're creating a whole new object builder from the schema every time, and not necessarily cleaning up the old one. That means it's re-parsing the JSON and recreating the entire class type hierarchy each time.

Why wouldn't this increase memory consumption? There's no way for the code to know you're loading the same schema over and over again.

Do you have a valid use case for doing this?

SebastienAndreo commented 3 years ago

That the memory increases globally is not the real problem and you are right for that point. My observation is more: the memory instantiation cost of a new person: 'a = ns.Person.from_json(instance_str)' is not constant. I reran the test with 18 iterations and as you can see the memory increment (2.7,11.5,15.2,20.1,34.8,39.2,45.2,49.5,54.6,60.1,64.6,69.1,74.3,79.7,84.1,90.2,112.1,136.4) is following a linear function which will mean a quadratic function for the global system.

So the question is Why the instantiation of a new class from a Json cost more and more memory?

SebastienAndreo commented 3 years ago

@cwacek can you comment? You also mentioned, "not necessarily cleaning up the old one" I haven't seen cleanup methods, how can I do that ?

cwacek commented 3 years ago

I took a closer look at your logs and I see what you’re talking about, but I haven’t had time to dig into it yet.

Is this something you’re running into in real code, or are you just exploring? The way you’re using the builder is pretty weird outside of a GC test case.

On Apr 2, 2021, at 4:58 PM, Sébastien @.***> wrote:

 @cwacek can you comment? You also mentioned, "not necessarily cleaning up the old one" I haven't seen cleanup methods, how can I do that ?

— You are receiving this because you were mentioned. Reply to this email directly, view it on GitHub, or unsubscribe.

cwacek commented 3 years ago

TLDR: I looked into this further, and I'm convinced this is a non-issue. If you take the build_classes() call out of the loop, there is nearly zero increase in memory usage. I'm closing this issue unless you can tell me a reasonable use case for building classes in each loop iteration

Details Here's what it looks like if you build the classes outside of the loop (and thus get to re-use the constructed classes):

@profile
def test(n):
    t = time.time()
    builder = pjs.ObjectBuilder(json.loads(schema_str))
    ns = builder.build_classes()
    for i in range(1, n):
        a = ns.Person.from_json(instance_str)
        obj_array.append(a)
    print('sec:', time.time()-t)

Result:

sec: 0.14585113525390625
Filename: test/test_210_memoryuse.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    44     27.3 MiB     27.3 MiB           1   @profile
    45                                         def test(n):
    46     27.3 MiB      0.0 MiB           1       t = time.time()
    47     27.3 MiB      0.0 MiB           1       builder = pjs.ObjectBuilder(json.loads(schema_str))
    48     27.3 MiB      0.0 MiB           1       ns = builder.build_classes()
    49     27.4 MiB      0.0 MiB         100       for i in range(1, n):
    50                                                 # Builder created at each iteration
    51                                                 #builder = pjs.ObjectBuilder(json.loads(schema_str))
    52                                                 #ns = builder.build_classes()
    53     27.4 MiB      0.1 MiB          99           a = ns.Person.from_json(instance_str)
    54     27.4 MiB      0.0 MiB          99           obj_array.append(a)
    55     27.4 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 0.13786005973815918
Filename: test/test_210_memoryuse.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    44     27.4 MiB     27.4 MiB           1   @profile
    45                                         def test(n):
    46     27.4 MiB      0.0 MiB           1       t = time.time()
    47     27.5 MiB      0.0 MiB           1       builder = pjs.ObjectBuilder(json.loads(schema_str))
    48     27.5 MiB      0.0 MiB           1       ns = builder.build_classes()
    49     27.6 MiB      0.0 MiB         100       for i in range(1, n):
    50                                                 # Builder created at each iteration
    51                                                 #builder = pjs.ObjectBuilder(json.loads(schema_str))
    52                                                 #ns = builder.build_classes()
    53     27.6 MiB      0.1 MiB          99           a = ns.Person.from_json(instance_str)
    54     27.6 MiB      0.0 MiB          99           obj_array.append(a)
    55     27.6 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 0.13791322708129883
Filename: test/test_210_memoryuse.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    44     27.6 MiB     27.6 MiB           1   @profile
    45                                         def test(n):
    46     27.6 MiB      0.0 MiB           1       t = time.time()
    47     27.6 MiB      0.0 MiB           1       builder = pjs.ObjectBuilder(json.loads(schema_str))
    48     27.6 MiB      0.0 MiB           1       ns = builder.build_classes()
    49     27.7 MiB      0.0 MiB         100       for i in range(1, n):
    50                                                 # Builder created at each iteration
    51                                                 #builder = pjs.ObjectBuilder(json.loads(schema_str))
    52                                                 #ns = builder.build_classes()
    53     27.7 MiB      0.1 MiB          99           a = ns.Person.from_json(instance_str)
    54     27.7 MiB      0.0 MiB          99           obj_array.append(a)
    55     27.7 MiB      0.0 MiB           1       print('sec:', time.time()-t)

sec: 0.1420879364013672
Filename: test/test_210_memoryuse.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    44     27.7 MiB     27.7 MiB           1   @profile
    45                                         def test(n):
    46     27.7 MiB      0.0 MiB           1       t = time.time()
    47     27.8 MiB      0.0 MiB           1       builder = pjs.ObjectBuilder(json.loads(schema_str))
    48     27.8 MiB      0.0 MiB           1       ns = builder.build_classes()
    49     27.9 MiB      0.0 MiB         100       for i in range(1, n):
    50                                                 # Builder created at each iteration
    51                                                 #builder = pjs.ObjectBuilder(json.loads(schema_str))
    52                                                 #ns = builder.build_classes()
    53     27.9 MiB      0.1 MiB          99           a = ns.Person.from_json(instance_str)
    54     27.9 MiB      0.0 MiB          99           obj_array.append(a)
    55     27.9 MiB      0.0 MiB           1       print('sec:', time.time()-t)

This tells me as I suspected that it's building the classes that incurs the memory overhead, and by extension, the the memory_profiler is slightly off when it reports it by line. This wouldn't be surprising if we consider that build_classes() constructs a bunch of types but doesn't actually instantiate them - there might be extra bookkeeping and overhead if you actually instantiate a type.

To validate this I adjusted the function to call build_classes, then instantiate two objects. As I suspected, only the first object creation uses any appreciable memory.

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
    44     61.8 MiB     61.8 MiB           1   @profile
    45                                         def test(n):
    46     61.8 MiB      0.0 MiB           1       t = time.time()
    47                                             #builder = pjs.ObjectBuilder(json.loads(schema_str))
    48                                             #ns = builder.build_classes()
    49     84.6 MiB      0.0 MiB         100       for i in range(1, n):
    50                                                 # Builder created at each iteration
    51     84.3 MiB      1.4 MiB          99           builder = pjs.ObjectBuilder(json.loads(schema_str))
    52     84.3 MiB      0.4 MiB          99           ns = builder.build_classes()
    53     84.6 MiB     20.9 MiB          99           a = ns.Person.from_json(instance_str)
    54     84.6 MiB      0.0 MiB          99           obj_array.append(a)
    55     84.6 MiB      0.0 MiB          99           b = ns.Person.from_json(instance_str)
    56     84.6 MiB      0.0 MiB          99           obj_array.append(b)
    57     84.6 MiB      0.0 MiB           1       print('sec:', time.time()-t)