Open JaroslavTulach opened 4 months ago
The generated code for persistance of Name.Literal
is using writeUTF
to store the String
name
:
@Override
protected void writeObject(org.enso.compiler.core.ir.Name.Literal obj, Output out) throws IOException {
out.writeUTF(obj.name());
out.writeBoolean(obj.isMethod());
out.writeInline(scala.Option.class, obj.location());
out.writeInline(scala.Option.class, obj.originalName());
out.writeInline(org.enso.compiler.core.ir.MetadataStorage.class, obj.passData());
out.writeInline(org.enso.compiler.core.ir.DiagnosticStorage.class, obj.diagnostics());
}
the writeUTF
method currently stores the name "in place" - e.g. repeats the characters again and again. If we replace that line with:
out.writeObject(obj.name().intern());
the size of the Standard.Base
.bindings
cache goes down from 42MB to 32MB and overall we save 23% on the size of all caches.
One other method is to use writeObject
and readObject
for String
in the generated classes:
$ git diff
diff --git lib/java/persistance-dsl/src/main/java/org/enso/persist/impl/PersistableProcessor.java lib/java/persistance-dsl/src/main/java/org/enso/persist/impl/PersistableProcessor.java
index e7ee6e05f4..ab17b3685f 100644
--- lib/java/persistance-dsl/src/main/java/org/enso/persist/impl/PersistableProcessor.java
+++ lib/java/persistance-dsl/src/main/java/org/enso/persist/impl/PersistableProcessor.java
@@ -182,7 +182,9 @@ public class PersistableProcessor extends AbstractProcessor {
if (cons != null) {
for (var v : cons.getParameters()) {
if (tu.isSameType(eu.getTypeElement("java.lang.String").asType(), v.asType())) {
- w.append(" var ").append(v.getSimpleName()).append(" = in.readUTF();\n");
+ w.append(" var ")
+ .append(v.getSimpleName())
+ .append(" = (java.lang.String) in.readObject();\n");
} else if (!v.asType().getKind().isPrimitive()) {
var type = tu.erasure(v.asType());
var elem = (TypeElement) tu.asElement(type);
@@ -241,7 +243,7 @@ public class PersistableProcessor extends AbstractProcessor {
if (cons != null) {
for (var v : cons.getParameters()) {
if (tu.isSameType(eu.getTypeElement("java.lang.String").asType(), v.asType())) {
- w.append(" out.writeUTF(obj.").append(v.getSimpleName()).append("());\n");
+ w.append(" out.writeObject(obj.").append(v.getSimpleName()).append("());\n");
} else if (!v.asType().getKind().isPrimitive()) {
var type = tu.erasure(v.asType());
:...skipping...
diff --git lib/java/persistance-dsl/src/main/java/org/enso/persist/impl/PersistableProcessor.java lib/java/persistance-dsl/src/main/java/org/enso/persist/impl/PersistableProcessor.java
index e7ee6e05f4..ab17b3685f 100644
--- lib/java/persistance-dsl/src/main/java/org/enso/persist/impl/PersistableProcessor.java
+++ lib/java/persistance-dsl/src/main/java/org/enso/persist/impl/PersistableProcessor.java
@@ -182,7 +182,9 @@ public class PersistableProcessor extends AbstractProcessor {
if (cons != null) {
for (var v : cons.getParameters()) {
if (tu.isSameType(eu.getTypeElement("java.lang.String").asType(), v.asType())) {
- w.append(" var ").append(v.getSimpleName()).append(" = in.readUTF();\n");
+ w.append(" var ")
+ .append(v.getSimpleName())
+ .append(" = (java.lang.String) in.readObject();\n");
} else if (!v.asType().getKind().isPrimitive()) {
var type = tu.erasure(v.asType());
var elem = (TypeElement) tu.asElement(type);
@@ -241,7 +243,7 @@ public class PersistableProcessor extends AbstractProcessor {
if (cons != null) {
for (var v : cons.getParameters()) {
if (tu.isSameType(eu.getTypeElement("java.lang.String").asType(), v.asType())) {
- w.append(" out.writeUTF(obj.").append(v.getSimpleName()).append("());\n");
+ w.append(" out.writeObject(obj.").append(v.getSimpleName()).append("());\n");
} else if (!v.asType().getKind().isPrimitive()) {
var type = tu.erasure(v.asType());
var elem = (TypeElement) tu.asElement(type);
diff --git lib/java/persistance/src/main/java/org/enso/persist/PerGenerator.java lib/java/persistance/src/main/java/org/enso/persist/PerGenerator.java
index d8a3e2c2bd..d64d7d5d12 100644
--- lib/java/persistance/src/main/java/org/enso/persist/PerGenerator.java
+++ lib/java/persistance/src/main/java/org/enso/persist/PerGenerator.java
@@ -13,7 +13,7 @@ import java.util.function.Function;
import org.slf4j.Logger;
final class PerGenerator {
- static final byte[] HEADER = new byte[] {0x0a, 0x0d, 0x13, 0x0f};
+ static final byte[] HEADER = new byte[] {0x0a, 0x0d, 0x13, 0x13};
private final OutputStream main;
private final Map<Object, WriteResult> knownObjects = new IdentityHashMap<>();
private int countReferences = 1;
diff --git lib/java/persistance/src/main/java/org/enso/persist/PerInputImpl.java lib/java/persistance/src/main/java/org/enso/persist/PerInputImpl.java
index 5bfe457cd2..1f6c9c126a 100644
--- lib/java/persistance/src/main/java/org/enso/persist/PerInputImpl.java
+++ lib/java/persistance/src/main/java/org/enso/persist/PerInputImpl.java
@@ -319,11 +319,11 @@ final class PerInputImpl implements Input {
}
final Object resolveObject(Object res) {
- if (readResolve != null) {
- return readResolve.apply(res);
- } else {
- return res;
+ var v = (readResolve != null) ? readResolve.apply(res) : res;
+ if (v instanceof String s) {
+ v = s.intern();
}
+ return v;
}
final Object getObjectAt(int at) {
with this change the caches are also down to the previous values. Moreover it is guaranteed that various Name.Literal
instances point to the same string. By executing following OQL query:
var arr = []
heap.forEachObject(function(l) {
var n = l.name.toString();
if (!n.contains("Standard")) {
return;
}
arr.push(l.name)
}, 'org.enso.compiler.core.ir.Name$Literal')
arr
that all the IR.Name.name
values are pointing to the identical instance of the string.
I've executed Enso program as
enso --run test/Base_Tests
and in the middle of execution I have taken a heap dump. Then I used VisualVM to compute histogram ofName.Literal
instances based on their name. There is a lot of duplicates - for exampleStandard
is repeated 15000 times!The program to execute in OQL console is: