Open JakeWharton opened 1 month ago
Take into account that unsigned numbers in K/JS add quite an overhead, unfortunately. Not sure it's optimal for a low level library like kotlinx-io.
For reference: something similar was proposed during implementation of Unsafe
API and was rejected. The reasoning could be read here: https://github.com/Kotlin/kotlinx-io/pull/334#discussion_r1648800192
@whyoleg I am proposing the inverse of that. Unsafe should stay signed, but high-level APIs should be unsigned.
@lppedd Do you have examples?
For a quick demonstration of changing Buffer.size
to UInt
I wrote:
@JsExport
fun stuff(): Int {
val buffer = Buffer().apply {
writeString("sup")
}
return buffer.size.toInt()
}
class Buffer {
var size: UInt = 0U
private set
fun writeString(string: String) {
size += string.length.toUInt()
}
}
which the playground turned into the following JS:
//region block: polyfills
(function () {
if (typeof globalThis === 'object')
return;
Object.defineProperty(Object.prototype, '__magic__', {get: function () {
return this;
}, configurable: true});
__magic__.globalThis = __magic__;
delete Object.prototype.__magic__;
}());
//endregion
(function (root, factory) {
if (typeof define === 'function' && define.amd)
define(['exports'], factory);
else if (typeof exports === 'object')
factory(module.exports);
else
root.moduleId = factory(typeof moduleId === 'undefined' ? {} : moduleId);
}(globalThis, function (_) {
'use strict';
//region block: pre-declaration
initMetadataForObject(Unit, 'Unit');
initMetadataForClass(Buffer, 'Buffer', Buffer);
//endregion
function Unit() {
}
var Unit_instance;
function Unit_getInstance() {
return Unit_instance;
}
function implement(interfaces) {
var maxSize = 1;
var masks = [];
var inductionVariable = 0;
var last = interfaces.length;
while (inductionVariable < last) {
var i = interfaces[inductionVariable];
inductionVariable = inductionVariable + 1 | 0;
var currentSize = maxSize;
var tmp1_elvis_lhs = i.prototype.$imask$;
var imask = tmp1_elvis_lhs == null ? i.$imask$ : tmp1_elvis_lhs;
if (!(imask == null)) {
masks.push(imask);
currentSize = imask.length;
}
var iid = i.$metadata$.iid;
var tmp;
if (iid == null) {
tmp = null;
} else {
// Inline function 'kotlin.let' call
// Inline function 'kotlin.contracts.contract' call
// Inline function 'kotlin.js.implement.<anonymous>' call
tmp = bitMaskWith(iid);
}
var iidImask = tmp;
if (!(iidImask == null)) {
masks.push(iidImask);
currentSize = Math.max(currentSize, iidImask.length);
}
if (currentSize > maxSize) {
maxSize = currentSize;
}
}
return compositeBitMask(maxSize, masks);
}
function bitMaskWith(activeBit) {
var numberIndex = activeBit >> 5;
var intArray = new Int32Array(numberIndex + 1 | 0);
var positionInNumber = activeBit & 31;
var numberWithSettledBit = 1 << positionInNumber;
intArray[numberIndex] = intArray[numberIndex] | numberWithSettledBit;
return intArray;
}
function compositeBitMask(capacity, masks) {
var tmp = 0;
var tmp_0 = new Int32Array(capacity);
while (tmp < capacity) {
var tmp_1 = tmp;
var result = 0;
var inductionVariable = 0;
var last = masks.length;
while (inductionVariable < last) {
var mask = masks[inductionVariable];
inductionVariable = inductionVariable + 1 | 0;
if (tmp_1 < mask.length) {
result = result | mask[tmp_1];
}
}
tmp_0[tmp_1] = result;
tmp = tmp + 1 | 0;
}
return tmp_0;
}
function objectCreate(proto) {
proto = proto === VOID ? null : proto;
return Object.create(proto);
}
function defineProp(obj, name, getter, setter) {
return Object.defineProperty(obj, name, {configurable: true, get: getter, set: setter});
}
function equals(obj1, obj2) {
if (obj1 == null) {
return obj2 == null;
}
if (obj2 == null) {
return false;
}
if (typeof obj1 === 'object' && typeof obj1.equals === 'function') {
return obj1.equals(obj2);
}
if (obj1 !== obj1) {
return obj2 !== obj2;
}
if (typeof obj1 === 'number' && typeof obj2 === 'number') {
var tmp;
if (obj1 === obj2) {
var tmp_0;
if (obj1 !== 0) {
tmp_0 = true;
} else {
// Inline function 'kotlin.js.asDynamic' call
var tmp_1 = 1 / obj1;
// Inline function 'kotlin.js.asDynamic' call
tmp_0 = tmp_1 === 1 / obj2;
}
tmp = tmp_0;
} else {
tmp = false;
}
return tmp;
}
return obj1 === obj2;
}
function protoOf(constructor) {
return constructor.prototype;
}
function createMetadata(kind, name, defaultConstructor, associatedObjectKey, associatedObjects, suspendArity) {
var undef = VOID;
var iid = kind === 'interface' ? generateInterfaceId() : VOID;
return {kind: kind, simpleName: name, associatedObjectKey: associatedObjectKey, associatedObjects: associatedObjects, suspendArity: suspendArity, $kClass$: undef, defaultConstructor: defaultConstructor, iid: iid};
}
function generateInterfaceId() {
if (globalInterfaceId === VOID) {
globalInterfaceId = 0;
}
// Inline function 'kotlin.js.unsafeCast' call
globalInterfaceId = globalInterfaceId + 1 | 0;
// Inline function 'kotlin.js.unsafeCast' call
return globalInterfaceId;
}
var globalInterfaceId;
function initMetadataFor(kind, ctor, name, defaultConstructor, parent, interfaces, suspendArity, associatedObjectKey, associatedObjects) {
if (!(parent == null)) {
ctor.prototype = Object.create(parent.prototype);
ctor.prototype.constructor = ctor;
}
var metadata = createMetadata(kind, name, defaultConstructor, associatedObjectKey, associatedObjects, suspendArity);
ctor.$metadata$ = metadata;
if (!(interfaces == null)) {
var receiver = !equals(metadata.iid, VOID) ? ctor : ctor.prototype;
receiver.$imask$ = implement(interfaces);
}
}
function initMetadataForClass(ctor, name, defaultConstructor, parent, interfaces, suspendArity, associatedObjectKey, associatedObjects) {
var kind = 'class';
initMetadataFor(kind, ctor, name, defaultConstructor, parent, interfaces, suspendArity, associatedObjectKey, associatedObjects);
}
function initMetadataForObject(ctor, name, defaultConstructor, parent, interfaces, suspendArity, associatedObjectKey, associatedObjects) {
var kind = 'object';
initMetadataFor(kind, ctor, name, defaultConstructor, parent, interfaces, suspendArity, associatedObjectKey, associatedObjects);
}
function initMetadataForLambda(ctor, parent, interfaces, suspendArity) {
initMetadataForClass(ctor, 'Lambda', VOID, parent, interfaces, suspendArity, VOID, VOID);
}
function initMetadataForCoroutine(ctor, parent, interfaces, suspendArity) {
initMetadataForClass(ctor, 'Coroutine', VOID, parent, interfaces, suspendArity, VOID, VOID);
}
function initMetadataForFunctionReference(ctor, parent, interfaces, suspendArity) {
initMetadataForClass(ctor, 'FunctionReference', VOID, parent, interfaces, suspendArity, VOID, VOID);
}
function initMetadataForCompanion(ctor, parent, interfaces, suspendArity) {
initMetadataForObject(ctor, 'Companion', VOID, parent, interfaces, suspendArity, VOID, VOID);
}
function get_VOID() {
_init_properties_void_kt__3zg9as();
return VOID;
}
var VOID;
var properties_initialized_void_kt_e4ret2;
function _init_properties_void_kt__3zg9as() {
if (!properties_initialized_void_kt_e4ret2) {
properties_initialized_void_kt_e4ret2 = true;
VOID = void 0;
}
}
function _UInt___init__impl__l7qpdl(data) {
return data;
}
function _UInt___get_data__impl__f0vqqw($this) {
return $this;
}
function stuff() {
// Inline function 'kotlin.apply' call
var this_0 = new Buffer();
// Inline function 'kotlin.contracts.contract' call
// Inline function 'stuff.<anonymous>' call
this_0.writeString_npol3u_k$('sup');
var buffer = this_0;
// Inline function 'kotlin.UInt.toInt' call
var this_1 = buffer.size_1;
return _UInt___get_data__impl__f0vqqw(this_1);
}
function Buffer() {
this.size_1 = _UInt___init__impl__l7qpdl(0);
}
protoOf(Buffer).writeString_npol3u_k$ = function (string) {
var tmp = this;
// Inline function 'kotlin.UInt.plus' call
var this_0 = this.size_1;
// Inline function 'kotlin.toUInt' call
var this_1 = string.length;
var other = _UInt___init__impl__l7qpdl(this_1);
tmp.size_1 = _UInt___init__impl__l7qpdl(_UInt___get_data__impl__f0vqqw(this_0) + _UInt___get_data__impl__f0vqqw(other) | 0);
};
//region block: init
Unit_instance = new Unit();
//endregion
//region block: exports
function $jsExportAll$(_) {
_.stuff = stuff;
}
$jsExportAll$(_);
if (typeof get_output !== "undefined") {
get_output();
output = new BufferedOutput();
_.output = get_output();
}
//endregion
return _;
}));
which when dumped into swc
produces:
function Buffer() {
this.size_1 = 0;
}
// ⋮
Buffer.prototype.writeString_npol3u_k$ = function(string) {
var this_0 = this.size_1, this_1 = string.length;
this.size_1 = this_0 + this_1 | 0;
}
// ⋮
_.stuff = function() {
var this_0 = new Buffer();
return this_0.writeString_npol3u_k$('sup'), this_0.size_1;
}
which has completely eliminated the helper functions around the unsigned type.
I am proposing the inverse of that. Unsafe should stay signed, but high-level APIs should be unsigned.
Yeah, I understand, but still, the same reasoning could be applied for high-level API, specifically https://github.com/Kotlin/KEEP/blob/master/proposals/unsigned-types.md#non-negative-integers
Just let imagine the situation. That we need to read some amount of data in chunks, f.e X=100
, we read in chunks by 8. So on each operation we decrease X by the size which was actually read. At some point X=4
. Now we read next, last chunk, but we forgot to check something, and instead of X=4-8=-4
with signed number, which will cause issue because of index out of bound exception, with unsigned numbers, we would receive X=4-8=BIG_BIG_NUMBER
which probably will be unnoticed, and we will just continue to read in chunks until infinity.
While example could not be that good, I think that the idea is visible. Sometimes, negative numbers, especially in IO with a lot of kind of unsafe
code and some expectations could be good.
May be, unsigned integers in this case could do more good than harm but at least now I don't see it
Which when dumped into SWC
But then you're implying the use of a minifier. I don't take it for granted, I always take the compiler output as a reference.
Which when dumped into SWC
But then you're implying the use of a minifier. I don't take it for granted, I always take the compiler output as a reference.
Of course. Anyone serious about running anything on JS (let alone from another language which outputs JS) will be using a compile-time optimization step. kotlinc is not an optimizing compiler, and that's true for every backend target.
But here, V8 will also inline all of UInt
away: https://js.godbolt.org/z/eY5xWY18K
It's not easy to read! Most of the assembly instructions deal with the JS stack bookkeeping, but if you read through the noise you can see the lookup of the size
and it being added with a normal add instruction.
Even easier is to just scroll down below the assembly and it tells you that both UInt_
-prefixed functions were completely inlined.
Of course we could also verify this with benchmarks which will show an Int
-flavored and UInt
-flavored variant will perform exactly the same after warmup and JIT compilation.
I am proposing the inverse of that. Unsafe should stay signed, but high-level APIs should be unsigned.
Yeah, I understand, but still, the same reasoning could be applied for high-level API, specifically https://github.com/Kotlin/KEEP/blob/master/proposals/unsigned-types.md#non-negative-integers
I do remember reading this in the KEEP at some point. I find their argument in general to be poor, and contradicts part of the point of adding unsigned numbers in the first place.
The specific example of indexOf
by itself is fine. Unilaterally declaring unsigned numbers as somehow not being about eliminating the negative domain is just ridiculous.
How we choose to interpret the given bit-width of the number is as much part of the trade-off as it is in choosing a bit width to begin with. Otherwise why bother with adding UByte
and UShort
? Just use an Int
!
Just let imagine the situation. That we need to read some amount of data in chunks, f.e
X=100
, we read in chunks by 8. So on each operation we decrease X by the size which was actually read. At some pointX=4
. Now we read next, last chunk, but we forgot to check something, and instead ofX=4-8=-4
with signed number, which will cause issue because of index out of bound exception, with unsigned numbers, we would receiveX=4-8=BIG_BIG_NUMBER
which probably will be unnoticed, and we will just continue to read in chunks until infinity. While example could not be that good, I think that the idea is visible. Sometimes, negative numbers, especially in IO with a lot of kind ofunsafe
code and some expectations could be good.
For the unsafe API I buy this argument on the basis of things like mechanical sympathy.
For the high-level API, this is why languages feature checked and saturating mathematical operations (on both signed and unsigned types).
But additionally, the Source
, Sink
, and Buffer
APIs encapsulate these index-based operations near completely. While there's still a potential that you're doing some bookkeeping as a caller, it should be nowhere near the level you would be with raw ByteArray
s.
May be, unsigned integers in this case could do more good than harm but at least now I don't see it
It very well might be the case that it doesn't make sense. But we (Okio people) never even had the chance to consider them anywhere. I just wanted to make sure somebody evaluated whether they made sense or not because this library does have that chance.
Of course. Anyone serious about running anything on JS will be using a compile-time optimization step
You're not wrong, but K/JS mostly caters to the ones that don't want to deal (as much as possible) with the JS ecosystem.
I'd definitely agree with your viewpoint if an optimization step was built into the toolchain. Webpack is there for the browser, but Node.js requires a manual setup.
This might be a good precedent to finally materialize our approach to unsigned types and add it to API guidelines. In general, we avoid unsigned types for any type refinements and arithmetics, as well as we were not intending them be used in such contexts.
When I approached the idea of using unsigned types as an indexing mechanism (not in IO tho, but in the standard library), the following things came across:
endIndex - startIndex + 1u
and errors in such math will be more surprising. Wrapping/modulo arithmetic is hardu
suffix or use toUInt
mindlessly to do basic indexing math (NB: needs additional validation that it is the case). See also: previous point.indexOf
issue -- -1
is an indicator of a missing element. We either need to come up with a different pattern or give up and embrace type mixing here.for
-IDE convention for unsigned types, annoying toUint
in things where we got used to Int
, not all extensions/application APIs work with unsigned types, instead accepting signed ones; separate syntax for unsigned literals etc.)String
and ByteString
(and in Source
/Sink
vs Java's IO extensions; and in the standard library in general)
String
/ByteString
index-based processing back-to-back probably will be toughI'd say we'll need to duct-tape the prototype and see how it goes. TL;DR, the things we have to care about: consistency (with other APIs), mixed math, convenience/familiarity/recognition of API patterns, interop (with Java and other APIs)
Okio was written in Java before being ported to Kotlin, and binary compatibility meant we retained the use of signed numbers. With this library being a fresh take on its design, there is an opportunity to switch to unsigned numbers where it makes sense (and if it makes sense).
It certainly would be nice to never have to check values for being negative all over the place in the internals.
The concern, of course, is that there is enough overhead to make this not worthwhile. Or that the conversion ceremony is too much boilerplate.
I suspect the unsafe internals and unsafe APIs will still likely use signed
ByteArray
and signedInt
s, but perhaps the size and indexing ofBuffer
/ByteString
and all their APIs could switch.