dotnet / runtime

.NET is a cross-platform runtime for cloud, mobile, desktop, and IoT apps.
https://docs.microsoft.com/dotnet/core/
MIT License
15.26k stars 4.73k forks source link

API Proposal: ByteSize Struct #19566

Open benaadams opened 7 years ago

benaadams commented 7 years ago

Like TimeSpan to remove ambiguity in sizes e.g. https://github.com/aspnet/KestrelHttpServer/issues/1247

Suggested API

namespace System
{
    /// <summary>
    /// Proposed value type for a size of data, constructed from a signed 64-bit byte count
    /// and modelled on <see cref="TimeSpan"/> (Total* properties, From* factories, Parse/TryParse).
    /// </summary>
    /// <remarks>
    /// This is an API sketch: members are declared without bodies.
    /// Naming convention used by the members below: Kilo/Mega/Giga/Tera/Peta/Exa are the decimal
    /// (1000-based) units; KiBi/MeBi/GiBi/TeBi/PeBi/ExBi are the binary (1024-based) units.
    /// </remarks>
    public struct ByteSize : IComparable
        , IComparable<ByteSize>, IEquatable<ByteSize>, IFormattable
    {
        /// <summary>Creates a size from a (possibly negative) number of bytes.</summary>
        /// <param name="sizeInBytes">The size, expressed in bytes.</param>
        public ByteSize(long sizeInBytes);

        // Well-known values. Min/MaxValue span the full range of long.
        public static readonly ByteSize Zero = new ByteSize(0L);
        public static readonly ByteSize MinValue = new ByteSize(long.MinValue);
        public static readonly ByteSize MaxValue = new ByteSize(long.MaxValue);
        // NOTE(review): long.MinValue / 8 and long.MaxValue / 8 are presumably the extreme sizes
        // whose TotalBits (a long) can still be represented without overflow - confirm.
        public static readonly ByteSize MinBitValue = new ByteSize(long.MinValue / 8);
        public static readonly ByteSize MaxBitValue = new ByteSize(long.MaxValue / 8);

/*      // Modulus sizes - useful or confusing?
        public int Bytes { get; }
        public int KiloBytes { get; }
        public int MegaBytes { get; }
        public int GigaBytes { get; }
        public int TeraBytes { get; }
        public int PetaBytes { get; }
        public int ExaBytes { get; }

        public int BiBytes { get; }
        public int KiBiBytes { get; }
        public int MeBiBytes { get; }
        public int GiBiBytes { get; }
        public int TeBiBytes { get; }
        public int PeBiBytes { get; }
        public int ExBiBytes { get; }

        public int Bits { get; }
        public int KiloBits { get; }
        public int MegaBits { get; }
        public int GigaBits { get; }
        public int TeraBits { get; }
        public int PetaBits { get; }
        public int ExaBits { get; }

        public int BiBits { get; }
        public int KiBiBits { get; }
        public int MeBiBits { get; }
        public int GiBiBits { get; }
        public int TeBiBits { get; }
        public int PeBiBits { get; }
        public int ExBiBits { get; }
*/
        /// <summary>The whole size expressed in bytes.</summary>
        public long TotalBytes { get; }

        // The whole size expressed in decimal byte units, as a fractional value.
        public double TotalKiloBytes { get; }
        public double TotalMegaBytes { get; }
        public double TotalGigaBytes { get; }
        public double TotalTeraBytes { get; }
        public double TotalPetaBytes { get; }
        public double TotalExaBytes { get; }

        // The whole size expressed in binary byte units, as a fractional value.
        public double TotalKiBiBytes { get; }
        public double TotalMeBiBytes { get; }
        public double TotalGiBiBytes { get; }
        public double TotalTeBiBytes { get; }
        public double TotalPeBiBytes { get; }
        public double TotalExBiBytes { get; }

        /// <summary>The whole size expressed in bits.</summary>
        public long TotalBits { get; }

        // The whole size expressed in decimal bit units, as a fractional value.
        public double TotalKiloBits { get; }
        public double TotalMegaBits { get; }
        public double TotalGigaBits { get; }
        public double TotalTeraBits { get; }
        public double TotalPetaBits { get; }
        public double TotalExaBits { get; }

        // The whole size expressed in binary bit units, as a fractional value.
        public double TotalKiBiBits { get; }
        public double TotalMeBiBits { get; }
        public double TotalGiBiBits { get; }
        public double TotalTeBiBits { get; }
        public double TotalPeBiBits { get; }
        public double TotalExBiBits { get; }

        // Factories: build a ByteSize from a value given in decimal byte units.
        public static ByteSize FromKiloBytes(double value);
        public static ByteSize FromMegaBytes(double value);
        public static ByteSize FromGigaBytes(double value);
        public static ByteSize FromTeraBytes(double value);
        public static ByteSize FromPetaBytes(double value);
        public static ByteSize FromExaBytes(double value);

        // Factories: build a ByteSize from a value given in binary byte units.
        public static ByteSize FromKiBiBytes(double value);
        public static ByteSize FromMeBiBytes(double value);
        public static ByteSize FromGiBiBytes(double value);
        public static ByteSize FromTeBiBytes(double value);
        public static ByteSize FromPeBiBytes(double value);
        public static ByteSize FromExBiBytes(double value);

        // Factories: build a ByteSize from a value given in decimal bit units.
        public static ByteSize FromKiloBits(double value);
        public static ByteSize FromMegaBits(double value);
        public static ByteSize FromGigaBits(double value);
        public static ByteSize FromTeraBits(double value);
        public static ByteSize FromPetaBits(double value);
        public static ByteSize FromExaBits(double value);

        // Factories: build a ByteSize from a value given in binary bit units.
        public static ByteSize FromKiBiBits(double value);
        public static ByteSize FromMeBiBits(double value);
        public static ByteSize FromGiBiBits(double value);
        public static ByteSize FromTeBiBits(double value);
        public static ByteSize FromPeBiBits(double value);
        public static ByteSize FromExBiBits(double value);

        // Unary arithmetic.
        public ByteSize Negate();
        public static ByteSize operator -(ByteSize b);
        public static ByteSize operator +(ByteSize b);

        // Addition.
        public ByteSize Add(ByteSize bs);
        public static ByteSize operator +(ByteSize b1, ByteSize b2);

        // Subtraction; a negative result represents a delta between two sizes.
        public ByteSize Subtract(ByteSize bs);
        public static ByteSize operator -(ByteSize b1, ByteSize b2);

        // Implicit conversions to and from a raw byte count, for interop with long-based APIs.
        public static implicit operator long(ByteSize sizeInBytes) => sizeInBytes.TotalBytes;
        public static implicit operator ByteSize(long sizeInBytes) => new ByteSize(sizeInBytes);

        // Equality.
        public override int GetHashCode();
        public override bool Equals(Object value);
        public bool Equals(ByteSize other);
        public static bool Equals(ByteSize b1, ByteSize b2);

        // Comparison operators.
        public static bool operator ==(ByteSize b1, ByteSize b2);
        public static bool operator !=(ByteSize b1, ByteSize b2);
        public static bool operator <(ByteSize b1, ByteSize b2);
        public static bool operator <=(ByteSize b1, ByteSize b2);
        public static bool operator >(ByteSize b1, ByteSize b2);
        public static bool operator >=(ByteSize b1, ByteSize b2);

        // Ordering (IComparable / IComparable<ByteSize>).
        public static int Compare(ByteSize b1, ByteSize b2);
        int IComparable.CompareTo(object obj);
        public int CompareTo(ByteSize other);

        // Formatting (IFormattable). The parameterless overload overrides object.ToString().
        public override string ToString();
        public string ToString(string format);
        public string ToString(string format, IFormatProvider formatProvider);

        // Parsing; the overload set mirrors TimeSpan's Parse/ParseExact/TryParse/TryParseExact.
        public static ByteSize Parse(string s);
        public static ByteSize Parse(string input, IFormatProvider formatProvider);
        public static ByteSize ParseExact(string input, string format, IFormatProvider formatProvider);
        public static ByteSize ParseExact(string input, string[] formats, IFormatProvider formatProvider);
        public static ByteSize ParseExact(string input, string format, IFormatProvider formatProvider, ByteSizeStyles styles);
        public static ByteSize ParseExact(string input, string[] formats, IFormatProvider formatProvider, ByteSizeStyles styles);
        public static bool TryParse(string s, out ByteSize result);
        public static bool TryParse(string input, IFormatProvider formatProvider, out ByteSize result);
        public static bool TryParseExact(string input, string format, IFormatProvider formatProvider, out ByteSize result);
        public static bool TryParseExact(string input, string[] formats, IFormatProvider formatProvider, out ByteSize result);
        public static bool TryParseExact(string input, string format, IFormatProvider formatProvider, ByteSizeStyles styles, out ByteSize result);
        public static bool TryParseExact(string input, string[] formats, IFormatProvider formatProvider, ByteSizeStyles styles, out ByteSize result);
    }
}

namespace System.Globalization
{
    /// <summary>
    /// Flags accepted by the ByteSize ParseExact / TryParseExact overloads.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the member meanings are inferred from their names - presumably
    /// AssumeNegative treats the input as a negative size, and AssumeBinary / AssumeDecimal
    /// choose whether an ambiguous prefix such as "KB" means 1024 or 1000. Confirm before documenting.
    /// </remarks>
    [Flags]
    public enum ByteSizeStyles
    {
        None = 0x00000000,
        AssumeNegative = 0x00000001,
        AssumeBinary = 0x00000020,
        AssumeDecimal = 0x00000040
    }

    /// <summary>
    /// Format provider for ByteSize values.
    /// </summary>
    public sealed class ByteSizeFormatInfo : IFormatProvider
    {
        // Values are distinct bits (UseDecimal and UseBits can be combined), hence [Flags].
        [Flags]
        private enum ByteSizeFormatFlags
        {
            None = 0x00000000,
            UseDecimal = 0x00000001, // Default binary e.g. 10 KiB
            UseBits = 0x00000002 // Default bytes
        }

        /// <summary>
        /// Returns this instance when a <see cref="ByteSizeFormatInfo"/> is requested.
        /// </summary>
        /// <param name="formatType">The type of the format object being requested.</param>
        /// <returns>
        /// This instance if <paramref name="formatType"/> is <see cref="ByteSizeFormatInfo"/>;
        /// otherwise <c>null</c> (including when <paramref name="formatType"/> is <c>null</c>).
        /// </returns>
        public object GetFormat(Type formatType)
        {
            // IFormatProvider contract: return null, rather than throw, for unsupported types.
            return formatType == typeof(ByteSizeFormatInfo) ? this : null;
        }
    }
}
jnm2 commented 7 years ago

I don't suppose a BitSize would make sense?

NickCraver commented 7 years ago

If we did the proposal, it should include also the kibibyte, mebi, gibibyte, etc. series. These are very commonly needed in networking, SSD/HDD storage, and other bits.

Bonus: which is used varies on storage by platform (e.g. macOS uses gibibyte, Windows uses gigabyte).

benaadams commented 7 years ago

There is ambiguity between Kilobyte as 1000 bytes and 1024 bytes (though Kibibyte does exist as @NickCraver points out)

Would defining both types differentiate this enough?

ZeBobo5 commented 7 years ago

Does it include Qubit / KiloQubit ... too ? :sweat_smile:

karelz commented 7 years ago

What's the Format value difference? (I assume just lack of domain knowledge from my side)

If you add the `Kibi` variants, we can run it by API review in 1h or next week.

Do you have a namespace in mind? System.Numerics, System, anything else?

krwq commented 7 years ago

As @karelz pointed - these probably should be Kibi (1024) not Kilo (1000) prefixes. I believe kilo used to be 1024 but it was causing too much confusion

karelz commented 7 years ago

API review - early comments:

jnm2 commented 7 years ago

Just being devil's advocate. Have you considered fractional bytes, as in 55.1 bytes/sec or 700.3 bytes/file?

mellinoe commented 7 years ago

Not that I am against the pattern or anything, but "typed unit structures" are a fairly alien concept to the BCL. There are many such places where this kind of abstraction could be good from a usability perspective, but it's not something we usually do. This also won't mix well with the many call sites in our libraries which use ints, longs, IntPtrs etc. to represent data sizes in byte units.

Have you considered fractional bytes, as in 55.1 bytes/sec or 700.3 bytes/file

Those are compound units, i.e. BytesPerSecond.

krwq commented 7 years ago

I also believe that ByteSize is not a best name as byte size is simply 8 bits. Better name would be IMO DataSize or DataSizeUnitsConverter (or something along these lines).

The Format enum - I might be missing some context - perhaps it is being more widely used and I'm not aware of that but I'm seeing this for the first time and its intention is not clear to me - I believe it should be something more verbose and more self describing.

Parse, TryParse, TryParseExact - is this for translating something like "10MB" to bytes or perhaps "10 MB" or maybe "10 megabytes" or maybe "10 megabytes and 20 kilobytes" - will this work with localized strings? IMO this is next level of abstraction and should be extracted from this review as I believe it opens too many questions and ambiguity.

I do not enjoy idea of Yottabytes being "final" unit and having different type - I'd rather change this to short and throw an Exception that higher numbers are not supported yet - this way we can extend this in case in couple of years some new technology will show up and Yottabytes hard drives will be accessible for everyone

TotalKilobytes - should this be BigInteger instead? Double is perhaps useful in many cases but feels like this should be some integer type

mellinoe commented 7 years ago

I also believe that ByteSize is not a best name as byte size is simply 8 bits. Better name would be IMO DataSize or DataSizeUnitsConverter (or something along these lines).

I think the whole point is to encode the units in the type. A DataSize parameter is just as ambiguous as a ulong parameter, at least at a glance.

benaadams commented 7 years ago

The Format enum - I might be missing some context

Whether to interpret 1 KB as decimal or binary (e.g. 1000 or 1024)

I do not enjoy idea of Yottabytes being "final" unit and having different type - I'd rather change this to short and throw an Exception that higher numbers are not supported yet

Turns out long can only support +/- 7 ExBiBytes, updating spec atm

benaadams commented 7 years ago

Updated spec; notably changed to long for byte differences

benaadams commented 7 years ago

This also won't mix well with the many call sites in our libraries which use ints, longs, IntPtrs etc. to represent data sizes in byte units.

@mellinoe should there be a ShortByteSize which is an int and a LongByteSize which is a long to provide this interoperablity?

ByteLength a better name than ByteSize?

jnm2 commented 7 years ago

ByteSizeF?

karelz commented 7 years ago

What's the point of so many constructors? How will they be used?

Why did you use short everywhere instead of long? The naming short KiloBytes vs. double TotalKiloBytes sounds weird. Is there any precedent for this pattern of having int vs. double differentiated with 'Total' prefix?

benaadams commented 7 years ago

What's the point of so many constructors? How will they be used?

Removed; same can be achieved, probably more usefully via chaining as can specify units e.g.

new ByteSize(10) + ByteSize.FromKiloBytes(10).Add(ByteSize.FromMeBiBytes(10))

The naming short KiloBytes vs. double TotalKiloBytes sounds weird. Is there any precedence to this pattern of having int vs. double differentiated with 'Total' prefix?

As used in TimeSpan e.g. Days vs TotalDays

KiloBytes is the modulus of its unit and can only return the range -1000 to 1000 e.g. 111,111,111 bytes would return 111

TotalKiloBytes is all the bytes expressed in kilobytes e.g. 1,111,111 bytes would return 1,111.111

karelz commented 7 years ago

KiloBytes is the modulus of its unit

Oh, I entirely missed that. I thought you were providing a utility function to give me KBs (even if there are millions of them - that's what 'Total*' gives me). You truly want it similar to TimeSpan. Now the 'Total' prefix makes perfect sense. I wonder: In TimeSpan one expects trouble as the base is changing (1000, vs. 60 vs. 24). I wonder if this API won't be too confusing ... (or is it just me?)

JonHanna commented 7 years ago

Such modulus properties as TimeSpan.Hours which can only be [-23, 23] and DateTime.Hour which can only be [0, 23] use int.

benaadams commented 7 years ago

Mostly I think they are useful when custom formatting; though ToString should probably handle that?

Happy to remove; will comment the block in suggested api

benaadams commented 7 years ago

@JonHanna next you'll tell me people use int rather than uint for their loop array indexer; due to negative array sizes 😉

Updated to int

danmoseley commented 7 years ago

@tarekgh @mellinoe can you give feedback on delta between this and ready for review?

tarekgh commented 7 years ago

@karelz @mellinoe do you have any more comments or you think the proposal looks good?

karelz commented 7 years ago

Fine from my side. I think it will spark again the discussion about where to put these APIs -- the idea that's rolling around is that CoreFX might not be best place. We might need something like CoreFXExtensions (disclaimer: I just made up the name).

mellinoe commented 7 years ago

Not that I am against the pattern or anything, but "typed unit structures" are a fairly alien concept to the BCL. There are many such places where this kind of abstraction could be good from a usability perspective, but it's not something we usually do. This also won't mix well with the many call sites in our libraries which use ints, longs, IntPtrs etc. to represent data sizes in byte units.

^ I still think this is something to at least talk about, and I also have a few more thoughts.

benaadams commented 7 years ago

It's called ByteSize but can hold negative values. What is the meaning of a negative byte size,

For diffs/deltas/not enough space: var change = fileSize1 - fileSize0;

how would such values be handled in the functions that are intended to use this type?

It may be valid, as above, if it needs to be > 0 by a test?

if (size <= ByteSize.Zero) throw new ArgumentOutOfRangeException(nameof(size));

karelz commented 7 years ago

API review:

Notes:

benaadams commented 7 years ago

It should be struct, otherwise what is the point?

Haha, yeah - oversight 😄

NoneGiven commented 6 years ago

As far as I'm aware, outside of disk capacity marketing, "1 kilobyte = 1000 bytes" is not used anywhere. Some programs accurately describe sizes as KiB, but programs that say KB generally still mean 1024 bytes. Case in point: Windows Explorer.

However you slice it, some people are going to find all those properties either ambiguous or redundant and will need to check the documentation (or hopefully, just the IntelliSense). I think this API could be made cleaner by omitting anything to do with 1000 byte multiples, and then biting the bullet and just calling the 1024 byte multiples kilo, mega, etc. even though that's not technically correct.

Additionally, I think the "modulus" properties are not useful, clutter the API surface, and lead to nonsensical behavior. In a TimeSpan, it makes sense to say that the Hours value is an integer between 0 and 23, because if it were more, that would count toward the next "place value" which is Days. However, a size in bytes does not have place values. 3565158 bytes is not 3 MiB, 409 KiB, and 614 B. I don't think anyone in history has used notation like that. 3565158 bytes is just 3.4 MiB (or 3481.6 KiB), and that's all the properties should expose.

(Also, what's up with the casing? KiloBytes is weird, KiBiBytes is worse.)

tarekgh commented 6 years ago

quick note, Parse functionality always a source of problems and I believe we should not repeat the same mistake. I think at least

        public static TimeSpan Parse(string s);
        public static ByteSize Parse(string input, IFormatProvider formatProvider);

should be removed and consumers should either use ParseExact or use TryParse/Exact.

tarekgh commented 6 years ago

other things, for formatting with cultures, does Windows support providing the needed unit names? (e.g. KB, GB...etc.). the framework doesn't carry the globalization data and always depends on the OS for that. so to be able to support that, we'll need to ensure we can get the needed data from the OS.

tarekgh commented 6 years ago

would be nice if you can add some code sample and expected results of executing such sample code.

ericwj commented 5 years ago

These KiKiByte names exist because hard disk manufacturers like to need statements in tiny letters about how big their products are. Just to save some 5, 7, 10, or 12.5% of money. Oh yeah and for Apple systems because they went with the deception.

For any programmer a Kilobyte has always been 1024 bytes, K, M, G, T in the context of bytes are powers of 1 << 10. That should be the default in any programming language. If ByteSize.FromKiloBytes(x) will do a division by 1000 bytes imho creates confusion and makes me sad.

Shouldn't the KiKiBytes units and names not just be removed and two types created which convert between the two instead, for example by a formatter if you do ToString()? The type names should indicate the type of KiloByte that they define, with the prettiest, cleanest name reserved for the one that uses powers of 1024, imho.

YohanSciubukgian commented 2 years ago

Any update to share on this proposal ?