dotnet / runtime

.NET is a cross-platform runtime for cloud, mobile, desktop, and IoT apps.
https://docs.microsoft.com/dotnet/core/
MIT License
14.55k stars 4.54k forks source link

Add string keyed dictionary ReadOnlySpan<char> lookup support #27229

Closed TylerBrinkley closed 4 weeks ago

TylerBrinkley commented 5 years ago

UPDATED 1/22/2024 by @stephentoub

Option 1 (recommended)

namespace System.Collections.Generic
{
    public static class CollectionExtensions
    {
+       public static Dictionary<TKey, TValue>.MappedLookup<TMappedKey> GetMappedLookup<TKey, TValue, TMappedKey>(
+           this Dictionary<TKey, TValue> dictionary)
+           where TKey : notnull where TMappedKey : allow ref struct;
+       public static bool TryGetMappedLookup<TKey, TValue, TMappedKey>(
+           this Dictionary<TKey, TValue> dictionary,
+           out Dictionary<TKey, TValue>.MappedLookup<TMappedKey> lookup)
+           where TKey : notnull where TMappedKey : allow ref struct;

+       public static HashSet<T>.MappedLookup<TMapped> GetMappedLookup<TMapped>(
+           this HashSet<T> set)
+           where TMapped : allow ref struct;
+       public static bool TryGetMappedLookup(
+           this HashSet<T> set, out HashSet<T>.MappedLookup<TMapped> lookup)
+           where TMapped : allow ref struct;
    }

    public class Dictionary<TKey, TValue>
    {
+       public readonly struct MappedLookup<TMappedKey> where TMappedKey : allow ref struct
+       {
+           public bool ContainsKey(TMappedKey key);
+           public bool TryAdd(TMappedKey  key, TValue value);
+           public bool TryGetValue(TMappedKey key, [MaybeNullWhen(false)] out TValue value);
+           public bool TryGetValue<TKey, TValue, TMappedKey>(TMappedKey  key, [NotNullWhen(true)] out TKey? actualKey, [MaybeNullWhen(false)] out TValue value);
+           public bool Remove(TMappedKey key);
+           public bool Remove(TMappedKey  key, [MaybeNullWhen(false)] out TValue value));
+       }
    }

    public class HashSet<T>
    {
+       public readonly struct MappedLookup<TMapped>
+       {
+           public bool Add(TMapped item);
+           public bool Contains(TMapped item);
+           public bool Remove(TMapped item);
+           public bool TryGetValue(TMapped equalValue, [MaybeNullWhen(false)] out T actualValue);
+       }
    }
}

namespace System.Runtime.InteropServices
{
    public static class CollectionsMarshal
    {
+       public static ref TValue? GetValueRefOrAddDefault<TKey, TValue, TMappedKey>(
+           Dictionary<TKey, TValue>.MappedLookup<TMappedKey> dictionary,
+           TMappedKey key, out bool exists)
+           where TKey : notnull where TMappedKey : allow ref struct;
+       public static ref TValue GetValueRefOrNullRef<TKey, TValue, TMappedKey>(
+           Dictionary<TKey, TValue>.MappedLookup<TMappedKey> dictionary,
+           TMappedKey key)
+           where TKey : notnull where TMappedKey : allow ref struct;
    }
}

namespace System.Collections.Concurrent
{
    public class ConcurrentDictionary<TKey, TValue>
+   {
+       public ConcurrentDictionary<TKey, TValue>.MappedLookup<TMappedKey> GetMappedLookup<TMappedKey>() 
+           where TMappedKey : allow ref struct;
+       public bool TryGetMappedLookup<TMappedKey>(
+           out ConcurrentDictionary<TKey, TValue>.MappedLookup<TMappedKey> lookup)
+           where TMappedKey : allow ref struct;

+       public readonly struct MappedLookup<TMappedKey>
+       {
+           public bool ContainsKey<TKey, TValue, TMappedKey>(TMappedKey key);
+           public bool TryAdd<TKey, TValue, TMappedKey>(TMappedKey key, TValue value);
+           public bool TryGetValue<TKey, TValue, TMappedKey>(TMappedKey key, [MaybeNullWhen(false)] out TValue value);
+           public bool TryGetValue<TKey, TValue, TMappedKey>(TMappedKey key, [NotNullWhen(true)] out TKey? actualKey, [MaybeNullWhen(false)] out TValue value);
+           public bool TryRemove<TKey, TValue, TMappedKey>(TMappedKey key, [MaybeNullWhen(false)] out TValue value);
+           public bool TryRemove<TKey, TValue, TMappedKey>(TMappedKey key, [NotNullWhen(true) out TKey? actualKey, [MaybeNullWhen(false)] out TValue value);
+       }
+   }
}

namespace System.Collections.Immutable
{
    public class FrozenDictionary<TKey, TValue>
    {
+       public FrozenDictionary<TKey, TValue>.MappedLookup<TMappedKey> GetMappedLookup<TMappedKey>()
+           where TMappedKey : allow ref struct;
+       public bool TryGetMappedLookup<TMappedKey>(
+           out FrozenDictionary<TKey, TValue>.MappedLookup<TMappedKey> lookup)
+           where TMappedKey : allow ref struct;

+       public readonly struct MappedLookup<TMappedKey>
+       {
+           public bool ContainsKey<TKey, TValue, TMappedKey>(TMappedKey key);
+           public bool TryGetValue<TKey, TValue, TMappedKey>(TMappedKey key, [MaybeNullWhen(false)] out TValue value);
+           public bool TryGetValue<TKey, TValue, TMappedKey>(TMappedKey key, [NotNullWhen(true)] out TKey? actualKey, [MaybeNullWhen(false)] out TValue value);
+       }
    }

    public class FrozenSet<T>
    {
+       public FrozenSet<T>.MappedLookup<TMapped> GetMappedLookup<TMapped>()
+           where TMappedKey : allow ref struct;
+       public bool TryGetMappedLookup<TMappedKey>(
+           out FrozenSet<T>.MappedLookup<TMapped> lookup)
+           where TMappedKey : allow ref struct;

+       public readonly struct MappedLookup<TMapped>
+       {
+           public bool Contains(TMapped item);
+           public bool TryGetValue(TMapped equalValue, [MaybeNullWhen(false)] out T actualValue);
+       }
    }
}

Pros of Option 1:

Cons of Option 1:

Option 2

namespace System.Collections.Generic
{
    public static class CollectionExtensions
    {
+       public static bool ContainsKey<TKey, TValue, TMappedKey>(
+           this Dictionary<TKey, TValue> dictionary, TMappedKey key)
+           where TKey : notnull where TMappedKey : allow ref struct
+       public static bool Remove<TKey, TValue, TMappedKey>(
+           this Dictionary<TKey, TValue> dictionary, TMappedKey key)
+           where TKey : notnull where TMappedKey : allow ref struct
+       public static bool Remove<TKey, TValue, TMappedKey>(
+           this Dictionary<TKey, TValue> dictionary, TMappedKey  key, [MaybeNullWhen(false)] out TValue value))
+           where TKey : notnull where TMappedKey : allow ref struct
+       public static bool TryAdd<TKey, TValue, TMappedKey>(
+           this Dictionary<TKey, TValue> dictionary, TMappedKey  key, TValue value)
+           where TKey : notnull where TMappedKey : allow ref struct
+       public static bool TryGetValue<TKey, TValue, TMappedKey>(
+           this Dictionary<TKey, TValue> dictionary, TMappedKey key, [MaybeNullWhen(false)] out TValue value)
+           where TKey : notnull where TMappedKey : allow ref struct
+       public static bool TryGetValue<TKey, TValue, TMappedKey>(
+           this Dictionary<TKey, TValue> dictionary, TMappedKey  key, [NotNullWhen(true)] out TKey? actualKey, [MaybeNullWhen(false)] out TValue value)
+           where TKey : notnull where TMappedKey : allow ref struct

+       public static bool Add<T, TMapped>(this HashSet<T> set, TMapped item) where TMapped : allow ref struct;
+       public static bool Contains<T, TMapped>(this HashSet<T> set, TMapped item) where TMapped : allow ref struct;
+       public static bool Remove<T, TMapped>(this HashSet<T> set, TMapped item) where TMapped : allow ref struct;
+       public static bool TryGetValue<T, TMapped>(this HashSet<T> set, TMapped equalValue, [MaybeNullWhen(false)] out T actualValue) where TMapped : allow ref struct;
    }
}

namespace System.Runtime.InteropServices
{
    public static class CollectionsMarshal
    {
+       public static ref TValue? GetValueRefOrAddDefault<TKey, TValue, TMappedKey>(Dictionary<TKey, TValue> dictionary, TMappedKey key, out bool exists) where TKey : notnull where TMappedKey : allow ref struct;
+       public static ref TValue GetValueRefOrNullRef<TKey, TValue, TMappedKey>(Dictionary<TKey, TValue> dictionary, TMappedKey key) where TKey : notnull where TMappedKey : allow ref struct;
    }
}

namespace System.Collections.Concurrent
{
+   public static class ConcurrentCollectionExtensions
+   {
+       public static bool ContainsKey<TKey, TValue, TMappedKey>(
+           this ConcurrentDictionary<TKey, TValue> dictionary, TMappedKey key)
+           where TKey : notnull;
+       public static bool TryAdd<TKey, TValue, TMappedKey>(
+           this ConcurrentDictionary<TKey, TValue> dictionary, TMappedKey key, TValue value)
+           where TKey : not null;
+       public static bool TryGetValue<TKey, TValue, TMappedKey>(
+           this ConcurrentDictionary<TKey, TValue> dictionary, TMappedKey key, [MaybeNullWhen(false)] out TValue value) 
+           where TKey : not null;
+       public static bool TryGetValue<TKey, TValue, TMappedKey>(
+           this ConcurrentDictionary<TKey, TValue> dictionary, TMappedKey key, [NotNullWhen(true)] out TKey? actualKey, [MaybeNullWhen(false)] out TValue value)
+           where TKey : not null where TMappedKey : allow ref struct;
+       public static bool TryRemove<TKey, TValue, TMappedKey>(
+           this ConcurrentDictionary<TKey, TValue> dictionary, TMappedKey key, [MaybeNullWhen(false)] out TValue value) 
+           where TKey : notnull;
+   }
}

namespace System.Collections.Immutable
{
    public class FrozenDictionary<TKey, TValue>
    {
+       public bool ContainsKey<TMappedKey>(TMappedKey key) where TKey : notnull where TMappedKey : allow ref struct;
+       public bool TryGetValue<TMappedKey>(TMappedKey key, [NotNullWhen(true)] out TKey? actualKey, [MaybeNullWhen(false)] out TValue value) where TKey : not null where TMappedKey : allow ref struct;
+       public bool TryGetValue<TMappedKey>(TMappedKey key, [MaybeNullWhen(false)] out TValue value) where TKey : not null where TMappedKey : allow ref struct;
    }

    public class FrozenSet<T>
    {
+       public bool Contains<TMapped>(TMapped item);
+       public bool TryGetValue<TMapped>(TMapped equalValue, [MaybeNullWhen(false)] out T actualValue);
    }
}

Pros of Option 2:

Cons of Option 2:

Regardless of approach:

namespace System.Collections.Generic
{
    // implemented by all string-related comparers, e.g. the comparer instances returned from StringComparer.Ordinal, EqualityComparer<string>.Default, etc.
+   public interface IMappedEqualityComparer<TMapped, T> where TMapped : allow ref struct
+   {
+       bool Equals(TMapped mapped, T other);
+       int GetHashCode(TMapped span);
+       TKey Map(TMapped mapped);
+   }
}

The above is based on the assumption that we will be getting a ref struct anti-constraint in for .NET 9 / C# 13. If that ends up not happening, this will be revamped to look almost the same, except using ReadOnlySpan<TKeyElement> instead of TMappedKey.


UPDATED 1/3/2024 by @stephentoub.

namespace System.Collections.Generic
{
    // implemented by all string-related comparers, e.g. the comparer instances
    // returned from StringComparer.Ordinal, EqualityComparer<string>.Default, etc.
+   public interface IMappedSpanEqualityComparer<TSpanElement, T>
+   {
+       bool Equals(ReadOnlySpan<TSpanElement> span, T other);
+       int GetHashCode(ReadOnlySpan<TSpanElement> span);
+       T Map(ReadOnlySpan<TSpanElement> span);
+   }

    public static class CollectionExtensions
    {
+       public static bool ContainsKey<TKey, TValue, TSpanElement>(this Dictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key) where TKey : notnull;
+       public static bool Remove<TKey, TValue, TSpanElement>(this Dictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key) where TKey : notnull;
+       public static bool Remove<TKey, TValue, TSpanElement>(this Dictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key, [MaybeNullWhen(false)] out TValue value) where TKey : notnull;
+       public static bool TryAdd<TKey, TValue, TSpanElement>(this Dictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key, TValue value) where TKey : not null;
+       public static bool TryGetValue<TKey, TValue, TSpanElement>(this Dictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key, [MaybeNullWhen(false)] out TValue value) where TKey : notnull;
+       public static bool TryGetValue<TKey, TValue, TSpanElement>(this Dictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key, [NotNullWhen(true)] out TKey? actualKey, [MaybeNullWhen(false)] out TValue value) where TKey : notnull;

+       public static bool Add<T, TSpanElement>(this HashSet<T> set, ReadOnlySpan<TSpanElement> item);
+       public static bool Contains<T, TSpanElement>(this HashSet<T> set, ReadOnlySpan<TSpanElement> item);
+       public static bool Remove<T, TSpanElement>(this HashSet<T> set, ReadOnlySpan<TSpanElement> item);
+       public static bool TryGetValue<T, TSpanElement>(this HashSet<T> set, ReadOnlySpan<TSpanElement> equalValue, [MaybeNullWhen(false)] out T actualValue);
    }
}

namespace System.Runtime.InteropServices
{
    public static class CollectionsMarshal
    {
+       public static ref TValue? GetValueRefOrAddDefault<TKey, TValue, TSpanElement>(Dictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key, out bool exists) where TKey : notnull;
+       public static ref TValue GetValueRefOrNullRef<TKey, TValue, TSpanElement>(Dictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key) where TKey : notnull;
    }
}

namespace System.Collections.Concurrent
{
+   public static class ConcurrentCollectionExtensions
+   {
+       public static bool ContainsKey<TKey, TValue, TSpanElement>(this ConcurrentDictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key) where TKey : notnull;
+       public static bool TryAdd<TKey, TValue, TSpanElement>(this ConcurrentDictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key, TValue value) where TKey : not null;
+       public static bool TryGetValue<TKey, TValue, TSpanElement>(this ConcurrentDictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key, [MaybeNullWhen(false)] out TValue value) where TKey : not null;
+       public static bool TryRemove<TKey, TValue, TSpanElement>(this ConcurrentDictionary<TKey, TValue> dictionary, ReadOnlySpan<TSpanElement> key, [MaybeNullWhen(false)] out TValue value) where TKey : notnull;
+   }
}

If .NET 9 and C# 13 and up supporting ref struct constraints, we would tweak these APIs to instead take a TMappingKey : ref struct rather than a ReadOnlySpan<TElement>. See https://github.com/dotnet/runtime/issues/27229#issuecomment-1537274804 for more details.

Open issues:


Moved from corefxlab#2438 as the proposed changes could not be implemented in a separate library.

Motivation

I would like to perform dictionary lookups using a ReadOnlySpan<char> value on a string keyed dictionary without having to allocate a string.

There are two different design approaches proposed, add extension methods to Dictionary<string, TValue> or add a new Type .

Extension Method Proposed API

 namespace System.Collections.Generic {
+    public static class DictionaryExtensions {
+        public static bool ContainsKey<TValue>(this Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key);
+        public static bool Remove<TValue>(this Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key);
+        public static bool TryGetValue<TValue>(this Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key, out TValue value);
+        public static T GetValue<TValue>(this Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key);
+    }
 }
 namespace System {
      public abstract class StringComparer : IComparer<string>, IEqualityComparer<string>, IComparer, IEqualityComparer {
+        public virtual bool Equals(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
+        public virtual int GetHashCode(ReadOnlySpan<char> obj);
      }
     public sealed class CultureAwareComparer : StringComparer {
+        public override bool Equals(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
+        public override int GetHashCode(ReadOnlySpan<char> obj);
     }
     public class OrdinalComparer : StringComparer {
+        public override bool Equals(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
+        public override int GetHashCode(ReadOnlySpan<char> obj);
     }
 }

Implementation Details

New Type Proposed API

 namespace System.Collections.Specialized {
+    public class StringKeyedDictionary<TValue> : Dictionary<string, TValue> {
+        public StringKeyedDictionary();
+        public StringKeyedDictionary(StringComparer comparer);
+        public StringKeyedDictionary(int capacity);
+        public StringKeyedDictionary(int capacity, StringComparer comparer);
+        public StringKeyedDictionary(IDictionary<string, TValue> dictionary);
+        public StringKeyedDictionary(IDictionary<string, TValue> dictionary, StringComparer comparer);
+        public StringKeyedDictionary(IEnumerable<KeyValuePair<string, TValue>> collection);
+        public StringKeyedDictionary(IEnumerable<KeyValuePair<string, TValue>> collection, StringComparer comparer);
+        public new StringComparer Comparer { get; }
+        public T this[ReadOnlySpan<char> key] { get; }
+        public bool ContainsKey(ReadOnlySpan<char> key);
+        public bool Remove(ReadOnlySpan<char> key);
+        public bool TryGetValue(ReadOnlySpan<char> key, out TValue value);
+    }
 }
 namespace System {
      public abstract class StringComparer : IComparer<string>, IEqualityComparer<string>, IComparer, IEqualityComparer {
+        public virtual bool Equals(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
+        public virtual int GetHashCode(ReadOnlySpan<char> obj);
      }
     public sealed class CultureAwareComparer : StringComparer {
+        public override bool Equals(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
+        public override int GetHashCode(ReadOnlySpan<char> obj);
     }
     public class OrdinalComparer : StringComparer {
+        public override bool Equals(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
+        public override int GetHashCode(ReadOnlySpan<char> obj);
     }
 }

Implementation Details

Possible Addition

While not specifically needed for this request a ReadOnlySpan<char> Compare overload should probably be added as well while we're updating StringComparer.

 namespace System {
     public abstract class StringComparer : IComparer, IEqualityComparer, IComparer<string>, IEqualityComparer<string> {
+        public virtual int Compare(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
     }
     public sealed class CultureAwareComparer : StringComparer {
+        public override int Compare(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
     }
     public class OrdinalComparer : StringComparer {
+        public override int Compare(ReadOnlySpan<char> x, ReadOnlySpan<char> y);
     }
 }

Open Questions

Discussion Summary.

We have two different design approaches, add a new Type or add extension methods to Dictionary<string, TValue>.

Here's a comparison of the pro's and con's of the different design approaches.

Extension Methods StringKeyedDictionary
Pros No new type introduced. No performance penalty due to casting the comparer.
Existing code exposing a string keyed Dictionary needs no changes to use which is helpful when they can't easily be changed. Indexer support.
More scalable
Cons Performance penalty of casting the comparer. New type introduced meaning an additional staple of the .NET ecosystem to know.
No indexer support, must use a GetValue method instead. Existing code that exposes a string keyed Dictionary may not easily be changed to using the new type.
Dictionary internals exposed with the internal modifier. Dictionary internals exposed with the private protected modifier.
Somewhat opaque to user as to knowing the comparer must derive from StringComparer.

Updates

svick commented 5 years ago

Would it be better to instead have a set of extension methods on Dictionary<string, T>? That way, the new methods could be used on any string-keyed Dictionary, not just on some special type.

Wraith2 commented 5 years ago

Dictionary<TKey,TValue> is implemented in Coreclr and the is a high bar for new types because of library size issues. Do you intend or this implementation not to be collision resistant or do you have a method to store the key without the span somehow? convert it into a ReadOnlyMemory?

benaadams commented 5 years ago

Would it be better to instead have a set of extension methods on Dictionary<string, T>?

How would it perform the search (using hashcode + equals) against the current Dictionary api surface?

do you have a method to store the key without the span somehow? convert it into a ReadOnlyMemory?

ROS<char>.ToString()

svick commented 5 years ago

@benaadams

Would it be better to instead have a set of extension methods on Dictionary<string, T>?

How would it perform the search (using hashcode + equals) against the current Dictionary api surface?

It wouldn't use the public API surface, it would use some new internal members on Dictionary. That's worse than StringKeyedDictionary, which would probably use new private protected members, but I think not by much.

benaadams commented 5 years ago

Ah.. so something like?

static class StringKeyedDictionaryExtensions 
{
    static bool ContainsKey<TValue>(this Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key);
    static bool Remove<TValue>(this Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key);
    static bool TryGetValue<TValue>(this Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key, out TValue value);
}
Wraith2 commented 5 years ago

Right, I think I missed the point then. The idea is to have the standard dictionary at the back end but methods which allow you to use ROS as the key to search for avoiding the allocation to check if something exists. So it needs access to the internals because the comparer needs to understand that it can do ROS-string comparisons which aren't currently possible.

svick commented 5 years ago

@benaadams Yup. Plus probably an extension method version of the indexer:

public static T GetValue<TValue>(this Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key);

Though that's an advantage of the StringKeyedDictionary design: it can have a real indexer.

safern commented 5 years ago

Also if they're extension methods we would need to provide overrides to specify a custom Comparer that can compare ROS right? Or that would be achieved by adding the StringEqualityComparer type and interface and in the extension methods check if Comparer is IStringEqualityComparer then use the ROS overrides if not the string ones?

Also with override methods Adding values through the indexer or TryAdd/Add apis, we wouldn't be able to take advantage of the ROS comparisons right?

benaadams commented 5 years ago

Also if they're extension methods we would need to provide overrides to specify a custom Comparer that can compare ROS right?

Dictionary is based on IEqualityComparer<T> so not sure if it could work... (even if default interfaces implementations pulling up the slack, not sure how it could work for other types as a ROS comparison would be exposed on the interface, which probably wouldn't make sense?)

Also with override methods Adding values through the indexer or TryAdd/Add apis, we wouldn't be able to take advantage of the ROS comparisons right?

Can covert the string to a ROS at nominal cost for the comparison; if it didn't exist the ROS<char> could be turned into a string for persistent key use; so that wouldn't necessarily be a problem.

safern commented 5 years ago

Dictionary is based on IEqualityComparer so not sure if it could work... (even if default interfaces implementations pulling up the slack, not sure how it could work for other types as a ROS comparison would be exposed on the interface, which probably wouldn't make sense?)

Yes, but people could just use StringEqualityComparer and since it would implement IEqualityComparer it can be passed through the constructor. However in order to use the ROS overloads if we go through the Extension methods route we would need to cast the Comparer to IStringEqualityComparer

I think if we want to use the ROS comparison features and other of its features through a custom Comparer we should not use Extension methods, unless I’m missing something we could do in a different way without overriding implementation.

TylerBrinkley commented 5 years ago

Here's a comparison of the pro's and con's of the different design approaches.

Extension Methods StringKeyedDictionary
Pros No new type introduced. No performance penalty due to casting the comparer.
Existing code exposing a string keyed Dictionary needs no changes to use which is helpful when they can't easily be changed. Indexer support.
Cons Performance penalty of casting the comparer. New type introduced meaning an additional staple of the .NET ecosystem to know.
No indexer support, must use a GetValue method instead. Existing code that exposes a string keyed Dictionary may not easily be changed to using the new type.
Dictionary internals exposed with the internal modifier. Dictionary internals exposed with the private protected modifier.
Somewhat opaque to user as to knowing it uses IStringEqualityComparer.

Honestly, I'm kind of torn on this. Adding a new foundational Type to the ecosystem is costly in terms of what developers need to know. I'm feeling this even more now with the addition of all the types introduced with Spans and Pipelines. Additionally, I could easily see this expanding to others such as HashSet, SortedDictionary, SortedSet, OrderedDictionary & OrderedSet if they ever get implemented, and ConcurrentDictionary.

If the performance isn't an issue I now think I'd prefer the extension method route.

safern commented 5 years ago

@TylerBrinkley thanks for the great summary, I just updated the issue description and included this comparison in there. Will mark as ready for review now.

TylerBrinkley commented 5 years ago

I've updated the first post to correct formatting and make it a little easier to understand.

cdmihai commented 5 years ago

Overloads for ConcurrentDictionary would also be nice :)

terrajobst commented 5 years ago

Video

On the surface, the extension approach looks more appealing because it would allow the lookups to work against any instance of a dictionary (at least in principle, the current design only works if the dictionary was instantiated with a comparer that implements IStringEqualityComparer).

However, this also complicates the implementation and might have performance implications if we have to expose more methods that only make sense when TKey happens to be string. Also, one could argue that in the hot paths where people would be inclined to lookup with spans that they do in fact control their dictionary and whether they need to pass in the correct comparer or use a derived type of Dictionary<,> doesn't matter.

I totally buy the scenario, but adding a new collection type (even if derived from Dictionary<,>) feels heavy handed, but maybe I'm overthinking this. Maybe just putting it in S.C.Specialized is good enough.

What do others think?

jkotas commented 5 years ago

This is trying to workaround the fundamental problem that ref-like types cannot be used as keys in collections today. We will hit the same problem for all keyed collections and many ref-like types. @cdmihai mentioned that ConcurrentDictionary would be nice too. The same can be said for HashMap and other collections. And the same can be said for Utf8 strings, Spans and other ref-like types. If we take this proposal, where are we going to stop – are we going to add all combinations like StringKeyedHashMap, SpanKeyedConcurrentDictionary?

I think we need to have ref struct constraint first to make this scalable.

TylerBrinkley commented 5 years ago

I agree scalability is an issue if we add new types which is why I'm now in favor of the extension methods route which should scale more favorably.

Even if the ref struct constraint is added I don't see how we can use that for this use case in a non-stack context such as non-ref-like class fields or static fields.

Joe4evr commented 5 years ago

This is trying to workaround the fundamental problem that ref-like types cannot be used as keys in collections today.

It's not so much using ReadOnlySpan<char> as the dictionary key, but doing a lookup with it without copy-allocating the slice it is wrapping.

benaadams commented 5 years ago

Is what's equatable/alternative forms; so string <=> ReadOnlySpan<char>; however what about string <=> ReadOnlySpan<byte>, Encoding.UTF8 etc

d3v3us commented 5 years ago

I would change NameValueCollection, making more effective version, passing allowDublicates parameter in constructor. For dictionary i think extension method will fit completely.

TylerBrinkley commented 5 years ago

what about string <=> ReadOnlySpan<byte>, Encoding.UTF8 etc

Another reason the extension methods route makes better sense.

What if we instead relied on the comparer to derive from StringComparer and not define an interface? That way we can easily expand support through adding virtual methods without having to define new interfaces or else wait for default interface implementation support.

Joe4evr commented 5 years ago

what about string <=> ReadOnlySpan<byte>, Encoding.UTF8 etc

What if we instead relied on the comparer to derive from StringComparer and not define an interface?

Sounds reasonable. I can already see use-cases for wanting to determine the value equality between a string and utf8string.

//using regular old UTF-16 strings as a key due to framework reasons,
//even though 99.9% of the time, it doesn't go outside UTF-8 range
private readonly Dictionary<string, Foo> _fooMap = new Dictionary<string, Foo>(StringComparer.OrdinalIgnoreCase);

//what this proposal already boils down to:
void M(ReadOnlySpan<char> sliceOfString)
{
    if (_fooMap.TryGetValue(sliceOfString, out var foo)
    {
        //....
    }
}

//but when incoming data could also be UTF-8:
void M(ReadOnlySpan<byte> sliceOfUtf8String) //or 'ROS<Char8>' if that becomes the more appropriate thing
{
    if (_fooMap.TryGetValue(sliceOfUtf8String, out var foo)
    {
        //....
    }
}
TylerBrinkley commented 5 years ago

Really appreciated hearing the discussion of this in the .NET Design Reviews GitHub Triage.

I think this should go back to triage to decide the best design approach. Again, personally I'd much prefer the extension method route.

DaZombieKiller commented 5 years ago

What if an API was added to Dictionary<TKey, TValue> that allows you to retrieve a bucket? For example (where DictionaryBucket is a new type, mainly to ensure that it's read-only):

DictionaryBucket<TValue> Dictionary<TKey, TValue>.GetBucket(int hashCode) or IReadOnlyList<TValue> Dictionary<TKey, TValue>.GetBucket(int hashCode)

This way, the extension methods could operate on public APIs, and would just need to iterate over the bucket and compare values.

I'm not sure how realistic this would be, and I'm absolutely certain that there are significant problems with this that I haven't considered, but I figured I'd throw the idea out there.

Wraith2 commented 5 years ago

It would be exposing and locking implementation details of the dictionary, it would prevent reimplementing using another method if a better one were found in future.

TylerBrinkley commented 5 years ago

As Dictionary doesn't currently support this scenario I was looking at implementing this in a custom dictionary and am having trouble getting the hashcode of the span. Are there any API's currently exposed which allows the retrieval of the hashcode of a ReadOnlySpan<char> in the Ordinal or OrdinalIgnoreCase sense without converting to a string?

jkotas commented 5 years ago

Look at the APIs introduced in https://github.com/dotnet/corefx/issues/31302

TylerBrinkley commented 5 years ago

@jkotas Thanks. Since .NET Standard 2.1 is including some API's introduced for .NET Core 3.0 would including this API be considered as well?

jkotas commented 5 years ago

.NET Standard 2.1 is very close to done at this point. We do not plan to add more APIs to it.

APIs introduced in .NET Core 3.0 are not included in .NET Standard 2.1 by default. Exceptions were made for a few APIs, mostly the ones required to support C# 8.0.

TylerBrinkley commented 5 years ago

@jkotas Thanks. I guess I'll resort to multi-targeting .NET Core App as well then.

scalablecory commented 4 years ago

I would prefer a generalized "proxy key lookup" support on Dictionary/etc. than specializing it for strings.

TylerBrinkley commented 4 years ago

@scalablecory what use case would you have for a generalized "proxy key lookup"? Also, what kind of API would you propose that would address this use case given the nature of the stack-only ReadOnlySpan<char>?

scalablecory commented 4 years ago

@TylerBrinkley we will need to combine a ref struct generic constraint with a new proposal for heterogeneous lookup on keyed containers.

Special-casing some support for String would be a band-aid that we can't remove later on, and I'd rather wait for language changes needed to make a proper generalized change.

TylerBrinkley commented 4 years ago

@scalablecory Are you thinking of something like this?

public interface IAltEqualityComparer<T, TAlt> : IEqualityComparer<T> {
    int GetHashCode(TAlt value);
    bool Equals(TAlt value, T other);
}
public interface IAltRefEqualityComparer<T, TAlt> : IEqualityComparer<T> where TAlt : ref struct {
    int GetHashCode(TAlt value);
    bool Equals(TAlt value, T other);
}
public class Dictionary<TKey, TValue> {
    public bool ContainsKey<TAltKey>(TAltKey altKey);
    public bool ContainsKeyRef<TAltKey>(TAltKey altKey) where TAltKey : ref struct;
}

or this?

public delegate int GetHashCodeDelegate<TAltKey, TKey>(TAltKey altKey, IEqualityComparer<TKey> comparer);
public delegate bool EqualsDelegate<TAltKey, TKey>(TAltKey altKey, TKey key, IEqualityComparer<TKey> comparer);
public delegate int GetHashCodeDelegateRef<TAltKey, TKey>(TAltKey altKey, IEqualityComparer<TKey> comparer) where TAltKey : ref struct;
public delegate bool EqualsDelegateRef<TAltKey, TKey>(TAltKey altKey, TKey key, IEqualityComparer<TKey> comparer) where TAltKey : ref struct;

public class Dictionary<TKey, TValue> {
    public bool ContainsKey<TAltKey>(TAltKey altKey, GetHashCodeDelegate<TAltKey, TKey> getHashCode, EqualsDelegate<TAltKey, TKey> equals);
    public bool ContainsKey<TAltKey>(TAltKey altKey, GetHashCodeDelegateRef<TAltKey, TKey> getHashCode, EqualsDelegateRef<TAltKey, TKey> equals) where TAltKey : ref struct;
}
scalablecory commented 4 years ago

@TylerBrinkley I'm imagining a hybrid of the two.

C# changes - ref struct constraint

Need a better name for this, and a C# proposal. Maybe a CLR proposal too...

The constraint would enforce that values of the type are only used in ways which are compatible with ref struct types.

So, you can pass in either a normal type or a ref struct, but it can't be stored in a non-stack field, can't be captured or used in async, etc.

Library changes

public interface IEqualityComparer<T, TAlt> where TAlt: ref struct
{
    int GetHashCode(TAlt value);
    bool Equals(TAlt value, T other);
}

class StringComparer : IEqualityComparer<string, ReadOnlySpan<char>>
{
    public int GetHashCode(ReadOnlySpan<char> value);
    public bool Equals(ReadOnlySpan<char> value, string other);
}
// also Utf8String / Char8

class Dictionary<TKey, TValue>
{
    public bool ContainsKey<TAltKey>(TAltKey key, IEqualityComparer<TKey, TAltKey> comparer) where TAltKey: ref struct;
    public bool TryGetValue<TAltKey>(TAltKey key, IEqualityComparer<TKey, TAltKey> comparer, out TValue value) where TAltKey: ref struct;
    public bool Remove<TAltKey>(TAltKey key, IEqualityComparer<TKey, TAltKey> comparer) where TAltKey: ref struct;
    public bool Remove<TAltKey>(TAltKey key, IEqualityComparer<TKey, TAltKey> comparer, out TValue value) where TAltKey: ref struct;
}
// plus ConcurrentDictionary

class HashSet<T>
{
    public bool Contains<TAlt>(TAlt item, IEqualityComparer<T, TAlt> comparer) where TAlt: ref struct;
    public bool Remove<TAlt>(TAlt item, IEqualityComparer<T, TAlt> comparer) where TAlt: ref struct;
    public bool TryGetValue<TAlt>(TAlt item, IEqualityComparer<T, TAlt> comparer, out T actualValue) where TAlt: ref struct;
}

// IComparer, SortedDictionary, SortedSet? Not sure if worth it.

Risks

Passing in an incompatible comparer might cause these to return an incorrect value. Usage of these will only happen in more advanced scenarios, so I think this is acceptable.

TylerBrinkley commented 4 years ago

I really don't like passing in the comparer in the method call due to the incompatible comparer issues and general usability. Is there a reason you don't suggest having the methods just try to type cast the collection's comparer?

scalablecory commented 4 years ago

I really don't like passing in the comparer in the method call due to the incompatible comparer issues and general usability. Is there a reason you don't suggest having the methods just try to type cast the collection's comparer?

I agree, it's not good for usability and I don't like it. Mainly looking for feedback :).

I'd really like to find ways to avoid a cast, because: a) adds a cast in what will be used in perf-sensitive code. (may or may not be significant depending on type/data -- need to measure) b) moves usage errors from compile-time to run-time.

To look at an existing implementation, C++ has something in between these two approaches:

MihaMarkic commented 4 years ago

There is also ImmutableDictionary<TKey, TValue> out there and other types. Implementing a specialized dictionary type wouldn't solve all the others.

DaZombieKiller commented 4 years ago

Now that we have default interface implementations, would it make sense for this to be done by adding new APIs to IDictionary/IReadOnlyDictionary (among others) with default implementations that allocate and call the regular methods?

GrabYourPitchforks commented 4 years ago

Much of the discussion so far has been predicated around that this API would have a few constraints, and checking for those constraints at runtime could prove costly. The big constraint is that to avoid surprise allocations at runtime, the comparer provided to the Dictionary<string, ...> instance must:

One possible way to solve this is to introduce a new type, as below.

namespace System.Collections.Generic
{
    // I am terrible at naming
    public class SpanOfCharDictionary<TValue> : Dictionary<string, TValue>
    {
        public SpanOfCharDictionary(ISpanOfCharEqualityComparer comparer) : base(comparer) { }

        public bool ContainsKey(ReadOnlySpan<char> key);
        public bool TryGetValue(ReadOnlySpan<char> key, out TValue? value);
        public TValue this[ReadOnlySpan<char> value] { get; }
        /* other read-only methods - insertions still use 'string' */
    }
}

If you want to perform ReadOnlySpan<char> lookups against a dictionary, it'd have to be against one of these specific instances, not against any old Dictionary<string, ...>. If you're creating the dictionary within your own code this is viable, but it might not be viable if you're consuming dictionaries that other components have given you.

Another way is to introduce a wrapper that could be formed around an existing Dictionary<string, ...> instance. The necessary checks would take place at the time the wrapper is created. If you're able to create an instance of the wrapper, then you could call the methods very cheaply since they'd just pass-through.

namespace System.Collections.Generic
{
    public static class DictionaryExtensions
    {
        // returns false / null if underlying dict instance wasn't created using a comparer that allows for quick span lookup
        public static bool TryCreateSpannyWrapper<TValue>(this Dictionary<string, TValue> dictionary, out SpanOfCharDictionaryWrapper<TValue>? wrapper);
    }

    public sealed class SpanOfCharDictionaryWrapper<TValue>
    {
        // use the extension method above to get an instance of this type
        internal SpanOfCharDictionaryWrapper();

        public bool ContainsKey(ReadOnlySpan<char> key);
        public bool TryGetValue(ReadOnlySpan<char> key, out TValue? value);
        public TValue this[ReadOnlySpan<char> value] { get; }
        /* other read-only methods */
    }
}

The wrapper would need to be created on a per-dictionary basis. The idea is that you'd create it once (per instance), then store the instance alongside wherever you're already storing the backing dictionary instance.

It'd be inefficient to create these wrappers if you're only performing one or two lookups against any given dictionary. But if you're using ROS<char>, I think it's likely that you're using long-lived dictionary instances, so it'd likely be worthwhile to create these wrappers.

Joe4evr commented 4 years ago

Some of my thoughts about this, primarily from @GrabYourPitchforks's comment:

EDIT: Today I realized this intersection could even make use of default implementations, since there should never really be a reason that the IEC<string> methods don't simply forward to the span-based methods when someone decides they want to implement ISpanOfCharEqualityComparer. I get that existing implementors can't remove their old methods due to binary compat, but for any new implementors it'd be a huge boon.

scalablecory commented 3 years ago

I'd love for this to support more than string. I would use it for ReadOnlySpan<byte> and byte[], for instance.

akraines commented 3 years ago

Any updates on this?

eiriktsarpalis commented 2 years ago

Related to #27039.

KeithLRobertson commented 2 years ago

The proposal here is to support Dictionary lookups using ROSC. (Here I'll use ROSC and ROMC as shorthand for ReadOnlySpan and ReadOnlyMemory of Char.) This only makes sense if the key type is "stringlike", but library changes like this should apply generally, not just for a specific case. How to expand this concept to apply for any Dictionary key type?

We can generalize this proposal by noting that we want to look up the key value with one key type (here, ROSC), but since we can't store a Span in the Dictionary structures, we will store using a different key type (String or ROMC). So we generalize Dictionary to have two key types, one for lookup and one for storage, IDictionary<KL, KS, V>. A Dictionary's EqualityComparer will be based on the lookup type KL; it will also need two Func delegates to convert between KL and KS. Used for lookups, KS to KL should be fast and allocation-free. KL to KS only happens when a new key gets stored. (For ROSC, this could allocate a String, or the delegate might allocate space in a global char buffer (under lock), copy the ROSC data to it, and return the ROMC key.)

Dictionary members for lookups accept parameters of KL (i.e. Item[], Add, ContainsKey, Remove, TryGetValue). Those which view the dictionary's data use KS; IDictionary<KL, KS, V> : ICollection<KeyValuePair<KS, V>> so GetEnumerator() returns IEnumerator<KeyValuePair<KS, V>> , and Keys has type ICollection<KS>. GetOrAdd's valueFactory would also map from KS so that the value can be or can contain the stored key object.

Our current dictionary types reduce to this case by using the same type for KL and KS, i.e. IDictionary<K, V> : IDictionary<K, K, V>, and the identity function (K k) => k for both conversions. That generalizes the dictionary concept to allow lookups based on a type which is different from the stored key type.

Now "all we need" is support for Span as a generic parameter. We would need a constraint that allows ref struct types (as well as objects and structs), perhaps using ref keyword (for reference-able), e.g. void Example<T>() where T : ref {}. A formal ref parameter can only be used for method parameters and locals, and as an actual generic type that's also ref. They cannot be used for fields or properties (unless "write-only). I suppose they could be used for method returns, given data flow tracking, just as a method can return a Span type but can't return a span referencing its stack frame. Action and Func type parameters will be ref (including the Func return type!, as needed for the above conversion KS => KL). Existing type parameters that meet the constraint, like T in IEqualityComparer<T> can be made ref.

Oh yes, and we simply need to re-implement all the dictionary types (most usefully, ConcurrentDictionary) to be IDictionary<KL, KS, V>. I'm being cheeky here with the phrasing, of course. But really, I don't think this part is difficult or risky. These would be new types, not replacements, so not affecting existing clients. And mostly it would involve copying existing files, replacing a few types, and adding conversion functions. (It would add the two-key interface to existing dictionary types which may confuse some existing applications which reflect on them.)

In the end, I think the most useful proposal here is the ref constraint allowing ref struct types as generic parameters. This could support many, many other scenarios.

Support for lookups by a different type from what's stored is interesting, but perhaps not as broadly useful. We can't convert Span to Memory, but when parsing we could simply use Memory to represent parts of a buffer instead of Span, though it's slightly less efficient. One issue with my Dictionary proposal is that when we do store a new entry, we have to create KS from KL even when really not needed. With IDictionary<Span<Char>, String, Int32> universe; for example, universe["ultimateAnswer".AsSpan()] = 42 would create a new String key instance when a perfectly good String already existed, and there's no way to avoid it.

pCYSl5EDgo commented 2 years ago

There is a System.Runtime.InteropServices.CollectionsMarshal extension type for Dictionary<TKey, TValue> as I've written in #63827 . The API signatures should be the same.

Proposed API

using System.Collections.Generic;

namespace System.Runtime.InteropServices;

public static class CollectionsMarshal
{
    public static ref TValue GetValueRefOrNullRef<TValue>(Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key)
        => ref dictionary.FindValue(key, FromReadOnlySpanCharToString, MemoryExtensions.AsSpan);

    public static ref TValue? GetValueRefOrAddDefault<TValue>((Dictionary<string, TValue> dictionary, ReadOnlySpan<char> key, out bool exists)
        => ref Dictionary<string, TValue>.CollectionsMarshalHelper.GetValueRefOrAddDefault(dictionary, key, out exists, FromReadOnlySpanCharToString, MemoryExtensions.AsSpan);

    public static ref TValue GetValueRefOrNullRef<TKeyElement, TValue>(Dictionary<TKeyElement[], TValue> dictionary, ReadOnlySpan<TKeyElement> key)
        => ref dictionary.FindValue(key, FromReadOnlySpanToArray, MemoryExtensions.AsSpan);

    public static ref TValue? GetValueRefOrAddDefault<TKeyElement, TValue>((Dictionary<TKeyElement[], TValue> dictionary, ReadOnlySpan<char> key, out bool exists)
        => ref Dictionary<TKeyElement[], TValue>.CollectionsMarshalHelper.GetValueRefOrAddDefault(dictionary, key, out exists, FromReadOnlySpanToArray, MemoryExtensions.AsSpan);

    private static string FromReadOnlySpanCharToString(ReadOnlySpan<char> value) => new string(value);
    private static T[] FromReadOnlySpanToArray<T>(ReadOnlySpan<T> value) => value.ToArray();
}
namespace System.Collections.Generic;

// internal or public
interface IReadOnlySpanEqualityComparer<TElement>
{
    bool Equals(ReadOnlySpan<TElement> x, ReadOnlySpan<TElement> y);
    int GetHashCode(ReadOnlySpan<TElement> obj);
}

internal delegate TEnumerable ConverterFuncFromReadOnlySpan<TEnumerable, TElement>(ReadOnlySpan<TElement> value);
internal delegate ReadOnlySpan<TElement> ConverterFuncToReadOnlySpan<TEnumerable, TElement>(TEnumerable value);

public sealed class ArraySequenceEqualityComparer<T> : IEqualityComparer<T[]>, IReadOnlySpanEqualityComparer<T>
{
    public bool Equals(ReadOnlySpan<T> x, ReadOnlySpan<T> y) => MemoryExtensions.SequenceEqual(x, y);
    public bool Equals(T[]? x, T[]? y) => (x is null && y is null) || (x is not null && y is not null &&  MemoryExtensions.SequenceEqual(new ReadOnlySpan<T>(x), new ReadOnlySpan<T>(y)));
    public int GetHashCode(ReadOnlySpan<T> obj) => obj.Length;
    public int GetHashCode(T[] obj) => obj.Length;
}
Dictionary implementation is like this.
```C# public class Dictionary { internal ref TValue FindValue(ReadOnlySpan keySpan, ConverterFuncFromReadOnlySpan converterFrom, ConverterFuncToReadOnlySpan converterTo) { ref Entry entry = ref Unsafe.NullRef(); if (_buckets != null) { Debug.Assert(_entries != null, "expected entries to be != null"); IEqualityComparer? comparer = _comparer; if (comparer is IReadOnlySpanEqualityComparer readOnlySpanEqualityComparer) { uint hashCode = readOnlySpanEqualityComparer.GetHashCode(keySpan); int i = GetBucket(hashCode); Entry[]? entries = _entries; uint collisionCount = 0; i--; // Value in _buckets is 1-based; subtract 1 from i. We do it here so it fuses with the following conditional. do { // Should be a while loop https://github.com/dotnet/runtime/issues/9422 // Test in if to drop range check for following array access if ((uint)i >= (uint)entries.Length) { goto ReturnNotFound; } entry = ref entries[i]; if (entry.hashCode == hashCode && readOnlySpanEqualityComparer.Equals(converterTo(entry.key), key)) { goto ReturnFound; } i = entry.next; collisionCount++; } while (collisionCount <= (uint)entries.Length); // The chain of entries forms a loop; which means a concurrent update has happened. // Break out of the loop and throw, rather than looping forever. goto ConcurrentOperation; } else { TKey key = converterFrom(keySpan); if (comparer is null) { /* ellipsis */ } else { /* ellipsis */ } } } goto ReturnNotFound; ConcurrentOperation: ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); ReturnFound: ref TValue value = ref entry.value; Return: return ref value; ReturnNotFound: value = ref Unsafe.NullRef(); goto Return; } internal static class CollectionsMarshalHelper { public static ref TValue? GetValueRefOrAddDefault(Dictionary dictionary, ReadOnlySpan keySpan, out bool exists, ConverterFuncFromReadOnlySpan converterFrom, ConverterFuncToReadOnlySpan converterTo); } } ```
StringComparer implements `IReadOnlySpanEqualityComparer` like this.
```C# namespace System; public abstract class StringComparer : IReadOnlySpanEqualityComparer { public abstract bool Equals(ReadOnlySpan x, ReadOnlySpan y); public abstract int GetHashCode(ReadOnlySpan obj); } public sealed class CultureAwareComparer : StringComparer { public override bool Equals(ReadOnlySpan x, ReadOnlySpan y) => _compareInfo.Compare(x, y, _options) == 0; public override int GetHashCode(ReadOnlySpan obj) => _compareInfo.GetHashCode(obj, _options); } public class OrdinalComparer : StringComparer { public override bool Equals(ReadOnlySpan x, ReadOnlySpan y) { if (_ignoreCase) { if (x.Length != y.Length) { return false; } else if (x.IsEmpty) { return true; } return System.Globalization.Ordinal.EqualsIgnoreCase(ref x[0], ref y[0], x.Length); } return MemoryExtensions.SequenceEqual(x, y); } public override int GetHashCode(ReadOnlySpan obj) { if (obj.IsEmpty) { return _ignoreCase ? string.Empty.GetHashCodeOrdinalIgnoreCase() : string.Empty.GetHashCode(); } else if (_ignoreCase) { ulong seed = Marvin.DefaultSeed; return Marvin.ComputeHash32OrdinalIgnoreCase(ref obj[0], obj.Length /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32)); } else { ulong seed = Marvin.DefaultSeed; return Marvin.ComputeHash32(ref Unsafe.As(ref obj[0]), (uint)obj.Length * 2 /* in bytes, not chars */, (uint)seed, (uint)(seed >> 32)); } } } internal sealed class OrdinalCaseSensitiveComparer : OrdinalComparer { public override bool Equals(ReadOnlySpan x, ReadOnlySpan y) => MemoryExtensions.SequenceEqual(x, y); public override int GetHashCode(ReadOnlySpan obj) { if (obj.IsEmpty) { return string.Empty.GetHashCode(); } ulong seed = Marvin.DefaultSeed; return Marvin.ComputeHash32(ref Unsafe.As(ref obj[0]), (uint)obj.Length * 2 /* in bytes, not chars */, (uint)seed, (uint)(seed >> 32)); } } internal sealed class OrdinalIgnoreCaseComparer : OrdinalComparer { public override bool Equals(ReadOnlySpan x, ReadOnlySpan y) { if (x.Length != y.Length) { return false; } else if (x.IsEmpty) { return true; } return System.Globalization.Ordinal.EqualsIgnoreCase(ref x[0], ref y[0], x.Length); } public override int GetHashCode(ReadOnlySpan obj) { if (obj.IsEmpty) { return string.Empty.GetHashCodeOrdinalIgnoreCase(); } ulong seed = Marvin.DefaultSeed; return Marvin.ComputeHash32OrdinalIgnoreCase(ref obj[0], obj.Length /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32)); } } ```

Discussion

The name of IReadOnlySpanEqualityComparer<T> can be ISpanEqualityComparer<T>. Which is appropriate?

pCYSl5EDgo commented 2 years ago

When StringComparer implements IReadOnlySpanEqualityComparer<char>, new abstract methods(Equals and GetHashCode) are necessary. As Microsoft.CodeAnalysis gets Equals via reflection, the update is needed.

aarondandy commented 2 years ago

Just wanted to provide some input from my experience after making my own (terrible) dictionary implementation to support ReadOnlySpan<char>. I have an overload bool TryGetValue(ReadOnlySpan<char> key, out string actualKey, out TValue value) that prevents a redundant string allocation in my case. I'm not sure this fits in well with the proposal but maybe the additional use case will be helpful to the discussion. Also, not having this ability caused me to pollute the world with yet another terrible dictionary implementation written by somebody who has no idea what they are doing 🤣.

davidfowl commented 2 years ago

Is there anything holding this feature up from making progress?

stephentoub commented 2 years ago

Is there anything holding this feature up from making progress?

https://github.com/dotnet/runtime/issues/27229#issuecomment-416742603

And if just for dictionary, a coherent/consistent solution without significant rough edges or performance cliffs.