Open mijicd opened 4 years ago
cc @alexknvl
It would definitely allow defining instances for a greater variety of "industrial strength" hashing algorithms.
However, it would also move us a bit farther from the Scala standard library. We could still create a Hash
from any hashCode
definition by widening an Int
to a Long
. But it would mean that if users wanted to use any of our Hash
instances in a context that expected a hashCode
they would have to compress the Long
to an Int
(maybe we could provide a default reasonable way to do this).
How about
/**
* A type class used to represent a hashing scheme for objects of a given type.
*/
trait Hashable[@sp A] extends Any with Serializable {
def hash[Z](value: A)(implicit Z: Hash[Z]): Z =
hashWithSeed[Z](Z.empty, value)
def hashWithSeed[Z](seed: Z, value: A)(implicit hash: Hash[Z]): Z
}
object Hashable {
/**
* Summon a `Hash` instance given the specific type.
*/
@inline final def apply[A](implicit A: Hashable[A]): Hashable[A] = A
def hash[Z: Hash, A: Hashable](z: Z, a: A): Z =
Hashable[A].hashWithSeed[Z](z, a)
def hash[Z: Hash, A: Hashable, B: Hashable](z0: Z, a: A, b: B): Z = {
val z1 = Hashable[A].hashWithSeed(z0, a)
val z2 = Hashable[B].hashWithSeed(z1, b)
z2
}
def hash[Z: Hash, A: Hashable, B: Hashable, C: Hashable](z0: Z, a: A, b: B, c: C): Z = {
val z1 = Hashable[A].hashWithSeed(z0, a)
val z2 = Hashable[B].hashWithSeed(z1, b)
val z3 = Hashable[C].hashWithSeed(z2, c)
z3
}
implicit val int: Hashable[Int] = new Hashable[Int] {
def hashWithSeed[Z](z: Z, a: Int)(implicit Z: Hash[Z]): Z =
Z.int(z, a)
}
implicit val bigint: Hashable[BigInt] = new Hashable[BigInt] {
def hashWithSeed[Z](z: Z, a: BigInt)(implicit Z: Hash[Z]): Z =
Z.bytes(z, a.toByteArray)
}
implicit def list[A](implicit A: Hashable[A]): Hashable[List[A]] =
new Hashable[List[A]] {
def hashWithSeed[Z](z: Z, list: List[A])(implicit Z: Hash[Z]): Z =
list.foldRight(Z.long(z, 0x8c6b486a6a044063L))((a, z) => A.hashWithSeed(z, a))
}
implicit def set[A](implicit A: Hashable[A], ord: Order[A]): Hashable[Set[A]] =
new Hashable[Set[A]] {
def hashWithSeed[Z](z: Z, set: Set[A])(implicit Z: Hash[Z]): Z = {
val list = set.toList.sortWith(ord.lt)
list.foldRight(Z.long(z, 0x9fcda00fb8f98458L))((a, z) => A.hashWithSeed(z, a))
}
}
}
trait Hash[Z] extends Any with Serializable {
def empty: Z
def bytes(z: Z, bytes: Array[Byte]): Z
def byte(z: Z, x: Byte): Z =
bytes(z, Array(x))
def short(z: Z, x: Short): Z =
bytes(z, Array((x >>> 8).toByte, (x & 0xFF).toByte))
def int(z: Z, x: Int): Z =
bytes(z, Array(
(x >>> 24).toByte,
((x >>> 16) & 0xFF).toByte,
((x >>> 8) & 0xFF).toByte,
(x & 0xFF).toByte))
def long(z: Z, x: Long): Z = bytes(z, Array(
((x >>> 56) & 0xFF).toByte,
((x >>> 48) & 0xFF).toByte,
((x >>> 40) & 0xFF).toByte,
((x >>> 32) & 0xFF).toByte,
((x >>> 24) & 0xFF).toByte,
((x >>> 16) & 0xFF).toByte,
((x >>> 8) & 0xFF).toByte,
(x & 0xFF).toByte))
def string(z: Z, x: String): Z =
bytes(z, x.getBytes("UTF-8"))
}
object Hash {
/**
* Summon a `Hash` instance given the specific type.
*/
@inline final def apply[A](implicit A: Hash[A]): Hash[A] = A
}
The only problem is that type-specific versions of hashCode
could be faster in principle.
The current version of
Hash
is able to produce 32-bit hashes, and it has somewhat of a bias toward thehashCode
s. However, this makes it unusable for larger hashes as Alex pointed out here.That said, I'd like to hear the opinions for and against this change. Cheers :beers: