aboutsummaryrefslogtreecommitdiffstats
path: root/test/monniaux/ocaml/byterun/hash.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/monniaux/ocaml/byterun/hash.c')
-rw-r--r--test/monniaux/ocaml/byterun/hash.c419
1 files changed, 419 insertions, 0 deletions
diff --git a/test/monniaux/ocaml/byterun/hash.c b/test/monniaux/ocaml/byterun/hash.c
new file mode 100644
index 00000000..f7d0d222
--- /dev/null
+++ b/test/monniaux/ocaml/byterun/hash.c
@@ -0,0 +1,419 @@
+/**************************************************************************/
+/* */
+/* OCaml */
+/* */
+/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
+/* */
+/* Copyright 1996 Institut National de Recherche en Informatique et */
+/* en Automatique. */
+/* */
+/* All rights reserved. This file is distributed under the terms of */
+/* the GNU Lesser General Public License version 2.1, with the */
+/* special exception on linking described in the file LICENSE. */
+/* */
+/**************************************************************************/
+
+#define CAML_INTERNALS
+
+/* The generic hashing primitive */
+
+/* The interface of this file is in "mlvalues.h" (for [caml_hash_variant])
+ and in "hash.h" (for the other exported functions). */
+
+#include "caml/mlvalues.h"
+#include "caml/custom.h"
+#include "caml/memory.h"
+#include "caml/hash.h"
+
+/* The new implementation, based on MurmurHash 3,
+ http://code.google.com/p/smhasher/ */
+
+#define ROTL32(x,n) ((x) << n | (x) >> (32-n))
+
+#define MIX(h,d) \
+ d *= 0xcc9e2d51; \
+ d = ROTL32(d, 15); \
+ d *= 0x1b873593; \
+ h ^= d; \
+ h = ROTL32(h, 13); \
+ h = h * 5 + 0xe6546b64;
+
+#define FINAL_MIX(h) \
+ h ^= h >> 16; \
+ h *= 0x85ebca6b; \
+ h ^= h >> 13; \
+ h *= 0xc2b2ae35; \
+ h ^= h >> 16;
+
+CAMLexport uint32_t caml_hash_mix_uint32(uint32_t h, uint32_t d)
+{
+ MIX(h, d);
+ return h;
+}
+
+/* Mix a platform-native integer. */
+
+CAMLexport uint32_t caml_hash_mix_intnat(uint32_t h, intnat d)
+{
+ uint32_t n;
+#ifdef ARCH_SIXTYFOUR
+ /* Mix the low 32 bits and the high 32 bits, in a way that preserves
+ 32/64 compatibility: we want n = (uint32_t) d
+ if d is in the range [-2^31, 2^31-1]. */
+ n = (d >> 32) ^ (d >> 63) ^ d;
+ /* If 0 <= d < 2^31: d >> 32 = 0 d >> 63 = 0
+ If -2^31 <= d < 0: d >> 32 = -1 d >> 63 = -1
+ In both cases, n = (uint32_t) d. */
+#else
+ n = d;
+#endif
+ MIX(h, n);
+ return h;
+}
+
+/* Mix a 64-bit integer. */
+
+CAMLexport uint32_t caml_hash_mix_int64(uint32_t h, int64_t d)
+{
+ uint32_t hi = (uint32_t) (d >> 32), lo = (uint32_t) d;
+ MIX(h, lo);
+ MIX(h, hi);
+ return h;
+}
+
+/* Mix a double-precision float.
+ Treats +0.0 and -0.0 identically.
+ Treats all NaNs identically.
+*/
+
+CAMLexport uint32_t caml_hash_mix_double(uint32_t hash, double d)
+{
+ union {
+ double d;
+#if defined(ARCH_BIG_ENDIAN) || (defined(__arm__) && !defined(__ARM_EABI__))
+ struct { uint32_t h; uint32_t l; } i;
+#else
+ struct { uint32_t l; uint32_t h; } i;
+#endif
+ } u;
+ uint32_t h, l;
+ /* Convert to two 32-bit halves */
+ u.d = d;
+ h = u.i.h; l = u.i.l;
+ /* Normalize NaNs */
+ if ((h & 0x7FF00000) == 0x7FF00000 && (l | (h & 0xFFFFF)) != 0) {
+ h = 0x7FF00000;
+ l = 0x00000001;
+ }
+ /* Normalize -0 into +0 */
+ else if (h == 0x80000000 && l == 0) {
+ h = 0;
+ }
+ MIX(hash, l);
+ MIX(hash, h);
+ return hash;
+}
+
+/* Mix a single-precision float.
+ Treats +0.0 and -0.0 identically.
+ Treats all NaNs identically.
+*/
+
+CAMLexport uint32_t caml_hash_mix_float(uint32_t hash, float d)
+{
+ union {
+ float f;
+ uint32_t i;
+ } u;
+ uint32_t n;
+ /* Convert to int32_t */
+ u.f = d; n = u.i;
+ /* Normalize NaNs */
+ if ((n & 0x7F800000) == 0x7F800000 && (n & 0x007FFFFF) != 0) {
+ n = 0x7F800001;
+ }
+ /* Normalize -0 into +0 */
+ else if (n == 0x80000000) {
+ n = 0;
+ }
+ MIX(hash, n);
+ return hash;
+}
+
+/* Mix an OCaml string */
+
+CAMLexport uint32_t caml_hash_mix_string(uint32_t h, value s)
+{
+ mlsize_t len = caml_string_length(s);
+ mlsize_t i;
+ uint32_t w;
+
+ /* Mix by 32-bit blocks (little-endian) */
+ for (i = 0; i + 4 <= len; i += 4) {
+#ifdef ARCH_BIG_ENDIAN
+ w = Byte_u(s, i)
+ | (Byte_u(s, i+1) << 8)
+ | (Byte_u(s, i+2) << 16)
+ | (Byte_u(s, i+3) << 24);
+#else
+ w = *((uint32_t *) &Byte_u(s, i));
+#endif
+ MIX(h, w);
+ }
+ /* Finish with up to 3 bytes */
+ w = 0;
+ switch (len & 3) {
+ case 3: w = Byte_u(s, i+2) << 16; /* fallthrough */
+ case 2: w |= Byte_u(s, i+1) << 8; /* fallthrough */
+ case 1: w |= Byte_u(s, i);
+ MIX(h, w);
+ default: /*skip*/; /* len & 3 == 0, no extra bytes, do nothing */
+ }
+ /* Finally, mix in the length. Ignore the upper 32 bits, generally 0. */
+ h ^= (uint32_t) len;
+ return h;
+}
+
+/* Maximal size of the queue used for breadth-first traversal. */
+#define HASH_QUEUE_SIZE 256
+/* Maximal number of Forward_tag links followed in one step */
+#define MAX_FORWARD_DEREFERENCE 1000
+
+/* The generic hash function */
+
+CAMLprim value caml_hash(value count, value limit, value seed, value obj)
+{
+ value queue[HASH_QUEUE_SIZE]; /* Queue of values to examine */
+ intnat rd; /* Position of first value in queue */
+ intnat wr; /* One past position of last value in queue */
+ intnat sz; /* Max number of values to put in queue */
+ intnat num; /* Max number of meaningful values to see */
+ uint32_t h; /* Rolling hash */
+ value v;
+ mlsize_t i, len;
+
+ sz = Long_val(limit);
+ if (sz < 0 || sz > HASH_QUEUE_SIZE) sz = HASH_QUEUE_SIZE;
+ num = Long_val(count);
+ h = Int_val(seed);
+ queue[0] = obj; rd = 0; wr = 1;
+
+ while (rd < wr && num > 0) {
+ v = queue[rd++];
+ again:
+ if (Is_long(v)) {
+ h = caml_hash_mix_intnat(h, v);
+ num--;
+ }
+ else if (Is_in_value_area(v)) {
+ switch (Tag_val(v)) {
+ case String_tag:
+ h = caml_hash_mix_string(h, v);
+ num--;
+ break;
+ case Double_tag:
+ h = caml_hash_mix_double(h, Double_val(v));
+ num--;
+ break;
+ case Double_array_tag:
+ for (i = 0, len = Wosize_val(v) / Double_wosize; i < len; i++) {
+ h = caml_hash_mix_double(h, Double_flat_field(v, i));
+ num--;
+ if (num <= 0) break;
+ }
+ break;
+ case Abstract_tag:
+ /* Block contents unknown. Do nothing. */
+ break;
+ case Infix_tag:
+ /* Mix in the offset to distinguish different functions from
+ the same mutually-recursive definition */
+ h = caml_hash_mix_uint32(h, Infix_offset_val(v));
+ v = v - Infix_offset_val(v);
+ goto again;
+ case Forward_tag:
+ /* PR#6361: we can have a loop here, so limit the number of
+ Forward_tag links being followed */
+ for (i = MAX_FORWARD_DEREFERENCE; i > 0; i--) {
+ v = Forward_val(v);
+ if (Is_long(v) || !Is_in_value_area(v) || Tag_val(v) != Forward_tag)
+ goto again;
+ }
+ /* Give up on this object and move to the next */
+ break;
+ case Object_tag:
+ h = caml_hash_mix_intnat(h, Oid_val(v));
+ num--;
+ break;
+ case Custom_tag:
+ /* If no hashing function provided, do nothing. */
+ /* Only use low 32 bits of custom hash, for 32/64 compatibility */
+ if (Custom_ops_val(v)->hash != NULL) {
+ uint32_t n = (uint32_t) Custom_ops_val(v)->hash(v);
+ h = caml_hash_mix_uint32(h, n);
+ num--;
+ }
+ break;
+ default:
+ /* Mix in the tag and size, but do not count this towards [num] */
+ h = caml_hash_mix_uint32(h, Whitehd_hd(Hd_val(v)));
+ /* Copy fields into queue, not exceeding the total size [sz] */
+ for (i = 0, len = Wosize_val(v); i < len; i++) {
+ if (wr >= sz) break;
+ queue[wr++] = Field(v, i);
+ }
+ break;
+ }
+ } else {
+ /* v is a pointer outside the heap, probably a code pointer.
+ Shall we count it? Let's say yes by compatibility with old code. */
+ h = caml_hash_mix_intnat(h, v);
+ num--;
+ }
+ }
+ /* Final mixing of bits */
+ FINAL_MIX(h);
+ /* Fold result to the range [0, 2^30-1] so that it is a nonnegative
+ OCaml integer both on 32 and 64-bit platforms. */
+ return Val_int(h & 0x3FFFFFFFU);
+}
+
+/* The old implementation */
+
+struct hash_state {
+ uintnat accu;
+ intnat univ_limit, univ_count;
+};
+
+static void hash_aux(struct hash_state*, value obj);
+
+CAMLprim value caml_hash_univ_param(value count, value limit, value obj)
+{
+ struct hash_state h;
+ h.univ_limit = Long_val(limit);
+ h.univ_count = Long_val(count);
+ h.accu = 0;
+ hash_aux(&h, obj);
+ return Val_long(h.accu & 0x3FFFFFFF);
+ /* The & has two purposes: ensure that the return value is positive
+ and give the same result on 32 bit and 64 bit architectures. */
+}
+
+#define Alpha 65599
+#define Beta 19
+#define Combine(new) (h->accu = h->accu * Alpha + (new))
+#define Combine_small(new) (h->accu = h->accu * Beta + (new))
+
+static void hash_aux(struct hash_state* h, value obj)
+{
+ unsigned char * p;
+ mlsize_t i, j;
+ tag_t tag;
+
+ h->univ_limit--;
+ if (h->univ_count < 0 || h->univ_limit < 0) return;
+
+ again:
+ if (Is_long(obj)) {
+ h->univ_count--;
+ Combine(Long_val(obj));
+ return;
+ }
+
+ /* Pointers into the heap are well-structured blocks. So are atoms.
+ We can inspect the block contents. */
+
+ CAMLassert (Is_block (obj));
+ if (Is_in_value_area(obj)) {
+ tag = Tag_val(obj);
+ switch (tag) {
+ case String_tag:
+ h->univ_count--;
+ i = caml_string_length(obj);
+ for (p = &Byte_u(obj, 0); i > 0; i--, p++)
+ Combine_small(*p);
+ break;
+ case Double_tag:
+ /* For doubles, we inspect their binary representation, LSB first.
+ The results are consistent among all platforms with IEEE floats. */
+ h->univ_count--;
+#ifdef ARCH_BIG_ENDIAN
+ for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double);
+ i > 0;
+ p--, i--)
+#else
+ for (p = &Byte_u(obj, 0), i = sizeof(double);
+ i > 0;
+ p++, i--)
+#endif
+ Combine_small(*p);
+ break;
+ case Double_array_tag:
+ h->univ_count--;
+ for (j = 0; j < Bosize_val(obj); j += sizeof(double)) {
+#ifdef ARCH_BIG_ENDIAN
+ for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double);
+ i > 0;
+ p--, i--)
+#else
+ for (p = &Byte_u(obj, j), i = sizeof(double);
+ i > 0;
+ p++, i--)
+#endif
+ Combine_small(*p);
+ }
+ break;
+ case Abstract_tag:
+ /* We don't know anything about the contents of the block.
+ Better do nothing. */
+ break;
+ case Infix_tag:
+ hash_aux(h, obj - Infix_offset_val(obj));
+ break;
+ case Forward_tag:
+ obj = Forward_val (obj);
+ goto again;
+ case Object_tag:
+ h->univ_count--;
+ Combine(Oid_val(obj));
+ break;
+ case Custom_tag:
+ /* If no hashing function provided, do nothing */
+ if (Custom_ops_val(obj)->hash != NULL) {
+ h->univ_count--;
+ Combine(Custom_ops_val(obj)->hash(obj));
+ }
+ break;
+ default:
+ h->univ_count--;
+ Combine_small(tag);
+ i = Wosize_val(obj);
+ while (i != 0) {
+ i--;
+ hash_aux(h, Field(obj, i));
+ }
+ break;
+ }
+ return;
+ }
+
+ /* Otherwise, obj is a pointer outside the heap, to an object with
+ a priori unknown structure. Use its physical address as hash key. */
+ Combine((intnat) obj);
+}
+
+/* Hashing variant tags */
+
+CAMLexport value caml_hash_variant(char const * tag)
+{
+ value accu;
+ /* Same hashing algorithm as in ../typing/btype.ml, function hash_variant */
+ for (accu = Val_int(0); *tag != 0; tag++)
+ accu = Val_int(223 * Int_val(accu) + *((unsigned char *) tag));
+#ifdef ARCH_SIXTYFOUR
+ accu = accu & Val_long(0x7FFFFFFFL);
+#endif
+ /* Force sign extension of bit 31 for compatibility between 32 and 64-bit
+ platforms */
+ return (int32_t) accu;
+}