diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2019-03-20 17:32:00 +0100 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2019-03-20 17:33:45 +0100 |
commit | 4c9c95b6a0ac8aa31abb1f7ab48c3f645c059bd6 (patch) | |
tree | 62eaadc788f4426d18974f6c1cbf23b616d43edb /test/monniaux/ocaml/byterun/hash.c | |
parent | f8f393317fcfee9613f09513f21dd0461c503d8c (diff) | |
download | compcert-kvx-4c9c95b6a0ac8aa31abb1f7ab48c3f645c059bd6.tar.gz compcert-kvx-4c9c95b6a0ac8aa31abb1f7ab48c3f645c059bd6.zip |
ocaml byterunner example
Diffstat (limited to 'test/monniaux/ocaml/byterun/hash.c')
-rw-r--r-- | test/monniaux/ocaml/byterun/hash.c | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/test/monniaux/ocaml/byterun/hash.c b/test/monniaux/ocaml/byterun/hash.c new file mode 100644 index 00000000..f7d0d222 --- /dev/null +++ b/test/monniaux/ocaml/byterun/hash.c @@ -0,0 +1,419 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */ +/* */ +/* Copyright 1996 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +#define CAML_INTERNALS + +/* The generic hashing primitive */ + +/* The interface of this file is in "mlvalues.h" (for [caml_hash_variant]) + and in "hash.h" (for the other exported functions). */ + +#include "caml/mlvalues.h" +#include "caml/custom.h" +#include "caml/memory.h" +#include "caml/hash.h" + +/* The new implementation, based on MurmurHash 3, + http://code.google.com/p/smhasher/ */ + +#define ROTL32(x,n) ((x) << n | (x) >> (32-n)) + +#define MIX(h,d) \ + d *= 0xcc9e2d51; \ + d = ROTL32(d, 15); \ + d *= 0x1b873593; \ + h ^= d; \ + h = ROTL32(h, 13); \ + h = h * 5 + 0xe6546b64; + +#define FINAL_MIX(h) \ + h ^= h >> 16; \ + h *= 0x85ebca6b; \ + h ^= h >> 13; \ + h *= 0xc2b2ae35; \ + h ^= h >> 16; + +CAMLexport uint32_t caml_hash_mix_uint32(uint32_t h, uint32_t d) +{ + MIX(h, d); + return h; +} + +/* Mix a platform-native integer. */ + +CAMLexport uint32_t caml_hash_mix_intnat(uint32_t h, intnat d) +{ + uint32_t n; +#ifdef ARCH_SIXTYFOUR + /* Mix the low 32 bits and the high 32 bits, in a way that preserves + 32/64 compatibility: we want n = (uint32_t) d + if d is in the range [-2^31, 2^31-1]. */ + n = (d >> 32) ^ (d >> 63) ^ d; + /* If 0 <= d < 2^31: d >> 32 = 0 d >> 63 = 0 + If -2^31 <= d < 0: d >> 32 = -1 d >> 63 = -1 + In both cases, n = (uint32_t) d. */ +#else + n = d; +#endif + MIX(h, n); + return h; +} + +/* Mix a 64-bit integer. */ + +CAMLexport uint32_t caml_hash_mix_int64(uint32_t h, int64_t d) +{ + uint32_t hi = (uint32_t) (d >> 32), lo = (uint32_t) d; + MIX(h, lo); + MIX(h, hi); + return h; +} + +/* Mix a double-precision float. + Treats +0.0 and -0.0 identically. + Treats all NaNs identically. +*/ + +CAMLexport uint32_t caml_hash_mix_double(uint32_t hash, double d) +{ + union { + double d; +#if defined(ARCH_BIG_ENDIAN) || (defined(__arm__) && !defined(__ARM_EABI__)) + struct { uint32_t h; uint32_t l; } i; +#else + struct { uint32_t l; uint32_t h; } i; +#endif + } u; + uint32_t h, l; + /* Convert to two 32-bit halves */ + u.d = d; + h = u.i.h; l = u.i.l; + /* Normalize NaNs */ + if ((h & 0x7FF00000) == 0x7FF00000 && (l | (h & 0xFFFFF)) != 0) { + h = 0x7FF00000; + l = 0x00000001; + } + /* Normalize -0 into +0 */ + else if (h == 0x80000000 && l == 0) { + h = 0; + } + MIX(hash, l); + MIX(hash, h); + return hash; +} + +/* Mix a single-precision float. + Treats +0.0 and -0.0 identically. + Treats all NaNs identically. +*/ + +CAMLexport uint32_t caml_hash_mix_float(uint32_t hash, float d) +{ + union { + float f; + uint32_t i; + } u; + uint32_t n; + /* Convert to int32_t */ + u.f = d; n = u.i; + /* Normalize NaNs */ + if ((n & 0x7F800000) == 0x7F800000 && (n & 0x007FFFFF) != 0) { + n = 0x7F800001; + } + /* Normalize -0 into +0 */ + else if (n == 0x80000000) { + n = 0; + } + MIX(hash, n); + return hash; +} + +/* Mix an OCaml string */ + +CAMLexport uint32_t caml_hash_mix_string(uint32_t h, value s) +{ + mlsize_t len = caml_string_length(s); + mlsize_t i; + uint32_t w; + + /* Mix by 32-bit blocks (little-endian) */ + for (i = 0; i + 4 <= len; i += 4) { +#ifdef ARCH_BIG_ENDIAN + w = Byte_u(s, i) + | (Byte_u(s, i+1) << 8) + | (Byte_u(s, i+2) << 16) + | (Byte_u(s, i+3) << 24); +#else + w = *((uint32_t *) &Byte_u(s, i)); +#endif + MIX(h, w); + } + /* Finish with up to 3 bytes */ + w = 0; + switch (len & 3) { + case 3: w = Byte_u(s, i+2) << 16; /* fallthrough */ + case 2: w |= Byte_u(s, i+1) << 8; /* fallthrough */ + case 1: w |= Byte_u(s, i); + MIX(h, w); + default: /*skip*/; /* len & 3 == 0, no extra bytes, do nothing */ + } + /* Finally, mix in the length. Ignore the upper 32 bits, generally 0. */ + h ^= (uint32_t) len; + return h; +} + +/* Maximal size of the queue used for breadth-first traversal. */ +#define HASH_QUEUE_SIZE 256 +/* Maximal number of Forward_tag links followed in one step */ +#define MAX_FORWARD_DEREFERENCE 1000 + +/* The generic hash function */ + +CAMLprim value caml_hash(value count, value limit, value seed, value obj) +{ + value queue[HASH_QUEUE_SIZE]; /* Queue of values to examine */ + intnat rd; /* Position of first value in queue */ + intnat wr; /* One past position of last value in queue */ + intnat sz; /* Max number of values to put in queue */ + intnat num; /* Max number of meaningful values to see */ + uint32_t h; /* Rolling hash */ + value v; + mlsize_t i, len; + + sz = Long_val(limit); + if (sz < 0 || sz > HASH_QUEUE_SIZE) sz = HASH_QUEUE_SIZE; + num = Long_val(count); + h = Int_val(seed); + queue[0] = obj; rd = 0; wr = 1; + + while (rd < wr && num > 0) { + v = queue[rd++]; + again: + if (Is_long(v)) { + h = caml_hash_mix_intnat(h, v); + num--; + } + else if (Is_in_value_area(v)) { + switch (Tag_val(v)) { + case String_tag: + h = caml_hash_mix_string(h, v); + num--; + break; + case Double_tag: + h = caml_hash_mix_double(h, Double_val(v)); + num--; + break; + case Double_array_tag: + for (i = 0, len = Wosize_val(v) / Double_wosize; i < len; i++) { + h = caml_hash_mix_double(h, Double_flat_field(v, i)); + num--; + if (num <= 0) break; + } + break; + case Abstract_tag: + /* Block contents unknown. Do nothing. */ + break; + case Infix_tag: + /* Mix in the offset to distinguish different functions from + the same mutually-recursive definition */ + h = caml_hash_mix_uint32(h, Infix_offset_val(v)); + v = v - Infix_offset_val(v); + goto again; + case Forward_tag: + /* PR#6361: we can have a loop here, so limit the number of + Forward_tag links being followed */ + for (i = MAX_FORWARD_DEREFERENCE; i > 0; i--) { + v = Forward_val(v); + if (Is_long(v) || !Is_in_value_area(v) || Tag_val(v) != Forward_tag) + goto again; + } + /* Give up on this object and move to the next */ + break; + case Object_tag: + h = caml_hash_mix_intnat(h, Oid_val(v)); + num--; + break; + case Custom_tag: + /* If no hashing function provided, do nothing. */ + /* Only use low 32 bits of custom hash, for 32/64 compatibility */ + if (Custom_ops_val(v)->hash != NULL) { + uint32_t n = (uint32_t) Custom_ops_val(v)->hash(v); + h = caml_hash_mix_uint32(h, n); + num--; + } + break; + default: + /* Mix in the tag and size, but do not count this towards [num] */ + h = caml_hash_mix_uint32(h, Whitehd_hd(Hd_val(v))); + /* Copy fields into queue, not exceeding the total size [sz] */ + for (i = 0, len = Wosize_val(v); i < len; i++) { + if (wr >= sz) break; + queue[wr++] = Field(v, i); + } + break; + } + } else { + /* v is a pointer outside the heap, probably a code pointer. + Shall we count it? Let's say yes by compatibility with old code. */ + h = caml_hash_mix_intnat(h, v); + num--; + } + } + /* Final mixing of bits */ + FINAL_MIX(h); + /* Fold result to the range [0, 2^30-1] so that it is a nonnegative + OCaml integer both on 32 and 64-bit platforms. */ + return Val_int(h & 0x3FFFFFFFU); +} + +/* The old implementation */ + +struct hash_state { + uintnat accu; + intnat univ_limit, univ_count; +}; + +static void hash_aux(struct hash_state*, value obj); + +CAMLprim value caml_hash_univ_param(value count, value limit, value obj) +{ + struct hash_state h; + h.univ_limit = Long_val(limit); + h.univ_count = Long_val(count); + h.accu = 0; + hash_aux(&h, obj); + return Val_long(h.accu & 0x3FFFFFFF); + /* The & has two purposes: ensure that the return value is positive + and give the same result on 32 bit and 64 bit architectures. */ +} + +#define Alpha 65599 +#define Beta 19 +#define Combine(new) (h->accu = h->accu * Alpha + (new)) +#define Combine_small(new) (h->accu = h->accu * Beta + (new)) + +static void hash_aux(struct hash_state* h, value obj) +{ + unsigned char * p; + mlsize_t i, j; + tag_t tag; + + h->univ_limit--; + if (h->univ_count < 0 || h->univ_limit < 0) return; + + again: + if (Is_long(obj)) { + h->univ_count--; + Combine(Long_val(obj)); + return; + } + + /* Pointers into the heap are well-structured blocks. So are atoms. + We can inspect the block contents. */ + + CAMLassert (Is_block (obj)); + if (Is_in_value_area(obj)) { + tag = Tag_val(obj); + switch (tag) { + case String_tag: + h->univ_count--; + i = caml_string_length(obj); + for (p = &Byte_u(obj, 0); i > 0; i--, p++) + Combine_small(*p); + break; + case Double_tag: + /* For doubles, we inspect their binary representation, LSB first. + The results are consistent among all platforms with IEEE floats. */ + h->univ_count--; +#ifdef ARCH_BIG_ENDIAN + for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double); + i > 0; + p--, i--) +#else + for (p = &Byte_u(obj, 0), i = sizeof(double); + i > 0; + p++, i--) +#endif + Combine_small(*p); + break; + case Double_array_tag: + h->univ_count--; + for (j = 0; j < Bosize_val(obj); j += sizeof(double)) { +#ifdef ARCH_BIG_ENDIAN + for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double); + i > 0; + p--, i--) +#else + for (p = &Byte_u(obj, j), i = sizeof(double); + i > 0; + p++, i--) +#endif + Combine_small(*p); + } + break; + case Abstract_tag: + /* We don't know anything about the contents of the block. + Better do nothing. */ + break; + case Infix_tag: + hash_aux(h, obj - Infix_offset_val(obj)); + break; + case Forward_tag: + obj = Forward_val (obj); + goto again; + case Object_tag: + h->univ_count--; + Combine(Oid_val(obj)); + break; + case Custom_tag: + /* If no hashing function provided, do nothing */ + if (Custom_ops_val(obj)->hash != NULL) { + h->univ_count--; + Combine(Custom_ops_val(obj)->hash(obj)); + } + break; + default: + h->univ_count--; + Combine_small(tag); + i = Wosize_val(obj); + while (i != 0) { + i--; + hash_aux(h, Field(obj, i)); + } + break; + } + return; + } + + /* Otherwise, obj is a pointer outside the heap, to an object with + a priori unknown structure. Use its physical address as hash key. */ + Combine((intnat) obj); +} + +/* Hashing variant tags */ + +CAMLexport value caml_hash_variant(char const * tag) +{ + value accu; + /* Same hashing algorithm as in ../typing/btype.ml, function hash_variant */ + for (accu = Val_int(0); *tag != 0; tag++) + accu = Val_int(223 * Int_val(accu) + *((unsigned char *) tag)); +#ifdef ARCH_SIXTYFOUR + accu = accu & Val_long(0x7FFFFFFFL); +#endif + /* Force sign extension of bit 31 for compatibility between 32 and 64-bit + platforms */ + return (int32_t) accu; +} |