From 2ec5b3fb2ccb0120be641e077089f3da5e53d8a3 Mon Sep 17 00:00:00 2001 From: xleroy Date: Sun, 17 Sep 2006 12:04:56 +0000 Subject: Davantage de tests git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@104 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- test/c/knucleotide.c | 369 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 369 insertions(+) create mode 100644 test/c/knucleotide.c (limited to 'test/c/knucleotide.c') diff --git a/test/c/knucleotide.c b/test/c/knucleotide.c new file mode 100644 index 00000000..f7438926 --- /dev/null +++ b/test/c/knucleotide.c @@ -0,0 +1,369 @@ +/* The Computer Language Shootout + http://shootout.alioth.debian.org/ + + Contributed by Josh Goldfoot + to compile, use gcc -O3 + + This revision uses "simple_hash.h," available from + http://cvs.alioth.debian.org/cgi-bin/cvsweb.cgi/shootout/bench/Include/?cvsroot=shootout + +*/ +#include +#include +#include +#include + +enum { ht_num_primes = 28 }; + +static unsigned long ht_prime_list[ht_num_primes] = { + 53ul, 97ul, 193ul, 389ul, 769ul, + 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, + 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, + 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, + 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, + 1610612741ul, 3221225473ul, 4294967291ul +}; + +struct ht_node { + char *key; + int val; + struct ht_node *next; +}; + +struct ht_ht { + int size; + struct ht_node **tbl; + int iter_index; + struct ht_node *iter_next; + int items; +#ifdef HT_DEBUG + int collisions; +#endif /* HT_DEBUG */ +}; + +/*inline*/ int ht_val(struct ht_node *node) { + return(node->val); +} + +/*inline*/ char *ht_key(struct ht_node *node) { + return(node->key); +} + +/*inline*/ int ht_hashcode(struct ht_ht *ht, char *key) { + unsigned long val = 0; + for (; *key; ++key) val = 5 * val + *key; + return(val % ht->size); +} + +struct ht_node *ht_node_create(char *key) { + char *newkey; + struct ht_node *node; + if ((node = (struct ht_node *)malloc(sizeof(struct ht_node))) == 0) { + perror("malloc ht_node"); + exit(1); + } + if ((newkey = (char *)strdup(key)) == 0) { + perror("strdup newkey"); + exit(1); + } + node->key = newkey; + node->val = 0; + node->next = (struct ht_node *)NULL; + return(node); +} + +struct ht_ht *ht_create(int size) { + int i = 0; + struct ht_ht *ht = (struct ht_ht *)malloc(sizeof(struct ht_ht)); + while (ht_prime_list[i] < size) { i++; } + ht->size = ht_prime_list[i]; + ht->tbl = (struct ht_node **)calloc(ht->size, sizeof(struct ht_node *)); + ht->iter_index = 0; + ht->iter_next = 0; + ht->items = 0; +#ifdef HT_DEBUG + ht->collisions = 0; +#endif /* HT_DEBUG */ + return(ht); +} + +void ht_destroy(struct ht_ht *ht) { + struct ht_node *cur, *next; + int i; +#ifdef HT_DEBUG + int chain_len; + int max_chain_len = 0; + int density = 0; + fprintf(stderr, " HT: size %d\n", ht->size); + fprintf(stderr, " HT: items %d\n", ht->items); + fprintf(stderr, " HT: collisions %d\n", ht->collisions); +#endif /* HT_DEBUG */ + for (i=0; isize; i++) { + next = ht->tbl[i]; +#ifdef HT_DEBUG + if (next) { + density++; + } + chain_len = 0; +#endif /* HT_DEBUG */ + while (next) { + cur = next; + next = next->next; + free(cur->key); + free(cur); +#ifdef HT_DEBUG + chain_len++; +#endif /* HT_DEBUG */ + } +#ifdef HT_DEBUG + if (chain_len > max_chain_len) + max_chain_len = chain_len; +#endif /* HT_DEBUG */ + } + free(ht->tbl); + free(ht); +#ifdef HT_DEBUG + fprintf(stderr, " HT: density %d\n", density); + fprintf(stderr, " HT: max chain len %d\n", max_chain_len); +#endif /* HT_DEBUG */ +} + +/*inline*/ struct ht_node *ht_find(struct ht_ht *ht, char *key) { + int hash_code = ht_hashcode(ht, key); + struct ht_node *node = ht->tbl[hash_code]; + while (node) { + if (strcmp(key, node->key) == 0) return(node); + node = node->next; + } + return((struct ht_node *)NULL); +} + +/*inline*/ struct ht_node *ht_find_new(struct ht_ht *ht, char *key) { + int hash_code = ht_hashcode(ht, key); + struct ht_node *prev = 0, *node = ht->tbl[hash_code]; + while (node) { + if (strcmp(key, node->key) == 0) return(node); + prev = node; + node = node->next; +#ifdef HT_DEBUG + ht->collisions++; +#endif /* HT_DEBUG */ + } + ht->items++; + if (prev) { + return(prev->next = ht_node_create(key)); + } else { + return(ht->tbl[hash_code] = ht_node_create(key)); + } +} + +/* + * Hash Table iterator data/functions + */ +/*inline*/ struct ht_node *ht_next(struct ht_ht *ht) { + unsigned long index; + struct ht_node *node = ht->iter_next; + if (node) { + ht->iter_next = node->next; + return(node); + } else { + while (ht->iter_index < ht->size) { + index = ht->iter_index++; + if (ht->tbl[index]) { + ht->iter_next = ht->tbl[index]->next; + return(ht->tbl[index]); + } + } + } + return((struct ht_node *)NULL); +} + +/*inline*/ struct ht_node *ht_first(struct ht_ht *ht) { + ht->iter_index = 0; + ht->iter_next = (struct ht_node *)NULL; + return(ht_next(ht)); +} + +/*inline*/ int ht_count(struct ht_ht *ht) { + return(ht->items); +} + +long +hash_table_size (int fl, long buflen) +{ + long maxsize1, maxsize2; + + maxsize1 = buflen - fl; + maxsize2 = 4; + while (--fl > 0 && maxsize2 < maxsize1) + maxsize2 = maxsize2 * 4; + if (maxsize1 < maxsize2) + return maxsize1; + return maxsize2; +} + +struct ht_ht * +generate_frequencies (int fl, char *buffer, long buflen) +{ + struct ht_ht *ht; + char *reader; + long i; + char nulled; + + if (fl > buflen) + return NULL; + + ht = ht_create (hash_table_size (fl, buflen)); + for (i = 0; i < buflen - fl + 1; i++) + { + reader = &(buffer[i]); + nulled = reader[fl]; + reader[fl] = 0x00; + ht_find_new (ht, reader)->val++; + reader[fl] = nulled; + } + return ht; +} + +typedef struct ssorter +{ + char *string; + int num; +} sorter; + +void +write_frequencies (int fl, char *buffer, long buflen) +{ + struct ht_ht *ht; + long total, i, j, size; + struct ht_node *nd; + sorter *s; + sorter tmp; + + ht = generate_frequencies (fl, buffer, buflen); + total = 0; + size = 0; + for (nd = ht_first (ht); nd != NULL; nd = ht_next (ht)) + { + total = total + nd->val; + size++; + } + s = calloc (size, sizeof (sorter)); + i = 0; + for (nd = ht_first (ht); nd != NULL; nd = ht_next (ht)) + { + s[i].string = nd->key; + s[i++].num = nd->val; + } + for (i = 0; i < size - 1; i++) + for (j = i + 1; j < size; j++) + if (s[i].num < s[j].num) + { + memcpy (&tmp, &(s[i]), sizeof (sorter)); + memcpy (&(s[i]), &(s[j]), sizeof (sorter)); + memcpy (&(s[j]), &tmp, sizeof (sorter)); + } + for (i = 0; i < size; i++) + printf ("%s %.3f\n", s[i].string, 100 * (float) s[i].num / total); + printf ("\n"); + ht_destroy (ht); + free (s); +} + +void +write_count (char *searchFor, char *buffer, long buflen) +{ + struct ht_ht *ht; + + ht = generate_frequencies (strlen (searchFor), buffer, buflen); + printf ("%d\t%s\n", ht_find_new (ht, searchFor)->val, searchFor); + ht_destroy (ht); +} + +int +main () +{ + char c; + char *line, *buffer, *tmp, *x; + int i, linelen, nothree; + long buflen, seqlen; + FILE * f; + + line = malloc (256); + if (!line) + return 2; + seqlen = 0; + nothree = 1; + + f = fopen("Results/knucleotide-input.txt", "r"); + if (f == NULL) return 2; + + while (nothree && fgets (line, 255, f)) + if (line[0] == '>' && line[1] == 'T' && line[2] == 'H') + nothree = 0; + free (line); + + buflen = 10240; + buffer = malloc (buflen + 1); + if (!buffer) + return 2; + x = buffer; + + while (fgets (x, 255, f)) + { + linelen = strlen (x); + if (linelen) + { + if (x[linelen - 1] == '\n') + linelen--; + c = x[0]; + if (c == '>') + break; + else if (c != ';') + { + seqlen = seqlen + linelen; + if (seqlen + 512 >= buflen) + { + buflen = buflen + 10240; + tmp = realloc (buffer, buflen + 1); + if (tmp == NULL) + return 2; + buffer = tmp; + x = &(buffer[seqlen]); + } + else + x = &(x[linelen]); + x[0] = 0; + } + } + } + for (i = 0; i < seqlen; i++) + buffer[i] = toupper (buffer[i]); + write_frequencies (1, buffer, seqlen); + write_frequencies (2, buffer, seqlen); + write_count ("GGT", buffer, seqlen); + write_count ("GGTA", buffer, seqlen); + write_count ("GGTATT", buffer, seqlen); + write_count ("GGTATTTTAATT", buffer, seqlen); + write_count ("GGTATTTTAATTTATAGT", buffer, seqlen); + free (buffer); + fclose (f); + return 0; +} + +/********** + build & benchmark results + +BUILD COMMANDS FOR: knucleotide.gcc + +Fri Sep 15 13:56:07 PDT 2006 + +/usr/bin/gcc -pipe -Wall -O3 -fomit-frame-pointer -funroll-loops -march=pentium4 knucleotide.c -o knucleotide.gcc_run + +================================================================= +COMMAND LINE (%A is single numeric argument): + +knucleotide.gcc_run %A +N=2500 + +*******/ -- cgit