From cb1d47636f9bb9adbdfb3f18982417b746b2bf40 Mon Sep 17 00:00:00 2001 From: Jianyi Cheng Date: Fri, 12 Jun 2020 13:02:33 +0100 Subject: Adding benchmarks to branch_jc --- benchmarks/kmeans/lloyds_algorithm_top.cpp | 326 ++++++++++++++++++++++++++++ benchmarks/kmeans/lloyds_algorithm_top.h | 112 ++++++++++ benchmarks/kmeans/lloyds_algorithm_util.cpp | 258 ++++++++++++++++++++++ benchmarks/kmeans/lloyds_algorithm_util.h | 33 +++ 4 files changed, 729 insertions(+) create mode 100644 benchmarks/kmeans/lloyds_algorithm_top.cpp create mode 100644 benchmarks/kmeans/lloyds_algorithm_top.h create mode 100644 benchmarks/kmeans/lloyds_algorithm_util.cpp create mode 100644 benchmarks/kmeans/lloyds_algorithm_util.h (limited to 'benchmarks/kmeans') diff --git a/benchmarks/kmeans/lloyds_algorithm_top.cpp b/benchmarks/kmeans/lloyds_algorithm_top.cpp new file mode 100644 index 0000000..f2db1ce --- /dev/null +++ b/benchmarks/kmeans/lloyds_algorithm_top.cpp @@ -0,0 +1,326 @@ +// source: https://github.com/FelixWinterstein/Vivado-KMeans/tree/b1121f826bdac8db9502e4bf0c8f3b08425bc061/lloyds_algorithm_HLS/source + +/********************************************************************** +* Felix Winterstein, Imperial College London +* +* File: lloyds_algorithm_top.cpp +* +* Revision 1.01 +* Additional Comments: distributed under a BSD license, see LICENSE.txt +* +**********************************************************************/ + +#include "lloyds_algorithm_top.h" +#include "lloyds_algorithm_util.h" + + +// global array for the data (keep it local to this file) +data_type data_int_memory[N]; +data_type centre_positions[K*P]; +centre_type centre_buffer[K*P]; + + +// top-level function of the design +void lloyds_algorithm_top( volatile data_type *data, + volatile data_type *cntr_pos_init, + node_pointer n, + centre_index_type k, + volatile coord_type_ext *distortion_out, + volatile data_type *clusters_out) +{ + // set the interface properties + #pragma HLS interface ap_none register port=n + #pragma HLS interface ap_none register port=k + #pragma HLS interface ap_fifo port=data depth=256 + + #pragma HLS interface ap_fifo port=cntr_pos_init depth=256 + #pragma HLS interface ap_fifo port=distortion_out depth=256 + #pragma HLS interface ap_fifo port=clusters_out depth=256 + + /* + #pragma HLS data_pack variable=data + #pragma HLS data_pack variable=cntr_pos_init + #pragma HLS data_pack variable=clusters_out + */ + #pragma HLS data_pack variable=data_int_memory + #pragma HLS data_pack variable=centre_positions + #pragma HLS data_pack variable=centre_buffer + + // specify the type of memory instantiated for these arrays: two-port block ram + #pragma HLS resource variable=data_int_memory core=RAM_2P_BRAM + #pragma HLS resource variable=centre_positions core=RAM_2P_BRAM + #pragma HLS resource variable=centre_buffer core=RAM_2P_LUTRAM + + // partition the arrays according to the parallelism degree P + // NOTE: the part. factor must be updated if P is changed (in lloyds_alogrithm_top.h) ! + #pragma HLS array_partition variable=centre_buffer block factor=40 dim=1 + #pragma HLS array_partition variable=centre_positions block factor=40 dim=1 + + init_node_memory(data,n); + + centre_type filt_centres_out[K]; + data_type new_centre_positions[K]; + // more struct-packing + #pragma HLS data_pack variable=filt_centres_out + #pragma HLS data_pack variable=filt_centres_out + + // iterate over a constant number of outer clustering iterations + it_loop: for (uint l=0; l=n-P+1) { + //if (i>=n) { + break; + } + } + + + // readout centres + read_out_centres_loop: for(centre_index_type i=0; i<=k; i++) { + #pragma HLS pipeline II=1 + + coord_type_ext arr_count[P]; + coord_type_ext arr_sum_sq[P]; + coord_type_vector_ext arr_wgtCent[P]; + #pragma HLS array_partition variable=arr_count complete + #pragma HLS array_partition variable=arr_sum_sq complete + #pragma HLS array_partition variable=arr_wgtCent complete + + for (uint p=0; p +#include "ap_int.h" // custom data types + +#define D 3 // data dimensionality +#define N 32768 // max. number of data points +#define K 256 // max. number of centres +#define L 6 // max. number of iterations +#define P 40 // parallelisation degree + +#define COORD_BITWIDTH 16 +#define COORD_BITWITDH_EXT 32 +#define NODE_POINTER_BITWIDTH 15 // log2(N) +#define CNTR_INDEX_BITWIDTH 8 // log2(K) + +// pointer types to tree nodes and centre lists +typedef ap_uint node_pointer; +typedef ap_uint centre_index_type; + +// force register insertion in the generated RTL for some signals +#define FORCE_REGISTERS +// ... used for saturation +#define MAX_FIXED_POINT_VAL_EXT (1<<(COORD_BITWITDH_EXT-1))-1 + +typedef unsigned int uint; +typedef ap_int coord_type; +typedef ap_int coord_type_vector; +typedef ap_int coord_type_ext; +typedef ap_int coord_type_vector_ext; + +//bit width definitions for multiplications +#define MUL_INTEGER_BITS 12 +#define MUL_FRACTIONAL_BITS 6 +#define MUL_MAX_VAL (1<<(MUL_INTEGER_BITS+MUL_FRACTIONAL_BITS-1))-1 +#define MUL_MIN_VAL -1*(1<<(MUL_INTEGER_BITS+MUL_FRACTIONAL_BITS-1)) +typedef ap_int mul_input_type; + +// this should be always 1 +#define FILE_INDEX 1 + +// data point types +struct data_type { + //coord_type value[D]; + coord_type_vector value; + data_type& operator=(const data_type& a); + data_type& operator=(const volatile data_type& a); +}; + + +// data point types ext +struct data_type_ext { + coord_type_vector_ext value; + data_type_ext& operator=(const data_type_ext& a); +}; + + +// centre types +struct centre_type { + data_type_ext wgtCent; // sum of all points assigned to this centre + coord_type_ext sum_sq; // sum of norm of all points assigned to this centre + coord_type count; + centre_type& operator=(const centre_type& a); +}; +typedef centre_type* centre_ptr; + + +#ifdef FORCE_REGISTERS +template +T Reg(T in) { + #pragma HLS INLINE off + #pragma HLS INTERFACE port=return register + return in; +} +#else +template +T Reg(T in) { + #pragma HLS INLINE + return in; +} +#endif + + + +void lloyds_algorithm_top( volatile data_type *data, + volatile data_type *cntr_pos_init, + node_pointer n, + centre_index_type k, + volatile coord_type_ext *distortion_out, + volatile data_type *clusters_out); + +void init_node_memory(volatile data_type *node_data, node_pointer n); + +void update_centres(centre_type *centres_in,centre_index_type k, data_type *centres_positions_out); + +void lloyds ( node_pointer n, + centre_index_type k, + centre_type *centres_out); + +#endif /* LLOYDS_ALGORITHM_TOP_H */ diff --git a/benchmarks/kmeans/lloyds_algorithm_util.cpp b/benchmarks/kmeans/lloyds_algorithm_util.cpp new file mode 100644 index 0000000..fa1db19 --- /dev/null +++ b/benchmarks/kmeans/lloyds_algorithm_util.cpp @@ -0,0 +1,258 @@ +/********************************************************************** +* Felix Winterstein, Imperial College London +* +* File: lloyds_algorithm_util.cpp +* +* Revision 1.01 +* Additional Comments: distributed under a BSD license, see LICENSE.txt +* +**********************************************************************/ + +#include +#include "lloyds_algorithm_util.h" + + +data_type& data_type::operator=(const data_type& a) +{ + + value = a.value; + return *this; +} + +data_type& data_type::operator=(const volatile data_type& a) +{ + + value = a.value; + return *this; +} + + + +data_type_ext& data_type_ext::operator=(const data_type_ext& a) +{ + value = a.value; + return *this; +} + + + +centre_type& centre_type::operator=(const centre_type& a) +{ + count = a.count; + wgtCent = a.wgtCent; + sum_sq = a.sum_sq; + //position = a.position; + return *this; +} + + +void set_coord_type_vector_item(coord_type_vector *a, const coord_type b, const uint idx) +{ + #pragma HLS function_instantiate variable=idx + a->range((idx+1)*COORD_BITWIDTH-1,idx*COORD_BITWIDTH) = b; +} + + +void set_coord_type_vector_ext_item(coord_type_vector_ext *a, const coord_type_ext b, const uint idx) +{ + #pragma HLS function_instantiate variable=idx + a->range((idx+1)*COORD_BITWITDH_EXT-1,idx*COORD_BITWITDH_EXT) = b; +} + + +coord_type get_coord_type_vector_item(const coord_type_vector a, const uint idx) +{ + #pragma HLS function_instantiate variable=idx + coord_type tmp= a.range((idx+1)*COORD_BITWIDTH-1,idx*COORD_BITWIDTH); + return tmp; +} + + +coord_type_ext get_coord_type_vector_ext_item(const coord_type_vector_ext a, const uint idx) +{ + #pragma HLS function_instantiate variable=idx + coord_type_ext tmp= a.range((idx+1)*COORD_BITWITDH_EXT-1,idx*COORD_BITWITDH_EXT); + return tmp; +} + + +/* ****** helper functions *******/ + + +// conversion from data_type_ext to data_type +data_type conv_long_to_short(data_type_ext p) +{ + #pragma HLS inline + data_type result; + for (uint d=0; d MUL_MAX_VAL) { + val = MUL_MAX_VAL; + } else if (val < MUL_MIN_VAL) { + val = MUL_MIN_VAL; + } + return (mul_input_type)val; +} + + +// fixed point multiplication with saturation and scaling +coord_type_ext fi_mul(coord_type_ext op1, coord_type_ext op2) +{ + #pragma HLS inline + mul_input_type tmp_op1 = saturate_mul_input(op1); + mul_input_type tmp_op2 = saturate_mul_input(op2); + + ap_int<2*(MUL_INTEGER_BITS+MUL_FRACTIONAL_BITS)> result_unscaled; + result_unscaled = tmp_op1*tmp_op2; + #pragma HLS resource variable=result_unscaled core=MulnS + + ap_int<2*(MUL_INTEGER_BITS+MUL_FRACTIONAL_BITS)> result_scaled; + result_scaled = result_unscaled >> MUL_FRACTIONAL_BITS; + coord_type_ext result; + result = (coord_type_ext)result_scaled; + return result; +} + +// tree adder +coord_type_ext tree_adder(coord_type_ext *input_array,const uint m) +{ + #pragma HLS inline + + for(uint j=0;j uint((m+m-uint(m/(1<<(j)))*(1<<(j)))/(1<<(j+1)))*(1<<(j+1))) { + input_array[uint(m/(1<<(j+1)))] = input_array[uint(m/(1<<(j+1))-1)*2+2]; + //printf("[%d] = [%d]\n",uint(m/(1<<(j+1))),uint(m/(1<<(j+1))-1)*2+2); + } + } + if (j== ceil(log2(m))-1) { + coord_type_ext tmp1 = input_array[0]; + coord_type_ext tmp2 = input_array[1]; + coord_type_ext tmp3 = tmp1+tmp2; + input_array[0] = tmp3; + //printf("[%d] = [%d]+[%d]\n",0,0,1); + #pragma HLS resource variable=tmp3 core=AddSubnS + } + } + return input_array[0]; +} + + +// tree compare select +void tree_cs(coord_type_ext *input_array,centre_index_type *index_array,coord_type_ext *res_val,centre_index_type *res_idx,const uint m) +{ + #pragma HLS inline + + for(uint j=0;j uint(m/(1<<(j+1)))*(1<<(j+1)) ) { + input_array[uint(m/(1<<(j+1)))] = (input_array[uint(m/(1<<(j+1))-1)*2+2]); + index_array[uint(m/(1<<(j+1)))] = (index_array[uint(m/(1<<(j+1))-1)*2+2]); + } + } + if (j== ceil(log2(m))-1) { + coord_type_ext tmp1 = input_array[0]; + coord_type_ext tmp1_idx = index_array[0]; + coord_type_ext tmp2 = input_array[1]; + coord_type_ext tmp2_idx = index_array[1]; + coord_type_ext tmp3; + centre_index_type tmp3_idx; + if (tmp1 < tmp2) { + tmp3 = tmp1; + tmp3_idx = tmp1_idx; + } else { + tmp3 = tmp2; + tmp3_idx = tmp2_idx; + } + input_array[0] = (tmp3); + index_array[0] = (tmp3_idx); + } + } + *res_val= input_array[0]; + *res_idx= index_array[0]; +} + + +// compute the Euclidean distance +void compute_distance(data_type p1, data_type p2, coord_type_ext *dist) +{ + #pragma HLS inline + + data_type tmp_p1 = p1; + data_type tmp_p2 = p2; + coord_type_ext tmp_mul_res[D]; + + for (uint d=0; d