@article{05_ieee_stand_veril_regis_trans_level_synth,
	title = {{IEEE} Standard for {Verilog} Register Transfer Level Synthesis},
	journaltitle = {IEC 62142-2005 First edition 2005-06 IEEE Std 1364.1},
	pages = {1--116},
	year = {2005},
	type = {Standard},
	doi = {10.1109/IEEESTD.2005.339572},
	url = {https://doi.org/10.1109/IEEESTD.2005.339572}
}

@article{06_ieee_stand_veril_hardw_descr_languag,
	url = {https://doi.org/10.1109/IEEESTD.2006.99495},
	doi = {10.1109/IEEESTD.2006.99495},
	journaltitle = {IEEE Std 1364-2005 (Revision of IEEE Std 1364-2001)},
	month = 4,
	pages = {1--590},
	title = {{IEEE} Standard for {Verilog} Hardware Description Language},
	type = {Standard},
	year = {2006}
}

@inbook{aiken16_trace_sched,
	author = {Aiken, Alex and Banerjee, Utpal and Kejariwal, Arun and Nicolau, Alexandru},
	title = {Trace Scheduling},
	booktitle = {Instruction Level Parallelism},
	pages = {79--116},
	year = {2016},
	publisher = {Springer US},
	location = {Boston, MA},
	isbn = {978-1-4899-7797-7},
	doi = {10.1007/978-1-4899-7797-7_4},
	url = {https://doi.org/10.1007/978-1-4899-7797-7_4},
	keywords = {trace scheduling,static scheduling},
	abstract = {Since its introduction by Joseph A. Fisher in 1979, trace scheduling has influenced much of the work on compile-time ILP. Initially developed for use in microcode compaction, trace scheduling quickly became the main technique for machine-level compile-time parallelism exploitation. Trace scheduling has been used since the 1980s in many state-of-the-art compilers (e.g., Intel, Fujitsu, HP).}
}

@inproceedings{armand11_modul_integ_sat_smt_solver,
	abstract = {We present a way to enjoy the power of SAT and SMT provers in Coq without compromising soundness. This requires these provers to return not only a yes/no answer, but also a proof witness that can be independently rechecked. We present such a checker, written and fully certified in Coq. It is conceived in a modular way, in order to tame the proofs' complexity and to be extendable. It can currently check witnesses from the SAT solver ZChaff and from the SMT solver veriT. Experiments highlight the efficiency of this checker. On top of it, new reflexive Coq tactics have been built that can decide a subset of Coq's logic by calling external provers and carefully checking their answers.},
	author = {Armand, Michael and Faure, Germain and Grégoire, Benjamin and Keller, Chantal and Théry, Laurent and Werner, Benjamin},
	editor = {Jouannaud, Jean-Pierre and Shao, Zhong},
	location = {Berlin, Heidelberg},
	publisher = {Springer Berlin Heidelberg},
	booktitle = {Certified Programs and Proofs},
	isbn = {978-3-642-25379-9},
	keywords = {SAT,verification,coq},
	pages = {135--150},
	title = {A Modular Integration of {SAT/SMT} Solvers to {Coq} through Proof Witnesses},
	year = {2011}
}

@article{aubury96_handel_c_languag_refer_guide,
	author = {Aubury, Matthew and Page, Ian and Randall, Geoff and Saul, Jonathan and Watts, Robin},
	journaltitle = {Computing Laboratory. Oxford University, UK},
	title = {{Handel-C} Language Reference Guide},
	year = {1996}
}

@inproceedings{bachrach12_chisel,
	author = {Bachrach, Jonathan and Vo, Huy and Richards, Brian and Lee, Yunsup and Waterman, Andrew and Avižienis, Rimas and Wawrzynek, John and Asanović, Krste},
	organization = {IEEE},
	booktitle = {DAC Design Automation Conference 2012},
	doi = {10.1145/2228360.2228584},
	pages = {1212--1221},
	title = {{Chisel}: Constructing Hardware in a {Scala} Embedded Language},
	year = {2012}
}

% location is the publisher's city; the conference city is recorded in venue.
@inproceedings{ball93_branc_predic_free,
	keywords = {hyperblocks, static scheduling, scheduling},
	author = {Ball, Thomas and Larus, James R.},
	title = {Branch Prediction for Free},
	year = {1993},
	isbn = {0897915984},
	publisher = {Association for Computing Machinery},
	location = {New York, NY, USA},
	url = {https://doi.org/10.1145/155090.155119},
	doi = {10.1145/155090.155119},
	abstract = {Many compilers rely on branch prediction to improve program performance by identifying frequently executed regions and by aiding in scheduling instructions. Profile-based predictors require a time-consuming and inconvenient compile-profile-compile cycle in order to make predictions. We present a program-based branch predictor that performs well for a large and diverse set of programs written in C and Fortran. In addition to using natural loop analysis to predict branches that control the iteration of loops, we focus on heuristics for predicting non-loop branches, which dominate the dynamic branch count of many programs. The heuristics are simple and require little program analysis, yet they are effective in terms of coverage and miss rate. Although program-based prediction does not equal the accuracy of profile-based prediction, we believe it reaches a sufficiently high level to be useful. Additional type and semantic information available to a compiler would enhance our heuristics.},
	booktitle = {Proceedings of the ACM SIGPLAN 1993 Conference on Programming Language Design and Implementation},
	pages = {300--313},
	numpages = {14},
	venue = {Albuquerque, New Mexico, USA},
	series = {PLDI '93}
}

@article{banerjee14_verif_code_motion_techn_using_value_propag,
	author = {Banerjee, K. and Karfa, C. and Sarkar, D. and Mandal, C.},
	doi = {10.1109/TCAD.2014.2314392},
	issn = {1937-4151},
	journaltitle = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
	month = 8,
	number = {8},
	pages = {1180--1193},
	title = {Verification of Code Motion Techniques Using Value Propagation},
	volume = {33},
	year = {2014}
}

@article{barthe14_formal_verif_ssa_based_middl_end_compc,
	abstract = {CompCert is a formally verified compiler that generates compact and efficient code for a large subset of the C language. However, CompCert foregoes using SSA, an intermediate representation employed by many compilers that enables writing simpler, faster optimizers. In fact, it has remained an open problem to verify formally an SSA-based compiler. We report on a formally verified, SSA-based middle-end for CompCert. In addition to providing a formally verified SSA-based middle-end, we address two problems raised by Leroy in [2009]: giving an intuitive formal semantics to SSA, and leveraging its global properties to reason locally about program optimizations.},
	author = {Barthe, Gilles and Demange, Delphine and Pichardie, David},
	location = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	url = {https://doi.org/10.1145/2579080},
	doi = {10.1145/2579080},
	issn = {0164-0925},
	journaltitle = {ACM Transactions on Programming Languages and Systems},
	keywords = {CompCertSSA,CompCert,SSA,coq,verification,compiler optimisation},
	month = 3,
	number = {1},
	title = {Formal Verification of an {SSA}-Based Middle-End for {CompCert}},
	volume = {36},
	year = {2014}
}

@book{bertot04_inter_theor_provin_progr_devel,
	author = {Bertot, Yves and Castéran, Pierre},
	title = {Interactive Theorem Proving and Program Development},
	year = {2004},
	publisher = {Springer Berlin Heidelberg},
	doi = {10.1007/978-3-662-07964-5},
	url = {https://doi.org/10.1007/978-3-662-07964-5}
}

@inproceedings{bertot06_struc_approac_provin_compil_optim,
	author = {Bertot, Yves and Grégoire, Benjamin and Leroy, Xavier},
	editor = {Filliâtre, Jean-Christophe and Paulin-Mohring, Christine and Werner, Benjamin},
	title = {A Structured Approach to Proving Compiler Optimizations Based on Dataflow Analysis},
	booktitle = {Types for Proofs and Programs},
	pages = {66--81},
	year = {2006},
	publisher = {Springer},
	location = {Berlin, Heidelberg},
	isbn = {978-3-540-31429-5}
}

@article{besson18_compc,
	author = {Besson, Frédéric and Blazy, Sandrine and Wilke, Pierre},
	publisher = {Springer Science and Business Media {LLC}},
	url = {https://doi.org/10.1007/s10817-018-9496-y},
	doi = {10.1007/s10817-018-9496-y},
	journaltitle = {Journal of Automated Reasoning},
	month = 11,
	number = {2},
	pages = {369--392},
	title = {{CompCertS}: A Memory-Aware Verified {C} Compiler Using a Pointer as Integer Semantics},
	volume = {63},
	year = {2018}
}

@inproceedings{blazy05_formal_verif_memor_model_c,
	author = {Blazy, Sandrine and Leroy, Xavier},
	editor = {Lau, Kung-Kiu and Banach, Richard},
	location = {Berlin, Heidelberg},
	publisher = {Springer Berlin Heidelberg},
	booktitle = {Formal Methods and Software Engineering},
	doi = {10.1007/11576280_20},
	isbn = {978-3-540-32250-4},
	pages = {280--299},
	title = {Formal Verification of a Memory Model for {C}-Like Imperative Languages},
	year = {2005}
}

@inproceedings{bourgeat20_essen_blues,
	author = {Bourgeat, Thomas and Pit-Claudel, Clément and Chlipala, Adam and Arvind},
	title = {The Essence of {Bluespec}: A Core Language for Rule-Based Hardware Design},
	booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation},
	series = {PLDI 2020},
	pages = {243--257},
	year = {2020},
	publisher = {ACM},
	location = {London, UK},
	isbn = {9781450376136},
	doi = {10.1145/3385412.3385965},
	url = {https://doi.org/10.1145/3385412.3385965},
	keywords = {Hardware Description Language,Compiler Correctness,Semantics}
}

@inproceedings{bouton09,
	abstract = {This article describes the first public version of the satisfiability modulo theory (SMT) solver veriT. It is open-source, proof-producing, and complete for quantifier-free formulas with uninterpreted functions and difference logic on real numbers and integers.},
	author = {Bouton, Thomas and Caminha B. de Oliveira, Diego and Déharbe, David and Fontaine, Pascal},
	editor = {Schmidt, Renate A.},
	location = {Berlin, Heidelberg},
	publisher = {Springer Berlin Heidelberg},
	booktitle = {Automated Deduction -- CADE-22},
	isbn = {978-3-642-02959-2},
	pages = {151--156},
	title = {{veriT}: An Open, Trustable and Efficient {SMT}-Solver},
	year = {2009}
}

@article{bowen98_handel_c_languag_refer_manual,
	author = {Bowen, Matthew},
	journaltitle = {Embedded Solutions Ltd},
	title = {{Handel-C} Language Reference Manual},
	volume = {2},
	year = {1998}
}

@inproceedings{canis11_legup,
	author = {Canis, Andrew and Choi, Jongsok and Aldham, Mark and Zhang, Victor and Kammoona, Ahmed and Anderson, Jason Helge and Brown, Stephen Dean and Czajkowski, Tomasz S.},
	title = {{LegUp}: high-level synthesis for {FPGA}-based processor/accelerator systems},
	booktitle = {{FPGA}},
	pages = {33--36},
	year = {2011},
	publisher = {ACM},
	doi = {10.1145/1950413.1950423}
}

@article{canis13_legup,
	author = {Canis, Andrew and Choi, Jongsok and Aldham, Mark and Zhang, Victor and Kammoona, Ahmed and Czajkowski, Tomasz and Brown, Stephen D. and Anderson, Jason H.},
	location = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	doi = {10.1145/2514740},
	issn = {1539-9087},
	journaltitle = {ACM Transactions on Embedded Computing Systems},
	keywords = {High-level synthesis,FPGAs,hardware/software codesign,synthesis,performance,power,field-programmable gate arrays},
	month = 9,
	number = {2},
	title = {{LegUp}: An Open-Source High-Level Synthesis Tool for {FPGA}-Based Processor/Accelerator Systems},
	volume = {13},
	year = {2013}
}

@inproceedings{chapman92_verif_bedroc,
	author = {Chapman, R. and Brown, G. and Leeser, M.},
	publisher = {IEEE Computer Society},
	booktitle = {Proceedings of the European Conference on Design Automation},
	doi = {10.1109/EDAC.1992.205894},
	month = 3,
	pages = {59--63},
	title = {Verified High-Level Synthesis in {BEDROC}},
	year = {1992}
}

@misc{chauhan20_formal_ensur_equiv_c_rtl,
	author = {Chauhan, Pankaj},
	url = {https://bit.ly/2KbT0ki},
	title = {Formally Ensuring Equivalence between {C++} and {RTL} designs},
	year = {2020}
}

@inproceedings{cheng20_combin_dynam_static_sched_high_level_synth,
	author = {Cheng, Jianyi and Josipović, Lana and Constantinides, George A. and Ienne, Paolo and Wickerson, John},
	location = {Seaside, CA, USA},
	publisher = {Association for Computing Machinery},
	booktitle = {The 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
	doi = {10.1145/3373087.3375297},
	isbn = {9781450370998},
	keywords = {high-level synthesis,dynamic scheduling,static analysis},
	pages = {288--298},
	series = {FPGA '20},
	title = {Combining Dynamic \& Static Scheduling in High-Level Synthesis},
	year = {2020}
}

@inproceedings{choi18_hls_based_optim_desig_space,
	author = {Choi, Y. and Cong, J.},
	booktitle = {2018 IEEE/ACM International Conference on Computer-Aided Design (ICCAD)},
	doi = {10.1145/3240765.3240815},
	pages = {1--8},
	title = {{HLS}-Based Optimization and Design Space Exploration for Applications with Variable Loop Bounds},
	year = {2018}
}

@article{chouksey19_trans_valid_code_motion_trans_invol_loops,
	author = {{Chouksey}, R. and {Karfa}, C. and {Bhaduri}, P.},
	doi = {10.1109/TCAD.2018.2846654},
	issn = {1937-4151},
	journaltitle = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
	month = jul,
	number = {7},
	pages = {1378--1382},
	title = {Translation Validation of Code Motion Transformations Involving Loops},
	volume = {38},
	year = {2019}
}

@article{chouksey20_verif_sched_condit_behav_high_level_synth,
	author = {Chouksey, R. and Karfa, C.},
	url = {https://doi.org/10.1109/TVLSI.2020.2978242},
	doi = {10.1109/TVLSI.2020.2978242},
	issn = {1557-9999},
	journaltitle = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
	pages = {1--14},
	title = {Verification of Scheduling of Conditional Behaviors in High-Level Synthesis},
	year = {2020}
}

@inproceedings{clarke03_behav_c_veril,
	author = {Clarke, E. and Kroening, D. and Yorav, K.},
	url = {https://doi.org/10.1145/775832.775928},
	booktitle = {Proceedings of the 40th Design Automation Conference},
	doi = {10.1145/775832.775928},
	pages = {368--371},
	title = {Behavioral consistency of {C} and {Verilog} programs using bounded model checking},
	year = {2003}
}

@inproceedings{cong06_sdc,
	abstract = {Scheduling plays a central role in the behavioral synthesis process, which automatically compiles high-level specifications into optimized hardware implementations. However, most of the existing behavior-level scheduling heuristics either have a limited efficiency in a specific class of applications or lack general support of various design constraints. In this paper we describe a new scheduler that converts a rich set of scheduling constraints into a system of difference constraints (SDC) and performs a variety of powerful optimizations under a unified mathematical programming framework. In particular, we show that our SDC-based scheduling algorithm can efficiently support resource constraints, frequency constraints, latency constraints, and relative timing constraints, and effectively optimize longest path latency, expected overall latency, and the slack distribution. Experiments demonstrate that our proposed technique provides efficient solutions for a broader range of applications with higher quality of results (in terms of system performance) when compared to the state-of-the-art scheduling heuristics},
	author = {Cong, J. and Zhang, Zhiru},
	url = {https://doi.org/10.1145/1146909.1147025},
	booktitle = {2006 43rd ACM/IEEE Design Automation Conference},
	doi = {10.1145/1146909.1147025},
	issn = {0738-100X},
	keywords = {high-level synthesis,static scheduling},
	month = jul,
	pages = {433--438},
	title = {An efficient and versatile scheduling algorithm based on {SDC} formulation},
	year = {2006}
}

@article{cong11_high_level_synth_fpgas,
	author = {Cong, Jason and Liu, Bin and Neuendorffer, Stephen and Noguera, Juanjo and Vissers, Kees A. and Zhang, Zhiru},
	doi = {10.1109/TCAD.2011.2110592},
	journaltitle = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
	number = {4},
	pages = {473--491},
	title = {High-Level Synthesis for {FPGAs}: From Prototyping to Deployment},
	volume = {30},
	year = {2011}
}

@article{courant21_verif_code_gener_polyh_model,
	author = {Courant, Nathanaël and Leroy, Xavier},
	title = {Verified Code Generation for the Polyhedral Model},
	year = {2021},
	issue_date = {January 2021},
	publisher = {Association for Computing Machinery},
	location = {New York, NY, USA},
	volume = {5},
	number = {POPL},
	url = {https://doi.org/10.1145/3434321},
	doi = {10.1145/3434321},
	abstract = {The polyhedral model is a high-level intermediate representation for loop nests that supports elegantly a great many loop optimizations. In a compiler, after polyhedral loop optimizations have been performed, it is necessary and difficult to regenerate sequential or parallel loop nests before continuing compilation. This paper reports on the formalization and proof of semantic preservation of such a code generator that produces sequential code from a polyhedral representation. The formalization and proofs are mechanized using the Coq proof assistant.},
	journaltitle = {Proceedings of the ACM on Programming Languages},
	month = jan,
	articleno = {40},
	numpages = {24},
	keywords = {Compiler verification, Polyhedral code generation, Polyhedral model}
}

@inbook{coussy08_gaut,
	abstract = {This chapter presents GAUT, an academic and open-source high-level synthesis tool dedicated to digital signal processing applications. Starting from an algorithmic bit-accurate specification written in C/C++, GAUT extracts the potential parallelism before processing the allocation, the scheduling and the binding tasks. Mandatory synthesis constraints are the throughput and the clock period while the memory mapping and the I/O timing diagram are optional. GAUT next generates a potentially pipelined architecture composed of a processing unit, a memory unit and a communication with a GALS/LIS interface.},
	author = {Coussy, Philippe and Chavet, Cyrille and Bomel, Pierre and Heller, Dominique and Senn, Eric and Martin, Eric},
	editor = {Coussy, Philippe and Morawiec, Adam},
	location = {Dordrecht},
	publisher = {Springer Netherlands},
	url = {https://doi.org/10.1007/978-1-4020-8588-8_9},
	booktitle = {High-Level Synthesis: From Algorithm to Digital Circuit},
	doi = {10.1007/978-1-4020-8588-8_9},
	isbn = {978-1-4020-8588-8},
	pages = {147--169},
	title = {{GAUT}: A High-Level Synthesis Tool for {DSP} Applications},
	year = {2008}
}

@article{coussy09_introd_to_high_level_synth,
	author = {Coussy, P. and Gajski, D. D. and Meredith, M. and Takach, A.},
	doi = {10.1109/MDT.2009.69},
	journaltitle = {IEEE Design \& Test of Computers},
	keywords = {high level synthesis,high-level synthesis,optimized RTL hardware,abstraction level design,HLS techniques,High level synthesis,Assembly,Application software,Circuit simulation,Design methodology,Space exploration,Computer architecture,Design optimization,Hardware design languages,Circuit synthesis,RTL abstraction,custom processors,hardware synthesis and verification,architectures,design and test},
	month = jul,
	number = {4},
	pages = {8--17},
	title = {An Introduction To High-Level Synthesis},
	volume = {26},
	year = {2009}
}

@inproceedings{davidthomas_asap16,
	author = {Thomas, David B.},
	title = {Synthesisable recursion for {C++} {HLS} tools},
	booktitle = {{ASAP}},
	pages = {91--98},
	year = {2016},
	publisher = {{IEEE} Computer Society},
	doi = {10.1109/ASAP.2016.7760777}
}

@thesis{ellis08_correc,
	author = {Ellis, Martin},
	title = {Correct synthesis and integration of compiler-generated function units},
	type = {phdthesis},
	institution = {Newcastle University},
	year = {2008},
	url = {https://theses.ncl.ac.uk/jspui/handle/10443/828}
}

@article{fisher81_trace_sched,
	author = {Fisher, Joseph A.},
	title = {Trace Scheduling: A Technique for Global Microcode Compaction},
	journaltitle = {IEEE Transactions on Computers},
	volume = {C-30},
	number = {7},
	pages = {478--490},
	year = {1981},
	doi = {10.1109/TC.1981.1675827},
	keywords = {static scheduling,trace scheduling}
}

@inproceedings{gajski10_what_hls,
	author = {Gajski, Dan and Austin, Todd and Svoboda, Steve},
	booktitle = {Design Automation Conference},
	doi = {10.1145/1837274.1837489},
	pages = {857--858},
	title = {What input-language is the best choice for high level synthesis ({HLS})?},
	year = {2010}
}

@misc{gauthier20_high_level_synth,
	author = {Gauthier, Stephane and Wadood, Zubair},
	title = {High-Level Synthesis: Can it outperform hand-coded {HDL}?},
	year = {2020},
	url = {https://info.silexica.com/high-level-synthesis/1},
	note = {White paper}
}

% NOTE(review): hyphenated initial restored from the truncated export form "W. -." -- confirm against the paper.
@inproceedings{grass94_high,
	author = {Grass, W. and Mutz, M. and Tiedemann, W.-D.},
	booktitle = {Proceedings of Twentieth Euromicro Conference. System Architecture and Integration},
	doi = {10.1109/EURMIC.1994.390403},
	month = sep,
	pages = {83--91},
	title = {High level synthesis based on formal methods},
	year = {1994}
}

@inproceedings{greaves08_kiwi,
	author = {Greaves, David J. and Singh, Satnam},
	title = {Kiwi: Synthesis of {FPGA} Circuits from Parallel Programs},
	booktitle = {{FCCM}},
	pages = {3--12},
	year = {2008},
	publisher = {{IEEE} Computer Society},
	doi = {10.1109/FCCM.2008.46}
}

@misc{greaves19_resear_note,
	author = {Greaves, David J.},
	eprint = {1905.03746},
	eprintclass = {cs.PL},
	eprinttype = {arXiv},
	title = {Research Note: An Open Source {Bluespec} Compiler},
	year = {2019}
}

@inproceedings{gupta03_spark,
	author = {Gupta, S. and Dutt, N. and Gupta, R. and Nicolau, A.},
	url = {https://doi.org/10.1109/ICVD.2003.1183177},
	booktitle = {Proceedings of the 16th International Conference on VLSI Design},
	doi = {10.1109/ICVD.2003.1183177},
	issn = {1063-9667},
	month = jan,
	pages = {461--466},
	title = {{SPARK}: a high-level synthesis framework for applying parallelizing compiler transformations},
	year = {2003}
}

@inproceedings{herklotz20_findin_under_bugs_fpga_synth_tools,
	author = {Herklotz, Yann and Wickerson, John},
	location = {Seaside, CA, USA},
	booktitle = {ACM/SIGDA Int. Symp. on Field-Programmable Gate Arrays},
	doi = {10.1145/3373087.3375310},
	isbn = {978-1-4503-7099-8},
	keywords = {automated testing,compiler defect,compiler testing,random program generation,random testing},
	title = {Finding and Understanding Bugs in {FPGA} Synthesis Tools},
	year = {2020}
}

@inproceedings{herklotz21_empir_study_reliab_high_level_synth_tools,
	author = {Herklotz, Yann and Du, Zewei and Ramanathan, Nadesh and Wickerson, John},
	title = {An Empirical Study of the Reliability of High-Level Synthesis Tools},
	booktitle = {2021 IEEE 29th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)},
	pages = {219--223},
	year = {2021},
	doi = {10.1109/FCCM51124.2021.00034}
}

@article{herklotz21_formal_verif_high_level_synth,
	author = {Herklotz, Yann and Pollard, James D. and Ramanathan, Nadesh and Wickerson, John},
	title = {Formal Verification of High-Level Synthesis},
	journaltitle = {Proceedings of the ACM on Programming Languages},
	volume = {5},
	number = {OOPSLA},
	month = oct,
	year = {2021},
	publisher = {Association for Computing Machinery},
	location = {New York, NY, USA},
	doi = {10.1145/3485494},
	url = {https://doi.org/10.1145/3485494},
	keywords = {high-level synthesis,Coq,Verilog,CompCert,C},
	abstract = {High-level synthesis (HLS), which refers to the automatic compilation of software into hardware, is rapidly gaining popularity. In a world increasingly reliant on application-specific hardware accelerators, HLS promises hardware designs of comparable performance and energy efficiency to those coded by hand in a hardware description language such as Verilog, while maintaining the convenience and the rich ecosystem of software development. However, current HLS tools cannot always guarantee that the hardware designs they produce are equivalent to the software they were given, thus undermining any reasoning conducted at the software level. Furthermore, there is mounting evidence that existing HLS tools are quite unreliable, sometimes generating wrong hardware or crashing when given valid inputs. To address this problem, we present the first HLS tool that is mechanically verified to preserve the behaviour of its input software. Our tool, called Vericert, extends the CompCert verified C compiler with a new hardware-oriented intermediate language and a Verilog back end, and has been proven correct in Coq. Vericert supports most C constructs, including all integer operations, function calls, local arrays, structs, unions, and general control-flow statements. An evaluation on the PolyBench/C benchmark suite indicates that Vericert generates hardware that is around an order of magnitude slower (only around 2\texttimes{} slower in the absence of division) and about the same size as hardware generated by an existing, optimising (but unverified) HLS tool.}
}

@misc{herklotz21_veric,
	author = {Herklotz, Yann and Pollard, James D. and Ramanathan, Nadesh and Wickerson, John},
	title = {Vericert v1.2.1},
	version = {v1.2.1},
	month = jul,
	year = {2021},
	publisher = {Zenodo},
	doi = {10.5281/zenodo.5093839},
	url = {https://doi.org/10.5281/zenodo.5093839}
}

@inproceedings{homsirikamol14_can,
	author = {Homsirikamol, Ekawat and Gaj, Kris},
	title = {Can high-level synthesis compete against a hand-written code in the cryptographic domain? {A} case study},
	booktitle = {ReConFig},
	pages = {1--8},
	year = {2014},
	publisher = {IEEE},
	doi = {10.1109/ReConFig.2014.7032504}
}

% NOTE(review): hyphenated initials restored from the truncated export forms "C. -.", "J. -.", "Y. -." -- confirm against the paper.
@article{hwang91_formal_approac_to_sched_probl,
	author = {Hwang, C.-T. and Lee, J.-H. and Hsu, Y.-C.},
	url = {https://doi.org/10.1109/43.75629},
	doi = {10.1109/43.75629},
	journaltitle = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
	month = apr,
	number = {4},
	pages = {464--475},
	title = {A Formal Approach To the Scheduling Problem in High Level Synthesis},
	volume = {10},
	year = {1991}
}

% NOTE(review): the page range replaces the export artifact "7--es" -- TODO confirm against the publisher record for this DOI.
@inproceedings{hwang99_fsmd,
	author = {Hwang, Enoch and Vahid, Frank and Hsu, Yu-Chin},
	booktitle = {Proceedings of the Conference on Design, Automation and Test in Europe},
	doi = {10.1109/DATE.1999.761092},
	pages = {22--28},
	title = {{FSMD} functional partitioning for low power},
	year = {1999}
}

@inbook{hwu93_super,
	abstract = {A compiler for VLIW and superscalar processors must expose sufficient instruction-level parallelism (ILP) to effectively utilize the parallel hardware. However, ILP within basic blocks is extremely limited for control-intensive programs. We have developed a set of techniques for exploiting ILP across basic block boundaries. These techniques are based on a novel structure called the superblock. The superblock enables the optimizer and scheduler to extract more ILP along the important execution paths by systematically removing constraints due to the unimportant paths. Superblock optimization and scheduling have been implemented in the IMPACT-I compiler. This implementation gives us a unique opportunity to fully understand the issues involved in incorporating these techniques into a real compiler. Superblock optimizations and scheduling are shown to be useful while taking into account a variety of architectural features.},
	author = {Hwu, Wen-Mei W. and Mahlke, Scott A. and Chen, William Y. and Chang, Pohua P. and Warter, Nancy J. and Bringmann, Roger A. and Ouellette, Roland G. and Hank, Richard E. and Kiyohara, Tokuzo and Haab, Grant E. and Holm, John G. and Lavery, Daniel M.},
	editor = {Rau, B. R. and Fisher, J. A.},
	location = {Boston, MA},
	publisher = {Springer US},
	url = {https://doi.org/10.1007/978-1-4615-3200-2_7},
	booktitle = {Instruction-Level Parallelism: A Special Issue of The Journal of Supercomputing},
	doi = {10.1007/978-1-4615-3200-2_7},
	isbn = {978-1-4615-3200-2},
	keywords = {superblock scheduling,trace scheduling,static scheduling},
	pages = {229--248},
	title = {The Superblock: An Effective Technique for {VLIW} and Superscalar Compilation},
	year = {1993}
}

@misc{intel19_intel_quart,
	author = {Intel},
	title = {{Intel Quartus}},
	year = {2019},
	url = {https://intel.ly/2m7wbCs},
	urldate = {2019-01-14}
}

@misc{intel20_high_synth_compil,
	author = {Intel},
	title = {High-level Synthesis Compiler},
	year = {2020},
	url = {https://intel.ly/2UDiWr5},
	urldate = {2020-11-18}
}

@misc{intel20_sdk_openc_applic,
	author = {Intel},
	title = {{SDK} for {OpenCL} Applications},
	year = {2020},
	url = {https://intel.ly/30sYHz0},
	urldate = {2020-07-20}
}

% NOTE(review): the first author's family name is taken to be He (given name Jifeng) -- confirm. The citation key is unchanged so existing citations still resolve.
@inproceedings{jifeng93_towar,
	author = {He, Jifeng and Page, Ian and Bowen, Jonathan},
	editor = {Milne, George J. and Pierre, Laurence},
	location = {Berlin, Heidelberg},
	publisher = {Springer},
	booktitle = {Correct Hardware Design and Verification Methods},
	isbn = {978-3-540-70655-7},
	pages = {214--225},
	title = {Towards a provably correct hardware implementation of {Occam}},
	year = {1993}
}

@article{josipovic17_out_of_order_load_store,
	author = {Josipović, Lana and Brisk, Philip and Ienne, Paolo},
	location = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	doi = {10.1145/3126525},
	issn = {1539-9087},
	journaltitle = {ACM Transactions on Embedded Computing Systems},
	keywords = {Load-store queue,dynamic scheduling,allocation,spatial computing},
	month = sep,
	number = {5s},
	title = {An Out-Of-Order Load-Store Queue for Spatial Computing},
	volume = {16},
	year = {2017}
}

@inproceedings{josipovic18_dynam_sched_high_level_synth,
	author = {Josipović, Lana and Ghosal, Radhika and Ienne, Paolo},
	location = {Monterey, California, USA},
	publisher = {Association for Computing Machinery},
	booktitle = {Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
	doi = {10.1145/3174243.3174264},
	isbn = {9781450356145},
	keywords = {dynamically scheduled circuits,compiler,pipelining,high-level synthesis},
	pages = {127--136},
	series = {FPGA '18},
	title = {Dynamically Scheduled High-Level Synthesis},
	year = {2018}
}

@inproceedings{josipovic20_buffer_placem_sizin_high_perfor_dataf_circuit,
	author = {Josipović, Lana and Sheikhha, Shabnam and Guerrieri, Andrea and Ienne, Paolo and Cortadella, Jordi},
	title = {Buffer Placement and Sizing for High-Performance Dataflow Circuits},
	booktitle = {The 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
	series = {FPGA '20},
	pages = {186--196},
	year = {2020},
	publisher = {Association for Computing Machinery},
	location = {Seaside, CA, USA},
	isbn = {9781450370998},
	doi = {10.1145/3373087.3375314},
	keywords = {buffers,high-level synthesis,dataflow circuits,timing optimization}
}

@inproceedings{jourdan12_valid_lr_parser,
	author = {Jourdan, Jacques-Henri and Pottier, François and Leroy, Xavier},
	editor = {Seidl, Helmut},
	location = {Berlin, Heidelberg},
	publisher = {Springer Berlin Heidelberg},
	booktitle = {Programming Languages and Systems},
	doi = {10.1007/978-3-642-28869-2_20},
	isbn = {978-3-642-28869-2},
	pages = {397--416},
	title = {Validating {LR(1)} Parsers},
	year = {2012}
}

@inproceedings{karfa06_formal_verif_method_sched_high_synth,
	author = {Karfa, C. and Mandal, C. and Sarkar, D. and Pentakota, S. R. and Reade, Chris},
	location = {Washington, DC, USA},
	publisher = {IEEE Computer Society},
	booktitle = {Proceedings of the 7th International Symposium on Quality Electronic Design},
	doi = {10.1109/ISQED.2006.10},
	isbn = {0-7695-2523-7},
	pages = {71--78},
	series = {ISQED '06},
	title = {A Formal Verification Method of Scheduling in High-level Synthesis},
	year = {2006}
}

@inproceedings{karfa07_hand_verif_high_synth,
	author = {Karfa, C. and Sarkar, D. and Mandal, C. and Reade, C.},
	title = {Hand-in-hand Verification of High-level Synthesis},
	booktitle = {Proceedings of the 17th ACM Great Lakes Symposium on VLSI},
	series = {GLSVLSI '07},
	pages = {429--434},
	year = {2007},
	publisher = {ACM},
	location = {Stresa-Lago Maggiore, Italy},
	isbn = {978-1-59593-605-9},
	doi = {10.1145/1228784.1228885}
}

% Karfa et al. (IEEE TCAD, 2008): equivalence checking for scheduling verification.
% Surnames are written without the exporter's extra braces, matching the other entries.
@article{karfa08_equiv_check_method_sched_verif,
	author = {Karfa, C. and Sarkar, D. and Mandal, C. and Kumar, P.},
	doi = {10.1109/TCAD.2007.913390},
	issn = {1937-4151},
	journaltitle = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
	month = mar,
	number = {3},
	pages = {556--569},
	title = {An Equivalence-Checking Method for Scheduling Verification in High-Level Synthesis},
	volume = {27},
	year = {2008}
}

% Karfa, Sarkar and Mandal (IEEE TCAD, 2010): verifying datapath and controller generation.
% Surnames are written without the exporter's extra braces, matching the other entries.
@article{karfa10_verif_datap_contr_gener_phase,
	author = {Karfa, C. and Sarkar, D. and Mandal, C.},
	doi = {10.1109/TCAD.2009.2035542},
	issn = {1937-4151},
	journaltitle = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
	month = mar,
	number = {3},
	pages = {479--492},
	title = {Verification of Datapath and Controller Generation Phase in High-Level Synthesis of Digital Circuits},
	volume = {29},
	year = {2010}
}

% Karfa, Mandal and Sarkar (2012): equivalence checking of code motion techniques.
% The journal name is stored in full; abbreviation is left to the bibliography style.
@article{karfa12_formal_verif_code_motion_techn,
	author = {Karfa, C. and Mandal, C. and Sarkar, D.},
	location = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	doi = {10.1145/2209291.2209303},
	issn = {1084-4309},
	journaltitle = {ACM Transactions on Design Automation of Electronic Systems},
	month = jul,
	number = {3},
	title = {Formal Verification of Code Motion Techniques Using Data-Flow-Driven Equivalence Checking},
	volume = {17},
	year = {2012}
}

% Kildall (POPL '73): a unified approach to global program optimization.
@inproceedings{kildall73_unified_approac_global_progr_optim,
	author = {Kildall, Gary A.},
	title = {A Unified Approach to Global Program Optimization},
	booktitle = {Proceedings of the 1st Annual ACM SIGACT-SIGPLAN Symposium on Principles of Programming Languages},
	series = {POPL '73},
	pages = {194--206},
	year = {1973},
	publisher = {Association for Computing Machinery},
	location = {Boston, Massachusetts},
	doi = {10.1145/512927.512945},
	isbn = {9781450373494}
}

% Kim, Kopuri and Mansouri (2004): FSMD-based formal verification of scheduling.
% NOTE(review): booktitle inferred from the ISQED DOI prefix and the year; confirm against the publisher record.
@inproceedings{kim04_autom_fsmd,
	author = {Kim, Youngsik and Kopuri, S. and Mansouri, N.},
	booktitle = {Proceedings of the 5th International Symposium on Quality Electronic Design},
	doi = {10.1109/ISQED.2004.1283659},
	month = mar,
	pages = {110--115},
	title = {Automated formal verification of scheduling process using finite state machines with datapath ({FSMD})},
	year = {2004}
}

% Kundu, Lerner and Gupta (ICCAD 2007): automated refinement checking of concurrent systems.
% Names use the "Last, First" form so each surname is parsed and sorted correctly.
@inproceedings{kundu07_autom,
	author = {Kundu, Sudipta and Lerner, Sorin and Gupta, Rajesh},
	booktitle = {2007 IEEE/ACM International Conference on Computer-Aided Design},
	doi = {10.1109/ICCAD.2007.4397284},
	issn = {1558-2434},
	month = nov,
	pages = {318--325},
	title = {Automated refinement checking of concurrent systems},
	year = {2007}
}

% Kundu, Lerner and Gupta (Computer Aided Verification, 2008): validating high-level synthesis.
@inproceedings{kundu08_valid_high_level_synth,
	author = {Kundu, Sudipta and Lerner, Sorin and Gupta, Rajesh},
	editor = {Gupta, Aarti and Malik, Sharad},
	title = {Validating High-Level Synthesis},
	booktitle = {Computer Aided Verification},
	pages = {459--472},
	year = {2008},
	publisher = {Springer},
	location = {Berlin, Heidelberg},
	doi = {10.1007/978-3-540-70545-1_44},
	isbn = {978-3-540-70545-1}
}

% Lahti et al. (IEEE TCAD, 2019): study on the state of high-level synthesis.
% The subtitle after the question mark starts with a capital letter.
@article{lahti19_are_we_there_yet,
	author = {Lahti, S. and Sjövall, P. and Vanne, J. and Hämäläinen, T. D.},
	doi = {10.1109/TCAD.2018.2834439},
	issn = {1937-4151},
	journaltitle = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
	month = may,
	number = {5},
	pages = {898--911},
	title = {Are We There Yet? A Study on the State of High-Level Synthesis},
	volume = {38},
	year = {2019}
}

% Lam (PLDI '88): software pipelining as a scheduling technique for VLIW machines.
% Only location is kept (address is its biblatex alias and cannot coexist with it);
% as in the other proceedings entries here, it holds the conference place.
@inproceedings{lam88_softw_pipel,
	abstract = {This paper shows that software pipelining is an effective and viable scheduling technique for VLIW processors. In software pipelining, iterations of a loop in the source program are continuously initiated at constant intervals, before the preceding iterations complete. The advantage of software pipelining is that optimal performance can be achieved with compact object code. This paper extends previous results of software pipelining in two ways: First, this paper shows that by using an improved algorithm, near-optimal performance can be obtained without specialized hardware. Second, we propose a hierarchical reduction scheme whereby entire control constructs are reduced to an object similar to an operation in a basic block. With this scheme, all innermost loops, including those containing conditional statements, can be software pipelined. It also diminishes the start-up cost of loops with small number of iterations. Hierarchical reduction complements the software pipelining technique, permitting a consistent performance improvement be obtained. The techniques proposed have been validated by an implementation of a compiler for Warp, a systolic array consisting of 10 VLIW processors. This compiler has been used for developing a large number of applications in the areas of image, signal and scientific processing.},
	author = {Lam, M.},
	location = {Atlanta, Georgia, USA},
	publisher = {Association for Computing Machinery},
	url = {https://doi.org/10.1145/53990.54022},
	booktitle = {Proceedings of the ACM SIGPLAN 1988 Conference on Programming Language Design and Implementation},
	doi = {10.1145/53990.54022},
	isbn = {0897912691},
	numpages = {11},
	pages = {318--328},
	series = {PLDI '88},
	title = {Software Pipelining: An Effective Scheduling Technique for {VLIW} Machines},
	year = {1988}
}

% Leroy (POPL '06): formal certification of a compiler back-end.
@inproceedings{leroy06_formal_certif_compil_back_end,
	author = {Leroy, Xavier},
	title = {Formal Certification of a Compiler Back-End or: Programming a Compiler with a Proof Assistant},
	booktitle = {Conference Record of the 33rd ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
	series = {POPL '06},
	pages = {42--54},
	year = {2006},
	publisher = {ACM},
	location = {Charleston, South Carolina, USA},
	doi = {10.1145/1111037.1111042},
	url = {https://doi.org/10.1145/1111037.1111042},
	isbn = {1595930272}
}

% Leroy (Journal of Automated Reasoning, 2009): a formally verified compiler back-end.
% NOTE(review): the closing page of the range should be confirmed against the journal record.
@article{leroy09_formal_verif_compil_back_end,
	author = {Leroy, Xavier},
	doi = {10.1007/s10817-009-9155-4},
	issn = {1573-0670},
	journaltitle = {Journal of Automated Reasoning},
	number = {4},
	pages = {363--446},
	title = {A Formally Verified Compiler Back-End},
	volume = {43},
	year = {2009}
}

% Leroy (2009): formal verification of a realistic compiler.
% The journal name is stored in full; abbreviation is left to the bibliography style.
@article{leroy09_formal_verif_realis_compil,
	author = {Leroy, Xavier},
	location = {New York, NY, USA},
	publisher = {ACM},
	doi = {10.1145/1538788.1538814},
	issn = {0001-0782},
	journaltitle = {Communications of the ACM},
	month = jul,
	number = {7},
	pages = {107--115},
	title = {Formal Verification of a Realistic Compiler},
	volume = {52},
	year = {2009}
}

% Lidbury et al. (PLDI '15): many-core compiler fuzzing.
@inproceedings{lidbury15_many_core_compil_fuzzin,
	author = {Lidbury, Christopher and Lascu, Andrei and Chong, Nathan and Donaldson, Alastair F.},
	title = {Many-Core Compiler Fuzzing},
	booktitle = {Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation},
	series = {PLDI '15},
	pages = {65--76},
	year = {2015},
	publisher = {ACM},
	location = {Portland, OR, USA},
	doi = {10.1145/2737924.2737986},
	isbn = {9781450334686}
}

% Liu and Schafer (FPL 2016): HLS design space explorer for FPGAs.
% The first author uses the "Last, First" form so the surname is parsed and sorted correctly.
@inproceedings{liu16_effic_high_level_synth_desig,
	author = {Liu, Dong and Schafer, B. C.},
	booktitle = {2016 26th International Conference on Field Programmable Logic and Applications (FPL)},
	doi = {10.1109/FPL.2016.7577370},
	issn = {1946-1488},
	month = aug,
	pages = {1--8},
	title = {Efficient and reliable High-Level Synthesis Design Space Explorer for {FPGAs}},
	year = {2016}
}

% Lööw and Myreen (FormaliSE '19): proof-producing translator for Verilog development in HOL.
% Language and prover names are brace-protected against sentence-casing styles.
@inproceedings{loow19_proof_trans_veril_devel_hol,
	author = {Lööw, Andreas and Myreen, Magnus O.},
	location = {Montreal, Quebec, Canada},
	publisher = {IEEE Press},
	url = {https://doi.org/10.1109/FormaliSE.2019.00020},
	booktitle = {Proceedings of the 7th International Workshop on Formal Methods in Software Engineering},
	doi = {10.1109/FormaliSE.2019.00020},
	pages = {99--108},
	series = {FormaliSE '19},
	title = {A Proof-producing Translator for {Verilog} Development in {HOL}},
	year = {2019}
}

% Lööw et al. (PLDI 2019): verified compilation on a verified processor.
@inproceedings{loow19_verif_compil_verif_proces,
	author = {Lööw, Andreas and Kumar, Ramana and Tan, Yong Kiam and Myreen, Magnus O. and Norrish, Michael and Abrahamsson, Oskar and Fox, Anthony},
	title = {Verified Compilation on a Verified Processor},
	booktitle = {Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation},
	series = {PLDI 2019},
	pages = {1041--1053},
	year = {2019},
	publisher = {ACM},
	location = {Phoenix, AZ, USA},
	doi = {10.1145/3314221.3314622},
	isbn = {978-1-4503-6712-7},
	keywords = {compiler verification,hardware verification,program verification,verified stack}
}

% Lööw (CPP 2021): Lutsig, a verified Verilog compiler.
% Tool and language names are brace-protected against sentence-casing styles.
@inproceedings{loow21_lutsig,
	abstract = {We report on a new verified Verilog compiler called Lutsig. Lutsig currently targets (a class of) FPGAs and is capable of producing technology mapped netlists for FPGAs. We have connected Lutsig to existing Verilog development tools, and in this paper we show how Lutsig, as a consequence of this connection, fits into a hardware development methodology for verified circuits in the HOL4 theorem prover. One important step in the methodology is transporting properties proved at the behavioral Verilog level down to technology mapped netlists, and Lutsig is the component in the methodology that enables such transportation.},
	author = {Lööw, Andreas},
	location = {Virtual, Denmark},
	publisher = {ACM},
	url = {https://doi.org/10.1145/3437992.3439916},
	booktitle = {Proceedings of the 10th ACM SIGPLAN International Conference on Certified Programs and Proofs},
	doi = {10.1145/3437992.3439916},
	isbn = {9781450382991},
	keywords = {hardware verification,hardware synthesis,compiler verification},
	pages = {46--60},
	series = {CPP 2021},
	title = {{Lutsig}: A Verified {Verilog} Compiler for Verified Circuit Development},
	year = {2021}
}

% Mahlke et al. (1992): compiler support for predicated execution using the hyperblock.
% The journal name is stored in full and the double issue uses a proper range dash.
@article{mahlke92_effec_compil_suppor_predic_execut_using_hyper,
	author = {Mahlke, Scott A. and Lin, David C. and Chen, William Y. and Hank, Richard E. and Bringmann, Roger A.},
	location = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	url = {https://doi.org/10.1145/144965.144998},
	doi = {10.1145/144965.144998},
	issn = {1050-916X},
	journaltitle = {ACM SIGMICRO Newsletter},
	keywords = {speculative execution,static scheduling,hyperblocks},
	month = dec,
	number = {1--2},
	pages = {45--54},
	title = {Effective Compiler Support for Predicated Execution Using the Hyperblock},
	volume = {23},
	year = {1992}
}

% Mahlke et al. (1993): sentinel scheduling for compiler-controlled speculative execution.
% The journal name is stored in full; "Wen-mei" is spelled as in the other Hwu entry of this file.
@article{mahlke93_sentin_sched,
	abstract = {Speculative execution is an important source of parallelism for VLIW and superscalar processors. A serious challenge with compiler-controlled speculative execution is to efficiently handle exceptions for speculative instructions. In this article, a set of architectural features and compile-time scheduling support collectively referred to as sentinel scheduling is introduced. Sentinel scheduling provides an effective framework for both compiler-controlled speculative execution and exception handling. All program exceptions are accurately detected and reported in a timely manner with sentinel scheduling. Recovery from exceptions is also ensured with the model. Experimental results show the effectiveness of sentinel scheduling for exploiting instruction-level parallelism and overhead associated with exception handling.},
	author = {Mahlke, Scott A. and Chen, William Y. and Bringmann, Roger A. and Hank, Richard E. and Hwu, Wen-mei W. and Rau, B. Ramakrishna and Schlansker, Michael S.},
	location = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	url = {https://doi.org/10.1145/161541.159765},
	doi = {10.1145/161541.159765},
	issn = {0734-2071},
	journaltitle = {ACM Transactions on Computer Systems},
	keywords = {speculative execution,static scheduling,hyperblocks},
	month = nov,
	number = {4},
	pages = {376--408},
	title = {Sentinel Scheduling: A Model for Compiler-Controlled Speculative Execution},
	volume = {11},
	year = {1993}
}

% Mahlke et al. (MICRO 27): impact of predicated execution on branch prediction.
% Percent signs in the abstract are escaped so they cannot act as TeX comment characters.
@inproceedings{mahlke94_charac_impac_predic_execut_branc_predic,
	abstract = {Branch instructions are recognized as a major impediment to exploiting instruction level parallelism. Even with sophisticated branch prediction techniques, many frequently executed branches remain difficult to predict. An architecture supporting predicated execution may allow the compiler to remove many of these hard-to-predict branches, reducing the number of branch mispredictions and thereby improving performance. We present an in-depth analysis of the characteristics of those branches which are frequently mispredicted and examine the effectiveness of an advanced compiler to eliminate these branches. Over the benchmarks studied, an average of 27\% of the dynamic branches and 56\% of the dynamic branch mispredictions are eliminated with predicated execution support.},
	author = {Mahlke, Scott A. and Hank, Richard E. and Bringmann, Roger A. and Gyllenhaal, John C. and Gallagher, David M. and Hwu, Wen-mei W.},
	location = {San Jose, California, USA},
	publisher = {Association for Computing Machinery},
	url = {https://doi.org/10.1145/192724.192755},
	booktitle = {Proceedings of the 27th Annual International Symposium on Microarchitecture},
	doi = {10.1145/192724.192755},
	isbn = {0897917073},
	keywords = {speculative execution,static scheduling,hyperblocks},
	pages = {217--227},
	series = {MICRO 27},
	title = {Characterizing the Impact of Predicated Execution on Branch Prediction},
	year = {1994}
}

% Mentor, Catapult High-Level Synthesis: web reference with its access date.
@misc{mentor20_catap_high_level_synth,
	author = {Mentor},
	title = {Catapult High-Level Synthesis},
	year = {2020},
	url = {https://www.mentor.com/hls-lp/catapult-high-level-synthesis/c-systemc-hls},
	urldate = {2020-06-06}
}

% Meredith et al. (MEMOCODE 2010): a formal executable semantics of Verilog.
% Every name is in "Last, Initials" form; the last author's surname is Roşu.
@inproceedings{meredith10_veril,
	author = {Meredith, P. and Katelman, M. and Meseguer, J. and Roşu, G.},
	url = {https://doi.org/10.1109/MEMCOD.2010.5558634},
	booktitle = {Eighth ACM/IEEE International Conference on Formal Methods and Models for Codesign (MEMOCODE 2010)},
	doi = {10.1109/MEMCOD.2010.5558634},
	month = jul,
	pages = {179--188},
	title = {A formal executable semantics of {Verilog}},
	year = {2010}
}

% De Micheli (1994): Synthesis and Optimization of Digital Circuits (book).
% edition is an integer so the style can render the ordinal and the word "edition" itself.
@book{micheli94_synth_optim_digit_circuit,
	author = {De Micheli, Giovanni},
	publisher = {McGraw-Hill Higher Education},
	edition = {1},
	isbn = {0070163332},
	title = {Synthesis and Optimization of Digital Circuits},
	year = {1994}
}

% Nigam et al. (PLDI 2020): accelerator design with time-sensitive affine types.
@inproceedings{nigam20_predic_accel_desig_time_sensit_affin_types,
	author = {Nigam, Rachit and Atapattu, Sachille and Thomas, Samuel and Li, Zhijing and Bauer, Theodore and Ye, Yuwei and Koti, Apurva and Sampson, Adrian and Zhang, Zhiru},
	title = {Predictable Accelerator Design with Time-Sensitive Affine Types},
	booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation},
	series = {PLDI 2020},
	pages = {393--407},
	year = {2020},
	publisher = {ACM},
	location = {London, UK},
	doi = {10.1145/3385412.3385974},
	url = {https://doi.org/10.1145/3385412.3385974},
	isbn = {9781450376136},
	keywords = {Affine Type Systems,High-Level Synthesis}
}

% Nikhil (MEMOCODE '04): Bluespec System Verilog.
% The language name and the RTL acronym are brace-protected against sentence-casing styles.
@inproceedings{nikhil04_blues_system_veril,
	author = {Nikhil, R.},
	url = {https://doi.org/10.1109/MEMCOD.2004.1459818},
	booktitle = {Proceedings. Second ACM and IEEE International Conference on Formal Methods and Models for Co-Design, 2004. MEMOCODE '04.},
	doi = {10.1109/MEMCOD.2004.1459818},
	pages = {69--70},
	title = {{Bluespec System Verilog}: efficient, correct {RTL} from high level specifications},
	year = {2004}
}

% Noronha, Pinilla and Wilton (ReConFig 2017): inlining tuning for FPGA high-level synthesis.
% The FPGA acronym is brace-protected against sentence-casing styles.
@inproceedings{noronha17_rapid_fpga,
	author = {Noronha, D. H. and Pinilla, J. P. and Wilton, S. J. E.},
	booktitle = {2017 International Conference on ReConFigurable Computing and FPGAs (ReConFig)},
	doi = {10.1109/RECONFIG.2017.8279807},
	keywords = {high-level synthesis,FPGA,inlining,compiler optimisation},
	pages = {1--6},
	title = {Rapid circuit-specific inlining tuning for {FPGA} high-level synthesis},
	year = {2017}
}

% Ottenstein, Ballance and MacCabe (PLDI '90): the Program Dependence Web representation.
@inproceedings{ottenstein90_progr_depen_web,
	author = {Ottenstein, Karl J. and Ballance, Robert A. and MacCabe, Arthur B.},
	title = {The Program Dependence Web: A Representation Supporting Control-, Data-, and Demand-Driven Interpretation of Imperative Languages},
	booktitle = {Proceedings of the ACM SIGPLAN 1990 Conference on Programming Language Design and Implementation},
	series = {PLDI '90},
	pages = {257--271},
	year = {1990},
	publisher = {Association for Computing Machinery},
	location = {White Plains, New York, USA},
	doi = {10.1145/93542.93578},
	url = {https://doi.org/10.1145/93542.93578},
	isbn = {0897913647},
	keywords = {gated-SSA,SSA,program dependence graph},
	abstract = {The Program Dependence Web (PDW) is a program representation that can be directly interpreted using control-, data-, or demand-driven models of execution. A PDW combines a single-assignment version of the program with explicit operators that manage the flow of data values. The PDW can be viewed as an augmented Program Dependence Graph. Translation to the PDW representation provides the basis for projects to compile Fortran onto dynamic dataflow architectures and simulators. A second application of the PDW is the construction of various compositional semantics for program dependence graphs.}
}

% Page and Luk (1991): compiling Occam into field-programmable gate arrays.
% The language name is brace-protected against sentence-casing styles.
@inproceedings{page91_compil_occam,
	author = {Page, Ian and Luk, Wayne},
	booktitle = {FPGAs, Oxford Workshop on Field Programmable Logic and Applications},
	pages = {271--283},
	title = {Compiling {Occam} into field-programmable gate arrays},
	volume = {15},
	year = {1991}
}

% Paulin and Knight (DAC '89): scheduling and binding algorithms for high-level synthesis.
@inproceedings{paulin89_sched_bindin_algor_high_level_synth,
	author = {Paulin, P. G. and Knight, J. P.},
	title = {Scheduling and Binding Algorithms for High-Level Synthesis},
	booktitle = {Proceedings of the 26th ACM/IEEE Design Automation Conference},
	series = {DAC '89},
	pages = {1--6},
	year = {1989},
	publisher = {ACM},
	location = {Las Vegas, Nevada, USA},
	doi = {10.1145/74382.74383},
	url = {https://doi.org/10.1145/74382.74383},
	isbn = {0897913108}
}

% Pelcat et al. (SAMOS 2016): design productivity of an HLS compiler versus HDL.
% The HDL acronym is brace-protected against sentence-casing styles.
@inproceedings{pelcat16_desig_hdl,
	author = {Pelcat, Maxime and Bourrasset, Cédric and Maggiani, Luca and Berry, François},
	booktitle = {2016 International Conference on Embedded Computer Systems: Architectures, Modeling and Simulation (SAMOS)},
	doi = {10.1109/SAMOS.2016.7818341},
	pages = {140--147},
	title = {Design productivity of a high level synthesis compiler versus {HDL}},
	year = {2016}
}

% Perna et al. (Acta Informatica, 2011): correct hardware synthesis.
@article{perna11_correc_hardw_synth,
	author = {Perna, Juan and Woodcock, Jim and Sampaio, Augusto and Iyoda, Juliano},
	title = {Correct Hardware Synthesis},
	journaltitle = {Acta Informatica},
	volume = {48},
	number = {7},
	pages = {363--396},
	date = {2011-12-01},
	doi = {10.1007/s00236-011-0142-y},
	issn = {1432-0525}
}

% Perna and Woodcock (Science of Computer Programming, 2012): wire-wise verification of Handel-C synthesis.
@article{perna12_mechan_wire_wise_verif_handel_c_synth,
	author = {Perna, Juan and Woodcock, Jim},
	title = {Mechanised Wire-Wise Verification of {Handel-C} Synthesis},
	journaltitle = {Science of Computer Programming},
	volume = {77},
	number = {4},
	pages = {424--443},
	year = {2012},
	doi = {10.1016/j.scico.2010.02.007},
	issn = {0167-6423}
}

% Pilato and Ferrandi (FPL 2013): Bambu, a modular framework for high-level synthesis.
% The tool name is brace-protected; the conference name is cased like the other FPL entry.
@inproceedings{pilato13_bambu,
	author = {Pilato, C. and Ferrandi, F.},
	booktitle = {2013 23rd International Conference on Field Programmable Logic and Applications},
	doi = {10.1109/FPL.2013.6645550},
	pages = {1--4},
	title = {{Bambu}: A modular framework for the high level synthesis of memory-intensive applications},
	year = {2013}
}

% Pnueli, Siegel and Singerman (1998): translation validation.
@inproceedings{pnueli98_trans,
	author = {Pnueli, A. and Siegel, M. and Singerman, E.},
	editor = {Steffen, Bernhard},
	title = {Translation validation},
	booktitle = {Tools and Algorithms for the Construction and Analysis of Systems},
	pages = {151--166},
	year = {1998},
	publisher = {Springer},
	location = {Berlin, Heidelberg},
	doi = {10.1007/BFb0054170},
	isbn = {978-3-540-69753-4}
}

% Zhao et al. (ICCAD 2017): COMBA, a model-based analysis framework for high-level synthesis.
% The framework name is brace-protected against sentence-casing styles.
@inproceedings{poly_hls_zhao2017,
	author = {Zhao, Jieru and Feng, Liang and Sinha, Sharad and Zhang, Wei and Liang, Yun and He, Bingsheng},
	booktitle = {2017 IEEE/ACM International Conference on Computer-Aided Design (ICCAD)},
	doi = {10.1109/ICCAD.2017.8203809},
	pages = {430--437},
	title = {{COMBA}: A comprehensive model-based analysis framework for high level synthesis of real applications},
	year = {2017}
}

% Zuo et al. (CODES+ISSS 2013): polyhedral code generation for high-level synthesis.
% doi holds the bare identifier; the resolver prefix is added by the bibliography style.
@inproceedings{poly_hls_zuo2013,
	author = {Zuo, Wei and Li, Peng and Chen, Deming and Pouchet, Louis-Noël and Zhong, Shunan and Cong, Jason},
	organization = {IEEE},
	booktitle = {2013 International Conference on Hardware/Software Codesign and System Synthesis (CODES+ISSS)},
	doi = {10.1109/CODES-ISSS.2013.6659002},
	pages = {1--10},
	title = {Improving polyhedral code generation for high-level synthesis},
	year = {2013}
}

% Pouchet et al. (2013): polyhedral-based data reuse optimization for configurable computing.
% doi holds the bare identifier; the first author's given name is spelled as in the Zuo et al. entry.
@inproceedings{pouchet13_polyh,
	author = {Pouchet, Louis-Noël and Zhang, Peng and Sadayappan, Ponnuswamy and Cong, Jason},
	booktitle = {Proceedings of the ACM/SIGDA International Symposium on Field Programmable Gate Arrays},
	doi = {10.1145/2435264.2435273},
	pages = {29--38},
	title = {Polyhedral-based data reuse optimization for configurable computing},
	year = {2013}
}

% Rau, Schlansker and Tirumalai (MICRO 25): code generation schema for modulo scheduled loops.
@inproceedings{rau92_code_gener_schem_sched_loops,
	author = {Rau, B. Ramakrishna and Schlansker, Michael S. and Tirumalai, P. P.},
	location = {Portland, Oregon, USA},
	publisher = {IEEE Computer Society Press},
	booktitle = {Proceedings of the 25th Annual International Symposium on Microarchitecture},
	isbn = {0818631759},
	keywords = {modulo scheduling,code motion,loop scheduling,software pipelining,rotating registers},
	pages = {158--169},
	series = {MICRO 25},
	title = {Code Generation Schema for Modulo Scheduled Loops},
	year = {1992}
}

% Rau et al. (PLDI '92): register allocation for software pipelined loops.
@inproceedings{rau92_regis_alloc_softw_pipel_loops,
	author = {Rau, B. R. and Lee, M. and Tirumalai, P. P. and Schlansker, M. S.},
	title = {Register Allocation for Software Pipelined Loops},
	booktitle = {Proceedings of the ACM SIGPLAN 1992 Conference on Programming Language Design and Implementation},
	series = {PLDI '92},
	pages = {283--299},
	year = {1992},
	publisher = {Association for Computing Machinery},
	location = {San Francisco, California, USA},
	doi = {10.1145/143095.143141},
	url = {https://doi.org/10.1145/143095.143141},
	isbn = {0897914759},
	keywords = {software pipelining,loop scheduling,register allocation,rotating registers,compiler optimisation},
	abstract = {Software pipelining is an important instruction scheduling technique for efficiently overlapping successive iterations of loops and executing them in parallel. This paper studies the task of register allocation for software pipelined loops, both with and without hardware features that are specifically aimed at supporting software pipelines. Register allocation for software pipelines presents certain novel problems leading to unconventional solutions, especially in the presence of hardware support. This paper formulates these novel problems and presents a number of alternative solution strategies. These alternatives are comprehensively tested against over one thousand loops to determine the best register allocation strategy, both with and without the hardware support for software pipelining.}
}

% Rau (MICRO 27): iterative modulo scheduling, conference version.
@inproceedings{rau94_iterat_sched,
	abstract = {Modulo scheduling is a framework within which a wide variety of algorithms and heuristics may be defined for software pipelining innermost loops. This paper presents a practical algorithm, iterative modulo scheduling, that is capable of dealing with realistic machine models. This paper also characterizes the algorithm in terms of the quality of the generated schedules as well the computational expense incurred.},
	author = {Rau, B. Ramakrishna},
	location = {San Jose, California, USA},
	publisher = {Association for Computing Machinery},
	url = {https://doi.org/10.1145/192724.192731},
	booktitle = {Proceedings of the 27th Annual International Symposium on Microarchitecture},
	doi = {10.1145/192724.192731},
	isbn = {0897917073},
	keywords = {software pipelining,loop scheduling,rotating registers,code motion,compiler optimisation,modulo scheduling},
	pages = {63--74},
	series = {MICRO 27},
	title = {Iterative Modulo Scheduling: An Algorithm for Software Pipelining Loops},
	year = {1994}
}

% Rau (International Journal of Parallel Programming, 1996): iterative modulo scheduling, journal version.
% The doi field carries the identifier already present in the url.
@article{rau96_iterat_modul_sched,
	abstract = {Modulo scheduling is a framework within which algorithms for software pipelining innermost loops may be defined. The framework specifies a set of constraints that must be met in order to achieve a legal modulo schedule. A wide variety of algorithms and heuristics can be defined within this framework. Little work has been done to evaluate and compare alternative algorithms and heuristics for modulo scheduling from the viewpoints of schedule quality as well as computational complexity. This, along with a vague and unfounded perception that modulo scheduling is computationally expensive as well as difficult to implement, have inhibited its incorporation into product compilers. This paper presents iterative modulo scheduling, a practical algorithm that is capable of dealing with realistic machine models. The paper also characterizes the algorithm in terms of the quality of the generated schedules as well the computational expense incurred.},
	author = {Rau, B. Ramakrishna},
	url = {https://doi.org/10.1007/BF03356742},
	date = {1996-02-01},
	doi = {10.1007/BF03356742},
	issn = {1573-7640},
	journaltitle = {International Journal of Parallel Programming},
	keywords = {loop scheduling,software pipelining,code motion,compiler optimisation,rotating registers,modulo scheduling},
	number = {1},
	pages = {3--64},
	title = {Iterative Modulo Scheduling},
	volume = {24}
}

% Schuiki et al. (PLDI 2020): LLHD, a multi-level IR for hardware description languages.
% The LLHD acronym is brace-protected against sentence-casing styles.
@inproceedings{schuiki20_llhd,
	author = {Schuiki, Fabian and Kurth, Andreas and Grosser, Tobias and Benini, Luca},
	location = {London, UK},
	publisher = {ACM},
	url = {https://doi.org/10.1145/3385412.3386024},
	booktitle = {Proceedings of the 41st ACM SIGPLAN Conference on Programming Language Design and Implementation},
	doi = {10.1145/3385412.3386024},
	isbn = {9781450376136},
	pages = {258--271},
	series = {PLDI 2020},
	title = {{LLHD}: A Multi-Level Intermediate Representation for Hardware Description Languages},
	year = {2020}
}

% Ševčík et al. (2013): CompCertTSO, a verified compiler for relaxed-memory concurrency.
% The first surname uses the precomposed i-acute; the journal name is stored in full.
@article{sevcik13_compc,
	abstract = {In this article, we consider the semantic design and verified compilation of a C-like programming language for concurrent shared-memory computation on x86 multiprocessors. The design of such a language is made surprisingly subtle by several factors: the relaxed-memory behavior of the hardware, the effects of compiler optimization on concurrent code, the need to support high-performance concurrent algorithms, and the desire for a reasonably simple programming model. In turn, this complexity makes verified compilation both essential and challenging. We describe ClightTSO, a concurrent extension of CompCert’s Clight in which the TSO-based memory model of x86 multiprocessors is exposed for high-performance code, and CompCertTSO, a formally verified compiler from ClightTSO to x86 assembly language, building on CompCert. CompCertTSO is verified in Coq: for any well-behaved and successfully compiled ClightTSO source program, any permitted observable behavior of the generated assembly code (if it does not run out of memory) is also possible in the source semantics. We also describe some verified fence-elimination optimizations, integrated into CompCertTSO.},
	author = {Ševčík, Jaroslav and Vafeiadis, Viktor and Zappa Nardelli, Francesco and Jagannathan, Suresh and Sewell, Peter},
	location = {New York, NY, USA},
	publisher = {ACM},
	url = {https://doi.org/10.1145/2487241.2487248},
	doi = {10.1145/2487241.2487248},
	issn = {0004-5411},
	journaltitle = {Journal of the ACM},
	keywords = {semantics,Relaxed memory models,verified compilation},
	month = jun,
	number = {3},
	title = {{CompCertTSO}: A Verified Compiler for Relaxed-Memory Concurrency},
	volume = {60},
	year = {2013}
}

% Six, Boulmé and Monniaux (OOPSLA issue, 2020): certified instruction scheduling for VLIW processors.
% The journal name is stored in full and the VLIW acronym is brace-protected.
@article{six20_certif_effic_instr_sched,
	abstract = {CompCert is a moderately optimizing C compiler with a formal, machine-checked, proof of correctness: after successful compilation, the assembly code has a behavior faithful to the source code. Previously, it only supported target instruction sets with sequential semantics, and did not attempt reordering instructions for optimization. We present here a CompCert backend for a VLIW core (i.e. with explicit parallelism at the instruction level), the first CompCert backend providing scalable and efficient instruction scheduling. Furthermore, its highly modular implementation can be easily adapted to other VLIW or non-VLIW pipelined processors.},
	author = {Six, Cyril and Boulmé, Sylvain and Monniaux, David},
	location = {New York, NY, USA},
	publisher = {Association for Computing Machinery},
	url = {https://doi.org/10.1145/3428197},
	doi = {10.1145/3428197},
	journaltitle = {Proceedings of the ACM on Programming Languages},
	keywords = {coq,translation validation,scheduling,static scheduling,verification,VLIW,operational semantics},
	month = nov,
	number = {OOPSLA},
	title = {Certified and Efficient Instruction Scheduling: Application to Interlocked {VLIW} Processors},
	volume = {4},
	year = {2020}
}

% Six et al. (2021): superblock scheduling preprint, hosted on HAL.
% The title is single-braced so the bibliography style controls its casing.
@unpublished{six21_verif_super_sched_relat_optim,
	author = {Six, Cyril and Gourdin, Léo and Boulmé, Sylvain and Monniaux, David},
	url = {https://hal.archives-ouvertes.fr/hal-03200774},
	file = {https://hal.archives-ouvertes.fr/hal-03200774/file/hal_prepass_scheduling.pdf},
	keywords = {coq,translation validation,scheduling,static scheduling,verification,VLIW,operational semantics},
	month = apr,
	note = {working paper or preprint},
	title = {Verified Superblock Scheduling with Related Optimizations},
	year = {2021}
}

% Six et al. (CPP 2022): formally verified superblock scheduling.
@inproceedings{six22_formal_verif_super_sched,
	author = {Six, Cyril and Gourdin, Léo and Boulmé, Sylvain and Monniaux, David and Fasse, Justus and Nardino, Nicolas},
	title = {Formally Verified Superblock Scheduling},
	booktitle = {Proceedings of the 11th ACM SIGPLAN International Conference on Certified Programs and Proofs},
	series = {CPP 2022},
	pages = {40--54},
	year = {2022},
	publisher = {Association for Computing Machinery},
	location = {Philadelphia, PA, USA},
	doi = {10.1145/3497775.3503679},
	url = {https://doi.org/10.1145/3497775.3503679},
	isbn = {9781450391825},
	keywords = {Symbolic execution,Instruction-level parallelism,Translation validation,the COQ proof assistant},
	abstract = {On in-order processors, without dynamic instruction scheduling, program running times may be significantly reduced by compile-time instruction scheduling. We present here the first effective certified instruction scheduler that operates over superblocks (it may move instructions across branches), along with its performance evaluation. It is integrated within the CompCert C compiler, providing a complete machine-checked proof of semantic preservation from C to assembly. Our optimizer composes several passes designed by translation validation: program transformations are proposed by untrusted oracles, which are then validated by certified and scalable checkers. Our main checker is an architecture-independent simulation-test over superblocks modulo register liveness, which relies on hash-consed symbolic execution.}
}

% Slind and Norrish (2008): a brief overview of HOL4.
% Keywords are comma-separated, as in every other entry, so each one is indexed separately.
@inproceedings{slind08_brief_overv_hol4,
	author = {Slind, Konrad and Norrish, Michael},
	editor = {Mohamed, Otmane Ait and Muñoz, César and Tahar, Sofiène},
	location = {Berlin, Heidelberg},
	publisher = {Springer Berlin Heidelberg},
	booktitle = {Theorem Proving in Higher Order Logics},
	isbn = {978-3-540-71067-7},
	keywords = {theorem proving,HOL},
	pages = {28--32},
	title = {A Brief Overview of {HOL4}},
	year = {2008}
}

% Koeplinger et al. (PLDI 2018): Spatial, a language and compiler for application accelerators.
% doi holds the bare identifier; booktitle is spelled out like the other PLDI entries.
% NOTE(review): the "39th" ordinal is counted back from the 40th (2019) and 41st (2020) entries; confirm.
@inproceedings{spatial,
	author = {Koeplinger, David and Feldman, Matthew and Prabhakar, Raghu and Zhang, Yaqi and Hadjis, Stefan and Fiszel, Ruben and Zhao, Tian and Nardi, Luigi and Pedram, Ardavan and Kozyrakis, Christos and Olukotun, Kunle},
	publisher = {ACM},
	booktitle = {Proceedings of the 39th ACM SIGPLAN Conference on Programming Language Design and Implementation},
	doi = {10.1145/3192366.3192379},
	pages = {296--311},
	title = {Spatial: A Language and Compiler for Application Accelerators},
	year = {2018}
}

@article{takach16_high_level_synth,
	author = {Takach, A.},
	url = {https://doi.org/10.1109/MDAT.2016.2544850},
	doi = {10.1109/MDAT.2016.2544850},
	issn = {2168-2364},
	journaltitle = {IEEE Design \& Test},
	month = jun,
	number = {3},
	pages = {116--124},
	title = {High-Level Synthesis: Status, Trends, and Future Directions},
	volume = {33},
	year = {2016}
}

@inproceedings{tristan08_formal_verif_trans_valid,
	author = {Tristan, Jean-Baptiste and Leroy, Xavier},
	title = {Formal Verification of Translation Validators: A Case Study on Instruction Scheduling Optimizations},
	booktitle = {Proceedings of the 35th Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
	series = {POPL '08},
	pages = {17--27},
	year = {2008},
	publisher = {ACM},
	location = {San Francisco, California, USA},
	isbn = {9781595936899},
	doi = {10.1145/1328438.1328444}
}

@inproceedings{tristan10_simpl_verif_valid_softw_pipel,
	author = {Tristan, Jean-Baptiste and Leroy, Xavier},
	title = {A Simple, Verified Validator for Software Pipelining},
	booktitle = {Proceedings of the 37th Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages},
	series = {POPL '10},
	pages = {83--92},
	year = {2010},
	publisher = {Association for Computing Machinery},
	location = {Madrid, Spain},
	isbn = {9781605584799},
	doi = {10.1145/1706299.1706311},
	url = {https://doi.org/10.1145/1706299.1706311},
	keywords = {symbolic execution,coq,verification,translation validation,loop scheduling,compiler optimisation,software pipelining}
}

@inproceedings{venkataramani07_operat,
	author = {Venkataramani, Girish and Goldstein, Seth C.},
	title = {Operation chaining asynchronous pipelined circuits},
	booktitle = {2007 IEEE/ACM International Conference on Computer-Aided Design},
	pages = {442--449},
	year = {2007},
	doi = {10.1109/ICCAD.2007.4397305},
	keywords = {operation chaining}
}

@article{wang20_compc,
	abstract = {We present CompCertELF, the first extension to CompCert that supports verified compilation from C programs all the way to a standard binary file format, i.e., the ELF object format. Previous work on Stack-Aware CompCert provides a verified compilation chain from C programs to assembly programs with a realistic machine memory model. We build CompCertELF by modifying and extending this compilation chain with a verified assembler which further transforms assembly programs into ELF object files. CompCert supports large-scale verification via verified separate compilation: C modules can be written and compiled separately, and then linked together to get a target program that refines the semantics of the program linked from the source modules. However, verified separate compilation in CompCert only works for compilation to assembly programs, not to object files. For the latter, the main difficulty is to bridge the two different views of linking: one for CompCert's programs that allows arbitrary shuffling of global definitions by linking and the other for object files that treats blocks of encoded definitions as indivisible units. We propose a lightweight approach that solves the above problem without any modification to CompCert's framework for verified separate compilation: by introducing a notion of syntactical equivalence between programs and proving the commutativity between syntactical equivalence and the two different kinds of linking, we are able to transit from the more abstract linking operation in CompCert to the more concrete one for ELF object files. By applying this approach to CompCertELF, we obtain the first compiler that supports verified separate compilation of C programs into ELF object files.},
	author = {Wang, Yuting and Xu, Xiangzhe and Wilke, Pierre and Shao, Zhong},
	location = {New York, NY, USA},
	publisher = {ACM},
	url = {https://doi.org/10.1145/3428265},
	doi = {10.1145/3428265},
	journaltitle = {Proc. ACM Program. Lang.},
	keywords = {Generation of Object Files,Assembler Verification,Verified Separate Compilation},
	month = nov,
	number = {OOPSLA},
	title = {{CompCertELF}: Verified Separate Compilation of {C} Programs into {ELF} Object Files},
	volume = {4},
	year = {2020}
}

@misc{wolf_yosys_open_synth_suite,
	author = {Wolf, Clifford},
	url = {https://bit.ly/2kAXg0q},
	title = {{Yosys} Open {SYnthesis} Suite},
	urldate = {2019-01-11},
	year = {2019}
}

@misc{xilinx20_vivad_high_synth,
	author = {Xilinx},
	title = {Vivado High-level Synthesis},
	year = {2020},
	url = {https://bit.ly/39ereMx},
	urldate = {2020-07-20}
}

@misc{xilinx_vivad_desig_suite,
	author = {Xilinx},
	url = {https://bit.ly/2wZAmld},
	title = {{Vivado} Design Suite},
	urldate = {2019-01-14},
	year = {2019}
}

@misc{xilinx_xst_synth_overv,
	author = {Xilinx},
	title = {{XST} Synthesis Overview},
	year = {2019},
	url = {https://bit.ly/2lGtkjL},
	urldate = {2019-01-11}
}

@inproceedings{yang11_findin_under_bugs_c_compil,
	author = {Yang, Xuejun and Chen, Yang and Eide, Eric and Regehr, John},
	title = {Finding and Understanding Bugs in {C} Compilers},
	booktitle = {Proceedings of the 32nd ACM SIGPLAN Conference on Programming Language Design and Implementation},
	series = {PLDI '11},
	pages = {283--294},
	year = {2011},
	publisher = {ACM},
	location = {San Jose, California, USA},
	isbn = {9781450306638},
	doi = {10.1145/1993498.1993532},
	url = {https://doi.org/10.1145/1993498.1993532},
	keywords = {random program generation,random testing,automated testing,compiler testing,compiler defect}
}

@inproceedings{zhao12_formal_llvm_inter_repres_verif_progr_trans,
	author = {Zhao, Jianzhou and Nagarakatte, Santosh and Martin, Milo M. K. and Zdancewic, Steve},
	editor = {Field, John and Hicks, Michael},
	title = {Formalizing the {LLVM} intermediate representation for verified program transformations},
	booktitle = {Proceedings of the 39th {ACM} {SIGPLAN-SIGACT} Symposium on Principles of Programming Languages, {POPL} 2012, Philadelphia, Pennsylvania, USA, January 22-28, 2012},
	pages = {427--440},
	year = {2012},
	publisher = {ACM},
	doi = {10.1145/2103656.2103709},
	url = {https://doi.org/10.1145/2103656.2103709}
}

@inproceedings{zhu13_mechan_approac_linkin_operat_seman,
	author = {Zhu, Huibiao and Liu, Peng and He, Jifeng and Qin, Shengchao},
	editor = {Wolff, Burkhart and Gaudel, Marie-Claude and Feliachi, Abderrahmane},
	title = {Mechanical Approach to Linking Operational Semantics and Algebraic Semantics for Verilog Using Maude},
	booktitle = {Unifying Theories of Programming},
	pages = {164--185},
	year = {2013},
	publisher = {Springer Berlin Heidelberg},
	location = {Berlin, Heidelberg},
	isbn = {978-3-642-35705-3}
}

@inproceedings{zhang13_sdc,
	abstract = {Modulo scheduling is a popular technique to enable pipelined execution of successive loop iterations for performance improvement. While a variety of modulo scheduling algorithms exist for software pipelining, they are not amenable to many complex design constraints and optimization goals that arise in the hardware synthesis context. In this paper we describe a modulo scheduling framework based on the formulation of system of difference constraints (SDC). Our framework can systematically model a rich set of performance constraints that are specific to the hardware design. The scheduler also exploits the unique mathematical properties of SDC to carry out efficient global optimization and fast incremental update on the constraint system to minimize the resource usage of the synthesized pipeline. Experiments demonstrate that our proposed technique provides efficient solutions for a set of real-life applications and compares favorably against a widely used lifetime-sensitive modulo scheduling algorithm.},
	author = {Zhang, Z. and Liu, B.},
	url = {https://doi.org/10.1109/ICCAD.2013.6691121},
	booktitle = {2013 IEEE/ACM International Conference on Computer-Aided Design (ICCAD)},
	doi = {10.1109/ICCAD.2013.6691121},
	issn = {1558-2434},
	keywords = {high level synthesis,pipeline processing,scheduling,SDC-based modulo scheduling,pipeline synthesis,hardware design,mathematical properties,global optimization,incremental update,Schedules,Pipeline processing,Registers,Optimal scheduling,Scheduling algorithms,Timing},
	month = nov,
	pages = {211--218},
	title = {{SDC}-based modulo scheduling for pipeline synthesis},
	year = {2013}
}

@inproceedings{ball93_branc_predic_free,
	keywords = {if-conversion},
	author = {Ball, Thomas and Larus, James R.},
	title = {Branch Prediction for Free},
	year = {1993},
	isbn = {0897915984},
	publisher = {Association for Computing Machinery},
	address = {New York, NY, USA},
	url = {https://doi.org/10.1145/155090.155119},
	doi = {10.1145/155090.155119},
	abstract = {Many compilers rely on branch prediction to improve program performance by identifying frequently executed regions and by aiding in scheduling instructions. Profile-based predictors require a time-consuming and inconvenient compile-profile-compile cycle in order to make predictions. We present a program-based branch predictor that performs well for a large and diverse set of programs written in C and Fortran. In addition to using natural loop analysis to predict branches that control the iteration of loops, we focus on heuristics for predicting non-loop branches, which dominate the dynamic branch count of many programs. The heuristics are simple and require little program analysis, yet they are effective in terms of coverage and miss rate. Although program-based prediction does not equal the accuracy of profile-based prediction, we believe it reaches a sufficiently high level to be useful. Additional type and semantic information available to a compiler would enhance our heuristics.},
	booktitle = {Proceedings of the ACM SIGPLAN 1993 Conference on Programming Language Design and Implementation},
	pages = {300--313},
	numpages = {14},
	location = {Albuquerque, New Mexico, USA},
	series = {PLDI '93}
}