aboutsummaryrefslogtreecommitdiffstats
path: root/cparser
diff options
context:
space:
mode:
author Xavier Leroy <xavier.leroy@college-de-france.fr> 2021-11-09 15:57:45 +0100
committer Xavier Leroy <xavier.leroy@college-de-france.fr> 2021-11-16 09:30:36 +0100
commit 168495d726e623e0b4bd6364f949ae577fa8b52e (patch)
tree 0591835e5eb831cd5b6e19988374867b3950ac5b /cparser
parent 6431b483760b6b039f97a1749a055a3c181084b4 (diff)
download compcert-kvx-168495d726e623e0b4bd6364f949ae577fa8b52e.tar.gz
compcert-kvx-168495d726e623e0b4bd6364f949ae577fa8b52e.zip
Revised checks for multi-character constants 'xyz'
The previous code for elaborating character constants had a small bug: the value of a wide character constant consisting of several characters was normalized to type `int`, while, statically, it has type `wchar_t`. If `wchar_t` is `unsigned short`, for example, the constant `L'ab'` would elaborate to 6357090, which is not of type `unsigned short`. This commit fixes the bug by normalizing wide character constants to type `wchar_t`, regardless of how many characters they contain. The previous code was odd in another respect: leading `\0` characters in multi-character constants were ignored. Hence, `'\0bcde'` was accepted while `'abcde'` caused a warning. This commit implements a more predictable behavior: the number of characters in a character literal is limited a priori to `sizeof(type of result) / sizeof(type of each character)`. So, for non-wide character constants we can typically have up to 4 characters (`sizeof(int) / sizeof(char)`), while for wide character constants we can only have one character. In effect, multiple-character wide character literals are not supported. This is allowed by the ISO C99 standard and seems consistent with GCC and Clang. Finally, a multi-character constant with too many characters was reported either as an error (if the computation overflowed the 64-bit accumulator) or as a warning (otherwise). Here, we make this an error in all cases. GCC and Clang only produce warnings, and truncate the value of the character constant, but an error feels safer.
Diffstat (limited to 'cparser')
-rw-r--r-- cparser/Elab.ml | 43
1 files changed, 19 insertions, 24 deletions
diff --git a/cparser/Elab.ml b/cparser/Elab.ml
index 60d71b3a..eff6f3ba 100644
--- a/cparser/Elab.ml
+++ b/cparser/Elab.ml
@@ -396,34 +396,29 @@ let elab_float_constant f =
(v, ty)
let elab_char_constant loc wide chars =
+ let len = List.length chars in
let nbits = if wide then 8 * !config.sizeof_wchar else 8 in
- (* Treat multi-char constants as a number in base 2^nbits *)
let max_digit = Int64.shift_left 1L nbits in
- let max_val = Int64.shift_left 1L (64 - nbits) in
- let v,_ =
- List.fold_left
- (fun (acc,err) d ->
- if not err then begin
- let overflow = acc < 0L || acc >= max_val
- and out_of_range = d < 0L || d >= max_digit in
- if overflow then
- error loc "character constant too long for its type";
- if out_of_range then
+ (* Treat multi-character constants as a number in base 2^nbits.
+ It must fit in type int for a normal constant and in type wchar_t
+ for a wide constant. *)
+ let v =
+ if len > (if wide then 1 else !config.sizeof_int) then begin
+ error loc "%d-character constant too long for its type" len;
+ 0L
+ end else
+ List.fold_left
+ (fun acc d ->
+ if d < 0L || d >= max_digit then
error loc "escape sequence is out of range (code 0x%LX)" d;
- Int64.add (Int64.shift_left acc nbits) d,overflow || out_of_range
- end else
- Int64.add (Int64.shift_left acc nbits) d,true
- )
- (0L,false) chars in
- if not (integer_representable v IInt) then
- warning loc Unnamed "character constant too long for its type";
- (* C99 6.4.4.4 item 10: single character -> represent at type char
- or wchar_t *)
+ Int64.add (Int64.shift_left acc nbits) d)
+ 0L chars in
+ (* C99 6.4.4.4 items 10 and 11:
+ single-character constant -> represent at type char
+ multi-character constant -> represent at type int
+ wide character constant -> represent at type wchar_t *)
Ceval.normalize_int v
- (if List.length chars = 1 then
- if wide then wchar_ikind() else IChar
- else
- IInt)
+ (if wide then wchar_ikind() else if len = 1 then IChar else IInt)
let elab_string_literal loc wide chars =
let nbits = if wide then 8 * !config.sizeof_wchar else 8 in