From 168495d726e623e0b4bd6364f949ae577fa8b52e Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Tue, 9 Nov 2021 15:57:45 +0100 Subject: Revised checks for multi-character constants 'xyz' The previous code for elaborating character constants has a small bug: the value of a wide character constant consisting of several characters was normalized to type `int`, while, statically, it has type `wchar_t`. If `wchar_t` is `unsigned short`, for example, the constant `L'ab'` would elaborate to 6357090, which is not of type `unsigned short`. This commit fixes the bug by normalizing wide character constants to type `wchar_t`, regardless of how many characters they contain. The previous code was odd in another respect: leading `\0` characters in multi-character constants were ignored. Hence, `'\0bcde'` was accepted while `'abcde'` caused a warning. This commit implements a more predictable behavior: the number of characters in a character literal is limited a priori to sizeof(type of result) / sizeof(type of each character) So, for non-wide character constants we can typically have up to 4 characters (sizeof(int) / sizeof(char)), while for wide character constants we can only have one character. In effect, multiple-character wide character literals are not supported. This is allowed by the ISO C99 standard and seems consistent with GCC and Clang. Finally, a multi-character constant with too many characters was reported either as an error (if the computation overflowed the 64-bit accumulator) or as a warning (otherwise). Here, we make this an error in all cases. GCC and Clang only produce warnings, and truncate the value of the character constant, but an error feels safer. --- cparser/Elab.ml | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) (limited to 'cparser') diff --git a/cparser/Elab.ml b/cparser/Elab.ml index 60d71b3a..eff6f3ba 100644 --- a/cparser/Elab.ml +++ b/cparser/Elab.ml @@ -396,34 +396,29 @@ let elab_float_constant f = (v, ty) let elab_char_constant loc wide chars = + let len = List.length chars in let nbits = if wide then 8 * !config.sizeof_wchar else 8 in - (* Treat multi-char constants as a number in base 2^nbits *) let max_digit = Int64.shift_left 1L nbits in - let max_val = Int64.shift_left 1L (64 - nbits) in - let v,_ = - List.fold_left - (fun (acc,err) d -> - if not err then begin - let overflow = acc < 0L || acc >= max_val - and out_of_range = d < 0L || d >= max_digit in - if overflow then - error loc "character constant too long for its type"; - if out_of_range then + (* Treat multi-character constants as a number in base 2^nbits. + It must fit in type int for a normal constant and in type wchar_t + for a wide constant. *) + let v = + if len > (if wide then 1 else !config.sizeof_int) then begin + error loc "%d-character constant too long for its type" len; + 0L + end else + List.fold_left + (fun acc d -> + if d < 0L || d >= max_digit then error loc "escape sequence is out of range (code 0x%LX)" d; - Int64.add (Int64.shift_left acc nbits) d,overflow || out_of_range - end else - Int64.add (Int64.shift_left acc nbits) d,true - ) - (0L,false) chars in - if not (integer_representable v IInt) then - warning loc Unnamed "character constant too long for its type"; - (* C99 6.4.4.4 item 10: single character -> represent at type char - or wchar_t *) + Int64.add (Int64.shift_left acc nbits) d) + 0L chars in + (* C99 6.4.4.4 items 10 and 11: + single-character constant -> represent at type char + multi-character constant -> represent at type int + wide character constant -> represent at type wchar_t *) Ceval.normalize_int v - (if List.length chars = 1 then - if wide then wchar_ikind() else IChar - else - IInt) + (if wide then wchar_ikind() else if len = 1 then IChar else IInt) let elab_string_literal loc wide chars = let nbits = if wide then 8 * !config.sizeof_wchar else 8 in -- cgit