aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Leroy <xavierleroy@users.noreply.github.com>2022-09-19 16:37:17 +0200
committerGitHub <noreply@github.com>2022-09-19 16:37:17 +0200
commit994c6c34182606385140e5695e33c90507ce59ee (patch)
treee9291d64997dd3e2ac8660c0e6fbe1b9a597799e
parent103aa7074a9dd3b1bcb2864d52c89292a2ab7bff (diff)
downloadcompcert-994c6c34182606385140e5695e33c90507ce59ee.tar.gz
compcert-994c6c34182606385140e5695e33c90507ce59ee.zip
Support C11 Unicode string literals and character constants (#452)
* Support C11 Unicode string literals and character constants * Add tests for C11 string literals and character constants * Better error message for ill-formed universal character names E.g. \u followed by fewer than 4 hex digits, or \U followed by fewer than 8 hex digits. * Add new warning `invalid-utf8` for byte sequences that are not valid UTF8. The warning is activated but not fatal by default. * Warn on uses of C11 Unicode character constants and string literals This uses the `c11-extensions` warning, which is off by default. * Support preprocessing option -finput-charset= for GNU toolchains
-rw-r--r--cfrontend/C2C.ml197
-rw-r--r--cparser/C.mli2
-rw-r--r--cparser/Cabs.v17
-rw-r--r--cparser/Ceval.ml6
-rw-r--r--cparser/Cprint.ml2
-rw-r--r--cparser/Cutil.ml4
-rw-r--r--cparser/Diagnostics.ml3
-rw-r--r--cparser/Diagnostics.mli1
-rw-r--r--cparser/Elab.ml128
-rw-r--r--cparser/Elab.mli2
-rw-r--r--cparser/Lexer.mll180
-rw-r--r--cparser/Parser.vy20
-rw-r--r--cparser/pre_parser.mly2
-rw-r--r--driver/Frontend.ml5
-rw-r--r--test/regression/Makefile2
-rw-r--r--test/regression/Results/charlit27
-rw-r--r--test/regression/Results/stringlit15
-rw-r--r--test/regression/charlit.c50
-rw-r--r--test/regression/stringlit.c41
19 files changed, 490 insertions, 214 deletions
diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml
index b0dc8e8a..89b9139c 100644
--- a/cfrontend/C2C.ml
+++ b/cfrontend/C2C.ml
@@ -287,105 +287,6 @@ let attributes = [
]
-(** ** Functions used to handle string literals *)
-
-let stringNum = ref 0 (* number of next global for string literals *)
-let stringTable : (string, AST.ident) Hashtbl.t = Hashtbl.create 47
-let wstringTable : (int64 list, AST.ident) Hashtbl.t = Hashtbl.create 47
-
-let is_C_string s = not (String.contains s '\000')
-
-let name_for_string_literal s =
- try
- Hashtbl.find stringTable s
- with Not_found ->
- incr stringNum;
- let name = Printf.sprintf "__stringlit_%d" !stringNum in
- let id = intern_string name in
- let mergeable = if is_C_string s then 1 else 0 in
- Hashtbl.add decl_atom id
- { a_storage = C.Storage_static;
- a_alignment = Some 1;
- a_size = Some (Int64.of_int (String.length s + 1));
- a_sections = [Sections.for_stringlit mergeable];
- a_access = Sections.Access_default;
- a_inline = No_specifier;
- a_loc = Cutil.no_loc };
- Hashtbl.add stringTable s id;
- id
-
-let typeStringLiteral s =
- let sg = if Machine.((!config).char_signed) then Signed else Unsigned in
- Tarray(Tint(I8, sg, noattr), Z.of_uint (String.length s + 1), noattr)
-
-let global_for_string s id =
- let init = ref [] in
- let add_char c =
- init := AST.Init_int8(Z.of_uint(Char.code c)) :: !init in
- add_char '\000';
- for i = String.length s - 1 downto 0 do add_char s.[i] done;
- AST.(id, Gvar { gvar_info = typeStringLiteral s; gvar_init = !init;
- gvar_readonly = true; gvar_volatile = false})
-
-let is_C_wide_string s = not (List.mem 0L s)
-
-let name_for_wide_string_literal s =
- try
- Hashtbl.find wstringTable s
- with Not_found ->
- incr stringNum;
- let name = Printf.sprintf "__stringlit_%d" !stringNum in
- let id = intern_string name in
- let wchar_size = Machine.((!config).sizeof_wchar) in
- let mergeable = if is_C_wide_string s then wchar_size else 0 in
- Hashtbl.add decl_atom id
- { a_storage = C.Storage_static;
- a_alignment = Some wchar_size;
- a_size = Some (Int64.(mul (of_int (List.length s + 1))
- (of_int wchar_size)));
- a_sections = [Sections.for_stringlit mergeable];
- a_access = Sections.Access_default;
- a_inline = No_specifier;
- a_loc = Cutil.no_loc };
- Hashtbl.add wstringTable s id;
- id
-
-let typeWideStringLiteral s =
- let sz =
- match Machine.((!config).sizeof_wchar) with
- | 2 -> I16
- | 4 -> I32
- | _ -> assert false in
- let sg =
- if Machine.((!config).wchar_signed) then Signed else Unsigned in
- Tarray(Tint(sz, sg, noattr), Z.of_uint (List.length s + 1), noattr)
-
-let global_for_wide_string s id =
- let init = ref [] in
- let init_of_char =
- match Machine.((!config).sizeof_wchar) with
- | 2 -> (fun z -> AST.Init_int16 z)
- | 4 -> (fun z -> AST.Init_int32 z)
- | _ -> assert false in
- let add_char c =
- init := init_of_char(Z.of_uint64 c) :: !init in
- List.iter add_char s;
- add_char 0L;
- AST.(id, Gvar { gvar_info = typeWideStringLiteral s;
- gvar_init = List.rev !init;
- gvar_readonly = true; gvar_volatile = false})
-
-let globals_for_strings globs =
- let globs1 =
- Hashtbl.fold
- (fun s id l -> global_for_wide_string s id :: l)
- wstringTable globs in
- let globs2 =
- Hashtbl.fold
- (fun s id l -> global_for_string s id :: l)
- stringTable globs1 in
- globs2
-
(** ** Handling of inlined memcpy functions *)
let constant_size_t a =
@@ -667,6 +568,98 @@ let is_int64 env ty =
| C.TEnum(_, _) -> false
| _ -> assert false
+(** String literals *)
+
+let stringNum = ref 0 (* number of next global for string literals *)
+let stringTable : (string, AST.ident) Hashtbl.t = Hashtbl.create 47
+let wstringTable : (int64 list * ikind, AST.ident) Hashtbl.t = Hashtbl.create 47
+
+let is_C_string s = not (String.contains s '\000')
+
+let name_for_string_literal s =
+ try
+ Hashtbl.find stringTable s
+ with Not_found ->
+ incr stringNum;
+ let name = Printf.sprintf "__stringlit_%d" !stringNum in
+ let id = intern_string name in
+ let mergeable = if is_C_string s then 1 else 0 in
+ Hashtbl.add decl_atom id
+ { a_storage = C.Storage_static;
+ a_alignment = Some 1;
+ a_size = Some (Int64.of_int (String.length s + 1));
+ a_sections = [Sections.for_stringlit mergeable];
+ a_access = Sections.Access_default;
+ a_inline = No_specifier;
+ a_loc = Cutil.no_loc };
+ Hashtbl.add stringTable s id;
+ id
+
+let typeStringLiteral s =
+ let sg = if Machine.((!config).char_signed) then Signed else Unsigned in
+ Tarray(Tint(I8, sg, noattr), Z.of_uint (String.length s + 1), noattr)
+
+let global_for_string s id =
+ let init = ref [] in
+ let add_char c =
+ init := AST.Init_int8(Z.of_uint(Char.code c)) :: !init in
+ add_char '\000';
+ for i = String.length s - 1 downto 0 do add_char s.[i] done;
+ AST.(id, Gvar { gvar_info = typeStringLiteral s; gvar_init = !init;
+ gvar_readonly = true; gvar_volatile = false})
+
+let is_C_wide_string s = not (List.mem 0L s)
+
+let name_for_wide_string_literal s ik =
+ try
+ Hashtbl.find wstringTable (s, ik)
+ with Not_found ->
+ incr stringNum;
+ let name = Printf.sprintf "__stringlit_%d" !stringNum in
+ let id = intern_string name in
+ let wchar_size = Cutil.sizeof_ikind ik in
+ let mergeable = if is_C_wide_string s then wchar_size else 0 in
+ Hashtbl.add decl_atom id
+ { a_storage = C.Storage_static;
+ a_alignment = Some wchar_size;
+ a_size = Some (Int64.(mul (of_int (List.length s + 1))
+ (of_int wchar_size)));
+ a_sections = [Sections.for_stringlit mergeable];
+ a_access = Sections.Access_default;
+ a_inline = No_specifier;
+ a_loc = Cutil.no_loc };
+ Hashtbl.add wstringTable (s, ik) id;
+ id
+
+let typeWideStringLiteral s ik =
+ Tarray(convertIkind ik noattr, Z.of_uint (List.length s + 1), noattr)
+
+let global_for_wide_string (s, ik) id =
+ let init = ref [] in
+ let init_of_char =
+ match Cutil.sizeof_ikind ik with
+ | 2 -> (fun z -> AST.Init_int16 z)
+ | 4 -> (fun z -> AST.Init_int32 z)
+ | _ -> assert false in
+ let add_char c =
+ init := init_of_char(Z.of_uint64 c) :: !init in
+ List.iter add_char s;
+ add_char 0L;
+ AST.(id, Gvar { gvar_info = typeWideStringLiteral s ik;
+ gvar_init = List.rev !init;
+ gvar_readonly = true; gvar_volatile = false})
+
+let globals_for_strings globs =
+ let globs1 =
+ Hashtbl.fold
+ (fun s id l -> global_for_wide_string s id :: l)
+ wstringTable globs in
+ let globs2 =
+ Hashtbl.fold
+ (fun s id l -> global_for_string s id :: l)
+ stringTable globs1 in
+ globs2
+
(** Floating point constants *)
let z_of_str hex str fst =
@@ -1000,9 +993,9 @@ and convertLvalue env e =
| C.EConst(C.CStr s) ->
let ty = typeStringLiteral s in
Evar(name_for_string_literal s, ty)
- | C.EConst(C.CWStr s) ->
- let ty = typeWideStringLiteral s in
- Evar(name_for_wide_string_literal s, ty)
+ | C.EConst(C.CWStr(s, ik)) ->
+ let ty = typeWideStringLiteral s ik in
+ Evar(name_for_wide_string_literal s ik, ty)
| _ ->
error "illegal lvalue"; ezero
diff --git a/cparser/C.mli b/cparser/C.mli
index 763a9277..1388fab9 100644
--- a/cparser/C.mli
+++ b/cparser/C.mli
@@ -64,7 +64,7 @@ type constant =
| CInt of int64 * ikind * string (* as it appeared in the source *)
| CFloat of float_cst * fkind
| CStr of string
- | CWStr of int64 list
+ | CWStr of int64 list * ikind
| CEnum of ident * int64 (* enum tag, integer value *)
(** Attributes *)
diff --git a/cparser/Cabs.v b/cparser/Cabs.v
index bf8c8c74..59cd30ab 100644
--- a/cparser/Cabs.v
+++ b/cparser/Cabs.v
@@ -31,6 +31,13 @@ Record floatInfo := {
suffix_FI:option string
}.
+Inductive encoding :=
+ | EncNone (* no prefix *)
+ | EncWide (* 'L' prefix *)
+ | EncU16 (* 'u' prefix *)
+ | EncU32 (* 'U' prefix *)
+ | EncUTF8. (* 'u8' prefix (strings only) *)
+
Inductive structOrUnion :=
| STRUCT | UNION.
@@ -152,8 +159,8 @@ with constant :=
the source code. *)
| CONST_INT : string -> constant
| CONST_FLOAT : floatInfo -> constant
- | CONST_CHAR : bool -> list char_code -> constant
- | CONST_STRING : bool -> list char_code -> constant
+ | CONST_CHAR : encoding -> list char_code -> constant
+ | CONST_STRING : encoding -> list char_code -> constant
with init_expression :=
| NO_INIT
@@ -194,9 +201,9 @@ Definition generic_assoc := (option type_name * expression)%type.
(* GCC extended asm *)
Inductive asm_operand :=
-| ASMOPERAND: option string -> bool -> list char_code -> expression -> asm_operand.
+| ASMOPERAND: option string -> encoding -> list char_code -> expression -> asm_operand.
-Definition asm_flag := (bool * list char_code)%type.
+Definition asm_flag := (encoding * list char_code)%type.
(*
** Declaration definition (at toplevel)
@@ -227,7 +234,7 @@ with statement :=
| DEFAULT : statement -> loc -> statement
| LABEL : string -> statement -> loc -> statement
| GOTO : string -> loc -> statement
- | ASM : list cvspec -> bool -> list char_code -> list asm_operand -> list asm_operand -> list asm_flag -> loc -> statement
+ | ASM : list cvspec -> encoding -> list char_code -> list asm_operand -> list asm_operand -> list asm_flag -> loc -> statement
| DEFINITION : definition -> statement (*definition or declaration of a variable or type*)
with for_clause :=
diff --git a/cparser/Ceval.ml b/cparser/Ceval.ml
index 14f61e06..0800e25b 100644
--- a/cparser/Ceval.ml
+++ b/cparser/Ceval.ml
@@ -72,7 +72,7 @@ let normalize_int n ik =
type value =
| I of int64
| S of string
- | WS of int64 list
+ | WS of int64 list * ikind
let boolean_value v =
match v with
@@ -83,7 +83,7 @@ let constant = function
| CInt(v, ik, _) -> I (normalize_int v ik)
| CFloat(v, fk) -> raise Notconst
| CStr s -> S s
- | CWStr s -> WS s
+ | CWStr(s, ik) -> WS(s, ik)
| CEnum(id, v) -> I v
let is_signed env ty =
@@ -274,7 +274,7 @@ let constant_expr env ty e =
| TInt(ik, _), I n -> Some(CInt(n, ik, ""))
| TPtr(_, _), I n -> Some(CInt(n, ptr_t_ikind (), ""))
| (TArray(_, _, _) | TPtr(_, _)), S s -> Some(CStr s)
- | (TArray(_, _, _) | TPtr(_, _)), WS s -> Some(CWStr s)
+ | (TArray(_, _, _) | TPtr(_, _)), WS(s, ik) -> Some(CWStr(s, ik))
| TEnum(_, _), I n -> Some(CInt(n, enum_ikind, ""))
| _ -> None
with Notconst -> None
diff --git a/cparser/Cprint.ml b/cparser/Cprint.ml
index caa4fa66..05448784 100644
--- a/cparser/Cprint.ml
+++ b/cparser/Cprint.ml
@@ -75,7 +75,7 @@ let const pp = function
else fprintf pp "\\%03o" (Char.code c)
done;
fprintf pp "\""
- | CWStr l ->
+ | CWStr(l, _) ->
fprintf pp "L\"";
List.iter
(fun c ->
diff --git a/cparser/Cutil.ml b/cparser/Cutil.ml
index 6fd12323..f226d51b 100644
--- a/cparser/Cutil.ml
+++ b/cparser/Cutil.ml
@@ -1024,9 +1024,9 @@ let type_of_constant = function
| CStr s ->
let size = Int64.of_int (String.length s + 1) in
TArray(TInt(IChar,[]), Some size, [])
- | CWStr s ->
+ | CWStr(s, ik) ->
let size = Int64.of_int (List.length s + 1) in
- TArray(TInt(wchar_ikind(), []), Some size, [])
+ TArray(TInt(ik, []), Some size, [])
| CEnum(_, _) -> TInt(IInt, [])
(* Check that a C expression is a lvalue *)
diff --git a/cparser/Diagnostics.ml b/cparser/Diagnostics.ml
index 483b0376..8a8a0c17 100644
--- a/cparser/Diagnostics.ml
+++ b/cparser/Diagnostics.ml
@@ -104,6 +104,7 @@ type warning_type =
| Tentative_incomplete_static
| Reduced_alignment
| Non_linear_cond_expr
+ | Invalid_UTF8
(* List of all warnings with default status.
"true" means the warning is active by default.
@@ -140,6 +141,7 @@ let all_warnings =
(Tentative_incomplete_static, false);
(Reduced_alignment, false);
(Non_linear_cond_expr, false);
+ (Invalid_UTF8, true);
]
(* List of active warnings *)
@@ -182,6 +184,7 @@ let string_of_warning = function
| Tentative_incomplete_static -> "tentative-incomplete-static"
| Reduced_alignment -> "reduced-alignment"
| Non_linear_cond_expr -> "non-linear-cond-expr"
+ | Invalid_UTF8 -> "invalid-utf8"
(* Activate the given warning *)
let activate_warning w () =
diff --git a/cparser/Diagnostics.mli b/cparser/Diagnostics.mli
index 1210353f..47727707 100644
--- a/cparser/Diagnostics.mli
+++ b/cparser/Diagnostics.mli
@@ -57,6 +57,7 @@ type warning_type =
| Tentative_incomplete_static (** static tentative definition with incomplete type *)
| Reduced_alignment (** alignment reduction *)
| Non_linear_cond_expr (** condition that cannot be linearized *)
+ | Invalid_UTF8 (** invalid UTF-8 encoding *)
val warning : (string * int) -> warning_type -> ('a, Format.formatter, unit, unit, unit, unit) format6 -> 'a
(** [warning (f,c) w fmt arg1 ... argN] formats the arguments [arg1] to [argN] as warining according to
diff --git a/cparser/Elab.ml b/cparser/Elab.ml
index 01e745c3..ec79634a 100644
--- a/cparser/Elab.ml
+++ b/cparser/Elab.ml
@@ -395,48 +395,75 @@ let elab_float_constant f =
in
(v, ty)
-let elab_char_constant loc wide chars =
+let check_char_range loc ikind chars =
+ let max = Int64.shift_left 1L (sizeof_ikind ikind * 8) in
+ List.iter
+ (fun c ->
+ if c >= max then error loc "escape sequence 0x%LX is out of range" c)
+ chars
+
+let ikind_of_encoding = function
+ | EncNone -> IChar
+ | EncWide -> wchar_ikind()
+ | EncU16 -> IUShort
+ | EncU32 -> IUInt
+ | EncUTF8 -> IChar
+
+let elab_char_constant loc enc chars =
let len = List.length chars in
- let nbits = if wide then 8 * !config.sizeof_wchar else 8 in
- let max_digit = Int64.shift_left 1L nbits in
- (* Treat multi-character constants as a number in base 2^nbits.
- It must fit in type int for a normal constant and in type wchar_t
- for a wide constant. *)
+ (* We support multi-character constants for EncNone character literals only.
+ We treat them as big-endian numbers in base 256. *)
let v =
- if len > (if wide then 1 else !config.sizeof_int) then begin
- error loc "%d-character constant too long for its type" len;
- 0L
- end else
- List.fold_left
- (fun acc d ->
- if d < 0L || d >= max_digit then
- error loc "escape sequence is out of range (code 0x%LX)" d;
- Int64.add (Int64.shift_left acc nbits) d)
- 0L chars in
- (* C99 6.4.4.4 items 10 and 11:
- single-character constant -> represent at type char
+ match chars, enc with
+ | [], _ -> error loc "empty character constant"; 0L
+ | [c], _ -> c
+ | _, EncNone ->
+ if len > !config.sizeof_int then begin
+ error loc "%d-character constant too long, overflows its type" len;
+ 0L
+ end else begin
+ check_char_range loc IUChar chars;
+ List.fold_left
+ (fun acc d -> Int64.(add (shift_left acc 8) d))
+ 0L chars
+ end
+ | _, _ ->
+ error loc "%d-character constant not supported" len; 0L in
+ (* C11 6.4.4.4 items 10 and 11:
+ normal single-character constant -> represent at type char
multi-character constant -> represent at type int
- wide character constant -> represent at type wchar_t *)
- Ceval.normalize_int v
- (if wide then wchar_ikind() else if len = 1 then IChar else IInt)
-
-let elab_string_literal loc wide chars =
- let nbits = if wide then 8 * !config.sizeof_wchar else 8 in
- let char_max = Int64.shift_left 1L nbits in
- List.iter
- (fun c ->
- if c < 0L || c >= char_max
- then error loc "escape sequence is out of range (code 0x%LX)" c)
- chars;
- if wide then
- CWStr chars
- else begin
- let res = Bytes.create (List.length chars) in
- List.iteri
- (fun i c -> Bytes.set res i (Char.unsafe_chr (Int64.to_int c)))
- chars;
- CStr (Bytes.to_string res)
- end
+ L character constant -> represent at type wchar_t
+ u character constant -> represent at type char16_t
+ U character constant -> represent at type char32_t *)
+ let ik =
+ if enc = EncNone && len > 1 then IInt else ikind_of_encoding enc in
+ let v' = Ceval.normalize_int v ik in
+ if v' <> v then
+ warning loc Constant_conversion
+ "overflow in character constant, changes value from %Ld to %Ld" v v';
+ v'
+
+let elab_string_literal loc enc chars =
+ let ik = ikind_of_encoding enc in
+ check_char_range loc ik chars;
+ match enc with
+ | EncNone | EncUTF8 ->
+ let res = Bytes.create (List.length chars) in
+ List.iteri
+ (fun i c -> Bytes.set res i (Char.unsafe_chr (Int64.to_int c)))
+ chars;
+ CStr (Bytes.to_string res)
+ | EncWide | EncU16 | EncU32 ->
+ CWStr(chars, ik)
+
+let warn_C11_literals loc enc kind =
+ let warn enc =
+ warning loc Celeven_extension "'%s' %s are a C11 extension" enc kind in
+ match enc with
+ | EncNone | EncWide -> ()
+ | EncUTF8 -> warn "u8"
+ | EncU16 -> warn "u"
+ | EncU32 -> warn "U"
let elab_constant loc = function
| CONST_INT s ->
@@ -445,14 +472,22 @@ let elab_constant loc = function
| CONST_FLOAT f ->
let (v, fk) = elab_float_constant f in
CFloat(v, fk)
- | CONST_CHAR(wide, s) ->
- let ikind = if wide then wchar_ikind () else IInt in
- CInt(elab_char_constant loc wide s, ikind, "")
+ | CONST_CHAR(enc, s) ->
+ warn_C11_literals loc enc "character constants";
+ let ikind =
+ match enc with
+ | EncNone -> IInt
+ | EncWide -> wchar_ikind ()
+ | EncU16 -> IUShort
+ | EncU32 -> IUInt
+ | EncUTF8 -> assert false in
+ CInt(elab_char_constant loc enc s, ikind, "")
| CONST_STRING(wide, s) ->
+ warn_C11_literals loc wide "string literals";
elab_string_literal loc wide s
-let elab_simple_string loc wide chars =
- match elab_string_literal loc wide chars with
+let elab_simple_string loc enc chars =
+ match elab_string_literal loc enc chars with
| CStr s -> s
| _ -> error loc "cannot use wide string literal in 'asm'"; ""
@@ -1586,6 +1621,7 @@ and elab_item zi item il =
| COMPOUND_INIT [_, SINGLE_INIT(CONSTANT (CONST_STRING(w, s)))]),
TArray(ty_elt, sz, _)
when is_integer_type env ty_elt ->
+ warn_C11_literals loc w "string literals";
begin match elab_string_literal loc w s, unroll env ty_elt with
| CStr s, TInt((IChar | ISChar | IUChar), _) ->
if not (I.index_below (Int64.of_int(String.length s - 1)) sz) then
@@ -1594,12 +1630,12 @@ and elab_item zi item il =
| CStr _, _ ->
error loc "initialization of an array of non-char elements with a string literal";
elab_list zi il false
- | CWStr s, TInt(_, _) when compatible_types AttrIgnoreTop env ty_elt (TInt(wchar_ikind(), [])) ->
+ | CWStr(s, ik), TInt(_, _) when compatible_types AttrIgnoreTop env ty_elt (TInt(ik, [])) ->
if not (I.index_below (Int64.of_int(List.length s - 1)) sz) then
warning loc Unnamed "initializer string for array of wide chars %s is too long" (I.name zi);
elab_list (I.set zi (init_int_array_wstring sz s)) il false
| CWStr _, _ ->
- error loc "initialization of an array of non-wchar_t elements with a wide string literal";
+ error loc "type mismatch between array destination and wide string literal";
elab_list zi il false
| _ -> assert false
end
diff --git a/cparser/Elab.mli b/cparser/Elab.mli
index bca4f74d..537056fa 100644
--- a/cparser/Elab.mli
+++ b/cparser/Elab.mli
@@ -21,6 +21,6 @@ val elab_file : Cabs.definition list -> C.program
val elab_int_constant : Cabs.loc -> string -> int64 * C.ikind
val elab_float_constant : Cabs.floatInfo -> C.float_cst * C.fkind
-val elab_char_constant : Cabs.loc -> bool -> int64 list -> int64
+val elab_char_constant : Cabs.loc -> Cabs.encoding -> int64 list -> int64
(* These auxiliary functions are exported so that they can be reused
in other projects that deal with C-style source languages. *)
diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll
index 9f7fba1e..2efa4216 100644
--- a/cparser/Lexer.mll
+++ b/cparser/Lexer.mll
@@ -3,6 +3,7 @@
(* The Compcert verified compiler *)
(* *)
(* Jacques-Henri Jourdan, INRIA Paris-Rocquencourt *)
+(* Xavier Leroy, Collège de France and Inria *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
@@ -144,9 +145,9 @@ let error lb fmt =
Diagnostics.error
(lb.lex_curr_p.pos_fname,lb.lex_curr_p.pos_lnum) fmt
-let warning lb fmt =
+let warning lb kind fmt =
Diagnostics.warning
- (lb.lex_curr_p.pos_fname,lb.lex_curr_p.pos_lnum) Diagnostics.Unnamed fmt
+ (lb.lex_curr_p.pos_fname,lb.lex_curr_p.pos_lnum) kind fmt
(* Simple character escapes *)
@@ -160,6 +161,89 @@ let convert_escape = function
| 't' -> 9L (* horizontal tab *)
| 'v' -> 11L (* vertical tab *)
| c -> Int64.of_int (Char.code c)
+
+(* Encodings for character and string literals *)
+
+let encoding_of = function
+ | "" -> Cabs.EncNone
+ | "L" -> Cabs.EncWide
+ | "u" -> Cabs.EncU16
+ | "U" -> Cabs.EncU32
+ | "u8" -> Cabs.EncUTF8
+ | _ -> assert false
+
+let combine_encodings loc e1 e2 =
+ if e1 = Cabs.EncNone then e2
+ else if e2 = Cabs.EncNone then e1
+ else if e1 = e2 then e1
+ else Diagnostics.fatal_error
+ Cabs.(loc.filename, loc.lineno)
+ "unsupported non-standard concatenation of string literals"
+
+(* Handling of characters and escapes in string and char constants *)
+
+type chr = Chr of int | Esc of int64
+
+let check_utf8 lexbuf min x =
+ if x > 0x10FFFF || (x >= 0xD800 && x <= 0xDFFF) then
+ warning lexbuf Diagnostics.Invalid_UTF8 "Wrong Unicode value U+%X" x;
+ if x < min then
+ warning lexbuf Diagnostics.Invalid_UTF8
+ "Overlong UTF-8 encoding for Unicode value U+%X" x;
+ Chr x
+
+let check_universal_character lexbuf x =
+ if x > 0x10FFFF
+ || x >= 0xD800 && x <= 0xDFFF
+ || x < 0xA0 && x <> 0x24 && x <> 0x40 && x <> 0x60
+ then begin
+ error lexbuf "Wrong universal character name U+%X" x; Chr 0
+ end else
+ Chr x
+
+let add_char_utf8 x accu =
+ if x <= 0x007F then
+ Int64.of_int x :: accu
+ else if x <= 0x07FF then
+ Int64.of_int (0x80 lor (x land 0x3F)) ::
+ Int64.of_int (0xC0 lor (x lsr 6)) ::
+ accu
+ else if x <= 0xFFFF then
+ Int64.of_int (0x80 lor (x land 0x3F)) ::
+ Int64.of_int (0x80 lor ((x lsr 6) land 0x3F)) ::
+ Int64.of_int (0xE0 lor (x lsr 12)) ::
+ accu
+ else
+ Int64.of_int (0x80 lor (x land 0x3F)) ::
+ Int64.of_int (0x80 lor ((x lsr 6) land 0x3F)) ::
+ Int64.of_int (0x80 lor ((x lsr 12) land 0x3F)) ::
+ Int64.of_int (0xF0 lor (x lsr 18)) ::
+ accu
+
+let add_char_utf16 x accu =
+ if x <= 0xFFFF then
+ Int64.of_int x :: accu
+ else begin
+ let x = x - 0x10000 in
+ Int64.of_int (0xDC00 lor (x land 0x3FF)) ::
+ Int64.of_int (0xD800 lor (x lsr 10)) ::
+ accu
+ end
+
+let add_char enc c accu =
+ match c, enc with
+ | Esc x, _ -> (* Escapes are never encoded *)
+ x :: accu
+ | Chr x, (Cabs.EncNone | Cabs.EncUTF8) -> (* Characters are encoded in UTF8 *)
+ add_char_utf8 x accu
+ | Chr x, Cabs.EncU16 -> (* Characters are encoded in UTF16 *)
+ add_char_utf16 x accu
+ | Chr x, Cabs.EncU32 -> (* Characters are not encoded *)
+ Int64.of_int x :: accu
+ | Chr x, Cabs.EncWide -> (* Depends on size of wchar_t *)
+ if Machine.(!config.sizeof_wchar) = 2
+ then add_char_utf16 x accu
+ else Int64.of_int x :: accu
}
(* Identifiers *)
@@ -249,11 +333,6 @@ let octal_escape_sequence =
| octal_digit octal_digit
| octal_digit octal_digit octal_digit) as n)
let hexadecimal_escape_sequence = "\\x" (hexadecimal_digit+ as n)
-let escape_sequence =
- simple_escape_sequence
- | octal_escape_sequence
- | hexadecimal_escape_sequence
- | universal_character_name
rule initial = parse
| '\n' { new_line lexbuf; initial_linebegin lexbuf }
@@ -283,16 +362,13 @@ rule initial = parse
currentLoc lexbuf)}
| preprocessing_number as s { error lexbuf "invalid numerical constant '%s'@ These characters form a preprocessor number, but not a constant" s;
CONSTANT (Cabs.CONST_INT "0", currentLoc lexbuf) }
- | "'" { let l = char_literal lexbuf.lex_start_p [] lexbuf in
- CONSTANT (Cabs.CONST_CHAR(false, l),
- currentLoc lexbuf) }
- | "L'" { let l = char_literal lexbuf.lex_start_p [] lexbuf in
- CONSTANT (Cabs.CONST_CHAR(true, l),
- currentLoc lexbuf) }
- | "\"" { let l = string_literal lexbuf.lex_start_p [] lexbuf in
- STRING_LITERAL(false, l, currentLoc lexbuf) }
- | "L\"" { let l = string_literal lexbuf.lex_start_p [] lexbuf in
- STRING_LITERAL(true, l, currentLoc lexbuf) }
+ | (""|"L"|"u"|"U") as e "'" { let enc = encoding_of e in
+ let l = char_literal lexbuf.lex_start_p [] lexbuf in
+ CONSTANT (Cabs.CONST_CHAR(enc, l), currentLoc lexbuf) }
+ | (""|"L"|"u"|"U"|"u8") as e "\""
+ { let enc = encoding_of e in
+ let l = string_literal lexbuf.lex_start_p enc [] lexbuf in
+ STRING_LITERAL(enc, l, currentLoc lexbuf) }
| "..." { ELLIPSIS(currentLoc lexbuf) }
| "+=" { ADD_ASSIGN(currentLoc lexbuf) }
| "-=" { SUB_ASSIGN(currentLoc lexbuf) }
@@ -357,39 +433,62 @@ and initial_linebegin = parse
and char = parse
| universal_character_name
{ try
- Int64.of_string ("0x" ^ n)
+ check_universal_character lexbuf (int_of_string ("0x" ^ n))
with Failure _ ->
error lexbuf "overflow in universal character name";
- 0L
+ Chr 0
}
| hexadecimal_escape_sequence
{ try
- Int64.of_string ("0x" ^ n)
+ Esc (Int64.of_string ("0x" ^ n))
with Failure _ ->
error lexbuf "overflow in hexadecimal escape sequence";
- 0L
+ Esc 0L
}
| octal_escape_sequence
- { Int64.of_string ("0o" ^ n) }
+ { Esc (Int64.of_string ("0o" ^ n)) }
| simple_escape_sequence
- { convert_escape c }
+ { Esc (convert_escape c) }
+ | "\\u" | "\\U"
+ { error lexbuf "incomplete universal character name";
+ Chr 0 }
| '\\' (_ as c)
{ error lexbuf "incorrect escape sequence '\\%c'" c;
- Int64.of_int (Char.code c) }
+ Esc (Int64.of_int (Char.code c)) }
+ | ['\x00'-'\x7F'] as c1
+ { Chr (Char.code c1) }
+ | (['\xC0'-'\xDF'] as c1) (['\x80'-'\xBF'] as c2)
+ { check_utf8 lexbuf 0x80
+ ( (Char.code c1 land 0b00011111) lsl 6
+ + (Char.code c2 land 0b00111111)) }
+ | (['\xE0'-'\xEF'] as c1) (['\x80'-'\xBF'] as c2) (['\x80'-'\xBF'] as c3)
+ { check_utf8 lexbuf 0x800
+ ( (Char.code c1 land 0b00001111) lsl 12
+ + (Char.code c2 land 0b00111111) lsl 6
+ + (Char.code c3 land 0b00111111) ) }
+ | (['\xF0'-'\xF7'] as c1) (['\x80'-'\xBF'] as c2) (['\x80'-'\xBF'] as c3) (['\x80'-'\xBF'] as c4)
+ { check_utf8 lexbuf 0x10000
+ ( (Char.code c1 land 0b00000111) lsl 18
+ + (Char.code c2 land 0b00111111) lsl 12
+ + (Char.code c3 land 0b00111111) lsl 6
+ + (Char.code c4 land 0b00111111) ) }
| _ as c
- { Int64.of_int (Char.code c) }
+ { warning lexbuf Diagnostics.Invalid_UTF8
+ "Invalid UTF8 encoding: byte 0x%02x" (Char.code c);
+ Esc (Int64.of_int (Char.code c)) (* re-encode as-is *)
+ }
and char_literal startp accu = parse
| '\'' { lexbuf.lex_start_p <- startp;
List.rev accu }
| '\n' | eof { fatal_error lexbuf "missing terminating \"'\" character" }
- | "" { let c = char lexbuf in char_literal startp (c :: accu) lexbuf }
+ | "" { let c = char lexbuf in char_literal startp (add_char Cabs.EncU32 c accu) lexbuf }
-and string_literal startp accu = parse
+and string_literal startp enc accu = parse
| '\"' { lexbuf.lex_start_p <- startp;
List.rev accu }
| '\n' | eof { fatal_error lexbuf "missing terminating '\"' character" }
- | "" { let c = char lexbuf in string_literal startp (c :: accu) lexbuf }
+ | "" { let c = char lexbuf in string_literal startp enc (add_char enc c accu) lexbuf }
(* We assume gcc -E syntax but try to tolerate variations. *)
and hash = parse
@@ -402,7 +501,8 @@ and hash = parse
try
int_of_string n
with Failure _ ->
- warning lexbuf "invalid line number"; lexbuf.lex_curr_p.pos_lnum
+ warning lexbuf Diagnostics.Unnamed "invalid line number";
+ lexbuf.lex_curr_p.pos_lnum
in
lexbuf.lex_curr_p <- {
lexbuf.lex_curr_p with
@@ -417,7 +517,7 @@ and hash = parse
([^ '\n']* as s) '\n'
{ new_line lexbuf; PRAGMA (s, currentLoc lexbuf) }
| [^ '\n']* '\n'
- { warning lexbuf "unrecognized '#' line";
+ { warning lexbuf Diagnostics.Unnamed "unrecognized '#' line";
new_line lexbuf; initial_linebegin lexbuf }
| [^ '\n']* eof
{ fatal_error lexbuf "unexpected end of file" }
@@ -583,21 +683,21 @@ and singleline_comment = parse
| Pre_parser.STAR loc -> loop (Parser.STAR loc)
| Pre_parser.STATIC loc -> loop (Parser.STATIC loc)
| Pre_parser.STATIC_ASSERT loc -> loop (Parser.STATIC_ASSERT loc)
- | Pre_parser.STRING_LITERAL (wide, str, loc) ->
+ | Pre_parser.STRING_LITERAL (enc, str, loc) ->
(* Merge consecutive string literals *)
- let rec doConcat wide str =
+ let rec doConcat enc str =
match Queue.peek tokens with
- | Pre_parser.STRING_LITERAL (wide', str', loc) ->
+ | Pre_parser.STRING_LITERAL (enc', str', loc') ->
ignore (Queue.pop tokens);
- let (wide'', str'') = doConcat wide' str' in
+ let (enc'', str'') = doConcat enc' str' in
if str'' <> []
- then (wide || wide'', str @ str'')
- else (wide, str)
- | _ -> (wide, str)
- | exception Queue.Empty -> (wide, str)
+ then (combine_encodings loc enc enc'', str @ str'')
+ else (enc, str)
+ | _ -> (enc, str)
+ | exception Queue.Empty -> (enc, str)
in
- let (wide', str') = doConcat wide str in
- loop (Parser.STRING_LITERAL ((wide', str'), loc))
+ let (enc', str') = doConcat enc str in
+ loop (Parser.STRING_LITERAL ((enc', str'), loc))
| Pre_parser.STRUCT loc -> loop (Parser.STRUCT loc)
| Pre_parser.SUB_ASSIGN loc -> loop (Parser.SUB_ASSIGN loc)
| Pre_parser.SWITCH loc -> loop (Parser.SWITCH loc)
diff --git a/cparser/Parser.vy b/cparser/Parser.vy
index 8d7cd055..12337367 100644
--- a/cparser/Parser.vy
+++ b/cparser/Parser.vy
@@ -23,7 +23,7 @@ Require Cabs.
%token<Cabs.string * Cabs.loc> VAR_NAME TYPEDEF_NAME OTHER_NAME
%token<Cabs.string * Cabs.loc> PRAGMA
-%token<bool * list Cabs.char_code * Cabs.loc> STRING_LITERAL
+%token<Cabs.encoding * list Cabs.char_code * Cabs.loc> STRING_LITERAL
%token<Cabs.constant * Cabs.loc> CONSTANT
%token<Cabs.loc> SIZEOF PTR INC DEC LEFT RIGHT LEQ GEQ EQEQ EQ NEQ LT GT
ANDAND BARBAR PLUS MINUS STAR TILDE BANG SLASH PERCENT HAT BAR QUESTION
@@ -124,8 +124,8 @@ primary_expression:
| cst = CONSTANT
{ (Cabs.CONSTANT (fst cst), snd cst) }
| str = STRING_LITERAL
- { let '((wide, chars), loc) := str in
- (Cabs.CONSTANT (Cabs.CONST_STRING wide chars), loc) }
+ { let '((enc, chars), loc) := str in
+ (Cabs.CONSTANT (Cabs.CONST_STRING enc chars), loc) }
| loc = LPAREN expr = expression RPAREN
{ (fst expr, loc)}
| sel = generic_selection
@@ -786,8 +786,8 @@ designator:
static_assert_declaration:
| loc = STATIC_ASSERT LPAREN expr = constant_expression
COMMA str = STRING_LITERAL RPAREN SEMICOLON
- { let '((wide, chars), locs) := str in
- (expr, (Cabs.CONST_STRING wide chars, locs), loc) }
+ { let '((enc, chars), locs) := str in
+ (expr, (Cabs.CONST_STRING enc chars, locs), loc) }
(* 6.8 *)
statement_dangerous:
@@ -922,9 +922,9 @@ jump_statement:
asm_statement:
| loc = ASM attr = asm_attributes LPAREN template = STRING_LITERAL args = asm_arguments
RPAREN SEMICOLON
- { let '(wide, chars, _) := template in
+ { let '(enc, chars, _) := template in
let '(outputs, inputs, flags) := args in
- Cabs.ASM attr wide chars outputs inputs flags loc }
+ Cabs.ASM attr enc chars outputs inputs flags loc }
asm_attributes:
| /* empty */
@@ -954,7 +954,7 @@ asm_operands_ne:
asm_operand:
| n = asm_op_name cstr = STRING_LITERAL LPAREN e = expression RPAREN
- { let '(wide, s, loc) := cstr in Cabs.ASMOPERAND n wide s (fst e) }
+ { let '(enc, s, loc) := cstr in Cabs.ASMOPERAND n enc s (fst e) }
asm_op_name:
| /* empty */ { None }
@@ -962,9 +962,9 @@ asm_op_name:
asm_flags:
| f = STRING_LITERAL
- { let '(wide, s, loc) := f in (wide, s) :: nil }
+ { let '(enc, s, loc) := f in (enc, s) :: nil }
| f = STRING_LITERAL COMMA fl = asm_flags
- { let '(wide, s, loc) := f in (wide, s) :: fl }
+ { let '(enc, s, loc) := f in (enc, s) :: fl }
(* 6.9 *)
translation_unit_file:
diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly
index ad294398..00ca0ade 100644
--- a/cparser/pre_parser.mly
+++ b/cparser/pre_parser.mly
@@ -47,7 +47,7 @@
%token<string * Pre_parser_aux.identifier_type ref * Cabs.loc>
VAR_NAME TYPEDEF_NAME
%token<Cabs.constant * Cabs.loc> CONSTANT
-%token<bool * int64 list * Cabs.loc> STRING_LITERAL
+%token<Cabs.encoding * int64 list * Cabs.loc> STRING_LITERAL
%token<string * Cabs.loc> PRAGMA
%token<Cabs.loc> SIZEOF PTR INC DEC LEFT RIGHT LEQ GEQ EQEQ EQ NEQ LT GT
diff --git a/driver/Frontend.ml b/driver/Frontend.ml
index 6133291e..4ee5c712 100644
--- a/driver/Frontend.ml
+++ b/driver/Frontend.ml
@@ -154,7 +154,8 @@ let gnu_prepro_actions = [
Exact "-iquote", String (gnu_prepro_opt_key "-iquote");
Exact "-P", Self gnu_prepro_opt;
Exact "-C", Self gnu_prepro_opt;
- Exact "-CC", Self gnu_prepro_opt;]
+ Exact "-CC", Self gnu_prepro_opt;
+ Prefix "-finput-charset=", Self gnu_prepro_opt]
let prepro_actions = [
(* Preprocessing options *)
@@ -198,6 +199,8 @@ let gnu_prepro_help =
-C Do not discard comments
-CC Do not discard comments, including during macro
expansion
+ -finput-charset=<charset>
+ Set the input character set, used for reading source files.
|}
let prepro_help = {|Preprocessing options:
diff --git a/test/regression/Makefile b/test/regression/Makefile
index daee05bc..53719900 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -16,7 +16,7 @@ TESTS=int32 int64 floats floats-basics floats-lit \
funct3 expr5 struct7 struct8 struct11 struct12 casts1 casts2 char1 \
sizeof1 sizeof2 binops bool for1 for2 switch switch2 compound \
decl1 bitfields9 ptrs3 \
- parsing krfun ifconv generic
+ parsing krfun ifconv generic stringlit charlit
# Can run, but only in compiled mode, and have reference output in Results
diff --git a/test/regression/Results/charlit b/test/regression/Results/charlit
new file mode 100644
index 00000000..3954a0c8
--- /dev/null
+++ b/test/regression/Results/charlit
@@ -0,0 +1,27 @@
+c1: 61
+c2: 61
+c3: 61
+c4: 61
+d1: fe
+d2: fe
+d3: fe
+d4: fe
+e1: 34
+e2: 1234
+e3: 1234
+e4: 1234
+f1: e9
+f2: e9
+f3: e9
+f4: e9
+g1: 2b
+g2: 732b
+g3: 732b
+g4: 732b
+h1: 4c
+h2: f34c
+h3: 1f34c
+h4: 1f34c
+m1: 6162
+m2: 1020304
+m3: e9e8
diff --git a/test/regression/Results/stringlit b/test/regression/Results/stringlit
new file mode 100644
index 00000000..d6967ccc
--- /dev/null
+++ b/test/regression/Results/stringlit
@@ -0,0 +1,15 @@
+s1: size 11, contents 61 c3 a9 e7 8c ab f0 9f 8d 8c
+s2: size 11, contents 61 c3 a9 e7 8c ab f0 9f 8d 8c
+s3: size 12, contents 61 e9 732b d83c df4c
+s4: size 20, contents 61 e9 732b 1f34c
+s5: size 20, contents 61 e9 732b 1f34c
+t1: size 11, contents 61 c3 a9 e7 8c ab f0 9f 8d 8c
+t2: size 11, contents 61 c3 a9 e7 8c ab f0 9f 8d 8c
+t3: size 12, contents 61 e9 732b d83c df4c
+t4: size 20, contents 61 e9 732b 1f34c
+t5: size 20, contents 61 e9 732b 1f34c
+e1: size 4, contents 61 e9 e8
+e2: size 4, contents 61 e9 e8
+e3: size 10, contents 61 e9 e8 732b
+e4: size 24, contents 61 e9 e8 732b 1f34c
+e5: size 24, contents 61 e9 e8 732b 1f34c
diff --git a/test/regression/charlit.c b/test/regression/charlit.c
new file mode 100644
index 00000000..5a7e0916
--- /dev/null
+++ b/test/regression/charlit.c
@@ -0,0 +1,50 @@
+#include <stdio.h>
+#include <wchar.h>
+#include <uchar.h>
+
+unsigned char c1 = 'a';
+char16_t c2 = u'a';;
+char32_t c3 = U'a';;
+wchar_t c4 = L'a';;
+
+unsigned char d1 = '\xFE';
+char16_t d2 = u'\xFE';;
+char32_t d3 = U'\xFE';;
+wchar_t d4 = L'\xFE';;
+
+unsigned char e1 = '\x1234'; // warning but no error
+char16_t e2 = u'\x1234';
+char32_t e3 = U'\x1234';
+wchar_t e4 = L'\x1234';
+
+unsigned char f1 = 'é'; // CompCert tolerance
+char16_t f2 = u'é';
+char32_t f3 = U'é';
+wchar_t f4 = L'é';
+
+unsigned char g1 = '猫'; // CompCert tolerance + warning
+char16_t g2 = u'猫';
+char32_t g3 = U'猫';
+wchar_t g4 = L'猫';
+
+unsigned char h1 = '🍌'; // CompCert tolerance + warning
+char16_t h2 = u'🍌'; // CompCert tolerance + warning
+char32_t h3 = U'🍌';
+wchar_t h4 = L'🍌';
+
+int m1 = 'ab';
+int m2 = '\x01\x02\x03\x04';
+int m3 = 'éè'; // CompCert tolerance
+
+#define PRINT(x) printf("%s: %x\n", #x, x)
+
+int main()
+{
+ PRINT(c1); PRINT(c2); PRINT(c3); PRINT(c4);
+ PRINT(d1); PRINT(d2); PRINT(d3); PRINT(d4);
+ PRINT(e1); PRINT(e2); PRINT(e3); PRINT(e4);
+ PRINT(f1); PRINT(f2); PRINT(f3); PRINT(f4);
+ PRINT(g1); PRINT(g2); PRINT(g3); PRINT(g4);
+ PRINT(h1); PRINT(h2); PRINT(h3); PRINT(h4);
+ PRINT(m1); PRINT(m2); PRINT(m3);
+}
diff --git a/test/regression/stringlit.c b/test/regression/stringlit.c
new file mode 100644
index 00000000..155f8ebb
--- /dev/null
+++ b/test/regression/stringlit.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <wchar.h>
+#include <uchar.h>
+
+/* Without escapes nor universal character names */
+
+unsigned char s1[] = "aé猫🍌";
+unsigned char s2[] = u8"aé猫🍌";
+char16_t s3[] = u"aé猫🍌";
+char32_t s4[] = U"aé猫🍌";
+wchar_t s5[] = L"aé猫🍌";
+
+/* With universal character names */
+
+unsigned char t1[] = "a\u00e9\u732B\U0001F34C";
+unsigned char t2[] = u8"a\u00e9\u732B\U0001F34C";
+char16_t t3[] = u"a\u00e9\u732B\U0001F34C";
+char32_t t4[] = U"a\u00e9\u732B\U0001F34C";
+wchar_t t5[] = L"a\u00e9\u732B\U0001F34C";
+
+/* With numerical escapes */
+
+unsigned char e1[] = "a\xe9\350";
+unsigned char e2[] = u8"a\xe9\350";
+char16_t e3[] = u"a\xe9\350\x732B";
+char32_t e4[] = U"a\xe9\350\x732B\x0001F34C";
+wchar_t e5[] = L"a\xe9\350\x732B\x0001F34C";
+
+#define PRINT(x) \
+ printf("%s: size %u, contents", #x, (int) sizeof(x)); \
+ for (int i = 0; x[i] != 0; i++) printf(" %x", x[i]); \
+ printf("\n")
+
+int main()
+{
+ PRINT(s1); PRINT(s2); PRINT(s3); PRINT(s4); PRINT(s5);
+ PRINT(t1); PRINT(t2); PRINT(t3); PRINT(t4); PRINT(t5);
+ PRINT(e1); PRINT(e2); PRINT(e3); PRINT(e4); PRINT(e5);
+ return 0;
+}
+