aboutsummaryrefslogtreecommitdiffstats
path: root/cfrontend
diff options
context:
space:
mode:
authorXavier Leroy <xavierleroy@users.noreply.github.com>2022-09-19 16:37:17 +0200
committerGitHub <noreply@github.com>2022-09-19 16:37:17 +0200
commit994c6c34182606385140e5695e33c90507ce59ee (patch)
treee9291d64997dd3e2ac8660c0e6fbe1b9a597799e /cfrontend
parent103aa7074a9dd3b1bcb2864d52c89292a2ab7bff (diff)
downloadcompcert-994c6c34182606385140e5695e33c90507ce59ee.tar.gz
compcert-994c6c34182606385140e5695e33c90507ce59ee.zip
Support C11 Unicode string literals and character constants (#452)
* Support C11 Unicode string literals and character constants * Add tests for C11 string literals and character constants * Better error message for ill-formed universal character names E.g. \u followed by fewer than 4 hex digits, or \U followed by fewer than 8 hex digits. * Add new warning `invalid-utf8` for byte sequences that are not valid UTF8. The warning is activated but not fatal by default. * Warn on uses of C11 Unicode character constants and string literals This uses the `c11-extensions` warning, which is off by default. * Support preprocessing option -finput-charset= for GNU toolchains
Diffstat (limited to 'cfrontend')
-rw-r--r--cfrontend/C2C.ml197
1 files changed, 95 insertions, 102 deletions
diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml
index b0dc8e8a..89b9139c 100644
--- a/cfrontend/C2C.ml
+++ b/cfrontend/C2C.ml
@@ -287,105 +287,6 @@ let attributes = [
]
-(** ** Functions used to handle string literals *)
-
-let stringNum = ref 0 (* number of next global for string literals *)
-let stringTable : (string, AST.ident) Hashtbl.t = Hashtbl.create 47
-let wstringTable : (int64 list, AST.ident) Hashtbl.t = Hashtbl.create 47
-
-let is_C_string s = not (String.contains s '\000')
-
-let name_for_string_literal s =
- try
- Hashtbl.find stringTable s
- with Not_found ->
- incr stringNum;
- let name = Printf.sprintf "__stringlit_%d" !stringNum in
- let id = intern_string name in
- let mergeable = if is_C_string s then 1 else 0 in
- Hashtbl.add decl_atom id
- { a_storage = C.Storage_static;
- a_alignment = Some 1;
- a_size = Some (Int64.of_int (String.length s + 1));
- a_sections = [Sections.for_stringlit mergeable];
- a_access = Sections.Access_default;
- a_inline = No_specifier;
- a_loc = Cutil.no_loc };
- Hashtbl.add stringTable s id;
- id
-
-let typeStringLiteral s =
- let sg = if Machine.((!config).char_signed) then Signed else Unsigned in
- Tarray(Tint(I8, sg, noattr), Z.of_uint (String.length s + 1), noattr)
-
-let global_for_string s id =
- let init = ref [] in
- let add_char c =
- init := AST.Init_int8(Z.of_uint(Char.code c)) :: !init in
- add_char '\000';
- for i = String.length s - 1 downto 0 do add_char s.[i] done;
- AST.(id, Gvar { gvar_info = typeStringLiteral s; gvar_init = !init;
- gvar_readonly = true; gvar_volatile = false})
-
-let is_C_wide_string s = not (List.mem 0L s)
-
-let name_for_wide_string_literal s =
- try
- Hashtbl.find wstringTable s
- with Not_found ->
- incr stringNum;
- let name = Printf.sprintf "__stringlit_%d" !stringNum in
- let id = intern_string name in
- let wchar_size = Machine.((!config).sizeof_wchar) in
- let mergeable = if is_C_wide_string s then wchar_size else 0 in
- Hashtbl.add decl_atom id
- { a_storage = C.Storage_static;
- a_alignment = Some wchar_size;
- a_size = Some (Int64.(mul (of_int (List.length s + 1))
- (of_int wchar_size)));
- a_sections = [Sections.for_stringlit mergeable];
- a_access = Sections.Access_default;
- a_inline = No_specifier;
- a_loc = Cutil.no_loc };
- Hashtbl.add wstringTable s id;
- id
-
-let typeWideStringLiteral s =
- let sz =
- match Machine.((!config).sizeof_wchar) with
- | 2 -> I16
- | 4 -> I32
- | _ -> assert false in
- let sg =
- if Machine.((!config).wchar_signed) then Signed else Unsigned in
- Tarray(Tint(sz, sg, noattr), Z.of_uint (List.length s + 1), noattr)
-
-let global_for_wide_string s id =
- let init = ref [] in
- let init_of_char =
- match Machine.((!config).sizeof_wchar) with
- | 2 -> (fun z -> AST.Init_int16 z)
- | 4 -> (fun z -> AST.Init_int32 z)
- | _ -> assert false in
- let add_char c =
- init := init_of_char(Z.of_uint64 c) :: !init in
- List.iter add_char s;
- add_char 0L;
- AST.(id, Gvar { gvar_info = typeWideStringLiteral s;
- gvar_init = List.rev !init;
- gvar_readonly = true; gvar_volatile = false})
-
-let globals_for_strings globs =
- let globs1 =
- Hashtbl.fold
- (fun s id l -> global_for_wide_string s id :: l)
- wstringTable globs in
- let globs2 =
- Hashtbl.fold
- (fun s id l -> global_for_string s id :: l)
- stringTable globs1 in
- globs2
-
(** ** Handling of inlined memcpy functions *)
let constant_size_t a =
@@ -667,6 +568,98 @@ let is_int64 env ty =
| C.TEnum(_, _) -> false
| _ -> assert false
+(** String literals *)
+
+let stringNum = ref 0 (* number of next global for string literals *)
+let stringTable : (string, AST.ident) Hashtbl.t = Hashtbl.create 47
+let wstringTable : (int64 list * ikind, AST.ident) Hashtbl.t = Hashtbl.create 47
+
+let is_C_string s = not (String.contains s '\000')
+
+let name_for_string_literal s =
+ try
+ Hashtbl.find stringTable s
+ with Not_found ->
+ incr stringNum;
+ let name = Printf.sprintf "__stringlit_%d" !stringNum in
+ let id = intern_string name in
+ let mergeable = if is_C_string s then 1 else 0 in
+ Hashtbl.add decl_atom id
+ { a_storage = C.Storage_static;
+ a_alignment = Some 1;
+ a_size = Some (Int64.of_int (String.length s + 1));
+ a_sections = [Sections.for_stringlit mergeable];
+ a_access = Sections.Access_default;
+ a_inline = No_specifier;
+ a_loc = Cutil.no_loc };
+ Hashtbl.add stringTable s id;
+ id
+
+let typeStringLiteral s =
+ let sg = if Machine.((!config).char_signed) then Signed else Unsigned in
+ Tarray(Tint(I8, sg, noattr), Z.of_uint (String.length s + 1), noattr)
+
+let global_for_string s id =
+ let init = ref [] in
+ let add_char c =
+ init := AST.Init_int8(Z.of_uint(Char.code c)) :: !init in
+ add_char '\000';
+ for i = String.length s - 1 downto 0 do add_char s.[i] done;
+ AST.(id, Gvar { gvar_info = typeStringLiteral s; gvar_init = !init;
+ gvar_readonly = true; gvar_volatile = false})
+
+let is_C_wide_string s = not (List.mem 0L s)
+
+let name_for_wide_string_literal s ik =
+ try
+ Hashtbl.find wstringTable (s, ik)
+ with Not_found ->
+ incr stringNum;
+ let name = Printf.sprintf "__stringlit_%d" !stringNum in
+ let id = intern_string name in
+ let wchar_size = Cutil.sizeof_ikind ik in
+ let mergeable = if is_C_wide_string s then wchar_size else 0 in
+ Hashtbl.add decl_atom id
+ { a_storage = C.Storage_static;
+ a_alignment = Some wchar_size;
+ a_size = Some (Int64.(mul (of_int (List.length s + 1))
+ (of_int wchar_size)));
+ a_sections = [Sections.for_stringlit mergeable];
+ a_access = Sections.Access_default;
+ a_inline = No_specifier;
+ a_loc = Cutil.no_loc };
+ Hashtbl.add wstringTable (s, ik) id;
+ id
+
+let typeWideStringLiteral s ik =
+ Tarray(convertIkind ik noattr, Z.of_uint (List.length s + 1), noattr)
+
+let global_for_wide_string (s, ik) id =
+ let init = ref [] in
+ let init_of_char =
+ match Cutil.sizeof_ikind ik with
+ | 2 -> (fun z -> AST.Init_int16 z)
+ | 4 -> (fun z -> AST.Init_int32 z)
+ | _ -> assert false in
+ let add_char c =
+ init := init_of_char(Z.of_uint64 c) :: !init in
+ List.iter add_char s;
+ add_char 0L;
+ AST.(id, Gvar { gvar_info = typeWideStringLiteral s ik;
+ gvar_init = List.rev !init;
+ gvar_readonly = true; gvar_volatile = false})
+
+let globals_for_strings globs =
+ let globs1 =
+ Hashtbl.fold
+ (fun s id l -> global_for_wide_string s id :: l)
+ wstringTable globs in
+ let globs2 =
+ Hashtbl.fold
+ (fun s id l -> global_for_string s id :: l)
+ stringTable globs1 in
+ globs2
+
(** Floating point constants *)
let z_of_str hex str fst =
@@ -1000,9 +993,9 @@ and convertLvalue env e =
| C.EConst(C.CStr s) ->
let ty = typeStringLiteral s in
Evar(name_for_string_literal s, ty)
- | C.EConst(C.CWStr s) ->
- let ty = typeWideStringLiteral s in
- Evar(name_for_wide_string_literal s, ty)
+ | C.EConst(C.CWStr(s, ik)) ->
+ let ty = typeWideStringLiteral s ik in
+ Evar(name_for_wide_string_literal s ik, ty)
| _ ->
error "illegal lvalue"; ezero