diff options
Diffstat (limited to 'cparser')
-rw-r--r-- | cparser/C.mli | 2 | ||||
-rw-r--r-- | cparser/Cabs.v | 17 | ||||
-rw-r--r-- | cparser/Ceval.ml | 6 | ||||
-rw-r--r-- | cparser/Cprint.ml | 2 | ||||
-rw-r--r-- | cparser/Cutil.ml | 4 | ||||
-rw-r--r-- | cparser/Diagnostics.ml | 3 | ||||
-rw-r--r-- | cparser/Diagnostics.mli | 1 | ||||
-rw-r--r-- | cparser/Elab.ml | 128 | ||||
-rw-r--r-- | cparser/Elab.mli | 2 | ||||
-rw-r--r-- | cparser/Lexer.mll | 180 | ||||
-rw-r--r-- | cparser/Parser.vy | 20 | ||||
-rw-r--r-- | cparser/pre_parser.mly | 2 |
12 files changed, 257 insertions, 110 deletions
diff --git a/cparser/C.mli b/cparser/C.mli index 763a9277..1388fab9 100644 --- a/cparser/C.mli +++ b/cparser/C.mli @@ -64,7 +64,7 @@ type constant = | CInt of int64 * ikind * string (* as it appeared in the source *) | CFloat of float_cst * fkind | CStr of string - | CWStr of int64 list + | CWStr of int64 list * ikind | CEnum of ident * int64 (* enum tag, integer value *) (** Attributes *) diff --git a/cparser/Cabs.v b/cparser/Cabs.v index bf8c8c74..59cd30ab 100644 --- a/cparser/Cabs.v +++ b/cparser/Cabs.v @@ -31,6 +31,13 @@ Record floatInfo := { suffix_FI:option string }. +Inductive encoding := + | EncNone (* no prefix *) + | EncWide (* 'L' prefix *) + | EncU16 (* 'u' prefix *) + | EncU32 (* 'U' prefix *) + | EncUTF8. (* 'u8' prefix (strings only) *) + Inductive structOrUnion := | STRUCT | UNION. @@ -152,8 +159,8 @@ with constant := the source code. *) | CONST_INT : string -> constant | CONST_FLOAT : floatInfo -> constant - | CONST_CHAR : bool -> list char_code -> constant - | CONST_STRING : bool -> list char_code -> constant + | CONST_CHAR : encoding -> list char_code -> constant + | CONST_STRING : encoding -> list char_code -> constant with init_expression := | NO_INIT @@ -194,9 +201,9 @@ Definition generic_assoc := (option type_name * expression)%type. (* GCC extended asm *) Inductive asm_operand := -| ASMOPERAND: option string -> bool -> list char_code -> expression -> asm_operand. +| ASMOPERAND: option string -> encoding -> list char_code -> expression -> asm_operand. -Definition asm_flag := (bool * list char_code)%type. +Definition asm_flag := (encoding * list char_code)%type. (* ** Declaration definition (at toplevel) @@ -227,7 +234,7 @@ with statement := | DEFAULT : statement -> loc -> statement | LABEL : string -> statement -> loc -> statement | GOTO : string -> loc -> statement - | ASM : list cvspec -> bool -> list char_code -> list asm_operand -> list asm_operand -> list asm_flag -> loc -> statement + | ASM : list cvspec -> encoding -> list char_code -> list asm_operand -> list asm_operand -> list asm_flag -> loc -> statement | DEFINITION : definition -> statement (*definition or declaration of a variable or type*) with for_clause := diff --git a/cparser/Ceval.ml b/cparser/Ceval.ml index 14f61e06..0800e25b 100644 --- a/cparser/Ceval.ml +++ b/cparser/Ceval.ml @@ -72,7 +72,7 @@ let normalize_int n ik = type value = | I of int64 | S of string - | WS of int64 list + | WS of int64 list * ikind let boolean_value v = match v with @@ -83,7 +83,7 @@ let constant = function | CInt(v, ik, _) -> I (normalize_int v ik) | CFloat(v, fk) -> raise Notconst | CStr s -> S s - | CWStr s -> WS s + | CWStr(s, ik) -> WS(s, ik) | CEnum(id, v) -> I v let is_signed env ty = @@ -274,7 +274,7 @@ let constant_expr env ty e = | TInt(ik, _), I n -> Some(CInt(n, ik, "")) | TPtr(_, _), I n -> Some(CInt(n, ptr_t_ikind (), "")) | (TArray(_, _, _) | TPtr(_, _)), S s -> Some(CStr s) - | (TArray(_, _, _) | TPtr(_, _)), WS s -> Some(CWStr s) + | (TArray(_, _, _) | TPtr(_, _)), WS(s, ik) -> Some(CWStr(s, ik)) | TEnum(_, _), I n -> Some(CInt(n, enum_ikind, "")) | _ -> None with Notconst -> None diff --git a/cparser/Cprint.ml b/cparser/Cprint.ml index caa4fa66..05448784 100644 --- a/cparser/Cprint.ml +++ b/cparser/Cprint.ml @@ -75,7 +75,7 @@ let const pp = function else fprintf pp "\\%03o" (Char.code c) done; fprintf pp "\"" - | CWStr l -> + | CWStr(l, _) -> fprintf pp "L\""; List.iter (fun c -> diff --git a/cparser/Cutil.ml b/cparser/Cutil.ml index 6fd12323..f226d51b 100644 --- a/cparser/Cutil.ml +++ b/cparser/Cutil.ml @@ -1024,9 +1024,9 @@ let type_of_constant = function | CStr s -> let size = Int64.of_int (String.length s + 1) in TArray(TInt(IChar,[]), Some size, []) - | CWStr s -> + | CWStr(s, ik) -> let size = Int64.of_int (List.length s + 1) in - TArray(TInt(wchar_ikind(), []), Some size, []) + TArray(TInt(ik, []), Some size, []) | CEnum(_, _) -> TInt(IInt, []) (* Check that a C expression is a lvalue *) diff --git a/cparser/Diagnostics.ml b/cparser/Diagnostics.ml index 483b0376..8a8a0c17 100644 --- a/cparser/Diagnostics.ml +++ b/cparser/Diagnostics.ml @@ -104,6 +104,7 @@ type warning_type = | Tentative_incomplete_static | Reduced_alignment | Non_linear_cond_expr + | Invalid_UTF8 (* List of all warnings with default status. "true" means the warning is active by default. @@ -140,6 +141,7 @@ let all_warnings = (Tentative_incomplete_static, false); (Reduced_alignment, false); (Non_linear_cond_expr, false); + (Invalid_UTF8, true); ] (* List of active warnings *) @@ -182,6 +184,7 @@ let string_of_warning = function | Tentative_incomplete_static -> "tentative-incomplete-static" | Reduced_alignment -> "reduced-alignment" | Non_linear_cond_expr -> "non-linear-cond-expr" + | Invalid_UTF8 -> "invalid-utf8" (* Activate the given warning *) let activate_warning w () = diff --git a/cparser/Diagnostics.mli b/cparser/Diagnostics.mli index 1210353f..47727707 100644 --- a/cparser/Diagnostics.mli +++ b/cparser/Diagnostics.mli @@ -57,6 +57,7 @@ type warning_type = | Tentative_incomplete_static (** static tentative definition with incomplete type *) | Reduced_alignment (** alignment reduction *) | Non_linear_cond_expr (** condition that cannot be linearized *) + | Invalid_UTF8 (** invalid UTF-8 encoding *) val warning : (string * int) -> warning_type -> ('a, Format.formatter, unit, unit, unit, unit) format6 -> 'a (** [warning (f,c) w fmt arg1 ... argN] formats the arguments [arg1] to [argN] as warining according to diff --git a/cparser/Elab.ml b/cparser/Elab.ml index 01e745c3..ec79634a 100644 --- a/cparser/Elab.ml +++ b/cparser/Elab.ml @@ -395,48 +395,75 @@ let elab_float_constant f = in (v, ty) -let elab_char_constant loc wide chars = +let check_char_range loc ikind chars = + let max = Int64.shift_left 1L (sizeof_ikind ikind * 8) in + List.iter + (fun c -> + if c >= max then error loc "escape sequence 0x%LX is out of range" c) + chars + +let ikind_of_encoding = function + | EncNone -> IChar + | EncWide -> wchar_ikind() + | EncU16 -> IUShort + | EncU32 -> IUInt + | EncUTF8 -> IChar + +let elab_char_constant loc enc chars = let len = List.length chars in - let nbits = if wide then 8 * !config.sizeof_wchar else 8 in - let max_digit = Int64.shift_left 1L nbits in - (* Treat multi-character constants as a number in base 2^nbits. - It must fit in type int for a normal constant and in type wchar_t - for a wide constant. *) + (* We support multi-character constants for EncNone character literals only. + We treat them as big-endian numbers in base 256. *) let v = - if len > (if wide then 1 else !config.sizeof_int) then begin - error loc "%d-character constant too long for its type" len; - 0L - end else - List.fold_left - (fun acc d -> - if d < 0L || d >= max_digit then - error loc "escape sequence is out of range (code 0x%LX)" d; - Int64.add (Int64.shift_left acc nbits) d) - 0L chars in - (* C99 6.4.4.4 items 10 and 11: - single-character constant -> represent at type char + match chars, enc with + | [], _ -> error loc "empty character constant"; 0L + | [c], _ -> c + | _, EncNone -> + if len > !config.sizeof_int then begin + error loc "%d-character constant too long, overflows its type" len; + 0L + end else begin + check_char_range loc IUChar chars; + List.fold_left + (fun acc d -> Int64.(add (shift_left acc 8) d)) + 0L chars + end + | _, _ -> + error loc "%d-character constant not supported" len; 0L in + (* C11 6.4.4.4 items 10 and 11: + normal single-character constant -> represent at type char multi-character constant -> represent at type int - wide character constant -> represent at type wchar_t *) - Ceval.normalize_int v - (if wide then wchar_ikind() else if len = 1 then IChar else IInt) - -let elab_string_literal loc wide chars = - let nbits = if wide then 8 * !config.sizeof_wchar else 8 in - let char_max = Int64.shift_left 1L nbits in - List.iter - (fun c -> - if c < 0L || c >= char_max - then error loc "escape sequence is out of range (code 0x%LX)" c) - chars; - if wide then - CWStr chars - else begin - let res = Bytes.create (List.length chars) in - List.iteri - (fun i c -> Bytes.set res i (Char.unsafe_chr (Int64.to_int c))) - chars; - CStr (Bytes.to_string res) - end + L character constant -> represent at type wchar_t + u character constant -> represent at type char16_t + U character constant -> represent at type char32_t *) + let ik = + if enc = EncNone && len > 1 then IInt else ikind_of_encoding enc in + let v' = Ceval.normalize_int v ik in + if v' <> v then + warning loc Constant_conversion + "overflow in character constant, changes value from %Ld to %Ld" v v'; + v' + +let elab_string_literal loc enc chars = + let ik = ikind_of_encoding enc in + check_char_range loc ik chars; + match enc with + | EncNone | EncUTF8 -> + let res = Bytes.create (List.length chars) in + List.iteri + (fun i c -> Bytes.set res i (Char.unsafe_chr (Int64.to_int c))) + chars; + CStr (Bytes.to_string res) + | EncWide | EncU16 | EncU32 -> + CWStr(chars, ik) + +let warn_C11_literals loc enc kind = + let warn enc = + warning loc Celeven_extension "'%s' %s are a C11 extension" enc kind in + match enc with + | EncNone | EncWide -> () + | EncUTF8 -> warn "u8" + | EncU16 -> warn "u" + | EncU32 -> warn "U" let elab_constant loc = function | CONST_INT s -> @@ -445,14 +472,22 @@ let elab_constant loc = function | CONST_FLOAT f -> let (v, fk) = elab_float_constant f in CFloat(v, fk) - | CONST_CHAR(wide, s) -> - let ikind = if wide then wchar_ikind () else IInt in - CInt(elab_char_constant loc wide s, ikind, "") + | CONST_CHAR(enc, s) -> + warn_C11_literals loc enc "character constants"; + let ikind = + match enc with + | EncNone -> IInt + | EncWide -> wchar_ikind () + | EncU16 -> IUShort + | EncU32 -> IUInt + | EncUTF8 -> assert false in + CInt(elab_char_constant loc enc s, ikind, "") | CONST_STRING(wide, s) -> + warn_C11_literals loc wide "string literals"; elab_string_literal loc wide s -let elab_simple_string loc wide chars = - match elab_string_literal loc wide chars with +let elab_simple_string loc enc chars = + match elab_string_literal loc enc chars with | CStr s -> s | _ -> error loc "cannot use wide string literal in 'asm'"; "" @@ -1586,6 +1621,7 @@ and elab_item zi item il = | COMPOUND_INIT [_, SINGLE_INIT(CONSTANT (CONST_STRING(w, s)))]), TArray(ty_elt, sz, _) when is_integer_type env ty_elt -> + warn_C11_literals loc w "string literals"; begin match elab_string_literal loc w s, unroll env ty_elt with | CStr s, TInt((IChar | ISChar | IUChar), _) -> if not (I.index_below (Int64.of_int(String.length s - 1)) sz) then @@ -1594,12 +1630,12 @@ and elab_item zi item il = | CStr _, _ -> error loc "initialization of an array of non-char elements with a string literal"; elab_list zi il false - | CWStr s, TInt(_, _) when compatible_types AttrIgnoreTop env ty_elt (TInt(wchar_ikind(), [])) -> + | CWStr(s, ik), TInt(_, _) when compatible_types AttrIgnoreTop env ty_elt (TInt(ik, [])) -> if not (I.index_below (Int64.of_int(List.length s - 1)) sz) then warning loc Unnamed "initializer string for array of wide chars %s is too long" (I.name zi); elab_list (I.set zi (init_int_array_wstring sz s)) il false | CWStr _, _ -> - error loc "initialization of an array of non-wchar_t elements with a wide string literal"; + error loc "type mismatch between array destination and wide string literal"; elab_list zi il false | _ -> assert false end diff --git a/cparser/Elab.mli b/cparser/Elab.mli index bca4f74d..537056fa 100644 --- a/cparser/Elab.mli +++ b/cparser/Elab.mli @@ -21,6 +21,6 @@ val elab_file : Cabs.definition list -> C.program val elab_int_constant : Cabs.loc -> string -> int64 * C.ikind val elab_float_constant : Cabs.floatInfo -> C.float_cst * C.fkind -val elab_char_constant : Cabs.loc -> bool -> int64 list -> int64 +val elab_char_constant : Cabs.loc -> Cabs.encoding -> int64 list -> int64 (* These auxiliary functions are exported so that they can be reused in other projects that deal with C-style source languages. *) diff --git a/cparser/Lexer.mll b/cparser/Lexer.mll index 9f7fba1e..2efa4216 100644 --- a/cparser/Lexer.mll +++ b/cparser/Lexer.mll @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Jacques-Henri Jourdan, INRIA Paris-Rocquencourt *) +(* Xavier Leroy, Collège de France and Inria *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -144,9 +145,9 @@ let error lb fmt = Diagnostics.error (lb.lex_curr_p.pos_fname,lb.lex_curr_p.pos_lnum) fmt -let warning lb fmt = +let warning lb kind fmt = Diagnostics.warning - (lb.lex_curr_p.pos_fname,lb.lex_curr_p.pos_lnum) Diagnostics.Unnamed fmt + (lb.lex_curr_p.pos_fname,lb.lex_curr_p.pos_lnum) kind fmt (* Simple character escapes *) @@ -160,6 +161,89 @@ let convert_escape = function | 't' -> 9L (* horizontal tab *) | 'v' -> 11L (* vertical tab *) | c -> Int64.of_int (Char.code c) + +(* Encodings for character and string literals *) + +let encoding_of = function + | "" -> Cabs.EncNone + | "L" -> Cabs.EncWide + | "u" -> Cabs.EncU16 + | "U" -> Cabs.EncU32 + | "u8" -> Cabs.EncUTF8 + | _ -> assert false + +let combine_encodings loc e1 e2 = + if e1 = Cabs.EncNone then e2 + else if e2 = Cabs.EncNone then e1 + else if e1 = e2 then e1 + else Diagnostics.fatal_error + Cabs.(loc.filename, loc.lineno) + "unsupported non-standard concatenation of string literals" + +(* Handling of characters and escapes in string and char constants *) + +type chr = Chr of int | Esc of int64 + +let check_utf8 lexbuf min x = + if x > 0x10FFFF || (x >= 0xD800 && x <= 0xDFFF) then + warning lexbuf Diagnostics.Invalid_UTF8 "Wrong Unicode value U+%X" x; + if x < min then + warning lexbuf Diagnostics.Invalid_UTF8 + "Overlong UTF-8 encoding for Unicode value U+%X" x; + Chr x + +let check_universal_character lexbuf x = + if x > 0x10FFFF + || x >= 0xD800 && x <= 0xDFFF + || x < 0xA0 && x <> 0x24 && x <> 0x40 && x <> 0x60 + then begin + error lexbuf "Wrong universal character name U+%X" x; Chr 0 + end else + Chr x + +let add_char_utf8 x accu = + if x <= 0x007F then + Int64.of_int x :: accu + else if x <= 0x07FF then + Int64.of_int (0x80 lor (x land 0x3F)) :: + Int64.of_int (0xC0 lor (x lsr 6)) :: + accu + else if x <= 0xFFFF then + Int64.of_int (0x80 lor (x land 0x3F)) :: + Int64.of_int (0x80 lor ((x lsr 6) land 0x3F)) :: + Int64.of_int (0xE0 lor (x lsr 12)) :: + accu + else + Int64.of_int (0x80 lor (x land 0x3F)) :: + Int64.of_int (0x80 lor ((x lsr 6) land 0x3F)) :: + Int64.of_int (0x80 lor ((x lsr 12) land 0x3F)) :: + Int64.of_int (0xF0 lor (x lsr 18)) :: + accu + +let add_char_utf16 x accu = + if x <= 0xFFFF then + Int64.of_int x :: accu + else begin + let x = x - 0x10000 in + Int64.of_int (0xDC00 lor (x land 0x3FF)) :: + Int64.of_int (0xD800 lor (x lsr 10)) :: + accu + end + +let add_char enc c accu = + match c, enc with + | Esc x, _ -> (* Escapes are never encoded *) + x :: accu + | Chr x, (Cabs.EncNone | Cabs.EncUTF8) -> (* Characters are encoded in UTF8 *) + add_char_utf8 x accu + | Chr x, Cabs.EncU16 -> (* Characters are encoded in UTF16 *) + add_char_utf16 x accu + | Chr x, Cabs.EncU32 -> (* Characters are not encoded *) + Int64.of_int x :: accu + | Chr x, Cabs.EncWide -> (* Depends on size of wchar_t *) + if Machine.(!config.sizeof_wchar) = 2 + then add_char_utf16 x accu + else Int64.of_int x :: accu } (* Identifiers *) @@ -249,11 +333,6 @@ let octal_escape_sequence = | octal_digit octal_digit | octal_digit octal_digit octal_digit) as n) let hexadecimal_escape_sequence = "\\x" (hexadecimal_digit+ as n) -let escape_sequence = - simple_escape_sequence - | octal_escape_sequence - | hexadecimal_escape_sequence - | universal_character_name rule initial = parse | '\n' { new_line lexbuf; initial_linebegin lexbuf } @@ -283,16 +362,13 @@ rule initial = parse currentLoc lexbuf)} | preprocessing_number as s { error lexbuf "invalid numerical constant '%s'@ These characters form a preprocessor number, but not a constant" s; CONSTANT (Cabs.CONST_INT "0", currentLoc lexbuf) } - | "'" { let l = char_literal lexbuf.lex_start_p [] lexbuf in - CONSTANT (Cabs.CONST_CHAR(false, l), - currentLoc lexbuf) } - | "L'" { let l = char_literal lexbuf.lex_start_p [] lexbuf in - CONSTANT (Cabs.CONST_CHAR(true, l), - currentLoc lexbuf) } - | "\"" { let l = string_literal lexbuf.lex_start_p [] lexbuf in - STRING_LITERAL(false, l, currentLoc lexbuf) } - | "L\"" { let l = string_literal lexbuf.lex_start_p [] lexbuf in - STRING_LITERAL(true, l, currentLoc lexbuf) } + | (""|"L"|"u"|"U") as e "'" { let enc = encoding_of e in + let l = char_literal lexbuf.lex_start_p [] lexbuf in + CONSTANT (Cabs.CONST_CHAR(enc, l), currentLoc lexbuf) } + | (""|"L"|"u"|"U"|"u8") as e "\"" + { let enc = encoding_of e in + let l = string_literal lexbuf.lex_start_p enc [] lexbuf in + STRING_LITERAL(enc, l, currentLoc lexbuf) } | "..." { ELLIPSIS(currentLoc lexbuf) } | "+=" { ADD_ASSIGN(currentLoc lexbuf) } | "-=" { SUB_ASSIGN(currentLoc lexbuf) } @@ -357,39 +433,62 @@ and initial_linebegin = parse and char = parse | universal_character_name { try - Int64.of_string ("0x" ^ n) + check_universal_character lexbuf (int_of_string ("0x" ^ n)) with Failure _ -> error lexbuf "overflow in universal character name"; - 0L + Chr 0 } | hexadecimal_escape_sequence { try - Int64.of_string ("0x" ^ n) + Esc (Int64.of_string ("0x" ^ n)) with Failure _ -> error lexbuf "overflow in hexadecimal escape sequence"; - 0L + Esc 0L } | octal_escape_sequence - { Int64.of_string ("0o" ^ n) } + { Esc (Int64.of_string ("0o" ^ n)) } | simple_escape_sequence - { convert_escape c } + { Esc (convert_escape c) } + | "\\u" | "\\U" + { error lexbuf "incomplete universal character name"; + Chr 0 } | '\\' (_ as c) { error lexbuf "incorrect escape sequence '\\%c'" c; - Int64.of_int (Char.code c) } + Esc (Int64.of_int (Char.code c)) } + | ['\x00'-'\x7F'] as c1 + { Chr (Char.code c1) } + | (['\xC0'-'\xDF'] as c1) (['\x80'-'\xBF'] as c2) + { check_utf8 lexbuf 0x80 + ( (Char.code c1 land 0b00011111) lsl 6 + + (Char.code c2 land 0b00111111)) } + | (['\xE0'-'\xEF'] as c1) (['\x80'-'\xBF'] as c2) (['\x80'-'\xBF'] as c3) + { check_utf8 lexbuf 0x800 + ( (Char.code c1 land 0b00001111) lsl 12 + + (Char.code c2 land 0b00111111) lsl 6 + + (Char.code c3 land 0b00111111) ) } + | (['\xF0'-'\xF7'] as c1) (['\x80'-'\xBF'] as c2) (['\x80'-'\xBF'] as c3) (['\x80'-'\xBF'] as c4) + { check_utf8 lexbuf 0x10000 + ( (Char.code c1 land 0b00000111) lsl 18 + + (Char.code c2 land 0b00111111) lsl 12 + + (Char.code c3 land 0b00111111) lsl 6 + + (Char.code c4 land 0b00111111) ) } | _ as c - { Int64.of_int (Char.code c) } + { warning lexbuf Diagnostics.Invalid_UTF8 + "Invalid UTF8 encoding: byte 0x%02x" (Char.code c); + Esc (Int64.of_int (Char.code c)) (* re-encode as-is *) + } and char_literal startp accu = parse | '\'' { lexbuf.lex_start_p <- startp; List.rev accu } | '\n' | eof { fatal_error lexbuf "missing terminating \"'\" character" } - | "" { let c = char lexbuf in char_literal startp (c :: accu) lexbuf } + | "" { let c = char lexbuf in char_literal startp (add_char Cabs.EncU32 c accu) lexbuf } -and string_literal startp accu = parse +and string_literal startp enc accu = parse | '\"' { lexbuf.lex_start_p <- startp; List.rev accu } | '\n' | eof { fatal_error lexbuf "missing terminating '\"' character" } - | "" { let c = char lexbuf in string_literal startp (c :: accu) lexbuf } + | "" { let c = char lexbuf in string_literal startp enc (add_char enc c accu) lexbuf } (* We assume gcc -E syntax but try to tolerate variations. *) and hash = parse @@ -402,7 +501,8 @@ and hash = parse try int_of_string n with Failure _ -> - warning lexbuf "invalid line number"; lexbuf.lex_curr_p.pos_lnum + warning lexbuf Diagnostics.Unnamed "invalid line number"; + lexbuf.lex_curr_p.pos_lnum in lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with @@ -417,7 +517,7 @@ and hash = parse ([^ '\n']* as s) '\n' { new_line lexbuf; PRAGMA (s, currentLoc lexbuf) } | [^ '\n']* '\n' - { warning lexbuf "unrecognized '#' line"; + { warning lexbuf Diagnostics.Unnamed "unrecognized '#' line"; new_line lexbuf; initial_linebegin lexbuf } | [^ '\n']* eof { fatal_error lexbuf "unexpected end of file" } @@ -583,21 +683,21 @@ and singleline_comment = parse | Pre_parser.STAR loc -> loop (Parser.STAR loc) | Pre_parser.STATIC loc -> loop (Parser.STATIC loc) | Pre_parser.STATIC_ASSERT loc -> loop (Parser.STATIC_ASSERT loc) - | Pre_parser.STRING_LITERAL (wide, str, loc) -> + | Pre_parser.STRING_LITERAL (enc, str, loc) -> (* Merge consecutive string literals *) - let rec doConcat wide str = + let rec doConcat enc str = match Queue.peek tokens with - | Pre_parser.STRING_LITERAL (wide', str', loc) -> + | Pre_parser.STRING_LITERAL (enc', str', loc') -> ignore (Queue.pop tokens); - let (wide'', str'') = doConcat wide' str' in + let (enc'', str'') = doConcat enc' str' in if str'' <> [] - then (wide || wide'', str @ str'') - else (wide, str) - | _ -> (wide, str) - | exception Queue.Empty -> (wide, str) + then (combine_encodings loc enc enc'', str @ str'') + else (enc, str) + | _ -> (enc, str) + | exception Queue.Empty -> (enc, str) in - let (wide', str') = doConcat wide str in - loop (Parser.STRING_LITERAL ((wide', str'), loc)) + let (enc', str') = doConcat enc str in + loop (Parser.STRING_LITERAL ((enc', str'), loc)) | Pre_parser.STRUCT loc -> loop (Parser.STRUCT loc) | Pre_parser.SUB_ASSIGN loc -> loop (Parser.SUB_ASSIGN loc) | Pre_parser.SWITCH loc -> loop (Parser.SWITCH loc) diff --git a/cparser/Parser.vy b/cparser/Parser.vy index 8d7cd055..12337367 100644 --- a/cparser/Parser.vy +++ b/cparser/Parser.vy @@ -23,7 +23,7 @@ Require Cabs. %token<Cabs.string * Cabs.loc> VAR_NAME TYPEDEF_NAME OTHER_NAME %token<Cabs.string * Cabs.loc> PRAGMA -%token<bool * list Cabs.char_code * Cabs.loc> STRING_LITERAL +%token<Cabs.encoding * list Cabs.char_code * Cabs.loc> STRING_LITERAL %token<Cabs.constant * Cabs.loc> CONSTANT %token<Cabs.loc> SIZEOF PTR INC DEC LEFT RIGHT LEQ GEQ EQEQ EQ NEQ LT GT ANDAND BARBAR PLUS MINUS STAR TILDE BANG SLASH PERCENT HAT BAR QUESTION @@ -124,8 +124,8 @@ primary_expression: | cst = CONSTANT { (Cabs.CONSTANT (fst cst), snd cst) } | str = STRING_LITERAL - { let '((wide, chars), loc) := str in - (Cabs.CONSTANT (Cabs.CONST_STRING wide chars), loc) } + { let '((enc, chars), loc) := str in + (Cabs.CONSTANT (Cabs.CONST_STRING enc chars), loc) } | loc = LPAREN expr = expression RPAREN { (fst expr, loc)} | sel = generic_selection @@ -786,8 +786,8 @@ designator: static_assert_declaration: | loc = STATIC_ASSERT LPAREN expr = constant_expression COMMA str = STRING_LITERAL RPAREN SEMICOLON - { let '((wide, chars), locs) := str in - (expr, (Cabs.CONST_STRING wide chars, locs), loc) } + { let '((enc, chars), locs) := str in + (expr, (Cabs.CONST_STRING enc chars, locs), loc) } (* 6.8 *) statement_dangerous: @@ -922,9 +922,9 @@ jump_statement: asm_statement: | loc = ASM attr = asm_attributes LPAREN template = STRING_LITERAL args = asm_arguments RPAREN SEMICOLON - { let '(wide, chars, _) := template in + { let '(enc, chars, _) := template in let '(outputs, inputs, flags) := args in - Cabs.ASM attr wide chars outputs inputs flags loc } + Cabs.ASM attr enc chars outputs inputs flags loc } asm_attributes: | /* empty */ @@ -954,7 +954,7 @@ asm_operands_ne: asm_operand: | n = asm_op_name cstr = STRING_LITERAL LPAREN e = expression RPAREN - { let '(wide, s, loc) := cstr in Cabs.ASMOPERAND n wide s (fst e) } + { let '(enc, s, loc) := cstr in Cabs.ASMOPERAND n enc s (fst e) } asm_op_name: | /* empty */ { None } @@ -962,9 +962,9 @@ asm_op_name: asm_flags: | f = STRING_LITERAL - { let '(wide, s, loc) := f in (wide, s) :: nil } + { let '(enc, s, loc) := f in (enc, s) :: nil } | f = STRING_LITERAL COMMA fl = asm_flags - { let '(wide, s, loc) := f in (wide, s) :: fl } + { let '(enc, s, loc) := f in (enc, s) :: fl } (* 6.9 *) translation_unit_file: diff --git a/cparser/pre_parser.mly b/cparser/pre_parser.mly index ad294398..00ca0ade 100644 --- a/cparser/pre_parser.mly +++ b/cparser/pre_parser.mly @@ -47,7 +47,7 @@ %token<string * Pre_parser_aux.identifier_type ref * Cabs.loc> VAR_NAME TYPEDEF_NAME %token<Cabs.constant * Cabs.loc> CONSTANT -%token<bool * int64 list * Cabs.loc> STRING_LITERAL +%token<Cabs.encoding * int64 list * Cabs.loc> STRING_LITERAL %token<string * Cabs.loc> PRAGMA %token<Cabs.loc> SIZEOF PTR INC DEC LEFT RIGHT LEQ GEQ EQEQ EQ NEQ LT GT |