From a5f03d96eee482cd84861fc8cefff9eb451c0cad Mon Sep 17 00:00:00 2001 From: xleroy Date: Sun, 29 Mar 2009 09:47:11 +0000 Subject: Cleaned up configure script. Distribution of CIL as an expanded source tree with changes applied (instead of original .tar.gz + patches to be applied at config time). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1020 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- cil/src/check.ml | 1017 +++++++ cil/src/check.mli | 45 + cil/src/cil.ml | 6427 +++++++++++++++++++++++++++++++++++++++ cil/src/cil.mli | 2455 +++++++++++++++ cil/src/cillower.ml | 57 + cil/src/cillower.mli | 42 + cil/src/ciloptions.ml | 196 ++ cil/src/ciloptions.mli | 48 + cil/src/cilutil.ml | 72 + cil/src/escape.ml | 93 + cil/src/escape.mli | 48 + cil/src/ext/astslicer.ml | 454 +++ cil/src/ext/availexps.ml | 359 +++ cil/src/ext/bitmap.ml | 224 ++ cil/src/ext/bitmap.mli | 50 + cil/src/ext/blockinggraph.ml | 769 +++++ cil/src/ext/blockinggraph.mli | 40 + cil/src/ext/callgraph.ml | 250 ++ cil/src/ext/callgraph.mli | 123 + cil/src/ext/canonicalize.ml | 292 ++ cil/src/ext/canonicalize.mli | 48 + cil/src/ext/cfg.ml | 289 ++ cil/src/ext/cfg.mli | 36 + cil/src/ext/ciltools.ml | 228 ++ cil/src/ext/dataflow.ml | 466 +++ cil/src/ext/dataflow.mli | 151 + cil/src/ext/dataslicing.ml | 462 +++ cil/src/ext/dataslicing.mli | 41 + cil/src/ext/deadcodeelim.ml | 173 ++ cil/src/ext/dominators.ml | 241 ++ cil/src/ext/dominators.mli | 29 + cil/src/ext/epicenter.ml | 114 + cil/src/ext/heap.ml | 112 + cil/src/ext/heapify.ml | 250 ++ cil/src/ext/liveness.ml | 190 ++ cil/src/ext/logcalls.ml | 268 ++ cil/src/ext/logcalls.mli | 41 + cil/src/ext/logwrites.ml | 139 + cil/src/ext/oneret.ml | 187 ++ cil/src/ext/oneret.mli | 44 + cil/src/ext/partial.ml | 851 ++++++ cil/src/ext/pta/golf.ml | 1657 ++++++++++ cil/src/ext/pta/golf.mli | 83 + cil/src/ext/pta/olf.ml | 1108 +++++++ cil/src/ext/pta/olf.mli | 80 + cil/src/ext/pta/ptranal.ml | 597 ++++ cil/src/ext/pta/ptranal.mli | 156 + cil/src/ext/pta/setp.ml | 342 +++ cil/src/ext/pta/setp.mli | 180 ++ cil/src/ext/pta/steensgaard.ml | 1417 +++++++++ cil/src/ext/pta/steensgaard.mli | 71 + cil/src/ext/pta/uref.ml | 94 + cil/src/ext/pta/uref.mli | 65 + cil/src/ext/reachingdefs.ml | 511 ++++ cil/src/ext/sfi.ml | 337 ++ cil/src/ext/simplemem.ml | 132 + cil/src/ext/simplify.ml | 845 +++++ cil/src/ext/ssa.ml | 696 +++++ cil/src/ext/ssa.mli | 45 + cil/src/ext/stackoverflow.ml | 246 ++ cil/src/ext/stackoverflow.mli | 43 + cil/src/ext/usedef.ml | 188 ++ cil/src/formatcil.ml | 215 ++ cil/src/formatcil.mli | 103 + cil/src/formatlex.mll | 308 ++ cil/src/formatparse.mly | 1455 +++++++++ cil/src/frontc/cabs.ml | 396 +++ cil/src/frontc/cabs2cil.ml | 6238 +++++++++++++++++++++++++++++++++++++ cil/src/frontc/cabs2cil.mli | 49 + cil/src/frontc/cabsvisit.ml | 577 ++++ cil/src/frontc/cabsvisit.mli | 115 + cil/src/frontc/clexer.mli | 55 + cil/src/frontc/clexer.mll | 664 ++++ cil/src/frontc/cparser.mly | 1521 +++++++++ cil/src/frontc/cprint.ml | 1014 ++++++ cil/src/frontc/frontc.ml | 256 ++ cil/src/frontc/frontc.mli | 55 + cil/src/frontc/lexerhack.ml | 22 + cil/src/frontc/patch.ml | 837 +++++ cil/src/frontc/patch.mli | 42 + cil/src/libmaincil.ml | 108 + cil/src/machdep.c | 220 ++ cil/src/main.ml | 288 ++ cil/src/mergecil.ml | 1770 +++++++++++ cil/src/mergecil.mli | 42 + cil/src/rmtmps.ml | 778 +++++ cil/src/rmtmps.mli | 82 + cil/src/testcil.ml | 440 +++ 88 files changed, 43964 insertions(+) create mode 100644 cil/src/check.ml create mode 100644 cil/src/check.mli create mode 100644 cil/src/cil.ml create mode 100644 cil/src/cil.mli create mode 100755 cil/src/cillower.ml create mode 100755 cil/src/cillower.mli create mode 100755 cil/src/ciloptions.ml create mode 100755 cil/src/ciloptions.mli create mode 100644 cil/src/cilutil.ml create mode 100644 cil/src/escape.ml create mode 100644 cil/src/escape.mli create mode 100644 cil/src/ext/astslicer.ml create mode 100644 cil/src/ext/availexps.ml create mode 100644 cil/src/ext/bitmap.ml create mode 100644 cil/src/ext/bitmap.mli create mode 100644 cil/src/ext/blockinggraph.ml create mode 100644 cil/src/ext/blockinggraph.mli create mode 100644 cil/src/ext/callgraph.ml create mode 100644 cil/src/ext/callgraph.mli create mode 100644 cil/src/ext/canonicalize.ml create mode 100644 cil/src/ext/canonicalize.mli create mode 100644 cil/src/ext/cfg.ml create mode 100644 cil/src/ext/cfg.mli create mode 100755 cil/src/ext/ciltools.ml create mode 100755 cil/src/ext/dataflow.ml create mode 100755 cil/src/ext/dataflow.mli create mode 100644 cil/src/ext/dataslicing.ml create mode 100644 cil/src/ext/dataslicing.mli create mode 100644 cil/src/ext/deadcodeelim.ml create mode 100755 cil/src/ext/dominators.ml create mode 100755 cil/src/ext/dominators.mli create mode 100644 cil/src/ext/epicenter.ml create mode 100644 cil/src/ext/heap.ml create mode 100644 cil/src/ext/heapify.ml create mode 100644 cil/src/ext/liveness.ml create mode 100644 cil/src/ext/logcalls.ml create mode 100644 cil/src/ext/logcalls.mli create mode 100644 cil/src/ext/logwrites.ml create mode 100644 cil/src/ext/oneret.ml create mode 100644 cil/src/ext/oneret.mli create mode 100644 cil/src/ext/partial.ml create mode 100644 cil/src/ext/pta/golf.ml create mode 100644 cil/src/ext/pta/golf.mli create mode 100644 cil/src/ext/pta/olf.ml create mode 100644 cil/src/ext/pta/olf.mli create mode 100644 cil/src/ext/pta/ptranal.ml create mode 100644 cil/src/ext/pta/ptranal.mli create mode 100644 cil/src/ext/pta/setp.ml create mode 100644 cil/src/ext/pta/setp.mli create mode 100644 cil/src/ext/pta/steensgaard.ml create mode 100644 cil/src/ext/pta/steensgaard.mli create mode 100644 cil/src/ext/pta/uref.ml create mode 100644 cil/src/ext/pta/uref.mli create mode 100644 cil/src/ext/reachingdefs.ml create mode 100755 cil/src/ext/sfi.ml create mode 100644 cil/src/ext/simplemem.ml create mode 100755 cil/src/ext/simplify.ml create mode 100644 cil/src/ext/ssa.ml create mode 100644 cil/src/ext/ssa.mli create mode 100644 cil/src/ext/stackoverflow.ml create mode 100644 cil/src/ext/stackoverflow.mli create mode 100755 cil/src/ext/usedef.ml create mode 100644 cil/src/formatcil.ml create mode 100644 cil/src/formatcil.mli create mode 100644 cil/src/formatlex.mll create mode 100644 cil/src/formatparse.mly create mode 100644 cil/src/frontc/cabs.ml create mode 100644 cil/src/frontc/cabs2cil.ml create mode 100644 cil/src/frontc/cabs2cil.mli create mode 100644 cil/src/frontc/cabsvisit.ml create mode 100644 cil/src/frontc/cabsvisit.mli create mode 100644 cil/src/frontc/clexer.mli create mode 100644 cil/src/frontc/clexer.mll create mode 100644 cil/src/frontc/cparser.mly create mode 100644 cil/src/frontc/cprint.ml create mode 100644 cil/src/frontc/frontc.ml create mode 100644 cil/src/frontc/frontc.mli create mode 100755 cil/src/frontc/lexerhack.ml create mode 100644 cil/src/frontc/patch.ml create mode 100644 cil/src/frontc/patch.mli create mode 100644 cil/src/libmaincil.ml create mode 100644 cil/src/machdep.c create mode 100644 cil/src/main.ml create mode 100644 cil/src/mergecil.ml create mode 100644 cil/src/mergecil.mli create mode 100644 cil/src/rmtmps.ml create mode 100644 cil/src/rmtmps.mli create mode 100644 cil/src/testcil.ml (limited to 'cil/src') diff --git a/cil/src/check.ml b/cil/src/check.ml new file mode 100644 index 00000000..4dc8850a --- /dev/null +++ b/cil/src/check.ml @@ -0,0 +1,1017 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* A consistency checker for CIL *) +open Cil +module E = Errormsg +module H = Hashtbl +open Pretty + + +(* A few parameters to customize the checking *) +type checkFlags = + NoCheckGlobalIds (* Do not check that the global ids have the proper + * hash value *) + +let checkGlobalIds = ref true + + (* Attributes must be sorted *) +type ctxAttr = + CALocal (* Attribute of a local variable *) + | CAGlobal (* Attribute of a global variable *) + | CAType (* Attribute of a type *) + +let valid = ref true + +let warn fmt = + valid := false; + Cil.warn fmt + +let warnContext fmt = + valid := false; + Cil.warnContext fmt + +let checkAttributes (attrs: attribute list) : unit = + let rec loop lastname = function + [] -> () + | Attr(an, _) :: resta -> + if an < lastname then + ignore (warn "Attributes not sorted"); + loop an resta + in + loop "" attrs + + + (* Keep track of defined types *) +let typeDefs : (string, typ) H.t = H.create 117 + + + (* Keep track of all variables names, enum tags and type names *) +let varNamesEnv : (string, unit) H.t = H.create 117 + + (* We also keep a map of variables indexed by id, to ensure that only one + * varinfo has a given id *) +let varIdsEnv: (int, varinfo) H.t = H.create 117 + + (* And keep track of all varinfo's to check the uniqueness of the + * identifiers *) +let allVarIds: (int, varinfo) H.t = H.create 117 + + (* Also keep a list of environments. We place an empty string in the list to + * mark the start of a local environment (i.e. a function) *) +let varNamesList : (string * int) list ref = ref [] +let defineName s = + if s = "" then + E.s (bug "Empty name\n"); + if H.mem varNamesEnv s then + ignore (warn "Multiple definitions for %s\n" s); + H.add varNamesEnv s () + +let defineVariable vi = + defineName vi.vname; + varNamesList := (vi.vname, vi.vid) :: !varNamesList; + (* Check the id *) + if H.mem allVarIds vi.vid then + ignore (warn "Id %d is already defined (%s)\n" vi.vid vi.vname); + H.add allVarIds vi.vid vi; + (* And register it in the current scope also *) + H.add varIdsEnv vi.vid vi + +(* Check that a varinfo has already been registered *) +let checkVariable vi = + try + (* Check in the current scope only *) + if vi != H.find varIdsEnv vi.vid then + ignore (warnContext "varinfos for %s not shared\n" vi.vname); + with Not_found -> + ignore (warn "Unknown id (%d) for %s\n" vi.vid vi.vname) + + +let startEnv () = + varNamesList := ("", -1) :: !varNamesList + +let endEnv () = + let rec loop = function + [] -> E.s (bug "Cannot find start of env") + | ("", _) :: rest -> varNamesList := rest + | (s, id) :: rest -> begin + H.remove varNamesEnv s; + H.remove varIdsEnv id; + loop rest + end + in + loop !varNamesList + + + +(* The current function being checked *) +let currentReturnType : typ ref = ref voidType + +(* A map of labels in the current function *) +let labels: (string, unit) H.t = H.create 17 + +(* A list of statements seen in the current function *) +let statements: stmt list ref = ref [] + +(* A list of the targets of Gotos *) +let gotoTargets: (string * stmt) list ref = ref [] + +(*** TYPES ***) +(* Cetain types can only occur in some contexts, so keep a list of context *) +type ctxType = + CTStruct (* In a composite type *) + | CTUnion + | CTFArg (* In a function argument type *) + | CTFRes (* In a function result type *) + | CTArray (* In an array type *) + | CTPtr (* In a pointer type *) + | CTExp (* In an expression, as the type of + * the result of binary operators, or + * in a cast *) + | CTSizeof (* In a sizeof *) + | CTDecl (* In a typedef, or a declaration *) + +let d_context () = function + CTStruct -> text "CTStruct" + | CTUnion -> text "CTUnion" + | CTFArg -> text "CTFArg" + | CTFRes -> text "CTFRes" + | CTArray -> text "CTArray" + | CTPtr -> text "CTPtr" + | CTExp -> text "CTExp" + | CTSizeof -> text "CTSizeof" + | CTDecl -> text "CTDecl" + + +(* Keep track of all tags that we use. For each tag remember also the info + * structure and a flag whether it was actually defined or just used. A + * forward declaration acts as a definition. *) +type defuse = + Defined (* We actually have seen a definition of this tag *) + | Forward (* We have seen a forward declaration for it. This is done using + * a GType with an empty type name *) + | Used (* Only uses *) +let compUsed : (int, compinfo * defuse ref) H.t = H.create 117 +let enumUsed : (string, enuminfo * defuse ref) H.t = H.create 117 +let typUsed : (string, typeinfo * defuse ref) H.t = H.create 117 + +(* For composite types we also check that the names are unique *) +let compNames : (string, unit) H.t = H.create 17 + + + (* Check a type *) +let rec checkType (t: typ) (ctx: ctxType) = + (* Check that it appears in the right context *) + let rec checkContext = function + TVoid _ -> ctx = CTPtr || ctx = CTFRes || ctx = CTDecl + | TNamed (ti, a) -> checkContext ti.ttype + | TArray _ -> + (ctx = CTStruct || ctx = CTUnion + || ctx = CTSizeof || ctx = CTDecl || ctx = CTArray || ctx = CTPtr) + | TComp _ -> ctx <> CTExp + | _ -> true + in + if not (checkContext t) then + ignore (warn "Type (%a) used in wrong context. Expected context: %a" + d_plaintype t d_context ctx); + match t with + (TVoid a | TBuiltin_va_list a) -> checkAttributes a + | TInt (ik, a) -> checkAttributes a + | TFloat (_, a) -> checkAttributes a + | TPtr (t, a) -> checkAttributes a; checkType t CTPtr + + | TNamed (ti, a) -> + checkAttributes a; + if ti.tname = "" then + ignore (warnContext "Using a typeinfo for an empty-named type\n"); + checkTypeInfo Used ti + + | TComp (comp, a) -> + checkAttributes a; + (* Mark it as a forward. We'll check it later. If we try to check it + * now we might encounter undefined types *) + checkCompInfo Used comp + + + | TEnum (enum, a) -> begin + checkAttributes a; + checkEnumInfo Used enum + end + + | TArray(bt, len, a) -> + checkAttributes a; + checkType bt CTArray; + (match len with + None -> () + | Some l -> begin + let t = checkExp true l in + match t with + TInt((IInt|IUInt), _) -> () + | _ -> E.s (bug "Type of array length is not integer") + end) + + | TFun (rt, targs, isva, a) -> + checkAttributes a; + checkType rt CTFRes; + List.iter + (fun (an, at, aa) -> + checkType at CTFArg; + checkAttributes aa) (argsToList targs) + +(* Check that a type is a promoted integral type *) +and checkIntegralType (t: typ) = + checkType t CTExp; + match unrollType t with + TInt _ -> () + | _ -> ignore (warn "Non-integral type") + +(* Check that a type is a promoted arithmetic type *) +and checkArithmeticType (t: typ) = + checkType t CTExp; + match unrollType t with + TInt _ | TFloat _ -> () + | _ -> ignore (warn "Non-arithmetic type") + +(* Check that a type is a promoted boolean type *) +and checkBooleanType (t: typ) = + checkType t CTExp; + match unrollType t with + TInt _ | TFloat _ | TPtr _ -> () + | _ -> ignore (warn "Non-boolean type") + + +(* Check that a type is a pointer type *) +and checkPointerType (t: typ) = + checkType t CTExp; + match unrollType t with + TPtr _ -> () + | _ -> ignore (warn "Non-pointer type") + + +and typeMatch (t1: typ) (t2: typ) = + if typeSig t1 <> typeSig t2 then + match unrollType t1, unrollType t2 with + (* Allow free interchange of TInt and TEnum *) + TInt (IInt, _), TEnum _ -> () + | TEnum _, TInt (IInt, _) -> () + + | _, _ -> ignore (warn "Type mismatch:@! %a@!and %a@!" + d_type t1 d_type t2) + +and checkCompInfo (isadef: defuse) comp = + let fullname = compFullName comp in + try + let oldci, olddef = H.find compUsed comp.ckey in + (* Check that it is the same *) + if oldci != comp then + ignore (warnContext "compinfo for %s not shared\n" fullname); + (match !olddef, isadef with + | Defined, Defined -> + ignore (warnContext "Multiple definition of %s\n" fullname) + | _, Defined -> olddef := Defined + | Defined, _ -> () + | _, Forward -> olddef := Forward + | _, _ -> ()) + with Not_found -> begin (* This is the first time we see it *) + (* Check that the name is not empty *) + if comp.cname = "" then + E.s (bug "Compinfo with empty name"); + (* Check that the name is unique *) + if H.mem compNames fullname then + ignore (warn "Duplicate name %s" fullname); + (* Add it to the map before we go on *) + H.add compUsed comp.ckey (comp, ref isadef); + H.add compNames fullname (); + (* Do not check the compinfo unless this is a definition. Otherwise you + * might run into undefined types. *) + if isadef = Defined then begin + checkAttributes comp.cattr; + let fctx = if comp.cstruct then CTStruct else CTUnion in + let rec checkField f = + if not + (f.fcomp == comp && (* Each field must share the self cell of + * the host *) + f.fname <> "") then + ignore (warn "Self pointer not set in field %s of %s" + f.fname fullname); + checkType f.ftype fctx; + (* Check the bitfields *) + (match unrollType f.ftype, f.fbitfield with + | TInt (ik, a), Some w -> + checkAttributes a; + if w < 0 || w >= bitsSizeOf (TInt(ik, a)) then + ignore (warn "Wrong width (%d) in bitfield" w) + | _, Some w -> + ignore (E.error "Bitfield on a non integer type\n") + | _ -> ()); + checkAttributes f.fattr + in + List.iter checkField comp.cfields + end + end + + +and checkEnumInfo (isadef: defuse) enum = + if enum.ename = "" then + E.s (bug "Enuminfo with empty name"); + try + let oldei, olddef = H.find enumUsed enum.ename in + (* Check that it is the same *) + if oldei != enum then + ignore (warnContext "enuminfo for %s not shared\n" enum.ename); + (match !olddef, isadef with + Defined, Defined -> + ignore (warnContext "Multiple definition of enum %s\n" enum.ename) + | _, Defined -> olddef := Defined + | Defined, _ -> () + | _, Forward -> olddef := Forward + | _, _ -> ()) + with Not_found -> begin (* This is the first time we see it *) + (* Add it to the map before we go on *) + H.add enumUsed enum.ename (enum, ref isadef); + checkAttributes enum.eattr; + List.iter (fun (tn, _, _) -> defineName tn) enum.eitems; + end + +and checkTypeInfo (isadef: defuse) ti = + try + let oldti, olddef = H.find typUsed ti.tname in + (* Check that it is the same *) + if oldti != ti then + ignore (warnContext "typeinfo for %s not shared\n" ti.tname); + (match !olddef, isadef with + Defined, Defined -> + ignore (warnContext "Multiple definition of type %s\n" ti.tname) + | Defined, Used -> () + | Used, Defined -> + ignore (warnContext "Use of type %s before its definition\n" ti.tname) + | _, _ -> + ignore (warnContext "Bug in checkTypeInfo for %s\n" ti.tname)) + with Not_found -> begin (* This is the first time we see it *) + if ti.tname = "" then + ignore (warnContext "typeinfo with empty name"); + checkType ti.ttype CTDecl; + (* Add it to the map before we go on *) + H.add typUsed ti.tname (ti, ref isadef); + end + +(* Check an lvalue. If isconst then the lvalue appears in a context where + * only a compile-time constant can appear. Return the type of the lvalue. + * See the typing rule from cil.mli *) +and checkLval (isconst: bool) (lv: lval) : typ = + match lv with + Var vi, off -> + checkVariable vi; + checkOffset vi.vtype off + + | Mem addr, off -> begin + if isconst then + ignore (warn "Memory operation in constant"); + let ta = checkExp false addr in + match unrollType ta with + TPtr (t, _) -> checkOffset t off + | _ -> E.s (bug "Mem on a non-pointer") + end + +(* Check an offset. The basetype is the type of the object referenced by the + * base. Return the type of the lvalue constructed from a base value of right + * type and the offset. See the typing rules from cil.mli *) +and checkOffset basetyp : offset -> typ = function + NoOffset -> basetyp + | Index (ei, o) -> + checkExpType false ei intType; + begin + match unrollType basetyp with + TArray (t, _, _) -> checkOffset t o + | t -> E.s (bug "typeOffset: Index on a non-array: %a" d_plaintype t) + end + + | Field (fi, o) -> + (* Now check that the host is shared propertly *) + checkCompInfo Used fi.fcomp; + (* Check that this exact field is part of the host *) + if not (List.exists (fun f -> f == fi) fi.fcomp.cfields) then + ignore (warn "Field %s not part of %s" + fi.fname (compFullName fi.fcomp)); + checkOffset fi.ftype o + +and checkExpType (isconst: bool) (e: exp) (t: typ) = + let t' = checkExp isconst e in (* compute the type *) + if isconst then begin (* For initializers allow a string to initialize an + * array of characters *) + if typeSig t' <> typeSig t then + match e, t with + | _ -> typeMatch t' t + end else + typeMatch t' t + +(* Check an expression. isconst specifies if the expression occurs in a + * context where only a compile-time constant can occur. Return the computed + * type of the expression *) +and checkExp (isconst: bool) (e: exp) : typ = + E.withContext + (fun _ -> dprintf "check%s: %a" + (if isconst then "Const" else "Exp") d_exp e) + (fun _ -> + match e with + | Const(CInt64 (_, ik, _)) -> TInt(ik, []) + | Const(CChr _) -> charType + | Const(CStr s) -> charPtrType + | Const(CWStr s) -> TPtr(!wcharType,[]) + | Const(CReal (_, fk, _)) -> TFloat(fk, []) + | Const(CEnum (_, _, ei)) -> TEnum(ei, []) + | Lval(lv) -> + if isconst then + ignore (warn "Lval in constant"); + checkLval isconst lv + + | SizeOf(t) -> begin + (* Sizeof cannot be applied to certain types *) + checkType t CTSizeof; + (match unrollType t with + (TFun _ | TVoid _) -> + ignore (warn "Invalid operand for sizeof") + | _ ->()); + uintType + end + | SizeOfE(e) -> + (* The expression in a sizeof can be anything *) + let te = checkExp false e in + checkExp isconst (SizeOf(te)) + + | SizeOfStr s -> uintType + + | AlignOf(t) -> begin + (* Sizeof cannot be applied to certain types *) + checkType t CTSizeof; + (match unrollType t with + (TFun _ | TVoid _) -> + ignore (warn "Invalid operand for sizeof") + | _ ->()); + uintType + end + | AlignOfE(e) -> + (* The expression in an AlignOfE can be anything *) + let te = checkExp false e in + checkExp isconst (AlignOf(te)) + + | UnOp (Neg, e, tres) -> + checkArithmeticType tres; checkExpType isconst e tres; tres + + | UnOp (BNot, e, tres) -> + checkIntegralType tres; checkExpType isconst e tres; tres + + | UnOp (LNot, e, tres) -> + let te = checkExp isconst e in + checkBooleanType te; + checkIntegralType tres; (* Must check that t is well-formed *) + typeMatch tres intType; + tres + + | BinOp (bop, e1, e2, tres) -> begin + let t1 = checkExp isconst e1 in + let t2 = checkExp isconst e2 in + match bop with + (Mult | Div) -> + typeMatch t1 t2; checkArithmeticType tres; + typeMatch t1 tres; tres + | (Eq|Ne|Lt|Le|Ge|Gt) -> + typeMatch t1 t2; checkArithmeticType t1; + typeMatch tres intType; tres + | Mod|BAnd|BOr|BXor -> + typeMatch t1 t2; checkIntegralType tres; + typeMatch t1 tres; tres + | LAnd | LOr -> + typeMatch t1 t2; checkBooleanType tres; + typeMatch t1 tres; tres + | Shiftlt | Shiftrt -> + typeMatch t1 tres; checkIntegralType t1; + checkIntegralType t2; tres + | (PlusA | MinusA) -> + typeMatch t1 t2; typeMatch t1 tres; + checkArithmeticType tres; tres + | (PlusPI | MinusPI | IndexPI) -> + checkPointerType tres; + typeMatch t1 tres; + checkIntegralType t2; + tres + | MinusPP -> + checkPointerType t1; checkPointerType t2; + typeMatch t1 t2; + typeMatch tres intType; + tres + end + | AddrOf (lv) -> begin + let tlv = checkLval isconst lv in + (* Only certain types can be in AddrOf *) + match unrollType tlv with + | TVoid _ -> + E.s (bug "AddrOf on improper type"); + + | (TInt _ | TFloat _ | TPtr _ | TComp _ | TFun _ | TArray _ ) -> + TPtr(tlv, []) + + | TEnum _ -> intPtrType + | _ -> E.s (bug "AddrOf on unknown type") + end + + | StartOf lv -> begin + let tlv = checkLval isconst lv in + match unrollType tlv with + TArray (t,_, _) -> TPtr(t, []) + | _ -> E.s (bug "StartOf on a non-array") + end + + | CastE (tres, e) -> begin + let et = checkExp isconst e in + checkType tres CTExp; + (* Not all types can be cast *) + match unrollType et with + TArray _ -> E.s (bug "Cast of an array type") + | TFun _ -> E.s (bug "Cast of a function type") + | TComp _ -> E.s (bug "Cast of a composite type") + | TVoid _ -> E.s (bug "Cast of a void type") + | _ -> tres + end) + () (* The argument of withContext *) + +and checkInit (i: init) : typ = + E.withContext + (fun _ -> dprintf "checkInit: %a" d_init i) + (fun _ -> + match i with + SingleInit e -> checkExp true e +(* + | ArrayInit (bt, len, initl) -> begin + checkType bt CTSizeof; + if List.length initl > len then + ignore (warn "Too many initializers in array"); + List.iter (fun i -> checkInitType i bt) initl; + TArray(bt, Some (integer len), []) + end +*) + | CompoundInit (ct, initl) -> begin + checkType ct CTSizeof; + (match unrollType ct with + TArray(bt, Some (Const(CInt64(len, _, _))), _) -> + let rec loopIndex i = function + [] -> + if i <> len then + ignore (warn "Wrong number of initializers in array") + + | (Index(Const(CInt64(i', _, _)), NoOffset), ei) :: rest -> + if i' <> i then + ignore (warn "Initializer for index %s when %s was expected\n" + (Int64.format "%d" i') (Int64.format "%d" i)); + checkInitType ei bt; + loopIndex (Int64.succ i) rest + | _ :: rest -> + ignore (warn "Malformed initializer for array element") + in + loopIndex Int64.zero initl + | TArray(_, _, _) -> + ignore (warn "Malformed initializer for array") + | TComp (comp, _) -> + if comp.cstruct then + let rec loopFields + (nextflds: fieldinfo list) + (initl: (offset * init) list) : unit = + match nextflds, initl with + [], [] -> () (* We are done *) + | f :: restf, (Field(f', NoOffset), i) :: resti -> + if f.fname <> f'.fname then + ignore (warn "Expected initializer for field %s and found one for %s\n" f.fname f'.fname); + checkInitType i f.ftype; + loopFields restf resti + | [], _ :: _ -> + ignore (warn "Too many initializers for struct") + | _ :: _, [] -> + ignore (warn "Too few initializers for struct") + | _, _ -> + ignore (warn "Malformed initializer for struct") + in + loopFields + (List.filter (fun f -> f.fname <> missingFieldName) + comp.cfields) + initl + + else (* UNION *) + if comp.cfields == [] then begin + if initl != [] then + ignore (warn "Initializer for empty union not empty"); + end else begin + match initl with + [(Field(f, NoOffset), ei)] -> + if f.fcomp != comp then + ignore (bug "Wrong designator for union initializer"); + if !msvcMode && f != List.hd comp.cfields then + ignore (warn "On MSVC you can only initialize the first field of a union"); + checkInitType ei f.ftype + + | _ -> + ignore (warn "Malformed initializer for union") + end + | _ -> + E.s (warn "Type of Compound is not array or struct or union")); + ct + end) + () (* The arguments of withContext *) + + +and checkInitType (i: init) (t: typ) : unit = + let it = checkInit i in + typeMatch it t + +and checkStmt (s: stmt) = + E.withContext + (fun _ -> + (* Print context only for certain small statements *) + match s.skind with + (*Loop _*) While _ | DoWhile _ | For _ | If _ | Switch _ -> nil + | _ -> dprintf "checkStmt: %a" d_stmt s) + (fun _ -> + (* Check the labels *) + let checkLabel = function + Label (ln, l, _) -> + if H.mem labels ln then + ignore (warn "Multiply defined label %s" ln); + H.add labels ln () + | Case (e, _) -> checkExpType true e intType + | _ -> () (* Not yet implemented *) + in + List.iter checkLabel s.labels; + (* See if we have seen this statement before *) + if List.memq s !statements then + ignore (warn "Statement is shared"); + (* Remember that we have seen this one *) + statements := s :: !statements; + match s.skind with + Break _ | Continue _ -> () + | Goto (gref, l) -> + currentLoc := l; + (* Find a label *) + let lab = + match List.filter (function Label _ -> true | _ -> false) + !gref.labels with + Label (lab, _, _) :: _ -> lab + | _ -> + ignore (warn "Goto to block without a label\n"); + "" + in + (* Remember it as a target *) + gotoTargets := (lab, !gref) :: !gotoTargets + + + | Return (re,l) -> begin + currentLoc := l; + match re, !currentReturnType with + None, TVoid _ -> () + | _, TVoid _ -> ignore (warn "Invalid return value") + | None, _ -> ignore (warn "Invalid return value") + | Some re', rt' -> checkExpType false re' rt' + end +(* + | Loop (b, l, _, _) -> checkBlock b +*) + | While (e, b, l) -> + currentLoc := l; + let te = checkExp false e in + checkBooleanType te; + checkBlock b; + | DoWhile (e, b, l) -> + currentLoc := l; + let te = checkExp false e in + checkBooleanType te; + checkBlock b; + | For (bInit, e, bIter, b, l) -> + currentLoc := l; + checkBlock bInit; + let te = checkExp false e in + checkBooleanType te; + checkBlock bIter; + checkBlock b; + | Block b -> checkBlock b + | If (e, bt, bf, l) -> + currentLoc := l; + let te = checkExp false e in + checkBooleanType te; + checkBlock bt; + checkBlock bf + | Switch (e, b, cases, l) -> + currentLoc := l; + checkExpType false e intType; + (* Remember the statements so far *) + let prevStatements = !statements in + checkBlock b; + (* Now make sure that all the cases do occur in that block *) + List.iter + (fun c -> + if not (List.exists (function Case _ -> true | _ -> false) + c.labels) then + ignore (warn "Case in switch statment without a \"case\"\n"); + (* Make sure it is in there *) + let rec findCase = function + | l when l == prevStatements -> (* Not found *) + ignore (warnContext + "Cannot find target of switch statement") + | [] -> E.s (E.bug "Check: findCase") + | c' :: rest when c == c' -> () (* Found *) + | _ :: rest -> findCase rest + in + findCase !statements) + cases; + | TryFinally (b, h, l) -> + currentLoc := l; + checkBlock b; + checkBlock h + + | TryExcept (b, (il, e), h, l) -> + currentLoc := l; + checkBlock b; + List.iter checkInstr il; + checkExpType false e intType; + checkBlock h + + | Instr il -> List.iter checkInstr il) + () (* argument of withContext *) + +and checkBlock (b: block) : unit = + List.iter checkStmt b.bstmts + + +and checkInstr (i: instr) = + match i with + | Set (dest, e, l) -> + currentLoc := l; + let t = checkLval false dest in + (* Not all types can be assigned to *) + (match unrollType t with + TFun _ -> ignore (warn "Assignment to a function type") + | TArray _ -> ignore (warn "Assignment to an array type") + | TVoid _ -> ignore (warn "Assignment to a void type") + | _ -> ()); + checkExpType false e t + + | Call(dest, what, args, l) -> + currentLoc := l; + let (rt, formals, isva) = + match checkExp false what with + TFun(rt, formals, isva, _) -> rt, formals, isva + | _ -> E.s (bug "Call to a non-function") + in + (* Now check the return value*) + (match dest, unrollType rt with + None, TVoid _ -> () + | Some _, TVoid _ -> ignore (warn "void value is assigned") + | None, _ -> () (* "Call of function is not assigned" *) + | Some destlv, rt' -> + let desttyp = checkLval false destlv in + if typeSig desttyp <> typeSig rt then begin + (* Not all types can be assigned to *) + (match unrollType desttyp with + TFun _ -> ignore (warn "Assignment to a function type") + | TArray _ -> ignore (warn "Assignment to an array type") + | TVoid _ -> ignore (warn "Assignment to a void type") + | _ -> ()); + (* Not all types can be cast *) + (match rt' with + TArray _ -> ignore (warn "Cast of an array type") + | TFun _ -> ignore (warn "Cast of a function type") + | TComp _ -> ignore (warn "Cast of a composite type") + | TVoid _ -> ignore (warn "Cast of a void type") + + | _ -> ()) + end); + (* Now check the arguments *) + let rec loopArgs formals args = + match formals, args with + [], _ when (isva || args = []) -> () + | (fn,ft,_) :: formals, a :: args -> + checkExpType false a ft; + loopArgs formals args + | _, _ -> ignore (warn "Not enough arguments") + in + if formals = None then + ignore (warn "Call to function without prototype\n") + else + loopArgs (argsToList formals) args + + | Asm _ -> () (* Not yet implemented *) + +let rec checkGlobal = function + GAsm _ -> () + | GPragma _ -> () + | GText _ -> () + | GType (ti, l) -> + currentLoc := l; + E.withContext (fun _ -> dprintf "GType(%s)" ti.tname) + (fun _ -> + checkTypeInfo Defined ti; + if ti.tname <> "" then defineName ti.tname) + () + + | GCompTag (comp, l) -> + currentLoc := l; + checkCompInfo Defined comp; + + | GCompTagDecl (comp, l) -> + currentLoc := l; + checkCompInfo Forward comp; + + | GEnumTag (enum, l) -> + currentLoc := l; + checkEnumInfo Defined enum + + | GEnumTagDecl (enum, l) -> + currentLoc := l; + checkEnumInfo Forward enum + + | GVarDecl (vi, l) -> + currentLoc := l; + (* We might have seen it already *) + E.withContext (fun _ -> dprintf "GVarDecl(%s)" vi.vname) + (fun _ -> + (* If we have seen this vid already then it must be for the exact + * same varinfo *) + if H.mem varIdsEnv vi.vid then + checkVariable vi + else begin + defineVariable vi; + checkAttributes vi.vattr; + checkType vi.vtype CTDecl; + if not (vi.vglob && + vi.vstorage <> Register) then + E.s (bug "Invalid declaration of %s" vi.vname) + end) + () + + | GVar (vi, init, l) -> + currentLoc := l; + (* Maybe this is the first occurrence *) + E.withContext (fun _ -> dprintf "GVar(%s)" vi.vname) + (fun _ -> + checkGlobal (GVarDecl (vi, l)); + (* Check the initializer *) + begin match init.init with + None -> () + | Some i -> ignore (checkInitType i vi.vtype) + end; + (* Cannot be a function *) + if isFunctionType vi.vtype then + E.s (bug "GVar for a function (%s)\n" vi.vname); + ) + () + + + | GFun (fd, l) -> begin + currentLoc := l; + (* Check if this is the first occurrence *) + let vi = fd.svar in + let fname = vi.vname in + E.withContext (fun _ -> dprintf "GFun(%s)" fname) + (fun _ -> + checkGlobal (GVarDecl (vi, l)); + (* Check that the argument types in the type are identical to the + * formals *) + let rec loopArgs targs formals = + match targs, formals with + [], [] -> () + | (fn, ft, fa) :: targs, fo :: formals -> + if fn <> fo.vname || ft != fo.vtype || fa != fo.vattr then + ignore (warnContext + "Formal %s not shared (type + locals) in %s" + fo.vname fname); + loopArgs targs formals + + | _ -> + E.s (bug "Type has different number of formals for %s" + fname) + in + begin match vi.vtype with + TFun (rt, args, isva, a) -> begin + currentReturnType := rt; + loopArgs (argsToList args) fd.sformals + end + | _ -> E.s (bug "Function %s does not have a function type" + fname) + end; + ignore (fd.smaxid >= 0 || E.s (bug "smaxid < 0 for %s" fname)); + (* Now start a new environment, in a finally clause *) + begin try + startEnv (); + (* Do the locals *) + let doLocal tctx v = + if v.vglob then + ignore (warnContext + "Local %s has the vglob flag set" v.vname); + if v.vstorage <> NoStorage && v.vstorage <> Register then + ignore (warnContext + "Local %s has storage %a\n" v.vname + d_storage v.vstorage); + checkType v.vtype tctx; + checkAttributes v.vattr; + defineVariable v + in + List.iter (doLocal CTFArg) fd.sformals; + List.iter (doLocal CTDecl) fd.slocals; + statements := []; + gotoTargets := []; + checkBlock fd.sbody; + H.clear labels; + (* Now verify that we have scanned all targets *) + List.iter + (fun (lab, t) -> if not (List.memq t !statements) then + ignore (warnContext + "Target of \"goto %s\" statement does not appear in function body" lab)) + !gotoTargets; + statements := []; + gotoTargets := []; + (* Done *) + endEnv () + with e -> + endEnv (); + raise e + end; + ()) + () (* final argument of withContext *) + end + + +let checkFile flags fl = + if !E.verboseFlag then ignore (E.log "Checking file %s\n" fl.fileName); + valid := true; + List.iter + (function + NoCheckGlobalIds -> checkGlobalIds := false) + flags; + iterGlobals fl (fun g -> try checkGlobal g with _ -> ()); + (* Check that for all struct/union tags there is a definition *) + H.iter + (fun k (comp, isadef) -> + if !isadef = Used then + begin + valid := false; + ignore (E.warn "Compinfo %s is referenced but not defined" + (compFullName comp)) + end) + compUsed; + (* Check that for all enum tags there is a definition *) + H.iter + (fun k (enum, isadef) -> + if !isadef = Used then + begin + valid := false; + ignore (E.warn "Enuminfo %s is referenced but not defined" + enum.ename) + end) + enumUsed; + (* Clean the hashes to let the GC do its job *) + H.clear typeDefs; + H.clear varNamesEnv; + H.clear varIdsEnv; + H.clear allVarIds; + H.clear compNames; + H.clear compUsed; + H.clear enumUsed; + H.clear typUsed; + varNamesList := []; + if !E.verboseFlag then + ignore (E.log "Finished checking file %s\n" fl.fileName); + !valid + diff --git a/cil/src/check.mli b/cil/src/check.mli new file mode 100644 index 00000000..fdcb8b82 --- /dev/null +++ b/cil/src/check.mli @@ -0,0 +1,45 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + (* Checks the well-formedness of the file. Prints warnings and + * returns false if errors are found *) + +type checkFlags = + NoCheckGlobalIds (* Do not check that the global ids have the proper + * hash value *) + +val checkFile: checkFlags list -> Cil.file -> bool diff --git a/cil/src/cil.ml b/cil/src/cil.ml new file mode 100644 index 00000000..2c4e12a7 --- /dev/null +++ b/cil/src/cil.ml @@ -0,0 +1,6427 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) +(* MODIF: useLogicalOperators flag set to true by default. *) + +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +open Escape +open Pretty +open Trace (* sm: 'trace' function *) +module E = Errormsg +module H = Hashtbl +module IH = Inthash + +(* + * CIL: An intermediate language for analyzing C progams. + * + * Version Tue Dec 12 15:21:52 PST 2000 + * Scott McPeak, George Necula, Wes Weimer + * + *) + +(* The module Cilversion is generated automatically by Makefile from + * information in configure.in *) +let cilVersion = Cilversion.cilVersion +let cilVersionMajor = Cilversion.cilVersionMajor +let cilVersionMinor = Cilversion.cilVersionMinor +let cilVersionRevision = Cilversion.cilVersionRev + +(* A few globals that control the interpretation of C source *) +let msvcMode = ref false (* Whether the pretty printer should + * print output for the MS VC + * compiler. Default is GCC *) + +let useLogicalOperators = ref (*false*) true + + +module M = Machdep +(* Cil.initCil will set this to the current machine description. + Makefile.cil generates the file obj/@ARCHOS@/machdep.ml, + which contains the descriptions of gcc and msvc. *) +let theMachine : M.mach ref = ref M.gcc + + +let lowerConstants: bool ref = ref true + (** Do lower constants (default true) *) +let insertImplicitCasts: bool ref = ref true + (** Do insert implicit casts (default true) *) + + +let little_endian = ref true +let char_is_unsigned = ref false +let underscore_name = ref false + +type lineDirectiveStyle = + | LineComment + | LinePreprocessorInput + | LinePreprocessorOutput + +let lineDirectiveStyle = ref (Some LinePreprocessorInput) + +let print_CIL_Input = ref false + +let printCilAsIs = ref false + +let lineLength = ref 80 + +(* sm: return the string 's' if we're printing output for gcc, suppres + * it if we're printing for CIL to parse back in. the purpose is to + * hide things from gcc that it complains about, but still be able + * to do lossless transformations when CIL is the consumer *) +let forgcc (s: string) : string = + if (!print_CIL_Input) then "" else s + + +let debugConstFold = false + +(** The Abstract Syntax of CIL *) + + +(** The top-level representation of a CIL source file. Its main contents is + the list of global declarations and definitions. *) +type file = + { mutable fileName: string; (** The complete file name *) + mutable globals: global list; (** List of globals as they will appear + in the printed file *) + mutable globinit: fundec option; + (** An optional global initializer function. This is a function where + * you can put stuff that must be executed before the program is + * started. This function, is conceptually at the end of the file, + * although it is not part of the globals list. Use {!Cil.getGlobInit} + * to create/get one. *) + mutable globinitcalled: bool; + (** Whether the global initialization function is called in main. This + should always be false if there is no global initializer. When + you create a global initialization CIL will try to insert code in + main to call it. *) + } + +and comment = location * string + +(** The main type for representing global declarations and definitions. A list + of these form a CIL file. The order of globals in the file is generally + important. *) +and global = + | GType of typeinfo * location + (** A typedef. All uses of type names (through the [TNamed] constructor) + must be preceeded in the file by a definition of the name. The string + is the defined name and always not-empty. *) + + | GCompTag of compinfo * location + (** Defines a struct/union tag with some fields. There must be one of + these for each struct/union tag that you use (through the [TComp] + constructor) since this is the only context in which the fields are + printed. Consequently nested structure tag definitions must be + broken into individual definitions with the innermost structure + defined first. *) + + | GCompTagDecl of compinfo * location + (** Declares a struct/union tag. Use as a forward declaration. This is + * printed without the fields. *) + + | GEnumTag of enuminfo * location + (** Declares an enumeration tag with some fields. There must be one of + these for each enumeration tag that you use (through the [TEnum] + constructor) since this is the only context in which the items are + printed. *) + + | GEnumTagDecl of enuminfo * location + (** Declares an enumeration tag. Use as a forward declaration. This is + * printed without the items. *) + + | GVarDecl of varinfo * location + (** A variable declaration (not a definition). If the variable has a + function type then this is a prototype. There can be several + declarations and at most one definition for a given variable. If both + forms appear then they must share the same varinfo structure. A + prototype shares the varinfo with the fundec of the definition. Either + has storage Extern or there must be a definition in this file *) + + | GVar of varinfo * initinfo * location + (** A variable definition. Can have an initializer. The initializer is + * updateable so that you can change it without requiring to recreate + * the list of globals. There can be at most one definition for a + * variable in an entire program. Cannot have storage Extern or function + * type. *) + + + | GFun of fundec * location + (** A function definition. *) + + | GAsm of string * location (** Global asm statement. These ones + can contain only a template *) + | GPragma of attribute * location (** Pragmas at top level. Use the same + syntax as attributes *) + | GText of string (** Some text (printed verbatim) at + top level. E.g., this way you can + put comments in the output. *) + + +(** The various types available. Every type is associated with a list of + * attributes, which are always kept in sorted order. Use {!Cil.addAttribute} + * and {!Cil.addAttributes} to construct list of attributes. If you want to + * inspect a type, you should use {!Cil.unrollType} to see through the uses + * of named types. *) +and typ = + TVoid of attributes (** Void type *) + | TInt of ikind * attributes (** An integer type. The kind specifies + the sign and width. *) + | TFloat of fkind * attributes (** A floating-point type. The kind + specifies the precision. *) + + | TPtr of typ * attributes + (** Pointer type. *) + + | TArray of typ * exp option * attributes + (** Array type. It indicates the base type and the array length. *) + + | TFun of typ * (string * typ * attributes) list option * bool * attributes + (** Function type. Indicates the type of the result, the name, type + * and name attributes of the formal arguments ([None] if no + * arguments were specified, as in a function whose definition or + * prototype we have not seen; [Some \[\]] means void). Use + * {!Cil.argsToList} to obtain a list of arguments. The boolean + * indicates if it is a variable-argument function. If this is the + * type of a varinfo for which we have a function declaration then + * the information for the formals must match that in the + * function's sformals. *) + + | TNamed of typeinfo * attributes + (* The use of a named type. All uses of the same type name must + * share the typeinfo. Each such type name must be preceeded + * in the file by a [GType] global. This is printed as just the + * type name. The actual referred type is not printed here and is + * carried only to simplify processing. To see through a sequence + * of named type references, use {!Cil.unrollType}. The attributes + * are in addition to those given when the type name was defined. *) + + | TComp of compinfo * attributes + (** A reference to a struct or a union type. All references to the + same struct or union must share the same compinfo among them and + with a [GCompTag] global that preceeds all uses (except maybe + those that are pointers to the composite type). The attributes + given are those pertaining to this use of the type and are in + addition to the attributes that were given at the definition of + the type and which are stored in the compinfo. *) + + | TEnum of enuminfo * attributes + (** A reference to an enumeration type. All such references must + share the enuminfo among them and with a [GEnumTag] global that + preceeds all uses. The attributes refer to this use of the + enumeration and are in addition to the attributes of the + enumeration itself, which are stored inside the enuminfo *) + + + + | TBuiltin_va_list of attributes + (** This is the same as the gcc's type with the same name *) + +(** Various kinds of integers *) +and ikind = + IChar (** [char] *) + | ISChar (** [signed char] *) + | IUChar (** [unsigned char] *) + | IInt (** [int] *) + | IUInt (** [unsigned int] *) + | IShort (** [short] *) + | IUShort (** [unsigned short] *) + | ILong (** [long] *) + | IULong (** [unsigned long] *) + | ILongLong (** [long long] (or [_int64] on Microsoft Visual C) *) + | IULongLong (** [unsigned long long] (or [unsigned _int64] on Microsoft + Visual C) *) + +(** Various kinds of floating-point numbers*) +and fkind = + FFloat (** [float] *) + | FDouble (** [double] *) + | FLongDouble (** [long double] *) + +(** An attribute has a name and some optional parameters *) +and attribute = Attr of string * attrparam list + +(** Attributes are lists sorted by the attribute name *) +and attributes = attribute list + +(** The type of parameters in attributes *) +and attrparam = + | AInt of int (** An integer constant *) + | AStr of string (** A string constant *) + | ACons of string * attrparam list (** Constructed attributes. These + are printed [foo(a1,a2,...,an)]. + The list of parameters can be + empty and in that case the + parentheses are not printed. *) + | ASizeOf of typ (** A way to talk about types *) + | ASizeOfE of attrparam + | ASizeOfS of typsig (** Replacement for ASizeOf in type + signatures. Only used for + attributes inside typsigs.*) + | AAlignOf of typ + | AAlignOfE of attrparam + | AAlignOfS of typsig + | AUnOp of unop * attrparam + | ABinOp of binop * attrparam * attrparam + | ADot of attrparam * string (** a.foo **) + + +(** Information about a composite type (a struct or a union). Use + {!Cil.mkCompInfo} + to create non-recursive or (potentially) recursive versions of this. Make + sure you have a [GCompTag] for each one of these. *) +and compinfo = { + mutable cstruct: bool; (** True if struct, False if union *) + mutable cname: string; (** The name. Always non-empty. Use + * {!Cil.compFullName} to get the + * full name of a comp (along with + * the struct or union) *) + mutable ckey: int; (** A unique integer constructed from + * the name. Use {!Hashtbl.hash} on + * the string returned by + * {!Cil.compFullName}. All compinfo + * for a given key are shared. *) + mutable cfields: fieldinfo list; (** Information about the fields *) + mutable cattr: attributes; (** The attributes that are defined at + the same time as the composite + type *) + mutable cdefined: bool; (** Whether this is a defined + * compinfo. *) + mutable creferenced: bool; (** True if used. Initially set to + * false *) + } + +(** Information about a struct/union field *) +and fieldinfo = { + mutable fcomp: compinfo; (** The compinfo of the host. Note + that this must be shared with the + host since there can be only one + compinfo for a given id *) + mutable fname: string; (** The name of the field. Might be + * the value of + * {!Cil.missingFieldName} in which + * case it must be a bitfield and is + * not printed and it does not + * participate in initialization *) + mutable ftype: typ; (** The type *) + mutable fbitfield: int option; (** If a bitfield then ftype should be + an integer type *) + mutable fattr: attributes; (** The attributes for this field + * (not for its type) *) + mutable floc: location; (** The location where this field + * is defined *) +} + + + +(** Information about an enumeration. This is shared by all references to an + enumeration. Make sure you have a [GEnumTag] for each of of these. *) +and enuminfo = { + mutable ename: string; (** The name. Always non-empty *) + mutable eitems: (string * exp * location) list; (** Items with names + and values. This list + should be + non-empty. The item + values must be + compile-time + constants. *) + mutable eattr: attributes; (** Attributes *) + mutable ereferenced: bool; (** True if used. Initially set to false*) +} + +(** Information about a defined type *) +and typeinfo = { + mutable tname: string; + (** The name. Can be empty only in a [GType] when introducing a composite + * or enumeration tag. If empty cannot be refered to from the file *) + mutable ttype: typ; + (** The actual type. *) + mutable treferenced: bool; + (** True if used. Initially set to false*) +} + + +(** Information about a variable. These structures are shared by all + * references to the variable. So, you can change the name easily, for + * example. Use one of the {!Cil.makeLocalVar}, {!Cil.makeTempVar} or + * {!Cil.makeGlobalVar} to create instances of this data structure. *) +and varinfo = { + mutable vname: string; (** The name of the variable. Cannot + * be empty. *) + mutable vtype: typ; (** The declared type of the + * variable. *) + mutable vattr: attributes; (** A list of attributes associated + * with the variable. *) + mutable vstorage: storage; (** The storage-class *) + (* The other fields are not used in varinfo when they appear in the formal + * argument list in a [TFun] type *) + + + mutable vglob: bool; (** True if this is a global variable*) + + (** Whether this varinfo is for an inline function. *) + mutable vinline: bool; + + mutable vdecl: location; (** Location of variable declaration *) + + mutable vid: int; (** A unique integer identifier. *) + mutable vaddrof: bool; (** True if the address of this + variable is taken. CIL will set + * these flags when it parses C, but + * you should make sure to set the + * flag whenever your transformation + * create [AddrOf] expression. *) + + mutable vreferenced: bool; (** True if this variable is ever + referenced. This is computed by + [removeUnusedVars]. It is safe to + just initialize this to False *) +} + +(** Storage-class information *) +and storage = + NoStorage | (** The default storage. Nothing is + * printed *) + Static | + Register | + Extern + + +(** Expressions (Side-effect free)*) +and exp = + Const of constant (** Constant *) + | Lval of lval (** Lvalue *) + | SizeOf of typ (** sizeof(). Has [unsigned + * int] type (ISO 6.5.3.4). This is + * not turned into a constant because + * some transformations might want to + * change types *) + + | SizeOfE of exp (** sizeof() *) + | SizeOfStr of string + (** sizeof(string_literal). We separate this case out because this is the + * only instance in which a string literal should not be treated as + * having type pointer to character. *) + + | AlignOf of typ (** Has [unsigned int] type *) + | AlignOfE of exp + + + | UnOp of unop * exp * typ (** Unary operation. Includes + the type of the result *) + + | BinOp of binop * exp * exp * typ + (** Binary operation. Includes the + type of the result. The arithemtic + conversions are made explicit + for the arguments *) + | CastE of typ * exp (** Use {!Cil.mkCast} to make casts *) + + | AddrOf of lval (** Always use {!Cil.mkAddrOf} to + * construct one of these. Apply to an + * lvalue of type [T] yields an + * expression of type [TPtr(T)] *) + + | StartOf of lval (** There is no C correspondent for this. C has + * implicit coercions from an array to the address + * of the first element. [StartOf] is used in CIL to + * simplify type checking and is just an explicit + * form of the above mentioned implicit conversion. + * It is not printed. Given an lval of type + * [TArray(T)] produces an expression of type + * [TPtr(T)]. *) + + +(** Literal constants *) +and constant = + | CInt64 of int64 * ikind * string option + (** Integer constant. Give the ikind (see ISO9899 6.1.3.2) + * and the textual representation, if available. Use + * {!Cil.integer} or {!Cil.kinteger} to create these. Watch + * out for integers that cannot be represented on 64 bits. + * OCAML does not give Overflow exceptions. *) + | CStr of string (** String constant (of pointer type) *) + | CWStr of int64 list (** Wide string constant (of type "wchar_t *") *) + | CChr of char (** Character constant. This has type int, so use + * charConstToInt to read the value in case + * sign-extension is needed. *) + | CReal of float * fkind * string option (** Floating point constant. Give + the fkind (see ISO 6.4.4.2) and + also the textual representation, + if available *) + | CEnum of exp * string * enuminfo + (** An enumeration constant with the given value, name, from the given + * enuminfo. This is not used if {!Cil.lowerEnum} is false (default). + * Use {!Cillower.lowerEnumVisitor} to replace these with integer + * constants. *) + +(** Unary operators *) +and unop = + Neg (** Unary minus *) + | BNot (** Bitwise complement (~) *) + | LNot (** Logical Not (!) *) + +(** Binary operations *) +and binop = + PlusA (** arithmetic + *) + | PlusPI (** pointer + integer *) + | IndexPI (** pointer + integer but only when + * it arises from an expression + * [e\[i\]] when [e] is a pointer and + * not an array. This is semantically + * the same as PlusPI but CCured uses + * this as a hint that the integer is + * probably positive. *) + | MinusA (** arithmetic - *) + | MinusPI (** pointer - integer *) + | MinusPP (** pointer - pointer *) + | Mult (** * *) + | Div (** / *) + | Mod (** % *) + | Shiftlt (** shift left *) + | Shiftrt (** shift right *) + + | Lt (** < (arithmetic comparison) *) + | Gt (** > (arithmetic comparison) *) + | Le (** <= (arithmetic comparison) *) + | Ge (** > (arithmetic comparison) *) + | Eq (** == (arithmetic comparison) *) + | Ne (** != (arithmetic comparison) *) + | BAnd (** bitwise and *) + | BXor (** exclusive-or *) + | BOr (** inclusive-or *) + + | LAnd (** logical and *) + | LOr (** logical or *) + + + + +(** An lvalue denotes the contents of a range of memory addresses. This range + * is denoted as a host object along with an offset within the object. The + * host object can be of two kinds: a local or global variable, or an object + * whose address is in a pointer expression. We distinguish the two cases so + * that we can tell quickly whether we are accessing some component of a + * variable directly or we are accessing a memory location through a pointer.*) +and lval = + lhost * offset + +(** The host part of an {!Cil.lval}. *) +and lhost = + | Var of varinfo + (** The host is a variable. *) + + | Mem of exp + (** The host is an object of type [T] when the expression has pointer + * [TPtr(T)]. *) + + +(** The offset part of an {!Cil.lval}. Each offset can be applied to certain + * kinds of lvalues and its effect is that it advances the starting address + * of the lvalue and changes the denoted type, essentially focussing to some + * smaller lvalue that is contained in the original one. *) +and offset = + | NoOffset (** No offset. Can be applied to any lvalue and does + * not change either the starting address or the type. + * This is used when the lval consists of just a host + * or as a terminator in a list of other kinds of + * offsets. *) + + | Field of fieldinfo * offset + (** A field offset. Can be applied only to an lvalue + * that denotes a structure or a union that contains + * the mentioned field. This advances the offset to the + * beginning of the mentioned field and changes the + * type to the type of the mentioned field. *) + + | Index of exp * offset + (** An array index offset. Can be applied only to an + * lvalue that denotes an array. This advances the + * starting address of the lval to the beginning of the + * mentioned array element and changes the denoted type + * to be the type of the array element *) + + + +(* The following equivalences hold *) +(* Mem(AddrOf(Mem a, aoff)), off = Mem a, aoff + off *) +(* Mem(AddrOf(Var v, aoff)), off = Var v, aoff + off *) +(* AddrOf (Mem a, NoOffset) = a *) + +(** Initializers for global variables. You can create an initializer with + * {!Cil.makeZeroInit}. *) +and init = + | SingleInit of exp (** A single initializer *) + | CompoundInit of typ * (offset * init) list + (** Used only for initializers of structures, unions and arrays. + * The offsets are all of the form [Field(f, NoOffset)] or + * [Index(i, NoOffset)] and specify the field or the index being + * initialized. For structures all fields + * must have an initializer (except the unnamed bitfields), in + * the proper order. This is necessary since the offsets are not + * printed. For arrays the list must contain a prefix of the + * initializers; the rest are 0-initialized. + * For unions there must be exactly one initializer. If + * the initializer is not for the first field then a field + * designator is printed, so you better be on GCC since MSVC does + * not understand this. You can scan an initializer list with + * {!Cil.foldLeftCompound}. *) + +(** We want to be able to update an initializer in a global variable, so we + * define it as a mutable field *) +and initinfo = { + mutable init : init option; + } + + +(** Function definitions. *) +and fundec = + { mutable svar: varinfo; + (** Holds the name and type as a variable, so we can refer to it + * easily from the program. All references to this function either + * in a function call or in a prototype must point to the same + * varinfo. *) + mutable sformals: varinfo list; + (** Formals. These must be shared with the formals that appear in the + * type of the function. Use {!Cil.setFormals} or + * {!Cil.setFunctionType} to set these + * formals and ensure that they are reflected in the function type. + * Do not make copies of these because the body refers to them. *) + mutable slocals: varinfo list; + (** Locals. Does not include the sformals. Do not make copies of + * these because the body refers to them. *) + mutable smaxid: int; (** Max local id. Starts at 0. *) + mutable sbody: block; (** The function body. *) + mutable smaxstmtid: int option; (** max id of a (reachable) statement + * in this function, if we have + * computed it. range = 0 ... + * (smaxstmtid-1). This is computed by + * {!Cil.computeCFGInfo}. *) + mutable sallstmts: stmt list; (** After you call {!Cil.computeCFGInfo} + * this field is set to contain all + * statements in the function *) + } + + +(** A block is a sequence of statements with the control falling through from + one element to the next *) +and block = + { mutable battrs: attributes; (** Attributes for the block *) + mutable bstmts: stmt list; (** The statements comprising the block*) + } + + +(** Statements. + The statement is the structural unit in the control flow graph. Use mkStmt + to make a statement and then fill in the fields. *) +and stmt = { + mutable labels: label list; (** Whether the statement starts with + some labels, case statements or + default statement *) + mutable skind: stmtkind; (** The kind of statement *) + + (* Now some additional control flow information. Initially this is not + * filled in. *) + mutable sid: int; (** A number (>= 0) that is unique + in a function. *) + mutable succs: stmt list; (** The successor statements. They can + always be computed from the skind + and the context in which this + statement appears *) + mutable preds: stmt list; (** The inverse of the succs function*) + } + +(** Labels *) +and label = + Label of string * location * bool + (** A real label. If the bool is "true", the label is from the + * input source program. If the bool is "false", the label was + * created by CIL or some other transformation *) + | Case of exp * location (** A case statement *) + | Default of location (** A default statement *) + + + +(* The various kinds of statements *) +and stmtkind = + | Instr of instr list (** A group of instructions that do not + contain control flow. Control + implicitly falls through. *) + | Return of exp option * location (** The return statement. This is a + leaf in the CFG. *) + + | Goto of stmt ref * location (** A goto statement. Appears from + actual goto's in the code. *) + | Break of location (** A break to the end of the nearest + enclosing loop or Switch *) + | Continue of location (** A continue to the start of the + nearest enclosing loop *) + | If of exp * block * block * location (** A conditional. + Two successors, the "then" and + the "else" branches. Both + branches fall-through to the + successor of the If statement *) + | Switch of exp * block * (stmt list) * location + (** A switch statement. The block + contains within all of the cases. + We also have direct pointers to the + statements that implement the + cases. Which cases they implement + you can get from the labels of the + statement *) + +(* + | Loop of block * location * (stmt option) * (stmt option) + (** A [while(1)] loop. The + * termination test is implemented + * in the body of a loop using a + * [Break] statement. If + * prepareCFG has been called, the + * first stmt option will point to + * the stmt containing the + * continue label for this loop + * and the second will point to + * the stmt containing the break + * label for this loop. *) +*) + | While of exp * block * location (** A while loop. *) + | DoWhile of exp * block * location (** A do...while loop. *) + | For of block * exp * block * block * location (** A for loop. *) + + | Block of block (** Just a block of statements. Use it + as a way to keep some attributes + local *) + (** On MSVC we support structured exception handling. This is what you + * might expect. Control can get into the finally block either from the + * end of the body block, or if an exception is thrown. The location + * corresponds to the try keyword. *) + | TryFinally of block * block * location + + (** On MSVC we support structured exception handling. The try/except + * statement is a bit tricky: + __try { blk } + __except (e) { + handler + } + + The argument to __except must be an expression. However, we keep a + list of instructions AND an expression in case you need to make + function calls. We'll print those as a comma expression. The control + can get to the __except expression only if an exception is thrown. + After that, depending on the value of the expression the control + goes to the handler, propagates the exception, or retries the + exception !!! The location corresponds to the try keyword. + *) + | TryExcept of block * (instr list * exp) * block * location + + +(** Instructions. They may cause effects directly but may not have control + flow.*) +and instr = + Set of lval * exp * location (** An assignment. A cast is present + if the exp has different type + from lval *) + | Call of lval option * exp * exp list * location + (** optional: result is an lval. A cast might be + necessary if the declared result type of the + function is not the same as that of the + destination. If the function is declared then + casts are inserted for those arguments that + correspond to declared formals. (The actual + number of arguments might be smaller or larger + than the declared number of arguments. C allows + this.) If the type of the result variable is not + the same as the declared type of the function + result then an implicit cast exists. *) + + (* See the GCC specification for the meaning of ASM. + * If the source is MS VC then only the templates + * are used *) + (* sm: I've added a notes.txt file which contains more + * information on interpreting Asm instructions *) + | Asm of attributes * (* Really only const and volatile can appear + * here *) + string list * (* templates (CR-separated) *) + (string * lval) list * (* outputs must be lvals with + * constraints. I would like these + * to be actually variables, but I + * run into some trouble with ASMs + * in the Linux sources *) + (string * exp) list * (* inputs with constraints *) + string list * (* register clobbers *) + location + (** An inline assembly instruction. The arguments are (1) a list of + attributes (only const and volatile can appear here and only for + GCC), (2) templates (CR-separated), (3) a list of + outputs, each of which is an lvalue with a constraint, (4) a list + of input expressions along with constraints, (5) clobbered + registers, and (5) location information *) + + + +(** Describes a location in a source file *) +and location = { + line: int; (** The line number. -1 means "do not know" *) + file: string; (** The name of the source file*) + byte: int; (** The byte position in the source file *) +} + +(* Type signatures. Two types are identical iff they have identical + * signatures *) +and typsig = + TSArray of typsig * int64 option * attribute list + | TSPtr of typsig * attribute list + | TSComp of bool * string * attribute list + | TSFun of typsig * typsig list * bool * attribute list + | TSEnum of string * attribute list + | TSBase of typ + + + +(** To be able to add/remove features easily, each feature should be package + * as an interface with the following interface. These features should be *) +type featureDescr = { + fd_enabled: bool ref; + (** The enable flag. Set to default value *) + + fd_name: string; + (** This is used to construct an option "--doxxx" and "--dontxxx" that + * enable and disable the feature *) + + fd_description: string; + (* A longer name that can be used to document the new options *) + + fd_extraopt: (string * Arg.spec * string) list; + (** Additional command line options *) + + fd_doit: (file -> unit); + (** This performs the transformation *) + + fd_post_check: bool; + (* Whether to perform a CIL consistency checking after this stage, if + * checking is enabled (--check is passed to cilly) *) +} + +let locUnknown = { line = -1; + file = ""; + byte = -1;} + +(* A reference to the current location *) +let currentLoc : location ref = ref locUnknown + +(* A reference to the current global being visited *) +let currentGlobal: global ref = ref (GText "dummy") + + +let compareLoc (a: location) (b: location) : int = + let namecmp = compare a.file b.file in + if namecmp != 0 + then namecmp + else + let linecmp = a.line - b.line in + if linecmp != 0 + then linecmp + else a.byte - b.byte + +let argsToList : (string * typ * attributes) list option + -> (string * typ * attributes) list + = function + None -> [] + | Some al -> al + + +(* A hack to allow forward reference of d_exp *) +let pd_exp : (unit -> exp -> doc) ref = + ref (fun _ -> E.s (E.bug "pd_exp not initialized")) + +(** Different visiting actions. 'a will be instantiated with [exp], [instr], + etc. *) +type 'a visitAction = + SkipChildren (** Do not visit the children. Return + the node as it is. *) + | DoChildren (** Continue with the children of this + node. Rebuild the node on return + if any of the children changes + (use == test) *) + | ChangeTo of 'a (** Replace the expression with the + given one *) + | ChangeDoChildrenPost of 'a * ('a -> 'a) (** First consider that the entire + exp is replaced by the first + parameter. Then continue with + the children. On return rebuild + the node if any of the children + has changed and then apply the + function on the node *) + + + +(* sm/gn: cil visitor interface for traversing Cil trees. *) +(* Use visitCilStmt and/or visitCilFile to use this. *) +(* Some of the nodes are changed in place if the children are changed. Use + * one of Change... actions if you want to copy the node *) + +(** A visitor interface for traversing CIL trees. Create instantiations of + * this type by specializing the class {!Cil.nopCilVisitor}. *) +class type cilVisitor = object + + method vvdec: varinfo -> varinfo visitAction + (** Invoked for each variable declaration. The subtrees to be traversed + * are those corresponding to the type and attributes of the variable. + * Note that variable declarations are all the [GVar], [GVarDecl], [GFun], + * all the [varinfo] in formals of function types, and the formals and + * locals for function definitions. This means that the list of formals + * in a function definition will be traversed twice, once as part of the + * function type and second as part of the formals in a function + * definition. *) + + method vvrbl: varinfo -> varinfo visitAction + (** Invoked on each variable use. Here only the [SkipChildren] and + * [ChangeTo] actions make sense since there are no subtrees. Note that + * the type and attributes of the variable are not traversed for a + * variable use *) + + method vexpr: exp -> exp visitAction + (** Invoked on each expression occurence. The subtrees are the + * subexpressions, the types (for a [Cast] or [SizeOf] expression) or the + * variable use. *) + + method vlval: lval -> lval visitAction + (** Invoked on each lvalue occurence *) + + method voffs: offset -> offset visitAction + (** Invoked on each offset occurrence that is *not* as part + * of an initializer list specification, i.e. in an lval or + * recursively inside an offset. *) + + method vinitoffs: offset -> offset visitAction + (** Invoked on each offset appearing in the list of a + * CompoundInit initializer. *) + + method vinst: instr -> instr list visitAction + (** Invoked on each instruction occurrence. The [ChangeTo] action can + * replace this instruction with a list of instructions *) + + method vstmt: stmt -> stmt visitAction + (** Control-flow statement. *) + + method vblock: block -> block visitAction (** Block. Replaced in + place. *) + method vfunc: fundec -> fundec visitAction (** Function definition. + Replaced in place. *) + method vglob: global -> global list visitAction (** Global (vars, types, + etc.) *) + method vinit: init -> init visitAction (** Initializers for globals *) + method vtype: typ -> typ visitAction (** Use of some type. Note + * that for structure/union + * and enumeration types the + * definition of the + * composite type is not + * visited. Use [vglob] to + * visit it. *) + method vattr: attribute -> attribute list visitAction + (** Attribute. Each attribute can be replaced by a list *) + method vattrparam: attrparam -> attrparam visitAction + (** Attribute parameters. *) + + (** Add here instructions while visiting to queue them to + * preceede the current statement or instruction being processed *) + method queueInstr: instr list -> unit + + (** Gets the queue of instructions and resets the queue *) + method unqueueInstr: unit -> instr list + +end + +(* the default visitor does nothing at each node, but does *) +(* not stop; hence they return true *) +class nopCilVisitor : cilVisitor = object + method vvrbl (v:varinfo) = DoChildren (* variable *) + method vvdec (v:varinfo) = DoChildren (* variable + * declaration *) + method vexpr (e:exp) = DoChildren (* expression *) + method vlval (l:lval) = DoChildren (* lval (base is 1st + * field) *) + method voffs (o:offset) = DoChildren (* lval or recursive offset *) + method vinitoffs (o:offset) = DoChildren (* initializer offset *) + method vinst (i:instr) = DoChildren (* imperative instruction *) + method vstmt (s:stmt) = DoChildren (* constrol-flow statement *) + method vblock (b: block) = DoChildren + method vfunc (f:fundec) = DoChildren (* function definition *) + method vglob (g:global) = DoChildren (* global (vars, types, etc.) *) + method vinit (i:init) = DoChildren (* global initializers *) + method vtype (t:typ) = DoChildren (* use of some type *) + method vattr (a: attribute) = DoChildren + method vattrparam (a: attrparam) = DoChildren + + val mutable instrQueue = [] + + method queueInstr (il: instr list) = + List.iter (fun i -> instrQueue <- i :: instrQueue) il + + method unqueueInstr () = + let res = List.rev instrQueue in + instrQueue <- []; + res + +end + +let assertEmptyQueue vis = + if vis#unqueueInstr () <> [] then + (* Either a visitor inserted an instruction somewhere that it shouldn't + have (i.e. at the top level rather than inside of a statement), or + there's a bug in the visitor engine. *) + E.s (E.bug "Visitor's instruction queue is not empty.\n You should only use queueInstr inside a function body!"); + () + + +let lu = locUnknown + +(* sm: utility *) +let startsWith (prefix: string) (s: string) : bool = +( + let prefixLen = (String.length prefix) in + (String.length s) >= prefixLen && + (String.sub s 0 prefixLen) = prefix +) + + +let get_instrLoc (inst : instr) = + match inst with + Set(_, _, loc) -> loc + | Call(_, _, _, loc) -> loc + | Asm(_, _, _, _, _, loc) -> loc +let get_globalLoc (g : global) = + match g with + | GFun(_,l) -> (l) + | GType(_,l) -> (l) + | GEnumTag(_,l) -> (l) + | GEnumTagDecl(_,l) -> (l) + | GCompTag(_,l) -> (l) + | GCompTagDecl(_,l) -> (l) + | GVarDecl(_,l) -> (l) + | GVar(_,_,l) -> (l) + | GAsm(_,l) -> (l) + | GPragma(_,l) -> (l) + | GText(_) -> locUnknown + +let rec get_stmtLoc (statement : stmtkind) = + match statement with + Instr([]) -> lu + | Instr(hd::tl) -> get_instrLoc(hd) + | Return(_, loc) -> loc + | Goto(_, loc) -> loc + | Break(loc) -> loc + | Continue(loc) -> loc + | If(_, _, _, loc) -> loc + | Switch (_, _, _, loc) -> loc +(* + | Loop (_, loc, _, _) -> loc +*) + | While (_, _, loc) -> loc + | DoWhile (_, _, loc) -> loc + | For (_, _, _, _, loc) -> loc + | Block b -> if b.bstmts == [] then lu + else get_stmtLoc ((List.hd b.bstmts).skind) + | TryFinally (_, _, l) -> l + | TryExcept (_, _, _, l) -> l + + +(* The next variable identifier to use. Counts up *) +let nextGlobalVID = ref 1 + +(* The next compindo identifier to use. Counts up. *) +let nextCompinfoKey = ref 1 + +(* Some error reporting functions *) +let d_loc (_: unit) (loc: location) : doc = + text loc.file ++ chr ':' ++ num loc.line + +let d_thisloc (_: unit) : doc = d_loc () !currentLoc + +let error (fmt : ('a,unit,doc) format) : 'a = + let f d = + E.hadErrors := true; + ignore (eprintf "@!%t: Error: %a@!" + d_thisloc insert d); + nil + in + Pretty.gprintf f fmt + +let unimp (fmt : ('a,unit,doc) format) : 'a = + let f d = + E.hadErrors := true; + ignore (eprintf "@!%t: Unimplemented: %a@!" + d_thisloc insert d); + nil + in + Pretty.gprintf f fmt + +let bug (fmt : ('a,unit,doc) format) : 'a = + let f d = + E.hadErrors := true; + ignore (eprintf "@!%t: Bug: %a@!" + d_thisloc insert d); + E.showContext (); + nil + in + Pretty.gprintf f fmt + +let errorLoc (loc: location) (fmt : ('a,unit,doc) format) : 'a = + let f d = + E.hadErrors := true; + ignore (eprintf "@!%a: Error: %a@!" + d_loc loc insert d); + E.showContext (); + nil + in + Pretty.gprintf f fmt + +let warn (fmt : ('a,unit,doc) format) : 'a = + let f d = + ignore (eprintf "@!%t: Warning: %a@!" + d_thisloc insert d); + nil + in + Pretty.gprintf f fmt + + +let warnOpt (fmt : ('a,unit,doc) format) : 'a = + let f d = + if !E.warnFlag then + ignore (eprintf "@!%t: Warning: %a@!" + d_thisloc insert d); + nil + in + Pretty.gprintf f fmt + +let warnContext (fmt : ('a,unit,doc) format) : 'a = + let f d = + ignore (eprintf "@!%t: Warning: %a@!" + d_thisloc insert d); + E.showContext (); + nil + in + Pretty.gprintf f fmt + +let warnContextOpt (fmt : ('a,unit,doc) format) : 'a = + let f d = + if !E.warnFlag then + ignore (eprintf "@!%t: Warning: %a@!" + d_thisloc insert d); + E.showContext (); + nil + in + Pretty.gprintf f fmt + +let warnLoc (loc: location) (fmt : ('a,unit,doc) format) : 'a = + let f d = + ignore (eprintf "@!%a: Warning: %a@!" + d_loc loc insert d); + E.showContext (); + nil + in + Pretty.gprintf f fmt + + + +(* Construct an integer. Use only for values that fit on 31 bits. + For larger values, use kinteger *) +let integer (i: int) = Const (CInt64(Int64.of_int i, IInt, None)) + +let zero = integer 0 +let one = integer 1 +let mone = integer (-1) + +(** Given the character c in a (CChr c), sign-extend it to 32 bits. + (This is the official way of interpreting character constants, according to + ISO C 6.4.4.4.10, which says that character constants are chars cast to ints) + Returns CInt64(sign-extened c, IInt, None) *) +let charConstToInt (c: char) : constant = + let c' = Char.code c in + let value = + if c' < 128 + then Int64.of_int c' + else Int64.of_int (c' - 256) + in + CInt64(value, IInt, None) + + +let rec isInteger = function + | Const(CInt64 (n,_,_)) -> Some n + | Const(CChr c) -> isInteger (Const (charConstToInt c)) (* sign-extend *) + | Const(CEnum(v, s, ei)) -> isInteger v + | CastE(_, e) -> isInteger e + | _ -> None + + + +let rec isZero (e: exp) : bool = isInteger e = Some Int64.zero + +let voidType = TVoid([]) +let intType = TInt(IInt,[]) +let uintType = TInt(IUInt,[]) +let longType = TInt(ILong,[]) +let ulongType = TInt(IULong,[]) +let charType = TInt(IChar, []) + +let charPtrType = TPtr(charType,[]) +let charConstPtrType = TPtr(TInt(IChar, [Attr("const", [])]),[]) +let stringLiteralType = ref charPtrType + +let voidPtrType = TPtr(voidType, []) +let intPtrType = TPtr(intType, []) +let uintPtrType = TPtr(uintType, []) + +let doubleType = TFloat(FDouble, []) + + +(* An integer type that fits pointers. Initialized by initCIL *) +let upointType = ref voidType + +(* An integer type that fits wchar_t. Initialized by initCIL *) +let wcharKind = ref IChar +let wcharType = ref voidType + + +(* An integer type that is the type of sizeof. Initialized by initCIL *) +let typeOfSizeOf = ref voidType +let kindOfSizeOf = ref IUInt + +let initCIL_called = ref false + +(** Returns true if and only if the given integer type is signed. *) +let isSigned = function + | IUChar + | IUShort + | IUInt + | IULong + | IULongLong -> + false + | ISChar + | IShort + | IInt + | ILong + | ILongLong -> + true + | IChar -> + not !theMachine.M.char_is_unsigned + +let mkStmt (sk: stmtkind) : stmt = + { skind = sk; + labels = []; + sid = -1; succs = []; preds = [] } + +let mkBlock (slst: stmt list) : block = + { battrs = []; bstmts = slst; } + +let mkEmptyStmt () = mkStmt (Instr []) +let mkStmtOneInstr (i: instr) = mkStmt (Instr [i]) + +let dummyInstr = (Asm([], ["dummy statement!!"], [], [], [], lu)) +let dummyStmt = mkStmt (Instr [dummyInstr]) + +let compactStmts (b: stmt list) : stmt list = + (* Try to compress statements. Scan the list of statements and remember + * the last instrunction statement encountered, along with a Clist of + * instructions in it. *) + let rec compress (lastinstrstmt: stmt) (* Might be dummStmt *) + (lastinstrs: instr Clist.clist) + (body: stmt list) = + let finishLast (tail: stmt list) : stmt list = + if lastinstrstmt == dummyStmt then tail + else begin + lastinstrstmt.skind <- Instr (Clist.toList lastinstrs); + lastinstrstmt :: tail + end + in + match body with + [] -> finishLast [] + | ({skind=Instr il} as s) :: rest -> + let ils = Clist.fromList il in + if lastinstrstmt != dummyStmt && s.labels == [] then + compress lastinstrstmt (Clist.append lastinstrs ils) rest + else + finishLast (compress s ils rest) + + | s :: rest -> + let res = s :: compress dummyStmt Clist.empty rest in + finishLast res + in + compress dummyStmt Clist.empty b + + +(** Construct sorted lists of attributes ***) +let rec addAttribute (Attr(an, _) as a: attribute) (al: attributes) = + let rec insertSorted = function + [] -> [a] + | ((Attr(an0, _) as a0) :: rest) as l -> + if an < an0 then a :: l + else if Util.equals a a0 then l (* Do not add if already in there *) + else a0 :: insertSorted rest (* Make sure we see all attributes with + * this name *) + in + insertSorted al + +(** The second attribute list is sorted *) +and addAttributes al0 (al: attributes) : attributes = + if al0 == [] then al else + List.fold_left (fun acc a -> addAttribute a acc) al al0 + +and dropAttribute (an: string) (al: attributes) = + List.filter (fun (Attr(an', _)) -> an <> an') al + +and dropAttributes (anl: string list) (al: attributes) = + List.fold_left (fun acc an -> dropAttribute an acc) al anl + +and filterAttributes (s: string) (al: attribute list) : attribute list = + List.filter (fun (Attr(an, _)) -> an = s) al + +(* sm: *) +let hasAttribute s al = + (filterAttributes s al <> []) + + +type attributeClass = + AttrName of bool + (* Attribute of a name. If argument is true and we are on MSVC then + * the attribute is printed using __declspec as part of the storage + * specifier *) + | AttrFunType of bool + (* Attribute of a function type. If argument is true and we are on + * MSVC then the attribute is printed just before the function name *) + + | AttrType (* Attribute of a type *) + +(* This table contains the mapping of predefined attributes to classes. + * Extend this table with more attributes as you need. This table is used to + * determine how to associate attributes with names or type during cabs2cil + * conversion *) +let attributeHash: (string, attributeClass) H.t = + let table = H.create 13 in + List.iter (fun a -> H.add table a (AttrName false)) + [ "section"; "constructor"; "destructor"; "unused"; "used"; "weak"; + "no_instrument_function"; "alias"; "no_check_memory_usage"; + "exception"; "model"; (* "restrict"; *) + "aconst"; "__asm__" (* Gcc uses this to specifiy the name to be used in + * assembly for a global *)]; + + (* Now come the MSVC declspec attributes *) + List.iter (fun a -> H.add table a (AttrName true)) + [ "thread"; "naked"; "dllimport"; "dllexport"; + "selectany"; "allocate"; "nothrow"; "novtable"; "property"; "noreturn"; + "uuid"; "align" ]; + + List.iter (fun a -> H.add table a (AttrFunType false)) + [ "format"; "regparm"; "longcall"; + "noinline"; "always_inline"; ]; + + List.iter (fun a -> H.add table a (AttrFunType true)) + [ "stdcall";"cdecl"; "fastcall" ]; + + List.iter (fun a -> H.add table a AttrType) + [ "const"; "volatile"; "restrict"; "mode" ]; + table + + +(* Partition the attributes into classes *) +let partitionAttributes + ~(default:attributeClass) + (attrs: attribute list) : + attribute list * attribute list * attribute list = + let rec loop (n,f,t) = function + [] -> n, f, t + | (Attr(an, _) as a) :: rest -> + match (try H.find attributeHash an with Not_found -> default) with + AttrName _ -> loop (addAttribute a n, f, t) rest + | AttrFunType _ -> + loop (n, addAttribute a f, t) rest + | AttrType -> loop (n, f, addAttribute a t) rest + in + loop ([], [], []) attrs + + +(* Get the full name of a comp *) +let compFullName comp = + (if comp.cstruct then "struct " else "union ") ^ comp.cname + + +let missingFieldName = "___missing_field_name" + +(** Creates a a (potentially recursive) composite type. Make sure you add a + * GTag for it to the file! **) +let mkCompInfo + (isstruct: bool) + (n: string) + (* fspec is a function that when given a forward + * representation of the structure type constructs the type of + * the fields. The function can ignore this argument if not + * constructing a recursive type. *) + (mkfspec: compinfo -> (string * typ * int option * attribute list * + location) list) + (a: attribute list) : compinfo = + + (* make an new name for anonymous structs *) + if n = "" then + E.s (E.bug "mkCompInfo: missing structure name\n"); + (* Make a new self cell and a forward reference *) + let comp = + { cstruct = isstruct; cname = ""; ckey = 0; cfields = []; + cattr = a; creferenced = false; + (* Make this compinfo undefined by default *) + cdefined = false; } + in + comp.cname <- n; + comp.ckey <- !nextCompinfoKey; + incr nextCompinfoKey; + let flds = + List.map (fun (fn, ft, fb, fa, fl) -> + { fcomp = comp; + ftype = ft; + fname = fn; + fbitfield = fb; + fattr = fa; + floc = fl}) (mkfspec comp) in + comp.cfields <- flds; + if flds <> [] then comp.cdefined <- true; + comp + +(** Make a copy of a compinfo, changing the name and the key *) +let copyCompInfo (ci: compinfo) (n: string) : compinfo = + let ci' = {ci with cname = n; + ckey = !nextCompinfoKey; } in + incr nextCompinfoKey; + (* Copy the fields and set the new pointers to parents *) + ci'.cfields <- List.map (fun f -> {f with fcomp = ci'}) ci'.cfields; + ci' + +(**** Utility functions ******) + +let rec typeAttrs = function + TVoid a -> a + | TInt (_, a) -> a + | TFloat (_, a) -> a + | TNamed (t, a) -> addAttributes a (typeAttrs t.ttype) + | TPtr (_, a) -> a + | TArray (_, _, a) -> a + | TComp (comp, a) -> addAttributes comp.cattr a + | TEnum (enum, a) -> addAttributes enum.eattr a + | TFun (_, _, _, a) -> a + | TBuiltin_va_list a -> a + + +let setTypeAttrs t a = + match t with + TVoid _ -> TVoid a + | TInt (i, _) -> TInt (i, a) + | TFloat (f, _) -> TFloat (f, a) + | TNamed (t, _) -> TNamed(t, a) + | TPtr (t', _) -> TPtr(t', a) + | TArray (t', l, _) -> TArray(t', l, a) + | TComp (comp, _) -> TComp (comp, a) + | TEnum (enum, _) -> TEnum (enum, a) + | TFun (r, args, v, _) -> TFun(r,args,v,a) + | TBuiltin_va_list _ -> TBuiltin_va_list a + + +let typeAddAttributes a0 t = +begin + match a0 with + | [] -> + (* no attributes, keep same type *) + t + | _ -> + (* anything else: add a0 to existing attributes *) + let add (a: attributes) = addAttributes a0 a in + match t with + TVoid a -> TVoid (add a) + | TInt (ik, a) -> TInt (ik, add a) + | TFloat (fk, a) -> TFloat (fk, add a) + | TEnum (enum, a) -> TEnum (enum, add a) + | TPtr (t, a) -> TPtr (t, add a) + | TArray (t, l, a) -> TArray (t, l, add a) + | TFun (t, args, isva, a) -> TFun(t, args, isva, add a) + | TComp (comp, a) -> TComp (comp, add a) + | TNamed (t, a) -> TNamed (t, add a) + | TBuiltin_va_list a -> TBuiltin_va_list (add a) +end + +let typeRemoveAttributes (anl: string list) t = + let drop (al: attributes) = dropAttributes anl al in + match t with + TVoid a -> TVoid (drop a) + | TInt (ik, a) -> TInt (ik, drop a) + | TFloat (fk, a) -> TFloat (fk, drop a) + | TEnum (enum, a) -> TEnum (enum, drop a) + | TPtr (t, a) -> TPtr (t, drop a) + | TArray (t, l, a) -> TArray (t, l, drop a) + | TFun (t, args, isva, a) -> TFun(t, args, isva, drop a) + | TComp (comp, a) -> TComp (comp, drop a) + | TNamed (t, a) -> TNamed (t, drop a) + | TBuiltin_va_list a -> TBuiltin_va_list (drop a) + +let unrollType (t: typ) : typ = + let rec withAttrs (al: attributes) (t: typ) : typ = + match t with + TNamed (r, a') -> withAttrs (addAttributes al a') r.ttype + | x -> typeAddAttributes al x + in + withAttrs [] t + +let rec unrollTypeDeep (t: typ) : typ = + let rec withAttrs (al: attributes) (t: typ) : typ = + match t with + TNamed (r, a') -> withAttrs (addAttributes al a') r.ttype + | TPtr(t, a') -> TPtr(unrollTypeDeep t, addAttributes al a') + | TArray(t, l, a') -> TArray(unrollTypeDeep t, l, addAttributes al a') + | TFun(rt, args, isva, a') -> + TFun (unrollTypeDeep rt, + (match args with + None -> None + | Some argl -> + Some (List.map (fun (an,at,aa) -> + (an, unrollTypeDeep at, aa)) argl)), + isva, + addAttributes al a') + | x -> typeAddAttributes al x + in + withAttrs [] t + +let isVoidType t = + match unrollType t with + TVoid _ -> true + | _ -> false +let isVoidPtrType t = + match unrollType t with + TPtr(tau,_) when isVoidType tau -> true + | _ -> false + +let var vi : lval = (Var vi, NoOffset) +(* let assign vi e = Instrs(Set (var vi, e), lu) *) + +let mkString s = Const(CStr s) + + +let mkWhile ~(guard:exp) ~(body: stmt list) : stmt list = + (* Do it like this so that the pretty printer recognizes it *) +(* + [ mkStmt (Loop (mkBlock (mkStmt (If(guard, + mkBlock [ mkEmptyStmt () ], + mkBlock [ mkStmt (Break lu)], lu)) :: + body), lu, None, None)) ] +*) + [ mkStmt (While (guard, mkBlock body, lu)) ] + + + +let mkFor ~(start: stmt list) ~(guard: exp) ~(next: stmt list) + ~(body: stmt list) : stmt list = + (start @ + (mkWhile guard (body @ next))) + + +let mkForIncr ~(iter : varinfo) ~(first: exp) ~stopat:(past: exp) ~(incr: exp) + ~(body: stmt list) : stmt list = + (* See what kind of operator we need *) + let compop, nextop = + match unrollType iter.vtype with + TPtr _ -> Lt, PlusPI + | _ -> Lt, PlusA + in + mkFor + [ mkStmt (Instr [(Set (var iter, first, lu))]) ] + (BinOp(compop, Lval(var iter), past, intType)) + [ mkStmt (Instr [(Set (var iter, + (BinOp(nextop, Lval(var iter), incr, iter.vtype)), + lu))])] + body + + +let rec stripCasts (e: exp) = + match e with CastE(_, e') -> stripCasts e' | _ -> e + + + +(* the name of the C function we call to get ccgr ASTs +external parse : string -> file = "cil_main" +*) +(* + Pretty Printing + *) + +let d_ikind () = function + IChar -> text "char" + | ISChar -> text "signed char" + | IUChar -> text "unsigned char" + | IInt -> text "int" + | IUInt -> text "unsigned int" + | IShort -> text "short" + | IUShort -> text "unsigned short" + | ILong -> text "long" + | IULong -> text "unsigned long" + | ILongLong -> + if !msvcMode then text "__int64" else text "long long" + | IULongLong -> + if !msvcMode then text "unsigned __int64" + else text "unsigned long long" + +let d_fkind () = function + FFloat -> text "float" + | FDouble -> text "double" + | FLongDouble -> text "long double" + +let d_storage () = function + NoStorage -> nil + | Static -> text "static " + | Extern -> text "extern " + | Register -> text "register " + +(* sm: need this value below *) +let mostNeg32BitInt : int64 = (Int64.of_string "-0x80000000") +let mostNeg64BitInt : int64 = (Int64.of_string "-0x8000000000000000") + +(* constant *) +let d_const () c = + match c with + CInt64(_, _, Some s) -> text s (* Always print the text if there is one *) + | CInt64(i, ik, None) -> + (** We must make sure to capture the type of the constant. For some + * constants this is done with a suffix, for others with a cast prefix.*) + let suffix : string = + match ik with + IUInt -> "U" + | ILong -> "L" + | IULong -> "UL" + | ILongLong -> if !msvcMode then "L" else "LL" + | IULongLong -> if !msvcMode then "UL" else "ULL" + | _ -> "" + in + let prefix : string = + if suffix <> "" then "" + else if ik = IInt then "" + else "(" ^ (sprint !lineLength (d_ikind () ik)) ^ ")" + in + (* Watch out here for negative integers that we should be printing as + * large positive ones *) + if i < Int64.zero + && (match ik with + IUInt | IULong | IULongLong | IUChar | IUShort -> true | _ -> false) then + let high = Int64.shift_right i 32 in + if ik <> IULongLong && ik <> ILongLong && high = Int64.of_int (-1) then + (* Print only the low order 32 bits *) + text (prefix ^ "0x" ^ + (Int64.format "%x" + (Int64.logand i (Int64.shift_right_logical high 32)) + ^ suffix)) + else + text (prefix ^ "0x" ^ Int64.format "%x" i ^ suffix) + else ( + if (i = mostNeg32BitInt) then + (* sm: quirk here: if you print -2147483648 then this is two tokens *) + (* in C, and the second one is too large to represent in a signed *) + (* int.. so we do what's done in limits.h, and print (-2147483467-1); *) + (* in gcc this avoids a warning, but it might avoid a real problem *) + (* on another compiler or a 64-bit architecture *) + text (prefix ^ "(-0x7FFFFFFF-1)") + else if (i = mostNeg64BitInt) then + (* The same is true of the largest 64-bit negative. *) + text (prefix ^ "(-0x7FFFFFFFFFFFFFFF-1)") + else + text (prefix ^ (Int64.to_string i ^ suffix)) + ) + + | CStr(s) -> text ("\"" ^ escape_string s ^ "\"") + | CWStr(s) -> + (* text ("L\"" ^ escape_string s ^ "\"") *) + (List.fold_left (fun acc elt -> + acc ++ + if (elt >= Int64.zero && + elt <= (Int64.of_int 255)) then + text (escape_char (Char.chr (Int64.to_int elt))) + else + ( text (Printf.sprintf "\\x%LX\"" elt) ++ break ++ + (text "\"")) + ) (text "L\"") s ) ++ text "\"" + (* we cannot print L"\xabcd" "feedme" as L"\xabcdfeedme" -- + * the former has 7 wide characters and the later has 3. *) + + | CChr(c) -> text ("'" ^ escape_char c ^ "'") + | CReal(_, _, Some s) -> text s + | CReal(f, _, None) -> text (string_of_float f) + | CEnum(_, s, ei) -> text s + + +(* Parentheses level. An expression "a op b" is printed parenthesized if its + * parentheses level is >= that that of its context. Identifiers have the + * lowest level and weakly binding operators (e.g. |) have the largest level. + * The correctness criterion is that a smaller level MUST correspond to a + * stronger precedence! + *) +let derefStarLevel = 20 +let indexLevel = 20 +let arrowLevel = 20 +let addrOfLevel = 30 +let additiveLevel = 60 +let comparativeLevel = 70 +let bitwiseLevel = 75 +let getParenthLevel = function + | BinOp((LAnd | LOr), _,_,_) -> 80 + (* Bit operations. *) + | BinOp((BOr|BXor|BAnd),_,_,_) -> bitwiseLevel (* 75 *) + + (* Comparisons *) + | BinOp((Eq|Ne|Gt|Lt|Ge|Le),_,_,_) -> + comparativeLevel (* 70 *) + (* Additive. Shifts can have higher + * level than + or - but I want + * parentheses around them *) + | BinOp((MinusA|MinusPP|MinusPI|PlusA| + PlusPI|IndexPI|Shiftlt|Shiftrt),_,_,_) + -> additiveLevel (* 60 *) + + (* Multiplicative *) + | BinOp((Div|Mod|Mult),_,_,_) -> 40 + + (* Unary *) + | CastE(_,_) -> 30 + | AddrOf(_) -> 30 + | StartOf(_) -> 30 + | UnOp((Neg|BNot|LNot),_,_) -> 30 + + (* Lvals *) + | Lval(Mem _ , _) -> 20 + | Lval(Var _, (Field _|Index _)) -> 20 + | SizeOf _ | SizeOfE _ | SizeOfStr _ -> 20 + | AlignOf _ | AlignOfE _ -> 20 + + | Lval(Var _, NoOffset) -> 0 (* Plain variables *) + | Const _ -> 0 (* Constants *) + + + +(* Separate out the storage-modifier name attributes *) +let separateStorageModifiers (al: attribute list) = + let isstoragemod (Attr(an, _): attribute) : bool = + try + match H.find attributeHash an with + AttrName issm -> issm + | _ -> E.s (E.bug "separateStorageModifier: %s is not a name attribute" an) + with Not_found -> false + in + let stom, rest = List.partition isstoragemod al in + if not !msvcMode then + stom, rest + else + (* Put back the declspec. Put it without the leading __ since these will + * be added later *) + let stom' = + List.map (fun (Attr(an, args)) -> + Attr("declspec", [ACons(an, args)])) stom in + stom', rest + + +let isIntegralType t = + match unrollType t with + (TInt _ | TEnum _) -> true + | _ -> false + +let isArithmeticType t = + match unrollType t with + (TInt _ | TEnum _ | TFloat _) -> true + | _ -> false + + +let isPointerType t = + match unrollType t with + TPtr _ -> true + | _ -> false + +let isFunctionType t = + match unrollType t with + TFun _ -> true + | _ -> false + +(**** Compute the type of an expression ****) +let rec typeOf (e: exp) : typ = + match e with + | Const(CInt64 (_, ik, _)) -> TInt(ik, []) + + (* Character constants have type int. ISO/IEC 9899:1999 (E), + * section 6.4.4.4 [Character constants], paragraph 10, if you + * don't believe me. *) + | Const(CChr _) -> intType + + (* The type of a string is a pointer to characters ! The only case when + * you would want it to be an array is as an argument to sizeof, but we + * have SizeOfStr for that *) + | Const(CStr s) -> !stringLiteralType + + | Const(CWStr s) -> TPtr(!wcharType,[]) + + | Const(CReal (_, fk, _)) -> TFloat(fk, []) + + | Const(CEnum(_, _, ei)) -> TEnum(ei, []) + + | Lval(lv) -> typeOfLval lv + | SizeOf _ | SizeOfE _ | SizeOfStr _ -> !typeOfSizeOf + | AlignOf _ | AlignOfE _ -> !typeOfSizeOf + | UnOp (_, _, t) -> t + | BinOp (_, _, _, t) -> t + | CastE (t, _) -> t + | AddrOf (lv) -> TPtr(typeOfLval lv, []) + | StartOf (lv) -> begin + match unrollType (typeOfLval lv) with + TArray (t,_, _) -> TPtr(t, []) + | _ -> E.s (E.bug "typeOf: StartOf on a non-array") + end + +and typeOfInit (i: init) : typ = + match i with + SingleInit e -> typeOf e + | CompoundInit (t, _) -> t + +and typeOfLval = function + Var vi, off -> typeOffset vi.vtype off + | Mem addr, off -> begin + match unrollType (typeOf addr) with + TPtr (t, _) -> typeOffset t off + | _ -> E.s (bug "typeOfLval: Mem on a non-pointer") + end + +and typeOffset basetyp = + let blendAttributes baseAttrs = + let (_, _, contageous) = + partitionAttributes ~default:(AttrName false) baseAttrs in + typeAddAttributes contageous + in + function + NoOffset -> basetyp + | Index (_, o) -> begin + match unrollType basetyp with + TArray (t, _, baseAttrs) -> + let elementType = typeOffset t o in + blendAttributes baseAttrs elementType + | t -> E.s (E.bug "typeOffset: Index on a non-array") + end + | Field (fi, o) -> + match unrollType basetyp with + TComp (_, baseAttrs) -> + let fieldType = typeOffset fi.ftype o in + blendAttributes baseAttrs fieldType + | _ -> E.s (bug "typeOffset: Field on a non-compound") + + +(** + ** + ** MACHINE DEPENDENT PART + ** + **) +exception SizeOfError of string * typ + + +(* Get the minimum aligment in bytes for a given type *) +let rec alignOf_int = function + | TInt((IChar|ISChar|IUChar), _) -> 1 + | TInt((IShort|IUShort), _) -> !theMachine.M.alignof_short + | TInt((IInt|IUInt), _) -> !theMachine.M.alignof_int + | TInt((ILong|IULong), _) -> !theMachine.M.alignof_long + | TInt((ILongLong|IULongLong), _) -> !theMachine.M.alignof_longlong + | TEnum _ -> !theMachine.M.alignof_enum + | TFloat(FFloat, _) -> !theMachine.M.alignof_float + | TFloat(FDouble, _) -> !theMachine.M.alignof_double + | TFloat(FLongDouble, _) -> !theMachine.M.alignof_longdouble + | TNamed (t, _) -> alignOf_int t.ttype + | TArray (t, _, _) -> alignOf_int t + | TPtr _ | TBuiltin_va_list _ -> !theMachine.M.alignof_ptr + + (* For composite types get the maximum alignment of any field inside *) + | TComp (c, _) -> + (* On GCC the zero-width fields do not contribute to the alignment. On + * MSVC only those zero-width that _do_ appear after other + * bitfields contribute to the alignment. So we drop those that + * do not occur after othe bitfields *) + let rec dropZeros (afterbitfield: bool) = function + | f :: rest when f.fbitfield = Some 0 && not afterbitfield -> + dropZeros afterbitfield rest + | f :: rest -> f :: dropZeros (f.fbitfield <> None) rest + | [] -> [] + in + let fields = dropZeros false c.cfields in + List.fold_left + (fun sofar f -> + (* Bitfields with zero width do not contribute to the alignment in + * GCC *) + if not !msvcMode && f.fbitfield = Some 0 then sofar else + max sofar (alignOf_int f.ftype)) 1 fields + (* These are some error cases *) + | TFun _ when not !msvcMode -> !theMachine.M.alignof_fun + + | TFun _ as t -> raise (SizeOfError ("function", t)) + | TVoid _ as t -> raise (SizeOfError ("void", t)) + + +let bitsSizeOfInt (ik: ikind): int = + match ik with + | IChar | ISChar | IUChar -> 8 + | IInt | IUInt -> 8 * !theMachine.M.sizeof_int + | IShort | IUShort -> 8 * !theMachine.M.sizeof_short + | ILong | IULong -> 8 * !theMachine.M.sizeof_long + | ILongLong | IULongLong -> 8 * !theMachine.M.sizeof_longlong + +(* Represents an integer as for a given kind. + Returns a flag saying whether the value was changed + during truncation (because it was too large to fit in k). *) +let truncateInteger64 (k: ikind) (i: int64) : int64 * bool = + let nrBits = bitsSizeOfInt k in + let signed = isSigned k in + if nrBits = 64 then + i, false + else begin + let i1 = Int64.shift_left i (64 - nrBits) in + let i2 = + if signed then Int64.shift_right i1 (64 - nrBits) + else Int64.shift_right_logical i1 (64 - nrBits) + in + let truncated = + if i2 = i then false + else + (* Examine the bits that we chopped off. If they are all zero, then + * any difference between i2 and i is due to a simple sign-extension. + * e.g. casting the constant 0x80000000 to int makes it + * 0xffffffff80000000. + * Suppress the truncation warning in this case. *) + let chopped = Int64.shift_right_logical i (64 - nrBits) + in chopped <> Int64.zero + in + i2, truncated + end + +(* Construct an integer constant with possible truncation *) +let kinteger64 (k: ikind) (i: int64) : exp = + let i', truncated = truncateInteger64 k i in + if truncated then + ignore (warnOpt "Truncating integer %s to %s\n" + (Int64.format "0x%x" i) (Int64.format "0x%x" i')); + Const (CInt64(i', k, None)) + +(* Construct an integer of a given kind. *) +let kinteger (k: ikind) (i: int) = kinteger64 k (Int64.of_int i) + + +type offsetAcc = + { oaFirstFree: int; (* The first free bit *) + oaLastFieldStart: int; (* Where the previous field started *) + oaLastFieldWidth: int; (* The width of the previous field. Might not + * be same as FirstFree - FieldStart because + * of internal padding *) + oaPrevBitPack: (int * ikind * int) option; (* If the previous fields + * were packed bitfields, + * the bit where packing + * has started, the ikind + * of the bitfield and the + * width of the ikind *) + } + + +(* GCC version *) +(* Does not use the sofar.oaPrevBitPack *) +let rec offsetOfFieldAcc_GCC (fi: fieldinfo) + (sofar: offsetAcc) : offsetAcc = + (* field type *) + let ftype = unrollType fi.ftype in + let ftypeAlign = 8 * alignOf_int ftype in + let ftypeBits = bitsSizeOf ftype in +(* + if fi.fcomp.cname = "comp2468" || + fi.fcomp.cname = "comp2469" || + fi.fcomp.cname = "comp2470" || + fi.fcomp.cname = "comp2471" || + fi.fcomp.cname = "comp2472" || + fi.fcomp.cname = "comp2473" || + fi.fcomp.cname = "comp2474" || + fi.fcomp.cname = "comp2475" || + fi.fcomp.cname = "comp2476" || + fi.fcomp.cname = "comp2477" || + fi.fcomp.cname = "comp2478" then + + ignore (E.log "offsetOfFieldAcc_GCC(%s of %s:%a%a,firstFree=%d,pack=%a)\n" + fi.fname fi.fcomp.cname + d_type ftype + insert + (match fi.fbitfield with + None -> nil + | Some wdthis -> dprintf ":%d" wdthis) + sofar.oaFirstFree + insert + (match sofar.oaPrevBitPack with + None -> text "None" + | Some (packstart, _, wdpack) -> + dprintf "Some(packstart=%d,wd=%d)" + packstart wdpack)); +*) + match ftype, fi.fbitfield with + (* A width of 0 means that we must end the current packing. It seems that + * GCC pads only up to the alignment boundary for the type of this field. + * *) + | _, Some 0 -> + let firstFree = addTrailing sofar.oaFirstFree ftypeAlign in + { oaFirstFree = firstFree; + oaLastFieldStart = firstFree; + oaLastFieldWidth = 0; + oaPrevBitPack = None } + + (* A bitfield cannot span more alignment boundaries of its type than the + * type itself *) + | _, Some wdthis + when (sofar.oaFirstFree + wdthis + ftypeAlign - 1) / ftypeAlign + - sofar.oaFirstFree / ftypeAlign > ftypeBits / ftypeAlign -> + let start = addTrailing sofar.oaFirstFree ftypeAlign in + { oaFirstFree = start + wdthis; + oaLastFieldStart = start; + oaLastFieldWidth = wdthis; + oaPrevBitPack = None } + + (* Try a simple method. Just put the field down *) + | _, Some wdthis -> + { oaFirstFree = sofar.oaFirstFree + wdthis; + oaLastFieldStart = sofar.oaFirstFree; + oaLastFieldWidth = wdthis; + oaPrevBitPack = None + } + + (* Non-bitfield *) + | _, None -> + (* Align this field *) + let newStart = addTrailing sofar.oaFirstFree ftypeAlign in + { oaFirstFree = newStart + ftypeBits; + oaLastFieldStart = newStart; + oaLastFieldWidth = ftypeBits; + oaPrevBitPack = None; + } + +(* MSVC version *) +and offsetOfFieldAcc_MSVC (fi: fieldinfo) + (sofar: offsetAcc) : offsetAcc = + (* field type *) + let ftype = unrollType fi.ftype in + let ftypeAlign = 8 * alignOf_int ftype in + let ftypeBits = bitsSizeOf ftype in +(* + ignore (E.log "offsetOfFieldAcc_MSVC(%s of %s:%a%a,firstFree=%d, pack=%a)\n" + fi.fname fi.fcomp.cname + d_type ftype + insert + (match fi.fbitfield with + None -> nil + | Some wdthis -> dprintf ":%d" wdthis) + sofar.oaFirstFree + insert + (match sofar.oaPrevBitPack with + None -> text "None" + | Some (prevpack, _, wdpack) -> dprintf "Some(prev=%d,wd=%d)" + prevpack wdpack)); +*) + match ftype, fi.fbitfield, sofar.oaPrevBitPack with + (* Ignore zero-width bitfields that come after non-bitfields *) + | TInt (ikthis, _), Some 0, None -> + let firstFree = sofar.oaFirstFree in + { oaFirstFree = firstFree; + oaLastFieldStart = firstFree; + oaLastFieldWidth = 0; + oaPrevBitPack = None } + + (* If we are in a bitpack and we see a bitfield for a type with the + * different width than the pack, then we finish the pack and retry *) + | _, Some _, Some (packstart, _, wdpack) when wdpack != ftypeBits -> + let firstFree = + if sofar.oaFirstFree = packstart then packstart else + packstart + wdpack + in + offsetOfFieldAcc_MSVC fi + { oaFirstFree = addTrailing firstFree ftypeAlign; + oaLastFieldStart = sofar.oaLastFieldStart; + oaLastFieldWidth = sofar.oaLastFieldWidth; + oaPrevBitPack = None } + + (* A width of 0 means that we must end the current packing. *) + | TInt (ikthis, _), Some 0, Some (packstart, _, wdpack) -> + let firstFree = + if sofar.oaFirstFree = packstart then packstart else + packstart + wdpack + in + let firstFree = addTrailing firstFree ftypeAlign in + { oaFirstFree = firstFree; + oaLastFieldStart = firstFree; + oaLastFieldWidth = 0; + oaPrevBitPack = Some (firstFree, ikthis, ftypeBits) } + + (* Check for a bitfield that fits in the current pack after some other + * bitfields *) + | TInt(ikthis, _), Some wdthis, Some (packstart, ikprev, wdpack) + when packstart + wdpack >= sofar.oaFirstFree + wdthis -> + { oaFirstFree = sofar.oaFirstFree + wdthis; + oaLastFieldStart = sofar.oaFirstFree; + oaLastFieldWidth = wdthis; + oaPrevBitPack = sofar.oaPrevBitPack + } + + + | _, _, Some (packstart, _, wdpack) -> (* Finish up the bitfield pack and + * restart. *) + let firstFree = + if sofar.oaFirstFree = packstart then packstart else + packstart + wdpack + in + offsetOfFieldAcc_MSVC fi + { oaFirstFree = addTrailing firstFree ftypeAlign; + oaLastFieldStart = sofar.oaLastFieldStart; + oaLastFieldWidth = sofar.oaLastFieldWidth; + oaPrevBitPack = None } + + (* No active bitfield pack. But we are seeing a bitfield. *) + | TInt(ikthis, _), Some wdthis, None -> + let firstFree = addTrailing sofar.oaFirstFree ftypeAlign in + { oaFirstFree = firstFree + wdthis; + oaLastFieldStart = firstFree; + oaLastFieldWidth = wdthis; + oaPrevBitPack = Some (firstFree, ikthis, ftypeBits); } + + (* No active bitfield pack. Non-bitfield *) + | _, None, None -> + (* Align this field *) + let firstFree = addTrailing sofar.oaFirstFree ftypeAlign in + { oaFirstFree = firstFree + ftypeBits; + oaLastFieldStart = firstFree; + oaLastFieldWidth = ftypeBits; + oaPrevBitPack = None; + } + + | _, Some _, None -> E.s (E.bug "offsetAcc") + + +and offsetOfFieldAcc ~(fi: fieldinfo) + ~(sofar: offsetAcc) : offsetAcc = + if !msvcMode then offsetOfFieldAcc_MSVC fi sofar + else offsetOfFieldAcc_GCC fi sofar + +(* The size of a type, in bits. If struct or array then trailing padding is + * added *) +and bitsSizeOf t = + if not !initCIL_called then + E.s (E.error "You did not call Cil.initCIL before using the CIL library"); + match t with + | TInt (ik,_) -> bitsSizeOfInt ik + | TFloat(FDouble, _) -> 8 * !theMachine.M.sizeof_double + | TFloat(FLongDouble, _) -> 8 * !theMachine.M.sizeof_longdouble + | TFloat _ -> 8 * !theMachine.M.sizeof_float + | TEnum _ -> 8 * !theMachine.M.sizeof_enum + | TPtr _ -> 8 * !theMachine.M.sizeof_ptr + | TBuiltin_va_list _ -> 8 * !theMachine.M.sizeof_ptr + | TNamed (t, _) -> bitsSizeOf t.ttype + | TComp (comp, _) when comp.cfields == [] -> begin + (* Empty structs are allowed in msvc mode *) + if not comp.cdefined && not !msvcMode then + raise (SizeOfError ("abstract type", t)) (*abstract type*) + else + 0 + end + + | TComp (comp, _) when comp.cstruct -> (* Struct *) + (* Go and get the last offset *) + let startAcc = + { oaFirstFree = 0; + oaLastFieldStart = 0; + oaLastFieldWidth = 0; + oaPrevBitPack = None; + } in + let lastoff = + List.fold_left (fun acc fi -> offsetOfFieldAcc ~fi ~sofar:acc) + startAcc comp.cfields + in + if !msvcMode && lastoff.oaFirstFree = 0 && comp.cfields <> [] then + (* On MSVC if we have just a zero-width bitfields then the length + * is 32 and is not padded *) + 32 + else + addTrailing lastoff.oaFirstFree (8 * alignOf_int t) + + | TComp (comp, _) -> (* when not comp.cstruct *) + (* Get the maximum of all fields *) + let startAcc = + { oaFirstFree = 0; + oaLastFieldStart = 0; + oaLastFieldWidth = 0; + oaPrevBitPack = None; + } in + let max = + List.fold_left (fun acc fi -> + let lastoff = offsetOfFieldAcc ~fi ~sofar:startAcc in + if lastoff.oaFirstFree > acc then + lastoff.oaFirstFree else acc) 0 comp.cfields in + (* Add trailing by simulating adding an extra field *) + addTrailing max (8 * alignOf_int t) + + | TArray(t, Some len, _) -> begin + match constFold true len with + Const(CInt64(l,_,_)) -> + addTrailing ((bitsSizeOf t) * (Int64.to_int l)) (8 * alignOf_int t) + | _ -> raise (SizeOfError ("array non-constant length", t)) + end + + + | TVoid _ -> 8 * !theMachine.M.sizeof_void + | TFun _ when not !msvcMode -> (* On GCC the size of a function is defined *) + 8 * !theMachine.M.sizeof_fun + + | TArray (_, None, _) -> (* it seems that on GCC the size of such an + * array is 0 *) + 0 + + | TFun _ -> raise (SizeOfError ("function", t)) + + +and addTrailing nrbits roundto = + (nrbits + roundto - 1) land (lnot (roundto - 1)) + +and sizeOf t = + try + integer ((bitsSizeOf t) lsr 3) + with SizeOfError _ -> SizeOf(t) + + +and bitsOffset (baset: typ) (off: offset) : int * int = + let rec loopOff (baset: typ) (width: int) (start: int) = function + NoOffset -> start, width + | Index(e, off) -> begin + let ei = + match isInteger e with + Some i64 -> Int64.to_int i64 + | None -> raise (SizeOfError ("index not constant", baset)) + in + let bt = + match unrollType baset with + TArray(bt, _, _) -> bt + | _ -> E.s (E.bug "bitsOffset: Index on a non-array") + in + let bitsbt = bitsSizeOf bt in + loopOff bt bitsbt (start + ei * bitsbt) off + end + | Field(f, off) when not f.fcomp.cstruct -> + (* All union fields start at offset 0 *) + loopOff f.ftype (bitsSizeOf f.ftype) start off + + | Field(f, off) -> + (* Construct a list of fields preceeding and including this one *) + let prevflds = + let rec loop = function + [] -> E.s (E.bug "bitsOffset: Cannot find field %s in %s\n" + f.fname f.fcomp.cname) + | fi' :: _ when fi' == f -> [fi'] + | fi' :: rest -> fi' :: loop rest + in + loop f.fcomp.cfields + in + let lastoff = + List.fold_left (fun acc fi' -> offsetOfFieldAcc ~fi:fi' ~sofar:acc) + { oaFirstFree = 0; (* Start at 0 because each struct is done + * separately *) + oaLastFieldStart = 0; + oaLastFieldWidth = 0; + oaPrevBitPack = None } prevflds + in + (* ignore (E.log "Field %s of %s: start=%d, lastFieldStart=%d\n" + f.fname f.fcomp.cname start lastoff.oaLastFieldStart); *) + loopOff f.ftype lastoff.oaLastFieldWidth + (start + lastoff.oaLastFieldStart) off + in + loopOff baset (bitsSizeOf baset) 0 off + + + + +(*** Constant folding. If machdep is true then fold even sizeof operations ***) +and constFold (machdep: bool) (e: exp) : exp = + match e with + BinOp(bop, e1, e2, tres) -> constFoldBinOp machdep bop e1 e2 tres + | UnOp(unop, e1, tres) -> begin + try + let tk = + match unrollType tres with + TInt(ik, _) -> ik + | TEnum _ -> IInt + | _ -> raise Not_found (* probably a float *) + in + match constFold machdep e1 with + Const(CInt64(i,ik,_)) -> begin + match unop with + Neg -> kinteger64 tk (Int64.neg i) + | BNot -> kinteger64 tk (Int64.lognot i) + | LNot -> if i = Int64.zero then one else zero + end + | e1c -> UnOp(unop, e1c, tres) + with Not_found -> e + end + (* Characters are integers *) + | Const(CChr c) -> Const(charConstToInt c) + | Const(CEnum (v, _, _)) -> constFold machdep v + | SizeOf t when machdep -> begin + try + let bs = bitsSizeOf t in + kinteger !kindOfSizeOf (bs / 8) + with SizeOfError _ -> e + end + | SizeOfE e when machdep -> constFold machdep (SizeOf (typeOf e)) + | SizeOfStr s when machdep -> kinteger !kindOfSizeOf (1 + String.length s) + | AlignOf t when machdep -> kinteger !kindOfSizeOf (alignOf_int t) + | AlignOfE e when machdep -> begin + (* The alignmetn of an expression is not always the alignment of its + * type. I know that for strings this is not true *) + match e with + Const (CStr _) when not !msvcMode -> + kinteger !kindOfSizeOf !theMachine.M.alignof_str + (* For an array, it is the alignment of the array ! *) + | _ -> constFold machdep (AlignOf (typeOf e)) + end + + | CastE(it, + AddrOf (Mem (CastE(TPtr(bt, _), z)), off)) + when machdep && isZero z -> begin + try + let start, width = bitsOffset bt off in + if start mod 8 <> 0 then + E.s (error "Using offset of bitfield\n"); + constFold machdep (CastE(it, (integer (start / 8)))) + with SizeOfError _ -> e + end + + + | CastE (t, e) -> begin + match constFold machdep e, unrollType t with + (* Might truncate silently *) + Const(CInt64(i,k,_)), TInt(nk,_) -> + let i', _ = truncateInteger64 nk i in + Const(CInt64(i', nk, None)) + | e', _ -> CastE (t, e') + end + + | _ -> e + +and constFoldBinOp (machdep: bool) bop e1 e2 tres = + let e1' = constFold machdep e1 in + let e2' = constFold machdep e2 in + if isIntegralType tres then begin + let newe = + let rec mkInt = function + Const(CChr c) -> Const(charConstToInt c) + | Const(CEnum (v, s, ei)) -> mkInt v + | CastE(TInt (ik, ta), e) -> begin + match mkInt e with + Const(CInt64(i, _, _)) -> + let i', _ = truncateInteger64 ik i in + Const(CInt64(i', ik, None)) + + | e' -> CastE(TInt(ik, ta), e') + end + | e -> e + in + let tk = + match unrollType tres with + TInt(ik, _) -> ik + | TEnum _ -> IInt + | _ -> E.s (bug "constFoldBinOp") + in + (* See if the result is unsigned *) + let isunsigned typ = not (isSigned typ) in + let ge (unsigned: bool) (i1: int64) (i2: int64) : bool = + if unsigned then + let l1 = Int64.shift_right_logical i1 1 in + let l2 = Int64.shift_right_logical i2 1 in (* Both positive now *) + (l1 > l2) || (l1 = l2 && + Int64.logand i1 Int64.one >= Int64.logand i2 Int64.one) + else i1 >= i2 + in + let shiftInBounds i2 = + (* We only try to fold shifts if the second arg is positive and + less than 64. Otherwise, the semantics are processor-dependent, + so let the compiler sort it out. *) + i2 >= Int64.zero && i2 < (Int64.of_int 64) + in + (* Assume that the necessary promotions have been done *) + match bop, mkInt e1', mkInt e2' with + | PlusA, Const(CInt64(z,_,_)), e2'' when z = Int64.zero -> e2'' + | PlusA, e1'', Const(CInt64(z,_,_)) when z = Int64.zero -> e1'' + | PlusPI, e1'', Const(CInt64(z,_,_)) when z = Int64.zero -> e1'' + | IndexPI, e1'', Const(CInt64(z,_,_)) when z = Int64.zero -> e1'' + | MinusPI, e1'', Const(CInt64(z,_,_)) when z = Int64.zero -> e1'' + | PlusA, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + kinteger64 tk (Int64.add i1 i2) + | MinusA, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + kinteger64 tk (Int64.sub i1 i2) + | Mult, Const(CInt64(i1,ik1,_)), Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + kinteger64 tk (Int64.mul i1 i2) + | Mult, Const(CInt64(0L,_,_)), _ -> zero + | Mult, Const(CInt64(1L,_,_)), e2'' -> e2'' + | Mult, _, Const(CInt64(0L,_,_)) -> zero + | Mult, e1'', Const(CInt64(1L,_,_)) -> e1'' + | Div, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> begin + try kinteger64 tk (Int64.div i1 i2) + with Division_by_zero -> BinOp(bop, e1', e2', tres) + end + | Div, e1'', Const(CInt64(1L,_,_)) -> e1'' + + | Mod, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> begin + try kinteger64 tk (Int64.rem i1 i2) + with Division_by_zero -> BinOp(bop, e1', e2', tres) + end + | BAnd, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + kinteger64 tk (Int64.logand i1 i2) + | BAnd, Const(CInt64(0L,_,_)), _ -> zero + | BAnd, _, Const(CInt64(0L,_,_)) -> zero + | BOr, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + kinteger64 tk (Int64.logor i1 i2) + | BOr, _, _ when isZero e1' -> e2' + | BOr, _, _ when isZero e2' -> e1' + | BXor, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + kinteger64 tk (Int64.logxor i1 i2) + + | Shiftlt, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,_,_)) when shiftInBounds i2 -> + kinteger64 tk (Int64.shift_left i1 (Int64.to_int i2)) + | Shiftlt, Const(CInt64(0L,_,_)), _ -> zero + | Shiftlt, e1'', Const(CInt64(0L,_,_)) -> e1'' + + | Shiftrt, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,_,_)) when shiftInBounds i2 -> + if isunsigned ik1 then + kinteger64 tk (Int64.shift_right_logical i1 (Int64.to_int i2)) + else + kinteger64 tk (Int64.shift_right i1 (Int64.to_int i2)) + | Shiftrt, Const(CInt64(0L,_,_)), _ -> zero + | Shiftrt, e1'', Const(CInt64(0L,_,_)) -> e1'' + + | Eq, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + integer (if i1 = i2 then 1 else 0) + | Ne, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + integer (if i1 <> i2 then 1 else 0) + | Le, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + integer (if ge (isunsigned ik1) i2 i1 then 1 else 0) + + | Ge, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + integer (if ge (isunsigned ik1) i1 i2 then 1 else 0) + + | Lt, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + integer (if i1 <> i2 && ge (isunsigned ik1) i2 i1 then 1 else 0) + + | Gt, Const(CInt64(i1,ik1,_)),Const(CInt64(i2,ik2,_)) when ik1 = ik2 -> + integer (if i1 <> i2 && ge (isunsigned ik1) i1 i2 then 1 else 0) + | LAnd, _, _ when isZero e1' || isZero e2' -> zero + | LOr, _, _ when isZero e1' -> e2' + | LOr, _, _ when isZero e2' -> e1' + | _ -> BinOp(bop, e1', e2', tres) + in + if debugConstFold then + ignore (E.log "Folded %a to %a\n" + (!pd_exp) (BinOp(bop, e1', e2', tres)) (!pd_exp) newe); + newe + end else + BinOp(bop, e1', e2', tres) + + + +let parseInt (str: string) : exp = + let hasSuffix str = + let l = String.length str in + fun s -> + let ls = String.length s in + l >= ls && s = String.uppercase (String.sub str (l - ls) ls) + in + let l = String.length str in + (* See if it is octal or hex *) + let octalhex = (l >= 1 && String.get str 0 = '0') in + (* The length of the suffix and a list of possible kinds. See ISO + * 6.4.4.1 *) + let hasSuffix = hasSuffix str in + let suffixlen, kinds = + if hasSuffix "ULL" || hasSuffix "LLU" then + 3, [IULongLong] + else if hasSuffix "LL" then + 2, if octalhex then [ILongLong; IULongLong] else [ILongLong] + else if hasSuffix "UL" || hasSuffix "LU" then + 2, [IULong; IULongLong] + else if hasSuffix "L" then + 1, if octalhex then [ILong; IULong; ILongLong; IULongLong] + else [ILong; ILongLong] + else if hasSuffix "U" then + 1, [IUInt; IULong; IULongLong] + else if (!msvcMode && hasSuffix "UI64") then + 4, [IULongLong] + else if (!msvcMode && hasSuffix "I64") then + 3, [ILongLong] + else + 0, if octalhex || true (* !!! This is against the ISO but it + * is what GCC and MSVC do !!! *) + then [IInt; IUInt; ILong; IULong; ILongLong; IULongLong] + else [IInt; ILong; IUInt; ILongLong] + in + (* Convert to integer. To prevent overflow we do the arithmetic + * on Int64 and we take care of overflow. We work only with + * positive integers since the lexer takes care of the sign *) + let rec toInt (base: int64) (acc: int64) (idx: int) : int64 = + let doAcc (what: int) = + let acc' = + Int64.add (Int64.mul base acc) (Int64.of_int what) in + if acc < Int64.zero || (* We clearly overflow since base >= 2 + * *) + (acc' > Int64.zero && acc' < acc) then + E.s (unimp "Cannot represent on 64 bits the integer %s\n" + str) + else + toInt base acc' (idx + 1) + in + if idx >= l - suffixlen then begin + acc + end else + let ch = String.get str idx in + if ch >= '0' && ch <= '9' then + doAcc (Char.code ch - Char.code '0') + else if ch >= 'a' && ch <= 'f' then + doAcc (10 + Char.code ch - Char.code 'a') + else if ch >= 'A' && ch <= 'F' then + doAcc (10 + Char.code ch - Char.code 'A') + else + E.s (bug "Invalid integer constant: %s (char %c at idx=%d)" + str ch idx) + in + try + let i = + if octalhex then + if l >= 2 && + (let c = String.get str 1 in c = 'x' || c = 'X') then + toInt (Int64.of_int 16) Int64.zero 2 + else + toInt (Int64.of_int 8) Int64.zero 1 + else + toInt (Int64.of_int 10) Int64.zero 0 + in + (* Construct an integer of the first kinds that fits. i must be + * POSITIVE *) + let res = + let rec loop = function + | ((IInt | ILong) as k) :: _ + when i < Int64.shift_left (Int64.of_int 1) 31 -> + kinteger64 k i + | ((IUInt | IULong) as k) :: _ + when i < Int64.shift_left (Int64.of_int 1) 32 + -> kinteger64 k i + | (ILongLong as k) :: _ + when i <= Int64.sub (Int64.shift_left + (Int64.of_int 1) 63) + (Int64.of_int 1) + -> + kinteger64 k i + | (IULongLong as k) :: _ -> kinteger64 k i + | _ :: rest -> loop rest + | [] -> E.s (E.unimp "Cannot represent the integer %s\n" + (Int64.to_string i)) + in + loop kinds + in + res + with e -> begin + ignore (E.log "int_of_string %s (%s)\n" str + (Printexc.to_string e)); + zero + end + + + +let d_unop () u = + match u with + Neg -> text "-" + | BNot -> text "~" + | LNot -> text "!" + +let d_binop () b = + match b with + PlusA | PlusPI | IndexPI -> text "+" + | MinusA | MinusPP | MinusPI -> text "-" + | Mult -> text "*" + | Div -> text "/" + | Mod -> text "%" + | Shiftlt -> text "<<" + | Shiftrt -> text ">>" + | Lt -> text "<" + | Gt -> text ">" + | Le -> text "<=" + | Ge -> text ">=" + | Eq -> text "==" + | Ne -> text "!=" + | BAnd -> text "&" + | BXor -> text "^" + | BOr -> text "|" + | LAnd -> text "&&" + | LOr -> text "||" + +let invalidStmt = mkStmt (Instr []) + +(** Construct a hash with the builtins *) +let gccBuiltins : (string, typ * typ list * bool) H.t = + let h = H.create 17 in + (* See if we have builtin_va_list *) + let hasbva = M.gccHas__builtin_va_list in + let ulongLongType = TInt(IULongLong, []) in + let floatType = TFloat(FFloat, []) in + let longDoubleType = TFloat (FLongDouble, []) in + let voidConstPtrType = TPtr(TVoid [Attr ("const", [])], []) in + let sizeType = uintType in + + H.add h "__builtin___fprintf_chk" (intType, [ voidPtrType; intType; charConstPtrType ], true) (* first argument is really FILE*, not void*, but we don't want to build in the definition for FILE *); + H.add h "__builtin___memcpy_chk" (voidPtrType, [ voidPtrType; voidConstPtrType; sizeType; sizeType ], false); + H.add h "__builtin___memmove_chk" (voidPtrType, [ voidPtrType; voidConstPtrType; sizeType; sizeType ], false); + H.add h "__builtin___mempcpy_chk" (voidPtrType, [ voidPtrType; voidConstPtrType; sizeType; sizeType ], false); + H.add h "__builtin___memset_chk" (voidPtrType, [ voidPtrType; intType; sizeType; sizeType ], false); + H.add h "__builtin___printf_chk" (intType, [ intType; charConstPtrType ], true); + H.add h "__builtin___snprintf_chk" (intType, [ charPtrType; sizeType; intType; sizeType; charConstPtrType ], true); + H.add h "__builtin___sprintf_chk" (intType, [ charPtrType; intType; sizeType; charConstPtrType ], true); + H.add h "__builtin___stpcpy_chk" (charPtrType, [ charPtrType; charConstPtrType; sizeType ], false); + H.add h "__builtin___strcat_chk" (charPtrType, [ charPtrType; charConstPtrType; sizeType ], false); + H.add h "__builtin___strcpy_chk" (charPtrType, [ charPtrType; charConstPtrType; sizeType ], false); + H.add h "__builtin___strncat_chk" (charPtrType, [ charPtrType; charConstPtrType; sizeType; sizeType ], false); + H.add h "__builtin___strncpy_chk" (charPtrType, [ charPtrType; charConstPtrType; sizeType; sizeType ], false); + H.add h "__builtin___vfprintf_chk" (intType, [ voidPtrType; intType; charConstPtrType; TBuiltin_va_list [] ], false) (* first argument is really FILE*, not void*, but we don't want to build in the definition for FILE *); + H.add h "__builtin___vprintf_chk" (intType, [ intType; charConstPtrType; TBuiltin_va_list [] ], false); + H.add h "__builtin___vsnprintf_chk" (intType, [ charPtrType; sizeType; intType; sizeType; charConstPtrType; TBuiltin_va_list [] ], false); + H.add h "__builtin___vsprintf_chk" (intType, [ charPtrType; intType; sizeType; charConstPtrType; TBuiltin_va_list [] ], false); + + H.add h "__builtin_acos" (doubleType, [ doubleType ], false); + H.add h "__builtin_acosf" (floatType, [ floatType ], false); + H.add h "__builtin_acosl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_alloca" (voidPtrType, [ uintType ], false); + + H.add h "__builtin_asin" (doubleType, [ doubleType ], false); + H.add h "__builtin_asinf" (floatType, [ floatType ], false); + H.add h "__builtin_asinl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_atan" (doubleType, [ doubleType ], false); + H.add h "__builtin_atanf" (floatType, [ floatType ], false); + H.add h "__builtin_atanl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_atan2" (doubleType, [ doubleType; doubleType ], false); + H.add h "__builtin_atan2f" (floatType, [ floatType; floatType ], false); + H.add h "__builtin_atan2l" (longDoubleType, [ longDoubleType; + longDoubleType ], false); + + H.add h "__builtin_ceil" (doubleType, [ doubleType ], false); + H.add h "__builtin_ceilf" (floatType, [ floatType ], false); + H.add h "__builtin_ceill" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_cos" (doubleType, [ doubleType ], false); + H.add h "__builtin_cosf" (floatType, [ floatType ], false); + H.add h "__builtin_cosl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_cosh" (doubleType, [ doubleType ], false); + H.add h "__builtin_coshf" (floatType, [ floatType ], false); + H.add h "__builtin_coshl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_clz" (intType, [ uintType ], false); + H.add h "__builtin_clzl" (intType, [ ulongType ], false); + H.add h "__builtin_clzll" (intType, [ ulongLongType ], false); + H.add h "__builtin_constant_p" (intType, [ intType ], false); + H.add h "__builtin_ctz" (intType, [ uintType ], false); + H.add h "__builtin_ctzl" (intType, [ ulongType ], false); + H.add h "__builtin_ctzll" (intType, [ ulongLongType ], false); + + H.add h "__builtin_exp" (doubleType, [ doubleType ], false); + H.add h "__builtin_expf" (floatType, [ floatType ], false); + H.add h "__builtin_expl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_expect" (longType, [ longType; longType ], false); + + H.add h "__builtin_fabs" (doubleType, [ doubleType ], false); + H.add h "__builtin_fabsf" (floatType, [ floatType ], false); + H.add h "__builtin_fabsl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_ffs" (intType, [ uintType ], false); + H.add h "__builtin_ffsl" (intType, [ ulongType ], false); + H.add h "__builtin_ffsll" (intType, [ ulongLongType ], false); + H.add h "__builtin_frame_address" (voidPtrType, [ uintType ], false); + + H.add h "__builtin_floor" (doubleType, [ doubleType ], false); + H.add h "__builtin_floorf" (floatType, [ floatType ], false); + H.add h "__builtin_floorl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_huge_val" (doubleType, [], false); + H.add h "__builtin_huge_valf" (floatType, [], false); + H.add h "__builtin_huge_vall" (longDoubleType, [], false); + H.add h "__builtin_inf" (doubleType, [], false); + H.add h "__builtin_inff" (floatType, [], false); + H.add h "__builtin_infl" (longDoubleType, [], false); + H.add h "__builtin_memcpy" (voidPtrType, [ voidPtrType; voidConstPtrType; uintType ], false); + H.add h "__builtin_mempcpy" (voidPtrType, [ voidPtrType; voidConstPtrType; sizeType ], false); + + H.add h "__builtin_fmod" (doubleType, [ doubleType ], false); + H.add h "__builtin_fmodf" (floatType, [ floatType ], false); + H.add h "__builtin_fmodl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_frexp" (doubleType, [ doubleType; intPtrType ], false); + H.add h "__builtin_frexpf" (floatType, [ floatType; intPtrType ], false); + H.add h "__builtin_frexpl" (longDoubleType, [ longDoubleType; + intPtrType ], false); + + H.add h "__builtin_ldexp" (doubleType, [ doubleType; intType ], false); + H.add h "__builtin_ldexpf" (floatType, [ floatType; intType ], false); + H.add h "__builtin_ldexpl" (longDoubleType, [ longDoubleType; + intType ], false); + + H.add h "__builtin_log" (doubleType, [ doubleType ], false); + H.add h "__builtin_logf" (floatType, [ floatType ], false); + H.add h "__builtin_logl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_log10" (doubleType, [ doubleType ], false); + H.add h "__builtin_log10f" (floatType, [ floatType ], false); + H.add h "__builtin_log10l" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_modff" (floatType, [ floatType; + TPtr(floatType,[]) ], false); + H.add h "__builtin_modfl" (longDoubleType, [ longDoubleType; + TPtr(longDoubleType, []) ], + false); + + H.add h "__builtin_nan" (doubleType, [ charConstPtrType ], false); + H.add h "__builtin_nanf" (floatType, [ charConstPtrType ], false); + H.add h "__builtin_nanl" (longDoubleType, [ charConstPtrType ], false); + H.add h "__builtin_nans" (doubleType, [ charConstPtrType ], false); + H.add h "__builtin_nansf" (floatType, [ charConstPtrType ], false); + H.add h "__builtin_nansl" (longDoubleType, [ charConstPtrType ], false); + H.add h "__builtin_next_arg" ((if hasbva then TBuiltin_va_list [] else voidPtrType), [], false) (* When we parse builtin_next_arg we drop the second argument *); + H.add h "__builtin_object_size" (sizeType, [ voidPtrType; intType ], false); + + H.add h "__builtin_parity" (intType, [ uintType ], false); + H.add h "__builtin_parityl" (intType, [ ulongType ], false); + H.add h "__builtin_parityll" (intType, [ ulongLongType ], false); + + H.add h "__builtin_popcount" (intType, [ uintType ], false); + H.add h "__builtin_popcountl" (intType, [ ulongType ], false); + H.add h "__builtin_popcountll" (intType, [ ulongLongType ], false); + + H.add h "__builtin_powi" (doubleType, [ doubleType; intType ], false); + H.add h "__builtin_powif" (floatType, [ floatType; intType ], false); + H.add h "__builtin_powil" (longDoubleType, [ longDoubleType; intType ], false); + H.add h "__builtin_prefetch" (voidType, [ voidConstPtrType ], true); + H.add h "__builtin_return" (voidType, [ voidConstPtrType ], false); + H.add h "__builtin_return_address" (voidPtrType, [ uintType ], false); + + H.add h "__builtin_sin" (doubleType, [ doubleType ], false); + H.add h "__builtin_sinf" (floatType, [ floatType ], false); + H.add h "__builtin_sinl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_sinh" (doubleType, [ doubleType ], false); + H.add h "__builtin_sinhf" (floatType, [ floatType ], false); + H.add h "__builtin_sinhl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_sqrt" (doubleType, [ doubleType ], false); + H.add h "__builtin_sqrtf" (floatType, [ floatType ], false); + H.add h "__builtin_sqrtl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_stpcpy" (charPtrType, [ charPtrType; charConstPtrType ], false); + H.add h "__builtin_strchr" (charPtrType, [ charPtrType; charType ], false); + H.add h "__builtin_strcmp" (intType, [ charConstPtrType; charConstPtrType ], false); + H.add h "__builtin_strcpy" (charPtrType, [ charPtrType; charConstPtrType ], false); + H.add h "__builtin_strcspn" (uintType, [ charConstPtrType; charConstPtrType ], false); + H.add h "__builtin_strncat" (charPtrType, [ charPtrType; charConstPtrType; sizeType ], false); + H.add h "__builtin_strncmp" (intType, [ charConstPtrType; charConstPtrType; sizeType ], false); + H.add h "__builtin_strncpy" (charPtrType, [ charPtrType; charConstPtrType; sizeType ], false); + H.add h "__builtin_strspn" (intType, [ charConstPtrType; charConstPtrType ], false); + H.add h "__builtin_strpbrk" (charPtrType, [ charConstPtrType; charConstPtrType ], false); + (* When we parse builtin_types_compatible_p, we change its interface *) + H.add h "__builtin_types_compatible_p" + (intType, [ uintType; (* Sizeof the type *) + uintType (* Sizeof the type *) ], + false); + H.add h "__builtin_tan" (doubleType, [ doubleType ], false); + H.add h "__builtin_tanf" (floatType, [ floatType ], false); + H.add h "__builtin_tanl" (longDoubleType, [ longDoubleType ], false); + + H.add h "__builtin_tanh" (doubleType, [ doubleType ], false); + H.add h "__builtin_tanhf" (floatType, [ floatType ], false); + H.add h "__builtin_tanhl" (longDoubleType, [ longDoubleType ], false); + + + if hasbva then begin + H.add h "__builtin_va_end" (voidType, [ TBuiltin_va_list [] ], false); + H.add h "__builtin_varargs_start" + (voidType, [ TBuiltin_va_list [] ], false); + H.add h "__builtin_va_start" (voidType, [ TBuiltin_va_list [] ], false); + (* When we parse builtin_stdarg_start, we drop the second argument *) + H.add h "__builtin_stdarg_start" (voidType, [ TBuiltin_va_list []; ], + false); + (* When we parse builtin_va_arg we change its interface *) + H.add h "__builtin_va_arg" (voidType, [ TBuiltin_va_list []; + uintType; (* Sizeof the type *) + voidPtrType; (* Ptr to res *) ], + false); + H.add h "__builtin_va_copy" (voidType, [ TBuiltin_va_list []; + TBuiltin_va_list [] ], + false); + end; + h + +(** Construct a hash with the builtins *) +let msvcBuiltins : (string, typ * typ list * bool) H.t = + (* These are empty for now but can be added to depending on the application*) + let h = H.create 17 in + (** Take a number of wide string literals *) + H.add h "__annotation" (voidType, [ ], true); + h + + + +let pTypeSig : (typ -> typsig) ref = + ref (fun _ -> E.s (E.bug "pTypeSig not initialized")) + + +(** A printer interface for CIL trees. Create instantiations of + * this type by specializing the class {!Cil.defaultCilPrinter}. *) +class type cilPrinter = object + method pVDecl: unit -> varinfo -> doc + (** Invoked for each variable declaration. Note that variable + * declarations are all the [GVar], [GVarDecl], [GFun], all the [varinfo] + * in formals of function types, and the formals and locals for function + * definitions. *) + + method pVar: varinfo -> doc + (** Invoked on each variable use. *) + + method pLval: unit -> lval -> doc + (** Invoked on each lvalue occurence *) + + method pOffset: doc -> offset -> doc + (** Invoked on each offset occurence. The second argument is the base. *) + + method pInstr: unit -> instr -> doc + (** Invoked on each instruction occurrence. *) + + method pStmt: unit -> stmt -> doc + (** Control-flow statement. This is used by + * {!Cil.printGlobal} and by {!Cil.dumpGlobal}. *) + + method dStmt: out_channel -> int -> stmt -> unit + (** Dump a control-flow statement to a file with a given indentation. This is used by + * {!Cil.dumpGlobal}. *) + + method dBlock: out_channel -> int -> block -> unit + (** Dump a control-flow block to a file with a given indentation. This is + * used by {!Cil.dumpGlobal}. *) + + method pBlock: unit -> block -> Pretty.doc + (** Print a block. *) + + method pGlobal: unit -> global -> doc + (** Global (vars, types, etc.). This can be slow and is used only by + * {!Cil.printGlobal} but by {!Cil.dumpGlobal} for everything else except + * [GVar] and [GFun]. *) + + method dGlobal: out_channel -> global -> unit + (** Dump a global to a file. This is used by {!Cil.dumpGlobal}. *) + + method pFieldDecl: unit -> fieldinfo -> doc + (** A field declaration *) + + method pType: doc option -> unit -> typ -> doc + (* Use of some type in some declaration. The first argument is used to print + * the declared element, or is None if we are just printing a type with no + * name being decalred. Note that for structure/union and enumeration types + * the definition of the composite type is not visited. Use [vglob] to + * visit it. *) + + method pAttr: attribute -> doc * bool + (** Attribute. Also return an indication whether this attribute must be + * printed inside the __attribute__ list or not. *) + + method pAttrParam: unit -> attrparam -> doc + (** Attribute paramter *) + + method pAttrs: unit -> attributes -> doc + (** Attribute lists *) + + method pLabel: unit -> label -> doc + (** Label *) + + method pLineDirective: ?forcefile:bool -> location -> Pretty.doc + (** Print a line-number. This is assumed to come always on an empty line. + * If the forcefile argument is present and is true then the file name + * will be printed always. Otherwise the file name is printed only if it + * is different from the last time time this function is called. The last + * file name is stored in a private field inside the cilPrinter object. *) + + method pStmtKind : stmt -> unit -> stmtkind -> Pretty.doc + (** Print a statement kind. The code to be printed is given in the + * {!Cil.stmtkind} argument. The initial {!Cil.stmt} argument + * records the statement which follows the one being printed; + * {!Cil.defaultCilPrinterClass} uses this information to prettify + * statement printing in certain special cases. *) + + method pExp: unit -> exp -> doc + (** Print expressions *) + + method pInit: unit -> init -> doc + (** Print initializers. This can be slow and is used by + * {!Cil.printGlobal} but not by {!Cil.dumpGlobal}. *) + + method dInit: out_channel -> int -> init -> unit + (** Dump a global to a file with a given indentation. This is used by + * {!Cil.dumpGlobal}. *) +end + + +class defaultCilPrinterClass : cilPrinter = object (self) + val mutable currentFormals : varinfo list = [] + method private getLastNamedArgument (s: string) : exp = + match List.rev currentFormals with + f :: _ -> Lval (var f) + | [] -> + E.s (warn "Cannot find the last named argument when priting call to %s\n" s) + + (*** VARIABLES ***) + (* variable use *) + method pVar (v:varinfo) = text v.vname + + (* variable declaration *) + method pVDecl () (v:varinfo) = + let stom, rest = separateStorageModifiers v.vattr in + (* First the storage modifiers *) + text (if v.vinline then "__inline " else "") + ++ d_storage () v.vstorage + ++ (self#pAttrs () stom) + ++ (self#pType (Some (text v.vname)) () v.vtype) + ++ text " " + ++ self#pAttrs () rest + + (*** L-VALUES ***) + method pLval () (lv:lval) = (* lval (base is 1st field) *) + match lv with + Var vi, o -> self#pOffset (self#pVar vi) o + | Mem e, Field(fi, o) -> + self#pOffset + ((self#pExpPrec arrowLevel () e) ++ text ("->" ^ fi.fname)) o + | Mem e, o -> + self#pOffset + (text "(*" ++ self#pExpPrec derefStarLevel () e ++ text ")") o + + (** Offsets **) + method pOffset (base: doc) = function + | NoOffset -> base + | Field (fi, o) -> + self#pOffset (base ++ text "." ++ text fi.fname) o + | Index (e, o) -> + self#pOffset (base ++ text "[" ++ self#pExp () e ++ text "]") o + + method private pLvalPrec (contextprec: int) () lv = + if getParenthLevel (Lval(lv)) >= contextprec then + text "(" ++ self#pLval () lv ++ text ")" + else + self#pLval () lv + + (*** EXPRESSIONS ***) + method pExp () (e: exp) : doc = + let level = getParenthLevel e in + match e with + Const(c) -> d_const () c + | Lval(l) -> self#pLval () l + | UnOp(u,e1,_) -> + (d_unop () u) ++ chr ' ' ++ (self#pExpPrec level () e1) + + | BinOp(b,e1,e2,_) -> + align + ++ (self#pExpPrec level () e1) + ++ chr ' ' + ++ (d_binop () b) + ++ chr ' ' + ++ (self#pExpPrec level () e2) + ++ unalign + + | CastE(t,e) -> + text "(" + ++ self#pType None () t + ++ text ")" + ++ self#pExpPrec level () e + + | SizeOf (t) -> + text "sizeof(" ++ self#pType None () t ++ chr ')' + | SizeOfE (e) -> + text "sizeof(" ++ self#pExp () e ++ chr ')' + + | SizeOfStr s -> + text "sizeof(" ++ d_const () (CStr s) ++ chr ')' + + | AlignOf (t) -> + text "__alignof__(" ++ self#pType None () t ++ chr ')' + | AlignOfE (e) -> + text "__alignof__(" ++ self#pExp () e ++ chr ')' + | AddrOf(lv) -> + text "& " ++ (self#pLvalPrec addrOfLevel () lv) + + | StartOf(lv) -> self#pLval () lv + + method private pExpPrec (contextprec: int) () (e: exp) = + let thisLevel = getParenthLevel e in + let needParens = + if thisLevel >= contextprec then + true + else if contextprec == bitwiseLevel then + (* quiet down some GCC warnings *) + thisLevel == additiveLevel || thisLevel == comparativeLevel + else + false + in + if needParens then + chr '(' ++ self#pExp () e ++ chr ')' + else + self#pExp () e + + method pInit () = function + SingleInit e -> self#pExp () e + | CompoundInit (t, initl) -> + (* We do not print the type of the Compound *) +(* + let dinit e = d_init () e in + dprintf "{@[%a@]}" + (docList ~sep:(chr ',' ++ break) dinit) initl +*) + let printDesignator = + if not !msvcMode then begin + (* Print only for union when we do not initialize the first field *) + match unrollType t, initl with + TComp(ci, _), [(Field(f, NoOffset), _)] -> + if not (ci.cstruct) && ci.cfields != [] && + (List.hd ci.cfields) != f then + true + else + false + | _ -> false + end else + false + in + let d_oneInit = function + Field(f, NoOffset), i -> + (if printDesignator then + text ("." ^ f.fname ^ " = ") + else nil) ++ self#pInit () i + | Index(e, NoOffset), i -> + (if printDesignator then + text "[" ++ self#pExp () e ++ text "] = " else nil) ++ + self#pInit () i + | _ -> E.s (unimp "Trying to print malformed initializer") + in + chr '{' ++ (align + ++ ((docList ~sep:(chr ',' ++ break) d_oneInit) () initl) + ++ unalign) + ++ chr '}' +(* + | ArrayInit (_, _, il) -> + chr '{' ++ (align + ++ ((docList (chr ',' ++ break) (self#pInit ())) () il) + ++ unalign) + ++ chr '}' +*) + (* dump initializers to a file. *) + method dInit (out: out_channel) (ind: int) (i: init) = + (* Dump an array *) + let dumpArray (bt: typ) (il: 'a list) (getelem: 'a -> init) = + let onALine = (* How many elements on a line *) + match unrollType bt with TComp _ | TArray _ -> 1 | _ -> 4 + in + let rec outputElements (isfirst: bool) (room_on_line: int) = function + [] -> output_string out "}" + | (i: 'a) :: rest -> + if not isfirst then output_string out ", "; + let new_room_on_line = + if room_on_line == 0 then begin + output_string out "\n"; output_string out (String.make ind ' '); + onALine - 1 + end else + room_on_line - 1 + in + self#dInit out (ind + 2) (getelem i); + outputElements false new_room_on_line rest + in + output_string out "{ "; + outputElements true onALine il + in + match i with + SingleInit e -> + fprint out !lineLength (indent ind (self#pExp () e)) + | CompoundInit (t, initl) -> begin + match unrollType t with + TArray(bt, _, _) -> + dumpArray bt initl (fun (_, i) -> i) + | _ -> + (* Now a structure or a union *) + fprint out !lineLength (indent ind (self#pInit () i)) + end +(* + | ArrayInit (bt, len, initl) -> begin + (* If the base type does not contain structs then use the pInit + match unrollType bt with + TComp _ | TArray _ -> + dumpArray bt initl (fun x -> x) + | _ -> *) + fprint out !lineLength (indent ind (self#pInit () i)) + end +*) + + (** What terminator to print after an instruction. sometimes we want to + * print sequences of instructions separated by comma *) + val mutable printInstrTerminator = ";" + + (*** INSTRUCTIONS ****) + method pInstr () (i:instr) = (* imperative instruction *) + match i with + | Set(lv,e,l) -> begin + (* Be nice to some special cases *) + match e with + BinOp((PlusA|PlusPI|IndexPI),Lval(lv'), Const(CInt64(one,_,_)),_) + when Util.equals lv lv' && one = Int64.one && not !printCilAsIs -> + self#pLineDirective l + ++ self#pLval () lv + ++ text (" ++" ^ printInstrTerminator) + + | BinOp((MinusA|MinusPI),Lval(lv'), + Const(CInt64(one,_,_)), _) + when Util.equals lv lv' && one = Int64.one && not !printCilAsIs -> + self#pLineDirective l + ++ self#pLval () lv + ++ text (" --" ^ printInstrTerminator) + + | BinOp((PlusA|PlusPI|IndexPI),Lval(lv'),Const(CInt64(mone,_,_)),_) + when Util.equals lv lv' && mone = Int64.minus_one + && not !printCilAsIs -> + self#pLineDirective l + ++ self#pLval () lv + ++ text (" --" ^ printInstrTerminator) + + | BinOp((PlusA|PlusPI|IndexPI|MinusA|MinusPP|MinusPI|BAnd|BOr|BXor| + Mult|Div|Mod|Shiftlt|Shiftrt) as bop, + Lval(lv'),e,_) when Util.equals lv lv' -> + self#pLineDirective l + ++ self#pLval () lv + ++ text " " ++ d_binop () bop + ++ text "= " + ++ self#pExp () e + ++ text printInstrTerminator + + | _ -> + self#pLineDirective l + ++ self#pLval () lv + ++ text " = " + ++ self#pExp () e + ++ text printInstrTerminator + + end + (* In cabs2cil we have turned the call to builtin_va_arg into a + * three-argument call: the last argument is the address of the + * destination *) + | Call(None, Lval(Var vi, NoOffset), [dest; SizeOf t; adest], l) + when vi.vname = "__builtin_va_arg" && not !printCilAsIs -> + let destlv = match stripCasts adest with + AddrOf destlv -> destlv + | _ -> E.s (E.error "Encountered unexpected call to %s\n" vi.vname) + in + self#pLineDirective l + ++ self#pLval () destlv ++ text " = " + + (* Now the function name *) + ++ text "__builtin_va_arg" + ++ text "(" ++ (align + (* Now the arguments *) + ++ self#pExp () dest + ++ chr ',' ++ break + ++ self#pType None () t + ++ unalign) + ++ text (")" ^ printInstrTerminator) + + (* In cabs2cil we have dropped the last argument in the call to + * __builtin_stdarg_start. *) + | Call(None, Lval(Var vi, NoOffset), [marker], l) + when vi.vname = "__builtin_stdarg_start" && not !printCilAsIs -> begin + let last = self#getLastNamedArgument vi.vname in + self#pInstr () (Call(None,Lval(Var vi,NoOffset),[marker; last],l)) + end + + (* In cabs2cil we have dropped the last argument in the call to + * __builtin_next_arg. *) + | Call(res, Lval(Var vi, NoOffset), [ ], l) + when vi.vname = "__builtin_next_arg" && not !printCilAsIs -> begin + let last = self#getLastNamedArgument vi.vname in + self#pInstr () (Call(res,Lval(Var vi,NoOffset),[last],l)) + end + + (* In cparser we have turned the call to + * __builtin_types_compatible_p(t1, t2) into + * __builtin_types_compatible_p(sizeof t1, sizeof t2), so that we can + * represent the types as expressions. + * Remove the sizeofs when printing. *) + | Call(dest, Lval(Var vi, NoOffset), [SizeOf t1; SizeOf t2], l) + when vi.vname = "__builtin_types_compatible_p" && not !printCilAsIs -> + self#pLineDirective l + (* Print the destination *) + ++ (match dest with + None -> nil + | Some lv -> self#pLval () lv ++ text " = ") + (* Now the call itself *) + ++ dprintf "%s(%a, %a)" vi.vname + (self#pType None) t1 (self#pType None) t2 + ++ text printInstrTerminator + | Call(_, Lval(Var vi, NoOffset), _, l) + when vi.vname = "__builtin_types_compatible_p" && not !printCilAsIs -> + E.s (bug "__builtin_types_compatible_p: cabs2cil should have added sizeof to the arguments.") + + | Call(dest,e,args,l) -> + self#pLineDirective l + ++ (match dest with + None -> nil + | Some lv -> + self#pLval () lv ++ text " = " ++ + (* Maybe we need to print a cast *) + (let destt = typeOfLval lv in + match unrollType (typeOf e) with + TFun (rt, _, _, _) + when not (Util.equals (!pTypeSig rt) + (!pTypeSig destt)) -> + text "(" ++ self#pType None () destt ++ text ")" + | _ -> nil)) + (* Now the function name *) + ++ (let ed = self#pExp () e in + match e with + Lval(Var _, _) -> ed + | _ -> text "(" ++ ed ++ text ")") + ++ text "(" ++ + (align + (* Now the arguments *) + ++ (docList ~sep:(chr ',' ++ break) + (self#pExp ()) () args) + ++ unalign) + ++ text (")" ^ printInstrTerminator) + + | Asm(attrs, tmpls, outs, ins, clobs, l) -> + if !msvcMode then + self#pLineDirective l + ++ text "__asm {" + ++ (align + ++ (docList ~sep:line text () tmpls) + ++ unalign) + ++ text ("}" ^ printInstrTerminator) + else + self#pLineDirective l + ++ text ("__asm__ ") + ++ self#pAttrs () attrs + ++ text " (" + ++ (align + ++ (docList ~sep:line + (fun x -> text ("\"" ^ escape_string x ^ "\"")) + () tmpls) + ++ + (if outs = [] && ins = [] && clobs = [] then + chr ':' + else + (text ": " + ++ (docList ~sep:(chr ',' ++ break) + (fun (c, lv) -> + text ("\"" ^ escape_string c ^ "\" (") + ++ self#pLval () lv + ++ text ")") () outs))) + ++ + (if ins = [] && clobs = [] then + nil + else + (text ": " + ++ (docList ~sep:(chr ',' ++ break) + (fun (c, e) -> + text ("\"" ^ escape_string c ^ "\" (") + ++ self#pExp () e + ++ text ")") () ins))) + ++ + (if clobs = [] then nil + else + (text ": " + ++ (docList ~sep:(chr ',' ++ break) + (fun c -> text ("\"" ^ escape_string c ^ "\"")) + () + clobs))) + ++ unalign) + ++ text (")" ^ printInstrTerminator) + + + (**** STATEMENTS ****) + method pStmt () (s:stmt) = (* control-flow statement *) + self#pStmtNext invalidStmt () s + + method dStmt (out: out_channel) (ind: int) (s:stmt) : unit = + fprint out !lineLength (indent ind (self#pStmt () s)) + + method dBlock (out: out_channel) (ind: int) (b:block) : unit = + fprint out !lineLength (indent ind (align ++ self#pBlock () b)) + + method private pStmtNext (next: stmt) () (s: stmt) = + (* print the labels *) + ((docList ~sep:line (fun l -> self#pLabel () l)) () s.labels) + (* print the statement itself. If the labels are non-empty and the + * statement is empty, print a semicolon *) + ++ + (if s.skind = Instr [] && s.labels <> [] then + text ";" + else + (if s.labels <> [] then line else nil) + ++ self#pStmtKind next () s.skind) + + method private pLabel () = function + Label (s, _, true) -> text (s ^ ": ") + | Label (s, _, false) -> text (s ^ ": /* CIL Label */ ") + | Case (e, _) -> text "case " ++ self#pExp () e ++ text ": " + | Default _ -> text "default: " + + (* The pBlock will put the unalign itself *) + method pBlock () (blk: block) = + let rec dofirst () = function + [] -> nil + | [x] -> self#pStmtNext invalidStmt () x + | x :: rest -> dorest nil x rest + and dorest acc prev = function + [] -> acc ++ (self#pStmtNext invalidStmt () prev) + | x :: rest -> + dorest (acc ++ (self#pStmtNext x () prev) ++ line) + x rest + in + (* Let the host of the block decide on the alignment. The d_block will + * pop the alignment as well *) + text "{" + ++ + (if blk.battrs <> [] then + self#pAttrsGen true blk.battrs + else nil) + ++ line + ++ (dofirst () blk.bstmts) + ++ unalign ++ line ++ text "}" + + + (* Store here the name of the last file printed in a line number. This is + * private to the object *) + val mutable lastFileName = "" + (* Make sure that you only call self#pLineDirective on an empty line *) + method pLineDirective ?(forcefile=false) l = + currentLoc := l; + match !lineDirectiveStyle with + | Some style when l.line > 0 -> + let directive = + match style with + | LineComment -> text "//#line " + | LinePreprocessorOutput when not !msvcMode -> chr '#' + | _ -> text "#line" + in + let filename = + if forcefile || l.file <> lastFileName then + begin + lastFileName <- l.file; + text " \"" ++ text l.file ++ text "\"" + end + else + nil + in + leftflush ++ directive ++ chr ' ' ++ num l.line ++ filename ++ line + | _ -> + nil + + + method private pStmtKind (next: stmt) () = function + Return(None, l) -> + self#pLineDirective l + ++ text "return;" + + | Return(Some e, l) -> + self#pLineDirective l + ++ text "return (" + ++ self#pExp () e + ++ text ");" + + | Goto (sref, l) -> begin + (* Grab one of the labels *) + let rec pickLabel = function + [] -> None + | Label (l, _, _) :: _ -> Some l + | _ :: rest -> pickLabel rest + in + match pickLabel !sref.labels with + Some l -> text ("goto " ^ l ^ ";") + | None -> + ignore (error "Cannot find label for target of goto\n"); + text "goto __invalid_label;" + end + + | Break l -> + self#pLineDirective l + ++ text "break;" + + | Continue l -> + self#pLineDirective l + ++ text "continue;" + + | Instr il -> + align + ++ (docList ~sep:line (fun i -> self#pInstr () i) () il) + ++ unalign + + | If(be,t,{bstmts=[];battrs=[]},l) when not !printCilAsIs -> + self#pLineDirective l + ++ text "if" + ++ (align + ++ text " (" + ++ self#pExp () be + ++ text ") " + ++ self#pBlock () t) + + | If(be,t,{bstmts=[{skind=Goto(gref,_);labels=[]}]; + battrs=[]},l) + when !gref == next && not !printCilAsIs -> + self#pLineDirective l + ++ text "if" + ++ (align + ++ text " (" + ++ self#pExp () be + ++ text ") " + ++ self#pBlock () t) + + | If(be,{bstmts=[];battrs=[]},e,l) when not !printCilAsIs -> + self#pLineDirective l + ++ text "if" + ++ (align + ++ text " (" + ++ self#pExp () (UnOp(LNot,be,intType)) + ++ text ") " + ++ self#pBlock () e) + + | If(be,{bstmts=[{skind=Goto(gref,_);labels=[]}]; + battrs=[]},e,l) + when !gref == next && not !printCilAsIs -> + self#pLineDirective l + ++ text "if" + ++ (align + ++ text " (" + ++ self#pExp () (UnOp(LNot,be,intType)) + ++ text ") " + ++ self#pBlock () e) + + | If(be,t,e,l) -> + self#pLineDirective l + ++ (align + ++ text "if" + ++ (align + ++ text " (" + ++ self#pExp () be + ++ text ") " + ++ self#pBlock () t) + ++ text " " (* sm: indent next code 2 spaces (was 4) *) + ++ (align + ++ text "else " + ++ self#pBlock () e) + ++ unalign) + + | Switch(e,b,_,l) -> + self#pLineDirective l + ++ (align + ++ text "switch (" + ++ self#pExp () e + ++ text ") " + ++ self#pBlock () b) + +(* + | Loop(b, l, _, _) -> begin + (* Maybe the first thing is a conditional. Turn it into a WHILE *) + try + let term, bodystmts = + let rec skipEmpty = function + [] -> [] + | {skind=Instr [];labels=[]} :: rest -> skipEmpty rest + | x -> x + in + (* Bill McCloskey: Do not remove the If if it has labels *) + match skipEmpty b.bstmts with + {skind=If(e,tb,fb,_); labels=[]} :: rest + when not !printCilAsIs -> begin + match skipEmpty tb.bstmts, skipEmpty fb.bstmts with + [], {skind=Break _; labels=[]} :: _ -> e, rest + | {skind=Break _; labels=[]} :: _, [] + -> UnOp(LNot, e, intType), rest + | _ -> raise Not_found + end + | _ -> raise Not_found + in + self#pLineDirective l + ++ text "wh" + ++ (align + ++ text "ile (" + ++ self#pExp () term + ++ text ") " + ++ self#pBlock () {bstmts=bodystmts; battrs=b.battrs}) + + with Not_found -> + self#pLineDirective l + ++ text "wh" + ++ (align + ++ text "ile (1) " + ++ self#pBlock () b) + end +*) + + | While (e, b, l) -> + self#pLineDirective l + ++ (align + ++ text "while (" + ++ self#pExp () e + ++ text ") " + ++ self#pBlock () b) + + | DoWhile (e, b, l) -> + self#pLineDirective l + ++ (align + ++ text "do " + ++ self#pBlock () b + ++ text " while (" + ++ self#pExp () e + ++ text ");") + + | For (bInit, e, bIter, b, l) -> + ignore (E.warn + "in for loops, the 1st and 3rd expressions are not printed"); + self#pLineDirective l + ++ (align + ++ text "for (" + ++ text "/* ??? */" (* self#pBlock () bInit *) + ++ text "; " + ++ self#pExp () e + ++ text "; " + ++ text "/* ??? */" (* self#pBlock() bIter *) + ++ text ") " + ++ self#pBlock () b) + + | Block b -> align ++ self#pBlock () b + + | TryFinally (b, h, l) -> + self#pLineDirective l + ++ text "__try " + ++ align + ++ self#pBlock () b + ++ text " __fin" ++ align ++ text "ally " + ++ self#pBlock () h + + | TryExcept (b, (il, e), h, l) -> + self#pLineDirective l + ++ text "__try " + ++ align + ++ self#pBlock () b + ++ text " __e" ++ align ++ text "xcept(" ++ line + ++ align + (* Print the instructions but with a comma at the end, instead of + * semicolon *) + ++ (printInstrTerminator <- ","; + let res = + (docList ~sep:line (self#pInstr ()) + () il) + in + printInstrTerminator <- ";"; + res) + ++ self#pExp () e + ++ text ") " ++ unalign + ++ self#pBlock () h + + + (*** GLOBALS ***) + method pGlobal () (g:global) : doc = (* global (vars, types, etc.) *) + match g with + | GFun (fundec, l) -> + (* If the function has attributes then print a prototype because + * GCC cannot accept function attributes in a definition *) + let oldattr = fundec.svar.vattr in + (* Always pring the file name before function declarations *) + let proto = + if oldattr <> [] then + (self#pLineDirective l) ++ (self#pVDecl () fundec.svar) + ++ chr ';' ++ line + else nil in + (* Temporarily remove the function attributes *) + fundec.svar.vattr <- []; + let body = (self#pLineDirective ~forcefile:true l) + ++ (self#pFunDecl () fundec) in + fundec.svar.vattr <- oldattr; + proto ++ body ++ line + + | GType (typ, l) -> + self#pLineDirective ~forcefile:true l ++ + text "typedef " + ++ (self#pType (Some (text typ.tname)) () typ.ttype) + ++ text ";\n" + + | GEnumTag (enum, l) -> + self#pLineDirective l ++ + text "enum" ++ align ++ text (" " ^ enum.ename) ++ + text " {" ++ line + ++ (docList ~sep:(chr ',' ++ line) + (fun (n,i, loc) -> + text (n ^ " = ") + ++ self#pExp () i) + () enum.eitems) + ++ unalign ++ line ++ text "} " + ++ self#pAttrs () enum.eattr ++ text";\n" + + | GEnumTagDecl (enum, l) -> (* This is a declaration of a tag *) + self#pLineDirective l ++ + text ("enum " ^ enum.ename ^ ";\n") + + | GCompTag (comp, l) -> (* This is a definition of a tag *) + let n = comp.cname in + let su, su1, su2 = + if comp.cstruct then "struct", "str", "uct" + else "union", "uni", "on" + in + let sto_mod, rest_attr = separateStorageModifiers comp.cattr in + self#pLineDirective ~forcefile:true l ++ + text su1 ++ (align ++ text su2 ++ chr ' ' ++ (self#pAttrs () sto_mod) + ++ text n + ++ text " {" ++ line + ++ ((docList ~sep:line (self#pFieldDecl ())) () + comp.cfields) + ++ unalign) + ++ line ++ text "}" ++ + (self#pAttrs () rest_attr) ++ text ";\n" + + | GCompTagDecl (comp, l) -> (* This is a declaration of a tag *) + self#pLineDirective l ++ + text (compFullName comp) ++ text ";\n" + + | GVar (vi, io, l) -> + self#pLineDirective ~forcefile:true l ++ + self#pVDecl () vi + ++ chr ' ' + ++ (match io.init with + None -> nil + | Some i -> text " = " ++ + (let islong = + match i with + CompoundInit (_, il) when List.length il >= 8 -> true + | _ -> false + in + if islong then + line ++ self#pLineDirective l ++ text " " + else nil) ++ + (self#pInit () i)) + ++ text ";\n" + + (* print global variable 'extern' declarations, and function prototypes *) + | GVarDecl (vi, l) -> + self#pLineDirective l ++ + (self#pVDecl () vi) + ++ text ";\n" + + | GAsm (s, l) -> + self#pLineDirective l ++ + text ("__asm__(\"" ^ escape_string s ^ "\");\n") + + | GPragma (Attr(an, args), l) -> + (* sm: suppress printing pragmas that gcc does not understand *) + (* assume anything starting with "ccured" is ours *) + (* also don't print the 'combiner' pragma *) + (* nor 'cilnoremove' *) + let suppress = + not !print_CIL_Input && + not !msvcMode && + ((startsWith "box" an) || + (startsWith "ccured" an) || + (an = "merger") || + (an = "cilnoremove")) in + let d = + match an, args with + | _, [] -> + text an + | "weak", [ACons (symbol, [])] -> + text "weak " ++ text symbol + | _ -> + text (an ^ "(") + ++ docList ~sep:(chr ',') (self#pAttrParam ()) () args + ++ text ")" + in + self#pLineDirective l + ++ (if suppress then text "/* " else text "") + ++ (text "#pragma ") + ++ d + ++ (if suppress then text " */\n" else text "\n") + + | GText s -> + if s <> "//" then + text s ++ text "\n" + else + nil + + + method dGlobal (out: out_channel) (g: global) : unit = + (* For all except functions and variable with initializers, use the + * pGlobal *) + match g with + GFun (fdec, l) -> + (* If the function has attributes then print a prototype because + * GCC cannot accept function attributes in a definition *) + let oldattr = fdec.svar.vattr in + let proto = + if oldattr <> [] then + (self#pLineDirective l) ++ (self#pVDecl () fdec.svar) + ++ chr ';' ++ line + else nil in + fprint out !lineLength + (proto ++ (self#pLineDirective ~forcefile:true l)); + (* Temporarily remove the function attributes *) + fdec.svar.vattr <- []; + fprint out !lineLength (self#pFunDecl () fdec); + fdec.svar.vattr <- oldattr; + output_string out "\n" + + | GVar (vi, {init = Some i}, l) -> begin + fprint out !lineLength + (self#pLineDirective ~forcefile:true l ++ + self#pVDecl () vi + ++ text " = " + ++ (let islong = + match i with + CompoundInit (_, il) when List.length il >= 8 -> true + | _ -> false + in + if islong then + line ++ self#pLineDirective l ++ text " " + else nil)); + self#dInit out 3 i; + output_string out ";\n" + end + + | g -> fprint out !lineLength (self#pGlobal () g) + + method pFieldDecl () fi = + (self#pType + (Some (text (if fi.fname = missingFieldName then "" else fi.fname))) + () + fi.ftype) + ++ text " " + ++ (match fi.fbitfield with None -> nil + | Some i -> text ": " ++ num i ++ text " ") + ++ self#pAttrs () fi.fattr + ++ text ";" + + method private pFunDecl () f = + self#pVDecl () f.svar + ++ line + ++ text "{ " + ++ (align + (* locals. *) + ++ (docList ~sep:line (fun vi -> self#pVDecl () vi ++ text ";") + () f.slocals) + ++ line ++ line + (* the body *) + ++ ((* remember the declaration *) currentFormals <- f.sformals; + let body = self#pBlock () f.sbody in + currentFormals <- []; + body)) + ++ line + ++ text "}" + + (***** PRINTING DECLARATIONS and TYPES ****) + + method pType (nameOpt: doc option) (* Whether we are declaring a name or + * we are just printing a type *) + () (t:typ) = (* use of some type *) + let name = match nameOpt with None -> nil | Some d -> d in + let printAttributes (a: attributes) = + let pa = self#pAttrs () a in + match nameOpt with + | None when not !print_CIL_Input && not !msvcMode -> + (* Cannot print the attributes in this case because gcc does not + * like them here, except if we are printing for CIL, or for MSVC. + * In fact, for MSVC we MUST print attributes such as __stdcall *) + if pa = nil then nil else + text "/*" ++ pa ++ text "*/" + | _ -> pa + in + match t with + TVoid a -> + text "void" + ++ self#pAttrs () a + ++ text " " + ++ name + + | TInt (ikind,a) -> + d_ikind () ikind + ++ self#pAttrs () a + ++ text " " + ++ name + + | TFloat(fkind, a) -> + d_fkind () fkind + ++ self#pAttrs () a + ++ text " " + ++ name + + | TComp (comp, a) -> (* A reference to a struct *) + let su = if comp.cstruct then "struct" else "union" in + text (su ^ " " ^ comp.cname ^ " ") + ++ self#pAttrs () a + ++ name + + | TEnum (enum, a) -> + text ("enum " ^ enum.ename ^ " ") + ++ self#pAttrs () a + ++ name + | TPtr (bt, a) -> + (* Parenthesize the ( * attr name) if a pointer to a function or an + * array. However, on MSVC the __stdcall modifier must appear right + * before the pointer constructor "(__stdcall *f)". We push them into + * the parenthesis. *) + let (paren: doc option), (bt': typ) = + match bt with + TFun(rt, args, isva, fa) when !msvcMode -> + let an, af', at = partitionAttributes ~default:AttrType fa in + (* We take the af' and we put them into the parentheses *) + Some (text "(" ++ printAttributes af'), + TFun(rt, args, isva, addAttributes an at) + + | TFun _ | TArray _ -> Some (text "("), bt + + | _ -> None, bt + in + let name' = text "*" ++ printAttributes a ++ name in + let name'' = (* Put the parenthesis *) + match paren with + Some p -> p ++ name' ++ text ")" + | _ -> name' + in + self#pType + (Some name'') + () + bt' + + | TArray (elemt, lo, a) -> + (* ignore the const attribute for arrays *) + let a' = dropAttributes [ "const" ] a in + let name' = + if a' == [] then name else + if nameOpt == None then printAttributes a' else + text "(" ++ printAttributes a' ++ name ++ text ")" + in + self#pType + (Some (name' + ++ text "[" + ++ (match lo with None -> nil | Some e -> self#pExp () e) + ++ text "]")) + () + elemt + + | TFun (restyp, args, isvararg, a) -> + let name' = + if a == [] then name else + if nameOpt == None then printAttributes a else + text "(" ++ printAttributes a ++ name ++ text ")" + in + self#pType + (Some + (name' + ++ text "(" + ++ (align + ++ + (if args = Some [] && isvararg then + text "..." + else + (if args = None then nil + else if args = Some [] then text "void" + else + let pArg (aname, atype, aattr) = + let stom, rest = separateStorageModifiers aattr in + (* First the storage modifiers *) + (self#pAttrs () stom) + ++ (self#pType (Some (text aname)) () atype) + ++ text " " + ++ self#pAttrs () rest + in + (docList ~sep:(chr ',' ++ break) pArg) () + (argsToList args)) + ++ (if isvararg then break ++ text ", ..." else nil)) + ++ unalign) + ++ text ")")) + () + restyp + + | TNamed (t, a) -> + text t.tname ++ self#pAttrs () a ++ text " " ++ name + + | TBuiltin_va_list a -> + text "__builtin_va_list" + ++ self#pAttrs () a + ++ text " " + ++ name + + + (**** PRINTING ATTRIBUTES *********) + method pAttrs () (a: attributes) = + self#pAttrsGen false a + + + (* Print one attribute. Return also an indication whether this attribute + * should be printed inside the __attribute__ list *) + method pAttr (Attr(an, args): attribute) : doc * bool = + (* Recognize and take care of some known cases *) + match an, args with + "const", [] -> text "const", false + (* Put the aconst inside the attribute list *) + | "aconst", [] when not !msvcMode -> text "__const__", true + | "thread", [] when not !msvcMode -> text "__thread", false +(* + | "used", [] when not !msvcMode -> text "__attribute_used__", false +*) + | "volatile", [] -> text "volatile", false + | "restrict", [] -> text "__restrict", false + | "missingproto", [] -> text "/* missing proto */", false + | "cdecl", [] when !msvcMode -> text "__cdecl", false + | "stdcall", [] when !msvcMode -> text "__stdcall", false + | "fastcall", [] when !msvcMode -> text "__fastcall", false + | "declspec", args when !msvcMode -> + text "__declspec(" + ++ docList (self#pAttrParam ()) () args + ++ text ")", false + | "w64", [] when !msvcMode -> text "__w64", false + | "asm", args -> + text "__asm__(" + ++ docList (self#pAttrParam ()) () args + ++ text ")", false + (* we suppress printing mode(__si__) because it triggers an *) + (* internal compiler error in all current gcc versions *) + (* sm: I've now encountered a problem with mode(__hi__)... *) + (* I don't know what's going on, but let's try disabling all "mode"..*) + | "mode", [ACons(tag,[])] -> + text "/* mode(" ++ text tag ++ text ") */", false + + (* sm: also suppress "format" because we seem to print it in *) + (* a way gcc does not like *) + | "format", _ -> text "/* format attribute */", false + + (* sm: here's another one I don't want to see gcc warnings about.. *) + | "mayPointToStack", _ when not !print_CIL_Input + (* [matth: may be inside another comment.] + -> text "/*mayPointToStack*/", false + *) + -> text "", false + + | _ -> (* This is the dafault case *) + (* Add underscores to the name *) + let an' = if !msvcMode then "__" ^ an else "__" ^ an ^ "__" in + if args = [] then + text an', true + else + text (an' ^ "(") + ++ (docList (self#pAttrParam ()) () args) + ++ text ")", + true + + method pAttrParam () = function + | AInt n -> num n + | AStr s -> text ("\"" ^ escape_string s ^ "\"") + | ACons(s, []) -> text s + | ACons(s,al) -> + text (s ^ "(") + ++ (docList (self#pAttrParam ()) () al) + ++ text ")" + | ASizeOfE a -> text "sizeof(" ++ self#pAttrParam () a ++ text ")" + | ASizeOf t -> text "sizeof(" ++ self#pType None () t ++ text ")" + | ASizeOfS ts -> text "sizeof()" + | AAlignOfE a -> text "__alignof__(" ++ self#pAttrParam () a ++ text ")" + | AAlignOf t -> text "__alignof__(" ++ self#pType None () t ++ text ")" + | AAlignOfS ts -> text "__alignof__()" + | AUnOp(u,a1) -> + (d_unop () u) ++ text " (" ++ (self#pAttrParam () a1) ++ text ")" + + | ABinOp(b,a1,a2) -> + align + ++ text "(" + ++ (self#pAttrParam () a1) + ++ text ") " + ++ (d_binop () b) + ++ break + ++ text " (" ++ (self#pAttrParam () a2) ++ text ") " + ++ unalign + | ADot (ap, s) -> (self#pAttrParam () ap) ++ text ("." ^ s) + + (* A general way of printing lists of attributes *) + method private pAttrsGen (block: bool) (a: attributes) = + (* Scan all the attributes and separate those that must be printed inside + * the __attribute__ list *) + let rec loop (in__attr__: doc list) = function + [] -> begin + match in__attr__ with + [] -> nil + | _ :: _-> + (* sm: added 'forgcc' calls to not comment things out + * if CIL is the consumer; this is to address a case + * Daniel ran into where blockattribute(nobox) was being + * dropped by the merger + *) + (if block then + text (" " ^ (forgcc "/*") ^ " __blockattribute__(") + else + text "__attribute__((") + + ++ (docList ~sep:(chr ',' ++ break) + (fun a -> a)) () in__attr__ + ++ text ")" + ++ (if block then text (forgcc "*/") else text ")") + end + | x :: rest -> + let dx, ina = self#pAttr x in + if ina then + loop (dx :: in__attr__) rest + else + dx ++ text " " ++ loop in__attr__ rest + in + let res = loop [] a in + if res = nil then + res + else + text " " ++ res ++ text " " + +end (* class defaultCilPrinterClass *) + +let defaultCilPrinter = new defaultCilPrinterClass + +(* Top-level printing functions *) +let printType (pp: cilPrinter) () (t: typ) : doc = + pp#pType None () t + +let printExp (pp: cilPrinter) () (e: exp) : doc = + pp#pExp () e + +let printLval (pp: cilPrinter) () (lv: lval) : doc = + pp#pLval () lv + +let printGlobal (pp: cilPrinter) () (g: global) : doc = + pp#pGlobal () g + +let dumpGlobal (pp: cilPrinter) (out: out_channel) (g: global) : unit = + pp#dGlobal out g + +let printAttr (pp: cilPrinter) () (a: attribute) : doc = + let ad, _ = pp#pAttr a in ad + +let printAttrs (pp: cilPrinter) () (a: attributes) : doc = + pp#pAttrs () a + +let printInstr (pp: cilPrinter) () (i: instr) : doc = + pp#pInstr () i + +let printStmt (pp: cilPrinter) () (s: stmt) : doc = + pp#pStmt () s + +let printBlock (pp: cilPrinter) () (b: block) : doc = + (* We must add the alignment ourselves, beucase pBlock will pop it *) + align ++ pp#pBlock () b + +let dumpStmt (pp: cilPrinter) (out: out_channel) (ind: int) (s: stmt) : unit = + pp#dStmt out ind s + +let dumpBlock (pp: cilPrinter) (out: out_channel) (ind: int) (b: block) : unit = + pp#dBlock out ind b + +let printInit (pp: cilPrinter) () (i: init) : doc = + pp#pInit () i + +let dumpInit (pp: cilPrinter) (out: out_channel) (ind: int) (i: init) : unit = + pp#dInit out ind i + +(* Now define some short cuts *) +let d_exp () e = printExp defaultCilPrinter () e +let _ = pd_exp := d_exp +let d_lval () lv = printLval defaultCilPrinter () lv +let d_offset base () off = defaultCilPrinter#pOffset base off +let d_init () i = printInit defaultCilPrinter () i +let d_type () t = printType defaultCilPrinter () t +let d_global () g = printGlobal defaultCilPrinter () g +let d_attrlist () a = printAttrs defaultCilPrinter () a +let d_attr () a = printAttr defaultCilPrinter () a +let d_attrparam () e = defaultCilPrinter#pAttrParam () e +let d_label () l = defaultCilPrinter#pLabel () l +let d_stmt () s = printStmt defaultCilPrinter () s +let d_block () b = printBlock defaultCilPrinter () b +let d_instr () i = printInstr defaultCilPrinter () i + +let d_shortglobal () = function + GPragma (Attr(an, _), _) -> dprintf "#pragma %s" an + | GType (ti, _) -> dprintf "typedef %s" ti.tname + | GVarDecl (vi, _) -> dprintf "declaration of %s" vi.vname + | GVar (vi, _, _) -> dprintf "definition of %s" vi.vname + | GCompTag(ci,_) -> dprintf "definition of %s" (compFullName ci) + | GCompTagDecl(ci,_) -> dprintf "declaration of %s" (compFullName ci) + | GEnumTag(ei,_) -> dprintf "definition of enum %s" ei.ename + | GEnumTagDecl(ei,_) -> dprintf "declaration of enum %s" ei.ename + | GFun(fd, _) -> dprintf "definition of %s" fd.svar.vname + | GText _ -> text "GText" + | GAsm _ -> text "GAsm" + + +(* sm: given an ordinary CIL object printer, yield one which + * behaves the same, except it never prints #line directives + * (this is useful for debugging printfs) *) +let dn_obj (func: unit -> 'a -> doc) : (unit -> 'a -> doc) = +begin + (* construct the closure to return *) + let theFunc () (obj:'a) : doc = + begin + let prevStyle = !lineDirectiveStyle in + lineDirectiveStyle := None; + let ret = (func () obj) in (* call underlying printer *) + lineDirectiveStyle := prevStyle; + ret + end in + theFunc +end + +(* now define shortcuts for the non-location-printing versions, + * with the naming prefix "dn_" *) +let dn_exp = (dn_obj d_exp) +let dn_lval = (dn_obj d_lval) +(* dn_offset is missing because it has a different interface *) +let dn_init = (dn_obj d_init) +let dn_type = (dn_obj d_type) +let dn_global = (dn_obj d_global) +let dn_attrlist = (dn_obj d_attrlist) +let dn_attr = (dn_obj d_attr) +let dn_attrparam = (dn_obj d_attrparam) +let dn_stmt = (dn_obj d_stmt) +let dn_instr = (dn_obj d_instr) + + +(* Now define a cilPlainPrinter *) +class plainCilPrinterClass = + (* We keep track of the composite types that we have done to avoid + * recursion *) + let donecomps : (int, unit) H.t = H.create 13 in + object (self) + + inherit defaultCilPrinterClass as super + + (*** PLAIN TYPES ***) + method pType (dn: doc option) () (t: typ) = + match dn with + None -> self#pOnlyType () t + | Some d -> d ++ text " : " ++ self#pOnlyType () t + + method private pOnlyType () = function + TVoid a -> dprintf "TVoid(@[%a@])" self#pAttrs a + | TInt(ikind, a) -> dprintf "TInt(@[%a,@?%a@])" + d_ikind ikind self#pAttrs a + | TFloat(fkind, a) -> + dprintf "TFloat(@[%a,@?%a@])" d_fkind fkind self#pAttrs a + | TNamed (t, a) -> + dprintf "TNamed(@[%s,@?%a,@?%a@])" + t.tname self#pOnlyType t.ttype self#pAttrs a + | TPtr(t, a) -> dprintf "TPtr(@[%a,@?%a@])" self#pOnlyType t self#pAttrs a + | TArray(t,l,a) -> + let dl = match l with + None -> text "None" | Some l -> dprintf "Some(@[%a@])" self#pExp l in + dprintf "TArray(@[%a,@?%a,@?%a@])" + self#pOnlyType t insert dl self#pAttrs a + | TEnum(enum,a) -> dprintf "Enum(%s,@[%a@])" enum.ename self#pAttrs a + | TFun(tr,args,isva,a) -> + dprintf "TFun(@[%a,@?%a%s,@?%a@])" + self#pOnlyType tr + insert + (if args = None then text "None" + else (docList ~sep:(chr ',' ++ break) + (fun (an,at,aa) -> + dprintf "%s: %a" an self#pOnlyType at)) + () + (argsToList args)) + (if isva then "..." else "") self#pAttrs a + | TComp (comp, a) -> + if H.mem donecomps comp.ckey then + dprintf "TCompLoop(%s %s, _, %a)" + (if comp.cstruct then "struct" else "union") comp.cname + self#pAttrs comp.cattr + else begin + H.add donecomps comp.ckey (); (* Add it before we do the fields *) + dprintf "TComp(@[%s %s,@?%a,@?%a,@?%a@])" + (if comp.cstruct then "struct" else "union") comp.cname + (docList ~sep:(chr ',' ++ break) + (fun f -> dprintf "%s : %a" f.fname self#pOnlyType f.ftype)) + comp.cfields + self#pAttrs comp.cattr + self#pAttrs a + end + | TBuiltin_va_list a -> + dprintf "TBuiltin_va_list(%a)" self#pAttrs a + + + (* Some plain pretty-printers. Unlike the above these expose all the + * details of the internal representation *) + method pExp () = function + Const(c) -> + let d_plainconst () c = + match c with + CInt64(i, ik, so) -> + dprintf "Int64(%s,%a,%s)" + (Int64.format "%d" i) + d_ikind ik + (match so with Some s -> s | _ -> "None") + | CStr(s) -> + text ("CStr(\"" ^ escape_string s ^ "\")") + | CWStr(s) -> + dprintf "CWStr(%a)" d_const c + + | CChr(c) -> text ("CChr('" ^ escape_char c ^ "')") + | CReal(f, fk, so) -> + dprintf "CReal(%f, %a, %s)" + f + d_fkind fk + (match so with Some s -> s | _ -> "None") + | CEnum(_, s, _) -> text s + in + text "Const(" ++ d_plainconst () c ++ text ")" + + + | Lval(lv) -> + text "Lval(" + ++ (align + ++ self#pLval () lv + ++ unalign) + ++ text ")" + + | CastE(t,e) -> dprintf "CastE(@[%a,@?%a@])" self#pOnlyType t self#pExp e + + | UnOp(u,e1,_) -> + dprintf "UnOp(@[%a,@?%a@])" + d_unop u self#pExp e1 + + | BinOp(b,e1,e2,_) -> + let d_plainbinop () b = + match b with + PlusA -> text "PlusA" + | PlusPI -> text "PlusPI" + | IndexPI -> text "IndexPI" + | MinusA -> text "MinusA" + | MinusPP -> text "MinusPP" + | MinusPI -> text "MinusPI" + | _ -> d_binop () b + in + dprintf "%a(@[%a,@?%a@])" d_plainbinop b + self#pExp e1 self#pExp e2 + + | SizeOf (t) -> + text "sizeof(" ++ self#pType None () t ++ chr ')' + | SizeOfE (e) -> + text "sizeofE(" ++ self#pExp () e ++ chr ')' + | SizeOfStr (s) -> + text "sizeofStr(" ++ d_const () (CStr s) ++ chr ')' + | AlignOf (t) -> + text "__alignof__(" ++ self#pType None () t ++ chr ')' + | AlignOfE (e) -> + text "__alignof__(" ++ self#pExp () e ++ chr ')' + + | StartOf lv -> dprintf "StartOf(%a)" self#pLval lv + | AddrOf (lv) -> dprintf "AddrOf(%a)" self#pLval lv + + + + method private d_plainoffset () = function + NoOffset -> text "NoOffset" + | Field(fi,o) -> + dprintf "Field(@[%s:%a,@?%a@])" + fi.fname self#pOnlyType fi.ftype self#d_plainoffset o + | Index(e, o) -> + dprintf "Index(@[%a,@?%a@])" self#pExp e self#d_plainoffset o + + method pInit () = function + SingleInit e -> dprintf "SI(%a)" d_exp e + | CompoundInit (t, initl) -> + let d_plainoneinit (o, i) = + self#d_plainoffset () o ++ text " = " ++ self#pInit () i + in + dprintf "CI(@[%a,@?%a@])" self#pOnlyType t + (docList ~sep:(chr ',' ++ break) d_plainoneinit) initl +(* + | ArrayInit (t, len, initl) -> + let idx = ref (- 1) in + let d_plainoneinit i = + incr idx; + text "[" ++ num !idx ++ text "] = " ++ self#pInit () i + in + dprintf "AI(@[%a,%d,@?%a@])" self#pOnlyType t len + (docList ~sep:(chr ',' ++ break) d_plainoneinit) initl +*) + method pLval () (lv: lval) = + match lv with + | Var vi, o -> dprintf "Var(@[%s,@?%a@])" vi.vname self#d_plainoffset o + | Mem e, o -> dprintf "Mem(@[%a,@?%a@])" self#pExp e self#d_plainoffset o + + +end +let plainCilPrinter = new plainCilPrinterClass + +(* And now some shortcuts *) +let d_plainexp () e = plainCilPrinter#pExp () e +let d_plaintype () t = plainCilPrinter#pType None () t +let d_plaininit () i = plainCilPrinter#pInit () i +let d_plainlval () l = plainCilPrinter#pLval () l + +(* zra: this allows pretty printers not in cil.ml to + be exposed to cilmain.ml *) +let printerForMaincil = ref defaultCilPrinter + +let rec d_typsig () = function + TSArray (ts, eo, al) -> + dprintf "TSArray(@[%a,@?%a,@?%a@])" + d_typsig ts + insert (text (match eo with None -> "None" + | Some e -> "Some " ^ Int64.to_string e)) + d_attrlist al + | TSPtr (ts, al) -> + dprintf "TSPtr(@[%a,@?%a@])" + d_typsig ts d_attrlist al + | TSComp (iss, name, al) -> + dprintf "TSComp(@[%s %s,@?%a@])" + (if iss then "struct" else "union") name + d_attrlist al + | TSFun (rt, args, isva, al) -> + dprintf "TSFun(@[%a,@?%a,%b,@?%a@])" + d_typsig rt + (docList ~sep:(chr ',' ++ break) (d_typsig ())) args isva + d_attrlist al + | TSEnum (n, al) -> + dprintf "TSEnum(@[%s,@?%a@])" + n d_attrlist al + | TSBase t -> dprintf "TSBase(%a)" d_type t + + +let newVID () = + let t = !nextGlobalVID in + incr nextGlobalVID; + t + + (* Make a varinfo. Used mostly as a helper function below *) +let makeVarinfo global name typ = + (* Strip const from type for locals *) + let vi = + { vname = name; + vid = newVID (); + vglob = global; + vtype = if global then typ else typeRemoveAttributes ["const"] typ; + vdecl = lu; + vinline = false; + vattr = []; + vstorage = NoStorage; + vaddrof = false; + vreferenced = false; (* sm *) + } in + vi + +let copyVarinfo (vi: varinfo) (newname: string) : varinfo = + let vi' = {vi with vname = newname; vid = newVID () } in + vi' + +let makeLocal fdec name typ = (* a helper function *) + fdec.smaxid <- 1 + fdec.smaxid; + let vi = makeVarinfo false name typ in + vi + + (* Make a local variable and add it to a function *) +let makeLocalVar fdec ?(insert = true) name typ = + let vi = makeLocal fdec name typ in + if insert then fdec.slocals <- fdec.slocals @ [vi]; + vi + + +let makeTempVar fdec ?(name = "__cil_tmp") typ : varinfo = + let name = name ^ (string_of_int (1 + fdec.smaxid)) in + makeLocalVar fdec name typ + + + (* Set the formals and re-create the function name based on the information*) +let setFormals (f: fundec) (forms: varinfo list) = + f.sformals <- forms; (* Set the formals *) + match unrollType f.svar.vtype with + TFun(rt, _, isva, fa) -> + f.svar.vtype <- + TFun(rt, + Some (List.map (fun a -> (a.vname, a.vtype, a.vattr)) forms), + isva, fa) + | _ -> E.s (E.bug "Set formals. %s does not have function type\n" + f.svar.vname) + + (* Set the types of arguments and results as given by the function type + * passed as the second argument *) +let setFunctionType (f: fundec) (t: typ) = + match unrollType t with + TFun (rt, Some args, va, a) -> + if List.length f.sformals <> List.length args then + E.s (E.bug "setFunctionType: number of arguments differs from the number of formals"); + (* Change the function type. *) + f.svar.vtype <- t; + (* Change the sformals and we know that indirectly we'll change the + * function type *) + List.iter2 + (fun (an,at,aa) f -> + f.vtype <- at; f.vattr <- aa) + args f.sformals + + | _ -> E.s (E.bug "setFunctionType: not a function type") + + + (* Set the types of arguments and results as given by the function type + * passed as the second argument *) +let setFunctionTypeMakeFormals (f: fundec) (t: typ) = + match unrollType t with + TFun (rt, Some args, va, a) -> + if f.sformals <> [] then + E.s (E.warn "setFunctionTypMakeFormals called on function %s with some formals already" + f.svar.vname); + (* Change the function type. *) + f.svar.vtype <- t; + f.sformals <- []; + + f.sformals <- List.map (fun (n,t,a) -> makeLocal f n t) args; + + setFunctionType f t + + | _ -> E.s (E.bug "setFunctionTypeMakeFormals: not a function type: %a" + d_type t) + + +let setMaxId (f: fundec) = + f.smaxid <- List.length f.sformals + List.length f.slocals + + + (* Make a formal variable for a function. Insert it in both the sformals + * and the type of the function. You can optionally specify where to insert + * this one. If where = "^" then it is inserted first. If where = "$" then + * it is inserted last. Otherwise where must be the name of a formal after + * which to insert this. By default it is inserted at the end. *) +let makeFormalVar fdec ?(where = "$") name typ : varinfo = + (* Search for the insertion place *) + let thenewone = ref fdec.svar in (* Just a placeholder *) + let makeit () : varinfo = + let vi = makeLocal fdec name typ in + thenewone := vi; + vi + in + let rec loopFormals = function + [] -> + if where = "$" then [makeit ()] + else E.s (E.error "makeFormalVar: cannot find insert-after formal %s" + where) + | f :: rest when f.vname = where -> f :: makeit () :: rest + | f :: rest -> f :: loopFormals rest + in + let newformals = + if where = "^" then makeit () :: fdec.sformals else + loopFormals fdec.sformals in + setFormals fdec newformals; + !thenewone + + (* Make a global variable. Your responsibility to make sure that the name + * is unique *) +let makeGlobalVar name typ = + let vi = makeVarinfo true name typ in + vi + + + (* Make an empty function *) +let emptyFunction name = + { svar = makeGlobalVar name (TFun(voidType, Some [], false,[])); + smaxid = 0; + slocals = []; + sformals = []; + sbody = mkBlock []; + smaxstmtid = None; + sallstmts = []; + } + + + + (* A dummy function declaration handy for initialization *) +let dummyFunDec = emptyFunction "@dummy" +let dummyFile = + { globals = []; + fileName = ""; + globinit = None; + globinitcalled = false;} + +let saveBinaryFile (cil_file : file) (filename : string) = + let outchan = open_out_bin filename in + Marshal.to_channel outchan cil_file [] ; + close_out outchan + +let saveBinaryFileChannel (cil_file : file) (outchan : out_channel) = + Marshal.to_channel outchan cil_file [] + +let loadBinaryFile (filename : string) : file = + let inchan = open_in_bin filename in + let cil_file = (Marshal.from_channel inchan : file) in + close_in inchan ; + cil_file + + +(* Take the name of a file and make a valid symbol name out of it. There are + * a few chanracters that are not valid in symbols *) +let makeValidSymbolName (s: string) = + let s = String.copy s in (* So that we can update in place *) + let l = String.length s in + for i = 0 to l - 1 do + let c = String.get s i in + let isinvalid = + match c with + '-' | '.' -> true + | _ -> false + in + if isinvalid then + String.set s i '_'; + done; + s + + +(*** Define the visiting engine ****) +(* visit all the nodes in a Cil expression *) +let doVisit (vis: cilVisitor) + (startvisit: 'a -> 'a visitAction) + (children: cilVisitor -> 'a -> 'a) + (node: 'a) : 'a = + let action = startvisit node in + match action with + SkipChildren -> node + | ChangeTo node' -> node' + | _ -> (* DoChildren and ChangeDoChildrenPost *) + let nodepre = match action with + ChangeDoChildrenPost (node', _) -> node' + | _ -> node + in + let nodepost = children vis nodepre in + match action with + ChangeDoChildrenPost (_, f) -> f nodepost + | _ -> nodepost + +(* mapNoCopy is like map but avoid copying the list if the function does not + * change the elements. *) +let rec mapNoCopy (f: 'a -> 'a) = function + [] -> [] + | (i :: resti) as li -> + let i' = f i in + let resti' = mapNoCopy f resti in + if i' != i || resti' != resti then i' :: resti' else li + +let rec mapNoCopyList (f: 'a -> 'a list) = function + [] -> [] + | (i :: resti) as li -> + let il' = f i in + let resti' = mapNoCopyList f resti in + match il' with + [i'] when i' == i && resti' == resti -> li + | _ -> il' @ resti' + +(* A visitor for lists *) +let doVisitList (vis: cilVisitor) + (startvisit: 'a -> 'a list visitAction) + (children: cilVisitor -> 'a -> 'a) + (node: 'a) : 'a list = + let action = startvisit node in + match action with + SkipChildren -> [node] + | ChangeTo nodes' -> nodes' + | _ -> + let nodespre = match action with + ChangeDoChildrenPost (nodespre, _) -> nodespre + | _ -> [node] + in + let nodespost = mapNoCopy (children vis) nodespre in + match action with + ChangeDoChildrenPost (_, f) -> f nodespost + | _ -> nodespost + +let debugVisit = false + +let rec visitCilExpr (vis: cilVisitor) (e: exp) : exp = + doVisit vis vis#vexpr childrenExp e +and childrenExp (vis: cilVisitor) (e: exp) : exp = + let vExp e = visitCilExpr vis e in + let vTyp t = visitCilType vis t in + let vLval lv = visitCilLval vis lv in + match e with + | Const (CEnum(v, s, ei)) -> + let v' = vExp v in + if v' != v then Const (CEnum(v', s, ei)) else e + + | Const _ -> e + | SizeOf t -> + let t'= vTyp t in + if t' != t then SizeOf t' else e + | SizeOfE e1 -> + let e1' = vExp e1 in + if e1' != e1 then SizeOfE e1' else e + | SizeOfStr s -> e + + | AlignOf t -> + let t' = vTyp t in + if t' != t then AlignOf t' else e + | AlignOfE e1 -> + let e1' = vExp e1 in + if e1' != e1 then AlignOfE e1' else e + | Lval lv -> + let lv' = vLval lv in + if lv' != lv then Lval lv' else e + | UnOp (uo, e1, t) -> + let e1' = vExp e1 in let t' = vTyp t in + if e1' != e1 || t' != t then UnOp(uo, e1', t') else e + | BinOp (bo, e1, e2, t) -> + let e1' = vExp e1 in let e2' = vExp e2 in let t' = vTyp t in + if e1' != e1 || e2' != e2 || t' != t then BinOp(bo, e1',e2',t') else e + | CastE (t, e1) -> + let t' = vTyp t in let e1' = vExp e1 in + if t' != t || e1' != e1 then CastE(t', e1') else e + | AddrOf lv -> + let lv' = vLval lv in + if lv' != lv then AddrOf lv' else e + | StartOf lv -> + let lv' = vLval lv in + if lv' != lv then StartOf lv' else e + +and visitCilInit (vis: cilVisitor) (i: init) : init = + doVisit vis vis#vinit childrenInit i +and childrenInit (vis: cilVisitor) (i: init) : init = + let fExp e = visitCilExpr vis e in + let fInit i = visitCilInit vis i in + let fTyp t = visitCilType vis t in + match i with + | SingleInit e -> + let e' = fExp e in + if e' != e then SingleInit e' else i + | CompoundInit (t, initl) -> + let t' = fTyp t in + (* Collect the new initializer list, in reverse. We prefer two + * traversals to ensure tail-recursion. *) + let newinitl : (offset * init) list ref = ref [] in + (* Keep track whether the list has changed *) + let hasChanged = ref false in + let doOneInit ((o, i) as oi) = + let o' = visitCilInitOffset vis o in (* use initializer version *) + let i' = fInit i in + let newio = + if o' != o || i' != i then + begin hasChanged := true; (o', i') end else oi + in + newinitl := newio :: !newinitl + in + List.iter doOneInit initl; + let initl' = if !hasChanged then List.rev !newinitl else initl in + if t' != t || initl' != initl then CompoundInit (t', initl') else i + +and visitCilLval (vis: cilVisitor) (lv: lval) : lval = + doVisit vis vis#vlval childrenLval lv +and childrenLval (vis: cilVisitor) (lv: lval) : lval = + (* and visit its subexpressions *) + let vExp e = visitCilExpr vis e in + let vOff off = visitCilOffset vis off in + match lv with + Var v, off -> + let v' = doVisit vis vis#vvrbl (fun _ x -> x) v in + let off' = vOff off in + if v' != v || off' != off then Var v', off' else lv + | Mem e, off -> + let e' = vExp e in + let off' = vOff off in + if e' != e || off' != off then Mem e', off' else lv + +and visitCilOffset (vis: cilVisitor) (off: offset) : offset = + doVisit vis vis#voffs childrenOffset off +and childrenOffset (vis: cilVisitor) (off: offset) : offset = + let vOff off = visitCilOffset vis off in + match off with + Field (f, o) -> + let o' = vOff o in + if o' != o then Field (f, o') else off + | Index (e, o) -> + let e' = visitCilExpr vis e in + let o' = vOff o in + if e' != e || o' != o then Index (e', o') else off + | NoOffset -> off + +(* sm: for offsets in initializers, the 'startvisit' will be the + * vinitoffs method, but we can re-use the childrenOffset from + * above since recursive offsets are visited by voffs. (this point + * is moot according to cil.mli which claims the offsets in + * initializers will never recursively contain offsets) + *) +and visitCilInitOffset (vis: cilVisitor) (off: offset) : offset = + doVisit vis vis#vinitoffs childrenOffset off + +and visitCilInstr (vis: cilVisitor) (i: instr) : instr list = + let oldloc = !currentLoc in + currentLoc := (get_instrLoc i); + assertEmptyQueue vis; + let res = doVisitList vis vis#vinst childrenInstr i in + currentLoc := oldloc; + (* See if we have accumulated some instructions *) + vis#unqueueInstr () @ res + +and childrenInstr (vis: cilVisitor) (i: instr) : instr = + let fExp = visitCilExpr vis in + let fLval = visitCilLval vis in + match i with + | Set(lv,e,l) -> + let lv' = fLval lv in let e' = fExp e in + if lv' != lv || e' != e then Set(lv',e',l) else i + | Call(None,f,args,l) -> + let f' = fExp f in let args' = mapNoCopy fExp args in + if f' != f || args' != args then Call(None,f',args',l) else i + | Call(Some lv,fn,args,l) -> + let lv' = fLval lv in let fn' = fExp fn in + let args' = mapNoCopy fExp args in + if lv' != lv || fn' != fn || args' != args + then Call(Some lv', fn', args', l) else i + + | Asm(sl,isvol,outs,ins,clobs,l) -> + let outs' = mapNoCopy (fun ((s,lv) as pair) -> + let lv' = fLval lv in + if lv' != lv then (s,lv') else pair) outs in + let ins' = mapNoCopy (fun ((s,e) as pair) -> + let e' = fExp e in + if e' != e then (s,e') else pair) ins in + if outs' != outs || ins' != ins then + Asm(sl,isvol,outs',ins',clobs,l) else i + + +(* visit all nodes in a Cil statement tree in preorder *) +and visitCilStmt (vis: cilVisitor) (s: stmt) : stmt = + let oldloc = !currentLoc in + currentLoc := (get_stmtLoc s.skind) ; + assertEmptyQueue vis; + let toPrepend : instr list ref = ref [] in (* childrenStmt may add to this *) + let res = doVisit vis vis#vstmt (childrenStmt toPrepend) s in + (* Now see if we have saved some instructions *) + toPrepend := !toPrepend @ vis#unqueueInstr (); + (match !toPrepend with + [] -> () (* Return the same statement *) + | _ -> + (* Make our statement contain the instructions to prepend *) + res.skind <- Block { battrs = []; bstmts = [ mkStmt (Instr !toPrepend); + mkStmt res.skind ] }); + currentLoc := oldloc; + res + +and childrenStmt (toPrepend: instr list ref) (vis:cilVisitor) (s:stmt): stmt = + let fExp e = (visitCilExpr vis e) in + let fBlock b = visitCilBlock vis b in + let fInst i = visitCilInstr vis i in + (* Just change the statement kind *) + let skind' = + match s.skind with + Break _ | Continue _ | Goto _ | Return (None, _) -> s.skind + | Return (Some e, l) -> + let e' = fExp e in + if e' != e then Return (Some e', l) else s.skind +(* + | Loop (b, l, s1, s2) -> + let b' = fBlock b in + if b' != b then Loop (b', l, s1, s2) else s.skind +*) + | While (e, b, l) -> + let e' = fExp e in + let b' = fBlock b in + if e' != e || b' != b then While (e', b', l) else s.skind + | DoWhile (e, b, l) -> + let b' = fBlock b in + let e' = fExp e in + if e' != e || b' != b then DoWhile (e', b', l) else s.skind + | For (bInit, e, bIter, b, l) -> + let bInit' = fBlock bInit in + let e' = fExp e in + let bIter' = fBlock bIter in + let b' = fBlock b in + if bInit' != bInit || e' != e || bIter' != bIter || b' != b then + For (bInit', e', bIter', b', l) else s.skind + | If(e, s1, s2, l) -> + let e' = fExp e in + (*if e queued any instructions, pop them here and remember them so that + they are inserted before the If stmt, not in the then block. *) + toPrepend := vis#unqueueInstr (); + let s1'= fBlock s1 in let s2'= fBlock s2 in + (* the stmts in the blocks should have cleaned up after themselves.*) + assertEmptyQueue vis; + if e' != e || s1' != s1 || s2' != s2 then + If(e', s1', s2', l) else s.skind + | Switch (e, b, stmts, l) -> + let e' = fExp e in + toPrepend := vis#unqueueInstr (); (* insert these before the switch *) + let b' = fBlock b in + (* the stmts in b should have cleaned up after themselves.*) + assertEmptyQueue vis; + (* Don't do stmts, but we better not change those *) + if e' != e || b' != b then Switch (e', b', stmts, l) else s.skind + | Instr il -> + let il' = mapNoCopyList fInst il in + if il' != il then Instr il' else s.skind + | Block b -> + let b' = fBlock b in + if b' != b then Block b' else s.skind + | TryFinally (b, h, l) -> + let b' = fBlock b in + let h' = fBlock h in + if b' != b || h' != h then TryFinally(b', h', l) else s.skind + | TryExcept (b, (il, e), h, l) -> + let b' = fBlock b in + assertEmptyQueue vis; + (* visit the instructions *) + let il' = mapNoCopyList fInst il in + (* Visit the expression *) + let e' = fExp e in + let il'' = + let more = vis#unqueueInstr () in + if more != [] then + il' @ more + else + il' + in + let h' = fBlock h in + (* Now collect the instructions *) + if b' != b || il'' != il || e' != e || h' != h then + TryExcept(b', (il'', e'), h', l) + else s.skind + in + if skind' != s.skind then s.skind <- skind'; + (* Visit the labels *) + let labels' = + let fLabel = function + Case (e, l) as lb -> + let e' = fExp e in + if e' != e then Case (e', l) else lb + | lb -> lb + in + mapNoCopy fLabel s.labels + in + if labels' != s.labels then s.labels <- labels'; + s + + + +and visitCilBlock (vis: cilVisitor) (b: block) : block = + doVisit vis vis#vblock childrenBlock b +and childrenBlock (vis: cilVisitor) (b: block) : block = + let fStmt s = visitCilStmt vis s in + let stmts' = mapNoCopy fStmt b.bstmts in + if stmts' != b.bstmts then { battrs = b.battrs; bstmts = stmts'} else b + + +and visitCilType (vis : cilVisitor) (t : typ) : typ = + doVisit vis vis#vtype childrenType t +and childrenType (vis : cilVisitor) (t : typ) : typ = + (* look for types referred to inside t's definition *) + let fTyp t = visitCilType vis t in + let fAttr a = visitCilAttributes vis a in + match t with + TPtr(t1, a) -> + let t1' = fTyp t1 in + let a' = fAttr a in + if t1' != t || a' != a then TPtr(t1', a') else t + | TArray(t1, None, a) -> + let t1' = fTyp t1 in + let a' = fAttr a in + if t1' != t || a' != a then TArray(t1', None, a') else t + | TArray(t1, Some e, a) -> + let t1' = fTyp t1 in + let e' = visitCilExpr vis e in + let a' = fAttr a in + if t1' != t || e' != e || a' != a then TArray(t1', Some e', a') else t + + (* DON'T recurse into the compinfo, this is done in visitCilGlobal. + User can iterate over cinfo.cfields manually, if desired.*) + | TComp(cinfo, a) -> + let a' = fAttr a in + if a != a' then TComp(cinfo, a') else t + + | TFun(rettype, args, isva, a) -> + let rettype' = fTyp rettype in + (* iterate over formals, as variable declarations *) + let argslist = argsToList args in + let visitArg ((an,at,aa) as arg) = + let at' = fTyp at in + let aa' = fAttr aa in + if at' != at || aa' != aa then (an,at',aa') else arg + in + let argslist' = mapNoCopy visitArg argslist in + let a' = fAttr a in + if rettype' != rettype || argslist' != argslist || a' != a then + let args' = if argslist' == argslist then args else Some argslist' in + TFun(rettype', args', isva, a') else t + + | TNamed(t1, a) -> (* Do not go into the type. Will do it at the time of + * GType *) + let a' = fAttr a in + if a' != a then TNamed (t1, a') else t + + | _ -> (* other types (TVoid, TInt, TFloat, TEnum, and TBuiltin_va_list) + don't contain nested types, but they do have attributes. *) + let a = typeAttrs t in + let a' = fAttr a in + if a' != a then setTypeAttrs t a' else t + + +(* for declarations, we visit the types inside; but for uses, *) +(* we just visit the varinfo node *) +and visitCilVarDecl (vis : cilVisitor) (v : varinfo) : varinfo = + doVisit vis vis#vvdec childrenVarDecl v +and childrenVarDecl (vis : cilVisitor) (v : varinfo) : varinfo = + v.vtype <- visitCilType vis v.vtype; + v.vattr <- visitCilAttributes vis v.vattr; + v + +and visitCilAttributes (vis: cilVisitor) (al: attribute list) : attribute list= + let al' = + mapNoCopyList (doVisitList vis vis#vattr childrenAttribute) al in + if al' != al then + (* Must re-sort *) + addAttributes al' [] + else + al +and childrenAttribute (vis: cilVisitor) (a: attribute) : attribute = + let fAttrP a = visitCilAttrParams vis a in + match a with + Attr (n, args) -> + let args' = mapNoCopy fAttrP args in + if args' != args then Attr(n, args') else a + + +and visitCilAttrParams (vis: cilVisitor) (a: attrparam) : attrparam = + doVisit vis vis#vattrparam childrenAttrparam a +and childrenAttrparam (vis: cilVisitor) (aa: attrparam) : attrparam = + let fTyp t = visitCilType vis t in + let fAttrP a = visitCilAttrParams vis a in + match aa with + AInt _ | AStr _ -> aa + | ACons(n, args) -> + let args' = mapNoCopy fAttrP args in + if args' != args then ACons(n, args') else aa + | ASizeOf t -> + let t' = fTyp t in + if t' != t then ASizeOf t' else aa + | ASizeOfE e -> + let e' = fAttrP e in + if e' != e then ASizeOfE e' else aa + | AAlignOf t -> + let t' = fTyp t in + if t' != t then AAlignOf t' else aa + | AAlignOfE e -> + let e' = fAttrP e in + if e' != e then AAlignOfE e' else aa + | ASizeOfS _ | AAlignOfS _ -> + ignore (warn "Visitor inside of a type signature."); + aa + | AUnOp (uo, e1) -> + let e1' = fAttrP e1 in + if e1' != e1 then AUnOp (uo, e1') else aa + | ABinOp (bo, e1, e2) -> + let e1' = fAttrP e1 in + let e2' = fAttrP e2 in + if e1' != e1 || e2' != e2 then ABinOp (bo, e1', e2') else aa + | ADot (ap, s) -> + let ap' = fAttrP ap in + if ap' != ap then ADot (ap', s) else aa + + +let rec visitCilFunction (vis : cilVisitor) (f : fundec) : fundec = + if debugVisit then ignore (E.log "Visiting function %s\n" f.svar.vname); + assertEmptyQueue vis; + let f = doVisit vis vis#vfunc childrenFunction f in + + let toPrepend = vis#unqueueInstr () in + if toPrepend <> [] then + f.sbody.bstmts <- mkStmt (Instr toPrepend) :: f.sbody.bstmts; + f + +and childrenFunction (vis : cilVisitor) (f : fundec) : fundec = + f.svar <- visitCilVarDecl vis f.svar; (* hit the function name *) + (* visit local declarations *) + f.slocals <- mapNoCopy (visitCilVarDecl vis) f.slocals; + (* visit the formals *) + let newformals = mapNoCopy (visitCilVarDecl vis) f.sformals in + (* Make sure the type reflects the formals *) + setFormals f newformals; + (* Remember any new instructions that were generated while visiting + variable declarations. *) + let toPrepend = vis#unqueueInstr () in + + f.sbody <- visitCilBlock vis f.sbody; (* visit the body *) + if toPrepend <> [] then + f.sbody.bstmts <- mkStmt (Instr toPrepend) :: f.sbody.bstmts; + f + +let rec visitCilGlobal (vis: cilVisitor) (g: global) : global list = + (*(trace "visit" (dprintf "visitCilGlobal\n"));*) + let oldloc = !currentLoc in + currentLoc := (get_globalLoc g) ; + currentGlobal := g; + let res = doVisitList vis vis#vglob childrenGlobal g in + currentLoc := oldloc; + res +and childrenGlobal (vis: cilVisitor) (g: global) : global = + match g with + | GFun (f, l) -> + let f' = visitCilFunction vis f in + if f' != f then GFun (f', l) else g + | GType(t, l) -> + t.ttype <- visitCilType vis t.ttype; + g + + | GEnumTagDecl _ | GCompTagDecl _ -> g (* Nothing to visit *) + | GEnumTag (enum, _) -> + (trace "visit" (dprintf "visiting global enum %s\n" enum.ename)); + (* Do the values and attributes of the enumerated items *) + let itemVisit (name, exp, loc) = (name, visitCilExpr vis exp, loc) in + enum.eitems <- mapNoCopy itemVisit enum.eitems; + enum.eattr <- visitCilAttributes vis enum.eattr; + g + + | GCompTag (comp, _) -> + (trace "visit" (dprintf "visiting global comp %s\n" comp.cname)); + (* Do the types and attirbutes of the fields *) + let fieldVisit = fun fi -> + fi.ftype <- visitCilType vis fi.ftype; + fi.fattr <- visitCilAttributes vis fi.fattr + in + List.iter fieldVisit comp.cfields; + comp.cattr <- visitCilAttributes vis comp.cattr; + g + + | GVarDecl(v, l) -> + let v' = visitCilVarDecl vis v in + if v' != v then GVarDecl (v', l) else g + | GVar (v, inito, l) -> + let v' = visitCilVarDecl vis v in + (match inito.init with + None -> () + | Some i -> let i' = visitCilInit vis i in + if i' != i then inito.init <- Some i'); + + if v' != v then GVar (v', inito, l) else g + + | GPragma (a, l) -> begin + match visitCilAttributes vis [a] with + [a'] -> if a' != a then GPragma (a', l) else g + | _ -> E.s (E.unimp "visitCilAttributes returns more than one attribute") + end + | _ -> g + + +(** A visitor that does constant folding. If "machdep" is true then we do + * machine dependent simplification (e.g., sizeof) *) +class constFoldVisitorClass (machdep: bool) : cilVisitor = object + inherit nopCilVisitor + + method vinst i = + match i with + (* Skip two functions to which we add Sizeof to the type arguments. + See the comments for these above. *) + Call(_,(Lval (Var vi,NoOffset)),_,_) + when ((vi.vname = "__builtin_va_arg") + || (vi.vname = "__builtin_types_compatible_p")) -> + SkipChildren + | _ -> DoChildren + method vexpr (e: exp) = + (* Do it bottom up *) + ChangeDoChildrenPost (e, constFold machdep) + +end +let constFoldVisitor (machdep: bool) = new constFoldVisitorClass machdep + +(* Iterate over all globals, including the global initializer *) +let iterGlobals (fl: file) + (doone: global -> unit) : unit = + let doone' g = + currentLoc := get_globalLoc g; + doone g + in + List.iter doone' fl.globals; + (match fl.globinit with + None -> () + | Some g -> doone' (GFun(g, locUnknown))) + +(* Fold over all globals, including the global initializer *) +let foldGlobals (fl: file) + (doone: 'a -> global -> 'a) + (acc: 'a) : 'a = + let doone' acc g = + currentLoc := get_globalLoc g; + doone acc g + in + let acc' = List.fold_left doone' acc fl.globals in + (match fl.globinit with + None -> acc' + | Some g -> doone' acc' (GFun(g, locUnknown))) + + +(* A visitor for the whole file that does not change the globals *) +let visitCilFileSameGlobals (vis : cilVisitor) (f : file) : unit = + let fGlob g = visitCilGlobal vis g in + iterGlobals f (fun g -> + match fGlob g with + [g'] when g' == g || Util.equals g' g -> () (* Try to do the pointer check first *) + | gl -> + ignore (E.log "You used visitCilFilSameGlobals but the global got changed:\n %a\nchanged to %a\n" d_global g (docList ~sep:line (d_global ())) gl); + ()) + +(* Be careful with visiting the whole file because it might be huge. *) +let visitCilFile (vis : cilVisitor) (f : file) : unit = + let fGlob g = visitCilGlobal vis g in + (* Scan the globals. Make sure this is tail recursive. *) + let rec loop (acc: global list) = function + [] -> f.globals <- List.rev acc + | g :: restg -> + loop ((List.rev (fGlob g)) @ acc) restg + in + loop [] f.globals; + (* the global initializer *) + (match f.globinit with + None -> () + | Some g -> f.globinit <- Some (visitCilFunction vis g)) + + + +(** Create or fetch the global initializer. Tries to put a call to in the the + * function with the main_name *) +let getGlobInit ?(main_name="main") (fl: file) = + match fl.globinit with + Some f -> f + | None -> begin + (* Sadly, we cannot use the Filename library because it does not like + * function names with multiple . in them *) + let f = + let len = String.length fl.fileName in + (* Find the last path separator and record the first . that we see, + * going backwards *) + let lastDot = ref len in + let rec findLastPathSep i = + if i < 0 then -1 else + let c = String.get fl.fileName i in + if c = '/' || c = '\\' then i + else begin + if c = '.' && !lastDot = len then + lastDot := i; + findLastPathSep (i - 1) + end + in + let lastPathSep = findLastPathSep (len - 1) in + let basenoext = + String.sub fl.fileName (lastPathSep + 1) (!lastDot - lastPathSep - 1) + in + emptyFunction + (makeValidSymbolName ("__globinit_" ^ basenoext)) + in + fl.globinit <- Some f; + (* Now try to add a call to the global initialized at the beginning of + * main *) + let inserted = ref false in + List.iter + (fun g -> + match g with + GFun(m, lm) when m.svar.vname = main_name -> + (* Prepend a prototype to the global initializer *) + fl.globals <- GVarDecl (f.svar, lm) :: fl.globals; + m.sbody.bstmts <- + compactStmts (mkStmt (Instr [Call(None, + Lval(var f.svar), + [], locUnknown)]) + :: m.sbody.bstmts); + inserted := true; + if !E.verboseFlag then + ignore (E.log "Inserted the globinit\n"); + fl.globinitcalled <- true; + | _ -> ()) + fl.globals; + + if not !inserted then + ignore (E.warn "Cannot find %s to add global initializer %s" + main_name f.svar.vname); + + f + end + + + +(* Fold over all globals, including the global initializer *) +let mapGlobals (fl: file) + (doone: global -> global) : unit = + fl.globals <- List.map doone fl.globals; + (match fl.globinit with + None -> () + | Some g -> begin + match doone (GFun(g, locUnknown)) with + GFun(g', _) -> fl.globinit <- Some g' + | _ -> E.s (E.bug "mapGlobals: globinit is not a function") + end) + + + +let dumpFile (pp: cilPrinter) (out : out_channel) (outfile: string) file = + printDepth := 99999; (* We don't want ... in the output *) + (* If we are in RELEASE mode then we do not print indentation *) + + Pretty.fastMode := true; + + if !E.verboseFlag then + ignore (E.log "printing file %s\n" outfile); + let print x = fprint out 78 x in + print (text ("/* Generated by CIL v. " ^ cilVersion ^ " */\n" ^ + (* sm: I want to easily tell whether the generated output + * is with print_CIL_Input or not *) + "/* print_CIL_Input is " ^ (if !print_CIL_Input then "true" else "false") ^ " */\n\n")); + iterGlobals file (fun g -> dumpGlobal pp out g); + + (* sm: we have to flush the output channel; if we don't then under *) + (* some circumstances (I haven't figure out exactly when, but it happens *) + (* more often with big inputs), we get a truncated output file *) + flush out + + + +(****************** + ****************** + ******************) + + + +(******************** OPTIMIZATIONS *****) +let rec peepHole1 (* Process one statement and possibly replace it *) + (doone: instr -> instr list option) + (* Scan a block and recurse inside nested blocks *) + (ss: stmt list) : unit = + let rec doInstrList (il: instr list) : instr list = + match il with + [] -> [] + | i :: rest -> begin + match doone i with + None -> i :: doInstrList rest + | Some sl -> doInstrList (sl @ rest) + end + in + + List.iter + (fun s -> + match s.skind with + Instr il -> s.skind <- Instr (doInstrList il) + | If (e, tb, eb, _) -> + peepHole1 doone tb.bstmts; + peepHole1 doone eb.bstmts + | Switch (e, b, _, _) -> peepHole1 doone b.bstmts +(* + | Loop (b, l, _, _) -> peepHole1 doone b.bstmts +*) + | While (_, b, _) -> peepHole1 doone b.bstmts + | DoWhile (_, b, _) -> peepHole1 doone b.bstmts + | For (bInit, _, bIter, b, _) -> + peepHole1 doone bInit.bstmts; + peepHole1 doone bIter.bstmts; + peepHole1 doone b.bstmts + | Block b -> peepHole1 doone b.bstmts + | TryFinally (b, h, l) -> + peepHole1 doone b.bstmts; + peepHole1 doone h.bstmts + | TryExcept (b, (il, e), h, l) -> + peepHole1 doone b.bstmts; + peepHole1 doone h.bstmts; + s.skind <- TryExcept(b, (doInstrList il, e), h, l); + | Return _ | Goto _ | Break _ | Continue _ -> ()) + ss + +let rec peepHole2 (* Process two statements and possibly replace them both *) + (dotwo: instr * instr -> instr list option) + (ss: stmt list) : unit = + let rec doInstrList (il: instr list) : instr list = + match il with + [] -> [] + | [i] -> [i] + | (i1 :: ((i2 :: rest) as rest2)) -> + begin + match dotwo (i1,i2) with + None -> i1 :: doInstrList rest2 + | Some sl -> doInstrList (sl @ rest) + end + in + List.iter + (fun s -> + match s.skind with + Instr il -> s.skind <- Instr (doInstrList il) + | If (e, tb, eb, _) -> + peepHole2 dotwo tb.bstmts; + peepHole2 dotwo eb.bstmts + | Switch (e, b, _, _) -> peepHole2 dotwo b.bstmts +(* + | Loop (b, l, _, _) -> peepHole2 dotwo b.bstmts +*) + | While (_, b, _) -> peepHole2 dotwo b.bstmts + | DoWhile (_, b, _) -> peepHole2 dotwo b.bstmts + | For (bInit, _, bIter, b, _) -> + peepHole2 dotwo bInit.bstmts; + peepHole2 dotwo bIter.bstmts; + peepHole2 dotwo b.bstmts + | Block b -> peepHole2 dotwo b.bstmts + | TryFinally (b, h, l) -> peepHole2 dotwo b.bstmts; + peepHole2 dotwo h.bstmts + | TryExcept (b, (il, e), h, l) -> + peepHole2 dotwo b.bstmts; + peepHole2 dotwo h.bstmts; + s.skind <- TryExcept (b, (doInstrList il, e), h, l) + + | Return _ | Goto _ | Break _ | Continue _ -> ()) + ss + + + + +(*** Type signatures ***) + +(* Helper class for typeSig: replace any types in attributes with typsigs *) +class typeSigVisitor(typeSigConverter: typ->typsig) = object + inherit nopCilVisitor + method vattrparam ap = + match ap with + | ASizeOf t -> ChangeTo (ASizeOfS (typeSigConverter t)) + | AAlignOf t -> ChangeTo (AAlignOfS (typeSigConverter t)) + | _ -> DoChildren +end + +let typeSigAddAttrs a0 t = + if a0 == [] then t else + match t with + TSBase t -> TSBase (typeAddAttributes a0 t) + | TSPtr (ts, a) -> TSPtr (ts, addAttributes a0 a) + | TSArray (ts, l, a) -> TSArray(ts, l, addAttributes a0 a) + | TSComp (iss, n, a) -> TSComp (iss, n, addAttributes a0 a) + | TSEnum (n, a) -> TSEnum (n, addAttributes a0 a) + | TSFun(ts, tsargs, isva, a) -> TSFun(ts, tsargs, isva, addAttributes a0 a) + +(* Compute a type signature. + Use ~ignoreSign:true to convert all signed integer types to unsigned, + so that signed and unsigned will compare the same. *) +let rec typeSigWithAttrs ?(ignoreSign=false) doattr t = + let typeSig = typeSigWithAttrs ~ignoreSign doattr in + let attrVisitor = new typeSigVisitor typeSig in + let doattr al = visitCilAttributes attrVisitor (doattr al) in + match t with + | TInt (ik, al) -> + let ik' = if ignoreSign then begin + match ik with + | ISChar | IChar -> IUChar + | IShort -> IUShort + | IInt -> IUInt + | ILong -> IULong + | ILongLong -> IULongLong + | _ -> ik + end else + ik + in + TSBase (TInt (ik', doattr al)) + | TFloat (fk, al) -> TSBase (TFloat (fk, doattr al)) + | TVoid al -> TSBase (TVoid (doattr al)) + | TEnum (enum, a) -> TSEnum (enum.ename, doattr a) + | TPtr (t, a) -> TSPtr (typeSig t, doattr a) + | TArray (t,l,a) -> (* We do not want fancy expressions in array lengths. + * So constant fold the lengths *) + let l' = + match l with + Some l -> begin + match constFold true l with + Const(CInt64(i, _, _)) -> Some i + | e -> E.s (E.bug "Invalid length in array type: %a\n" + (!pd_exp) e) + end + | None -> None + in + TSArray(typeSig t, l', doattr a) + + | TComp (comp, a) -> + TSComp (comp.cstruct, comp.cname, doattr (addAttributes comp.cattr a)) + | TFun(rt,args,isva,a) -> + TSFun(typeSig rt, + List.map (fun (_, atype, _) -> (typeSig atype)) (argsToList args), + isva, doattr a) + | TNamed(t, a) -> typeSigAddAttrs (doattr a) (typeSig t.ttype) + | TBuiltin_va_list al -> TSBase (TBuiltin_va_list (doattr al)) + +let typeSig t = + typeSigWithAttrs (fun al -> al) t + +let _ = pTypeSig := typeSig + +(* Remove the attribute from the top-level of the type signature *) +let setTypeSigAttrs (a: attribute list) = function + TSBase t -> TSBase (setTypeAttrs t a) + | TSPtr (ts, _) -> TSPtr (ts, a) + | TSArray (ts, l, _) -> TSArray(ts, l, a) + | TSComp (iss, n, _) -> TSComp (iss, n, a) + | TSEnum (n, _) -> TSEnum (n, a) + | TSFun (ts, tsargs, isva, _) -> TSFun (ts, tsargs, isva, a) + + +let typeSigAttrs = function + TSBase t -> typeAttrs t + | TSPtr (ts, a) -> a + | TSArray (ts, l, a) -> a + | TSComp (iss, n, a) -> a + | TSEnum (n, a) -> a + | TSFun (ts, tsargs, isva, a) -> a + + + +let dExp: doc -> exp = + fun d -> Const(CStr(sprint !lineLength d)) + +let dInstr: doc -> location -> instr = + fun d l -> Asm([], [sprint !lineLength d], [], [], [], l) + +let dGlobal: doc -> location -> global = + fun d l -> GAsm(sprint !lineLength d, l) + +let rec addOffset (toadd: offset) (off: offset) : offset = + match off with + NoOffset -> toadd + | Field(fid', offset) -> Field(fid', addOffset toadd offset) + | Index(e, offset) -> Index(e, addOffset toadd offset) + + (* Add an offset at the end of an lv *) +let addOffsetLval toadd (b, off) : lval = + b, addOffset toadd off + +let rec removeOffset (off: offset) : offset * offset = + match off with + NoOffset -> NoOffset, NoOffset + | Field(f, NoOffset) -> NoOffset, off + | Index(i, NoOffset) -> NoOffset, off + | Field(f, restoff) -> + let off', last = removeOffset restoff in + Field(f, off'), last + | Index(i, restoff) -> + let off', last = removeOffset restoff in + Index(i, off'), last + +let removeOffsetLval ((b, off): lval) : lval * offset = + let off', last = removeOffset off in + (b, off'), last + + (* Make an AddrOf. Given an lval of type T will give back an expression of + * type ptr(T) *) +let mkAddrOf ((b, off) as lval) : exp = + (* Never take the address of a register variable *) + (match lval with + Var vi, off when vi.vstorage = Register -> vi.vstorage <- NoStorage + | _ -> ()); + match lval with + Mem e, NoOffset -> e + | b, Index(z, NoOffset) when isZero z -> StartOf (b, NoOffset)(* array *) + | _ -> AddrOf lval + + +let mkAddrOrStartOf (lv: lval) : exp = + match unrollType (typeOfLval lv) with + TArray _ -> StartOf lv + | _ -> mkAddrOf lv + + + (* Make a Mem, while optimizing AddrOf. The type of the addr must be + * TPtr(t) and the type of the resulting lval is t. Note that in CIL the + * implicit conversion between a function and a pointer to a function does + * not apply. You must do the conversion yourself using AddrOf *) +let mkMem ~(addr: exp) ~(off: offset) : lval = + let res = + match addr, off with + AddrOf lv, _ -> addOffsetLval off lv + | StartOf lv, _ -> (* Must be an array *) + addOffsetLval (Index(zero, off)) lv + | _, _ -> Mem addr, off + in +(* ignore (E.log "memof : %a:%a\nresult = %a\n" + d_plainexp addr d_plainoffset off d_plainexp res); *) + res + + + +let splitFunctionType (ftype: typ) + : typ * (string * typ * attributes) list option * bool * attributes = + match unrollType ftype with + TFun (rt, args, isva, a) -> rt, args, isva, a + | _ -> E.s (bug "splitFunctionType invoked on a non function type %a" + d_type ftype) + +let splitFunctionTypeVI (fvi: varinfo) + : typ * (string * typ * attributes) list option * bool * attributes = + match unrollType fvi.vtype with + TFun (rt, args, isva, a) -> rt, args, isva, a + | _ -> E.s (bug "Function %s invoked on a non function type" fvi.vname) + +let isArrayType t = + match unrollType t with + TArray _ -> true + | _ -> false + + +let rec isConstant = function + | Const _ -> true + | UnOp (_, e, _) -> isConstant e + | BinOp (_, e1, e2, _) -> isConstant e1 && isConstant e2 + | Lval (Var vi, NoOffset) -> + (vi.vglob && isArrayType vi.vtype || isFunctionType vi.vtype) + | Lval _ -> false + | SizeOf _ | SizeOfE _ | SizeOfStr _ | AlignOf _ | AlignOfE _ -> true + | CastE (_, e) -> isConstant e + | AddrOf (Var vi, off) | StartOf (Var vi, off) + -> vi.vglob && isConstantOff off + | AddrOf (Mem e, off) | StartOf(Mem e, off) + -> isConstant e && isConstantOff off + +and isConstantOff = function + NoOffset -> true + | Field(fi, off) -> isConstantOff off + | Index(e, off) -> isConstant e && isConstantOff off + + +let getCompField (cinfo:compinfo) (fieldName:string) : fieldinfo = + (List.find (fun fi -> fi.fname = fieldName) cinfo.cfields) + + +let rec mkCastT ~(e: exp) ~(oldt: typ) ~(newt: typ) = + (* Do not remove old casts because they are conversions !!! *) + if Util.equals (typeSig oldt) (typeSig newt) then begin + e + end else begin + (* Watch out for constants *) + match newt, e with + TInt(newik, []), Const(CInt64(i, _, _)) -> kinteger64 newik i + | _ -> CastE(newt,e) + end + +let mkCast ~(e: exp) ~(newt: typ) = + mkCastT e (typeOf e) newt + +type existsAction = + ExistsTrue (* We have found it *) + | ExistsFalse (* Stop processing this branch *) + | ExistsMaybe (* This node is not what we are + * looking for but maybe its + * successors are *) +let existsType (f: typ -> existsAction) (t: typ) : bool = + let memo : (int, unit) H.t = H.create 17 in (* Memo table *) + let rec loop t = + match f t with + ExistsTrue -> true + | ExistsFalse -> false + | ExistsMaybe -> + (match t with + TNamed (t', _) -> loop t'.ttype + | TComp (c, _) -> loopComp c + | TArray (t', _, _) -> loop t' + | TPtr (t', _) -> loop t' + | TFun (rt, args, _, _) -> + (loop rt || List.exists (fun (_, at, _) -> loop at) + (argsToList args)) + | _ -> false) + and loopComp c = + if H.mem memo c.ckey then + (* We are looping, the answer must be false *) + false + else begin + H.add memo c.ckey (); + List.exists (fun f -> loop f.ftype) c.cfields + end + in + loop t + + +(* Try to do an increment, with constant folding *) +let increm (e: exp) (i: int) = + let et = typeOf e in + let bop = if isPointerType et then PlusPI else PlusA in + constFold false (BinOp(bop, e, integer i, et)) + +exception LenOfArray +let lenOfArray (eo: exp option) : int = + match eo with + None -> raise LenOfArray + | Some e -> begin + match constFold true e with + | Const(CInt64(ni, _, _)) when ni >= Int64.zero -> + Int64.to_int ni + | e -> raise LenOfArray + end + + +(*** Make a initializer for zeroe-ing a data type ***) +let rec makeZeroInit (t: typ) : init = + match unrollType t with + TInt (ik, _) -> SingleInit (Const(CInt64(Int64.zero, ik, None))) + | TFloat(fk, _) -> SingleInit(Const(CReal(0.0, fk, None))) + | TEnum _ -> SingleInit zero + | TComp (comp, _) as t' when comp.cstruct -> + let inits = + List.fold_right + (fun f acc -> + if f.fname <> missingFieldName then + (Field(f, NoOffset), makeZeroInit f.ftype) :: acc + else + acc) + comp.cfields [] + in + CompoundInit (t', inits) + + | TComp (comp, _) when not comp.cstruct -> + let fstfield, rest = + match comp.cfields with + f :: rest -> f, rest + | [] -> E.s (unimp "Cannot create init for empty union") + in + let fieldToInit = + if !msvcMode then + (* ISO C99 [6.7.8.10] says that the first field of the union + is the one we should initialize. *) + fstfield + else begin + (* gcc initializes the whole union to zero. So choose the largest + field, and set that to zero. Choose the first field if possible. + MSVC also initializes the whole union, but use the ISO behavior + for MSVC because it only allows compound initializers to refer + to the first union field. *) + let fieldSize f = try bitsSizeOf f.ftype with SizeOfError _ -> 0 in + let widestField, widestFieldWidth = + List.fold_left (fun acc thisField -> + let widestField, widestFieldWidth = acc in + let thisSize = fieldSize thisField in + if thisSize > widestFieldWidth then + thisField, thisSize + else + acc) + (fstfield, fieldSize fstfield) + rest + in + widestField + end + in + CompoundInit(t, [(Field(fieldToInit, NoOffset), + makeZeroInit fieldToInit.ftype)]) + + | TArray(bt, Some len, _) as t' -> + let n = + match constFold true len with + Const(CInt64(n, _, _)) -> Int64.to_int n + | _ -> E.s (E.unimp "Cannot understand length of array") + in + let initbt = makeZeroInit bt in + let rec loopElems acc i = + if i < 0 then acc + else loopElems ((Index(integer i, NoOffset), initbt) :: acc) (i - 1) + in + CompoundInit(t', loopElems [] (n - 1)) + + | TArray (bt, None, at) as t' -> + (* Unsized array, allow it and fill it in later + * (see cabs2cil.ml, collectInitializer) *) + CompoundInit (t', []) + + | TPtr _ as t -> SingleInit(CastE(t, zero)) + | x -> E.s (unimp "Cannot initialize type: %a" d_type x) + + +(**** Fold over the list of initializers in a Compound. In the case of an + * array initializer only the initializers present are scanned (a prefix of + * all initializers) *) +let foldLeftCompound + ~(doinit: offset -> init -> typ -> 'a -> 'a) + ~(ct: typ) + ~(initl: (offset * init) list) + ~(acc: 'a) : 'a = + match unrollType ct with + TArray(bt, _, _) -> + List.fold_left (fun acc (o, i) -> doinit o i bt acc) acc initl + + | TComp (comp, _) -> + let getTypeOffset = function + Field(f, NoOffset) -> f.ftype + | _ -> E.s (bug "foldLeftCompound: malformed initializer") + in + List.fold_left + (fun acc (o, i) -> doinit o i (getTypeOffset o) acc) acc initl + + | _ -> E.s (unimp "Type of Compound is not array or struct or union") + +(**** Fold over the list of initializers in a Compound. Like foldLeftCompound + * but scans even the zero-initializers that are missing at the end of the + * array *) +let foldLeftCompoundAll + ~(doinit: offset -> init -> typ -> 'a -> 'a) + ~(ct: typ) + ~(initl: (offset * init) list) + ~(acc: 'a) : 'a = + match unrollType ct with + TArray(bt, leno, _) -> begin + let part = + List.fold_left (fun acc (o, i) -> doinit o i bt acc) acc initl in + (* See how many more we have to do *) + match leno with + Some lene -> begin + match constFold true lene with + Const(CInt64(i, _, _)) -> + let len_array = Int64.to_int i in + let len_init = List.length initl in + if len_array > len_init then + let zi = makeZeroInit bt in + let rec loop acc i = + if i >= len_array then acc + else + loop (doinit (Index(integer i, NoOffset)) zi bt acc) + (i + 1) + in + loop part (len_init + 1) + else + part + | _ -> E.s (unimp "foldLeftCompoundAll: array with initializer and non-constant length\n") + end + + | _ -> E.s (unimp "foldLeftCompoundAll: TArray with initializer and no length") + end + | TComp (comp, _) -> + let getTypeOffset = function + Field(f, NoOffset) -> f.ftype + | _ -> E.s (bug "foldLeftCompound: malformed initializer") + in + List.fold_left + (fun acc (o, i) -> doinit o i (getTypeOffset o) acc) acc initl + + | _ -> E.s (E.unimp "Type of Compound is not array or struct or union") + + + +let rec isCompleteType t = + match unrollType t with + | TArray(t, None, _) -> false + | TArray(t, Some z, _) when isZero z -> false + | TComp (comp, _) -> (* Struct or union *) + List.for_all (fun fi -> isCompleteType fi.ftype) comp.cfields + | _ -> true + + +module A = Alpha + + +(** Uniquefy the variable names *) +let uniqueVarNames (f: file) : unit = + (* Setup the alpha conversion table for globals *) + let gAlphaTable: (string, + location A.alphaTableData ref) H.t = H.create 113 in + (* Keep also track of the global names that we have used. Map them to the + * variable ID. We do this only to check that we do not have two globals + * with the same name. *) + let globalNames: (string, int) H.t = H.create 113 in + (* Scan the file and add the global names to the table *) + iterGlobals f + (function + GVarDecl(vi, l) + | GVar(vi, _, l) + | GFun({svar = vi}, l) -> + (* See if we have used this name already for something else *) + (try + let oldid = H.find globalNames vi.vname in + if oldid <> vi.vid then + ignore (warn "The name %s is used for two distinct globals" + vi.vname); + (* Here if we have used this name already. Go ahead *) + () + with Not_found -> begin + (* Here if this is the first time we define a name *) + H.add globalNames vi.vname vi.vid; + (* And register it *) + A.registerAlphaName gAlphaTable None vi.vname !currentLoc; + () + end) + | _ -> ()); + + (* Now we must scan the function bodies and rename the locals *) + iterGlobals f + (function + GFun(fdec, l) -> begin + currentLoc := l; + (* Setup an undo list to be able to revert the changes to the + * global alpha table *) + let undolist = ref [] in + (* Process one local variable *) + let processLocal (v: varinfo) = + let newname, oldloc = + A.newAlphaName gAlphaTable (Some undolist) v.vname + !currentLoc + in + if false && newname <> v.vname then (* Disable this warning *) + ignore (warn "uniqueVarNames: Changing the name of local %s in %s to %s (due to duplicate at %a)\n" + v.vname fdec.svar.vname newname d_loc oldloc); + v.vname <- newname + in + (* Do the formals first *) + List.iter processLocal fdec.sformals; + (* Fix the type again *) + setFormals fdec fdec.sformals; + (* And now the locals *) + List.iter processLocal fdec.slocals; + (* Undo the changes to the global table *) + A.undoAlphaChanges gAlphaTable !undolist; + () + end + | _ -> ()); + () + + +(* A visitor that makes a deep copy of a function body *) +class copyFunctionVisitor (newname: string) = object (self) + inherit nopCilVisitor + + (* Keep here a maping from locals to their copies *) + val map : (string, varinfo) H.t = H.create 113 + (* Keep here a maping from statements to their copies *) + val stmtmap : (int, stmt) H.t = H.create 113 + val sid = ref 0 (* Will have to assign ids to statements *) + (* Keep here a list of statements to be patched *) + val patches : stmt list ref = ref [] + + val argid = ref 0 + + (* This is the main function *) + method vfunc (f: fundec) : fundec visitAction = + (* We need a map from the old locals/formals to the new ones *) + H.clear map; + argid := 0; + (* Make a copy of the fundec. *) + let f' = {f with svar = f.svar} in + let patchfunction (f' : fundec) = + (* Change the name. Only this late to allow the visitor to copy the + * svar *) + f'.svar.vname <- newname; + let findStmt (i: int) = + try H.find stmtmap i + with Not_found -> E.s (bug "Cannot find the copy of stmt#%d" i) + in + let patchstmt (s: stmt) = + match s.skind with + Goto (sr, l) -> + (* Make a copy of the reference *) + let sr' = ref (findStmt !sr.sid) in + s.skind <- Goto (sr',l) + | Switch (e, body, cases, l) -> + s.skind <- Switch (e, body, + List.map (fun cs -> findStmt cs.sid) cases, l) + | _ -> () + in + List.iter patchstmt !patches; + f' + in + patches := []; + sid := 0; + H.clear stmtmap; + ChangeDoChildrenPost (f', patchfunction) + + (* We must create a new varinfo for each declaration. Memoize to + * maintain sharing *) + method vvdec (v: varinfo) = + (* Some varinfo have empty names. Give them some name *) + if v.vname = "" then begin + v.vname <- "arg" ^ string_of_int !argid; incr argid + end; + try + ChangeTo (H.find map v.vname) + with Not_found -> begin + let v' = {v with vid = newVID () } in + H.add map v.vname v'; + ChangeDoChildrenPost (v', fun x -> x) + end + + (* We must replace references to local variables *) + method vvrbl (v: varinfo) = + if v.vglob then SkipChildren else + try + ChangeTo (H.find map v.vname) + with Not_found -> + E.s (bug "Cannot find the new copy of local variable %s" v.vname) + + + (* Replace statements. *) + method vstmt (s: stmt) : stmt visitAction = + s.sid <- !sid; incr sid; + let s' = {s with sid = s.sid} in + H.add stmtmap s.sid s'; (* Remember where we copied this *) + (* if we have a Goto or a Switch remember them to fixup at end *) + (match s'.skind with + (Goto _ | Switch _) -> patches := s' :: !patches + | _ -> ()); + (* Do the children *) + ChangeDoChildrenPost (s', fun x -> x) + + (* Copy blocks since they are mutable *) + method vblock (b: block) = + ChangeDoChildrenPost ({b with bstmts = b.bstmts}, fun x -> x) + + + method vglob _ = E.s (bug "copyFunction should not be used on globals") +end + +(* We need a function that copies a CIL function. *) +let copyFunction (f: fundec) (newname: string) : fundec = + visitCilFunction (new copyFunctionVisitor(newname)) f + +(********* Compute the CFG ********) +let sid_counter = ref 0 + +let new_sid () = + let id = !sid_counter in + incr sid_counter; + id + +let statements : stmt list ref = ref [] +(* Clear all info about the CFG in statements *) +class clear : cilVisitor = object + inherit nopCilVisitor + method vstmt s = begin + s.sid <- !sid_counter ; + incr sid_counter ; + statements := s :: !statements; + s.succs <- [] ; + s.preds <- [] ; + DoChildren + end + method vexpr _ = SkipChildren + method vtype _ = SkipChildren + method vinst _ = SkipChildren +end + +let link source dest = begin + if not (List.mem dest source.succs) then + source.succs <- dest :: source.succs ; + if not (List.mem source dest.preds) then + dest.preds <- source :: dest.preds +end +let trylink source dest_option = match dest_option with + None -> () +| Some(dest) -> link source dest + + +(** Cmopute the successors and predecessors of a block, given a fallthrough *) +let rec succpred_block b fallthrough = + let rec handle sl = match sl with + [] -> () + | [a] -> succpred_stmt a fallthrough + | hd :: ((next :: _) as tl) -> + succpred_stmt hd (Some next) ; + handle tl + in handle b.bstmts + + +and succpred_stmt s fallthrough = + match s.skind with + Instr _ -> trylink s fallthrough + | Return _ -> () + | Goto(dest,l) -> link s !dest + | Break _ + | Continue _ + | Switch _ -> + failwith "computeCFGInfo: cannot be called on functions with break, continue or switch statements. Use prepareCFG first to remove them." + + | If(e1,b1,b2,l) -> + (match b1.bstmts with + [] -> trylink s fallthrough + | hd :: tl -> (link s hd ; succpred_block b1 fallthrough )) ; + (match b2.bstmts with + [] -> trylink s fallthrough + | hd :: tl -> (link s hd ; succpred_block b2 fallthrough )) + +(* + | Loop(b,l,_,_) -> + begin match b.bstmts with + [] -> failwith "computeCFGInfo: empty loop" + | hd :: tl -> + link s hd ; + succpred_block b (Some(hd)) + end +*) + + | While (e, b, l) -> begin match b.bstmts with + | [] -> failwith "computeCFGInfo: empty loop" + | hd :: tl -> link s hd ; + succpred_block b (Some(hd)) + end + + | DoWhile (e, b, l) ->begin match b.bstmts with + | [] -> failwith "computeCFGInfo: empty loop" + | hd :: tl -> link s hd ; + succpred_block b (Some(hd)) + end + + | For (bInit, e, bIter, b, l) -> + (match bInit.bstmts with + | [] -> failwith "computeCFGInfo: empty loop" + | hd :: tl -> link s hd ; + succpred_block bInit (Some(hd))) ; + (match bIter.bstmts with + | [] -> failwith "computeCFGInfo: empty loop" + | hd :: tl -> link s hd ; + succpred_block bIter (Some(hd))) ; + (match b.bstmts with + | [] -> failwith "computeCFGInfo: empty loop" + | hd :: tl -> link s hd ; + succpred_block b (Some(hd))) ; + + | Block(b) -> begin match b.bstmts with + [] -> trylink s fallthrough + | hd :: tl -> link s hd ; + succpred_block b fallthrough + end + | TryExcept _ | TryFinally _ -> + failwith "computeCFGInfo: structured exception handling not implemented" + +(* [weimer] Sun May 5 12:25:24 PDT 2002 + * This code was pulled from ext/switch.ml because it looks like we really + * want it to be part of CIL. + * + * Here is the magic handling to + * (1) replace switch statements with if/goto + * (2) remove "break" + * (3) remove "default" + * (4) remove "continue" + *) +let is_case_label l = match l with + | Case _ | Default _ -> true + | _ -> false + +let switch_count = ref (-1) +let get_switch_count () = + switch_count := 1 + !switch_count ; + !switch_count + +let switch_label = ref (-1) + +let rec xform_switch_stmt s break_dest cont_dest label_index = begin + s.labels <- List.map (fun lab -> match lab with + Label _ -> lab + | Case(e,l) -> + let suffix = + match isInteger e with + | Some value -> + if value < Int64.zero then + "neg_" ^ Int64.to_string (Int64.neg value) + else + Int64.to_string value + | None -> + incr switch_label; + "exp_" ^ string_of_int !switch_label + in + let str = Pretty.sprint !lineLength + (Pretty.dprintf "switch_%d_%s" label_index suffix) in + (Label(str,l,false)) + | Default(l) -> (Label(Printf.sprintf + "switch_%d_default" label_index,l,false)) + ) s.labels ; + match s.skind with + | Instr _ | Return _ | Goto _ -> () + | Break(l) -> begin try + s.skind <- Goto(break_dest (),l) + with e -> + ignore (error "prepareCFG: break: %a@!" d_stmt s) ; + raise e + end + | Continue(l) -> begin try + s.skind <- Goto(cont_dest (),l) + with e -> + ignore (error "prepareCFG: continue: %a@!" d_stmt s) ; + raise e + end + | If(e,b1,b2,l) -> xform_switch_block b1 break_dest cont_dest label_index ; + xform_switch_block b2 break_dest cont_dest label_index + | Switch(e,b,sl,l) -> begin + (* change + * switch (se) { + * case 0: s0 ; + * case 1: s1 ; break; + * ... + * } + * + * into: + * + * if (se == 0) goto label_0; + * else if (se == 1) goto label_1; + * ... + * else if (0) { // body_block + * label_0: s0; + * label_1: s1; goto label_break; + * ... + * } else if (0) { // break_block + * label_break: ; // break_stmt + * } + *) + let i = get_switch_count () in + let break_stmt = mkStmt (Instr []) in + break_stmt.labels <- + [Label((Printf.sprintf "switch_%d_break" i),l,false)] ; + let break_block = mkBlock [ break_stmt ] in + let body_block = b in + let body_if_stmtkind = (If(zero,body_block,break_block,l)) in + + (* The default case, if present, must be used only if *all* + non-default cases fail [ISO/IEC 9899:1999, §6.8.4.2, ¶5]. As a + result, we sort the order in which we handle the labels (but not the + order in which we print out the statements, so fall-through still + works as expected). *) + let compare_choices s1 s2 = match s1.labels, s2.labels with + | (Default(_) :: _), _ -> 1 + | _, (Default(_) :: _) -> -1 + | _, _ -> 0 + in + + let rec handle_choices sl = match sl with + [] -> body_if_stmtkind + | stmt_hd :: stmt_tl -> begin + let rec handle_labels lab_list = begin + match lab_list with + [] -> handle_choices stmt_tl + | Case(ce,cl) :: lab_tl -> + let pred = BinOp(Eq,e,ce,intType) in + let then_block = mkBlock [ mkStmt (Goto(ref stmt_hd,cl)) ] in + let else_block = mkBlock [ mkStmt (handle_labels lab_tl) ] in + If(pred,then_block,else_block,cl) + | Default(dl) :: lab_tl -> + (* ww: before this was 'if (1) goto label', but as Ben points + out this might confuse someone down the line who doesn't have + special handling for if(1) into thinking that there are two + paths here. The simpler 'goto label' is what we want. *) + Block(mkBlock [ mkStmt (Goto(ref stmt_hd,dl)) ; + mkStmt (handle_labels lab_tl) ]) + | Label(_,_,_) :: lab_tl -> handle_labels lab_tl + end in + handle_labels stmt_hd.labels + end in + s.skind <- handle_choices (List.sort compare_choices sl) ; + xform_switch_block b (fun () -> ref break_stmt) cont_dest i + end +(* + | Loop(b,l,_,_) -> + let i = get_switch_count () in + let break_stmt = mkStmt (Instr []) in + break_stmt.labels <- + [Label((Printf.sprintf "while_%d_break" i),l,false)] ; + let cont_stmt = mkStmt (Instr []) in + cont_stmt.labels <- + [Label((Printf.sprintf "while_%d_continue" i),l,false)] ; + b.bstmts <- cont_stmt :: b.bstmts ; + let this_stmt = mkStmt + (Loop(b,l,Some(cont_stmt),Some(break_stmt))) in + let break_dest () = ref break_stmt in + let cont_dest () = ref cont_stmt in + xform_switch_block b break_dest cont_dest label_index ; + break_stmt.succs <- s.succs ; + let new_block = mkBlock [ this_stmt ; break_stmt ] in + s.skind <- Block new_block +*) + | While (e, b, l) -> + let i = get_switch_count () in + let break_stmt = mkStmt (Instr []) in + break_stmt.labels <- + [Label((Printf.sprintf "while_%d_break" i),l,false)] ; + let cont_stmt = mkStmt (Instr []) in + cont_stmt.labels <- + [Label((Printf.sprintf "while_%d_continue" i),l,false)] ; + b.bstmts <- cont_stmt :: b.bstmts ; + let this_stmt = mkStmt + (While(e,b,l)) in + let break_dest () = ref break_stmt in + let cont_dest () = ref cont_stmt in + xform_switch_block b break_dest cont_dest label_index ; + break_stmt.succs <- s.succs ; + let new_block = mkBlock [ this_stmt ; break_stmt ] in + s.skind <- Block new_block + + | DoWhile (e, b, l) -> + let i = get_switch_count () in + let break_stmt = mkStmt (Instr []) in + break_stmt.labels <- + [Label((Printf.sprintf "while_%d_break" i),l,false)] ; + let cont_stmt = mkStmt (Instr []) in + cont_stmt.labels <- + [Label((Printf.sprintf "while_%d_continue" i),l,false)] ; + b.bstmts <- cont_stmt :: b.bstmts ; + let this_stmt = mkStmt + (DoWhile(e,b,l)) in + let break_dest () = ref break_stmt in + let cont_dest () = ref cont_stmt in + xform_switch_block b break_dest cont_dest label_index ; + break_stmt.succs <- s.succs ; + let new_block = mkBlock [ this_stmt ; break_stmt ] in + s.skind <- Block new_block + + | For (bInit, e, bIter , b, l) -> + let i = get_switch_count () in + let break_stmt = mkStmt (Instr []) in + break_stmt.labels <- + [Label((Printf.sprintf "while_%d_break" i),l,false)] ; + let cont_stmt = mkStmt (Instr []) in + cont_stmt.labels <- + [Label((Printf.sprintf "while_%d_continue" i),l,false)] ; + b.bstmts <- cont_stmt :: b.bstmts ; + let this_stmt = mkStmt + (For(bInit,e,bIter,b,l)) in + let break_dest () = ref break_stmt in + let cont_dest () = ref cont_stmt in + xform_switch_block b break_dest cont_dest label_index ; + break_stmt.succs <- s.succs ; + let new_block = mkBlock [ this_stmt ; break_stmt ] in + s.skind <- Block new_block + + + | Block(b) -> xform_switch_block b break_dest cont_dest label_index + + | TryExcept _ | TryFinally _ -> + failwith "xform_switch_statement: structured exception handling not implemented" + +end and xform_switch_block b break_dest cont_dest label_index = + try + let rec link_succs sl = match sl with + | [] -> () + | hd :: tl -> (if hd.succs = [] then hd.succs <- tl) ; link_succs tl + in + link_succs b.bstmts ; + List.iter (fun stmt -> + xform_switch_stmt stmt break_dest cont_dest label_index) b.bstmts ; + with e -> + List.iter (fun stmt -> ignore + (warn "prepareCFG: %a@!" d_stmt stmt)) b.bstmts ; + raise e + +(* prepare a function for computeCFGInfo by removing break, continue, + * default and switch statements/labels and replacing them with Ifs and + * Gotos. *) +let prepareCFG (fd : fundec) : unit = + xform_switch_block fd.sbody + (fun () -> failwith "prepareCFG: break with no enclosing loop") + (fun () -> failwith "prepareCFG: continue with no enclosing loop") (-1) + +(* make the cfg and return a list of statements *) +let computeCFGInfo (f : fundec) (global_numbering : bool) : unit = + if not global_numbering then + sid_counter := 0 ; + statements := []; + let clear_it = new clear in + ignore (visitCilBlock clear_it f.sbody) ; + f.smaxstmtid <- Some (!sid_counter) ; + succpred_block f.sbody (None); + let res = List.rev !statements in + statements := []; + f.sallstmts <- res; + () + +let initCIL () = + if not !initCIL_called then begin + (* Set the machine *) + theMachine := if !msvcMode then M.msvc else M.gcc; + (* Pick type for string literals *) + stringLiteralType := if !theMachine.M.const_string_literals then + charConstPtrType + else + charPtrType; + (* Find the right ikind given the size *) + let findIkind (unsigned: bool) (sz: int) : ikind = + (* Test the most common sizes first *) + if sz = !theMachine.M.sizeof_int then + if unsigned then IUInt else IInt + else if sz = !theMachine.M.sizeof_long then + if unsigned then IULong else ILong + else if sz = 1 then + if unsigned then IUChar else IChar + else if sz = !theMachine.M.sizeof_short then + if unsigned then IUShort else IShort + else if sz = !theMachine.M.sizeof_longlong then + if unsigned then IULongLong else ILongLong + else + E.s(E.unimp "initCIL: cannot find the right ikind for size %d\n" sz) + in + upointType := TInt(findIkind true !theMachine.M.sizeof_ptr, []); + kindOfSizeOf := findIkind true !theMachine.M.sizeof_sizeof; + typeOfSizeOf := TInt(!kindOfSizeOf, []); + H.add gccBuiltins "__builtin_memset" + (voidPtrType, [ voidPtrType; intType; intType ], false); + wcharKind := findIkind false !theMachine.M.sizeof_wchar; + wcharType := TInt(!wcharKind, []); + char_is_unsigned := !theMachine.M.char_is_unsigned; + little_endian := !theMachine.M.little_endian; + underscore_name := !theMachine.M.underscore_name; + nextGlobalVID := 1; + nextCompinfoKey := 1; + initCIL_called := true + end + + +(* We want to bring all type declarations before the data declarations. This + * is needed for code of the following form: + + int f(); // Prototype without arguments + typedef int FOO; + int f(FOO x) { ... } + + In CIL the prototype also lists the type of the argument as being FOO, + which is undefined. + + There is one catch with this scheme. If the type contains an array whose + length refers to variables then those variables must be declared before + the type *) + +let pullTypesForward = true + + + (* Scan a type and collect the variables that are refered *) +class getVarsInGlobalClass (pacc: varinfo list ref) = object + inherit nopCilVisitor + method vvrbl (vi: varinfo) = + pacc := vi :: !pacc; + SkipChildren + + method vglob = function + GType _ | GCompTag _ -> DoChildren + | _ -> SkipChildren + +end + +let getVarsInGlobal (g : global) : varinfo list = + let pacc : varinfo list ref = ref [] in + let v : cilVisitor = new getVarsInGlobalClass pacc in + ignore (visitCilGlobal v g); + !pacc + +let hasPrefix p s = + let pl = String.length p in + (String.length s >= pl) && String.sub s 0 pl = p + +let pushGlobal (g: global) + ~(types:global list ref) + ~(variables: global list ref) = + if not pullTypesForward then + variables := g :: !variables + else + begin + (* Collect a list of variables that are refered from the type. Return + * Some if the global should go with the types and None if it should go + * to the variables. *) + let varsintype : (varinfo list * location) option = + match g with + GType (_, l) | GCompTag (_, l) -> Some (getVarsInGlobal g, l) + | GEnumTag (_, l) | GPragma (Attr("pack", _), l) + | GCompTagDecl (_, l) | GEnumTagDecl (_, l) -> Some ([], l) + (** Move the warning pragmas early + | GPragma(Attr(s, _), l) when hasPrefix "warning" s -> Some ([], l) + *) + | _ -> None (* Does not go with the types *) + in + match varsintype with + None -> variables := g :: !variables + | Some (vl, loc) -> + types := + (* insert declarations for referred variables ('vl'), before + * the type definition 'g' itself *) + g :: (List.fold_left (fun acc v -> GVarDecl(v, loc) :: acc) + !types vl) + end + + +type formatArg = + Fe of exp + | Feo of exp option (** For array lengths *) + | Fu of unop + | Fb of binop + | Fk of ikind + | FE of exp list (** For arguments in a function call *) + | Ff of (string * typ * attributes) (** For a formal argument *) + | FF of (string * typ * attributes) list (* For formal argument lists *) + | Fva of bool (** For the ellipsis in a function type *) + | Fv of varinfo + | Fl of lval + | Flo of lval option (** For the result of a function call *) + | Fo of offset + | Fc of compinfo + | Fi of instr + | FI of instr list + | Ft of typ + | Fd of int + | Fg of string + | Fs of stmt + | FS of stmt list + | FA of attributes + + | Fp of attrparam + | FP of attrparam list + + | FX of string + +let d_formatarg () = function + Fe e -> dprintf "Fe(%a)" d_exp e + | Feo None -> dprintf "Feo(None)" + | Feo (Some e) -> dprintf "Feo(%a)" d_exp e + | FE _ -> dprintf "FE()" + | Fk ik -> dprintf "Fk()" + | Fva b -> dprintf "Fva(%b)" b + | Ff (an, _, _) -> dprintf "Ff(%s)" an + | FF _ -> dprintf "FF(...)" + | FA _ -> dprintf "FA(...)" + | Fu uo -> dprintf "Fu()" + | Fb bo -> dprintf "Fb()" + | Fv v -> dprintf "Fv(%s)" v.vname + | Fl l -> dprintf "Fl(%a)" d_lval l + | Flo None -> dprintf "Flo(None)" + | Flo (Some l) -> dprintf "Flo(%a)" d_lval l + | Fo o -> dprintf "Fo" + | Fc ci -> dprintf "Fc(%s)" ci.cname + | Fi i -> dprintf "Fi(...)" + | FI i -> dprintf "FI(...)" + | Ft t -> dprintf "Ft(%a)" d_type t + | Fd n -> dprintf "Fd(%d)" n + | Fg s -> dprintf "Fg(%s)" s + | Fp _ -> dprintf "Fp(...)" + | FP n -> dprintf "FP(...)" + | Fs _ -> dprintf "FS" + | FS _ -> dprintf "FS" + + | FX _ -> dprintf "FX()" + + diff --git a/cil/src/cil.mli b/cil/src/cil.mli new file mode 100644 index 00000000..31c4e65c --- /dev/null +++ b/cil/src/cil.mli @@ -0,0 +1,2455 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* + * CIL: An intermediate language for analyzing C programs. + * + * George Necula + * + *) + +(** CIL API Documentation. An html version of this document can be found at + * http://manju.cs.berkeley.edu/cil. *) + +(** Call this function to perform some initialization. Call if after you have + * set {!Cil.msvcMode}. *) +val initCIL: unit -> unit + + +(** This are the CIL version numbers. A CIL version is a number of the form + * M.m.r (major, minor and release) *) +val cilVersion: string +val cilVersionMajor: int +val cilVersionMinor: int +val cilVersionRevision: int + +(** This module defines the abstract syntax of CIL. It also provides utility + * functions for traversing the CIL data structures, and pretty-printing + * them. The parser for both the GCC and MSVC front-ends can be invoked as + * [Frontc.parse: string -> unit ->] {!Cil.file}. This function must be given + * the name of a preprocessed C file and will return the top-level data + * structure that describes a whole source file. By default the parsing and + * elaboration into CIL is done as for GCC source. If you want to use MSVC + * source you must set the {!Cil.msvcMode} to [true] and must also invoke the + * function [Frontc.setMSVCMode: unit -> unit]. *) + + +(** {b The Abstract Syntax of CIL} *) + + +(** The top-level representation of a CIL source file (and the result of the + * parsing and elaboration). Its main contents is the list of global + * declarations and definitions. You can iterate over the globals in a + * {!Cil.file} using the following iterators: {!Cil.mapGlobals}, + * {!Cil.iterGlobals} and {!Cil.foldGlobals}. You can also use the + * {!Cil.dummyFile} when you need a {!Cil.file} as a placeholder. For each + * global item CIL stores the source location where it appears (using the + * type {!Cil.location}) *) + +type file = + { mutable fileName: string; (** The complete file name *) + mutable globals: global list; (** List of globals as they will appear + in the printed file *) + mutable globinit: fundec option; + (** An optional global initializer function. This is a function where + * you can put stuff that must be executed before the program is + * started. This function, is conceptually at the end of the file, + * although it is not part of the globals list. Use {!Cil.getGlobInit} + * to create/get one. *) + mutable globinitcalled: bool; + (** Whether the global initialization function is called in main. This + * should always be false if there is no global initializer. When you + * create a global initialization CIL will try to insert code in main + * to call it. This will not happen if your file does not contain a + * function called "main" *) + } +(** Top-level representation of a C source file *) + +and comment = location * string + +(** {b Globals}. The main type for representing global declarations and + * definitions. A list of these form a CIL file. The order of globals in the + * file is generally important. *) + +(** A global declaration or definition *) +and global = + | GType of typeinfo * location + (** A typedef. All uses of type names (through the [TNamed] constructor) + must be preceded in the file by a definition of the name. The string + is the defined name and always not-empty. *) + + | GCompTag of compinfo * location + (** Defines a struct/union tag with some fields. There must be one of + these for each struct/union tag that you use (through the [TComp] + constructor) since this is the only context in which the fields are + printed. Consequently nested structure tag definitions must be + broken into individual definitions with the innermost structure + defined first. *) + + | GCompTagDecl of compinfo * location + (** Declares a struct/union tag. Use as a forward declaration. This is + * printed without the fields. *) + + | GEnumTag of enuminfo * location + (** Declares an enumeration tag with some fields. There must be one of + these for each enumeration tag that you use (through the [TEnum] + constructor) since this is the only context in which the items are + printed. *) + + | GEnumTagDecl of enuminfo * location + (** Declares an enumeration tag. Use as a forward declaration. This is + * printed without the items. *) + + | GVarDecl of varinfo * location + (** A variable declaration (not a definition). If the variable has a + function type then this is a prototype. There can be several + declarations and at most one definition for a given variable. If both + forms appear then they must share the same varinfo structure. A + prototype shares the varinfo with the fundec of the definition. Either + has storage Extern or there must be a definition in this file *) + + | GVar of varinfo * initinfo * location + (** A variable definition. Can have an initializer. The initializer is + * updateable so that you can change it without requiring to recreate + * the list of globals. There can be at most one definition for a + * variable in an entire program. Cannot have storage Extern or function + * type. *) + + | GFun of fundec * location + (** A function definition. *) + + | GAsm of string * location (** Global asm statement. These ones + can contain only a template *) + | GPragma of attribute * location (** Pragmas at top level. Use the same + syntax as attributes *) + | GText of string (** Some text (printed verbatim) at + top level. E.g., this way you can + put comments in the output. *) + +(** {b Types}. A C type is represented in CIL using the type {!Cil.typ}. + * Among types we differentiate the integral types (with different kinds + * denoting the sign and precision), floating point types, enumeration types, + * array and pointer types, and function types. Every type is associated with + * a list of attributes, which are always kept in sorted order. Use + * {!Cil.addAttribute} and {!Cil.addAttributes} to construct list of + * attributes. If you want to inspect a type, you should use + * {!Cil.unrollType} or {!Cil.unrollTypeDeep} to see through the uses of + * named types. *) +(** CIL is configured at build-time with the sizes and alignments of the + * underlying compiler (GCC or MSVC). CIL contains functions that can compute + * the size of a type (in bits) {!Cil.bitsSizeOf}, the alignment of a type + * (in bytes) {!Cil.alignOf_int}, and can convert an offset into a start and + * width (both in bits) using the function {!Cil.bitsOffset}. At the moment + * these functions do not take into account the [packed] attributes and + * pragmas. *) + +and typ = + TVoid of attributes (** Void type. Also predefined as {!Cil.voidType} *) + | TInt of ikind * attributes + (** An integer type. The kind specifies the sign and width. Several + * useful variants are predefined as {!Cil.intType}, {!Cil.uintType}, + * {!Cil.longType}, {!Cil.charType}. *) + + + | TFloat of fkind * attributes + (** A floating-point type. The kind specifies the precision. You can + * also use the predefined constant {!Cil.doubleType}. *) + + | TPtr of typ * attributes + (** Pointer type. Several useful variants are predefined as + * {!Cil.charPtrType}, {!Cil.charConstPtrType} (pointer to a + * constant character), {!Cil.voidPtrType}, + * {!Cil.intPtrType} *) + + | TArray of typ * exp option * attributes + (** Array type. It indicates the base type and the array length. *) + + | TFun of typ * (string * typ * attributes) list option * bool * attributes + (** Function type. Indicates the type of the result, the name, type + * and name attributes of the formal arguments ([None] if no + * arguments were specified, as in a function whose definition or + * prototype we have not seen; [Some \[\]] means void). Use + * {!Cil.argsToList} to obtain a list of arguments. The boolean + * indicates if it is a variable-argument function. If this is the + * type of a varinfo for which we have a function declaration then + * the information for the formals must match that in the + * function's sformals. Use {!Cil.setFormals}, or + * {!Cil.setFunctionType}, or {!Cil.makeFormalVar} for this + * purpose. *) + + | TNamed of typeinfo * attributes + (* The use of a named type. Each such type name must be preceded + * in the file by a [GType] global. This is printed as just the + * type name. The actual referred type is not printed here and is + * carried only to simplify processing. To see through a sequence + * of named type references, use {!Cil.unrollType} or + * {!Cil.unrollTypeDeep}. The attributes are in addition to those + * given when the type name was defined. *) + + | TComp of compinfo * attributes +(** The most delicate issue for C types is that recursion that is possible by + * using structures and pointers. To address this issue we have a more + * complex representation for structured types (struct and union). Each such + * type is represented using the {!Cil.compinfo} type. For each composite + * type the {!Cil.compinfo} structure must be declared at top level using + * [GCompTag] and all references to it must share the same copy of the + * structure. The attributes given are those pertaining to this use of the + * type and are in addition to the attributes that were given at the + * definition of the type and which are stored in the {!Cil.compinfo}. *) + + | TEnum of enuminfo * attributes + (** A reference to an enumeration type. All such references must + share the enuminfo among them and with a [GEnumTag] global that + precedes all uses. The attributes refer to this use of the + enumeration and are in addition to the attributes of the + enumeration itself, which are stored inside the enuminfo *) + + + | TBuiltin_va_list of attributes + (** This is the same as the gcc's type with the same name *) + +(** + There are a number of functions for querying the kind of a type. These are + {!Cil.isIntegralType}, + {!Cil.isArithmeticType}, + {!Cil.isPointerType}, + {!Cil.isFunctionType}, + {!Cil.isArrayType}. + + There are two easy ways to scan a type. First, you can use the +{!Cil.existsType} to return a boolean answer about a type. This function +is controlled by a user-provided function that is queried for each type that is +used to construct the current type. The function can specify whether to +terminate the scan with a boolean result or to continue the scan for the +nested types. + + The other method for scanning types is provided by the visitor interface (see + {!Cil.cilVisitor}). + + If you want to compare types (or to use them as hash-values) then you should +use instead type signatures (represented as {!Cil.typsig}). These +contain the same information as types but canonicalized such that simple Ocaml +structural equality will tell whether two types are equal. Use +{!Cil.typeSig} to compute the signature of a type. If you want to ignore +certain type attributes then use {!Cil.typeSigWithAttrs}. + +*) + + +(** Various kinds of integers *) +and ikind = + IChar (** [char] *) + | ISChar (** [signed char] *) + | IUChar (** [unsigned char] *) + | IInt (** [int] *) + | IUInt (** [unsigned int] *) + | IShort (** [short] *) + | IUShort (** [unsigned short] *) + | ILong (** [long] *) + | IULong (** [unsigned long] *) + | ILongLong (** [long long] (or [_int64] on Microsoft Visual C) *) + | IULongLong (** [unsigned long long] (or [unsigned _int64] on Microsoft + Visual C) *) + +(** Various kinds of floating-point numbers*) +and fkind = + FFloat (** [float] *) + | FDouble (** [double] *) + | FLongDouble (** [long double] *) + + +(** {b Attributes.} *) + +and attribute = Attr of string * attrparam list +(** An attribute has a name and some optional parameters. The name should not + * start or end with underscore. When CIL parses attribute names it will + * strip leading and ending underscores (to ensure that the multitude of GCC + * attributes such as const, __const and __const__ all mean the same thing.) *) + +(** Attributes are lists sorted by the attribute name. Use the functions + * {!Cil.addAttribute} and {!Cil.addAttributes} to insert attributes in an + * attribute list and maintain the sortedness. *) +and attributes = attribute list + +(** The type of parameters of attributes *) +and attrparam = + | AInt of int (** An integer constant *) + | AStr of string (** A string constant *) + | ACons of string * attrparam list (** Constructed attributes. These + are printed [foo(a1,a2,...,an)]. + The list of parameters can be + empty and in that case the + parentheses are not printed. *) + | ASizeOf of typ (** A way to talk about types *) + | ASizeOfE of attrparam + | ASizeOfS of typsig (** Replacement for ASizeOf in type + signatures. Only used for + attributes inside typsigs.*) + | AAlignOf of typ + | AAlignOfE of attrparam + | AAlignOfS of typsig + | AUnOp of unop * attrparam + | ABinOp of binop * attrparam * attrparam + | ADot of attrparam * string (** a.foo **) + +(** {b Structures.} The {!Cil.compinfo} describes the definition of a + * structure or union type. Each such {!Cil.compinfo} must be defined at the + * top-level using the [GCompTag] constructor and must be shared by all + * references to this type (using either the [TComp] type constructor or from + * the definition of the fields. + + If all you need is to scan the definition of each + * composite type once, you can do that by scanning all top-level [GCompTag]. + + * Constructing a {!Cil.compinfo} can be tricky since it must contain fields + * that might refer to the host {!Cil.compinfo} and furthermore the type of + * the field might need to refer to the {!Cil.compinfo} for recursive types. + * Use the {!Cil.mkCompInfo} function to create a {!Cil.compinfo}. You can + * easily fetch the {!Cil.fieldinfo} for a given field in a structure with + * {!Cil.getCompField}. *) + +(** The definition of a structure or union type. Use {!Cil.mkCompInfo} to + * make one and use {!Cil.copyCompInfo} to copy one (this ensures that a new + * key is assigned and that the fields have the right pointers to parents.). *) +and compinfo = { + mutable cstruct: bool; + (** True if struct, False if union *) + mutable cname: string; + (** The name. Always non-empty. Use {!Cil.compFullName} to get the full + * name of a comp (along with the struct or union) *) + mutable ckey: int; + (** A unique integer. This is assigned by {!Cil.mkCompInfo} using a + * global variable in the Cil module. Thus two identical structs in two + * different files might have different keys. Use {!Cil.copyCompInfo} to + * copy structures so that a new key is assigned. *) + mutable cfields: fieldinfo list; + (** Information about the fields. Notice that each fieldinfo has a + * pointer back to the host compinfo. This means that you should not + * share fieldinfo's between two compinfo's *) + mutable cattr: attributes; + (** The attributes that are defined at the same time as the composite + * type. These attributes can be supplemented individually at each + * reference to this [compinfo] using the [TComp] type constructor. *) + mutable cdefined: bool; + (** This boolean flag can be used to distinguish between structures + that have not been defined and those that have been defined but have + no fields (such things are allowed in gcc). *) + mutable creferenced: bool; + (** True if used. Initially set to false. *) + } + +(** {b Structure fields.} The {!Cil.fieldinfo} structure is used to describe + * a structure or union field. Fields, just like variables, can have + * attributes associated with the field itself or associated with the type of + * the field (stored along with the type of the field). *) + +(** Information about a struct/union field *) +and fieldinfo = { + mutable fcomp: compinfo; + (** The host structure that contains this field. There can be only one + * [compinfo] that contains the field. *) + mutable fname: string; + (** The name of the field. Might be the value of {!Cil.missingFieldName} + * in which case it must be a bitfield and is not printed and it does not + * participate in initialization *) + mutable ftype: typ; + (** The type *) + mutable fbitfield: int option; + (** If a bitfield then ftype should be an integer type and the width of + * the bitfield must be 0 or a positive integer smaller or equal to the + * width of the integer type. A field of width 0 is used in C to control + * the alignment of fields. *) + mutable fattr: attributes; + (** The attributes for this field (not for its type) *) + mutable floc: location; + (** The location where this field is defined *) +} + + + +(** {b Enumerations.} Information about an enumeration. This is shared by all + * references to an enumeration. Make sure you have a [GEnumTag] for each of + * of these. *) + +(** Information about an enumeration *) +and enuminfo = { + mutable ename: string; + (** The name. Always non-empty. *) + mutable eitems: (string * exp * location) list; + (** Items with names and values. This list should be non-empty. The item + * values must be compile-time constants. *) + mutable eattr: attributes; + (** The attributes that are defined at the same time as the enumeration + * type. These attributes can be supplemented individually at each + * reference to this [enuminfo] using the [TEnum] type constructor. *) + mutable ereferenced: bool; + (** True if used. Initially set to false*) +} + +(** {b Enumerations.} Information about an enumeration. This is shared by all + * references to an enumeration. Make sure you have a [GEnumTag] for each of + * of these. *) + +(** Information about a defined type *) +and typeinfo = { + mutable tname: string; + (** The name. Can be empty only in a [GType] when introducing a composite + * or enumeration tag. If empty cannot be referred to from the file *) + mutable ttype: typ; + (** The actual type. This includes the attributes that were present in + * the typedef *) + mutable treferenced: bool; + (** True if used. Initially set to false*) +} + +(** {b Variables.} + Each local or global variable is represented by a unique {!Cil.varinfo} +structure. A global {!Cil.varinfo} can be introduced with the [GVarDecl] or +[GVar] or [GFun] globals. A local varinfo can be introduced as part of a +function definition {!Cil.fundec}. + + All references to a given global or local variable must refer to the same +copy of the [varinfo]. Each [varinfo] has a globally unique identifier that +can be used to index maps and hashtables (the name can also be used for this +purpose, except for locals from different functions). This identifier is +constructor using a global counter. + + It is very important that you construct [varinfo] structures using only one + of the following functions: +- {!Cil.makeGlobalVar} : to make a global variable +- {!Cil.makeTempVar} : to make a temporary local variable whose name +will be generated so that to avoid conflict with other locals. +- {!Cil.makeLocalVar} : like {!Cil.makeTempVar} but you can specify the +exact name to be used. +- {!Cil.copyVarinfo}: make a shallow copy of a varinfo assigning a new name +and a new unique identifier + + A [varinfo] is also used in a function type to denote the list of formals. + +*) + +(** Information about a variable. *) +and varinfo = { + mutable vname: string; + (** The name of the variable. Cannot be empty. It is primarily your + * responsibility to ensure the uniqueness of a variable name. For local + * variables {!Cil.makeTempVar} helps you ensure that the name is unique. + *) + + mutable vtype: typ; + (** The declared type of the variable. *) + + mutable vattr: attributes; + (** A list of attributes associated with the variable.*) + mutable vstorage: storage; + (** The storage-class *) + + mutable vglob: bool; + (** True if this is a global variable*) + + mutable vinline: bool; + (** Whether this varinfo is for an inline function. *) + + mutable vdecl: location; + (** Location of variable declaration. *) + + mutable vid: int; + (** A unique integer identifier. This field will be + * set for you if you use one of the {!Cil.makeFormalVar}, + * {!Cil.makeLocalVar}, {!Cil.makeTempVar}, {!Cil.makeGlobalVar}, or + * {!Cil.copyVarinfo}. *) + + mutable vaddrof: bool; + (** True if the address of this variable is taken. CIL will set these + * flags when it parses C, but you should make sure to set the flag + * whenever your transformation create [AddrOf] expression. *) + + mutable vreferenced: bool; + (** True if this variable is ever referenced. This is computed by + * [removeUnusedVars]. It is safe to just initialize this to False *) +} + +(** Storage-class information *) +and storage = + NoStorage (** The default storage. Nothing is printed *) + | Static + | Register + | Extern + + +(** {b Expressions.} The CIL expression language contains only the side-effect free expressions of +C. They are represented as the type {!Cil.exp}. There are several +interesting aspects of CIL expressions: + + Integer and floating point constants can carry their textual representation. +This way the integer 15 can be printed as 0xF if that is how it occurred in the +source. + + CIL uses 64 bits to represent the integer constants and also stores the width +of the integer type. Care must be taken to ensure that the constant is +representable with the given width. Use the functions {!Cil.kinteger}, +{!Cil.kinteger64} and {!Cil.integer} to construct constant +expressions. CIL predefines the constants {!Cil.zero}, +{!Cil.one} and {!Cil.mone} (for -1). + + Use the functions {!Cil.isConstant} and {!Cil.isInteger} to test if +an expression is a constant and a constant integer respectively. + + CIL keeps the type of all unary and binary expressions. You can think of that +type qualifying the operator. Furthermore there are different operators for +arithmetic and comparisons on arithmetic types and on pointers. + + Another unusual aspect of CIL is that the implicit conversion between an +expression of array type and one of pointer type is made explicit, using the +[StartOf] expression constructor (which is not printed). If you apply the +[AddrOf}]constructor to an lvalue of type [T] then you will be getting an +expression of type [TPtr(T)]. + + You can find the type of an expression with {!Cil.typeOf}. + + You can perform constant folding on expressions using the function +{!Cil.constFold}. +*) + +(** Expressions (Side-effect free)*) +and exp = + Const of constant (** Constant *) + | Lval of lval (** Lvalue *) + | SizeOf of typ + (** sizeof(). Has [unsigned int] type (ISO 6.5.3.4). This is not + * turned into a constant because some transformations might want to + * change types *) + + | SizeOfE of exp + (** sizeof() *) + + | SizeOfStr of string + (** sizeof(string_literal). We separate this case out because this is the + * only instance in which a string literal should not be treated as + * having type pointer to character. *) + + | AlignOf of typ + (** This corresponds to the GCC __alignof_. Has [unsigned int] type *) + | AlignOfE of exp + + + | UnOp of unop * exp * typ + (** Unary operation. Includes the type of the result. *) + + | BinOp of binop * exp * exp * typ + (** Binary operation. Includes the type of the result. The arithmetic + * conversions are made explicit for the arguments. *) + + | CastE of typ * exp + (** Use {!Cil.mkCast} to make casts. *) + + | AddrOf of lval + (** Always use {!Cil.mkAddrOf} to construct one of these. Apply to an + * lvalue of type [T] yields an expression of type [TPtr(T)] *) + + | StartOf of lval + (** Conversion from an array to a pointer to the beginning of the array. + * Given an lval of type [TArray(T)] produces an expression of type + * [TPtr(T)]. In C this operation is implicit, the [StartOf] operator is + * not printed. We have it in CIL because it makes the typing rules + * simpler. *) + +(** {b Constants.} *) + +(** Literal constants *) +and constant = + | CInt64 of int64 * ikind * string option + (** Integer constant. Give the ikind (see ISO9899 6.1.3.2) and the + * textual representation, if available. (This allows us to print a + * constant as, for example, 0xF instead of 15.) Use {!Cil.integer} or + * {!Cil.kinteger} to create these. Watch out for integers that cannot be + * represented on 64 bits. OCAML does not give Overflow exceptions. *) + | CStr of string + (* String constant. The escape characters inside the string have been + * already interpreted. This constant has pointer to character type! The + * only case when you would like a string literal to have an array type + * is when it is an argument to sizeof. In that case you should use + * SizeOfStr. *) + | CWStr of int64 list + (* Wide character string constant. Note that the local interpretation + * of such a literal depends on {!Cil.wcharType} and {!Cil.wcharKind}. + * Such a constant has type pointer to {!Cil.wcharType}. The + * escape characters in the string have not been "interpreted" in + * the sense that L"A\xabcd" remains "A\xabcd" rather than being + * represented as the wide character list with two elements: 65 and + * 43981. That "interpretation" depends on the underlying wide + * character type. *) + | CChr of char + (** Character constant. This has type int, so use charConstToInt + * to read the value in case sign-extension is needed. *) + | CReal of float * fkind * string option + (** Floating point constant. Give the fkind (see ISO 6.4.4.2) and also + * the textual representation, if available. *) + | CEnum of exp * string * enuminfo + (** An enumeration constant with the given value, name, from the given + * enuminfo. This is used only if {!Cil.lowerConstants} is true + * (default). Use {!Cil.constFoldVisitor} to replace these with integer + * constants. *) + +(** Unary operators *) +and unop = + Neg (** Unary minus *) + | BNot (** Bitwise complement (~) *) + | LNot (** Logical Not (!) *) + +(** Binary operations *) +and binop = + PlusA (** arithmetic + *) + | PlusPI (** pointer + integer *) + | IndexPI (** pointer + integer but only when + * it arises from an expression + * [e\[i\]] when [e] is a pointer and + * not an array. This is semantically + * the same as PlusPI but CCured uses + * this as a hint that the integer is + * probably positive. *) + | MinusA (** arithmetic - *) + | MinusPI (** pointer - integer *) + | MinusPP (** pointer - pointer *) + | Mult (** * *) + | Div (** / *) + | Mod (** % *) + | Shiftlt (** shift left *) + | Shiftrt (** shift right *) + + | Lt (** < (arithmetic comparison) *) + | Gt (** > (arithmetic comparison) *) + | Le (** <= (arithmetic comparison) *) + | Ge (** > (arithmetic comparison) *) + | Eq (** == (arithmetic comparison) *) + | Ne (** != (arithmetic comparison) *) + | BAnd (** bitwise and *) + | BXor (** exclusive-or *) + | BOr (** inclusive-or *) + + | LAnd (** logical and. Unlike other + * expressions this one does not + * always evaluate both operands. If + * you want to use these, you must + * set {!Cil.useLogicalOperators}. *) + | LOr (** logical or. Unlike other + * expressions this one does not + * always evaluate both operands. If + * you want to use these, you must + * set {!Cil.useLogicalOperators}. *) + +(** {b Lvalues.} Lvalues are the sublanguage of expressions that can appear at the left of an assignment or as operand to the address-of operator. +In C the syntax for lvalues is not always a good indication of the meaning +of the lvalue. For example the C value +{v +a[0][1][2] + v} + might involve 1, 2 or 3 memory reads when used in an expression context, +depending on the declared type of the variable [a]. If [a] has type [int +\[4\]\[4\]\[4\]] then we have one memory read from somewhere inside the area +that stores the array [a]. On the other hand if [a] has type [int ***] then +the expression really means [* ( * ( * (a + 0) + 1) + 2)], in which case it is +clear that it involves three separate memory operations. + +An lvalue denotes the contents of a range of memory addresses. This range +is denoted as a host object along with an offset within the object. The +host object can be of two kinds: a local or global variable, or an object +whose address is in a pointer expression. We distinguish the two cases so +that we can tell quickly whether we are accessing some component of a +variable directly or we are accessing a memory location through a pointer. +To make it easy to +tell what an lvalue means CIL represents lvalues as a host object and an +offset (see {!Cil.lval}). The host object (represented as +{!Cil.lhost}) can be a local or global variable or can be the object +pointed-to by a pointer expression. The offset (represented as +{!Cil.offset}) is a sequence of field or array index designators. + + Both the typing rules and the meaning of an lvalue is very precisely +specified in CIL. + + The following are a few useful function for operating on lvalues: +- {!Cil.mkMem} - makes an lvalue of [Mem] kind. Use this to ensure +that certain equivalent forms of lvalues are canonized. +For example, [*&x = x]. +- {!Cil.typeOfLval} - the type of an lvalue +- {!Cil.typeOffset} - the type of an offset, given the type of the +host. +- {!Cil.addOffset} and {!Cil.addOffsetLval} - extend sequences +of offsets. +- {!Cil.removeOffset} and {!Cil.removeOffsetLval} - shrink sequences +of offsets. + +The following equivalences hold {v +Mem(AddrOf(Mem a, aoff)), off = Mem a, aoff + off +Mem(AddrOf(Var v, aoff)), off = Var v, aoff + off +AddrOf (Mem a, NoOffset) = a + v} + +*) +(** An lvalue *) +and lval = + lhost * offset + +(** The host part of an {!Cil.lval}. *) +and lhost = + | Var of varinfo + (** The host is a variable. *) + + | Mem of exp + (** The host is an object of type [T] when the expression has pointer + * [TPtr(T)]. *) + + +(** The offset part of an {!Cil.lval}. Each offset can be applied to certain + * kinds of lvalues and its effect is that it advances the starting address + * of the lvalue and changes the denoted type, essentially focusing to some + * smaller lvalue that is contained in the original one. *) +and offset = + | NoOffset (** No offset. Can be applied to any lvalue and does + * not change either the starting address or the type. + * This is used when the lval consists of just a host + * or as a terminator in a list of other kinds of + * offsets. *) + + | Field of fieldinfo * offset + (** A field offset. Can be applied only to an lvalue + * that denotes a structure or a union that contains + * the mentioned field. This advances the offset to the + * beginning of the mentioned field and changes the + * type to the type of the mentioned field. *) + + | Index of exp * offset + (** An array index offset. Can be applied only to an + * lvalue that denotes an array. This advances the + * starting address of the lval to the beginning of the + * mentioned array element and changes the denoted type + * to be the type of the array element *) + + +(** {b Initializers.} +A special kind of expressions are those that can appear as initializers for +global variables (initialization of local variables is turned into +assignments). The initializers are represented as type {!Cil.init}. You +can create initializers with {!Cil.makeZeroInit} and you can conveniently +scan compound initializers them with {!Cil.foldLeftCompound} or with {!Cil.foldLeftCompoundAll}. +*) +(** Initializers for global variables. *) +and init = + | SingleInit of exp (** A single initializer *) + | CompoundInit of typ * (offset * init) list + (** Used only for initializers of structures, unions and arrays. The + * offsets are all of the form [Field(f, NoOffset)] or [Index(i, + * NoOffset)] and specify the field or the index being initialized. For + * structures all fields must have an initializer (except the unnamed + * bitfields), in the proper order. This is necessary since the offsets + * are not printed. For unions there must be exactly one initializer. If + * the initializer is not for the first field then a field designator is + * printed, so you better be on GCC since MSVC does not understand this. + * For arrays, however, we allow you to give only a prefix of the + * initializers. You can scan an initializer list with + * {!Cil.foldLeftCompound} or with {!Cil.foldLeftCompoundAll}. *) + + +(** We want to be able to update an initializer in a global variable, so we + * define it as a mutable field *) +and initinfo = { + mutable init : init option; + } + +(** {b Function definitions.} +A function definition is always introduced with a [GFun] constructor at the +top level. All the information about the function is stored into a +{!Cil.fundec}. Some of the information (e.g. its name, type, +storage, attributes) is stored as a {!Cil.varinfo} that is a field of the +[fundec]. To refer to the function from the expression language you must use +the [varinfo]. + + The function definition contains, in addition to the body, a list of all the +local variables and separately a list of the formals. Both kind of variables +can be referred to in the body of the function. The formals must also be shared +with the formals that appear in the function type. For that reason, to +manipulate formals you should use the provided functions +{!Cil.makeFormalVar} and {!Cil.setFormals} and {!Cil.makeFormalVar}. +*) +(** Function definitions. *) +and fundec = + { mutable svar: varinfo; + (** Holds the name and type as a variable, so we can refer to it + * easily from the program. All references to this function either + * in a function call or in a prototype must point to the same + * [varinfo]. *) + mutable sformals: varinfo list; + (** Formals. These must be in the same order and with the same + * information as the formal information in the type of the function. + * Use {!Cil.setFormals} or + * {!Cil.setFunctionType} or {!Cil.makeFormalVar} + * to set these formals and ensure that they + * are reflected in the function type. Do not make copies of these + * because the body refers to them. *) + mutable slocals: varinfo list; + (** Locals. Does NOT include the sformals. Do not make copies of + * these because the body refers to them. *) + mutable smaxid: int; (** Max local id. Starts at 0. Used for + * creating the names of new temporary + * variables. Updated by + * {!Cil.makeLocalVar} and + * {!Cil.makeTempVar}. You can also use + * {!Cil.setMaxId} to set it after you + * have added the formals and locals. *) + mutable sbody: block; (** The function body. *) + mutable smaxstmtid: int option; (** max id of a (reachable) statement + * in this function, if we have + * computed it. range = 0 ... + * (smaxstmtid-1). This is computed by + * {!Cil.computeCFGInfo}. *) + mutable sallstmts: stmt list; (** After you call {!Cil.computeCFGInfo} + * this field is set to contain all + * statements in the function *) + } + + +(** A block is a sequence of statements with the control falling through from + one element to the next *) +and block = + { mutable battrs: attributes; (** Attributes for the block *) + mutable bstmts: stmt list; (** The statements comprising the block*) + } + + +(** {b Statements}. +CIL statements are the structural elements that make the CFG. They are +represented using the type {!Cil.stmt}. Every +statement has a (possibly empty) list of labels. The +{!Cil.stmtkind} field of a statement indicates what kind of statement it +is. + + Use {!Cil.mkStmt} to make a statement and the fill-in the fields. + +CIL also comes with support for control-flow graphs. The [sid] field in +[stmt] can be used to give unique numbers to statements, and the [succs] +and [preds] fields can be used to maintain a list of successors and +predecessors for every statement. The CFG information is not computed by +default. Instead you must explicitly use the functions +{!Cil.prepareCFG} and {!Cil.computeCFGInfo} to do it. + +*) +(** Statements. *) +and stmt = { + mutable labels: label list; + (** Whether the statement starts with some labels, case statements or + * default statements. *) + + mutable skind: stmtkind; + (** The kind of statement *) + + mutable sid: int; + (** A number (>= 0) that is unique in a function. Filled in only after + * the CFG is computed. *) + mutable succs: stmt list; + (** The successor statements. They can always be computed from the skind + * and the context in which this statement appears. Filled in only after + * the CFG is computed. *) + mutable preds: stmt list; + (** The inverse of the succs function. *) + } + +(** Labels *) +and label = + Label of string * location * bool + (** A real label. If the bool is "true", the label is from the + * input source program. If the bool is "false", the label was + * created by CIL or some other transformation *) + | Case of exp * location (** A case statement. This expression + * is lowered into a constant if + * {!Cil.lowerConstants} is set to + * true. *) + | Default of location (** A default statement *) + + + +(** The various kinds of control-flow statements statements *) +and stmtkind = + | Instr of instr list + (** A group of instructions that do not contain control flow. Control + * implicitly falls through. *) + + | Return of exp option * location + (** The return statement. This is a leaf in the CFG. *) + + | Goto of stmt ref * location + (** A goto statement. Appears from actual goto's in the code or from + * goto's that have been inserted during elaboration. The reference + * points to the statement that is the target of the Goto. This means that + * you have to update the reference whenever you replace the target + * statement. The target statement MUST have at least a label. *) + + | Break of location + (** A break to the end of the nearest enclosing loop or Switch *) + + | Continue of location + (** A continue to the start of the nearest enclosing loop *) + | If of exp * block * block * location + (** A conditional. Two successors, the "then" and the "else" branches. + * Both branches fall-through to the successor of the If statement. *) + + | Switch of exp * block * (stmt list) * location + (** A switch statement. The statements that implement the cases can be + * reached through the provided list. For each such target you can find + * among its labels what cases it implements. The statements that + * implement the cases are somewhere within the provided [block]. *) + +(* + | Loop of block * location * (stmt option) * (stmt option) + (** A [while(1)] loop. The termination test is implemented in the body of + * a loop using a [Break] statement. If prepareCFG has been called, + * the first stmt option will point to the stmt containing the continue + * label for this loop and the second will point to the stmt containing + * the break label for this loop. *) +*) + + | While of exp * block * location + (** A [while] loop. *) + + | DoWhile of exp * block * location + (** A [do...while] loop. *) + + | For of block * exp * block * block * location + (** A [for] loop. *) + + | Block of block + (** Just a block of statements. Use it as a way to keep some block + * attributes local *) + + (** On MSVC we support structured exception handling. This is what you + * might expect. Control can get into the finally block either from the + * end of the body block, or if an exception is thrown. *) + | TryFinally of block * block * location + + (** On MSVC we support structured exception handling. The try/except + * statement is a bit tricky: + [__try { blk } + __except (e) { + handler + }] + + The argument to __except must be an expression. However, we keep a + list of instructions AND an expression in case you need to make + function calls. We'll print those as a comma expression. The control + can get to the __except expression only if an exception is thrown. + After that, depending on the value of the expression the control + goes to the handler, propagates the exception, or retries the + exception !!! + *) + | TryExcept of block * (instr list * exp) * block * location + + +(** {b Instructions}. + An instruction {!Cil.instr} is a statement that has no local +(intraprocedural) control flow. It can be either an assignment, +function call, or an inline assembly instruction. *) + +(** Instructions. *) +and instr = + Set of lval * exp * location + (** An assignment. The type of the expression is guaranteed to be the same + * with that of the lvalue *) + | Call of lval option * exp * exp list * location + (** A function call with the (optional) result placed in an lval. It is + * possible that the returned type of the function is not identical to + * that of the lvalue. In that case a cast is printed. The type of the + * actual arguments are identical to those of the declared formals. The + * number of arguments is the same as that of the declared formals, except + * for vararg functions. This construct is also used to encode a call to + * "__builtin_va_arg". In this case the second argument (which should be a + * type T) is encoded SizeOf(T) *) + + | Asm of attributes * (* Really only const and volatile can appear + * here *) + string list * (* templates (CR-separated) *) + (string * lval) list * (* outputs must be lvals with + * constraints. I would like these + * to be actually variables, but I + * run into some trouble with ASMs + * in the Linux sources *) + (string * exp) list * (* inputs with constraints *) + string list * (* register clobbers *) + location + (** There are for storing inline assembly. They follow the GCC + * specification: +{v + asm [volatile] ("...template..." "..template.." + : "c1" (o1), "c2" (o2), ..., "cN" (oN) + : "d1" (i1), "d2" (i2), ..., "dM" (iM) + : "r1", "r2", ..., "nL" ); + v} + +where the parts are + + - [volatile] (optional): when present, the assembler instruction + cannot be removed, moved, or otherwise optimized + - template: a sequence of strings, with %0, %1, %2, etc. in the string to + refer to the input and output expressions. I think they're numbered + consecutively, but the docs don't specify. Each string is printed on + a separate line. This is the only part that is present for MSVC inline + assembly. + - "ci" (oi): pairs of constraint-string and output-lval; the + constraint specifies that the register used must have some + property, like being a floating-point register; the constraint + string for outputs also has "=" to indicate it is written, or + "+" to indicate it is both read and written; 'oi' is the + name of a C lvalue (probably a variable name) to be used as + the output destination + - "dj" (ij): pairs of constraint and input expression; the constraint + is similar to the "ci"s. the 'ij' is an arbitrary C expression + to be loaded into the corresponding register + - "rk": registers to be regarded as "clobbered" by the instruction; + "memory" may be specified for arbitrary memory effects + +an example (from gcc manual): +{v + asm volatile ("movc3 %0,%1,%2" + : /* no outputs */ + : "g" (from), "g" (to), "g" (count) + : "r0", "r1", "r2", "r3", "r4", "r5"); + v} +*) + +(** Describes a location in a source file. *) +and location = { + line: int; (** The line number. -1 means "do not know" *) + file: string; (** The name of the source file*) + byte: int; (** The byte position in the source file *) +} + + +(** Type signatures. Two types are identical iff they have identical + * signatures. These contain the same information as types but canonicalized. + * For example, two function types that are identical except for the name of + * the formal arguments are given the same signature. Also, [TNamed] + * constructors are unrolled. *) +and typsig = + TSArray of typsig * int64 option * attribute list + | TSPtr of typsig * attribute list + | TSComp of bool * string * attribute list + | TSFun of typsig * typsig list * bool * attribute list + | TSEnum of string * attribute list + | TSBase of typ + + + +(** {b Lowering Options} *) + +val lowerConstants: bool ref + (** Do lower constants (default true) *) + +val insertImplicitCasts: bool ref + (** Do insert implicit casts (default true) *) + +(** To be able to add/remove features easily, each feature should be package + * as an interface with the following interface. These features should be *) +type featureDescr = { + fd_enabled: bool ref; + (** The enable flag. Set to default value *) + + fd_name: string; + (** This is used to construct an option "--doxxx" and "--dontxxx" that + * enable and disable the feature *) + + fd_description: string; + (* A longer name that can be used to document the new options *) + + fd_extraopt: (string * Arg.spec * string) list; + (** Additional command line options *) + + fd_doit: (file -> unit); + (** This performs the transformation *) + + fd_post_check: bool; + (* Whether to perform a CIL consistency checking after this stage, if + * checking is enabled (--check is passed to cilly). Set this to true if + * your feature makes any changes for the program. *) +} + +(** Comparison function for locations. + ** Compares first by filename, then line, then byte *) +val compareLoc: location -> location -> int + +(** {b Values for manipulating globals} *) + +(** Make an empty function *) +val emptyFunction: string -> fundec + +(** Update the formals of a [fundec] and make sure that the function type + has the same information. Will copy the name as well into the type. *) +val setFormals: fundec -> varinfo list -> unit + +(** Set the types of arguments and results as given by the function type + * passed as the second argument. Will not copy the names from the function + * type to the formals *) +val setFunctionType: fundec -> typ -> unit + + +(** Set the type of the function and make formal arguments for them *) +val setFunctionTypeMakeFormals: fundec -> typ -> unit + +(** Update the smaxid after you have populated with locals and formals + * (unless you constructed those using {!Cil.makeLocalVar} or + * {!Cil.makeTempVar}. *) +val setMaxId: fundec -> unit + +(** A dummy function declaration handy when you need one as a placeholder. It + * contains inside a dummy varinfo. *) +val dummyFunDec: fundec + +(** A dummy file *) +val dummyFile: file + +(** Write a {!Cil.file} in binary form to the filesystem. The file can be + * read back in later using {!Cil.loadBinaryFile}, possibly saving parsing + * time. The second argument is the name of the file that should be + * created. *) +val saveBinaryFile : file -> string -> unit + +(** Write a {!Cil.file} in binary form to the filesystem. The file can be + * read back in later using {!Cil.loadBinaryFile}, possibly saving parsing + * time. Does not close the channel. *) +val saveBinaryFileChannel : file -> out_channel -> unit + +(** Read a {!Cil.file} in binary form from the filesystem. The first + * argument is the name of a file previously created by + * {!Cil.saveBinaryFile}. *) +val loadBinaryFile : string -> file + +(** Get the global initializer and create one if it does not already exist. + * When it creates a global initializer it attempts to place a call to it in + * the main function named by the optional argument (default "main") *) +val getGlobInit: ?main_name:string -> file -> fundec + +(** Iterate over all globals, including the global initializer *) +val iterGlobals: file -> (global -> unit) -> unit + +(** Fold over all globals, including the global initializer *) +val foldGlobals: file -> ('a -> global -> 'a) -> 'a -> 'a + +(** Map over all globals, including the global initializer and change things + in place *) +val mapGlobals: file -> (global -> global) -> unit + +val new_sid : unit -> int + +(** Prepare a function for CFG information computation by + * {!Cil.computeCFGInfo}. This function converts all [Break], [Switch], + * [Default] and [Continue] {!Cil.stmtkind}s and {!Cil.label}s into [If]s + * and [Goto]s, giving the function body a very CFG-like character. This + * function modifies its argument in place. *) +val prepareCFG: fundec -> unit + +(** Compute the CFG information for all statements in a fundec and return a + * list of the statements. The input fundec cannot have [Break], [Switch], + * [Default], or [Continue] {!Cil.stmtkind}s or {!Cil.label}s. Use + * {!Cil.prepareCFG} to transform them away. The second argument should + * be [true] if you wish a global statement number, [false] if you wish a + * local (per-function) statement numbering. The list of statements is set + * in the sallstmts field of a fundec. + * + * NOTE: unless you want the simpler control-flow graph provided by + * prepareCFG, or you need the function's smaxstmtid and sallstmt fields + * filled in, we recommend you use {!Cfg.computeFileCFG} instead of this + * function to compute control-flow information. + * {!Cfg.computeFileCFG} is newer and will handle switch, break, and + * continue correctly.*) +val computeCFGInfo: fundec -> bool -> unit + + +(** Create a deep copy of a function. There should be no sharing between the + * copy and the original function *) +val copyFunction: fundec -> string -> fundec + + +(** CIL keeps the types at the beginning of the file and the variables at the + * end of the file. This function will take a global and add it to the + * corresponding stack. Its operation is actually more complicated because if + * the global declares a type that contains references to variables (e.g. in + * sizeof in an array length) then it will also add declarations for the + * variables to the types stack *) +val pushGlobal: global -> types: global list ref + -> variables: global list ref -> unit + +(** An empty statement. Used in pretty printing *) +val invalidStmt: stmt + +(** A list of the GCC built-in functions. Maps the name to the result and + * argument types, and whether it is vararg *) +val gccBuiltins: (string, typ * typ list * bool) Hashtbl.t + + +(** A list of the MSVC built-in functions. Maps the name to the result and + * argument types, and whether it is vararg *) +val msvcBuiltins: (string, typ * typ list * bool) Hashtbl.t + +(** {b Values for manipulating initializers} *) + + +(** Make a initializer for zero-ing a data type *) +val makeZeroInit: typ -> init + + +(** Fold over the list of initializers in a Compound. [doinit] is called on + * every present initializer, even if it is of compound type. In the case of + * arrays there might be missing zero-initializers at the end of the list. + * These are not scanned. This is much like [List.fold_left] except we also + * pass the type of the initializer *) +val foldLeftCompound: + doinit: (offset -> init -> typ -> 'a -> 'a) -> + ct: typ -> + initl: (offset * init) list -> + acc: 'a -> 'a + + +(** Fold over the list of initializers in a Compound, like + * {!Cil.foldLeftCompound} but in the case of an array it scans even missing + * zero initializers at the end of the array *) +val foldLeftCompoundAll: + doinit: (offset -> init -> typ -> 'a -> 'a) -> + ct: typ -> + initl: (offset * init) list -> + acc: 'a -> 'a + + + +(** {b Values for manipulating types} *) + +(** void *) +val voidType: typ + +(* is the given type "void"? *) +val isVoidType: typ -> bool + +(* is the given type "void *"? *) +val isVoidPtrType: typ -> bool + +(** int *) +val intType: typ + +(** unsigned int *) +val uintType: typ + +(** long *) +val longType: typ + +(** unsigned long *) +val ulongType: typ + +(** char *) +val charType: typ + +(** char * *) +val charPtrType: typ + +(** wchar_t (depends on architecture) and is set when you call + * {!Cil.initCIL}. *) +val wcharKind: ikind ref +val wcharType: typ ref + +(** char const * *) +val charConstPtrType: typ + +(** void * *) +val voidPtrType: typ + +(** int * *) +val intPtrType: typ + +(** unsigned int * *) +val uintPtrType: typ + +(** double *) +val doubleType: typ + +(* An unsigned integer type that fits pointers. Depends on {!Cil.msvcMode} + * and is set when you call {!Cil.initCIL}. *) +val upointType: typ ref + +(* An unsigned integer type that is the type of sizeof. Depends on + * {!Cil.msvcMode} and is set when you call {!Cil.initCIL}. *) +val typeOfSizeOf: typ ref + +(** Returns true if and only if the given integer type is signed. *) +val isSigned: ikind -> bool + + +(** Creates a a (potentially recursive) composite type. The arguments are: + * (1) a boolean indicating whether it is a struct or a union, (2) the name + * (always non-empty), (3) a function that when given a representation of the + * structure type constructs the type of the fields recursive type (the first + * argument is only useful when some fields need to refer to the type of the + * structure itself), and (4) a list of attributes to be associated with the + * composite type. The resulting compinfo has the field "cdefined" only if + * the list of fields is non-empty. *) +val mkCompInfo: bool -> (* whether it is a struct or a union *) + string -> (* name of the composite type; cannot be empty *) + (compinfo -> + (string * typ * int option * attributes * location) list) -> + (* a function that when given a forward + representation of the structure type constructs the type of + the fields. The function can ignore this argument if not + constructing a recursive type. *) + attributes -> compinfo + +(** Makes a shallow copy of a {!Cil.compinfo} changing the name and the key.*) +val copyCompInfo: compinfo -> string -> compinfo + +(** This is a constant used as the name of an unnamed bitfield. These fields + do not participate in initialization and their name is not printed. *) +val missingFieldName: string + +(** Get the full name of a comp *) +val compFullName: compinfo -> string + +(** Returns true if this is a complete type. + This means that sizeof(t) makes sense. + Incomplete types are not yet defined + structures and empty arrays. *) +val isCompleteType: typ -> bool + +(** Unroll a type until it exposes a non + * [TNamed]. Will collect all attributes appearing in [TNamed]!!! *) +val unrollType: typ -> typ + +(** Unroll all the TNamed in a type (even under type constructors such as + * [TPtr], [TFun] or [TArray]. Does not unroll the types of fields in [TComp] + * types. Will collect all attributes *) +val unrollTypeDeep: typ -> typ + +(** Separate out the storage-modifier name attributes *) +val separateStorageModifiers: attribute list -> attribute list * attribute list + +(** True if the argument is an integral type (i.e. integer or enum) *) +val isIntegralType: typ -> bool + +(** True if the argument is an arithmetic type (i.e. integer, enum or + floating point *) +val isArithmeticType: typ -> bool + +(**True if the argument is a pointer type *) +val isPointerType: typ -> bool + +(** True if the argument is a function type *) +val isFunctionType: typ -> bool + +(** Obtain the argument list ([] if None) *) +val argsToList: (string * typ * attributes) list option + -> (string * typ * attributes) list + +(** True if the argument is an array type *) +val isArrayType: typ -> bool + +(** Raised when {!Cil.lenOfArray} fails either because the length is [None] + * or because it is a non-constant expression *) +exception LenOfArray + +(** Call to compute the array length as present in the array type, to an + * integer. Raises {!Cil.LenOfArray} if not able to compute the length, such + * as when there is no length or the length is not a constant. *) +val lenOfArray: exp option -> int + +(** Return a named fieldinfo in compinfo, or raise Not_found *) +val getCompField: compinfo -> string -> fieldinfo + + +(** A datatype to be used in conjunction with [existsType] *) +type existsAction = + ExistsTrue (* We have found it *) + | ExistsFalse (* Stop processing this branch *) + | ExistsMaybe (* This node is not what we are + * looking for but maybe its + * successors are *) + +(** Scans a type by applying the function on all elements. + When the function returns ExistsTrue, the scan stops with + true. When the function returns ExistsFalse then the current branch is not + scanned anymore. Care is taken to + apply the function only once on each composite type, thus avoiding + circularity. When the function returns ExistsMaybe then the types that + construct the current type are scanned (e.g. the base type for TPtr and + TArray, the type of fields for a TComp, etc). *) +val existsType: (typ -> existsAction) -> typ -> bool + + +(** Given a function type split it into return type, + * arguments, is_vararg and attributes. An error is raised if the type is not + * a function type *) +val splitFunctionType: + typ -> typ * (string * typ * attributes) list option * bool * attributes +(** Same as {!Cil.splitFunctionType} but takes a varinfo. Prints a nicer + * error message if the varinfo is not for a function *) +val splitFunctionTypeVI: + varinfo -> typ * (string * typ * attributes) list option * bool * attributes + + +(** {b Type signatures} *) + +(** Type signatures. Two types are identical iff they have identical + * signatures. These contain the same information as types but canonicalized. + * For example, two function types that are identical except for the name of + * the formal arguments are given the same signature. Also, [TNamed] + * constructors are unrolled. You shoud use [Util.equals] to compare type + * signatures because they might still contain circular structures (through + * attributes, and sizeof) *) + +(** Print a type signature *) +val d_typsig: unit -> typsig -> Pretty.doc + +(** Compute a type signature *) +val typeSig: typ -> typsig + +(** Like {!Cil.typeSig} but customize the incorporation of attributes. + Use ~ignoreSign:true to convert all signed integer types to unsigned, + so that signed and unsigned will compare the same. *) +val typeSigWithAttrs: ?ignoreSign:bool -> (attributes -> attributes) -> typ -> typsig + +(** Replace the attributes of a signature (only at top level) *) +val setTypeSigAttrs: attributes -> typsig -> typsig + +(** Get the top-level attributes of a signature *) +val typeSigAttrs: typsig -> attributes + +(*********************************************************) +(** LVALUES *) + +(** Make a varinfo. Use this (rarely) to make a raw varinfo. Use other + * functions to make locals ({!Cil.makeLocalVar} or {!Cil.makeFormalVar} or + * {!Cil.makeTempVar}) and globals ({!Cil.makeGlobalVar}). Note that this + * function will assign a new identifier. The first argument specifies + * whether the varinfo is for a global. *) +val makeVarinfo: bool -> string -> typ -> varinfo + +(** Make a formal variable for a function. Insert it in both the sformals + and the type of the function. You can optionally specify where to insert + this one. If where = "^" then it is inserted first. If where = "$" then + it is inserted last. Otherwise where must be the name of a formal after + which to insert this. By default it is inserted at the end. *) +val makeFormalVar: fundec -> ?where:string -> string -> typ -> varinfo + +(** Make a local variable and add it to a function's slocals (only if insert = + true, which is the default). Make sure you know what you are doing if you + set insert=false. *) +val makeLocalVar: fundec -> ?insert:bool -> string -> typ -> varinfo + +(** Make a temporary variable and add it to a function's slocals. The name of + the temporary variable will be generated based on the given name hint so + that to avoid conflicts with other locals. *) +val makeTempVar: fundec -> ?name: string -> typ -> varinfo + + +(** Make a global variable. Your responsibility to make sure that the name + is unique *) +val makeGlobalVar: string -> typ -> varinfo + +(** Make a shallow copy of a [varinfo] and assign a new identifier *) +val copyVarinfo: varinfo -> string -> varinfo + + +(** Generate a new variable ID. This will be different than any variable ID + * that is generated by {!Cil.makeLocalVar} and friends *) +val newVID: unit -> int + +(** Add an offset at the end of an lvalue. Make sure the type of the lvalue + * and the offset are compatible. *) +val addOffsetLval: offset -> lval -> lval + +(** [addOffset o1 o2] adds [o1] to the end of [o2]. *) +val addOffset: offset -> offset -> offset + +(** Remove ONE offset from the end of an lvalue. Returns the lvalue with the + * trimmed offset and the final offset. If the final offset is [NoOffset] + * then the original [lval] did not have an offset. *) +val removeOffsetLval: lval -> lval * offset + +(** Remove ONE offset from the end of an offset sequence. Returns the + * trimmed offset and the final offset. If the final offset is [NoOffset] + * then the original [lval] did not have an offset. *) +val removeOffset: offset -> offset * offset + +(** Compute the type of an lvalue *) +val typeOfLval: lval -> typ + +(** Compute the type of an offset from a base type *) +val typeOffset: typ -> offset -> typ + + +(*******************************************************) +(** {b Values for manipulating expressions} *) + + +(* Construct integer constants *) + +(** 0 *) +val zero: exp + +(** 1 *) +val one: exp + +(** -1 *) +val mone: exp + + +(** Construct an integer of a given kind, using OCaml's int64 type. If needed + * it will truncate the integer to be within the representable range for the + * given kind. *) +val kinteger64: ikind -> int64 -> exp + +(** Construct an integer of a given kind. Converts the integer to int64 and + * then uses kinteger64. This might truncate the value if you use a kind + * that cannot represent the given integer. This can only happen for one of + * the Char or Short kinds *) +val kinteger: ikind -> int -> exp + +(** Construct an integer of kind IInt. You can use this always since the + OCaml integers are 31 bits and are guaranteed to fit in an IInt *) +val integer: int -> exp + + +(** True if the given expression is a (possibly cast'ed) + character or an integer constant *) +val isInteger: exp -> int64 option + +(** True if the expression is a compile-time constant *) +val isConstant: exp -> bool + +(** True if the given expression is a (possibly cast'ed) integer or character + constant with value zero *) +val isZero: exp -> bool + +(** Given the character c in a (CChr c), sign-extend it to 32 bits. + (This is the official way of interpreting character constants, according to + ISO C 6.4.4.4.10, which says that character constants are chars cast to ints) + Returns CInt64(sign-extened c, IInt, None) *) +val charConstToInt: char -> constant + +(** Do constant folding on an expression. If the first argument is true then + will also compute compiler-dependent expressions such as sizeof *) +val constFold: bool -> exp -> exp + +(** Do constant folding on a binary operation. The bulk of the work done by + [constFold] is done here. If the first argument is true then + will also compute compiler-dependent expressions such as sizeof *) +val constFoldBinOp: bool -> binop -> exp -> exp -> typ -> exp + +(** Increment an expression. Can be arithmetic or pointer type *) +val increm: exp -> int -> exp + + +(** Makes an lvalue out of a given variable *) +val var: varinfo -> lval + +(** Make an AddrOf. Given an lvalue of type T will give back an expression of + type ptr(T). It optimizes somewhat expressions like "& v" and "& v[0]" *) +val mkAddrOf: lval -> exp + + +(** Like mkAddrOf except if the type of lval is an array then it uses + StartOf. This is the right operation for getting a pointer to the start + of the storage denoted by lval. *) +val mkAddrOrStartOf: lval -> exp + +(** Make a Mem, while optimizing AddrOf. The type of the addr must be + TPtr(t) and the type of the resulting lval is t. Note that in CIL the + implicit conversion between an array and the pointer to the first + element does not apply. You must do the conversion yourself using + StartOf *) +val mkMem: addr:exp -> off:offset -> lval + +(** Make an expression that is a string constant (of pointer type) *) +val mkString: string -> exp + +(** Construct a cast when having the old type of the expression. If the new + * type is the same as the old type, then no cast is added. *) +val mkCastT: e:exp -> oldt:typ -> newt:typ -> exp + +(** Like {!Cil.mkCastT} but uses typeOf to get [oldt] *) +val mkCast: e:exp -> newt:typ -> exp + +(** Removes casts from this expression, but ignores casts within + other expression constructs. So we delete the (A) and (B) casts from + "(A)(B)(x + (C)y)", but leave the (C) cast. *) +val stripCasts: exp -> exp + +(** Compute the type of an expression *) +val typeOf: exp -> typ + +(** Convert a string representing a C integer literal to an expression. + * Handles the prefixes 0x and 0 and the suffixes L, U, UL, LL, ULL *) +val parseInt: string -> exp + + +(**********************************************) +(** {b Values for manipulating statements} *) + +(** Construct a statement, given its kind. Initialize the [sid] field to -1, + and [labels], [succs] and [preds] to the empty list *) +val mkStmt: stmtkind -> stmt + +(** Construct a block with no attributes, given a list of statements *) +val mkBlock: stmt list -> block + +(** Construct a statement consisting of just one instruction *) +val mkStmtOneInstr: instr -> stmt + +(** Try to compress statements so as to get maximal basic blocks *) +(* use this instead of List.@ because you get fewer basic blocks *) +val compactStmts: stmt list -> stmt list + +(** Returns an empty statement (of kind [Instr]) *) +val mkEmptyStmt: unit -> stmt + +(** A instr to serve as a placeholder *) +val dummyInstr: instr + +(** A statement consisting of just [dummyInstr] *) +val dummyStmt: stmt + +(** Make a while loop. Can contain Break or Continue *) +val mkWhile: guard:exp -> body:stmt list -> stmt list + +(** Make a for loop for(i=start; i first:exp -> stopat:exp -> incr:exp + -> body:stmt list -> stmt list + +(** Make a for loop for(start; guard; next) \{ ... \}. The body can + contain Break but not Continue !!! *) +val mkFor: start:stmt list -> guard:exp -> next: stmt list -> + body: stmt list -> stmt list + + + +(**************************************************) +(** {b Values for manipulating attributes} *) + +(** Various classes of attributes *) +type attributeClass = + AttrName of bool + (** Attribute of a name. If argument is true and we are on MSVC then + the attribute is printed using __declspec as part of the storage + specifier *) + | AttrFunType of bool + (** Attribute of a function type. If argument is true and we are on + MSVC then the attribute is printed just before the function name *) + | AttrType (** Attribute of a type *) + +(** This table contains the mapping of predefined attributes to classes. + Extend this table with more attributes as you need. This table is used to + determine how to associate attributes with names or types *) +val attributeHash: (string, attributeClass) Hashtbl.t + +(** Partition the attributes into classes:name attributes, function type, + and type attributes *) +val partitionAttributes: default:attributeClass -> + attributes -> attribute list * (* AttrName *) + attribute list * (* AttrFunType *) + attribute list (* AttrType *) + +(** Add an attribute. Maintains the attributes in sorted order of the second + argument *) +val addAttribute: attribute -> attributes -> attributes + +(** Add a list of attributes. Maintains the attributes in sorted order. The + second argument must be sorted, but not necessarily the first *) +val addAttributes: attribute list -> attributes -> attributes + +(** Remove all attributes with the given name. Maintains the attributes in + sorted order. *) +val dropAttribute: string -> attributes -> attributes + +(** Remove all attributes with names appearing in the string list. + * Maintains the attributes in sorted order *) +val dropAttributes: string list -> attributes -> attributes + +(** Retains attributes with the given name *) +val filterAttributes: string -> attributes -> attributes + +(** True if the named attribute appears in the attribute list. The list of + attributes must be sorted. *) +val hasAttribute: string -> attributes -> bool + +(** Returns all the attributes contained in a type. This requires a traversal + of the type structure, in case of composite, enumeration and named types *) +val typeAttrs: typ -> attribute list + +val setTypeAttrs: typ -> attributes -> typ (* Resets the attributes *) + + +(** Add some attributes to a type *) +val typeAddAttributes: attribute list -> typ -> typ + +(** Remove all attributes with the given names from a type. Note that this + does not remove attributes from typedef and tag definitions, just from + their uses *) +val typeRemoveAttributes: string list -> typ -> typ + + +(****************** + ****************** VISITOR + ******************) +(** {b The visitor} *) + +(** Different visiting actions. 'a will be instantiated with [exp], [instr], + etc. *) +type 'a visitAction = + SkipChildren (** Do not visit the children. Return + the node as it is. *) + | DoChildren (** Continue with the children of this + node. Rebuild the node on return + if any of the children changes + (use == test) *) + | ChangeTo of 'a (** Replace the expression with the + given one *) + | ChangeDoChildrenPost of 'a * ('a -> 'a) (** First consider that the entire + exp is replaced by the first + parameter. Then continue with + the children. On return rebuild + the node if any of the children + has changed and then apply the + function on the node *) + + + +(** A visitor interface for traversing CIL trees. Create instantiations of + * this type by specializing the class {!Cil.nopCilVisitor}. Each of the + * specialized visiting functions can also call the [queueInstr] to specify + * that some instructions should be inserted before the current instruction + * or statement. Use syntax like [self#queueInstr] to call a method + * associated with the current object. *) +class type cilVisitor = object + method vvdec: varinfo -> varinfo visitAction + (** Invoked for each variable declaration. The subtrees to be traversed + * are those corresponding to the type and attributes of the variable. + * Note that variable declarations are all the [GVar], [GVarDecl], [GFun], + * all the [varinfo] in formals of function types, and the formals and + * locals for function definitions. This means that the list of formals + * in a function definition will be traversed twice, once as part of the + * function type and second as part of the formals in a function + * definition. *) + + method vvrbl: varinfo -> varinfo visitAction + (** Invoked on each variable use. Here only the [SkipChildren] and + * [ChangeTo] actions make sense since there are no subtrees. Note that + * the type and attributes of the variable are not traversed for a + * variable use *) + + method vexpr: exp -> exp visitAction + (** Invoked on each expression occurrence. The subtrees are the + * subexpressions, the types (for a [Cast] or [SizeOf] expression) or the + * variable use. *) + + method vlval: lval -> lval visitAction + (** Invoked on each lvalue occurrence *) + + method voffs: offset -> offset visitAction + (** Invoked on each offset occurrence that is *not* as part + * of an initializer list specification, i.e. in an lval or + * recursively inside an offset. *) + + method vinitoffs: offset -> offset visitAction + (** Invoked on each offset appearing in the list of a + * CompoundInit initializer. *) + + method vinst: instr -> instr list visitAction + (** Invoked on each instruction occurrence. The [ChangeTo] action can + * replace this instruction with a list of instructions *) + + method vstmt: stmt -> stmt visitAction + (** Control-flow statement. The default [DoChildren] action does not + * create a new statement when the components change. Instead it updates + * the contents of the original statement. This is done to preserve the + * sharing with [Goto] and [Case] statements that point to the original + * statement. If you use the [ChangeTo] action then you should take care + * of preserving that sharing yourself. *) + + method vblock: block -> block visitAction (** Block. *) + method vfunc: fundec -> fundec visitAction (** Function definition. + Replaced in place. *) + method vglob: global -> global list visitAction (** Global (vars, types, + etc.) *) + method vinit: init -> init visitAction (** Initializers for globals *) + method vtype: typ -> typ visitAction (** Use of some type. Note + * that for structure/union + * and enumeration types the + * definition of the + * composite type is not + * visited. Use [vglob] to + * visit it. *) + method vattr: attribute -> attribute list visitAction + (** Attribute. Each attribute can be replaced by a list *) + method vattrparam: attrparam -> attrparam visitAction + (** Attribute parameters. *) + + (** Add here instructions while visiting to queue them to preceede the + * current statement or instruction being processed. Use this method only + * when you are visiting an expression that is inside a function body, or + * a statement, because otherwise there will no place for the visitor to + * place your instructions. *) + method queueInstr: instr list -> unit + + (** Gets the queue of instructions and resets the queue. This is done + * automatically for you when you visit statments. *) + method unqueueInstr: unit -> instr list + +end + +(** Default Visitor. Traverses the CIL tree without modifying anything *) +class nopCilVisitor: cilVisitor + +(* other cil constructs *) + +(** Visit a file. This will will re-cons all globals TWICE (so that it is + * tail-recursive). Use {!Cil.visitCilFileSameGlobals} if your visitor will + * not change the list of globals. *) +val visitCilFile: cilVisitor -> file -> unit + +(** A visitor for the whole file that does not change the globals (but maybe + * changes things inside the globals). Use this function instead of + * {!Cil.visitCilFile} whenever appropriate because it is more efficient for + * long files. *) +val visitCilFileSameGlobals: cilVisitor -> file -> unit + +(** Visit a global *) +val visitCilGlobal: cilVisitor -> global -> global list + +(** Visit a function definition *) +val visitCilFunction: cilVisitor -> fundec -> fundec + +(* Visit an expression *) +val visitCilExpr: cilVisitor -> exp -> exp + +(** Visit an lvalue *) +val visitCilLval: cilVisitor -> lval -> lval + +(** Visit an lvalue or recursive offset *) +val visitCilOffset: cilVisitor -> offset -> offset + +(** Visit an initializer offset *) +val visitCilInitOffset: cilVisitor -> offset -> offset + +(** Visit an instruction *) +val visitCilInstr: cilVisitor -> instr -> instr list + +(** Visit a statement *) +val visitCilStmt: cilVisitor -> stmt -> stmt + +(** Visit a block *) +val visitCilBlock: cilVisitor -> block -> block + +(** Visit a type *) +val visitCilType: cilVisitor -> typ -> typ + +(** Visit a variable declaration *) +val visitCilVarDecl: cilVisitor -> varinfo -> varinfo + +(** Visit an initializer *) +val visitCilInit: cilVisitor -> init -> init + + +(** Visit a list of attributes *) +val visitCilAttributes: cilVisitor -> attribute list -> attribute list + +(* And some generic visitors. The above are built with these *) + + +(** {b Utility functions} *) + +(** Whether the pretty printer should print output for the MS VC compiler. + Default is GCC. After you set this function you should call {!Cil.initCIL}. *) +val msvcMode: bool ref + + +(** Whether to use the logical operands LAnd and LOr. By default, do not use + * them because they are unlike other expressions and do not evaluate both of + * their operands *) +val useLogicalOperators: bool ref + + +(** A visitor that does constant folding. Pass as argument whether you want + * machine specific simplifications to be done, or not. *) +val constFoldVisitor: bool -> cilVisitor + +(** Styles of printing line directives *) +type lineDirectiveStyle = + | LineComment + | LinePreprocessorInput + | LinePreprocessorOutput + +(** How to print line directives *) +val lineDirectiveStyle: lineDirectiveStyle option ref + +(** Whether we print something that will only be used as input to our own + * parser. In that case we are a bit more liberal in what we print *) +val print_CIL_Input: bool ref + +(** Whether to print the CIL as they are, without trying to be smart and + * print nicer code. Normally this is false, in which case the pretty + * printer will turn the while(1) loops of CIL into nicer loops, will not + * print empty "else" blocks, etc. These is one case howewer in which if you + * turn this on you will get code that does not compile: if you use varargs + * the __builtin_va_arg function will be printed in its internal form. *) +val printCilAsIs: bool ref + +(** The length used when wrapping output lines. Setting this variable to + * a large integer will prevent wrapping and make #line directives more + * accurate. + *) +val lineLength: int ref + +(** Return the string 's' if we're printing output for gcc, suppres + * it if we're printing for CIL to parse back in. the purpose is to + * hide things from gcc that it complains about, but still be able + * to do lossless transformations when CIL is the consumer *) +val forgcc: string -> string + +(** {b Debugging support} *) + +(** A reference to the current location. If you are careful to set this to + * the current location then you can use some built-in logging functions that + * will print the location. *) +val currentLoc: location ref + +(** A reference to the current global being visited *) +val currentGlobal: global ref + + +(** CIL has a fairly easy to use mechanism for printing error messages. This + * mechanism is built on top of the pretty-printer mechanism (see + * {!Pretty.doc}) and the error-message modules (see {!Errormsg.error}). + + Here is a typical example for printing a log message: {v +ignore (Errormsg.log "Expression %a is not positive (at %s:%i)\n" + d_exp e loc.file loc.line) + v} + + and here is an example of how you print a fatal error message that stop the +* execution: {v +Errormsg.s (Errormsg.bug "Why am I here?") + v} + + Notice that you can use C format strings with some extension. The most +useful extension is "%a" that means to consumer the next two argument from +the argument list and to apply the first to [unit] and then to the second +and to print the resulting {!Pretty.doc}. For each major type in CIL there is +a corresponding function that pretty-prints an element of that type: +*) + + +(** Pretty-print a location *) +val d_loc: unit -> location -> Pretty.doc + +(** Pretty-print the {!Cil.currentLoc} *) +val d_thisloc: unit -> Pretty.doc + +(** Pretty-print an integer of a given kind *) +val d_ikind: unit -> ikind -> Pretty.doc + +(** Pretty-print a floating-point kind *) +val d_fkind: unit -> fkind -> Pretty.doc + +(** Pretty-print storage-class information *) +val d_storage: unit -> storage -> Pretty.doc + +(** Pretty-print a constant *) +val d_const: unit -> constant -> Pretty.doc + + +val derefStarLevel: int +val indexLevel: int +val arrowLevel: int +val addrOfLevel: int +val additiveLevel: int +val comparativeLevel: int +val bitwiseLevel: int + +(** Parentheses level. An expression "a op b" is printed parenthesized if its + * parentheses level is >= that that of its context. Identifiers have the + * lowest level and weakly binding operators (e.g. |) have the largest level. + * The correctness criterion is that a smaller level MUST correspond to a + * stronger precedence! + *) +val getParenthLevel: exp -> int + +(** A printer interface for CIL trees. Create instantiations of + * this type by specializing the class {!Cil.defaultCilPrinterClass}. *) +class type cilPrinter = object + method pVDecl: unit -> varinfo -> Pretty.doc + (** Invoked for each variable declaration. Note that variable + * declarations are all the [GVar], [GVarDecl], [GFun], all the [varinfo] + * in formals of function types, and the formals and locals for function + * definitions. *) + + method pVar: varinfo -> Pretty.doc + (** Invoked on each variable use. *) + + method pLval: unit -> lval -> Pretty.doc + (** Invoked on each lvalue occurrence *) + + method pOffset: Pretty.doc -> offset -> Pretty.doc + (** Invoked on each offset occurrence. The second argument is the base. *) + + method pInstr: unit -> instr -> Pretty.doc + (** Invoked on each instruction occurrence. *) + + method pLabel: unit -> label -> Pretty.doc + (** Print a label. *) + + method pStmt: unit -> stmt -> Pretty.doc + (** Control-flow statement. This is used by + * {!Cil.printGlobal} and by {!Cil.dumpGlobal}. *) + + method dStmt: out_channel -> int -> stmt -> unit + (** Dump a control-flow statement to a file with a given indentation. + * This is used by {!Cil.dumpGlobal}. *) + + method dBlock: out_channel -> int -> block -> unit + (** Dump a control-flow block to a file with a given indentation. + * This is used by {!Cil.dumpGlobal}. *) + + method pBlock: unit -> block -> Pretty.doc + + method pBlock: unit -> block -> Pretty.doc + (** Print a block. *) + + method pGlobal: unit -> global -> Pretty.doc + (** Global (vars, types, etc.). This can be slow and is used only by + * {!Cil.printGlobal} but not by {!Cil.dumpGlobal}. *) + + method dGlobal: out_channel -> global -> unit + (** Dump a global to a file with a given indentation. This is used by + * {!Cil.dumpGlobal} *) + + method pFieldDecl: unit -> fieldinfo -> Pretty.doc + (** A field declaration *) + + method pType: Pretty.doc option -> unit -> typ -> Pretty.doc + (* Use of some type in some declaration. The first argument is used to print + * the declared element, or is None if we are just printing a type with no + * name being declared. Note that for structure/union and enumeration types + * the definition of the composite type is not visited. Use [vglob] to + * visit it. *) + + method pAttr: attribute -> Pretty.doc * bool + (** Attribute. Also return an indication whether this attribute must be + * printed inside the __attribute__ list or not. *) + + method pAttrParam: unit -> attrparam -> Pretty.doc + (** Attribute parameter *) + + method pAttrs: unit -> attributes -> Pretty.doc + (** Attribute lists *) + + method pLineDirective: ?forcefile:bool -> location -> Pretty.doc + (** Print a line-number. This is assumed to come always on an empty line. + * If the forcefile argument is present and is true then the file name + * will be printed always. Otherwise the file name is printed only if it + * is different from the last time time this function is called. The last + * file name is stored in a private field inside the cilPrinter object. *) + + method pStmtKind: stmt -> unit -> stmtkind -> Pretty.doc + (** Print a statement kind. The code to be printed is given in the + * {!Cil.stmtkind} argument. The initial {!Cil.stmt} argument + * records the statement which follows the one being printed; + * {!Cil.defaultCilPrinterClass} uses this information to prettify + * statement printing in certain special cases. *) + + method pExp: unit -> exp -> Pretty.doc + (** Print expressions *) + + method pInit: unit -> init -> Pretty.doc + (** Print initializers. This can be slow and is used by + * {!Cil.printGlobal} but not by {!Cil.dumpGlobal}. *) + + method dInit: out_channel -> int -> init -> unit + (** Dump a global to a file with a given indentation. This is used by + * {!Cil.dumpGlobal} *) +end + +class defaultCilPrinterClass: cilPrinter +val defaultCilPrinter: cilPrinter + +(** These are pretty-printers that will show you more details on the internal + * CIL representation, without trying hard to make it look like C *) +class plainCilPrinterClass: cilPrinter +val plainCilPrinter: cilPrinter + +(* zra: This is the pretty printer that Maincil will use. + by default it is set to defaultCilPrinter *) +val printerForMaincil: cilPrinter ref + +(* Top-level printing functions *) +(** Print a type given a pretty printer *) +val printType: cilPrinter -> unit -> typ -> Pretty.doc + +(** Print an expression given a pretty printer *) +val printExp: cilPrinter -> unit -> exp -> Pretty.doc + +(** Print an lvalue given a pretty printer *) +val printLval: cilPrinter -> unit -> lval -> Pretty.doc + +(** Print a global given a pretty printer *) +val printGlobal: cilPrinter -> unit -> global -> Pretty.doc + +(** Print an attribute given a pretty printer *) +val printAttr: cilPrinter -> unit -> attribute -> Pretty.doc + +(** Print a set of attributes given a pretty printer *) +val printAttrs: cilPrinter -> unit -> attributes -> Pretty.doc + +(** Print an instruction given a pretty printer *) +val printInstr: cilPrinter -> unit -> instr -> Pretty.doc + +(** Print a statement given a pretty printer. This can take very long + * (or even overflow the stack) for huge statements. Use {!Cil.dumpStmt} + * instead. *) +val printStmt: cilPrinter -> unit -> stmt -> Pretty.doc + +(** Print a block given a pretty printer. This can take very long + * (or even overflow the stack) for huge block. Use {!Cil.dumpBlock} + * instead. *) +val printBlock: cilPrinter -> unit -> block -> Pretty.doc + +(** Dump a statement to a file using a given indentation. Use this instead of + * {!Cil.printStmt} whenever possible. *) +val dumpStmt: cilPrinter -> out_channel -> int -> stmt -> unit + +(** Dump a block to a file using a given indentation. Use this instead of + * {!Cil.printBlock} whenever possible. *) +val dumpBlock: cilPrinter -> out_channel -> int -> block -> unit + +(** Print an initializer given a pretty printer. This can take very long + * (or even overflow the stack) for huge initializers. Use {!Cil.dumpInit} + * instead. *) +val printInit: cilPrinter -> unit -> init -> Pretty.doc + +(** Dump an initializer to a file using a given indentation. Use this instead of + * {!Cil.printInit} whenever possible. *) +val dumpInit: cilPrinter -> out_channel -> int -> init -> unit + +(** Pretty-print a type using {!Cil.defaultCilPrinter} *) +val d_type: unit -> typ -> Pretty.doc + +(** Pretty-print an expression using {!Cil.defaultCilPrinter} *) +val d_exp: unit -> exp -> Pretty.doc + +(** Pretty-print an lvalue using {!Cil.defaultCilPrinter} *) +val d_lval: unit -> lval -> Pretty.doc + +(** Pretty-print an offset using {!Cil.defaultCilPrinter}, given the pretty + * printing for the base. *) +val d_offset: Pretty.doc -> unit -> offset -> Pretty.doc + +(** Pretty-print an initializer using {!Cil.defaultCilPrinter}. This can be + * extremely slow (or even overflow the stack) for huge initializers. Use + * {!Cil.dumpInit} instead. *) +val d_init: unit -> init -> Pretty.doc + +(** Pretty-print a binary operator *) +val d_binop: unit -> binop -> Pretty.doc + +(** Pretty-print a unary operator *) +val d_unop: unit -> unop -> Pretty.doc + +(** Pretty-print an attribute using {!Cil.defaultCilPrinter} *) +val d_attr: unit -> attribute -> Pretty.doc + +(** Pretty-print an argument of an attribute using {!Cil.defaultCilPrinter} *) +val d_attrparam: unit -> attrparam -> Pretty.doc + +(** Pretty-print a list of attributes using {!Cil.defaultCilPrinter} *) +val d_attrlist: unit -> attributes -> Pretty.doc + +(** Pretty-print an instruction using {!Cil.defaultCilPrinter} *) +val d_instr: unit -> instr -> Pretty.doc + +(** Pretty-print a label using {!Cil.defaultCilPrinter} *) +val d_label: unit -> label -> Pretty.doc + +(** Pretty-print a statement using {!Cil.defaultCilPrinter}. This can be + * extremely slow (or even overflow the stack) for huge statements. Use + * {!Cil.dumpStmt} instead. *) +val d_stmt: unit -> stmt -> Pretty.doc + +(** Pretty-print a block using {!Cil.defaultCilPrinter}. This can be + * extremely slow (or even overflow the stack) for huge blocks. Use + * {!Cil.dumpBlock} instead. *) +val d_block: unit -> block -> Pretty.doc + +(** Pretty-print the internal representation of a global using + * {!Cil.defaultCilPrinter}. This can be extremely slow (or even overflow the + * stack) for huge globals (such as arrays with lots of initializers). Use + * {!Cil.dumpGlobal} instead. *) +val d_global: unit -> global -> Pretty.doc + + +(** Versions of the above pretty printers, that don't print #line directives *) +val dn_exp : unit -> exp -> Pretty.doc +val dn_lval : unit -> lval -> Pretty.doc +(* dn_offset is missing because it has a different interface *) +val dn_init : unit -> init -> Pretty.doc +val dn_type : unit -> typ -> Pretty.doc +val dn_global : unit -> global -> Pretty.doc +val dn_attrlist : unit -> attributes -> Pretty.doc +val dn_attr : unit -> attribute -> Pretty.doc +val dn_attrparam : unit -> attrparam -> Pretty.doc +val dn_stmt : unit -> stmt -> Pretty.doc +val dn_instr : unit -> instr -> Pretty.doc + + +(** Pretty-print a short description of the global. This is useful for error + * messages *) +val d_shortglobal: unit -> global -> Pretty.doc + +(** Pretty-print a global. Here you give the channel where the printout + * should be sent. *) +val dumpGlobal: cilPrinter -> out_channel -> global -> unit + +(** Pretty-print an entire file. Here you give the channel where the printout + * should be sent. *) +val dumpFile: cilPrinter -> out_channel -> string -> file -> unit + + +(* the following error message producing functions also print a location in + * the code. use {!Errormsg.bug} and {!Errormsg.unimp} if you do not want + * that *) + +(** Like {!Errormsg.bug} except that {!Cil.currentLoc} is also printed *) +val bug: ('a,unit,Pretty.doc) format -> 'a + +(** Like {!Errormsg.unimp} except that {!Cil.currentLoc}is also printed *) +val unimp: ('a,unit,Pretty.doc) format -> 'a + +(** Like {!Errormsg.error} except that {!Cil.currentLoc} is also printed *) +val error: ('a,unit,Pretty.doc) format -> 'a + +(** Like {!Cil.error} except that it explicitly takes a location argument, + * instead of using the {!Cil.currentLoc} *) +val errorLoc: location -> ('a,unit,Pretty.doc) format -> 'a + +(** Like {!Errormsg.warn} except that {!Cil.currentLoc} is also printed *) +val warn: ('a,unit,Pretty.doc) format -> 'a + + +(** Like {!Errormsg.warnOpt} except that {!Cil.currentLoc} is also printed. + * This warning is printed only of {!Errormsg.warnFlag} is set. *) +val warnOpt: ('a,unit,Pretty.doc) format -> 'a + +(** Like {!Errormsg.warn} except that {!Cil.currentLoc} and context + is also printed *) +val warnContext: ('a,unit,Pretty.doc) format -> 'a + +(** Like {!Errormsg.warn} except that {!Cil.currentLoc} and context is also + * printed. This warning is printed only of {!Errormsg.warnFlag} is set. *) +val warnContextOpt: ('a,unit,Pretty.doc) format -> 'a + +(** Like {!Cil.warn} except that it explicitly takes a location argument, + * instead of using the {!Cil.currentLoc} *) +val warnLoc: location -> ('a,unit,Pretty.doc) format -> 'a + +(** Sometimes you do not want to see the syntactic sugar that the above + * pretty-printing functions add. In that case you can use the following + * pretty-printing functions. But note that the output of these functions is + * not valid C *) + +(** Pretty-print the internal representation of an expression *) +val d_plainexp: unit -> exp -> Pretty.doc + +(** Pretty-print the internal representation of an integer *) +val d_plaininit: unit -> init -> Pretty.doc + +(** Pretty-print the internal representation of an lvalue *) +val d_plainlval: unit -> lval -> Pretty.doc + +(** Pretty-print the internal representation of an lvalue offset +val d_plainoffset: unit -> offset -> Pretty.doc *) + +(** Pretty-print the internal representation of a type *) +val d_plaintype: unit -> typ -> Pretty.doc + + + +(** {b ALPHA conversion} has been moved to the Alpha module. *) + + +(** Assign unique names to local variables. This might be necessary after you + * transformed the code and added or renamed some new variables. Names are + * not used by CIL internally, but once you print the file out the compiler + * downstream might be confused. You might + * have added a new global that happens to have the same name as a local in + * some function. Rename the local to ensure that there would never be + * confusioin. Or, viceversa, you might have added a local with a name that + * conflicts with a global *) +val uniqueVarNames: file -> unit + +(** {b Optimization Passes} *) + +(** A peephole optimizer that processes two adjacent statements and possibly + replaces them both. If some replacement happens, then the new statements + are themselves subject to optimization *) +val peepHole2: (instr * instr -> instr list option) -> stmt list -> unit + +(** Similar to [peepHole2] except that the optimization window consists of + one statement, not two *) +val peepHole1: (instr -> instr list option) -> stmt list -> unit + +(** {b Machine dependency} *) + + +(** Raised when one of the bitsSizeOf functions cannot compute the size of a + * type. This can happen because the type contains array-length expressions + * that we don't know how to compute or because it is a type whose size is + * not defined (e.g. TFun or an undefined compinfo). The string is an + * explanation of the error *) +exception SizeOfError of string * typ + +(** The size of a type, in bits. Trailing padding is added for structs and + * arrays. Raises {!Cil.SizeOfError} when it cannot compute the size. This + * function is architecture dependent, so you should only call this after you + * call {!Cil.initCIL}. Remember that on GCC sizeof(void) is 1! *) +val bitsSizeOf: typ -> int + +(* The size of a type, in bytes. Returns a constant expression or a "sizeof" + * expression if it cannot compute the size. This function is architecture + * dependent, so you should only call this after you call {!Cil.initCIL}. *) +val sizeOf: typ -> exp + +(** The minimum alignment (in bytes) for a type. This function is + * architecture dependent, so you should only call this after you call + * {!Cil.initCIL}. *) +val alignOf_int: typ -> int + +(** Give a type of a base and an offset, returns the number of bits from the + * base address and the width (also expressed in bits) for the subobject + * denoted by the offset. Raises {!Cil.SizeOfError} when it cannot compute + * the size. This function is architecture dependent, so you should only call + * this after you call {!Cil.initCIL}. *) +val bitsOffset: typ -> offset -> int * int + + +(** Whether "char" is unsigned. Set after you call {!Cil.initCIL} *) +val char_is_unsigned: bool ref + +(** Whether the machine is little endian. Set after you call {!Cil.initCIL} *) +val little_endian: bool ref + +(** Whether the compiler generates assembly labels by prepending "_" to the + identifier. That is, will function foo() have the label "foo", or "_foo"? + Set after you call {!Cil.initCIL} *) +val underscore_name: bool ref + +(** Represents a location that cannot be determined *) +val locUnknown: location + +(** Return the location of an instruction *) +val get_instrLoc: instr -> location + +(** Return the location of a global, or locUnknown *) +val get_globalLoc: global -> location + +(** Return the location of a statement, or locUnknown *) +val get_stmtLoc: stmtkind -> location + + +(** Generate an {!Cil.exp} to be used in case of errors. *) +val dExp: Pretty.doc -> exp + +(** Generate an {!Cil.instr} to be used in case of errors. *) +val dInstr: Pretty.doc -> location -> instr + +(** Generate a {!Cil.global} to be used in case of errors. *) +val dGlobal: Pretty.doc -> location -> global + +(** Like map but try not to make a copy of the list *) +val mapNoCopy: ('a -> 'a) -> 'a list -> 'a list + +(** Like map but each call can return a list. Try not to make a copy of the + list *) +val mapNoCopyList: ('a -> 'a list) -> 'a list -> 'a list + +(** sm: return true if the first is a prefix of the second string *) +val startsWith: string -> string -> bool + + +(** {b An Interpreter for constructing CIL constructs} *) + +(** The type of argument for the interpreter *) +type formatArg = + Fe of exp + | Feo of exp option (** For array lengths *) + | Fu of unop + | Fb of binop + | Fk of ikind + | FE of exp list (** For arguments in a function call *) + | Ff of (string * typ * attributes) (** For a formal argument *) + | FF of (string * typ * attributes) list (** For formal argument lists *) + | Fva of bool (** For the ellipsis in a function type *) + | Fv of varinfo + | Fl of lval + | Flo of lval option + + | Fo of offset + + | Fc of compinfo + | Fi of instr + | FI of instr list + | Ft of typ + | Fd of int + | Fg of string + | Fs of stmt + | FS of stmt list + | FA of attributes + + | Fp of attrparam + | FP of attrparam list + + | FX of string + + +(** Pretty-prints a format arg *) +val d_formatarg: unit -> formatArg -> Pretty.doc + +val lowerConstants: bool ref + (** Do lower constant expressions into constants (default true) *) diff --git a/cil/src/cillower.ml b/cil/src/cillower.ml new file mode 100755 index 00000000..61745bf4 --- /dev/null +++ b/cil/src/cillower.ml @@ -0,0 +1,57 @@ +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(** A number of lowering passes over CIL *) +open Cil +open Pretty +module E = Errormsg + +(** Lower CEnum constants *) +class lowerEnumVisitorClass : cilVisitor = object (self) + inherit nopCilVisitor + + method vexpr (e: exp) = + match e with + Const (CEnum(v, s, ei)) -> + ChangeTo (visitCilExpr (self :>cilVisitor) v) + + | _ -> DoChildren + +end + +let lowerEnumVisitor = new lowerEnumVisitorClass diff --git a/cil/src/cillower.mli b/cil/src/cillower.mli new file mode 100755 index 00000000..a62c9e3b --- /dev/null +++ b/cil/src/cillower.mli @@ -0,0 +1,42 @@ +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(** A number of lowering passes over CIL *) + +(** Replace enumeration constants with integer constants *) +val lowerEnumVisitor : Cil.cilVisitor diff --git a/cil/src/ciloptions.ml b/cil/src/ciloptions.ml new file mode 100755 index 00000000..9a2b4bd5 --- /dev/null +++ b/cil/src/ciloptions.ml @@ -0,0 +1,196 @@ +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +module E = Errormsg + +let setDebugFlag v name = + E.debugFlag := v; + if v then Pretty.flushOften := true + +type outfile = + { fname: string; + fchan: out_channel } + +let setTraceDepth n = + Pretty.printDepth := n + + + (* Processign of output file arguments *) +let openFile (what: string) (takeit: outfile -> unit) (fl: string) = + if !E.verboseFlag then + ignore (Printf.printf "Setting %s to %s\n" what fl); + (try takeit { fname = fl; + fchan = open_out fl } + with _ -> + raise (Arg.Bad ("Cannot open " ^ what ^ " file " ^ fl))) + + +let fileNames : string list ref = ref [] +let recordFile fname = + fileNames := fname :: (!fileNames) + + (* Parsing of files with additional names *) +let parseExtraFile (s: string) = + try + let sfile = open_in s in + while true do + let line = try input_line sfile with e -> (close_in sfile; raise e) in + let linelen = String.length line in + let rec scan (pos: int) (* next char to look at *) + (start: int) : unit (* start of the word, + or -1 if none *) = + if pos >= linelen then + if start >= 0 then + recordFile (String.sub line start (pos - start)) + else + () (* Just move on to the next line *) + else + let c = String.get line pos in + match c with + ' ' | '\n' | '\r' | '\t' -> + (* whitespace *) + if start >= 0 then begin + recordFile (String.sub line start (pos - start)); + end; + scan (pos + 1) (-1) + + | _ -> (* non-whitespace *) + if start >= 0 then + scan (pos + 1) start + else + scan (pos + 1) pos + in + scan 0 (-1) + done + with Sys_error _ -> E.s (E.error "Cannot find extra file: %s\n" s) + | End_of_file -> () + + +let options : (string * Arg.spec * string) list = + [ + (* General Options *) + "", Arg.Unit (fun () -> ()), "\n\t\tGeneral Options\n" ; + + "--version", Arg.Unit + (fun _ -> print_endline ("CIL version " ^ Cil.cilVersion ^ + "\nMore information at http://cil.sourceforge.net/\n"); + exit 0), + "output version information and exit"; + "--verbose", Arg.Unit (fun _ -> E.verboseFlag := true), + "Print lots of random stuff. This is passed on from cilly."; + "--warnall", Arg.Unit (fun _ -> E.warnFlag := true), "Show all warnings"; + "--debug", Arg.String (setDebugFlag true), + " turns on debugging flag xxx"; + "--nodebug", Arg.String (setDebugFlag false), + " turns off debugging flag xxx"; + + "--flush", Arg.Unit (fun _ -> Pretty.flushOften := true), + "Flush the output streams often (aids debugging)" ; + "--check", Arg.Unit (fun _ -> Cilutil.doCheck := true), + "Run a consistency check over the CIL after every operation."; + "--nocheck", Arg.Unit (fun _ -> Cilutil.doCheck := false), + "turns off consistency checking of CIL"; + "--noPrintLn", Arg.Unit (fun _ -> Cil.lineDirectiveStyle := None; + Cprint.printLn := false), + "Don't output #line directives in the output."; + "--commPrintLn", Arg.Unit (fun _ -> Cil.lineDirectiveStyle := Some Cil.LineComment; + Cprint.printLnComment := true), + "Print #line directives in the output, but put them in comments."; + "--stats", Arg.Unit (fun _ -> Cilutil.printStats := true), + "Print statistics about running times and memory usage."; + + + "--log", Arg.String (openFile "log" (fun oc -> E.logChannel := oc.fchan)), + "Set the name of the log file. By default stderr is used"; + + "--MSVC", Arg.Unit (fun _ -> Cil.msvcMode := true; + Frontc.setMSVCMode (); + if not Machdep.hasMSVC then + ignore (E.warn "Will work in MSVC mode but will be using machine-dependent parameters for GCC since you do not have the MSVC compiler installed\n") + ), "Enable MSVC compatibility. Default is GNU."; + + "--testcil", Arg.String (fun s -> Cilutil.testcil := s), + "test CIL using the given compiler"; + + "--ignore-merge-conflicts", + Arg.Unit (fun _ -> Mergecil.ignore_merge_conflicts := true), + "ignore merging conflicts"; + "--sliceGlobal", Arg.Unit (fun _ -> Cilutil.sliceGlobal := true), + "output is the slice of #pragma cilnoremove(sym) symbols"; + + (* sm: some more debugging options *) + "--tr", Arg.String Trace.traceAddMulti, + ": subsystem to show debug printfs for"; + "--pdepth", Arg.Int setTraceDepth, + ": set max print depth (default: 5)"; + + "--extrafiles", Arg.String parseExtraFile, + ": the name of a file that contains a list of additional files to process, separated by whitespace of newlines"; + + (* Lowering Options *) + "", Arg.Unit (fun () -> ()), "\n\t\tLowering Options\n" ; + + "--noLowerConstants", Arg.Unit (fun _ -> Cil.lowerConstants := false), + "do not lower constant expressions"; + + "--noInsertImplicitCasts", Arg.Unit (fun _ -> Cil.insertImplicitCasts := false), + "do not insert implicit casts"; + + "--forceRLArgEval", + Arg.Unit (fun n -> Cabs2cil.forceRLArgEval := true), + "Forces right to left evaluation of function arguments"; + "--nocil", Arg.Int (fun n -> Cabs2cil.nocil := n), + "Do not compile to CIL the global with the given index"; + "--disallowDuplication", Arg.Unit (fun n -> Cabs2cil.allowDuplication := false), + "Prevent small chunks of code from being duplicated"; + "--keepunused", Arg.Set Rmtmps.keepUnused, + "Do not remove the unused variables and types"; + "--rmUnusedInlines", Arg.Set Rmtmps.rmUnusedInlines, + "Delete any unused inline functions. This is the default in MSVC mode"; + + + + "", Arg.Unit (fun () -> ()), "\n\t\tOutput Options\n" ; + "--printCilAsIs", Arg.Unit (fun _ -> Cil.printCilAsIs := true), + "do not try to simplify the CIL when printing. Without this flag, CIL will attempt to produce prettier output by e.g. changing while(1) into more meaningful loops."; + "--noWrap", Arg.Unit (fun _ -> Cil.lineLength := 100000), + "do not wrap long lines when printing"; + + ] + diff --git a/cil/src/ciloptions.mli b/cil/src/ciloptions.mli new file mode 100755 index 00000000..13f65cf4 --- /dev/null +++ b/cil/src/ciloptions.mli @@ -0,0 +1,48 @@ +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(** The command-line options for CIL *) +val options : (string * Arg.spec * string) list + + +(** The list of file names *) +val fileNames : string list ref + +(** Adds the file to the start of fileNames *) +val recordFile: string -> unit diff --git a/cil/src/cilutil.ml b/cil/src/cilutil.ml new file mode 100644 index 00000000..b9a4da98 --- /dev/null +++ b/cil/src/cilutil.ml @@ -0,0 +1,72 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* Keep here the globally-visible flags *) +let doCheck= ref false (* Whether to check CIL *) + +let logCalls = ref false (* Whether to produce a log with all the function + * calls made *) +let logWrites = ref false (* Whether to produce a log with all the mem + * writes made *) +let doPartial = ref false (* Whether to do partial evaluation and constant + * folding *) +let doSimpleMem = ref false (* reduce complex memory expressions so that + * they contain at most one lval *) +let doOneRet = ref false (* make a functions have at most one 'return' *) +let doStackGuard = ref false (* instrument function calls and returns to +maintain a separate stack for return addresses *) +let doHeapify = ref false (* move stack-allocated arrays to the heap *) +let makeCFG = ref false (* turn the input CIL file into something more like + * a CFG *) +let printStats = ref false + +(* when 'sliceGlobal' is set, then when 'rmtmps' runs, only globals*) +(* marked with #pragma cilnoremove(whatever) are kept; when used with *) +(* cilly.asm.exe, the effect is to slice the input on the noremove symbols *) +let sliceGlobal = ref false + + +let printStages = ref false + + +let doCxxPP = ref false + +let libDir = ref "" + +let dumpFCG = ref false +let testcil = ref "" + diff --git a/cil/src/escape.ml b/cil/src/escape.ml new file mode 100644 index 00000000..198c9e5c --- /dev/null +++ b/cil/src/escape.ml @@ -0,0 +1,93 @@ +(* + * + * Copyright (c) 2003, + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(** OCaml types used to represent wide characters and strings *) +type wchar = int64 +type wstring = wchar list + + +let escape_char = function + | '\007' -> "\\a" + | '\b' -> "\\b" + | '\t' -> "\\t" + | '\n' -> "\\n" + | '\011' -> "\\v" + | '\012' -> "\\f" + | '\r' -> "\\r" + | '"' -> "\\\"" + | '\'' -> "\\'" + | '\\' -> "\\\\" + | ' ' .. '~' as printable -> String.make 1 printable + | unprintable -> Printf.sprintf "\\%03o" (Char.code unprintable) + +let escape_string str = + let length = String.length str in + let buffer = Buffer.create length in + for index = 0 to length - 1 do + Buffer.add_string buffer (escape_char (String.get str index)) + done; + Buffer.contents buffer + +(* a wide char represented as an int64 *) +let escape_wchar = + (* limit checks whether upper > probe *) + let limit upper probe = (Int64.to_float (Int64.sub upper probe)) > 0.5 in + let fits_byte = limit (Int64.of_int 0x100) in + let fits_octal_escape = limit (Int64.of_int 0o1000) in + let fits_universal_4 = limit (Int64.of_int 0x10000) in + let fits_universal_8 = limit (Int64.of_string "0x100000000") in + fun charcode -> + if fits_byte charcode then + escape_char (Char.chr (Int64.to_int charcode)) + else if fits_octal_escape charcode then + Printf.sprintf "\\%03Lo" charcode + else if fits_universal_4 charcode then + Printf.sprintf "\\u%04Lx" charcode + else if fits_universal_8 charcode then + Printf.sprintf "\\u%04Lx" charcode + else + invalid_arg "Cprint.escape_string_intlist" + +(* a wide string represented as a list of int64s *) +let escape_wstring (str : int64 list) = + let length = List.length str in + let buffer = Buffer.create length in + let append charcode = + let addition = escape_wchar charcode in + Buffer.add_string buffer addition + in + List.iter append str; + Buffer.contents buffer diff --git a/cil/src/escape.mli b/cil/src/escape.mli new file mode 100644 index 00000000..b932ef14 --- /dev/null +++ b/cil/src/escape.mli @@ -0,0 +1,48 @@ +(* + * + * Copyright (c) 2003, + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* + * Character and string escaping utilities + *) + +(** OCaml types used to represent wide characters and strings *) +type wchar = int64 +type wstring = wchar list + +(** escape various constructs in accordance with C lexical rules *) +val escape_char : char -> string +val escape_string : string -> string +val escape_wchar : wchar -> string +val escape_wstring : wstring -> string diff --git a/cil/src/ext/astslicer.ml b/cil/src/ext/astslicer.ml new file mode 100644 index 00000000..ffba4827 --- /dev/null +++ b/cil/src/ext/astslicer.ml @@ -0,0 +1,454 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +open Cil +module E = Errormsg +(* + * Weimer: an AST Slicer for use in Daniel's Delta Debugging Algorithm. + *) +let debug = ref false + +(* + * This type encapsulates a mapping form program locations to names + * in our naming convention. + *) +type enumeration_info = { + statements : (stmt, string) Hashtbl.t ; + instructions : (instr, string) Hashtbl.t ; +} + +(********************************************************************** + * Enumerate 1 + * + * Given a cil file, enumerate all of the statement names in it using + * our naming scheme. + **********************************************************************) +let enumerate out (f : Cil.file) = + let st_ht = Hashtbl.create 32767 in + let in_ht = Hashtbl.create 32767 in + + let emit base i ht elt = + let str = Printf.sprintf "%s.%d" base !i in + Printf.fprintf out "%s\n" str ; + Hashtbl.add ht elt str ; + incr i + in + let emit_call base i str2 ht elt = + let str = Printf.sprintf "%s.%d" base !i in + Printf.fprintf out "%s - %s\n" str str2 ; + Hashtbl.add ht elt str ; + incr i + in + let descend base i = + let res = (Printf.sprintf "%s.%d" base !i),(ref 0) in + res + in + let rec doBlock b base i = + doStmtList b.bstmts base i + and doStmtList sl base i = + List.iter (fun s -> match s.skind with + | Instr(il) -> doIL il base i + | Return(_,_) + | Goto(_,_) + | Continue(_) + | Break(_) -> emit base i st_ht s + | If(e,b1,b2,_) -> + emit base i st_ht s ; + decr i ; + Printf.fprintf out "(\n" ; + let base',i' = descend base i in + doBlock b1 base' i' ; + Printf.fprintf out ") (\n" ; + let base'',i'' = descend base i in + doBlock b2 base'' i'' ; + Printf.fprintf out ")\n" ; + incr i + | Switch(_,b,_,_) +(* + | Loop(b,_,_,_) +*) + | While(_,b,_) + | DoWhile(_,b,_) + | For(_,_,_,b,_) + | Block(b) -> + emit base i st_ht s ; + decr i ; + let base',i' = descend base i in + Printf.fprintf out "(\n" ; + doBlock b base' i' ; + Printf.fprintf out ")\n" ; + incr i + | TryExcept _ | TryFinally _ -> + E.s (E.unimp "astslicer:enumerate") + ) sl + and doIL il base i = + List.iter (fun ins -> match ins with + | Set _ + | Asm _ -> emit base i in_ht ins + | Call(_,(Lval(Var(vi),NoOffset)),_,_) -> + emit_call base i vi.vname in_ht ins + | Call(_,f,_,_) -> emit_call base i "*" in_ht ins + ) il + in + let doGlobal g = match g with + | GFun(fd,_) -> + Printf.fprintf out "%s (\n" fd.svar.vname ; + let cur = ref 0 in + doBlock fd.sbody fd.svar.vname cur ; + Printf.fprintf out ")\n" ; + () + | _ -> () + in + List.iter doGlobal f.globals ; + { statements = st_ht ; + instructions = in_ht ; } + +(********************************************************************** + * Enumerate 2 + * + * Given a cil file and some enumeration information, do a log-calls-like + * transformation on it that prints out our names as you reach them. + **********************************************************************) +(* + * This is the visitor that handles annotations + *) +let print_it pfun name = + ((Call(None,Lval(Var(pfun),NoOffset), + [mkString (name ^ "\n")],locUnknown))) + +class enumVisitor pfun st_ht in_ht = object + inherit nopCilVisitor + method vinst i = + if Hashtbl.mem in_ht i then begin + let name = Hashtbl.find in_ht i in + let newinst = print_it pfun name in + ChangeTo([newinst ; i]) + end else + DoChildren + method vstmt s = + if Hashtbl.mem st_ht s then begin + let name = Hashtbl.find st_ht s in + let newinst = print_it pfun name in + let newstmt = mkStmtOneInstr newinst in + let newblock = mkBlock [newstmt ; s] in + let replace_with = mkStmt (Block(newblock)) in + ChangeDoChildrenPost(s,(fun i -> replace_with)) + end else + DoChildren + method vfunc f = + let newinst = print_it pfun f.svar.vname in + let newstmt = mkStmtOneInstr newinst in + let new_f = { f with sbody = { f.sbody with + bstmts = newstmt :: f.sbody.bstmts }} in + ChangeDoChildrenPost(new_f,(fun i -> i)) +end + +let annotate (f : Cil.file) ei = begin + (* Create a prototype for the logging function *) + let printfFun = + let fdec = emptyFunction "printf" in + let argf = makeLocalVar fdec "format" charConstPtrType in + fdec.svar.vtype <- TFun(intType, Some [ ("format", charConstPtrType, [])], + true, []); + fdec + in + let visitor = (new enumVisitor printfFun.svar ei.statements + ei.instructions) in + visitCilFileSameGlobals visitor f; + f +end + +(********************************************************************** + * STAGE 2 + * + * Perform a transitive-closure-like operation on the parts of the program + * that the user wants to keep. We use a CIL visitor to walk around + * and a number of hash tables to keep track of the things we want to keep. + **********************************************************************) +(* + * Hashtables: + * ws - wanted stmts + * wi - wanted instructions + * wt - wanted typeinfo + * wc - wanted compinfo + * we - wanted enuminfo + * wv - wanted varinfo + *) + +let mode = ref false (* was our parented wanted? *) +let finished = ref true (* set to false if we update something *) + +(* In the given hashtable, mark the given element was "wanted" *) +let update ht elt = + if Hashtbl.mem ht elt && (Hashtbl.find ht elt = true) then () + else begin + Hashtbl.add ht elt true ; + finished := false + end + +(* Handle a particular stage of the AST tree walk. Use "mode" (i.e., + * whether our parent was wanted) and the hashtable (which tells us whether + * the user had any special instructions for this element) to determine + * what do to. *) +let handle ht elt rep = + if !mode then begin + if Hashtbl.mem ht elt && (Hashtbl.find ht elt = false) then begin + (* our parent is Wanted but we were told to ignore this subtree, + * so we won't be wanted. *) + mode := false ; + ChangeDoChildrenPost(rep,(fun elt -> mode := true ; elt)) + end else begin + (* we were not told to ignore this subtree, and our parent is + * Wanted, so we will be Wanted too! *) + update ht elt ; + DoChildren + end + end else if Hashtbl.mem ht elt && (Hashtbl.find ht elt = true) then begin + (* our parent was not wanted but we were wanted, so turn the + * mode on for now *) + mode := true ; + ChangeDoChildrenPost(rep,(fun elt -> mode := false ; elt)) + end else + DoChildren + +let handle_no_default ht elt rep old_mode = + if Hashtbl.mem ht elt && (Hashtbl.find ht elt = true) then begin + (* our parent was not wanted but we were wanted, so turn the + * mode on for now *) + mode := true ; + ChangeDoChildrenPost(rep,(fun elt -> mode := old_mode ; elt)) + end else begin + mode := false ; + ChangeDoChildrenPost(rep,(fun elt -> mode := old_mode ; elt)) + end + +(* + * This is the visitor that handles elements (marks them as wanted) + *) +class transVisitor ws wi wt wc we wv = object + inherit nopCilVisitor + + method vvdec vi = handle_no_default wv vi vi !mode + method vvrbl vi = handle wv vi vi + method vinst i = handle wi i [i] + method vstmt s = handle ws s s + method vfunc f = handle wv f.svar f + method vglob g = begin + match g with + | GType(ti,_) -> handle wt ti [g] + | GCompTag(ci,_) + | GCompTagDecl(ci,_) -> handle wc ci [g] + | GEnumTag(ei,_) + | GEnumTagDecl(ei,_) -> handle we ei [g] + | GVarDecl(vi,_) + | GVar(vi,_,_) -> handle wv vi [g] + | GFun(f,_) -> handle wv f.svar [g] + | _ -> DoChildren + end + method vtype t = begin + match t with + | TNamed(ti,_) -> handle wt ti t + | TComp(ci,_) -> handle wc ci t + | TEnum(ei,_) -> handle we ei t + | _ -> DoChildren + end +end + +(********************************************************************** + * STAGE 3 + * + * Eliminate all of the elements from the program that are not marked + * "keep". + **********************************************************************) +(* + * This is the visitor that throws away elements + *) +let handle ht elt keep drop = + if (Hashtbl.mem ht elt) && (Hashtbl.find ht elt = true) then + (* DoChildren *) ChangeDoChildrenPost(keep,(fun a -> a)) + else + ChangeTo(drop) + +class dropVisitor ws wi wt wc we wv = object + inherit nopCilVisitor + + method vinst i = handle wi i [i] [] + method vstmt s = handle ws s s (mkStmt (Instr([]))) + method vglob g = begin + match g with + | GType(ti,_) -> handle wt ti [g] [] + | GCompTag(ci,_) + | GCompTagDecl(ci,_) -> handle wc ci [g] [] + | GEnumTag(ei,_) + | GEnumTagDecl(ei,_) -> handle we ei [g] [] + | GVarDecl(vi,_) + | GVar(vi,_,_) -> handle wv vi [g] [] + | GFun(f,l) -> + let new_locals = List.filter (fun vi -> + Hashtbl.mem wv vi && (Hashtbl.find wv vi = true)) f.slocals in + let new_fundec = { f with slocals = new_locals} in + handle wv f.svar [(GFun(new_fundec,l))] [] + | _ -> DoChildren + end +end + +(********************************************************************** + * STAGE 1 + * + * Mark up the file with user-given information about what to keep and + * what to drop. + **********************************************************************) +type mark = Wanted | Unwanted | Unspecified +(* Given a cil file and a list of strings, mark all of the given ASTSlicer + * points as wanted or unwanted. *) +let mark_file (f : Cil.file) (names : (string, mark) Hashtbl.t) = + let ws = Hashtbl.create 32767 in + let wi = Hashtbl.create 32767 in + let wt = Hashtbl.create 32767 in + let wc = Hashtbl.create 32767 in + let we = Hashtbl.create 32767 in + let wv = Hashtbl.create 32767 in + if !debug then Printf.printf "Applying user marks to file ...\n" ; + let descend base i = + let res = (Printf.sprintf "%s.%d" base !i),(ref 0) in + res + in + let check base i (default : mark) = + let str = Printf.sprintf "%s.%d" base !i in + if !debug then Printf.printf "Looking for [%s]\n" str ; + try Hashtbl.find names str + with _ -> default + in + let mark ht stmt wanted = match wanted with + Unwanted -> Hashtbl.replace ht stmt false + | Wanted -> Hashtbl.replace ht stmt true + | Unspecified -> () + in + let rec doBlock b base i default = + doStmtList b.bstmts base i default + and doStmtList sl base i default = + List.iter (fun s -> match s.skind with + | Instr(il) -> doIL il base i default + | Return(_,_) + | Goto(_,_) + | Continue(_) + | Break(_) -> + mark ws s (check base i default) ; incr i + | If(e,b1,b2,_) -> + let inside = check base i default in + mark ws s inside ; + let base',i' = descend base i in + doBlock b1 base' i' inside ; + let base'',i'' = descend base i in + doBlock b2 base'' i'' inside ; + incr i + | Switch(_,b,_,_) +(* + | Loop(b,_,_,_) +*) + | While(_,b,_) + | DoWhile(_,b,_) + | For(_,_,_,b,_) + | Block(b) -> + let inside = check base i default in + mark ws s inside ; + let base',i' = descend base i in + doBlock b base' i' inside ; + incr i + | TryExcept _ | TryFinally _ -> + E.s (E.unimp "astslicer: mark") + ) sl + and doIL il base i default = + List.iter (fun ins -> mark wi ins (check base i default) ; incr i) il + in + let doGlobal g = match g with + | GFun(fd,_) -> + let cur = ref 0 in + if Hashtbl.mem names fd.svar.vname then begin + if Hashtbl.find names fd.svar.vname = Wanted then begin + Hashtbl.replace wv fd.svar true ; + doBlock fd.sbody fd.svar.vname cur (Wanted); + end else begin + Hashtbl.replace wv fd.svar false ; + doBlock fd.sbody fd.svar.vname cur (Unspecified); + end + end else begin + doBlock fd.sbody fd.svar.vname cur (Unspecified); + end + | _ -> () + in + List.iter doGlobal f.globals ; + if !debug then begin + Hashtbl.iter (fun k v -> + ignore (Pretty.printf "want-s %b %a\n" v d_stmt k)) ws ; + Hashtbl.iter (fun k v -> + ignore (Pretty.printf "want-i %b %a\n" v d_instr k)) wi ; + Hashtbl.iter (fun k v -> + ignore (Pretty.printf "want-v %b %s\n" v k.vname)) wv ; + end ; + (* + * Now repeatedly mark all other things that must be kept. + *) + let visitor = (new transVisitor ws wi wt wc we wv) in + finished := false ; + if !debug then (Printf.printf "\nPerforming Transitive Closure\n\n" ); + while not !finished do + finished := true ; + visitCilFileSameGlobals visitor f + done ; + if !debug then begin + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-s %a\n" d_stmt k)) ws ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-i %a\n" d_instr k)) wi ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-t %s\n" k.tname)) wt ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-c %s\n" k.cname)) wc ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-e %s\n" k.ename)) we ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-v %s\n" k.vname)) wv ; + end ; + + (* + * Now drop everything we didn't need. + *) + if !debug then (Printf.printf "Dropping Unwanted Elements\n" ); + let visitor = (new dropVisitor ws wi wt wc we wv) in + visitCilFile visitor f diff --git a/cil/src/ext/availexps.ml b/cil/src/ext/availexps.ml new file mode 100644 index 00000000..28c22c0e --- /dev/null +++ b/cil/src/ext/availexps.ml @@ -0,0 +1,359 @@ +(* compute available expressions, although in a somewhat + non-traditional way. the abstract state is a mapping from + variable ids to expressions as opposed to a set of + expressions *) + +open Cil +open Pretty + +module E = Errormsg +module DF = Dataflow +module UD = Usedef +module IH = Inthash +module U = Util +module S = Stats + +let debug = ref false + +(* exp IH.t -> exp IH.t -> bool *) +let eh_equals eh1 eh2 = + if not(IH.length eh1 = IH.length eh2) + then false + else IH.fold (fun vid e b -> + if not b then b else + try let e2 = IH.find eh2 vid in + if not(Util.equals e e2) + then false + else true + with Not_found -> false) + eh1 true + +let eh_pretty () eh = line ++ seq line (fun (vid,e) -> + text "AE:vid:" ++ num vid ++ text ": " ++ + (d_exp () e)) (IH.tolist eh) + +(* the result must be the intersection of eh1 and eh2 *) +(* exp IH.t -> exp IH.t -> exp IH.t *) +let eh_combine eh1 eh2 = + if !debug then ignore(E.log "eh_combine: combining %a\n and\n %a\n" + eh_pretty eh1 eh_pretty eh2); + let eh' = IH.copy eh1 in (* eh' gets all of eh1 *) + IH.iter (fun vid e1 -> + try let e2l = IH.find_all eh2 vid in + if not(List.exists (fun e2 -> Util.equals e1 e2) e2l) + (* remove things from eh' that eh2 doesn't have *) + then let e1l = IH.find_all eh' vid in + let e1l' = List.filter (fun e -> not(Util.equals e e1)) e1l in + IH.remove_all eh' vid; + List.iter (fun e -> IH.add eh' vid e) e1l' + with Not_found -> + IH.remove_all eh' vid) eh1; + if !debug then ignore(E.log "with result %a\n" + eh_pretty eh'); + eh' + +(* On a memory write, kill expressions containing memory writes + * or variables whose address has been taken. *) +let exp_ok = ref false +class memReadOrAddrOfFinderClass = object(self) + inherit nopCilVisitor + + method vexpr e = match e with + Lval(Mem _, _) -> + exp_ok := true; + SkipChildren + | _ -> DoChildren + + method vvrbl vi = + if vi.vaddrof then + (exp_ok := true; + SkipChildren) + else DoChildren + +end + +let memReadOrAddrOfFinder = new memReadOrAddrOfFinderClass + +(* exp -> bool *) +let exp_has_mem_read e = + exp_ok := false; + ignore(visitCilExpr memReadOrAddrOfFinder e); + !exp_ok + +let eh_kill_mem eh = + IH.iter (fun vid e -> + if exp_has_mem_read e + then IH.remove eh vid) + eh + +(* need to kill exps containing a particular vi sometimes *) +let has_vi = ref false +class viFinderClass vi = object(self) + inherit nopCilVisitor + + method vvrbl vi' = + if vi.vid = vi'.vid + then (has_vi := true; SkipChildren) + else DoChildren + +end + +let exp_has_vi e vi = + let vis = new viFinderClass vi in + has_vi := false; + ignore(visitCilExpr vis e); + !has_vi + +let eh_kill_vi eh vi = + IH.iter (fun vid e -> + if exp_has_vi e vi + then IH.remove eh vid) + eh + +let varHash = IH.create 32 + +let eh_kill_addrof_or_global eh = + if !debug then ignore(E.log "eh_kill: in eh_kill\n"); + IH.iter (fun vid e -> + try let vi = IH.find varHash vid in + if vi.vaddrof + then begin + if !debug then ignore(E.log "eh_kill: %s has its address taken\n" + vi.vname); + IH.remove eh vid + end + else if vi.vglob + then begin + if !debug then ignore(E.log "eh_kill: %s is global\n" + vi.vname); + IH.remove eh vid + end + with Not_found -> ()) eh + +let eh_handle_inst i eh = match i with + (* if a pointer write, kill things with read in them. + also kill mappings from vars that have had their address taken, + and globals. + otherwise kill things with lv in them and add e *) + Set(lv,e,_) -> (match lv with + (Mem _, _) -> + (eh_kill_mem eh; + eh_kill_addrof_or_global eh; + eh) + | (Var vi, NoOffset) -> + (match e with + Lval(Var vi', NoOffset) -> (* ignore x = x *) + if vi'.vid = vi.vid then eh else + (IH.replace eh vi.vid e; + eh_kill_vi eh vi; + eh) + | _ -> + (IH.replace eh vi.vid e; + eh_kill_vi eh vi; + eh)) + | _ -> eh) (* do nothing for now. *) +| Call(Some(Var vi,NoOffset),_,_,_) -> + (IH.remove eh vi.vid; + eh_kill_vi eh vi; + eh_kill_mem eh; + eh_kill_addrof_or_global eh; + eh) +| Call(_,_,_,_) -> + (eh_kill_mem eh; + eh_kill_addrof_or_global eh; + eh) +| Asm(_,_,_,_,_,_) -> + let _,d = UD.computeUseDefInstr i in + (UD.VS.iter (fun vi -> + eh_kill_vi eh vi) d; + eh) + +let allExpHash = IH.create 128 + +module AvailableExps = + struct + + let name = "Available Expressions" + + let debug = debug + + (* mapping from var id to expression *) + type t = exp IH.t + + let copy = IH.copy + + let stmtStartData = IH.create 64 + + let pretty = eh_pretty + + let computeFirstPredecessor stm eh = + eh_combine (IH.copy allExpHash) eh + + let combinePredecessors (stm:stmt) ~(old:t) (eh:t) = + if S.time "eh_equals" (eh_equals old) eh then None else + Some(S.time "eh_combine" (eh_combine old) eh) + + let doInstr i eh = + let action = eh_handle_inst i in + DF.Post(action) + + let doStmt stm astate = DF.SDefault + + let doGuard c astate = DF.GDefault + + let filterStmt stm = true + + end + +module AE = DF.ForwardsDataFlow(AvailableExps) + +(* make an exp IH.t with everything in it, + * also, fill in varHash while we're here. + *) +class expCollectorClass = object(self) + inherit nopCilVisitor + + method vinst i = match i with + Set((Var vi,NoOffset),e,_) -> + let e2l = IH.find_all allExpHash vi.vid in + if not(List.exists (fun e2 -> Util.equals e e2) e2l) + then IH.add allExpHash vi.vid e; + DoChildren + | _ -> DoChildren + + method vvrbl vi = + (if not(IH.mem varHash vi.vid) + then + (if !debug && vi.vglob then ignore(E.log "%s is global\n" vi.vname); + if !debug && not(vi.vglob) then ignore(E.log "%s is not global\n" vi.vname); + IH.add varHash vi.vid vi)); + DoChildren + +end + +let expCollector = new expCollectorClass + +let make_all_exps fd = + IH.clear allExpHash; + IH.clear varHash; + ignore(visitCilFunction expCollector fd) + + + +(* set all statement data to allExpHash, make + * a list of statements + *) +let all_stmts = ref [] +class allExpSetterClass = object(self) + inherit nopCilVisitor + + method vstmt s = + all_stmts := s :: (!all_stmts); + IH.add AvailableExps.stmtStartData s.sid (IH.copy allExpHash); + DoChildren + +end + +let allExpSetter = new allExpSetterClass + +let set_all_exps fd = + IH.clear AvailableExps.stmtStartData; + ignore(visitCilFunction allExpSetter fd) + +(* + * Computes AEs for function fd. + * + * + *) +(*let iAEsHtbl = Hashtbl.create 128*) +let computeAEs fd = + try let slst = fd.sbody.bstmts in + let first_stm = List.hd slst in + S.time "make_all_exps" make_all_exps fd; + all_stmts := []; + (*S.time "set_all_exps" set_all_exps fd;*) + (*Hashtbl.clear iAEsHtbl;*) + (*IH.clear (IH.find AvailableExps.stmtStartData first_stm.sid);*) + IH.add AvailableExps.stmtStartData first_stm.sid (IH.create 4); + S.time "compute" AE.compute [first_stm](*(List.rev !all_stmts)*) + with Failure "hd" -> if !debug then ignore(E.log "fn w/ no stmts?\n") + | Not_found -> if !debug then ignore(E.log "no data for first_stm?\n") + + +(* get the AE data for a statement *) +let getAEs sid = + try Some(IH.find AvailableExps.stmtStartData sid) + with Not_found -> None + +(* get the AE data for an instruction list *) +let instrAEs il sid eh out = + (*if Hashtbl.mem iAEsHtbl (sid,out) + then Hashtbl.find iAEsHtbl (sid,out) + else*) + let proc_one hil i = + match hil with + [] -> let eh' = IH.copy eh in + let eh'' = eh_handle_inst i eh' in + (*if !debug then ignore(E.log "instrAEs: proc_one []: for %a\n data is %a\n" + d_instr i eh_pretty eh'');*) + eh''::hil + | eh'::ehrst as l -> + let eh' = IH.copy eh' in + let eh'' = eh_handle_inst i eh' in + (*if !debug then ignore(E.log "instrAEs: proc_one: for %a\n data is %a\n" + d_instr i eh_pretty eh'');*) + eh''::l + in + let folded = List.fold_left proc_one [eh] il in + (*let foldedout = List.tl (List.rev folded) in*) + let foldednotout = List.rev (List.tl folded) in + (*Hashtbl.add iAEsHtbl (sid,true) foldedout; + Hashtbl.add iAEsHtbl (sid,false) foldednotout;*) + (*if out then foldedout else*) foldednotout + +class aeVisitorClass = object(self) + inherit nopCilVisitor + + val mutable sid = -1 + + val mutable ae_dat_lst = [] + + val mutable cur_ae_dat = None + + method vstmt stm = + sid <- stm.sid; + match getAEs sid with + None -> + if !debug then ignore(E.log "aeVis: stm %d has no data\n" sid); + cur_ae_dat <- None; + DoChildren + | Some eh -> + match stm.skind with + Instr il -> + if !debug then ignore(E.log "aeVist: visit il\n"); + ae_dat_lst <- S.time "instrAEs" (instrAEs il stm.sid eh) false; + DoChildren + | _ -> + if !debug then ignore(E.log "aeVisit: visit non-il\n"); + cur_ae_dat <- None; + DoChildren + + method vinst i = + if !debug then ignore(E.log "aeVist: before %a, ae_dat_lst is %d long\n" + d_instr i (List.length ae_dat_lst)); + try + let data = List.hd ae_dat_lst in + cur_ae_dat <- Some(data); + ae_dat_lst <- List.tl ae_dat_lst; + if !debug then ignore(E.log "aeVisit: data is %a\n" eh_pretty data); + DoChildren + with Failure "hd" -> + if !debug then ignore(E.log "aeVis: il ae_dat_lst mismatch\n"); + DoChildren + + method get_cur_eh () = + match cur_ae_dat with + None -> getAEs sid + | Some eh -> Some eh + +end diff --git a/cil/src/ext/bitmap.ml b/cil/src/ext/bitmap.ml new file mode 100644 index 00000000..da1f8b99 --- /dev/null +++ b/cil/src/ext/bitmap.ml @@ -0,0 +1,224 @@ + + (* Imperative bitmaps *) +type t = { mutable nrWords : int; + mutable nrBits : int; (* This is 31 * nrWords *) + mutable bitmap : int array } + + + (* Enlarge a bitmap to contain at + * least newBits *) +let enlarge b newWords = + let newbitmap = + if newWords > b.nrWords then + let a = Array.create newWords 0 in + Array.blit b.bitmap 0 a 0 b.nrWords; + a + else + b.bitmap in + b.nrWords <- newWords; + b.nrBits <- (newWords lsl 5) - newWords; + b.bitmap <- newbitmap + + + (* Create a new empty bitmap *) +let make size = + let wrd = (size + 30) / 31 in + { nrWords = wrd; + nrBits = (wrd lsl 5) - wrd; + bitmap = Array.make wrd 0 + } + +let size t = t.nrBits + (* Make an initialized array *) +let init size how = + let wrd = (size + 30) / 31 in + let how' w = + let first = (w lsl 5) - w in + let last = min size (first + 31) in + let rec loop i acc = + if i >= last then acc + else + let acc' = acc lsl 1 in + if how i then loop (i + 1) (acc' lor 1) + else loop (i + 1) acc' + in + loop first 0 + in + { nrWords = wrd; + nrBits = (wrd lsl 5) - wrd; + bitmap = Array.init wrd how' + } + +let clone b = + { nrWords = b.nrWords; + nrBits = b.nrBits; + bitmap = Array.copy b.bitmap; + } + +let cloneEmpty b = + { nrWords = b.nrWords; + nrBits = b.nrBits; + bitmap = Array.make b.nrWords 0; + } + +let union b1 b2 = + begin + let n = b2.nrWords in + if b1.nrWords < n then enlarge b1 n else (); + let a1 = b1.bitmap in + let a2 = b2.bitmap in + let changed = ref false in + for i=0 to n - 1 do + begin + let t = a1.(i) in + let upd = t lor a2.(i) in + let _ = if upd <> t then changed := true else () in + Array.unsafe_set a1 i upd + end + done; + ! changed + end + (* lin += (lout - def) *) +let accLive lin lout def = + begin (* Need to enlarge def to lout *) + let n = lout.nrWords in + if def.nrWords < n then enlarge def n else (); + (* Need to enlarge lin to lout *) + if lin.nrWords < n then enlarge lin n else (); + let changed = ref false in + let alin = lin.bitmap in + let alout = lout.bitmap in + let adef = def.bitmap in + for i=0 to n - 1 do + begin + let old = alin.(i) in + let nw = old lor (alout.(i) land (lnot adef.(i))) in + alin.(i) <- nw; + changed := (old <> nw) || (!changed) + end + done; + !changed + end + + (* b1 *= b2 *) +let inters b1 b2 = + begin + let n = min b1.nrWords b2.nrWords in + let a1 = b1.bitmap in + let a2 = b2.bitmap in + for i=0 to n - 1 do + begin + a1.(i) <- a1.(i) land a2.(i) + end + done; + if n < b1.nrWords then + Array.fill a1 n (b1.nrWords - n) 0 + else + () + end + +let emptyInt b start = + let n = b.nrWords in + let a = b.bitmap in + let rec loop i = i >= n || (a.(i) = 0 && loop (i + 1)) + in + loop start + +let empty b = emptyInt b 0 + + (* b1 =? b2 *) +let equal b1 b2 = + begin + let n = min b1.nrWords b2.nrWords in + let a1 = b1.bitmap in + let a2 = b2.bitmap in + let res = ref true in + for i=0 to n - 1 do + begin + if a1.(i) != a2.(i) then res := false else () + end + done; + if !res then + if b1.nrWords > n then + emptyInt b1 n + else if b2.nrWords > n then + emptyInt b2 n + else + true + else + false + end + +let assign b1 b2 = + begin + let n = b2.nrWords in + if b1.nrWords < n then enlarge b1 n else (); + let a1 = b1.bitmap in + let a2 = b2.bitmap in + Array.blit a2 0 a1 0 n + end + + (* b1 -= b2 *) +let diff b1 b2 = + begin + let n = min b1.nrWords b2.nrWords in + let a1 = b1.bitmap in + let a2 = b2.bitmap in + for i=0 to n - 1 do + a1.(i) <- a1.(i) land (lnot a2.(i)) + done; + if n < b1.nrWords then + Array.fill a1 n (b1.nrWords - n) 0 + else + () + end + + + + +let get bmp i = + assert (i >= 0); + if i >= bmp.nrBits then enlarge bmp (i / 31 + 1) else (); + let wrd = i / 31 in + let msk = 1 lsl (i + wrd - (wrd lsl 5)) in + bmp.bitmap.(wrd) land msk != 0 + + +let set bmp i tv = + assert(i >= 0); + let wrd = i / 31 in + let msk = 1 lsl (i + wrd - (wrd lsl 5)) in + if i >= bmp.nrBits then enlarge bmp (wrd + 1) else (); + if tv then + bmp.bitmap.(wrd) <- bmp.bitmap.(wrd) lor msk + else + bmp.bitmap.(wrd) <- bmp.bitmap.(wrd) land (lnot msk) + + + + (* Iterate over all elements in a + * bitmap *) +let fold f bmp arg = + let a = bmp.bitmap in + let n = bmp.nrWords in + let rec allWords i bit arg = + if i >= n then + arg + else + let rec allBits msk bit left arg = + if left = 0 then + allWords (i + 1) bit arg + else + allBits ((lsr) msk 1) (bit + 1) (left - 1) + (if (land) msk 1 != 0 then f arg bit else arg) + in + allBits a.(i) bit 31 arg + in + allWords 0 0 arg + + +let iter f t = fold (fun x y -> f y) t () + +let toList bmp = fold (fun acc i -> i :: acc) bmp [] + +let card bmp = fold (fun acc _ -> acc + 1) bmp 0 diff --git a/cil/src/ext/bitmap.mli b/cil/src/ext/bitmap.mli new file mode 100644 index 00000000..5247e35d --- /dev/null +++ b/cil/src/ext/bitmap.mli @@ -0,0 +1,50 @@ + + (* Imperative bitmaps *) + +type t + (* Create a bitmap given the number + * of bits *) +val make : int -> t +val init : int -> (int -> bool) -> t (* Also initialize it *) + +val size : t -> int (* How much space it is reserved *) + + (* The cardinality of a set *) +val card : t -> int + + (* Make a copy of a bitmap *) +val clone : t -> t + +val cloneEmpty : t -> t (* An empty set with the same + * dimentions *) + +val set : t -> int -> bool -> unit +val get : t -> int -> bool + (* destructive union. The first + * element is updated. Returns true + * if any change was actually + * necessary *) +val union : t -> t -> bool + + (* accLive livein liveout def. Does + * liveIn += (liveout - def) *) +val accLive : t -> t -> t -> bool + + (* Copy the second argument onto the + * first *) +val assign : t -> t -> unit + + +val inters : t -> t -> unit +val diff : t -> t -> unit + + +val empty : t -> bool + +val equal : t -> t -> bool + +val toList : t -> int list + +val iter : (int -> unit) -> t -> unit +val fold : ('a -> int -> 'a) -> t -> 'a -> 'a + diff --git a/cil/src/ext/blockinggraph.ml b/cil/src/ext/blockinggraph.ml new file mode 100644 index 00000000..281678ae --- /dev/null +++ b/cil/src/ext/blockinggraph.ml @@ -0,0 +1,769 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +open Cil +open Pretty +module E = Errormsg + +let debug = false + +let fingerprintAll = true + + +type blockkind = + NoBlock + | BlockTrans + | BlockPoint + | EndPoint + +(* For each function we have a node *) +type node = +{ + nodeid: int; + name: string; + mutable scanned: bool; + mutable expand: bool; + mutable fptr: bool; + mutable stacksize: int; + mutable fds: fundec option; + mutable bkind: blockkind; + mutable origkind: blockkind; + mutable preds: node list; + mutable succs: node list; + mutable predstmts: (stmt * node) list; +} + +type blockpt = +{ + id: int; + point: stmt; + callfun: string; + infun: string; + mutable leadsto: blockpt list; +} + + +(* Fresh ids for each node. *) +let curNodeNum : int ref = ref 0 +let getFreshNodeNum () : int = + let num = !curNodeNum in + incr curNodeNum; + num + +(* Initialize a node. *) +let newNode (name: string) (fptr: bool) (mangle: bool) : node = + let id = getFreshNodeNum () in + { nodeid = id; name = if mangle then name ^ (string_of_int id) else name; + scanned = false; expand = false; + fptr = fptr; stacksize = 0; fds = None; + bkind = NoBlock; origkind = NoBlock; + preds = []; succs = []; predstmts = []; } + + +(* My type signature ignores attributes and function pointers. *) +let myTypeSig (t: typ) : typsig = + let rec removeFunPtrs (ts: typsig) : typsig = + match ts with + TSPtr (TSFun _, a) -> + TSPtr (TSBase voidType, a) + | TSPtr (base, a) -> + TSPtr (removeFunPtrs base, a) + | TSArray (base, e, a) -> + TSArray (removeFunPtrs base, e, a) + | TSFun (ret, args, v, a) -> + TSFun (removeFunPtrs ret, List.map removeFunPtrs args, v, a) + | _ -> ts + in + removeFunPtrs (typeSigWithAttrs (fun _ -> []) t) + + +(* We add a dummy function whose name is "@@functionPointer@@" that is called + * at all invocations of function pointers and itself calls all functions + * whose address is taken. *) +let functionPointerName = "@@functionPointer@@" + +(* We map names to nodes *) +let functionNodes: (string, node) Hashtbl.t = Hashtbl.create 113 +let getFunctionNode (n: string) : node = + Util.memoize + functionNodes + n + (fun _ -> newNode n false false) + +(* We map types to nodes for function pointers *) +let functionPtrNodes: (typsig, node) Hashtbl.t = Hashtbl.create 113 +let getFunctionPtrNode (t: typ) : node = + Util.memoize + functionPtrNodes + (myTypeSig t) + (fun _ -> newNode functionPointerName true true) + +let startNode: node = newNode "@@startNode@@" true false + + +(* +(** Dump the function call graph. *) +let dumpFunctionCallGraph (start: node) = + Hashtbl.iter (fun _ x -> x.scanned <- false) functionNodes; + let rec dumpOneNode (ind: int) (n: node) : unit = + output_string !E.logChannel "\n"; + for i = 0 to ind do + output_string !E.logChannel " " + done; + output_string !E.logChannel (n.name ^ " "); + begin + match n.bkind with + NoBlock -> () + | BlockTrans -> output_string !E.logChannel " " + | BlockPoint -> output_string !E.logChannel " " + | EndPoint -> output_string !E.logChannel " " + end; + if n.scanned then (* Already dumped *) + output_string !E.logChannel " " + else begin + n.scanned <- true; + List.iter (fun n -> if n.bkind <> EndPoint then dumpOneNode (ind + 1) n) + n.succs + end + in + dumpOneNode 0 start; + output_string !E.logChannel "\n\n" +*) + +let dumpFunctionCallGraphToFile () = + let channel = open_out "graph" in + let dumpNode _ (n: node) : unit = + let first = ref true in + let dumpSucc (n: node) : unit = + if !first then + first := false + else + output_string channel ","; + output_string channel n.name + in + output_string channel (string_of_int n.nodeid); + output_string channel ":"; + output_string channel (string_of_int n.stacksize); + output_string channel ":"; + if n.fds = None && not n.fptr then + output_string channel "x"; + output_string channel ":"; + output_string channel n.name; + output_string channel ":"; + List.iter dumpSucc n.succs; + output_string channel "\n"; + in + dumpNode () startNode; + Hashtbl.iter dumpNode functionNodes; + Hashtbl.iter dumpNode functionPtrNodes; + close_out channel + + +let addCall (callerNode: node) (calleeNode: node) (sopt: stmt option) = + if not (List.exists (fun n -> n.name = calleeNode.name) + callerNode.succs) then begin + if debug then + ignore (E.log "found call from %s to %s\n" + callerNode.name calleeNode.name); + callerNode.succs <- calleeNode :: callerNode.succs; + calleeNode.preds <- callerNode :: calleeNode.preds; + end; + match sopt with + Some s -> + if not (List.exists (fun (s', _) -> s' = s) calleeNode.predstmts) then + calleeNode.predstmts <- (s, callerNode) :: calleeNode.predstmts + | None -> () + + +class findCallsVisitor (host: node) : cilVisitor = object + inherit nopCilVisitor + + val mutable curStmt : stmt ref = ref (mkEmptyStmt ()) + + method vstmt s = + curStmt := s; + DoChildren + + method vinst i = + match i with + | Call(_,Lval(Var(vi),NoOffset),args,l) -> + addCall host (getFunctionNode vi.vname) (Some !curStmt); + SkipChildren + + | Call(_,e,_,l) -> (* Calling a function pointer *) + addCall host (getFunctionPtrNode (typeOf e)) (Some !curStmt); + SkipChildren + + | _ -> SkipChildren (* No calls in other instructions *) + + (* There are no calls in expressions and types *) + method vexpr e = SkipChildren + method vtype t = SkipChildren + +end + + +let endPt = { id = 0; point = mkEmptyStmt (); callfun = "end"; infun = "end"; + leadsto = []; } + +(* These values will be initialized for real in makeBlockingGraph. *) +let curId : int ref = ref 1 +let startName : string ref = ref "" +let blockingPoints : blockpt list ref = ref [] +let blockingPointsNew : blockpt Queue.t = Queue.create () +let blockingPointsHash : (int, blockpt) Hashtbl.t = Hashtbl.create 113 + +let getFreshNum () : int = + let num = !curId in + curId := !curId + 1; + num + +let getBlockPt (s: stmt) (cfun: string) (ifun: string) : blockpt = + try + Hashtbl.find blockingPointsHash s.sid + with Not_found -> + let num = getFreshNum () in + let bpt = { id = num; point = s; callfun = cfun; infun = ifun; + leadsto = []; } in + Hashtbl.add blockingPointsHash s.sid bpt; + blockingPoints := bpt :: !blockingPoints; + Queue.add bpt blockingPointsNew; + bpt + + +type action = + Process of stmt * node + | Next of stmt * node + | Return of node + +let getStmtNode (s: stmt) : node option = + match s.skind with + Instr instrs -> begin + let len = List.length instrs in + if len > 0 then + match List.nth instrs (len - 1) with + Call (_, Lval (Var vi, NoOffset), args, _) -> + Some (getFunctionNode vi.vname) + | Call (_, e, _, _) -> (* Calling a function pointer *) + Some (getFunctionPtrNode (typeOf e)) + | _ -> + None + else + None + end + | _ -> None + +let addBlockingPointEdge (bptFrom: blockpt) (bptTo: blockpt) : unit = + if not (List.exists (fun bpt -> bpt = bptTo) bptFrom.leadsto) then + bptFrom.leadsto <- bptTo :: bptFrom.leadsto + +let findBlockingPointEdges (bpt: blockpt) : unit = + let seenStmts = Hashtbl.create 117 in + let worklist = Queue.create () in + Queue.add (Next (bpt.point, getFunctionNode bpt.infun)) worklist; + while Queue.length worklist > 0 do + let act = Queue.take worklist in + match act with + Process (curStmt, curNode) -> begin + Hashtbl.add seenStmts curStmt.sid (); + match getStmtNode curStmt with + Some node -> begin + if debug then + ignore (E.log "processing node %s\n" node.name); + match node.bkind with + NoBlock -> + Queue.add (Next (curStmt, curNode)) worklist + | BlockTrans -> begin + let processFundec (fd: fundec) : unit = + let s = List.hd fd.sbody.bstmts in + if not (Hashtbl.mem seenStmts s.sid) then + let n = getFunctionNode fd.svar.vname in + Queue.add (Process (s, n)) worklist + in + match node.fds with + Some fd -> + processFundec fd + | None -> + List.iter + (fun n -> + match n.fds with + Some fd -> processFundec fd + | None -> E.s (bug "expected fundec")) + node.succs + end + | BlockPoint -> + addBlockingPointEdge bpt + (getBlockPt curStmt node.name curNode.name) + | EndPoint -> + addBlockingPointEdge bpt endPt + end + | _ -> + Queue.add (Next (curStmt, curNode)) worklist + end + | Next (curStmt, curNode) -> begin + match curStmt.Cil.succs with + [] -> + if debug then + ignore (E.log "hit end of %s\n" curNode.name); + Queue.add (Return curNode) worklist + | _ -> + List.iter (fun s -> + if not (Hashtbl.mem seenStmts s.sid) then + Queue.add (Process (s, curNode)) worklist) + curStmt.Cil.succs + end + | Return curNode when curNode.bkind = NoBlock -> + () + | Return curNode when curNode.name = !startName -> + addBlockingPointEdge bpt endPt + | Return curNode -> + List.iter (fun (s, n) -> if n.bkind <> NoBlock then + Queue.add (Next (s, n)) worklist) + curNode.predstmts; + List.iter (fun n -> if n.fptr then + Queue.add (Return n) worklist) + curNode.preds + done + +let markYieldPoints (n: node) : unit = + let rec markNode (n: node) : unit = + if n.bkind = NoBlock then + match n.origkind with + BlockTrans -> + if n.expand || n.fptr then begin + n.bkind <- BlockTrans; + List.iter markNode n.succs + end else begin + n.bkind <- BlockPoint + end + | _ -> + n.bkind <- n.origkind + in + Hashtbl.iter (fun _ n -> n.bkind <- NoBlock) functionNodes; + Hashtbl.iter (fun _ n -> n.bkind <- NoBlock) functionPtrNodes; + markNode n + +let makeBlockingGraph (start: node) = + let startStmt = + match start.fds with + Some fd -> List.hd fd.sbody.bstmts + | None -> E.s (bug "expected fundec") + in + curId := 1; + startName := start.name; + blockingPoints := [endPt]; + Queue.clear blockingPointsNew; + Hashtbl.clear blockingPointsHash; + ignore (getBlockPt startStmt start.name start.name); + while Queue.length blockingPointsNew > 0 do + let bpt = Queue.take blockingPointsNew in + findBlockingPointEdges bpt; + done + +let dumpBlockingGraph () = + List.iter + (fun bpt -> + if bpt.id < 2 then begin + ignore (E.log "bpt %d (%s): " bpt.id bpt.callfun) + end else begin + ignore (E.log "bpt %d (%s in %s): " bpt.id bpt.callfun bpt.infun) + end; + List.iter (fun bpt -> ignore (E.log "%d " bpt.id)) bpt.leadsto; + ignore (E.log "\n")) + !blockingPoints; + ignore (E.log "\n") + +let beforeFun = + makeGlobalVar "before_bg_node" + (TFun (voidType, Some [("node_idx", intType, []); + ("num_edges", intType, [])], + false, [])) + +let initFun = + makeGlobalVar "init_blocking_graph" + (TFun (voidType, Some [("num_nodes", intType, [])], + false, [])) + +let fingerprintVar = + let vi = makeGlobalVar "stack_fingerprint" intType in + vi.vstorage <- Extern; + vi + +let startNodeAddrs = + let vi = makeGlobalVar "start_node_addrs" (TPtr (voidPtrType, [])) in + vi.vstorage <- Extern; + vi + +let startNodeStacks = + let vi = makeGlobalVar "start_node_stacks" (TPtr (intType, [])) in + vi.vstorage <- Extern; + vi + +let startNodeAddrsArray = + makeGlobalVar "start_node_addrs_array" (TArray (voidPtrType, None, [])) + +let startNodeStacksArray = + makeGlobalVar "start_node_stacks_array" (TArray (intType, None, [])) + +let insertInstr (newInstr: instr) (s: stmt) : unit = + match s.skind with + Instr instrs -> + let rec insert (instrs: instr list) : instr list = + match instrs with + [] -> E.s (bug "instr list does not end with call\n") + | [Call _] -> newInstr :: instrs + | i :: rest -> i :: (insert rest) + in + s.skind <- Instr (insert instrs) + | _ -> + E.s (bug "instr stmt expected\n") + +let instrumentBlockingPoints () = + List.iter + (fun bpt -> + if bpt.id > 1 then + let arg1 = integer bpt.id in + let arg2 = integer (List.length bpt.leadsto) in + let call = Call (None, Lval (var beforeFun), + [arg1; arg2], locUnknown) in + insertInstr call bpt.point; + addCall (getFunctionNode bpt.infun) + (getFunctionNode beforeFun.vname) None) + !blockingPoints + + +let startNodes : node list ref = ref [] + +let makeAndDumpBlockingGraphs () : unit = + if List.length !startNodes > 1 then + E.s (unimp "We can't handle more than one start node right now.\n"); + List.iter + (fun n -> + markYieldPoints n; + (*dumpFunctionCallGraph n;*) + makeBlockingGraph n; + dumpBlockingGraph (); + instrumentBlockingPoints ()) + !startNodes + + +let pragmas : (string, int) Hashtbl.t = Hashtbl.create 13 + +let gatherPragmas (f: file) : unit = + List.iter + (function + GPragma (Attr ("stacksize", [AStr s; AInt n]), _) -> + Hashtbl.add pragmas s n + | _ -> ()) + f.globals + + +let blockingNodes : node list ref = ref [] + +let markBlockingFunctions () : unit = + let rec markFunction (n: node) : unit = + if debug then + ignore (E.log "marking %s\n" n.name); + if n.origkind = NoBlock then begin + n.origkind <- BlockTrans; + List.iter markFunction n.preds; + end + in + List.iter (fun n -> List.iter markFunction n.preds) !blockingNodes + +let hasFunctionTypeAttribute (n: string) (t: typ) : bool = + let _, _, _, a = splitFunctionType t in + hasAttribute n a + +let markVar (vi: varinfo) : unit = + let node = getFunctionNode vi.vname in + if node.origkind = NoBlock then begin + if hasAttribute "yield" vi.vattr then begin + node.origkind <- BlockPoint; + blockingNodes := node :: !blockingNodes; + end else if hasFunctionTypeAttribute "noreturn" vi.vtype then begin + node.origkind <- EndPoint; + end else if hasAttribute "expand" vi.vattr then begin + node.expand <- true; + end + end; + begin + try + node.stacksize <- Hashtbl.find pragmas node.name + with Not_found -> begin + match filterAttributes "stacksize" vi.vattr with + (Attr (_, [AInt n])) :: _ when n > node.stacksize -> + node.stacksize <- n + | _ -> () + end + end + +let makeFunctionCallGraph (f: Cil.file) : unit = + Hashtbl.clear functionNodes; + (* Scan the file and construct the control-flow graph *) + List.iter + (function + GFun(fdec, _) -> + let curNode = getFunctionNode fdec.svar.vname in + if fdec.svar.vaddrof then begin + addCall (getFunctionPtrNode fdec.svar.vtype) + curNode None; + end; + if hasAttribute "start" fdec.svar.vattr then begin + startNodes := curNode :: !startNodes; + end; + markVar fdec.svar; + curNode.fds <- Some fdec; + let vis = new findCallsVisitor curNode in + ignore (visitCilBlock vis fdec.sbody) + + | GVarDecl(vi, _) when isFunctionType vi.vtype -> + (* TODO: what if we take the addr of an extern? *) + markVar vi + + | _ -> ()) + f.globals + +let makeStartNodeLinks () : unit = + addCall startNode (getFunctionNode "main") None; + List.iter (fun n -> addCall startNode n None) !startNodes + +let funType (ret_t: typ) (args: (string * typ) list) = + TFun(ret_t, + Some (List.map (fun (n,t) -> (n, t, [])) args), + false, []) + +class instrumentClass = object + inherit nopCilVisitor + + val mutable curNode : node ref = ref (getFunctionNode "main") + val mutable seenRet : bool ref = ref false + + val mutable funId : int ref = ref 0 + + method vfunc (fdec: fundec) : fundec visitAction = begin + (* Remember the current function. *) + curNode := getFunctionNode fdec.svar.vname; + seenRet := false; + funId := Random.bits (); + (* Add useful locals. *) + ignore (makeLocalVar fdec "savesp" voidPtrType); + ignore (makeLocalVar fdec "savechunk" voidPtrType); + ignore (makeLocalVar fdec "savebottom" voidPtrType); + (* Add macro for function entry when we're done. *) + let addEntryNode (fdec: fundec) : fundec = + if not !seenRet then E.s (bug "didn't find a return statement"); + let node = getFunctionNode fdec.svar.vname in + if fingerprintAll || node.origkind <> NoBlock then begin + let fingerprintSet = + Set (var fingerprintVar, BinOp (BXor, Lval (var fingerprintVar), + integer !funId, intType), + locUnknown) + in + fdec.sbody.bstmts <- mkStmtOneInstr fingerprintSet :: fdec.sbody.bstmts + end; + let nodeFun = emptyFunction ("NODE_CALL_"^(string_of_int node.nodeid)) in + let nodeCall = Call (None, Lval (var nodeFun.svar), [], locUnknown) in + nodeFun.svar.vtype <- funType voidType []; + nodeFun.svar.vstorage <- Static; + fdec.sbody.bstmts <- mkStmtOneInstr nodeCall :: fdec.sbody.bstmts; + fdec + in + ChangeDoChildrenPost (fdec, addEntryNode) + end + + method vstmt (s: stmt) : stmt visitAction = begin + begin + match s.skind with + Instr instrs -> begin + let instrumentNode (callNode: node) : unit = + (* Make calls to macros. *) + let suffix = "_" ^ (string_of_int !curNode.nodeid) ^ + "_" ^ (string_of_int callNode.nodeid) + in + let beforeFun = emptyFunction ("BEFORE_CALL" ^ suffix) in + let beforeCall = Call (None, Lval (var beforeFun.svar), + [], locUnknown) in + beforeFun.svar.vtype <- funType voidType []; + beforeFun.svar.vstorage <- Static; + let afterFun = emptyFunction ("AFTER_CALL" ^ suffix) in + let afterCall = Call (None, Lval (var afterFun.svar), + [], locUnknown) in + afterFun.svar.vtype <- funType voidType []; + afterFun.svar.vstorage <- Static; + (* Insert instrumentation around call site. *) + let rec addCalls (is: instr list) : instr list = + match is with + [call] -> [beforeCall; call; afterCall] + | cur :: rest -> cur :: addCalls rest + | [] -> E.s (bug "expected list of non-zero length") + in + s.skind <- Instr (addCalls instrs) + in + (* If there's a call site here, instrument it. *) + let len = List.length instrs in + if len > 0 then begin + match List.nth instrs (len - 1) with + Call (_, Lval (Var vi, NoOffset), _, _) -> + (* + if (try String.sub vi.vname 0 10 <> "NODE_CALL_" + with Invalid_argument _ -> true) then +*) + instrumentNode (getFunctionNode vi.vname) + | Call (_, e, _, _) -> (* Calling a function pointer *) + instrumentNode (getFunctionPtrNode (typeOf e)) + | _ -> () + end; + DoChildren + end + | Cil.Return _ -> begin + if !seenRet then E.s (bug "found multiple returns"); + seenRet := true; + if fingerprintAll || !curNode.origkind <> NoBlock then begin + let fingerprintSet = + Set (var fingerprintVar, BinOp (BXor, Lval (var fingerprintVar), + integer !funId, intType), + locUnknown) + in + s.skind <- Block (mkBlock [mkStmtOneInstr fingerprintSet; + mkStmt s.skind]); + end; + SkipChildren + end + | _ -> DoChildren + end + end +end + +let makeStartNodeTable (globs: global list) : global list = + if List.length !startNodes = 0 then + globs + else + let addrInitInfo = { init = None } in + let stackInitInfo = { init = None } in + let rec processNode (nodes: node list) (i: int) = + match nodes with + node :: rest -> + let curGlobs, addrInit, stackInit = processNode rest (i + 1) in + let fd = + match node.fds with + Some fd -> fd + | None -> E.s (bug "expected fundec") + in + let stack = + makeGlobalVar ("NODE_STACK_" ^ (string_of_int node.nodeid)) intType + in + GVarDecl (fd.svar, locUnknown) :: curGlobs, + ((Index (integer i, NoOffset), SingleInit (mkAddrOf (var fd.svar))) :: + addrInit), + ((Index (integer i, NoOffset), SingleInit (Lval (var stack))) :: + stackInit) + | [] -> (GVarDecl (startNodeAddrs, locUnknown) :: + GVarDecl (startNodeStacks, locUnknown) :: + GVar (startNodeAddrsArray, addrInitInfo, locUnknown) :: + GVar (startNodeStacksArray, stackInitInfo, locUnknown) :: + []), + [Index (integer i, NoOffset), SingleInit zero], + [Index (integer i, NoOffset), SingleInit zero] + in + let newGlobs, addrInit, stackInit = processNode !startNodes 0 in + addrInitInfo.init <- + Some (CompoundInit (TArray (voidPtrType, None, []), addrInit)); + stackInitInfo.init <- + Some (CompoundInit (TArray (intType, None, []), stackInit)); + let file = { fileName = "startnode.h"; globals = newGlobs; + globinit = None; globinitcalled = false; } in + let channel = open_out file.fileName in + dumpFile defaultCilPrinter channel file; + close_out channel; + GText ("#include \"" ^ file.fileName ^ "\"") :: globs + +let instrumentProgram (f: file) : unit = + (* Add function prototypes. *) + f.globals <- makeStartNodeTable f.globals; + f.globals <- GText ("#include \"stack.h\"") :: + GVarDecl (initFun, locUnknown) :: + GVarDecl (beforeFun, locUnknown) :: + GVarDecl (fingerprintVar, locUnknown) :: + f.globals; + (* Add instrumentation to call sites. *) + visitCilFile ((new instrumentClass) :> cilVisitor) f; + (* Force creation of this node. *) + ignore (getFunctionNode beforeFun.vname); + (* Add initialization call to main(). *) + let mainNode = getFunctionNode "main" in + match mainNode.fds with + Some fdec -> + let arg1 = integer (List.length !blockingPoints) in + let initInstr = Call (None, Lval (var initFun), [arg1], locUnknown) in + let addrsInstr = + Set (var startNodeAddrs, StartOf (var startNodeAddrsArray), + locUnknown) + in + let stacksInstr = + Set (var startNodeStacks, StartOf (var startNodeStacksArray), + locUnknown) + in + let newStmt = + if List.length !startNodes = 0 then + mkStmtOneInstr initInstr + else + mkStmt (Instr [addrsInstr; stacksInstr; initInstr]) + in + fdec.sbody.bstmts <- newStmt :: fdec.sbody.bstmts; + addCall mainNode (getFunctionNode initFun.vname) None + | None -> + E.s (bug "expected main fundec") + + + +let feature : featureDescr = + { fd_name = "FCG"; + fd_enabled = ref false; + fd_description = "computing and printing a static call graph"; + fd_extraopt = []; + fd_doit = + (function (f : file) -> + Random.init 0; (* Use the same seed so that results are predictable. *) + gatherPragmas f; + makeFunctionCallGraph f; + makeStartNodeLinks (); + markBlockingFunctions (); + (* makeAndDumpBlockingGraphs (); *) + instrumentProgram f; + dumpFunctionCallGraphToFile ()); + fd_post_check = true; + } diff --git a/cil/src/ext/blockinggraph.mli b/cil/src/ext/blockinggraph.mli new file mode 100644 index 00000000..72f9ba7b --- /dev/null +++ b/cil/src/ext/blockinggraph.mli @@ -0,0 +1,40 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* This module finds and analyzes yield points. *) + +val feature: Cil.featureDescr diff --git a/cil/src/ext/callgraph.ml b/cil/src/ext/callgraph.ml new file mode 100644 index 00000000..58472ac6 --- /dev/null +++ b/cil/src/ext/callgraph.ml @@ -0,0 +1,250 @@ +(* callgraph.ml *) +(* code for callgraph.mli *) + +(* see copyright notice at end of this file *) + +open Cil +open Trace +open Printf +module P = Pretty +module IH = Inthash +module H = Hashtbl +module E = Errormsg + +(* ------------------- interface ------------------- *) +(* a call node describes the local calling structure for a + * single function: which functions it calls, and which + * functions call it *) +type callnode = { + (* An id *) + cnid: int; + + (* the function this node describes *) + cnInfo: nodeinfo; + + (* set of functions this one calls, indexed by the node id *) + cnCallees: callnode IH.t; + + (* set of functions that call this one , indexed by the node id *) + cnCallers: callnode IH.t; +} + +and nodeinfo = + NIVar of varinfo * bool ref + (* Node corresponding to a function. If the boolean + * is true, then the function is defined, otherwise + * it is external *) + + | NIIndirect of string (* Indirect nodes have a string associated to them. + * These strings must be invalid function names *) + * varinfo list ref + (* A list of functions that this indirect node might + * denote *) + +let nodeName (n: nodeinfo) : string = + match n with + NIVar (v, _) -> v.vname + | NIIndirect (n, _) -> n + +(* a call graph is a hashtable, mapping a function name to + * the node which describes that function's call structure *) +type callgraph = + (string, callnode) Hashtbl.t + +(* given the name of a function, retrieve its callnode; this will create a + * node if one doesn't already exist. Will use the given nodeinfo only when + * creating nodes. *) +let nodeId = ref 0 +let getNodeByName (cg: callgraph) (ni: nodeinfo) : callnode = + let name = nodeName ni in + try + H.find cg name + with Not_found -> ( + (* make a new node *) + let ret:callnode = { + cnInfo = ni; + cnid = !nodeId; + cnCallees = IH.create 5; + cnCallers = IH.create 5; + } + in + incr nodeId; + (* add it to the table, then return it *) + H.add cg name ret; + ret + ) + +(* Get the node for a variable *) +let getNodeForVar (cg: callgraph) (v: varinfo) : callnode = + getNodeByName cg (NIVar (v, ref false)) + +let getNodeForIndirect (cg: callgraph) (e: exp) : callnode = + getNodeByName cg (NIIndirect ("", ref [])) + + +(* Find the name of an indirect node that a function whose address is taken + * belongs *) +let markFunctionAddrTaken (cg: callgraph) (f: varinfo) : unit = + (* + ignore (E.log "markFunctionAddrTaken %s\n" f.vname); + *) + let n = getNodeForIndirect cg (AddrOf (Var f, NoOffset)) in + match n.cnInfo with + NIIndirect (_, r) -> r := f :: !r + | _ -> assert false + + + +class cgComputer (graph: callgraph) = object(self) + inherit nopCilVisitor + + (* the current function we're in, so when we visit a call node + * we know who is the caller *) + val mutable curFunc: callnode option = None + + + (* begin visiting a function definition *) + method vfunc (f:fundec) : fundec visitAction = begin + (trace "callgraph" (P.dprintf "entering function %s\n" f.svar.vname)); + let node = getNodeForVar graph f.svar in + (match node.cnInfo with + NIVar (v, r) -> r := true + | _ -> assert false); + curFunc <- (Some node); + DoChildren + end + + (* visit an instruction; we're only interested in calls *) + method vinst (i:instr) : instr list visitAction = begin + (*(trace "callgraph" (P.dprintf "visiting instruction: %a\n" dn_instr i));*) + let caller : callnode = + match curFunc with + None -> assert false + | Some c -> c + in + let callerName: string = nodeName caller.cnInfo in + (match i with + Call(_,f,_,_) -> ( + let callee: callnode = + match f with + | Lval(Var(vi),NoOffset) -> + (trace "callgraph" (P.dprintf "I see a call by %s to %s\n" + callerName vi.vname)); + getNodeForVar graph vi + + | _ -> + (trace "callgraph" (P.dprintf "indirect call: %a\n" + dn_instr i)); + getNodeForIndirect graph f + in + + (* add one entry to each node's appropriate list *) + IH.replace caller.cnCallees callee.cnid callee; + IH.replace callee.cnCallers caller.cnid caller + ) + + | _ -> ()); (* ignore other kinds instructions *) + + DoChildren + end + + method vexpr (e: exp) = + (match e with + AddrOf (Var fv, NoOffset) when isFunctionType fv.vtype -> + markFunctionAddrTaken graph fv + | _ -> ()); + + DoChildren +end + +let computeGraph (f:file) : callgraph = begin + let graph = H.create 37 in + let obj:cgComputer = new cgComputer graph in + + (* visit the whole file, computing the graph *) + visitCilFileSameGlobals (obj :> cilVisitor) f; + + + (* return the computed graph *) + graph +end + +let printGraph (out:out_channel) (g:callgraph) : unit = begin + let printEntry _ (n:callnode) : unit = + let name = nodeName n.cnInfo in + (Printf.fprintf out " %s" name) + in + + let printCalls (node:callnode) : unit = + (fprintf out " calls:"); + (IH.iter printEntry node.cnCallees); + (fprintf out "\n is called by:"); + (IH.iter printEntry node.cnCallers); + (fprintf out "\n") + in + + H.iter (fun (name: string) (node: callnode) -> + match node.cnInfo with + NIVar (v, def) -> + (fprintf out "%s (%s):\n" + v.vname (if !def then "defined" else "external")); + printCalls node + + | NIIndirect (n, funcs) -> + fprintf out "Indirect %s:\n" n; + fprintf out " possible aliases: "; + List.iter (fun a -> fprintf out "%s " a.vname) !funcs; + fprintf out "\n" + + ) + + g + end + +let doCallGraph = ref false + +let feature : featureDescr = + { fd_name = "callgraph"; + fd_enabled = doCallGraph; + fd_description = "generation of a static call graph"; + fd_extraopt = []; + fd_doit = + (function (f: file) -> + let graph:callgraph = computeGraph f in + printGraph stdout graph); + fd_post_check = false; + } + + +(* + * + * Copyright (c) 2001-2002 by + * George C. Necula necula@cs.berkeley.edu + * Scott McPeak smcpeak@cs.berkeley.edu + * Wes Weimer weimer@cs.berkeley.edu + * Ben Liblit liblit@cs.berkeley.edu + * + * All rights reserved. Permission to use, copy, modify and distribute + * this software for research purposes only is hereby granted, + * provided that the following conditions are met: + * 1. XSRedistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * DISCLAIMER: + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/callgraph.mli b/cil/src/ext/callgraph.mli new file mode 100644 index 00000000..bc760180 --- /dev/null +++ b/cil/src/ext/callgraph.mli @@ -0,0 +1,123 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(* callgraph.mli *) +(* compute a static call graph *) + +(* module maintainer: scott *) +(* see copyright notice at end of this file *) + + +(* ------------------ types ------------------- *) +(* a call node describes the local calling structure for a + * single function: which functions it calls, and which + * functions call it *) +type callnode = { + (* An id *) + cnid: int; + + (* the function this node describes *) + cnInfo: nodeinfo; + + (* set of functions this one calls, indexed by the node id *) + cnCallees: callnode Inthash.t; + + (* set of functions that call this one , indexed by the node id *) + cnCallers: callnode Inthash.t; +} + +and nodeinfo = + NIVar of Cil.varinfo * bool ref + (* Node corresponding to a function. If the boolean + * is true, then the function is defined, otherwise + * it is external *) + + | NIIndirect of string (* Indirect nodes have a string associated to them. + * These strings must be invalid function names *) + * Cil.varinfo list ref + (* A list of functions that this indirect node might + * denote *) + + +val nodeName: nodeinfo -> string + +(* a call graph is a hashtable, mapping a function name to + * the node which describes that function's call structure *) +type callgraph = + (string, callnode) Hashtbl.t + + +(* ----------------- functions ------------------- *) +(* given a CIL file, compute its static call graph *) +val computeGraph : Cil.file -> callgraph + +(* print the callgraph in a human-readable format to a channel *) +val printGraph : out_channel -> callgraph -> unit + + +val feature: Cil.featureDescr +(* + * + * Copyright (c) 2001-2002 by + * George C. Necula necula@cs.berkeley.edu + * Scott McPeak smcpeak@cs.berkeley.edu + * Wes Weimer weimer@cs.berkeley.edu + * Ben Liblit liblit@cs.berkeley.edu + * + * All rights reserved. Permission to use, copy, modify and distribute + * this software for research purposes only is hereby granted, + * provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * DISCLAIMER: + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/canonicalize.ml b/cil/src/ext/canonicalize.ml new file mode 100644 index 00000000..a75deeac --- /dev/null +++ b/cil/src/ext/canonicalize.ml @@ -0,0 +1,292 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + + +(************************************************************************ + * canonicalize performs several transformations to correct differences + * between C and C++, so that the output is (hopefully) valid C++ code. + * This is incomplete -- certain fixes which are necessary + * for some programs are not yet implemented. + * + * #1) C allows global variables to have multiple declarations and multiple + * (equivalent) definitions. This transformation removes all but one + * declaration and all but one definition. + * + * #2) Any variables that use C++ keywords as identifiers are renamed. + * + * #3) __inline is #defined to inline, and __restrict is #defined to nothing. + * + * #4) C allows function pointers with no specified arguments to be used on + * any argument list. To make C++ accept this code, we insert a cast + * from the function pointer to a type that matches the arguments. Of + * course, this does nothing to guarantee that the pointer actually has + * that type. + * + * #5) Makes casts from int to enum types explicit. (CIL changes enum + * constants to int constants, but doesn't use a cast.) + * + ************************************************************************) + +open Cil +module E = Errormsg +module H = Hashtbl + +(* For transformation #1. Stores all variable definitions in the file. *) +let varDefinitions: (varinfo, global) H.t = H.create 111 + + +class canonicalizeVisitor = object(self) + inherit nopCilVisitor + val mutable currentFunction: fundec = Cil.dummyFunDec; + + (* A hashtable to prevent duplicate declarations. *) + val alreadyDeclared: (varinfo, unit) H.t = H.create 111 + val alreadyDefined: (varinfo, unit) H.t = H.create 111 + + (* move variable declarations around *) + method vglob g = match g with + GVar(v, ({init = Some _} as inito), l) -> + (* A definition. May have been moved to an earlier position. *) + if H.mem alreadyDefined v then begin + ignore (E.warn "Duplicate definition of %s at %a.\n" + v.vname d_loc !currentLoc); + ChangeTo [] (* delete from here. *) + end else begin + H.add alreadyDefined v (); + if H.mem alreadyDeclared v then begin + (* Change the earlier declaration to Extern *) + let oldS = v.vstorage in + ignore (E.log "changing storage of %s from %a\n" + v.vname d_storage oldS); + v.vstorage <- Extern; + let newv = {v with vstorage = oldS} in + ChangeDoChildrenPost([GVar(newv, inito, l)], (fun g -> g) ) + end else + DoChildren + end + | GVar(v, {init=None}, l) + | GVarDecl(v, l) when not (isFunctionType v.vtype) -> begin + (* A declaration. May have been moved to an earlier position. *) + if H.mem alreadyDefined v || H.mem alreadyDeclared v then + ChangeTo [] (* delete from here. *) + else begin + H.add alreadyDeclared v (); + DoChildren + end + end + | GFun(f, l) -> + currentFunction <- f; + DoChildren + | _ -> + DoChildren + +(* #2. rename any identifiers whose names are C++ keywords *) + method vvdec v = + match v.vname with + | "bool" + | "catch" + | "cdecl" + | "class" + | "const_cast" + | "delete" + | "dynamic_cast" + | "explicit" + | "export" + | "false" + | "friend" + | "mutable" + | "namespace" + | "new" + | "operator" + | "pascal" + | "private" + | "protected" + | "public" + | "register" + | "reinterpret_cast" + | "static_cast" + | "template" + | "this" + | "throw" + | "true" + | "try" + | "typeid" + | "typename" + | "using" + | "virtual" + | "wchar_t"-> + v.vname <- v.vname ^ "__cil2cpp"; + DoChildren + | _ -> DoChildren + + method vinst i = +(* #5. If an assignment or function call uses expressions as enum values, + add an explicit cast. *) + match i with + Set (dest, exp, l) -> begin + let typeOfDest = typeOfLval dest in + match unrollType typeOfDest with + TEnum _ -> (* add an explicit cast *) + let newI = Set(dest, mkCast exp typeOfDest, l) in + ChangeTo [newI] + | _ -> SkipChildren + end + | Call (dest, f, args, l) -> begin + let rt, formals, isva, attrs = splitFunctionType (typeOf f) in + if isva then + SkipChildren (* ignore vararg functions *) + else + match formals with + Some formals' -> begin + let newArgs = try + (*Iterate over the arguments, looking for formals that + expect enum types, and insert casts where necessary. *) + List.map2 + (fun (actual: exp) (formalName, formalType, _) -> + match unrollType formalType with + TEnum _ -> mkCast actual formalType + | _ -> actual) + args + formals' + with Invalid_argument _ -> + E.s (error "Number of arguments to %a doesn't match type.\n" + d_exp f) + in + let newI = Call(dest, f, newArgs, l) in + ChangeTo [newI] + end + | None -> begin + (* #4. No arguments were specified for this type. To fix this, infer the + type from the arguments that are used n this instruction, and insert + a cast to that type.*) + match f with + Lval(Mem(fp), off) -> + let counter: int ref = ref 0 in + let newFormals = List.map + (fun (actual:exp) -> + incr counter; + let formalName = "a" ^ (string_of_int !counter) in + (formalName, typeOf actual, []))(* (name,type,attrs) *) + args in + let newFuncPtrType = + TPtr((TFun (rt, Some newFormals, false, attrs)), []) in + let newFuncPtr = Lval(Mem(mkCast fp newFuncPtrType), off) in + ChangeTo [Call(dest, newFuncPtr, args, l)] + | _ -> + ignore (warn "cppcanon: %a has no specified arguments, but it's not a function pointer." d_exp f); + SkipChildren + end + end + | _ -> SkipChildren + + method vinit i = +(* #5. If an initializer uses expressions as enum values, + add an explicit cast. *) + match i with + SingleInit e -> DoChildren (* we don't handle simple initializers here, + because we don't know what type is expected. + This should be done in vglob if needed. *) + | CompoundInit(t, initList) -> + let changed: bool ref = ref false in + let initList' = List.map + (* iterate over the list, adding casts for any expression that + is expected to be an enum type. *) + (function + (Field(fi, off), SingleInit e) -> begin + match unrollType fi.ftype with + TEnum _ -> (* add an explicit cast *) + let newE = mkCast e fi.ftype in + changed := true; + (Field(fi, off), SingleInit newE) + | _ -> (* not enum, no cast needed *) + (Field(fi, off), SingleInit e) + end + | other -> + (* This is a more complicated initializer, and I don't think + it can have type enum. It's children might, though. *) + other) + initList in + if !changed then begin + (* There may be other casts needed in other parts of the + initialization, so do the children too. *) + ChangeDoChildrenPost(CompoundInit(t, initList'), (fun x -> x)) + end else + DoChildren + + +(* #5. If a function returns an enum type, add an explicit cast to the + return type. *) + method vstmt stmt = + (match stmt.skind with + Return (Some exp, l) -> begin + let typeOfDest, _, _, _ = + splitFunctionType currentFunction.svar.vtype in + match unrollType typeOfDest with + TEnum _ -> + stmt.skind <- Return (Some (mkCast exp typeOfDest), l) + | _ -> () + end + | _ -> ()); + DoChildren +end (* class canonicalizeVisitor *) + + + +(* Entry point for this extension *) +let canonicalize (f:file) = + visitCilFile (new canonicalizeVisitor) f; + + (* #3. Finally, add some #defines to change C keywords to their C++ + equivalents: *) + f.globals <- + GText( "#ifdef __cplusplus\n" + ^" #define __restrict\n" (* "restrict" doesn't work *) + ^" #define __inline inline\n" + ^"#endif") + ::f.globals + + + +let feature : featureDescr = + { fd_name = "canonicalize"; + fd_enabled = ref false; + fd_description = "fixing some C-isms so that the result is C++ compliant."; + fd_extraopt = []; + fd_doit = canonicalize; + fd_post_check = true; + } diff --git a/cil/src/ext/canonicalize.mli b/cil/src/ext/canonicalize.mli new file mode 100644 index 00000000..37bc0d83 --- /dev/null +++ b/cil/src/ext/canonicalize.mli @@ -0,0 +1,48 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(************************************************************************ + * canonicalize performs several transformations to correct differences + * between C and C++, so that the output is (hopefully) valid C++ code. + * This is incomplete -- certain fixes which are necessary + * for some programs are not yet implemented. + * + * See canonicalize.ml for a list of changes. + * + ************************************************************************) + +val feature: Cil.featureDescr diff --git a/cil/src/ext/cfg.ml b/cil/src/ext/cfg.ml new file mode 100644 index 00000000..8b19c797 --- /dev/null +++ b/cil/src/ext/cfg.ml @@ -0,0 +1,289 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Simon Goldsmith + * S.P Rahul, Aman Bhargava + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* Authors: Aman Bhargava, S. P. Rahul *) +(* sfg: this stuff was stolen from optim.ml - the code to print the cfg as + a dot graph is mine *) + +open Pretty +open Cil +module E=Errormsg + +(* entry points: cfgFun, printCfgChannel, printCfgFilename *) + +(* known issues: + * -sucessors of if somehow end up with two edges each + *) + +(*------------------------------------------------------------*) +(* Notes regarding CFG computation: + 1) Initially only succs and preds are computed. sid's are filled in + later, in whatever order is suitable (e.g. for forward problems, reverse + depth-first postorder). + 2) If a stmt (return, break or continue) has no successors, then + function return must follow. + No predecessors means it is the start of the function + 3) We use the fact that initially all the succs and preds are assigned [] +*) + +(* Fill in the CFG info for the stmts in a block + next = succ of the last stmt in this block + break = succ of any Break in this block + cont = succ of any Continue in this block + None means the succ is the function return. It does not mean the break/cont + is invalid. We assume the validity has already been checked. +*) +(* At the end of CFG computation, + - numNodes = total number of CFG nodes + - length(nodeList) = numNodes +*) + +let numNodes = ref 0 (* number of nodes in the CFG *) +let nodeList : stmt list ref = ref [] (* All the nodes in a flat list *) (* ab: Added to change dfs from quadratic to linear *) +let start_id = ref 0 (* for unique ids across many functions *) + +(* entry point *) + +(** Compute a control flow graph for fd. Stmts in fd have preds and succs + filled in *) +let rec cfgFun (fd : fundec): int = + begin + numNodes := !start_id; + nodeList := []; + + cfgBlock fd.sbody None None None; + !numNodes - !start_id + end + + +and cfgStmts (ss: stmt list) + (next:stmt option) (break:stmt option) (cont:stmt option) = + match ss with + [] -> (); + | [s] -> cfgStmt s next break cont + | hd::tl -> + cfgStmt hd (Some (List.hd tl)) break cont; + cfgStmts tl next break cont + +and cfgBlock (blk: block) + (next:stmt option) (break:stmt option) (cont:stmt option) = + cfgStmts blk.bstmts next break cont + +(* Fill in the CFG info for a stmt + Meaning of next, break, cont should be clear from earlier comment +*) +and cfgStmt (s: stmt) (next:stmt option) (break:stmt option) (cont:stmt option) = + incr numNodes; + s.sid <- !numNodes; + nodeList := s :: !nodeList; (* Future traversals can be made in linear time. e.g. *) + if s.succs <> [] then + E.s (bug "CFG must be cleared before being computed!"); + let addSucc (n: stmt) = + if not (List.memq n s.succs) then + s.succs <- n::s.succs; + if not (List.memq s n.preds) then + n.preds <- s::n.preds + in + let addOptionSucc (n: stmt option) = + match n with + None -> () + | Some n' -> addSucc n' + in + let addBlockSucc (b: block) = + match b.bstmts with + [] -> addOptionSucc next + | hd::_ -> addSucc hd + in + match s.skind with + Instr _ -> addOptionSucc next + | Return _ -> () + | Goto (p,_) -> addSucc !p + | Break _ -> addOptionSucc break + | Continue _ -> addOptionSucc cont + | If (_, blk1, blk2, _) -> + (* The succs of If is [true branch;false branch] *) + addBlockSucc blk2; + addBlockSucc blk1; + cfgBlock blk1 next break cont; + cfgBlock blk2 next break cont + | Block b -> + addBlockSucc b; + cfgBlock b next break cont + | Switch(_,blk,l,_) -> + List.iter addSucc (List.rev l); (* Add successors in order *) + (* sfg: if there's no default, need to connect s->next *) + if not (List.exists + (fun stmt -> List.exists + (function Default _ -> true | _ -> false) + stmt.labels) + l) + then + addOptionSucc next; + cfgBlock blk next next cont +(* + | Loop(blk,_,_,_) -> +*) + | While(_,blk,_) + | DoWhile(_,blk,_) + | For(_,_,_,blk,_) -> + addBlockSucc blk; + cfgBlock blk (Some s) next (Some s) + (* Since all loops have terminating condition true, we don't put + any direct successor to stmt following the loop *) + | TryExcept _ | TryFinally _ -> + E.s (E.unimp "try/except/finally") + +(*------------------------------------------------------------*) + +(**************************************************************) +(* do something for all stmts in a fundec *) + +let rec forallStmts (todo) (fd : fundec) = + begin + fasBlock todo fd.sbody; + end + +and fasBlock (todo) (b : block) = + List.iter (fasStmt todo) b.bstmts + +and fasStmt (todo) (s : stmt) = + begin + ignore(todo s); + match s.skind with + | Block b -> fasBlock todo b + | If (_, tb, fb, _) -> (fasBlock todo tb; fasBlock todo fb) + | Switch (_, b, _, _) -> fasBlock todo b +(* + | Loop (b, _, _, _) -> fasBlock todo b +*) + | While (_, b, _) -> fasBlock todo b + | DoWhile (_, b, _) -> fasBlock todo b + | For (_, _, _, b, _) -> fasBlock todo b + | (Return _ | Break _ | Continue _ | Goto _ | Instr _) -> () + | TryExcept _ | TryFinally _ -> E.s (E.unimp "try/except/finally") + end +;; + +(**************************************************************) +(* printing the control flow graph - you have to compute it first *) + +let d_cfgnodename () (s : stmt) = + dprintf "%d" s.sid + +let d_cfgnodelabel () (s : stmt) = + let label = + begin + match s.skind with + | If (e, _, _, _) -> "if" (*sprint ~width:999 (dprintf "if %a" d_exp e)*) +(* + | Loop _ -> "loop" +*) + | While _ -> "while" + | DoWhile _ -> "dowhile" + | For _ -> "for" + | Break _ -> "break" + | Continue _ -> "continue" + | Goto _ -> "goto" + | Instr _ -> "instr" + | Switch _ -> "switch" + | Block _ -> "block" + | Return _ -> "return" + | TryExcept _ -> "try-except" + | TryFinally _ -> "try-finally" + end in + dprintf "%d: %s" s.sid label + +let d_cfgedge (src) () (dest) = + dprintf "%a -> %a" + d_cfgnodename src + d_cfgnodename dest + +let d_cfgnode () (s : stmt) = + dprintf "%a [label=\"%a\"]\n\t%a" + d_cfgnodename s + d_cfgnodelabel s + (d_list "\n\t" (d_cfgedge s)) s.succs + +(**********************************************************************) +(* entry points *) + +(** print control flow graph (in dot form) for fundec to channel *) +let printCfgChannel (chan : out_channel) (fd : fundec) = + let pnode (s:stmt) = fprintf chan "%a\n" d_cfgnode s in + begin + ignore (fprintf chan "digraph CFG_%s {\n" fd.svar.vname); + forallStmts pnode fd; + ignore(fprintf chan "}\n"); + end + +(** Print control flow graph (in dot form) for fundec to file *) +let printCfgFilename (filename : string) (fd : fundec) = + let chan = open_out filename in + begin + printCfgChannel chan fd; + close_out chan; + end + + +;; + +(**********************************************************************) + +let clearCFGinfo (fd : fundec) = + let clear s = + s.sid <- -1; + s.succs <- []; + s.preds <- []; + in + forallStmts clear fd + +let clearFileCFG (f : file) = + iterGlobals f (fun g -> + match g with GFun(fd,_) -> + clearCFGinfo fd + | _ -> ()) + +let computeFileCFG (f : file) = + iterGlobals f (fun g -> + match g with GFun(fd,_) -> + numNodes := cfgFun fd; + start_id := !start_id + !numNodes + | _ -> ()) diff --git a/cil/src/ext/cfg.mli b/cil/src/ext/cfg.mli new file mode 100644 index 00000000..19c51666 --- /dev/null +++ b/cil/src/ext/cfg.mli @@ -0,0 +1,36 @@ +(** Code to compute the control-flow graph of a function or file. + This will fill in the [preds] and [succs] fields of {!Cil.stmt} + + This is required for several other extensions, such as {!Dataflow}. +*) + +open Cil + + +(** Compute the CFG for an entire file, by calling cfgFun on each function. *) +val computeFileCFG: Cil.file -> unit + +(** clear the sid, succs, and preds fields of each statement. *) +val clearFileCFG: Cil.file -> unit + +(** Compute a control flow graph for fd. Stmts in fd have preds and succs + filled in *) +val cfgFun : fundec -> int + +(** clear the sid, succs, and preds fields of each statment in a function *) +val clearCFGinfo: Cil.fundec -> unit + +(** print control flow graph (in dot form) for fundec to channel *) +val printCfgChannel : out_channel -> fundec -> unit + +(** Print control flow graph (in dot form) for fundec to file *) +val printCfgFilename : string -> fundec -> unit + +(** Next statement id that will be assigned. *) +val start_id: int ref + +(** All of the nodes in a file. *) +val nodeList : stmt list ref + +(** number of nodes in the CFG *) +val numNodes : int ref diff --git a/cil/src/ext/ciltools.ml b/cil/src/ext/ciltools.ml new file mode 100755 index 00000000..78f1aafc --- /dev/null +++ b/cil/src/ext/ciltools.ml @@ -0,0 +1,228 @@ +open Cil + +(* Contributed by Nathan Cooprider *) + +let isOne e = + isInteger e = Some Int64.one + + +(* written by Zach *) +let is_volatile_tp tp = + List.exists (function (Attr("volatile",_)) -> true + | _ -> false) (typeAttrs tp) + +(* written by Zach *) +let is_volatile_vi vi = + let vi_vol = + List.exists (function (Attr("volatile",_)) -> true + | _ -> false) vi.vattr in + let typ_vol = is_volatile_tp vi.vtype in + vi_vol || typ_vol + +(***************************************************************************** + * A collection of useful functions that were not already in CIL as far as I + * could tell. However, I have been surprised before . . . + ****************************************************************************) + +type sign = Signed | Unsigned + +exception Not_an_integer + +(***************************************************************************** + * A bunch of functions for accessing integers. Originally written for + * somebody who didn't know CIL and just wanted to mess with it at the + * OCaml level. + ****************************************************************************) + +let unbox_int_type (ye : typ) : (int * sign) = + let tp = unrollType ye in + let s = + match tp with + TInt (i, _) -> + if (isSigned i) then + Signed + else + Unsigned + | _ -> raise Not_an_integer + in + (bitsSizeOf tp), s + +(* depricated. Use isInteger directly instead *) +let unbox_int_exp (e : exp) : int64 = + match isInteger e with + None -> raise Not_an_integer + | Some (x) -> x + +let box_int_to_exp (n : int64) (ye : typ) : exp = + let tp = unrollType ye in + match tp with + TInt (i, _) -> + kinteger64 i n + | _ -> raise Not_an_integer + +let cil_to_ocaml_int (e : exp) : (int64 * int * sign) = + let v, s = unbox_int_type (typeOf e) in + unbox_int_exp (e), v, s + +exception Weird_bitwidth + +(* (int64 * int * sign) : exp *) +let ocaml_int_to_cil v n s = + let char_size = bitsSizeOf charType in + let int_size = bitsSizeOf intType in + let short_size = bitsSizeOf (TInt(IShort,[]))in + let long_size = bitsSizeOf longType in + let longlong_size = bitsSizeOf (TInt(ILongLong,[])) in + let i = + match s with + Signed -> + if (n = char_size) then + ISChar + else if (n = int_size) then + IInt + else if (n = short_size) then + IShort + else if (n = long_size) then + ILong + else if (n = longlong_size) then + ILongLong + else + raise Weird_bitwidth + | Unsigned -> + if (n = char_size) then + IUChar + else if (n = int_size) then + IUInt + else if (n = short_size) then + IUShort + else if (n = long_size) then + IULong + else if (n = longlong_size) then + IULongLong + else + raise Weird_bitwidth + in + kinteger64 i v + +(***************************************************************************** + * a couple of type functions that I thought would be useful: + ****************************************************************************) + +let rec isCompositeType tp = + match tp with + TComp _ -> true + | TPtr(x, _) -> isCompositeType x + | TArray(x,_,_) -> isCompositeType x + | TFun(x,_,_,_) -> isCompositeType x + | TNamed (x,_) -> isCompositeType x.ttype + | _ -> false + +(** START OF deepHasAttribute ************************************************) +let visited = ref [] +class attribute_checker target rflag = object (self) + inherit nopCilVisitor + method vtype t = + match t with + TComp(cinfo, a) -> + if(not (List.exists (fun x -> cinfo.cname = x) !visited )) then begin + visited := cinfo.cname :: !visited; + List.iter + (fun f -> + if (hasAttribute target f.fattr) then + rflag := true + else + ignore(visitCilType (new attribute_checker target rflag) + f.ftype)) cinfo.cfields; + end; + DoChildren + | TNamed(t1, a) -> + if(not (List.exists (fun x -> t1.tname = x) !visited )) then begin + visited := t1.tname :: !visited; + ignore(visitCilType (new attribute_checker target rflag) t1.ttype); + end; + DoChildren + | _ -> + DoChildren + method vattr (Attr(name,params)) = + if (name = target) then rflag := true; + DoChildren +end + +let deepHasAttribute s t = + let found = ref false in + visited := []; + ignore(visitCilType (new attribute_checker s found) t); + !found +(** END OF deepHasAttribute **************************************************) + +(** Stuff from ptranal, slightly modified ************************************) + +(***************************************************************************** + * A transformation to make every instruction be in its own statement. + ****************************************************************************) + +class callBBVisitor = object + inherit nopCilVisitor + + method vstmt s = + match s.skind with + Instr(il) -> begin + if (List.length il > 1) then + let list_of_stmts = List.map (fun one_inst -> + mkStmtOneInstr one_inst) il in + let block = mkBlock list_of_stmts in + s.skind <- Block block; + ChangeTo(s) + else + SkipChildren + end + | _ -> DoChildren + + method vvdec _ = SkipChildren + method vexpr _ = SkipChildren + method vlval _ = SkipChildren + method vtype _ = SkipChildren +end + +let one_instruction_per_statement f = + let thisVisitor = new callBBVisitor in + visitCilFileSameGlobals thisVisitor f + +(***************************************************************************** + * A transformation that gives each variable a unique identifier. + ****************************************************************************) + +class vidVisitor = object + inherit nopCilVisitor + val count = ref 0 + + method vvdec vi = + vi.vid <- !count ; + incr count ; SkipChildren +end + +let globally_unique_vids f = + let thisVisitor = new vidVisitor in + visitCilFileSameGlobals thisVisitor f + +(** End of stuff from ptranal ************************************************) + +class sidVisitor = object + inherit nopCilVisitor + val count = ref 0 + + method vstmt s = + s.sid <- !count ; + incr count ; + DoChildren +end + +let globally_unique_sids f = + let thisVisitor = new sidVisitor in + visitCilFileSameGlobals thisVisitor f + +(** Comparing expressions without a Out_of_memory error **********************) + +let compare_exp x y = + compare x y + diff --git a/cil/src/ext/dataflow.ml b/cil/src/ext/dataflow.ml new file mode 100755 index 00000000..7f28f841 --- /dev/null +++ b/cil/src/ext/dataflow.ml @@ -0,0 +1,466 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +module IH = Inthash +module E = Errormsg + +open Cil +open Pretty + +(** A framework for data flow analysis for CIL code. Before using + this framework, you must initialize the Control-flow Graph for your + program, e.g using {!Cfg.computeFileCFG} *) + +type 't action = + Default (** The default action *) + | Done of 't (** Do not do the default action. Use this result *) + | Post of ('t -> 't) (** The default action, followed by the given + * transformer *) + +type 't stmtaction = + SDefault (** The default action *) + | SDone (** Do not visit this statement or its successors *) + | SUse of 't (** Visit the instructions and successors of this statement + as usual, but use the specified state instead of the + one that was passed to doStmt *) + +(* For if statements *) +type 't guardaction = + GDefault (** The default state *) + | GUse of 't (** Use this data for the branch *) + | GUnreachable (** The branch will never be taken. *) + + +(****************************************************************** + ********** + ********** FORWARDS + ********** + ********************************************************************) + +module type ForwardsTransfer = sig + val name: string (** For debugging purposes, the name of the analysis *) + + val debug: bool ref (** Whether to turn on debugging *) + + type t (** The type of the data we compute for each block start. May be + * imperative. *) + + val copy: t -> t + (** Make a deep copy of the data *) + + + val stmtStartData: t Inthash.t + (** For each statement id, the data at the start. Not found in the hash + * table means nothing is known about the state at this point. At the end + * of the analysis this means that the block is not reachable. *) + + val pretty: unit -> t -> Pretty.doc + (** Pretty-print the state *) + + val computeFirstPredecessor: Cil.stmt -> t -> t + (** Give the first value for a predecessors, compute the value to be set + * for the block *) + + val combinePredecessors: Cil.stmt -> old:t -> t -> t option + (** Take some old data for the start of a statement, and some new data for + * the same point. Return None if the combination is identical to the old + * data. Otherwise, compute the combination, and return it. *) + + val doInstr: Cil.instr -> t -> t action + (** The (forwards) transfer function for an instruction. The + * {!Cil.currentLoc} is set before calling this. The default action is to + * continue with the state unchanged. *) + + val doStmt: Cil.stmt -> t -> t stmtaction + (** The (forwards) transfer function for a statement. The {!Cil.currentLoc} + * is set before calling this. The default action is to do the instructions + * in this statement, if applicable, and continue with the successors. *) + + val doGuard: Cil.exp -> t -> t guardaction + (** Generate the successor to an If statement assuming the given expression + * is nonzero. Analyses that don't need guard information can return + * GDefault; this is equivalent to returning GUse of the input. + * A return value of GUnreachable indicates that this half of the branch + * will not be taken and should not be explored. This will be called + * twice per If, once for "then" and once for "else". + *) + + val filterStmt: Cil.stmt -> bool + (** Whether to put this statement in the worklist. This is called when a + * block would normally be put in the worklist. *) + +end + + +module ForwardsDataFlow = + functor (T : ForwardsTransfer) -> + struct + + (** Keep a worklist of statements to process. It is best to keep a queue, + * because this way it is more likely that we are going to process all + * predecessors of a statement before the statement itself. *) + let worklist: Cil.stmt Queue.t = Queue.create () + + (** We call this function when we have encountered a statement, with some + * state. *) + let reachedStatement (s: stmt) (d: T.t) : unit = + (** see if we know about it already *) + E.pushContext (fun _ -> dprintf "Reached statement %d with %a" + s.sid T.pretty d); + let newdata: T.t option = + try + let old = IH.find T.stmtStartData s.sid in + match T.combinePredecessors s ~old:old d with + None -> (* We are done here *) + if !T.debug then + ignore (E.log "FF(%s): reached stmt %d with %a\n implies the old state %a\n" + T.name s.sid T.pretty d T.pretty old); + None + | Some d' -> begin + (* We have changed the data *) + if !T.debug then + ignore (E.log "FF(%s): weaken data for block %d: %a\n" + T.name s.sid T.pretty d'); + Some d' + end + with Not_found -> (* was bottom before *) + let d' = T.computeFirstPredecessor s d in + if !T.debug then + ignore (E.log "FF(%s): set data for block %d: %a\n" + T.name s.sid T.pretty d'); + Some d' + in + E.popContext (); + match newdata with + None -> () + | Some d' -> + IH.replace T.stmtStartData s.sid d'; + if T.filterStmt s && + not (Queue.fold (fun exists s' -> exists || s'.sid = s.sid) + false + worklist) then + Queue.add s worklist + + + (** Get the two successors of an If statement *) + let ifSuccs (s:stmt) : stmt * stmt = + let fstStmt blk = match blk.bstmts with + [] -> Cil.dummyStmt + | fst::_ -> fst + in + match s.skind with + If(e, b1, b2, _) -> + let thenSucc = fstStmt b1 in + let elseSucc = fstStmt b2 in + let oneFallthrough () = + let fallthrough = + List.filter + (fun s' -> thenSucc != s' && elseSucc != s') + s.succs + in + match fallthrough with + [] -> E.s (bug "Bad CFG: missing fallthrough for If.") + | [s'] -> s' + | _ -> E.s (bug "Bad CFG: multiple fallthrough for If.") + in + (* If thenSucc or elseSucc is Cil.dummyStmt, it's an empty block. + So the successor is the statement after the if *) + let stmtOrFallthrough s' = + if s' == Cil.dummyStmt then + oneFallthrough () + else + s' + in + (stmtOrFallthrough thenSucc, + stmtOrFallthrough elseSucc) + + | _-> E.s (bug "ifSuccs on a non-If Statement.") + + (** Process a statement *) + let processStmt (s: stmt) : unit = + currentLoc := get_stmtLoc s.skind; + if !T.debug then + ignore (E.log "FF(%s).stmt %d at %t\n" T.name s.sid d_thisloc); + + (* It must be the case that the block has some data *) + let init: T.t = + try T.copy (IH.find T.stmtStartData s.sid) + with Not_found -> + E.s (E.bug "FF(%s): processing block without data" T.name) + in + + (** See what the custom says *) + match T.doStmt s init with + SDone -> () + | (SDefault | SUse _) as act -> begin + let curr = match act with + SDefault -> init + | SUse d -> d + | SDone -> E.s (bug "SDone") + in + (* Do the instructions in order *) + let handleInstruction (s: T.t) (i: instr) : T.t = + currentLoc := get_instrLoc i; + + (* Now handle the instruction itself *) + let s' = + let action = T.doInstr i s in + match action with + | Done s' -> s' + | Default -> s (* do nothing *) + | Post f -> f s + in + s' + in + + let after: T.t = + match s.skind with + Instr il -> + (* Handle instructions starting with the first one *) + List.fold_left handleInstruction curr il + + | Goto _ | Break _ | Continue _ | If _ + | TryExcept _ | TryFinally _ + | Switch _ | (*Loop _*) While _ | DoWhile _ | For _ + | Return _ | Block _ -> curr + in + currentLoc := get_stmtLoc s.skind; + + (* Handle If guards *) + let succsToReach = match s.skind with + If (e, _, _, _) -> begin + let not_e = UnOp(LNot, e, intType) in + let thenGuard = T.doGuard e after in + let elseGuard = T.doGuard not_e after in + if thenGuard = GDefault && elseGuard = GDefault then + (* this is the common case *) + s.succs + else begin + let doBranch succ guard = + match guard with + GDefault -> reachedStatement succ after + | GUse d -> reachedStatement succ d + | GUnreachable -> + if !T.debug then + ignore (E.log "FF(%s): Not exploring branch to %d\n" + T.name succ.sid); + + () + in + let thenSucc, elseSucc = ifSuccs s in + doBranch thenSucc thenGuard; + doBranch elseSucc elseGuard; + [] + end + end + | _ -> s.succs + in + (* Reach the successors *) + List.iter (fun s' -> reachedStatement s' after) succsToReach; + + end + + + + + (** Compute the data flow. Must have the CFG initialized *) + let compute (sources: stmt list) = + Queue.clear worklist; + List.iter (fun s -> Queue.add s worklist) sources; + + (** All initial stmts must have non-bottom data *) + List.iter (fun s -> + if not (IH.mem T.stmtStartData s.sid) then + E.s (E.error "FF(%s): initial stmt %d does not have data" + T.name s.sid)) + sources; + if !T.debug then + ignore (E.log "\nFF(%s): processing\n" + T.name); + let rec fixedpoint () = + if !T.debug && not (Queue.is_empty worklist) then + ignore (E.log "FF(%s): worklist= %a\n" + T.name + (docList (fun s -> num s.sid)) + (List.rev + (Queue.fold (fun acc s -> s :: acc) [] worklist))); + try + let s = Queue.take worklist in + processStmt s; + fixedpoint (); + with Queue.Empty -> + if !T.debug then + ignore (E.log "FF(%s): done\n\n" T.name) + in + fixedpoint () + + end + + + +(****************************************************************** + ********** + ********** BACKWARDS + ********** + ********************************************************************) +module type BackwardsTransfer = sig + val name: string (* For debugging purposes, the name of the analysis *) + + val debug: bool ref (** Whether to turn on debugging *) + + type t (** The type of the data we compute for each block start. In many + * presentations of backwards data flow analysis we maintain the + * data at the block end. This is not easy to do with JVML because + * a block has many exceptional ends. So we maintain the data for + * the statement start. *) + + val pretty: unit -> t -> Pretty.doc (** Pretty-print the state *) + + val stmtStartData: t Inthash.t + (** For each block id, the data at the start. This data structure must be + * initialized with the initial data for each block *) + + val combineStmtStartData: Cil.stmt -> old:t -> t -> t option + (** When the analysis reaches the start of a block, combine the old data + * with the one we have just computed. Return None if the combination is + * the same as the old data, otherwise return the combination. In the + * latter case, the predecessors of the statement are put on the working + * list. *) + + + val combineSuccessors: t -> t -> t + (** Take the data from two successors and combine it *) + + + val doStmt: Cil.stmt -> t action + (** The (backwards) transfer function for a branch. The {!Cil.currentLoc} is + * set before calling this. If it returns None, then we have some default + * handling. Otherwise, the returned data is the data before the branch + * (not considering the exception handlers) *) + + val doInstr: Cil.instr -> t -> t action + (** The (backwards) transfer function for an instruction. The + * {!Cil.currentLoc} is set before calling this. If it returns None, then we + * have some default handling. Otherwise, the returned data is the data + * before the branch (not considering the exception handlers) *) + + val filterStmt: Cil.stmt -> Cil.stmt -> bool + (** Whether to put this predecessor block in the worklist. We give the + * predecessor and the block whose predecessor we are (and whose data has + * changed) *) + +end + +module BackwardsDataFlow = + functor (T : BackwardsTransfer) -> + struct + + let getStmtStartData (s: stmt) : T.t = + try IH.find T.stmtStartData s.sid + with Not_found -> + E.s (E.bug "BF(%s): stmtStartData is not initialized for %d" + T.name s.sid) + + (** Process a statement and return true if the set of live return + * addresses on its entry has changed. *) + let processStmt (s: stmt) : bool = + if !T.debug then + ignore (E.log "FF(%s).stmt %d\n" T.name s.sid); + + + (* Find the state before the branch *) + currentLoc := get_stmtLoc s.skind; + let d: T.t = + match T.doStmt s with + Done d -> d + | (Default | Post _) as action -> begin + (* Do the default one. Combine the successors *) + let res = + match s.succs with + [] -> E.s (E.bug "You must doStmt for the statements with no successors") + | fst :: rest -> + List.fold_left (fun acc succ -> + T.combineSuccessors acc (getStmtStartData succ)) + (getStmtStartData fst) + rest + in + (* Now do the instructions *) + let res' = + match s.skind with + Instr il -> + (* Now scan the instructions in reverse order. This may + * Stack_overflow on very long blocks ! *) + let handleInstruction (i: instr) (s: T.t) : T.t = + currentLoc := get_instrLoc i; + (* First handle the instruction itself *) + let action = T.doInstr i s in + match action with + | Done s' -> s' + | Default -> s (* do nothing *) + | Post f -> f s + in + (* Handle instructions starting with the last one *) + List.fold_right handleInstruction il res + + | _ -> res + in + match action with + Post f -> f res' + | _ -> res' + end + in + + (* See if the state has changed. The only changes are that it may grow.*) + let s0 = getStmtStartData s in + + match T.combineStmtStartData s ~old:s0 d with + None -> (* The old data is good enough *) + false + + | Some d' -> + (* We have changed the data *) + if !T.debug then + ignore (E.log "BF(%s): set data for block %d: %a\n" + T.name s.sid T.pretty d'); + IH.replace T.stmtStartData s.sid d'; + true + + + (** Compute the data flow. Must have the CFG initialized *) + let compute (sinks: stmt list) = + let worklist: Cil.stmt Queue.t = Queue.create () in + List.iter (fun s -> Queue.add s worklist) sinks; + if !T.debug && not (Queue.is_empty worklist) then + ignore (E.log "\nBF(%s): processing\n" + T.name); + let rec fixedpoint () = + if !T.debug && not (Queue.is_empty worklist) then + ignore (E.log "BF(%s): worklist= %a\n" + T.name + (docList (fun s -> num s.sid)) + (List.rev + (Queue.fold (fun acc s -> s :: acc) [] worklist))); + try + let s = Queue.take worklist in + let changes = processStmt s in + if changes then begin + (* We must add all predecessors of block b, only if not already + * in and if the filter accepts them. *) + List.iter + (fun p -> + if not (Queue.fold (fun exists s' -> exists || p.sid = s'.sid) + false worklist) && + T.filterStmt p s then + Queue.add p worklist) + s.preds; + end; + fixedpoint (); + + with Queue.Empty -> + if !T.debug then + ignore (E.log "BF(%s): done\n\n" T.name) + in + fixedpoint (); + + end + + diff --git a/cil/src/ext/dataflow.mli b/cil/src/ext/dataflow.mli new file mode 100755 index 00000000..e72c5db0 --- /dev/null +++ b/cil/src/ext/dataflow.mli @@ -0,0 +1,151 @@ +(** A framework for data flow analysis for CIL code. Before using + this framework, you must initialize the Control-flow Graph for your + program, e.g using {!Cfg.computeFileCFG} *) + +type 't action = + Default (** The default action *) + | Done of 't (** Do not do the default action. Use this result *) + | Post of ('t -> 't) (** The default action, followed by the given + * transformer *) + +type 't stmtaction = + SDefault (** The default action *) + | SDone (** Do not visit this statement or its successors *) + | SUse of 't (** Visit the instructions and successors of this statement + as usual, but use the specified state instead of the + one that was passed to doStmt *) + +(* For if statements *) +type 't guardaction = + GDefault (** The default state *) + | GUse of 't (** Use this data for the branch *) + | GUnreachable (** The branch will never be taken. *) + + +(****************************************************************** + ********** + ********** FORWARDS + ********** + ********************************************************************) + +module type ForwardsTransfer = sig + val name: string (** For debugging purposes, the name of the analysis *) + + val debug: bool ref (** Whether to turn on debugging *) + + type t (** The type of the data we compute for each block start. May be + * imperative. *) + + val copy: t -> t + (** Make a deep copy of the data *) + + + val stmtStartData: t Inthash.t + (** For each statement id, the data at the start. Not found in the hash + * table means nothing is known about the state at this point. At the end + * of the analysis this means that the block is not reachable. *) + + val pretty: unit -> t -> Pretty.doc + (** Pretty-print the state *) + + val computeFirstPredecessor: Cil.stmt -> t -> t + (** Give the first value for a predecessors, compute the value to be set + * for the block *) + + val combinePredecessors: Cil.stmt -> old:t -> t -> t option + (** Take some old data for the start of a statement, and some new data for + * the same point. Return None if the combination is identical to the old + * data. Otherwise, compute the combination, and return it. *) + + val doInstr: Cil.instr -> t -> t action + (** The (forwards) transfer function for an instruction. The + * {!Cil.currentLoc} is set before calling this. The default action is to + * continue with the state unchanged. *) + + val doStmt: Cil.stmt -> t -> t stmtaction + (** The (forwards) transfer function for a statement. The {!Cil.currentLoc} + * is set before calling this. The default action is to do the instructions + * in this statement, if applicable, and continue with the successors. *) + + val doGuard: Cil.exp -> t -> t guardaction + (** Generate the successor to an If statement assuming the given expression + * is nonzero. Analyses that don't need guard information can return + * GDefault; this is equivalent to returning GUse of the input. + * A return value of GUnreachable indicates that this half of the branch + * will not be taken and should not be explored. This will be called + * twice per If, once for "then" and once for "else". + *) + + val filterStmt: Cil.stmt -> bool + (** Whether to put this statement in the worklist. This is called when a + * block would normally be put in the worklist. *) + +end + +module ForwardsDataFlow (T : ForwardsTransfer) : sig + val compute: Cil.stmt list -> unit + (** Fill in the T.stmtStartData, given a number of initial statements to + * start from. All of the initial statements must have some entry in + * T.stmtStartData (i.e., the initial data should not be bottom) *) +end + +(****************************************************************** + ********** + ********** BACKWARDS + ********** + ********************************************************************) +module type BackwardsTransfer = sig + val name: string (** For debugging purposes, the name of the analysis *) + + val debug: bool ref (** Whether to turn on debugging *) + + type t (** The type of the data we compute for each block start. In many + * presentations of backwards data flow analysis we maintain the + * data at the block end. This is not easy to do with JVML because + * a block has many exceptional ends. So we maintain the data for + * the statement start. *) + + val pretty: unit -> t -> Pretty.doc (** Pretty-print the state *) + + val stmtStartData: t Inthash.t + (** For each block id, the data at the start. This data structure must be + * initialized with the initial data for each block *) + + val combineStmtStartData: Cil.stmt -> old:t -> t -> t option + (** When the analysis reaches the start of a block, combine the old data + * with the one we have just computed. Return None if the combination is + * the same as the old data, otherwise return the combination. In the + * latter case, the predecessors of the statement are put on the working + * list. *) + + + val combineSuccessors: t -> t -> t + (** Take the data from two successors and combine it *) + + + val doStmt: Cil.stmt -> t action + (** The (backwards) transfer function for a branch. The {!Cil.currentLoc} is + * set before calling this. If it returns None, then we have some default + * handling. Otherwise, the returned data is the data before the branch + * (not considering the exception handlers) *) + + val doInstr: Cil.instr -> t -> t action + (** The (backwards) transfer function for an instruction. The + * {!Cil.currentLoc} is set before calling this. If it returns None, then we + * have some default handling. Otherwise, the returned data is the data + * before the branch (not considering the exception handlers) *) + + val filterStmt: Cil.stmt -> Cil.stmt -> bool + (** Whether to put this predecessor block in the worklist. We give the + * predecessor and the block whose predecessor we are (and whose data has + * changed) *) + +end + +module BackwardsDataFlow (T : BackwardsTransfer) : sig + val compute: Cil.stmt list -> unit + (** Fill in the T.stmtStartData, given a number of initial statements to + * start from (the sinks for the backwards data flow). All of the statements + * (not just the initial ones!) must have some entry in T.stmtStartData + * (i.e., the initial data should not be bottom) *) +end diff --git a/cil/src/ext/dataslicing.ml b/cil/src/ext/dataslicing.ml new file mode 100644 index 00000000..35390b40 --- /dev/null +++ b/cil/src/ext/dataslicing.ml @@ -0,0 +1,462 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2004, + * Jeremy Condit + * George C. Necula + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +open Cil +open Pretty +module E = Errormsg + +let debug = false + +let numRegions : int = 2 + +let newGlobals : global list ref = ref [] + +let curFundec : fundec ref = ref dummyFunDec +let curLocation : location ref = ref locUnknown + +let applyOption (fn : 'a -> 'b) (ao : 'a option) : 'b option = + match ao with + | Some a -> Some (fn a) + | None -> None + +let getRegion (attrs : attributes) : int = + try + match List.hd (filterAttributes "region" attrs) with + | Attr (_, [AInt i]) -> i + | _ -> E.s (bug "bad region attribute") + with Failure _ -> + 1 + +let checkRegion (i : int) (attrs : attributes) : bool = + (getRegion attrs) = i + +let regionField (i : int) : string = + "r" ^ (string_of_int i) + +let regionStruct (i : int) (name : string) : string = + name ^ "_r" ^ (string_of_int i) + +let foldRegions (fn : int -> 'a -> 'a) (base : 'a) : 'a = + let rec helper (i : int) : 'a = + if i <= numRegions then + fn i (helper (i + 1)) + else + base + in + helper 1 + +let rec getTypeName (t : typ) : string = + match t with + | TVoid _ -> "void" + | TInt _ -> "int" + | TFloat _ -> "float" + | TComp (cinfo, _) -> "comp_" ^ cinfo.cname + | TNamed (tinfo, _) -> "td_" ^ tinfo.tname + | TPtr (bt, _) -> "ptr_" ^ (getTypeName bt) + | TArray (bt, _, _) -> "array_" ^ (getTypeName bt) + | TFun _ -> "fn" + | _ -> E.s (unimp "typename") + +let isAllocFunction (fn : exp) : bool = + match fn with + | Lval (Var vinfo, NoOffset) when vinfo.vname = "malloc" -> true + | _ -> false + +let isExternalFunction (fn : exp) : bool = + match fn with + | Lval (Var vinfo, NoOffset) when vinfo.vstorage = Extern -> true + | _ -> false + +let types : (int * typsig, typ) Hashtbl.t = Hashtbl.create 113 +let typeInfos : (int * string, typeinfo) Hashtbl.t = Hashtbl.create 113 +let compInfos : (int * int, compinfo) Hashtbl.t = Hashtbl.create 113 +let varTypes : (typsig, typ) Hashtbl.t = Hashtbl.create 113 +let varCompInfos : (typsig, compinfo) Hashtbl.t = Hashtbl.create 113 + +let rec sliceCompInfo (i : int) (cinfo : compinfo) : compinfo = + try + Hashtbl.find compInfos (i, cinfo.ckey) + with Not_found -> + mkCompInfo cinfo.cstruct (regionStruct i cinfo.cname) + (fun cinfo' -> + Hashtbl.add compInfos (i, cinfo.ckey) cinfo'; + List.fold_right + (fun finfo rest -> + let t = sliceType i finfo.ftype in + if not (isVoidType t) then + (finfo.fname, t, finfo.fbitfield, + finfo.fattr, finfo.floc) :: rest + else + rest) + cinfo.cfields []) + cinfo.cattr + +and sliceTypeInfo (i : int) (tinfo : typeinfo) : typeinfo = + try + Hashtbl.find typeInfos (i, tinfo.tname) + with Not_found -> + let result = + { tinfo with tname = regionStruct i tinfo.tname; + ttype = sliceType i tinfo.ttype; } + in + Hashtbl.add typeInfos (i, tinfo.tname) result; + result + +and sliceType (i : int) (t : typ) : typ = + let ts = typeSig t in + try + Hashtbl.find types (i, ts) + with Not_found -> + let result = + match t with + | TVoid _ -> t + | TInt (_, attrs) -> if checkRegion i attrs then t else TVoid [] + | TFloat (_, attrs) -> if checkRegion i attrs then t else TVoid [] + | TComp (cinfo, attrs) -> TComp (sliceCompInfo i cinfo, attrs) + | TNamed (tinfo, attrs) -> TNamed (sliceTypeInfo i tinfo, attrs) + | TPtr (TVoid _, _) -> t (* Avoid discarding void*. *) + | TPtr (bt, attrs) -> + let bt' = sliceType i bt in + if not (isVoidType bt') then TPtr (bt', attrs) else TVoid [] + | TArray (bt, eo, attrs) -> + TArray (sliceType i bt, applyOption (sliceExp 1) eo, attrs) + | TFun (ret, args, va, attrs) -> + if checkRegion i attrs then + TFun (sliceTypeAll ret, + applyOption + (List.map (fun (aname, atype, aattrs) -> + (aname, sliceTypeAll atype, aattrs))) + args, + va, attrs) + else + TVoid [] + | TBuiltin_va_list _ -> t + | _ -> E.s (unimp "type %a" d_type t) + in + Hashtbl.add types (i, ts) result; + result + +and sliceTypeAll (t : typ) : typ = + begin + match t with + | TComp (cinfo, _) when hasAttribute "var_type_sliced" cinfo.cattr -> + E.s (bug "tried to slice twice") + | _ -> () + end; + let ts = typeSig t in + try + Hashtbl.find varTypes ts + with Not_found -> + let cinfo = + let name = ("var_" ^ (getTypeName t)) in + if debug then ignore (E.log "creating %s\n" name); + try + Hashtbl.find varCompInfos ts + with Not_found -> + mkCompInfo true name + (fun cinfo -> + Hashtbl.add varCompInfos ts cinfo; + foldRegions + (fun i rest -> + let t' = sliceType i t in + if not (isVoidType t') then + (regionField i, t', None, [], !curLocation) :: rest + else + rest) + []) + [Attr ("var_type_sliced", [])] + in + let t' = + if List.length cinfo.cfields > 1 then + begin + newGlobals := GCompTag (cinfo, !curLocation) :: !newGlobals; + TComp (cinfo, []) + end + else + t + in + Hashtbl.add varTypes ts t'; + t' + +and sliceLval (i : int) (lv : lval) : lval = + if debug then ignore (E.log "lval %a\n" d_lval lv); + let lh, offset = lv in + match lh with + | Var vinfo -> + let t = sliceTypeAll vinfo.vtype in + let offset' = + match t with + | TComp (cinfo, _) when hasAttribute "var_type_sliced" cinfo.cattr -> + Field (getCompField cinfo (regionField i), offset) + | _ -> offset + in + Var vinfo, offset' + | Mem e -> + Mem (sliceExp i e), offset + +and sliceExp (i : int) (e : exp) : exp = + if debug then ignore (E.log "exp %a\n" d_exp e); + match e with + | Const c -> Const c + | Lval lv -> Lval (sliceLval i lv) + | UnOp (op, e1, t) -> UnOp (op, sliceExp i e1, sliceType i t) + | BinOp (op, e1, e2, t) -> BinOp (op, sliceExp i e1, sliceExp i e2, + sliceType i t) + | CastE (t, e) -> sliceCast i t e + | AddrOf lv -> AddrOf (sliceLval i lv) + | StartOf lv -> StartOf (sliceLval i lv) + | SizeOf t -> SizeOf (sliceTypeAll t) + | _ -> E.s (unimp "exp %a" d_exp e) + +and sliceCast (i : int) (t : typ) (e : exp) : exp = + let te = typeOf e in + match t, te with + | TInt (k1, _), TInt (k2, attrs2) when k1 = k2 -> + (* Note: We strip off integer cast operations. *) + sliceExp (getRegion attrs2) e + | TInt (k1, _), TPtr _ -> + (* Note: We strip off integer cast operations. *) + sliceExp i e + | TPtr _, _ when isZero e -> + CastE (sliceType i t, sliceExp i e) + | TPtr (bt1, _), TPtr (bt2, _) when (typeSig bt1) = (typeSig bt2) -> + CastE (sliceType i t, sliceExp i e) + | _ -> + E.s (unimp "sketchy cast (%a) -> (%a)\n" d_type te d_type t) + +and sliceExpAll (e : exp) (l : location) : instr list * exp = + let t = typeOf e in + let t' = sliceTypeAll t in + match t' with + | TComp (cinfo, _) when hasAttribute "var_type_sliced" cinfo.cattr -> + let vinfo = makeTempVar !curFundec t in + let instrs = + foldRegions + (fun i rest -> + try + let finfo = getCompField cinfo (regionField i) in + if not (isVoidType finfo.ftype) then + Set ((Var vinfo, Field (finfo, NoOffset)), + sliceExp i e, l) :: rest + else + rest + with Not_found -> + rest) + [] + in + instrs, Lval (var vinfo) + | _ -> [], sliceExp 1 e + +let sliceVar (vinfo : varinfo) : unit = + if hasAttribute "var_sliced" vinfo.vattr then + E.s (bug "tried to slice a var twice"); + let t = sliceTypeAll vinfo.vtype in + if debug then ignore (E.log "setting %s type to %a\n" vinfo.vname d_type t); + vinfo.vattr <- addAttribute (Attr ("var_sliced", [])) vinfo.vattr; + vinfo.vtype <- t + +let sliceInstr (inst : instr) : instr list = + match inst with + | Set (lv, e, loc) -> + if debug then ignore (E.log "set %a %a\n" d_lval lv d_exp e); + let t = typeOfLval lv in + foldRegions + (fun i rest -> + if not (isVoidType (sliceType i t)) then + Set (sliceLval i lv, sliceExp i e, loc) :: rest + else + rest) + [] + | Call (ret, fn, args, l) when isAllocFunction fn -> + let lv = + match ret with + | Some lv -> lv + | None -> E.s (bug "malloc call has no return lval") + in + let t = typeOfLval lv in + foldRegions + (fun i rest -> + if not (isVoidType (sliceType i t)) then + Call (Some (sliceLval i lv), sliceExp 1 fn, + List.map (sliceExp i) args, l) :: rest + else + rest) + [] + | Call (ret, fn, args, l) when isExternalFunction fn -> + [Call (applyOption (sliceLval 1) ret, sliceExp 1 fn, + List.map (sliceExp 1) args, l)] + | Call (ret, fn, args, l) -> + let ret', set = + match ret with + | Some lv -> + let vinfo = makeTempVar !curFundec (typeOfLval lv) in + Some (var vinfo), [Set (lv, Lval (var vinfo), l)] + | None -> + None, [] + in + let instrs, args' = + List.fold_right + (fun arg (restInstrs, restArgs) -> + let instrs, arg' = sliceExpAll arg l in + instrs @ restInstrs, (arg' :: restArgs)) + args ([], []) + in + instrs @ (Call (ret', sliceExp 1 fn, args', l) :: set) + | _ -> E.s (unimp "inst %a" d_instr inst) + +let sliceReturnExp (eo : exp option) (l : location) : stmtkind = + match eo with + | Some e -> + begin + match sliceExpAll e l with + | [], e' -> Return (Some e', l) + | instrs, e' -> Block (mkBlock [mkStmt (Instr instrs); + mkStmt (Return (Some e', l))]) + end + | None -> Return (None, l) + +let rec sliceStmtKind (sk : stmtkind) : stmtkind = + match sk with + | Instr instrs -> Instr (List.flatten (List.map sliceInstr instrs)) + | Block b -> Block (sliceBlock b) + | If (e, b1, b2, l) -> If (sliceExp 1 e, sliceBlock b1, sliceBlock b2, l) + | Break l -> Break l + | Continue l -> Continue l + | Return (eo, l) -> sliceReturnExp eo l + | Switch (e, b, sl, l) -> Switch (sliceExp 1 e, sliceBlock b, + List.map sliceStmt sl, l) +(* + | Loop (b, l, so1, so2) -> Loop (sliceBlock b, l, + applyOption sliceStmt so1, + applyOption sliceStmt so2) +*) + | While (e, b, l) -> While (sliceExp 1 e, sliceBlock b, l) + | DoWhile (e, b, l) -> DoWhile (sliceExp 1 e, sliceBlock b, l) + | For (bInit, e, bIter, b, l) -> + For (sliceBlock bInit, sliceExp 1e, sliceBlock bIter, sliceBlock b, l) + | Goto _ -> sk + | _ -> E.s (unimp "statement") + +and sliceStmt (s : stmt) : stmt = + (* Note: We update statements destructively so that goto/switch work. *) + s.skind <- sliceStmtKind s.skind; + s + +and sliceBlock (b : block) : block = + ignore (List.map sliceStmt b.bstmts); + b + +let sliceFundec (fd : fundec) (l : location) : unit = + curFundec := fd; + curLocation := l; + ignore (sliceBlock fd.sbody); + curFundec := dummyFunDec; + curLocation := locUnknown + +let sliceGlobal (g : global) : unit = + match g with + | GType (tinfo, l) -> + newGlobals := + foldRegions (fun i rest -> GType (sliceTypeInfo i tinfo, l) :: rest) + !newGlobals + | GCompTag (cinfo, l) -> + newGlobals := + foldRegions (fun i rest -> GCompTag (sliceCompInfo i cinfo, l) :: rest) + !newGlobals + | GCompTagDecl (cinfo, l) -> + newGlobals := + foldRegions (fun i rest -> GCompTagDecl (sliceCompInfo i cinfo, l) :: + rest) + !newGlobals + | GFun (fd, l) -> + sliceFundec fd l; + newGlobals := GFun (fd, l) :: !newGlobals + | GVarDecl _ + | GVar _ -> + (* Defer processing of vars until end. *) + newGlobals := g :: !newGlobals + | _ -> + E.s (unimp "global %a\n" d_global g) + +let sliceGlobalVars (g : global) : unit = + match g with + | GFun (fd, l) -> + curFundec := fd; + curLocation := l; + List.iter sliceVar fd.slocals; + List.iter sliceVar fd.sformals; + setFunctionType fd (sliceType 1 fd.svar.vtype); + curFundec := dummyFunDec; + curLocation := locUnknown; + | GVar (vinfo, _, l) -> + curLocation := l; + sliceVar vinfo; + curLocation := locUnknown + | _ -> () + +class dropAttrsVisitor = object + inherit nopCilVisitor + + method vvrbl (vinfo : varinfo) = + vinfo.vattr <- dropAttribute "var_sliced" vinfo.vattr; + DoChildren + + method vglob (g : global) = + begin + match g with + | GCompTag (cinfo, _) -> + cinfo.cattr <- dropAttribute "var_type_sliced" cinfo.cattr; + | _ -> () + end; + DoChildren +end + +let sliceFile (f : file) : unit = + newGlobals := []; + List.iter sliceGlobal f.globals; + List.iter sliceGlobalVars f.globals; + f.globals <- List.rev !newGlobals; + visitCilFile (new dropAttrsVisitor) f + +let feature : featureDescr = + { fd_name = "DataSlicing"; + fd_enabled = ref false; + fd_description = "data slicing"; + fd_extraopt = []; + fd_doit = sliceFile; + fd_post_check = true; + } diff --git a/cil/src/ext/dataslicing.mli b/cil/src/ext/dataslicing.mli new file mode 100644 index 00000000..00606484 --- /dev/null +++ b/cil/src/ext/dataslicing.mli @@ -0,0 +1,41 @@ +(* + * + * Copyright (c) 2001-2002, + * Jeremy Condit + * George C. Necula + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* This feature implements data slicing. The user annotates base types + * and function types with region(i) annotations, and this transformation + * will separate the fields into parallel data structures accordingly. *) + +val feature: Cil.featureDescr diff --git a/cil/src/ext/deadcodeelim.ml b/cil/src/ext/deadcodeelim.ml new file mode 100644 index 00000000..e560e01d --- /dev/null +++ b/cil/src/ext/deadcodeelim.ml @@ -0,0 +1,173 @@ +(* Eliminate assignment instructions whose results are not + used *) + +open Cil +open Pretty + +module E = Errormsg +module RD = Reachingdefs +module UD = Usedef +module IH = Inthash +module S = Stats + +module IS = Set.Make( + struct + type t = int + let compare = compare + end) + +let debug = RD.debug + + +let usedDefsSet = ref IS.empty +(* put used def ids into usedDefsSet *) +(* assumes reaching definitions have already been computed *) +class usedDefsCollectorClass = object(self) + inherit RD.rdVisitorClass + + method add_defids iosh e u = + UD.VS.iter (fun vi -> + if IH.mem iosh vi.vid then + let ios = IH.find iosh vi.vid in + if !debug then ignore(E.log "DCE: IOS size for vname=%s at stmt=%d: %d\n" + vi.vname sid (RD.IOS.cardinal ios)); + RD.IOS.iter (function + Some(i) -> + if !debug then ignore(E.log "DCE: def %d used: %a\n" i d_plainexp e); + usedDefsSet := IS.add i (!usedDefsSet) + | None -> ()) ios + else if !debug then ignore(E.log "DCE: vid %d:%s not in stm:%d iosh at %a\n" + vi.vid vi.vname sid d_plainexp e)) u + + method vexpr e = + let u = UD.computeUseExp e in + match self#get_cur_iosh() with + Some(iosh) -> self#add_defids iosh e u; DoChildren + | None -> + if !debug then ignore(E.log "DCE: use but no rd data: %a\n" d_plainexp e); + DoChildren + + method vinst i = + let handle_inst iosh i = match i with + | Asm(_,_,slvl,_,_,_) -> List.iter (fun (s,lv) -> + match lv with (Var v, off) -> + if s.[0] = '+' then + self#add_defids iosh (Lval(Var v, off)) (UD.VS.singleton v) + | _ -> ()) slvl + | _ -> () + in + begin try + cur_rd_dat <- Some(List.hd rd_dat_lst); + rd_dat_lst <- List.tl rd_dat_lst + with Failure "hd" -> () + end; + match self#get_cur_iosh() with + Some iosh -> handle_inst iosh i; DoChildren + | None -> DoChildren + +end + +(*************************************************** + * Also need to find reads from volatiles + * uses two functions I've put in ciltools which + * are basically what Zach wrote, except one is for + * types and one is for vars. Another difference is + * they filter out pointers to volatiles. This + * handles DMA + ***************************************************) +class hasVolatile flag = object (self) + inherit nopCilVisitor + method vlval l = + let tp = typeOfLval l in + if (Ciltools.is_volatile_tp tp) then flag := true; + DoChildren + method vexpr e = + DoChildren +end + +let exp_has_volatile e = + let flag = ref false in + ignore (visitCilExpr (new hasVolatile flag) e); + !flag + (***************************************************) + +let removedCount = ref 0 +(* Filter out instructions whose definition ids are not + in usedDefsSet *) +class uselessInstrElim : cilVisitor = object(self) + inherit nopCilVisitor + + method vstmt stm = + + let test (i,(_,s,iosh)) = + match i with + Call _ -> true + | Set((Var vi,NoOffset),e,_) -> + if vi.vglob || (Ciltools.is_volatile_vi vi) || (exp_has_volatile e) then true else + let _, defd = UD.computeUseDefInstr i in + let rec loop n = + if n < 0 then false else + if IS.mem (n+s) (!usedDefsSet) + then true + else loop (n-1) + in + if loop (UD.VS.cardinal defd - 1) + then true + else (incr removedCount; false) + | _ -> true + in + + let filter il stmdat = + let rd_dat_lst = RD.instrRDs il stm.sid stmdat false in + let ildatlst = List.combine il rd_dat_lst in + let ildatlst' = List.filter test ildatlst in + let (newil,_) = List.split ildatlst' in + newil + in + + match RD.getRDs stm.sid with + None -> DoChildren + | Some(_,s,iosh) -> + match stm.skind with + Instr il -> + stm.skind <- Instr(filter il ((),s,iosh)); + SkipChildren + | _ -> DoChildren + +end + +(* until fixed point is reached *) +let elim_dead_code_fp (fd : fundec) : fundec = + (* fundec -> fundec *) + let rec loop fd = + usedDefsSet := IS.empty; + removedCount := 0; + S.time "reaching definitions" RD.computeRDs fd; + ignore(visitCilFunction (new usedDefsCollectorClass :> cilVisitor) fd); + let fd' = visitCilFunction (new uselessInstrElim) fd in + if !removedCount = 0 then fd' else loop fd' + in + loop fd + +(* just once *) +let elim_dead_code (fd : fundec) : fundec = + (* fundec -> fundec *) + usedDefsSet := IS.empty; + removedCount := 0; + S.time "reaching definitions" RD.computeRDs fd; + ignore(visitCilFunction (new usedDefsCollectorClass :> cilVisitor) fd); + let fd' = visitCilFunction (new uselessInstrElim) fd in + fd' + +class deadCodeElimClass : cilVisitor = object(self) + inherit nopCilVisitor + + method vfunc fd = + let fd' = elim_dead_code fd in + ChangeTo(fd') + +end + +let dce f = + if !debug then ignore(E.log "DCE: starting dead code elimination\n"); + visitCilFile (new deadCodeElimClass) f diff --git a/cil/src/ext/dominators.ml b/cil/src/ext/dominators.ml new file mode 100755 index 00000000..d838d23f --- /dev/null +++ b/cil/src/ext/dominators.ml @@ -0,0 +1,241 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(** Compute dominator information for the statements in a function *) +open Cil +open Pretty +module E = Errormsg +module H = Hashtbl +module U = Util +module IH = Inthash + +module DF = Dataflow + +let debug = false + +(* For each statement we maintain a set of statements that dominate it *) +module BS = Set.Make(struct + type t = Cil.stmt + let compare v1 v2 = Pervasives.compare v1.sid v2.sid + end) + + + + +(** Customization module for dominators *) +module DT = struct + let name = "dom" + + let debug = ref debug + + type t = BS.t + + (** For each statement in a function we keep the set of dominator blocks. + * Indexed by statement id *) + let stmtStartData: t IH.t = IH.create 17 + + let copy (d: t) = d + + let pretty () (d: t) = + dprintf "{%a}" + (docList (fun s -> dprintf "%d" s.sid)) + (BS.elements d) + + let computeFirstPredecessor (s: stmt) (d: BS.t) : BS.t = + (* Make sure we add this block to the set *) + BS.add s d + + let combinePredecessors (s: stmt) ~(old: BS.t) (d: BS.t) : BS.t option = + (* First, add this block to the data from the predecessor *) + let d' = BS.add s d in + if BS.subset old d' then + None + else + Some (BS.inter old d') + + let doInstr (i: instr) (d: t) = DF.Default + + let doStmt (s: stmt) (d: t) = DF.SDefault + + let doGuard condition _ = DF.GDefault + + + let filterStmt _ = true +end + + + +module Dom = DF.ForwardsDataFlow(DT) + +let getStmtDominators (data: BS.t IH.t) (s: stmt) : BS.t = + try IH.find data s.sid + with Not_found -> BS.empty (* Not reachable *) + + +let getIdom (idomInfo: stmt option IH.t) (s: stmt) = + try IH.find idomInfo s.sid + with Not_found -> + E.s (E.bug "Immediate dominator information not set for statement %d" + s.sid) + +(** Check whether one block dominates another. This assumes that the "idom" + * field has been computed. *) +let rec dominates (idomInfo: stmt option IH.t) (s1: stmt) (s2: stmt) = + s1 == s2 || + (let s2idom = getIdom idomInfo s2 in + match s2idom with + None -> false + | Some s2idom -> dominates idomInfo s1 s2idom) + + + + +let computeIDom (f: fundec) : stmt option IH.t = + (* We must prepare the CFG info first *) + prepareCFG f; + computeCFGInfo f false; + + IH.clear DT.stmtStartData; + let idomData: stmt option IH.t = IH.create 13 in + + let _ = + match f.sbody.bstmts with + [] -> () (* function has no body *) + | start :: _ -> begin + (* We start with only the start block *) + IH.add DT.stmtStartData start.sid (BS.singleton start); + + Dom.compute [start]; + + (* Dump the dominators information *) + if debug then + List.iter + (fun s -> + let sdoms = getStmtDominators DT.stmtStartData s in + if not (BS.mem s sdoms) then begin + (* It can be that the block is not reachable *) + if s.preds <> [] then + E.s (E.bug "Statement %d is not in its list of dominators" + s.sid); + end; + ignore (E.log "Dominators for %d: %a\n" s.sid + DT.pretty (BS.remove s sdoms))) + f.sallstmts; + + (* Now fill the immediate dominators for all nodes *) + let rec fillOneIdom (s: stmt) = + try + ignore (IH.find idomData s.sid) + (* Already set *) + with Not_found -> begin + (* Get the dominators *) + let sdoms = getStmtDominators DT.stmtStartData s in + (* Fill the idom for the dominators first *) + let idom = + BS.fold + (fun d (sofar: stmt option) -> + if d.sid = s.sid then + sofar (* Ignore the block itself *) + else begin + (* fill the idom information recursively *) + fillOneIdom d; + match sofar with + None -> Some d + | Some sofar' -> + (* See if d is dominated by sofar. We know that the + * idom information has been computed for both sofar + * and for d*) + if dominates idomData sofar' d then + Some d + else + sofar + end) + sdoms + None + in + IH.replace idomData s.sid idom + end + in + (* Scan all blocks and compute the idom *) + List.iter fillOneIdom f.sallstmts + end + in + idomData + + + +(** Compute the start of the natural loops. For each start, keep a list of + * origin of a back edge. The loop consists of the loop start and all + * predecessors of the origins of back edges, up to and including the loop + * start *) +let findNaturalLoops (f: fundec) + (idomData: stmt option IH.t) : (stmt * stmt list) list = + let loops = + List.fold_left + (fun acc b -> + (* Iterate over all successors, and see if they are among the + * dominators for this block *) + List.fold_left + (fun acc s -> + if dominates idomData s b then + (* s is the start of a natural loop *) + let rec addNaturalLoop = function + [] -> [(s, [b])] + | (s', backs) :: rest when s'.sid = s.sid -> + (s', b :: backs) :: rest + | l :: rest -> l :: addNaturalLoop rest + in + addNaturalLoop acc + else + acc) + acc + b.succs) + [] + f.sallstmts + in + + if debug then + ignore (E.log "Natural loops:\n%a\n" + (docList ~sep:line + (fun (s, backs) -> + dprintf " Start: %d, backs:%a" + s.sid + (docList (fun b -> num b.sid)) + backs)) + loops); + + loops diff --git a/cil/src/ext/dominators.mli b/cil/src/ext/dominators.mli new file mode 100755 index 00000000..0abf82e9 --- /dev/null +++ b/cil/src/ext/dominators.mli @@ -0,0 +1,29 @@ + + +(** Compute dominators using data flow analysis *) +(** Author: George Necula + 5/28/2004 + **) + +(** Invoke on a code after filling in the CFG info and it computes the + * immediate dominator information. We map each statement to its immediate + * dominator (None for the start statement, and for the unreachable + * statements). *) +val computeIDom: Cil.fundec -> Cil.stmt option Inthash.t + + +(** This is like Inthash.find but gives an error if the information is + * Not_found *) +val getIdom: Cil.stmt option Inthash.t -> Cil.stmt -> Cil.stmt option + +(** Check whether one statement dominates another. *) +val dominates: Cil.stmt option Inthash.t -> Cil.stmt -> Cil.stmt -> bool + + +(** Compute the start of the natural loops. This assumes that the "idom" + * field has been computed. For each start, keep a list of origin of a back + * edge. The loop consists of the loop start and all predecessors of the + * origins of back edges, up to and including the loop start *) +val findNaturalLoops: Cil.fundec -> + Cil.stmt option Inthash.t -> + (Cil.stmt * Cil.stmt list) list diff --git a/cil/src/ext/epicenter.ml b/cil/src/ext/epicenter.ml new file mode 100644 index 00000000..a8045e85 --- /dev/null +++ b/cil/src/ext/epicenter.ml @@ -0,0 +1,114 @@ +(* epicenter.ml *) +(* code for epicenter.mli *) + +(* module maintainer: scott *) +(* see copyright at end of this file *) + +open Callgraph +open Cil +open Trace +open Pretty +module H = Hashtbl +module IH = Inthash + +let sliceFile (f:file) (epicenter:string) (maxHops:int) : unit = + (* compute the static call graph *) + let graph:callgraph = (computeGraph f) in + + (* will accumulate here the set of names of functions already seen *) + let seen: (string, unit) H.t = (H.create 117) in + + (* when removing "unused" symbols, keep all seen functions *) + let isRoot : global -> bool = function + | GFun ({svar = {vname = vname}}, _) -> + H.mem seen vname + | _ -> + false + in + + (* recursive depth-first search through the call graph, finding + * all nodes within 'hops' hops of 'node' and marking them to + * to be retained *) + let rec dfs (node:callnode) (hops:int) : unit = + (* only recurse if we haven't already marked this node *) + if not (H.mem seen (nodeName node.cnInfo)) then + begin + (* add this node *) + H.add seen (nodeName node.cnInfo) (); + trace "epicenter" (dprintf "will keep %s\n" (nodeName node.cnInfo)); + + (* if we cannot do any more hops, stop *) + if (hops > 0) then + + (* recurse on all the node's callers and callees *) + let recurse _ (adjacent:callnode) : unit = + (dfs adjacent (hops - 1)) + in + IH.iter recurse node.cnCallees; + IH.iter recurse node.cnCallers + end + in + dfs (Hashtbl.find graph epicenter) maxHops; + + (* finally, throw away anything we haven't decided to keep *) + Cilutil.sliceGlobal := true; + Rmtmps.removeUnusedTemps ~isRoot:isRoot f + +let doEpicenter = ref false +let epicenterName = ref "" +let epicenterHops = ref 0 + +let feature : featureDescr = + { fd_name = "epicenter"; + fd_enabled = doEpicenter; + fd_description = "remove all functions except those within some number " ^ + "of hops (in the call graph) from a given function"; + fd_extraopt = + [ + ("--epicenter-name", + Arg.String (fun s -> epicenterName := s), + ": do an epicenter slice starting from function "); + ("--epicenter-hops", Arg.Int (fun n -> epicenterHops := n), + ": specify max # of hops for epicenter slice"); + ]; + + fd_doit = + (fun f -> + sliceFile f !epicenterName !epicenterHops); + + fd_post_check = true; + } + + +(* + * + * Copyright (c) 2001-2002 by + * George C. Necula necula@cs.berkeley.edu + * Scott McPeak smcpeak@cs.berkeley.edu + * Wes Weimer weimer@cs.berkeley.edu + * Ben Liblit liblit@cs.berkeley.edu + * + * All rights reserved. Permission to use, copy, modify and distribute + * this software for research purposes only is hereby granted, + * provided that the following conditions are met: + * 1. XSRedistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * DISCLAIMER: + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/heap.ml b/cil/src/ext/heap.ml new file mode 100644 index 00000000..10f48a04 --- /dev/null +++ b/cil/src/ext/heap.ml @@ -0,0 +1,112 @@ +(* See copyright notice at the end of the file *) + +(* The type of a heap (priority queue): keys are integers, data values + * are whatever you like *) +type ('a) t = { + elements : (int * ('a option)) array ; + mutable size : int ; (* current number of elements *) + capacity : int ; (* max number of elements *) +} + +let create size = { + elements = Array.create (size+1) (max_int,None) ; + size = 0 ; + capacity = size ; +} + +let clear heap = heap.size <- 0 + +let is_full heap = (heap.size = heap.capacity) + +let is_empty heap = (heap.size = 0) + +let insert heap prio elt = begin + if is_full heap then begin + raise (Invalid_argument "Heap.insert") + end ; + heap.size <- heap.size + 1 ; + let i = ref heap.size in + while ( fst heap.elements.(!i / 2) < prio ) do + heap.elements.(!i) <- heap.elements.(!i / 2) ; + i := (!i / 2) + done ; + heap.elements.(!i) <- (prio,Some(elt)) + end + +let examine_max heap = + if is_empty heap then begin + raise (Invalid_argument "Heap.examine_max") + end ; + match heap.elements.(1) with + p,Some(elt) -> p,elt + | p,None -> failwith "Heap.examine_max" + +let extract_max heap = begin + if is_empty heap then begin + raise (Invalid_argument "Heap.extract_max") + end ; + let max = heap.elements.(1) in + let last = heap.elements.(heap.size) in + heap.size <- heap.size - 1 ; + let i = ref 1 in + let break = ref false in + while (!i * 2 <= heap.size) && not !break do + let child = ref (!i * 2) in + + (* find smaller child *) + if (!child <> heap.size && + fst heap.elements.(!child+1) > fst heap.elements.(!child)) then begin + incr child + end ; + + (* percolate one level *) + if (fst last < fst heap.elements.(!child)) then begin + heap.elements.(!i) <- heap.elements.(!child) ; + i := !child + end else begin + break := true + end + done ; + heap.elements.(!i) <- last ; + match max with + p,Some(elt) -> p,elt + | p,None -> failwith "Heap.examine_min" + end + + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/heapify.ml b/cil/src/ext/heapify.ml new file mode 100644 index 00000000..a583181e --- /dev/null +++ b/cil/src/ext/heapify.ml @@ -0,0 +1,250 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* + * Heapify: a program transform that looks over functions, finds those + * that have local (stack) variables that contain arrays, puts all such + * local variables into a single heap allocated structure, changes all + * accesses to such variables into accesses to fields of that structure + * and frees the structure on return. + *) +open Cil + +(* utilities that should be in Cil.ml *) +(* sfg: this function appears to never be called *) +let mkSimpleField ci fn ft fl = + { fcomp = ci ; fname = fn ; ftype = ft ; fbitfield = None ; fattr = []; + floc = fl } + + +(* actual Heapify begins *) + +let heapifyNonArrays = ref false + +(* Does this local var contain an array? *) +let rec containsArray (t:typ) : bool = (* does this type contain an array? *) + match unrollType t with + TArray _ -> true + | TComp(ci, _) -> (* look at the types of the fields *) + List.exists (fun fi -> containsArray fi.ftype) ci.cfields + | _ -> + (* Ignore other types, including TInt and TPtr. We don't care whether + there are arrays in the base types of pointers; only about whether + this local variable itself needs to be moved to the heap. *) + false + + +class heapifyModifyVisitor big_struct big_struct_fields varlist free + (currentFunction: fundec) = object(self) + inherit nopCilVisitor (* visit lvalues and statements *) + method vlval l = match l with (* should we change this one? *) + Var(vi),vi_offset when List.mem_assoc vi varlist -> (* check list *) + let i = List.assoc vi varlist in (* find field offset *) + let big_struct_field = List.nth big_struct_fields i in + let new_lval = Mem(Lval(big_struct, NoOffset)), + Field(big_struct_field,vi_offset) in (* rewrite the lvalue *) + ChangeDoChildrenPost(new_lval, (fun l -> l)) + | _ -> DoChildren (* ignore other lvalues *) + method vstmt s = match s.skind with (* also rewrite the return *) + Return(None,loc) -> + let free_instr = Call(None,free,[Lval(big_struct,NoOffset)],loc) in + self#queueInstr [free_instr]; (* insert free_instr before the return *) + DoChildren + | Return(Some exp ,loc) -> + (* exp may depend on big_struct, so evaluate it before calling free. + * This becomes: tmp = exp; free(big_struct); return tmp; *) + let exp_new = visitCilExpr (self :> cilVisitor) exp in + let ret_tmp = makeTempVar currentFunction (typeOf exp_new) in + let eval_ret_instr = Set(var ret_tmp, exp_new, loc) in + let free_instr = Call(None,free,[Lval(big_struct,NoOffset)],loc) in + (* insert the instructions before the return *) + self#queueInstr [eval_ret_instr; free_instr]; + s.skind <- (Return(Some(Lval(var ret_tmp)), loc)); + DoChildren + | _ -> DoChildren (* ignore other statements *) +end + +class heapifyAnalyzeVisitor f alloc free = object + inherit nopCilVisitor (* only look at function bodies *) + method vglob gl = match gl with + GFun(fundec,funloc) -> + let counter = ref 0 in (* the number of local vars containing arrays *) + let varlist = ref [] in (* a list of (var,id) pairs, in reverse order *) + List.iter (fun vi -> + (* find all local vars with arrays. If the user requests it, + we also look for non-array vars whose address is taken. *) + if (containsArray vi.vtype) || (vi.vaddrof && !heapifyNonArrays) + then begin + varlist := (vi,!counter) :: !varlist ; (* add it to the list *) + incr counter (* put the next such var in the next slot *) + end + ) fundec.slocals ; + if (!varlist <> []) then begin (* some local vars contain arrays *) + let name = (fundec.svar.vname ^ "_heapify") in + let ci = mkCompInfo true name (* make a big structure *) + (fun _ -> List.rev_map (* reverse the list to fix the order *) + (* each local var becomes a field *) + (fun (vi,i) -> vi.vname,vi.vtype,None,[],vi.vdecl) !varlist) [] in + let vi = makeLocalVar fundec name (TPtr(TComp(ci,[]),[])) in + let modify = new heapifyModifyVisitor (Var(vi)) ci.cfields + !varlist free fundec in (* rewrite accesses to local vars *) + fundec.sbody <- visitCilBlock modify fundec.sbody ; + let alloc_stmt = mkStmt (* allocate the big struct on the heap *) + (Instr [Call(Some(Var(vi),NoOffset), alloc, + [SizeOf(TComp(ci,[]))],funloc)]) in + fundec.sbody.bstmts <- alloc_stmt :: fundec.sbody.bstmts ; + fundec.slocals <- List.filter (fun vi -> (* remove local vars *) + not (List.mem_assoc vi !varlist)) fundec.slocals ; + let typedec = (GCompTag(ci,funloc)) in (* declare the big struct *) + ChangeTo([typedec ; GFun(fundec,funloc)]) (* done! *) + end else + DoChildren (* ignore everything else *) + | _ -> DoChildren +end + +let heapify (f : file) (alloc : exp) (free : exp) = + visitCilFile (new heapifyAnalyzeVisitor f alloc free) f; + f + +(* heapify code ends here *) + +let default_heapify (f : file) = + let alloc_fun = emptyFunction "malloc" in + let free_fun = emptyFunction "free" in + let alloc_exp = (Lval((Var(alloc_fun.svar)),NoOffset)) in + let free_exp = (Lval((Var(free_fun.svar)),NoOffset)) in + ignore (heapify f alloc_exp free_exp) + +(* StackGuard clone *) + +class sgModifyVisitor restore_ra_stmt = object + inherit nopCilVisitor + method vstmt s = match s.skind with (* also rewrite the return *) + Return(_,loc) -> let new_block = mkBlock [restore_ra_stmt ; s] in + ChangeTo(mkStmt (Block(new_block))) + | _ -> DoChildren (* ignore other statements *) +end + +class sgAnalyzeVisitor f push pop get_ra set_ra = object + inherit nopCilVisitor + method vfunc fundec = + let needs_guarding = List.fold_left + (fun acc vi -> acc || containsArray vi.vtype) + false fundec.slocals in + if needs_guarding then begin + let ra_tmp = makeLocalVar fundec "return_address" voidPtrType in + let ra_exp = Lval(Var(ra_tmp),NoOffset) in + let save_ra_stmt = mkStmt (* save the current return address *) + (Instr [Call(Some(Var(ra_tmp),NoOffset), get_ra, [], locUnknown) ; + Call(None, push, [ra_exp], locUnknown)]) in + let restore_ra_stmt = mkStmt (* restore the old return address *) + (Instr [Call(Some(Var(ra_tmp),NoOffset), pop, [], locUnknown) ; + Call(None, set_ra, [ra_exp], locUnknown)]) in + let modify = new sgModifyVisitor restore_ra_stmt in + fundec.sbody <- visitCilBlock modify fundec.sbody ; + fundec.sbody.bstmts <- save_ra_stmt :: fundec.sbody.bstmts ; + ChangeTo(fundec) (* done! *) + end else DoChildren +end + +let stackguard (f : file) (push : exp) (pop : exp) + (get_ra : exp) (set_ra : exp) = + visitCilFileSameGlobals (new sgAnalyzeVisitor f push pop get_ra set_ra) f; + f + (* stackguard code ends *) + +let default_stackguard (f : file) = + let expify fundec = Lval(Var(fundec.svar),NoOffset) in + let push = expify (emptyFunction "stackguard_push") in + let pop = expify (emptyFunction "stackguard_pop") in + let get_ra = expify (emptyFunction "stackguard_get_ra") in + let set_ra = expify (emptyFunction "stackguard_set_ra") in + let global_decl = +"extern void * stackguard_get_ra(); +extern void stackguard_set_ra(void *new_ra); +/* You must provide an implementation for functions that get and set the + * return address. Such code is unfortunately architecture specific. + */ +struct stackguard_stack { + void * data; + struct stackguard_stack * next; +} * stackguard_stack; + +void stackguard_push(void *ra) { + void * old = stackguard_stack; + stackguard_stack = (struct stackguard_stack *) + malloc(sizeof(stackguard_stack)); + stackguard_stack->data = ra; + stackguard_stack->next = old; +} + +void * stackguard_pop() { + void * ret = stackguard_stack->data; + void * next = stackguard_stack->next; + free(stackguard_stack); + stackguard_stack->next = next; + return ret; +}" in + f.globals <- GText(global_decl) :: f.globals ; + ignore (stackguard f push pop get_ra set_ra ) + + +let feature1 : featureDescr = + { fd_name = "stackGuard"; + fd_enabled = Cilutil.doStackGuard; + fd_description = "instrument function calls and returns to maintain a separate stack for return addresses" ; + fd_extraopt = []; + fd_doit = (function (f: file) -> default_stackguard f); + fd_post_check = true; + } +let feature2 : featureDescr = + { fd_name = "heapify"; + fd_enabled = Cilutil.doHeapify; + fd_description = "move stack-allocated arrays to the heap" ; + fd_extraopt = [ + "--heapifyAll", Arg.Set heapifyNonArrays, + "When using heapify, move all local vars whose address is taken, not just arrays."; + ]; + fd_doit = (function (f: file) -> default_heapify f); + fd_post_check = true; + } + + + + + + diff --git a/cil/src/ext/liveness.ml b/cil/src/ext/liveness.ml new file mode 100644 index 00000000..72cd6073 --- /dev/null +++ b/cil/src/ext/liveness.ml @@ -0,0 +1,190 @@ + +(* Calculate which variables are live at + * each statememnt. + * + * + * + *) + +open Cil +open Pretty + +module DF = Dataflow +module UD = Usedef +module IH = Inthash +module E = Errormsg + +let debug = ref false + +let live_label = ref "" +let live_func = ref "" + +module VS = UD.VS + +let debug_print () vs = (VS.fold + (fun vi d -> + d ++ text "name: " ++ text vi.vname + ++ text " id: " ++ num vi.vid ++ text " ") + vs nil) ++ line + +let min_print () vs = (VS.fold + (fun vi d -> + d ++ text vi.vname + ++ text "(" ++ d_type () vi.vtype ++ text ")" + ++ text ",") + vs nil) ++ line + +let printer = ref debug_print + +module LiveFlow = struct + let name = "Liveness" + let debug = debug + type t = VS.t + + let pretty () vs = + let fn = !printer in + fn () vs + + let stmtStartData = IH.create 32 + + let combineStmtStartData (stm:stmt) ~(old:t) (now:t) = + if not(VS.compare old now = 0) + then Some(VS.union old now) + else None + + let combineSuccessors = VS.union + + let doStmt stmt = + if !debug then ignore(E.log "looking at: %a\n" d_stmt stmt); + match stmt.succs with + [] -> let u,d = UD.computeUseDefStmtKind stmt.skind in + if !debug then ignore(E.log "doStmt: no succs %d\n" stmt.sid); + DF.Done u + | _ -> + let handle_stm vs = match stmt.skind with + Instr _ -> vs + | s -> let u, d = UD.computeUseDefStmtKind s in + VS.union u (VS.diff vs d) + in + DF.Post handle_stm + + let doInstr i vs = + let transform vs' = + let u,d = UD.computeUseDefInstr i in + VS.union u (VS.diff vs' d) + in + DF.Post transform + + let filterStmt stm1 stm2 = true + +end + +module L = DF.BackwardsDataFlow(LiveFlow) + +let sink_stmts = ref [] +class sinkFinderClass = object(self) + inherit nopCilVisitor + + method vstmt s = match s.succs with + [] -> (sink_stmts := s :: (!sink_stmts); + DoChildren) + | _ -> DoChildren + +end + +(* gives list of return statements from a function *) +(* fundec -> stm list *) +let find_sinks fdec = + sink_stmts := []; + ignore(visitCilFunction (new sinkFinderClass) fdec); + !sink_stmts + +(* XXX: This does not compute the best ordering to + * give to the work-list algorithm. + *) +let all_stmts = ref [] +class nullAdderClass = object(self) + inherit nopCilVisitor + + method vstmt s = + all_stmts := s :: (!all_stmts); + IH.add LiveFlow.stmtStartData s.sid VS.empty; + DoChildren + +end + +let null_adder fdec = + ignore(visitCilFunction (new nullAdderClass) fdec); + !all_stmts + +let computeLiveness fdec = + IH.clear LiveFlow.stmtStartData; + UD.onlyNoOffsetsAreDefs := false; + all_stmts := []; + let a = null_adder fdec in + L.compute a + +let print_everything () = + let d = IH.fold (fun i vs d -> + d ++ num i ++ text ": " ++ LiveFlow.pretty () vs) + LiveFlow.stmtStartData nil in + ignore(printf "%t" (fun () -> d)) + +let match_label lbl = match lbl with + Label(str,_,b) -> + if !debug then ignore(E.log "Liveness: label seen: %s\n" str); + (*b && *)(String.compare str (!live_label) = 0) +| _ -> false + +class doFeatureClass = object(self) + inherit nopCilVisitor + + method vfunc fd = + if String.compare fd.svar.vname (!live_func) = 0 then + (Cfg.clearCFGinfo fd; + ignore(Cfg.cfgFun fd); + computeLiveness fd; + if String.compare (!live_label) "" = 0 then + (printer := min_print; + print_everything(); + SkipChildren) + else DoChildren) + else SkipChildren + + method vstmt s = + if List.exists match_label s.labels then try + let vs = IH.find LiveFlow.stmtStartData s.sid in + (printer := min_print; + ignore(printf "%a" LiveFlow.pretty vs); + SkipChildren) + with Not_found -> + if !debug then ignore(E.log "Liveness: stmt: %d not found\n" s.sid); + DoChildren + else + (if List.length s.labels = 0 then + if !debug then ignore(E.log "Liveness: no label at sid=%d\n" s.sid); + DoChildren) + +end + +let do_live_feature (f:file) = + visitCilFile (new doFeatureClass) f + +let feature = + { + fd_name = "Liveness"; + fd_enabled = ref false; + fd_description = "Spit out live variables at a label"; + fd_extraopt = [ + "--live_label", + Arg.String (fun s -> live_label := s), + "Output the variables live at this label"; + "--live_func", + Arg.String (fun s -> live_func := s), + "Output the variables live at each statement in this function."; + "--live_debug", + Arg.Unit (fun n -> debug := true), + "Print lots of debugging info";]; + fd_doit = do_live_feature; + fd_post_check = false + } diff --git a/cil/src/ext/logcalls.ml b/cil/src/ext/logcalls.ml new file mode 100644 index 00000000..0cdbc153 --- /dev/null +++ b/cil/src/ext/logcalls.ml @@ -0,0 +1,268 @@ +(** See copyright notice at the end of this file *) + +(** Add printf before each function call *) + +open Pretty +open Cil +open Trace +module E = Errormsg +module H = Hashtbl + +let i = ref 0 +let name = ref "" + +(* Switches *) +let printFunctionName = ref "printf" + +let addProto = ref false + +let printf: varinfo option ref = ref None +let makePrintfFunction () : varinfo = + match !printf with + Some v -> v + | None -> begin + let v = makeGlobalVar !printFunctionName + (TFun(voidType, Some [("format", charPtrType, [])], + true, [])) in + printf := Some v; + addProto := true; + v + end + +let mkPrint (format: string) (args: exp list) : instr = + let p: varinfo = makePrintfFunction () in + Call(None, Lval(var p), (mkString format) :: args, !currentLoc) + + +let d_string (fmt : ('a,unit,doc,string) format4) : 'a = + let f (d: doc) : string = + Pretty.sprint 200 d + in + Pretty.gprintf f fmt + +let currentFunc: string ref = ref "" + +class logCallsVisitorClass = object + inherit nopCilVisitor + + (* Watch for a declaration for our printer *) + + method vinst i = begin + match i with + | Call(lo,e,al,l) -> + let pre = mkPrint (d_string "call %a\n" d_exp e) [] in + let post = mkPrint (d_string "return from %a\n" d_exp e) [] in +(* + let str1 = prefix ^ + (Pretty.sprint 800 ( Pretty.dprintf "Calling %a(%a)\n" + d_exp e + (docList ~sep:(chr ',' ++ break ) (fun arg -> + try + match unrollType (typeOf arg) with + TInt _ | TEnum _ -> dprintf "%a = %%d" d_exp arg + | TFloat _ -> dprintf "%a = %%g" d_exp arg + | TVoid _ -> text "void" + | TComp _ -> text "comp" + | _ -> dprintf "%a = %%p" d_exp arg + with _ -> dprintf "%a = %%p" d_exp arg)) al)) in + let log_args = List.filter (fun arg -> + match unrollType (typeOf arg) with + TVoid _ | TComp _ -> false + | _ -> true) al in + let str2 = prefix ^ (Pretty.sprint 800 + ( Pretty.dprintf "Returned from %a\n" d_exp e)) in + let newinst str args = ((Call (None, Lval(var printfFun.svar), + ( [ (* one ; *) mkString str ] @ args), + locUnknown)) : instr )in + let ilist = ([ (newinst str1 log_args) ; i ; (newinst str2 []) ] : instr list) in + *) + ChangeTo [ pre; i; post ] + + | _ -> DoChildren + end + method vstmt (s : stmt) = begin + match s.skind with + Return _ -> + let pre = mkPrint (d_string "exit %s\n" !currentFunc) [] in + ChangeTo (mkStmt (Block (mkBlock [ mkStmtOneInstr pre; s ]))) + | _ -> DoChildren + +(* +(Some(e),l) -> + let str = prefix ^ Pretty.sprint 800 ( Pretty.dprintf + "Return(%%p) from %s\n" funstr ) in + let newinst = ((Call (None, Lval(var printfFun.svar), + ( [ (* one ; *) mkString str ; e ]), + locUnknown)) : instr )in + let new_stmt = mkStmtOneInstr newinst in + let slist = [ new_stmt ; s ] in + (ChangeTo(mkStmt(Block(mkBlock slist)))) + | Return(None,l) -> + let str = prefix ^ (Pretty.sprint 800 ( Pretty.dprintf + "Return void from %s\n" funstr)) in + let newinst = ((Call (None, Lval(var printfFun.svar), + ( [ (* one ; *) mkString str ]), + locUnknown)) : instr )in + let new_stmt = mkStmtOneInstr newinst in + let slist = [ new_stmt ; s ] in + (ChangeTo(mkStmt(Block(mkBlock slist)))) + | _ -> DoChildren +*) + end +end + +let logCallsVisitor = new logCallsVisitorClass + + +let logCalls (f: file) : unit = + + let doGlobal = function + | GVarDecl (v, _) when v.vname = !printFunctionName -> + if !printf = None then + printf := Some v + + | GFun (fdec, loc) -> + currentFunc := fdec.svar.vname; + (* do the body *) + ignore (visitCilFunction logCallsVisitor fdec); + (* Now add the entry instruction *) + let pre = mkPrint (d_string "enter %s\n" !currentFunc) [] in + fdec.sbody <- + mkBlock [ mkStmtOneInstr pre; + mkStmt (Block fdec.sbody) ] +(* + (* debugging 'anagram', it's really nice to be able to see the strings *) + (* inside fat pointers, even if it's a bit of a hassle and a hack here *) + let isFatCharPtr (cinfo:compinfo) = + cinfo.cname="wildp_char" || + cinfo.cname="fseqp_char" || + cinfo.cname="seqp_char" in + + (* Collect expressions that denote the actual arguments *) + let actargs = + (* make lvals out of args which pass test below *) + (List.map + (fun vi -> match unrollType vi.vtype with + | TComp(cinfo, _) when isFatCharPtr(cinfo) -> + (* access the _p field for these *) + (* luckily it's called "_p" in all three fat pointer variants *) + Lval(Var(vi), Field(getCompField cinfo "_p", NoOffset)) + | _ -> + Lval(var vi)) + + (* decide which args to pass *) + (List.filter + (fun vi -> match unrollType vi.vtype with + | TPtr(TInt(k, _), _) when isCharType(k) -> + !printPtrs || !printStrings + | TComp(cinfo, _) when isFatCharPtr(cinfo) -> + !printStrings + | TVoid _ | TComp _ -> false + | TPtr _ | TArray _ | TFun _ -> !printPtrs + | _ -> true) + fdec.sformals) + ) in + + (* make a format string for printing them *) + (* sm: expanded width to 200 because I want one per line *) + let formatstr = prefix ^ (Pretty.sprint 200 + (dprintf "entering %s(%a)\n" fdec.svar.vname + (docList ~sep:(chr ',' ++ break) + (fun vi -> match unrollType vi.vtype with + | TInt _ | TEnum _ -> dprintf "%s = %%d" vi.vname + | TFloat _ -> dprintf "%s = %%g" vi.vname + | TVoid _ -> dprintf "%s = (void)" vi.vname + | TComp(cinfo, _) -> ( + if !printStrings && isFatCharPtr(cinfo) then + dprintf "%s = \"%%s\"" vi.vname + else + dprintf "%s = (comp)" vi.vname + ) + | TPtr(TInt(k, _), _) when isCharType(k) -> ( + if (!printStrings) then + dprintf "%s = \"%%s\"" vi.vname + else if (!printPtrs) then + dprintf "%s = %%p" vi.vname + else + dprintf "%s = (str)" vi.vname + ) + | TPtr _ | TArray _ | TFun _ -> ( + if (!printPtrs) then + dprintf "%s = %%p" vi.vname + else + dprintf "%s = (ptr)" vi.vname + ) + | _ -> dprintf "%s = (?type?)" vi.vname)) + fdec.sformals)) in + + i := 0 ; + name := fdec.svar.vname ; + if !allInsts then ( + let thisVisitor = new verboseLogVisitor printfFun !name prefix in + fdec.sbody <- visitCilBlock thisVisitor fdec.sbody + ); + fdec.sbody.bstmts <- + mkStmt (Instr [Call (None, Lval(var printfFun.svar), + ( (* one :: *) mkString formatstr + :: actargs), + loc)]) :: fdec.sbody.bstmts + *) + | _ -> () + in + Stats.time "logCalls" (iterGlobals f) doGlobal; + if !addProto then begin + let p = makePrintfFunction () in + E.log "Adding prototype for call logging function %s\n" p.vname; + f.globals <- GVarDecl (p, locUnknown) :: f.globals + end + +let feature : featureDescr = + { fd_name = "logcalls"; + fd_enabled = Cilutil.logCalls; + fd_description = "generation of code to log function calls"; + fd_extraopt = [ + ("--logcallprintf", Arg.String (fun s -> printFunctionName := s), + "the name of the printf function to use"); + ("--logcalladdproto", Arg.Unit (fun s -> addProto := true), + "whether to add the prototype for the printf function") + ]; + fd_doit = logCalls; + fd_post_check = true + } + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/logcalls.mli b/cil/src/ext/logcalls.mli new file mode 100644 index 00000000..22a1e96a --- /dev/null +++ b/cil/src/ext/logcalls.mli @@ -0,0 +1,41 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(* A simple CIL transformer that inserts calls to a runtime function to log + * the call in each function *) +val feature: Cil.featureDescr diff --git a/cil/src/ext/logwrites.ml b/cil/src/ext/logwrites.ml new file mode 100644 index 00000000..3afd0679 --- /dev/null +++ b/cil/src/ext/logwrites.ml @@ -0,0 +1,139 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +open Pretty +open Cil +module E = Errormsg +module H = Hashtbl + +(* David Park at Stanford points out that you cannot take the address of a + * bitfield in GCC. *) + +(* Returns true if the given lvalue offset ends in a bitfield access. *) +let rec is_bitfield lo = match lo with + | NoOffset -> false + | Field(fi,NoOffset) -> not (fi.fbitfield = None) + | Field(_,lo) -> is_bitfield lo + | Index(_,lo) -> is_bitfield lo + +(* Return an expression that evaluates to the address of the given lvalue. + * For most lvalues, this is merely AddrOf(lv). However, for bitfields + * we do some offset gymnastics. + *) +let addr_of_lv (lh,lo) = + if is_bitfield lo then begin + (* we figure out what the address would be without the final bitfield + * access, and then we add in the offset of the bitfield from the + * beginning of its enclosing comp *) + let rec split_offset_and_bitfield lo = match lo with + | NoOffset -> failwith "logwrites: impossible" + | Field(fi,NoOffset) -> (NoOffset,fi) + | Field(e,lo) -> let a,b = split_offset_and_bitfield lo in + ((Field(e,a)),b) + | Index(e,lo) -> let a,b = split_offset_and_bitfield lo in + ((Index(e,a)),b) + in + let new_lv_offset, bf = split_offset_and_bitfield lo in + let new_lv = (lh, new_lv_offset) in + let enclosing_type = TComp(bf.fcomp, []) in + let bits_offset, bits_width = + bitsOffset enclosing_type (Field(bf,NoOffset)) in + let bytes_offset = bits_offset / 8 in + let lvPtr = mkCast ~e:(mkAddrOf (new_lv)) ~newt:(charPtrType) in + (BinOp(PlusPI, lvPtr, (integer bytes_offset), ulongType)) + end else (AddrOf (lh,lo)) + +class logWriteVisitor = object + inherit nopCilVisitor + (* Create a prototype for the logging function, but don't put it in the + * file *) + val printfFun = + let fdec = emptyFunction "syslog" in + fdec.svar.vtype <- TFun(intType, + Some [ ("prio", intType, []); + ("format", charConstPtrType, []) ], + true, []); + fdec + + method vinst (i: instr) : instr list visitAction = + match i with + Set(lv, e, l) -> begin + (* Check if we need to log *) + match lv with + (Var(v), off) when not v.vglob -> SkipChildren + | _ -> let str = Pretty.sprint 80 + (Pretty.dprintf "Write %%p to 0x%%08x at %%s:%%d (%a)\n" d_lval lv) + in + ChangeTo + [ Call((None), (Lval(Var(printfFun.svar),NoOffset)), + [ one ; + mkString str ; e ; addr_of_lv lv; + mkString l.file; + integer l.line], locUnknown); + i] + end + | Call(Some lv, f, args, l) -> begin + (* Check if we need to log *) + match lv with + (Var(v), off) when not v.vglob -> SkipChildren + | _ -> let str = Pretty.sprint 80 + (Pretty.dprintf "Write retval to 0x%%08x at %%s:%%d (%a)\n" d_lval lv) + in + ChangeTo + [ Call((None), (Lval(Var(printfFun.svar),NoOffset)), + [ one ; + mkString str ; AddrOf lv; + mkString l.file; + integer l.line], locUnknown); + i] + end + | _ -> SkipChildren + +end + +let feature : featureDescr = + { fd_name = "logwrites"; + fd_enabled = Cilutil.logWrites; + fd_description = "generation of code to log memory writes"; + fd_extraopt = []; + fd_doit = + (function (f: file) -> + let lwVisitor = new logWriteVisitor in + visitCilFileSameGlobals lwVisitor f); + fd_post_check = true; + } + diff --git a/cil/src/ext/oneret.ml b/cil/src/ext/oneret.ml new file mode 100644 index 00000000..b3ce4a10 --- /dev/null +++ b/cil/src/ext/oneret.ml @@ -0,0 +1,187 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* Make sure that there is exactly one Return statement in the whole body. + * Replace all the other returns with Goto. This is convenient if you later + * want to insert some finalizer code, since you have a precise place where + * to put it *) +open Cil +open Pretty + +module E = Errormsg + +let dummyVisitor = new nopCilVisitor + +let oneret (f: Cil.fundec) : unit = + let fname = f.svar.vname in + (* Get the return type *) + let retTyp = + match f.svar.vtype with + TFun(rt, _, _, _) -> rt + | _ -> E.s (E.bug "Function %s does not have a function type\n" + f.svar.vname) + in + (* Does it return anything ? *) + let hasRet = match unrollType retTyp with TVoid _ -> false | _ -> true in + + (* Memoize the return result variable. Use only if hasRet *) + let lastloc = ref locUnknown in + let retVar : varinfo option ref = ref None in + let getRetVar (x: unit) : varinfo = + match !retVar with + Some rv -> rv + | None -> begin + let rv = makeLocalVar f "__retres" retTyp in (* don't collide *) + retVar := Some rv; + rv + end + in + (* Remember if we have introduced goto's *) + let haveGoto = ref false in + (* Memoize the return statement *) + let retStmt : stmt ref = ref dummyStmt in + let getRetStmt (x: unit) : stmt = + if !retStmt == dummyStmt then begin + (* Must create a statement *) + let rv = + if hasRet then Some (Lval(Var (getRetVar ()), NoOffset)) else None + in + let sr = mkStmt (Return (rv, !lastloc)) in + retStmt := sr; + sr + end else + !retStmt + in + (* Now scan all the statements. Know if you are the main body of the + * function and be prepared to add new statements at the end *) + let rec scanStmts (mainbody: bool) = function + | [] when mainbody -> (* We are at the end of the function. Now it is + * time to add the return statement *) + let rs = getRetStmt () in + if !haveGoto then + rs.labels <- (Label("return_label", !lastloc, false)) :: rs.labels; + [rs] + + | [] -> [] + + | ({skind=Return (retval, l)} as s) :: rests -> + currentLoc := l; +(* + ignore (E.log "Fixing return(%a) at %a\n" + insert + (match retval with None -> text "None" + | Some e -> d_exp () e) + d_loc l); +*) + if hasRet && retval = None then + E.s (error "Found return without value in function %s\n" fname); + if not hasRet && retval <> None then + E.s (error "Found return in subroutine %s\n" fname); + (* Keep this statement because it might have labels. But change it to + * an instruction that sets the return value (if any). *) + s.skind <- begin + match retval with + Some rval -> Instr [Set((Var (getRetVar ()), NoOffset), rval, l)] + | None -> Instr [] + end; + (* See if this is the last statement in function *) + if mainbody && rests == [] then + s :: scanStmts mainbody rests + else begin + (* Add a Goto *) + let sgref = ref (getRetStmt ()) in + let sg = mkStmt (Goto (sgref, l)) in + haveGoto := true; + s :: sg :: (scanStmts mainbody rests) + end + + | ({skind=If(eb,t,e,l)} as s) :: rests -> + currentLoc := l; + s.skind <- If(eb, scanBlock false t, scanBlock false e, l); + s :: scanStmts mainbody rests +(* + | ({skind=Loop(b,l,lb1,lb2)} as s) :: rests -> + currentLoc := l; + s.skind <- Loop(scanBlock false b, l,lb1,lb2); + s :: scanStmts mainbody rests +*) + | ({skind=While(e,b,l)} as s) :: rests -> + currentLoc := l; + s.skind <- While(e, scanBlock false b, l); + s :: scanStmts mainbody rests + | ({skind=DoWhile(e,b,l)} as s) :: rests -> + currentLoc := l; + s.skind <- DoWhile(e, scanBlock false b, l); + s :: scanStmts mainbody rests + | ({skind=For(bInit,e,bIter,b,l)} as s) :: rests -> + currentLoc := l; + s.skind <- For(scanBlock false bInit, e, scanBlock false bIter, + scanBlock false b, l); + s :: scanStmts mainbody rests + | ({skind=Switch(e, b, cases, l)} as s) :: rests -> + currentLoc := l; + s.skind <- Switch(e, scanBlock false b, cases, l); + s :: scanStmts mainbody rests + | ({skind=Block b} as s) :: rests -> + s.skind <- Block (scanBlock false b); + s :: scanStmts mainbody rests + | ({skind=(Goto _ | Instr _ | Continue _ | Break _ + | TryExcept _ | TryFinally _)} as s) + :: rests -> s :: scanStmts mainbody rests + + and scanBlock (mainbody: bool) (b: block) = + { bstmts = scanStmts mainbody b.bstmts; battrs = b.battrs; } + + in + ignore (visitCilBlock dummyVisitor f.sbody) ; (* sets CurrentLoc *) + lastloc := !currentLoc ; (* last location in the function *) + f.sbody <- scanBlock true f.sbody + + +let feature : featureDescr = + { fd_name = "oneRet"; + fd_enabled = Cilutil.doOneRet; + fd_description = "make each function have at most one 'return'" ; + fd_extraopt = []; + fd_doit = (function (f: file) -> + Cil.iterGlobals f (fun glob -> match glob with + Cil.GFun(fd,_) -> oneret fd; + | _ -> ())); + fd_post_check = true; + } diff --git a/cil/src/ext/oneret.mli b/cil/src/ext/oneret.mli new file mode 100644 index 00000000..f98ab4d1 --- /dev/null +++ b/cil/src/ext/oneret.mli @@ -0,0 +1,44 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(* Make sure that there is only one Return statement in the whole body. + * Replace all the other returns with Goto. Make sure that there is a return + * if the function is supposed to return something, and it is not declared to + * not return. *) +val oneret: Cil.fundec -> unit +val feature : Cil.featureDescr diff --git a/cil/src/ext/partial.ml b/cil/src/ext/partial.ml new file mode 100644 index 00000000..4beca3fc --- /dev/null +++ b/cil/src/ext/partial.ml @@ -0,0 +1,851 @@ +(* See copyright notice at the end of the file *) +(***************************************************************************** + * Partial Evaluation & Constant Folding + * + * Soundness Assumptions: + * (1) Whole program analysis. You may call functions that are not defined + * (e.g., library functions) but they may not call back. + * (2) An undefined function may not return the address of a function whose + * address is not already taken in the code I can see. + * (3) A function pointer call may only call a function that has its + * address visibly taken in the code I can see. + * + * (More assumptions in the comments below) + *****************************************************************************) +open Cil +open Pretty + +(***************************************************************************** + * A generic signature for Alias Analysis information. Used to compute the + * call graph and do symbolic execution. + ****************************************************************************) +module type AliasInfo = + sig + val can_have_the_same_value : Cil.exp -> Cil.exp -> bool + val resolve_function_pointer : Cil.exp -> (Cil.fundec list) + end + +(***************************************************************************** + * A generic signature for Symbolic Execution execution algorithms. Such + * algorithms are used below to perform constant folding and dead-code + * elimination. You write a "basic-block" symex algorithm, we'll make it + * a whole-program CFG-pruner. + ****************************************************************************) +module type Symex = + sig + type t (* the type of a symex algorithm state object *) + val empty : t (* all values unknown *) + val equal : t -> t -> bool (* are these the same? *) + val assign : t -> Cil.lval -> Cil.exp -> (Cil.exp * t) + (* incorporate an assignment, return the RHS *) + val unassign : t -> Cil.lval -> t + (* lose all information about the given lvalue: assume an + * unknown external value has been assigned to it *) + val assembly : t -> Cil.instr -> t (* handle ASM *) + val assume : t -> Cil.exp -> t (* incorporate an assumption *) + val evaluate : t -> Cil.exp -> Cil.exp (* symbolic evaluation *) + val join : (t list) -> t (* join a bunch of states *) + val call : t -> Cil.fundec -> (Cil.exp list) -> (Cil.exp list * t) + (* we are calling the given function with the given actuals *) + val return : t -> Cil.fundec -> t + (* we are returning from the given function *) + val call_to_unknown_function : t -> t + (* throw away information that may have been changed *) + val debug : t -> unit + end + +(***************************************************************************** + * A generic signature for whole-progam call graphs. + ****************************************************************************) +module type CallGraph = + sig + type t (* the type of a call graph *) + val compute : Cil.file -> t (* file for which we compute the graph *) + val can_call : t -> Cil.fundec -> (Cil.fundec list) + val can_be_called_by : t -> Cil.fundec -> (Cil.fundec list) + val fundec_of_varinfo : t -> Cil.varinfo -> Cil.fundec + end + +(***************************************************************************** + * My cheap-o Alias Analysis. Assume all expressions can have the same + * value and any function with its address taken can be the target of + * any function pointer. + * + * Soundness Assumptions: + * (1) Someone must call "find_all_functions_With_address_taken" before the + * results are valid. This is already done in the code below. + ****************************************************************************) +let all_functions_with_address_taken = ref [] +let find_all_functions_with_address_taken (f : Cil.file) = + iterGlobals f (fun g -> match g with + GFun(fd,_) -> if fd.svar.vaddrof then + all_functions_with_address_taken := fd :: + !all_functions_with_address_taken + | _ -> ()) + +module EasyAlias = + struct + let can_have_the_same_value e1 e2 = true + let resolve_function_pointer e1 = !all_functions_with_address_taken + end + +(***************************************************************************** + * My particular method for computing the Call Graph. + ****************************************************************************) +module EasyCallGraph = functor (A : AliasInfo) -> + struct + type callGraphNode = { + fd : Cil.fundec ; + mutable calledBy : Cil.fundec list ; + mutable calls : Cil.fundec list ; + } + type t = (Cil.varinfo, callGraphNode) Hashtbl.t + + let cgCreateNode cg fundec = + let newnode = { fd = fundec ; calledBy = [] ; calls = [] } in + Hashtbl.add cg fundec.svar newnode + + let cgFindNode cg svar = Hashtbl.find cg svar + + let cgAddEdge cg caller callee = + try + let n1 = cgFindNode cg caller in + let n2 = cgFindNode cg callee in + n1.calls <- n2.fd :: n1.calls ; + n1.calledBy <- n1.fd :: n1.calledBy + with _ -> () + + class callGraphVisitor cg = object + inherit nopCilVisitor + val the_fun = ref None + + method vinst i = + let _ = match i with + Call(_,Lval(Var(callee),NoOffset),_,_) -> begin + (* known function call *) + match !the_fun with + None -> failwith "callGraphVisitor: call outside of any function" + | Some(enclosing) -> cgAddEdge cg enclosing callee + end + | Call(_,e,_,_) -> begin + (* unknown function call *) + match !the_fun with + None -> failwith "callGraphVisitor: call outside of any function" + | Some(enclosing) -> let lst = A.resolve_function_pointer e in + List.iter (fun possible_target_fd -> + cgAddEdge cg enclosing possible_target_fd.svar) lst + end + | _ -> () + in SkipChildren + + method vfunc f = the_fun := Some(f.svar) ; DoChildren + end + + let compute (f : Cil.file) = + let cg = Hashtbl.create 511 in + iterGlobals f (fun g -> match g with + GFun(fd,_) -> cgCreateNode cg fd + | _ -> () + ) ; + visitCilFileSameGlobals (new callGraphVisitor cg) f ; + cg + + let can_call cg fd = + let n = cgFindNode cg fd.svar in n.calls + let can_be_called_by cg fd = + let n = cgFindNode cg fd.svar in n.calledBy + let fundec_of_varinfo cg vi = + let n = cgFindNode cg vi in n.fd + end (* END OF: module EasyCallGraph *) + +(***************************************************************************** + * Necula's Constant Folding Strategem (re-written to be applicative) + * + * Soundness Assumptions: + * (1) Inline assembly does not affect constant folding. + ****************************************************************************) +module OrderedInt = + struct + type t = int + let compare = compare + end +module IntMap = Map.Make(OrderedInt) + +module NeculaFolding = functor (A : AliasInfo) -> + struct + (* Register file. Maps identifiers of local variables to expressions. + * We also remember if the expression depends on memory or depends on + * variables that depend on memory *) + type reg = { + rvi : varinfo ; + rval : exp ; + rmem : bool + } + type t = reg IntMap.t + let empty = IntMap.empty + let equal t1 t2 = (compare t1 t2 = 0) (* use OCAML here *) + let dependsOnMem = ref false + (* Rewrite an expression based on the current register file *) + class rewriteExpClass (regFile : t) = object + inherit nopCilVisitor + method vexpr = function + | Lval (Var v, NoOffset) -> begin + try + let defined = (IntMap.find v.vid regFile) in + if (defined.rmem) then dependsOnMem := true; + (match defined.rval with + | Const(x) -> ChangeTo (defined.rval) + | _ -> DoChildren) + with Not_found -> DoChildren + end + | Lval (Mem _, _) -> dependsOnMem := true; DoChildren + | _ -> DoChildren + end + (* Rewrite an expression and return the new expression along with an + * indication of whether it depends on memory *) + let rewriteExp r (e: exp) : exp * bool = + dependsOnMem := false; + let e' = constFold true (visitCilExpr (new rewriteExpClass r) e) in + e', !dependsOnMem + let eval r e = + let new_e, depends = rewriteExp r e in + new_e + + let setMemory regFile = + (* Get a list of all mappings that depend on memory *) + let depids = ref [] in + IntMap.iter (fun id v -> if v.rmem then depids := id :: !depids) regFile; + (* And remove them from the register file *) + List.fold_left (fun acc id -> IntMap.remove id acc) regFile !depids + + let setRegister regFile (v: varinfo) ((e,b): exp * bool) = + IntMap.add v.vid { rvi = v ; rval = e ; rmem = b; } regFile + + let resetRegister regFile (id: int) = + IntMap.remove id regFile + + class findLval lv contains = object + inherit nopCilVisitor + method vlval l = + if l = lv then + (contains := true ; SkipChildren) + else + DoChildren + end + + let removeMappingsThatDependOn regFile l = + (* Get a list of all mappings that depend on l *) + let depids = ref [] in + IntMap.iter (fun id reg -> + let found = ref false in + ignore (visitCilExpr (new findLval l found) reg.rval) ; + if !found then + depids := id :: !depids + ) regFile ; + (* And remove them from the register file *) + List.fold_left (fun acc id -> IntMap.remove id acc) regFile !depids + + let assign r l e = + let (newe,b) = rewriteExp r e in + let r' = match l with + (Var v, NoOffset) -> + let r'' = setRegister r v (newe,b) in + removeMappingsThatDependOn r'' l + | (Mem _, _) -> setMemory r + | _ -> r + in newe, r' + + let unassign r l = + let r' = match l with + (Var v, NoOffset) -> + let r'' = resetRegister r v.vid in + removeMappingsThatDependOn r'' l + | (Mem _, _) -> setMemory r + | _ -> r + in r' + + let assembly r i = r (* no-op in Necula-world *) + let assume r e = r (* no-op in Necula-world *) + + let evaluate r e = + let (newe,_) = rewriteExp r e in + newe + + (* Join two symex states *) + let join2 (r1 : t) (r2 : t) = + let keep = ref [] in + IntMap.iter (fun id reg -> + try + let reg' = IntMap.find id r2 in + if reg'.rval = reg.rval && reg'.rmem = reg.rmem then + keep := (id,reg) :: !keep + with _ -> () + ) r1 ; + List.fold_left (fun acc (id,v) -> + IntMap.add id v acc) (IntMap.empty) !keep + + let join (lst : t list) = match lst with + [] -> failwith "empty list" + | r :: tl -> List.fold_left + (fun (acc : t) (elt : t) -> join2 acc elt) r tl + + let call r fd el = + let new_arg_list = ref [] in + let final_r = List.fold_left2 (fun r vi e -> + let newe, r' = assign r ((Var(vi),NoOffset)) e in + new_arg_list := newe :: !new_arg_list ; + r' + ) r fd.sformals el in + (List.rev !new_arg_list), final_r + + let return r fd = + let regFile = + List.fold_left (fun r vi -> IntMap.remove vi.vid r) r fd.sformals + in + (* Get a list of all globals *) + let depids = ref [] in + IntMap.iter (fun vid reg -> + if reg.rvi.vglob || reg.rvi.vaddrof then depids := vid :: !depids + ) regFile ; + (* And remove them from the register file *) + List.fold_left (fun acc id -> IntMap.remove id acc) regFile !depids + + + let call_to_unknown_function r = + setMemory r + + let debug r = + IntMap.iter (fun key reg -> + ignore (Pretty.printf "%s <- %a (%b)@!" reg.rvi.vname d_exp reg.rval reg.rmem) + ) r + end (* END OF: NeculaFolding *) + +(***************************************************************************** + * A transformation to make every function call end its statement. So + * { x=1; Foo(); y=1; } + * becomes at least: + * { { x=1; Foo(); } + * { y=1; } } + * But probably more like: + * { { x=1; } { Foo(); } { y=1; } } + ****************************************************************************) +let rec contains_call il = match il with + [] -> false + | Call(_) :: tl -> true + | _ :: tl -> contains_call tl + +class callBBVisitor = object + inherit nopCilVisitor + + method vstmt s = + match s.skind with + Instr(il) when contains_call il -> begin + let list_of_stmts = List.map (fun one_inst -> + mkStmtOneInstr one_inst) il in + let block = mkBlock list_of_stmts in + ChangeDoChildrenPost(s, (fun _ -> + s.skind <- Block(block) ; + s)) + end + | _ -> DoChildren + + method vvdec _ = SkipChildren + method vexpr _ = SkipChildren + method vlval _ = SkipChildren + method vtype _ = SkipChildren +end + +let calls_end_basic_blocks f = + let thisVisitor = new callBBVisitor in + visitCilFileSameGlobals thisVisitor f + +(***************************************************************************** + * A transformation that gives each variable a unique identifier. + ****************************************************************************) +class vidVisitor = object + inherit nopCilVisitor + val count = ref 0 + + method vvdec vi = + vi.vid <- !count ; + incr count ; SkipChildren +end + +let globally_unique_vids f = + let thisVisitor = new vidVisitor in + visitCilFileSameGlobals thisVisitor f + +(***************************************************************************** + * The Weimeric Partial Evaluation Data-Flow Engine + * + * This functor performs flow-sensitive, context-insensitive whole-program + * data-flow analysis with an eye toward partial evaluation and constant + * folding. + * + * Toposort the whole-program inter-procedural CFG to compute + * (1) the number of actual predecessors for each statement + * (2) the global toposort ordering + * + * Perform standard data-flow analysis (joins, etc) on the ICFG until you + * hit a fixed point. If this changed the structure of the ICFG (by + * removing an IF-branch or an empty function call), redo the whole thing. + * + * Soundness Assumptions: + * (1) A "call instruction" is the last thing in its statement. + * Use "calls_end_basic_blocks" to get this. cil/src/main.ml does + * this when you pass --makeCFG. + * (2) All variables have globally unique identifiers. + * Use "globally_unique_vids" to get this. cil/src/main.ml does + * this when you pass --makeCFG. + * (3) This may not be a strict soundness requirement, but I wrote this + * assuming that the input file has all switch/break/continue + * statements removed. + ****************************************************************************) +module MakePartial = + functor (S : Symex) -> + functor (C : CallGraph) -> + functor (A : AliasInfo) -> + struct + + let debug = false + + (* We keep this information about every statement. Ideally this should + * be put in the stmt itself, but CIL doesn't give us space. *) + type sinfo = { (* statement info *) + incoming_state : (int, S.t) Hashtbl.t ; + (* mapping from stmt.sid to Symex.state *) + reachable_preds : (int, bool) Hashtbl.t ; + (* basically a set of all of the stmt.sids that can really + * reach this statement *) + mutable last_used_state : S.t option ; + (* When we last did the Post() of this statement, what + * incoming state did we use? If our new incoming state is + * the same, we don't have to do it again. *) + mutable priority : int ; + (* Whole-program toposort priority. High means "do me first". + * The first stmt in "main()" will have the highest priority. + *) + } + let sinfo_ht = Hashtbl.create 511 + let clear_sinfo () = Hashtbl.clear sinfo_ht + + (* We construct sinfo nodes lazily: if you ask for one that isn't + * there, we build it. *) + let get_sinfo stmt = + try + Hashtbl.find sinfo_ht stmt.sid + with _ -> + let new_sinfo = { incoming_state = Hashtbl.create 3 ; + reachable_preds = Hashtbl.create 3 ; + last_used_state = None ; + priority = (-1) ; } in + Hashtbl.add sinfo_ht stmt.sid new_sinfo ; + new_sinfo + + (* Topological Sort is a DFS in which you assign a priority right as + * you finished visiting the children. While we're there we compute + * the actual number of unique predecessors for each statement. The CIL + * information may be out of date because we keep changing the CFG by + * removing IFs and whatnot. *) + let toposort_counter = ref 1 + let add_edge s1 s2 = + let si2 = get_sinfo s2 in + Hashtbl.replace si2.reachable_preds s1.sid true + + let rec toposort c stmt = + let si = get_sinfo stmt in + if si.priority >= 0 then + () (* already visited! *) + else begin + si.priority <- 0 ; (* currently visiting *) + (* handle function calls in this basic block *) + (match stmt.skind with + (Instr(il)) -> + List.iter (fun i -> + let fd_list = match i with + Call(_,Lval(Var(vi),NoOffset),_,_) -> + begin + try + let fd = C.fundec_of_varinfo c vi in + [fd] + with e -> [] (* calling external function *) + end + | Call(_,e,_,_) -> + A.resolve_function_pointer e + | _ -> [] + in + List.iter (fun fd -> + if List.length fd.sbody.bstmts > 0 then + let fun_stmt = List.hd fd.sbody.bstmts in + add_edge stmt fun_stmt ; + toposort c fun_stmt + ) fd_list + ) il + | _ -> ()); + List.iter (fun succ -> + add_edge stmt succ ; toposort c succ) stmt.succs ; + si.priority <- !toposort_counter ; + incr toposort_counter + end + + (* we set this to true whenever we eliminate an IF or otherwise + * change the CFG *) + let changed_cfg = ref false + + (* Partially evaluate / constant fold a statement. Basically this just + * asks the Symex algorithm to evaluate the RHS in the current state + * and then compute a new state that incorporates the assignment. + * + * However, we have special handling for ifs and calls. If we can + * evaluate an if predicate to a constant, we remove the if. + * + * If we are going to make a call to a function with an empty body, we + * remove the function call. *) + let partial_stmt c state stmt handle_funcall = + let result = match stmt.skind with + Instr(il) -> + let state = ref state in + let new_il = List.map (fun i -> + if debug then begin + ignore (Pretty.printf "Instr %a@!" d_instr i ) + end ; + match i with + | Set(l,e,loc) -> + let e', state' = S.assign !state l e in + state := state' ; + [Set(l,e',loc)] + | Call(lo,(Lval(Var(vi),NoOffset)),al,loc) -> + let result = begin + try + let fd = C.fundec_of_varinfo c vi in + begin + match fd.sbody.bstmts with + [] -> [] (* no point in making this call *) + | hd :: tl -> + let al', state' = S.call !state fd al in + handle_funcall stmt hd state' ; + let state'' = S.return state' fd in + state := state'' ; + [Call(lo,(Lval(Var(vi),NoOffset)),al',loc)] + end + with e -> + let state'' = S.call_to_unknown_function !state in + let al' = List.map (S.evaluate !state) al in + state := state'' ; + [Call(lo,(Lval(Var(vi),NoOffset)),al',loc)] + end in + (* handle return value *) + begin + match lo with + Some(lv) -> state := S.unassign !state lv + | _ -> () + end ; + result + | Call(lo,f,al,loc) -> + let al' = List.map (S.evaluate !state) al in + state := S.call_to_unknown_function !state ; + (match lo with + Some(lv) -> state := S.unassign !state lv + | None -> ()) ; + [Call(lo,f,al',loc)] + | Asm(_) -> state := S.assembly !state i ; [i] + ) il in + stmt.skind <- Instr(List.flatten new_il) ; + if debug then begin + ignore (Pretty.printf "New Stmt is %a@!" d_stmt stmt) ; + end ; + !state + + | If(e,b1,b2,loc) -> + let e' = S.evaluate state e in + (* Pretty.printf "%a evals to %a\n" d_exp e d_exp e' ; *) + + (* helper function to remove an IF branch *) + let remove b remains = begin + changed_cfg := true ; + (match b.bstmts with + | [] -> () + | hd :: tl -> + stmt.succs <- List.filter (fun succ -> succ.sid <> hd.sid) + stmt.succs + ) + end in + + if (e' = one) then begin + if b2.bstmts = [] && b2.battrs = [] then begin + stmt.skind <- Block(b1) ; + match b1.bstmts with + [] -> failwith "partial: completely empty if" + | hd :: tl -> stmt.succs <- [hd] + end else + stmt.skind <- Block( + { bstmts = + [ mkStmt (Block(b1)) ; + mkStmt (If(zero,b2,{bstmts=[];battrs=[];},loc)) ] ; + battrs = [] } ) ; + remove b2 b1 ; + state + end else if (e' = zero) then begin + if b1.bstmts = [] && b1.battrs = [] then begin + stmt.skind <- Block(b2) ; + match b2.bstmts with + [] -> failwith "partial: completely empty if" + | hd :: tl -> stmt.succs <- [hd] + end else + stmt.skind <- Block( + { bstmts = + [ mkStmt (Block(b2)) ; + mkStmt (If(zero,b1,{bstmts=[];battrs=[];},loc)) ] ; + battrs = [] } ) ; + remove b1 b2 ; + state + end else begin + stmt.skind <- If(e',b1,b2,loc) ; + state + end + + | Return(Some(e),loc) -> + let e' = S.evaluate state e in + stmt.skind <- Return(Some(e'),loc) ; + state + + | Block(b) -> + if debug && List.length stmt.succs > 1 then begin + ignore (Pretty.printf "(%a) has successors [%a]@!" + d_stmt stmt + (docList ~sep:(chr '@') (d_stmt ())) + stmt.succs) + end ; + state + + | _ -> state + in result + + (* + * This is the main conceptual entry-point for the partial evaluation + * data-flow functor. + *) + let dataflow (file : Cil.file) (* whole program *) + (c : C.t) (* control-flow graph *) + (initial_state : S.t) (* any assumptions? *) + (initial_stmt : Cil.stmt) (* entry point *) + = begin + (* count the total number of statements in the program *) + let num_stmts = ref 1 in + iterGlobals file (fun g -> match g with + GFun(fd,_) -> begin + match fd.smaxstmtid with + Some(i) -> if i > !num_stmts then num_stmts := i + | None -> () + end + | _ -> () + ) ; + (if debug then + Printf.printf "Dataflow: at most %d statements in program\n" !num_stmts); + + (* create a priority queue in which to store statements *) + let worklist = Heap.create !num_stmts in + + let finished = ref false in + let passes = ref 0 in + + (* add something to the work queue *) + let enqueue caller callee state = begin + let si = get_sinfo callee in + Hashtbl.replace si.incoming_state caller.sid state ; + Heap.insert worklist si.priority callee + end in + + (* we will be finished when we complete a round of data-flow that + * does not change the ICFG *) + while not !finished do + clear_sinfo () ; + incr passes ; + + (* we must recompute the ordering and the predecessor information + * because we may have changed it by removing IFs *) + (if debug then Printf.printf "Dataflow: Topological Sorting & Reachability\n" ); + toposort c initial_stmt ; + + let initial_si = get_sinfo initial_stmt in + Heap.insert worklist initial_si.priority initial_stmt ; + + while not (Heap.is_empty worklist) do + let (p,s) = Heap.extract_max worklist in + if debug then begin + ignore (Pretty.printf "Working on stmt %d (%a) %a@!" + s.sid + (docList ~sep:(chr ',' ++ break) (fun s -> dprintf "%d" s.sid)) + s.succs + d_stmt s) ; + flush stdout ; + end ; + let si = get_sinfo s in + + (* Even though this stmt is on the worklist, we may not have + * to do anything with it if the join of all of the incoming + * states is the same as the last state we used here. *) + let must_recompute, incoming_state = + begin + let list_of_incoming_states = ref [] in + Hashtbl.iter (fun true_pred_sid b -> + let this_pred_state = + try + Hashtbl.find si.incoming_state true_pred_sid + with _ -> + (* this occurs when we're evaluating a statement and we + * have not yet evaluated all of its predecessors (the + * first time we look at a loop head, say). We must be + * conservative. We'll come back later with better + * information (as we work toward the fix-point). *) + S.empty + in + if debug then begin + Printf.printf " Incoming State from %d\n" true_pred_sid ; + S.debug this_pred_state ; + flush stdout ; + end ; + list_of_incoming_states := this_pred_state :: + !list_of_incoming_states + ) si.reachable_preds ; + let merged_incoming_state = + if !list_of_incoming_states = [] then + (* this occurs when we're looking at the first statement + * in "main" -- it has no preds *) + initial_state + else + S.join !list_of_incoming_states + in + if debug then begin + Printf.printf " Merged State:\n" ; + S.debug merged_incoming_state ; + flush stdout ; + end ; + let must_recompute = match si.last_used_state with + None -> true + | Some(last) -> not (S.equal merged_incoming_state last) + in must_recompute, merged_incoming_state + end + in + if must_recompute then begin + si.last_used_state <- Some(incoming_state) ; + let outgoing_state = + (* partially evaluate and optimize the statement *) + partial_stmt c incoming_state s enqueue in + let fresh_succs = s.succs in + (* touch every successor so that we will reconsider it *) + List.iter (fun succ -> + enqueue s succ outgoing_state + ) fresh_succs ; + end else begin + if debug then begin + Printf.printf "No need to recompute.\n" + end + end + done ; + (if debug then Printf.printf "Dataflow: Pass %d Complete\n" !passes) ; + if !changed_cfg then begin + (if debug then Printf.printf "Dataflow: Restarting (CFG Changed)\n") ; + changed_cfg := false + end else + finished := true + done ; + (if debug then Printf.printf "Dataflow: Completed (%d passes)\n" !passes) + + end + + let simplify file c fd (assumptions : (Cil.lval * Cil.exp) list) = + let starting_state = List.fold_left (fun s (l,e) -> + let e',s' = S.assign s l e in + s' + ) S.empty assumptions in + dataflow file c starting_state (List.hd fd.sbody.bstmts) + + end + + +(* + * Currently our partial-eval optimizer is built out of basically nothing. + * The alias analysis is fake, the call grpah is cheap, and we're using + * George's old basic-block symex. Still, it works. + *) +(* Don't you love Functor application? *) +module BasicCallGraph = EasyCallGraph(EasyAlias) +module BasicSymex = NeculaFolding(EasyAlias) +module BasicPartial = MakePartial(BasicSymex)(BasicCallGraph)(EasyAlias) + +(* + * A very easy entry-point to partial evaluation/symbolic execution. + * You pass the Cil file and a list of assumptions (lvalue, exp pairs that + * should be treated as assignments that occur before the program starts). + * + * We partially evaluate and optimize starting from "main". The Cil.file + * is modified in place. + *) +let partial (f : Cil.file) (assumptions : (Cil.lval * Cil.exp) list) = + try + find_all_functions_with_address_taken f ; + let c = BasicCallGraph.compute f in + try + iterGlobals f (fun g -> match g with + GFun(fd,_) when fd.svar.vname = "main" -> + BasicPartial.simplify f c fd assumptions + | _ -> ()) ; + with e -> begin + Printf.printf "Error in DataFlow: %s\n" (Printexc.to_string e) ; + raise e + end + with e -> begin + Printf.printf "Error in Partial: %s\n" (Printexc.to_string e) ; + raise e + end + +let feature : featureDescr = + { fd_name = "partial"; + fd_enabled = Cilutil.doPartial; + fd_description = "interprocedural partial evaluation and constant folding" ; + fd_extraopt = []; + fd_doit = (function (f: file) -> + if not !Cilutil.makeCFG then begin + Errormsg.s (Errormsg.error "--dopartial: you must also specify --domakeCFG\n") + end ; + partial f [] ) ; + fd_post_check = false; + } + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/pta/golf.ml b/cil/src/ext/pta/golf.ml new file mode 100644 index 00000000..5ea47ff1 --- /dev/null +++ b/cil/src/ext/pta/golf.ml @@ -0,0 +1,1657 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* Exceptions *) +(* *) +(***********************************************************************) + +exception Inconsistent (* raised if constraint system is inconsistent *) +exception WellFormed (* raised if types are not well-formed *) +exception NoContents +exception APFound (* raised if an alias pair is found, a control + flow exception *) + + +module U = Uref +module S = Setp +module H = Hashtbl +module Q = Queue + + +(** Subtyping kinds *) +type polarity = + Pos + | Neg + | Sub + +(** Path kinds, for CFL reachability *) +type pkind = + Positive + | Negative + | Match + | Seed + +(** Context kinds -- open or closed *) +type context = + Open + | Closed + +(* A configuration is a context (open or closed) coupled with a pair + of stamps representing a state in the cartesian product DFA. *) +type configuration = context * int * int + +module ConfigHash = +struct + type t = configuration + let equal t t' = t = t' + let hash t = Hashtbl.hash t +end + +module CH = H.Make (ConfigHash) + +type config_map = unit CH.t + +(** Generic bounds *) +type 'a bound = {index : int; info : 'a U.uref} + +(** For label paths. *) +type 'a path = { + kind : pkind; + reached_global : bool; + head : 'a U.uref; + tail : 'a U.uref +} + +module Bound = +struct + type 'a t = 'a bound + let compare (x : 'a t) (y : 'a t) = + if U.equal (x.info, y.info) then x.index - y.index + else Pervasives.compare (U.deref x.info) (U.deref y.info) +end + +module Path = +struct + type 'a t = 'a path + let compare (x : 'a t) (y : 'a t) = + if U.equal (x.head, y.head) then + begin + if U.equal (x.tail, y.tail) then + begin + if x.reached_global = y.reached_global then + Pervasives.compare x.kind y.kind + else Pervasives.compare x.reached_global y.reached_global + end + else Pervasives.compare (U.deref x.tail) (U.deref y.tail) + end + else Pervasives.compare (U.deref x.head) (U.deref y.head) +end + +module B = S.Make (Bound) + +module P = S.Make (Path) + +type 'a boundset = 'a B.t + +type 'a pathset = 'a P.t + +(** Constants, which identify elements in points-to sets *) +(** jk : I'd prefer to make this an 'a constant and specialize it to varinfo + for use with the Cil frontend, but for now, this will do *) +type constant = int * string * Cil.varinfo + +module Constant = +struct + type t = constant + let compare (xid, _, _) (yid, _, _) = xid - yid +end +module C = Set.Make (Constant) + +(** Sets of constants. Set union is used when two labels containing + constant sets are unified *) +type constantset = C.t + +type lblinfo = { + mutable l_name: string; + (** either empty or a singleton, the initial location for this label *) + loc : constantset; + (** Name of this label *) + l_stamp : int; + (** Unique integer for this label *) + mutable l_global : bool; + (** True if this location is globally accessible *) + mutable aliases: constantset; + (** Set of constants (tags) for checking aliases *) + mutable p_lbounds: lblinfo boundset; + (** Set of umatched (p) lower bounds *) + mutable n_lbounds: lblinfo boundset; + (** Set of unmatched (n) lower bounds *) + mutable p_ubounds: lblinfo boundset; + (** Set of umatched (p) upper bounds *) + mutable n_ubounds: lblinfo boundset; + (** Set of unmatched (n) upper bounds *) + mutable m_lbounds: lblinfo boundset; + (** Set of matched (m) lower bounds *) + mutable m_ubounds: lblinfo boundset; + (** Set of matched (m) upper bounds *) + + mutable m_upath: lblinfo pathset; + mutable m_lpath: lblinfo pathset; + mutable n_upath: lblinfo pathset; + mutable n_lpath: lblinfo pathset; + mutable p_upath: lblinfo pathset; + mutable p_lpath: lblinfo pathset; + + mutable l_seeded : bool; + mutable l_ret : bool; + mutable l_param : bool; +} + +(** Constructor labels *) +and label = lblinfo U.uref + +(** The type of lvalues. *) +type lvalue = { + l: label; + contents: tau +} + +and vinfo = { + v_stamp : int; + v_name : string; + + mutable v_hole : (int,unit) H.t; + mutable v_global : bool; + mutable v_mlbs : tinfo boundset; + mutable v_mubs : tinfo boundset; + mutable v_plbs : tinfo boundset; + mutable v_pubs : tinfo boundset; + mutable v_nlbs : tinfo boundset; + mutable v_nubs : tinfo boundset +} + +and rinfo = { + r_stamp : int; + rl : label; + points_to : tau; + mutable r_global: bool; +} + +and finfo = { + f_stamp : int; + fl : label; + ret : tau; + mutable args : tau list; + mutable f_global : bool; +} + +and pinfo = { + p_stamp : int; + ptr : tau; + lam : tau; + mutable p_global : bool; +} + +and tinfo = Var of vinfo + | Ref of rinfo + | Fun of finfo + | Pair of pinfo + +and tau = tinfo U.uref + +type tconstraint = Unification of tau * tau + | Leq of tau * (int * polarity) * tau + + +(** Association lists, used for printing recursive types. The first element + is a type that has been visited. The second element is the string + representation of that type (so far). If the string option is set, then + this type occurs within itself, and is associated with the recursive var + name stored in the option. When walking a type, add it to an association + list. + + Example : suppose we have the constraint 'a = ref('a). The type is unified + via cyclic unification, and would loop infinitely if we attempted to print + it. What we want to do is print the type u rv. ref(rv). This is accomplished + in the following manner: + + -- ref('a) is visited. It is not in the association list, so it is added + and the string "ref(" is stored in the second element. We recurse to print + the first argument of the constructor. + + -- In the recursive call, we see that 'a (or ref('a)) is already in the + association list, so the type is recursive. We check the string option, + which is None, meaning that this is the first recurrence of the type. We + create a new recursive variable, rv and set the string option to 'rv. Next, + we prepend u rv. to the string representation we have seen before, "ref(", + and return "rv" as the string representation of this type. + + -- The string so far is "u rv.ref(". The recursive call returns, and we + complete the type by printing the result of the call, "rv", and ")" + + In a type where the recursive variable appears twice, e.g. 'a = pair('a,'a), + the second time we hit 'a, the string option will be set, so we know to + reuse the same recursive variable name. +*) +type association = tau * string ref * string option ref + +module PathHash = +struct + type t = int list + let equal t t' = t = t' + let hash t = Hashtbl.hash t +end + +module PH = H.Make (PathHash) + +(***********************************************************************) +(* *) +(* Global Variables *) +(* *) +(***********************************************************************) + +(** Print the instantiations constraints. *) +let print_constraints : bool ref = ref false + +(** If true, print all constraints (including induced) and show + additional debug output. *) +let debug = ref false + +(** Just debug all the constraints (including induced) *) +let debug_constraints = ref false + +(** Debug smart alias queries *) +let debug_aliases = ref false + +let smart_aliases = ref false + +(** If true, make the flow step a no-op *) +let no_flow = ref false + +(** If true, disable subtyping (unification at all levels) *) +let no_sub = ref false + +(** If true, treat indexed edges as regular subtyping *) +let analyze_mono = ref true + +(** A list of equality constraints. *) +let eq_worklist : tconstraint Q.t = Q.create () + +(** A list of leq constraints. *) +let leq_worklist : tconstraint Q.t = Q.create () + +let path_worklist : (lblinfo path) Q.t = Q.create () + +let path_hash : (lblinfo path) PH.t = PH.create 32 + +(** A count of the constraints introduced from the AST. Used for debugging. *) +let toplev_count = ref 0 + +(** A hashtable containing stamp pairs of labels that must be aliased. *) +let cached_aliases : (int * int,unit) H.t = H.create 64 + +(** A hashtable mapping pairs of tau's to their join node. *) +let join_cache : (int * int, tau) H.t = H.create 64 + +(***********************************************************************) +(* *) +(* Utility Functions *) +(* *) +(***********************************************************************) + +let find = U.deref + +let die s = + Printf.printf "*******\nAssertion failed: %s\n*******\n" s; + assert false + +let fresh_appsite : (unit -> int) = + let appsite_index = ref 0 in + fun () -> + incr appsite_index; + !appsite_index + +(** Generate a unique integer. *) +let fresh_index : (unit -> int) = + let counter = ref 0 in + fun () -> + incr counter; + !counter + +let fresh_stamp : (unit -> int) = + let stamp = ref 0 in + fun () -> + incr stamp; + !stamp + +(** Return a unique integer representation of a tau *) +let get_stamp (t : tau) : int = + match find t with + Var v -> v.v_stamp + | Ref r -> r.r_stamp + | Pair p -> p.p_stamp + | Fun f -> f.f_stamp + +(** Negate a polarity. *) +let negate (p : polarity) : polarity = + match p with + Pos -> Neg + | Neg -> Pos + | Sub -> die "negate" + +(** Consistency checks for inferred types *) +let pair_or_var (t : tau) = + match find t with + Pair _ -> true + | Var _ -> true + | _ -> false + +let ref_or_var (t : tau) = + match find t with + Ref _ -> true + | Var _ -> true + | _ -> false + +let fun_or_var (t : tau) = + match find t with + Fun _ -> true + | Var _ -> true + | _ -> false + + + +(** Apply [f] structurally down [t]. Guaranteed to terminate, even if [t] + is recursive *) +let iter_tau f t = + let visited : (int,tau) H.t = H.create 4 in + let rec iter_tau' t = + if H.mem visited (get_stamp t) then () else + begin + f t; + H.add visited (get_stamp t) t; + match U.deref t with + Pair p -> + iter_tau' p.ptr; + iter_tau' p.lam + | Fun f -> + List.iter iter_tau' (f.args); + iter_tau' f.ret + | Ref r -> iter_tau' r.points_to + | _ -> () + end + in + iter_tau' t + +(* Extract a label's bounds according to [positive] and [upper]. *) +let get_bounds (p :polarity ) (upper : bool) (l : label) : lblinfo boundset = + let li = find l in + match p with + Pos -> if upper then li.p_ubounds else li.p_lbounds + | Neg -> if upper then li.n_ubounds else li.n_lbounds + | Sub -> if upper then li.m_ubounds else li.m_lbounds + +let equal_tau (t : tau) (t' : tau) = + get_stamp t = get_stamp t' + +let get_label_stamp (l : label) : int = + (find l).l_stamp + +(** Return true if [t] is global (treated monomorphically) *) +let get_global (t : tau) : bool = + match find t with + Var v -> v.v_global + | Ref r -> r.r_global + | Pair p -> p.p_global + | Fun f -> f.f_global + +let is_ret_label l = (find l).l_ret || (find l).l_global (* todo - check *) + +let is_param_label l = (find l).l_param || (find l).l_global + +let is_global_label l = (find l).l_global + +let is_seeded_label l = (find l).l_seeded + +let set_global_label (l : label) (b : bool) : unit = + assert ((not (is_global_label l)) || b); + (U.deref l).l_global <- b + +(** Aliases for set_global *) +let global_tau = get_global + + +(** Get_global for lvalues *) +let global_lvalue lv = get_global lv.contents + + + +(***********************************************************************) +(* *) +(* Printing Functions *) +(* *) +(***********************************************************************) + +let string_of_configuration (c, i, i') = + let context = match c with + Open -> "O" + | Closed -> "C" + in + Printf.sprintf "(%s,%d,%d)" context i i' + +let string_of_polarity p = + match p with + Pos -> "+" + | Neg -> "-" + | Sub -> "M" + +(** Convert a label to a string, short representation *) +let string_of_label (l : label) : string = + "\"" ^ (find l).l_name ^ "\"" + +(** Return true if the element [e] is present in the association list, + according to uref equality *) +let rec assoc_list_mem (e : tau) (l : association list) = + match l with + | [] -> None + | (h, s, so) :: t -> + if U.equal (h,e) then Some (s, so) else assoc_list_mem e t + +(** Given a tau, create a unique recursive variable name. This should always + return the same name for a given tau *) +let fresh_recvar_name (t : tau) : string = + match find t with + Pair p -> "rvp" ^ string_of_int p.p_stamp + | Ref r -> "rvr" ^ string_of_int r.r_stamp + | Fun f -> "rvf" ^ string_of_int f.f_stamp + | _ -> die "fresh_recvar_name" + + +(** Return a string representation of a tau, using association lists. *) +let string_of_tau (t : tau) : string = + let tau_map : association list ref = ref [] in + let rec string_of_tau' t = + match assoc_list_mem t !tau_map with + Some (s, so) -> (* recursive type. see if a var name has been set *) + begin + match !so with + None -> + let rv = fresh_recvar_name t in + s := "u " ^ rv ^ "." ^ !s; + so := Some rv; + rv + | Some rv -> rv + end + | None -> (* type's not recursive. Add it to the assoc list and cont. *) + let s = ref "" + and so : string option ref = ref None in + tau_map := (t, s, so) :: !tau_map; + begin + match find t with + Var v -> s := v.v_name; + | Pair p -> + assert (ref_or_var p.ptr); + assert (fun_or_var p.lam); + s := "{"; + s := !s ^ string_of_tau' p.ptr; + s := !s ^ ","; + s := !s ^ string_of_tau' p.lam; + s := !s ^"}" + | Ref r -> + assert (pair_or_var r.points_to); + s := "ref(|"; + s := !s ^ string_of_label r.rl; + s := !s ^ "|,"; + s := !s ^ string_of_tau' r.points_to; + s := !s ^ ")" + | Fun f -> + assert (pair_or_var f.ret); + let rec string_of_args = function + h :: [] -> + assert (pair_or_var h); + s := !s ^ string_of_tau' h + | h :: t -> + assert (pair_or_var h); + s := !s ^ string_of_tau' h ^ ","; + string_of_args t + | [] -> () + in + s := "fun(|"; + s := !s ^ string_of_label f.fl; + s := !s ^ "|,"; + s := !s ^ "<"; + if List.length f.args > 0 then string_of_args f.args + else s := !s ^ "void"; + s := !s ^">,"; + s := !s ^ string_of_tau' f.ret; + s := !s ^ ")" + end; + tau_map := List.tl !tau_map; + !s + in + string_of_tau' t + +(** Convert an lvalue to a string *) +let rec string_of_lvalue (lv : lvalue) : string = + let contents = string_of_tau lv.contents + and l = string_of_label lv.l in + assert (pair_or_var lv.contents); (* do a consistency check *) + Printf.sprintf "[%s]^(%s)" contents l + +let print_path (p : lblinfo path) : unit = + let string_of_pkind = function + Positive -> "p" + | Negative -> "n" + | Match -> "m" + | Seed -> "s" + in + Printf.printf + "%s --%s--> %s (%d) : " + (string_of_label p.head) + (string_of_pkind p.kind) + (string_of_label p.tail) + (PathHash.hash p) + +(** Print a list of tau elements, comma separated *) +let rec print_tau_list (l : tau list) : unit = + let rec print_t_strings = function + h :: [] -> print_endline h + | h :: t -> + print_string h; + print_string ", "; + print_t_strings t + | [] -> () + in + print_t_strings (List.map string_of_tau l) + +let print_constraint (c : tconstraint) = + match c with + Unification (t, t') -> + let lhs = string_of_tau t + and rhs = string_of_tau t' in + Printf.printf "%s == %s\n" lhs rhs + | Leq (t, (i, p), t') -> + let lhs = string_of_tau t + and rhs = string_of_tau t' in + Printf.printf "%s <={%d,%s} %s\n" lhs i (string_of_polarity p) rhs + +(***********************************************************************) +(* *) +(* Type Operations -- these do not create any constraints *) +(* *) +(***********************************************************************) + +(** Create an lvalue with label [lbl] and tau contents [t]. *) +let make_lval (lbl, t : label * tau) : lvalue = + {l = lbl; contents = t} + +let make_label_int (is_global : bool) (name :string) (vio : Cil.varinfo option) : label = + let locc = + match vio with + Some vi -> C.add (fresh_index (), name, vi) C.empty + | None -> C.empty + in + U.uref { + l_name = name; + l_global = is_global; + l_stamp = fresh_stamp (); + loc = locc; + aliases = locc; + p_ubounds = B.empty; + p_lbounds = B.empty; + n_ubounds = B.empty; + n_lbounds = B.empty; + m_ubounds = B.empty; + m_lbounds = B.empty; + m_upath = P.empty; + m_lpath = P.empty; + n_upath = P.empty; + n_lpath = P.empty; + p_upath = P.empty; + p_lpath = P.empty; + l_seeded = false; + l_ret = false; + l_param = false + } + +(** Create a new label with name [name]. Also adds a fresh constant + with name [name] to this label's aliases set. *) +let make_label (is_global : bool) (name : string) (vio : Cil.varinfo option) : label = + make_label_int is_global name vio + +(** Create a new label with an unspecified name and an empty alias set. *) +let fresh_label (is_global : bool) : label = + let index = fresh_index () in + make_label_int is_global ("l_" ^ string_of_int index) None + +(** Create a fresh bound (edge in the constraint graph). *) +let make_bound (i, a : int * label) : lblinfo bound = + {index = i; info = a} + +let make_tau_bound (i, a : int * tau) : tinfo bound = + {index = i; info = a} + +(** Create a fresh named variable with name '[name]. *) +let make_var (b: bool) (name : string) : tau = + U.uref (Var {v_name = ("'" ^ name); + v_hole = H.create 8; + v_stamp = fresh_index (); + v_global = b; + v_mlbs = B.empty; + v_mubs = B.empty; + v_plbs = B.empty; + v_pubs = B.empty; + v_nlbs = B.empty; + v_nubs = B.empty}) + +(** Create a fresh unnamed variable (name will be 'fv). *) +let fresh_var (is_global : bool) : tau = + make_var is_global ("fv" ^ string_of_int (fresh_index ())) + +(** Create a fresh unnamed variable (name will be 'fi). *) +let fresh_var_i (is_global : bool) : tau = + make_var is_global ("fi" ^ string_of_int (fresh_index())) + +(** Create a Fun constructor. *) +let make_fun (lbl, a, r : label * (tau list) * tau) : tau = + U.uref (Fun {fl = lbl; + f_stamp = fresh_index (); + f_global = false; + args = a; + ret = r }) + +(** Create a Ref constructor. *) +let make_ref (lbl,pt : label * tau) : tau = + U.uref (Ref {rl = lbl; + r_stamp = fresh_index (); + r_global = false; + points_to = pt}) + +(** Create a Pair constructor. *) +let make_pair (p,f : tau * tau) : tau = + U.uref (Pair {ptr = p; + p_stamp = fresh_index (); + p_global = false; + lam = f}) + +(** Copy the toplevel constructor of [t], putting fresh variables in each + argement of the constructor. *) +let copy_toplevel (t : tau) : tau = + match find t with + Pair _ -> make_pair (fresh_var_i false, fresh_var_i false) + | Ref _ -> make_ref (fresh_label false, fresh_var_i false) + | Fun f -> + let fresh_fn = fun _ -> fresh_var_i false in + make_fun (fresh_label false, + List.map fresh_fn f.args, fresh_var_i false) + | _ -> die "copy_toplevel" + + +let has_same_structure (t : tau) (t' : tau) = + match find t, find t' with + Pair _, Pair _ -> true + | Ref _, Ref _ -> true + | Fun _, Fun _ -> true + | Var _, Var _ -> true + | _ -> false + + +let pad_args (f, f' : finfo * finfo) : unit = + let padding = ref ((List.length f.args) - (List.length f'.args)) + in + if !padding == 0 then () + else + let to_pad = + if !padding > 0 then f' else (padding := -(!padding); f) + in + for i = 1 to !padding do + to_pad.args <- to_pad.args @ [fresh_var false] + done + + +let pad_args2 (fi, tlr : finfo * tau list ref) : unit = + let padding = ref (List.length fi.args - List.length !tlr) + in + if !padding == 0 then () + else + if !padding > 0 then + for i = 1 to !padding do + tlr := !tlr @ [fresh_var false] + done + else + begin + padding := -(!padding); + for i = 1 to !padding do + fi.args <- fi.args @ [fresh_var false] + done + end + +(***********************************************************************) +(* *) +(* Constraint Generation/ Resolution *) +(* *) +(***********************************************************************) + + +(** Make the type a global type *) +let set_global (t : tau) (b : bool) : unit = + let set_global_down t = + match find t with + Var v -> v.v_global <- true + | Ref r -> set_global_label r.rl true + | Fun f -> set_global_label f.fl true + | _ -> () + in + if !debug && b then Printf.printf "Set global: %s\n" (string_of_tau t); + assert ((not (get_global t)) || b); + if b then iter_tau set_global_down t; + match find t with + Var v -> v.v_global <- b + | Ref r -> r.r_global <- b + | Pair p -> p.p_global <- b + | Fun f -> f.f_global <- b + + +let rec unify_int (t, t' : tau * tau) : unit = + if equal_tau t t' then () + else + let ti, ti' = find t, find t' in + U.unify combine (t, t'); + match ti, ti' with + Var v, Var v' -> + set_global t' (v.v_global || get_global t'); + merge_vholes (v, v'); + merge_vlbs (v, v'); + merge_vubs (v, v') + | Var v, _ -> + set_global t' (v.v_global || get_global t'); + trigger_vhole v t'; + notify_vlbs t v; + notify_vubs t v + | _, Var v -> + set_global t (v.v_global || get_global t); + trigger_vhole v t; + notify_vlbs t' v; + notify_vubs t' v + | Ref r, Ref r' -> + set_global t (r.r_global || r'.r_global); + unify_ref (r, r') + | Fun f, Fun f' -> + set_global t (f.f_global || f'.f_global); + unify_fun (f, f') + | Pair p, Pair p' -> () + | _ -> raise Inconsistent +and notify_vlbs (t : tau) (vi : vinfo) : unit = + let notify p bounds = + List.iter + (fun b -> + add_constraint (Unification (b.info,copy_toplevel t)); + add_constraint (Leq (b.info, (b.index, p), t))) + bounds + in + notify Sub (B.elements vi.v_mlbs); + notify Pos (B.elements vi.v_plbs); + notify Neg (B.elements vi.v_nlbs) +and notify_vubs (t : tau) (vi : vinfo) : unit = + let notify p bounds = + List.iter + (fun b -> + add_constraint (Unification (b.info,copy_toplevel t)); + add_constraint (Leq (t, (b.index, p), b.info))) + bounds + in + notify Sub (B.elements vi.v_mubs); + notify Pos (B.elements vi.v_pubs); + notify Neg (B.elements vi.v_nubs) +and unify_ref (ri,ri' : rinfo * rinfo) : unit = + add_constraint (Unification (ri.points_to, ri'.points_to)) +and unify_fun (fi, fi' : finfo * finfo) : unit = + let rec union_args = function + _, [] -> false + | [], _ -> true + | h :: t, h' :: t' -> + add_constraint (Unification (h, h')); + union_args(t, t') + in + unify_label(fi.fl, fi'.fl); + add_constraint (Unification (fi.ret, fi'.ret)); + if union_args (fi.args, fi'.args) then fi.args <- fi'.args; +and unify_label (l, l' : label * label) : unit = + let pick_name (li, li' : lblinfo * lblinfo) = + if String.length li.l_name > 1 && String.sub (li.l_name) 0 2 = "l_" then + li.l_name <- li'.l_name + else () + in + let combine_label (li, li' : lblinfo *lblinfo) : lblinfo = + let rm_self b = not (li.l_stamp = get_label_stamp b.info) + in + pick_name (li, li'); + li.l_global <- li.l_global || li'.l_global; + li.aliases <- C.union li.aliases li'.aliases; + li.p_ubounds <- B.union li.p_ubounds li'.p_ubounds; + li.p_lbounds <- B.union li.p_lbounds li'.p_lbounds; + li.n_ubounds <- B.union li.n_ubounds li'.n_ubounds; + li.n_lbounds <- B.union li.n_lbounds li'.n_lbounds; + li.m_ubounds <- B.union li.m_ubounds (B.filter rm_self li'.m_ubounds); + li.m_lbounds <- B.union li.m_lbounds (B.filter rm_self li'.m_lbounds); + li.m_upath <- P.union li.m_upath li'.m_upath; + li.m_lpath<- P.union li.m_lpath li'.m_lpath; + li.n_upath <- P.union li.n_upath li'.n_upath; + li.n_lpath <- P.union li.n_lpath li'.n_lpath; + li.p_upath <- P.union li.p_upath li'.p_upath; + li.p_lpath <- P.union li.p_lpath li'.p_lpath; + li.l_seeded <- li.l_seeded || li'.l_seeded; + li.l_ret <- li.l_ret || li'.l_ret; + li.l_param <- li.l_param || li'.l_param; + li + in + if !debug_constraints then + Printf.printf "%s == %s\n" (string_of_label l) (string_of_label l'); + U.unify combine_label (l, l') +and merge_vholes (vi, vi' : vinfo * vinfo) : unit = + H.iter + (fun i -> fun _ -> H.replace vi'.v_hole i ()) + vi.v_hole +and merge_vlbs (vi, vi' : vinfo * vinfo) : unit = + vi'.v_mlbs <- B.union vi.v_mlbs vi'.v_mlbs; + vi'.v_plbs <- B.union vi.v_plbs vi'.v_plbs; + vi'.v_nlbs <- B.union vi.v_nlbs vi'.v_nlbs +and merge_vubs (vi, vi' : vinfo * vinfo) : unit = + vi'.v_mubs <- B.union vi.v_mubs vi'.v_mubs; + vi'.v_pubs <- B.union vi.v_pubs vi'.v_pubs; + vi'.v_nubs <- B.union vi.v_nubs vi'.v_nubs +and trigger_vhole (vi : vinfo) (t : tau) = + let add_self_loops (t : tau) : unit = + match find t with + Var v -> + H.iter + (fun i -> fun _ -> H.replace v.v_hole i ()) + vi.v_hole + | Ref r -> + H.iter + (fun i -> fun _ -> + leq_label (r.rl, (i, Pos), r.rl); + leq_label (r.rl, (i, Neg), r.rl)) + vi.v_hole + | Fun f -> + H.iter + (fun i -> fun _ -> + leq_label (f.fl, (i, Pos), f.fl); + leq_label (f.fl, (i, Neg), f.fl)) + vi.v_hole + | _ -> () + in + iter_tau add_self_loops t +(** Pick the representative info for two tinfo's. This function prefers the + first argument when both arguments are the same structure, but when + one type is a structure and the other is a var, it picks the structure. + All other actions (e.g., updating the info) is done in unify_int *) +and combine (ti, ti' : tinfo * tinfo) : tinfo = + match ti, ti' with + Var _, _ -> ti' + | _, _ -> ti +and leq_int (t, (i, p), t') : unit = + if equal_tau t t' then () + else + let ti, ti' = find t, find t' in + match ti, ti' with + Var v, Var v' -> + begin + match p with + Pos -> + v.v_pubs <- B.add (make_tau_bound (i, t')) v.v_pubs; + v'.v_plbs <- B.add (make_tau_bound (i, t)) v'.v_plbs + | Neg -> + v.v_nubs <- B.add (make_tau_bound (i, t')) v.v_nubs; + v'.v_nlbs <- B.add (make_tau_bound (i, t)) v'.v_nlbs + | Sub -> + v.v_mubs <- B.add (make_tau_bound (i, t')) v.v_mubs; + v'.v_mlbs <- B.add (make_tau_bound (i, t)) v'.v_mlbs + end + | Var v, _ -> + add_constraint (Unification (t, copy_toplevel t')); + add_constraint (Leq (t, (i, p), t')) + | _, Var v -> + add_constraint (Unification (t', copy_toplevel t)); + add_constraint (Leq (t, (i, p), t')) + | Ref r, Ref r' -> leq_ref (r, (i, p), r') + | Fun f, Fun f' -> add_constraint (Unification (t, t')) + | Pair pr, Pair pr' -> + add_constraint (Leq (pr.ptr, (i, p), pr'.ptr)); + add_constraint (Leq (pr.lam, (i, p), pr'.lam)) + | _ -> raise Inconsistent +and leq_ref (ri, (i, p), ri') : unit = + let add_self_loops (t : tau) : unit = + match find t with + Var v -> H.replace v.v_hole i () + | Ref r -> + leq_label (r.rl, (i, Pos), r.rl); + leq_label (r.rl, (i, Neg), r.rl) + | Fun f -> + leq_label (f.fl, (i, Pos), f.fl); + leq_label (f.fl, (i, Neg), f.fl) + | _ -> () + in + iter_tau add_self_loops ri.points_to; + add_constraint (Unification (ri.points_to, ri'.points_to)); + leq_label(ri.rl, (i, p), ri'.rl) +and leq_label (l,(i, p), l') : unit = + if !debug_constraints then + Printf.printf + "%s <={%d,%s} %s\n" + (string_of_label l) i (string_of_polarity p) (string_of_label l'); + let li, li' = find l, find l' in + match p with + Pos -> + li.l_ret <- true; + li.p_ubounds <- B.add (make_bound (i, l')) li.p_ubounds; + li'.p_lbounds <- B.add (make_bound (i, l)) li'.p_lbounds + | Neg -> + li'.l_param <- true; + li.n_ubounds <- B.add (make_bound (i, l')) li.n_ubounds; + li'.n_lbounds <- B.add (make_bound (i, l)) li'.n_lbounds + | Sub -> + if U.equal (l, l') then () + else + begin + li.m_ubounds <- B.add (make_bound(0, l')) li.m_ubounds; + li'.m_lbounds <- B.add (make_bound(0, l)) li'.m_lbounds + end +and add_constraint_int (c : tconstraint) (toplev : bool) = + if !debug_constraints && toplev then + begin + Printf.printf "%d:>" !toplev_count; + print_constraint c; + incr toplev_count + end + else + if !debug_constraints then print_constraint c else (); + begin + match c with + Unification _ -> Q.add c eq_worklist + | Leq _ -> Q.add c leq_worklist + end; + solve_constraints () +and add_constraint (c : tconstraint) = + add_constraint_int c false +and add_toplev_constraint (c : tconstraint) = + if !print_constraints && not !debug_constraints then + begin + Printf.printf "%d:>" !toplev_count; + incr toplev_count; + print_constraint c + end + else (); + add_constraint_int c true +and fetch_constraint () : tconstraint option = + try Some (Q.take eq_worklist) + with Q.Empty -> (try Some (Q.take leq_worklist) + with Q.Empty -> None) +(** The main solver loop. *) +and solve_constraints () : unit = + match fetch_constraint () with + Some c -> + begin + match c with + Unification (t, t') -> unify_int (t, t') + | Leq (t, (i, p), t') -> + if !no_sub then unify_int (t, t') + else + if !analyze_mono then leq_int (t, (0, Sub), t') + else leq_int (t, (i, p), t') + end; + solve_constraints () + | None -> () + + +(***********************************************************************) +(* *) +(* Interface Functions *) +(* *) +(***********************************************************************) + +(** Return the contents of the lvalue. *) +let rvalue (lv : lvalue) : tau = + lv.contents + +(** Dereference the rvalue. If it does not have enough structure to support + the operation, then the correct structure is added via new unification + constraints. *) +let rec deref (t : tau) : lvalue = + match U.deref t with + Pair p -> + begin + match U.deref p.ptr with + Var _ -> + let is_global = global_tau p.ptr in + let points_to = fresh_var is_global in + let l = fresh_label is_global in + let r = make_ref (l, points_to) + in + add_toplev_constraint (Unification (p.ptr, r)); + make_lval (l, points_to) + | Ref r -> make_lval (r.rl, r.points_to) + | _ -> raise WellFormed + end + | Var v -> + let is_global = global_tau t in + add_toplev_constraint + (Unification (t, make_pair (fresh_var is_global, + fresh_var is_global))); + deref t + | _ -> raise WellFormed + +(** Form the union of [t] and [t'], if it doesn't exist already. *) +let join (t : tau) (t' : tau) : tau = + try H.find join_cache (get_stamp t, get_stamp t') + with Not_found -> + let t'' = fresh_var false in + add_toplev_constraint (Leq (t, (0, Sub), t'')); + add_toplev_constraint (Leq (t', (0, Sub), t'')); + H.add join_cache (get_stamp t, get_stamp t') t''; + t'' + +(** Form the union of a list [tl], expected to be the initializers of some + structure or array type. *) +let join_inits (tl : tau list) : tau = + let t' = fresh_var false in + List.iter + (fun t -> add_toplev_constraint (Leq (t, (0, Sub), t'))) + tl; + t' + +(** Take the address of an lvalue. Does not add constraints. *) +let address (lv : lvalue) : tau = + make_pair (make_ref (lv.l, lv.contents), fresh_var false) + +(** For this version of golf, instantiation is handled at [apply] *) +let instantiate (lv : lvalue) (i : int) : lvalue = + lv + +(** Constraint generated from assigning [t] to [lv]. *) +let assign (lv : lvalue) (t : tau) : unit = + add_toplev_constraint (Leq (t, (0, Sub), lv.contents)) + +let assign_ret (i : int) (lv : lvalue) (t : tau) : unit = + add_toplev_constraint (Leq (t, (i, Pos), lv.contents)) + +(** Project out the first (ref) component or a pair. If the argument [t] has + no discovered structure, raise NoContents. *) +let proj_ref (t : tau) : tau = + match U.deref t with + Pair p -> p.ptr + | Var v -> raise NoContents + | _ -> raise WellFormed + +(* Project out the second (fun) component of a pair. If the argument [t] has + no discovered structure, create it on the fly by adding constraints. *) +let proj_fun (t : tau) : tau = + match U.deref t with + Pair p -> p.lam + | Var v -> + let p, f = fresh_var false, fresh_var false in + add_toplev_constraint (Unification (t, make_pair(p, f))); + f + | _ -> raise WellFormed + +let get_args (t : tau) : tau list = + match U.deref t with + Fun f -> f.args + | _ -> raise WellFormed + +let get_finfo (t : tau) : finfo = + match U.deref t with + Fun f -> f + | _ -> raise WellFormed + +(** Function type [t] is applied to the arguments [actuals]. Unifies the + actuals with the formals of [t]. If no functions have been discovered for + [t] yet, create a fresh one and unify it with t. The result is the return + value of the function plus the index of this application site. *) +let apply (t : tau) (al : tau list) : (tau * int) = + let i = fresh_appsite () in + let f = proj_fun t in + let actuals = ref al in + let fi,ret = + match U.deref f with + Fun fi -> fi, fi.ret + | Var v -> + let new_l, new_ret, new_args = + fresh_label false, fresh_var false, + List.map (function _ -> fresh_var false) !actuals + in + let new_fun = make_fun (new_l, new_args, new_ret) in + add_toplev_constraint (Unification (new_fun, f)); + (get_finfo new_fun, new_ret) + | _ -> raise WellFormed + in + pad_args2 (fi, actuals); + List.iter2 + (fun actual -> fun formal -> + add_toplev_constraint (Leq (actual,(i, Neg), formal))) + !actuals fi.args; + (ret, i) + +(** Create a new function type with name [name], list of formal arguments + [formals], and return value [ret]. Adds no constraints. *) +let make_function (name : string) (formals : lvalue list) (ret : tau) : tau = + let f = make_fun (make_label false name None, + List.map (fun x -> rvalue x) formals, + ret) + in + make_pair (fresh_var false, f) + +(** Create an lvalue. If [is_global] is true, the lvalue will be treated + monomorphically. *) +let make_lvalue (is_global : bool) (name : string) (vio : Cil.varinfo option) : lvalue = + if !debug && is_global then + Printf.printf "Making global lvalue : %s\n" name + else (); + make_lval (make_label is_global name vio, make_var is_global name) + +(** Create a fresh non-global named variable. *) +let make_fresh (name : string) : tau = + make_var false name + +(** The default type for constants. *) +let bottom () : tau = + make_var false "bottom" + +(** Unify the result of a function with its return value. *) +let return (t : tau) (t' : tau) = + add_toplev_constraint (Leq (t', (0, Sub), t)) + +(***********************************************************************) +(* *) +(* Query/Extract Solutions *) +(* *) +(***********************************************************************) + +let make_summary = leq_label + +let path_signature k l l' b : int list = + let ksig = + match k with + Positive -> 1 + | Negative -> 2 + | _ -> 3 + in + [ksig; + get_label_stamp l; + get_label_stamp l'; + if b then 1 else 0] + +let make_path (k, l, l', b) = + let psig = path_signature k l l' b in + if PH.mem path_hash psig then () + else + let new_path = {kind = k; head = l; tail = l'; reached_global = b} + and li, li' = find l, find l' in + PH.add path_hash psig new_path; + Q.add new_path path_worklist; + begin + match k with + Positive -> + li.p_upath <- P.add new_path li.p_upath; + li'.p_lpath <- P.add new_path li'.p_lpath + | Negative -> + li.n_upath <- P.add new_path li.n_upath; + li'.n_lpath <- P.add new_path li'.n_lpath + | _ -> + li.m_upath <- P.add new_path li.m_upath; + li'.m_lpath <- P.add new_path li'.m_lpath + end; + if !debug then + begin + print_string "Discovered path : "; + print_path new_path; + print_newline () + end + +let backwards_tabulate (l : label) : unit = + let rec loop () = + let rule1 p = + if !debug then print_endline "rule1"; + B.iter + (fun lb -> + make_path (Match, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).m_lbounds + and rule2 p = + if !debug then print_endline "rule2"; + B.iter + (fun lb -> + make_path (Negative, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).n_lbounds + and rule2m p = + if !debug then print_endline "rule2m"; + B.iter + (fun lb -> + make_path (Match, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).n_lbounds + and rule3 p = + if !debug then print_endline "rule3"; + B.iter + (fun lb -> + make_path (Positive, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).p_lbounds + and rule4 p = + if !debug then print_endline "rule4"; + B.iter + (fun lb -> + make_path(Negative, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).m_lbounds + and rule5 p = + if !debug then print_endline "rule5"; + B.iter + (fun lb -> + make_path (Positive, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).m_lbounds + and rule6 p = + if !debug then print_endline "rule6"; + B.iter + (fun lb -> + if is_seeded_label lb.info then () + else + begin + (find lb.info).l_seeded <- true; (* set seeded *) + make_path (Seed, lb.info, lb.info, + is_global_label lb.info) + end) + (find p.head).p_lbounds + and rule7 p = + if !debug then print_endline "rule7"; + if not (is_ret_label p.tail && is_param_label p.head) then () + else + B.iter + (fun lb -> + B.iter + (fun ub -> + if lb.index = ub.index then + begin + if !debug then + Printf.printf "New summary : %s %s\n" + (string_of_label lb.info) + (string_of_label ub.info); + make_summary (lb.info, (0, Sub), ub.info); + (* rules 1, 4, and 5 *) + P.iter + (fun ubp -> (* rule 1 *) + make_path (Match, lb.info, ubp.tail, + ubp.reached_global)) + (find ub.info).m_upath; + P.iter + (fun ubp -> (* rule 4 *) + make_path (Negative, lb.info, ubp.tail, + ubp.reached_global)) + (find ub.info).n_upath; + P.iter + (fun ubp -> (* rule 5 *) + make_path (Positive, lb.info, ubp.tail, + ubp.reached_global)) + (find ub.info).p_upath + end) + (find p.tail).p_ubounds) + (find p.head).n_lbounds + in + let matched_backward_rules p = + rule1 p; + if p.reached_global then rule2m p else rule2 p; + rule3 p; + rule6 p; + rule7 p + and negative_backward_rules p = + rule2 p; + rule3 p; + rule4 p; + rule6 p; + rule7 p + and positive_backward_rules p = + rule3 p; + rule5 p; + rule6 p; + rule7 p + in (* loop *) + if Q.is_empty path_worklist then () + else + let p = Q.take path_worklist in + if !debug then + begin + print_string "Processing path: "; + print_path p; + print_newline () + end; + begin + match p.kind with + Positive -> + if is_global_label p.tail then matched_backward_rules p + else positive_backward_rules p + | Negative -> negative_backward_rules p + | _ -> matched_backward_rules p + end; + loop () + in (* backwards_tabulate *) + if !debug then + begin + Printf.printf "Tabulating for %s..." (string_of_label l); + if is_global_label l then print_string "(global)"; + print_newline () + end; + make_path (Seed, l, l, is_global_label l); + loop () + +let collect_ptsets (l : label) : constantset = (* todo -- cache aliases *) + let li = find l + and collect init s = + P.fold (fun x a -> C.union a (find x.head).aliases) s init + in + backwards_tabulate l; + collect (collect (collect li.aliases li.m_lpath) li.n_lpath) li.p_lpath + +let extract_ptlabel (lv : lvalue) : label option = + try + match find (proj_ref lv.contents) with + Var v -> None + | Ref r -> Some r.rl; + | _ -> raise WellFormed + with NoContents -> None + +let points_to_aux (t : tau) : constant list = + try + match find (proj_ref t) with + Var v -> [] + | Ref r -> C.elements (collect_ptsets r.rl) + | _ -> raise WellFormed + with NoContents -> [] + +let points_to_names (lv : lvalue) : string list = + List.map (fun (_, str, _) -> str) (points_to_aux lv.contents) + +let points_to (lv : lvalue) : Cil.varinfo list = + let rec get_vinfos l : Cil.varinfo list = match l with + | (_, _, h) :: t -> h :: get_vinfos t + | [] -> [] + in + get_vinfos (points_to_aux lv.contents) + +let epoints_to (t : tau) : Cil.varinfo list = + let rec get_vinfos l : Cil.varinfo list = match l with + | (_, _, h) :: t -> h :: get_vinfos t + | [] -> [] + in + get_vinfos (points_to_aux t) + +let smart_alias_query (l : label) (l' : label) : bool = + (* Set of dead configurations *) + let dead_configs : config_map = CH.create 16 in + (* the set of discovered configurations *) + let discovered : config_map = CH.create 16 in + let rec filter_match (i : int) = + B.filter (fun (b : lblinfo bound) -> i = b.index) + in + let rec simulate c l l' = + let config = (c, get_label_stamp l, get_label_stamp l') in + if U.equal (l, l') then + begin + if !debug then + Printf.printf + "%s and %s are aliased\n" + (string_of_label l) + (string_of_label l'); + raise APFound + end + else if CH.mem discovered config then () + else + begin + if !debug_aliases then + Printf.printf + "Exploring configuration %s\n" + (string_of_configuration config); + CH.add discovered config (); + B.iter + (fun lb -> simulate c lb.info l') + (get_bounds Sub false l); (* epsilon closure of l *) + B.iter + (fun lb -> simulate c l lb.info) + (get_bounds Sub false l'); (* epsilon closure of l' *) + B.iter + (fun lb -> + let matching = + filter_match lb.index (get_bounds Pos false l') + in + B.iter + (fun b -> simulate Closed lb.info b.info) + matching; + if is_global_label l' then (* positive self-loops on l' *) + simulate Closed lb.info l') + (get_bounds Pos false l); (* positive transitions on l *) + if is_global_label l then + B.iter + (fun lb -> simulate Closed l lb.info) + (get_bounds Pos false l'); (* positive self-loops on l *) + begin + match c with (* negative transitions on l, only if Open *) + Open -> + B.iter + (fun lb -> + let matching = + filter_match lb.index (get_bounds Neg false l') + in + B.iter + (fun b -> simulate Open lb.info b.info) + matching ; + if is_global_label l' then (* neg self-loops on l' *) + simulate Open lb.info l') + (get_bounds Neg false l); + if is_global_label l then + B.iter + (fun lb -> simulate Open l lb.info) + (get_bounds Neg false l') (* negative self-loops on l *) + | _ -> () + end; + (* if we got this far, then the configuration was not used *) + CH.add dead_configs config (); + end + in + try + begin + if H.mem cached_aliases (get_label_stamp l, get_label_stamp l') then + true + else + begin + simulate Open l l'; + if !debug then + Printf.printf + "%s and %s are NOT aliased\n" + (string_of_label l) + (string_of_label l'); + false + end + end + with APFound -> + CH.iter + (fun config -> fun _ -> + if not (CH.mem dead_configs config) then + H.add + cached_aliases + (get_label_stamp l, get_label_stamp l') + ()) + discovered; + true + +(** todo : uses naive alias query for now *) +let may_alias (t1 : tau) (t2 : tau) : bool = + try + let l1 = + match find (proj_ref t1) with + Ref r -> r.rl + | Var v -> raise NoContents + | _ -> raise WellFormed + and l2 = + match find (proj_ref t2) with + Ref r -> r.rl + | Var v -> raise NoContents + | _ -> raise WellFormed + in + not (C.is_empty (C.inter (collect_ptsets l1) (collect_ptsets l2))) + with NoContents -> false + +let alias_query (b : bool) (lvl : lvalue list) : int * int = + let naive_count = ref 0 in + let smart_count = ref 0 in + let lbls = List.map extract_ptlabel lvl in (* label option list *) + let ptsets = + List.map + (function + Some l -> collect_ptsets l + | None -> C.empty) + lbls in + let record_alias s lo s' lo' = + match lo, lo' with + Some l, Some l' -> + if !debug_aliases then + Printf.printf + "Checking whether %s and %s are aliased...\n" + (string_of_label l) + (string_of_label l'); + if C.is_empty (C.inter s s') then () + else + begin + incr naive_count; + if !smart_aliases && smart_alias_query l l' then + incr smart_count + end + | _ -> () + in + let rec check_alias sets labels = + match sets,labels with + s :: st, l :: lt -> + List.iter2 (record_alias s l) ptsets lbls; + check_alias st lt + | [], [] -> () + | _ -> die "check_alias" + in + check_alias ptsets lbls; + (!naive_count, !smart_count) + +let alias_frequency (lvl : (lvalue * bool) list) : int * int = + let extract_lbl (lv, b : lvalue * bool) = (lv.l, b) in + let naive_count = ref 0 in + let smart_count = ref 0 in + let lbls = List.map extract_lbl lvl in + let ptsets = + List.map + (fun (lbl, b) -> + if b then (find lbl).loc (* symbol access *) + else collect_ptsets lbl) + lbls in + let record_alias s (l, b) s' (l', b') = + if !debug_aliases then + Printf.printf + "Checking whether %s and %s are aliased...\n" + (string_of_label l) + (string_of_label l'); + if C.is_empty (C.inter s s') then () + else + begin + if !debug_aliases then + Printf.printf + "%s and %s are aliased naively...\n" + (string_of_label l) + (string_of_label l'); + incr naive_count; + if !smart_aliases then + if b || b' || smart_alias_query l l' then incr smart_count + else + Printf.printf + "%s and %s are not aliased by smart queries...\n" + (string_of_label l) + (string_of_label l'); + end + in + let rec check_alias sets labels = + match sets, labels with + s :: st, l :: lt -> + List.iter2 (record_alias s l) ptsets lbls; + check_alias st lt + | [], [] -> () + | _ -> die "check_alias" + in + check_alias ptsets lbls; + (!naive_count, !smart_count) + + +(** an interface for extracting abstract locations from this analysis *) + +type absloc = label + +let absloc_of_lvalue (l : lvalue) : absloc = l.l +let absloc_eq (a1, a2) = smart_alias_query a1 a2 +let absloc_print_name = ref true +let d_absloc () (p : absloc) = + let a = find p in + if !absloc_print_name then Pretty.dprintf "%s" a.l_name + else Pretty.dprintf "%d" a.l_stamp + +let phonyAddrOf (lv : lvalue) : lvalue = + make_lval (fresh_label true, address lv) + +(* transitive closure of points to, starting from l *) +let rec tauPointsTo (l : tau) : absloc list = + match find l with + Var _ -> [] + | Ref r -> r.rl :: tauPointsTo r.points_to + | _ -> [] + +let rec absloc_points_to (l : lvalue) : absloc list = + tauPointsTo l.contents + + +(** The following definitions are only introduced for the + compatability with Olf. *) + +exception UnknownLocation + +let finished_constraints () = () +let apply_undefined (_ : tau list) = (fresh_var true, 0) +let assign_undefined (_ : lvalue) = () + +let absloc_epoints_to = tauPointsTo diff --git a/cil/src/ext/pta/golf.mli b/cil/src/ext/pta/golf.mli new file mode 100644 index 00000000..569855c5 --- /dev/null +++ b/cil/src/ext/pta/golf.mli @@ -0,0 +1,83 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +type lvalue +type tau +type absloc + +(* only for compatability with Olf *) +exception UnknownLocation + +val debug : bool ref +val debug_constraints : bool ref +val debug_aliases : bool ref +val smart_aliases : bool ref +val finished_constraints : unit -> unit (* only for compatability with Olf *) +val print_constraints : bool ref +val no_flow : bool ref +val no_sub : bool ref +val analyze_mono : bool ref +val solve_constraints : unit -> unit +val rvalue : lvalue -> tau +val deref : tau -> lvalue +val join : tau -> tau -> tau +val join_inits : tau list -> tau +val address : lvalue -> tau +val instantiate : lvalue -> int -> lvalue +val assign : lvalue -> tau -> unit +val assign_ret : int -> lvalue -> tau -> unit +val apply : tau -> tau list -> (tau * int) +val apply_undefined : tau list -> (tau * int) (* only for compatability with Olf *) +val assign_undefined : lvalue -> unit (* only for compatability with Olf *) +val make_function : string -> lvalue list -> tau -> tau +val make_lvalue : bool -> string -> (Cil.varinfo option) -> lvalue +val bottom : unit -> tau +val return : tau -> tau -> unit +val make_fresh : string -> tau +val points_to_names : lvalue -> string list +val points_to : lvalue -> Cil.varinfo list +val epoints_to : tau -> Cil.varinfo list +val string_of_lvalue : lvalue -> string +val global_lvalue : lvalue -> bool +val alias_query : bool -> lvalue list -> int * int +val alias_frequency : (lvalue * bool) list -> int * int + +val may_alias : tau -> tau -> bool + +val absloc_points_to : lvalue -> absloc list +val absloc_epoints_to : tau -> absloc list +val absloc_of_lvalue : lvalue -> absloc +val absloc_eq : (absloc * absloc) -> bool +val d_absloc : unit -> absloc -> Pretty.doc +val phonyAddrOf : lvalue -> lvalue diff --git a/cil/src/ext/pta/olf.ml b/cil/src/ext/pta/olf.ml new file mode 100644 index 00000000..0d770028 --- /dev/null +++ b/cil/src/ext/pta/olf.ml @@ -0,0 +1,1108 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* Exceptions *) +(* *) +(***********************************************************************) + +exception Inconsistent (* raised if constraint system is inconsistent *) +exception WellFormed (* raised if types are not well-formed *) +exception NoContents +exception APFound (* raised if an alias pair is found, a control + flow exception *) +exception ReachedTop (* raised if top (from an undefined function) + flows to a c_absloc during the flow step *) +exception UnknownLocation + +let solve_constraints () = () (* only for compatability with Golf *) + +open Cil + +module U = Uref +module S = Setp +module H = Hashtbl +module Q = Queue + +(** Generic bounds *) +type 'a bound = {info : 'a U.uref} + +module Bound = +struct + type 'a t = 'a bound + let compare (x : 'a t) (y : 'a t) = + Pervasives.compare (U.deref x.info) (U.deref y.info) +end + +module B = S.Make (Bound) + +type 'a boundset = 'a B.t + +(** Abslocs, which identify elements in points-to sets *) +(** jk : I'd prefer to make this an 'a absloc and specialize it to + varinfo for use with the Cil frontend, but for now, this will do *) +type absloc = int * string * Cil.varinfo option + +module Absloc = +struct + type t = absloc + let compare (xid, _, _) (yid, _, _) = xid - yid +end + +module C = Set.Make (Absloc) + +(** Sets of abslocs. Set union is used when two c_abslocs containing + absloc sets are unified *) +type abslocset = C.t + +let d_absloc () (a: absloc) : Pretty.doc = + let i,s,_ = a in + Pretty.dprintf "<%d, %s>" i s + +type c_abslocinfo = { + mutable l_name: string; (** name of the location *) + loc : absloc; + l_stamp : int; + mutable l_top : bool; + mutable aliases : abslocset; + mutable lbounds : c_abslocinfo boundset; + mutable ubounds : c_abslocinfo boundset; + mutable flow_computed : bool +} +and c_absloc = c_abslocinfo U.uref + +(** The type of lvalues. *) +type lvalue = { + l: c_absloc; + contents: tau +} +and vinfo = { + v_stamp : int; + v_name : string; + mutable v_top : bool; + mutable v_lbounds : tinfo boundset; + mutable v_ubounds : tinfo boundset +} +and rinfo = { + r_stamp : int; + rl : c_absloc; + points_to : tau +} +and finfo = { + f_stamp : int; + fl : c_absloc; + ret : tau; + mutable args : tau list +} +and pinfo = { + p_stamp : int; + ptr : tau; + lam : tau +} +and tinfo = + Var of vinfo + | Ref of rinfo + | Fun of finfo + | Pair of pinfo +and tau = tinfo U.uref + +type tconstraint = + Unification of tau * tau + | Leq of tau * tau + +(** Association lists, used for printing recursive types. The first + element is a type that has been visited. The second element is the + string representation of that type (so far). If the string option is + set, then this type occurs within itself, and is associated with the + recursive var name stored in the option. When walking a type, add it + to an association list. + + Example: suppose we have the constraint 'a = ref('a). The type is + unified via cyclic unification, and would loop infinitely if we + attempted to print it. What we want to do is print the type u + rv. ref(rv). This is accomplished in the following manner: + + -- ref('a) is visited. It is not in the association list, so it is + added and the string "ref(" is stored in the second element. We + recurse to print the first argument of the constructor. + + -- In the recursive call, we see that 'a (or ref('a)) is already + in the association list, so the type is recursive. We check the + string option, which is None, meaning that this is the first + recurrence of the type. We create a new recursive variable, rv and + set the string option to 'rv. Next, we prepend u rv. to the string + representation we have seen before, "ref(", and return "rv" as the + string representation of this type. + + -- The string so far is "u rv.ref(". The recursive call returns, + and we complete the type by printing the result of the call, "rv", + and ")" + + In a type where the recursive variable appears twice, e.g. 'a = + pair('a,'a), the second time we hit 'a, the string option will be + set, so we know to reuse the same recursive variable name. +*) +type association = tau * string ref * string option ref + +(** The current state of the solver engine either adding more + constraints, or finished adding constraints and querying graph *) +type state = + AddingConstraints + | FinishedConstraints + +(***********************************************************************) +(* *) +(* Global Variables *) +(* *) +(***********************************************************************) + +(** A count of the constraints introduced from the AST. Used for + debugging. *) +let toplev_count = ref 0 + +let solver_state : state ref = ref AddingConstraints + +(** Print the instantiations constraints. *) +let print_constraints : bool ref = ref false + +(** If true, print all constraints (including induced) and show + additional debug output. *) +let debug = ref false + +(** Just debug all the constraints (including induced) *) +let debug_constraints = ref false + +(** Debug the flow step *) +let debug_flow_step = ref false + +(** Compatibility with GOLF *) +let debug_aliases = ref false +let smart_aliases = ref false +let no_flow = ref false +let analyze_mono = ref false + +(** If true, disable subtyping (unification at all levels) *) +let no_sub = ref false + +(** A list of equality constraints. *) +let eq_worklist : tconstraint Q.t = Q.create () + +(** A list of leq constraints. *) +let leq_worklist : tconstraint Q.t = Q.create () + +(** A hashtable containing stamp pairs of c_abslocs that must be aliased. *) +let cached_aliases : (int * int, unit) H.t = H.create 64 + +(** A hashtable mapping pairs of tau's to their join node. *) +let join_cache : (int * int, tau) H.t = H.create 64 + +(** *) +let label_prefix = "l_" + + +(***********************************************************************) +(* *) +(* Utility Functions *) +(* *) +(***********************************************************************) + +let starts_with s p = + let n = String.length p in + if String.length s < n then false + else String.sub s 0 n = p + + +let die s = + Printf.printf "*******\nAssertion failed: %s\n*******\n" s; + assert false + +let insist b s = + if not b then die s else () + + +let can_add_constraints () = + !solver_state = AddingConstraints + +let can_query_graph () = + !solver_state = FinishedConstraints + +let finished_constraints () = + insist (!solver_state = AddingConstraints) "inconsistent states"; + solver_state := FinishedConstraints + +let find = U.deref + +(** return the prefix of the list up to and including the first + element satisfying p. if no element satisfies p, return the empty + list *) +let rec keep_until p l = + match l with + [] -> [] + | x :: xs -> if p x then [x] else x :: keep_until p xs + + +(** Generate a unique integer. *) +let fresh_index : (unit -> int) = + let counter = ref 0 in + fun () -> + incr counter; + !counter + +let fresh_stamp : (unit -> int) = + let stamp = ref 0 in + fun () -> + incr stamp; + !stamp + +(** Return a unique integer representation of a tau *) +let get_stamp (t : tau) : int = + match find t with + Var v -> v.v_stamp + | Ref r -> r.r_stamp + | Pair p -> p.p_stamp + | Fun f -> f.f_stamp + +(** Consistency checks for inferred types *) +let pair_or_var (t : tau) = + match find t with + Pair _ -> true + | Var _ -> true + | _ -> false + +let ref_or_var (t : tau) = + match find t with + Ref _ -> true + | Var _ -> true + | _ -> false + +let fun_or_var (t : tau) = + match find t with + Fun _ -> true + | Var _ -> true + | _ -> false + + +(** Apply [f] structurally down [t]. Guaranteed to terminate, even if [t] + is recursive *) +let iter_tau f t = + let visited : (int, tau) H.t = H.create 4 in + let rec iter_tau' t = + if H.mem visited (get_stamp t) then () else + begin + f t; + H.add visited (get_stamp t) t; + match find t with + Pair p -> + iter_tau' p.ptr; + iter_tau' p.lam + | Fun f -> + List.iter iter_tau' f.args; + iter_tau' f.ret; + | Ref r -> iter_tau' r.points_to + | _ -> () + end + in + iter_tau' t + +let equal_absloc = function + (i, _, _), (i', _, _) -> i = i' + +let equal_c_absloc l l' = + (find l).l_stamp = (find l').l_stamp + +let equal_tau (t : tau) (t' : tau) = + get_stamp t = get_stamp t' + +let top_c_absloc l = + (find l).l_top + +let get_flow_computed l = + (find l).flow_computed + +let set_flow_computed l = + (find l).flow_computed <- true + +let rec top_tau (t : tau) = + match find t with + Pair p -> top_tau p.ptr || top_tau p.lam + | Ref r -> top_c_absloc r.rl + | Fun f -> top_c_absloc f.fl + | Var v -> v.v_top + +let get_c_absloc_stamp (l : c_absloc) : int = + (find l).l_stamp + +let set_top_c_absloc (l : c_absloc) (b: bool) : unit = + (find l).l_top <- b + +let get_aliases (l : c_absloc) = + if top_c_absloc l then raise ReachedTop + else (find l).aliases + +(***********************************************************************) +(* *) +(* Printing Functions *) +(* *) +(***********************************************************************) + +(** Convert a c_absloc to a string, short representation *) +let string_of_c_absloc (l : c_absloc) : string = + "\"" ^ + (find l).l_name ^ + if top_c_absloc l then "(top)" else "" ^ + "\"" + +(** Return true if the element [e] is present in the association list, + according to uref equality *) +let rec assoc_list_mem (e : tau) (l : association list) = + match l with + [] -> None + | (h, s, so) :: t -> + if U.equal (h, e) then Some (s, so) + else assoc_list_mem e t + +(** Given a tau, create a unique recursive variable name. This should + always return the same name for a given tau *) +let fresh_recvar_name (t : tau) : string = + match find t with + Pair p -> "rvp" ^ string_of_int p.p_stamp + | Ref r -> "rvr" ^ string_of_int r.r_stamp + | Fun f -> "rvf" ^ string_of_int f.f_stamp + | _ -> die "fresh_recvar_name" + + +(** Return a string representation of a tau, using association lists. *) +let string_of_tau (t : tau) : string = + let tau_map : association list ref = ref [] in + let rec string_of_tau' t = + match assoc_list_mem t !tau_map with + Some (s, so) -> (* recursive type. see if a var name has been set *) + begin + match !so with + None -> + let rv = fresh_recvar_name t in + s := "u " ^ rv ^ "." ^ !s; + so := Some rv; + rv + | Some rv -> rv + end + | None -> (* type's not recursive. Add it to the assoc list and cont. *) + let s = ref "" + and so : string option ref = ref None in + tau_map := (t, s, so) :: !tau_map; + begin + match find t with + Var v -> s := v.v_name + | Pair p -> + insist (ref_or_var p.ptr) "wellformed"; + insist (fun_or_var p.lam) "wellformed"; + s := "{"; + s := !s ^ string_of_tau' p.ptr; + s := !s ^ ","; + s := !s ^ string_of_tau' p.lam; + s := !s ^ "}" + | Ref r -> + insist (pair_or_var r.points_to) "wellformed"; + s := "ref(|"; + s := !s ^ string_of_c_absloc r.rl; + s := !s ^ "|,"; + s := !s ^ string_of_tau' r.points_to; + s := !s ^ ")" + | Fun f -> + let rec string_of_args = function + [] -> () + | h :: [] -> + insist (pair_or_var h) "wellformed"; + s := !s ^ string_of_tau' h + | h :: t -> + insist (pair_or_var h) "wellformed"; + s := !s ^ string_of_tau' h ^ ","; + string_of_args t + in + insist (pair_or_var f.ret) "wellformed"; + s := "fun(|"; + s := !s ^ string_of_c_absloc f.fl; + s := !s ^ "|,"; + s := !s ^ "<"; + if List.length f.args > 0 then string_of_args f.args + else s := !s ^ "void"; + s := !s ^ ">,"; + s := !s ^ string_of_tau' f.ret; + s := !s ^ ")" + end; + tau_map := List.tl !tau_map; + !s + in + string_of_tau' t + +(** Convert an lvalue to a string *) +let rec string_of_lvalue (lv : lvalue) : string = + let contents = string_of_tau lv.contents + and l = string_of_c_absloc lv.l + in + insist (pair_or_var lv.contents) "inconsistency at string_of_lvalue"; + (* do a consistency check *) + Printf.sprintf "[%s]^(%s)" contents l + +(** Print a list of tau elements, comma separated *) +let rec print_tau_list (l : tau list) : unit = + let rec print_t_strings = function + [] -> () + | h :: [] -> print_endline h + | h :: t -> + print_string h; + print_string ", "; + print_t_strings t + in + print_t_strings (List.map string_of_tau l) + +let print_constraint (c : tconstraint) = + match c with + Unification (t, t') -> + let lhs = string_of_tau t in + let rhs = string_of_tau t' in + Printf.printf "%s == %s\n" lhs rhs + | Leq (t, t') -> + let lhs = string_of_tau t in + let rhs = string_of_tau t' in + Printf.printf "%s <= %s\n" lhs rhs + +(***********************************************************************) +(* *) +(* Type Operations -- these do not create any constraints *) +(* *) +(***********************************************************************) + +(** Create an lvalue with c_absloc [lbl] and tau contents [t]. *) +let make_lval (loc, t : c_absloc * tau) : lvalue = + {l = loc; contents = t} + +let make_c_absloc_int (is_top : bool) (name : string) (vio : Cil.varinfo option) : c_absloc = + let my_absloc = (fresh_index (), name, vio) in + let locc = C.add my_absloc C.empty + in + U.uref { + l_name = name; + l_top = is_top; + l_stamp = fresh_stamp (); + loc = my_absloc; + aliases = locc; + ubounds = B.empty; + lbounds = B.empty; + flow_computed = false + } + +(** Create a new c_absloc with name [name]. Also adds a fresh absloc + with name [name] to this c_absloc's aliases set. *) +let make_c_absloc (is_top : bool) (name : string) (vio : Cil.varinfo option) = + make_c_absloc_int is_top name vio + +let fresh_c_absloc (is_top : bool) : c_absloc = + let index = fresh_index () in + make_c_absloc_int is_top (label_prefix ^ string_of_int index) None + +(** Create a fresh bound (edge in the constraint graph). *) +let make_bound (a : c_absloc) : c_abslocinfo bound = + {info = a} + +let make_tau_bound (t : tau) : tinfo bound = + {info = t} + +(** Create a fresh named variable with name '[name]. *) +let make_var (is_top : bool) (name : string) : tau = + U.uref (Var {v_name = ("'" ^ name); + v_top = is_top; + v_stamp = fresh_index (); + v_lbounds = B.empty; + v_ubounds = B.empty}) + +let fresh_var (is_top : bool) : tau = + make_var is_top ("fi" ^ string_of_int (fresh_index ())) + +(** Create a fresh unnamed variable (name will be 'fi). *) +let fresh_var_i (is_top : bool) : tau = + make_var is_top ("fi" ^ string_of_int (fresh_index ())) + +(** Create a Fun constructor. *) +let make_fun (lbl, a, r : c_absloc * (tau list) * tau) : tau = + U.uref (Fun {fl = lbl; + f_stamp = fresh_index (); + args = a; + ret = r}) + +(** Create a Ref constructor. *) +let make_ref (lbl, pt : c_absloc * tau) : tau = + U.uref (Ref {rl = lbl; + r_stamp = fresh_index (); + points_to = pt}) + +(** Create a Pair constructor. *) +let make_pair (p, f : tau * tau) : tau = + U.uref (Pair {ptr = p; + p_stamp = fresh_index (); + lam = f}) + +(** Copy the toplevel constructor of [t], putting fresh variables in each + argement of the constructor. *) +let copy_toplevel (t : tau) : tau = + match find t with + Pair _ -> make_pair (fresh_var_i false, fresh_var_i false) + | Ref _ -> make_ref (fresh_c_absloc false, fresh_var_i false) + | Fun f -> + make_fun (fresh_c_absloc false, + List.map (fun _ -> fresh_var_i false) f.args, + fresh_var_i false) + | _ -> die "copy_toplevel" + +let has_same_structure (t : tau) (t' : tau) = + match find t, find t' with + Pair _, Pair _ -> true + | Ref _, Ref _ -> true + | Fun _, Fun _ -> true + | Var _, Var _ -> true + | _ -> false + +let pad_args (fi, tlr : finfo * tau list ref) : unit = + let padding = List.length fi.args - List.length !tlr + in + if padding == 0 then () + else + if padding > 0 then + for i = 1 to padding do + tlr := !tlr @ [fresh_var false] + done + else + for i = 1 to -padding do + fi.args <- fi.args @ [fresh_var false] + done + +(***********************************************************************) +(* *) +(* Constraint Generation/ Resolution *) +(* *) +(***********************************************************************) + +let set_top (b : bool) (t : tau) : unit = + let set_top_down t = + match find t with + Var v -> v.v_top <- b + | Ref r -> set_top_c_absloc r.rl b + | Fun f -> set_top_c_absloc f.fl b + | Pair p -> () + in + iter_tau set_top_down t + +let rec unify_int (t, t' : tau * tau) : unit = + if equal_tau t t' then () + else + let ti, ti' = find t, find t' in + U.unify combine (t, t'); + match ti, ti' with + Var v, Var v' -> + set_top (v.v_top || v'.v_top) t'; + merge_v_lbounds (v, v'); + merge_v_ubounds (v, v') + | Var v, _ -> + set_top (v.v_top || top_tau t') t'; + notify_vlbounds t v; + notify_vubounds t v + | _, Var v -> + set_top (v.v_top || top_tau t) t; + notify_vlbounds t' v; + notify_vubounds t' v + | Ref r, Ref r' -> unify_ref (r, r') + | Fun f, Fun f' -> unify_fun (f, f') + | Pair p, Pair p' -> unify_pair (p, p') + | _ -> raise Inconsistent +and notify_vlbounds (t : tau) (vi : vinfo) : unit = + let notify bounds = + List.iter + (fun b -> + add_constraint (Unification (b.info, copy_toplevel t)); + add_constraint (Leq (b.info, t))) + bounds + in + notify (B.elements vi.v_lbounds) +and notify_vubounds (t : tau) (vi : vinfo) : unit = + let notify bounds = + List.iter + (fun b -> + add_constraint (Unification (b.info, copy_toplevel t)); + add_constraint (Leq (t, b.info))) + bounds + in + notify (B.elements vi.v_ubounds) +and unify_ref (ri, ri' : rinfo * rinfo) : unit = + unify_c_abslocs (ri.rl, ri'.rl); + add_constraint (Unification (ri.points_to, ri'.points_to)) +and unify_fun (fi, fi' : finfo * finfo) : unit = + let rec union_args = function + _, [] -> false + | [], _ -> true + | h :: t, h' :: t' -> + add_constraint (Unification (h, h')); + union_args(t, t') + in + unify_c_abslocs (fi.fl, fi'.fl); + add_constraint (Unification (fi.ret, fi'.ret)); + if union_args (fi.args, fi'.args) then fi.args <- fi'.args +and unify_pair (pi, pi' : pinfo * pinfo) : unit = + add_constraint (Unification (pi.ptr, pi'.ptr)); + add_constraint (Unification (pi.lam, pi'.lam)) +and unify_c_abslocs (l, l' : c_absloc * c_absloc) : unit = + let pick_name (li, li' : c_abslocinfo * c_abslocinfo) = + if starts_with li.l_name label_prefix then li.l_name <- li'.l_name + else () in + let combine_c_absloc (li, li' : c_abslocinfo * c_abslocinfo) : c_abslocinfo = + pick_name (li, li'); + li.l_top <- li.l_top || li'.l_top; + li.aliases <- C.union li.aliases li'.aliases; + li.ubounds <- B.union li.ubounds li'.ubounds; + li.lbounds <- B.union li.lbounds li'.lbounds; + li + in + if !debug_constraints then + Printf.printf + "%s == %s\n" + (string_of_c_absloc l) + (string_of_c_absloc l'); + U.unify combine_c_absloc (l, l') +and merge_v_lbounds (vi, vi' : vinfo * vinfo) : unit = + vi'.v_lbounds <- B.union vi.v_lbounds vi'.v_lbounds; +and merge_v_ubounds (vi, vi' : vinfo * vinfo) : unit = + vi'.v_ubounds <- B.union vi.v_ubounds vi'.v_ubounds; +(** Pick the representative info for two tinfo's. This function + prefers the first argument when both arguments are the same + structure, but when one type is a structure and the other is a + var, it picks the structure. All other actions (e.g., updating + the info) is done in unify_int *) +and combine (ti, ti' : tinfo * tinfo) : tinfo = + match ti, ti' with + Var _, _ -> ti' + | _, _ -> ti +and leq_int (t, t') : unit = + if equal_tau t t' then () + else + let ti, ti' = find t, find t' in + match ti, ti' with + Var v, Var v' -> + v.v_ubounds <- B.add (make_tau_bound t') v.v_ubounds; + v'.v_lbounds <- B.add (make_tau_bound t) v'.v_lbounds + | Var v, _ -> + add_constraint (Unification (t, copy_toplevel t')); + add_constraint (Leq (t, t')) + | _, Var v -> + add_constraint (Unification (t', copy_toplevel t)); + add_constraint (Leq (t, t')) + | Ref r, Ref r' -> leq_ref (r, r') + | Fun f, Fun f' -> + (* TODO: check, why not do subtyping here? *) + add_constraint (Unification (t, t')) + | Pair pr, Pair pr' -> + add_constraint (Leq (pr.ptr, pr'.ptr)); + add_constraint (Leq (pr.lam, pr'.lam)) + | _ -> raise Inconsistent +and leq_ref (ri, ri') : unit = + leq_c_absloc (ri.rl, ri'.rl); + add_constraint (Unification (ri.points_to, ri'.points_to)) +and leq_c_absloc (l, l') : unit = + let li, li' = find l, find l' in + if !debug_constraints then + Printf.printf + "%s <= %s\n" + (string_of_c_absloc l) + (string_of_c_absloc l'); + if U.equal (l, l') then () + else + begin + li.ubounds <- B.add (make_bound l') li.ubounds; + li'.lbounds <- B.add (make_bound l) li'.lbounds + end +and add_constraint_int (c : tconstraint) (toplev : bool) = + if !debug_constraints && toplev then + begin + Printf.printf "%d:>" !toplev_count; + print_constraint c; + incr toplev_count + end + else + if !debug_constraints then print_constraint c else (); + insist (can_add_constraints ()) + "can't add constraints after compute_results is called"; + begin + match c with + Unification _ -> Q.add c eq_worklist + | Leq _ -> Q.add c leq_worklist + end; + solve_constraints () (* solve online *) +and add_constraint (c : tconstraint) = + add_constraint_int c false +and add_toplev_constraint (c : tconstraint) = + if !print_constraints && not !debug_constraints then + begin + Printf.printf "%d:>" !toplev_count; + incr toplev_count; + print_constraint c + end + else (); + add_constraint_int c true +and fetch_constraint () : tconstraint option = + try Some (Q.take eq_worklist) + with Q.Empty -> + begin + try Some (Q.take leq_worklist) + with Q.Empty -> None + end +(** The main solver loop. *) +and solve_constraints () : unit = + match fetch_constraint () with + None -> () + | Some c -> + begin + match c with + Unification (t, t') -> unify_int (t, t') + | Leq (t, t') -> + if !no_sub then unify_int (t, t') + else leq_int (t, t') + end; + solve_constraints () + +(***********************************************************************) +(* *) +(* Interface Functions *) +(* *) +(***********************************************************************) + +(** Return the contents of the lvalue. *) +let rvalue (lv : lvalue) : tau = + lv.contents + +(** Dereference the rvalue. If it does not have enough structure to + support the operation, then the correct structure is added via new + unification constraints. *) +let rec deref (t : tau) : lvalue = + match find t with + Pair p -> + begin + match find p.ptr with + | Var _ -> + let is_top = top_tau p.ptr in + let points_to = fresh_var is_top in + let l = fresh_c_absloc is_top in + let r = make_ref (l, points_to) + in + add_toplev_constraint (Unification (p.ptr, r)); + make_lval (l, points_to) + | Ref r -> make_lval (r.rl, r.points_to) + | _ -> raise WellFormed + end + | Var v -> + let is_top = top_tau t in + add_toplev_constraint + (Unification (t, make_pair (fresh_var is_top, fresh_var is_top))); + deref t + | _ -> raise WellFormed + + +(** Form the union of [t] and [t'], if it doesn't exist already. *) +let join (t : tau) (t' : tau) : tau = + let s, s' = get_stamp t, get_stamp t' in + try H.find join_cache (s, s') + with Not_found -> + let t'' = fresh_var false in + add_toplev_constraint (Leq (t, t'')); + add_toplev_constraint (Leq (t', t'')); + H.add join_cache (s, s') t''; + t'' + +(** Form the union of a list [tl], expected to be the initializers of some + structure or array type. *) +let join_inits (tl : tau list) : tau = + let t' = fresh_var false in + List.iter (function t -> add_toplev_constraint (Leq (t, t'))) tl; + t' + +(** Take the address of an lvalue. Does not add constraints. *) +let address (lv : lvalue) : tau = + make_pair (make_ref (lv.l, lv.contents), fresh_var false ) + +(** No instantiation in this analysis *) +let instantiate (lv : lvalue) (i : int) : lvalue = + lv + +(** Constraint generated from assigning [t] to [lv]. *) +let assign (lv : lvalue) (t : tau) : unit = + add_toplev_constraint (Leq (t, lv.contents)) + +let assign_ret (i : int) (lv : lvalue) (t : tau) : unit = + add_toplev_constraint (Leq (t, lv.contents)) + +(** Project out the first (ref) component or a pair. If the argument + [t] has no discovered structure, raise NoContents. *) +let proj_ref (t : tau) : tau = + match find t with + Pair p -> p.ptr + | Var v -> raise NoContents + | _ -> raise WellFormed + +(* Project out the second (fun) component of a pair. If the argument + [t] has no discovered structure, create it on the fly by adding + constraints. *) +let proj_fun (t : tau) : tau = + match find t with + Pair p -> p.lam + | Var v -> + let p, f = fresh_var false, fresh_var false in + add_toplev_constraint (Unification (t, make_pair (p, f))); + f + | _ -> raise WellFormed + +let get_args (t : tau) : tau list = + match find t with + Fun f -> f.args + | _ -> raise WellFormed + +let get_finfo (t : tau) : finfo = + match find t with + Fun f -> f + | _ -> raise WellFormed + +(** Function type [t] is applied to the arguments [actuals]. Unifies + the actuals with the formals of [t]. If no functions have been + discovered for [t] yet, create a fresh one and unify it with + t. The result is the return value of the function plus the index + of this application site. + + For this analysis, the application site is always 0 *) +let apply (t : tau) (al : tau list) : (tau * int) = + let f = proj_fun t in + let actuals = ref al in + let fi, ret = + match find f with + Fun fi -> fi, fi.ret + | Var v -> + let new_l, new_ret, new_args = + fresh_c_absloc false, + fresh_var false, + List.map (function _ -> fresh_var false) !actuals + in + let new_fun = make_fun (new_l, new_args, new_ret) in + add_toplev_constraint (Unification (new_fun, f)); + (get_finfo new_fun, new_ret) + | _ -> raise WellFormed + in + pad_args (fi, actuals); + List.iter2 + (fun actual -> fun formal -> + add_toplev_constraint (Leq (actual, formal))) + !actuals fi.args; + (ret, 0) + +let make_undefined_lvalue () = + make_lval (make_c_absloc false "undefined" None, + make_var true "undefined") + +let make_undefined_rvalue () = + make_var true "undefined" + +let assign_undefined (lv : lvalue) : unit = + assign lv (make_undefined_rvalue ()) + +let apply_undefined (al : tau list) : (tau * int) = + List.iter + (fun actual -> assign (make_undefined_lvalue ()) actual) + al; + (fresh_var true, 0) + +(** Create a new function type with name [name], list of formal + arguments [formals], and return value [ret]. Adds no constraints. *) +let make_function (name : string) (formals : lvalue list) (ret : tau) : tau = + let f = make_fun (make_c_absloc false name None, + List.map (fun x -> rvalue x) formals, + ret) + in + make_pair (fresh_var false, f) + +(** Create an lvalue. *) +let make_lvalue (b : bool ) (name : string) (vio : Cil.varinfo option) = + make_lval (make_c_absloc false name vio, + make_var false name) + +(** Create a fresh named variable. *) +let make_fresh (name : string) : tau = + make_var false name + +(** The default type for abslocs. *) +let bottom () : tau = + make_var false "bottom" + +(** Unify the result of a function with its return value. *) +let return (t : tau) (t' : tau) = + add_toplev_constraint (Leq (t', t)) + +(***********************************************************************) +(* *) +(* Query/Extract Solutions *) +(* *) +(***********************************************************************) + +module IntHash = Hashtbl.Make (struct + type t = int + let equal x y = x = y + let hash x = x + end) + +(** todo : reached_top !! *) +let collect_ptset_fast (l : c_absloc) : abslocset = + let onpath : unit IntHash.t = IntHash.create 101 in + let path : c_absloc list ref = ref [] in + let compute_path (i : int) = + keep_until (fun l -> i = get_c_absloc_stamp l) !path in + let collapse_cycle (cycle : c_absloc list) = + match cycle with + l :: ls -> + List.iter (fun l' -> unify_c_abslocs (l, l')) ls; + C.empty + | [] -> die "collapse cycle" in + let rec flow_step (l : c_absloc) : abslocset = + let stamp = get_c_absloc_stamp l in + if IntHash.mem onpath stamp then (* already seen *) + collapse_cycle (compute_path stamp) + else + let li = find l in + IntHash.add onpath stamp (); + path := l :: !path; + B.iter + (fun lb -> li.aliases <- C.union li.aliases (flow_step lb.info)) + li.lbounds; + path := List.tl !path; + IntHash.remove onpath stamp; + li.aliases + in + insist (can_query_graph ()) "collect_ptset_fast can't query graph"; + if get_flow_computed l then get_aliases l + else + begin + set_flow_computed l; + flow_step l + end + +(** this is a quadratic flow step. keep it for debugging the fast + version above. *) +let collect_ptset_slow (l : c_absloc) : abslocset = + let onpath : unit IntHash.t = IntHash.create 101 in + let rec flow_step (l : c_absloc) : abslocset = + if top_c_absloc l then raise ReachedTop + else + let stamp = get_c_absloc_stamp l in + if IntHash.mem onpath stamp then C.empty + else + let li = find l in + IntHash.add onpath stamp (); + B.iter + (fun lb -> li.aliases <- C.union li.aliases (flow_step lb.info)) + li.lbounds; + li.aliases + in + insist (can_query_graph ()) "collect_ptset_slow can't query graph"; + if get_flow_computed l then get_aliases l + else + begin + set_flow_computed l; + flow_step l + end + +let collect_ptset = + collect_ptset_slow + (* if !debug_flow_step then collect_ptset_slow + else collect_ptset_fast *) + +let may_alias (t1 : tau) (t2 : tau) : bool = + let get_l (t : tau) : c_absloc = + match find (proj_ref t) with + Ref r -> r.rl + | Var v -> raise NoContents + | _ -> raise WellFormed + in + try + let l1 = get_l t1 + and l2 = get_l t2 in + equal_c_absloc l1 l2 || + not (C.is_empty (C.inter (collect_ptset l1) (collect_ptset l2))) + with + NoContents -> false + | ReachedTop -> raise UnknownLocation + +let points_to_aux (t : tau) : absloc list = + try + match find (proj_ref t) with + Var v -> [] + | Ref r -> C.elements (collect_ptset r.rl) + | _ -> raise WellFormed + with + NoContents -> [] + | ReachedTop -> raise UnknownLocation + +let points_to (lv : lvalue) : Cil.varinfo list = + let rec get_vinfos l : Cil.varinfo list = + match l with + [] -> [] + | (_, _, Some h) :: t -> h :: get_vinfos t + | (_, _, None) :: t -> get_vinfos t + in + get_vinfos (points_to_aux lv.contents) + +let epoints_to (t : tau) : Cil.varinfo list = + let rec get_vinfos l : Cil.varinfo list = match l with + [] -> [] + | (_, _, Some h) :: t -> h :: get_vinfos t + | (_, _, None) :: t -> get_vinfos t + in + get_vinfos (points_to_aux t) + +let points_to_names (lv : lvalue) : string list = + List.map (fun v -> v.vname) (points_to lv) + +let absloc_points_to (lv : lvalue) : absloc list = + points_to_aux lv.contents + +let absloc_epoints_to (t : tau) : absloc list = + points_to_aux t + +let absloc_of_lvalue (lv : lvalue) : absloc = + (find lv.l).loc + +let absloc_eq = equal_absloc diff --git a/cil/src/ext/pta/olf.mli b/cil/src/ext/pta/olf.mli new file mode 100644 index 00000000..43794825 --- /dev/null +++ b/cil/src/ext/pta/olf.mli @@ -0,0 +1,80 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +type lvalue +type tau +type absloc + +(** Raised if a pointer flows to an undefined function. + We assume that such a function can have any effect on the pointer's contents +*) +exception UnknownLocation + +val debug : bool ref +val debug_constraints : bool ref +val debug_aliases : bool ref +val smart_aliases : bool ref +val finished_constraints : unit -> unit +val print_constraints : bool ref +val no_flow : bool ref +val no_sub : bool ref +val analyze_mono : bool ref +val solve_constraints : unit -> unit (* only for compatability with Golf *) +val rvalue : lvalue -> tau +val deref : tau -> lvalue +val join : tau -> tau -> tau +val join_inits : tau list -> tau +val address : lvalue -> tau +val instantiate : lvalue -> int -> lvalue +val assign : lvalue -> tau -> unit +val assign_ret : int -> lvalue -> tau -> unit +val apply : tau -> tau list -> (tau * int) +val apply_undefined : tau list -> (tau * int) +val assign_undefined : lvalue -> unit +val make_function : string -> lvalue list -> tau -> tau +val make_lvalue : bool -> string -> (Cil.varinfo option) -> lvalue +val bottom : unit -> tau +val return : tau -> tau -> unit +val make_fresh : string -> tau +val points_to_names : lvalue -> string list +val points_to : lvalue -> Cil.varinfo list +val epoints_to : tau -> Cil.varinfo list +val string_of_lvalue : lvalue -> string +val may_alias : tau -> tau -> bool + +val absloc_points_to : lvalue -> absloc list +val absloc_epoints_to : tau -> absloc list +val absloc_of_lvalue : lvalue -> absloc +val absloc_eq : (absloc * absloc) -> bool +val d_absloc : unit -> absloc -> Pretty.doc diff --git a/cil/src/ext/pta/ptranal.ml b/cil/src/ext/pta/ptranal.ml new file mode 100644 index 00000000..c91bda81 --- /dev/null +++ b/cil/src/ext/pta/ptranal.ml @@ -0,0 +1,597 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +exception Bad_return +exception Bad_function + + +open Cil + +module H = Hashtbl + +module A = Olf +exception UnknownLocation = A.UnknownLocation + +type access = A.lvalue * bool + +type access_map = (lval, access) H.t + +(** a mapping from varinfo's back to fundecs *) +module VarInfoKey = +struct + type t = varinfo + let compare v1 v2 = v1.vid - v2.vid +end + +module F = Map.Make (VarInfoKey) + + +(***********************************************************************) +(* *) +(* Global Variables *) +(* *) +(***********************************************************************) + +let model_strings = ref false +let print_constraints = A.print_constraints +let debug_constraints = A.debug_constraints +let debug_aliases = A.debug_aliases +let smart_aliases = A.smart_aliases +let debug = A.debug +let analyze_mono = A.analyze_mono +let no_flow = A.no_flow +let no_sub = A.no_sub +let fun_ptrs_as_funs = ref false +let show_progress = ref false +let debug_may_aliases = ref false + +let found_undefined = ref false + +let conservative_undefineds = ref false + +let current_fundec : fundec option ref = ref None + +let fun_access_map : (fundec, access_map) H.t = H.create 64 + +(* A mapping from varinfos to fundecs *) +let fun_varinfo_map = ref F.empty + +let current_ret : A.tau option ref = ref None + +let lvalue_hash : (varinfo,A.lvalue) H.t = H.create 64 + +let expressions : (exp,A.tau) H.t = H.create 64 + +let lvalues : (lval,A.lvalue) H.t = H.create 64 + +let fresh_index : (unit -> int) = + let count = ref 0 in + fun () -> + incr count; + !count + +let alloc_names = [ + "malloc"; + "calloc"; + "realloc"; + "xmalloc"; + "__builtin_alloca"; + "alloca"; + "kmalloc" +] + +let all_globals : varinfo list ref = ref [] +let all_functions : fundec list ref = ref [] + + +(***********************************************************************) +(* *) +(* Utility Functions *) +(* *) +(***********************************************************************) + +let is_undefined_fun = function + Lval (lh, o) -> + if isFunctionType (typeOfLval (lh, o)) then + match lh with + Var v -> v.vstorage = Extern + | _ -> false + else false + | _ -> false + +let is_alloc_fun = function + Lval (lh, o) -> + if isFunctionType (typeOfLval (lh, o)) then + match lh with + Var v -> List.mem v.vname alloc_names + | _ -> false + else false + | _ -> false + +let next_alloc = function + Lval (Var v, o) -> + let name = Printf.sprintf "%s@%d" v.vname (fresh_index ()) + in + A.address (A.make_lvalue false name (Some v)) (* check *) + | _ -> raise Bad_return + +let is_effect_free_fun = function + Lval (lh, o) when isFunctionType (typeOfLval (lh, o)) -> + begin + match lh with + Var v -> + begin + try ("CHECK_" = String.sub v.vname 0 6) + with Invalid_argument _ -> false + end + | _ -> false + end + | _ -> false + + +(***********************************************************************) +(* *) +(* AST Traversal Functions *) +(* *) +(***********************************************************************) + +(* should do nothing, might need to worry about Index case *) +(* let analyzeOffset (o : offset ) : A.tau = A.bottom () *) + +let analyze_var_decl (v : varinfo ) : A.lvalue = + try H.find lvalue_hash v + with Not_found -> + let lv = A.make_lvalue false v.vname (Some v) + in + H.add lvalue_hash v lv; + lv + +let isFunPtrType (t : typ) : bool = + match t with + TPtr (t, _) -> isFunctionType t + | _ -> false + +let rec analyze_lval (lv : lval ) : A.lvalue = + let find_access (l : A.lvalue) (is_var : bool) : A.lvalue = + match !current_fundec with + None -> l + | Some f -> + let accesses = H.find fun_access_map f in + if H.mem accesses lv then l + else + begin + H.add accesses lv (l, is_var); + l + end in + let result = + match lv with + Var v, _ -> (* instantiate every syntactic occurrence of a function *) + let alv = + if isFunctionType (typeOfLval lv) then + A.instantiate (analyze_var_decl v) (fresh_index ()) + else analyze_var_decl v + in + find_access alv true + | Mem e, _ -> + (* assert (not (isFunctionType(typeOf(e))) ); *) + let alv = + if !fun_ptrs_as_funs && isFunPtrType (typeOf e) then + analyze_expr_as_lval e + else A.deref (analyze_expr e) + in + find_access alv false + in + H.replace lvalues lv result; + result +and analyze_expr_as_lval (e : exp) : A.lvalue = + match e with + Lval l -> analyze_lval l + | _ -> assert false (* todo -- other kinds of expressions? *) +and analyze_expr (e : exp ) : A.tau = + let result = + match e with + Const (CStr s) -> + if !model_strings then + A.address (A.make_lvalue + false + s + (Some (makeVarinfo false s charConstPtrType))) + else A.bottom () + | Const c -> A.bottom () + | Lval l -> A.rvalue (analyze_lval l) + | SizeOf _ -> A.bottom () + | SizeOfStr _ -> A.bottom () + | AlignOf _ -> A.bottom () + | UnOp (op, e, t) -> analyze_expr e + | BinOp (op, e, e', t) -> A.join (analyze_expr e) (analyze_expr e') + | CastE (t, e) -> analyze_expr e + | AddrOf l -> + if !fun_ptrs_as_funs && isFunctionType (typeOfLval l) then + A.rvalue (analyze_lval l) + else A.address (analyze_lval l) + | StartOf l -> A.address (analyze_lval l) + | AlignOfE _ -> A.bottom () + | SizeOfE _ -> A.bottom () + in + H.add expressions e result; + result + + +(* check *) +let rec analyze_init (i : init ) : A.tau = + match i with + SingleInit e -> analyze_expr e + | CompoundInit (t, oi) -> + A.join_inits (List.map (function (_, i) -> analyze_init i) oi) + +let analyze_instr (i : instr ) : unit = + match i with + Set (lval, rhs, l) -> + A.assign (analyze_lval lval) (analyze_expr rhs) + | Call (res, fexpr, actuals, l) -> + if not (isFunctionType (typeOf fexpr)) then + () (* todo : is this a varargs? *) + else if is_alloc_fun fexpr then + begin + if !debug then print_string "Found allocation function...\n"; + match res with + Some r -> A.assign (analyze_lval r) (next_alloc fexpr) + | None -> () + end + else if is_effect_free_fun fexpr then + List.iter (fun e -> ignore (analyze_expr e)) actuals + else (* todo : check to see if the thing is an undefined function *) + let fnres, site = + if is_undefined_fun fexpr & !conservative_undefineds then + A.apply_undefined (List.map analyze_expr actuals) + else + A.apply (analyze_expr fexpr) (List.map analyze_expr actuals) + in + begin + match res with + Some r -> + begin + A.assign_ret site (analyze_lval r) fnres; + found_undefined := true; + end + | None -> () + end + | Asm _ -> () + +let rec analyze_stmt (s : stmt ) : unit = + match s.skind with + Instr il -> List.iter analyze_instr il + | Return (eo, l) -> + begin + match eo with + Some e -> + begin + match !current_ret with + Some ret -> A.return ret (analyze_expr e) + | None -> raise Bad_return + end + | None -> () + end + | Goto (s', l) -> () (* analyze_stmt(!s') *) + | If (e, b, b', l) -> + (* ignore the expression e; expressions can't be side-effecting *) + analyze_block b; + analyze_block b' + | Switch (e, b, sl, l) -> + analyze_block b; + List.iter analyze_stmt sl +(* + | Loop (b, l, _, _) -> analyze_block b +*) + | While (_, b, _) -> analyze_block b + | DoWhile (_, b, _) -> analyze_block b + | For (bInit, _, bIter, b, _) -> + analyze_block bInit; + analyze_block bIter; + analyze_block b + | Block b -> analyze_block b + | TryFinally (b, h, _) -> + analyze_block b; + analyze_block h + | TryExcept (b, (il, _), h, _) -> + analyze_block b; + List.iter analyze_instr il; + analyze_block h + | Break l -> () + | Continue l -> () + + +and analyze_block (b : block ) : unit = + List.iter analyze_stmt b.bstmts + +let analyze_function (f : fundec ) : unit = + let oldlv = analyze_var_decl f.svar in + let ret = A.make_fresh (f.svar.vname ^ "_ret") in + let formals = List.map analyze_var_decl f.sformals in + let newf = A.make_function f.svar.vname formals ret in + if !show_progress then + Printf.printf "Analyzing function %s\n" f.svar.vname; + fun_varinfo_map := F.add f.svar f (!fun_varinfo_map); + current_fundec := Some f; + H.add fun_access_map f (H.create 8); + A.assign oldlv newf; + current_ret := Some ret; + analyze_block f.sbody + +let analyze_global (g : global ) : unit = + match g with + GVarDecl (v, l) -> () (* ignore (analyze_var_decl(v)) -- no need *) + | GVar (v, init, l) -> + all_globals := v :: !all_globals; + begin + match init.init with + Some i -> A.assign (analyze_var_decl v) (analyze_init i) + | None -> ignore (analyze_var_decl v) + end + | GFun (f, l) -> + all_functions := f :: !all_functions; + analyze_function f + | _ -> () + +let analyze_file (f : file) : unit = + iterGlobals f analyze_global + + +(***********************************************************************) +(* *) +(* High-level Query Interface *) +(* *) +(***********************************************************************) + +(* Same as analyze_expr, but no constraints. *) +let rec traverse_expr (e : exp) : A.tau = + H.find expressions e + +and traverse_expr_as_lval (e : exp) : A.lvalue = + match e with + | Lval l -> traverse_lval l + | _ -> assert false (* todo -- other kinds of expressions? *) + +and traverse_lval (lv : lval ) : A.lvalue = + H.find lvalues lv + +let may_alias (e1 : exp) (e2 : exp) : bool = + let tau1,tau2 = traverse_expr e1, traverse_expr e2 in + let result = A.may_alias tau1 tau2 in + if !debug_may_aliases then + begin + let doc1 = d_exp () e1 in + let doc2 = d_exp () e2 in + let s1 = Pretty.sprint ~width:30 doc1 in + let s2 = Pretty.sprint ~width:30 doc2 in + Printf.printf + "%s and %s may alias? %s\n" + s1 + s2 + (if result then "yes" else "no") + end; + result + +let resolve_lval (lv : lval) : varinfo list = + A.points_to (traverse_lval lv) + +let resolve_exp (e : exp) : varinfo list = + A.epoints_to (traverse_expr e) + +let resolve_funptr (e : exp) : fundec list = + let varinfos = A.epoints_to (traverse_expr e) in + List.fold_left + (fun fdecs -> fun vinf -> + try F.find vinf !fun_varinfo_map :: fdecs + with Not_found -> fdecs) + [] + varinfos + +let count_hash_elts h = + let result = ref 0 in + H.iter (fun _ -> fun _ -> incr result) lvalue_hash; + !result + +let compute_may_aliases (b : bool) : unit = + let rec compute_may_aliases_aux (exps : exp list) = + match exps with + [] -> () + | h :: t -> + ignore (List.map (may_alias h) t); + compute_may_aliases_aux t + and exprs : exp list ref = ref [] in + H.iter (fun e -> fun _ -> exprs := e :: !exprs) expressions; + compute_may_aliases_aux !exprs + + +let compute_results (show_sets : bool) : unit = + let total_pointed_to = ref 0 + and total_lvalues = H.length lvalue_hash + and counted_lvalues = ref 0 + and lval_elts : (string * (string list)) list ref = ref [] in + let print_result (name, set) = + let rec print_set s = + match s with + [] -> () + | h :: [] -> print_string h + | h :: t -> + print_string (h ^ ", "); + print_set t + and ptsize = List.length set in + total_pointed_to := !total_pointed_to + ptsize; + if ptsize > 0 then + begin + print_string (name ^ "(" ^ (string_of_int ptsize) ^ ") -> "); + print_set set; + print_newline () + end + in + (* Make the most pessimistic assumptions about globals if an + undefined function is present. Such a function can write to every + global variable *) + let hose_globals () : unit = + List.iter + (fun vd -> A.assign_undefined (analyze_var_decl vd)) + !all_globals + in + let show_progress_fn (counted : int ref) (total : int) : unit = + incr counted; + if !show_progress then + Printf.printf "Computed flow for %d of %d sets\n" !counted total + in + if !conservative_undefineds && !found_undefined then hose_globals (); + A.finished_constraints (); + if show_sets then + begin + print_endline "Computing points-to sets..."; + Hashtbl.iter + (fun vinf -> fun lv -> + show_progress_fn counted_lvalues total_lvalues; + try lval_elts := (vinf.vname, A.points_to_names lv) :: !lval_elts + with A.UnknownLocation -> ()) + lvalue_hash; + List.iter print_result !lval_elts; + Printf.printf + "Total number of things pointed to: %d\n" + !total_pointed_to + end; + if !debug_may_aliases then + begin + Printf.printf "Printing may alias relationships\n"; + compute_may_aliases true + end + +let print_types () : unit = + print_string "Printing inferred types of lvalues...\n"; + Hashtbl.iter + (fun vi -> fun lv -> + Printf.printf "%s : %s\n" vi.vname (A.string_of_lvalue lv)) + lvalue_hash + + + +(** Alias queries. For each function, gather sets of locals, formals, and + globals. Do n^2 work for each of these functions, reporting whether or not + each pair of values is aliased. Aliasing is determined by taking points-to + set intersections. +*) +let compute_aliases = compute_may_aliases + + +(***********************************************************************) +(* *) +(* Abstract Location Interface *) +(* *) +(***********************************************************************) + +type absloc = A.absloc + +let rec lvalue_of_varinfo (vi : varinfo) : A.lvalue = + H.find lvalue_hash vi + +let lvalue_of_lval = traverse_lval +let tau_of_expr = traverse_expr + +(** return an abstract location for a varinfo, resp. lval *) +let absloc_of_varinfo vi = + A.absloc_of_lvalue (lvalue_of_varinfo vi) + +let absloc_of_lval lv = + A.absloc_of_lvalue (lvalue_of_lval lv) + +let absloc_e_points_to e = + A.absloc_epoints_to (tau_of_expr e) + +let absloc_lval_aliases lv = + A.absloc_points_to (lvalue_of_lval lv) + +(* all abslocs that e transitively points to *) +let absloc_e_transitive_points_to (e : Cil.exp) : absloc list = + let rec lv_trans_ptsto (worklist : varinfo list) (acc : varinfo list) : absloc list = + match worklist with + [] -> List.map absloc_of_varinfo acc + | vi :: wklst'' -> + if List.mem vi acc then lv_trans_ptsto wklst'' acc + else + lv_trans_ptsto + (List.rev_append + (A.points_to (lvalue_of_varinfo vi)) + wklst'') + (vi :: acc) + in + lv_trans_ptsto (A.epoints_to (tau_of_expr e)) [] + +let absloc_eq a b = A.absloc_eq (a, b) + +let d_absloc: unit -> absloc -> Pretty.doc = A.d_absloc + + +let ptrAnalysis = ref false +let ptrResults = ref false +let ptrTypes = ref false + + + +(** Turn this into a CIL feature *) +let feature : featureDescr = { + fd_name = "ptranal"; + fd_enabled = ptrAnalysis; + fd_description = "alias analysis"; + fd_extraopt = [ + ("--ptr_may_aliases", + Arg.Unit (fun _ -> debug_may_aliases := true), + "Print out results of may alias queries"); + ("--ptr_unify", Arg.Unit (fun _ -> no_sub := true), + "Make the alias analysis unification-based"); + ("--ptr_model_strings", Arg.Unit (fun _ -> model_strings := true), + "Make the alias analysis model string constants"); + ("--ptr_conservative", + Arg.Unit (fun _ -> conservative_undefineds := true), + "Treat undefineds conservatively in alias analysis"); + ("--ptr_results", Arg.Unit (fun _ -> ptrResults := true), + "print the results of the alias analysis"); + ("--ptr_mono", Arg.Unit (fun _ -> analyze_mono := true), + "run alias analysis monomorphically"); + ("--ptr_types",Arg.Unit (fun _ -> ptrTypes := true), + "print inferred points-to analysis types") + ]; + fd_doit = (function (f: file) -> + analyze_file f; + compute_results !ptrResults; + if !ptrTypes then print_types ()); + fd_post_check = false (* No changes *) +} diff --git a/cil/src/ext/pta/ptranal.mli b/cil/src/ext/pta/ptranal.mli new file mode 100644 index 00000000..36eb7a54 --- /dev/null +++ b/cil/src/ext/pta/ptranal.mli @@ -0,0 +1,156 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* Flags *) +(* *) +(***********************************************************************) + +(** Print extra debugging info *) +val debug : bool ref + +(** Debug constraints (print all constraints) *) +val debug_constraints : bool ref + +(** Debug smart alias queries *) +val debug_aliases : bool ref + +(** Debug may alias queries *) +val debug_may_aliases : bool ref + +val smart_aliases : bool ref + +(** Print out the top level constraints *) +val print_constraints : bool ref + +(** Make the analysis monomorphic *) +val analyze_mono : bool ref + +(** Disable subtyping *) +val no_sub : bool ref + +(** Make the flow step a no-op *) +val no_flow : bool ref + +(** Show the progress of the flow step *) +val show_progress : bool ref + +(** Treat undefined functions conservatively *) +val conservative_undefineds : bool ref + +(***********************************************************************) +(* *) +(* Building the Points-to Graph *) +(* *) +(***********************************************************************) + +(** Analyze a file *) +val analyze_file : Cil.file -> unit + +(** Print the type of each lvalue in the program *) +val print_types : unit -> unit + +(***********************************************************************) +(* *) +(* High-level Query Interface *) +(* *) +(***********************************************************************) + +(** If undefined functions are analyzed conservatively, any of the + high-level queries may raise this exception *) +exception UnknownLocation + +val may_alias : Cil.exp -> Cil.exp -> bool + +val resolve_lval : Cil.lval -> (Cil.varinfo list) + +val resolve_exp : Cil.exp -> (Cil.varinfo list) + +val resolve_funptr : Cil.exp -> (Cil.fundec list) + +(***********************************************************************) +(* *) +(* Low-level Query Interface *) +(* *) +(***********************************************************************) + +(** type for abstract locations *) +type absloc + +(** Give an abstract location for a varinfo *) +val absloc_of_varinfo : Cil.varinfo -> absloc + +(** Give an abstract location for an Cil lvalue *) +val absloc_of_lval : Cil.lval -> absloc + +(** may the two abstract locations be aliased? *) +val absloc_eq : absloc -> absloc -> bool + +val absloc_e_points_to : Cil.exp -> absloc list +val absloc_e_transitive_points_to : Cil.exp -> absloc list + +val absloc_lval_aliases : Cil.lval -> absloc list + +(** Print a string representing an absloc, for debugging. *) +val d_absloc : unit -> absloc -> Pretty.doc + + +(***********************************************************************) +(* *) +(* Printing results *) +(* *) +(***********************************************************************) + +(** Compute points to sets for variables. If true is passed, print the sets. *) +val compute_results : bool -> unit + +(* + +Deprecated these. -- jk + +(** Compute alias relationships. If true is passed, print all alias pairs. *) + val compute_aliases : bool -> unit + +(** Compute alias frequncy *) +val compute_alias_frequency : unit -> unit + + +*) + +val compute_aliases : bool -> unit + + +val feature: Cil.featureDescr diff --git a/cil/src/ext/pta/setp.ml b/cil/src/ext/pta/setp.ml new file mode 100644 index 00000000..a39b9722 --- /dev/null +++ b/cil/src/ext/pta/setp.ml @@ -0,0 +1,342 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +(* $Id: setp.ml,v 1.3 2003-02-19 19:26:31 jkodumal Exp $ *) + +(* Sets over ordered types *) + +module type PolyOrderedType = + sig + type 'a t + val compare: 'a t -> 'a t -> int + end + +module type S = + sig + type 'a elt + type 'a t + val empty: 'a t + val is_empty: 'a t -> bool + val mem: 'a elt -> 'a t -> bool + val add: 'a elt -> 'a t -> 'a t + val singleton: 'a elt -> 'a t + val remove: 'a elt -> 'a t -> 'a t + val union: 'a t -> 'a t -> 'a t + val inter: 'a t -> 'a t -> 'a t + val diff: 'a t -> 'a t -> 'a t + val compare: 'a t -> 'a t -> int + val equal: 'a t -> 'a t -> bool + val subset: 'a t -> 'a t -> bool + val iter: ('a elt -> unit) -> 'a t -> unit + val fold: ('a elt -> 'b -> 'b) -> 'a t -> 'b -> 'b + val for_all: ('a elt -> bool) -> 'a t -> bool + val exists: ('a elt -> bool) -> 'a t -> bool + val filter: ('a elt -> bool) -> 'a t -> 'a t + val partition: ('a elt -> bool) -> 'a t -> 'a t * 'a t + val cardinal: 'a t -> int + val elements: 'a t -> 'a elt list + val min_elt: 'a t -> 'a elt + val max_elt: 'a t -> 'a elt + val choose: 'a t -> 'a elt + end + +module Make(Ord: PolyOrderedType) = + struct + type 'a elt = 'a Ord.t + type 'a t = Empty | Node of 'a t * 'a elt * 'a t * int + + (* Sets are represented by balanced binary trees (the heights of the + children differ by at most 2 *) + + let height = function + Empty -> 0 + | Node(_, _, _, h) -> h + + (* Creates a new node with left son l, value x and right son r. + l and r must be balanced and | height l - height r | <= 2. + Inline expansion of height for better speed. *) + + let create l x r = + let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h in + let hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in + Node(l, x, r, (if hl >= hr then hl + 1 else hr + 1)) + + (* Same as create, but performs one step of rebalancing if necessary. + Assumes l and r balanced. + Inline expansion of create for better speed in the most frequent case + where no rebalancing is required. *) + + let bal l x r = + let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h in + let hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in + if hl > hr + 2 then begin + match l with + Empty -> invalid_arg "Set.bal" + | Node(ll, lv, lr, _) -> + if height ll >= height lr then + create ll lv (create lr x r) + else begin + match lr with + Empty -> invalid_arg "Set.bal" + | Node(lrl, lrv, lrr, _)-> + create (create ll lv lrl) lrv (create lrr x r) + end + end else if hr > hl + 2 then begin + match r with + Empty -> invalid_arg "Set.bal" + | Node(rl, rv, rr, _) -> + if height rr >= height rl then + create (create l x rl) rv rr + else begin + match rl with + Empty -> invalid_arg "Set.bal" + | Node(rll, rlv, rlr, _) -> + create (create l x rll) rlv (create rlr rv rr) + end + end else + Node(l, x, r, (if hl >= hr then hl + 1 else hr + 1)) + + (* Same as bal, but repeat rebalancing until the final result + is balanced. *) + + let rec join l x r = + match bal l x r with + Empty -> invalid_arg "Set.join" + | Node(l', x', r', _) as t' -> + let d = height l' - height r' in + if d < -2 || d > 2 then join l' x' r' else t' + + (* Merge two trees l and r into one. + All elements of l must precede the elements of r. + Assumes | height l - height r | <= 2. *) + + let rec merge t1 t2 = + match (t1, t2) with + (Empty, t) -> t + | (t, Empty) -> t + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + bal l1 v1 (bal (merge r1 l2) v2 r2) + + (* Same as merge, but does not assume anything about l and r. *) + + let rec concat t1 t2 = + match (t1, t2) with + (Empty, t) -> t + | (t, Empty) -> t + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + join l1 v1 (join (concat r1 l2) v2 r2) + + (* Splitting *) + + let rec split x = function + Empty -> + (Empty, None, Empty) + | Node(l, v, r, _) -> + let c = Ord.compare x v in + if c = 0 then (l, Some v, r) + else if c < 0 then + let (ll, vl, rl) = split x l in (ll, vl, join rl v r) + else + let (lr, vr, rr) = split x r in (join l v lr, vr, rr) + + (* Implementation of the set operations *) + + let empty = Empty + + let is_empty = function Empty -> true | _ -> false + + let rec mem x = function + Empty -> false + | Node(l, v, r, _) -> + let c = Ord.compare x v in + c = 0 || mem x (if c < 0 then l else r) + + let rec add x = function + Empty -> Node(Empty, x, Empty, 1) + | Node(l, v, r, _) as t -> + let c = Ord.compare x v in + if c = 0 then t else + if c < 0 then bal (add x l) v r else bal l v (add x r) + + let singleton x = Node(Empty, x, Empty, 1) + + let rec remove x = function + Empty -> Empty + | Node(l, v, r, _) -> + let c = Ord.compare x v in + if c = 0 then merge l r else + if c < 0 then bal (remove x l) v r else bal l v (remove x r) + + let rec union s1 s2 = + match (s1, s2) with + (Empty, t2) -> t2 + | (t1, Empty) -> t1 + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + if h1 >= h2 then + if h2 = 1 then add v2 s1 else begin + let (l2, _, r2) = split v1 s2 in + join (union l1 l2) v1 (union r1 r2) + end + else + if h1 = 1 then add v1 s2 else begin + let (l1, _, r1) = split v2 s1 in + join (union l1 l2) v2 (union r1 r2) + end + + let rec inter s1 s2 = + match (s1, s2) with + (Empty, t2) -> Empty + | (t1, Empty) -> Empty + | (Node(l1, v1, r1, _), t2) -> + match split v1 t2 with + (l2, None, r2) -> + concat (inter l1 l2) (inter r1 r2) + | (l2, Some _, r2) -> + join (inter l1 l2) v1 (inter r1 r2) + + let rec diff s1 s2 = + match (s1, s2) with + (Empty, t2) -> Empty + | (t1, Empty) -> t1 + | (Node(l1, v1, r1, _), t2) -> + match split v1 t2 with + (l2, None, r2) -> + join (diff l1 l2) v1 (diff r1 r2) + | (l2, Some _, r2) -> + concat (diff l1 l2) (diff r1 r2) + + let rec compare_aux l1 l2 = + match (l1, l2) with + ([], []) -> 0 + | ([], _) -> -1 + | (_, []) -> 1 + | (Empty :: t1, Empty :: t2) -> + compare_aux t1 t2 + | (Node(Empty, v1, r1, _) :: t1, Node(Empty, v2, r2, _) :: t2) -> + let c = Ord.compare v1 v2 in + if c <> 0 then c else compare_aux (r1::t1) (r2::t2) + | (Node(l1, v1, r1, _) :: t1, t2) -> + compare_aux (l1 :: Node(Empty, v1, r1, 0) :: t1) t2 + | (t1, Node(l2, v2, r2, _) :: t2) -> + compare_aux t1 (l2 :: Node(Empty, v2, r2, 0) :: t2) + + let compare s1 s2 = + compare_aux [s1] [s2] + + let equal s1 s2 = + compare s1 s2 = 0 + + let rec subset s1 s2 = + match (s1, s2) with + Empty, _ -> + true + | _, Empty -> + false + | Node (l1, v1, r1, _), (Node (l2, v2, r2, _) as t2) -> + let c = Ord.compare v1 v2 in + if c = 0 then + subset l1 l2 && subset r1 r2 + else if c < 0 then + subset (Node (l1, v1, Empty, 0)) l2 && subset r1 t2 + else + subset (Node (Empty, v1, r1, 0)) r2 && subset l1 t2 + + let rec iter f = function + Empty -> () + | Node(l, v, r, _) -> iter f l; f v; iter f r + + let rec fold f s accu = + match s with + Empty -> accu + | Node(l, v, r, _) -> fold f l (f v (fold f r accu)) + + let rec for_all p = function + Empty -> true + | Node(l, v, r, _) -> p v && for_all p l && for_all p r + + let rec exists p = function + Empty -> false + | Node(l, v, r, _) -> p v || exists p l || exists p r + + let filter p s = + let rec filt accu = function + | Empty -> accu + | Node(l, v, r, _) -> + filt (filt (if p v then add v accu else accu) l) r in + filt Empty s + + let partition p s = + let rec part (t, f as accu) = function + | Empty -> accu + | Node(l, v, r, _) -> + part (part (if p v then (add v t, f) else (t, add v f)) l) r in + part (Empty, Empty) s + + let rec cardinal = function + Empty -> 0 + | Node(l, v, r, _) -> cardinal l + 1 + cardinal r + + let rec elements_aux accu = function + Empty -> accu + | Node(l, v, r, _) -> elements_aux (v :: elements_aux accu r) l + + let elements s = + elements_aux [] s + + let rec min_elt = function + Empty -> raise Not_found + | Node(Empty, v, r, _) -> v + | Node(l, v, r, _) -> min_elt l + + let rec max_elt = function + Empty -> raise Not_found + | Node(l, v, Empty, _) -> v + | Node(l, v, r, _) -> max_elt r + + let choose = min_elt + + end diff --git a/cil/src/ext/pta/setp.mli b/cil/src/ext/pta/setp.mli new file mode 100644 index 00000000..a3b30313 --- /dev/null +++ b/cil/src/ext/pta/setp.mli @@ -0,0 +1,180 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +(* $Id: setp.mli,v 1.3 2003-02-19 19:26:31 jkodumal Exp $ *) + +(** Sets over ordered types. + + This module implements the set data structure, given a total ordering + function over the set elements. All operations over sets + are purely applicative (no side-effects). + The implementation uses balanced binary trees, and is therefore + reasonably efficient: insertion and membership take time + logarithmic in the size of the set, for instance. +*) + +module type PolyOrderedType = + sig + type 'a t + (** The type of the set elements. *) + val compare : 'a t -> 'a t -> int + (** A total ordering function over the set elements. + This is a two-argument function [f] such that + [f e1 e2] is zero if the elements [e1] and [e2] are equal, + [f e1 e2] is strictly negative if [e1] is smaller than [e2], + and [f e1 e2] is strictly positive if [e1] is greater than [e2]. + Example: a suitable ordering function is + the generic structural comparison function {!Pervasives.compare}. *) + end +(** Input signature of the functor {!Set.Make}. *) + +module type S = + sig + type 'a elt + (** The type of the set elements. *) + + type 'a t + (** The type of sets. *) + + val empty: 'a t + (** The empty set. *) + + val is_empty: 'a t -> bool + (** Test whether a set is empty or not. *) + + val mem: 'a elt -> 'a t -> bool + (** [mem x s] tests whether [x] belongs to the set [s]. *) + + val add: 'a elt -> 'a t -> 'a t + (** [add x s] returns a set containing all elements of [s], + plus [x]. If [x] was already in [s], [s] is returned unchanged. *) + + val singleton: 'a elt -> 'a t + (** [singleton x] returns the one-element set containing only [x]. *) + + val remove: 'a elt -> 'a t -> 'a t + (** [remove x s] returns a set containing all elements of [s], + except [x]. If [x] was not in [s], [s] is returned unchanged. *) + + val union: 'a t -> 'a t -> 'a t + (** Set union. *) + + val inter: 'a t -> 'a t -> 'a t + (** Set interseection. *) + + (** Set difference. *) + val diff: 'a t -> 'a t -> 'a t + + val compare: 'a t -> 'a t -> int + (** Total ordering between sets. Can be used as the ordering function + for doing sets of sets. *) + + val equal: 'a t -> 'a t -> bool + (** [equal s1 s2] tests whether the sets [s1] and [s2] are + equal, that is, contain equal elements. *) + + val subset: 'a t -> 'a t -> bool + (** [subset s1 s2] tests whether the set [s1] is a subset of + the set [s2]. *) + + val iter: ('a elt -> unit) -> 'a t -> unit + (** [iter f s] applies [f] in turn to all elements of [s]. + The order in which the elements of [s] are presented to [f] + is unspecified. *) + + val fold: ('a elt -> 'b -> 'b) -> 'a t -> 'b -> 'b + (** [fold f s a] computes [(f xN ... (f x2 (f x1 a))...)], + where [x1 ... xN] are the elements of [s]. + The order in which elements of [s] are presented to [f] is + unspecified. *) + + val for_all: ('a elt -> bool) -> 'a t -> bool + (** [for_all p s] checks if all elements of the set + satisfy the predicate [p]. *) + + val exists: ('a elt -> bool) -> 'a t -> bool + (** [exists p s] checks if at least one element of + the set satisfies the predicate [p]. *) + + val filter: ('a elt -> bool) -> 'a t -> 'a t + (** [filter p s] returns the set of all elements in [s] + that satisfy predicate [p]. *) + + val partition: ('a elt -> bool) -> 'a t -> 'a t * 'a t + (** [partition p s] returns a pair of sets [(s1, s2)], where + [s1] is the set of all the elements of [s] that satisfy the + predicate [p], and [s2] is the set of all the elements of + [s] that do not satisfy [p]. *) + + val cardinal: 'a t -> int + (** Return the number of elements of a set. *) + + val elements: 'a t -> 'a elt list + (** Return the list of all elements of the given set. + The returned list is sorted in increasing order with respect + to the ordering [Ord.compare], where [Ord] is the argument + given to {!Set.Make}. *) + + val min_elt: 'a t -> 'a elt + (** Return the smallest element of the given set + (with respect to the [Ord.compare] ordering), or raise + [Not_found] if the set is empty. *) + + val max_elt: 'a t -> 'a elt + (** Same as {!Set.S.min_elt}, but returns the largest element of the + given set. *) + + val choose: 'a t -> 'a elt + (** Return one element of the given set, or raise [Not_found] if + the set is empty. Which element is chosen is unspecified, + but equal elements will be chosen for equal sets. *) + end +(** Output signature of the functor {!Set.Make}. *) + +module Make (Ord : PolyOrderedType) : S with type 'a elt = 'a Ord.t +(** Functor building an implementation of the set structure + given a totally ordered type. *) diff --git a/cil/src/ext/pta/steensgaard.ml b/cil/src/ext/pta/steensgaard.ml new file mode 100644 index 00000000..63686934 --- /dev/null +++ b/cil/src/ext/pta/steensgaard.ml @@ -0,0 +1,1417 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* *) +(* This file is currently unused by CIL. It is included in the *) +(* distribution for reference only. *) +(* *) +(* *) +(***********************************************************************) + + +(***********************************************************************) +(* *) +(* Type Declarations *) +(* *) +(***********************************************************************) + +exception Inconsistent of string +exception Bad_cache +exception No_contents +exception Bad_proj +exception Bad_type_copy +exception Instantiation_cycle + +module U = Uref +module S = Setp +module H = Hashtbl +module Q = Queue + +(** Polarity kinds-- positive, negative, or nonpolar. *) +type polarity = Pos + | Neg + | Non + +(** Label bounds. The polymorphic type is a hack for recursive modules *) +type 'a bound = {index : int; info : 'a} + +(** The 'a type may in general contain urefs, which makes Pervasives.compare + incorrect. However, the bounds will always be correct because if two tau's + get unified, their cached instantiations will be re-entered into the + worklist, ensuring that any labels find the new bounds *) +module Bound = +struct + type 'a t = 'a bound + let compare (x : 'a t) (y : 'a t) = + Pervasives.compare x y +end + +module B = S.Make(Bound) + +type 'a boundset = 'a B.t + +(** Constants, which identify elements in points-to sets *) +type constant = int * string + +module Constant = +struct + type t = constant + + let compare ((xid,_) : t) ((yid,_) : t) = + Pervasives.compare xid yid +end + +module C = Set.Make(Constant) + +(** Sets of constants. Set union is used when two labels containing + constant sets are unified *) +type constantset = C.t + +type lblinfo = { + mutable l_name: string; + (** Name of this label *) + mutable aliases: constantset; + (** Set of constants (tags) for checking aliases *) + p_bounds: label boundset U.uref; + (** Set of umatched (p) lower bounds *) + n_bounds: label boundset U.uref; + (** Set of unmatched (n) lower bounds *) + mutable p_cached: bool; + (** Flag indicating whether all reachable p edges have been locally cached *) + mutable n_cached: bool; + (** Flag indicating whether all reachable n edges have been locally cached *) + mutable on_path: bool; + (** For cycle detection during reachability queries *) +} + +(** Constructor labels *) +and label = lblinfo U.uref + +(** The type of lvalues. *) +type lvalue = { + l: label; + contents: tau +} + +(** Data for variables. *) +and vinfo = { + v_name: string; + mutable v_global: bool; + v_cache: cache +} + +(** Data for ref constructors. *) +and rinfo = { + rl: label; + mutable r_global: bool; + points_to: tau; + r_cache: cache +} + +(** Data for fun constructors. *) +and finfo = { + fl: label; + mutable f_global: bool; + args: tau list ref; + ret: tau; + f_cache: cache +} + +(* Data for pairs. Note there is no label. *) +and pinfo = { + mutable p_global: bool; + ptr: tau; + lam: tau; + p_cache: cache +} + +(** Type constructors discovered by type inference *) +and tinfo = Wild + | Var of vinfo + | Ref of rinfo + | Fun of finfo + | Pair of pinfo + +(** The top-level points-to type. *) +and tau = tinfo U.uref + +(** The instantiation constraint cache. The index is used as a key. *) +and cache = (int,polarity * tau) H.t + +(* Type of semi-unification constraints *) +type su_constraint = Instantiation of tau * (int * polarity) * tau + | Unification of tau * tau + +(** Association lists, used for printing recursive types. The first element + is a type that has been visited. The second element is the string + representation of that type (so far). If the string option is set, then + this type occurs within itself, and is associated with the recursive var + name stored in the option. When walking a type, add it to an association + list. + + Example : suppose we have the constraint 'a = ref('a). The type is unified + via cyclic unification, and would loop infinitely if we attempted to print + it. What we want to do is print the type u rv. ref(rv). This is accomplished + in the following manner: + + -- ref('a) is visited. It is not in the association list, so it is added + and the string "ref(" is stored in the second element. We recurse to print + the first argument of the constructor. + + -- In the recursive call, we see that 'a (or ref('a)) is already in the + association list, so the type is recursive. We check the string option, + which is None, meaning that this is the first recurrence of the type. We + create a new recursive variable, rv and set the string option to 'rv. Next, + we prepend u rv. to the string representation we have seen before, "ref(", + and return "rv" as the string representation of this type. + + -- The string so far is "u rv.ref(". The recursive call returns, and we + complete the type by printing the result of the call, "rv", and ")" + + In a type where the recursive variable appears twice, e.g. 'a = pair('a,'a), + the second time we hit 'a, the string option will be set, so we know to + reuse the same recursive variable name. +*) +type association = tau * string ref * string option ref + +(***********************************************************************) +(* *) +(* Global Variables *) +(* *) +(***********************************************************************) + +(** Print the instantiations constraints (loops with cyclic structures). *) +let print_constraints : bool ref = ref false + +(** Solve constraints as they are introduced. If this is false, constraints + are solved in batch fashion at calls to solveConstraints. *) +let solve_online : bool ref = ref true + +(** If true, print all constraints (including induced) and show additional + debug output. *) +let debug = ref false +let debug_constraints = debug + +(** If true, print out extra verbose debug information (including contents + of label sets *) +let verbose_debug = ref false + + +(** If true, make the flow step a no-op *) +let no_flow = ref false + +let no_sub = ref false + +(** If true, do not add instantiation constraints *) +let analyze_mono = ref false + +(** A counter for generating unique integers. *) +let counter : int ref = ref 0 + +(** A list of equality constraints. *) +let eq_worklist : su_constraint Q.t = Q.create() + +(** A list of instantiation constraints. *) +let inst_worklist : su_constraint Q.t = Q.create() + +(***********************************************************************) +(* *) +(* Utility Functions *) +(* *) +(***********************************************************************) + +(** Consistency check for inferred types *) +let pair_or_var (t : tau) = + match (U.deref t) with + | Pair _ -> true + | Var _ -> true + | _ -> false + +let ref_or_var (t : tau) = + match (U.deref t) with + | Ref _ -> true + | Var _ -> true + | _ -> false + +let fun_or_var (t : tau) = + match (U.deref t) with + | Fun _ -> true + | Var _ -> true + | _ -> false + +(** Generate a unique integer. *) +let fresh_index () : int = + incr counter; + !counter + +(** Negate a polarity. *) +let negate (p : polarity) : polarity = + match p with + | Pos -> Neg + | Neg -> Pos + | Non -> Non + +(** Compute the least-upper-bounds of two polarities. *) +let lub (p,p' : polarity * polarity) : polarity = + match p with + | Pos -> + begin + match p' with + | Pos -> Pos + | _ -> Non + end + | Neg -> + begin + match p' with + | Neg -> Neg + | _ -> Non + end + | Non -> Non + +(** Extract the cache from a type *) +let get_cache (t : tau) : cache = + match U.deref t with + | Wild -> raise Bad_cache + | Var v -> v.v_cache + | Ref r -> r.r_cache + | Pair p -> p.p_cache + | Fun f -> f.f_cache + +(** Determine whether or not a type is global *) +let get_global (t : tau) : bool = + match U.deref t with + | Wild -> false + | Var v -> v.v_global + | Ref r -> r.r_global + | Pair p -> p.p_global + | Fun f -> f.f_global + +(** Return true if a type is monomorphic (global). *) +let global_tau = get_global + +let global_lvalue lv = get_global lv.contents + +(** Return true if e is a member of l (according to uref equality) *) +let rec ulist_mem e l = + match l with + | [] -> false + | h :: t -> if (U.equal(h,e)) then true else ulist_mem e t + +(** Convert a polarity to a string *) +let string_of_polarity p = + match p with + | Pos -> "+" + | Neg -> "-" + | Non -> "T" + +(** Convert a label to a string, short representation *) +let string_of_label2 (l : label) : string = + "\"" ^ (U.deref l).l_name ^ "\"" + +(** Convert a label to a string, long representation *) +let string_of_label (l : label ) : string = + let rec constset_to_string = function + | (_,s) :: [] -> s + | (_,s) :: t -> s ^ "," ^ (constset_to_string t) + | [] -> "" + in + let aliases = constset_to_string (C.elements ((U.deref l).aliases)) + in + if ( (aliases = "") || (not !verbose_debug)) + then string_of_label2 l + else aliases + +(** Return true if the element [e] is present in the association list *) +let rec assoc_list_mem (e : tau) (l : association list) = + match l with + | [] -> None + | (h,s,so) :: t -> + if (U.equal(h,e)) then (Some (s,so)) else assoc_list_mem e t + +(** Given a tau, create a unique recursive variable name. This should always + return the same name for a given tau *) +let fresh_recvar_name (t : tau) : string = + match U.deref t with + | Pair p -> "rvp" ^ string_of_int((Hashtbl.hash p)) + | Ref r -> "rvr" ^ string_of_int((Hashtbl.hash r)) + | Fun f -> "rvf" ^ string_of_int((Hashtbl.hash f)) + | _ -> raise (Inconsistent ("recvar_name")) + +(** Return a string representation of a tau, using association lists. *) +let string_of_tau (t : tau ) : string = + let tau_map : association list ref = ref [] in + let rec string_of_tau' t = + match (assoc_list_mem t (!tau_map)) with + | Some (s,so) -> (* recursive type. see if a var name has been set *) + begin + match (!so) with + | None -> + begin + let rv = fresh_recvar_name(t) in + s := "u " ^ rv ^ "." ^ (!s); + so := Some (rv); + rv + end + | Some rv -> rv + end + | None -> (* type's not recursive. Add it to the assoc list and cont. *) + let s = ref "" in + let so : string option ref = ref None in + begin + tau_map := (t,s,so) :: (!tau_map); + + (match (U.deref t) with + | Wild -> s := "_"; + | Var v -> s := v.v_name; + | Pair p -> + begin + assert (ref_or_var(p.ptr)); + assert (fun_or_var(p.lam)); + s := "{"; + s := (!s) ^ (string_of_tau' p.ptr); + s := (!s) ^ ","; + s := (!s) ^ (string_of_tau' p.lam); + s := (!s) ^"}" + + end + | Ref r -> + begin + assert(pair_or_var(r.points_to)); + s := "ref(|"; + s := (!s) ^ (string_of_label r.rl); + s := (!s) ^ "|,"; + s := (!s) ^ (string_of_tau' r.points_to); + s := (!s) ^ ")" + + end + | Fun f -> + begin + assert(pair_or_var(f.ret)); + let rec string_of_args = function + | h :: [] -> + begin + assert(pair_or_var(h)); + s := (!s) ^ (string_of_tau' h) + end + | h :: t -> + begin + assert(pair_or_var(h)); + s := (!s) ^ (string_of_tau' h) ^ ","; + string_of_args t + end + | [] -> () + in + s := "fun(|"; + s := (!s) ^ (string_of_label f.fl); + s := (!s) ^ "|,"; + s := (!s) ^ "<"; + if (List.length !(f.args) > 0) + then + string_of_args !(f.args) + else + s := (!s) ^ "void"; + s := (!s) ^">,"; + s := (!s) ^ (string_of_tau' f.ret); + s := (!s) ^ ")" + end); + tau_map := List.tl (!tau_map); + !s + end + in + string_of_tau' t + +(** Convert an lvalue to a string *) +let rec string_of_lvalue (lv : lvalue) : string = + let contents = (string_of_tau(lv.contents)) in + let l = (string_of_label lv.l) in + assert(pair_or_var(lv.contents)); + Printf.sprintf "[%s]^(%s)" contents l + +(** Print a list of tau elements, comma separated *) +let rec print_tau_list (l : tau list) : unit = + let t_strings = List.map string_of_tau l in + let rec print_t_strings = function + | h :: [] -> print_string h; print_newline(); + | h :: t -> print_string h; print_string ", "; print_t_strings t + | [] -> () + in + print_t_strings t_strings + +(** Print a constraint. *) +let print_constraint (c : su_constraint) = + match c with + | Unification (t,t') -> + let lhs = string_of_tau t in + let rhs = string_of_tau t' in + Printf.printf "%s == %s\n" lhs rhs + | Instantiation (t,(i,p),t') -> + let lhs = string_of_tau t in + let rhs = string_of_tau t' in + let index = string_of_int i in + let pol = string_of_polarity p in + Printf.printf "%s <={%s,%s} %s\n" lhs index pol rhs + +(* If [positive] is true, return the p-edge bounds, otherwise, return + the n-edge bounds. *) +let get_bounds (positive : bool) (l : label) : label boundset U.uref = + if (positive) then + (U.deref l).p_bounds + else + (U.deref l).n_bounds + +(** Used for cycle detection during the flow step. Returns true if the + label [l] is found on the current path. *) +let on_path (l : label) : bool = + (U.deref l).on_path + +(** Used for cycle detection during the flow step. Identifies [l] as being + on/off the current path. *) +let set_on_path (l : label) (b : bool) : unit = + (U.deref l).on_path <- b + +(** Make the type a global type *) +let set_global (t : tau) (b : bool) : bool = + if (!debug && b) + then + Printf.printf "Setting a new global : %s\n" (string_of_tau t); + begin + assert ( (not (get_global(t)) ) || b ); + (match U.deref t with + | Wild -> () + | Var v -> v.v_global <- b + | Ref r -> r.r_global <- b + | Pair p -> p.p_global <- b + | Fun f -> f.f_global <- b); + b + end + +(** Return a label's bounds as a string *) +let string_of_bounds (is_pos : bool) (l : label) : string = + let bounds = + if (is_pos) then + U.deref ((U.deref l).p_bounds) + else + U.deref ((U.deref l).n_bounds) + in + B.fold (fun b -> fun res -> res ^ (string_of_label2 b.info) ^ " " + ) bounds "" + +(***********************************************************************) +(* *) +(* Type Operations -- these do not create any constraints *) +(* *) +(***********************************************************************) + +let wild_val = U.uref Wild + +(** The wild (don't care) value. *) +let wild () : tau = + wild_val + +(** Create an lvalue with label [lbl] and tau contents [t]. *) +let make_lval (lbl,t : label * tau) : lvalue = + {l = lbl; contents = t} + +(** Create a new label with name [name]. Also adds a fresh constant + with name [name] to this label's aliases set. *) +let make_label (name : string) : label = + U.uref { + l_name = name; + aliases = (C.add (fresh_index(),name) C.empty); + p_bounds = U.uref (B.empty); + n_bounds = U.uref (B.empty); + p_cached = false; + n_cached = false; + on_path = false + } + +(** Create a new label with an unspecified name and an empty alias set. *) +let fresh_label () : label = + U.uref { + l_name = "l_" ^ (string_of_int (fresh_index())); + aliases = (C.empty); + p_bounds = U.uref (B.empty); + n_bounds = U.uref (B.empty); + p_cached = false; + n_cached = false; + on_path = false + } + +(** Create a fresh bound. *) +let make_bound (i,a : int * 'a) : 'a bound = + {index = i; info = a } + +(** Create a fresh named variable with name '[name]. *) +let make_var (b: bool) (name : string) : tau = + U.uref (Var {v_name = ("'" ^name); + v_global = b; + v_cache = H.create 4}) + +(** Create a fresh unnamed variable (name will be 'fv). *) +let fresh_var () : tau = + make_var false ("fv" ^ (string_of_int (fresh_index())) ) + +(** Create a fresh unnamed variable (name will be 'fi). *) +let fresh_var_i () : tau = + make_var false ("fi" ^ (string_of_int (fresh_index())) ) + +(** Create a Fun constructor. *) +let make_fun (lbl,a,r : label * (tau list) * tau) : tau = + U.uref (Fun {fl = lbl ; + f_global = false; + args = ref a; + ret = r; + f_cache = H.create 4}) + +(** Create a Ref constructor. *) +let make_ref (lbl,pt : label * tau) : tau = + U.uref (Ref {rl = lbl ; + r_global = false; + points_to = pt; + r_cache = H.create 4}) + +(** Create a Pair constructor. *) +let make_pair (p,f : tau * tau) : tau = + U.uref (Pair {ptr = p; + p_global = false; + lam = f; + p_cache = H.create 4}) + +(** Copy the toplevel constructor of [t], putting fresh variables in each + argement of the constructor. *) +let copy_toplevel (t : tau) : tau = + match U.deref t with + | Pair _ -> + make_pair (fresh_var_i(), fresh_var_i()) + | Ref _ -> + make_ref (fresh_label(),fresh_var_i()) + | Fun f -> + let fresh_fn = fun _ -> fresh_var_i() + in + make_fun (fresh_label(), List.map fresh_fn !(f.args) , fresh_var_i()) + | _ -> raise Bad_type_copy + +let pad_args (l,l' : (tau list ref) * (tau list ref)) : unit = + let padding = ref ((List.length (!l)) - (List.length (!l'))) + in + if (!padding == 0) then () + else + let to_pad = + if (!padding > 0) then l' else (padding := -(!padding);l) + in + for i = 1 to (!padding) do + to_pad := (!to_pad) @ [fresh_var()] + done + +(***********************************************************************) +(* *) +(* Constraint Generation/ Resolution *) +(* *) +(***********************************************************************) + +(** Returns true if the constraint has no effect, i.e. either the left-hand + side or the right-hand side is wild. *) +let wild_constraint (t,t' : tau * tau) : bool = + let ti,ti' = U.deref t, U.deref t' in + match ti,ti' with + | Wild, _ -> true + | _, Wild -> true + | _ -> false + +exception Cycle_found + +(** Cycle detection between instantiations. Returns true if there is a cycle + from t to t' *) +let exists_cycle (t,t' : tau * tau) : bool = + let visited : tau list ref = ref [] in + let rec exists_cycle' t = + if (ulist_mem t (!visited)) + then + begin (* + print_string "Instantiation cycle found :"; + print_tau_list (!visited); + print_newline(); + print_string (string_of_tau t); + print_newline(); *) + (* raise Instantiation_cycle *) + (* visited := List.tl (!visited) *) (* check *) + end + else + begin + visited := t :: (!visited); + if (U.equal(t,t')) + then raise Cycle_found + else + H.iter (fun _ -> fun (_,t'') -> + if (U.equal (t,t'')) then () + else + ignore (exists_cycle' t'') + ) (get_cache t) ; + visited := List.tl (!visited) + end + in + try + exists_cycle' t; + false + with + | Cycle_found -> true + +exception Subterm + +(** Returns true if [t'] is a proper subterm of [t] *) +let proper_subterm (t,t') = + let visited : tau list ref = ref [] in + let rec proper_subterm' t = + if (ulist_mem t (!visited)) + then () (* recursive type *) + else + if (U.equal (t,t')) + then raise Subterm + else + begin + visited := t :: (!visited); + ( + match (U.deref t) with + | Wild -> () + | Var _ -> () + | Ref r -> + proper_subterm' r.points_to + | Pair p -> + proper_subterm' p.ptr; + proper_subterm' p.lam + | Fun f -> + proper_subterm' f.ret; + List.iter (proper_subterm') !(f.args) + ); + visited := List.tl (!visited) + end + in + try + if (U.equal(t,t')) then false + else + begin + proper_subterm' t; + false + end + with + | Subterm -> true + +(** The extended occurs check. Search for a cycle of instantiations from [t] + to [t']. If such a cycle exists, check to see that [t'] is a proper subterm + of [t]. If it is, then return true *) +let eoc (t,t') : bool = + if (exists_cycle(t,t') && proper_subterm(t,t')) + then + begin + if (!debug) + then + Printf.printf "Occurs check : %s occurs within %s\n" (string_of_tau t') + (string_of_tau t) + else + (); + true + end + else + false + +(** Resolve an instantiation constraint *) +let rec instantiate_int (t,(i,p),t' : tau * (int * polarity) * tau) = + if ( wild_constraint(t,t') || (not (store(t,(i,p),t'))) || + U.equal(t,t') ) + then () + else + let ti,ti' = U.deref t, U.deref t' in + match ti,ti' with + | Ref r, Ref r' -> + instantiate_ref(r,(i,p),r') + | Fun f, Fun f' -> + instantiate_fun(f,(i,p),f') + | Pair pr, Pair pr' -> + begin + add_constraint_int (Instantiation (pr.ptr,(i,p),pr'.ptr)); + add_constraint_int (Instantiation (pr.lam,(i,p),pr'.lam)) + end + | Var v, _ -> () + | _,Var v' -> + if eoc(t,t') + then + add_constraint_int (Unification (t,t')) + else + begin + unstore(t,i); + add_constraint_int (Unification ((copy_toplevel t),t')); + add_constraint_int (Instantiation (t,(i,p),t')) + end + | _ -> raise (Inconsistent("instantiate")) + +(** Apply instantiations to the ref's label, and structurally down the type. + Contents of ref constructors are instantiated with polarity Non. *) +and instantiate_ref (ri,(i,p),ri') : unit = + add_constraint_int (Instantiation(ri.points_to,(i,Non),ri'.points_to)); + instantiate_label (ri.rl,(i,p),ri'.rl) + +(** Apply instantiations to the fun's label, and structurally down the type. + Flip the polarity for the function's args. If the lengths of the argument + lists don't match, extend the shorter list as necessary. *) +and instantiate_fun (fi,(i,p),fi') : unit = + pad_args (fi.args, fi'.args); + assert(List.length !(fi.args) == List.length !(fi'.args)); + add_constraint_int (Instantiation (fi.ret,(i,p),fi'.ret)); + List.iter2 (fun t ->fun t' -> + add_constraint_int (Instantiation(t,(i,negate p),t'))) + !(fi.args) !(fi'.args); + instantiate_label (fi.fl,(i,p),fi'.fl) + +(** Instantiate a label. Update the label's bounds with new flow edges. + *) +and instantiate_label (l,(i,p),l' : label * (int * polarity) * label) : unit = + if (!debug) then + Printf.printf "%s <= {%d,%s} %s\n" (string_of_label l) i + (string_of_polarity p) (string_of_label l'); + let li,li' = U.deref l, U.deref l' in + match p with + | Pos -> + U.update (li'.p_bounds, + B.add(make_bound (i,l)) (U.deref li'.p_bounds) + ) + | Neg -> + U.update (li.n_bounds, + B.add(make_bound (i,l')) (U.deref li.n_bounds) + ) + | Non -> + begin + U.update (li'.p_bounds, + B.add(make_bound (i,l)) (U.deref li'.p_bounds) + ); + U.update (li.n_bounds, + B.add(make_bound (i,l')) (U.deref li.n_bounds) + ) + end + +(** Resolve a unification constraint. Does the uref unification after grabbing + a copy of the information before the two infos are unified. The other + interesting feature of this function is the way 'globalness' is propagated. + If a non-global is unified with a global, the non-global becomes global. + If the ecr became global, there is a problem because none of its cached + instantiations know that the type became monomorphic. In this case, they + must be re-inserted via merge-cache. Merge-cache always reinserts cached + instantiations from the non-ecr type, i.e. the type that was 'killed' by the + unification. *) +and unify_int (t,t' : tau * tau) : unit = + if (wild_constraint(t,t') || U.equal(t,t')) + then () + else + let ti, ti' = U.deref t, U.deref t' in + begin + U.unify combine (t,t'); + match ti,ti' with + | Var v, _ -> + begin + if (set_global t' (v.v_global || (get_global t'))) + then (H.iter (merge_cache t') (get_cache t')) + else (); + H.iter (merge_cache t') v.v_cache + end + | _, Var v -> + begin + if (set_global t (v.v_global || (get_global t))) + then (H.iter (merge_cache t) (get_cache t)) + else (); + H.iter (merge_cache t) v.v_cache + end + | Ref r, Ref r' -> + begin + if (set_global t (r.r_global || r'.r_global)) + then (H.iter (merge_cache t) (get_cache t)) + else (); + H.iter (merge_cache t) r'.r_cache; + unify_ref(r,r') + end + | Fun f, Fun f' -> + begin + if (set_global t (f.f_global || f'.f_global)) + then (H.iter (merge_cache t) (get_cache t)) + else (); + H.iter (merge_cache t) f'.f_cache; + unify_fun (f,f'); + end + | Pair p, Pair p' -> + begin + if (set_global t (p.p_global || p'.p_global)) + then (H.iter (merge_cache t) (get_cache t)) + else (); + H.iter (merge_cache t) p'.p_cache; + add_constraint_int (Unification (p.ptr,p'.ptr)); + add_constraint_int (Unification (p.lam,p'.lam)) + end + | _ -> raise (Inconsistent("unify")) + end + +(** Unify the ref's label, and apply unification structurally down the type. *) +and unify_ref (ri,ri' : rinfo * rinfo) : unit = + add_constraint_int (Unification (ri.points_to,ri'.points_to)); + unify_label(ri.rl,ri'.rl) + +(** Unify the fun's label, and apply unification structurally down the type, + at arguments and return value. When combining two lists of different lengths, + always choose the longer list for the representative. *) +and unify_fun (li,li' : finfo * finfo) : unit = + let rec union_args = function + | _, [] -> false + | [], _ -> true + | h :: t, h' :: t' -> + add_constraint_int (Unification (h,h')); union_args(t,t') + in + begin + unify_label(li.fl,li'.fl); + add_constraint_int (Unification (li.ret,li'.ret)); + if (union_args(!(li.args),!(li'.args))) + then li.args := !(li'.args); + end + +(** Unify two labels, combining the set of constants denoting aliases. *) +and unify_label (l,l' : label * label) : unit = + let pick_name (li,li' : lblinfo * lblinfo) = + if ( (String.length li.l_name) > 1 && (String.sub (li.l_name) 0 2) = "l_") + then + li.l_name <- li'.l_name + else () + in + let combine_label (li,li' : lblinfo *lblinfo) : lblinfo = + let p_bounds = U.deref (li.p_bounds) in + let p_bounds' = U.deref (li'.p_bounds) in + let n_bounds = U.deref (li.n_bounds) in + let n_bounds' = U.deref (li'.n_bounds) in + begin + pick_name(li,li'); + li.aliases <- C.union (li.aliases) (li'.aliases); + U.update (li.p_bounds, (B.union p_bounds p_bounds')); + U.update (li.n_bounds, (B.union n_bounds n_bounds')); + li + end + in(* + if (!debug) then + begin + Printf.printf "Unifying %s with %s...\n" + (string_of_label l) (string_of_label l'); + Printf.printf "pbounds : %s\n" (string_of_bounds true l); + Printf.printf "nbounds : %s\n" (string_of_bounds false l); + Printf.printf "pbounds : %s\n" (string_of_bounds true l'); + Printf.printf "nbounds : %s\n\n" (string_of_bounds false l') + end; *) + U.unify combine_label (l,l') + (* if (!debug) then + begin + Printf.printf "pbounds : %s\n" (string_of_bounds true l); + Printf.printf "nbounds : %s\n" (string_of_bounds false l) + end *) + +(** Re-assert a cached instantiation constraint, since the old type was + killed by a unification *) +and merge_cache (rep : tau) (i : int) (p,t' : polarity * tau) : unit = + add_constraint_int (Instantiation (rep,(i,p),t')) + +(** Pick the representative info for two tinfo's. This function prefers the + first argument when both arguments are the same structure, but when + one type is a structure and the other is a var, it picks the structure. *) +and combine (ti,ti' : tinfo * tinfo) : tinfo = + match ti,ti' with + | Var _, _ -> ti' + | _,_ -> ti + +(** Add a new constraint induced by other constraints. *) +and add_constraint_int (c : su_constraint) = + if (!print_constraints && !debug) then print_constraint c else (); + begin + match c with + | Instantiation _ -> + Q.add c inst_worklist + | Unification _ -> + Q.add c eq_worklist + end; + if (!debug) then solve_constraints() else () + +(** Add a new constraint introduced through this module's interface (a + top-level constraint). *) +and add_constraint (c : su_constraint) = + begin + add_constraint_int (c); + if (!print_constraints && not (!debug)) then print_constraint c else (); + if (!solve_online) then solve_constraints() else () + end + + +(* Fetch constraints, preferring equalities. *) +and fetch_constraint () : su_constraint option = + if (Q.length eq_worklist > 0) + then + Some (Q.take eq_worklist) + else if (Q.length inst_worklist > 0) + then + Some (Q.take inst_worklist) + else + None + +(** Returns the target of a cached instantiation, if it exists. *) +and target (t,i,p : tau * int * polarity) : (polarity * tau) option = + let cache = get_cache t in + if (global_tau t) then Some (Non,t) + else + try + Some (H.find cache i) + with + | Not_found -> None + +(** Caches a new instantiation, or applies well-formedness. *) +and store ( t,(i,p),t' : tau * (int * polarity) * tau) : bool = + let cache = get_cache t in + match target(t,i,p) with + | Some (p'',t'') -> + if (U.equal (t',t'') && (lub(p,p'') = p'')) + then + false + else + begin + add_constraint_int (Unification (t',t'')); + H.replace cache i (lub(p,p''),t''); + (* add a new forced instantiation as well *) + if (lub(p,p'') = p'') + then () + else + begin + unstore(t,i); + add_constraint_int (Instantiation (t,(i,lub(p,p'')),t'')) + end; + false + end + | None -> + begin + H.add cache i (p,t'); + true + end + +(** Remove a cached instantiation. Used when type structure changes *) +and unstore (t,i : tau * int) = +let cache = get_cache t in + H.remove cache i + +(** The main solver loop. *) +and solve_constraints () : unit = + match fetch_constraint () with + | Some c -> + begin + (match c with + | Instantiation (t,(i,p),t') -> instantiate_int (t,(i,p),t') + | Unification (t,t') -> unify_int (t,t') + ); + solve_constraints() + end + | None -> () + + +(***********************************************************************) +(* *) +(* Interface Functions *) +(* *) +(***********************************************************************) + +(** Return the contents of the lvalue. *) +let rvalue (lv : lvalue) : tau = + lv.contents + +(** Dereference the rvalue. If it does not have enough structure to support + the operation, then the correct structure is added via new unification + constraints. *) +let rec deref (t : tau) : lvalue = + match U.deref t with + | Pair p -> + ( + match U.deref (p.ptr) with + | Var _ -> + begin + (* let points_to = make_pair(fresh_var(),fresh_var()) in *) + let points_to = fresh_var() in + let l = fresh_label() in + let r = make_ref(l,points_to) + in + add_constraint (Unification (p.ptr,r)); + make_lval(l, points_to) + end + | Ref r -> make_lval(r.rl, r.points_to) + | _ -> raise (Inconsistent("deref")) + ) + | Var v -> + begin + add_constraint (Unification (t,make_pair(fresh_var(),fresh_var()))); + deref t + end + | _ -> raise (Inconsistent("deref -- no top level pair")) + +(** Form the union of [t] and [t']. *) +let join (t : tau) (t' : tau) : tau = + let t'' = fresh_var() in + add_constraint (Unification (t,t'')); + add_constraint (Unification (t',t'')); + t'' + +(** Form the union of a list [tl], expected to be the initializers of some + structure or array type. *) +let join_inits (tl : tau list) : tau = + let t' = fresh_var() in + begin + List.iter (function t'' -> add_constraint (Unification(t',t''))) tl; + t' + end + +(** Take the address of an lvalue. Does not add constraints. *) +let address (lv : lvalue) : tau = + make_pair (make_ref (lv.l, lv.contents), fresh_var() ) + +(** Instantiate a type with index i. By default, uses positive polarity. + Adds an instantiation constraint. *) +let instantiate (lv : lvalue) (i : int) : lvalue = + if (!analyze_mono) then lv + else + begin + let l' = fresh_label () in + let t' = fresh_var_i () in + instantiate_label(lv.l,(i,Pos),l'); + add_constraint (Instantiation (lv.contents,(i,Pos),t')); + make_lval(l',t') (* check -- fresh label ?? *) + end + +(** Constraint generated from assigning [t] to [lv]. *) +let assign (lv : lvalue) (t : tau) : unit = + add_constraint (Unification (lv.contents,t)) + + +(** Project out the first (ref) component or a pair. If the argument [t] has + no discovered structure, raise No_contents. *) +let proj_ref (t : tau) : tau = + match U.deref t with + | Pair p -> p.ptr + | Var v -> raise No_contents + | _ -> raise Bad_proj + +(* Project out the second (fun) component of a pair. If the argument [t] has + no discovered structure, create it on the fly by adding constraints. *) +let proj_fun (t : tau) : tau = + match U.deref t with + | Pair p -> p.lam + | Var v -> + let p,f = fresh_var(), fresh_var() in + add_constraint (Unification (t,make_pair(p,f))); + f + | _ -> raise Bad_proj + +let get_args (t : tau) : tau list ref = + match U.deref t with + | Fun f -> f.args + | _ -> raise (Inconsistent("get_args")) + +(** Function type [t] is applied to the arguments [actuals]. Unifies the + actuals with the formals of [t]. If no functions have been discovered for + [t] yet, create a fresh one and unify it with t. The result is the return + value of the function. *) +let apply (t : tau) (al : tau list) : tau = + let f = proj_fun(t) in + let actuals = ref al in + let formals,ret = + match U.deref f with + | Fun fi -> (fi.args),fi.ret + | Var v -> + let new_l,new_ret,new_args = + fresh_label(), fresh_var (), + List.map (function _ -> fresh_var()) (!actuals) + in + let new_fun = make_fun(new_l,new_args,new_ret) in + add_constraint (Unification(new_fun,f)); + (get_args new_fun,new_ret) + | Ref _ -> raise (Inconsistent ("apply_ref")) + | Pair _ -> raise (Inconsistent ("apply_pair")) + | Wild -> raise (Inconsistent("apply_wild")) + in + pad_args(formals,actuals); + List.iter2 (fun actual -> fun formal -> + add_constraint (Unification (actual,formal)) + ) !actuals !formals; + ret + +(** Create a new function type with name [name], list of formal arguments + [formals], and return value [ret]. Adds no constraints. *) +let make_function (name : string) (formals : lvalue list) (ret : tau) : tau = + let + f = make_fun(make_label(name),List.map (fun x -> rvalue x) formals, ret) + in + make_pair(fresh_var(),f) + +(** Create an lvalue. If [is_global] is true, the lvalue will be treated + monomorphically. *) +let make_lvalue (is_global : bool) (name : string) : lvalue = + if (!debug && is_global) + then + Printf.printf "Making global lvalue : %s\n" name + else (); + make_lval(make_label(name), make_var is_global name) + + +(** Create a fresh non-global named variable. *) +let make_fresh (name : string) : tau = + make_var false (name) + +(** The default type for constants. *) +let bottom () : tau = + make_var false ("bottom") + +(** Unify the result of a function with its return value. *) +let return (t : tau) (t' : tau) = + add_constraint (Unification (t,t')) + + +(***********************************************************************) +(* *) +(* Query/Extract Solutions *) +(* *) +(***********************************************************************) + +(** Unify the data stored in two label bounds. *) +let combine_lbounds (s,s' : label boundset * label boundset) = + B.union s s' + +(** Truncates a list of urefs [l] to those elements up to and including the + first occurence of the specified element [elt]. *) +let truncate l elt = + let keep = ref true in + List.filter + (fun x -> + if (not (!keep)) + then + false + else + begin + if (U.equal(x,elt)) + then + keep := false + else (); + true + end + ) l + +let debug_cycle_bounds is_pos c = + let rec debug_cycle_bounds' = function + | h :: [] -> + Printf.printf "%s --> %s\n" (string_of_bounds is_pos h) + (string_of_label2 h) + | h :: t -> + begin + Printf.printf "%s --> %s\n" (string_of_bounds is_pos h) + (string_of_label2 h); + debug_cycle_bounds' t + end + | [] -> () + in + debug_cycle_bounds' c + +(** For debugging, print a cycle of instantiations *) +let debug_cycle (is_pos,c,l,p) = + let kind = if is_pos then "P" else "N" in + let rec string_of_cycle = function + | h :: [] -> string_of_label2 h + | [] -> "" + | h :: t -> Printf.sprintf "%s,%s" (string_of_label2 h) (string_of_cycle t) + in + Printf.printf "Collapsing %s cycle around %s:\n" kind (string_of_label2 l); + Printf.printf "Elements are: %s\n" (string_of_cycle c); + Printf.printf "Per-element bounds:\n"; + debug_cycle_bounds is_pos c; + Printf.printf "Full path is: %s" (string_of_cycle p); + print_newline() + +(** Compute pos or neg flow, depending on [is_pos]. Searches for cycles in the + instantiations (can these even occur?) and unifies either the positive or + negative edge sets for the labels on the cycle. Note that this does not + ever unify the labels themselves. The return is the new bounds of the + argument label *) +let rec flow (is_pos : bool) (path : label list) (l : label) : label boundset = + let collapse_cycle () = + let cycle = truncate path l in + debug_cycle (is_pos,cycle,l,path); + List.iter (fun x -> U.unify combine_lbounds + ((get_bounds is_pos x),get_bounds is_pos l) + ) cycle + in + if (on_path l) + then + begin + collapse_cycle (); + (* set_on_path l false; *) + B.empty + end + else + if ( (is_pos && (U.deref l).p_cached) || + ( (not is_pos) && (U.deref l).n_cached) ) then + begin + U.deref (get_bounds is_pos l) + end + else + begin + let newbounds = ref B.empty in + let base = get_bounds is_pos l in + set_on_path l true; + if (is_pos) then + (U.deref l).p_cached <- true + else + (U.deref l).n_cached <- true; + B.iter + (fun x -> + if (U.equal(x.info,l)) then () + else + (newbounds := + (B.union (!newbounds) (flow is_pos (l :: path) x.info))) + ) (U.deref base); + set_on_path l false; + U.update (base,(B.union (U.deref base) !newbounds)); + U.deref base + end + +(** Compute and cache any positive flow. *) +let pos_flow l : constantset = + let result = ref C.empty in + begin + ignore (flow true [] l); + B.iter (fun x -> result := C.union (!result) (U.deref(x.info)).aliases ) + (U.deref (get_bounds true l)); + !result + end + +(** Compute and cache any negative flow. *) +let neg_flow l : constantset = + let result = ref C.empty in + begin + ignore (flow false [] l); + B.iter (fun x -> result := C.union (!result) (U.deref(x.info)).aliases ) + (U.deref (get_bounds false l)); + !result + end + +(** Compute and cache any pos-neg flow. Assumes that both pos_flow and + neg_flow have been computed for the label [l]. *) +let pos_neg_flow(l : label) : constantset = + let result = ref C.empty in + begin + B.iter (fun x -> result := C.union (!result) (pos_flow x.info)) + (U.deref (get_bounds false l)); + !result + end + +(** Compute a points-to set by computing positive, then negative, then + positive-negative flow for a label. *) +let points_to_int (lv : lvalue) : constantset = + let visited_caches : cache list ref = ref [] in + let rec points_to_tau (t : tau) : constantset = + try + begin + match U.deref (proj_ref t) with + | Var v -> C.empty + | Ref r -> + begin + let pos = pos_flow r.rl in + let neg = neg_flow r.rl in + let interproc = C.union (pos_neg_flow r.rl) (C.union pos neg) + in + C.union ((U.deref(r.rl)).aliases) interproc + end + | _ -> raise (Inconsistent ("points_to")) + end + with + | No_contents -> + begin + match (U.deref t) with + | Var v -> rebuild_flow v.v_cache + | _ -> raise (Inconsistent ("points_to")) + end + and rebuild_flow (c : cache) : constantset = + if (List.mem c (!visited_caches) ) (* cyclic instantiations *) + then + begin + (* visited_caches := List.tl (!visited_caches); *) (* check *) + C.empty + end + else + begin + visited_caches := c :: (!visited_caches); + let result = ref (C.empty) in + H.iter (fun _ -> fun(p,t) -> + match p with + | Pos -> () + | _ -> result := C.union (!result) (points_to_tau t) + ) c; + visited_caches := List.tl (!visited_caches); + !result + end + in + if (!no_flow) then + (U.deref lv.l).aliases + else + points_to_tau (lv.contents) + +let points_to (lv : lvalue) : string list = + List.map snd (C.elements (points_to_int lv)) + +let alias_query (a_progress : bool) (lv : lvalue list) : int * int = + (0,0) (* todo *) +(* + let a_count = ref 0 in + let ptsets = List.map points_to_int lv in + let total_sets = List.length ptsets in + let counted_sets = ref 0 in + let record_alias s s' = + if (C.is_empty (C.inter s s')) + then () + else (incr a_count) + in + let rec check_alias = function + | h :: t -> + begin + List.iter (record_alias h) ptsets; + check_alias t + end + | [] -> () + in + check_alias ptsets; + !a_count +*) diff --git a/cil/src/ext/pta/steensgaard.mli b/cil/src/ext/pta/steensgaard.mli new file mode 100644 index 00000000..f009e7e0 --- /dev/null +++ b/cil/src/ext/pta/steensgaard.mli @@ -0,0 +1,71 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* *) +(* This file is currently unused by CIL. It is included in the *) +(* distribution for reference only. *) +(* *) +(* *) +(***********************************************************************) + +type lvalue +type tau +val debug : bool ref +val debug_constraints : bool ref +val print_constraints : bool ref +val no_flow : bool ref +val no_sub : bool ref +val analyze_mono : bool ref +val solve_online : bool ref +val solve_constraints : unit -> unit +val rvalue : lvalue -> tau +val deref : tau -> lvalue +val join : tau -> tau -> tau +val join_inits : tau list -> tau +val address : lvalue -> tau +val instantiate : lvalue -> int -> lvalue +val assign : lvalue -> tau -> unit +val apply : tau -> tau list -> tau +val make_function : string -> lvalue list -> tau -> tau +val make_lvalue : bool -> string -> lvalue +val bottom : unit -> tau +val return : tau -> tau -> unit +val make_fresh : string -> tau +val points_to : lvalue -> string list +val string_of_lvalue : lvalue -> string +val global_lvalue : lvalue -> bool +val alias_query : bool -> lvalue list -> int * int diff --git a/cil/src/ext/pta/uref.ml b/cil/src/ext/pta/uref.ml new file mode 100644 index 00000000..53f36400 --- /dev/null +++ b/cil/src/ext/pta/uref.ml @@ -0,0 +1,94 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +exception Bad_find + +type 'a urefC = + Ecr of 'a * int + | Link of 'a uref +and 'a uref = 'a urefC ref + +let rec find p = + match !p with + | Ecr _ -> p + | Link p' -> + let p'' = find p' + in p := Link p''; p'' + +let uref x = ref (Ecr(x,0)) + +let equal (p,p') = (find p == find p') + +let deref p = + match ! (find p) with + | Ecr (x,_) -> x + | _ -> raise Bad_find + +let update (p,x) = + let p' = find p + in + match !p' with + | Ecr (_,rank) -> p' := Ecr(x,rank) + | _ -> raise Bad_find + +let unify f (p,q) = + let p',q' = find p, find q in + match (!p',!q') with + | (Ecr(px,pr),Ecr(qx,qr)) -> + let x = f(px,qx) in + if (p' == q') then + p' := Ecr(x,pr) + else if pr == qr then + (q' := Ecr(x,qr+1); p' := Link q') + else if pr < qr then + (q' := Ecr(x,qr); p' := Link q') + else (* pr > qr *) + (p' := Ecr(x,pr); q' := Link p') + | _ -> raise Bad_find + +let union (p,q) = + let p',q' = find p, find q in + match (!p',!q') with + | (Ecr(px,pr),Ecr(qx,qr)) -> + if (p' == q') then + () + else if pr == qr then + (q' := Ecr(qx, qr+1); p' := Link q') + else if pr < qr then + p' := Link q' + else (* pr > qr *) + q' := Link p' + | _ -> raise Bad_find + + diff --git a/cil/src/ext/pta/uref.mli b/cil/src/ext/pta/uref.mli new file mode 100644 index 00000000..1dee5036 --- /dev/null +++ b/cil/src/ext/pta/uref.mli @@ -0,0 +1,65 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +type 'a uref + +(** Union-find with union by rank and path compression + + This is an implementation of Tarjan's union-find data structure using + generics. The interface is analagous to standard references, with the + addition of a union operation which makes two references indistinguishable. + +*) + +val uref: 'a -> 'a uref + (** Create a new uref *) + +val equal: 'a uref * 'a uref -> bool + (** Test whether two urefs share the same equivalence class *) + +val deref: 'a uref -> 'a + (** Extract the contents of this reference *) + +val update: 'a uref * 'a -> unit + (** Update the value stored in this reference *) + +val unify: ('a * 'a -> 'a) -> 'a uref * 'a uref -> unit + (** [unify f (p,q)] unifies references [p] and [q], making them + indistinguishable. The contents of the reference are the result of + [f] *) + +val union: 'a uref * 'a uref -> unit + (** [unify (p,q)] unifies references [p] and [q], making them + indistinguishable. The contents of the reference are the contents of + one of the first or second arguments (unspecified) *) diff --git a/cil/src/ext/reachingdefs.ml b/cil/src/ext/reachingdefs.ml new file mode 100644 index 00000000..b6af37cb --- /dev/null +++ b/cil/src/ext/reachingdefs.ml @@ -0,0 +1,511 @@ +(* Calculate reaching definitions for each instruction. + * Determine when it is okay to replace some variables with + * expressions. + * + * After calling computeRDs on a fundec, + * ReachingDef.stmtStartData will contain a mapping from + * statement ids to data about which definitions reach each + * statement. ReachingDef.defIdStmtHash will contain a + * mapping from definition ids to the statement in which + * that definition takes place. + * + * instrRDs takes a list of instructions, and the + * definitions that reach the first instruction, and + * for each instruction figures out which definitions + * reach into or out of each instruction. + * + *) + +open Cil +open Pretty + +module E = Errormsg +module DF = Dataflow +module UD = Usedef +module IH = Inthash +module U = Util +module S = Stats + +let debug_fn = ref "" + +module IOS = + Set.Make(struct + type t = int option + let compare io1 io2 = + match io1, io2 with + Some i1, Some i2 -> Pervasives.compare i1 i2 + | Some i1, None -> 1 + | None, Some i2 -> -1 + | None, None -> 0 + end) + +let debug = ref false + +(* return the intersection of + Inthashes ih1 and ih2 *) +let ih_inter ih1 ih2 = + let ih' = IH.copy ih1 in + IH.iter (fun id vi -> + if not(IH.mem ih2 id) then + IH.remove ih' id else + ()) ih1; + ih' + +let ih_union ih1 ih2 = + let ih' = IH.copy ih1 in + IH.iter (fun id vi -> + if not(IH.mem ih' id) + then IH.add ih' id vi + else ()) ih2; + ih' + +(* Lookup varinfo in iosh. If the set contains None + or is not a singleton, return None, otherwise + return Some of the singleton *) +(* IOS.t IH.t -> varinfo -> int option *) +let iosh_singleton_lookup iosh vi = + if IH.mem iosh vi.vid then + let ios = IH.find iosh vi.vid in + if not (IOS.cardinal ios = 1) then None + else IOS.choose ios + else None + +(* IOS.t IH.t -> varinfo -> IOS.t *) +let iosh_lookup iosh vi = + if IH.mem iosh vi.vid + then Some(IH.find iosh vi.vid) + else None + +(* return Some(vid) if iosh contains defId. + return None otherwise *) +(* IOS.t IH.t -> int -> int option *) +let iosh_defId_find iosh defId = + (* int -> IOS.t -> int option -> int option*) + let get_vid vid ios io = + match io with + Some(i) -> Some(i) + | None -> + let there = IOS.exists + (function None -> false + | Some(i') -> defId = i') ios in + if there then Some(vid) else None + in + IH.fold get_vid iosh None + +(* The resulting iosh will contain the + union of the same entries from iosh1 and + iosh2. If iosh1 has an entry that iosh2 + does not, then the result will contain + None in addition to the things from the + entry in iosh1. *) +(* XXX this function is a performance bottleneck *) +let iosh_combine iosh1 iosh2 = + let iosh' = IH.copy iosh1 in + IH.iter (fun id ios1 -> + try let ios2 = IH.find iosh2 id in + let newset = IOS.union ios1 ios2 in + IH.replace iosh' id newset; + with Not_found -> + let newset = IOS.add None ios1 in + IH.replace iosh' id newset) iosh1; + IH.iter (fun id ios2 -> + if not(IH.mem iosh1 id) then + let newset = IOS.add None ios2 in + IH.add iosh' id newset) iosh2; + iosh' + + +(* determine if two IOS.t IH.t s are the same *) +let iosh_equals iosh1 iosh2 = +(* if IH.length iosh1 = 0 && not(IH.length iosh2 = 0) || + IH.length iosh2 = 0 && not(IH.length iosh1 = 0)*) + if not(IH.length iosh1 = IH.length iosh2) + then + (if !debug then ignore(E.log "iosh_equals: length not same\n"); + false) + else + IH.fold (fun vid ios b -> + if not b then b else + try let ios2 = IH.find iosh2 vid in + if not(IOS.compare ios ios2 = 0) then + (if !debug then ignore(E.log "iosh_equals: sets for vid %d not equal\n" vid); + false) + else true + with Not_found -> + (if !debug then ignore(E.log "iosh_equals: vid %d not in iosh2\n" vid); + false)) iosh1 true + +(* replace an entire set with a singleton. + if nothing was there just add the singleton *) +(* IOS.t IH.t -> int -> varinfo -> unit *) +let iosh_replace iosh i vi = + if IH.mem iosh vi.vid then + let newset = IOS.singleton (Some i) in + IH.replace iosh vi.vid newset + else + let newset = IOS.singleton (Some i) in + IH.add iosh vi.vid newset + +(* remove definitions that are killed. + add definitions that are gend *) +(* Takes the defs, the data, and a function for + obtaining the next def id *) +(* VS.t -> IOS.t IH.t -> (unit->int) -> unit *) +let proc_defs vs iosh f = + let pd vi = + let newi = f() in + (*if !debug then + ignore (E.log "proc_defs: genning %d\n" newi);*) + iosh_replace iosh newi vi + in + UD.VS.iter pd vs + +let idMaker () start = + let counter = ref start in + fun () -> + let ret = !counter in + counter := !counter + 1; + ret + +(* given reaching definitions into a list of + instructions, figure out the definitions that + reach in/out of each instruction *) +(* if out is true then calculate the definitions that + go out of each instruction, if it is false then + calculate the definitions reaching into each instruction *) +(* instr list -> int -> (varinfo IH.t * int) -> bool -> (varinfo IH.t * int) list *) +let iRDsHtbl = Hashtbl.create 128 +let instrRDs il sid (ivih, s, iosh) out = + if Hashtbl.mem iRDsHtbl (sid,out) then Hashtbl.find iRDsHtbl (sid,out) else + +(* let print_instr i (_,s', iosh') = *) +(* let d = d_instr () i ++ line in *) +(* fprint stdout 80 d; *) +(* flush stdout *) +(* in *) + + let proc_one hil i = + match hil with + | [] -> + let _, defd = UD.computeUseDefInstr i in + if UD.VS.is_empty defd + then ((*if !debug then print_instr i ((), s, iosh);*) + [((), s, iosh)]) + else + let iosh' = IH.copy iosh in + proc_defs defd iosh' (idMaker () s); + (*if !debug then + print_instr i ((), s + UD.VS.cardinal defd, iosh');*) + ((), s + UD.VS.cardinal defd, iosh')::hil + | (_, s', iosh')::hrst as l -> + let _, defd = UD.computeUseDefInstr i in + if UD.VS.is_empty defd + then + ((*if !debug then + print_instr i ((),s', iosh');*) + ((), s', iosh')::l) + else let iosh'' = IH.copy iosh' in + proc_defs defd iosh'' (idMaker () s'); + (*if !debug then + print_instr i ((), s' + UD.VS.cardinal defd, iosh'');*) + ((),s' + UD.VS.cardinal defd, iosh'')::l + in + let folded = List.fold_left proc_one [((),s,iosh)] il in + let foldedout = List.tl (List.rev folded) in + let foldednotout = List.rev (List.tl folded) in + Hashtbl.add iRDsHtbl (sid,true) foldedout; + Hashtbl.add iRDsHtbl (sid,false) foldednotout; + if out then foldedout else foldednotout + + + +(* The right hand side of an assignment is either + a function call or an expression *) +type rhs = RDExp of exp | RDCall of instr + +(* take the id number of a definition and return + the rhs of the definition if there is one. + Returns None if, for example, the definition is + caused by an assembly instruction *) +(* stmt IH.t -> (()*int*IOS.t IH.t) IH.t -> int -> (rhs * int * IOS.t IH.t) option *) +let rhsHtbl = IH.create 64 (* to avoid recomputation *) +let getDefRhs didstmh stmdat defId = + if IH.mem rhsHtbl defId then IH.find rhsHtbl defId else + let stm = + try IH.find didstmh defId + with Not_found -> E.s (E.error "getDefRhs: defId %d not found\n" defId) in + let (_,s,iosh) = + try IH.find stmdat stm.sid + with Not_found -> E.s (E.error "getDefRhs: sid %d not found \n" stm.sid) in + match stm.skind with + Instr il -> + let ivihl = instrRDs il stm.sid ((),s,iosh) true in (* defs that reach out of each instr *) + let ivihl_in = instrRDs il stm.sid ((),s,iosh) false in (* defs that reach into each instr *) + let iihl = List.combine (List.combine il ivihl) ivihl_in in + (try let ((i,(_,_,diosh)),(_,_,iosh_in)) = List.find (fun ((i,(_,_,iosh')),_) -> + match S.time "iosh_defId_find" (iosh_defId_find iosh') defId with + Some vid -> + (match i with + Set((Var vi',NoOffset),_,_) -> vi'.vid = vid (* _ -> NoOffset *) + | Call(Some(Var vi',NoOffset),_,_,_) -> vi'.vid = vid (* _ -> NoOffset *) + | Call(None,_,_,_) -> false + | Asm(_,_,sll,_,_,_) -> List.exists + (function (_,(Var vi',NoOffset)) -> vi'.vid = vid | _ -> false) sll + | _ -> false) + | None -> false) iihl in + (match i with + Set((lh,_),e,_) -> + (match lh with + Var(vi') -> + (IH.add rhsHtbl defId (Some(RDExp(e),stm.sid,iosh_in)); + Some(RDExp(e), stm.sid, iosh_in)) + | _ -> E.s (E.error "Reaching Defs getDefRhs: right vi not first\n")) + | Call(lvo,e,el,_) -> + (IH.add rhsHtbl defId (Some(RDCall(i),stm.sid,iosh_in)); + Some(RDCall(i), stm.sid, iosh_in)) + | Asm(a,sl,slvl,sel,sl',_) -> None) (* ? *) + with Not_found -> + (if !debug then ignore (E.log "getDefRhs: No instruction defines %d\n" defId); + IH.add rhsHtbl defId None; + None)) + | _ -> E.s (E.error "getDefRhs: defining statement not an instruction list %d\n" defId) + (*None*) + +let prettyprint didstmh stmdat () (_,s,iosh) = text "" + (*seq line (fun (vid,ios) -> + num vid ++ text ": " ++ + IOS.fold (fun io d -> match io with + None -> d ++ text "None " + | Some i -> + let stm = IH.find didstmh i in + match getDefRhs didstmh stmdat i with + None -> d ++ num i + | Some(RDExp(e),_,_) -> + d ++ num i ++ text " " ++ (d_exp () e) + | Some(RDCall(c),_,_) -> + d ++ num i ++ text " " ++ (d_instr () c)) + ios nil) + (IH.tolist iosh)*) + +module ReachingDef = + struct + + let name = "Reaching Definitions" + + let debug = debug + + (* Should the analysis calculate may-reach + or must-reach *) + let mayReach = ref false + + + (* An integer that tells the id number of + the first definition *) + (* Also a hash from variable ids to a set of + definition ids that reach this statement. + None means there is a path to this point on which + there is no definition of the variable *) + type t = (unit * int * IOS.t IH.t) + + let copy (_, i, iosh) = ((), i, IH.copy iosh) + + (* entries for starting statements must + be added before calling compute *) + let stmtStartData = IH.create 32 + + (* a mapping from definition ids to + the statement corresponding to that id *) + let defIdStmtHash = IH.create 32 + + (* mapping from statement ids to statements + for better performance of ok_to_replace *) + let sidStmtHash = IH.create 64 + + (* pretty printer *) + let pretty = prettyprint defIdStmtHash stmtStartData + + + (* The first id to use when computeFirstPredecessor + is next called *) + let nextDefId = ref 0 + + (* Count the number of variable definitions in + a statement *) + let num_defs stm = + match stm.skind with + Instr(il) -> List.fold_left (fun s i -> + let _, d = UD.computeUseDefInstr i in + s + UD.VS.cardinal d) 0 il + | _ -> let _, d = UD.computeUseDefStmtKind stm.skind in + UD.VS.cardinal d + + (* the first predecessor is just the data in along with + the id of the first definition of the statement, + which we get from nextDefId *) + let computeFirstPredecessor stm (_, s, iosh) = + let startDefId = max !nextDefId s in + let numds = num_defs stm in + let rec loop n = + if n < 0 + then () + else + (if !debug then + ignore (E.log "RD: defId %d -> stm %d\n" (startDefId + n) stm.sid); + IH.add defIdStmtHash (startDefId + n) stm; + loop (n-1)) + in + loop (numds - 1); + nextDefId := startDefId + numds; + ((), startDefId, IH.copy iosh) + + + let combinePredecessors (stm:stmt) ~(old:t) ((_, s, iosh):t) = + match old with (_, os, oiosh) -> + if S.time "iosh_equals" (iosh_equals oiosh) iosh then None else + Some((), os, S.time "iosh_combine" (iosh_combine oiosh) iosh) + + (* return an action that removes things that + are redefinied and adds the generated defs *) + let doInstr inst (_, s, iosh) = + let transform (_, s', iosh') = + let _, defd = UD.computeUseDefInstr inst in + proc_defs defd iosh' (idMaker () s'); + ((), s' + UD.VS.cardinal defd, iosh') + in + DF.Post transform + + (* all the work gets done at the instruction level *) + let doStmt stm (_, s, iosh) = + if not(IH.mem sidStmtHash stm.sid) then + IH.add sidStmtHash stm.sid stm; + if !debug then ignore(E.log "RD: looking at %a\n" d_stmt stm); + DF.SDefault + + let doGuard condition _ = DF.GDefault + + let filterStmt stm = true + +end + +module RD = DF.ForwardsDataFlow(ReachingDef) + +(* map all variables in vil to a set containing + None in iosh *) +(* IOS.t IH.t -> varinfo list -> () *) +let iosh_none_fill iosh vil = + List.iter (fun vi -> + IH.add iosh vi.vid (IOS.singleton None)) + vil + +(* Computes the reaching definitions for a + function. *) +(* Cil.fundec -> unit *) +let computeRDs fdec = + try + if compare fdec.svar.vname (!debug_fn) = 0 then + (debug := true; + ignore (E.log "%s =\n%a\n" (!debug_fn) d_block fdec.sbody)); + let bdy = fdec.sbody in + let slst = bdy.bstmts in + let _ = IH.clear ReachingDef.stmtStartData in + let _ = IH.clear ReachingDef.defIdStmtHash in + let _ = IH.clear rhsHtbl in + let _ = Hashtbl.clear iRDsHtbl in + let _ = ReachingDef.nextDefId := 0 in + let fst_stm = List.hd slst in + let fst_iosh = IH.create 32 in + let _ = UD.onlyNoOffsetsAreDefs := false in + (*let _ = iosh_none_fill fst_iosh fdec.sformals in*) + let _ = IH.add ReachingDef.stmtStartData fst_stm.sid ((), 0, fst_iosh) in + let _ = ReachingDef.computeFirstPredecessor fst_stm ((), 0, fst_iosh) in + if !debug then + ignore (E.log "computeRDs: fst_stm.sid=%d\n" fst_stm.sid); + RD.compute [fst_stm]; + if compare fdec.svar.vname (!debug_fn) = 0 then + debug := false + (* now ReachingDef.stmtStartData has the reaching def data in it *) + with Failure "hd" -> if compare fdec.svar.vname (!debug_fn) = 0 then + debug := false + +(* return the definitions that reach the statement + with statement id sid *) +let getRDs sid = + try + Some (IH.find ReachingDef.stmtStartData sid) + with Not_found -> + None +(* E.s (E.error "getRDs: sid %d not found\n" sid) *) + +let getDefIdStmt defid = + try + Some(IH.find ReachingDef.defIdStmtHash defid) + with Not_found -> + None + +let getStmt sid = + try Some(IH.find ReachingDef.sidStmtHash sid) + with Not_found -> None + +(* Pretty print the reaching definition data for + a function *) +let ppFdec fdec = + seq line (fun stm -> + let ivih = IH.find ReachingDef.stmtStartData stm.sid in + ReachingDef.pretty () ivih) fdec.sbody.bstmts + + +(* If this class is extended with a visitor on expressions, + then the current rd data is available at each expression *) +class rdVisitorClass = object (self) + inherit nopCilVisitor + + (* the statement being worked on *) + val mutable sid = -1 + + (* if a list of instructions is being processed, + then this is the corresponding list of + reaching definitions *) + val mutable rd_dat_lst = [] + + (* these are the reaching defs for the current + instruction if there is one *) + val mutable cur_rd_dat = None + + method vstmt stm = + sid <- stm.sid; + match getRDs sid with + None -> + if !debug then ignore(E.log "rdVis: stm %d had no data\n" sid); + cur_rd_dat <- None; + DoChildren + | Some(_,s,iosh) -> + match stm.skind with + Instr il -> + if !debug then ignore(E.log "rdVis: visit il\n"); + rd_dat_lst <- instrRDs il stm.sid ((),s,iosh) false; + DoChildren + | _ -> + if !debug then ignore(E.log "rdVis: visit non-il\n"); + cur_rd_dat <- None; + DoChildren + + method vinst i = + if !debug then ignore(E.log "rdVis: before %a, rd_dat_lst is %d long\n" + d_instr i (List.length rd_dat_lst)); + try + cur_rd_dat <- Some(List.hd rd_dat_lst); + rd_dat_lst <- List.tl rd_dat_lst; + DoChildren + with Failure "hd" -> + if !debug then ignore(E.log "rdVis: il rd_dat_lst mismatch\n"); + DoChildren + + method get_cur_iosh () = + match cur_rd_dat with + None -> (match getRDs sid with + None -> None + | Some(_,_,iosh) -> Some iosh) + | Some(_,_,iosh) -> Some iosh + +end + diff --git a/cil/src/ext/sfi.ml b/cil/src/ext/sfi.ml new file mode 100755 index 00000000..9886526c --- /dev/null +++ b/cil/src/ext/sfi.ml @@ -0,0 +1,337 @@ +(* + * + * Copyright (c) 2005, + * George C. Necula + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(** This is a module that inserts runtime checks for memory reads/writes and + * allocations *) + +open Pretty +open Cil +module E = Errormsg +module H = Hashtbl + +let doSfi = ref false +let doSfiReads = ref false +let doSfiWrites = ref true + +(* A number of functions to be skipped *) +let skipFunctions : (string, unit) H.t = H.create 13 +let mustSfiFunction (f: fundec) : bool = + not (H.mem skipFunctions f.svar.vname) + +(** Some functions are known to be allocators *) +type dataLocation = + InResult (* Interesting data is in the return value *) + | InArg of int (* in the nth argument. Starts from 1. *) + | InArgTimesArg of int * int (* (for size) data is the product of two + * arguments *) + | PointedToByArg of int (* pointed to by nth argument *) + +(** Compute the data based on the location and the actual argument list *) +let extractData (dl: dataLocation) (args: exp list) (res: lval option) : exp = + let getArg (n: int) = + try List.nth args (n - 1) (* Args are based at 1 *) + with _ -> E.s (E.bug "Cannot extract argument %d at %a" + n d_loc !currentLoc) + in + match dl with + InResult -> begin + match res with + None -> + E.s (E.bug "Cannot extract InResult data (at %a)" d_loc !currentLoc) + | Some r -> Lval r + end + | InArg n -> getArg n + | InArgTimesArg (n1, n2) -> + let a1 = getArg n1 in + let a2 = getArg n2 in + BinOp(Mult, mkCast ~e:a1 ~newt:longType, + mkCast ~e:a2 ~newt:longType, longType) + | PointedToByArg n -> + let a = getArg n in + Lval (mkMem a NoOffset) + + + +(* for each allocator, where is the length and where is the result *) +let allocators: (string, (dataLocation * dataLocation)) H.t = H.create 13 +let _ = + H.add allocators "malloc" (InArg 1, InResult); + H.add allocators "calloc" (InArgTimesArg (1, 2), InResult); + H.add allocators "realloc" (InArg 2, InResult) + +(* for each deallocator, where is the data being deallocated *) +let deallocators: (string, dataLocation) H.t = H.create 13 +let _= + H.add deallocators "free" (InArg 1); + H.add deallocators "realloc" (InArg 1) + +(* Returns true if the given lvalue offset ends in a bitfield access. *) +let rec is_bitfield lo = match lo with + | NoOffset -> false + | Field(fi,NoOffset) -> not (fi.fbitfield = None) + | Field(_,lo) -> is_bitfield lo + | Index(_,lo) -> is_bitfield lo + +(* Return an expression that evaluates to the address of the given lvalue. + * For most lvalues, this is merely AddrOf(lv). However, for bitfields + * we do some offset gymnastics. + *) +let addr_of_lv (lv: lval) = + let lh, lo = lv in + if is_bitfield lo then begin + (* we figure out what the address would be without the final bitfield + * access, and then we add in the offset of the bitfield from the + * beginning of its enclosing comp *) + let rec split_offset_and_bitfield lo = match lo with + | NoOffset -> failwith "logwrites: impossible" + | Field(fi,NoOffset) -> (NoOffset,fi) + | Field(e,lo) -> let a,b = split_offset_and_bitfield lo in + ((Field(e,a)),b) + | Index(e,lo) -> let a,b = split_offset_and_bitfield lo in + ((Index(e,a)),b) + in + let new_lv_offset, bf = split_offset_and_bitfield lo in + let new_lv = (lh, new_lv_offset) in + let enclosing_type = TComp(bf.fcomp, []) in + let bits_offset, bits_width = + bitsOffset enclosing_type (Field(bf,NoOffset)) in + let bytes_offset = bits_offset / 8 in + let lvPtr = mkCast ~e:(mkAddrOf (new_lv)) ~newt:(charPtrType) in + (BinOp(PlusPI, lvPtr, (integer bytes_offset), ulongType)) + end else + (mkAddrOf (lh,lo)) + + +let mustLogLval (forwrite: bool) (lv: lval) : bool = + match lv with + Var v, off -> (* Inside a variable. We assume the array offsets are fine *) + false + | Mem e, off -> + if forwrite && not !doSfiWrites then + false + else if not forwrite && not !doSfiReads then + false + + (* If this is an lval of function type, we do not log it *) + else if isFunctionType (typeOfLval lv) then + false + else + true + +(* Create prototypes for the logging functions *) +let mkProto (name: string) (args: (string * typ * attributes) list) = + let fdec = emptyFunction name in + fdec.svar.vtype <- TFun(voidType, + Some args, false, []); + fdec + + +let logReads = mkProto "logRead" [ ("addr", voidPtrType, []); + ("what", charPtrType, []); + ("file", charPtrType, []); + ("line", intType, []) ] +let callLogRead (lv: lval) = + let what = Pretty.sprint 80 (d_lval () lv) in + Call(None, + Lval(Var(logReads.svar),NoOffset), + [ addr_of_lv lv; mkString what; mkString !currentLoc.file; + integer !currentLoc.line], !currentLoc ) + +let logWrites = mkProto "logWrite" [ ("addr", voidPtrType, []); + ("what", charPtrType, []); + ("file", charPtrType, []); + ("line", intType, []) ] +let callLogWrite (lv: lval) = + let what = Pretty.sprint 80 (d_lval () lv) in + Call(None, + Lval(Var(logWrites.svar), NoOffset), + [ addr_of_lv lv; mkString what; mkString !currentLoc.file; + integer !currentLoc.line], !currentLoc ) + +let logStackFrame = mkProto "logStackFrame" [ ("func", charPtrType, []) ] +let callLogStack (fname: string) = + Call(None, + Lval(Var(logStackFrame.svar), NoOffset), + [ mkString fname; ], !currentLoc ) + +let logAlloc = mkProto "logAlloc" [ ("addr", voidPtrType, []); + ("size", intType, []); + ("file", charPtrType, []); + ("line", intType, []) ] +let callLogAlloc (szloc: dataLocation) + (resLoc: dataLocation) + (args: exp list) + (res: lval option) = + let sz = extractData szloc args res in + let res = extractData resLoc args res in + Call(None, + Lval(Var(logAlloc.svar), NoOffset), + [ res; sz; mkString !currentLoc.file; + integer !currentLoc.line ], !currentLoc ) + + +let logFree = mkProto "logFree" [ ("addr", voidPtrType, []); + ("file", charPtrType, []); + ("line", intType, []) ] +let callLogFree (dataloc: dataLocation) + (args: exp list) + (res: lval option) = + let data = extractData dataloc args res in + Call(None, + Lval(Var(logFree.svar), NoOffset), + [ data; mkString !currentLoc.file; + integer !currentLoc.line ], !currentLoc ) + +class sfiVisitorClass : Cil.cilVisitor = object (self) + inherit nopCilVisitor + + method vexpr (e: exp) : exp visitAction = + match e with + Lval lv when mustLogLval false lv -> (* A read *) + self#queueInstr [ callLogRead lv ]; + DoChildren + + | _ -> DoChildren + + + method vinst (i: instr) : instr list visitAction = + match i with + Set(lv, e, l) when mustLogLval true lv -> + self#queueInstr [ callLogWrite lv ]; + DoChildren + + | Call(lvo, f, args, l) -> + (* Instrument the write *) + (match lvo with + Some lv when mustLogLval true lv -> + self#queueInstr [ callLogWrite lv ] + | _ -> ()); + (* Do the expressions in the call, and then see if we need to + * instrument the function call *) + ChangeDoChildrenPost + ([i], + (fun il -> + currentLoc := l; + match f with + Lval (Var fv, NoOffset) -> begin + (* Is it an allocator? *) + try + let szloc, resloc = H.find allocators fv.vname in + il @ [callLogAlloc szloc resloc args lvo] + with Not_found -> begin + (* Is it a deallocator? *) + try + let resloc = H.find deallocators fv.vname in + il @ [ callLogFree resloc args lvo ] + with Not_found -> + il + end + end + | _ -> il)) + + | _ -> DoChildren + + method vfunc (fdec: fundec) = + (* Instead a stack log at the start of a function *) + ChangeDoChildrenPost + (fdec, + fun fdec -> + fdec.sbody <- + mkBlock + [ mkStmtOneInstr (callLogStack fdec.svar.vname); + mkStmt (Block fdec.sbody) ]; + fdec) + +end + +let doit (f: file) = + let sfiVisitor = new sfiVisitorClass in + let compileLoc (l: location) = function + ACons("inres", []) -> InResult + | ACons("inarg", [AInt n]) -> InArg n + | ACons("inargxarg", [AInt n1; AInt n2]) -> InArgTimesArg (n1, n2) + | ACons("pointedby", [AInt n]) -> PointedToByArg n + | _ -> E.warn "Invalid location at %a" d_loc l; + InResult + in + iterGlobals f + (fun glob -> + match glob with + GFun(fdec, _) when mustSfiFunction fdec -> + ignore (visitCilFunction sfiVisitor fdec) + | GPragma(Attr("sfiignore", al), l) -> + List.iter + (function AStr fn -> H.add skipFunctions fn () + | _ -> E.warn "Invalid argument in \"sfiignore\" pragma at %a" + d_loc l) + al + + | GPragma(Attr("sfialloc", al), l) -> begin + match al with + AStr fname :: locsz :: locres :: [] -> + H.add allocators fname (compileLoc l locsz, compileLoc l locres) + | _ -> E.warn "Invalid sfialloc pragma at %a" d_loc l + end + + | GPragma(Attr("sfifree", al), l) -> begin + match al with + AStr fname :: locwhat :: [] -> + H.add deallocators fname (compileLoc l locwhat) + | _ -> E.warn "Invalid sfifree pragma at %a" d_loc l + end + + + | _ -> ()); + (* Now add the prototypes for the instrumentation functions *) + f.globals <- + GVarDecl (logReads.svar, locUnknown) :: + GVarDecl (logWrites.svar, locUnknown) :: + GVarDecl (logStackFrame.svar, locUnknown) :: + GVarDecl (logAlloc.svar, locUnknown) :: + GVarDecl (logFree.svar, locUnknown) :: f.globals + + +let feature : featureDescr = + { fd_name = "sfi"; + fd_enabled = doSfi; + fd_description = "instrument memory operations"; + fd_extraopt = [ + "--sfireads", Arg.Set doSfiReads, "SFI for reads"; + "--sfiwrites", Arg.Set doSfiWrites, "SFI for writes"; + ]; + fd_doit = doit; + fd_post_check = true; + } + diff --git a/cil/src/ext/simplemem.ml b/cil/src/ext/simplemem.ml new file mode 100644 index 00000000..1b27815c --- /dev/null +++ b/cil/src/ext/simplemem.ml @@ -0,0 +1,132 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* + * Simplemem: Transform a program so that all memory expressions are + * "simple". Introduce well-typed temporaries to hold intermediate values + * for expressions that would normally involve more than one memory + * reference. + * + * If simplemem succeeds, each lvalue should contain only one Mem() + * constructor. + *) +open Cil + +(* current context: where should we put our temporaries? *) +let thefunc = ref None + +(* build up a list of assignments to temporary variables *) +let assignment_list = ref [] + +(* turn "int a[5][5]" into "int ** temp" *) +let rec array_to_pointer tau = + match unrollType tau with + TArray(dest,_,al) -> TPtr(array_to_pointer dest,al) + | _ -> tau + +(* create a temporary variable in the current function *) +let make_temp tau = + let tau = array_to_pointer tau in + match !thefunc with + Some(fundec) -> makeTempVar fundec ~name:("mem_") tau + | None -> failwith "simplemem: temporary needed outside a function" + +(* separate loffsets into "scalar addition parts" and "memory parts" *) +let rec separate_loffsets lo = + match lo with + NoOffset -> NoOffset, NoOffset + | Field(fi,rest) -> + let s,m = separate_loffsets rest in + Field(fi,s) , m + | Index(_) -> NoOffset, lo + +(* Recursively decompose the lvalue so that what is under a "Mem()" + * constructor is put into a temporary variable. *) +let rec handle_lvalue (lb,lo) = + let s,m = separate_loffsets lo in + match lb with + Var(vi) -> + handle_loffset (lb,s) m + | Mem(Lval(Var(_),NoOffset)) -> + (* special case to avoid generating "tmp = ptr;" *) + handle_loffset (lb,s) m + | Mem(e) -> + begin + let new_vi = make_temp (typeOf e) in + assignment_list := (Set((Var(new_vi),NoOffset),e,!currentLoc)) + :: !assignment_list ; + handle_loffset (Mem(Lval(Var(new_vi),NoOffset)),NoOffset) lo + end +and handle_loffset lv lo = + match lo with + NoOffset -> lv + | Field(f,o) -> handle_loffset (addOffsetLval (Field(f,NoOffset)) lv) o + | Index(exp,o) -> handle_loffset (addOffsetLval (Index(exp,NoOffset)) lv) o + +(* the transformation is implemented as a Visitor *) +class simpleVisitor = object + inherit nopCilVisitor + + method vfunc fundec = (* we must record the current context *) + thefunc := Some(fundec) ; + DoChildren + + method vlval lv = ChangeDoChildrenPost(lv, + (fun lv -> handle_lvalue lv)) + + method unqueueInstr () = + let result = List.rev !assignment_list in + assignment_list := [] ; + result +end + +(* Main entry point: apply the transformation to a file *) +let simplemem (f : file) = + try + visitCilFileSameGlobals (new simpleVisitor) f; + f + with e -> Printf.printf "Exception in Simplemem.simplemem: %s\n" + (Printexc.to_string e) ; raise e + +let feature : featureDescr = + { fd_name = "simpleMem"; + fd_enabled = Cilutil.doSimpleMem; + fd_description = "simplify all memory expressions" ; + fd_extraopt = []; + fd_doit = (function (f: file) -> ignore (simplemem f)) ; + fd_post_check = true; + } diff --git a/cil/src/ext/simplify.ml b/cil/src/ext/simplify.ml new file mode 100755 index 00000000..776d4916 --- /dev/null +++ b/cil/src/ext/simplify.ml @@ -0,0 +1,845 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Sumit Gulwani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* This module simplifies the expressions in a program in the following ways: + +1. All expressions are either + + basic::= + Const _ + Addrof(Var v, NoOffset) + StartOf(Var v, NoOffset) + Lval(Var v, off), where v is a variable whose address is not taken + and off contains only "basic" + + exp::= + basic + Lval(Mem basic, NoOffset) + BinOp(bop, basic, basic) + UnOp(uop, basic) + CastE(t, basic) + + lval ::= + Mem basic, NoOffset + Var v, off, where v is a variable whose address is not taken and off + contains only "basic" + + - all sizeof and alignof are turned into constants + - accesses to variables whose address is taken is turned into "Mem" accesses + - same for accesses to arrays + - all field and index computations are turned into address arithmetic, + including bitfields. + +*) + + +open Pretty +open Cil +module E = Errormsg +module H = Hashtbl + +type taExp = exp (* Three address expression *) +type bExp = exp (* Basic expression *) + +let debug = true + +(* Whether to split structs *) +let splitStructs = ref true + +let onlyVariableBasics = ref false +let noStringConstantsBasics = ref false + +exception BitfieldAccess + +(* Turn an expression into a three address expression (and queue some + * instructions in the process) *) +let rec makeThreeAddress + (setTemp: taExp -> bExp) (* Given an expression save it into a temp and + * return that temp *) + (e: exp) : taExp = + match e with + SizeOf _ | SizeOfE _ | AlignOf _ | AlignOfE _ | SizeOfStr _ -> + constFold true e + | Const _ -> e + | AddrOf (Var _, NoOffset) -> e + | Lval lv -> Lval (simplifyLval setTemp lv) + | BinOp(bo, e1, e2, tres) -> + BinOp(bo, makeBasic setTemp e1, makeBasic setTemp e2, tres) + | UnOp(uo, e1, tres) -> + UnOp(uo, makeBasic setTemp e1, tres) + | CastE(t, e) -> + CastE(t, makeBasic setTemp e) + | AddrOf lv -> begin + match simplifyLval setTemp lv with + Mem a, NoOffset -> a + | _ -> (* This is impossible, because we are taking the address + * of v and simplifyLval should turn it into a Mem, except if the + * sizeof has failed. *) + E.s (bug "Simplify: makeThreeAddress for AddrOf(LV=%a, LVT=%a)" + d_lval lv d_type (typeOfLval lv)) + end + | StartOf lv -> + makeThreeAddress setTemp (AddrOf (addOffsetLval (Index(zero, NoOffset)) + lv)) + +(* Make a basic expression *) +and makeBasic (setTemp: taExp -> bExp) (e: exp) : bExp = + let dump = false (* !currentLoc.line = 395 *) in + if dump then + ignore (E.log "makeBasic %a\n" d_plainexp e); + (* Make it a three address expression first *) + let e' = makeThreeAddress setTemp e in + if dump then + ignore (E.log " e'= %a\n" d_plainexp e); + (* See if it is a basic one *) + match e' with + | Lval (Var _, _) -> e' + | Const _ | AddrOf (Var _, NoOffset) | StartOf (Var _, NoOffset) -> + if !onlyVariableBasics then setTemp e' else e' + | SizeOf _ | SizeOfE _ | AlignOf _ | AlignOfE _ | SizeOfStr _ -> + E.s (bug "Simplify: makeBasic found SizeOf: %a" d_exp e') + + (* We cannot make a function to be Basic, unless it actually is a variable + * already. If this is a function pointer the best we can do is to make + * the address of the function basic *) + | Lval (Mem a, NoOffset) when isFunctionType (typeOf e') -> + if dump then + ignore (E.log " a function type\n"); + let a' = makeBasic setTemp a in + Lval (Mem a', NoOffset) + + | _ -> setTemp e' (* Put it into a temporary otherwise *) + + +and simplifyLval + (setTemp: taExp -> bExp) + (lv: lval) : lval = + (* Add, watching for a zero *) + let add (e1: exp) (e2: exp) = + if isZero e2 then e1 else BinOp(PlusA, e1, e2, !upointType) + in + (* Convert an offset to an integer, and possibly a residual bitfield offset*) + let rec offsetToInt + (t: typ) (* The type of the host *) + (off: offset) : exp * offset = + match off with + NoOffset -> zero, NoOffset + | Field(fi, off') -> begin + let start = + try + let start, _ = bitsOffset t (Field(fi, NoOffset)) in + start + with SizeOfError (whystr, t') -> + E.s (E.bug "%a: Cannot compute sizeof: %s: %a" + d_loc !currentLoc whystr d_type t') + in + if start land 7 <> 0 then begin + (* We have a bitfield *) + assert (off' = NoOffset); + zero, Field(fi, off') + end else begin + let next, restoff = offsetToInt fi.ftype off' in + add (integer (start / 8)) next, restoff + end + end + | Index(ei, off') -> begin + let telem = match unrollType t with + TArray(telem, _, _) -> telem + | _ -> E.s (bug "Simplify: simplifyLval: index on a non-array") + in + let next, restoff = offsetToInt telem off' in + add + (BinOp(Mult, ei, SizeOf telem, !upointType)) + next, + restoff + end + in + let tres = TPtr(typeOfLval lv, []) in + match lv with + Mem a, off -> + let offidx, restoff = offsetToInt (typeOfLval (Mem a, NoOffset)) off in + let a' = + if offidx <> zero then + add (mkCast a !upointType) offidx + else + a + in + let a' = makeBasic setTemp a' in + Mem (mkCast a' tres), restoff + + | Var v, off when v.vaddrof -> (* We are taking this variable's address *) + let offidx, restoff = offsetToInt v.vtype off in + (* We cannot call makeBasic recursively here, so we must do it + * ourselves *) + let a = mkAddrOrStartOf (Var v, NoOffset) in + let a' = + if offidx = zero then a else + add (mkCast a !upointType) (makeBasic setTemp offidx) + in + let a' = setTemp a' in + Mem (mkCast a' tres), restoff + + | Var v, off -> + (Var v, simplifyOffset setTemp off) + + +(* Simplify an offset and make sure it has only three address expressions in + * indices *) +and simplifyOffset (setTemp: taExp -> bExp) = function + NoOffset -> NoOffset + | Field(fi, off) -> Field(fi, simplifyOffset setTemp off) + | Index(ei, off) -> + let ei' = makeBasic setTemp ei in + Index(ei', simplifyOffset setTemp off) + + + + +(** This is a visitor that will turn all expressions into three address code *) +class threeAddressVisitor (fi: fundec) = object (self) + inherit nopCilVisitor + + method private makeTemp (e1: exp) : exp = + let t = makeTempVar fi (typeOf e1) in + (* Add this instruction before the current statement *) + self#queueInstr [Set(var t, e1, !currentLoc)]; + Lval(var t) + + (* We'll ensure that this gets called only for top-level expressions + * inside functions. We must turn them into three address code. *) + method vexpr (e: exp) = + let e' = makeThreeAddress self#makeTemp e in + ChangeTo e' + + + (** We want the argument in calls to be simple variables *) + method vinst (i: instr) = + match i with + Call (someo, f, args, loc) -> + let someo' = + match someo with + Some lv -> Some (simplifyLval self#makeTemp lv) + | _ -> None + in + let f' = makeBasic self#makeTemp f in + let args' = List.map (makeBasic self#makeTemp) args in + ChangeTo [ Call (someo', f', args', loc) ] + | _ -> DoChildren + + (* This method will be called only on top-level "lvals" (those on the + * left of assignments and function calls) *) + method vlval (lv: lval) = + ChangeTo (simplifyLval self#makeTemp lv) +end + +(******************** + Next is an old version of the code that was splitting structs into + * variables. It was not working on variables that are arguments or returns + * of function calls. +(** This is a visitor that splits structured variables into separate + * variables. *) +let isStructType (t: typ): bool = + match unrollType t with + TComp (ci, _) -> ci.cstruct + | _ -> false + +(* Keep track of how we change the variables. For each variable id we keep a + * hash table that maps an offset (a sequence of fieldinfo) into a + * replacement variable. We also keep track of the splittable vars: those + * with structure type but whose address is not take and which do not appear + * as the argument to a Return *) +let splittableVars: (int, unit) H.t = H.create 13 +let replacementVars: (int * offset, varinfo) H.t = H.create 13 + +let findReplacement (fi: fundec) (v: varinfo) (off: offset) : varinfo = + try + H.find replacementVars (v.vid, off) + with Not_found -> begin + let t = typeOfLval (Var v, off) in + (* make a name for this variable *) + let rec mkName = function + | Field(fi, off) -> "_" ^ fi.fname ^ mkName off + | _ -> "" + in + let v' = makeTempVar fi ~name:(v.vname ^ mkName off ^ "_") t in + H.add replacementVars (v.vid, off) v'; + if debug then + ignore (E.log "Simplify: %s (%a) replace %a with %s\n" + fi.svar.vname + d_loc !currentLoc + d_lval (Var v, off) + v'.vname); + v' + end + + (* Now separate the offset into a sequence of field accesses and the + * rest of the offset *) +let rec separateOffset (off: offset): offset * offset = + match off with + NoOffset -> NoOffset, NoOffset + | Field(fi, off') when fi.fcomp.cstruct -> + let off1, off2 = separateOffset off' in + Field(fi, off1), off2 + | _ -> NoOffset, off + + +class splitStructVisitor (fi: fundec) = object (self) + inherit nopCilVisitor + + method vlval (lv: lval) = + match lv with + Var v, off when H.mem splittableVars v.vid -> + (* The type of this lval better not be a struct *) + if isStructType (typeOfLval lv) then + E.s (unimp "Simplify: found lval of struct type %a : %a\n" + d_lval lv d_type (typeOfLval lv)); + let off1, restoff = separateOffset off in + let lv' = + if off1 <> NoOffset then begin + (* This is a splittable variable and we have an offset that makes + * it a scalar. Find the replacement variable for this *) + let v' = findReplacement fi v off1 in + if restoff = NoOffset then + Var v', NoOffset + else (* We have some more stuff. Use Mem *) + Mem (mkAddrOrStartOf (Var v', NoOffset)), restoff + end else begin (* off1 = NoOffset *) + if restoff = NoOffset then + E.s (bug "Simplify: splitStructVisitor:lval") + else + simplifyLval + (fun e1 -> + let t = makeTempVar fi (typeOf e1) in + (* Add this instruction before the current statement *) + self#queueInstr [Set(var t, e1, !currentLoc)]; + Lval(var t)) + (Mem (mkAddrOrStartOf (Var v, NoOffset)), restoff) + end + in + ChangeTo lv' + + | _ -> DoChildren + + method vinst (i: instr) = + (* Accumulate to the list of instructions a number of assignments of + * non-splittable lvalues *) + let rec accAssignment (ci: compinfo) (dest: lval) (what: lval) + (acc: instr list) : instr list = + List.fold_left + (fun acc f -> + let dest' = addOffsetLval (Field(f, NoOffset)) dest in + let what' = addOffsetLval (Field(f, NoOffset)) what in + match unrollType f.ftype with + TComp(ci, _) when ci.cstruct -> + accAssignment ci dest' what' acc + | TArray _ -> (* We must copy the array *) + (Set((Mem (AddrOf dest'), NoOffset), + Lval (Mem (AddrOf what'), NoOffset), !currentLoc)) :: acc + | _ -> (* If the type of f is not a struct then leave this alone *) + (Set(dest', Lval what', !currentLoc)) :: acc) + acc + ci.cfields + in + let doAssignment (ci: compinfo) (dest: lval) (what: lval) : instr list = + let il' = accAssignment ci dest what [] in + List.concat (List.map (visitCilInstr (self :> cilVisitor)) il') + in + match i with + Set(((Var v, off) as lv), what, _) when H.mem splittableVars v.vid -> + let off1, restoff = separateOffset off in + if restoff <> NoOffset then (* This means that we are only assigning + * part of a replacement variable. Leave + * this alone because the vlval will take + * care of it *) + DoChildren + else begin + (* The type of the replacement has to be a structure *) + match unrollType (typeOfLval lv) with + TComp (ci, _) when ci.cstruct -> + (* The assigned thing better be an lvalue *) + let whatlv = + match what with + Lval lv -> lv + | _ -> E.s (unimp "Simplify: assigned struct is not lval") + in + ChangeTo (doAssignment ci (Var v, off) whatlv) + + | _ -> (* vlval will take care of it *) + DoChildren + end + + | Set(dest, Lval (Var v, off), _) when H.mem splittableVars v.vid -> + let off1, restoff = separateOffset off in + if restoff <> NoOffset then (* vlval will do this *) + DoChildren + else begin + (* The type of the replacement has to be a structure *) + match unrollType (typeOfLval dest) with + TComp (ci, _) when ci.cstruct -> + ChangeTo (doAssignment ci dest (Var v, off)) + + | _ -> (* vlval will take care of it *) + DoChildren + end + + | _ -> DoChildren + +end +*) + +(* Whether to split the arguments of functions *) +let splitArguments = true + +(* Whether we try to do the splitting all in one pass. The advantage is that + * it is faster and it generates nicer names *) +let lu = locUnknown + +(* Go over the code and split some temporary variables of stucture type into + * several separate variables. The hope is that the compiler will have an + * easier time to do standard optimizations with the resulting scalars *) +(* Unfortunately, implementing this turns out to be more complicated than I + * thought *) + +(** Iterate over the fields of a structured type. Returns the empty list if + * no splits. The offsets are in order in which they appear in the structure + * type. Along with the offset we pass a string that identifies the + * meta-component, and the type of that component. *) +let rec foldRightStructFields + (doit: offset -> string -> typ -> 'a) (* Invoked on non-struct fields *) + (off: offset) + (post: 'a list) (** A suffix to what you compute *) + (fields: fieldinfo list) : 'a list = + List.fold_right + (fun f post -> + let off' = addOffset (Field(f, NoOffset)) off in + match unrollType f.ftype with + TComp (comp, _) when comp.cstruct -> (* struct type: recurse *) + foldRightStructFields doit off' post comp.cfields + | _ -> + (doit off' f.fname f.ftype) :: post) + fields + post + + +let rec foldStructFields + (t: typ) + (doit: offset -> string -> typ -> 'a) + : 'a list = + match unrollType t with + TComp (comp, _) when comp.cstruct -> + foldRightStructFields doit NoOffset [] comp.cfields + | _ -> [] + + +(* Map a variable name to a list of component variables, along with the + * accessor offset. The fields are in the order in which they appear in the + * structure. *) +let newvars : (string, (offset * varinfo) list) H.t = H.create 13 + +(* Split a variable and return the replacements, in the proper order. If this + * variable is not split, then return just the variable. *) +let splitOneVar (v: varinfo) + (mknewvar: string -> typ -> varinfo) : varinfo list = + try + (* See if we have already split it *) + List.map snd (H.find newvars v.vname) + with Not_found -> begin + let vars: (offset * varinfo) list = + foldStructFields v.vtype + (fun off n t -> (* make a new one *) + let newname = v.vname ^ "_" ^ n in + let v'= mknewvar newname t in + (off, v')) + in + if vars = [] then + [ v ] + else begin + (* Now remember the newly created vars *) + H.add newvars v.vname vars; + List.map snd vars (* Return just the vars *) + end + end + + +(* A visitor that finds all locals that appear in a call or have their + * address taken *) +let dontSplitLocals : (string, bool) H.t = H.create 111 +class findVarsCantSplitClass : cilVisitor = object (self) + inherit nopCilVisitor + + (* expressions, to see the address being taken *) + method vexpr (e: exp) : exp visitAction = + match e with + AddrOf (Var v, NoOffset) -> + H.add dontSplitLocals v.vname true; SkipChildren + (* See if we take the address of the "_ms" field in a variable *) + | _ -> DoChildren + + + (* variables involved in call instructions *) + method vinst (i: instr) : instr list visitAction = + match i with + Call (res, f, args, _) -> + (match res with + Some (Var v, NoOffset) -> H.add dontSplitLocals v.vname true + | _ -> ()); + if not splitArguments then + List.iter (fun a -> + match a with + Lval (Var v, NoOffset) -> H.add dontSplitLocals v.vname true + | _ -> ()) args; + (* Now continue the visit *) + DoChildren + + | _ -> DoChildren + + (* Variables used in return should not be split *) + method vstmt (s: stmt) : stmt visitAction = + match s.skind with + Return (Some (Lval (Var v, NoOffset)), _) -> + H.add dontSplitLocals v.vname true; DoChildren + | Return (Some e, _) -> + DoChildren + | _ -> DoChildren + + method vtype t = SkipChildren + +end +let findVarsCantSplit = new findVarsCantSplitClass + +let isVar lv = + match lv with + (Var v, NoOffset) -> true + | _ -> false + + +class splitVarVisitorClass(func:fundec option) : cilVisitor = object (self) + inherit nopCilVisitor + + method private makeTemp (e1: exp) : exp = + let fi:fundec = match func with + Some f -> f + | None -> + E.s (bug "You can't create a temporary if you're not in a function.") + in + let t = makeTempVar fi (typeOf e1) in + (* Add this instruction before the current statement *) + self#queueInstr [Set(var t, e1, !currentLoc)]; + Lval(var t) + + + (* We must process the function types *) + method vtype t = + (* We invoke the visitor first and then we fix it *) + let postProcessFunType (t: typ) : typ = + match t with + TFun(rt, Some params, isva, a) -> + let rec loopParams = function + [] -> [] + | ((pn, pt, pa) :: rest) as params -> + let rest' = loopParams rest in + let res: (string * typ * attributes) list = + foldStructFields pt + (fun off n t -> + (* Careful with no-name parameters, or we end up with + * many parameters named _p ! *) + ((if pn <> "" then pn ^ n else ""), t, pa)) + in + if res = [] then (* Not a fat *) + if rest' == rest then + params (* No change at all. Try not to reallocate so that + * the visitor does not allocate. *) + else + (pn, pt, pa) :: rest' + else (* Some change *) + res @ rest' + in + let params' = loopParams params in + if params == params' then + t + else + TFun(rt, Some params', isva, a) + + | t -> t + in + if splitArguments then + ChangeDoChildrenPost(t, postProcessFunType) + else + SkipChildren + + (* Whenever we see a variable with a field access we try to replace it + * by its components *) + method vlval ((b, off) : lval) : lval visitAction = + try + match b, off with + Var v, (Field _ as off) -> + (* See if this variable has some splits.Might throw Not_found *) + let splits = H.find newvars v.vname in + (* Now find among the splits one that matches this offset. And + * return the remaining offset *) + let rec find = function + [] -> + E.s (E.bug "Cannot find component %a of %s\n" + (d_offset nil) off v.vname) + | (splitoff, splitvar) :: restsplits -> + let rec matches = function + Field(f1, rest1), Field(f2, rest2) + when f1.fname = f2.fname -> + matches (rest1, rest2) + | off, NoOffset -> + (* We found a match *) + (Var splitvar, off) + | NoOffset, restoff -> + ignore (warn "Found aggregate lval %a\n" + d_lval (b, off)); + find restsplits + + | _, _ -> (* We did not match this one; go on *) + find restsplits + in + matches (off, splitoff) + in + ChangeTo (find splits) + | _ -> DoChildren + with Not_found -> DoChildren + + (* Sometimes we pass the variable as a whole to a function or we + * assign it to something *) + method vinst (i: instr) : instr list visitAction = + match i with + (* Split into several instructions and then do children inside + * the rhs. Howver, v might appear in the rhs and if we + * duplicate the instruction we might get bad + * results. (e.g. test/small1/simplify_Structs2.c). So first copy + * the rhs to temp variables, then to v. + * + * Optimization: if the rhs is a variable, skip the temporary vars. + * Either the rhs = lhs, in which case this is all a nop, or it's not, + * in which case the rhs and lhs don't overlap.*) + + Set ((Var v, NoOffset), Lval lv, l) when H.mem newvars v.vname -> begin + let needTemps = not (isVar lv) in + let vars4v = H.find newvars v.vname in + if vars4v = [] then E.s (errorLoc l "No fields in split struct"); + ChangeTo + (List.map + (fun (off, newv) -> + let lv' = + visitCilLval (self :> cilVisitor) + (addOffsetLval off lv) in + (* makeTemp creates a temp var and puts (Lval lv') in it, + before any instructions in this ChangeTo list are handled.*) + let lv_tmp = if needTemps then + self#makeTemp (Lval lv') + else + (Lval lv') + in + Set((Var newv, NoOffset), lv_tmp, l)) + vars4v) + end + + | Set (lv, Lval (Var v, NoOffset), l) when H.mem newvars v.vname -> begin + (* Split->NonSplit assignment. no overlap between lhs and rhs + is possible*) + let vars4v = H.find newvars v.vname in + if vars4v = [] then E.s (errorLoc l "No fields in split struct"); + ChangeTo + (List.map + (fun (off, newv) -> + let lv' = + visitCilLval (self :> cilVisitor) + (addOffsetLval off lv) in + Set(lv', Lval (Var newv, NoOffset), l)) + vars4v) + end + + (* Split all function arguments in calls *) + | Call (ret, f, args, l) when splitArguments -> + (* Visit the children first and then see if we must change the + * arguments *) + let finishArgs = function + [Call (ret', f', args', l')] as i' -> + let mustChange = ref false in + let newargs = + (* Look for opportunities to split arguments. If we can + * split, we must split the original argument (in args). + * Otherwise, we use the result of processing children + * (in args'). *) + List.fold_right2 + (fun a a' acc -> + match a with + Lval (Var v, NoOffset) when H.mem newvars v.vname -> + begin + mustChange := true; + (List.map + (fun (_, newv) -> + Lval (Var newv, NoOffset)) + (H.find newvars v.vname)) + @ acc + end + | Lval lv -> begin + let newargs = + foldStructFields (typeOfLval lv) + (fun off n t -> + let lv' = addOffsetLval off lv in + Lval lv') in + if newargs = [] then + a' :: acc (* not a split var *) + else begin + mustChange := true; + newargs @ acc + end + end + | _ -> (* only lvals are split, right? *) + a' :: acc) + args args' + [] + in + if !mustChange then + [Call (ret', f', newargs, l')] + else + i' + | _ -> E.s (E.bug "splitVarVisitorClass: expecting call") + in + ChangeDoChildrenPost ([i], finishArgs) + + | _ -> DoChildren + + + method vfunc (func: fundec) : fundec visitAction = + H.clear newvars; + H.clear dontSplitLocals; + (* Visit the type of the function itself *) + if splitArguments then + func.svar.vtype <- visitCilType (self :> cilVisitor) func.svar.vtype; + + (* Go over the block and find the candidates *) + ignore (visitCilBlock findVarsCantSplit func.sbody); + + (* Now go over the formals and create the splits *) + if splitArguments then begin + (* Split all formals because we will split all arguments in function + * types *) + let newformals = + List.fold_right + (fun form acc -> + (* Process the type first *) + form.vtype <- + visitCilType (self : #cilVisitor :> cilVisitor) form.vtype; + let form' = + splitOneVar form + (fun s t -> makeLocalVar func ~insert:false s t) + in + (* Now it is a good time to check if we actually can split this + * one *) + if List.length form' > 1 && + H.mem dontSplitLocals form.vname then + ignore (warn "boxsplit: can't split formal \"%s\" in %s. Make sure you never take the address of a formal.\n" + form.vname func.svar.vname); + form' @ acc) + func.sformals [] + in + (* Now make sure we fix the type. *) + setFormals func newformals + end; + (* Now go over the locals and create the splits *) + List.iter + (fun l -> + (* Process the type of the local *) + l.vtype <- visitCilType (self :> cilVisitor) l.vtype; + (* Now see if we must split it *) + if not (H.mem dontSplitLocals l.vname) then begin + ignore (splitOneVar l (fun s t -> makeTempVar func ~name:s t)) + end) + func.slocals; + (* Now visit the body and change references to these variables *) + ignore (visitCilBlock (self :> cilVisitor) func.sbody); + H.clear newvars; + H.clear dontSplitLocals; + SkipChildren (* We are done with this function *) + + (* Try to catch the occurrences of the variable in a sizeof expression *) + method vexpr (e: exp) = + match e with + | SizeOfE (Lval(Var v, NoOffset)) -> begin + try + let splits = H.find newvars v.vname in + (* We cound here on no padding between the elements ! *) + ChangeTo + (List.fold_left + (fun acc (_, thisv) -> + BinOp(PlusA, SizeOfE(Lval(Var thisv, NoOffset)), + acc, uintType)) + zero + splits) + with Not_found -> DoChildren + end + | _ -> DoChildren +end + +let doGlobal = function + GFun(fi, _) -> + (* Visit the body and change all expressions into three address code *) + let v = new threeAddressVisitor fi in + fi.sbody <- visitCilBlock v fi.sbody; + if !splitStructs then begin + H.clear dontSplitLocals; + let splitVarVisitor = new splitVarVisitorClass (Some fi) in + ignore (visitCilFunction splitVarVisitor fi); + end + | GVarDecl(vi, _) when isFunctionType vi.vtype -> + (* we might need to split the args/return value in the function type. *) + if !splitStructs then begin + H.clear dontSplitLocals; + let splitVarVisitor = new splitVarVisitorClass None in + ignore (visitCilVarDecl splitVarVisitor vi); + end + | _ -> () + +let feature : featureDescr = + { fd_name = "simplify"; + fd_enabled = ref false; + fd_description = "compiles CIL to 3-address code"; + fd_extraopt = [ + ("--no-split-structs", Arg.Unit (fun _ -> splitStructs := false), + "do not split structured variables"); + ]; + fd_doit = (function f -> iterGlobals f doGlobal); + fd_post_check = true; +} + diff --git a/cil/src/ext/ssa.ml b/cil/src/ext/ssa.ml new file mode 100644 index 00000000..942c92b6 --- /dev/null +++ b/cil/src/ext/ssa.ml @@ -0,0 +1,696 @@ +module B=Bitmap +module E = Errormsg + +open Cil +open Pretty + +let debug = false + +(* Globalsread, Globalswritten should be closed under call graph *) + +module StringOrder = + struct + type t = string + let compare s1 s2 = + if s1 = s2 then 0 else + if s1 < s2 then -1 else 1 + end + +module StringSet = Set.Make (StringOrder) + +module IntOrder = + struct + type t = int + let compare i1 i2 = + if i1 = i2 then 0 else + if i1 < i2 then -1 else 1 + end + +module IntSet = Set.Make (IntOrder) + + +type cfgInfo = { + name: string; (* The function name *) + start : int; + size : int; + blocks: cfgBlock array; (** Dominating blocks must come first *) + successors: int list array; (* block indices *) + predecessors: int list array; + mutable nrRegs: int; + mutable regToVarinfo: varinfo array; (** Map register IDs to varinfo *) + } + +(** A block corresponds to a statement *) +and cfgBlock = { + bstmt: Cil.stmt; + + (* We abstract the statement as a list of def/use instructions *) + instrlist: instruction list; + mutable livevars: (reg * int) list; + (** For each variable ID that is live at the start of the block, the + * block whose definition reaches this point. If that block is the same + * as the current one, then the variable is a phi variable *) + mutable reachable: bool; + } + +and instruction = (reg list * reg list) + (* lhs variables, variables on rhs. *) + + +and reg = int + +type idomInfo = int array (* immediate dominator *) + +and dfInfo = (int list) array (* dominance frontier *) + +and oneSccInfo = { + nodes: int list; + headers: int list; + backEdges: (int*int) list; + } + +and sccInfo = oneSccInfo list + +(* Muchnick's Domin_Fast, 7.16 *) + +let compute_idom (flowgraph: cfgInfo): idomInfo = + let start = flowgraph.start in + let size = flowgraph.size in + let successors = flowgraph.successors in + let predecessors = flowgraph.predecessors in + let n0 = size in (* a new node (not in the flowgraph) *) + let idom = Array.make size (-1) in (* Make an array of immediate dominators *) + let nnodes = size + 1 in + let nodeSet = B.init nnodes (fun i -> true) in + + let ndfs = Array.create nnodes 0 in (* mapping from depth-first + * number to nodes. DForder + * starts at 1, with 0 used as + * an invalid entry *) + let parent = Array.create nnodes 0 in (* the parent in depth-first + * spanning tree *) + + (* A semidominator of w is the node v with the minimal DForder such + * that there is a path from v to w containing only nodes with the + * DForder larger than w. *) + let sdno = Array.create nnodes 0 in (* depth-first number of + * semidominator *) + + (* The set of nodes whose + * semidominator is ndfs(i) *) + let bucket = Array.init nnodes (fun _ -> B.cloneEmpty nodeSet) in + + (* The functions link and eval maintain a forest within the + * depth-first spanning tree. Ancestor is n0 is the node is a root in + * the forest. Label(v) is the node in the ancestor chain with the + * smallest depth-first number of its semidominator. Child and Size + * are used to keep the trees in the forest balanced *) + let ancestor = Array.create nnodes 0 in + let label = Array.create nnodes 0 in + let child = Array.create nnodes 0 in + let size = Array.create nnodes 0 in + + + let n = ref 0 in (* depth-first scan and numbering. + * Initialize data structures. *) + ancestor.(n0) <- n0; + label.(n0) <- n0; + let rec depthFirstSearchDom v = + incr n; + sdno.(v) <- !n; + ndfs.(!n) <- v; label.(v) <- v; + ancestor.(v) <- n0; (* All nodes are roots initially *) + child.(v) <- n0; size.(v) <- 1; + List.iter + (fun w -> + if sdno.(w) = 0 then begin + parent.(w) <- v; depthFirstSearchDom w + end) + successors.(v); + in + (* Determine the ancestor of v whose semidominator has the the minimal + * DFnumber. In the process, compress the paths in the forest. *) + let eval v = + let rec compress v = + if ancestor.(ancestor.(v)) <> n0 then + begin + compress ancestor.(v); + if sdno.(label.(ancestor.(v))) < sdno.(label.(v)) then + label.(v) <- label.(ancestor.(v)); + ancestor.(v) <- ancestor.(ancestor.(v)) + end + in + if ancestor.(v) = n0 then label.(v) + else begin + compress v; + if sdno.(label.(ancestor.(v))) >= sdno.(label.(v)) then + label.(v) + else label.(ancestor.(v)) + end + in + + let link v w = + let s = ref w in + while sdno.(label.(w)) < sdno.(label.(child.(!s))) do + if size.(!s) + size.(child.(child.(!s))) >= 2* size.(child.(!s)) then + (ancestor.(child.(!s)) <- !s; + child.(!s) <- child.(child.(!s))) + else + (size.(child.(!s)) <- size.(!s); + ancestor.(!s) <- child.(!s); s := child.(!s)); + done; + label.(!s) <- label.(w); + size.(v) <- size.(v) + size.(w); + if size.(v) < 2 * size.(w) then begin + let tmp = !s in + s := child.(v); + child.(v) <- tmp; + end; + while !s <> n0 do + ancestor.(!s) <- v; + s := child.(!s); + done; + in + (* Start now *) + depthFirstSearchDom start; + for i = !n downto 2 do + let w = ndfs.(i) in + List.iter (fun v -> + let u = eval v in + if sdno.(u) < sdno.(w) then sdno.(w) <- sdno.(u);) + predecessors.(w); + B.set bucket.(ndfs.(sdno.(w))) w true; + link parent.(w) w; + while not (B.empty bucket.(parent.(w))) do + let v = + match B.toList bucket.(parent.(w)) with + x :: _ -> x + | [] -> ignore(print_string "Error in dominfast");0 in + B.set bucket.(parent.(w)) v false; + let u = eval v in + idom.(v) <- if sdno.(u) < sdno.(v) then u else parent.(w); + done; + done; + + for i=2 to !n do + let w = ndfs.(i) in + if idom.(w) <> ndfs.(sdno.(w)) then begin + let newDom = idom.(idom.(w)) in + idom.(w) <- newDom; + end + done; + idom + + + + + +let dominance_frontier (flowgraph: cfgInfo) : dfInfo = + let idom = compute_idom flowgraph in + let size = flowgraph.size in + let children = Array.create size [] in + for i = 0 to size - 1 do + if (idom.(i) != -1) then children.(idom.(i)) <- i :: children.(idom.(i)); + done; + + let size = flowgraph.size in + let start = flowgraph.start in + let successors = flowgraph.successors in + + let df = Array.create size [] in + (* Compute the dominance frontier *) + + let bottom = Array.make size true in (* bottom of the dominator tree *) + for i = 0 to size - 1 do + if (i != start) && idom.(i) <> -1 then bottom.(idom.(i)) <- false; + done; + + let processed = Array.make size false in (* to record the nodes added to work_list *) + let workList = ref ([]) in (* to iterate in a bottom-up traversal of the dominator tree *) + for i = 0 to size - 1 do + if (bottom.(i)) then workList := i :: !workList; + done; + while (!workList != []) do + let x = List.hd !workList in + let update y = if idom.(y) <> x then df.(x) <- y::df.(x) in + (* compute local component *) + +(* We use whichPred instead of whichSucc because ultimately this info is + * needed by control dependence dag which is constructed from REVERSE + * dominance frontier *) + List.iter (fun succ -> update succ) successors.(x); + (* add on up component *) + List.iter (fun z -> List.iter (fun y -> update y) df.(z)) children.(x); + processed.(x) <- true; + workList := List.tl !workList; + if (x != start) then begin + let i = idom.(x) in + if i <> -1 && + (List.for_all (fun child -> processed.(child)) children.(i)) then workList := i :: !workList; + end; + done; + df + + +(* Computes for each register, the set of nodes that need a phi definition + * for the register *) + +let add_phi_functions_info (flowgraph: cfgInfo) : unit = + let df = dominance_frontier flowgraph in + let size = flowgraph.size in + let nrRegs = flowgraph.nrRegs in + + + let defs = Array.init size (fun i -> B.init nrRegs (fun j -> false)) in + for i = 0 to size-1 do + List.iter + (fun (lhs,rhs) -> + List.iter (fun (r: reg) -> B.set defs.(i) r true) lhs; + ) + flowgraph.blocks.(i).instrlist + done; + let iterCount = ref 0 in + let hasAlready = Array.create size 0 in + let work = Array.create size 0 in + let w = ref ([]) in + let dfPlus = Array.init nrRegs ( + fun i -> + let defIn = B.make size in + for j = 0 to size - 1 do + if B.get defs.(j) i then B.set defIn j true + done; + let res = ref [] in + incr iterCount; + B.iter (fun x -> work.(x) <- !iterCount; w := x :: !w;) defIn; + while (!w != []) do + let x = List.hd !w in + w := List.tl !w; + List.iter (fun y -> + if (hasAlready.(y) < !iterCount) then begin + res := y :: !res; + hasAlready.(y) <- !iterCount; + if (work.(y) < !iterCount) then begin + work.(y) <- !iterCount; + w := y :: !w; + end; + end; + ) df.(x) + done; + (* res := List.filter (fun blkId -> B.get liveIn.(blkId) i) !res; *) + !res + ) in + let result = Array.create size ([]) in + for i = 0 to nrRegs - 1 do + List.iter (fun node -> result.(node) <- i::result.(node);) dfPlus.(i) + done; +(* result contains for each node, the list of variables that need phi + * definition *) + for i = 0 to size-1 do + flowgraph.blocks.(i).livevars <- + List.map (fun r -> (r, i)) result.(i); + done + + + +(* add dominating definitions info *) + +let add_dom_def_info (f: cfgInfo): unit = + let blocks = f.blocks in + let start = f.start in + let size = f.size in + let nrRegs = f.nrRegs in + + let idom = compute_idom f in + let children = Array.create size [] in + for i = 0 to size - 1 do + if (idom.(i) != -1) then children.(idom.(i)) <- i :: children.(idom.(i)); + done; + + if debug then begin + ignore (E.log "Immediate dominators\n"); + for i = 0 to size - 1 do + ignore (E.log " block %d: idom=%d, children=%a\n" + i idom.(i) + (docList num) children.(i)); + done + end; + + (* For each variable, maintain a stack of blocks that define it. When you + * process a block, the top of the stack is the closest dominator that + * defines the variable *) + let s = Array.make nrRegs ([start]) in + + (* Search top-down in the idom tree *) + let rec search (x: int): unit = (* x is a graph node *) + (* Push the current block for the phi variables *) + List.iter + (fun ((r: reg), dr) -> + if x = dr then s.(r) <- x::s.(r)) + blocks.(x).livevars; + + (* Clear livevars *) + blocks.(x).livevars <- []; + + (* Compute livevars *) + for i = 0 to nrRegs-1 do + match s.(i) with + | [] -> assert false + | fst :: _ -> + blocks.(x).livevars <- (i, fst) :: blocks.(x).livevars + done; + + + (* Update s for the children *) + List.iter + (fun (lhs,rhs) -> + List.iter (fun (lreg: reg) -> s.(lreg) <- x::s.(lreg) ) lhs; + ) + blocks.(x).instrlist; + + + (* Go and do the children *) + List.iter search children.(x); + + (* Then we pop x, whenever it is on top of a stack *) + Array.iteri + (fun i istack -> + let rec dropX = function + [] -> [] + | x' :: rest when x = x' -> dropX rest + | l -> l + in + s.(i) <- dropX istack) + s; + in + search(start) + + + +let prune_cfg (f: cfgInfo): cfgInfo = + let size = f.size in + if size = 0 then f else + let reachable = Array.make size false in + let worklist = ref([f.start]) in + while (!worklist != []) do + let h = List.hd !worklist in + worklist := List.tl !worklist; + reachable.(h) <- true; + List.iter (fun s -> if (reachable.(s) = false) then worklist := s::!worklist; + ) f.successors.(h); + done; +(* + let dummyblock = { bstmt = mkEmptyStmt (); + instrlist = []; + livevars = [] } + in +*) + let successors = Array.init size (fun i -> List.filter (fun s -> reachable.(s)) f.successors.(i)) in + let predecessors = Array.init size (fun i -> List.filter (fun s -> reachable.(s)) f.predecessors.(i)) in + Array.iteri (fun i b -> b.reachable <- reachable.(i)) f.blocks; + let result: cfgInfo = + { name = f.name; + start = f.start; + size = f.size; + successors = successors; + predecessors = predecessors; + blocks = f.blocks; + nrRegs = f.nrRegs; + regToVarinfo = f.regToVarinfo; + } + in + result + + +let add_ssa_info (f: cfgInfo): unit = + let f = prune_cfg f in + let d_reg () (r: int) = + dprintf "%s(%d)" f.regToVarinfo.(r).vname r + in + if debug then begin + ignore (E.log "Doing SSA for %s. Initial data:\n" f.name); + Array.iteri (fun i b -> + ignore (E.log " block %d:\n succs=@[%a@]\n preds=@[%a@]\n instr=@[%a@]\n" + i + (docList num) f.successors.(i) + (docList num) f.predecessors.(i) + (docList ~sep:line (fun (lhs, rhs) -> + dprintf "%a := @[%a@]" + (docList (d_reg ())) lhs (docList (d_reg ())) rhs)) + b.instrlist)) + f.blocks; + end; + + add_phi_functions_info f; + add_dom_def_info f; + + if debug then begin + ignore (E.log "After SSA\n"); + Array.iter (fun b -> + ignore (E.log " block %d livevars: @[%a@]\n" + b.bstmt.sid + (docList (fun (i, fst) -> + dprintf "%a def at %d" d_reg i fst)) + b.livevars)) + f.blocks; + end + + +let set2list s = + let result = ref([]) in + IntSet.iter (fun element -> result := element::!result) s; + !result + + + + +let preorderDAG (nrNodes: int) (successors: (int list) array): int list = + let processed = Array.make nrNodes false in + let revResult = ref ([]) in + let predecessorsSet = Array.make nrNodes (IntSet.empty) in + for i = 0 to nrNodes -1 do + List.iter (fun s -> predecessorsSet.(s) <- IntSet.add i predecessorsSet.(s)) successors.(i); + done; + let predecessors = Array.init nrNodes (fun i -> set2list predecessorsSet.(i)) in + let workList = ref([]) in + for i = 0 to nrNodes - 1 do + if (predecessors.(i) = []) then workList := i::!workList; + done; + while (!workList != []) do + let x = List.hd !workList in + workList := List.tl !workList; + revResult := x::!revResult; + processed.(x) <- true; + List.iter (fun s -> + if (List.for_all (fun p -> processed.(p)) predecessors.(s)) then + workList := s::!workList; + ) successors.(x); + done; + List.rev !revResult + + +(* Muchnick Fig 7.12 *) +(* takes an SCC description as an input and returns prepares the appropriate SCC *) +let preorder (nrNodes: int) (successors: (int list) array) (r: int): oneSccInfo = + if debug then begin + ignore (E.log "Inside preorder \n"); + for i = 0 to nrNodes - 1 do + ignore (E.log "succ(%d) = %a" i (docList (fun i -> num i)) successors.(i)); + done; + end; + let i = ref(0) in + let j = ref(0) in + let pre = Array.make nrNodes (-1) in + let post = Array.make nrNodes (-1) in + let visit = Array.make nrNodes (false) in + let backEdges = ref ([]) in + let headers = ref(IntSet.empty) in + let rec depth_first_search_pp (x:int) = + visit.(x) <- true; + pre.(x) <- !j; + incr j; + List.iter (fun (y:int) -> + if (not visit.(y)) then + (depth_first_search_pp y) + else + if (post.(y) = -1) then begin + backEdges := (x,y)::!backEdges; + headers := IntSet.add y !headers; + end; + ) successors.(x); + post.(x) <- !i; + incr i; + in + depth_first_search_pp r; + let nodes = Array.make nrNodes (-1) in + for y = 0 to nrNodes - 1 do + if (pre.(y) != -1) then nodes.(pre.(y)) <- y; + done; + let nodeList = List.filter (fun i -> (i != -1)) (Array.to_list nodes) in + let result = { headers = set2list !headers; backEdges = !backEdges; nodes = nodeList; } in + result + + +exception Finished + + +let strong_components (f: cfgInfo) (debug: bool) = + let size = f.size in + let parent = Array.make size (-1) in + let color = Array.make size (-1) in + let finish = Array.make size (-1) in + let root = Array.make size (-1) in + +(* returns a list of SCC. Each SCC is a tuple of SCC root and SCC nodes *) + let dfs (successors: (int list) array) (order: int array) = + let time = ref(-1) in + let rec dfs_visit u = + color.(u) <- 1; + incr time; + (* d.(u) <- time; *) + List.iter (fun v -> + if color.(v) = 0 then (parent.(v) <- u; dfs_visit v) + ) successors.(u); + color.(u) <- 2; + incr time; + finish.(u) <- !time + in + for u = 0 to size - 1 do + color.(u) <- 0; (* white = 0, gray = 1, black = 2 *) + parent.(u) <- -1; (* nil = -1 *) + root.(u) <- 0; (* Is u a root? *) + done; + time := 0; + Array.iter (fun u -> + if (color.(u) = 0) then begin + root.(u) <- 1; + dfs_visit u; + end; + ) order; + in + + let simpleOrder = Array.init size (fun i -> i) in + dfs f.successors simpleOrder; + Array.sort (fun i j -> if (finish.(i) > finish.(j)) then -1 else 1) simpleOrder; + + dfs f.predecessors simpleOrder; +(* SCCs have been computed. (The trees represented by non-null parent edges + * represent the SCCS. We call the black nodes as the roots). Now put the + * result in the ouput format *) + let allScc = ref([]) in + for u = 0 to size - 1 do + if root.(u) = 1 then begin + let sccNodes = ref(IntSet.empty) in + let workList = ref([u]) in + while (!workList != []) do + let h=List.hd !workList in + workList := List.tl !workList; + sccNodes := IntSet.add h !sccNodes; + List.iter (fun s -> if parent.(s)=h then workList := s::!workList;) f.predecessors.(h); + done; + allScc := (u,!sccNodes)::!allScc; + if (debug) then begin + ignore (E.log "Got an SCC with root %d and nodes %a" u (docList num) (set2list !sccNodes)); + end; + end; + done; + !allScc + + +let stronglyConnectedComponents (f: cfgInfo) (debug: bool): sccInfo = + let size = f.size in + if (debug) then begin + ignore (E.log "size = %d\n" size); + for i = 0 to size - 1 do + ignore (E.log "Successors(%d): %a\n" i (docList (fun n -> num n)) f.successors.(i)); + done; + end; + + let allScc = strong_components f debug in + let all_sccArray = Array.of_list allScc in + + if (debug) then begin + ignore (E.log "Computed SCCs\n"); + for i = 0 to (Array.length all_sccArray) - 1 do + ignore(E.log "SCC #%d: " i); + let (_,sccNodes) = all_sccArray.(i) in + IntSet.iter (fun i -> ignore(E.log "%d, " i)) sccNodes; + ignore(E.log "\n"); + done; + end; + + + (* Construct sccId: Node -> Scc Id *) + let sccId = Array.make size (-1) in + Array.iteri (fun i (r,sccNodes) -> + IntSet.iter (fun n -> sccId.(n) <- i) sccNodes; + ) all_sccArray; + + if (debug) then begin + ignore (E.log "\nComputed SCC IDs: "); + for i = 0 to size - 1 do + ignore (E.log "SCCID(%d) = %d " i sccId.(i)); + done; + end; + + + (* Construct sccCFG *) + let nrScc = Array.length all_sccArray in + let successors = Array.make nrScc [] in + for x = 0 to nrScc - 1 do + successors.(x) <- + let s = ref(IntSet.empty) in + IntSet.iter (fun y -> + List.iter (fun z -> + let sy = sccId.(y) in + let sz = sccId.(z) in + if (not(sy = sz)) then begin + s := IntSet.add sz !s; + end + ) f.successors.(y) + ) (snd all_sccArray.(x)); + set2list !s + done; + + if (debug) then begin + ignore (E.log "\nComputed SCC CFG, which should be a DAG:"); + ignore (E.log "nrSccs = %d " nrScc); + for i = 0 to nrScc - 1 do + ignore (E.log "successors(%d) = [%a] " i (docList (fun j -> num j)) successors.(i)); + done; + end; + + + (* Order SCCs. The graph is a DAG here *) + let sccorder = preorderDAG nrScc successors in + + if (debug) then begin + ignore (E.log "\nComputed SCC Preorder: "); + ignore (E.log "Nodes in Preorder = [%a]" (docList (fun i -> num i)) sccorder); + end; + + (* Order nodes of each SCC. The graph is a SCC here.*) + let scclist = List.map (fun i -> + let successors = Array.create size [] in + for j = 0 to size - 1 do + successors.(j) <- List.filter (fun x -> IntSet.mem x (snd all_sccArray.(i))) f.successors.(j); + done; + preorder f.size successors (fst all_sccArray.(i)) + ) sccorder in + if (debug) then begin + ignore (E.log "Computed Preorder for Nodes of each SCC\n"); + List.iter (fun scc -> + ignore (E.log "BackEdges = %a \n" + (docList (fun (src,dest) -> dprintf "(%d,%d)" src dest)) + scc.backEdges);) + scclist; + end; + scclist + + + + + + + + + diff --git a/cil/src/ext/ssa.mli b/cil/src/ext/ssa.mli new file mode 100644 index 00000000..be244d81 --- /dev/null +++ b/cil/src/ext/ssa.mli @@ -0,0 +1,45 @@ +type cfgInfo = { + name: string; (* The function name *) + start : int; + size : int; + blocks: cfgBlock array; (** Dominating blocks must come first *) + successors: int list array; (* block indices *) + predecessors: int list array; + mutable nrRegs: int; + mutable regToVarinfo: Cil.varinfo array; (** Map register IDs to varinfo *) + } + +(** A block corresponds to a statement *) +and cfgBlock = { + bstmt: Cil.stmt; + + (* We abstract the statement as a list of def/use instructions *) + instrlist: instruction list; + mutable livevars: (reg * int) list; + (** For each variable ID that is live at the start of the block, the + * block whose definition reaches this point. If that block is the same + * as the current one, then the variable is a phi variable *) + mutable reachable: bool; + } + +and instruction = (reg list * reg list) + (* lhs variables, variables on rhs. *) + + +and reg = int + +type idomInfo = int array (* immediate dominator *) + +and dfInfo = (int list) array (* dominance frontier *) + +and oneSccInfo = { + nodes: int list; + headers: int list; + backEdges: (int*int) list; + } + +and sccInfo = oneSccInfo list + +val add_ssa_info: cfgInfo -> unit +val stronglyConnectedComponents: cfgInfo -> bool -> sccInfo +val prune_cfg: cfgInfo -> cfgInfo diff --git a/cil/src/ext/stackoverflow.ml b/cil/src/ext/stackoverflow.ml new file mode 100644 index 00000000..da2c4018 --- /dev/null +++ b/cil/src/ext/stackoverflow.ml @@ -0,0 +1,246 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +module H = Hashtbl +open Cil +open Pretty +module E = Errormsg + +let debug = false + + +(* For each function we have a node *) +type node = { name: string; + mutable scanned: bool; + mutable mustcheck: bool; + mutable succs: node list } +(* We map names to nodes *) +let functionNodes: (string, node) H.t = H.create 113 +let getFunctionNode (n: string) : node = + Util.memoize + functionNodes + n + (fun _ -> { name = n; mustcheck = false; scanned = false; succs = [] }) + +(** Dump the function call graph. Assume that there is a main *) +let dumpGraph = true +let dumpFunctionCallGraph () = + H.iter (fun _ x -> x.scanned <- false) functionNodes; + let rec dumpOneNode (ind: int) (n: node) : unit = + output_string !E.logChannel "\n"; + for i = 0 to ind do + output_string !E.logChannel " " + done; + output_string !E.logChannel (n.name ^ " "); + if n.scanned then (* Already dumped *) + output_string !E.logChannel " " + else begin + n.scanned <- true; + List.iter (dumpOneNode (ind + 1)) n.succs + end + in + try + let main = H.find functionNodes "main" in + dumpOneNode 0 main + with Not_found -> begin + ignore (E.log + "I would like to dump the function graph but there is no main"); + end + +(* We add a dummy function whose name is "@@functionPointer@@" that is called + * at all invocations of function pointers and itself calls all functions + * whose address is taken. *) +let functionPointerName = "@@functionPointer@@" + +let checkSomeFunctions = ref false + +let init () = + H.clear functionNodes; + checkSomeFunctions := false + + +let addCall (caller: string) (callee: string) = + let callerNode = getFunctionNode caller in + let calleeNode = getFunctionNode callee in + if not (List.exists (fun n -> n.name = callee) callerNode.succs) then begin + if debug then + ignore (E.log "found call from %s to %s\n" caller callee); + callerNode.succs <- calleeNode :: callerNode.succs; + end; + () + + +class findCallsVisitor (host: string) : cilVisitor = object + inherit nopCilVisitor + + method vinst i = + match i with + | Call(_,Lval(Var(vi),NoOffset),_,l) -> + addCall host vi.vname; + SkipChildren + + | Call(_,e,_,l) -> (* Calling a function pointer *) + addCall host functionPointerName; + SkipChildren + + | _ -> SkipChildren (* No calls in other instructions *) + + (* There are no calls in expressions and types *) + method vexpr e = SkipChildren + method vtype t = SkipChildren + +end + +(* Now detect the cycles in the call graph. Do a depth first search of the + * graph (stack is the list of nodes already visited in the current path). + * Return true if we have found a cycle. *) +let rec breakCycles (stack: node list) (n: node) : bool = + if n.scanned then (* We have already scanned this node. There are no cycles + * going through this node *) + false + else if n.mustcheck then + (* We are reaching a node that we already know we much check. Return with + * no new cycles. *) + false + else if List.memq n stack then begin + (* We have found a cycle. Mark the node n to be checked and return *) + if debug then + ignore (E.log "Will place an overflow check in %s\n" n.name); + checkSomeFunctions := true; + n.mustcheck <- true; + n.scanned <- true; + true + end else begin + let res = List.exists (fun nd -> breakCycles (n :: stack) nd) n.succs in + n.scanned <- true; + if res && n.mustcheck then + false + else + res + end +let findCheckPlacement () = + H.iter (fun _ nd -> + if nd.name <> functionPointerName + && not nd.scanned && not nd.mustcheck then begin + ignore (breakCycles [] nd) + end) + functionNodes + +let makeFunctionCallGraph (f: Cil.file) : unit = + init (); + (* Scan the file and construct the control-flow graph *) + List.iter + (function + GFun(fdec, _) -> + if fdec.svar.vaddrof then + addCall functionPointerName fdec.svar.vname; + let vis = new findCallsVisitor fdec.svar.vname in + ignore (visitCilBlock vis fdec.sbody) + + | _ -> ()) + f.globals + +let makeAndDumpFunctionCallGraph (f: file) = + makeFunctionCallGraph f; + dumpFunctionCallGraph () + + +let addCheck (f: Cil.file) : unit = + makeFunctionCallGraph f; + findCheckPlacement (); + if !checkSomeFunctions then begin + (* Add a declaration for the stack threshhold variable. The program is + * stopped when the stack top is less than this value. *) + let stackThreshholdVar = makeGlobalVar "___stack_threshhold" !upointType in + stackThreshholdVar.vstorage <- Extern; + (* And the initialization function *) + let computeStackThreshhold = + makeGlobalVar "___compute_stack_threshhold" + (TFun(!upointType, Some [], false, [])) in + computeStackThreshhold.vstorage <- Extern; + (* And the failure function *) + let stackOverflow = + makeGlobalVar "___stack_overflow" + (TFun(voidType, Some [], false, [])) in + stackOverflow.vstorage <- Extern; + f.globals <- + GVar(stackThreshholdVar, {init=None}, locUnknown) :: + GVarDecl(computeStackThreshhold, locUnknown) :: + GVarDecl(stackOverflow, locUnknown) :: f.globals; + (* Now scan and instrument each function definition *) + List.iter + (function + GFun(fdec, l) -> + (* If this is main we must introduce the initialization of the + * bottomOfStack *) + let nd = getFunctionNode fdec.svar.vname in + if fdec.svar.vname = "main" then begin + if nd.mustcheck then + E.s (E.error "The \"main\" function is recursive!!"); + let loc = makeLocalVar fdec "__a_local" intType in + loc.vaddrof <- true; + fdec.sbody <- + mkBlock + [ mkStmtOneInstr + (Call (Some(var stackThreshholdVar), + Lval(var computeStackThreshhold), [], l)); + mkStmt (Block fdec.sbody) ] + end else if nd.mustcheck then begin + let loc = makeLocalVar fdec "__a_local" intType in + loc.vaddrof <- true; + fdec.sbody <- + mkBlock + [ mkStmt + (If(BinOp(Le, + CastE(!upointType, AddrOf (var loc)), + Lval(var stackThreshholdVar), intType), + mkBlock [mkStmtOneInstr + (Call(None, Lval(var stackOverflow), + [], l))], + mkBlock [], + l)); + mkStmt (Block fdec.sbody) ] + end else + () + + | _ -> ()) + f.globals; + () + end + + + + diff --git a/cil/src/ext/stackoverflow.mli b/cil/src/ext/stackoverflow.mli new file mode 100644 index 00000000..6ec02007 --- /dev/null +++ b/cil/src/ext/stackoverflow.mli @@ -0,0 +1,43 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* This module inserts code to check for stack overflow. It saves the address + * of the top of the stack in "main" and then it picks one function *) + +val addCheck: Cil.file -> unit + +val makeAndDumpFunctionCallGraph: Cil.file -> unit diff --git a/cil/src/ext/usedef.ml b/cil/src/ext/usedef.ml new file mode 100755 index 00000000..57f226aa --- /dev/null +++ b/cil/src/ext/usedef.ml @@ -0,0 +1,188 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + + +open Cil +open Pretty + +(** compute use/def information *) + +module VS = Set.Make (struct + type t = Cil.varinfo + let compare v1 v2 = Pervasives.compare v1.vid v2.vid + end) + +(** Set this global to how you want to handle function calls *) +let getUseDefFunctionRef: (exp -> VS.t * VS.t) ref = + ref (fun _ -> (VS.empty, VS.empty)) + +(** Say if you want to consider a variable use *) +let considerVariableUse: (varinfo -> bool) ref = + ref (fun _ -> true) + + +(** Say if you want to consider a variable def *) +let considerVariableDef: (varinfo -> bool) ref = + ref (fun _ -> true) + +(** Save if you want to consider a variable addrof as a use *) +let considerVariableAddrOfAsUse: (varinfo -> bool) ref = + ref (fun _ -> true) + +(* When this is true, only definitions of a variable without + an offset are counted as definitions. So: + a = 5; would be a definition, but + a[1] = 5; would not *) +let onlyNoOffsetsAreDefs: bool ref = ref false + +let varUsed: VS.t ref = ref VS.empty +let varDefs: VS.t ref = ref VS.empty + +class useDefVisitorClass : cilVisitor = object (self) + inherit nopCilVisitor + + (** this will be invoked on variable definitions only because we intercept + * all uses of variables in expressions ! *) + method vvrbl (v: varinfo) = + if (!considerVariableDef) v && + not(!onlyNoOffsetsAreDefs) then + varDefs := VS.add v !varDefs; + SkipChildren + + (** If onlyNoOffsetsAreDefs is true, then we need to see the + * varinfo in an lval along with the offset. Otherwise just + * DoChildren *) + method vlval (l: lval) = + if !onlyNoOffsetsAreDefs then + match l with + (Var vi, NoOffset) -> + if (!considerVariableDef) vi then + varDefs := VS.add vi !varDefs; + SkipChildren + | _ -> DoChildren + else DoChildren + + method vexpr = function + Lval (Var v, off) -> + ignore (visitCilOffset (self :> cilVisitor) off); + if (!considerVariableUse) v then + varUsed := VS.add v !varUsed; + SkipChildren (* So that we do not see the v *) + + | AddrOf (Var v, off) + | StartOf (Var v, off) -> + ignore (visitCilOffset (self :> cilVisitor) off); + if (!considerVariableAddrOfAsUse) v then + varUsed := VS.add v !varUsed; + SkipChildren + + | _ -> DoChildren + + (* For function calls, do the transitive variable read/defs *) + method vinst = function + Call (_, f, _, _) -> begin + (* we will call DoChildren to compute the use and def that appear in + * this instruction. We also add in the stuff computed by + * getUseDefFunctionRef *) + let use, def = !getUseDefFunctionRef f in + varUsed := VS.union !varUsed use; + varDefs := VS.union !varDefs def; + DoChildren; + end + | Asm(_,_,slvl,_,_,_) -> List.iter (fun (s,lv) -> + match lv with (Var v, off) -> + if s.[0] = '+' then + varUsed := VS.add v !varUsed; + | _ -> ()) slvl; + DoChildren + | _ -> DoChildren + +end + +let useDefVisitor = new useDefVisitorClass + +(** Compute the use information for an expression (accumulate to an existing + * set) *) +let computeUseExp ?(acc=VS.empty) (e: exp) : VS.t = + varUsed := acc; + ignore (visitCilExpr useDefVisitor e); + !varUsed + + +(** Compute the use/def information for an instruction *) +let computeUseDefInstr ?(acc_used=VS.empty) + ?(acc_defs=VS.empty) + (i: instr) : VS.t * VS.t = + varUsed := acc_used; + varDefs := acc_defs; + ignore (visitCilInstr useDefVisitor i); + !varUsed, !varDefs + + +(** Compute the use/def information for a statement kind. Do not descend into + * the nested blocks. *) +let computeUseDefStmtKind ?(acc_used=VS.empty) + ?(acc_defs=VS.empty) + (sk: stmtkind) : VS.t * VS.t = + varUsed := acc_used; + varDefs := acc_defs; + let ve e = ignore (visitCilExpr useDefVisitor e) in + let _ = + match sk with + Return (None, _) -> () + | Return (Some e, _) -> ve e + | If (e, _, _, _) -> ve e + | Break _ | Goto _ | Continue _ -> () +(* + | Loop (_, _, _, _) -> () +*) + | While _ | DoWhile _ | For _ -> () + | Switch (e, _, _, _) -> ve e + | Instr il -> + List.iter (fun i -> ignore (visitCilInstr useDefVisitor i)) il + | TryExcept _ | TryFinally _ -> () + | Block _ -> () + in + !varUsed, !varDefs + +(* Compute the use/def information for a statement kind. + DO descend into nested blocks *) +let rec computeDeepUseDefStmtKind ?(acc_used=VS.empty) + ?(acc_defs=VS.empty) + (sk: stmtkind) : VS.t * VS.t = + let handle_block b = + List.fold_left (fun (u,d) s -> + let u',d' = computeDeepUseDefStmtKind s.skind in + (VS.union u u', VS.union d d')) (VS.empty, VS.empty) + b.bstmts + in + varUsed := acc_used; + varDefs := acc_defs; + let ve e = ignore (visitCilExpr useDefVisitor e) in + match sk with + Return (None, _) -> !varUsed, !varDefs + | Return (Some e, _) -> + let _ = ve e in + !varUsed, !varDefs + | If (e, tb, fb, _) -> + let _ = ve e in + let u, d = !varUsed, !varDefs in + let u', d' = handle_block tb in + let u'', d'' = handle_block fb in + (VS.union (VS.union u u') u'', VS.union (VS.union d d') d'') + | Break _ | Goto _ | Continue _ -> !varUsed, !varDefs +(* + | Loop (b, _, _, _) -> handle_block b +*) + | While (_, b, _) -> handle_block b + | DoWhile (_, b, _) -> handle_block b + | For (_, _, _, b, _) -> handle_block b + | Switch (e, b, _, _) -> + let _ = ve e in + let u, d = !varUsed, !varDefs in + let u', d' = handle_block b in + (VS.union u u', VS.union d d') + | Instr il -> + List.iter (fun i -> ignore (visitCilInstr useDefVisitor i)) il; + !varUsed, !varDefs + | TryExcept _ | TryFinally _ -> !varUsed, !varDefs + | Block b -> handle_block b diff --git a/cil/src/formatcil.ml b/cil/src/formatcil.ml new file mode 100644 index 00000000..33bc749f --- /dev/null +++ b/cil/src/formatcil.ml @@ -0,0 +1,215 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +open Cil +open Pretty +open Trace (* sm: 'trace' function *) +module E = Errormsg +module H = Hashtbl + +let noMemoize = ref false + +let expMemoTable : + (string, (((string * formatArg) list -> exp) * + (exp -> formatArg list option))) H.t = H.create 23 + +let typeMemoTable : + (string, (((string * formatArg) list -> typ) * + (typ -> formatArg list option))) H.t = H.create 23 + +let lvalMemoTable : + (string, (((string * formatArg) list -> lval) * + (lval -> formatArg list option))) H.t = H.create 23 + +let instrMemoTable : + (string, ((location -> (string * formatArg) list -> instr) * + (instr -> formatArg list option))) H.t = H.create 23 + +let stmtMemoTable : + (string, ((string -> typ -> varinfo) -> + location -> + (string * formatArg) list -> stmt)) H.t = H.create 23 + +let stmtsMemoTable : + (string, ((string -> typ -> varinfo) -> + location -> + (string * formatArg) list -> stmt list)) H.t = H.create 23 + + +let doParse (prog: string) + (theParser: (Lexing.lexbuf -> Formatparse.token) + -> Lexing.lexbuf -> 'a) + (memoTable: (string, 'a) H.t) : 'a = + try + if !noMemoize then raise Not_found else + H.find memoTable prog + with Not_found -> begin + let lexbuf = Formatlex.init prog in + try + Formatparse.initialize Formatlex.initial lexbuf; + let res = theParser Formatlex.initial lexbuf in + H.add memoTable prog res; + Formatlex.finish (); + res + with Parsing.Parse_error -> begin + Formatlex.finish (); + E.s (E.error "Parsing error: %s" prog) + end + | e -> begin + ignore (E.log "Caught %s while parsing\n" (Printexc.to_string e)); + Formatlex.finish (); + raise e + end + end + + +let cExp (prog: string) : (string * formatArg) list -> exp = + let cf = doParse prog Formatparse.expression expMemoTable in + (fst cf) + +let cLval (prog: string) : (string * formatArg) list -> lval = + let cf = doParse prog Formatparse.lval lvalMemoTable in + (fst cf) + +let cType (prog: string) : (string * formatArg) list -> typ = + let cf = doParse prog Formatparse.typename typeMemoTable in + (fst cf) + +let cInstr (prog: string) : location -> (string * formatArg) list -> instr = + let cf = doParse prog Formatparse.instr instrMemoTable in + (fst cf) + +let cStmt (prog: string) : (string -> typ -> varinfo) -> + location -> (string * formatArg) list -> stmt = + let cf = doParse prog Formatparse.stmt stmtMemoTable in + cf + +let cStmts (prog: string) : + (string -> typ -> varinfo) -> + location -> (string * formatArg) list -> stmt list = + let cf = doParse prog Formatparse.stmt_list stmtsMemoTable in + cf + + + +(* Match an expression *) +let dExp (prog: string) : exp -> formatArg list option = + let df = doParse prog Formatparse.expression expMemoTable in + (snd df) + +(* Match an lvalue *) +let dLval (prog: string) : lval -> formatArg list option = + let df = doParse prog Formatparse.lval lvalMemoTable in + (snd df) + + +(* Match a type *) +let dType (prog: string) : typ -> formatArg list option = + let df = doParse prog Formatparse.typename typeMemoTable in + (snd df) + + + +(* Match an instruction *) +let dInstr (prog: string) : instr -> formatArg list option = + let df = doParse prog Formatparse.instr instrMemoTable in + (snd df) + + +let test () = + (* Construct a dummy function *) + let func = emptyFunction "test_formatcil" in + (* Construct a few varinfo *) + let res = makeLocalVar func "res" (TPtr(intType, [])) in + let fptr = makeLocalVar func "fptr" + (TPtr(TFun(intType, None, false, []), [])) in + (* Construct an instruction *) + let makeInstr () = + Call(Some (var res), + Lval (Mem (CastE(TPtr(TFun(TPtr(intType, []), + Some [ ("", intType, []); + ("a2", TPtr(intType, []), []); + ("a3", TPtr(TPtr(intType, []), + []), []) ], + false, []), []), + Lval (var fptr))), + NoOffset), + [ ], locUnknown) + in + let times = 100000 in + (* Make the instruction the regular way *) + Stats.time "make instruction regular" + (fun _ -> for i = 0 to times do ignore (makeInstr ()) done) + (); + (* Now make the instruction interpreted *) + noMemoize := true; + Stats.time "make instruction interpreted" + (fun _ -> for i = 0 to times do + let _ = + cInstr "%v:res = (* ((int * (*)(int, int * a2, int * * a3))%v:fptr))();" + locUnknown [ ("res", Fv res); + ("fptr", Fv fptr) ] + in + () + done) + (); + (* Now make the instruction interpreted with memoization *) + noMemoize := false; + Stats.time "make instruction interpreted memoized" + (fun _ -> for i = 0 to times do + let _ = + cInstr "%v:res = (* ((int * (*)(int, int * a2, int * * a3))%v:fptr))();" + locUnknown [ ("res", Fv res); ("fptr", Fv fptr) ] + in + () + done) + (); + (* Now make the instruction interpreted with partial application *) + let partInstr = + cInstr "%v:res = (* ((int * (*)(int, int * a2, int * * a3))%v:fptr))();" in + Stats.time "make instruction interpreted partial" + (fun _ -> for i = 0 to times do + let _ = + partInstr + locUnknown [ ("res", Fv res); ("fptr", Fv fptr) ] + in + () + done) + (); + + () + + diff --git a/cil/src/formatcil.mli b/cil/src/formatcil.mli new file mode 100644 index 00000000..d353c5eb --- /dev/null +++ b/cil/src/formatcil.mli @@ -0,0 +1,103 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(** {b An Interpreter for constructing CIL constructs} *) + + +(** Constructs an expression based on the program and the list of arguments. + * Each argument consists of a name followed by the actual data. This + * argument will be placed instead of occurrences of "%v:name" in the pattern + * (where the "v" is dependent on the type of the data). The parsing of the + * string is memoized. * Only the first expression is parsed. *) +val cExp: string -> (string * Cil.formatArg) list -> Cil.exp + +(** Constructs an lval based on the program and the list of arguments. + * Only the first lvalue is parsed. + * The parsing of the string is memoized. *) +val cLval: string -> (string * Cil.formatArg) list -> Cil.lval + +(** Constructs a type based on the program and the list of arguments. + * Only the first type is parsed. + * The parsing of the string is memoized. *) +val cType: string -> (string * Cil.formatArg) list -> Cil.typ + + +(** Constructs an instruction based on the program and the list of arguments. + * Only the first instruction is parsed. + * The parsing of the string is memoized. *) +val cInstr: string -> Cil.location -> + (string * Cil.formatArg) list -> Cil.instr + +(* Constructs a statement based on the program and the list of arguments. We + * also pass a function that can be used to make new varinfo's for the + * declared variables, and a location to be used for the statements. Only the + * first statement is parsed. The parsing of the string is memoized. *) +val cStmt: string -> + (string -> Cil.typ -> Cil.varinfo) -> + Cil.location -> (string * Cil.formatArg) list -> Cil.stmt + +(** Constructs a list of statements *) +val cStmts: string -> + (string -> Cil.typ -> Cil.varinfo) -> + Cil.location -> (string * Cil.formatArg) list -> + Cil.stmt list + +(** Deconstructs an expression based on the program. Produces an optional + * list of format arguments. The parsing of the string is memoized. *) +val dExp: string -> Cil.exp -> Cil.formatArg list option + +(** Deconstructs an lval based on the program. Produces an optional + * list of format arguments. The parsing of the string is memoized. *) +val dLval: string -> Cil.lval -> Cil.formatArg list option + + +(** Deconstructs a type based on the program. Produces an optional list of + * format arguments. The parsing of the string is memoized. *) +val dType: string -> Cil.typ -> Cil.formatArg list option + + +(** Deconstructs an instruction based on the program. Produces an optional + * list of format arguments. The parsing of the string is memoized. *) +val dInstr: string -> Cil.instr -> Cil.formatArg list option + + +(** If set then will not memoize the parsed patterns *) +val noMemoize: bool ref + +(** Just a testing function *) +val test: unit -> unit diff --git a/cil/src/formatlex.mll b/cil/src/formatlex.mll new file mode 100644 index 00000000..584a060d --- /dev/null +++ b/cil/src/formatlex.mll @@ -0,0 +1,308 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(* A simple lexical analyzer for constructing CIL based on format strings *) +{ +open Formatparse +exception Eof +exception InternalError of string +module H = Hashtbl +module E = Errormsg +(* +** Keyword hashtable +*) +let keywords = H.create 211 + +(* +** Useful primitives +*) +let scan_ident id = + try H.find keywords id + with Not_found -> IDENT id (* default to variable name *) + +(* +** Buffer processor +*) + + +let init ~(prog: string) : Lexing.lexbuf = + H.clear keywords; + Lexerhack.currentPattern := prog; + List.iter + (fun (key, token) -> H.add keywords key token) + [ ("const", CONST); ("__const", CONST); ("__const__", CONST); + ("static", STATIC); + ("extern", EXTERN); + ("long", LONG); + ("short", SHORT); + ("signed", SIGNED); + ("unsigned", UNSIGNED); + ("volatile", VOLATILE); + ("char", CHAR); + ("int", INT); + ("float", FLOAT); + ("double", DOUBLE); + ("void", VOID); + ("enum", ENUM); + ("struct", STRUCT); + ("typedef", TYPEDEF); + ("union", UNION); + ("break", BREAK); + ("continue", CONTINUE); + ("goto", GOTO); + ("return", RETURN); + ("switch", SWITCH); + ("case", CASE); + ("default", DEFAULT); + ("while", WHILE); + ("do", DO); + ("for", FOR); + ("if", IF); + ("else", ELSE); + ("__attribute__", ATTRIBUTE); ("__attribute", ATTRIBUTE); + ("__int64", INT64); + ("__builtin_va_arg", BUILTIN_VA_ARG); + ]; + E.startParsingFromString prog + +let finish () = + E.finishParsing () + +(*** Error handling ***) +let error msg = + E.parse_error msg + + +(*** escape character management ***) +let scan_escape str = + match str with + "n" -> "\n" + | "r" -> "\r" + | "t" -> "\t" + | "b" -> "\b" + | "f" -> "\012" (* ASCII code 12 *) + | "v" -> "\011" (* ASCII code 11 *) + | "a" -> "\007" (* ASCII code 7 *) + | "e" -> "\027" (* ASCII code 27. This is a GCC extension *) + | _ -> str + +let get_value chr = + match chr with + '0'..'9' -> (Char.code chr) - (Char.code '0') + | 'a'..'z' -> (Char.code chr) - (Char.code 'a') + 10 + | 'A'..'Z' -> (Char.code chr) - (Char.code 'A') + 10 + | _ -> 0 +let scan_hex_escape str = + String.make 1 (Char.chr ( + (get_value (String.get str 0)) * 16 + + (get_value (String.get str 1)) + )) +let scan_oct_escape str = + (* weimer: wide-character constants like L'\400' may be bigger than + * 256 (in fact, may be up to 511), so Char.chr cannot be used directly *) + let the_value = (get_value (String.get str 0)) * 64 + + (get_value (String.get str 1)) * 8 + + (get_value (String.get str 2)) in + if the_value < 256 then String.make 1 (Char.chr the_value ) + else (String.make 1 (Char.chr (the_value / 256))) ^ + (String.make 1 (Char.chr (the_value mod 256))) + +(* ISO standard locale-specific function to convert a wide character + * into a sequence of normal characters. Here we work on strings. + * We convert L"Hi" to "H\000i\000" *) +let wbtowc wstr = + let len = String.length wstr in + let dest = String.make (len * 2) '\000' in + for i = 0 to len-1 do + dest.[i*2] <- wstr.[i] ; + done ; + dest + +(* This function converst the "Hi" in L"Hi" to { L'H', L'i', L'\0' } *) +let wstr_to_warray wstr = + let len = String.length wstr in + let res = ref "{ " in + for i = 0 to len-1 do + res := !res ^ (Printf.sprintf "L'%c', " wstr.[i]) + done ; + res := !res ^ "}" ; + !res + +let getArgName (l: Lexing.lexbuf) (prefixlen: int) = + let lexeme = Lexing.lexeme l in + let ll = String.length lexeme in + if ll > prefixlen then + String.sub lexeme (prefixlen + 1) (ll - prefixlen - 1) + else + "" +} + +let decdigit = ['0'-'9'] +let octdigit = ['0'-'7'] +let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] +let letter = ['a'- 'z' 'A'-'Z'] + +let floatsuffix = ['f' 'F' 'l' 'L'] + +let usuffix = ['u' 'U'] +let lsuffix = "l"|"L"|"ll"|"LL" +let intsuffix = lsuffix | usuffix | usuffix lsuffix | lsuffix usuffix + +let intnum = decdigit+ intsuffix? +let octnum = '0' octdigit+ intsuffix? +let hexnum = '0' ['x' 'X'] hexdigit+ intsuffix? + +let exponent = ['e' 'E']['+' '-']? decdigit+ +let fraction = '.' decdigit+ +let floatraw = (intnum? fraction) + |(intnum exponent) + |(intnum? fraction exponent) + |(intnum '.') + |(intnum '.' exponent) +let floatnum = floatraw floatsuffix? + +let ident = (letter|'_')(letter|decdigit|'_')* +let attribident = (letter|'_')(letter|decdigit|'_'|':') +let blank = [' ' '\t' '\012' '\r'] +let escape = '\\' _ +let hex_escape = '\\' ['x' 'X'] hexdigit hexdigit +let oct_escape = '\\' octdigit octdigit octdigit + + +(* The arguments are of the form %l:foo *) +let argname = ':' ident + +rule initial = + parse blank { initial lexbuf} +| "/*" { let _ = comment lexbuf in + initial lexbuf} +| "//" { endline lexbuf } +| "\n" { E.newline (); initial lexbuf} +| floatnum {CST_FLOAT (Lexing.lexeme lexbuf)} +| hexnum {CST_INT (Lexing.lexeme lexbuf)} +| octnum {CST_INT (Lexing.lexeme lexbuf)} +| intnum {CST_INT (Lexing.lexeme lexbuf)} + +| "<<=" {INF_INF_EQ} +| ">>=" {SUP_SUP_EQ} +| "*=" {STAR_EQ} +| "/=" {SLASH_EQ} +| "&=" {AND_EQ} +| "|=" {PIPE_EQ} +| "^=" {CIRC_EQ} +| "%=" {PERCENT_EQ} + + +| "..." {ELLIPSIS} +| "-=" {MINUS_EQ} +| "+=" {PLUS_EQ} +| "*=" {STAR_EQ} +| "<<" {INF_INF} +| ">>" {SUP_SUP} +| "==" {EQ_EQ} +| "!=" {EXCLAM_EQ} +| "<=" {INF_EQ} +| ">=" {SUP_EQ} +| "=" {EQ} +| "<" {INF} +| ">" {SUP} +| "++" {PLUS_PLUS} +| "--" {MINUS_MINUS} +| "->" {ARROW} +| '+' {PLUS} +| '-' {MINUS} +| '*' {STAR} +| '/' {SLASH} +| '!' {EXCLAM} +| '&' {AND} +| '|' {PIPE} +| '^' {CIRC} +| '~' {TILDE} +| '[' {LBRACKET} +| ']' {RBRACKET} +| '{' {LBRACE} +| '}' {RBRACE} +| '(' {LPAREN} +| ')' {RPAREN} +| ';' {SEMICOLON} +| ',' {COMMA} +| '.' {DOT} +| ':' {COLON} +| '?' {QUEST} +| "sizeof" {SIZEOF} + +| "%eo" argname {ARG_eo (getArgName lexbuf 3) } +| "%e" argname {ARG_e (getArgName lexbuf 2) } +| "%E" argname {ARG_E (getArgName lexbuf 2) } +| "%u" argname {ARG_u (getArgName lexbuf 2) } +| "%b" argname {ARG_b (getArgName lexbuf 2) } +| "%t" argname {ARG_t (getArgName lexbuf 2) } +| "%d" argname {ARG_d (getArgName lexbuf 2) } +| "%lo" argname {ARG_lo (getArgName lexbuf 3) } +| "%l" argname {ARG_l (getArgName lexbuf 2) } +| "%i" argname {ARG_i (getArgName lexbuf 2) } +| "%I" argname {ARG_I (getArgName lexbuf 2) } +| "%o" argname {ARG_o (getArgName lexbuf 2) } +| "%va" argname {ARG_va (getArgName lexbuf 3) } +| "%v" argname {ARG_v (getArgName lexbuf 2) } +| "%k" argname {ARG_k (getArgName lexbuf 2) } +| "%f" argname {ARG_f (getArgName lexbuf 2) } +| "%F" argname {ARG_F (getArgName lexbuf 2) } +| "%p" argname {ARG_p (getArgName lexbuf 2) } +| "%P" argname {ARG_P (getArgName lexbuf 2) } +| "%s" argname {ARG_s (getArgName lexbuf 2) } +| "%S" argname {ARG_S (getArgName lexbuf 2) } +| "%g" argname {ARG_g (getArgName lexbuf 2) } +| "%A" argname {ARG_A (getArgName lexbuf 2) } +| "%c" argname {ARG_c (getArgName lexbuf 2) } + +| '%' {PERCENT} +| ident {scan_ident (Lexing.lexeme lexbuf)} +| eof {EOF} +| _ {E.parse_error + "Formatlex: Invalid symbol" + } + +and comment = + parse + "*/" { () } +| '\n' { E.newline (); comment lexbuf } +| _ { comment lexbuf } + + +and endline = parse + '\n' { E.newline (); initial lexbuf} +| _ { endline lexbuf} diff --git a/cil/src/formatparse.mly b/cil/src/formatparse.mly new file mode 100644 index 00000000..75bdbb33 --- /dev/null +++ b/cil/src/formatparse.mly @@ -0,0 +1,1455 @@ +/* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. */ + +/*(* Parser for constructing CIL from format strings *) +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +*/ +%{ +open Cil +open Pretty +module E = Errormsg + +let parse_error msg : 'a = (* sm: c++-mode highlight hack: -> ' <- *) + E.hadErrors := true; + E.parse_error + msg + + +let getArg (argname: string) (args: (string * formatArg) list) = + try + snd (List.find (fun (n, a) -> n = argname) args) + with _ -> + E.s (error "Pattern string %s does not have argument with name %s\n" + !Lexerhack.currentPattern argname) + +let wrongArgType (which: string) (expected: string) (found: formatArg) = + E.s (bug "Expecting %s argument (%s) and found %a\n" + expected which d_formatarg found) + +let doUnop (uo: unop) subexp = + ((fun args -> + let e = (fst subexp) args in + UnOp(uo, e, typeOf e)), + + (fun e -> match e with + UnOp(uo', e', _) when uo = uo' -> (snd subexp) e' + | _ -> None)) + +let buildPlus e1 e2 : exp = + let t1 = typeOf e1 in + if isPointerType t1 then + BinOp(PlusPI, e1, e2, t1) + else + BinOp(PlusA, e1, e2, t1) + +let buildMinus e1 e2 : exp = + let t1 = typeOf e1 in + let t2 = typeOf e2 in + if isPointerType t1 then + if isPointerType t2 then + BinOp(MinusPP, e1, e2, intType) + else + BinOp(MinusPI, e1, e2, t1) + else + BinOp(MinusA, e1, e2, t1) + +let doBinop bop e1t e2t = + ((fun args -> + let e1 = (fst e1t) args in + let e2 = (fst e2t) args in + let t1 = typeOf e1 in + BinOp(bop, e1, e2, t1)), + + (fun e -> match e with + BinOp(bop', e1, e2, _) when bop' = bop -> begin + match (snd e1t) e1, (snd e2t) e2 with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + +(* Check the equivalence of two format lists *) +let rec checkSameFormat (fl1: formatArg list) (fl2: formatArg list) = + match fl1, fl2 with + [], [] -> true + | h1::t1, h2::t2 -> begin + let rec checkOffsetEq o1 o2 = + match o1, o2 with + NoOffset, NoOffset -> true + | Field(f1, o1'), Field(f2, o2') -> + f1.fname = f2.fname && checkOffsetEq o1' o2' + | Index(e1, o1'), Index(e2, o2') -> + checkOffsetEq o1' o2' && checkExpEq e1 e2 + | _, _ -> false + + and checkExpEq e1 e2 = + match e1, e2 with + Const(CInt64(n1, _, _)), Const(CInt64(n2, _, _)) -> n1 = n2 + | Lval l1, Lval l2 -> checkLvalEq l1 l2 + | UnOp(uo1, e1, _), UnOp(uo2, e2, _) -> + uo1 = uo2 && checkExpEq e1 e2 + | BinOp(bo1, e11, e12, _), BinOp(bo2, e21, e22, _) -> + bo1 = bo2 && checkExpEq e11 e21 && checkExpEq e21 e22 + | AddrOf l1, AddrOf l2 -> checkLvalEq l1 l2 + | StartOf l1, StartOf l2 -> checkLvalEq l1 l2 + | SizeOf t1, SizeOf t2 -> typeSig t1 = typeSig t2 + | _, _ -> + ignore (E.warn "checkSameFormat for Fe"); false + + and checkLvalEq l1 l2 = + match l1, l2 with + (Var v1, o1), (Var v2, o2) -> v1 == v2 && checkOffsetEq o1 o2 + | (Mem e1, o1), (Mem e2, o2) -> + checkOffsetEq o1 o2 && checkExpEq e1 e2 + | _, _ -> false + in + let hdeq = + match h1, h2 with + Fv v1, Fv v2 -> v1 == v2 + | Fd n1, Fd n2 -> n1 = n2 + | Fe e1, Fe e2 -> checkExpEq e1 e2 + | Fi i1, Fi i2 -> ignore (E.warn "checkSameFormat for Fi"); false + | Ft t1, Ft t2 -> typeSig t1 = typeSig t2 + | Fl l1, Fl l2 -> checkLvalEq l1 l2 + | Fo o1, Fo o2 -> checkOffsetEq o1 o2 + | Fc c1, Fc c2 -> c1 == c2 + | _, _ -> false + in + hdeq || checkSameFormat t1 t2 + end + | _, _ -> false + +let matchBinopEq (bopeq: binop -> bool) lvt et = + (fun i -> match i with + Set (lv, BinOp(bop', Lval (lv'), e', _), l) when bopeq bop' -> begin + match lvt lv, lvt lv', et e' with + Some m1, Some m1', Some m2 -> + (* Must check that m1 and m2 are the same *) + if checkSameFormat m1 m1' then + Some (m1 @ m2) + else + None + | _, _, _ -> None + end + | _ -> None) + +let doBinopEq bop lvt et = + ((fun loc args -> + let l = (fst lvt) args in + Set(l, BinOp(bop, (Lval l), (fst et) args, typeOfLval l), loc)), + + matchBinopEq (fun bop' -> bop = bop') (snd lvt) (snd et)) + + +let getField (bt: typ) (fname: string) : fieldinfo = + match unrollType bt with + TComp(ci, _) -> begin + try + List.find (fun f -> fname = f.fname) ci.cfields + with Not_found -> + E.s (bug "Cannot find field %s in %s\n" fname (compFullName ci)) + end + | t -> E.s (bug "Trying to access field %s in non-struct\n" fname) + + +let matchIntType (ik: ikind) (t:typ) : formatArg list option = + match unrollType t with + TInt(ik', _) when ik = ik' -> Some [] + | _ -> None + +let matchFloatType (fk: fkind) (t:typ) : formatArg list option = + match unrollType t with + TFloat(fk', _) when fk = fk' -> Some [] + | _ -> None + +let doAttr (id: string) + (aargs: (((string * formatArg) list -> attrparam list) * + (attrparam list -> formatArg list option)) option) + = + let t = match aargs with + Some t -> t + | None -> (fun _ -> []), + (function [] -> Some [] | _ -> None) + in + ((fun args -> Attr (id, (fst t) args)), + + (fun attrs -> + (* Find the attributes with the same ID *) + List.fold_left + (fun acc a -> + match acc, a with + Some _, _ -> acc (* We found one already *) + | None, Attr(id', args) when id = id' -> + (* Now match the arguments *) + (snd t) args + | None, _ -> acc) + None + attrs)) + + +type falist = formatArg list + +type maybeInit = + NoInit + | InitExp of exp + | InitCall of lval * exp list + +%} + +%token IDENT +%token CST_CHAR +%token CST_INT +%token CST_FLOAT +%token CST_STRING +%token CST_WSTRING +%token NAMED_TYPE + +%token EOF +%token CHAR INT DOUBLE FLOAT VOID INT64 INT32 +%token ENUM STRUCT TYPEDEF UNION +%token SIGNED UNSIGNED LONG SHORT +%token VOLATILE EXTERN STATIC CONST RESTRICT AUTO REGISTER + +%token ARG_e ARG_eo ARG_E ARG_u ARG_b ARG_t ARG_d ARG_lo ARG_l ARG_i +%token ARG_o ARG_va ARG_f ARG_F ARG_A ARG_v ARG_k ARG_c ARG_d +%token ARG_s ARG_p ARG_P ARG_I ARG_S ARG_g + +%token SIZEOF ALIGNOF + +%token EQ +%token ARROW DOT + +%token EQ_EQ EXCLAM_EQ INF SUP INF_EQ SUP_EQ +%token MINUS_EQ PLUS_EQ STAR_EQ +%token PLUS MINUS STAR SLASH PERCENT +%token TILDE AND PIPE CIRC +%token EXCLAM AND_AND PIPE_PIPE +%token INF_INF SUP_SUP +%token PLUS_PLUS MINUS_MINUS + +%token RPAREN LPAREN RBRACE LBRACE LBRACKET RBRACKET +%token COLON SEMICOLON COMMA ELLIPSIS QUEST + +%token BREAK CONTINUE GOTO RETURN +%token SWITCH CASE DEFAULT +%token WHILE DO FOR +%token IF THEN ELSE + +%token PLUS_EQ MINUS_EQ STAR_EQ SLASH_EQ PERCENT_EQ +%token AND_EQ PIPE_EQ CIRC_EQ INF_INF_EQ SUP_SUP_EQ + +%token ATTRIBUTE INLINE ASM TYPEOF FUNCTION__ PRETTY_FUNCTION__ LABEL__ +%token BUILTIN_VA_ARG BUILTIN_VA_LIST +%token BLOCKATTRIBUTE +%token DECLSPEC +%token MSASM MSATTR +%token PRAGMA + + +/* operator precedence */ +%nonassoc IF +%nonassoc ELSE + + +%left COMMA + + /*(* Set the following precedences higer than COMMA *)*/ +%nonassoc ARG_e ARG_d ARG_lo ARG_l ARG_i ARG_v ARG_I ARG_g +%right EQ PLUS_EQ MINUS_EQ STAR_EQ SLASH_EQ PERCENT_EQ + AND_EQ PIPE_EQ CIRC_EQ INF_INF_EQ SUP_SUP_EQ +%right COLON +%left PIPE_PIPE +%left AND_AND +%left ARG_b +%left PIPE +%left CIRC +%left AND +%left EQ_EQ EXCLAM_EQ +%left INF SUP INF_EQ SUP_EQ +%left INF_INF SUP_SUP +%left PLUS MINUS +%left STAR SLASH PERCENT CONST RESTRICT VOLATILE +%right ARG_u EXCLAM TILDE PLUS_PLUS MINUS_MINUS CAST RPAREN ADDROF SIZEOF ALIGNOF +%left LBRACKET +%left DOT ARROW LPAREN LBRACE +%nonassoc IDENT QUEST CST_INT + +%start initialize expression typename offset lval instr stmt stmt_list + + +%type initialize +%type <((string -> Cil.typ -> Cil.varinfo) -> Cil.location -> (string * Cil.formatArg) list -> Cil.stmt)> stmt +%type <((string -> Cil.typ -> Cil.varinfo) -> Cil.location -> (string * Cil.formatArg) list -> Cil.stmt list)> stmt_list + +%type <((string * Cil.formatArg) list -> Cil.exp) * (Cil.exp -> Cil.formatArg list option)> expression + +%type <((string * Cil.formatArg) list -> Cil.exp) * (Cil.exp -> Cil.formatArg list option)> constant + +%type <((string * Cil.formatArg) list -> Cil.lval) * (Cil.lval -> Cil.formatArg list option)> lval + +%type <((string * Cil.formatArg) list -> Cil.typ) * (Cil.typ -> Cil.formatArg list option)> typename + +%type <(Cil.attributes -> (string * Cil.formatArg) list -> Cil.typ) * (Cil.typ -> Cil.formatArg list option)> type_spec + +%type <((string * Cil.formatArg) list -> (string * Cil.typ * Cil.attributes) list option * bool) * ((string * Cil.typ * Cil.attributes) list option * bool -> Cil.formatArg list option)> parameters + + +%type <(Cil.location -> (string * Cil.formatArg) list -> Cil.instr) * (Cil.instr -> Cil.formatArg list option)> instr + +%type <(Cil.typ -> (string * Cil.formatArg) list -> Cil.offset) * (Cil.offset -> Cil.formatArg list option)> offset + + +%% + + +initialize: + /* empty */ { } +; + +/* (*** Expressions ***) */ + + +expression: +| ARG_e { (* Count arguments eagerly *) + let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fe e -> e + | a -> wrongArgType currentArg + "expression" a), + + (fun e -> Some [ Fe e ])) + } + +| constant { $1 } + +| lval %prec IDENT + { ((fun args -> Lval ((fst $1) args)), + + (fun e -> match e with + Lval l -> (snd $1) l + | _ -> None)) + } + +| SIZEOF expression + { ((fun args -> SizeOfE ((fst $2) args)), + + fun e -> match e with + SizeOfE e' -> (snd $2) e' + | _ -> None) + } + +| SIZEOF LPAREN typename RPAREN + { ((fun args -> SizeOf ((fst $3) args)), + + (fun e -> match e with + SizeOf t -> (snd $3) t + | _ -> None)) + } + +| ALIGNOF expression + { ((fun args -> AlignOfE ((fst $2) args)), + + (fun e -> match e with + AlignOfE e' -> (snd $2) e' | _ -> None)) + } + +| ALIGNOF LPAREN typename RPAREN + { ((fun args -> AlignOf ((fst $3) args)), + + (fun e -> match e with + AlignOf t' -> (snd $3) t' | _ -> None)) + } + +| PLUS expression + { $2 } +| MINUS expression + { doUnop Neg $2 } + +| EXCLAM expression + { doUnop LNot $2 } + +| TILDE expression + { doUnop BNot $2 } + +| argu expression %prec ARG_u + { ((fun args -> + let e = (fst $2) args in + UnOp((fst $1) args, e, typeOf e)), + + (fun e -> match e with + UnOp(uo, e', _) -> begin + match (snd $1) uo, (snd $2) e' with + Some m1, Some m2 -> Some (m1 @ m2) + | _ -> None + end + | _ -> None)) + } + + +| AND expression %prec ADDROF + { ((fun args -> + match (fst $2) args with + Lval l -> mkAddrOf l + | _ -> E.s (bug "AddrOf applied to a non lval")), + (fun e -> match e with + AddrOf l -> (snd $2) (Lval l) + | e -> (snd $2) (Lval (mkMem e NoOffset)))) + } + +| LPAREN expression RPAREN + { $2 } + +| expression PLUS expression + { ((fun args -> buildPlus ((fst $1) args) + ((fst $3) args)), + (fun e -> match e with + BinOp((PlusPI|PlusA), e1, e2, _) -> begin + match (snd $1) e1, (snd $3) e2 with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + } + +| expression MINUS expression + { ((fun args -> buildMinus ((fst $1) args) + ((fst $3) args)), + + (fun e -> match e with + BinOp((MinusPP|MinusPI|MinusA), e1, e2, _) -> + begin + match (snd $1) e1, (snd $3) e2 with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + } +| expression argb expression %prec ARG_b + { ((fun args -> + let e1 = (fst $1) args in + let bop = (fst $2) args in + let e2 = (fst $3) args in + let t1 = typeOf e1 in + BinOp(bop, e1, e2, t1)), + + (fun e -> match e with + BinOp(bop, e1, e2, _) -> begin + match (snd $1) e1,(snd $2) bop,(snd $3) e2 with + Some m1, Some m2, Some m3 -> + Some (m1 @ m2 @ m3) + | _, _, _ -> None + end + | _ -> None)) + } + +| expression STAR expression + { doBinop Mult $1 $3 } +| expression SLASH expression + { doBinop Div $1 $3 } +| expression PERCENT expression + { doBinop Mod $1 $3 } +| expression INF_INF expression + { doBinop Shiftlt $1 $3 } +| expression SUP_SUP expression + { doBinop Shiftrt $1 $3 } +| expression AND expression + { doBinop BAnd $1 $3 } +| expression PIPE expression + { doBinop BOr $1 $3 } +| expression CIRC expression + { doBinop BXor $1 $3 } +| expression EQ_EQ expression + { doBinop Eq $1 $3 } +| expression EXCLAM_EQ expression + { doBinop Ne $1 $3 } +| expression INF expression + { doBinop Lt $1 $3 } +| expression SUP expression + { doBinop Gt $1 $3 } +| expression INF_EQ expression + { doBinop Le $1 $3 } +| expression SUP_EQ expression + { doBinop Ge $1 $3 } + +| LPAREN typename RPAREN expression + { ((fun args -> + let t = (fst $2) args in + let e = (fst $4) args in + mkCast e t), + + (fun e -> + let t', e' = + match e with + CastE (t', e') -> t', e' + | _ -> typeOf e, e + in + match (snd $2) t', (snd $4 e') with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None)) + } +; + +/*(* Separate the ARG_ to ensure that the counting of arguments is right *)*/ +argu : +| ARG_u { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fu uo -> uo + | a -> wrongArgType currentArg "unnop" a), + + fun uo -> Some [ Fu uo ]) + } +; + +argb : +| ARG_b { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fb bo -> bo + | a -> wrongArgType currentArg "binop" a), + + fun bo -> Some [ Fb bo ]) + } +; + +constant: +| ARG_d { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fd n -> integer n + | a -> wrongArgType currentArg "integer" a), + + fun e -> match e with + Const(CInt64(n, _, _)) -> + Some [ Fd (Int64.to_int n) ] + | _ -> None) + } + +| ARG_g { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fg s -> Const(CStr s) + | a -> wrongArgType currentArg "string" a), + + fun e -> match e with + Const(CStr s) -> + Some [ Fg s ] + | _ -> None) + } +| CST_INT { let n = parseInt $1 in + ((fun args -> n), + + (fun e -> match e, n with + Const(CInt64(e', _, _)), + Const(CInt64(n', _, _)) when e' = n' -> Some [] + | _ -> None)) + } +; + + +/*(***************** LVALUES *******************)*/ +lval: +| ARG_l { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fl l -> l + | Fv v -> Var v, NoOffset + | a -> wrongArgType currentArg "lval" a), + + fun l -> Some [ Fl l ]) + } + +| argv offset %prec ARG_v + { ((fun args -> + let v = (fst $1) args in + (Var v, (fst $2) v.vtype args)), + + (fun l -> match l with + Var vi, off -> begin + match (snd $1) vi, (snd $2) off with + Some m1, Some m2 -> Some (m1 @ m2) + | _ -> None + end + | _ -> None)) + } + +| STAR expression { ((fun args -> mkMem ((fst $2) args) NoOffset), + + (fun l -> match l with + Mem e, NoOffset -> (snd $2) e + | _, _ -> None)) + } + +| expression ARROW IDENT offset + { ((fun args -> + let e = (fst $1) args in + let baset = + match unrollTypeDeep (typeOf e) with + TPtr (t, _) -> t + | _ -> E.s (bug "Expecting a pointer for field %s\n" $3) + in + let fi = getField baset $3 in + mkMem e (Field(fi, (fst $4) fi.ftype args))), + + (fun l -> match l with + Mem e, Field(fi, off) when fi.fname = $3 -> begin + match (snd $1) e, (snd $4) off with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _, _ -> None)) + } + +| LPAREN STAR expression RPAREN offset + { ((fun args -> + let e = (fst $3) args in + let baset = + match unrollTypeDeep (typeOf e) with + TPtr (t, _) -> t + | _ -> E.s (bug "Expecting a pointer\n") + in + mkMem e ((fst $5) baset args)), + + (fun l -> match l with + Mem e, off -> begin + match (snd $3) e, (snd $5 off) with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _, _ -> None)) + } + ; + +argv : +| ARG_v { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fv v -> v + | a -> wrongArgType currentArg "varinfo" a), + + fun v -> Some [ Fv v ]) + } +| IDENT { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fv v -> v + | a -> wrongArgType currentArg "varinfo" a), + (fun v -> + E.s (bug "identifiers (%s) are not supported for deconstruction" currentArg))) + } +; + + +/*(********** OFFSETS *************)*/ +offset: +| ARG_o { let currentArg = $1 in + ((fun t args -> + match getArg currentArg args with + Fo o -> o + | a -> wrongArgType currentArg "offset" a), + + (fun off -> Some [ Fo off ])) + } + +| /* empty */ { ((fun t args -> NoOffset), + + (fun off -> match off with + NoOffset -> Some [] + | _ -> None)) + } + +| DOT IDENT offset { ((fun t args -> + let fi = getField t $2 in + Field (fi, (fst $3) fi.ftype args)), + + (fun off -> match off with + Field (fi, off') when fi.fname = $2 -> + (snd $3) off' + | _ -> None)) + } + +| LBRACKET expression RBRACKET offset + { ((fun t args -> + let bt = + match unrollType t with + TArray(bt, _, _) -> bt + | _ -> E.s (error "Formatcil: expecting an array for index") + in + let e = (fst $2) args in + Index(e, (fst $4) bt args)), + + (fun off -> match off with + Index (e, off') -> begin + match (snd $2) e, (snd $4) off with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + } +; + + +/*(************ TYPES **************)*/ +typename: one_formal { ((fun args -> + let (_, ft, _) = (fst $1) args in + ft), + + (fun t -> (snd $1) ("", t, []))) + } +; + +one_formal: +/*(* Do not allow attributes for the name *)*/ +| type_spec attributes decl + { ((fun args -> + let tal = (fst $2) args in + let ts = (fst $1) tal args in + let (fn, ft, _) = (fst $3) ts args in + (fn, ft, [])), + + (fun (fn, ft, fa) -> + match (snd $3) (fn, ft) with + Some (restt, m3) -> begin + match (snd $1) restt, + (snd $2) (typeAttrs restt)with + Some m1, Some m2 -> + Some (m1 @ m2 @ m3) + | _, _ -> None + end + | _ -> None)) + } + +| ARG_f + { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Ff (fn, ft, fa) -> (fn, ft, fa) + | a -> wrongArgType currentArg "formal" a), + + (fun (fn, ft, fa) -> Some [ Ff (fn, ft, fa) ])) + } +; + +type_spec: +| ARG_t { let currentArg = $1 in + ((fun al args -> + match getArg currentArg args with + Ft t -> typeAddAttributes al t + | a -> wrongArgType currentArg "type" a), + + (fun t -> Some [ Ft t ])) + } + +| VOID { ((fun al args -> TVoid al), + + (fun t -> match unrollType t with + TVoid _ -> Some [] + | _ -> None)) } + +| ARG_k { let currentArg = $1 in + ((fun al args -> + match getArg currentArg args with + Fk ik -> TInt(ik, al) + | a -> wrongArgType currentArg "ikind" a), + + (fun t -> match unrollType t with + TInt(ik, _) -> Some [ Fk ik ] + | _ -> None)) + } + +| CHAR { ((fun al args -> TInt(IChar, al)), + (matchIntType IChar)) } +| UNSIGNED CHAR { ((fun al args -> TInt(IUChar, al)), + matchIntType IUChar) } + +| SHORT { ((fun al args -> TInt(IShort, al)), + matchIntType IShort) } +| UNSIGNED SHORT { ((fun al args -> TInt(IUShort, al)), + matchIntType IUShort) } + +| INT { ((fun al args -> TInt(IInt, al)), + matchIntType IInt) } +| UNSIGNED INT { ((fun al args -> TInt(IUInt, al)), matchIntType IUInt) } + +| LONG { ((fun al args -> TInt(ILong, al)), + matchIntType ILong) } +| UNSIGNED LONG { ((fun al args -> TInt(IULong, al)), + matchIntType IULong) } + +| LONG LONG { ((fun al args -> TInt(ILongLong, al)), + + matchIntType ILongLong) + } +| UNSIGNED LONG LONG { ((fun al args -> TInt(IULongLong, al)), + + matchIntType IULongLong) + } + +| FLOAT { ((fun al args -> TFloat(FFloat, al)), + matchFloatType FFloat) + } +| DOUBLE { ((fun al args -> TFloat(FDouble, al)), + matchFloatType FDouble) } + +| STRUCT ARG_c { let currentArg = $2 in + ((fun al args -> + match getArg currentArg args with + Fc ci -> TComp(ci, al) + | a -> wrongArgType currentArg "compinfo" a), + + (fun t -> match unrollType t with + TComp(ci, _) -> Some [ Fc ci ] + | _ -> None)) + } +| UNION ARG_c { let currentArg = $2 in + ((fun al args -> + match getArg currentArg args with + Fc ci -> TComp(ci, al) + | a -> wrongArgType currentArg "compinfo" a), + + (fun t -> match unrollType t with + TComp(ci, _) -> Some [ Fc ci ] + | _ -> None)) + + } + +| TYPEOF LPAREN expression RPAREN + { ((fun al args -> typeAddAttributes al + (typeOf ((fst $3) args))), + + (fun t -> E.s (bug "Cannot match typeof(e)\n"))) + } +; + +decl: +| STAR attributes decl + { ((fun ts args -> + let al = (fst $2) args in + (fst $3) (TPtr(ts, al)) args), + + (fun (fn, ft) -> + match (snd $3) (fn, ft) with + Some (TPtr(bt, al), m2) -> begin + match (snd $2) al with + Some m1 -> Some (bt, m1 @ m2) + | _ -> None + end + | _ -> None)) + } + +| direct_decl { $1 } +; + +direct_decl: +| /* empty */ { ((fun ts args -> ("", ts, [])), + + (* Match any name in this case *) + (fun (fn, ft) -> + Some (unrollType ft, []))) + } + +| IDENT { ((fun ts args -> ($1, ts, [])), + + (fun (fn, ft) -> + if fn = "" || fn = $1 then + Some (unrollType ft, []) + else + None)) + } + +| LPAREN attributes decl RPAREN + { ((fun ts args -> + let al = (fst $2) args in + (fst $3) (typeAddAttributes al ts) args), + + (fun (fn, ft) -> begin + match (snd $3) (fn, ft) with + Some (restt, m2) -> begin + match (snd $2) (typeAttrs restt) with + Some m1 -> Some (restt, m1 @ m2) + | _ -> None + end + | _ -> None + end)) + } + +| direct_decl LBRACKET exp_opt RBRACKET + { ((fun ts args -> + (fst $1) (TArray(ts, (fst $3) args, [])) args), + + (fun (fn, ft) -> + match (snd $1) (fn, ft) with + Some (TArray(bt, lo, _), m1) -> begin + match (snd $3) lo with + Some m2 -> Some (unrollType bt, m1 @ m2) + | _ -> None + end + | _ -> None)) + } + + +/*(* We use parentheses around the function to avoid conflicts *)*/ +| LPAREN attributes decl RPAREN LPAREN parameters RPAREN + { ((fun ts args -> + let al = (fst $2) args in + let pars, isva = (fst $6) args in + (fst $3) (TFun(ts, pars, isva, al)) args), + + (fun (fn, ft) -> + match (snd $3) (fn, ft) with + Some (TFun(rt, args, isva, al), m1) -> begin + match (snd $2) al, (snd $6) (args, isva) with + Some m2, Some m6 + -> Some (unrollType rt, m1 @ m2 @ m6) + | _ -> None + end + | _ -> None)) + } +; + +parameters: +| /* empty */ { ((fun args -> (None, false)), + + (* Match any formals *) + (fun (pars, isva) -> + match pars, isva with + (_, false) -> Some [] + | _ -> None)) + } + +| parameters_ne { ((fun args -> + let (pars : (string * typ * attributes) list), + (isva : bool) = (fst $1) args in + (Some pars), isva), + + (function + ((Some pars), isva) -> (snd $1) (pars, isva) + | _ -> None)) + } +; +parameters_ne: +| ELLIPSIS + { ((fun args -> ([], true)), + + (function + ([], true) -> Some [] + | _ -> None)) + } + +| ARG_va { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fva isva -> ([], isva) + | a -> wrongArgType currentArg "vararg" a), + + (function + ([], isva) -> Some [ Fva isva ] + | _ -> None)) + } + +| ARG_F { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + FF fl -> ( fl, false) + | a -> wrongArgType currentArg "formals" a), + + (function + (pars, false) -> Some [ FF pars ] + | _ -> None)) + } + +| one_formal { ((fun args -> ([(fst $1) args], false)), + + (function + ([ f ], false) -> (snd $1) f + | _ -> None)) + } + + +| one_formal COMMA parameters_ne + { ((fun args -> + let this = (fst $1) args in + let (rest, isva) = (fst $3) args in + (this :: rest, isva)), + + (function + ((f::rest, isva)) -> begin + match (snd $1) f, (snd $3) (rest, isva) with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + } +; + + + + + +exp_opt: + /* empty */ { ((fun args -> None), + (* Match anything if the pattern does not have a len *) + (fun _ -> Some [])) } + +| expression { ((fun args -> Some ((fst $1) args)), + + (fun lo -> match lo with + Some e -> (snd $1) e + | _ -> None)) + } +| ARG_eo { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Feo lo -> lo + | a -> wrongArgType currentArg "exp_opt" a), + + fun lo -> Some [ Feo lo ]) + } +; + + + +attributes: + /*(* Ignore other attributes *)*/ + /* empty */ { ((fun args -> []), + (fun attrs -> Some [])) } + +| ARG_A { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + FA al -> al + | a -> wrongArgType currentArg "attributes" a), + + (fun al -> Some [ FA al ])) + } + +| attribute attributes + { ((fun args -> + addAttribute ((fst $1) args) ((fst $2) args)), + (* Pass all the attributes down *) + (fun attrs -> + match (snd $1) attrs, (snd $2) attrs with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None)) + } +; + +attribute: +| CONST { doAttr "const" None } +| RESTRICT { doAttr "restrict" None } +| VOLATILE { doAttr "volatile" None } +| ATTRIBUTE LPAREN LPAREN attr RPAREN RPAREN + { $4 } + +; + + +attr: +| IDENT + { doAttr $1 None } + +| IDENT LPAREN attr_args_ne RPAREN + { doAttr $1 (Some $3) } +; + +attr_args_ne: + attr_arg { ((fun args -> [ (fst $1) args ]), + + (fun aargs -> match aargs with + [ arg ] -> (snd $1) arg + | _ -> None)) + } +| attr_arg COMMA attr_args_ne { ((fun args -> + let this = (fst $1) args in + this :: ((fst $3) args)), + + (fun aargs -> match aargs with + h :: rest -> begin + match (snd $1) h, (snd $3) rest with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + } +| ARG_P { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + FP al -> al + | a -> wrongArgType currentArg "attrparams" a), + + (fun al -> Some [ FP al ])) + } +; + +attr_arg: +| IDENT { ((fun args -> ACons($1, [])), + + (fun aarg -> match aarg with + ACons(id, []) when id = $1 -> Some [] + | _ -> None)) + } +| IDENT LPAREN attr_args_ne RPAREN + { ((fun args -> ACons($1, (fst $3) args)), + + (fun aarg -> match aarg with + ACons(id, args) when id = $1 -> + (snd $3) args + | _ -> None)) + } +| ARG_p { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + Fp p -> p + | a -> wrongArgType currentArg "attrparam" a), + + (fun ap -> Some [ Fp ap])) + } + +; + +/* (********** INSTRUCTIONS ***********) */ +instr: +| ARG_i SEMICOLON + { let currentArg = $1 in + ((fun loc args -> + match getArg currentArg args with + Fi i -> i + | a -> wrongArgType currentArg "instr" a), + + (fun i -> Some [ Fi i])) + } + +| lval EQ expression SEMICOLON + { ((fun loc args -> + Set((fst $1) args, (fst $3) args, loc)), + + (fun i -> match i with + Set (lv, e, l) -> begin + match (snd $1) lv, (snd $3) e with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + } + +| lval PLUS_EQ expression SEMICOLON + { ((fun loc args -> + let l = (fst $1) args in + Set(l, buildPlus (Lval l) ((fst $3) args), loc)), + + matchBinopEq + (fun bop -> bop = PlusPI || bop = PlusA) + (snd $1) (snd $3)) + } + +| lval MINUS_EQ expression SEMICOLON + { ((fun loc args -> + let l = (fst $1) args in + Set(l, + buildMinus (Lval l) ((fst $3) args), loc)), + + matchBinopEq (fun bop -> bop = MinusA + || bop = MinusPP + || bop = MinusPI) + (snd $1) (snd $3)) + } +| lval STAR_EQ expression SEMICOLON + { doBinopEq Mult $1 $3 } + +| lval SLASH_EQ expression SEMICOLON + { doBinopEq Div $1 $3 } + +| lval PERCENT_EQ expression SEMICOLON + { doBinopEq Mod $1 $3 } + +| lval AND_EQ expression SEMICOLON + { doBinopEq BAnd $1 $3 } + +| lval PIPE_EQ expression SEMICOLON + { doBinopEq BOr $1 $3 } + +| lval CIRC_EQ expression SEMICOLON + { doBinopEq BXor $1 $3 } + +| lval INF_INF_EQ expression SEMICOLON + { doBinopEq Shiftlt $1 $3 } + +| lval SUP_SUP_EQ expression SEMICOLON + { doBinopEq Shiftrt $1 $3 } + +/* (* Would be nice to be able to condense the next three rules but we get + * into conflicts *)*/ +| lval EQ lval LPAREN arguments RPAREN SEMICOLON + { ((fun loc args -> + Call(Some ((fst $1) args), Lval ((fst $3) args), + (fst $5) args, loc)), + + (fun i -> match i with + Call(Some l, Lval f, args, loc) -> begin + match (snd $1) l, (snd $3) f, (snd $5) args with + Some m1, Some m2, Some m3 -> + Some (m1 @ m2 @ m3) + | _, _, _ -> None + end + | _ -> None)) + } + +| lval LPAREN arguments RPAREN SEMICOLON + { ((fun loc args -> + Call(None, Lval ((fst $1) args), + (fst $3) args, loc)), + + (fun i -> match i with + Call(None, Lval f, args, loc) -> begin + match (snd $1) f, (snd $3) args with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + } + +| arglo lval LPAREN arguments RPAREN SEMICOLON + { ((fun loc args -> + Call((fst $1) args, Lval ((fst $2) args), + (fst $4) args, loc)), + + (fun i -> match i with + Call(lo, Lval f, args, loc) -> begin + match (snd $1) lo, (snd $2) f, (snd $4) args with + Some m1, Some m2, Some m3 -> + Some (m1 @ m2 @ m3) + | _, _, _ -> None + end + | _ -> None)) + } +; + +/* (* Separate this out to ensure that the counting or arguments is right *)*/ +arglo: + ARG_lo { let currentArg = $1 in + ((fun args -> + let res = + match getArg currentArg args with + Flo x -> x + | a -> wrongArgType currentArg "lval option" a + in + res), + + (fun lo -> Some [ Flo lo ])) + } +; +arguments: + /* empty */ { ((fun args -> []), + + (fun actuals -> match actuals with + [] -> Some [] + | _ -> None)) + } + +| arguments_ne { $1 } +; + +arguments_ne: + expression { ((fun args -> [ (fst $1) args ]), + + (fun actuals -> match actuals with + [ h ] -> (snd $1) h + | _ -> None)) + } + +| ARG_E { let currentArg = $1 in + ((fun args -> + match getArg currentArg args with + FE el -> el + | a -> wrongArgType currentArg "arguments" a), + + (fun actuals -> Some [ FE actuals ])) + } + +| expression COMMA arguments_ne + { ((fun args -> ((fst $1) args) :: ((fst $3) args)), + + (fun actuals -> match actuals with + h :: rest -> begin + match (snd $1) h, (snd $3) rest with + Some m1, Some m2 -> Some (m1 @ m2) + | _, _ -> None + end + | _ -> None)) + } +; + + +/*(******** STATEMENTS *********)*/ +stmt: + IF LPAREN expression RPAREN stmt %prec IF + { (fun mkTemp loc args -> + mkStmt (If((fst $3) args, + mkBlock [ $5 mkTemp loc args ], + mkBlock [], loc))) + } +| IF LPAREN expression RPAREN stmt ELSE stmt + { (fun mkTemp loc args -> + mkStmt (If((fst $3) args, + mkBlock [ $5 mkTemp loc args ], + mkBlock [ $7 mkTemp loc args], loc))) + } +| RETURN exp_opt SEMICOLON + { (fun mkTemp loc args -> + mkStmt (Return((fst $2) args, loc))) + } +| BREAK SEMICOLON + { (fun mkTemp loc args -> + mkStmt (Break loc)) + } +| CONTINUE SEMICOLON + { (fun mkTemp loc args -> + mkStmt (Continue loc)) + } +| LBRACE stmt_list RBRACE + { (fun mkTemp loc args -> + let stmts = $2 mkTemp loc args in + mkStmt (Block (mkBlock (stmts)))) + } +| WHILE LPAREN expression RPAREN stmt + { (fun mkTemp loc args -> + let e = (fst $3) args in + let e = + if isPointerType(typeOf e) then + mkCast e !upointType + else e + in +(* + mkStmt + (Loop (mkBlock [ mkStmt + (If(e, + mkBlock [], + mkBlock [ mkStmt + (Break loc) ], + loc)); + $5 mkTemp loc args ], + loc, None, None)) +*) + mkStmt + (While (e, mkBlock [ $5 mkTemp loc args ], loc))) + } +| instr_list { (fun mkTemp loc args -> + mkStmt (Instr ($1 loc args))) + } +| ARG_s { let currentArg = $1 in + (fun mkTemp loc args -> + match getArg currentArg args with + Fs s -> s + | a -> wrongArgType currentArg "stmt" a) } +; + +stmt_list: + /* empty */ { (fun mkTemp loc args -> []) } + +| ARG_S { let currentArg = $1 in + (fun mkTemp loc args -> + match getArg currentArg args with + | FS sl -> sl + | a -> wrongArgType currentArg "stmts" a) + } +| stmt stmt_list + { (fun mkTemp loc args -> + let this = $1 mkTemp loc args in + this :: ($2 mkTemp loc args)) + } +/* (* We can also have a declaration *) */ +| type_spec attributes decl maybe_init SEMICOLON stmt_list + { (fun mkTemp loc args -> + let tal = (fst $2) args in + let ts = (fst $1) tal args in + let (n, t, _) = (fst $3) ts args in + let init = $4 args in + (* Before we proceed we must create the variable *) + let v = mkTemp n t in + (* Now we parse the rest *) + let rest = $6 mkTemp loc ((n, Fv v) :: args) in + (* Now we add the initialization instruction to the + * front *) + match init with + NoInit -> rest + | InitExp e -> + mkStmtOneInstr (Set((Var v, NoOffset), e, loc)) + :: rest + | InitCall (f, args) -> + mkStmtOneInstr (Call(Some (Var v, NoOffset), + Lval f, args, loc)) + :: rest + + ) + } +; + +instr_list: + /*(* Set this rule to very low precedence to ensure that we shift as + many instructions as possible *)*/ + instr %prec COMMA + { (fun loc args -> [ ((fst $1) loc args) ]) } +| ARG_I { let currentArg = $1 in + (fun loc args -> + match getArg currentArg args with + | FI il -> il + | a -> wrongArgType currentArg "instrs" a) + } +| instr instr_list + { (fun loc args -> + let this = (fst $1) loc args in + this :: ($2 loc args)) + } +; + + +maybe_init: +| { (fun args -> NoInit) } +| EQ expression { (fun args -> InitExp ((fst $2) args)) } +| EQ lval LPAREN arguments RPAREN + { (fun args -> + InitCall((fst $2) args, (fst $4) args)) } +; +%% + + + + + + + diff --git a/cil/src/frontc/cabs.ml b/cil/src/frontc/cabs.ml new file mode 100644 index 00000000..78ac02f4 --- /dev/null +++ b/cil/src/frontc/cabs.ml @@ -0,0 +1,396 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(** This file was originally part of Hugues Casee's frontc 2.0, and has been + * extensively changed since. +** +** 1.0 3.22.99 Hugues Cassé First version. +** 2.0 George Necula 12/12/00: Many extensions + **) + +(* +** Types +*) + +type cabsloc = { + lineno : int; + filename: string; + byteno: int; +} + +let cabslu = {lineno = -10; + filename = "cabs loc unknown"; + byteno = -10;} + +(* clexer puts comments here *) +let commentsGA = GrowArray.make 100 (GrowArray.Elem(cabslu,"",false)) + +type typeSpecifier = (* Merge all specifiers into one type *) + Tvoid (* Type specifier ISO 6.7.2 *) + | Tchar + | Tshort + | Tint + | Tlong + | Tint64 + | Tfloat + | Tdouble + | Tsigned + | Tunsigned + | Tnamed of string + (* each of the following three kinds of specifiers contains a field + * or item list iff it corresponds to a definition (as opposed to + * a forward declaration or simple reference to the type); they + * also have a list of __attribute__s that appeared between the + * keyword and the type name (definitions only) *) + | Tstruct of string * field_group list option * attribute list + | Tunion of string * field_group list option * attribute list + | Tenum of string * enum_item list option * attribute list + | TtypeofE of expression (* GCC __typeof__ *) + | TtypeofT of specifier * decl_type (* GCC __typeof__ *) + +and storage = + NO_STORAGE | AUTO | STATIC | EXTERN | REGISTER + +and funspec = + INLINE | VIRTUAL | EXPLICIT + +and cvspec = + CV_CONST | CV_VOLATILE | CV_RESTRICT + +(* Type specifier elements. These appear at the start of a declaration *) +(* Everywhere they appear in this file, they appear as a 'spec_elem list', *) +(* which is not interpreted by cabs -- rather, this "word soup" is passed *) +(* on to the compiler. Thus, we can represent e.g. 'int long float x' even *) +(* though the compiler will of course choke. *) +and spec_elem = + SpecTypedef + | SpecCV of cvspec (* const/volatile *) + | SpecAttr of attribute (* __attribute__ *) + | SpecStorage of storage + | SpecInline + | SpecType of typeSpecifier + | SpecPattern of string (* specifier pattern variable *) + +(* decided to go ahead and replace 'spec_elem list' with specifier *) +and specifier = spec_elem list + + +(* Declarator type. They modify the base type given in the specifier. Keep + * them in the order as they are printed (this means that the top level + * constructor for ARRAY and PTR is the inner-level in the meaning of the + * declared type) *) +and decl_type = + | JUSTBASE (* Prints the declared name *) + | PARENTYPE of attribute list * decl_type * attribute list + (* Prints "(attrs1 decl attrs2)". + * attrs2 are attributes of the + * declared identifier and it is as + * if they appeared at the very end + * of the declarator. attrs1 can + * contain attributes for the + * identifier or attributes for the + * enclosing type. *) + | ARRAY of decl_type * attribute list * expression + (* Prints "decl [ attrs exp ]". + * decl is never a PTR. *) + | PTR of attribute list * decl_type (* Prints "* attrs decl" *) + | PROTO of decl_type * single_name list * bool + (* Prints "decl (args[, ...])". + * decl is never a PTR.*) + +(* The base type and the storage are common to all names. Each name might + * contain type or storage modifiers *) +(* e.g.: int x, y; *) +and name_group = specifier * name list + +(* The optional expression is the bitfield *) +and field_group = specifier * (name * expression option) list + +(* like name_group, except the declared variables are allowed to have initializers *) +(* e.g.: int x=1, y=2; *) +and init_name_group = specifier * init_name list + +(* The decl_type is in the order in which they are printed. Only the name of + * the declared identifier is pulled out. The attributes are those that are + * printed after the declarator *) +(* e.g: in "int *x", "*x" is the declarator; "x" will be pulled out as *) +(* the string, and decl_type will be PTR([], JUSTBASE) *) +and name = string * decl_type * attribute list * cabsloc + +(* A variable declarator ("name") with an initializer *) +and init_name = name * init_expression + +(* Single names are for declarations that cannot come in groups, like + * function parameters and functions *) +and single_name = specifier * name + + +and enum_item = string * expression * cabsloc + +(* +** Declaration definition (at toplevel) +*) +and definition = + FUNDEF of single_name * block * cabsloc * cabsloc + | DECDEF of init_name_group * cabsloc (* global variable(s), or function prototype *) + | TYPEDEF of name_group * cabsloc + | ONLYTYPEDEF of specifier * cabsloc + | GLOBASM of string * cabsloc + | PRAGMA of expression * cabsloc + | LINKAGE of string * cabsloc * definition list (* extern "C" { ... } *) + (* toplevel form transformer, from the first definition to the *) + (* second group of definitions *) + | TRANSFORMER of definition * definition list * cabsloc + (* expression transformer: source and destination *) + | EXPRTRANSFORMER of expression * expression * cabsloc + + +(* the string is a file name, and then the list of toplevel forms *) +and file = string * definition list + + +(* +** statements +*) + +(* A block contains a list of local label declarations ( GCC's ({ __label__ + * l1, l2; ... }) ) , a list of definitions and a list of statements *) +and block = + { blabels: string list; + battrs: attribute list; + bstmts: statement list + } + +(* GCC asm directives have lots of extra information to guide the optimizer *) +and asm_details = + { aoutputs: (string * expression) list; (* constraints and expressions for outputs *) + ainputs: (string * expression) list; (* constraints and expressions for inputs *) + aclobbers: string list (* clobbered registers *) + } + +and statement = + NOP of cabsloc + | COMPUTATION of expression * cabsloc + | BLOCK of block * cabsloc + | SEQUENCE of statement * statement * cabsloc + | IF of expression * statement * statement * cabsloc + | WHILE of expression * statement * cabsloc + | DOWHILE of expression * statement * cabsloc + | FOR of for_clause * expression * expression * statement * cabsloc + | BREAK of cabsloc + | CONTINUE of cabsloc + | RETURN of expression * cabsloc + | SWITCH of expression * statement * cabsloc + | CASE of expression * statement * cabsloc + | CASERANGE of expression * expression * statement * cabsloc + | DEFAULT of statement * cabsloc + | LABEL of string * statement * cabsloc + | GOTO of string * cabsloc + | COMPGOTO of expression * cabsloc (* GCC's "goto *exp" *) + | DEFINITION of definition (*definition or declaration of a variable or type*) + + | ASM of attribute list * (* typically only volatile and const *) + string list * (* template *) + asm_details option * (* extra details to guide GCC's optimizer *) + cabsloc + + (** MS SEH *) + | TRY_EXCEPT of block * expression * block * cabsloc + | TRY_FINALLY of block * block * cabsloc + +and for_clause = + FC_EXP of expression + | FC_DECL of definition + +(* +** Expressions +*) +and binary_operator = + ADD | SUB | MUL | DIV | MOD + | AND | OR + | BAND | BOR | XOR | SHL | SHR + | EQ | NE | LT | GT | LE | GE + | ASSIGN + | ADD_ASSIGN | SUB_ASSIGN | MUL_ASSIGN | DIV_ASSIGN | MOD_ASSIGN + | BAND_ASSIGN | BOR_ASSIGN | XOR_ASSIGN | SHL_ASSIGN | SHR_ASSIGN + +and unary_operator = + MINUS | PLUS | NOT | BNOT | MEMOF | ADDROF + | PREINCR | PREDECR | POSINCR | POSDECR + +and expression = + NOTHING + | UNARY of unary_operator * expression + | LABELADDR of string (* GCC's && Label *) + | BINARY of binary_operator * expression * expression + | QUESTION of expression * expression * expression + + (* A CAST can actually be a constructor expression *) + | CAST of (specifier * decl_type) * init_expression + + (* There is a special form of CALL in which the function called is + __builtin_va_arg and the second argument is sizeof(T). This + should be printed as just T *) + | CALL of expression * expression list + | COMMA of expression list + | CONSTANT of constant + | VARIABLE of string + | EXPR_SIZEOF of expression + | TYPE_SIZEOF of specifier * decl_type + | EXPR_ALIGNOF of expression + | TYPE_ALIGNOF of specifier * decl_type + | INDEX of expression * expression + | MEMBEROF of expression * string + | MEMBEROFPTR of expression * string + | GNU_BODY of block + | EXPR_PATTERN of string (* pattern variable, and name *) + +and constant = + | CONST_INT of string (* the textual representation *) + | CONST_FLOAT of string (* the textual representaton *) + | CONST_CHAR of int64 list + | CONST_WCHAR of int64 list + | CONST_STRING of string + | CONST_WSTRING of int64 list + (* ww: wstrings are stored as an int64 list at this point because + * we might need to feed the wide characters piece-wise into an + * array initializer (e.g., wchar_t foo[] = L"E\xabcd";). If that + * doesn't happen we will convert it to an (escaped) string before + * passing it to Cil. *) + +and init_expression = + | NO_INIT + | SINGLE_INIT of expression + | COMPOUND_INIT of (initwhat * init_expression) list + +and initwhat = + NEXT_INIT + | INFIELD_INIT of string * initwhat + | ATINDEX_INIT of expression * initwhat + | ATINDEXRANGE_INIT of expression * expression + + + (* Each attribute has a name and some + * optional arguments *) +and attribute = string * expression list + + +(*********** HELPER FUNCTIONS **********) + +let missingFieldDecl = ("___missing_field_name", JUSTBASE, [], cabslu) + +let rec isStatic = function + [] -> false + | (SpecStorage STATIC) :: _ -> true + | _ :: rest -> isStatic rest + +let rec isExtern = function + [] -> false + | (SpecStorage EXTERN) :: _ -> true + | _ :: rest -> isExtern rest + +let rec isInline = function + [] -> false + | SpecInline :: _ -> true + | _ :: rest -> isInline rest + +let rec isTypedef = function + [] -> false + | SpecTypedef :: _ -> true + | _ :: rest -> isTypedef rest + + +let get_definitionloc (d : definition) : cabsloc = + match d with + | FUNDEF(_, _, l, _) -> l + | DECDEF(_, l) -> l + | TYPEDEF(_, l) -> l + | ONLYTYPEDEF(_, l) -> l + | GLOBASM(_, l) -> l + | PRAGMA(_, l) -> l + | TRANSFORMER(_, _, l) -> l + | EXPRTRANSFORMER(_, _, l) -> l + | LINKAGE (_, l, _) -> l + +let get_statementloc (s : statement) : cabsloc = +begin + match s with + | NOP(loc) -> loc + | COMPUTATION(_,loc) -> loc + | BLOCK(_,loc) -> loc + | SEQUENCE(_,_,loc) -> loc + | IF(_,_,_,loc) -> loc + | WHILE(_,_,loc) -> loc + | DOWHILE(_,_,loc) -> loc + | FOR(_,_,_,_,loc) -> loc + | BREAK(loc) -> loc + | CONTINUE(loc) -> loc + | RETURN(_,loc) -> loc + | SWITCH(_,_,loc) -> loc + | CASE(_,_,loc) -> loc + | CASERANGE(_,_,_,loc) -> loc + | DEFAULT(_,loc) -> loc + | LABEL(_,_,loc) -> loc + | GOTO(_,loc) -> loc + | COMPGOTO (_, loc) -> loc + | DEFINITION d -> get_definitionloc d + | ASM(_,_,_,loc) -> loc + | TRY_EXCEPT(_, _, _, loc) -> loc + | TRY_FINALLY(_, _, loc) -> loc +end + + +let explodeStringToInts (s: string) : int64 list = + let rec allChars i acc = + if i < 0 then acc + else allChars (i - 1) (Int64.of_int (Char.code (String.get s i)) :: acc) + in + allChars (-1 + String.length s) [] + +let valueOfDigit chr = + let int_value = + match chr with + '0'..'9' -> (Char.code chr) - (Char.code '0') + | 'a'..'z' -> (Char.code chr) - (Char.code 'a') + 10 + | 'A'..'Z' -> (Char.code chr) - (Char.code 'A') + 10 + | _ -> Errormsg.s (Errormsg.bug "not a digit") in + Int64.of_int int_value + + +open Pretty +let d_cabsloc () cl = + text cl.filename ++ text ":" ++ num cl.lineno diff --git a/cil/src/frontc/cabs2cil.ml b/cil/src/frontc/cabs2cil.ml new file mode 100644 index 00000000..31b65b5b --- /dev/null +++ b/cil/src/frontc/cabs2cil.ml @@ -0,0 +1,6238 @@ +(* MODIF: allow E.Error to propagate *) + +(* MODIF: for pointer comparison, avoid systematic cast to unsigned int *) + +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) +(* MODIF: Return statement no longer added when the body of the function + falls-through. *) + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* Type check and elaborate ABS to CIL *) + +(* The references to ISO means ANSI/ISO 9899-1999 *) +module A = Cabs +module E = Errormsg +module H = Hashtbl +module IH = Inthash +module AL = Alpha + +open Cabs +open Pretty +open Cil +open Trace + + +let mydebugfunction () = + E.s (error "mydebugfunction") + +let debugGlobal = false + +(** NDC added command line parameter **) +(* Turn on tranformation that forces correct parameter evaluation order *) +let forceRLArgEval = ref false + +(* Leave a certain global alone. Use a negative number to disable. *) +let nocil: int ref = ref (-1) + +(* Indicates whether we're allowed to duplicate small chunks. *) +let allowDuplication: bool ref = ref true + +(* ---------- source error message handling ------------- *) +let lu = locUnknown +let cabslu = {lineno = -10; + filename = "cabs lu"; + byteno = -10;} + + +(** Interface to the Cprint printer *) +let withCprint (f: 'a -> unit) (x: 'a) : unit = + Cprint.commit (); Cprint.flush (); + let old = !Cprint.out in + Cprint.out := !E.logChannel; + f x; + Cprint.commit (); Cprint.flush (); + flush !Cprint.out; + Cprint.out := old + + +(** Keep a list of the variable ID for the variables that were created to + * hold the result of function calls *) +let callTempVars: unit IH.t = IH.create 13 + +(* Keep a list of functions that were called without a prototype. *) +let noProtoFunctions : bool IH.t = IH.create 13 + +(* Check that s starts with the prefix p *) +let prefix p s = + let lp = String.length p in + let ls = String.length s in + lp <= ls && String.sub s 0 lp = p + +(***** COMPUTED GOTO ************) + +(* The address of labels are small integers (starting from 0). A computed + * goto is replaced with a switch on the address of the label. We generate + * only one such switch and we'll jump to it from all computed gotos. To + * accomplish this we'll add a local variable to store the target of the + * goto. *) + +(* The local variable in which to put the detination of the goto and the + * statement where to jump *) +let gotoTargetData: (varinfo * stmt) option ref = ref None + +(* The "addresses" of labels *) +let gotoTargetHash: (string, int) H.t = H.create 13 +let gotoTargetNextAddr: int ref = ref 0 + + +(********** TRANSPARENT UNION ******) +(* Check if a type is a transparent union, and return the first field if it + * is *) +let isTransparentUnion (t: typ) : fieldinfo option = + match unrollType t with + TComp (comp, _) when not comp.cstruct -> + (* Turn transparent unions into the type of their first field *) + if hasAttribute "transparent_union" (typeAttrs t) then begin + match comp.cfields with + f :: _ -> Some f + | _ -> E.s (unimp "Empty transparent union: %s" (compFullName comp)) + end else + None + | _ -> None + +(* When we process an argument list, remember the argument index which has a + * transparent union type, along with the original type. We need this to + * process function definitions *) +let transparentUnionArgs : (int * typ) list ref = ref [] + +let debugLoc = false +let convLoc (l : cabsloc) = + if debugLoc then + ignore (E.log "convLoc at %s: line %d, btye %d\n" l.filename l.lineno l.byteno); + {line = l.lineno; file = l.filename; byte = l.byteno;} + + +let isOldStyleVarArgName n = + if !msvcMode then n = "va_alist" + else n = "__builtin_va_alist" + +let isOldStyleVarArgTypeName n = + if !msvcMode then n = "va_list" || n = "__ccured_va_list" + else n = "__builtin_va_alist_t" + +(* Weimer + * multi-character character constants + * In MSCV, this code works: + * + * long l1 = 'abcd'; // note single quotes + * char * s = "dcba"; + * long * lptr = ( long * )s; + * long l2 = *lptr; + * assert(l1 == l2); + * + * We need to change a multi-character character literal into the + * appropriate integer constant. However, the plot sickens: we + * must also be able to handle things like 'ab\nd' (value = * "d\nba") + * and 'abc' (vale = *"cba"). + * + * First we convert 'AB\nD' into the list [ 65 ; 66 ; 10 ; 68 ], then we + * multiply and add to get the desired value. + *) + +(* Given a character constant (like 'a' or 'abc') as a list of 64-bit + * values, turn it into a CIL constant. Multi-character constants are + * treated as multi-digit numbers with radix given by the bit width of + * the specified type (either char or wchar_t). *) +let reduce_multichar typ : int64 list -> int64 = + let radix = bitsSizeOf typ in + List.fold_left + (fun acc -> Int64.add (Int64.shift_left acc radix)) + Int64.zero + +let interpret_character_constant char_list = + let value = reduce_multichar charType char_list in + if value < (Int64.of_int 256) then + (* ISO C 6.4.4.4.10: single-character constants have type int *) + (CChr(Char.chr (Int64.to_int value))), intType + else begin + let orig_rep = None (* Some("'" ^ (String.escaped str) ^ "'") *) in + if value <= (Int64.of_int32 Int32.max_int) then + (CInt64(value,IULong,orig_rep)),(TInt(IULong,[])) + else + (CInt64(value,IULongLong,orig_rep)),(TInt(IULongLong,[])) + end + +(*** EXPRESSIONS *************) + + (* We collect here the program *) +let theFile : global list ref = ref [] +let theFileTypes : global list ref = ref [] + +let initGlobals () = theFile := []; theFileTypes := [] + + +let cabsPushGlobal (g: global) = + pushGlobal g ~types:theFileTypes ~variables:theFile + +(* Keep track of some variable ids that must be turned into definitions. We + * do this when we encounter what appears a definition of a global but + * without initializer. We leave it a declaration because maybe down the road + * we see another definition with an initializer. But if we don't see any + * then we turn the last such declaration into a definition without + * initializer *) +let mustTurnIntoDef: bool IH.t = IH.create 117 + +(* Globals that have already been defined. Indexed by the variable name. *) +let alreadyDefined: (string, location) H.t = H.create 117 + +(* Globals that were created due to static local variables. We chose their + * names to be distinct from any global encountered at the time. But we might + * see a global with conflicting name later in the file. *) +let staticLocals: (string, varinfo) H.t = H.create 13 + + +(* Typedefs. We chose their names to be distinct from any global encounterd + * at the time. But we might see a global with conflicting name later in the + * file *) +let typedefs: (string, typeinfo) H.t = H.create 13 + +let popGlobals () = + let rec revonto (tail: global list) = function + [] -> tail + + | GVarDecl (vi, l) :: rest + when vi.vstorage != Extern && IH.mem mustTurnIntoDef vi.vid -> + IH.remove mustTurnIntoDef vi.vid; + revonto (GVar (vi, {init = None}, l) :: tail) rest + + | x :: rest -> revonto (x :: tail) rest + in + revonto (revonto [] !theFile) !theFileTypes + + +(********* ENVIRONMENTS ***************) + +(* The environment is kept in two distinct data structures. A hash table maps + * each original variable name into a varinfo (for variables, or an + * enumeration tag, or a type). (Note that the varinfo might contain an + * alpha-converted name different from that of the lookup name.) The Ocaml + * hash tables can keep multiple mappings for a single key. Each time the + * last mapping is returned and upon deletion the old mapping is restored. To + * keep track of local scopes we also maintain a list of scopes (represented + * as lists). *) +type envdata = + EnvVar of varinfo (* The name refers to a variable + * (which could also be a function) *) + | EnvEnum of exp * typ (* The name refers to an enumeration + * tag for which we know the value + * and the host type *) + | EnvTyp of typ (* The name is of the form "struct + * foo", or "union foo" or "enum foo" + * and refers to a type. Note that + * the name of the actual type might + * be different from foo due to alpha + * conversion *) + | EnvLabel of string (* The name refers to a label. This + * is useful for GCC's locally + * declared labels. The lookup name + * for this category is "label foo" *) + +let env : (string, envdata * location) H.t = H.create 307 +(* We also keep a global environment. This is always a subset of the env *) +let genv : (string, envdata * location) H.t = H.create 307 + + (* In the scope we keep the original name, so we can remove them from the + * hash table easily *) +type undoScope = + UndoRemoveFromEnv of string + | UndoResetAlphaCounter of location AL.alphaTableData ref * + location AL.alphaTableData + | UndoRemoveFromAlphaTable of string + +let scopes : undoScope list ref list ref = ref [] + +let isAtTopLevel () = + !scopes = [] + + +(* When you add to env, you also add it to the current scope *) +let addLocalToEnv (n: string) (d: envdata) = +(* ignore (E.log "%a: adding local %s to env\n" d_loc !currentLoc n); *) + H.add env n (d, !currentLoc); + (* If we are in a scope, then it means we are not at top level. Add the + * name to the scope *) + (match !scopes with + [] -> begin + match d with + EnvVar _ -> + E.s (E.bug "addLocalToEnv: not in a scope when adding %s!" n) + | _ -> () (* We might add types *) + end + | s :: _ -> + s := (UndoRemoveFromEnv n) :: !s) + + +let addGlobalToEnv (k: string) (d: envdata) : unit = +(* ignore (E.log "%a: adding global %s to env\n" d_loc !currentLoc k); *) + H.add env k (d, !currentLoc); + (* Also add it to the global environment *) + H.add genv k (d, !currentLoc) + + + +(* Create a new name based on a given name. The new name is formed from a + * prefix (obtained from the given name as the longest prefix that ends with + * a non-digit), followed by a '_' and then by a positive integer suffix. The + * first argument is a table mapping name prefixes with the largest suffix + * used so far for that prefix. The largest suffix is one when only the + * version without suffix has been used. *) +let alphaTable : (string, location AL.alphaTableData ref) H.t = H.create 307 + (* vars and enum tags. For composite types we have names like "struct + * foo" or "union bar" *) + +(* To keep different name scopes different, we add prefixes to names + * specifying the kind of name: the kind can be one of "" for variables or + * enum tags, "struct" for structures and unions (they share the name space), + * "enum" for enumerations, or "type" for types *) +let kindPlusName (kind: string) + (origname: string) : string = + if kind = "" then origname else + kind ^ " " ^ origname + + +let stripKind (kind: string) (kindplusname: string) : string = + let l = 1 + String.length kind in + if l > 1 then + String.sub kindplusname l (String.length kindplusname - l) + else + kindplusname + +let newAlphaName (globalscope: bool) (* The name should have global scope *) + (kind: string) + (origname: string) : string * location = + let lookupname = kindPlusName kind origname in + (* If we are in a scope then it means that we are alpha-converting a local + * name. Go and add stuff to reset the state of the alpha table but only to + * the top-most scope (that of the enclosing function) *) + let rec findEnclosingFun = function + [] -> (* At global scope *)() + | [s] -> begin + let prefix = AL.getAlphaPrefix lookupname in + try + let countref = H.find alphaTable prefix in + s := (UndoResetAlphaCounter (countref, !countref)) :: !s + with Not_found -> + s := (UndoRemoveFromAlphaTable prefix) :: !s + end + | _ :: rest -> findEnclosingFun rest + in + if not globalscope then + findEnclosingFun !scopes; + let newname, oldloc = + AL.newAlphaName alphaTable None lookupname !currentLoc in + stripKind kind newname, oldloc + + + + +let explodeString (nullterm: bool) (s: string) : char list = + let rec allChars i acc = + if i < 0 then acc + else allChars (i - 1) ((String.get s i) :: acc) + in + allChars (-1 + String.length s) + (if nullterm then [Char.chr 0] else []) + +(*** In order to process GNU_BODY expressions we must record that a given + *** COMPUTATION is interesting *) +let gnu_body_result : (A.statement * ((exp * typ) option ref)) ref + = ref (A.NOP cabslu, ref None) + +(*** When we do statements we need to know the current return type *) +let currentReturnType : typ ref = ref (TVoid([])) +let currentFunctionFDEC: fundec ref = ref dummyFunDec + + +let lastStructId = ref 0 +let anonStructName (k: string) (suggested: string) = + incr lastStructId; + "__anon" ^ k ^ (if suggested <> "" then "_" ^ suggested else "") + ^ "_" ^ (string_of_int (!lastStructId)) + + +let constrExprId = ref 0 + + +let startFile () = + H.clear env; + H.clear genv; + H.clear alphaTable; + lastStructId := 0 + + + +let enterScope () = + scopes := (ref []) :: !scopes + + (* Exit a scope and clean the environment. We do not yet delete from + * the name table *) +let exitScope () = + let this, rest = + match !scopes with + car :: cdr -> car, cdr + | [] -> E.s (error "Not in a scope") + in + scopes := rest; + let rec loop = function + [] -> () + | UndoRemoveFromEnv n :: t -> + H.remove env n; loop t + | UndoRemoveFromAlphaTable n :: t -> H.remove alphaTable n; loop t + | UndoResetAlphaCounter (vref, oldv) :: t -> + vref := oldv; + loop t + in + loop !this + +(* Lookup a variable name. Return also the location of the definition. Might + * raise Not_found *) +let lookupVar (n: string) : varinfo * location = + match H.find env n with + (EnvVar vi), loc -> vi, loc + | _ -> raise Not_found + +let lookupGlobalVar (n: string) : varinfo * location = + match H.find genv n with + (EnvVar vi), loc -> vi, loc + | _ -> raise Not_found + +let docEnv () = + let acc : (string * (envdata * location)) list ref = ref [] in + let doone () = function + EnvVar vi, l -> + dprintf "Var(%s,global=%b) (at %a)" vi.vname vi.vglob d_loc l + | EnvEnum (tag, typ), l -> dprintf "Enum (at %a)" d_loc l + | EnvTyp t, l -> text "typ" + | EnvLabel l, _ -> text ("label " ^ l) + in + H.iter (fun k d -> acc := (k, d) :: !acc) env; + docList ~sep:line (fun (k, d) -> dprintf " %s -> %a" k doone d) () !acc + + + +(* Add a new variable. Do alpha-conversion if necessary *) +let alphaConvertVarAndAddToEnv (addtoenv: bool) (vi: varinfo) : varinfo = +(* + ignore (E.log "%t: alphaConvert(addtoenv=%b) %s" d_thisloc addtoenv vi.vname); +*) + (* Announce the name to the alpha conversion table *) + let newname, oldloc = newAlphaName (addtoenv && vi.vglob) "" vi.vname in + (* Make a copy of the vi if the name has changed. Never change the name for + * global variables *) + let newvi = + if vi.vname = newname then + vi + else begin + if vi.vglob then begin + (* Perhaps this is because we have seen a static local which happened + * to get the name that we later want to use for a global. *) + try + let static_local_vi = H.find staticLocals vi.vname in + H.remove staticLocals vi.vname; + (* Use the new name for the static local *) + static_local_vi.vname <- newname; + (* And continue using the last one *) + vi + with Not_found -> begin + (* Or perhaps we have seen a typedef which stole our name. This is + possible because typedefs use the same name space *) + try + let typedef_ti = H.find typedefs vi.vname in + H.remove typedefs vi.vname; + (* Use the new name for the typedef instead *) + typedef_ti.tname <- newname; + (* And continue using the last name *) + vi + with Not_found -> + E.s (E.error "It seems that we would need to rename global %s (to %s) because of previous occurrence at %a" + vi.vname newname d_loc oldloc); + end + end else begin + (* We have changed the name of a local variable. Can we try to detect + * if the other variable was also local in the same scope? Not for + * now. *) + copyVarinfo vi newname + end + end + in + (* Store all locals in the slocals (in reversed order). We'll reverse them + * and take out the formals at the end of the function *) + if not vi.vglob then + !currentFunctionFDEC.slocals <- newvi :: !currentFunctionFDEC.slocals; + + (if addtoenv then + if vi.vglob then + addGlobalToEnv vi.vname (EnvVar newvi) + else + addLocalToEnv vi.vname (EnvVar newvi)); +(* + ignore (E.log " new=%s\n" newvi.vname); +*) +(* ignore (E.log "After adding %s alpha table is: %a\n" + newvi.vname docAlphaTable alphaTable); *) + newvi + + +(* Strip the "const" from the type. It is unfortunate that const variables + * can only be set in initialization. Once we decided to move all + * declarations to the top of the functions, we have no way of setting a + * "const" variable. Furthermore, if the type of the variable is an array or + * a struct we must recursively strip the "const" from fields and array + * elements. *) +let rec stripConstLocalType (t: typ) : typ = + let dc a = + if hasAttribute "const" a then + dropAttribute "const" a + else a + in + match t with + | TPtr (bt, a) -> + (* We want to be able to detect by pointer equality if the type has + * changed. So, don't realloc the type unless necessary. *) + let a' = dc a in if a != a' then TPtr(bt, a') else t + | TInt (ik, a) -> + let a' = dc a in if a != a' then TInt(ik, a') else t + | TFloat(fk, a) -> + let a' = dc a in if a != a' then TFloat(fk, a') else t + | TNamed (ti, a) -> + (* We must go and drop the consts from the typeinfo as well ! *) + let t' = stripConstLocalType ti.ttype in + if t != t' then begin + (* ignore (warn "Stripping \"const\" from typedef %s\n" ti.tname); *) + ti.ttype <- t' + end; + let a' = dc a in if a != a' then TNamed(ti, a') else t + + | TEnum (ei, a) -> + let a' = dc a in if a != a' then TEnum(ei, a') else t + + | TArray(bt, leno, a) -> + (* We never assign to the array. So, no need to change the const. But + * we must change it on the base type *) + let bt' = stripConstLocalType bt in + if bt' != bt then TArray(bt', leno, a) else t + + | TComp(ci, a) -> + (* Must change both this structure as well as its fields *) + List.iter + (fun f -> + let t' = stripConstLocalType f.ftype in + if t' != f.ftype then begin + ignore (warnOpt "Stripping \"const\" from field %s of %s\n" + f.fname (compFullName ci)); + f.ftype <- t' + end) + ci.cfields; + let a' = dc a in if a != a' then TComp(ci, a') else t + + (* We never assign functions either *) + | TFun(rt, args, va, a) -> t + | TVoid _ -> E.s (bug "cabs2cil: stripConstLocalType: void") + | TBuiltin_va_list a -> + let a' = dc a in if a != a' then TBuiltin_va_list a' else t + + +let constFoldTypeVisitor = object (self) + inherit nopCilVisitor + method vtype t: typ visitAction = + match t with + TArray(bt, Some len, a) -> + let len' = constFold true len in + ChangeDoChildrenPost ( + TArray(bt, Some len', a), + (fun x -> x) + ) + | _ -> DoChildren +end + +(* Const-fold any expressions that appear as array lengths in this type *) +let constFoldType (t:typ) : typ = + visitCilType constFoldTypeVisitor t + + + +(* Create a new temporary variable *) +let newTempVar typ = + if !currentFunctionFDEC == dummyFunDec then + E.s (bug "newTempVar called outside a function"); +(* ignore (E.log "stripConstLocalType(%a) for temporary\n" d_type typ); *) + let t' = stripConstLocalType typ in + (* Start with the name "tmp". The alpha converter will fix it *) + let vi = makeVarinfo false "tmp" t' in + alphaConvertVarAndAddToEnv false vi (* Do not add to the environment *) +(* + { vname = "tmp"; (* addNewVar will make the name fresh *) + vid = newVarId "tmp" false; + vglob = false; + vtype = t'; + vdecl = locUnknown; + vinline = false; + vattr = []; + vaddrof = false; + vreferenced = false; (* sm *) + vstorage = NoStorage; + } +*) + +let mkAddrOfAndMark ((b, off) as lval) : exp = + (* Mark the vaddrof flag if b is a variable *) + (match b with + Var vi -> vi.vaddrof <- true + | _ -> ()); + mkAddrOf lval + +(* Call only on arrays *) +let mkStartOfAndMark ((b, off) as lval) : exp = + (* Mark the vaddrof flag if b is a variable *) + (match b with + Var vi -> vi.vaddrof <- true + | _ -> ()); + let res = StartOf lval in + res + + + + (* Keep a set of self compinfo for composite types *) +let compInfoNameEnv : (string, compinfo) H.t = H.create 113 +let enumInfoNameEnv : (string, enuminfo) H.t = H.create 113 + + +let lookupTypeNoError (kind: string) + (n: string) : typ * location = + let kn = kindPlusName kind n in + match H.find env kn with + EnvTyp t, l -> t, l + | _ -> raise Not_found + +let lookupType (kind: string) + (n: string) : typ * location = + try + lookupTypeNoError kind n + with Not_found -> + E.s (error "Cannot find type %s (kind:%s)\n" n kind) + +(* Create the self ref cell and add it to the map. Return also an indication + * if this is a new one. *) +let createCompInfo (iss: bool) (n: string) : compinfo * bool = + (* Add to the self cell set *) + let key = (if iss then "struct " else "union ") ^ n in + try + H.find compInfoNameEnv key, false (* Only if not already in *) + with Not_found -> begin + (* Create a compinfo. This will have "cdefined" false. *) + let res = mkCompInfo iss n (fun _ -> []) [] in + H.add compInfoNameEnv key res; + res, true + end + +(* Create the self ref cell and add it to the map. Return an indication + * whether this is a new one. *) +let createEnumInfo (n: string) : enuminfo * bool = + (* Add to the self cell set *) + try + H.find enumInfoNameEnv n, false (* Only if not already in *) + with Not_found -> begin + (* Create a enuminfo *) + let enum = { ename = n; eitems = []; + eattr = []; ereferenced = false; } in + H.add enumInfoNameEnv n enum; + enum, true + end + + + (* kind is either "struct" or "union" or "enum" and n is a name *) +let findCompType (kind: string) (n: string) (a: attributes) = + let makeForward () = + (* This is a forward reference, either because we have not seen this + * struct already or because we want to create a version with different + * attributes *) + if kind = "enum" then + let enum, isnew = createEnumInfo n in + if isnew then + cabsPushGlobal (GEnumTagDecl (enum, !currentLoc)); + TEnum (enum, a) + else + let iss = if kind = "struct" then true else false in + let self, isnew = createCompInfo iss n in + if isnew then + cabsPushGlobal (GCompTagDecl (self, !currentLoc)); + TComp (self, a) + in + try + let old, _ = lookupTypeNoError kind n in (* already defined *) + let olda = typeAttrs old in + if Util.equals olda a then old else makeForward () + with Not_found -> makeForward () + + +(* A simple visitor that searchs a statement for labels *) +class canDropStmtClass pRes = object + inherit nopCilVisitor + + method vstmt s = + if s.labels != [] then + (pRes := false; SkipChildren) + else + if !pRes then DoChildren else SkipChildren + + method vinst _ = SkipChildren + method vexpr _ = SkipChildren + +end +let canDropStatement (s: stmt) : bool = + let pRes = ref true in + let vis = new canDropStmtClass pRes in + ignore (visitCilStmt vis s); + !pRes + +(**** Occasionally we see structs with no name and no fields *) + + +module BlockChunk = + struct + type chunk = { + stmts: stmt list; + postins: instr list; (* Some instructions to append at + * the ends of statements (in + * reverse order) *) + (* A list of case statements visible at the + * outer level *) + cases: (label * stmt) list + } + + let d_chunk () (c: chunk) = + dprintf "@[{ @[%a@] };@?%a@]" + (docList ~sep:(chr ';') (d_stmt ())) c.stmts + (docList ~sep:(chr ';') (d_instr ())) (List.rev c.postins) + + let empty = + { stmts = []; postins = []; cases = []; } + + let isEmpty (c: chunk) = + c.postins == [] && c.stmts == [] + + let isNotEmpty (c: chunk) = not (isEmpty c) + + let i2c (i: instr) = + { empty with postins = [i] } + + (* Occasionally, we'll have to push postins into the statements *) + let pushPostIns (c: chunk) : stmt list = + if c.postins = [] then c.stmts + else + let rec toLast = function + [{skind=Instr il} as s] as stmts -> + s.skind <- Instr (il @ (List.rev c.postins)); + stmts + + | [] -> [mkStmt (Instr (List.rev c.postins))] + + | a :: rest -> a :: toLast rest + in + compactStmts (toLast c.stmts) + + + let c2block (c: chunk) : block = + { battrs = []; + bstmts = pushPostIns c; + } + + (* Add an instruction at the end. Never refer to this instruction again + * after you call this *) + let (+++) (c: chunk) (i : instr) = + {c with postins = i :: c.postins} + + (* Append two chunks. Never refer to the original chunks after you call + * this. And especially never share c2 with somebody else *) + let (@@) (c1: chunk) (c2: chunk) = + { stmts = compactStmts (pushPostIns c1 @ c2.stmts); + postins = c2.postins; + cases = c1.cases @ c2.cases; + } + + let skipChunk = empty + + let returnChunk (e: exp option) (l: location) : chunk = + { stmts = [ mkStmt (Return(e, l)) ]; + postins = []; + cases = [] + } + + let ifChunk (be: exp) (l: location) (t: chunk) (e: chunk) : chunk = + + { stmts = [ mkStmt(If(be, c2block t, c2block e, l))]; + postins = []; + cases = t.cases @ e.cases; + } + + (* We can duplicate a chunk if it has a few simple statements, and if + * it does not have cases *) + let duplicateChunk (c: chunk) = (* raises Failure if you should not + * duplicate this chunk *) + if not !allowDuplication then + raise (Failure "cannot duplicate: disallowed by user"); + if c.cases != [] then raise (Failure "cannot duplicate: has cases") else + let pCount = ref (List.length c.postins) in + { stmts = + List.map + (fun s -> + if s.labels != [] then + raise (Failure "cannot duplicate: has labels"); +(* + (match s.skind with + If _ | Switch _ | (*Loop _*) + While _ | DoWhile _ | For _ | Block _ -> + raise (Failure "cannot duplicate: complex stmt") + | Instr il -> + pCount := !pCount + List.length il + | _ -> incr pCount); + if !pCount > 5 then raise (Failure ("cannot duplicate: too many instr")); +*) + (* We can just copy it because there is nothing to share here. + * Except maybe for the ref cell in Goto but it is Ok to share + * that, I think *) + { s with sid = s.sid}) c.stmts; + postins = c.postins; (* There is no shared stuff in instructions *) + cases = [] + } +(* + let duplicateChunk (c: chunk) = + if isEmpty c then c else raise (Failure ("cannot duplicate: isNotEmpty")) +*) + (* We can drop a chunk if it does not have labels inside *) + let canDrop (c: chunk) = + List.for_all canDropStatement c.stmts + +(* + let loopChunk (body: chunk) : chunk = + (* Make the statement *) + let loop = mkStmt (Loop (c2block body, !currentLoc, None, None)) in + { stmts = [ loop (* ; n *) ]; + postins = []; + cases = body.cases; + } +*) + + let whileChunk (e: exp) (body: chunk) : chunk = + let loop = mkStmt (While (e, c2block body, !currentLoc)) in + + { stmts = [ loop ]; + postins = []; + cases = body.cases; + } + + let doWhileChunk (e: exp) (body: chunk) : chunk = + let loop = mkStmt (DoWhile (e, c2block body, !currentLoc)) in + + { stmts = [ loop ]; + postins = []; + cases = body.cases; + } + + let forChunk (bInit: chunk) (e: exp) (bIter: chunk) + (body: chunk) : chunk = + let loop = mkStmt (For (c2block bInit, e, c2block bIter, + c2block body, !currentLoc)) in + + { stmts = [ loop ]; + postins = []; + cases = body.cases; + } + + let breakChunk (l: location) : chunk = + { stmts = [ mkStmt (Break l) ]; + postins = []; + cases = []; + } + + let continueChunk (l: location) : chunk = + { stmts = [ mkStmt (Continue l) ]; + postins = []; + cases = [] + } + + (* Keep track of the gotos *) + let backPatchGotos : (string, stmt ref list ref) H.t = H.create 17 + let addGoto (lname: string) (bref: stmt ref) : unit = + let gotos = + try + H.find backPatchGotos lname + with Not_found -> begin + let gotos = ref [] in + H.add backPatchGotos lname gotos; + gotos + end + in + gotos := bref :: !gotos + + (* Keep track of the labels *) + let labelStmt : (string, stmt) H.t = H.create 17 + let initLabels () = + H.clear backPatchGotos; + H.clear labelStmt + + let resolveGotos () = + H.iter + (fun lname gotos -> + try + let dest = H.find labelStmt lname in + List.iter (fun gref -> gref := dest) !gotos + with Not_found -> begin + E.s (error "Label %s not found\n" lname) + end) + backPatchGotos + + (* Get the first statement in a chunk. Might need to change the + * statements in the chunk *) + let getFirstInChunk (c: chunk) : stmt * stmt list = + (* Get the first statement and add the label to it *) + match c.stmts with + s :: _ -> s, c.stmts + | [] -> (* Add a statement *) + let n = mkEmptyStmt () in + n, n :: c.stmts + + let consLabel (l: string) (c: chunk) (loc: location) + (in_original_program_text : bool) : chunk = + (* Get the first statement and add the label to it *) + let labstmt, stmts' = getFirstInChunk c in + (* Add the label *) + labstmt.labels <- Label (l, loc, in_original_program_text) :: + labstmt.labels; + H.add labelStmt l labstmt; + if c.stmts == stmts' then c else {c with stmts = stmts'} + + let s2c (s:stmt) : chunk = + { stmts = [ s ]; + postins = []; + cases = []; + } + + let gotoChunk (ln: string) (l: location) : chunk = + let gref = ref dummyStmt in + addGoto ln gref; + { stmts = [ mkStmt (Goto (gref, l)) ]; + postins = []; + cases = []; + } + + let caseRangeChunk (el: exp list) (l: location) (next: chunk) = + let fst, stmts' = getFirstInChunk next in + let labels = List.map (fun e -> Case (e, l)) el in + let cases = List.map (fun l -> (l, fst)) labels in + fst.labels <- labels @ fst.labels; + { next with stmts = stmts'; cases = cases @ next.cases} + + let defaultChunk (l: location) (next: chunk) = + let fst, stmts' = getFirstInChunk next in + let lb = Default l in + fst.labels <- lb :: fst.labels; + { next with stmts = stmts'; cases = (lb, fst) :: next.cases} + + + let switchChunk (e: exp) (body: chunk) (l: location) = + (* Make the statement *) + let switch = mkStmt (Switch (e, c2block body, + List.map (fun (_, s) -> s) body.cases, + l)) in + { stmts = [ switch (* ; n *) ]; + postins = []; + cases = []; + } + + let mkFunctionBody (c: chunk) : block = + resolveGotos (); initLabels (); + if c.cases <> [] then + E.s (error "Switch cases not inside a switch statement\n"); + c2block c + + end + +open BlockChunk + + +(************ Labels ***********) +(* +(* Since we turn dowhile and for loops into while we need to take care in + * processing the continue statement. For each loop that we enter we place a + * marker in a list saying what kinds of loop it is. When we see a continue + * for a Non-while loop we must generate a label for the continue *) +type loopstate = + While + | NotWhile of string ref + +let continues : loopstate list ref = ref [] + +let startLoop iswhile = + continues := (if iswhile then While else NotWhile (ref "")) :: !continues +*) + +(* We need to take care while processing the continue statement... + * For each loop that we enter we place a marker in a list saying what + * chunk of code we must duplicate before each continue statement + * in order to preserve the semantics. *) +type loopMarker = + | DuplicateBeforeContinue of chunk + | ContinueUnchanged + +let continues : loopMarker list ref = ref [] + +let startLoop lstate = + continues := lstate :: !continues + +let continueDuplicateChunk (l: location) : chunk = + match !continues with + | [] -> E.s (error "continue not in a loop") + | DuplicateBeforeContinue c :: _ -> c @@ continueChunk l + | ContinueUnchanged :: _ -> continueChunk l + +(* Sometimes we need to create new label names *) +let newLabelName (base: string) = fst (newAlphaName false "label" base) + +(* +let continueOrLabelChunk (l: location) : chunk = + match !continues with + [] -> E.s (error "continue not in a loop") + | While :: _ -> continueChunk l + | NotWhile lr :: _ -> + if !lr = "" then begin + lr := newLabelName "__Cont" + end; + gotoChunk !lr l + +let consLabContinue (c: chunk) = + match !continues with + [] -> E.s (error "labContinue not in a loop") + | While :: rest -> c + | NotWhile lr :: rest -> if !lr = "" then c else consLabel !lr c !currentLoc false +*) + +let exitLoop () = + match !continues with + [] -> E.s (error "exit Loop not in a loop") + | _ :: rest -> continues := rest + + +(* In GCC we can have locally declared labels. *) +let genNewLocalLabel (l: string) = + (* Call the newLabelName to register the label name in the alpha conversion + * table. *) + let l' = newLabelName l in + (* Add it to the environment *) + addLocalToEnv (kindPlusName "label" l) (EnvLabel l'); + l' + +let lookupLabel (l: string) = + try + match H.find env (kindPlusName "label" l) with + EnvLabel l', _ -> l' + | _ -> raise Not_found + with Not_found -> + l + + +(** ALLOCA ***) +let allocaFun () = + let name = + if !msvcMode then "alloca" + (* Use __builtin_alloca where possible, because this can be used + even when gcc is invoked with -fno-builtin *) + else "__builtin_alloca" + in + let fdec = emptyFunction name in + fdec.svar.vtype <- + TFun(voidPtrType, Some [ ("len", !typeOfSizeOf, []) ], false, []); + fdec.svar + +(* Maps local variables that are variable sized arrays to the expression that + * denotes their length *) +let varSizeArrays : exp IH.t = IH.create 17 + +(**** EXP actions ***) +type expAction = + ADrop (* Drop the result. Only the + * side-effect is interesting *) + | ASet of lval * typ (* Put the result in a given lval, + * provided it matches the type. The + * type is the type of the lval. *) + | AExp of typ option (* Return the exp as usual. + * Optionally we can specify an + * expected type. This is useful for + * constants. The expected type is + * informational only, we do not + * guarantee that the converted + * expression has that type.You must + * use a doCast afterwards to make + * sure. *) + | AExpLeaveArrayFun (* Do it like an expression, but do + * not convert arrays of functions + * into pointers *) + + +(*** Result of compiling conditional expressions *) +type condExpRes = + CEExp of chunk * exp (* Do a chunk and then an expression *) + | CEAnd of condExpRes * condExpRes + | CEOr of condExpRes * condExpRes + | CENot of condExpRes + +(******** CASTS *********) +let integralPromotion (t : typ) : typ = (* c.f. ISO 6.3.1.1 *) + match unrollType t with + (* We assume that an IInt can hold even an IUShort *) + TInt ((IShort|IUShort|IChar|ISChar|IUChar), a) -> TInt(IInt, a) + | TInt _ -> t + | TEnum (_, a) -> TInt(IInt, a) + | t -> E.s (error "integralPromotion: not expecting %a" d_type t) + + +let arithmeticConversion (* c.f. ISO 6.3.1.8 *) + (t1: typ) + (t2: typ) : typ = + let checkToInt _ = () in (* dummies for now *) + let checkToFloat _ = () in + match unrollType t1, unrollType t2 with + TFloat(FLongDouble, _), _ -> checkToFloat t2; t1 + | _, TFloat(FLongDouble, _) -> checkToFloat t1; t2 + | TFloat(FDouble, _), _ -> checkToFloat t2; t1 + | _, TFloat (FDouble, _) -> checkToFloat t1; t2 + | TFloat(FFloat, _), _ -> checkToFloat t2; t1 + | _, TFloat (FFloat, _) -> checkToFloat t1; t2 + | _, _ -> begin + let t1' = integralPromotion t1 in + let t2' = integralPromotion t2 in + match unrollType t1', unrollType t2' with + TInt(IULongLong, _), _ -> checkToInt t2'; t1' + | _, TInt(IULongLong, _) -> checkToInt t1'; t2' + + (* We assume a long long is always larger than a long *) + | TInt(ILongLong, _), _ -> checkToInt t2'; t1' + | _, TInt(ILongLong, _) -> checkToInt t1'; t2' + + | TInt(IULong, _), _ -> checkToInt t2'; t1' + | _, TInt(IULong, _) -> checkToInt t1'; t2' + + + | TInt(ILong,_), TInt(IUInt,_) + when bitsSizeOf t1' <= bitsSizeOf t2' -> TInt(IULong,[]) + | TInt(IUInt,_), TInt(ILong,_) + when bitsSizeOf t2' <= bitsSizeOf t1' -> TInt(IULong,[]) + + | TInt(ILong, _), _ -> checkToInt t2'; t1' + | _, TInt(ILong, _) -> checkToInt t1'; t2' + + | TInt(IUInt, _), _ -> checkToInt t2'; t1' + | _, TInt(IUInt, _) -> checkToInt t1'; t2' + + | TInt(IInt, _), TInt (IInt, _) -> t1' + + | _, _ -> E.s (error "arithmeticConversion") + end + + +(* Specify whether the cast is from the source code *) +let rec castTo ?(fromsource=false) + (ot : typ) (nt : typ) (e : exp) : (typ * exp ) = +(* + ignore (E.log "%t: castTo:%s %a->%a\n" + d_thisloc + (if fromsource then "(source)" else "") + d_type ot d_type nt); +*) + if not fromsource && Util.equals (typeSig ot) (typeSig nt) then + (* Do not put the cast if it is not necessary, unless it is from the + * source. *) + (ot, e) + else begin + let result = (nt, + if !insertImplicitCasts || fromsource then mkCastT e ot nt else e) in +(* + ignore (E.log "castTo: ot=%a nt=%a\n result is %a\n" + d_type ot d_type nt + d_plainexp (snd result)); +*) + (* Now see if we can have a cast here *) + match ot, nt with + TNamed(r, _), _ -> castTo ~fromsource:fromsource r.ttype nt e + | _, TNamed(r, _) -> castTo ~fromsource:fromsource ot r.ttype e + | TInt(ikindo,_), TInt(ikindn,_) -> + (* We used to ignore attributes on integer-integer casts. Not anymore *) + (* if ikindo = ikindn then (nt, e) else *) + result + + | TPtr (told, _), TPtr(tnew, _) -> result + + | TInt _, TPtr _ -> result + + | TPtr _, TInt _ -> result + + | TArray _, TPtr _ -> result + + | TArray(t1,_,_), TArray(t2,None,_) when Util.equals (typeSig t1) (typeSig t2) -> (nt, e) + + | TPtr _, TArray(_,_,_) -> (nt, e) + + | TEnum _, TInt _ -> result + | TFloat _, (TInt _|TEnum _) -> result + | (TInt _|TEnum _), TFloat _ -> result + | TFloat _, TFloat _ -> result + | TInt _, TEnum _ -> result + | TEnum _, TEnum _ -> result + + | TEnum _, TPtr _ -> result + | TBuiltin_va_list _, (TInt _ | TPtr _) -> + result + + | (TInt _ | TPtr _), TBuiltin_va_list _ -> + ignore (warnOpt "Casting %a to __builtin_va_list" d_type ot); + result + + | TPtr _, TEnum _ -> + ignore (warnOpt "Casting a pointer into an enumeration type"); + result + + (* The expression is evaluated for its side-effects *) + | (TInt _ | TEnum _ | TPtr _ ), TVoid _ -> + (ot, e) + + (* Even casts between structs are allowed when we are only + * modifying some attributes *) + | TComp (comp1, a1), TComp (comp2, a2) when comp1.ckey = comp2.ckey -> + (nt, e) + + (** If we try to pass a transparent union value to a function + * expecting a transparent union argument, the argument type would + * have been changed to the type of the first argument, and we'll + * see a cast from a union to the type of the first argument. Turn + * that into a field access *) + | TComp(tunion, a1), nt -> begin + match isTransparentUnion ot with + None -> E.s (error "castTo %a -> %a@!" d_type ot d_type nt) + | Some fstfield -> begin + (* We do it now only if the expression is an lval *) + let e' = + match e with + Lval lv -> + Lval (addOffsetLval (Field(fstfield, NoOffset)) lv) + | _ -> E.s (unimp "castTo: transparent union expression is not an lval: %a\n" d_exp e) + in + (* Continue casting *) + castTo ~fromsource:fromsource fstfield.ftype nt e' + end + end + | _ -> E.s (error "cabs2cil: castTo %a -> %a@!" d_type ot d_type nt) + end + + +(* A cast that is used for conditional expressions. Pointers are Ok *) +let checkBool (ot : typ) (e : exp) : bool = + match unrollType ot with + TInt _ -> true + | TPtr _ -> true + | TEnum _ -> true + | TFloat _ -> true + | _ -> E.s (error "castToBool %a" d_type ot) + +(* Given an expression that is being coerced to bool, + is it a nonzero constant? *) +let rec isConstTrue (e:exp): bool = + match e with + | Const(CInt64 (n,_,_)) -> n <> Int64.zero + | Const(CChr c) -> 0 <> Char.code c + | Const(CStr _ | CWStr _) -> true + | Const(CReal(f, _, _)) -> f <> 0.0; + | CastE(_, e) -> isConstTrue e + | _ -> false + +(* Given an expression that is being coerced to bool, is it zero? + This is a more general version of Cil.isZero, which only handles integers. + On constant expressions, either isConstTrue or isConstFalse will hold. *) +let rec isConstFalse (e:exp): bool = + match e with + | Const(CInt64 (n,_,_)) -> n = Int64.zero + | Const(CChr c) -> 0 = Char.code c + | Const(CReal(f, _, _)) -> f = 0.0; + | CastE(_, e) -> isConstFalse e + | _ -> false + + + +(* We have our own version of addAttributes that does not allow duplicates *) +let cabsAddAttributes al0 (al: attributes) : attributes = + if al0 == [] then al else + List.fold_left + (fun acc (Attr(an, _) as a) -> + (* See if the attribute is already in there *) + match filterAttributes an acc with + [] -> addAttribute a acc (* Nothing with that name *) + | a' :: _ -> + if Util.equals a a' then + acc (* Already in *) + else begin + ignore (warnOpt + "Duplicate attribute %a along with %a" + d_attr a d_attr a'); + (* let acc' = dropAttribute an acc in *) + (** Keep both attributes *) + addAttribute a acc + end) + al + al0 + +let cabsTypeAddAttributes a0 t = + begin + match a0 with + | [] -> + (* no attributes, keep same type *) + t + | _ -> + (* anything else: add a0 to existing attributes *) + let add (a: attributes) = cabsAddAttributes a0 a in + match t with + TVoid a -> TVoid (add a) + | TInt (ik, a) -> + (* Here we have to watch for the mode attribute *) +(* sm: This stuff is to handle a GCC extension where you can request integers*) +(* of specific widths using the "mode" attribute syntax; for example: *) +(* typedef int int8_t __attribute__ ((__mode__ ( __QI__ ))) ; *) +(* The cryptic "__QI__" defines int8_t to be 8 bits wide, instead of the *) +(* 32 bits you'd guess if you didn't know about "mode". The relevant *) +(* testcase is test/small2/mode_sizes.c, and it was inspired by my *) +(* /usr/include/sys/types.h. *) +(* *) +(* A consequence of this handling is that we throw away the mode *) +(* attribute, which we used to go out of our way to avoid printing anyway.*) + let ik', a0' = + (* Go over the list of new attributes and come back with a + * filtered list and a new integer kind *) + List.fold_left + (fun (ik', a0') a0one -> + match a0one with + Attr("mode", [ACons(mode,[])]) -> begin + (trace "gccwidth" (dprintf "I see mode %s applied to an int type\n" + mode (* #$@!#@ ML! d_type t *) )); + (* the cases below encode the 32-bit assumption.. *) + match (ik', mode) with + | (IInt, "__QI__") -> (IChar, a0') + | (IInt, "__byte__") -> (IChar, a0') + | (IInt, "__HI__") -> (IShort, a0') + | (IInt, "__SI__") -> (IInt, a0') (* same as t *) + | (IInt, "__word__") -> (IInt, a0') + | (IInt, "__pointer__") -> (IInt, a0') + | (IInt, "__DI__") -> (ILongLong, a0') + + | (IUInt, "__QI__") -> (IUChar, a0') + | (IUInt, "__byte__") -> (IUChar, a0') + | (IUInt, "__HI__") -> (IUShort, a0') + | (IUInt, "__SI__") -> (IUInt, a0') + | (IUInt, "__word__") -> (IUInt, a0') + | (IUInt, "__pointer__")-> (IUInt, a0') + | (IUInt, "__DI__") -> (IULongLong, a0') + + | _ -> + (ignore (error "GCC width mode %s applied to unexpected type, or unexpected mode" + mode)); + (ik', a0one :: a0') + + end + | _ -> (ik', a0one :: a0')) + (ik, []) + a0 + in + TInt (ik', cabsAddAttributes a0' a) + + | TFloat (fk, a) -> TFloat (fk, add a) + | TEnum (enum, a) -> TEnum (enum, add a) + | TPtr (t, a) -> TPtr (t, add a) + | TArray (t, l, a) -> TArray (t, l, add a) + | TFun (t, args, isva, a) -> TFun(t, args, isva, add a) + | TComp (comp, a) -> TComp (comp, add a) + | TNamed (t, a) -> TNamed (t, add a) + | TBuiltin_va_list a -> TBuiltin_va_list (add a) + end + + +(* Do types *) + (* Combine the types. Raises the Failure exception with an error message. + * isdef says whether the new type is for a definition *) +type combineWhat = + CombineFundef (* The new definition is for a function definition. The old + * is for a prototype *) + | CombineFunarg (* Comparing a function argument type with an old prototype + * arg *) + | CombineFunret (* Comparing the return of a function with that from an old + * prototype *) + | CombineOther + +(* We sometimes want to succeed in combining two structure types that are + * identical except for the names of the structs. We keep a list of types + * that are known to be equal *) +let isomorphicStructs : (string * string, bool) H.t = H.create 15 + +let rec combineTypes (what: combineWhat) (oldt: typ) (t: typ) : typ = + match oldt, t with + | TVoid olda, TVoid a -> TVoid (cabsAddAttributes olda a) + | TInt (oldik, olda), TInt (ik, a) -> + let combineIK oldk k = + if oldk = k then oldk else + (* GCC allows a function definition to have a more precise integer + * type than a prototype that says "int" *) + if not !msvcMode && oldk = IInt && bitsSizeOf t <= 32 + && (what = CombineFunarg || what = CombineFunret) then + k + else + raise (Failure "different integer types") + in + TInt (combineIK oldik ik, cabsAddAttributes olda a) + | TFloat (oldfk, olda), TFloat (fk, a) -> + let combineFK oldk k = + if oldk = k then oldk else + (* GCC allows a function definition to have a more precise integer + * type than a prototype that says "double" *) + if not !msvcMode && oldk = FDouble && k = FFloat + && (what = CombineFunarg || what = CombineFunret) then + k + else + raise (Failure "different floating point types") + in + TFloat (combineFK oldfk fk, cabsAddAttributes olda a) + | TEnum (_, olda), TEnum (ei, a) -> + TEnum (ei, cabsAddAttributes olda a) + + (* Strange one. But seems to be handled by GCC *) + | TEnum (oldei, olda) , TInt(IInt, a) -> TEnum(oldei, + cabsAddAttributes olda a) + (* Strange one. But seems to be handled by GCC *) + | TInt(IInt, olda), TEnum (ei, a) -> TEnum(ei, cabsAddAttributes olda a) + + + | TComp (oldci, olda) , TComp (ci, a) -> + if oldci.cstruct <> ci.cstruct then + raise (Failure "different struct/union types"); + let comb_a = cabsAddAttributes olda a in + if oldci.cname = ci.cname then + TComp (oldci, comb_a) + else + (* Now maybe they are actually the same *) + if H.mem isomorphicStructs (oldci.cname, ci.cname) then + (* We know they are the same *) + TComp (oldci, comb_a) + else begin + (* If one has 0 fields (undefined) while the other has some fields + * we accept it *) + let oldci_nrfields = List.length oldci.cfields in + let ci_nrfields = List.length ci.cfields in + if oldci_nrfields = 0 then + TComp (ci, comb_a) + else if ci_nrfields = 0 then + TComp (oldci, comb_a) + else begin + (* Make sure that at least they have the same number of fields *) + if oldci_nrfields <> ci_nrfields then begin +(* + ignore (E.log "different number of fields: %s had %d and %s had %d\n" + oldci.cname oldci_nrfields + ci.cname ci_nrfields); +*) + raise (Failure "different structs(number of fields)"); + end; + (* Assume they are the same *) + H.add isomorphicStructs (oldci.cname, ci.cname) true; + H.add isomorphicStructs (ci.cname, oldci.cname) true; + (* Check that the fields are isomorphic and watch for Failure *) + (try + List.iter2 (fun oldf f -> + if oldf.fbitfield <> f.fbitfield then + raise (Failure "different structs(bitfield info)"); + if oldf.fattr <> f.fattr then + raise (Failure "different structs(field attributes)"); + (* Make sure the types are compatible *) + ignore (combineTypes CombineOther oldf.ftype f.ftype); + ) oldci.cfields ci.cfields + with Failure _ as e -> begin + (* Our assumption was wrong. Forget the isomorphism *) + ignore (E.log "\tFailed in our assumption that %s and %s are isomorphic\n" + oldci.cname ci.cname); + H.remove isomorphicStructs (oldci.cname, ci.cname); + H.remove isomorphicStructs (ci.cname, oldci.cname); + raise e + end); + (* We get here if we succeeded *) + TComp (oldci, comb_a) + end + end + + | TArray (oldbt, oldsz, olda), TArray (bt, sz, a) -> + let newbt = combineTypes CombineOther oldbt bt in + let newsz = + match oldsz, sz with + None, Some _ -> sz + | Some _, None -> oldsz + | None, None -> sz + | Some oldsz', Some sz' -> + (* They are not structurally equal. But perhaps they are equal if + * we evaluate them. Check first machine independent comparison *) + let checkEqualSize (machdep: bool) = + Util.equals (constFold machdep oldsz') + (constFold machdep sz') + in + if checkEqualSize false then + oldsz + else if checkEqualSize true then begin + ignore (warn "Array type comparison succeeds only based on machine-dependent constant evaluation: %a and %a\n" + d_exp oldsz' d_exp sz'); + oldsz + end else + raise (Failure "different array lengths") + + in + TArray (newbt, newsz, cabsAddAttributes olda a) + + | TPtr (oldbt, olda), TPtr (bt, a) -> + TPtr (combineTypes CombineOther oldbt bt, cabsAddAttributes olda a) + + | TFun (_, _, _, [Attr("missingproto",_)]), TFun _ -> t + + | TFun (oldrt, oldargs, oldva, olda), TFun (rt, args, va, a) -> + let newrt = combineTypes + (if what = CombineFundef then CombineFunret else CombineOther) + oldrt rt + in + if oldva != va then + raise (Failure "diferent vararg specifiers"); + (* If one does not have arguments, believe the one with the + * arguments *) + let newargs = + if oldargs = None then args else + if args = None then oldargs else + let oldargslist = argsToList oldargs in + let argslist = argsToList args in + if List.length oldargslist <> List.length argslist then + raise (Failure "different number of arguments") + else begin + (* Go over the arguments and update the old ones with the + * adjusted types *) + Some + (List.map2 + (fun (on, ot, oa) (an, at, aa) -> + (* Update the names. Always prefer the new name. This is + * very important if the prototype uses different names than + * the function definition. *) + let n = if an <> "" then an else on in + let t = + combineTypes + (if what = CombineFundef then + CombineFunarg else CombineOther) + ot at + in + let a = addAttributes oa aa in + (n, t, a)) + oldargslist argslist) + end + in + TFun (newrt, newargs, oldva, cabsAddAttributes olda a) + + | TNamed (oldt, olda), TNamed (t, a) when oldt.tname = t.tname -> + TNamed (oldt, cabsAddAttributes olda a) + + | TBuiltin_va_list olda, TBuiltin_va_list a -> + TBuiltin_va_list (cabsAddAttributes olda a) + + (* Unroll first the new type *) + | _, TNamed (t, a) -> + let res = combineTypes what oldt t.ttype in + cabsTypeAddAttributes a res + + (* And unroll the old type as well if necessary *) + | TNamed (oldt, a), _ -> + let res = combineTypes what oldt.ttype t in + cabsTypeAddAttributes a res + + | _ -> raise (Failure "different type constructors") + + +(* Create and cache varinfo's for globals. Starts with a varinfo but if the + * global has been declared already it might come back with another varinfo. + * Returns the varinfo to use (might be the old one), and an indication + * whether the variable exists already in the environment *) +let makeGlobalVarinfo (isadef: bool) (vi: varinfo) : varinfo * bool = + try (* See if already defined, in the global environment. We could also + * look it up in the whole environment but in that case we might see a + * local. This can happen when we declare an extern variable with + * global scope but we are in a local scope. *) + let oldvi, oldloc = lookupGlobalVar vi.vname in + (* It was already defined. We must reuse the varinfo. But clean up the + * storage. *) + let newstorage = (** See 6.2.2 *) + match oldvi.vstorage, vi.vstorage with + (* Extern and something else is that thing *) + | Extern, other + | other, Extern -> other + + | NoStorage, other + | other, NoStorage -> other + + + | _ -> + if vi.vstorage != oldvi.vstorage then + ignore (warn + "Inconsistent storage specification for %s. Previous declaration: %a" + vi.vname d_loc oldloc); + vi.vstorage + in + oldvi.vinline <- oldvi.vinline || vi.vinline; + oldvi.vstorage <- newstorage; + (* Union the attributes *) + oldvi.vattr <- cabsAddAttributes oldvi.vattr vi.vattr; + begin + try + oldvi.vtype <- + combineTypes + (if isadef then CombineFundef else CombineOther) + oldvi.vtype vi.vtype; + with Failure reason -> + ignore (E.log "old type = %a\n" d_plaintype oldvi.vtype); + ignore (E.log "new type = %a\n" d_plaintype vi.vtype); + E.s (error "Declaration of %s does not match previous declaration from %a (%s)." + vi.vname d_loc oldloc reason) + end; + + (* Found an old one. Keep the location always from the definition *) + if isadef then begin + oldvi.vdecl <- vi.vdecl; + end; + oldvi, true + + with Not_found -> begin (* A new one. *) + (* Announce the name to the alpha conversion table. This will not + * actually change the name of the vi. See the definition of + * alphaConvertVarAndAddToEnv *) + alphaConvertVarAndAddToEnv true vi, false + end + +let conditionalConversion (t2: typ) (t3: typ) : typ = + let tresult = (* ISO 6.5.15 *) + match unrollType t2, unrollType t3 with + (TInt _ | TEnum _ | TFloat _), + (TInt _ | TEnum _ | TFloat _) -> + arithmeticConversion t2 t3 + | TComp (comp2,_), TComp (comp3,_) + when comp2.ckey = comp3.ckey -> t2 + | TPtr(_, _), TPtr(TVoid _, _) -> t2 + | TPtr(TVoid _, _), TPtr(_, _) -> t3 + | TPtr _, TPtr _ when Util.equals (typeSig t2) (typeSig t3) -> t2 + | TPtr _, TInt _ -> t2 (* most likely comparison with 0 *) + | TInt _, TPtr _ -> t3 (* most likely comparison with 0 *) + + (* When we compare two pointers of diffent type, we combine them + * using the same algorithm when combining multiple declarations of + * a global *) + | (TPtr _) as t2', (TPtr _ as t3') -> begin + try combineTypes CombineOther t2' t3' + with Failure msg -> begin + ignore (warn "A.QUESTION: %a does not match %a (%s)" + d_type (unrollType t2) d_type (unrollType t3) msg); + t2 (* Just pick one *) + end + end + | _, _ -> E.s (error "A.QUESTION for invalid combination of types") + in + tresult + +(* Some utilitites for doing initializers *) + +let debugInit = false + +type preInit = + | NoInitPre + | SinglePre of exp + | CompoundPre of int ref (* the maximum used index *) + * preInit array ref (* an array with initializers *) + +(* Instructions on how to handle designators *) +type handleDesignators = + | Handle (* Handle them yourself *) + | DoNotHandle (* Do not handle them your self *) + | HandleAsNext (* First behave as if you have a NEXT_INIT. Useful for going + * into nested designators *) + | HandleFirst (* Handle only the first designator *) + +(* Set an initializer *) +let rec setOneInit (this: preInit) + (o: offset) (e: exp) : preInit = + match o with + NoOffset -> SinglePre e + | _ -> + let idx, (* Index in the current comp *) + restoff (* Rest offset *) = + match o with + | Index(Const(CInt64(i,_,_)), off) -> Int64.to_int i, off + | Field (f, off) -> + (* Find the index of the field *) + let rec loop (idx: int) = function + [] -> E.s (bug "Cannot find field %s" f.fname) + | f' :: _ when f'.fname = f.fname -> idx + | _ :: restf -> loop (idx + 1) restf + in + loop 0 f.fcomp.cfields, off + | _ -> E.s (bug "setOneInit: non-constant index") + in + let pMaxIdx, pArray = + match this with + NoInitPre -> (* No initializer so far here *) + ref idx, ref (Array.create (max 32 (idx + 1)) NoInitPre) + + | CompoundPre (pMaxIdx, pArray) -> + if !pMaxIdx < idx then begin + pMaxIdx := idx; + (* Maybe we also need to grow the array *) + let l = Array.length !pArray in + if l <= idx then begin + let growBy = max (max 32 (idx + 1 - l)) (l / 2) in + let newarray = Array.make (growBy + idx) NoInitPre in + Array.blit !pArray 0 newarray 0 l; + pArray := newarray + end + end; + pMaxIdx, pArray + | SinglePre e -> + E.s (unimp "Index %d is already initialized" idx) + in + assert (idx >= 0 && idx < Array.length !pArray); + let this' = setOneInit !pArray.(idx) restoff e in + !pArray.(idx) <- this'; + CompoundPre (pMaxIdx, pArray) + + +(* collect a CIL initializer, given the original syntactic initializer + * 'preInit'; this returns a type too, since initialization of an array + * with unspecified size actually changes the array's type + * (ANSI C, 6.7.8, para 22) *) +let rec collectInitializer + (this: preInit) + (thistype: typ) : (init * typ) = + if this = NoInitPre then (makeZeroInit thistype), thistype + else + match unrollType thistype, this with + | _ , SinglePre e -> SingleInit e, thistype + | TArray (bt, leno, at), CompoundPre (pMaxIdx, pArray) -> + let (len: int), newtype = + (* normal case: use array's declared length, newtype=thistype *) + match leno with + Some len -> begin + match constFold true len with + Const(CInt64(ni, _, _)) when ni >= 0L -> + (Int64.to_int ni), TArray(bt,leno,at) + + | _ -> E.s (error "Array length is not a constant expression %a" + d_exp len) + end + | _ -> + (* unsized array case, length comes from initializers *) + (!pMaxIdx + 1, + TArray (bt, Some (integer (!pMaxIdx + 1)), at)) + in + if !pMaxIdx >= len then + E.s (E.bug "collectInitializer: too many initializers(%d >= %d)\n" + !pMaxIdx len); + (* len could be extremely big. So omit the last initializers, if they + * are many (more than 16) *) +(* + ignore (E.log "collectInitializer: len = %d, pMaxIdx= %d\n" + len !pMaxIdx); *) + let endAt = + if len - 1 > !pMaxIdx + 16 then + !pMaxIdx + else + len - 1 + in + (* Make one zero initializer to be used next *) + let oneZeroInit = makeZeroInit bt in + let rec collect (acc: (offset * init) list) (idx: int) = + if idx = -1 then acc + else + let thisi = + if idx > !pMaxIdx then oneZeroInit + else (fst (collectInitializer !pArray.(idx) bt)) + in + collect ((Index(integer idx, NoOffset), thisi) :: acc) (idx - 1) + in + + CompoundInit (newtype, collect [] endAt), newtype + + | TComp (comp, _), CompoundPre (pMaxIdx, pArray) when comp.cstruct -> + let rec collect (idx: int) = function + [] -> [] + | f :: restf -> + if f.fname = missingFieldName then + collect (idx + 1) restf + else + let thisi = + if idx > !pMaxIdx then + makeZeroInit f.ftype + else + collectFieldInitializer !pArray.(idx) f + in + (Field(f, NoOffset), thisi) :: collect (idx + 1) restf + in + CompoundInit (thistype, collect 0 comp.cfields), thistype + + | TComp (comp, _), CompoundPre (pMaxIdx, pArray) when not comp.cstruct -> + (* Find the field to initialize *) + let rec findField (idx: int) = function + [] -> E.s (bug "collectInitializer: union") + | _ :: rest when idx < !pMaxIdx && !pArray.(idx) = NoInitPre -> + findField (idx + 1) rest + | f :: _ when idx = !pMaxIdx -> + Field(f, NoOffset), + collectFieldInitializer !pArray.(idx) f + | _ -> E.s (error "Can initialize only one field for union") + in + if !msvcMode && !pMaxIdx != 0 then + ignore (warn "On MSVC we can initialize only the first field of a union"); + CompoundInit (thistype, [ findField 0 comp.cfields ]), thistype + + | _ -> E.s (unimp "collectInitializer") + +and collectFieldInitializer + (this: preInit) + (f: fieldinfo) : init = + (* collect, and rewrite type *) + let init,newtype = (collectInitializer this f.ftype) in + f.ftype <- newtype; + init + + +type stackElem = + InArray of offset * typ * int * int ref (* offset of parent, base type, + * length, current index. If the + * array length is unspecified we + * use Int.max_int *) + | InComp of offset * compinfo * fieldinfo list (* offset of parent, + base comp, current fields *) + + +(* A subobject is given by its address. The address is read from the end of + * the list (the bottom of the stack), starting with the current object *) +type subobj = { mutable stack: stackElem list; (* With each stack element we + * store the offset of its + * PARENT *) + mutable eof: bool; (* The stack is empty and we reached the + * end *) + mutable soTyp: typ; (* The type of the subobject. Set using + * normalSubobj after setting stack. *) + mutable soOff: offset; (* The offset of the subobject. Set + * using normalSubobj after setting + * stack. *) + curTyp: typ; (* Type of current object. See ISO for + * the definition of the current object *) + curOff: offset; (* The offset of the current obj *) + host: varinfo; (* The host that we are initializing. + * For error messages *) + } + + +(* Make a subobject iterator *) +let rec makeSubobj + (host: varinfo) + (curTyp: typ) + (curOff: offset) = + let so = + { host = host; curTyp = curTyp; curOff = curOff; + stack = []; eof = false; + (* The next are fixed by normalSubobj *) + soTyp = voidType; soOff = NoOffset } in + normalSubobj so; + so + + (* Normalize a stack so the we always point to a valid subobject. Do not + * descend into type *) +and normalSubobj (so: subobj) : unit = + match so.stack with + [] -> so.soOff <- so.curOff; so.soTyp <- so.curTyp + (* The array is over *) + | InArray (parOff, bt, leno, current) :: rest -> + if leno = !current then begin (* The array is over *) + if debugInit then ignore (E.log "Past the end of array\n"); + so.stack <- rest; + advanceSubobj so + end else begin + so.soTyp <- bt; + so.soOff <- addOffset (Index(integer !current, NoOffset)) parOff + end + + (* The fields are over *) + | InComp (parOff, comp, nextflds) :: rest -> + if nextflds == [] then begin (* No more fields here *) + if debugInit then ignore (E.log "Past the end of structure\n"); + so.stack <- rest; + advanceSubobj so + end else begin + let fst = List.hd nextflds in + so.soTyp <- fst.ftype; + so.soOff <- addOffset (Field(fst, NoOffset)) parOff + end + + (* Advance to the next subobject. Always apply to a normalized object *) +and advanceSubobj (so: subobj) : unit = + if so.eof then E.s (bug "advanceSubobj past end"); + match so.stack with + | [] -> if debugInit then ignore (E.log "Setting eof to true\n"); + so.eof <- true + | InArray (parOff, bt, leno, current) :: rest -> + if debugInit then ignore (E.log " Advancing to [%d]\n" (!current + 1)); + (* so.stack <- InArray (parOff, bt, leno, current + 1) :: rest; *) + incr current; + normalSubobj so + + (* The fields are over *) + | InComp (parOff, comp, nextflds) :: rest -> + if debugInit then + ignore (E.log "Advancing past .%s\n" (List.hd nextflds).fname); + let flds' = try List.tl nextflds with _ -> E.s (bug "advanceSubobj") in + so.stack <- InComp(parOff, comp, flds') :: rest; + normalSubobj so + + + +(* Find the fields to initialize in a composite. *) +let fieldsToInit + (comp: compinfo) + (designator: string option) + : fieldinfo list = + (* Never look at anonymous fields *) + let flds1 = + List.filter (fun f -> f.fname <> missingFieldName) comp.cfields in + let flds2 = + match designator with + None -> flds1 + | Some fn -> + let rec loop = function + [] -> E.s (error "Cannot find designated field %s" fn) + | (f :: _) as nextflds when f.fname = fn -> nextflds + | _ :: rest -> loop rest + in + loop flds1 + in + (* If it is a union we only initialize one field *) + match flds2 with + [] -> [] + | (f :: rest) as toinit -> + if comp.cstruct then toinit else [f] + + +let integerArrayLength (leno: exp option) : int = + match leno with + None -> max_int + | Some len -> begin + try lenOfArray leno + with LenOfArray -> + E.s (error "Initializing non-constant-length array\n length=%a\n" + d_exp len) + end + +(* sm: I'm sure something like this already exists, but ... *) +let isNone (o : 'a option) : bool = + match o with + | None -> true + | Some _ -> false + + +let annonCompFieldNameId = ref 0 +let annonCompFieldName = "__annonCompField" + + + +(* Utility ***) +let rec replaceLastInList + (lst: A.expression list) + (how: A.expression -> A.expression) : A.expression list= + match lst with + [] -> [] + | [e] -> [how e] + | h :: t -> h :: replaceLastInList t how + + + + + +let convBinOp (bop: A.binary_operator) : binop = + match bop with + A.ADD -> PlusA + | A.SUB -> MinusA + | A.MUL -> Mult + | A.DIV -> Div + | A.MOD -> Mod + | A.BAND -> BAnd + | A.BOR -> BOr + | A.XOR -> BXor + | A.SHL -> Shiftlt + | A.SHR -> Shiftrt + | A.EQ -> Eq + | A.NE -> Ne + | A.LT -> Lt + | A.LE -> Le + | A.GT -> Gt + | A.GE -> Ge + | _ -> E.s (error "convBinOp") + +(**** PEEP-HOLE optimizations ***) +let afterConversion (c: chunk) : chunk = + (* Now scan the statements and find Instr blocks *) + + (** We want to collapse sequences of the form "tmp = f(); v = tmp". This + * will help significantly with the handling of calls to malloc, where it + * is important to have the cast at the same place as the call *) + let collapseCallCast = function + Call(Some(Var vi, NoOffset), f, args, l), + Set(destlv, CastE (newt, Lval(Var vi', NoOffset)), _) + when (not vi.vglob && + String.length vi.vname >= 3 && + (* Watch out for the possibility that we have an implied cast in + * the call *) + (let tcallres = + match unrollType (typeOf f) with + TFun (rt, _, _, _) -> rt + | _ -> E.s (E.bug "Function call to a non-function") + in + Util.equals (typeSig tcallres) (typeSig vi.vtype) && + Util.equals (typeSig newt) (typeSig (typeOfLval destlv))) && + IH.mem callTempVars vi.vid && + vi' == vi) + -> Some [Call(Some destlv, f, args, l)] + | i1,i2 -> None + in + (* First add in the postins *) + let sl = pushPostIns c in + peepHole2 collapseCallCast sl; + { c with stmts = sl; postins = [] } + +(***** Try to suggest a name for the anonymous structures *) +let suggestAnonName (nl: A.name list) = + match nl with + [] -> "" + | (n, _, _, _) :: _ -> n + + +(** Optional constant folding of binary operations *) +let optConstFoldBinOp (machdep: bool) (bop: binop) + (e1: exp) (e2:exp) (t: typ) = + if !lowerConstants then + constFoldBinOp machdep bop e1 e2 t + else + BinOp(bop, e1, e2, t) + +(****** TYPE SPECIFIERS *******) +let rec doSpecList (suggestedAnonName: string) (* This string will be part of + * the names for anonymous + * structures and enums *) + (specs: A.spec_elem list) + (* Returns the base type, the storage, whether it is inline and the + * (unprocessed) attributes *) + : typ * storage * bool * A.attribute list = + (* Do one element and collect the type specifiers *) + let isinline = ref false in (* If inline appears *) + (* The storage is placed here *) + let storage : storage ref = ref NoStorage in + + (* Collect the attributes. Unfortunately, we cannot treat GCC + * __attributes__ and ANSI C const/volatile the same way, since they + * associate with structures differently. Specifically, ANSI + * qualifiers never apply to structures (ISO 6.7.3), whereas GCC + * attributes always do (GCC manual 4.30). Therefore, they are + * collected and processed separately. *) + let attrs : A.attribute list ref = ref [] in (* __attribute__, etc. *) + let cvattrs : A.cvspec list ref = ref [] in (* const/volatile *) + + let doSpecElem (se: A.spec_elem) + (acc: A.typeSpecifier list) + : A.typeSpecifier list = + match se with + A.SpecTypedef -> acc + | A.SpecInline -> isinline := true; acc + | A.SpecStorage st -> + if !storage <> NoStorage then + E.s (error "Multiple storage specifiers"); + let sto' = + match st with + A.NO_STORAGE -> NoStorage + | A.AUTO -> NoStorage + | A.REGISTER -> Register + | A.STATIC -> Static + | A.EXTERN -> Extern + in + storage := sto'; + acc + + | A.SpecCV cv -> cvattrs := cv :: !cvattrs; acc + | A.SpecAttr a -> attrs := a :: !attrs; acc + | A.SpecType ts -> ts :: acc + | A.SpecPattern _ -> E.s (E.bug "SpecPattern in cabs2cil input") + in + (* Now scan the list and collect the type specifiers. Preserve the order *) + let tspecs = List.fold_right doSpecElem specs [] in + + let tspecs' = + (* GCC allows a named type that appears first to be followed by things + * like "short", "signed", "unsigned" or "long". *) + match tspecs with + A.Tnamed n :: (_ :: _ as rest) when not !msvcMode -> + (* If rest contains "short" or "long" then drop the Tnamed *) + if List.exists (function A.Tshort -> true + | A.Tlong -> true | _ -> false) rest then + rest + else + tspecs + + | _ -> tspecs + in + (* Sort the type specifiers *) + let sortedspecs = + let order = function (* Don't change this *) + | A.Tvoid -> 0 + | A.Tsigned -> 1 + | A.Tunsigned -> 2 + | A.Tchar -> 3 + | A.Tshort -> 4 + | A.Tlong -> 5 + | A.Tint -> 6 + | A.Tint64 -> 7 + | A.Tfloat -> 8 + | A.Tdouble -> 9 + | _ -> 10 (* There should be at most one of the others *) + in + List.stable_sort (fun ts1 ts2 -> compare (order ts1) (order ts2)) tspecs' + in + let getTypeAttrs () : A.attribute list = + (* Partitions the attributes in !attrs. + Type attributes are removed from attrs and returned, so that they + can go into the type definition. Name attributes are left in attrs, + so they will be returned by doSpecAttr and used in the variable + declaration. + Testcase: small1/attr9.c *) + let an, af, at = cabsPartitionAttributes ~default:AttrType !attrs in + attrs := an; (* Save the name attributes for later *) + if af <> [] then + E.s (error "Invalid position for function type attributes."); + at + in + + (* And now try to make sense of it. See ISO 6.7.2 *) + let bt = + match sortedspecs with + [A.Tvoid] -> TVoid [] + | [A.Tchar] -> TInt(IChar, []) + | [A.Tsigned; A.Tchar] -> TInt(ISChar, []) + | [A.Tunsigned; A.Tchar] -> TInt(IUChar, []) + + | [A.Tshort] -> TInt(IShort, []) + | [A.Tsigned; A.Tshort] -> TInt(IShort, []) + | [A.Tshort; A.Tint] -> TInt(IShort, []) + | [A.Tsigned; A.Tshort; A.Tint] -> TInt(IShort, []) + + | [A.Tunsigned; A.Tshort] -> TInt(IUShort, []) + | [A.Tunsigned; A.Tshort; A.Tint] -> TInt(IUShort, []) + + | [] -> TInt(IInt, []) + | [A.Tint] -> TInt(IInt, []) + | [A.Tsigned] -> TInt(IInt, []) + | [A.Tsigned; A.Tint] -> TInt(IInt, []) + + | [A.Tunsigned] -> TInt(IUInt, []) + | [A.Tunsigned; A.Tint] -> TInt(IUInt, []) + + | [A.Tlong] -> TInt(ILong, []) + | [A.Tsigned; A.Tlong] -> TInt(ILong, []) + | [A.Tlong; A.Tint] -> TInt(ILong, []) + | [A.Tsigned; A.Tlong; A.Tint] -> TInt(ILong, []) + + | [A.Tunsigned; A.Tlong] -> TInt(IULong, []) + | [A.Tunsigned; A.Tlong; A.Tint] -> TInt(IULong, []) + + | [A.Tlong; A.Tlong] -> TInt(ILongLong, []) + | [A.Tsigned; A.Tlong; A.Tlong] -> TInt(ILongLong, []) + | [A.Tlong; A.Tlong; A.Tint] -> TInt(ILongLong, []) + | [A.Tsigned; A.Tlong; A.Tlong; A.Tint] -> TInt(ILongLong, []) + + | [A.Tunsigned; A.Tlong; A.Tlong] -> TInt(IULongLong, []) + | [A.Tunsigned; A.Tlong; A.Tlong; A.Tint] -> TInt(IULongLong, []) + + (* int64 is to support MSVC *) + | [A.Tint64] -> TInt(ILongLong, []) + | [A.Tsigned; A.Tint64] -> TInt(ILongLong, []) + + | [A.Tunsigned; A.Tint64] -> TInt(IULongLong, []) + + | [A.Tfloat] -> TFloat(FFloat, []) + | [A.Tdouble] -> TFloat(FDouble, []) + + | [A.Tlong; A.Tdouble] -> TFloat(FLongDouble, []) + + (* Now the other type specifiers *) + | [A.Tnamed n] -> begin + if n = "__builtin_va_list" && + Machdep.gccHas__builtin_va_list then begin + TBuiltin_va_list [] + end else + let t = + match lookupType "type" n with + (TNamed _) as x, _ -> x + | typ -> E.s (error "Named type %s is not mapped correctly\n" n) + in + t + end + + | [A.Tstruct (n, None, _)] -> (* A reference to a struct *) + if n = "" then E.s (error "Missing struct tag on incomplete struct"); + findCompType "struct" n [] + | [A.Tstruct (n, Some nglist, extraAttrs)] -> (* A definition of a struct *) + let n' = + if n <> "" then n else anonStructName "struct" suggestedAnonName in + (* Use the (non-cv, non-name) attributes in !attrs now *) + let a = extraAttrs @ (getTypeAttrs ()) in + makeCompType true n' nglist (doAttributes a) + + | [A.Tunion (n, None, _)] -> (* A reference to a union *) + if n = "" then E.s (error "Missing union tag on incomplete union"); + findCompType "union" n [] + | [A.Tunion (n, Some nglist, extraAttrs)] -> (* A definition of a union *) + let n' = + if n <> "" then n else anonStructName "union" suggestedAnonName in + (* Use the attributes now *) + let a = extraAttrs @ (getTypeAttrs ()) in + makeCompType false n' nglist (doAttributes a) + + | [A.Tenum (n, None, _)] -> (* Just a reference to an enum *) + if n = "" then E.s (error "Missing enum tag on incomplete enum"); + findCompType "enum" n [] + + | [A.Tenum (n, Some eil, extraAttrs)] -> (* A definition of an enum *) + let n' = + if n <> "" then n else anonStructName "enum" suggestedAnonName in + (* make a new name for this enumeration *) + let n'', _ = newAlphaName true "enum" n' in + + (* Create the enuminfo, or use one that was created already for a + * forward reference *) + let enum, _ = createEnumInfo n'' in + let a = extraAttrs @ (getTypeAttrs ()) in + enum.eattr <- doAttributes a; + let res = TEnum (enum, []) in + + (* sm: start a scope for the enum tag values, since they * + * can refer to earlier tags *) + enterScope (); + + (* as each name,value pair is determined, this is called *) + let rec processName kname (i: exp) loc rest = begin + (* add the name to the environment, but with a faked 'typ' field; + * we don't know the full type yet (since that includes all of the + * tag values), but we won't need them in here *) + addLocalToEnv kname (EnvEnum (i, res)); + + (* add this tag to the list so that it ends up in the real + * environment when we're finished *) + let newname, _ = newAlphaName true "" kname in + + (kname, (newname, i, loc)) :: loop (increm i 1) rest + end + + and loop i = function + [] -> [] + | (kname, A.NOTHING, cloc) :: rest -> + (* use the passed-in 'i' as the value, since none specified *) + processName kname i (convLoc cloc) rest + + | (kname, e, cloc) :: rest -> + (* constant-eval 'e' to determine tag value *) + let e' = getIntConstExp e in + let e' = + match isInteger (constFold true e') with + Some i -> if !lowerConstants then kinteger64 IInt i else e' + | _ -> E.s (error "Constant initializer %a not an integer" d_exp e') + in + processName kname e' (convLoc cloc) rest + in + + (* sm: now throw away the environment we built for eval'ing the enum + * tags, so we can add to the new one properly *) + exitScope (); + + let fields = loop zero eil in + (* Now set the right set of items *) + enum.eitems <- List.map (fun (_, x) -> x) fields; + (* Record the enum name in the environment *) + addLocalToEnv (kindPlusName "enum" n'') (EnvTyp res); + (* And define the tag *) + cabsPushGlobal (GEnumTag (enum, !currentLoc)); + res + + + | [A.TtypeofE e] -> + let (c, e', t) = doExp false e AExpLeaveArrayFun in + let t' = + match e' with + StartOf(lv) -> typeOfLval lv + (* If this is a string literal, then we treat it as in sizeof*) + | Const (CStr s) -> begin + match typeOf e' with + TPtr(bt, _) -> (* This is the type of array elements *) + TArray(bt, Some (SizeOfStr s), []) + | _ -> E.s (bug "The typeOf a string is not a pointer type") + end + | _ -> t + in +(* + ignore (E.log "typeof(%a) = %a\n" d_exp e' d_plaintype t'); +*) + t' + + | [A.TtypeofT (specs, dt)] -> + let typ = doOnlyType specs dt in + typ + + | _ -> + E.s (error "Invalid combination of type specifiers") + in + bt,!storage,!isinline,List.rev (!attrs @ (convertCVtoAttr !cvattrs)) + +(* given some cv attributes, convert them into named attributes for + * uniform processing *) +and convertCVtoAttr (src: A.cvspec list) : A.attribute list = + match src with + | [] -> [] + | CV_CONST :: tl -> ("const",[]) :: (convertCVtoAttr tl) + | CV_VOLATILE :: tl -> ("volatile",[]) :: (convertCVtoAttr tl) + | CV_RESTRICT :: tl -> ("restrict",[]) :: (convertCVtoAttr tl) + + +and makeVarInfoCabs + ~(isformal: bool) + ~(isglobal: bool) + (ldecl : location) + (bt, sto, inline, attrs) + (n,ndt,a) + : varinfo = + let vtype, nattr = + doType (AttrName false) bt (A.PARENTYPE(attrs, ndt, a)) in + if inline && not (isFunctionType vtype) then + ignore (error "inline for a non-function: %s" n); + let t = + if not isglobal && not isformal then begin + (* Sometimes we call this on the formal argument of a function with no + * arguments. Don't call stripConstLocalType in that case *) +(* ignore (E.log "stripConstLocalType(%a) for %s\n" d_type vtype n); *) + stripConstLocalType vtype + end else + vtype + in + let vi = makeVarinfo isglobal n t in + vi.vstorage <- sto; + vi.vattr <- nattr; + vi.vdecl <- ldecl; + + if false then + ignore (E.log "Created varinfo %s : %a\n" vi.vname d_type vi.vtype); + + vi + +(* Process a local variable declaration and allow variable-sized arrays *) +and makeVarSizeVarInfo (ldecl : location) + spec_res + (n,ndt,a) + : varinfo * chunk * exp * bool = + if not !msvcMode then + match isVariableSizedArray ndt with + None -> + makeVarInfoCabs ~isformal:false + ~isglobal:false + ldecl spec_res (n,ndt,a), empty, zero, false + | Some (ndt', se, len) -> + makeVarInfoCabs ~isformal:false + ~isglobal:false + ldecl spec_res (n,ndt',a), se, len, true + else + makeVarInfoCabs ~isformal:false + ~isglobal:false + ldecl spec_res (n,ndt,a), empty, zero, false + +and doAttr (a: A.attribute) : attribute list = + (* Strip the leading and trailing underscore *) + let stripUnderscore (n: string) : string = + let l = String.length n in + let rec start i = + if i >= l then + E.s (error "Invalid attribute name %s" n); + if String.get n i = '_' then start (i + 1) else i + in + let st = start 0 in + let rec finish i = + (* We know that we will stop at >= st >= 0 *) + if String.get n i = '_' then finish (i - 1) else i + in + let fin = finish (l - 1) in + String.sub n st (fin - st + 1) + in + match a with + | (s, []) -> [Attr (stripUnderscore s, [])] + | (s, el) -> + + let rec attrOfExp (strip: bool) + ?(foldenum=true) + (a: A.expression) : attrparam = + match a with + A.VARIABLE n -> begin + let n' = if strip then stripUnderscore n else n in + (** See if this is an enumeration *) + try + if not foldenum then raise Not_found; + + match H.find env n' with + EnvEnum (tag, _), _ -> begin + match isInteger (constFold true tag) with + Some i64 when !lowerConstants -> AInt (Int64.to_int i64) + | _ -> ACons(n', []) + end + | _ -> ACons (n', []) + with Not_found -> ACons(n', []) + end + | A.CONSTANT (A.CONST_STRING s) -> AStr s + | A.CONSTANT (A.CONST_INT str) -> AInt (int_of_string str) + | A.CALL(A.VARIABLE n, args) -> begin + let n' = if strip then stripUnderscore n else n in + let ae' = List.map ae args in + ACons(n', ae') + end + | A.EXPR_SIZEOF e -> ASizeOfE (ae e) + | A.TYPE_SIZEOF (bt, dt) -> ASizeOf (doOnlyType bt dt) + | A.EXPR_ALIGNOF e -> AAlignOfE (ae e) + | A.TYPE_ALIGNOF (bt, dt) -> AAlignOf (doOnlyType bt dt) + | A.BINARY(A.AND, aa1, aa2) -> + ABinOp(LAnd, ae aa1, ae aa2) + | A.BINARY(A.OR, aa1, aa2) -> + ABinOp(LOr, ae aa1, ae aa2) + | A.BINARY(abop, aa1, aa2) -> + ABinOp (convBinOp abop, ae aa1, ae aa2) + | A.UNARY(A.PLUS, aa) -> ae aa + | A.UNARY(A.MINUS, aa) -> AUnOp (Neg, ae aa) + | A.UNARY(A.BNOT, aa) -> AUnOp(BNot, ae aa) + | A.UNARY(A.NOT, aa) -> AUnOp(LNot, ae aa) + | A.MEMBEROF (e, s) -> ADot (ae e, s) + | _ -> + ignore (E.log "Invalid expression in attribute: "); + withCprint Cprint.print_expression a; + E.s (error "cabs2cil: invalid expression") + + and ae (e: A.expression) = attrOfExp false e in + + (* Sometimes we need to convert attrarg into attr *) + let arg2attr = function + | ACons (s, args) -> Attr (s, args) + | a -> + E.s (error "Invalid form of attribute: %a" + d_attrparam a); + in + if s = "__attribute__" then (* Just a wrapper for many attributes*) + List.map (fun e -> arg2attr (attrOfExp true ~foldenum:false e)) el + else if s = "__blockattribute__" then (* Another wrapper *) + List.map (fun e -> arg2attr (attrOfExp true ~foldenum:false e)) el + else if s = "__declspec" then + List.map (fun e -> arg2attr (attrOfExp false ~foldenum:false e)) el + else + [Attr(stripUnderscore s, List.map (attrOfExp ~foldenum:false false) el)] + +and doAttributes (al: A.attribute list) : attribute list = + List.fold_left (fun acc a -> cabsAddAttributes (doAttr a) acc) [] al + +(* A version of Cil.partitionAttributes that works on CABS attributes. + It would be better to use Cil.partitionAttributes instead to avoid + the extra doAttr conversions here, but that's hard to do in doSpecList.*) +and cabsPartitionAttributes + ~(default:attributeClass) + (attrs: A.attribute list) : + A.attribute list * A.attribute list * A.attribute list = + let rec loop (n,f,t) = function + [] -> n, f, t + | a :: rest -> + let kind = match doAttr a with + [] -> default + | Attr(an, _)::_ -> + (try H.find attributeHash an with Not_found -> default) + in + match kind with + AttrName _ -> loop (a::n, f, t) rest + | AttrFunType _ -> + loop (n, a::f, t) rest + | AttrType -> loop (n, f, a::t) rest + in + loop ([], [], []) attrs + + + +and doType (nameortype: attributeClass) (* This is AttrName if we are doing + * the type for a name, or AttrType + * if we are doing this type in a + * typedef *) + (bt: typ) (* The base type *) + (dt: A.decl_type) + (* Returns the new type and the accumulated name (or type attribute + if nameoftype = AttrType) attributes *) + : typ * attribute list = + + (* Now do the declarator type. But remember that the structure of the + * declarator type is as printed, meaning that it is the reverse of the + * right one *) + let rec doDeclType (bt: typ) (acc: attribute list) = function + A.JUSTBASE -> bt, acc + | A.PARENTYPE (a1, d, a2) -> + let a1' = doAttributes a1 in + let a1n, a1f, a1t = partitionAttributes AttrType a1' in + let a2' = doAttributes a2 in + let a2n, a2f, a2t = partitionAttributes nameortype a2' in +(* + ignore (E.log "doType: %a @[a1n=%a@!a1f=%a@!a1t=%a@!a2n=%a@!a2f=%a@!a2t=%a@]@!" d_loc !currentLoc d_attrlist a1n d_attrlist a1f d_attrlist a1t d_attrlist a2n d_attrlist a2f d_attrlist a2t); +*) + let bt' = cabsTypeAddAttributes a1t bt in +(* + ignore (E.log "bt' = %a\n" d_type bt'); +*) + let bt'', a1fadded = + match unrollType bt with + TFun _ -> cabsTypeAddAttributes a1f bt', true + | _ -> bt', false + in + (* Now recurse *) + let restyp, nattr = doDeclType bt'' acc d in + (* Add some more type attributes *) + let restyp = cabsTypeAddAttributes a2t restyp in + (* See if we can add some more type attributes *) + let restyp' = + match unrollType restyp with + TFun _ -> + if a1fadded then + cabsTypeAddAttributes a2f restyp + else + cabsTypeAddAttributes a2f + (cabsTypeAddAttributes a1f restyp) + | TPtr ((TFun _ as tf), ap) when not !msvcMode -> + if a1fadded then + TPtr(cabsTypeAddAttributes a2f tf, ap) + else + TPtr(cabsTypeAddAttributes a2f + (cabsTypeAddAttributes a1f tf), ap) + | _ -> + if a1f <> [] && not a1fadded then + E.s (error "Invalid position for (prefix) function type attributes:%a" + d_attrlist a1f); + if a2f <> [] then + E.s (error "Invalid position for (post) function type attributes:%a" + d_attrlist a2f); + restyp + in +(* + ignore (E.log "restyp' = %a\n" d_type restyp'); +*) + (* Now add the name attributes and return *) + restyp', cabsAddAttributes a1n (cabsAddAttributes a2n nattr) + + | A.PTR (al, d) -> + let al' = doAttributes al in + let an, af, at = partitionAttributes AttrType al' in + (* Now recurse *) + let restyp, nattr = doDeclType (TPtr(bt, at)) acc d in + (* See if we can do anything with function type attributes *) + let restyp' = + match unrollType restyp with + TFun _ -> cabsTypeAddAttributes af restyp + | TPtr((TFun _ as tf), ap) -> + TPtr(cabsTypeAddAttributes af tf, ap) + | _ -> + if af <> [] then + E.s (error "Invalid position for function type attributes:%a" + d_attrlist af); + restyp + in + (* Now add the name attributes and return *) + restyp', cabsAddAttributes an nattr + + + | A.ARRAY (d, al, len) -> + let lo = + match len with + A.NOTHING -> None + | _ -> + let len' = doPureExp len in + let _, len'' = castTo (typeOf len') intType len' in + let elsz = + try (bitsSizeOf bt + 7) / 8 + with _ -> 1 (** We get this if we cannot compute the size of + * one element. This can happen, when we define + * an extern, for example. We use 1 for now *) + in + (match constFold true len' with + Const(CInt64(i, _, _)) -> + if i < 0L then + E.s (error "Length of array is negative\n"); + if Int64.mul i (Int64.of_int elsz) >= 0x80000000L then + E.s (error "Length of array is too large\n") + + + | l -> + if isConstant l then + (* e.g., there may be a float constant involved. + * We'll leave it to the user to ensure the length is + * non-negative, etc.*) + ignore(warn "Unable to do constant-folding on array length %a. Some CIL operations on this array may fail." + d_exp l) + else + E.s (error "Length of array is not a constant: %a\n" + d_exp l)); + Some len'' + in + let al' = doAttributes al in + doDeclType (TArray(bt, lo, al')) acc d + + | A.PROTO (d, args, isva) -> + (* Start a scope for the parameter names *) + enterScope (); + (* Intercept the old-style use of varargs.h. On GCC this means that + * we have ellipsis and a last argument "builtin_va_alist: + * builtin_va_alist_t". On MSVC we do not have the ellipsis and we + * have a last argument "va_alist: va_list" *) + let args', isva' = + if args != [] && !msvcMode = not isva then begin + let newisva = ref isva in + let rec doLast = function + [([A.SpecType (A.Tnamed atn)], (an, A.JUSTBASE, [], _))] + when isOldStyleVarArgTypeName atn && + isOldStyleVarArgName an -> begin + (* Turn it into a vararg *) + newisva := true; + (* And forget about this argument *) + [] + end + + | a :: rest -> a :: doLast rest + | [] -> [] + in + let args' = doLast args in + (args', !newisva) + end else (args, isva) + in + (* Make the argument as for a formal *) + let doOneArg (s, (n, ndt, a, cloc)) : varinfo = + let s' = doSpecList n s in + let ndt' = match isVariableSizedArray ndt with + None -> ndt + | Some (ndt', se, len) -> + (* If this is a variable-sized array, we replace the array + type with a pointer type. This is the defined behavior + for array parameters, so we do not need to add this to + varSizeArrays, fix sizeofs, etc. *) + if isNotEmpty se then + E.s (error "array parameter: length not pure"); + ndt' + in + let vi = makeVarInfoCabs ~isformal:true ~isglobal:false + (convLoc cloc) s' (n,ndt',a) in + (* Add the formal to the environment, so it can be referenced by + other formals (e.g. in an array type, although that will be + changed to a pointer later, or though typeof). *) + addLocalToEnv vi.vname (EnvVar vi); + vi + in + let targs : varinfo list option = + match List.map doOneArg args' with + | [] -> None (* No argument list *) + | [t] when isVoidType t.vtype -> + Some [] + | l -> Some l + in + exitScope (); + (* Turn [] types into pointers in the arguments and the result type. + * Turn function types into pointers to respective. This simplifies + * our life a lot, and is what the standard requires. *) + let rec fixupArgumentTypes (argidx: int) (args: varinfo list) : unit = + match args with + [] -> () + | a :: args' -> + (match unrollType a.vtype with + TArray(t,_,attr) -> a.vtype <- TPtr(t, attr) + | TFun _ -> a.vtype <- TPtr(a.vtype, []) + | TComp (comp, _) -> begin + match isTransparentUnion a.vtype with + None -> () + | Some fstfield -> + transparentUnionArgs := + (argidx, a.vtype) :: !transparentUnionArgs; + a.vtype <- fstfield.ftype; + end + | _ -> ()); + fixupArgumentTypes (argidx + 1) args' + in + let args = + match targs with + None -> None + | Some argl -> + fixupArgumentTypes 0 argl; + Some (List.map (fun a -> (a.vname, a.vtype, a.vattr)) argl) + in + let tres = + match unrollType bt with + TArray(t,_,attr) -> TPtr(t, attr) + | _ -> bt + in + doDeclType (TFun (tres, args, isva', [])) acc d + + in + doDeclType bt [] dt + +(* If this is a declarator for a variable size array then turn it into a + pointer type and a length *) +and isVariableSizedArray (dt: A.decl_type) + : (A.decl_type * chunk * exp) option = + let res = ref None in + let rec findArray = function + ARRAY (JUSTBASE, al, lo) when lo != A.NOTHING -> + (* Try to compile the expression to a constant *) + let (se, e', _) = doExp true lo (AExp (Some intType)) in + if isNotEmpty se || not (isConstant e') then begin + res := Some (se, e'); + PTR (al, JUSTBASE) + end else + ARRAY (JUSTBASE, al, lo) + | ARRAY (dt, al, lo) -> ARRAY (findArray dt, al, lo) + | PTR (al, dt) -> PTR (al, findArray dt) + | JUSTBASE -> JUSTBASE + | PARENTYPE (prea, dt, posta) -> PARENTYPE (prea, findArray dt, posta) + | PROTO (dt, f, a) -> PROTO (findArray dt, f, a) + in + let dt' = findArray dt in + match !res with + None -> None + | Some (se, e) -> Some (dt', se, e) + +and doOnlyType (specs: A.spec_elem list) (dt: A.decl_type) : typ = + let bt',sto,inl,attrs = doSpecList "" specs in + if sto <> NoStorage || inl then + E.s (error "Storage or inline specifier in type only"); + let tres, nattr = doType AttrType bt' (A.PARENTYPE(attrs, dt, [])) in + if nattr <> [] then + E.s (error "Name attributes in only_type: %a" + d_attrlist nattr); + tres + + +and makeCompType (isstruct: bool) + (n: string) + (nglist: A.field_group list) + (a: attribute list) = + (* Make a new name for the structure *) + let kind = if isstruct then "struct" else "union" in + let n', _ = newAlphaName true kind n in + (* Create the self cell for use in fields and forward references. Or maybe + * one exists already from a forward reference *) + let comp, _ = createCompInfo isstruct n' in + let doFieldGroup ((s: A.spec_elem list), + (nl: (A.name * A.expression option) list)) : 'a list = + (* Do the specifiers exactly once *) + let sugg = match nl with + [] -> "" + | ((n, _, _, _), _) :: _ -> n + in + let bt, sto, inl, attrs = doSpecList sugg s in + (* Do the fields *) + let makeFieldInfo + (((n,ndt,a,cloc) : A.name), (widtho : A.expression option)) + : fieldinfo = + if sto <> NoStorage || inl then + E.s (error "Storage or inline not allowed for fields"); + let ftype, nattr = + doType (AttrName false) bt (A.PARENTYPE(attrs, ndt, a)) in + (* check for fields whose type is an undefined struct. This rules + out circularity: + struct C1 { struct C2 c2; }; //This line is now an error. + struct C2 { struct C1 c1; int dummy; }; + *) + (match unrollType ftype with + TComp (ci',_) when not ci'.cdefined -> + E.s (error "Type of field %s is an undefined struct.\n" n) + | _ -> ()); + let width = + match widtho with + None -> None + | Some w -> begin + (match unrollType ftype with + TInt (ikind, a) -> () + | TEnum _ -> () + | _ -> E.s (error "Base type for bitfield is not an integer type")); + match isIntegerConstant w with + Some n -> Some n + | None -> E.s (error "bitfield width is not an integer constant") + end + in + (* If the field is unnamed and its type is a structure of union type + * then give it a distinguished name *) + let n' = + if n = missingFieldName then begin + match unrollType ftype with + TComp _ -> begin + incr annonCompFieldNameId; + annonCompFieldName ^ (string_of_int !annonCompFieldNameId) + end + | _ -> n + end else + n + in + { fcomp = comp; + fname = n'; + ftype = ftype; + fbitfield = width; + fattr = nattr; + floc = convLoc cloc + } + in + List.map makeFieldInfo nl + in + + + let flds = List.concat (List.map doFieldGroup nglist) in + if comp.cfields <> [] then begin + (* This appears to be a multiply defined structure. This can happen from + * a construct like "typedef struct foo { ... } A, B;". This is dangerous + * because at the time B is processed some forward references in { ... } + * appear as backward references, which coild lead to circularity in + * the type structure. We do a thourough check and then we reuse the type + * for A *) + let fieldsSig fs = List.map (fun f -> typeSig f.ftype) fs in + if not (Util.equals (fieldsSig comp.cfields) (fieldsSig flds)) then + ignore (error "%s seems to be multiply defined" (compFullName comp)) + end else + comp.cfields <- flds; + +(* ignore (E.log "makeComp: %s: %a\n" comp.cname d_attrlist a); *) + comp.cattr <- a; + let res = TComp (comp, []) in + (* This compinfo is defined, even if there are no fields *) + comp.cdefined <- true; + (* Create a typedef for this one *) + cabsPushGlobal (GCompTag (comp, !currentLoc)); + + (* There must be a self cell created for this already *) + addLocalToEnv (kindPlusName kind n) (EnvTyp res); + (* Now create a typedef with just this type *) + res + +and preprocessCast (specs: A.specifier) + (dt: A.decl_type) + (ie: A.init_expression) + : A.specifier * A.decl_type * A.init_expression = + let typ = doOnlyType specs dt in + (* If we are casting to a union type then we have to treat this as a + * constructor expression. This is to handle the gcc extension that allows + * cast from a type of a field to the type of the union *) + let ie' = + match unrollType typ, ie with + TComp (c, _), A.SINGLE_INIT _ when not c.cstruct -> + A.COMPOUND_INIT [(A.INFIELD_INIT ("___matching_field", + A.NEXT_INIT), + ie)] + | _, _ -> ie + in + (* Maybe specs contains an unnamed composite. Replace with the name so that + * when we do again the specs we get the right name *) + let specs1 = + match typ with + TComp (ci, _) -> + List.map + (function + A.SpecType (A.Tstruct ("", flds, [])) -> + A.SpecType (A.Tstruct (ci.cname, None, [])) + | A.SpecType (A.Tunion ("", flds, [])) -> + A.SpecType (A.Tunion (ci.cname, None, [])) + | s -> s) specs + | _ -> specs + in + specs1, dt, ie' + +and getIntConstExp (aexp) : exp = + let c, e, _ = doExp true aexp (AExp None) in + if not (isEmpty c) then + E.s (error "Constant expression %a has effects" d_exp e); + match e with + (* first, filter for those Const exps that are integers *) + | Const (CInt64 _ ) -> e + | Const (CEnum _) -> e + | Const (CChr i) -> Const(charConstToInt i) + + (* other Const expressions are not ok *) + | Const _ -> E.s (error "Expected integer constant and got %a" d_exp e) + + (* now, anything else that 'doExp true' returned is ok (provided + that it didn't yield side effects); this includes, in particular, + the various sizeof and alignof expression kinds *) + | _ -> e + +(* this is like 'isIntConstExp', but retrieves the actual integer + * the expression denotes; I have not extended it to work with + * sizeof/alignof since (for CCured) we can't const-eval those, + * and it's not clear whether they can be bitfield width specifiers + * anyway (since that's where this function is used) *) +and isIntegerConstant (aexp) : int option = + match doExp true aexp (AExp None) with + (c, e, _) when isEmpty c -> begin + match isInteger e with + Some i64 -> Some (Int64.to_int i64) + | _ -> None + end + | _ -> None + + (* Process an expression and in the process do some type checking, + * extract the effects as separate statements *) +and doExp (asconst: bool) (* This expression is used as a constant *) + (e: A.expression) + (what: expAction) : (chunk * exp * typ) = + (* A subexpression of array type is automatically turned into StartOf(e). + * Similarly an expression of function type is turned into AddrOf. So + * essentially doExp should never return things of type TFun or TArray *) + let processArrayFun e t = + match e, unrollType t with + (Lval(lv) | CastE(_, Lval lv)), TArray(tbase, _, a) -> + mkStartOfAndMark lv, TPtr(tbase, a) + | (Lval(lv) | CastE(_, Lval lv)), TFun _ -> + mkAddrOfAndMark lv, TPtr(t, []) + | _, (TArray _ | TFun _) -> + E.s (error "Array or function expression is not lval: %a@!" + d_plainexp e) + | _ -> e, t + in + (* Before we return we call finishExp *) + let finishExp ?(newWhat=what) + (se: chunk) (e: exp) (t: typ) : chunk * exp * typ = + match newWhat with + ADrop -> (se, e, t) + | AExpLeaveArrayFun -> + (se, e, t) (* It is important that we do not do "processArrayFun" in + * this case. We exploit this when we process the typeOf + * construct *) + | AExp _ -> + let (e', t') = processArrayFun e t in +(* + ignore (E.log "finishExp: e'=%a, t'=%a\n" + d_exp e' d_type t'); +*) + (se, e', t') + + | ASet (lv, lvt) -> begin + (* See if the set was done already *) + match e with + Lval(lv') when lv == lv' -> + (se, e, t) + | _ -> + let (e', t') = processArrayFun e t in + let (t'', e'') = castTo t' lvt e' in +(* + ignore (E.log "finishExp: e = %a\n e'' = %a\n" d_plainexp e d_plainexp e''); +*) + (se +++ (Set(lv, e'', !currentLoc)), e'', t'') + end + in + let rec findField (n: string) (fidlist: fieldinfo list) : offset = + (* Depth first search for the field. This appears to be what GCC does. + * MSVC checks that there are no ambiguous field names, so it does not + * matter how we search *) + let rec search = function + [] -> NoOffset (* Did not find *) + | fid :: rest when fid.fname = n -> Field(fid, NoOffset) + | fid :: rest when prefix annonCompFieldName fid.fname -> begin + match unrollType fid.ftype with + TComp (ci, _) -> + let off = search ci.cfields in + if off = NoOffset then + search rest (* Continue searching *) + else + Field (fid, off) + | _ -> E.s (bug "unnamed field type is not a struct/union") + end + | _ :: rest -> search rest + in + let off = search fidlist in + if off = NoOffset then + E.s (error "Cannot find field %s" n); + off + in + try + match e with + | A.NOTHING when what = ADrop -> finishExp empty (integer 0) intType + | A.NOTHING -> + let res = Const(CStr "exp_nothing") in + finishExp empty res (typeOf res) + + (* Do the potential lvalues first *) + | A.VARIABLE n -> begin + (* Look up in the environment *) + try + let envdata = H.find env n in + match envdata with + EnvVar vi, _ -> + (* if isconst && + not (isFunctionType vi.vtype) && + not (isArrayType vi.vtype)then + E.s (error "variable appears in constant"); *) + finishExp empty (Lval(var vi)) vi.vtype + | EnvEnum (tag, typ), _ -> + if !Cil.lowerConstants then + finishExp empty tag typ + else begin + let ei = + match unrollType typ with + TEnum(ei, _) -> ei + | _ -> assert false + in + finishExp empty (Const (CEnum(tag, n, ei))) typ + end + + | _ -> raise Not_found + with Not_found -> begin + if isOldStyleVarArgName n then + E.s (error "Cannot resolve variable %s. This could be a CIL bug due to the handling of old-style variable argument functions.\n" n) + else + E.s (error "Cannot resolve variable %s.\n" n) + end + end + | A.INDEX (e1, e2) -> begin + (* Recall that doExp turns arrays into StartOf pointers *) + let (se1, e1', t1) = doExp false e1 (AExp None) in + let (se2, e2', t2) = doExp false e2 (AExp None) in + let se = se1 @@ se2 in + let (e1'', t1, e2'', tresult) = + (* Either e1 or e2 can be the pointer *) + match unrollType t1, unrollType t2 with + TPtr(t1e,_), (TInt _|TEnum _) -> e1', t1, e2', t1e + | (TInt _|TEnum _), TPtr(t2e,_) -> e2', t2, e1', t2e + | _ -> + E.s (error + "Expecting a pointer type in index:@! t1=%a@!t2=%a@!" + d_plaintype t1 d_plaintype t2) + in + (* We have to distinguish the construction based on the type of e1'' *) + let res = + match e1'' with + StartOf array -> (* A real array indexing operation *) + addOffsetLval (Index(e2'', NoOffset)) array + | _ -> (* Turn into *(e1 + e2) *) + mkMem (BinOp(IndexPI, e1'', e2'', t1)) NoOffset + in + (* Do some optimization of StartOf *) + finishExp se (Lval res) tresult + + end + | A.UNARY (A.MEMOF, e) -> + if asconst then + ignore (warn "MEMOF in constant"); + let (se, e', t) = doExp false e (AExp None) in + let tresult = + match unrollType t with + | TPtr(te, _) -> te + | _ -> E.s (error "Expecting a pointer type in *. Got %a@!" + d_plaintype t) + in + finishExp se + (Lval (mkMem e' NoOffset)) + tresult + + (* e.str = (& e + off(str)). If e = (be + beoff) then e.str = (be + * + beoff + off(str)) *) + | A.MEMBEROF (e, str) -> + (* member of is actually allowed if we only take the address *) + (* if isconst then + E.s (error "MEMBEROF in constant"); *) + let (se, e', t') = doExp false e (AExp None) in + let lv = + match e' with + Lval x -> x + | CastE(_, Lval x) -> x + | _ -> E.s (error "Expected an lval in MEMBEROF (field %s)" str) + in + let field_offset = + match unrollType t' with + TComp (comp, _) -> findField str comp.cfields + | _ -> E.s (error "expecting a struct with field %s" str) + in + let lv' = Lval(addOffsetLval field_offset lv) in + let field_type = typeOf lv' in + finishExp se lv' field_type + + (* e->str = * (e + off(str)) *) + | A.MEMBEROFPTR (e, str) -> + if asconst then + ignore (warn "MEMBEROFPTR in constant"); + let (se, e', t') = doExp false e (AExp None) in + let pointedt = + match unrollType t' with + TPtr(t1, _) -> t1 + | TArray(t1,_,_) -> t1 + | _ -> E.s (error "expecting a pointer to a struct") + in + let field_offset = + match unrollType pointedt with + TComp (comp, _) -> findField str comp.cfields + | x -> + E.s (error + "expecting a struct with field %s. Found %a. t1 is %a" + str d_type x d_type t') + in + let lv' = Lval (mkMem e' field_offset) in + let field_type = typeOf lv' in + finishExp se lv' field_type + + | A.CONSTANT ct -> begin + let hasSuffix str = + let l = String.length str in + fun s -> + let ls = String.length s in + l >= ls && s = String.uppercase (String.sub str (l - ls) ls) + in + match ct with + A.CONST_INT str -> begin + let res = parseInt str in + finishExp empty res (typeOf res) + end + +(* + | A.CONST_WSTRING wstr -> + let len = List.length wstr in + let wchar_t = !wcharType in + (* We will make an array big enough to contain the wide + * characters and the wide-null terminator *) + let ws_t = TArray(wchar_t, Some (integer len), []) in + let ws = + makeGlobalVar ("wide_string" ^ string_of_int !lastStructId) + ws_t + in + ws.vstorage <- Static; + incr lastStructId; + (* Make the initializer. Idx is a wide_char index. *) + let rec loop (idx: int) (s: int64 list) = + match s with + [] -> [] + | wc::rest -> + let wc_cilexp = Const (CInt64(wc, IInt, None)) in + (Index(integer idx, NoOffset), + SingleInit (mkCast wc_cilexp wchar_t)) + :: loop (idx + 1) rest + in + (* Add the definition for the array *) + cabsPushGlobal (GVar(ws, + {init = Some (CompoundInit(ws_t, + loop 0 wstr))}, + !currentLoc)); + finishExp empty (StartOf(Var ws, NoOffset)) + (TPtr(wchar_t, [])) + *) + + | A.CONST_WSTRING (ws: int64 list) -> + let res = Const(CWStr ((* intlist_to_wstring *) ws)) in + finishExp empty res (typeOf res) + + | A.CONST_STRING s -> + (* Maybe we burried __FUNCTION__ in there *) + let s' = + try + let start = String.index s (Char.chr 0) in + let l = String.length s in + let tofind = (String.make 1 (Char.chr 0)) ^ "__FUNCTION__" in + let past = start + String.length tofind in + if past <= l && + String.sub s start (String.length tofind) = tofind then + (if start > 0 then String.sub s 0 start else "") ^ + !currentFunctionFDEC.svar.vname ^ + (if past < l then String.sub s past (l - past) else "") + else + s + with Not_found -> s + in + let res = Const(CStr s') in + finishExp empty res (typeOf res) + + | A.CONST_CHAR char_list -> + let a, b = (interpret_character_constant char_list) in + finishExp empty (Const a) b + + | A.CONST_WCHAR char_list -> + (* matth: I can't see a reason for a list of more than one char + * here, since the kinteger64 below will take only the lower 16 + * bits of value. ('abc' makes sense, because CHAR constants have + * type int, and so more than one char may be needed to represent + * the value. But L'abc' has type wchar, and so is equivalent to + * L'c'). But gcc allows L'abc', so I'll leave this here in case + * I'm missing some architecture dependent behavior. *) + let value = reduce_multichar !wcharType char_list in + let result = kinteger64 !wcharKind value in + finishExp empty result (typeOf result) + + | A.CONST_FLOAT str -> begin + (* Maybe it ends in U or UL. Strip those *) + let l = String.length str in + let hasSuffix = hasSuffix str in + let baseint, kind = + if hasSuffix "L" then + String.sub str 0 (l - 1), FLongDouble + else if hasSuffix "F" then + String.sub str 0 (l - 1), FFloat + else if hasSuffix "D" then + String.sub str 0 (l - 1), FDouble + else + str, FDouble + in + try + finishExp empty + (Const(CReal(float_of_string baseint, kind, + Some str))) + (TFloat(kind,[])) + with e -> begin + ignore (E.log "float_of_string %s (%s)\n" str + (Printexc.to_string e)); + let res = Const(CStr "booo CONS_FLOAT") in + finishExp empty res (typeOf res) + end + end + end + + | A.TYPE_SIZEOF (bt, dt) -> + let typ = doOnlyType bt dt in + finishExp empty (SizeOf(typ)) !typeOfSizeOf + + (* Intercept the sizeof("string") *) + | A.EXPR_SIZEOF (A.CONSTANT (A.CONST_STRING s)) -> begin + (* Process the string first *) + match doExp asconst (A.CONSTANT (A.CONST_STRING s)) (AExp None) with + _, Const(CStr s), _ -> + finishExp empty (SizeOfStr s) !typeOfSizeOf + | _ -> E.s (bug "cabs2cil: sizeOfStr") + end + + | A.EXPR_SIZEOF e -> + (* Allow non-constants in sizeof *) + (* Do not convert arrays and functions into pointers. *) + let (se, e', t) = doExp false e AExpLeaveArrayFun in +(* + ignore (E.log "sizeof: %a e'=%a, t=%a\n" + d_loc !currentLoc d_plainexp e' d_type t); +*) + (* !!!! The book says that the expression is not evaluated, so we + * drop the potential side-effects + if isNotEmpty se then + ignore (warn "Warning: Dropping side-effect in EXPR_SIZEOF\n"); +*) + let size = + match e' with (* If we are taking the sizeof an + * array we must drop the StartOf *) + StartOf(lv) -> SizeOfE (Lval(lv)) + + (* Maybe we are taking the sizeof for a CStr. In that case we + * mean the pointer to the start of the string *) + | Const(CStr _) -> SizeOf (charPtrType) + + (* Maybe we are taking the sizeof a variable-sized array *) + | Lval (Var vi, NoOffset) -> begin + try + IH.find varSizeArrays vi.vid + with Not_found -> SizeOfE e' + end + | _ -> SizeOfE e' + in + finishExp empty size !typeOfSizeOf + + | A.TYPE_ALIGNOF (bt, dt) -> + let typ = doOnlyType bt dt in + finishExp empty (AlignOf(typ)) !typeOfSizeOf + + | A.EXPR_ALIGNOF e -> + let (se, e', t) = doExp false e AExpLeaveArrayFun in + (* !!!! The book says that the expression is not evaluated, so we + * drop the potential side-effects + if isNotEmpty se then + ignore (warn "Warning: Dropping side-effect in EXPR_ALIGNOF\n"); +*) + let e'' = + match e' with (* If we are taking the alignof an + * array we must drop the StartOf *) + StartOf(lv) -> Lval(lv) + + | _ -> e' + in + finishExp empty (AlignOfE(e'')) !typeOfSizeOf + + | A.CAST ((specs, dt), ie) -> + let s', dt', ie' = preprocessCast specs dt ie in + (* We know now that we can do s' and dt' many times *) + let typ = doOnlyType s' dt' in + let what' = + match what with + AExp (Some _) -> AExp (Some typ) + | AExp None -> what + | ADrop | AExpLeaveArrayFun -> what + | ASet (lv, lvt) -> + (* If the cast from typ to lvt would be dropped, then we + * continue with a Set *) + if false && Util.equals (typeSig typ) (typeSig lvt) then + what + else + AExp None (* We'll create a temporary *) + in + (* Remember here if we have done the Set *) + let (se, e', t'), (needcast: bool) = + match ie' with + A.SINGLE_INIT e -> doExp asconst e what', true + + | A.NO_INIT -> E.s (error "missing expression in cast") + + | A.COMPOUND_INIT _ -> begin + (* Pretend that we are declaring and initializing a brand new + * variable *) + let newvar = "__constr_expr_" ^ string_of_int (!constrExprId) in + incr constrExprId; + let spec_res = doSpecList "" s' in + let se1 = + if !scopes == [] then begin + ignore (createGlobal spec_res + ((newvar, dt', [], cabslu), ie')); + empty + end else + createLocal spec_res ((newvar, dt', [], cabslu), ie') + in + (* Now pretend that e is just a reference to the newly created + * variable *) + let se, e', t' = doExp asconst (A.VARIABLE newvar) what' in + (* If typ is an array then the doExp above has already added a + * StartOf. We must undo that now so that it is done once by + * the finishExp at the end of this case *) + let e2, t2 = + match unrollType typ, e' with + TArray _, StartOf lv -> Lval lv, typ + | _, _ -> e', t' + in + (* If we are here, then the type t2 is guaranteed to match the + * type of the expression e2, so we do not need a cast. We have + * to worry about this because otherwise, we might need to cast + * between arrays or structures. *) + (se1 @@ se, e2, t2), false + end + in + let (t'', e'') = + match typ with + TVoid _ when what' = ADrop -> (t', e') (* strange GNU thing *) + | _ -> + (* Do this to check the cast, unless we are sure that we do not + * need the check. *) + let newtyp, newexp = + if needcast then + castTo ~fromsource:true t' typ e' + else + t', e' + in + newtyp, newexp + in + finishExp se e'' t'' + + | A.UNARY(A.MINUS, e) -> + let (se, e', t) = doExp asconst e (AExp None) in + if isIntegralType t then + let tres = integralPromotion t in + let e'' = + match e' with + | Const(CInt64(i, ik, _)) -> kinteger64 ik (Int64.neg i) + | _ -> UnOp(Neg, mkCastT e' t tres, tres) + in + finishExp se e'' tres + else + if isArithmeticType t then + finishExp se (UnOp(Neg,e',t)) t + else + E.s (error "Unary - on a non-arithmetic type") + + | A.UNARY(A.BNOT, e) -> + let (se, e', t) = doExp asconst e (AExp None) in + if isIntegralType t then + let tres = integralPromotion t in + let e'' = UnOp(BNot, mkCastT e' t tres, tres) in + finishExp se e'' tres + else + E.s (error "Unary ~ on a non-integral type") + + | A.UNARY(A.PLUS, e) -> doExp asconst e what + + + | A.UNARY(A.ADDROF, e) -> begin + match e with + A.COMMA el -> (* GCC extension *) + doExp false + (A.COMMA (replaceLastInList el (fun e -> A.UNARY(A.ADDROF, e)))) + what + | A.QUESTION (e1, e2, e3) -> (* GCC extension *) + doExp false + (A.QUESTION (e1, A.UNARY(A.ADDROF, e2), A.UNARY(A.ADDROF, e3))) + what + | A.VARIABLE s when + isOldStyleVarArgName s + && (match !currentFunctionFDEC.svar.vtype with + TFun(_, _, true, _) -> true | _ -> false) -> + (* We are in an old-style variable argument function and we are + * taking the address of the argument that was removed while + * processing the function type. We compute the address based on + * the address of the last real argument *) + if !msvcMode then begin + let rec getLast = function + [] -> E.s (unimp "old-style variable argument function without real arguments") + | [a] -> a + | _ :: rest -> getLast rest + in + let last = getLast !currentFunctionFDEC.sformals in + let res = mkAddrOfAndMark (var last) in + let tres = typeOf res in + let tres', res' = castTo tres (TInt(IULong, [])) res in + (* Now we must add to this address to point to the next + * argument. Round up to a multiple of 4 *) + let sizeOfLast = + (((bitsSizeOf last.vtype) + 31) / 32) * 4 + in + let res'' = + BinOp(PlusA, res', kinteger IULong sizeOfLast, tres') + in + finishExp empty res'' tres' + end else begin (* On GCC the only reliable way to do this is to + * call builtin_next_arg. If we take the address of + * a local we are going to get the address of a copy + * of the local ! *) + + doExp asconst + (A.CALL (A.VARIABLE "__builtin_next_arg", + [A.CONSTANT (A.CONST_INT "0")])) + what + end + + | (A.VARIABLE _ | A.UNARY (A.MEMOF, _) | (* Regular lvalues *) + A.INDEX _ | A.MEMBEROF _ | A.MEMBEROFPTR _ | + A.CAST (_, A.COMPOUND_INIT _)) -> begin + let (se, e', t) = doExp false e (AExp None) in + (* ignore (E.log "ADDROF on %a : %a\n" d_plainexp e' + d_plaintype t); *) + match e' with + ( Lval x | CastE(_, Lval x)) -> + finishExp se (mkAddrOfAndMark x) (TPtr(t, [])) + + | StartOf (lv) -> + let tres = TPtr(typeOfLval lv, []) in (* pointer to array *) + finishExp se (mkAddrOfAndMark lv) tres + + (* Function names are converted into pointers to the function. + * Taking the address-of again does not change things *) + | AddrOf (Var v, NoOffset) when isFunctionType v.vtype -> + finishExp se e' t + + | _ -> E.s (error "Expected lval for ADDROF. Got %a@!" + d_plainexp e') + end + | _ -> E.s (error "Unexpected operand for addrof") + end + | A.UNARY((A.PREINCR|A.PREDECR) as uop, e) -> begin + match e with + A.COMMA el -> (* GCC extension *) + doExp asconst + (A.COMMA (replaceLastInList el + (fun e -> A.UNARY(uop, e)))) + what + | A.QUESTION (e1, e2q, e3q) -> (* GCC extension *) + doExp asconst + (A.QUESTION (e1, A.UNARY(uop, e2q), + A.UNARY(uop, e3q))) + what + + | (A.VARIABLE _ | A.UNARY (A.MEMOF, _) | (* Regular lvalues *) + A.INDEX _ | A.MEMBEROF _ | A.MEMBEROFPTR _ | + A.CAST _ (* A GCC extension *)) -> begin + let uop' = if uop = A.PREINCR then PlusA else MinusA in + if asconst then + ignore (warn "PREINCR or PREDECR in constant"); + let (se, e', t) = doExp false e (AExp None) in + let lv = + match e' with + Lval x -> x + | CastE (_, Lval x) -> x (* A GCC extension. The operation is + * done at the cast type. The result + * is also of the cast type *) + | _ -> E.s (error "Expected lval for ++ or --") + in + let tresult, result = doBinOp uop' e' t one intType in + finishExp (se +++ (Set(lv, mkCastT result tresult t, + !currentLoc))) + e' + tresult (* Should this be t instead ??? *) + end + | _ -> E.s (error "Unexpected operand for prefix -- or ++") + end + + | A.UNARY((A.POSINCR|A.POSDECR) as uop, e) -> begin + match e with + A.COMMA el -> (* GCC extension *) + doExp asconst + (A.COMMA (replaceLastInList el + (fun e -> A.UNARY(uop, e)))) + what + | A.QUESTION (e1, e2q, e3q) -> (* GCC extension *) + doExp asconst + (A.QUESTION (e1, A.UNARY(uop, e2q), A.UNARY(uop, e3q))) + what + + | (A.VARIABLE _ | A.UNARY (A.MEMOF, _) | (* Regular lvalues *) + A.INDEX _ | A.MEMBEROF _ | A.MEMBEROFPTR _ | + A.CAST _ (* A GCC extension *) ) -> begin + if asconst then + ignore (warn "POSTINCR or POSTDECR in constant"); + (* If we do not drop the result then we must save the value *) + let uop' = if uop = A.POSINCR then PlusA else MinusA in + let (se, e', t) = doExp false e (AExp None) in + let lv = + match e' with + Lval x -> x + | CastE (_, Lval x) -> x (* GCC extension. The addition must + * be be done at the cast type. The + * result of this is also of the cast + * type *) + | _ -> E.s (error "Expected lval for ++ or --") + in + let tresult, opresult = doBinOp uop' e' t one intType in + let se', result = + if what <> ADrop then + let tmp = newTempVar t in + se +++ (Set(var tmp, e', !currentLoc)), Lval(var tmp) + else + se, e' + in + finishExp + (se' +++ (Set(lv, mkCastT opresult tresult t, + !currentLoc))) + result + tresult (* Should this be t instead ??? *) + end + | _ -> E.s (error "Unexpected operand for suffix ++ or --") + end + + | A.BINARY(A.ASSIGN, e1, e2) -> begin + match e1 with + A.COMMA el -> (* GCC extension *) + doExp asconst + (A.COMMA (replaceLastInList el + (fun e -> A.BINARY(A.ASSIGN, e, e2)))) + what + | A.QUESTION (e1, e2q, e3q) -> (* GCC extension *) + doExp asconst + (A.QUESTION (e1, A.BINARY(A.ASSIGN, e2q, e2), + A.BINARY(A.ASSIGN, e3q, e2))) + what + | A.CAST (t, A.SINGLE_INIT e) -> (* GCC extension *) + doExp asconst + (A.CAST (t, + A.SINGLE_INIT (A.BINARY(A.ASSIGN, e, + A.CAST (t, A.SINGLE_INIT e2))))) + what + + | (A.VARIABLE _ | A.UNARY (A.MEMOF, _) | (* Regular lvalues *) + A.INDEX _ | A.MEMBEROF _ | A.MEMBEROFPTR _ ) -> begin + if asconst then ignore (warn "ASSIGN in constant"); + let (se1, e1', lvt) = doExp false e1 (AExp None) in + let lv = + match e1' with + Lval x -> x + | _ -> E.s (error "Expected lval for assignment. Got %a\n" + d_plainexp e1') + in + let (se2, e'', t'') = doExp false e2 (ASet(lv, lvt)) in + finishExp (se1 @@ se2) e1' lvt + end + | _ -> E.s (error "Invalid left operand for ASSIGN") + end + + | A.BINARY((A.ADD|A.SUB|A.MUL|A.DIV|A.MOD|A.BAND|A.BOR|A.XOR| + A.SHL|A.SHR|A.EQ|A.NE|A.LT|A.GT|A.GE|A.LE) as bop, e1, e2) -> + let bop' = convBinOp bop in + let (se1, e1', t1) = doExp asconst e1 (AExp None) in + let (se2, e2', t2) = doExp asconst e2 (AExp None) in + let tresult, result = doBinOp bop' e1' t1 e2' t2 in + finishExp (se1 @@ se2) result tresult + + (* assignment operators *) + | A.BINARY((A.ADD_ASSIGN|A.SUB_ASSIGN|A.MUL_ASSIGN|A.DIV_ASSIGN| + A.MOD_ASSIGN|A.BAND_ASSIGN|A.BOR_ASSIGN|A.SHL_ASSIGN| + A.SHR_ASSIGN|A.XOR_ASSIGN) as bop, e1, e2) -> begin + match e1 with + A.COMMA el -> (* GCC extension *) + doExp asconst + (A.COMMA (replaceLastInList el + (fun e -> A.BINARY(bop, e, e2)))) + what + | A.QUESTION (e1, e2q, e3q) -> (* GCC extension *) + doExp asconst + (A.QUESTION (e1, A.BINARY(bop, e2q, e2), + A.BINARY(bop, e3q, e2))) + what + + | (A.VARIABLE _ | A.UNARY (A.MEMOF, _) | (* Regular lvalues *) + A.INDEX _ | A.MEMBEROF _ | A.MEMBEROFPTR _ | + A.CAST _ (* GCC extension *) ) -> begin + if asconst then + ignore (warn "op_ASSIGN in constant"); + let bop' = match bop with + A.ADD_ASSIGN -> PlusA + | A.SUB_ASSIGN -> MinusA + | A.MUL_ASSIGN -> Mult + | A.DIV_ASSIGN -> Div + | A.MOD_ASSIGN -> Mod + | A.BAND_ASSIGN -> BAnd + | A.BOR_ASSIGN -> BOr + | A.XOR_ASSIGN -> BXor + | A.SHL_ASSIGN -> Shiftlt + | A.SHR_ASSIGN -> Shiftrt + | _ -> E.s (error "binary +=") + in + let (se1, e1', t1) = doExp false e1 (AExp None) in + let lv1 = + match e1' with + Lval x -> x + | CastE (_, Lval x) -> x (* GCC extension. The operation and + * the result are at the cast type *) + | _ -> E.s (error "Expected lval for assignment with arith") + in + let (se2, e2', t2) = doExp false e2 (AExp None) in + let tresult, result = doBinOp bop' e1' t1 e2' t2 in + (* We must cast the result to the type of the lv1, which may be + * different than t1 if lv1 was a Cast *) + let _, result' = castTo tresult (typeOfLval lv1) result in + (* The type of the result is the type of the left-hand side *) + finishExp (se1 @@ se2 +++ + (Set(lv1, result', !currentLoc))) + e1' + t1 + end + | _ -> E.s (error "Unexpected left operand for assignment with arith") + end + + + | A.BINARY((A.AND|A.OR), _, _) | A.UNARY(A.NOT, _) -> begin + let ce = doCondExp asconst e in + (* We must normalize the result to 0 or 1 *) + match ce with + CEExp (se, ((Const _) as c)) -> + finishExp se (if isConstTrue c then one else zero) intType + | CEExp (se, (UnOp(LNot, _, _) as e)) -> + (* already normalized to 0 or 1 *) + finishExp se e intType + | CEExp (se, e) -> + let e' = + let te = typeOf e in + let _, zte = castTo intType te zero in + BinOp(Ne, e, zte, te) + in + finishExp se e' intType + | _ -> + let tmp = var (newTempVar intType) in + finishExp (compileCondExp ce + (empty +++ (Set(tmp, integer 1, + !currentLoc))) + (empty +++ (Set(tmp, integer 0, + !currentLoc)))) + (Lval tmp) + intType + end + + | A.CALL(f, args) -> + if asconst then + ignore (warn "CALL in constant"); + let (sf, f', ft') = + match f with (* Treat the VARIABLE case separate + * becase we might be calling a + * function that does not have a + * prototype. In that case assume it + * takes INTs as arguments *) + A.VARIABLE n -> begin + try + let vi, _ = lookupVar n in + (empty, Lval(var vi), vi.vtype) (* Found. Do not use + * finishExp. Simulate what = + * AExp None *) + with Not_found -> begin + ignore (warnOpt "Calling function %s without prototype." n); + let ftype = TFun(intType, None, false, + [Attr("missingproto",[])]) in + (* Add a prototype to the environment *) + let proto, _ = + makeGlobalVarinfo false (makeGlobalVar n ftype) in + (* Make it EXTERN *) + proto.vstorage <- Extern; + IH.add noProtoFunctions proto.vid true; + (* Add it to the file as well *) + cabsPushGlobal (GVarDecl (proto, !currentLoc)); + (empty, Lval(var proto), ftype) + end + end + | _ -> doExp false f (AExp None) + in + (* Get the result type and the argument types *) + let (resType, argTypes, isvar, f'') = + match unrollType ft' with + TFun(rt,at,isvar,a) -> (rt,at,isvar,f') + | TPtr (t, _) -> begin + match unrollType t with + TFun(rt,at,isvar,a) -> (* Make the function pointer + * explicit *) + let f'' = + match f' with + AddrOf lv -> Lval(lv) + | _ -> Lval(mkMem f' NoOffset) + in + (rt,at,isvar, f'') + | x -> + E.s (error "Unexpected type of the called function %a: %a" + d_exp f' d_type x) + end + | x -> E.s (error "Unexpected type of the called function %a: %a" + d_exp f' d_type x) + in + let argTypesList = argsToList argTypes in + (* Drop certain qualifiers from the result type *) + let resType' = resType in + (* Before we do the arguments we try to intercept a few builtins. For + * these we have defined then with a different type, so we do not + * want to give warnings. We'll just leave the arguments of these + * functions alone*) + let isSpecialBuiltin = + match f'' with + Lval (Var fv, NoOffset) -> + fv.vname = "__builtin_stdarg_start" || + fv.vname = "__builtin_va_arg" || + fv.vname = "__builtin_va_start" || + fv.vname = "__builtin_expect" || + fv.vname = "__builtin_next_arg" + | _ -> false + in + + (** If the "--forceRLArgEval" flag was used, make sure + we evaluate args right-to-left. + Added by Nathan Cooprider. **) + let force_right_to_left_evaluation (c, e, t) = + (* If chunk is empty then it is not already evaluated *) + (* constants don't need to be pulled out *) + if (!forceRLArgEval && (not (isConstant e)) && + (not isSpecialBuiltin)) then + (* create a temporary *) + let tmp = newTempVar t in + (* create an instruction to give the e to the temporary *) + let i = Set(var tmp, e, !currentLoc) in + (* add the instruction to the chunk *) + (* change the expression to be the temporary *) + (c +++ i, (Lval(var tmp)), t) + else + (c, e, t) + in + (* Do the arguments. In REVERSE order !!! Both GCC and MSVC do this *) + let rec loopArgs + : (string * typ * attributes) list * A.expression list + -> (chunk * exp list) = function + | ([], []) -> (empty, []) + + | args, [] -> + if not isSpecialBuiltin then + ignore (warnOpt + "Too few arguments in call to %a." + d_exp f'); + (empty, []) + + | ((_, at, _) :: atypes, a :: args) -> + let (ss, args') = loopArgs (atypes, args) in + (* Do not cast as part of translating the argument. We let + * the castTo to do this work. This was necessary for + * test/small1/union5, in which a transparent union is passed + * as an argument *) + let (sa, a', att) = force_right_to_left_evaluation + (doExp false a (AExp None)) in + let (_, a'') = castTo att at a' in + (ss @@ sa, a'' :: args') + + | ([], args) -> (* No more types *) + if not isvar && argTypes != None && not isSpecialBuiltin then + (* Do not give a warning for functions without a prototype*) + ignore (warnOpt "Too many arguments in call to %a" d_exp f'); + let rec loop = function + [] -> (empty, []) + | a :: args -> + let (ss, args') = loop args in + let (sa, a', at) = force_right_to_left_evaluation + (doExp false a (AExp None)) in + (ss @@ sa, a' :: args') + in + loop args + in + let (sargs, args') = loopArgs (argTypesList, args) in + (* Setup some pointer to the elements of the call. We may change + * these below *) + let prechunk: chunk ref = ref (sf @@ sargs) in (* comes before *) + + (* Do we actually have a call, or an expression? *) + let piscall: bool ref = ref true in + + let pf: exp ref = ref f'' in (* function to call *) + let pargs: exp list ref = ref args' in (* arguments *) + let pis__builtin_va_arg: bool ref = ref false in + let pwhat: expAction ref = ref what in (* what to do with result *) + + let pres: exp ref = ref zero in (* If we do not have a call, this is + * the result *) + let prestype: typ ref = ref intType in + + let rec dropCasts = function CastE (_, e) -> dropCasts e | e -> e in + (* Get the name of the last formal *) + let getNameLastFormal () : string = + match !currentFunctionFDEC.svar.vtype with + TFun(_, Some args, true, _) -> begin + match List.rev args with + (last_par_name, _, _) :: _ -> last_par_name + | _ -> "" + end + | _ -> "" + in + + (* Try to intercept some builtins *) + (match !pf with + Lval(Var fv, NoOffset) -> begin + if fv.vname = "__builtin_va_arg" then begin + match !pargs with + marker :: SizeOf resTyp :: _ -> begin + (* Make a variable of the desired type *) + let destlv, destlvtyp = + match !pwhat with + ASet (lv, lvt) -> lv, lvt + | _ -> var (newTempVar resTyp), resTyp + in + pwhat := (ASet (destlv, destlvtyp)); + pargs := [marker; SizeOf resTyp; AddrOf destlv]; + pis__builtin_va_arg := true; + end + | _ -> + ignore (warn "Invalid call to %s\n" fv.vname); + end else if fv.vname = "__builtin_stdarg_start" then begin + match !pargs with + marker :: last :: [] -> begin + let isOk = + match dropCasts last with + Lval (Var lastv, NoOffset) -> + lastv.vname = getNameLastFormal () + | _ -> false + in + if not isOk then + ignore (warn "The second argument in call to %s should be the last formal argument\n" fv.vname); + + (* Check that "lastv" is indeed the last variable in the + * prototype and then drop it *) + pargs := [ marker ] + end + | _ -> + ignore (warn "Invalid call to %s\n" fv.vname); + + (* We have to turn uses of __builtin_varargs_start into uses + * of __builtin_stdarg_start (because we have dropped the + * __builtin_va_alist argument from this function) *) + + end else if fv.vname = "__builtin_varargs_start" then begin + (* Lookup the prototype for the replacement *) + let v, _ = + try lookupGlobalVar "__builtin_stdarg_start" + with Not_found -> E.s (bug "Cannot find __builtin_stdarg_start to replace %s\n" fv.vname) + in + pf := Lval (var v) + end else if fv.vname = "__builtin_next_arg" then begin + match !pargs with + last :: [] -> begin + let isOk = + match dropCasts last with + Lval (Var lastv, NoOffset) -> + lastv.vname = getNameLastFormal () + | _ -> false + in + if not isOk then + ignore (warn "The argument in call to %s should be the last formal argument\n" fv.vname); + + pargs := [ ] + end + | _ -> + ignore (warn "Invalid call to %s\n" fv.vname); + end else if fv.vname = "__builtin_constant_p" then begin + (* Drop the side-effects *) + prechunk := empty; + + (* Constant-fold the argument and see if it is a constant *) + (match !pargs with + [ arg ] -> begin + match constFold true arg with + Const _ -> piscall := false; + pres := integer 1; + prestype := intType + + | _ -> piscall := false; + pres := integer 0; + prestype := intType + end + | _ -> + ignore (warn "Invalid call to builtin_constant_p")); + end + end + | _ -> ()); + + + (* Now we must finish the call *) + if !piscall then begin + let addCall (calldest: lval option) (res: exp) (t: typ) = + prechunk := !prechunk +++ + (Call(calldest, !pf, !pargs, !currentLoc)); + pres := res; + prestype := t + in + match !pwhat with + ADrop -> addCall None zero intType + + (* Set to a variable of corresponding type *) + | ASet(lv, vtype) -> + (* Make an exception here for __builtin_va_arg *) + if !pis__builtin_va_arg then + addCall None (Lval(lv)) vtype + else + addCall (Some lv) (Lval(lv)) vtype + + | _ -> begin + let tmp, restyp' = + match !pwhat with + AExp (Some t) -> newTempVar t, t + | _ -> newTempVar resType', resType' + in + (* Remember that this variable has been created for this + * specific call. We will use this in collapseCallCast and + * above in finishCall. *) + IH.add callTempVars tmp.vid (); + addCall (Some (var tmp)) (Lval(var tmp)) restyp' + end + end; + + finishExp !prechunk !pres !prestype + + + | A.COMMA el -> + if asconst then + ignore (warn "COMMA in constant"); + let rec loop sofar = function + [e] -> + let (se, e', t') = doExp false e what in (* Pass on the action *) + (sofar @@ se, e', t') +(* + finishExp (sofar @@ se) e' t' (* does not hurt to do it twice. + * GN: it seems it does *) +*) + | e :: rest -> + let (se, _, _) = doExp false e ADrop in + loop (sofar @@ se) rest + | [] -> E.s (error "empty COMMA expression") + in + loop empty el + + | A.QUESTION (e1,e2,e3) when what = ADrop -> + if asconst then + ignore (warn "QUESTION with ADrop in constant"); + let (se3,_,_) = doExp false e3 ADrop in + let se2 = + match e2 with + A.NOTHING -> skipChunk + | _ -> let (se2,_,_) = doExp false e2 ADrop in se2 + in + finishExp (doCondition asconst e1 se2 se3) zero intType + + | A.QUESTION (e1, e2, e3) -> begin (* what is not ADrop *) + (* Compile the conditional expression *) + let ce1 = doCondExp asconst e1 in + (* Now we must find the type of both branches, in order to compute + * the type of the result *) + let se2, e2'o (* is an option. None means use e1 *), t2 = + match e2 with + A.NOTHING -> begin (* The same as the type of e1 *) + match ce1 with + CEExp (_, e1') -> empty, None, typeOf e1' (* Do not promote + to bool *) + | _ -> empty, None, intType + end + | _ -> + let se2, e2', t2 = doExp asconst e2 (AExp None) in + se2, Some e2', t2 + in + (* Do e3 for real *) + let se3, e3', t3 = doExp asconst e3 (AExp None) in + (* Compute the type of the result *) + let tresult = conditionalConversion t2 t3 in + match ce1 with + CEExp (se1, e1') when isConstFalse e1' && canDrop se2 -> + finishExp (se1 @@ se3) (snd (castTo t3 tresult e3')) tresult + | CEExp (se1, e1') when isConstTrue e1' && canDrop se3 -> + begin + match e2'o with + None -> (* use e1' *) + finishExp (se1 @@ se2) (snd (castTo t2 tresult e1')) tresult + | Some e2' -> + finishExp (se1 @@ se2) (snd (castTo t2 tresult e2')) tresult + end + + | _ -> (* Use a conditional *) begin + match e2 with + A.NOTHING -> + let tmp = var (newTempVar tresult) in + let (se1, _, _) = doExp asconst e1 (ASet(tmp, tresult)) in + let (se3, _, _) = doExp asconst e3 (ASet(tmp, tresult)) in + finishExp (se1 @@ ifChunk (Lval(tmp)) lu + skipChunk se3) + (Lval(tmp)) + tresult + | _ -> + let lv, lvt = + match what with + | ASet (lv, lvt) -> lv, lvt + | _ -> + let tmp = newTempVar tresult in + var tmp, tresult + in + (* Now do e2 and e3 for real *) + let (se2, _, _) = doExp asconst e2 (ASet(lv, lvt)) in + let (se3, _, _) = doExp asconst e3 (ASet(lv, lvt)) in + finishExp (doCondition asconst e1 se2 se3) (Lval(lv)) tresult + end + +(* + (* Do these only to collect the types *) + let se2, e2', t2' = + match e2 with + A.NOTHING -> (* A GNU thing. Use e1 as e2 *) + doExp isconst e1 (AExp None) + | _ -> doExp isconst e2 (AExp None) in + (* Do e3 for real *) + let se3, e3', t3' = doExp isconst e3 (AExp None) in + (* Compute the type of the result *) + let tresult = conditionalConversion e2' t2' e3' t3' in + if (isEmpty se2 || e2 = A.NOTHING) + && isEmpty se3 && isconst then begin + (* Use the Question. This allows Question in initializers without + * having to do constant folding *) + let se1, e1', t1 = doExp isconst e1 (AExp None) in + ignore (checkBool t1 e1'); + let e2'' = + if e2 = A.NOTHING then + mkCastT e1' t1 tresult + else mkCastT e2' t2' tresult (* We know se2 is empty *) + in + let e3'' = mkCastT e3' t3' tresult in + let resexp = + match e1' with + Const(CInt64(i, _, _)) when i <> Int64.zero -> e2'' + | Const(CInt64(z, _, _)) when z = Int64.zero -> e3'' + | _ -> Question(e1', e2'', e3'') + in + finishExp se1 resexp tresult + end else begin (* Now use a conditional *) + match e2 with + A.NOTHING -> + let tmp = var (newTempVar tresult) in + let (se1, _, _) = doExp isconst e1 (ASet(tmp, tresult)) in + let (se3, _, _) = doExp isconst e3 (ASet(tmp, tresult)) in + finishExp (se1 @@ ifChunk (Lval(tmp)) lu + skipChunk se3) + (Lval(tmp)) + tresult + | _ -> + let lv, lvt = + match what with + | ASet (lv, lvt) -> lv, lvt + | _ -> + let tmp = newTempVar tresult in + var tmp, tresult + in + (* Now do e2 and e3 for real *) + let (se2, _, _) = doExp isconst e2 (ASet(lv, lvt)) in + let (se3, _, _) = doExp isconst e3 (ASet(lv, lvt)) in + finishExp (doCondition isconst e1 se2 se3) (Lval(lv)) tresult + end +*) + end + + | A.GNU_BODY b -> begin + (* Find the last A.COMPUTATION and remember it. This one is invoked + * on the reversed list of statements. *) + let rec findLastComputation = function + s :: _ -> + let rec findLast = function + A.SEQUENCE (_, s, loc) -> findLast s + | CASE (_, s, _) -> findLast s + | CASERANGE (_, _, s, _) -> findLast s + | LABEL (_, s, _) -> findLast s + | (A.COMPUTATION _) as s -> s + | _ -> raise Not_found + in + findLast s + | [] -> raise Not_found + in + (* Save the previous data *) + let old_gnu = ! gnu_body_result in + let lastComp, isvoidbody = + match what with + ADrop -> (* We are dropping the result *) + A.NOP cabslu, true + | _ -> + try findLastComputation (List.rev b.A.bstmts), false + with Not_found -> + E.s (error "Cannot find COMPUTATION in GNU.body") + (* A.NOP cabslu, true *) + in + (* Prepare some data to be filled by doExp *) + let data : (exp * typ) option ref = ref None in + gnu_body_result := (lastComp, data); + + let se = doBody b in + + gnu_body_result := old_gnu; + match !data with + None when isvoidbody -> finishExp se zero voidType + | None -> E.s (bug "Cannot find COMPUTATION in GNU.body") + | Some (e, t) -> finishExp se e t + end + + | A.LABELADDR l -> begin (* GCC's taking the address of a label *) + let l = lookupLabel l in (* To support locallly declared labels *) + let addrval = + try H.find gotoTargetHash l + with Not_found -> begin + let res = !gotoTargetNextAddr in + incr gotoTargetNextAddr; + H.add gotoTargetHash l res; + res + end + in + finishExp empty (mkCast (integer addrval) voidPtrType) voidPtrType + end + + | A.EXPR_PATTERN _ -> E.s (E.bug "EXPR_PATTERN in cabs2cil input") + + with e -> begin + ignore (E.log "error in doExp (%s)@!" (Printexc.to_string e)); + E.hadErrors := true; + (i2c (dInstr (dprintf "booo_exp(%t)" d_thisloc) !currentLoc), + integer 0, intType) + end + +(* bop is always the arithmetic version. Change it to the appropriate pointer + * version if necessary *) +and doBinOp (bop: binop) (e1: exp) (t1: typ) (e2: exp) (t2: typ) : typ * exp = + let doArithmetic () = + let tres = arithmeticConversion t1 t2 in + (* Keep the operator since it is arithmetic *) + tres, + optConstFoldBinOp false bop (mkCastT e1 t1 tres) (mkCastT e2 t2 tres) tres + in + let doArithmeticComp () = + let tres = arithmeticConversion t1 t2 in + (* Keep the operator since it is arithemtic *) + intType, + optConstFoldBinOp false bop + (mkCastT e1 t1 tres) (mkCastT e2 t2 tres) intType + in + let doIntegralArithmetic () = + let tres = unrollType (arithmeticConversion t1 t2) in + match tres with + TInt _ -> + tres, + optConstFoldBinOp false bop + (mkCastT e1 t1 tres) (mkCastT e2 t2 tres) tres + | _ -> E.s (error "%a operator on a non-integer type" d_binop bop) + in + let pointerComparison e1 t1 e2 t2 = + (* XL: Do not cast both sides -- what's the point? *) + intType, + optConstFoldBinOp false bop e1 e2 intType + in + + match bop with + (Mult|Div) -> doArithmetic () + | (Mod|BAnd|BOr|BXor) -> doIntegralArithmetic () + | (Shiftlt|Shiftrt) -> (* ISO 6.5.7. Only integral promotions. The result + * has the same type as the left hand side *) + if !msvcMode then + (* MSVC has a bug. We duplicate it here *) + doIntegralArithmetic () + else + let t1' = integralPromotion t1 in + let t2' = integralPromotion t2 in + t1', + optConstFoldBinOp false bop (mkCastT e1 t1 t1') (mkCastT e2 t2 t2') t1' + + | (PlusA|MinusA) + when isArithmeticType t1 && isArithmeticType t2 -> doArithmetic () + | (Eq|Ne|Lt|Le|Ge|Gt) + when isArithmeticType t1 && isArithmeticType t2 -> + doArithmeticComp () + | PlusA when isPointerType t1 && isIntegralType t2 -> + t1, + optConstFoldBinOp false PlusPI e1 + (mkCastT e2 t2 (integralPromotion t2)) t1 + | PlusA when isIntegralType t1 && isPointerType t2 -> + t2, + optConstFoldBinOp false PlusPI e2 + (mkCastT e1 t1 (integralPromotion t1)) t2 + | MinusA when isPointerType t1 && isIntegralType t2 -> + t1, + optConstFoldBinOp false MinusPI e1 + (mkCastT e2 t2 (integralPromotion t2)) t1 + | MinusA when isPointerType t1 && isPointerType t2 -> + let commontype = t1 in + intType, + optConstFoldBinOp false MinusPP (mkCastT e1 t1 commontype) + (mkCastT e2 t2 commontype) intType + | (Le|Lt|Ge|Gt|Eq|Ne) when isPointerType t1 && isPointerType t2 -> + pointerComparison e1 t1 e2 t2 + | (Eq|Ne) when isPointerType t1 && isZero e2 -> + pointerComparison e1 t1 (mkCastT zero !upointType t1) t1 + | (Eq|Ne) when isPointerType t2 && isZero e1 -> + pointerComparison (mkCastT zero !upointType t2) t2 e2 t2 + + + | (Eq|Ne|Le|Lt|Ge|Gt) when isPointerType t1 && isArithmeticType t2 -> + ignore (warnOpt "Comparison of pointer and non-pointer"); + (* Cast both values to void * *) + doBinOp bop (mkCastT e1 t1 voidPtrType) voidPtrType + (mkCastT e2 t2 voidPtrType) voidPtrType + | (Eq|Ne|Le|Lt|Ge|Gt) when isArithmeticType t1 && isPointerType t2 -> + ignore (warnOpt "Comparison of pointer and non-pointer"); + (* Cast both values to void * *) + doBinOp bop (mkCastT e1 t1 voidPtrType) voidPtrType + (mkCastT e2 t2 voidPtrType) voidPtrType + + | _ -> E.s (error "doBinOp: %a\n" d_plainexp (BinOp(bop,e1,e2,intType))) + +(* Constant fold a conditional. This is because we want to avoid having + * conditionals in the initializers. So, we try very hard to avoid creating + * new statements. *) +and doCondExp (asconst: bool) (** Try to evaluate the conditional expression + * to TRUE or FALSE, because it occurs in a + * constant *) + (e: A.expression) : condExpRes = + let rec addChunkBeforeCE (c0: chunk) = function + CEExp (c, e) -> CEExp (c0 @@ c, e) + | CEAnd (ce1, ce2) -> CEAnd (addChunkBeforeCE c0 ce1, ce2) + | CEOr (ce1, ce2) -> CEOr (addChunkBeforeCE c0 ce1, ce2) + | CENot ce1 -> CENot (addChunkBeforeCE c0 ce1) + in + let rec canDropCE = function + CEExp (c, e) -> canDrop c + | CEAnd (ce1, ce2) | CEOr (ce1, ce2) -> canDropCE ce1 && canDropCE ce2 + | CENot (ce1) -> canDropCE ce1 + in + match e with + A.BINARY (A.AND, e1, e2) -> begin + let ce1 = doCondExp asconst e1 in + let ce2 = doCondExp asconst e2 in + match ce1, ce2 with + CEExp (se1, ((Const _) as ci1)), _ -> + if isConstTrue ci1 then + addChunkBeforeCE se1 ce2 + else + (* se2 might contain labels so we cannot always drop it *) + if canDropCE ce2 then + ce1 + else + CEAnd (ce1, ce2) + | CEExp(se1, e1'), CEExp (se2, e2') when + !useLogicalOperators && isEmpty se1 && isEmpty se2 -> + CEExp (empty, BinOp(LAnd, + mkCast e1' intType, + mkCast e2' intType, intType)) + | _ -> CEAnd (ce1, ce2) + end + + | A.BINARY (A.OR, e1, e2) -> begin + let ce1 = doCondExp asconst e1 in + let ce2 = doCondExp asconst e2 in + match ce1, ce2 with + CEExp (se1, (Const(CInt64 _) as ci1)), _ -> + if isConstFalse ci1 then + addChunkBeforeCE se1 ce2 + else + (* se2 might contain labels so we cannot drop it *) + if canDropCE ce2 then + ce1 + else + CEOr (ce1, ce2) + + | CEExp (se1, e1'), CEExp (se2, e2') when + !useLogicalOperators && isEmpty se1 && isEmpty se2 -> + CEExp (empty, BinOp(LOr, mkCast e1' intType, + mkCast e2' intType, intType)) + | _ -> CEOr (ce1, ce2) + end + + | A.UNARY(A.NOT, e1) -> begin + match doCondExp asconst e1 with + CEExp (se1, (Const _ as ci1)) -> + if isConstFalse ci1 then + CEExp (se1, one) + else + CEExp (se1, zero) + | CEExp (se1, e) when isEmpty se1 -> + let t = typeOf e in + if not ((isPointerType t) || (isArithmeticType t))then + E.s (error "Bad operand to !"); + CEExp (empty, UnOp(LNot, e, intType)) + + | ce1 -> CENot ce1 + end + + | _ -> + let (se, e, t) = doExp asconst e (AExp None) in + ignore (checkBool t e); + CEExp (se, if !lowerConstants then constFold asconst e else e) + +and compileCondExp (ce: condExpRes) (st: chunk) (sf: chunk) : chunk = + match ce with + | CEAnd (ce1, ce2) -> + let (sf1, sf2) = + (* If sf is small then will copy it *) + try (sf, duplicateChunk sf) + with Failure _ -> + let lab = newLabelName "_L" in + (gotoChunk lab lu, consLabel lab sf !currentLoc false) + in + let st' = compileCondExp ce2 st sf1 in + let sf' = sf2 in + compileCondExp ce1 st' sf' + + | CEOr (ce1, ce2) -> + let (st1, st2) = + (* If st is small then will copy it *) + try (st, duplicateChunk st) + with Failure _ -> + let lab = newLabelName "_L" in + (gotoChunk lab lu, consLabel lab st !currentLoc false) + in + let st' = st1 in + let sf' = compileCondExp ce2 st2 sf in + compileCondExp ce1 st' sf' + + | CENot ce1 -> compileCondExp ce1 sf st + + | CEExp (se, e) -> begin + match e with + Const(CInt64(i,_,_)) when i <> Int64.zero && canDrop sf -> se @@ st + | Const(CInt64(z,_,_)) when z = Int64.zero && canDrop st -> se @@ sf + | _ -> se @@ ifChunk e !currentLoc st sf + end + + +(* A special case for conditionals *) +and doCondition (isconst: bool) (* If we are in constants, we do our best to + * eliminate the conditional *) + (e: A.expression) + (st: chunk) + (sf: chunk) : chunk = + compileCondExp (doCondExp isconst e) st sf + + +and doPureExp (e : A.expression) : exp = + let (se, e', _) = doExp true e (AExp None) in + if isNotEmpty se then + E.s (error "doPureExp: not pure"); + e' + +and doInitializer + (vi: varinfo) + (inite: A.init_expression) + (* Return the accumulated chunk, the initializer and the new type (might be + * different for arrays) *) + : chunk * init * typ = + + (* Setup the pre-initializer *) + let topPreInit = ref NoInitPre in + if debugInit then + ignore (E.log "\nStarting a new initializer for %s : %a\n" + vi.vname d_type vi.vtype); + let topSetupInit (o: offset) (e: exp) = + if debugInit then + ignore (E.log " set %a := %a\n" d_lval (Var vi, o) d_exp e); + let newinit = setOneInit !topPreInit o e in + if newinit != !topPreInit then topPreInit := newinit + in + let acc, restl = + let so = makeSubobj vi vi.vtype NoOffset in + doInit vi.vglob topSetupInit so empty [ (A.NEXT_INIT, inite) ] + in + if restl <> [] then + ignore (warn "Ignoring some initializers"); + (* sm: we used to do array-size fixups here, but they only worked + * for toplevel array types; now, collectInitializer does the job, + * including for nested array types *) + let typ' = unrollType vi.vtype in + if debugInit then + ignore (E.log "Collecting the initializer for %s\n" vi.vname); + let (init, typ'') = collectInitializer !topPreInit typ' in + if debugInit then + ignore (E.log "Finished the initializer for %s\n init=%a\n typ=%a\n acc=%a\n" + vi.vname d_init init d_type typ' d_chunk acc); + acc, init, typ'' + + + +(* Consume some initializers. Watch out here. Make sure we use only + * tail-recursion because these things can be big. *) +and doInit + (isconst: bool) + (setone: offset -> exp -> unit) (* Use to announce an intializer *) + (so: subobj) + (acc: chunk) + (initl: (A.initwhat * A.init_expression) list) + + (* Return the resulting chunk along with some unused initializers *) + : chunk * (A.initwhat * A.init_expression) list = + + let whoami () = d_lval () (Var so.host, so.soOff) in + + let initl1 = + match initl with + | (A.NEXT_INIT, + A.SINGLE_INIT (A.CAST ((s, dt), ie))) :: rest -> + let s', dt', ie' = preprocessCast s dt ie in + (A.NEXT_INIT, A.SINGLE_INIT (A.CAST ((s', dt'), ie'))) :: rest + | _ -> initl + in + (* Sometimes we have a cast in front of a compound (in GCC). This + * appears as a single initializer. Ignore the cast *) + let initl2 = + match initl1 with + (what, + A.SINGLE_INIT (A.CAST (_, A.COMPOUND_INIT ci))) :: rest -> + (what, A.COMPOUND_INIT ci) :: rest + | _ -> initl1 + in + let allinitl = initl2 in + + if debugInit then begin + ignore (E.log "doInit for %t %s (current %a). Looking at: " whoami + (if so.eof then "(eof)" else "") + d_lval (Var so.host, so.curOff)); + (match allinitl with + [] -> ignore (E.log "[]") + | (what, ie) :: _ -> + withCprint + Cprint.print_init_expression (A.COMPOUND_INIT [(what, ie)])); + ignore (E.log "\n"); + end; + match unrollType so.soTyp, allinitl with + _, [] -> acc, [] (* No more initializers return *) + + (* No more subobjects *) + | _, (A.NEXT_INIT, _) :: _ when so.eof -> acc, allinitl + + + (* If we are at an array of characters and the initializer is a + * string literal (optionally enclosed in braces) then explode the + * string into characters *) + | TArray(bt, leno, _), + (A.NEXT_INIT, + (A.SINGLE_INIT(A.CONSTANT (A.CONST_STRING s))| + A.COMPOUND_INIT + [(A.NEXT_INIT, + A.SINGLE_INIT(A.CONSTANT + (A.CONST_STRING s)))])) :: restil + when (match unrollType bt with + TInt((IChar|IUChar|ISChar), _) -> true + | TInt _ -> + (*Base type is a scalar other than char. Maybe a wchar_t?*) + E.s (error "Using a string literal to initialize something other than a character array.\n") + | _ -> false (* OK, this is probably an array of strings. Handle *) + ) (* it with the other arrays below.*) + -> + let charinits = + let init c = A.NEXT_INIT, A.SINGLE_INIT(A.CONSTANT (A.CONST_CHAR [c])) + in + let collector = + (* ISO 6.7.8 para 14: final NUL added only if no size specified, or + * if there is room for it; btw, we can't rely on zero-init of + * globals, since this array might be a local variable *) + if ((isNone leno) or ((String.length s) < (integerArrayLength leno))) + then ref [init Int64.zero] + else ref [] + in + for pos = String.length s - 1 downto 0 do + collector := init (Int64.of_int (Char.code (s.[pos]))) :: !collector + done; + !collector + in + (* Create a separate object for the array *) + let so' = makeSubobj so.host so.soTyp so.soOff in + (* Go inside the array *) + let leno = integerArrayLength leno in + so'.stack <- [InArray(so'.curOff, bt, leno, ref 0)]; + normalSubobj so'; + let acc', initl' = doInit isconst setone so' acc charinits in + if initl' <> [] then + ignore (warn "Too many initializers for character array %t" whoami); + (* Advance past the array *) + advanceSubobj so; + (* Continue *) + let res = doInit isconst setone so acc' restil in + res + + (* If we are at an array of WIDE characters and the initializer is a + * WIDE string literal (optionally enclosed in braces) then explore + * the WIDE string into characters *) + (* [weimer] Wed Jan 30 15:38:05 PST 2002 + * Despite what the compiler says, this match case is used and it is + * important. *) + | TArray(bt, leno, _), + (A.NEXT_INIT, + (A.SINGLE_INIT(A.CONSTANT (A.CONST_WSTRING s)) | + A.COMPOUND_INIT + [(A.NEXT_INIT, + A.SINGLE_INIT(A.CONSTANT + (A.CONST_WSTRING s)))])) :: restil + when(let bt' = unrollType bt in + match bt' with + (* compare bt to wchar_t, ignoring signed vs. unsigned *) + TInt _ when (bitsSizeOf bt') = (bitsSizeOf !wcharType) -> true + | TInt _ -> + (*Base type is a scalar other than wchar_t. Maybe a char?*) + E.s (error "Using a wide string literal to initialize something other than a wchar_t array.\n") + | _ -> false (* OK, this is probably an array of strings. Handle *) + ) (* it with the other arrays below.*) + -> + let maxWChar = (* (2**(bitsSizeOf !wcharType)) - 1 *) + Int64.sub (Int64.shift_left Int64.one (bitsSizeOf !wcharType)) + Int64.one in + let charinits = + let init c = + if (compare c maxWChar > 0) then (* if c > maxWChar *) + E.s (error "cab2cil:doInit:character 0x%Lx too big." c); + A.NEXT_INIT, + A.SINGLE_INIT(A.CONSTANT (A.CONST_INT (Int64.to_string c))) + in + (List.map init s) @ + ( + (* ISO 6.7.8 para 14: final NUL added only if no size specified, or + * if there is room for it; btw, we can't rely on zero-init of + * globals, since this array might be a local variable *) + if ((isNone leno) or ((List.length s) < (integerArrayLength leno))) + then [init Int64.zero] + else []) +(* + List.map + (fun c -> + if (compare c maxWChar > 0) then (* if c > maxWChar *) + E.s (error "cab2cil:doInit:character 0x%Lx too big." c) + else + (A.NEXT_INIT, + A.SINGLE_INIT(A.CONSTANT (A.CONST_INT (Int64.to_string c))))) + s +*) + in + (* Create a separate object for the array *) + let so' = makeSubobj so.host so.soTyp so.soOff in + (* Go inside the array *) + let leno = integerArrayLength leno in + so'.stack <- [InArray(so'.curOff, bt, leno, ref 0)]; + normalSubobj so'; + let acc', initl' = doInit isconst setone so' acc charinits in + if initl' <> [] then + (* sm: see above regarding ISO 6.7.8 para 14, which is not implemented + * for wchar_t because, as far as I can tell, we don't even put in + * the automatic NUL (!) *) + ignore (warn "Too many initializers for wchar_t array %t" whoami); + (* Advance past the array *) + advanceSubobj so; + (* Continue *) + doInit isconst setone so acc' restil + + (* If we are at an array and we see a single initializer then it must + * be one for the first element *) + | TArray(bt, leno, al), (A.NEXT_INIT, A.SINGLE_INIT oneinit) :: restil -> + (* Grab the length if there is one *) + let leno = integerArrayLength leno in + so.stack <- InArray(so.soOff, bt, leno, ref 0) :: so.stack; + normalSubobj so; + (* Start over with the fields *) + doInit isconst setone so acc allinitl + + (* If we are at a composite and we see a single initializer of the same + * type as the composite then grab it all. If the type is not the same + * then we must go on and try to initialize the fields *) + | TComp (comp, _), (A.NEXT_INIT, A.SINGLE_INIT oneinit) :: restil -> + let se, oneinit', t' = doExp isconst oneinit (AExp None) in + if (match unrollType t' with + TComp (comp', _) when comp'.ckey = comp.ckey -> true + | _ -> false) + then begin + (* Initialize the whole struct *) + setone so.soOff oneinit'; + (* Advance to the next subobject *) + advanceSubobj so; + doInit isconst setone so (acc @@ se) restil + end else begin (* Try to initialize fields *) + let toinit = fieldsToInit comp None in + so.stack <- InComp(so.soOff, comp, toinit) :: so.stack; + normalSubobj so; + doInit isconst setone so acc allinitl + end + + (* A scalar with a single initializer *) + | _, (A.NEXT_INIT, A.SINGLE_INIT oneinit) :: restil -> + let se, oneinit', t' = doExp isconst oneinit (AExp(Some so.soTyp)) in +(* + ignore (E.log "oneinit'=%a, t'=%a, so.soTyp=%a\n" + d_exp oneinit' d_type t' d_type so.soTyp); +*) + setone so.soOff (mkCastT oneinit' t' so.soTyp); + (* Move on *) + advanceSubobj so; + doInit isconst setone so (acc @@ se) restil + + + (* An array with a compound initializer. The initializer is for the + * array elements *) + | TArray (bt, leno, _), (A.NEXT_INIT, A.COMPOUND_INIT initl) :: restil -> + (* Create a separate object for the array *) + let so' = makeSubobj so.host so.soTyp so.soOff in + (* Go inside the array *) + let leno = integerArrayLength leno in + so'.stack <- [InArray(so'.curOff, bt, leno, ref 0)]; + normalSubobj so'; + let acc', initl' = doInit isconst setone so' acc initl in + if initl' <> [] then + ignore (warn "Too many initializers for array %t" whoami); + (* Advance past the array *) + advanceSubobj so; + (* Continue *) + let res = doInit isconst setone so acc' restil in + res + + (* We have a designator that tells us to select the matching union field. + * This is to support a GCC extension *) + | TComp(ci, _), [(A.NEXT_INIT, + A.COMPOUND_INIT [(A.INFIELD_INIT ("___matching_field", + A.NEXT_INIT), + A.SINGLE_INIT oneinit)])] + when not ci.cstruct -> + (* Do the expression to find its type *) + let _, _, t' = doExp isconst oneinit (AExp None) in + let tsig = typeSigWithAttrs (fun _ -> []) t' in + let rec findField = function + [] -> E.s (error "Cannot find matching union field in cast") + | fi :: rest + when Util.equals (typeSigWithAttrs (fun _ -> []) fi.ftype) tsig + -> fi + | _ :: rest -> findField rest + in + let fi = findField ci.cfields in + (* Change the designator and redo *) + doInit isconst setone so acc [(A.INFIELD_INIT (fi.fname, A.NEXT_INIT), + A.SINGLE_INIT oneinit)] + + + (* A structure with a composite initializer. We initialize the fields*) + | TComp (comp, _), (A.NEXT_INIT, A.COMPOUND_INIT initl) :: restil -> + (* Create a separate subobject iterator *) + let so' = makeSubobj so.host so.soTyp so.soOff in + (* Go inside the comp *) + so'.stack <- [InComp(so'.curOff, comp, fieldsToInit comp None)]; + normalSubobj so'; + let acc', initl' = doInit isconst setone so' acc initl in + if initl' <> [] then + ignore (warn "Too many initializers for structure"); + (* Advance past the structure *) + advanceSubobj so; + (* Continue *) + doInit isconst setone so acc' restil + + (* A scalar with a initializer surrounded by braces *) + | _, (A.NEXT_INIT, A.COMPOUND_INIT [(A.NEXT_INIT, + A.SINGLE_INIT oneinit)]) :: restil -> + let se, oneinit', t' = doExp isconst oneinit (AExp(Some so.soTyp)) in + setone so.soOff (mkCastT oneinit' t' so.soTyp); + (* Move on *) + advanceSubobj so; + doInit isconst setone so (acc @@ se) restil + + | t, (A.NEXT_INIT, _) :: _ -> + E.s (unimp "doInit: unexpected NEXT_INIT for %a\n" d_type t); + + (* We have a designator *) + | _, (what, ie) :: restil when what != A.NEXT_INIT -> + (* Process a designator and position to the designated subobject *) + let rec addressSubobj + (so: subobj) + (what: A.initwhat) + (acc: chunk) : chunk = + (* Always start from the current element *) + so.stack <- []; so.eof <- false; + normalSubobj so; + let rec address (what: A.initwhat) (acc: chunk) : chunk = + match what with + A.NEXT_INIT -> acc + | A.INFIELD_INIT (fn, whatnext) -> begin + match unrollType so.soTyp with + TComp (comp, _) -> + let toinit = fieldsToInit comp (Some fn) in + so.stack <- InComp(so.soOff, comp, toinit) :: so.stack; + normalSubobj so; + address whatnext acc + + | _ -> E.s (error "Field designator %s not in a struct " fn) + end + + | A.ATINDEX_INIT(idx, whatnext) -> begin + match unrollType so.soTyp with + TArray (bt, leno, _) -> + let ilen = integerArrayLength leno in + let nextidx', doidx = + let (doidx, idxe', _) = + doExp true idx (AExp(Some intType)) in + match constFold true idxe', isNotEmpty doidx with + Const(CInt64(x, _, _)), false -> Int64.to_int x, doidx + | _ -> E.s (error + "INDEX initialization designator is not a constant") + in + if nextidx' < 0 || nextidx' >= ilen then + E.s (error "INDEX designator is outside bounds"); + so.stack <- + InArray(so.soOff, bt, ilen, ref nextidx') :: so.stack; + normalSubobj so; + address whatnext (acc @@ doidx) + + | _ -> E.s (error "INDEX designator for a non-array") + end + + | A.ATINDEXRANGE_INIT _ -> + E.s (bug "addressSubobj: INDEXRANGE") + in + address what acc + in + (* First expand the INDEXRANGE by making copies *) + let rec expandRange (top: A.initwhat -> A.initwhat) = function + | A.INFIELD_INIT (fn, whatnext) -> + expandRange (fun what -> top (A.INFIELD_INIT(fn, what))) whatnext + | A.ATINDEX_INIT (idx, whatnext) -> + expandRange (fun what -> top (A.ATINDEX_INIT(idx, what))) whatnext + + | A.ATINDEXRANGE_INIT (idxs, idxe) -> + let (doidxs, idxs', _) = + doExp true idxs (AExp(Some intType)) in + let (doidxe, idxe', _) = + doExp true idxe (AExp(Some intType)) in + if isNotEmpty doidxs || isNotEmpty doidxe then + E.s (error "Range designators are not constants\n"); + let first, last = + match constFold true idxs', constFold true idxe' with + Const(CInt64(s, _, _)), + Const(CInt64(e, _, _)) -> + Int64.to_int s, Int64.to_int e + | _ -> E.s (error + "INDEX_RANGE initialization designator is not a constant") + in + if first < 0 || first > last then + E.s (error + "start index larger than end index in range initializer"); + let rec loop (i: int) = + if i > last then restil + else + (top (A.ATINDEX_INIT(A.CONSTANT(A.CONST_INT(string_of_int i)), + A.NEXT_INIT)), ie) + :: loop (i + 1) + in + doInit isconst setone so acc (loop first) + + | A.NEXT_INIT -> (* We have not found any RANGE *) + let acc' = addressSubobj so what acc in + doInit isconst setone so (acc @@ acc') + ((A.NEXT_INIT, ie) :: restil) + in + expandRange (fun x -> x) what + + | t, (what, ie) :: _ -> + E.s (bug "doInit: cases for t=%a" d_type t) + + +(* Create and add to the file (if not already added) a global. Return the + * varinfo *) +and createGlobal (specs : (typ * storage * bool * A.attribute list)) + (((n,ndt,a,cloc), inite) : A.init_name) : varinfo = + try + if debugGlobal then + ignore (E.log "createGlobal: %s\n" n); + (* Make a first version of the varinfo *) + let vi = makeVarInfoCabs ~isformal:false + ~isglobal:true (convLoc cloc) specs (n,ndt,a) in + (* Add the variable to the environment before doing the initializer + * because it might refer to the variable itself *) + if isFunctionType vi.vtype then begin + if inite != A.NO_INIT then + E.s (error "Function declaration with initializer (%s)\n" + vi.vname); + (* sm: if it's a function prototype, and the storage class *) + (* isn't specified, make it 'extern'; this fixes a problem *) + (* with no-storage prototype and static definition *) + if vi.vstorage = NoStorage then + (*(trace "sm" (dprintf "adding extern to prototype of %s\n" n));*) + vi.vstorage <- Extern; + end; + let vi, alreadyInEnv = makeGlobalVarinfo (inite != A.NO_INIT) vi in +(* + ignore (E.log "createGlobal %a: %s type=%a\n" + d_loc (convLoc cloc) vi.vname d_plaintype vi.vtype); +*) + (* Do the initializer and complete the array type if necessary *) + let init : init option = + if inite = A.NO_INIT then + None + else + let se, ie', et = doInitializer vi inite in + (* Maybe we now have a better type *) + vi.vtype <- et; + if isNotEmpty se then + E.s (error "global initializer"); + Some ie' + in + + try + let oldloc = H.find alreadyDefined vi.vname in + if init != None then begin + E.s (error "Global %s was already defined at %a\n" + vi.vname d_loc oldloc); + end; + if debugGlobal then + ignore (E.log " global %s was already defined\n" vi.vname); + (* Do not declare it again *) + vi + with Not_found -> begin + (* Not already defined *) + if debugGlobal then + ignore (E.log " first definition for %s\n" vi.vname); + if init != None then begin + (* weimer: Sat Dec 8 17:43:34 2001 + * MSVC NT Kernel headers include this lovely line: + * extern const GUID __declspec(selectany) \ + * MOUNTDEV_MOUNTED_DEVICE_GUID = { 0x53f5630d, 0xb6bf, 0x11d0, { \ + * 0x94, 0xf2, 0x00, 0xa0, 0xc9, 0x1e, 0xfb, 0x8b } }; + * So we allow "extern" + "initializer" if "const" is + * around. *) + (* sm: As I read the ISO spec, in particular 6.9.2 and 6.7.8, + * "extern int foo = 3" is exactly equivalent to "int foo = 3"; + * that is, if you put an initializer, then it is a definition, + * and "extern" is redundantly giving the name external linkage. + * gcc emits a warning, I guess because it is contrary to + * usual practice, but I think CIL warnings should be about + * semantic rather than stylistic issues, so I see no reason to + * even emit a warning. *) + if vi.vstorage = Extern then + vi.vstorage <- NoStorage; (* equivalent and canonical *) + + H.add alreadyDefined vi.vname !currentLoc; + IH.remove mustTurnIntoDef vi.vid; + cabsPushGlobal (GVar(vi, {init = init}, !currentLoc)); + vi + end else begin + if not (isFunctionType vi.vtype) + && not (IH.mem mustTurnIntoDef vi.vid) then + begin + IH.add mustTurnIntoDef vi.vid true + end; + if not alreadyInEnv then begin (* Only one declaration *) + (* If it has function type it is a prototype *) + cabsPushGlobal (GVarDecl (vi, !currentLoc)); + vi + end else begin + if debugGlobal then + ignore (E.log " already in env %s\n" vi.vname); + vi + end + end + end + with e -> begin + ignore (E.log "error in createGlobal(%s: %a): %s\n" n + d_loc !currentLoc + (Printexc.to_string e)); + cabsPushGlobal (dGlobal (dprintf "booo - error in global %s (%t)" + n d_thisloc) !currentLoc); + dummyFunDec.svar + end +(* + ignore (E.log "Env after processing global %s is:@!%t@!" + n docEnv); + ignore (E.log "Alpha after processing global %s is:@!%t@!" + n docAlphaTable) +*) + +(* Must catch the Static local variables. Make them global *) +and createLocal ((_, sto, _, _) as specs) + ((((n, ndt, a, cloc) : A.name), + (inite: A.init_expression)) as init_name) + : chunk = + let loc = convLoc cloc in + (* Check if we are declaring a function *) + let rec isProto (dt: decl_type) : bool = + match dt with + | PROTO (JUSTBASE, _, _) -> true + | PROTO (x, _, _) -> isProto x + | PARENTYPE (_, x, _) -> isProto x + | ARRAY (x, _, _) -> isProto x + | PTR (_, x) -> isProto x + | _ -> false + in + match ndt with + (* Maybe we have a function prototype in local scope. Make it global. We + * do this even if the storage is Static *) + | _ when isProto ndt -> + let vi = createGlobal specs init_name in + (* Add it to the environment to shadow previous decls *) + addLocalToEnv n (EnvVar vi); + empty + + | _ when sto = Static -> + if debugGlobal then + ignore (E.log "createGlobal (local static): %s\n" n); + + + (* Now alpha convert it to make sure that it does not conflict with + * existing globals or locals from this function. *) + let newname, _ = newAlphaName true "" n in + (* Make it global *) + let vi = makeVarInfoCabs ~isformal:false + ~isglobal:true + loc specs (newname, ndt, a) in + (* However, we have a problem if a real global appears later with the + * name that we have happened to choose for this one. Remember these names + * for later. *) + H.add staticLocals vi.vname vi; + (* Add it to the environment as a local so that the name goes out of + * scope properly *) + addLocalToEnv n (EnvVar vi); + + (* Maybe this is an array whose length depends on something with local + scope, e.g. "static char device[ sizeof(local) ]". + Const-fold the type to fix this. *) + vi.vtype <- constFoldType vi.vtype; + + let init : init option = + if inite = A.NO_INIT then + None + else begin + let se, ie', et = doInitializer vi inite in + (* Maybe we now have a better type *) + vi.vtype <- et; + if isNotEmpty se then + E.s (error "global static initializer"); + (* Maybe the initializer refers to the function itself. + Push a prototype for the function, just in case. Hopefully, + if does not refer to the locals *) + cabsPushGlobal (GVarDecl (!currentFunctionFDEC.svar, !currentLoc)); + Some ie' + end + in + cabsPushGlobal (GVar(vi, {init = init}, !currentLoc)); + empty + + (* Maybe we have an extern declaration. Make it a global *) + | _ when sto = Extern -> + let vi = createGlobal specs init_name in + (* Add it to the local environment to ensure that it shadows previous + * local variables *) + addLocalToEnv n (EnvVar vi); + empty + + | _ -> + (* Make a variable of potentially variable size. If se0 <> empty then + * it is a variable size variable *) + let vi,se0,len,isvarsize = + makeVarSizeVarInfo loc specs (n, ndt, a) in + + let vi = alphaConvertVarAndAddToEnv true vi in (* Replace vi *) + let se1 = + if isvarsize then begin (* Variable-sized array *) + ignore (warn "Variable-sized local variable %s" vi.vname); + (* Make a local variable to keep the length *) + let savelen = + makeVarInfoCabs + ~isformal:false + ~isglobal:false + loc + (TInt(IUInt, []), NoStorage, false, []) + ("__lengthof" ^ vi.vname,JUSTBASE, []) + in + (* Register it *) + let savelen = alphaConvertVarAndAddToEnv true savelen in + (* Compute the sizeof *) + let sizeof = + BinOp(Mult, + SizeOfE (Lval(Mem(Lval(var vi)), NoOffset)), + Lval (var savelen), !typeOfSizeOf) in + (* Register the length *) + IH.add varSizeArrays vi.vid sizeof; + (* There can be no initializer for this *) + if inite != A.NO_INIT then + E.s (error "Variable-sized array cannot have initializer"); + se0 +++ (Set(var savelen, len, !currentLoc)) + (* Initialize the variable *) + +++ (Call(Some(var vi), Lval(var (allocaFun ())), + [ sizeof ], !currentLoc)) + end else empty + in + if inite = A.NO_INIT then + se1 (* skipChunk *) + else begin + let se4, ie', et = doInitializer vi inite in + (* Fix the length *) + (match vi.vtype, ie', et with + (* We have a length now *) + TArray(_,None, _), _, TArray(_, Some _, _) -> vi.vtype <- et + (* Initializing a local array *) + | TArray(TInt((IChar|IUChar|ISChar), _) as bt, None, a), + SingleInit(Const(CStr s)), _ -> + vi.vtype <- TArray(bt, + Some (integer (String.length s + 1)), + a) + | _, _, _ -> ()); + + (* Now create assignments instead of the initialization *) + se1 @@ se4 @@ (assignInit (Var vi, NoOffset) ie' et empty) + end + +and doAliasFun vtype (thisname:string) (othername:string) + (sname:single_name) (loc: cabsloc) : unit = + (* This prototype declares that name is an alias for + othername, which must be defined in this file *) +(* E.log "%s is alias for %s at %a\n" thisname othername *) +(* d_loc !currentLoc; *) + let rt, formals, isva, _ = splitFunctionType vtype in + if isva then E.s (error "%a: alias unsupported with varargs." + d_loc !currentLoc); + let args = List.map + (fun (n,_,_) -> A.VARIABLE n) + (argsToList formals) in + let call = A.CALL (A.VARIABLE othername, args) in + let stmt = if isVoidType rt then A.COMPUTATION(call, loc) + else A.RETURN(call, loc) + in + let body = { A.blabels = []; A.battrs = []; A.bstmts = [stmt] } in + let fdef = A.FUNDEF (sname, body, loc, loc) in + ignore (doDecl true fdef); + (* get the new function *) + let v,_ = try lookupGlobalVar thisname + with Not_found -> E.s (bug "error in doDecl") in + v.vattr <- dropAttribute "alias" v.vattr + + +(* Do one declaration *) +and doDecl (isglobal: bool) : A.definition -> chunk = function + | A.DECDEF ((s, nl), loc) -> + currentLoc := convLoc(loc); + (* Do the specifiers exactly once *) + let sugg = + match nl with + [] -> "" + | ((n, _, _, _), _) :: _ -> n + in + let spec_res = doSpecList sugg s in + (* Do all the variables and concatenate the resulting statements *) + let doOneDeclarator (acc: chunk) (name: init_name) = + let (n,ndt,a,l),_ = name in + if isglobal then begin + let bt,_,_,attrs = spec_res in + let vtype, nattr = + doType (AttrName false) bt (A.PARENTYPE(attrs, ndt, a)) in + (match filterAttributes "alias" nattr with + [] -> (* ordinary prototype. *) + ignore (createGlobal spec_res name) + (* E.log "%s is not aliased\n" name *) + | [Attr("alias", [AStr othername])] -> + if not (isFunctionType vtype) then begin + ignore (warn + "%a: CIL only supports attribute((alias)) for functions.\n" + d_loc !currentLoc); + ignore (createGlobal spec_res name) + end else + doAliasFun vtype n othername (s, (n,ndt,a,l)) loc + | _ -> E.s (error "Bad alias attribute at %a" d_loc !currentLoc)); + acc + end else + acc @@ createLocal spec_res name + in + let res = List.fold_left doOneDeclarator empty nl in +(* + ignore (E.log "after doDecl %a: res=%a\n" + d_loc !currentLoc d_chunk res); +*) + res + + + + | A.TYPEDEF (ng, loc) -> + currentLoc := convLoc(loc); + doTypedef ng; empty + + | A.ONLYTYPEDEF (s, loc) -> + currentLoc := convLoc(loc); + doOnlyTypedef s; empty + + | A.GLOBASM (s,loc) when isglobal -> + currentLoc := convLoc(loc); + cabsPushGlobal (GAsm (s, !currentLoc)); + empty + + | A.PRAGMA (a, loc) when isglobal -> begin + currentLoc := convLoc(loc); + match doAttr ("dummy", [a]) with + [Attr("dummy", [a'])] -> + let a'' = + match a' with + | ACons (s, args) -> Attr (s, args) + | _ -> E.s (error "Unexpected attribute in #pragma") + in + cabsPushGlobal (GPragma (a'', !currentLoc)); + empty + + | _ -> E.s (error "Too many attributes in pragma") + end + | A.TRANSFORMER (_, _, _) -> E.s (E.bug "TRANSFORMER in cabs2cil input") + | A.EXPRTRANSFORMER (_, _, _) -> + E.s (E.bug "EXPRTRANSFORMER in cabs2cil input") + + (* If there are multiple definitions of extern inline, turn all but the + * first into a prototype *) + | A.FUNDEF (((specs,(n,dt,a,loc')) : A.single_name), + (body : A.block), loc, _) + when isglobal && isExtern specs && isInline specs + && (H.mem genv (n ^ "__extinline")) -> + currentLoc := convLoc(loc); + let othervi, _ = lookupVar (n ^ "__extinline") in + if othervi.vname = n then + (* The previous entry in the env is also an extern inline version + of n. *) + ignore (warn "Duplicate extern inline definition for %s ignored" n) + else begin + (* Otherwise, the previous entry is an ordinary function that + happens to be named __extinline. Renaming n to n__extinline + would confict with other, so report an error. *) + E.s (unimp("Trying to rename %s to\n %s__extinline, but %s__extinline" + ^^ " already exists in the env.\n \"__extinline\" is" + ^^ " reserved for CIL.\n") n n n) + end; + (* Treat it as a prototype *) + doDecl isglobal (A.DECDEF ((specs, [((n,dt,a,loc'), A.NO_INIT)]), loc)) + + | A.FUNDEF (((specs,(n,dt,a, _)) : A.single_name), + (body : A.block), loc1, loc2) when isglobal -> + begin + let funloc = convLoc loc1 in + let endloc = convLoc loc2 in +(* ignore (E.log "Definition of %s at %a\n" n d_loc funloc); *) + currentLoc := funloc; + E.withContext + (fun _ -> dprintf "2cil: %s" n) + (fun _ -> + try + IH.clear callTempVars; + + (* Make the fundec right away, and we'll populate it later. We + * need this throughout the code to create temporaries. *) + currentFunctionFDEC := + { svar = makeGlobalVar "@tempname@" voidType; + slocals = []; (* For now we'll put here both the locals and + * the formals. Then "endFunction" will + * separate them *) + sformals = []; (* Not final yet *) + smaxid = 0; + sbody = dummyFunDec.sbody; (* Not final yet *) + smaxstmtid = None; + sallstmts = []; + }; + !currentFunctionFDEC.svar.vdecl <- funloc; + + constrExprId := 0; + (* Setup the environment. Add the formals to the locals. Maybe + * they need alpha-conv *) + enterScope (); (* Start the scope *) + + IH.clear varSizeArrays; + + (* Do not process transparent unions in function definitions. + * We'll do it later *) + transparentUnionArgs := []; + + (* Fix the NAME and the STORAGE *) + let _ = + let bt,sto,inl,attrs = doSpecList n specs in + !currentFunctionFDEC.svar.vinline <- inl; + + let ftyp, funattr = + doType (AttrName false) bt (A.PARENTYPE(attrs, dt, a)) in + !currentFunctionFDEC.svar.vtype <- ftyp; + !currentFunctionFDEC.svar.vattr <- funattr; + + (* If this is the definition of an extern inline then we change + * its name, by adding the suffix __extinline. We also make it + * static *) + let n', sto' = + let n' = n ^ "__extinline" in + if inl && sto = Extern then + n', Static + else begin + (* Maybe this is the body of a previous extern inline. Then + * we must take that one out of the environment because it + * is not used from here on. This will also ensure that + * then we make this functions' varinfo we will not think + * it is a duplicate definition *) + (try + ignore (lookupVar n'); (* if this succeeds, n' is defined*) + let oldvi, _ = lookupVar n in + if oldvi.vname = n' then begin + (* oldvi is an extern inline function that has been + renamed to n ^ "__extinline". Remove it from the + environment. *) + H.remove env n; H.remove genv n; + H.remove env n'; H.remove genv n' + end + else + (* oldvi is not a renamed extern inline function, and + we should do nothing. The reason the lookup + of n' succeeded is probably because there's + an ordinary function that happens to be named, + n ^ "__extinline", probably as a result of a previous + pass through CIL. See small2/extinline.c*) + () + with Not_found -> ()); + n, sto + end + in + (* Now we have the name and the storage *) + !currentFunctionFDEC.svar.vname <- n'; + !currentFunctionFDEC.svar.vstorage <- sto' + in + + (* Add the function itself to the environment. Add it before + * you do the body because the function might be recursive. Add + * it also before you add the formals to the environment + * because there might be a formal with the same name as the + * function and we want it to take precedence. *) + (* Make a variable out of it and put it in the environment *) + !currentFunctionFDEC.svar <- + fst (makeGlobalVarinfo true !currentFunctionFDEC.svar); + + (* If it is extern inline then we add it to the global + * environment for the original name as well. This will ensure + * that all uses of this function will refer to the renamed + * function *) + addGlobalToEnv n (EnvVar !currentFunctionFDEC.svar); + + if H.mem alreadyDefined !currentFunctionFDEC.svar.vname then + E.s (error "There is a definition already for %s" n); + +(* + ignore (E.log "makefunvar:%s@! type=%a@! vattr=%a@!" + n d_type thisFunctionVI.vtype + d_attrlist thisFunctionVI.vattr); +*) + + (* makeGlobalVarinfo might have changed the type of the function + * (when combining it with the type of the prototype). So get the + * type only now. *) + + (**** Process the TYPE and the FORMALS ***) + let _ = + let (returnType, formals_t, isvararg, funta) = + splitFunctionTypeVI !currentFunctionFDEC.svar + in + (* Record the returnType for doStatement *) + currentReturnType := returnType; + + + (* Create the formals and add them to the environment. *) + (* sfg: extract locations for the formals from dt *) + let doFormal (loc : location) (fn, ft, fa) = + let f = makeVarinfo false fn ft in + (f.vdecl <- loc; + f.vattr <- fa; + alphaConvertVarAndAddToEnv true f) + in + let rec doFormals fl' ll' = + begin + match (fl', ll') with + | [], _ -> [] + + | fl, [] -> (* no more locs available *) + List.map (doFormal !currentLoc) fl + + | f::fl, (_,(_,_,_,l))::ll -> + (* sfg: these lets seem to be necessary to + * force the right order of evaluation *) + let f' = doFormal (convLoc l) f in + let fl' = doFormals fl ll in + f' :: fl' + end + in + let fmlocs = (match dt with PROTO(_, fml, _) -> fml | _ -> []) in + let formals = doFormals (argsToList formals_t) fmlocs in + + (* Recreate the type based on the formals. *) + let ftype = TFun(returnType, + Some (List.map (fun f -> (f.vname, + f.vtype, + f.vattr)) formals), + isvararg, funta) in + (* + ignore (E.log "Funtype of %s: %a\n" n' d_type ftype); + *) + (* Now fix the names of the formals in the type of the function + * as well *) + !currentFunctionFDEC.svar.vtype <- ftype; + !currentFunctionFDEC.sformals <- formals; + in + (* Now change the type of transparent union args back to what it + * was so that the body type checks. We must do it this late + * because makeGlobalVarinfo from above might choke if we give + * the function a type containing transparent unions *) + let _ = + let rec fixbackFormals (idx: int) (args: varinfo list) : unit= + match args with + [] -> () + | a :: args' -> + (* Fix the type back to a transparent union type *) + (try + let origtype = List.assq idx !transparentUnionArgs in + a.vtype <- origtype; + with Not_found -> ()); + fixbackFormals (idx + 1) args' + in + fixbackFormals 0 !currentFunctionFDEC.sformals; + transparentUnionArgs := []; + in + + (********** Now do the BODY *************) + let _ = + let stmts = doBody body in + (* Finish everything *) + exitScope (); + + (* Now fill in the computed goto statement with cases. Do this + * before mkFunctionbody which resolves the gotos *) + (match !gotoTargetData with + Some (switchv, switch) -> + let switche, l = + match switch.skind with + Switch (switche, _, _, l) -> switche, l + | _ -> E.s(bug "the computed goto statement not a switch") + in + (* Build a default chunk that segfaults *) + let default = + defaultChunk + l + (i2c (Set ((Mem (mkCast (integer 0) intPtrType), + NoOffset), + integer 0, l))) + in + let bodychunk = ref default in + H.iter (fun lname laddr -> + bodychunk := + caseRangeChunk + [integer laddr] l + (gotoChunk lname l @@ !bodychunk)) + gotoTargetHash; + (* Now recreate the switch *) + let newswitch = switchChunk switche !bodychunk l in + (* We must still share the old switch statement since we + * have already inserted the goto's *) + let newswitchkind = + match newswitch.stmts with + [ s] + when newswitch.postins == [] && newswitch.cases == []-> + s.skind + | _ -> E.s (bug "Unexpected result from switchChunk") + in + switch.skind <- newswitchkind + + | None -> ()); + (* Now finish the body and store it *) + !currentFunctionFDEC.sbody <- mkFunctionBody stmts; + (* Reset the global parameters *) + gotoTargetData := None; + H.clear gotoTargetHash; + gotoTargetNextAddr := 0; + in + + + +(* + ignore (E.log "endFunction %s at %t:@! sformals=%a@! slocals=%a@!" + !currentFunctionFDEC.svar.vname d_thisloc + (docList ~sep:(chr ',') (fun v -> text v.vname)) + !currentFunctionFDEC.sformals + (docList ~sep:(chr ',') (fun v -> text v.vname)) + !currentFunctionFDEC.slocals); +*) + + let rec dropFormals formals locals = + match formals, locals with + [], l -> l + | f :: formals, l :: locals -> + if f != l then + E.s (bug "formal %s is not in locals (found instead %s)" + f.vname l.vname); + dropFormals formals locals + | _ -> E.s (bug "Too few locals") + in + !currentFunctionFDEC.slocals + <- dropFormals !currentFunctionFDEC.sformals + (List.rev !currentFunctionFDEC.slocals); + setMaxId !currentFunctionFDEC; + + (* Now go over the types of the formals and pull out the formals + * with transparent union type. Replace them with some shadow + * parameters and then add assignments *) + let _ = + let newformals, newbody = + List.fold_right (* So that the formals come out in order *) + (fun f (accform, accbody) -> + match isTransparentUnion f.vtype with + None -> (f :: accform, accbody) + | Some fstfield -> + (* A new shadow to be placed in the formals. Use + * makeTempVar to update smaxid and all others. *) + let shadow = + makeTempVar !currentFunctionFDEC fstfield.ftype in + (* Now take it out of the locals and replace it with + * the current formal. It is not worth optimizing this + * one. *) + !currentFunctionFDEC.slocals <- + f :: + (List.filter (fun x -> x.vid <> shadow.vid) + !currentFunctionFDEC.slocals); + (shadow :: accform, + mkStmt (Instr [Set ((Var f, Field(fstfield, + NoOffset)), + Lval (var shadow), + !currentLoc)]) :: accbody)) + !currentFunctionFDEC.sformals + ([], !currentFunctionFDEC.sbody.bstmts) + in + !currentFunctionFDEC.sbody.bstmts <- newbody; + (* To make sure sharing with the type is proper *) + setFormals !currentFunctionFDEC newformals; + in + + (* Now see whether we can fall through to the end of the function + * *) + (* weimer: Sat Dec 8 17:30:47 2001 MSVC NT kernel headers include + * functions like long convert(x) { __asm { mov eax, x \n cdq } } + * That set a return value via an ASM statement. As a result, I + * am changing this so a final ASM statement does not count as + * "fall through" for the purposes of this warning. *) + (* matth: But it's better to assume assembly will fall through, + * since most such blocks do. It's probably better to print an + * unnecessary warning than to break CIL's invariant that + * return statements are inserted properly. *) + let instrFallsThrough (i : instr) = match i with + Set _ -> true + | Call (None, Lval (Var e, NoOffset), _, _) -> + (* See if this is exit, or if it has the noreturn attribute *) + if e.vname = "exit" then false + else if hasAttribute "noreturn" e.vattr then false + else true + | Call _ -> true + | Asm _ -> true + in + let rec stmtFallsThrough (s: stmt) : bool = + match s.skind with + Instr(il) -> + List.fold_left (fun acc elt -> + acc && instrFallsThrough elt) true il + | Return _ | Break _ | Continue _ -> false + | Goto _ -> false + | If (_, b1, b2, _) -> + blockFallsThrough b1 || blockFallsThrough b2 + | Switch (e, b, targets, _) -> + (* See if there is a "default" case *) + if not + (List.exists (fun s -> + List.exists (function Default _ -> true | _ -> false) + s.labels) + targets) then begin +(* + ignore (E.log "Switch falls through because no default"); + +*) true (* We fall through because there is no default *) + end else begin + (* We must examine all cases. If any falls through, + * then the switch falls through. *) + blockFallsThrough b || blockCanBreak b + end +(* + | Loop (b, _, _, _) -> + (* A loop falls through if it can break. *) + blockCanBreak b +*) + | While (_, b, _) -> blockCanBreak b + | DoWhile (_, b, _) -> blockCanBreak b + | For (_, _, _, b, _) -> blockCanBreak b + | Block b -> blockFallsThrough b + | TryFinally (b, h, _) -> blockFallsThrough h + | TryExcept (b, _, h, _) -> true (* Conservative *) + and blockFallsThrough b = + let rec fall = function + [] -> true + | s :: rest -> + if stmtFallsThrough s then begin +(* + ignore (E.log "Stmt %a falls through\n" d_stmt s); +*) + fall rest + end else begin +(* + ignore (E.log "Stmt %a DOES NOT fall through\n" + d_stmt s); +*) + (* If we are not falling thorough then maybe there + * are labels who are *) + labels rest + end + and labels = function + [] -> false + (* We have a label, perhaps we can jump here *) + | s :: rest when s.labels <> [] -> +(* + ignore (E.log "invoking fall %a: %a\n" + d_loc !currentLoc d_stmt s); +*) + fall (s :: rest) + | _ :: rest -> labels rest + in + let res = fall b.bstmts in +(* + ignore (E.log "blockFallsThrough=%b %a\n" res d_block b); +*) + res + (* will we leave this statement or block with a break command? *) + and stmtCanBreak (s: stmt) : bool = + match s.skind with + Instr _ | Return _ | Continue _ | Goto _ -> false + | Break _ -> true + | If (_, b1, b2, _) -> + blockCanBreak b1 || blockCanBreak b2 + | Switch _ | (*Loop _*) While _ | DoWhile _ | For _ -> + (* switches and loops catch any breaks in their bodies *) + false + | Block b -> blockCanBreak b + | TryFinally (b, h, _) -> blockCanBreak b || blockCanBreak h + | TryExcept (b, _, h, _) -> blockCanBreak b || blockCanBreak h + and blockCanBreak b = + List.exists stmtCanBreak b.bstmts + in + if blockFallsThrough !currentFunctionFDEC.sbody then begin +(* + let retval = + match unrollType !currentReturnType with + TVoid _ -> None + | (TInt _ | TEnum _ | TFloat _ | TPtr _) as rt -> + ignore (warn "Body of function %s falls-through. Adding a return statement\n" !currentFunctionFDEC.svar.vname); + Some (mkCastT zero intType rt) + | _ -> + ignore (warn "Body of function %s falls-through and cannot find an appropriate return value\n" !currentFunctionFDEC.svar.vname); + None + in + if not (hasAttribute "noreturn" + !currentFunctionFDEC.svar.vattr) then + !currentFunctionFDEC.sbody.bstmts <- + !currentFunctionFDEC.sbody.bstmts + @ [mkStmt (Return(retval, endloc))] +*) + end; + + (* ignore (E.log "The env after finishing the body of %s:\n%t\n" + n docEnv); *) + cabsPushGlobal (GFun (!currentFunctionFDEC, funloc)); + empty + with E.Error as e -> raise e + | e -> begin + ignore (E.log "error in collectFunction %s: %s\n" + n (Printexc.to_string e)); + cabsPushGlobal (GAsm("error in function " ^ n, !currentLoc)); + empty + end) + () (* argument of E.withContext *) + end (* FUNDEF *) + + | LINKAGE (n, loc, dl) -> + currentLoc := convLoc loc; + if n <> "C" then + ignore (warn "Encountered linkage specification \"%s\"" n); + if not isglobal then + E.s (error "Encountered linkage specification in local scope"); + (* For now drop the linkage on the floor !!! *) + List.iter + (fun d -> + let s = doDecl isglobal d in + if isNotEmpty s then + E.s (bug "doDecl returns non-empty statement for global")) + dl; + empty + + | _ -> E.s (error "unexpected form of declaration") + +and doTypedef ((specs, nl): A.name_group) = + try + (* Do the specifiers exactly once *) + let bt, sto, inl, attrs = doSpecList (suggestAnonName nl) specs in + if sto <> NoStorage || inl then + E.s (error "Storage or inline specifier not allowed in typedef"); + let createTypedef ((n,ndt,a,loc) : A.name) = + (* E.s (error "doTypeDef") *) + try + let newTyp, tattr = + doType AttrType bt (A.PARENTYPE(attrs, ndt, a)) in + let newTyp' = cabsTypeAddAttributes tattr newTyp in + (* Create a new name for the type. Use the same name space as that of + * variables to avoid confusion between variable names and types. This + * is actually necessary in some cases. *) + let n', _ = newAlphaName true "" n in + let ti = { tname = n'; ttype = newTyp'; treferenced = false } in + (* Since we use the same name space, we might later hit a global with + * the same name and we would want to change the name of the global. + * It is better to change the name of the type instead. So, remember + * all types whose names have changed *) + H.add typedefs n' ti; + let namedTyp = TNamed(ti, []) in + (* Register the type. register it as local because we might be in a + * local context *) + addLocalToEnv (kindPlusName "type" n) (EnvTyp namedTyp); + cabsPushGlobal (GType (ti, !currentLoc)) + with E.Error as e -> raise e + | e -> begin + ignore (E.log "Error on A.TYPEDEF (%s)\n" + (Printexc.to_string e)); + cabsPushGlobal (GAsm ("booo_typedef:" ^ n, !currentLoc)) + end + in + List.iter createTypedef nl + with E.Error as e -> raise e + | e -> begin + ignore (E.log "Error on A.TYPEDEF (%s)\n" + (Printexc.to_string e)); + let fstname = + match nl with + [] -> "" + | (n, _, _, _) :: _ -> n + in + cabsPushGlobal (GAsm ("booo_typedef: " ^ fstname, !currentLoc)) + end + +and doOnlyTypedef (specs: A.spec_elem list) : unit = + try + let bt, sto, inl, attrs = doSpecList "" specs in + if sto <> NoStorage || inl then + E.s (error "Storage or inline specifier not allowed in typedef"); + let restyp, nattr = doType AttrType bt (A.PARENTYPE(attrs, + A.JUSTBASE, [])) in + if nattr <> [] then + ignore (warn "Ignoring identifier attribute"); + (* doSpec will register the type. *) + (* See if we are defining a composite or enumeration type, and in that + * case move the attributes from the defined type into the composite type + * *) + let isadef = + List.exists + (function + A.SpecType(A.Tstruct(_, Some _, _)) -> true + | A.SpecType(A.Tunion(_, Some _, _)) -> true + | A.SpecType(A.Tenum(_, Some _, _)) -> true + | _ -> false) specs + in + match restyp with + TComp(ci, al) -> + if isadef then begin + ci.cattr <- cabsAddAttributes ci.cattr al; + (* The GCompTag was already added *) + end else (* Add a GCompTagDecl *) + cabsPushGlobal (GCompTagDecl(ci, !currentLoc)) + | TEnum(ei, al) -> + if isadef then begin + ei.eattr <- cabsAddAttributes ei.eattr al; + end else + cabsPushGlobal (GEnumTagDecl(ei, !currentLoc)) + | _ -> + ignore (warn "Ignoring un-named typedef that does not introduce a struct or enumeration type\n") + + with E.Error as e -> raise e + | e -> begin + ignore (E.log "Error on A.ONLYTYPEDEF (%s)\n" + (Printexc.to_string e)); + cabsPushGlobal (GAsm ("booo_typedef", !currentLoc)) + end + +and assignInit (lv: lval) + (ie: init) + (iet: typ) + (acc: chunk) : chunk = + match ie with + SingleInit e -> + let (_, e'') = castTo iet (typeOfLval lv) e in + acc +++ (Set(lv, e'', !currentLoc)) + | CompoundInit (t, initl) -> + foldLeftCompound + ~doinit:(fun off i it acc -> + assignInit (addOffsetLval off lv) i it acc) + ~ct:t + ~initl:initl + ~acc:acc +(* + | ArrayInit (bt, len, initl) -> + let idx = ref ( -1 ) in + List.fold_left + (fun acc i -> + assignInit (addOffsetLval (Index(integer !idx, NoOffset)) lv) i bt acc) + acc + initl +*) + (* Now define the processors for body and statement *) +and doBody (blk: A.block) : chunk = + enterScope (); + (* Rename the labels and add them to the environment *) + List.iter (fun l -> ignore (genNewLocalLabel l)) blk.blabels; + (* See if we have some attributes *) + let battrs = doAttributes blk.A.battrs in + + let bodychunk = + afterConversion + (List.fold_left (* !!! @ evaluates its arguments backwards *) + (fun prev s -> let res = doStatement s in + prev @@ res) + empty + blk.A.bstmts) + in + exitScope (); + + + if battrs == [] then + bodychunk + else begin + let b = c2block bodychunk in + b.battrs <- battrs; + s2c (mkStmt (Block b)) + end + +and doStatement (s : A.statement) : chunk = + try + match s with + A.NOP _ -> skipChunk + | A.COMPUTATION (e, loc) -> + currentLoc := convLoc loc; + let (lasts, data) = !gnu_body_result in + if lasts == s then begin (* This is the last in a GNU_BODY *) + let (s', e', t') = doExp false e (AExp None) in + data := Some (e', t'); (* Record the result *) + s' + end else + let (s', _, _) = doExp false e ADrop in + (* drop the side-effect free expression *) + (* And now do some peep-hole optimizations *) + s' + + | A.BLOCK (b, loc) -> + currentLoc := convLoc loc; + doBody b + + | A.SEQUENCE (s1, s2, loc) -> + (doStatement s1) @@ (doStatement s2) + + | A.IF(e,st,sf,loc) -> + let st' = doStatement st in + let sf' = doStatement sf in + currentLoc := convLoc loc; + doCondition false e st' sf' + + | A.WHILE(e,s,loc) -> +(* + startLoop true; + let s' = doStatement s in + exitLoop (); + let loc' = convLoc loc in + currentLoc := loc'; + loopChunk ((doCondition false e skipChunk + (breakChunk loc')) + @@ s') +*) + (** We need to convert A.WHILE(e,s) where e may have side effects + into Cil.While(e',s') where e' is side-effect free. *) + + (* Let e == (sCond , eCond) with sCond a sequence of statements + and eCond a side-effect free expression. *) + let (sCond, eCond, _) = doExp false e (AExp None) in + + (* Then doStatement(A.WHILE((sCond , eCond), s)) + = sCond ; Cil.While(eCond, (doStatement(s) ; sCond)) + where doStatement(A.CONTINUE) = (sCond ; Cil.Continue). *) + + startLoop (DuplicateBeforeContinue sCond); + let s' = doStatement s in + exitLoop (); + let loc' = convLoc loc in + currentLoc := loc'; + sCond @@ (whileChunk eCond (s' @@ sCond)) + + | A.DOWHILE(e,s,loc) -> +(* + startLoop false; + let s' = doStatement s in + let loc' = convLoc loc in + currentLoc := loc'; + let s'' = + consLabContinue (doCondition false e skipChunk (breakChunk loc')) + in + exitLoop (); + loopChunk (s' @@ s'') +*) + (** We need to convert A.DOWHILE(e,s) where e may have side effects + into Cil.DoWhile(e',s') where e' is side-effect free. *) + + (* Let e == (sCond , eCond) with sCond a sequence of statements + and eCond a side-effect free expression. *) + let (sCond, eCond, _) = doExp false e (AExp None) in + + (* Then doStatement(A.DOWHILE((sCond , eCond), s)) + = Cil.DoWhile(eCond, (doStatement(s) ; sCond)) + where doStatement(A.CONTINUE) = (sCond ; Cil.Continue). *) + + startLoop (DuplicateBeforeContinue sCond); + let s' = doStatement s in + exitLoop (); + let loc' = convLoc loc in + currentLoc := loc'; + doWhileChunk eCond (s' @@ sCond) + + | A.FOR(fc1,e2,e3,s,loc) -> +(*begin + let loc' = convLoc loc in + currentLoc := loc'; + enterScope (); (* Just in case we have a declaration *) + let (se1, _, _) = + match fc1 with + FC_EXP e1 -> doExp false e1 ADrop + | FC_DECL d1 -> (doDecl false d1, zero, voidType) + in + let (se3, _, _) = doExp false e3 ADrop in + startLoop false; + let s' = doStatement s in + currentLoc := loc'; + let s'' = consLabContinue se3 in + exitLoop (); + let res = + match e2 with + A.NOTHING -> (* This means true *) + se1 @@ loopChunk (s' @@ s'') + | _ -> + se1 @@ loopChunk ((doCondition false e2 skipChunk (breakChunk loc')) + @@ s' @@ s'') + in + exitScope (); + res + end +*) + (** We need to convert A.FOR(e1,e2,e3,s) where e1, e2 and e3 may + have side effects into Cil.For(bInit,e2',bIter,s') where e2' + is side-effect free. **) + + (* Let e1 == bInit be a block of statements + Let e2 == (bCond , eCond) with bCond a block of statements + and eCond a side-effect free expression + Let e3 == bIter be a sequence of statements. *) + let (bInit, _, _) = match fc1 with + | FC_EXP e1 -> doExp false e1 ADrop + | FC_DECL d1 -> (doDecl false d1, zero, voidType) in + let (bCond, eCond, _) = doExp false e2 (AExp None) in + let eCond' = match eCond with + | Const(CStr "exp_nothing") -> Cil.one + | _ -> eCond in + let (bIter, _, _) = doExp false e3 ADrop in + + (* Then doStatement(A.FOR(bInit, (bCond , eCond), bIter, s)) + = Cil.For({bInit; bCond}, eCond', {bIter; bCond}, {doStatement(s)}) + where doStatement(A.CONTINUE) = Cil.Continue. *) + + startLoop ContinueUnchanged; + let s' = doStatement s in + exitLoop (); + let loc' = convLoc loc in + currentLoc := loc'; + (forChunk (bInit @@ bCond) eCond' (bIter @@ bCond) s') + + | A.BREAK loc -> + let loc' = convLoc loc in + currentLoc := loc'; + breakChunk loc' + + | A.CONTINUE loc -> + let loc' = convLoc loc in + currentLoc := loc'; +(* + continueOrLabelChunk loc' +*) + continueDuplicateChunk loc' + + | A.RETURN (A.NOTHING, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + if not (isVoidType !currentReturnType) then + ignore (warn "Return statement without a value in function returning %a\n" d_type !currentReturnType); + returnChunk None loc' + + | A.RETURN (e, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + (* Sometimes we return the result of a void function call *) + if isVoidType !currentReturnType then begin + ignore (warn "Return statement with a value in function returning void"); + let (se, _, _) = doExp false e ADrop in + se @@ returnChunk None loc' + end else begin + let (se, e', et) = + doExp false e (AExp (Some !currentReturnType)) in + let (et'', e'') = castTo et (!currentReturnType) e' in + se @@ (returnChunk (Some e'') loc') + end + + | A.SWITCH (e, s, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + let (se, e', et) = doExp false e (AExp (Some intType)) in + let (et'', e'') = castTo et intType e' in + let s' = doStatement s in + se @@ (switchChunk e'' s' loc') + + | A.CASE (e, s, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + let (se, e', et) = doExp true e (AExp None) in + if isNotEmpty se then + E.s (error "Case statement with a non-constant"); + caseRangeChunk [if !lowerConstants then constFold false e' else e'] + loc' (doStatement s) + + | A.CASERANGE (el, eh, s, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + let (sel, el', etl) = doExp false el (AExp None) in + let (seh, eh', etl) = doExp false eh (AExp None) in + if isNotEmpty sel || isNotEmpty seh then + E.s (error "Case statement with a non-constant"); + let il, ih = + match constFold true el', constFold true eh' with + Const(CInt64(il, _, _)), Const(CInt64(ih, _, _)) -> + Int64.to_int il, Int64.to_int ih + | _ -> E.s (unimp "Cannot understand the constants in case range") + in + if il > ih then + E.s (error "Empty case range"); + let rec mkAll (i: int) = + if i > ih then [] else integer i :: mkAll (i + 1) + in + caseRangeChunk (mkAll il) loc' (doStatement s) + + + | A.DEFAULT (s, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + defaultChunk loc' (doStatement s) + + | A.LABEL (l, s, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + (* Lookup the label because it might have been locally defined *) + consLabel (lookupLabel l) (doStatement s) loc' true + + | A.GOTO (l, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + (* Maybe we need to rename this label *) + gotoChunk (lookupLabel l) loc' + + | A.COMPGOTO (e, loc) -> begin + let loc' = convLoc loc in + currentLoc := loc'; + (* Do the expression *) + let se, e', t' = doExp false e (AExp (Some voidPtrType)) in + match !gotoTargetData with + Some (switchv, switch) -> (* We have already generated this one *) + se + @@ i2c(Set (var switchv, mkCast e' uintType, loc')) + @@ s2c(mkStmt(Goto (ref switch, loc'))) + + | None -> begin + (* Make a temporary variable *) + let vchunk = createLocal + (TInt(IUInt, []), NoStorage, false, []) + (("__compgoto", A.JUSTBASE, [], loc), A.NO_INIT) + in + if not (isEmpty vchunk) then + E.s (unimp "Non-empty chunk in creating temporary for goto *"); + let switchv, _ = + try lookupVar "__compgoto" + with Not_found -> E.s (bug "Cannot find temporary for goto *"); + in + (* Make a switch statement. We'll fill in the statements at the + * end of the function *) + let switch = mkStmt (Switch (Lval(var switchv), + mkBlock [], [], loc')) in + (* And make a label for it since we'll goto it *) + switch.labels <- [Label ("__docompgoto", loc', false)]; + gotoTargetData := Some (switchv, switch); + se @@ i2c (Set(var switchv, mkCast e' uintType, loc')) @@ + s2c switch + end + end + + | A.DEFINITION d -> + let s = doDecl false d in +(* + ignore (E.log "Def at %a: %a\n" d_loc !currentLoc d_chunk s); +*) + s + + + + | A.ASM (asmattr, tmpls, details, loc) -> + (* Make sure all the outs are variables *) + let loc' = convLoc loc in + let attr' = doAttributes asmattr in + currentLoc := loc'; + let stmts : chunk ref = ref empty in + let (tmpls', outs', ins', clobs') = + match details with + | None -> + let tmpls' = + if !msvcMode then + tmpls + else + let pattern = Str.regexp "%" in + let escape = Str.global_replace pattern "%%" in + List.map escape tmpls + in + (tmpls', [], [], []) + | Some { aoutputs = outs; ainputs = ins; aclobbers = clobs } -> + let outs' = + List.map + (fun (c, e) -> + let (se, e', t) = doExp false e (AExp None) in + let lv = + match e' with + | Lval lval + | StartOf lval -> lval + | _ -> E.s (error "Expected lval for ASM outputs") + in + stmts := !stmts @@ se; + (c, lv)) outs + in + (* Get the side-effects out of expressions *) + let ins' = + List.map + (fun (c, e) -> + let (se, e', et) = doExp false e (AExp None) in + stmts := !stmts @@ se; + (c, e')) + ins + in + (tmpls, outs', ins', clobs) + in + !stmts @@ + (i2c (Asm(attr', tmpls', outs', ins', clobs', loc'))) + + | TRY_FINALLY (b, h, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + let b': chunk = doBody b in + let h': chunk = doBody h in + if b'.cases <> [] || h'.cases <> [] then + E.s (error "Try statements cannot contain switch cases"); + + s2c (mkStmt (TryFinally (c2block b', c2block h', loc'))) + + | TRY_EXCEPT (b, e, h, loc) -> + let loc' = convLoc loc in + currentLoc := loc'; + let b': chunk = doBody b in + (* Now do e *) + let ((se: chunk), e', t') = doExp false e (AExp None) in + let h': chunk = doBody h in + if b'.cases <> [] || h'.cases <> [] || se.cases <> [] then + E.s (error "Try statements cannot contain switch cases"); + (* Now take se and try to convert it to a list of instructions. This + * might not be always possible *) + let il' = + match compactStmts se.stmts with + [] -> se.postins + | [ s ] -> begin + match s.skind with + Instr il -> il @ se.postins + | _ -> E.s (error "Except expression contains unexpected statement") + end + | _ -> E.s (error "Except expression contains too many statements") + in + s2c (mkStmt (TryExcept (c2block b', (il', e'), c2block h', loc'))) + + with e -> begin + (ignore (E.log "Error in doStatement (%s)\n" (Printexc.to_string e))); + consLabel "booo_statement" empty (convLoc (A.get_statementloc s)) false + end + + +(* Translate a file *) +let convFile ((fname : string), (dl : Cabs.definition list)) : Cil.file = + Cil.initCIL (); (* make sure we have initialized CIL *) + (* Clean up the global types *) + E.hadErrors := false; + initGlobals(); + startFile (); + IH.clear noProtoFunctions; + H.clear compInfoNameEnv; + H.clear enumInfoNameEnv; + IH.clear mustTurnIntoDef; + H.clear alreadyDefined; + H.clear staticLocals; + H.clear typedefs; + H.clear isomorphicStructs; + annonCompFieldNameId := 0; + if !E.verboseFlag || !Cilutil.printStages then + ignore (E.log "Converting CABS->CIL\n"); + (* Setup the built-ins, but do not add their prototypes to the file *) + let setupBuiltin name (resTyp, argTypes, isva) = + let v = + makeGlobalVar name (TFun(resTyp, + Some (List.map (fun at -> ("", at, [])) + argTypes), + isva, [])) in + ignore (alphaConvertVarAndAddToEnv true v) + in + H.iter setupBuiltin (if !msvcMode then msvcBuiltins else gccBuiltins); + + let globalidx = ref 0 in + let doOneGlobal (d: A.definition) = + let s = doDecl true d in + if isNotEmpty s then + E.s (bug "doDecl returns non-empty statement for global"); + (* See if this is one of the globals which we can leave alone. Increment + * globalidx and see if we must leave this alone. *) + if + (match d with + A.DECDEF _ -> true + | A.FUNDEF _ -> true + | _ -> false) && (incr globalidx; !globalidx = !nocil) then begin + (* Create a file where we put the CABS output *) + let temp_cabs_name = "__temp_cabs" in + let temp_cabs = open_out temp_cabs_name in + (* Now print the CABS in there *) + Cprint.commit (); Cprint.flush (); + let old = !Cprint.out in (* Save the old output channel *) + Cprint.out := temp_cabs; + Cprint.print_def d; + Cprint.commit (); Cprint.flush (); + flush !Cprint.out; + Cprint.out := old; + close_out temp_cabs; + (* Now read everythign in *and create a GText from it *) + let temp_cabs = open_in temp_cabs_name in + let buff = Buffer.create 1024 in + Buffer.add_string buff "// Start of CABS form\n"; + Buffer.add_channel buff temp_cabs (in_channel_length temp_cabs); + Buffer.add_string buff "// End of CABS form\n"; + close_in temp_cabs; + (* Try to pop the last thing in the file *) + (match !theFile with + _ :: rest -> theFile := rest + | _ -> ()); + (* Insert in the file a GText *) + cabsPushGlobal (GText(Buffer.contents buff)) + end + in + List.iter doOneGlobal dl; + let globals = ref (popGlobals ()) in + + IH.clear noProtoFunctions; + IH.clear mustTurnIntoDef; + H.clear alreadyDefined; + H.clear compInfoNameEnv; + H.clear enumInfoNameEnv; + H.clear isomorphicStructs; + H.clear staticLocals; + H.clear typedefs; + H.clear env; + H.clear genv; + IH.clear callTempVars; + + if false then ignore (E.log "Cabs2cil converted %d globals\n" !globalidx); + (* We are done *) + { fileName = fname; + globals = !globals; + globinit = None; + globinitcalled = false; + } + + + + diff --git a/cil/src/frontc/cabs2cil.mli b/cil/src/frontc/cabs2cil.mli new file mode 100644 index 00000000..986f5a28 --- /dev/null +++ b/cil/src/frontc/cabs2cil.mli @@ -0,0 +1,49 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +val convFile: Cabs.file -> Cil.file + +(** NDC added command line parameter **) +(* Turn on tranformation that forces correct parameter evaluation order *) +val forceRLArgEval: bool ref + +(* Set this integer to the index of the global to be left in CABS form. Use + * -1 to disable *) +val nocil: int ref + +(* Indicates whether we're allowed to duplicate small chunks of code. *) +val allowDuplication: bool ref diff --git a/cil/src/frontc/cabsvisit.ml b/cil/src/frontc/cabsvisit.ml new file mode 100644 index 00000000..b2f9784a --- /dev/null +++ b/cil/src/frontc/cabsvisit.ml @@ -0,0 +1,577 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* cabsvisit.ml *) +(* tree visitor and rewriter for cabs *) + +open Cabs +open Trace +open Pretty +module E = Errormsg + +(* basic interface for a visitor object *) + +(* Different visiting actions. 'a will be instantiated with exp, instr, etc. *) +type 'a visitAction = + SkipChildren (* Do not visit the children. Return + * the node as it is *) + | ChangeTo of 'a (* Replace the expression with the + * given one *) + | DoChildren (* Continue with the children of this + * node. Rebuild the node on return + * if any of the children changes + * (use == test) *) + | ChangeDoChildrenPost of 'a * ('a -> 'a) (* First consider that the entire + * exp is replaced by the first + * paramenter. Then continue with + * the children. On return rebuild + * the node if any of the children + * has changed and then apply the + * function on the node *) + +type nameKind = + NVar (* Variable or function prototype + name *) + | NFun (* A function definition name *) + | NField (* The name of a field *) + | NType (* The name of a type *) + +(* All visit methods are called in preorder! (but you can use + * ChangeDoChildrenPost to change the order) *) +class type cabsVisitor = object + method vexpr: expression -> expression visitAction (* expressions *) + method vinitexpr: init_expression -> init_expression visitAction + method vstmt: statement -> statement list visitAction + method vblock: block -> block visitAction + method vvar: string -> string (* use of a variable + * names *) + method vdef: definition -> definition list visitAction + method vtypespec: typeSpecifier -> typeSpecifier visitAction + method vdecltype: decl_type -> decl_type visitAction + + (* For each declaration we call vname *) + method vname: nameKind -> specifier -> name -> name visitAction + method vspec: specifier -> specifier visitAction (* specifier *) + method vattr: attribute -> attribute list visitAction + + method vEnterScope: unit -> unit + method vExitScope: unit -> unit +end + +let visitorLocation = ref { filename = ""; + lineno = -1; + byteno = -1;} + + (* a default visitor which does nothing to the tree *) +class nopCabsVisitor : cabsVisitor = object + method vexpr (e:expression) = DoChildren + method vinitexpr (e:init_expression) = DoChildren + method vstmt (s: statement) = + visitorLocation := get_statementloc s; + DoChildren + method vblock (b: block) = DoChildren + method vvar (s: string) = s + method vdef (d: definition) = + visitorLocation := get_definitionloc d; + DoChildren + method vtypespec (ts: typeSpecifier) = DoChildren + method vdecltype (dt: decl_type) = DoChildren + method vname k (s:specifier) (n: name) = DoChildren + method vspec (s:specifier) = DoChildren + method vattr (a: attribute) = DoChildren + + method vEnterScope () = () + method vExitScope () = () +end + + (* Map but try not to copy the list unless necessary *) +let rec mapNoCopy (f: 'a -> 'a) = function + [] -> [] + | (i :: resti) as li -> + let i' = f i in + let resti' = mapNoCopy f resti in + if i' != i || resti' != resti then i' :: resti' else li + +let rec mapNoCopyList (f: 'a -> 'a list) = function + [] -> [] + | (i :: resti) as li -> + let il' = f i in + let resti' = mapNoCopyList f resti in + match il' with + [i'] when i' == i && resti' == resti -> li + | _ -> il' @ resti' + +let doVisit (vis: cabsVisitor) + (startvisit: 'a -> 'a visitAction) + (children: cabsVisitor -> 'a -> 'a) + (node: 'a) : 'a = + let action = startvisit node in + match action with + SkipChildren -> node + | ChangeTo node' -> node' + | _ -> + let nodepre = match action with + ChangeDoChildrenPost (node', _) -> node' + | _ -> node + in + let nodepost = children vis nodepre in + match action with + ChangeDoChildrenPost (_, f) -> f nodepost + | _ -> nodepost + +(* A visitor for lists *) +let doVisitList (vis: cabsVisitor) + (startvisit: 'a -> 'a list visitAction) + (children: cabsVisitor -> 'a -> 'a) + (node: 'a) : 'a list = + let action = startvisit node in + match action with + SkipChildren -> [node] + | ChangeTo nodes' -> nodes' + | _ -> + let nodespre = match action with + ChangeDoChildrenPost (nodespre, _) -> nodespre + | _ -> [node] + in + let nodespost = mapNoCopy (children vis) nodespre in + match action with + ChangeDoChildrenPost (_, f) -> f nodespost + | _ -> nodespost + + +let rec visitCabsTypeSpecifier (vis: cabsVisitor) (ts: typeSpecifier) = + doVisit vis vis#vtypespec childrenTypeSpecifier ts + +and childrenTypeSpecifier vis ts = + let childrenFieldGroup ((s, nel) as input) = + let s' = visitCabsSpecifier vis s in + let doOneField ((n, eo) as input) = + let n' = visitCabsName vis NField s' n in + let eo' = + match eo with + None -> None + | Some e -> let e' = visitCabsExpression vis e in + if e' != e then Some e' else eo + in + if n' != n || eo' != eo then (n', eo') else input + in + let nel' = mapNoCopy doOneField nel in + if s' != s || nel' != nel then (s', nel') else input + in + match ts with + Tstruct (n, Some fg, extraAttrs) -> + (*(trace "sm" (dprintf "visiting struct %s\n" n));*) + let fg' = mapNoCopy childrenFieldGroup fg in + if fg' != fg then Tstruct( n, Some fg', extraAttrs) else ts + | Tunion (n, Some fg, extraAttrs) -> + let fg' = mapNoCopy childrenFieldGroup fg in + if fg' != fg then Tunion( n, Some fg', extraAttrs) else ts + | Tenum (n, Some ei, extraAttrs) -> + let doOneEnumItem ((s, e, loc) as ei) = + let e' = visitCabsExpression vis e in + if e' != e then (s, e', loc) else ei + in + vis#vEnterScope (); + let ei' = mapNoCopy doOneEnumItem ei in + vis#vExitScope(); + if ei' != ei then Tenum( n, Some ei', extraAttrs) else ts + | TtypeofE e -> + let e' = visitCabsExpression vis e in + if e' != e then TtypeofE e' else ts + | TtypeofT (s, dt) -> + let s' = visitCabsSpecifier vis s in + let dt' = visitCabsDeclType vis false dt in + if s != s' || dt != dt' then TtypeofT (s', dt') else ts + | ts -> ts + +and childrenSpecElem (vis: cabsVisitor) (se: spec_elem) : spec_elem = + match se with + SpecTypedef | SpecInline | SpecStorage _ | SpecPattern _ -> se + | SpecCV _ -> se (* cop out *) + | SpecAttr a -> begin + let al' = visitCabsAttribute vis a in + match al' with + [a''] when a'' == a -> se + | [a''] -> SpecAttr a'' + | _ -> E.s (E.unimp "childrenSpecElem: visitCabsAttribute returned a list") + end + | SpecType ts -> + let ts' = visitCabsTypeSpecifier vis ts in + if ts' != ts then SpecType ts' else se + +and visitCabsSpecifier (vis: cabsVisitor) (s: specifier) : specifier = + doVisit vis vis#vspec childrenSpec s +and childrenSpec vis s = mapNoCopy (childrenSpecElem vis) s + + +and visitCabsDeclType vis (isfundef: bool) (dt: decl_type) : decl_type = + doVisit vis vis#vdecltype (childrenDeclType isfundef) dt +and childrenDeclType isfundef vis dt = + match dt with + JUSTBASE -> dt + | PARENTYPE (prea, dt1, posta) -> + let prea' = mapNoCopyList (visitCabsAttribute vis) prea in + let dt1' = visitCabsDeclType vis isfundef dt1 in + let posta'= mapNoCopyList (visitCabsAttribute vis) posta in + if prea' != prea || dt1' != dt1 || posta' != posta then + PARENTYPE (prea', dt1', posta') else dt + | ARRAY (dt1, al, e) -> + let dt1' = visitCabsDeclType vis isfundef dt1 in + let al' = mapNoCopy (childrenAttribute vis) al in + let e'= visitCabsExpression vis e in + if dt1' != dt1 || al' != al || e' != e then ARRAY(dt1', al', e') else dt + | PTR (al, dt1) -> + let al' = mapNoCopy (childrenAttribute vis) al in + let dt1' = visitCabsDeclType vis isfundef dt1 in + if al' != al || dt1' != dt1 then PTR(al', dt1') else dt + | PROTO (dt1, snl, b) -> + (* Do not propagate isfundef further *) + let dt1' = visitCabsDeclType vis false dt1 in + let _ = vis#vEnterScope () in + let snl' = mapNoCopy (childrenSingleName vis NVar) snl in + (* Exit the scope only if not in a function definition *) + let _ = if not isfundef then vis#vExitScope () in + if dt1' != dt1 || snl' != snl then PROTO(dt1', snl', b) else dt + + +and childrenNameGroup vis (kind: nameKind) ((s, nl) as input) = + let s' = visitCabsSpecifier vis s in + let nl' = mapNoCopy (visitCabsName vis kind s') nl in + if s' != s || nl' != nl then (s', nl') else input + + +and childrenInitNameGroup vis ((s, inl) as input) = + let s' = visitCabsSpecifier vis s in + let inl' = mapNoCopy (childrenInitName vis s') inl in + if s' != s || inl' != inl then (s', inl') else input + +and visitCabsName vis (k: nameKind) (s: specifier) + (n: name) : name = + doVisit vis (vis#vname k s) (childrenName s k) n +and childrenName (s: specifier) (k: nameKind) vis (n: name) : name = + let (sn, dt, al, loc) = n in + let dt' = visitCabsDeclType vis (k = NFun) dt in + let al' = mapNoCopy (childrenAttribute vis) al in + if dt' != dt || al' != al then (sn, dt', al', loc) else n + +and childrenInitName vis (s: specifier) (inn: init_name) : init_name = + let (n, ie) = inn in + let n' = visitCabsName vis NVar s n in + let ie' = visitCabsInitExpression vis ie in + if n' != n || ie' != ie then (n', ie') else inn + +and childrenSingleName vis (k: nameKind) (sn: single_name) : single_name = + let s, n = sn in + let s' = visitCabsSpecifier vis s in + let n' = visitCabsName vis k s' n in + if s' != s || n' != n then (s', n') else sn + +and visitCabsDefinition vis (d: definition) : definition list = + doVisitList vis vis#vdef childrenDefinition d +and childrenDefinition vis d = + match d with + FUNDEF (sn, b, l, lend) -> + let sn' = childrenSingleName vis NFun sn in + let b' = visitCabsBlock vis b in + (* End the scope that was started by childrenFunctionName *) + vis#vExitScope (); + if sn' != sn || b' != b then FUNDEF (sn', b', l, lend) else d + + | DECDEF ((s, inl), l) -> + let s' = visitCabsSpecifier vis s in + let inl' = mapNoCopy (childrenInitName vis s') inl in + if s' != s || inl' != inl then DECDEF ((s', inl'), l) else d + | TYPEDEF (ng, l) -> + let ng' = childrenNameGroup vis NType ng in + if ng' != ng then TYPEDEF (ng', l) else d + | ONLYTYPEDEF (s, l) -> + let s' = visitCabsSpecifier vis s in + if s' != s then ONLYTYPEDEF (s', l) else d + | GLOBASM _ -> d + | PRAGMA (e, l) -> + let e' = visitCabsExpression vis e in + if e' != e then PRAGMA (e', l) else d + | LINKAGE (n, l, dl) -> + let dl' = mapNoCopyList (visitCabsDefinition vis) dl in + if dl' != dl then LINKAGE (n, l, dl') else d + + | TRANSFORMER _ -> d + | EXPRTRANSFORMER _ -> d + +and visitCabsBlock vis (b: block) : block = + doVisit vis vis#vblock childrenBlock b + +and childrenBlock vis (b: block) : block = + let _ = vis#vEnterScope () in + let battrs' = mapNoCopyList (visitCabsAttribute vis) b.battrs in + let bstmts' = mapNoCopyList (visitCabsStatement vis) b.bstmts in + let _ = vis#vExitScope () in + if battrs' != b.battrs || bstmts' != b.bstmts then + { blabels = b.blabels; battrs = battrs'; bstmts = bstmts' } + else + b + +and visitCabsStatement vis (s: statement) : statement list = + doVisitList vis vis#vstmt childrenStatement s +and childrenStatement vis s = + let ve e = visitCabsExpression vis e in + let vs l s = + match visitCabsStatement vis s with + [s'] -> s' + | sl -> BLOCK ({blabels = []; battrs = []; bstmts = sl }, l) + in + match s with + NOP _ -> s + | COMPUTATION (e, l) -> + let e' = ve e in + if e' != e then COMPUTATION (e', l) else s + | BLOCK (b, l) -> + let b' = visitCabsBlock vis b in + if b' != b then BLOCK (b', l) else s + | SEQUENCE (s1, s2, l) -> + let s1' = vs l s1 in + let s2' = vs l s2 in + if s1' != s1 || s2' != s2 then SEQUENCE (s1', s2', l) else s + | IF (e, s1, s2, l) -> + let e' = ve e in + let s1' = vs l s1 in + let s2' = vs l s2 in + if e' != e || s1' != s1 || s2' != s2 then IF (e', s1', s2', l) else s + | WHILE (e, s1, l) -> + let e' = ve e in + let s1' = vs l s1 in + if e' != e || s1' != s1 then WHILE (e', s1', l) else s + | DOWHILE (e, s1, l) -> + let e' = ve e in + let s1' = vs l s1 in + if e' != e || s1' != s1 then DOWHILE (e', s1', l) else s + | FOR (fc1, e2, e3, s4, l) -> + let _ = vis#vEnterScope () in + let fc1' = + match fc1 with + FC_EXP e1 -> + let e1' = ve e1 in + if e1' != e1 then FC_EXP e1' else fc1 + | FC_DECL d1 -> + let d1' = + match visitCabsDefinition vis d1 with + [d1'] -> d1' + | _ -> E.s (E.unimp "visitCabs: for can have only one definition") + in + if d1' != d1 then FC_DECL d1' else fc1 + in + let e2' = ve e2 in + let e3' = ve e3 in + let s4' = vs l s4 in + let _ = vis#vExitScope () in + if fc1' != fc1 || e2' != e2 || e3' != e3 || s4' != s4 + then FOR (fc1', e2', e3', s4', l) else s + | BREAK _ | CONTINUE _ | GOTO _ -> s + | RETURN (e, l) -> + let e' = ve e in + if e' != e then RETURN (e', l) else s + | SWITCH (e, s1, l) -> + let e' = ve e in + let s1' = vs l s1 in + if e' != e || s1' != s1 then SWITCH (e', s1', l) else s + | CASE (e, s1, l) -> + let e' = ve e in + let s1' = vs l s1 in + if e' != e || s1' != s1 then CASE (e', s1', l) else s + | CASERANGE (e1, e2, s3, l) -> + let e1' = ve e1 in + let e2' = ve e2 in + let s3' = vs l s3 in + if e1' != e1 || e2' != e2 || s3' != s3 then + CASERANGE (e1', e2', s3', l) else s + | DEFAULT (s1, l) -> + let s1' = vs l s1 in + if s1' != s1 then DEFAULT (s1', l) else s + | LABEL (n, s1, l) -> + let s1' = vs l s1 in + if s1' != s1 then LABEL (n, s1', l) else s + | COMPGOTO (e, l) -> + let e' = ve e in + if e' != e then COMPGOTO (e', l) else s + | DEFINITION d -> begin + match visitCabsDefinition vis d with + [d'] when d' == d -> s + | [d'] -> DEFINITION d' + | dl -> let l = get_definitionloc d in + let dl' = List.map (fun d' -> DEFINITION d') dl in + BLOCK ({blabels = []; battrs = []; bstmts = dl' }, l) + end + | ASM (sl, b, details, l) -> + let childrenStringExp ((s, e) as input) = + let e' = ve e in + if e' != e then (s, e') else input + in + let details' = match details with + | None -> details + | Some { aoutputs = outl; ainputs = inl; aclobbers = clobs } -> + let outl' = mapNoCopy childrenStringExp outl in + let inl' = mapNoCopy childrenStringExp inl in + if outl' == outl && inl' == inl then + details + else + Some { aoutputs = outl'; ainputs = inl'; aclobbers = clobs } + in + if details' != details then + ASM (sl, b, details', l) else s + | TRY_FINALLY (b1, b2, l) -> + let b1' = visitCabsBlock vis b1 in + let b2' = visitCabsBlock vis b2 in + if b1' != b1 || b2' != b2 then TRY_FINALLY(b1', b2', l) else s + | TRY_EXCEPT (b1, e, b2, l) -> + let b1' = visitCabsBlock vis b1 in + let e' = visitCabsExpression vis e in + let b2' = visitCabsBlock vis b2 in + if b1' != b1 || e' != e || b2' != b2 then TRY_EXCEPT(b1', e', b2', l) else s + + +and visitCabsExpression vis (e: expression) : expression = + doVisit vis vis#vexpr childrenExpression e +and childrenExpression vis e = + let ve e = visitCabsExpression vis e in + match e with + NOTHING | LABELADDR _ -> e + | UNARY (uo, e1) -> + let e1' = ve e1 in + if e1' != e1 then UNARY (uo, e1') else e + | BINARY (bo, e1, e2) -> + let e1' = ve e1 in + let e2' = ve e2 in + if e1' != e1 || e2' != e2 then BINARY (bo, e1', e2') else e + | QUESTION (e1, e2, e3) -> + let e1' = ve e1 in + let e2' = ve e2 in + let e3' = ve e3 in + if e1' != e1 || e2' != e2 || e3' != e3 then + QUESTION (e1', e2', e3') else e + | CAST ((s, dt), ie) -> + let s' = visitCabsSpecifier vis s in + let dt' = visitCabsDeclType vis false dt in + let ie' = visitCabsInitExpression vis ie in + if s' != s || dt' != dt || ie' != ie then CAST ((s', dt'), ie') else e + | CALL (f, el) -> + let f' = ve f in + let el' = mapNoCopy ve el in + if f' != f || el' != el then CALL (f', el') else e + | COMMA el -> + let el' = mapNoCopy ve el in + if el' != el then COMMA (el') else e + | CONSTANT _ -> e + | VARIABLE s -> + let s' = vis#vvar s in + if s' != s then VARIABLE s' else e + | EXPR_SIZEOF (e1) -> + let e1' = ve e1 in + if e1' != e1 then EXPR_SIZEOF (e1') else e + | TYPE_SIZEOF (s, dt) -> + let s' = visitCabsSpecifier vis s in + let dt' = visitCabsDeclType vis false dt in + if s' != s || dt' != dt then TYPE_SIZEOF (s' ,dt') else e + | EXPR_ALIGNOF (e1) -> + let e1' = ve e1 in + if e1' != e1 then EXPR_ALIGNOF (e1') else e + | TYPE_ALIGNOF (s, dt) -> + let s' = visitCabsSpecifier vis s in + let dt' = visitCabsDeclType vis false dt in + if s' != s || dt' != dt then TYPE_ALIGNOF (s' ,dt') else e + | INDEX (e1, e2) -> + let e1' = ve e1 in + let e2' = ve e2 in + if e1' != e1 || e2' != e2 then INDEX (e1', e2') else e + | MEMBEROF (e1, n) -> + let e1' = ve e1 in + if e1' != e1 then MEMBEROF (e1', n) else e + | MEMBEROFPTR (e1, n) -> + let e1' = ve e1 in + if e1' != e1 then MEMBEROFPTR (e1', n) else e + | GNU_BODY b -> + let b' = visitCabsBlock vis b in + if b' != b then GNU_BODY b' else e + | EXPR_PATTERN _ -> e + +and visitCabsInitExpression vis (ie: init_expression) : init_expression = + doVisit vis vis#vinitexpr childrenInitExpression ie +and childrenInitExpression vis ie = + let rec childrenInitWhat iw = + match iw with + NEXT_INIT -> iw + | INFIELD_INIT (n, iw1) -> + let iw1' = childrenInitWhat iw1 in + if iw1' != iw1 then INFIELD_INIT (n, iw1') else iw + | ATINDEX_INIT (e, iw1) -> + let e' = visitCabsExpression vis e in + let iw1' = childrenInitWhat iw1 in + if e' != e || iw1' != iw1 then ATINDEX_INIT (e', iw1') else iw + | ATINDEXRANGE_INIT (e1, e2) -> + let e1' = visitCabsExpression vis e1 in + let e2' = visitCabsExpression vis e2 in + if e1' != e1 || e2' != e2 then ATINDEXRANGE_INIT (e1, e2) else iw + in + match ie with + NO_INIT -> ie + | SINGLE_INIT e -> + let e' = visitCabsExpression vis e in + if e' != e then SINGLE_INIT e' else ie + | COMPOUND_INIT il -> + let childrenOne ((iw, ie) as input) = + let iw' = childrenInitWhat iw in + let ie' = visitCabsInitExpression vis ie in + if iw' != iw || ie' != ie then (iw', ie') else input + in + let il' = mapNoCopy childrenOne il in + if il' != il then COMPOUND_INIT il' else ie + + +and visitCabsAttribute vis (a: attribute) : attribute list = + doVisitList vis vis#vattr childrenAttribute a + +and childrenAttribute vis ((n, el) as input) = + let el' = mapNoCopy (visitCabsExpression vis) el in + if el' != el then (n, el') else input + +and visitCabsAttributes vis (al: attribute list) : attribute list = + mapNoCopyList (visitCabsAttribute vis) al + +let visitCabsFile (vis: cabsVisitor) ((fname, f): file) : file = + (fname, mapNoCopyList (visitCabsDefinition vis) f) + + (* end of file *) + diff --git a/cil/src/frontc/cabsvisit.mli b/cil/src/frontc/cabsvisit.mli new file mode 100644 index 00000000..d2387892 --- /dev/null +++ b/cil/src/frontc/cabsvisit.mli @@ -0,0 +1,115 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* cabsvisit.mli *) +(* interface for cabsvisit.ml *) + +(* Different visiting actions. 'a will be instantiated with exp, instr, etc. *) +type 'a visitAction = + SkipChildren (* Do not visit the children. Return + * the node as it is *) + | ChangeTo of 'a (* Replace the expression with the + * given one *) + | DoChildren (* Continue with the children of this + * node. Rebuild the node on return + * if any of the children changes + * (use == test) *) + | ChangeDoChildrenPost of 'a * ('a -> 'a) (* First consider that the entire + * exp is replaced by the first + * paramenter. Then continue with + * the children. On return rebuild + * the node if any of the children + * has changed and then apply the + * function on the node *) + +type nameKind = + NVar (** Variable or function prototype + name *) + | NFun (** Function definition name *) + | NField (** The name of a field *) + | NType (** The name of a type *) + + +(* All visit methods are called in preorder! (but you can use + * ChangeDoChildrenPost to change the order) *) +class type cabsVisitor = object + method vexpr: Cabs.expression -> Cabs.expression visitAction (* expressions *) + method vinitexpr: Cabs.init_expression -> Cabs.init_expression visitAction + method vstmt: Cabs.statement -> Cabs.statement list visitAction + method vblock: Cabs.block -> Cabs.block visitAction + method vvar: string -> string (* use of a variable + * names *) + method vdef: Cabs.definition -> Cabs.definition list visitAction + method vtypespec: Cabs.typeSpecifier -> Cabs.typeSpecifier visitAction + method vdecltype: Cabs.decl_type -> Cabs.decl_type visitAction + + (* For each declaration we call vname *) + method vname: nameKind -> Cabs.specifier -> Cabs.name -> Cabs.name visitAction + method vspec: Cabs.specifier -> Cabs.specifier visitAction (* specifier *) + method vattr: Cabs.attribute -> Cabs.attribute list visitAction + + + method vEnterScope: unit -> unit + method vExitScope: unit -> unit +end + + +class nopCabsVisitor: cabsVisitor + + +val visitCabsTypeSpecifier: cabsVisitor -> + Cabs.typeSpecifier -> Cabs.typeSpecifier +val visitCabsSpecifier: cabsVisitor -> Cabs.specifier -> Cabs.specifier + +(** Visits a decl_type. The bool argument is saying whether we are ina + * function definition and thus the scope in a PROTO should extend until the + * end of the function *) +val visitCabsDeclType: cabsVisitor -> bool -> Cabs.decl_type -> Cabs.decl_type +val visitCabsDefinition: cabsVisitor -> Cabs.definition -> Cabs.definition list +val visitCabsBlock: cabsVisitor -> Cabs.block -> Cabs.block +val visitCabsStatement: cabsVisitor -> Cabs.statement -> Cabs.statement list +val visitCabsExpression: cabsVisitor -> Cabs.expression -> Cabs.expression +val visitCabsAttributes: cabsVisitor -> Cabs.attribute list + -> Cabs.attribute list +val visitCabsName: cabsVisitor -> nameKind + -> Cabs.specifier -> Cabs.name -> Cabs.name +val visitCabsFile: cabsVisitor -> Cabs.file -> Cabs.file + + + +(** Set by the visitor to the current location *) +val visitorLocation: Cabs.cabsloc ref diff --git a/cil/src/frontc/clexer.mli b/cil/src/frontc/clexer.mli new file mode 100644 index 00000000..01acfd04 --- /dev/null +++ b/cil/src/frontc/clexer.mli @@ -0,0 +1,55 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(* This interface is generated manually. The corresponding .ml file is + * generated automatically and is placed in ../obj/clexer.ml. The reason we + * want this interface is to avoid confusing make with freshly generated + * interface files *) + + +val init: filename:string -> Lexing.lexbuf +val finish: unit -> unit + +(* This is the main parser function *) +val initial: Lexing.lexbuf -> Cparser.token + + +val push_context: unit -> unit (* Start a context *) +val add_type: string -> unit (* Add a new string as a type name *) +val add_identifier: string -> unit (* Add a new string as a variable name *) +val pop_context: unit -> unit (* Remove all names added in this context *) diff --git a/cil/src/frontc/clexer.mll b/cil/src/frontc/clexer.mll new file mode 100644 index 00000000..08f78819 --- /dev/null +++ b/cil/src/frontc/clexer.mll @@ -0,0 +1,664 @@ +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(* FrontC -- lexical analyzer +** +** 1.0 3.22.99 Hugues Cassé First version. +** 2.0 George Necula 12/12/00: Many extensions +*) +{ +open Cparser +open Pretty +exception Eof +exception InternalError of string +module E = Errormsg +module H = Hashtbl + +let matchingParsOpen = ref 0 + +let currentLoc () = + let l, f, c = E.getPosition () in + { Cabs.lineno = l; + Cabs.filename = f; + Cabs.byteno = c;} + +(* string -> unit *) +let addComment c = + let l = currentLoc() in + let i = GrowArray.max_init_index Cabs.commentsGA in + GrowArray.setg Cabs.commentsGA (i+1) (l,c,false) + +let int64_to_char value = + if (compare value (Int64.of_int 255) > 0) || (compare value Int64.zero < 0) then + begin + let msg = Printf.sprintf "clexer:intlist_to_string: character 0x%Lx too big" value in + E.parse_error msg; + end + else + Char.chr (Int64.to_int value) + +(* takes a not-nul-terminated list, and converts it to a string. *) +let rec intlist_to_string (str: int64 list):string = + match str with + [] -> "" (* add nul-termination *) + | value::rest -> + let this_char = int64_to_char value in + (String.make 1 this_char) ^ (intlist_to_string rest) + +(* Some debugging support for line numbers *) +let dbgToken (t: token) = + if false then begin + ignore (E.log "%a" insert + (match t with + IDENT (n, l) -> dprintf "IDENT(%s,%d)\n" n l.Cabs.lineno + | LBRACE l -> dprintf "LBRACE(%d)\n" l.Cabs.lineno + | RBRACE l -> dprintf "RBRACE(%d)\n" l.Cabs.lineno + | IF l -> dprintf "IF(%d)\n" l.Cabs.lineno + | SWITCH l -> dprintf "SWITCH(%d)\n" l.Cabs.lineno + | RETURN l -> dprintf "RETURN(%d)\n" l.Cabs.lineno + | _ -> nil)); + t + end else + t + + +(* +** Keyword hashtable +*) +let lexicon = H.create 211 +let init_lexicon _ = + H.clear lexicon; + List.iter + (fun (key, builder) -> H.add lexicon key builder) + [ ("auto", fun loc -> AUTO loc); + ("const", fun loc -> CONST loc); + ("__const", fun loc -> CONST loc); + ("__const__", fun loc -> CONST loc); + ("static", fun loc -> STATIC loc); + ("extern", fun loc -> EXTERN loc); + ("long", fun loc -> LONG loc); + ("short", fun loc -> SHORT loc); + ("register", fun loc -> REGISTER loc); + ("signed", fun loc -> SIGNED loc); + ("__signed", fun loc -> SIGNED loc); + ("unsigned", fun loc -> UNSIGNED loc); + ("volatile", fun loc -> VOLATILE loc); + ("__volatile", fun loc -> VOLATILE loc); + (* WW: see /usr/include/sys/cdefs.h for why __signed and __volatile + * are accepted GCC-isms *) + ("char", fun loc -> CHAR loc); + ("int", fun loc -> INT loc); + ("float", fun loc -> FLOAT loc); + ("double", fun loc -> DOUBLE loc); + ("void", fun loc -> VOID loc); + ("enum", fun loc -> ENUM loc); + ("struct", fun loc -> STRUCT loc); + ("typedef", fun loc -> TYPEDEF loc); + ("union", fun loc -> UNION loc); + ("break", fun loc -> BREAK loc); + ("continue", fun loc -> CONTINUE loc); + ("goto", fun loc -> GOTO loc); + ("return", fun loc -> dbgToken (RETURN loc)); + ("switch", fun loc -> dbgToken (SWITCH loc)); + ("case", fun loc -> CASE loc); + ("default", fun loc -> DEFAULT loc); + ("while", fun loc -> WHILE loc); + ("do", fun loc -> DO loc); + ("for", fun loc -> FOR loc); + ("if", fun loc -> dbgToken (IF loc)); + ("else", fun _ -> ELSE); + (*** Implementation specific keywords ***) + ("__signed__", fun loc -> SIGNED loc); + ("__inline__", fun loc -> INLINE loc); + ("inline", fun loc -> INLINE loc); + ("__inline", fun loc -> INLINE loc); + ("_inline", fun loc -> INLINE loc); + ("__attribute__", fun loc -> ATTRIBUTE loc); + ("__attribute", fun loc -> ATTRIBUTE loc); +(* + ("__attribute_used__", fun loc -> ATTRIBUTE_USED loc); +*) + ("__blockattribute__", fun _ -> BLOCKATTRIBUTE); + ("__blockattribute", fun _ -> BLOCKATTRIBUTE); + ("__asm__", fun loc -> ASM loc); + ("asm", fun loc -> ASM loc); + ("__typeof__", fun loc -> TYPEOF loc); + ("__typeof", fun loc -> TYPEOF loc); + ("typeof", fun loc -> TYPEOF loc); + ("__alignof", fun loc -> ALIGNOF loc); + ("__alignof__", fun loc -> ALIGNOF loc); + ("__volatile__", fun loc -> VOLATILE loc); + ("__volatile", fun loc -> VOLATILE loc); + + ("__FUNCTION__", fun loc -> FUNCTION__ loc); + ("__func__", fun loc -> FUNCTION__ loc); (* ISO 6.4.2.2 *) + ("__PRETTY_FUNCTION__", fun loc -> PRETTY_FUNCTION__ loc); + ("__label__", fun _ -> LABEL__); + (*** weimer: GCC arcana ***) + ("__restrict", fun loc -> RESTRICT loc); + ("restrict", fun loc -> RESTRICT loc); +(* ("__extension__", EXTENSION); *) + (**** MS VC ***) + ("__int64", fun _ -> INT64 (currentLoc ())); + ("__int32", fun loc -> INT loc); + ("_cdecl", fun _ -> MSATTR ("_cdecl", currentLoc ())); + ("__cdecl", fun _ -> MSATTR ("__cdecl", currentLoc ())); + ("_stdcall", fun _ -> MSATTR ("_stdcall", currentLoc ())); + ("__stdcall", fun _ -> MSATTR ("__stdcall", currentLoc ())); + ("_fastcall", fun _ -> MSATTR ("_fastcall", currentLoc ())); + ("__fastcall", fun _ -> MSATTR ("__fastcall", currentLoc ())); + ("__w64", fun _ -> MSATTR("__w64", currentLoc ())); + ("__declspec", fun loc -> DECLSPEC loc); + ("__forceinline", fun loc -> INLINE loc); (* !! we turn forceinline + * into inline *) + ("__try", fun loc -> TRY loc); + ("__except", fun loc -> EXCEPT loc); + ("__finally", fun loc -> FINALLY loc); + (* weimer: some files produced by 'GCC -E' expect this type to be + * defined *) + ("__builtin_va_list", + fun _ -> NAMED_TYPE ("__builtin_va_list", currentLoc ())); + ("__builtin_va_arg", fun loc -> BUILTIN_VA_ARG loc); + ("__builtin_types_compatible_p", fun loc -> BUILTIN_TYPES_COMPAT loc); + ("__builtin_offsetof", fun loc -> BUILTIN_OFFSETOF loc); + (* On some versions of GCC __thread is a regular identifier *) + ("__thread", fun loc -> + if Machdep.__thread_is_keyword then + THREAD loc + else + IDENT ("__thread", loc)); + ] + +(* Mark an identifier as a type name. The old mapping is preserved and will + * be reinstated when we exit this context *) +let add_type name = + (* ignore (print_string ("adding type name " ^ name ^ "\n")); *) + H.add lexicon name (fun loc -> NAMED_TYPE (name, loc)) + +let context : string list list ref = ref [] + +let push_context _ = context := []::!context + +let pop_context _ = + match !context with + [] -> raise (InternalError "Empty context stack") + | con::sub -> + (context := sub; + List.iter (fun name -> + (* ignore (print_string ("removing lexicon for " ^ name ^ "\n")); *) + H.remove lexicon name) con) + +(* Mark an identifier as a variable name. The old mapping is preserved and + * will be reinstated when we exit this context *) +let add_identifier name = + match !context with + [] -> () (* Just ignore raise (InternalError "Empty context stack") *) + | con::sub -> + (context := (name::con)::sub; + (* print_string ("adding IDENT for " ^ name ^ "\n"); *) + H.add lexicon name (fun loc -> + dbgToken (IDENT (name, loc)))) + + +(* +** Useful primitives +*) +let scan_ident id = + let here = currentLoc () in + try (H.find lexicon id) here + (* default to variable name, as opposed to type *) + with Not_found -> dbgToken (IDENT (id, here)) + + +(* +** Buffer processor +*) + + +let init ~(filename: string) : Lexing.lexbuf = + init_lexicon (); + (* Inititialize the pointer in Errormsg *) + Lexerhack.add_type := add_type; + Lexerhack.push_context := push_context; + Lexerhack.pop_context := pop_context; + Lexerhack.add_identifier := add_identifier; + E.startParsing filename + + +let finish () = + E.finishParsing () + +(*** Error handling ***) +let error msg = + E.parse_error msg + + +(*** escape character management ***) +let scan_escape (char: char) : int64 = + let result = match char with + 'n' -> '\n' + | 'r' -> '\r' + | 't' -> '\t' + | 'b' -> '\b' + | 'f' -> '\012' (* ASCII code 12 *) + | 'v' -> '\011' (* ASCII code 11 *) + | 'a' -> '\007' (* ASCII code 7 *) + | 'e' | 'E' -> '\027' (* ASCII code 27. This is a GCC extension *) + | '\'' -> '\'' + | '"'-> '"' (* '"' *) + | '?' -> '?' + | '(' when not !Cprint.msvcMode -> '(' + | '{' when not !Cprint.msvcMode -> '{' + | '[' when not !Cprint.msvcMode -> '[' + | '%' when not !Cprint.msvcMode -> '%' + | '\\' -> '\\' + | other -> error ("Unrecognized escape sequence: \\" ^ (String.make 1 other)) + in + Int64.of_int (Char.code result) + +let scan_hex_escape str = + let radix = Int64.of_int 16 in + let the_value = ref Int64.zero in + (* start at character 2 to skip the \x *) + for i = 2 to (String.length str) - 1 do + let thisDigit = Cabs.valueOfDigit (String.get str i) in + (* the_value := !the_value * 16 + thisDigit *) + the_value := Int64.add (Int64.mul !the_value radix) thisDigit + done; + !the_value + +let scan_oct_escape str = + let radix = Int64.of_int 8 in + let the_value = ref Int64.zero in + (* start at character 1 to skip the \x *) + for i = 1 to (String.length str) - 1 do + let thisDigit = Cabs.valueOfDigit (String.get str i) in + (* the_value := !the_value * 8 + thisDigit *) + the_value := Int64.add (Int64.mul !the_value radix) thisDigit + done; + !the_value + +let lex_hex_escape remainder lexbuf = + let prefix = scan_hex_escape (Lexing.lexeme lexbuf) in + prefix :: remainder lexbuf + +let lex_oct_escape remainder lexbuf = + let prefix = scan_oct_escape (Lexing.lexeme lexbuf) in + prefix :: remainder lexbuf + +let lex_simple_escape remainder lexbuf = + let lexchar = Lexing.lexeme_char lexbuf 1 in + let prefix = scan_escape lexchar in + prefix :: remainder lexbuf + +let lex_unescaped remainder lexbuf = + let prefix = Int64.of_int (Char.code (Lexing.lexeme_char lexbuf 0)) in + prefix :: remainder lexbuf + +let lex_comment remainder lexbuf = + let ch = Lexing.lexeme_char lexbuf 0 in + let prefix = Int64.of_int (Char.code ch) in + if ch = '\n' then E.newline(); + prefix :: remainder lexbuf + +let make_char (i:int64):char = + let min_val = Int64.zero in + let max_val = Int64.of_int 255 in + (* if i < 0 || i > 255 then error*) + if compare i min_val < 0 || compare i max_val > 0 then begin + let msg = Printf.sprintf "clexer:make_char: character 0x%Lx too big" i in + error msg + end; + Char.chr (Int64.to_int i) + + +(* ISO standard locale-specific function to convert a wide character + * into a sequence of normal characters. Here we work on strings. + * We convert L"Hi" to "H\000i\000" + matth: this seems unused. +let wbtowc wstr = + let len = String.length wstr in + let dest = String.make (len * 2) '\000' in + for i = 0 to len-1 do + dest.[i*2] <- wstr.[i] ; + done ; + dest +*) + +(* This function converst the "Hi" in L"Hi" to { L'H', L'i', L'\0' } + matth: this seems unused. +let wstr_to_warray wstr = + let len = String.length wstr in + let res = ref "{ " in + for i = 0 to len-1 do + res := !res ^ (Printf.sprintf "L'%c', " wstr.[i]) + done ; + res := !res ^ "}" ; + !res +*) + +(* Pragmas get explicit end-of-line tokens. + * Elsewhere they are silently discarded as whitespace. *) +let pragmaLine = ref false + +} + +let decdigit = ['0'-'9'] +let octdigit = ['0'-'7'] +let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F'] +let letter = ['a'- 'z' 'A'-'Z'] + + +let usuffix = ['u' 'U'] +let lsuffix = "l"|"L"|"ll"|"LL" +let intsuffix = lsuffix | usuffix | usuffix lsuffix | lsuffix usuffix + | usuffix ? "i64" + + +let hexprefix = '0' ['x' 'X'] + +let intnum = decdigit+ intsuffix? +let octnum = '0' octdigit+ intsuffix? +let hexnum = hexprefix hexdigit+ intsuffix? + +let exponent = ['e' 'E']['+' '-']? decdigit+ +let fraction = '.' decdigit+ +let decfloat = (intnum? fraction) + |(intnum exponent) + |(intnum? fraction exponent) + | (intnum '.') + | (intnum '.' exponent) + +let hexfraction = hexdigit* '.' hexdigit+ | hexdigit+ +let binexponent = ['p' 'P'] ['+' '-']? decdigit+ +let hexfloat = hexprefix hexfraction binexponent + | hexprefix hexdigit+ binexponent + +let floatsuffix = ['f' 'F' 'l' 'L'] +let floatnum = (decfloat | hexfloat) floatsuffix? + +let ident = (letter|'_')(letter|decdigit|'_'|'$')* +let blank = [' ' '\t' '\012' '\r']+ +let escape = '\\' _ +let hex_escape = '\\' ['x' 'X'] hexdigit+ +let oct_escape = '\\' octdigit octdigit? octdigit? + +(* Pragmas that are not parsed by CIL. We lex them as PRAGMA_LINE tokens *) +let no_parse_pragma = + "warning" | "GCC" + (* Solaris-style pragmas: *) + | "ident" | "section" | "option" | "asm" | "use_section" | "weak" + | "redefine_extname" + | "TCS_align" + + +rule initial = + parse "/*" { let il = comment lexbuf in + let sl = intlist_to_string il in + addComment sl; + initial lexbuf} +| "//" { let il = onelinecomment lexbuf in + let sl = intlist_to_string il in + addComment sl; + E.newline(); + initial lexbuf + } +| blank {initial lexbuf} +| '\n' { E.newline (); + if !pragmaLine then + begin + pragmaLine := false; + PRAGMA_EOL + end + else + initial lexbuf } +| '\\' '\r' * '\n' { + E.newline (); + initial lexbuf + } +| '#' { hash lexbuf} +| "_Pragma" { PRAGMA (currentLoc ()) } +| '\'' { CST_CHAR (chr lexbuf, currentLoc ())} +| "L'" { CST_WCHAR (chr lexbuf, currentLoc ()) } +| '"' { (* '"' *) +(* matth: BUG: this could be either a regular string or a wide string. + * e.g. if it's the "world" in + * L"Hello, " "world" + * then it should be treated as wide even though there's no L immediately + * preceding it. See test/small1/wchar5.c for a failure case. *) + try CST_STRING (str lexbuf, currentLoc ()) + with e -> + raise (InternalError + ("str: " ^ + Printexc.to_string e))} +| "L\"" { (* weimer: wchar_t string literal *) + try CST_WSTRING(str lexbuf, currentLoc ()) + with e -> + raise (InternalError + ("wide string: " ^ + Printexc.to_string e))} +| floatnum {CST_FLOAT (Lexing.lexeme lexbuf, currentLoc ())} +| hexnum {CST_INT (Lexing.lexeme lexbuf, currentLoc ())} +| octnum {CST_INT (Lexing.lexeme lexbuf, currentLoc ())} +| intnum {CST_INT (Lexing.lexeme lexbuf, currentLoc ())} +| "!quit!" {EOF} +| "..." {ELLIPSIS} +| "+=" {PLUS_EQ} +| "-=" {MINUS_EQ} +| "*=" {STAR_EQ} +| "/=" {SLASH_EQ} +| "%=" {PERCENT_EQ} +| "|=" {PIPE_EQ} +| "&=" {AND_EQ} +| "^=" {CIRC_EQ} +| "<<=" {INF_INF_EQ} +| ">>=" {SUP_SUP_EQ} +| "<<" {INF_INF} +| ">>" {SUP_SUP} +| "==" {EQ_EQ} +| "!=" {EXCLAM_EQ} +| "<=" {INF_EQ} +| ">=" {SUP_EQ} +| "=" {EQ} +| "<" {INF} +| ">" {SUP} +| "++" {PLUS_PLUS (currentLoc ())} +| "--" {MINUS_MINUS (currentLoc ())} +| "->" {ARROW} +| '+' {PLUS (currentLoc ())} +| '-' {MINUS (currentLoc ())} +| '*' {STAR (currentLoc ())} +| '/' {SLASH} +| '%' {PERCENT} +| '!' {EXCLAM (currentLoc ())} +| "&&" {AND_AND (currentLoc ())} +| "||" {PIPE_PIPE} +| '&' {AND (currentLoc ())} +| '|' {PIPE} +| '^' {CIRC} +| '?' {QUEST} +| ':' {COLON} +| '~' {TILDE (currentLoc ())} + +| '{' {dbgToken (LBRACE (currentLoc ()))} +| '}' {dbgToken (RBRACE (currentLoc ()))} +| '[' {LBRACKET} +| ']' {RBRACKET} +| '(' {dbgToken (LPAREN (currentLoc ())) } +| ')' {RPAREN} +| ';' {dbgToken (SEMICOLON (currentLoc ())) } +| ',' {COMMA} +| '.' {DOT} +| "sizeof" {SIZEOF (currentLoc ())} +| "__asm" { if !Cprint.msvcMode then + MSASM (msasm lexbuf, currentLoc ()) + else (ASM (currentLoc ())) } + +(* If we see __pragma we eat it and the matching parentheses as well *) +| "__pragma" { matchingParsOpen := 0; + let _ = matchingpars lexbuf in + initial lexbuf + } + +(* sm: tree transformation keywords *) +| "@transform" {AT_TRANSFORM (currentLoc ())} +| "@transformExpr" {AT_TRANSFORMEXPR (currentLoc ())} +| "@specifier" {AT_SPECIFIER (currentLoc ())} +| "@expr" {AT_EXPR (currentLoc ())} +| "@name" {AT_NAME} + +(* __extension__ is a black. The parser runs into some conflicts if we let it + * pass *) +| "__extension__" {initial lexbuf } +| ident {scan_ident (Lexing.lexeme lexbuf)} +| eof {EOF} +| _ {E.parse_error "Invalid symbol"} +and comment = + parse + "*/" { [] } +(*| '\n' { E.newline (); lex_unescaped comment lexbuf }*) +| _ { lex_comment comment lexbuf } + + +and onelinecomment = parse + '\n' {[]} +| _ { lex_comment onelinecomment lexbuf } + +and matchingpars = parse + '\n' { E.newline (); matchingpars lexbuf } +| blank { matchingpars lexbuf } +| '(' { incr matchingParsOpen; matchingpars lexbuf } +| ')' { decr matchingParsOpen; + if !matchingParsOpen = 0 then + () + else + matchingpars lexbuf + } +| "/*" { let il = comment lexbuf in + let sl = intlist_to_string il in + addComment sl; + matchingpars lexbuf} +| '"' { (* '"' *) + let _ = str lexbuf in + matchingpars lexbuf + } +| _ { matchingpars lexbuf } + +(* # ... *) +and hash = parse + '\n' { E.newline (); initial lexbuf} +| blank { hash lexbuf} +| intnum { (* We are seeing a line number. This is the number for the + * next line *) + let s = Lexing.lexeme lexbuf in + begin try + E.setCurrentLine (int_of_string s - 1) + with Failure _ -> + E.warn "Bad line number in preprocessed file: %s" s + end; + (* A file name must follow *) + file lexbuf } +| "line" { hash lexbuf } (* MSVC line number info *) + (* For pragmas with irregular syntax, like #pragma warning, + * we parse them as a whole line. *) +| "pragma" blank (no_parse_pragma as pragmaName) + { let here = currentLoc () in + PRAGMA_LINE (pragmaName ^ pragma lexbuf, here) + } +| "pragma" { pragmaLine := true; PRAGMA (currentLoc ()) } +| _ { endline lexbuf} + +and file = parse + '\n' {E.newline (); initial lexbuf} +| blank {file lexbuf} +| '"' [^ '\012' '\t' '"']* '"' { (* '"' *) + let n = Lexing.lexeme lexbuf in + let n1 = String.sub n 1 + ((String.length n) - 2) in + E.setCurrentFile n1; + endline lexbuf} + +| _ {endline lexbuf} + +and endline = parse + '\n' { E.newline (); initial lexbuf} +| eof { EOF } +| _ { endline lexbuf} + +and pragma = parse + '\n' { E.newline (); "" } +| _ { let cur = Lexing.lexeme lexbuf in + cur ^ (pragma lexbuf) } + +and str = parse + '"' {[]} (* no nul terminiation in CST_STRING '"' *) +| hex_escape {lex_hex_escape str lexbuf} +| oct_escape {lex_oct_escape str lexbuf} +| escape {lex_simple_escape str lexbuf} +| _ {lex_unescaped str lexbuf} + +and chr = parse + '\'' {[]} +| hex_escape {lex_hex_escape chr lexbuf} +| oct_escape {lex_oct_escape chr lexbuf} +| escape {lex_simple_escape chr lexbuf} +| _ {lex_unescaped chr lexbuf} + +and msasm = parse + blank { msasm lexbuf } +| '{' { msasminbrace lexbuf } +| _ { let cur = Lexing.lexeme lexbuf in + cur ^ (msasmnobrace lexbuf) } + +and msasminbrace = parse + '}' { "" } +| _ { let cur = Lexing.lexeme lexbuf in + cur ^ (msasminbrace lexbuf) } +and msasmnobrace = parse + ['}' ';' '\n'] { lexbuf.Lexing.lex_curr_pos <- + lexbuf.Lexing.lex_curr_pos - 1; + "" } +| "__asm" { lexbuf.Lexing.lex_curr_pos <- + lexbuf.Lexing.lex_curr_pos - 5; + "" } +| _ { let cur = Lexing.lexeme lexbuf in + + cur ^ (msasmnobrace lexbuf) } + +{ + +} diff --git a/cil/src/frontc/cparser.mly b/cil/src/frontc/cparser.mly new file mode 100644 index 00000000..f1e1ef94 --- /dev/null +++ b/cil/src/frontc/cparser.mly @@ -0,0 +1,1521 @@ +/*(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **) +(** +** 1.0 3.22.99 Hugues Cassé First version. +** 2.0 George Necula 12/12/00: Practically complete rewrite. +*) +*/ +%{ +open Cabs +module E = Errormsg + +let parse_error msg : unit = (* sm: c++-mode highlight hack: -> ' <- *) + E.parse_error msg + +let print = print_string + +(* unit -> string option *) +(* +let getComments () = + match !comments with + [] -> None + | _ -> + let r = Some(String.concat "\n" (List.rev !comments)) in + comments := []; + r +*) + +let currentLoc () = + let l, f, c = E.getPosition () in + { lineno = l; + filename = f; + byteno = c;} + +let cabslu = {lineno = -10; + filename = "cabs loc unknown"; + byteno = -10;} + +(* cabsloc -> cabsloc *) +(* +let handleLoc l = + l.clcomment <- getComments(); + l +*) + +(* +** Expression building +*) +let smooth_expression lst = + match lst with + [] -> NOTHING + | [expr] -> expr + | _ -> COMMA (lst) + + +let currentFunctionName = ref "" + +let announceFunctionName ((n, decl, _, _):name) = + !Lexerhack.add_identifier n; + (* Start a context that includes the parameter names and the whole body. + * Will pop when we finish parsing the function body *) + !Lexerhack.push_context (); + (* Go through all the parameter names and mark them as identifiers *) + let rec findProto = function + PROTO (d, args, _) when isJUSTBASE d -> + List.iter (fun (_, (an, _, _, _)) -> !Lexerhack.add_identifier an) args + + | PROTO (d, _, _) -> findProto d + | PARENTYPE (_, d, _) -> findProto d + | PTR (_, d) -> findProto d + | ARRAY (d, _, _) -> findProto d + | _ -> parse_error "Cannot find the prototype in a function definition"; + raise Parsing.Parse_error + + and isJUSTBASE = function + JUSTBASE -> true + | PARENTYPE (_, d, _) -> isJUSTBASE d + | _ -> false + in + findProto decl; + currentFunctionName := n + + + +let applyPointer (ptspecs: attribute list list) (dt: decl_type) + : decl_type = + (* Outer specification first *) + let rec loop = function + [] -> dt + | attrs :: rest -> PTR(attrs, loop rest) + in + loop ptspecs + +let doDeclaration (loc: cabsloc) (specs: spec_elem list) (nl: init_name list) : definition = + if isTypedef specs then begin + (* Tell the lexer about the new type names *) + List.iter (fun ((n, _, _, _), _) -> !Lexerhack.add_type n) nl; + TYPEDEF ((specs, List.map (fun (n, _) -> n) nl), loc) + end else + if nl = [] then + ONLYTYPEDEF (specs, loc) + else begin + (* Tell the lexer about the new variable names *) + List.iter (fun ((n, _, _, _), _) -> !Lexerhack.add_identifier n) nl; + DECDEF ((specs, nl), loc) + end + + +let doFunctionDef (loc: cabsloc) + (lend: cabsloc) + (specs: spec_elem list) + (n: name) + (b: block) : definition = + let fname = (specs, n) in + FUNDEF (fname, b, loc, lend) + + +let doOldParDecl (names: string list) + ((pardefs: name_group list), (isva: bool)) + : single_name list * bool = + let findOneName n = + (* Search in pardefs for the definition for this parameter *) + let rec loopGroups = function + [] -> ([SpecType Tint], (n, JUSTBASE, [], cabslu)) + | (specs, names) :: restgroups -> + let rec loopNames = function + [] -> loopGroups restgroups + | ((n',_, _, _) as sn) :: _ when n' = n -> (specs, sn) + | _ :: restnames -> loopNames restnames + in + loopNames names + in + loopGroups pardefs + in + let args = List.map findOneName names in + (args, isva) + +let checkConnective (s : string) : unit = +begin + (* checking this means I could possibly have more connectives, with *) + (* different meaning *) + if (s <> "to") then ( + parse_error "transformer connective must be 'to'"; + raise Parsing.Parse_error + ) + else () +end + +let int64_to_char value = + if (compare value (Int64.of_int 255) > 0) || (compare value Int64.zero < 0) then + begin + let msg = Printf.sprintf "cparser:intlist_to_string: character 0x%Lx too big" value in + parse_error msg; + raise Parsing.Parse_error + end + else + Char.chr (Int64.to_int value) + +(* takes a not-nul-terminated list, and converts it to a string. *) +let rec intlist_to_string (str: int64 list):string = + match str with + [] -> "" (* add nul-termination *) + | value::rest -> + let this_char = int64_to_char value in + (String.make 1 this_char) ^ (intlist_to_string rest) + +let fst3 (result, _, _) = result +let snd3 (_, result, _) = result +let trd3 (_, _, result) = result + + +(* + transform: __builtin_offsetof(type, member) + into : (size_t) (&(type * ) 0)->member + *) + +let transformOffsetOf (speclist, dtype) member = + let rec addPointer = function + | JUSTBASE -> + PTR([], JUSTBASE) + | PARENTYPE (attrs1, dtype, attrs2) -> + PARENTYPE (attrs1, addPointer dtype, attrs2) + | ARRAY (dtype, attrs, expr) -> + ARRAY (addPointer dtype, attrs, expr) + | PTR (attrs, dtype) -> + PTR (attrs, addPointer dtype) + | PROTO (dtype, names, variadic) -> + PROTO (addPointer dtype, names, variadic) + in + let nullType = (speclist, addPointer dtype) in + let nullExpr = CONSTANT (CONST_INT "0") in + let castExpr = CAST (nullType, SINGLE_INIT nullExpr) in + + let rec replaceBase = function + | VARIABLE field -> + MEMBEROFPTR (castExpr, field) + | MEMBEROF (base, field) -> + MEMBEROF (replaceBase base, field) + | INDEX (base, index) -> + INDEX (replaceBase base, index) + | _ -> + parse_error "malformed offset expression in __builtin_offsetof"; + raise Parsing.Parse_error + in + let memberExpr = replaceBase member in + let addrExpr = UNARY (ADDROF, memberExpr) in + (* slight cheat: hard-coded assumption that size_t == unsigned int *) + let sizeofType = [SpecType Tunsigned], JUSTBASE in + let resultExpr = CAST (sizeofType, SINGLE_INIT addrExpr) in + resultExpr + +%} + +%token IDENT +%token CST_CHAR +%token CST_WCHAR +%token CST_INT +%token CST_FLOAT +%token NAMED_TYPE + +/* Each character is its own list element, and the terminating nul is not + included in this list. */ +%token CST_STRING +%token CST_WSTRING + +%token EOF +%token CHAR INT DOUBLE FLOAT VOID INT64 INT32 +%token ENUM STRUCT TYPEDEF UNION +%token SIGNED UNSIGNED LONG SHORT +%token VOLATILE EXTERN STATIC CONST RESTRICT AUTO REGISTER +%token THREAD + +%token SIZEOF ALIGNOF + +%token EQ PLUS_EQ MINUS_EQ STAR_EQ SLASH_EQ PERCENT_EQ +%token AND_EQ PIPE_EQ CIRC_EQ INF_INF_EQ SUP_SUP_EQ +%token ARROW DOT + +%token EQ_EQ EXCLAM_EQ INF SUP INF_EQ SUP_EQ +%token PLUS MINUS STAR +%token SLASH PERCENT +%token TILDE AND +%token PIPE CIRC +%token EXCLAM AND_AND +%token PIPE_PIPE +%token INF_INF SUP_SUP +%token PLUS_PLUS MINUS_MINUS + +%token RPAREN +%token LPAREN RBRACE +%token LBRACE +%token LBRACKET RBRACKET +%token COLON +%token SEMICOLON +%token COMMA ELLIPSIS QUEST + +%token BREAK CONTINUE GOTO RETURN +%token SWITCH CASE DEFAULT +%token WHILE DO FOR +%token IF TRY EXCEPT FINALLY +%token ELSE + +%token ATTRIBUTE INLINE ASM TYPEOF FUNCTION__ PRETTY_FUNCTION__ +%token LABEL__ +%token BUILTIN_VA_ARG ATTRIBUTE_USED +%token BUILTIN_VA_LIST +%token BLOCKATTRIBUTE +%token BUILTIN_TYPES_COMPAT BUILTIN_OFFSETOF +%token DECLSPEC +%token MSASM MSATTR +%token PRAGMA_LINE +%token PRAGMA +%token PRAGMA_EOL + +/* sm: cabs tree transformation specification keywords */ +%token AT_TRANSFORM AT_TRANSFORMEXPR AT_SPECIFIER AT_EXPR +%token AT_NAME + +/* operator precedence */ +%nonassoc IF +%nonassoc ELSE + + +%left COMMA +%right EQ PLUS_EQ MINUS_EQ STAR_EQ SLASH_EQ PERCENT_EQ + AND_EQ PIPE_EQ CIRC_EQ INF_INF_EQ SUP_SUP_EQ +%right QUEST COLON +%left PIPE_PIPE +%left AND_AND +%left PIPE +%left CIRC +%left AND +%left EQ_EQ EXCLAM_EQ +%left INF SUP INF_EQ SUP_EQ +%left INF_INF SUP_SUP +%left PLUS MINUS +%left STAR SLASH PERCENT CONST RESTRICT VOLATILE +%right EXCLAM TILDE PLUS_PLUS MINUS_MINUS CAST RPAREN ADDROF SIZEOF ALIGNOF +%left LBRACKET +%left DOT ARROW LPAREN LBRACE +%right NAMED_TYPE /* We'll use this to handle redefinitions of + * NAMED_TYPE as variables */ +%left IDENT + +/* Non-terminals informations */ +%start interpret file +%type file interpret globals + +%type global + + +%type attributes attributes_with_asm asmattr +%type statement +%type constant +%type string_constant +%type expression +%type opt_expression +%type init_expression +%type comma_expression +%type paren_comma_expression +%type arguments +%type bracket_comma_expression +%type string_list +%type wstring_list + +%type initializer +%type <(Cabs.initwhat * Cabs.init_expression) list> initializer_list +%type init_designators init_designators_opt + +%type decl_spec_list +%type type_spec +%type struct_decl_list + + +%type old_proto_decl +%type parameter_decl +%type enumerator +%type enum_list +%type declaration function_def +%type function_def_start +%type type_name +%type block +%type block_element_list +%type local_labels local_label_names +%type old_parameter_list_ne + +%type init_declarator +%type init_declarator_list +%type declarator +%type field_decl +%type <(Cabs.name * expression option) list> field_decl_list +%type direct_decl +%type abs_direct_decl abs_direct_decl_opt +%type abstract_decl + + /* (* Each element is a "* ". *) */ +%type pointer pointer_opt +%type location +%type cvspec +%% + +interpret: + file EOF {$1} +; +file: globals {$1} +; +globals: + /* empty */ { [] } +| global globals { $1 :: $2 } +| SEMICOLON globals { $2 } +; + +location: + /* empty */ { currentLoc () } %prec IDENT + + +/*** Global Definition ***/ +global: +| declaration { $1 } +| function_def { $1 } +/*(* Some C header files ar shared with the C++ compiler and have linkage + * specification *)*/ +| EXTERN string_constant declaration { LINKAGE (fst $2, (*handleLoc*) (snd $2), [ $3 ]) } +| EXTERN string_constant LBRACE globals RBRACE + { LINKAGE (fst $2, (*handleLoc*) (snd $2), $4) } +| ASM LPAREN string_constant RPAREN SEMICOLON + { GLOBASM (fst $3, (*handleLoc*) $1) } +| pragma { $1 } +/* (* Old-style function prototype. This should be somewhere else, like in + * "declaration". For now we keep it at global scope only because in local + * scope it looks too much like a function call *) */ +| IDENT LPAREN old_parameter_list_ne RPAREN old_pardef_list SEMICOLON + { (* Convert pardecl to new style *) + let pardecl, isva = doOldParDecl $3 $5 in + (* Make the function declarator *) + doDeclaration ((*handleLoc*) (snd $1)) [] + [((fst $1, PROTO(JUSTBASE, pardecl,isva), [], cabslu), + NO_INIT)] + } +/* (* Old style function prototype, but without any arguments *) */ +| IDENT LPAREN RPAREN SEMICOLON + { (* Make the function declarator *) + doDeclaration ((*handleLoc*)(snd $1)) [] + [((fst $1, PROTO(JUSTBASE,[],false), [], cabslu), + NO_INIT)] + } +/* transformer for a toplevel construct */ +| AT_TRANSFORM LBRACE global RBRACE IDENT/*to*/ LBRACE globals RBRACE { + checkConnective(fst $5); + TRANSFORMER($3, $7, $1) + } +/* transformer for an expression */ +| AT_TRANSFORMEXPR LBRACE expression RBRACE IDENT/*to*/ LBRACE expression RBRACE { + checkConnective(fst $5); + EXPRTRANSFORMER(fst $3, fst $7, $1) + } +| location error SEMICOLON { PRAGMA (VARIABLE "parse_error", $1) } +; + +id_or_typename: + IDENT {fst $1} +| NAMED_TYPE {fst $1} +| AT_NAME LPAREN IDENT RPAREN { "@name(" ^ fst $3 ^ ")" } /* pattern variable name */ +; + +maybecomma: + /* empty */ { () } +| COMMA { () } +; + +/* *** Expressions *** */ + +primary_expression: /*(* 6.5.1. *)*/ +| IDENT + {VARIABLE (fst $1), snd $1} +| constant + {CONSTANT (fst $1), snd $1} +| paren_comma_expression + {smooth_expression (fst $1), snd $1} +| LPAREN block RPAREN + { GNU_BODY (fst3 $2), $1 } + + /*(* Next is Scott's transformer *)*/ +| AT_EXPR LPAREN IDENT RPAREN /* expression pattern variable */ + { EXPR_PATTERN(fst $3), $1 } +; + +postfix_expression: /*(* 6.5.2 *)*/ +| primary_expression + { $1 } +| postfix_expression bracket_comma_expression + {INDEX (fst $1, smooth_expression $2), snd $1} +| postfix_expression LPAREN arguments RPAREN + {CALL (fst $1, $3), snd $1} +| BUILTIN_VA_ARG LPAREN expression COMMA type_name RPAREN + { let b, d = $5 in + CALL (VARIABLE "__builtin_va_arg", + [fst $3; TYPE_SIZEOF (b, d)]), $1 } +| BUILTIN_TYPES_COMPAT LPAREN type_name COMMA type_name RPAREN + { let b1,d1 = $3 in + let b2,d2 = $5 in + CALL (VARIABLE "__builtin_types_compatible_p", + [TYPE_SIZEOF(b1,d1); TYPE_SIZEOF(b2,d2)]), $1 } +| BUILTIN_OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN + { transformOffsetOf $3 (fst $5), $1 } +| postfix_expression DOT id_or_typename + {MEMBEROF (fst $1, $3), snd $1} +| postfix_expression ARROW id_or_typename + {MEMBEROFPTR (fst $1, $3), snd $1} +| postfix_expression PLUS_PLUS + {UNARY (POSINCR, fst $1), snd $1} +| postfix_expression MINUS_MINUS + {UNARY (POSDECR, fst $1), snd $1} +/* (* We handle GCC constructor expressions *) */ +| LPAREN type_name RPAREN LBRACE initializer_list_opt RBRACE + { CAST($2, COMPOUND_INIT $5), $1 } +; + +offsetof_member_designator: /* GCC extension for __builtin_offsetof */ +| IDENT + { VARIABLE (fst $1), snd $1 } +| offsetof_member_designator DOT IDENT + { MEMBEROF (fst $1, fst $3), snd $1 } +| offsetof_member_designator bracket_comma_expression + { INDEX (fst $1, smooth_expression $2), snd $1 } +; + +unary_expression: /*(* 6.5.3 *)*/ +| postfix_expression + { $1 } +| PLUS_PLUS unary_expression + {UNARY (PREINCR, fst $2), $1} +| MINUS_MINUS unary_expression + {UNARY (PREDECR, fst $2), $1} +| SIZEOF unary_expression + {EXPR_SIZEOF (fst $2), $1} +| SIZEOF LPAREN type_name RPAREN + {let b, d = $3 in TYPE_SIZEOF (b, d), $1} +| ALIGNOF unary_expression + {EXPR_ALIGNOF (fst $2), $1} +| ALIGNOF LPAREN type_name RPAREN + {let b, d = $3 in TYPE_ALIGNOF (b, d), $1} +| PLUS cast_expression + {UNARY (PLUS, fst $2), $1} +| MINUS cast_expression + {UNARY (MINUS, fst $2), $1} +| STAR cast_expression + {UNARY (MEMOF, fst $2), $1} +| AND cast_expression + {UNARY (ADDROF, fst $2), $1} +| EXCLAM cast_expression + {UNARY (NOT, fst $2), $1} +| TILDE cast_expression + {UNARY (BNOT, fst $2), $1} +| AND_AND IDENT { LABELADDR (fst $2), $1 } +; + +cast_expression: /*(* 6.5.4 *)*/ +| unary_expression + { $1 } +| LPAREN type_name RPAREN cast_expression + { CAST($2, SINGLE_INIT (fst $4)), $1 } +; + +multiplicative_expression: /*(* 6.5.5 *)*/ +| cast_expression + { $1 } +| multiplicative_expression STAR cast_expression + {BINARY(MUL, fst $1, fst $3), snd $1} +| multiplicative_expression SLASH cast_expression + {BINARY(DIV, fst $1, fst $3), snd $1} +| multiplicative_expression PERCENT cast_expression + {BINARY(MOD, fst $1, fst $3), snd $1} +; + +additive_expression: /*(* 6.5.6 *)*/ +| multiplicative_expression + { $1 } +| additive_expression PLUS multiplicative_expression + {BINARY(ADD, fst $1, fst $3), snd $1} +| additive_expression MINUS multiplicative_expression + {BINARY(SUB, fst $1, fst $3), snd $1} +; + +shift_expression: /*(* 6.5.7 *)*/ +| additive_expression + { $1 } +| shift_expression INF_INF additive_expression + {BINARY(SHL, fst $1, fst $3), snd $1} +| shift_expression SUP_SUP additive_expression + {BINARY(SHR, fst $1, fst $3), snd $1} +; + + +relational_expression: /*(* 6.5.8 *)*/ +| shift_expression + { $1 } +| relational_expression INF shift_expression + {BINARY(LT, fst $1, fst $3), snd $1} +| relational_expression SUP shift_expression + {BINARY(GT, fst $1, fst $3), snd $1} +| relational_expression INF_EQ shift_expression + {BINARY(LE, fst $1, fst $3), snd $1} +| relational_expression SUP_EQ shift_expression + {BINARY(GE, fst $1, fst $3), snd $1} +; + +equality_expression: /*(* 6.5.9 *)*/ +| relational_expression + { $1 } +| equality_expression EQ_EQ relational_expression + {BINARY(EQ, fst $1, fst $3), snd $1} +| equality_expression EXCLAM_EQ relational_expression + {BINARY(NE, fst $1, fst $3), snd $1} +; + + +bitwise_and_expression: /*(* 6.5.10 *)*/ +| equality_expression + { $1 } +| bitwise_and_expression AND equality_expression + {BINARY(BAND, fst $1, fst $3), snd $1} +; + +bitwise_xor_expression: /*(* 6.5.11 *)*/ +| bitwise_and_expression + { $1 } +| bitwise_xor_expression CIRC bitwise_and_expression + {BINARY(XOR, fst $1, fst $3), snd $1} +; + +bitwise_or_expression: /*(* 6.5.12 *)*/ +| bitwise_xor_expression + { $1 } +| bitwise_or_expression PIPE bitwise_xor_expression + {BINARY(BOR, fst $1, fst $3), snd $1} +; + +logical_and_expression: /*(* 6.5.13 *)*/ +| bitwise_or_expression + { $1 } +| logical_and_expression AND_AND bitwise_or_expression + {BINARY(AND, fst $1, fst $3), snd $1} +; + +logical_or_expression: /*(* 6.5.14 *)*/ +| logical_and_expression + { $1 } +| logical_or_expression PIPE_PIPE logical_and_expression + {BINARY(OR, fst $1, fst $3), snd $1} +; + +conditional_expression: /*(* 6.5.15 *)*/ +| logical_or_expression + { $1 } +| logical_or_expression QUEST opt_expression COLON conditional_expression + {QUESTION (fst $1, $3, fst $5), snd $1} +; + +/*(* The C spec says that left-hand sides of assignment expressions are unary + * expressions. GCC allows cast expressions in there ! *)*/ + +assignment_expression: /*(* 6.5.16 *)*/ +| conditional_expression + { $1 } +| cast_expression EQ assignment_expression + {BINARY(ASSIGN, fst $1, fst $3), snd $1} +| cast_expression PLUS_EQ assignment_expression + {BINARY(ADD_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression MINUS_EQ assignment_expression + {BINARY(SUB_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression STAR_EQ assignment_expression + {BINARY(MUL_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression SLASH_EQ assignment_expression + {BINARY(DIV_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression PERCENT_EQ assignment_expression + {BINARY(MOD_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression AND_EQ assignment_expression + {BINARY(BAND_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression PIPE_EQ assignment_expression + {BINARY(BOR_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression CIRC_EQ assignment_expression + {BINARY(XOR_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression INF_INF_EQ assignment_expression + {BINARY(SHL_ASSIGN, fst $1, fst $3), snd $1} +| cast_expression SUP_SUP_EQ assignment_expression + {BINARY(SHR_ASSIGN, fst $1, fst $3), snd $1} +; + +expression: /*(* 6.5.17 *)*/ + assignment_expression + { $1 } +; + + +constant: + CST_INT {CONST_INT (fst $1), snd $1} +| CST_FLOAT {CONST_FLOAT (fst $1), snd $1} +| CST_CHAR {CONST_CHAR (fst $1), snd $1} +| CST_WCHAR {CONST_WCHAR (fst $1), snd $1} +| string_constant {CONST_STRING (fst $1), snd $1} +| wstring_list {CONST_WSTRING (fst $1), snd $1} +; + +string_constant: +/* Now that we know this constant isn't part of a wstring, convert it + back to a string for easy viewing. */ + string_list { + let queue, location = $1 in + let buffer = Buffer.create (Queue.length queue) in + Queue.iter + (List.iter + (fun value -> + let char = int64_to_char value in + Buffer.add_char buffer char)) + queue; + Buffer.contents buffer, location + } +; +one_string_constant: +/* Don't concat multiple strings. For asm templates. */ + CST_STRING {intlist_to_string (fst $1) } +; +string_list: + one_string { + let queue = Queue.create () in + Queue.add (fst $1) queue; + queue, snd $1 + } +| string_list one_string { + Queue.add (fst $2) (fst $1); + $1 + } +; + +wstring_list: + CST_WSTRING { $1 } +| wstring_list one_string { (fst $1) @ (fst $2), snd $1 } +| wstring_list CST_WSTRING { (fst $1) @ (fst $2), snd $1 } +/* Only the first string in the list needs an L, so L"a" "b" is the same + * as L"ab" or L"a" L"b". */ + +one_string: + CST_STRING {$1} +| FUNCTION__ {(Cabs.explodeStringToInts + !currentFunctionName), $1} +| PRETTY_FUNCTION__ {(Cabs.explodeStringToInts + !currentFunctionName), $1} +; + +init_expression: + expression { SINGLE_INIT (fst $1) } +| LBRACE initializer_list_opt RBRACE + { COMPOUND_INIT $2} + +initializer_list: /* ISO 6.7.8. Allow a trailing COMMA */ + initializer { [$1] } +| initializer COMMA initializer_list_opt { $1 :: $3 } +; +initializer_list_opt: + /* empty */ { [] } +| initializer_list { $1 } +; +initializer: + init_designators eq_opt init_expression { ($1, $3) } +| gcc_init_designators init_expression { ($1, $2) } +| init_expression { (NEXT_INIT, $1) } +; +eq_opt: + EQ { () } + /*(* GCC allows missing = *)*/ +| /*(* empty *)*/ { () } +; +init_designators: + DOT id_or_typename init_designators_opt { INFIELD_INIT($2, $3) } +| LBRACKET expression RBRACKET init_designators_opt + { ATINDEX_INIT(fst $2, $4) } +| LBRACKET expression ELLIPSIS expression RBRACKET + { ATINDEXRANGE_INIT(fst $2, fst $4) } +; +init_designators_opt: + /* empty */ { NEXT_INIT } +| init_designators { $1 } +; + +gcc_init_designators: /*(* GCC supports these strange things *)*/ + id_or_typename COLON { INFIELD_INIT($1, NEXT_INIT) } +; + +arguments: + /* empty */ { [] } +| comma_expression { fst $1 } +; + +opt_expression: + /* empty */ + {NOTHING} +| comma_expression + {smooth_expression (fst $1)} +; + +comma_expression: + expression {[fst $1], snd $1} +| expression COMMA comma_expression { fst $1 :: fst $3, snd $1 } +| error COMMA comma_expression { $3 } +; + +comma_expression_opt: + /* empty */ { NOTHING } +| comma_expression { smooth_expression (fst $1) } +; + +paren_comma_expression: + LPAREN comma_expression RPAREN { $2 } +| LPAREN error RPAREN { [], $1 } +; + +bracket_comma_expression: + LBRACKET comma_expression RBRACKET { fst $2 } +| LBRACKET error RBRACKET { [] } +; + + +/*** statements ***/ +block: /* ISO 6.8.2 */ + block_begin local_labels block_attrs block_element_list RBRACE + {!Lexerhack.pop_context(); + { blabels = $2; + battrs = $3; + bstmts = $4 }, + $1, $5 + } +| error location RBRACE { { blabels = []; + battrs = []; + bstmts = [] }, + $2, $3 + } +; +block_begin: + LBRACE {!Lexerhack.push_context (); $1} +; + +block_attrs: + /* empty */ { [] } +| BLOCKATTRIBUTE paren_attr_list_ne + { [("__blockattribute__", $2)] } +; + +/* statements and declarations in a block, in any order (for C99 support) */ +block_element_list: + /* empty */ { [] } +| declaration block_element_list { DEFINITION($1) :: $2 } +| statement block_element_list { $1 :: $2 } +/*(* GCC accepts a label at the end of a block *)*/ +| IDENT COLON { [ LABEL (fst $1, NOP (snd $1), + snd $1)] } +| pragma block_element_list { $2 } +; + +local_labels: + /* empty */ { [] } +| LABEL__ local_label_names SEMICOLON local_labels { $2 @ $4 } +; +local_label_names: + IDENT { [ fst $1 ] } +| IDENT COMMA local_label_names { fst $1 :: $3 } +; + + + +statement: + SEMICOLON {NOP ((*handleLoc*) $1) } +| comma_expression SEMICOLON + {COMPUTATION (smooth_expression (fst $1), (*handleLoc*)(snd $1))} +| block {BLOCK (fst3 $1, (*handleLoc*)(snd3 $1))} +| IF paren_comma_expression statement %prec IF + {IF (smooth_expression (fst $2), $3, NOP $1, $1)} +| IF paren_comma_expression statement ELSE statement + {IF (smooth_expression (fst $2), $3, $5, (*handleLoc*) $1)} +| SWITCH paren_comma_expression statement + {SWITCH (smooth_expression (fst $2), $3, (*handleLoc*) $1)} +| WHILE paren_comma_expression statement + {WHILE (smooth_expression (fst $2), $3, (*handleLoc*) $1)} +| DO statement WHILE paren_comma_expression SEMICOLON + {DOWHILE (smooth_expression (fst $4), $2, (*handleLoc*) $1)} +| FOR LPAREN for_clause opt_expression + SEMICOLON opt_expression RPAREN statement + {FOR ($3, $4, $6, $8, (*handleLoc*) $1)} +| IDENT COLON statement + {LABEL (fst $1, $3, (*handleLoc*) (snd $1))} +| CASE expression COLON statement + {CASE (fst $2, $4, (*handleLoc*) $1)} +| CASE expression ELLIPSIS expression COLON statement + {CASERANGE (fst $2, fst $4, $6, (*handleLoc*) $1)} +| DEFAULT COLON + {DEFAULT (NOP $1, (*handleLoc*) $1)} +| RETURN SEMICOLON {RETURN (NOTHING, (*handleLoc*) $1)} +| RETURN comma_expression SEMICOLON + {RETURN (smooth_expression (fst $2), (*handleLoc*) $1)} +| BREAK SEMICOLON {BREAK ((*handleLoc*) $1)} +| CONTINUE SEMICOLON {CONTINUE ((*handleLoc*) $1)} +| GOTO IDENT SEMICOLON + {GOTO (fst $2, (*handleLoc*) $1)} +| GOTO STAR comma_expression SEMICOLON + { COMPGOTO (smooth_expression (fst $3), (*handleLoc*) $1) } +| ASM asmattr LPAREN asmtemplate asmoutputs RPAREN SEMICOLON + { ASM ($2, $4, $5, (*handleLoc*) $1) } +| MSASM { ASM ([], [fst $1], None, (*handleLoc*)(snd $1))} +| TRY block EXCEPT paren_comma_expression block + { let b, _, _ = $2 in + let h, _, _ = $5 in + if not !Cprint.msvcMode then + parse_error "try/except in GCC code"; + TRY_EXCEPT (b, COMMA (fst $4), h, (*handleLoc*) $1) } +| TRY block FINALLY block + { let b, _, _ = $2 in + let h, _, _ = $4 in + if not !Cprint.msvcMode then + parse_error "try/finally in GCC code"; + TRY_FINALLY (b, h, (*handleLoc*) $1) } + +| error location SEMICOLON { (NOP $2)} +; + + +for_clause: + opt_expression SEMICOLON { FC_EXP $1 } +| declaration { FC_DECL $1 } +; + +declaration: /* ISO 6.7.*/ + decl_spec_list init_declarator_list SEMICOLON + { doDeclaration ((*handleLoc*)(snd $1)) (fst $1) $2 } +| decl_spec_list SEMICOLON + { doDeclaration ((*handleLoc*)(snd $1)) (fst $1) [] } +; +init_declarator_list: /* ISO 6.7 */ + init_declarator { [$1] } +| init_declarator COMMA init_declarator_list { $1 :: $3 } + +; +init_declarator: /* ISO 6.7 */ + declarator { ($1, NO_INIT) } +| declarator EQ init_expression + { ($1, $3) } +; + +decl_spec_list: /* ISO 6.7 */ + /* ISO 6.7.1 */ +| TYPEDEF decl_spec_list_opt { SpecTypedef :: $2, $1 } +| EXTERN decl_spec_list_opt { SpecStorage EXTERN :: $2, $1 } +| STATIC decl_spec_list_opt { SpecStorage STATIC :: $2, $1 } +| AUTO decl_spec_list_opt { SpecStorage AUTO :: $2, $1 } +| REGISTER decl_spec_list_opt { SpecStorage REGISTER :: $2, $1} + /* ISO 6.7.2 */ +| type_spec decl_spec_list_opt_no_named { SpecType (fst $1) :: $2, snd $1 } + /* ISO 6.7.4 */ +| INLINE decl_spec_list_opt { SpecInline :: $2, $1 } +| cvspec decl_spec_list_opt { (fst $1) :: $2, snd $1 } +| attribute_nocv decl_spec_list_opt { SpecAttr (fst $1) :: $2, snd $1 } +/* specifier pattern variable (must be last in spec list) */ +| AT_SPECIFIER LPAREN IDENT RPAREN { [ SpecPattern(fst $3) ], $1 } +; +/* (* In most cases if we see a NAMED_TYPE we must shift it. Thus we declare + * NAMED_TYPE to have right associativity *) */ +decl_spec_list_opt: + /* empty */ { [] } %prec NAMED_TYPE +| decl_spec_list { fst $1 } +; +/* (* We add this separate rule to handle the special case when an appearance + * of NAMED_TYPE should not be considered as part of the specifiers but as + * part of the declarator. IDENT has higher precedence than NAMED_TYPE *) + */ +decl_spec_list_opt_no_named: + /* empty */ { [] } %prec IDENT +| decl_spec_list { fst $1 } +; +type_spec: /* ISO 6.7.2 */ + VOID { Tvoid, $1} +| CHAR { Tchar, $1 } +| SHORT { Tshort, $1 } +| INT { Tint, $1 } +| LONG { Tlong, $1 } +| INT64 { Tint64, $1 } +| FLOAT { Tfloat, $1 } +| DOUBLE { Tdouble, $1 } +| SIGNED { Tsigned, $1 } +| UNSIGNED { Tunsigned, $1 } +| STRUCT id_or_typename + { Tstruct ($2, None, []), $1 } +| STRUCT just_attributes id_or_typename + { Tstruct ($3, None, $2), $1 } +| STRUCT id_or_typename LBRACE struct_decl_list RBRACE + { Tstruct ($2, Some $4, []), $1 } +| STRUCT LBRACE struct_decl_list RBRACE + { Tstruct ("", Some $3, []), $1 } +| STRUCT just_attributes id_or_typename LBRACE struct_decl_list RBRACE + { Tstruct ($3, Some $5, $2), $1 } +| STRUCT just_attributes LBRACE struct_decl_list RBRACE + { Tstruct ("", Some $4, $2), $1 } +| UNION id_or_typename + { Tunion ($2, None, []), $1 } +| UNION id_or_typename LBRACE struct_decl_list RBRACE + { Tunion ($2, Some $4, []), $1 } +| UNION LBRACE struct_decl_list RBRACE + { Tunion ("", Some $3, []), $1 } +| UNION just_attributes id_or_typename LBRACE struct_decl_list RBRACE + { Tunion ($3, Some $5, $2), $1 } +| UNION just_attributes LBRACE struct_decl_list RBRACE + { Tunion ("", Some $4, $2), $1 } +| ENUM id_or_typename + { Tenum ($2, None, []), $1 } +| ENUM id_or_typename LBRACE enum_list maybecomma RBRACE + { Tenum ($2, Some $4, []), $1 } +| ENUM LBRACE enum_list maybecomma RBRACE + { Tenum ("", Some $3, []), $1 } +| ENUM just_attributes id_or_typename LBRACE enum_list maybecomma RBRACE + { Tenum ($3, Some $5, $2), $1 } +| ENUM just_attributes LBRACE enum_list maybecomma RBRACE + { Tenum ("", Some $4, $2), $1 } +| NAMED_TYPE { Tnamed (fst $1), snd $1 } +| TYPEOF LPAREN expression RPAREN { TtypeofE (fst $3), $1 } +| TYPEOF LPAREN type_name RPAREN { let s, d = $3 in + TtypeofT (s, d), $1 } +; +struct_decl_list: /* (* ISO 6.7.2. Except that we allow empty structs. We + * also allow missing field names. *) + */ + /* empty */ { [] } +| decl_spec_list SEMICOLON struct_decl_list + { (fst $1, + [(missingFieldDecl, None)]) :: $3 } +/*(* GCC allows extra semicolons *)*/ +| SEMICOLON struct_decl_list + { $2 } +| decl_spec_list field_decl_list SEMICOLON struct_decl_list + { (fst $1, $2) + :: $4 } +/*(* MSVC allows pragmas in strange places *)*/ +| pragma struct_decl_list { $2 } + +| error SEMICOLON struct_decl_list + { $3 } +; +field_decl_list: /* (* ISO 6.7.2 *) */ + field_decl { [$1] } +| field_decl COMMA field_decl_list { $1 :: $3 } +; +field_decl: /* (* ISO 6.7.2. Except that we allow unnamed fields. *) */ +| declarator { ($1, None) } +| declarator COLON expression { ($1, Some (fst $3)) } +| COLON expression { (missingFieldDecl, Some (fst $2)) } +; + +enum_list: /* (* ISO 6.7.2.2 *) */ + enumerator {[$1]} +| enum_list COMMA enumerator {$1 @ [$3]} +| enum_list COMMA error { $1 } +; +enumerator: + IDENT {(fst $1, NOTHING, snd $1)} +| IDENT EQ expression {(fst $1, fst $3, snd $1)} +; + + +declarator: /* (* ISO 6.7.5. Plus Microsoft declarators.*) */ + pointer_opt direct_decl attributes_with_asm + { let (n, decl) = $2 in + (n, applyPointer (fst $1) decl, $3, (*(*handleLoc*)*)(snd $1)) } +; + + +direct_decl: /* (* ISO 6.7.5 *) */ + /* (* We want to be able to redefine named + * types as variable names *) */ +| id_or_typename { ($1, JUSTBASE) } + +| LPAREN attributes declarator RPAREN + { let (n,decl,al,loc) = $3 in + (n, PARENTYPE($2,decl,al)) } + +| direct_decl LBRACKET attributes comma_expression_opt RBRACKET + { let (n, decl) = $1 in + (n, ARRAY(decl, $3, $4)) } +| direct_decl LBRACKET attributes error RBRACKET + { let (n, decl) = $1 in + (n, ARRAY(decl, $3, NOTHING)) } +| direct_decl parameter_list_startscope rest_par_list RPAREN + { let (n, decl) = $1 in + let (params, isva) = $3 in + !Lexerhack.pop_context (); + (n, PROTO(decl, params, isva)) + } +; +parameter_list_startscope: + LPAREN { !Lexerhack.push_context () } +; +rest_par_list: +| /* empty */ { ([], false) } +| parameter_decl rest_par_list1 { let (params, isva) = $2 in + ($1 :: params, isva) + } +; +rest_par_list1: + /* empty */ { ([], false) } +| COMMA ELLIPSIS { ([], true) } +| COMMA parameter_decl rest_par_list1 { let (params, isva) = $3 in + ($2 :: params, isva) + } +; + + +parameter_decl: /* (* ISO 6.7.5 *) */ + decl_spec_list declarator { (fst $1, $2) } +| decl_spec_list abstract_decl { let d, a = $2 in + (fst $1, ("", d, a, cabslu)) } +| decl_spec_list { (fst $1, ("", JUSTBASE, [], cabslu)) } +| LPAREN parameter_decl RPAREN { $2 } +; + +/* (* Old style prototypes. Like a declarator *) */ +old_proto_decl: + pointer_opt direct_old_proto_decl { let (n, decl, a) = $2 in + (n, applyPointer (fst $1) decl, + a, snd $1) + } + +; + +direct_old_proto_decl: + direct_decl LPAREN old_parameter_list_ne RPAREN old_pardef_list + { let par_decl, isva = doOldParDecl $3 $5 in + let n, decl = $1 in + (n, PROTO(decl, par_decl, isva), []) + } +| direct_decl LPAREN RPAREN + { let n, decl = $1 in + (n, PROTO(decl, [], false), []) + } + +/* (* appears sometimesm but generates a shift-reduce conflict. *) +| LPAREN STAR direct_decl LPAREN old_parameter_list_ne RPAREN RPAREN LPAREN RPAREN old_pardef_list + { let par_decl, isva + = doOldParDecl $5 $10 in + let n, decl = $3 in + (n, PROTO(decl, par_decl, isva), []) + } +*/ +; + +old_parameter_list_ne: +| IDENT { [fst $1] } +| IDENT COMMA old_parameter_list_ne { let rest = $3 in + (fst $1 :: rest) } +; + +old_pardef_list: + /* empty */ { ([], false) } +| decl_spec_list old_pardef SEMICOLON ELLIPSIS + { ([(fst $1, $2)], true) } +| decl_spec_list old_pardef SEMICOLON old_pardef_list + { let rest, isva = $4 in + ((fst $1, $2) :: rest, isva) + } +; + +old_pardef: + declarator { [$1] } +| declarator COMMA old_pardef { $1 :: $3 } +| error { [] } +; + + +pointer: /* (* ISO 6.7.5 *) */ + STAR attributes pointer_opt { $2 :: fst $3, $1 } +; +pointer_opt: + /**/ { let l = currentLoc () in + ([], l) } +| pointer { $1 } +; + +type_name: /* (* ISO 6.7.6 *) */ + decl_spec_list abstract_decl { let d, a = $2 in + if a <> [] then begin + parse_error "attributes in type name"; + raise Parsing.Parse_error + end; + (fst $1, d) + } +| decl_spec_list { (fst $1, JUSTBASE) } +; +abstract_decl: /* (* ISO 6.7.6. *) */ + pointer_opt abs_direct_decl attributes { applyPointer (fst $1) $2, $3 } +| pointer { applyPointer (fst $1) JUSTBASE, [] } +; + +abs_direct_decl: /* (* ISO 6.7.6. We do not support optional declarator for + * functions. Plus Microsoft attributes. See the + * discussion for declarator. *) */ +| LPAREN attributes abstract_decl RPAREN + { let d, a = $3 in + PARENTYPE ($2, d, a) + } + +| LPAREN error RPAREN + { JUSTBASE } + +| abs_direct_decl_opt LBRACKET comma_expression_opt RBRACKET + { ARRAY($1, [], $3) } +/*(* The next should be abs_direct_decl_opt but we get conflicts *)*/ +| abs_direct_decl parameter_list_startscope rest_par_list RPAREN + { let (params, isva) = $3 in + !Lexerhack.pop_context (); + PROTO ($1, params, isva) + } +; +abs_direct_decl_opt: + abs_direct_decl { $1 } +| /* empty */ { JUSTBASE } +; +function_def: /* (* ISO 6.9.1 *) */ + function_def_start block + { let (loc, specs, decl) = $1 in + currentFunctionName := "<__FUNCTION__ used outside any functions>"; + !Lexerhack.pop_context (); (* The context pushed by + * announceFunctionName *) + doFunctionDef ((*handleLoc*) loc) (trd3 $2) specs decl (fst3 $2) + } + + +function_def_start: /* (* ISO 6.9.1 *) */ + decl_spec_list declarator + { announceFunctionName $2; + (snd $1, fst $1, $2) + } + +/* (* Old-style function prototype *) */ +| decl_spec_list old_proto_decl + { announceFunctionName $2; + (snd $1, fst $1, $2) + } +/* (* New-style function that does not have a return type *) */ +| IDENT parameter_list_startscope rest_par_list RPAREN + { let (params, isva) = $3 in + let fdec = + (fst $1, PROTO(JUSTBASE, params, isva), [], snd $1) in + announceFunctionName fdec; + (* Default is int type *) + let defSpec = [SpecType Tint] in + (snd $1, defSpec, fdec) + } + +/* (* No return type and old-style parameter list *) */ +| IDENT LPAREN old_parameter_list_ne RPAREN old_pardef_list + { (* Convert pardecl to new style *) + let pardecl, isva = doOldParDecl $3 $5 in + (* Make the function declarator *) + let fdec = (fst $1, + PROTO(JUSTBASE, pardecl,isva), + [], snd $1) in + announceFunctionName fdec; + (* Default is int type *) + let defSpec = [SpecType Tint] in + (snd $1, defSpec, fdec) + } +/* (* No return type and no parameters *) */ +| IDENT LPAREN RPAREN + { (* Make the function declarator *) + let fdec = (fst $1, + PROTO(JUSTBASE, [], false), + [], snd $1) in + announceFunctionName fdec; + (* Default is int type *) + let defSpec = [SpecType Tint] in + (snd $1, defSpec, fdec) + } +; + +/* const/volatile as type specifier elements */ +cvspec: + CONST { SpecCV(CV_CONST), $1 } +| VOLATILE { SpecCV(CV_VOLATILE), $1 } +| RESTRICT { SpecCV(CV_RESTRICT), $1 } +; + +/*** GCC attributes ***/ +attributes: + /* empty */ { []} +| attribute attributes { fst $1 :: $2 } +; + +/* (* In some contexts we can have an inline assembly to specify the name to + * be used for a global. We treat this as a name attribute *) */ +attributes_with_asm: + /* empty */ { [] } +| attribute attributes_with_asm { fst $1 :: $2 } +| ASM LPAREN string_constant RPAREN attributes + { ("__asm__", + [CONSTANT(CONST_STRING (fst $3))]) :: $5 } +; + +/* things like __attribute__, but no const/volatile */ +attribute_nocv: + ATTRIBUTE LPAREN paren_attr_list_ne RPAREN + { ("__attribute__", $3), $1 } +/*(* +| ATTRIBUTE_USED { ("__attribute__", + [ VARIABLE "used" ]), $1 } +*)*/ +| DECLSPEC paren_attr_list_ne { ("__declspec", $2), $1 } +| MSATTR { (fst $1, []), snd $1 } + /* ISO 6.7.3 */ +| THREAD { ("__thread",[]), $1 } +; + +/* __attribute__ plus const/volatile */ +attribute: + attribute_nocv { $1 } +| CONST { ("const", []), $1 } +| RESTRICT { ("restrict",[]), $1 } +| VOLATILE { ("volatile",[]), $1 } +; + +/* (* sm: I need something that just includes __attribute__ and nothing more, + * to support them appearing between the 'struct' keyword and the type name. + * Actually, a declspec can appear there as well (on MSVC) *) */ +just_attribute: + ATTRIBUTE LPAREN paren_attr_list_ne RPAREN + { ("__attribute__", $3) } +| DECLSPEC paren_attr_list_ne { ("__declspec", $2) } +; + +/* this can't be empty, b/c I folded that possibility into the calling + * productions to avoid some S/R conflicts */ +just_attributes: + just_attribute { [$1] } +| just_attribute just_attributes { $1 :: $2 } +; + +/** (* PRAGMAS and ATTRIBUTES *) ***/ +pragma: +| PRAGMA attr PRAGMA_EOL { PRAGMA ($2, $1) } +| PRAGMA attr SEMICOLON PRAGMA_EOL { PRAGMA ($2, $1) } +| PRAGMA_LINE { PRAGMA (VARIABLE (fst $1), + snd $1) } +; + +/* (* We want to allow certain strange things that occur in pragmas, so we + * cannot use directly the language of expressions *) */ +primary_attr: + IDENT { VARIABLE (fst $1) } + /*(* The NAMED_TYPE here creates conflicts with IDENT *)*/ +| NAMED_TYPE { VARIABLE (fst $1) } +| LPAREN attr RPAREN { $2 } +| IDENT IDENT { CALL(VARIABLE (fst $1), [VARIABLE (fst $2)]) } +| CST_INT { CONSTANT(CONST_INT (fst $1)) } +| string_constant { CONSTANT(CONST_STRING (fst $1)) } + /*(* Const when it appears in + * attribute lists, is translated + * to aconst *)*/ +| CONST { VARIABLE "aconst" } +| IDENT COLON CST_INT { VARIABLE (fst $1 ^ ":" ^ fst $3) } + +| CST_INT COLON CST_INT { VARIABLE (fst $1 ^ ":" ^ fst $3) } +| DEFAULT COLON CST_INT { VARIABLE ("default:" ^ fst $3) } + + /*(** GCC allows this as an + * attribute for functions, + * synonim for noreturn **)*/ +| VOLATILE { VARIABLE ("__noreturn__") } +; + +postfix_attr: + primary_attr { $1 } + /* (* use a VARIABLE "" so that the + * parentheses are printed *) */ +| IDENT LPAREN RPAREN { CALL(VARIABLE (fst $1), [VARIABLE ""]) } +| IDENT paren_attr_list_ne { CALL(VARIABLE (fst $1), $2) } + +| postfix_attr ARROW id_or_typename {MEMBEROFPTR ($1, $3)} +| postfix_attr DOT id_or_typename {MEMBEROF ($1, $3)} +; + +/*(* Since in attributes we use both IDENT and NAMED_TYPE as indentifiers, + * that leads to conflicts for SIZEOF and ALIGNOF. In those cases we require + * that their arguments be expressions, not attributes *)*/ +unary_attr: + postfix_attr { $1 } +| SIZEOF unary_expression {EXPR_SIZEOF (fst $2) } +| SIZEOF LPAREN type_name RPAREN + {let b, d = $3 in TYPE_SIZEOF (b, d)} + +| ALIGNOF unary_expression {EXPR_ALIGNOF (fst $2) } +| ALIGNOF LPAREN type_name RPAREN {let b, d = $3 in TYPE_ALIGNOF (b, d)} +| PLUS cast_attr {UNARY (PLUS, $2)} +| MINUS cast_attr {UNARY (MINUS, $2)} +| STAR cast_attr {UNARY (MEMOF, $2)} +| AND cast_attr + {UNARY (ADDROF, $2)} +| EXCLAM cast_attr {UNARY (NOT, $2)} +| TILDE cast_attr {UNARY (BNOT, $2)} +; + +cast_attr: + unary_attr { $1 } +; + +multiplicative_attr: + cast_attr { $1 } +| multiplicative_attr STAR cast_attr {BINARY(MUL ,$1 , $3)} +| multiplicative_attr SLASH cast_attr {BINARY(DIV ,$1 , $3)} +| multiplicative_attr PERCENT cast_attr {BINARY(MOD ,$1 , $3)} +; + + +additive_attr: + multiplicative_attr { $1 } +| additive_attr PLUS multiplicative_attr {BINARY(ADD ,$1 , $3)} +| additive_attr MINUS multiplicative_attr {BINARY(SUB ,$1 , $3)} +; + +shift_attr: + additive_attr { $1 } +| shift_attr INF_INF additive_attr {BINARY(SHL ,$1 , $3)} +| shift_attr SUP_SUP additive_attr {BINARY(SHR ,$1 , $3)} +; + +relational_attr: + shift_attr { $1 } +| relational_attr INF shift_attr {BINARY(LT ,$1 , $3)} +| relational_attr SUP shift_attr {BINARY(GT ,$1 , $3)} +| relational_attr INF_EQ shift_attr {BINARY(LE ,$1 , $3)} +| relational_attr SUP_EQ shift_attr {BINARY(GE ,$1 , $3)} +; + +equality_attr: + relational_attr { $1 } +| equality_attr EQ_EQ relational_attr {BINARY(EQ ,$1 , $3)} +| equality_attr EXCLAM_EQ relational_attr {BINARY(NE ,$1 , $3)} +; + + +bitwise_and_attr: + equality_attr { $1 } +| bitwise_and_attr AND equality_attr {BINARY(BAND ,$1 , $3)} +; + +bitwise_xor_attr: + bitwise_and_attr { $1 } +| bitwise_xor_attr CIRC bitwise_and_attr {BINARY(XOR ,$1 , $3)} +; + +bitwise_or_attr: + bitwise_xor_attr { $1 } +| bitwise_or_attr PIPE bitwise_xor_attr {BINARY(BOR ,$1 , $3)} +; + +logical_and_attr: + bitwise_or_attr { $1 } +| logical_and_attr AND_AND bitwise_or_attr {BINARY(AND ,$1 , $3)} +; + +logical_or_attr: + logical_and_attr { $1 } +| logical_or_attr PIPE_PIPE logical_and_attr {BINARY(OR ,$1 , $3)} +; + + +attr: logical_or_attr { $1 } +; + +attr_list_ne: +| attr { [$1] } +| attr COMMA attr_list_ne { $1 :: $3 } +| error COMMA attr_list_ne { $3 } +; +paren_attr_list_ne: + LPAREN attr_list_ne RPAREN { $2 } +| LPAREN error RPAREN { [] } +; +/*** GCC ASM instructions ***/ +asmattr: + /* empty */ { [] } +| VOLATILE asmattr { ("volatile", []) :: $2 } +| CONST asmattr { ("const", []) :: $2 } +; +asmtemplate: + one_string_constant { [$1] } +| one_string_constant asmtemplate { $1 :: $2 } +; +asmoutputs: + /* empty */ { None } +| COLON asmoperands asminputs + { let (ins, clobs) = $3 in + Some {aoutputs = $2; ainputs = ins; aclobbers = clobs} } +; +asmoperands: + /* empty */ { [] } +| asmoperandsne { List.rev $1 } +; +asmoperandsne: + asmoperand { [$1] } +| asmoperandsne COMMA asmoperand { $3 :: $1 } +; +asmoperand: + string_constant LPAREN expression RPAREN { (fst $1, fst $3) } +| string_constant LPAREN error RPAREN { (fst $1, NOTHING ) } +; +asminputs: + /* empty */ { ([], []) } +| COLON asmoperands asmclobber + { ($2, $3) } +; +asmclobber: + /* empty */ { [] } +| COLON asmcloberlst_ne { $2 } +; +asmcloberlst_ne: + one_string_constant { [$1] } +| one_string_constant COMMA asmcloberlst_ne { $1 :: $3 } +; + +%% + + + diff --git a/cil/src/frontc/cprint.ml b/cil/src/frontc/cprint.ml new file mode 100644 index 00000000..570945c0 --- /dev/null +++ b/cil/src/frontc/cprint.ml @@ -0,0 +1,1014 @@ +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(* cprint -- pretty printer of C program from abstract syntax +** +** Project: FrontC +** File: cprint.ml +** Version: 2.1e +** Date: 9.1.99 +** Author: Hugues Cassé +** +** 1.0 2.22.99 Hugues Cassé First version. +** 2.0 3.18.99 Hugues Cassé Compatible with Frontc 2.1, use of CAML +** pretty printer. +** 2.1 3.22.99 Hugues Cassé More efficient custom pretty printer used. +** 2.1a 4.12.99 Hugues Cassé Correctly handle: +** char *m, *m, *p; m + (n - p) +** 2.1b 4.15.99 Hugues Cassé x + (y + z) stays x + (y + z) for +** keeping computation order. +** 2.1c 7.23.99 Hugues Cassé Improvement of case and default display. +** 2.1d 8.25.99 Hugues Cassé Rebuild escape sequences in string and +** characters. +** 2.1e 9.1.99 Hugues Cassé Fix, recognize and correctly display '\0'. +*) + +(* George Necula: I changed this pretty dramatically since CABS changed *) +open Cabs +open Escape +let version = "Cprint 2.1e 9.1.99 Hugues Cassé" + +type loc = { line : int; file : string } + +let lu = {line = -1; file = "loc unknown";} +let cabslu = {lineno = -10; + filename = "cabs loc unknown"; + byteno = -10;} + +let curLoc = ref cabslu + +let msvcMode = ref false + +let printLn = ref true +let printLnComment = ref false + +let printCounters = ref false +let printComments = ref false + +(* +** FrontC Pretty printer +*) +let out = ref stdout +let width = ref 80 +let tab = ref 2 +let max_indent = ref 60 + +let line = ref "" +let line_len = ref 0 +let current = ref "" +let current_len = ref 0 +let spaces = ref 0 +let follow = ref 0 +let roll = ref 0 + +let print_tab size = + for i = 1 to size / 8 do + output_char !out '\t' + done; + for i = 1 to size mod 8 do + output_char !out ' ' + done + +let flush _ = + if !line <> "" then begin + print_tab (!spaces + !follow); + output_string !out !line; + line := ""; + line_len := 0 + end + +let commit _ = + if !current <> "" then begin + if !line = "" then begin + line := !current; + line_len := !current_len + end else begin + line := (!line ^ " " ^ !current); + line_len := !line_len + 1 + !current_len + end; + current := ""; + current_len := 0 + end + + +let addline () = + curLoc := {lineno = !curLoc.lineno+1; + filename = !curLoc.filename; + byteno = -1;} (*sfg: can we do better than this?*) + + +let new_line _ = + commit (); + if !line <> "" then begin + flush (); + addline(); + output_char !out '\n' + end; + follow := 0 + +let force_new_line _ = + commit (); + flush (); + addline(); + output_char !out '\n'; + follow := 0 + +let indent _ = + new_line (); + spaces := !spaces + !tab; + if !spaces >= !max_indent then begin + spaces := !tab; + roll := !roll + 1 + end + +let indentline _ = + new_line (); + if !spaces >= !max_indent then begin + spaces := !tab; + roll := !roll + 1 + end + +let unindent _ = + new_line (); + spaces := !spaces - !tab; + if (!spaces <= 0) && (!roll > 0) then begin + spaces := ((!max_indent - 1) / !tab) * !tab; + roll := !roll - 1 + end + +let space _ = commit () + +let print str = + current := !current ^ str; + current_len := !current_len + (String.length str); + if (!spaces + !follow + !line_len + 1 + !current_len) > !width + then begin + if !line_len = 0 then commit (); + flush (); + addline(); + output_char !out '\n'; + if !follow = 0 then follow := !tab + end + +(* sm: for some reason I couldn't just call print from frontc.... ? *) +let print_unescaped_string str = print str + +let setLoc (l : cabsloc) = + if !printLn then + if (l.lineno <> !curLoc.lineno) || l.filename <> !curLoc.filename then + begin + let oldspaces = !spaces in + (* sm: below, we had '//#' instead of '#', which means printLnComment was disregarded *) + if !printLnComment then print "//" else print "#"; + if !msvcMode then print "line"; + print " "; + print (string_of_int l.lineno); + if (l.filename <> !curLoc.filename) then begin + print (" \"" ^ l.filename ^ "\"") + end; + spaces := oldspaces; + new_line(); + curLoc := l + end + + + +(* +** Useful primitives +*) +let print_list print_sep print_elt lst = + let _ = List.fold_left + (fun com elt -> + if com then print_sep (); + print_elt elt; + true) + false + lst in + () + +let print_commas nl fct lst = + print_list (fun () -> print ","; if nl then new_line() else space()) fct lst + +let print_string (s:string) = + print ("\"" ^ escape_string s ^ "\"") + +let print_wstring (s: int64 list ) = + print ("L\"" ^ escape_wstring s ^ "\"") + +(* +** Base Type Printing +*) + +let rec print_specifiers (specs: spec_elem list) = + comprint "specifier("; + let print_spec_elem = function + SpecTypedef -> print "typedef " + | SpecInline -> print "__inline " + | SpecStorage sto -> + print (match sto with + NO_STORAGE -> (comstring "/*no storage*/") + | AUTO -> "auto " + | STATIC -> "static " + | EXTERN -> "extern " + | REGISTER -> "register ") + | SpecCV cv -> + print (match cv with + | CV_CONST -> "const " + | CV_VOLATILE -> "volatile " + | CV_RESTRICT -> "restrict ") + | SpecAttr al -> print_attribute al; space () + | SpecType bt -> print_type_spec bt + | SpecPattern name -> print ("@specifier(" ^ name ^ ") ") + in + List.iter print_spec_elem specs + ;comprint ")" + + +and print_type_spec = function + Tvoid -> print "void " + | Tchar -> print "char " + | Tshort -> print "short " + | Tint -> print "int " + | Tlong -> print "long " + | Tint64 -> print "__int64 " + | Tfloat -> print "float " + | Tdouble -> print "double " + | Tsigned -> print "signed " + | Tunsigned -> print "unsigned " + | Tnamed s -> comprint "tnamed"; print s; space (); + | Tstruct (n, None, _) -> print ("struct " ^ n ^ " ") + | Tstruct (n, Some flds, extraAttrs) -> + (print_struct_name_attr "struct" n extraAttrs); + (print_fields flds) + | Tunion (n, None, _) -> print ("union " ^ n ^ " ") + | Tunion (n, Some flds, extraAttrs) -> + (print_struct_name_attr "union" n extraAttrs); + (print_fields flds) + | Tenum (n, None, _) -> print ("enum " ^ n ^ " ") + | Tenum (n, Some enum_items, extraAttrs) -> + (print_struct_name_attr "enum" n extraAttrs); + (print_enum_items enum_items) + | TtypeofE e -> print "__typeof__("; print_expression e; print ") " + | TtypeofT (s,d) -> print "__typeof__("; print_onlytype (s, d); print ") " + + +(* print "struct foo", but with specified keyword and a list of + * attributes to put between keyword and name *) +and print_struct_name_attr (keyword: string) (name: string) (extraAttrs: attribute list) = +begin + if extraAttrs = [] then + print (keyword ^ " " ^ name) + else begin + (print (keyword ^ " ")); + (print_attributes extraAttrs); (* prints a final space *) + (print name); + end +end + + +(* This is the main printer for declarations. It is easy bacause the + * declarations are laid out as they need to be printed. *) +and print_decl (n: string) = function + JUSTBASE -> if n <> "___missing_field_name" then + print n + else + comprint "missing field name" + | PARENTYPE (al1, d, al2) -> + print "("; + print_attributes al1; space (); + print_decl n d; space (); + print_attributes al2; print ")" + | PTR (al, d) -> + print "* "; + print_attributes al; space (); + print_decl n d + | ARRAY (d, al, e) -> + print_decl n d; + print "["; + print_attributes al; + if e <> NOTHING then print_expression e; + print "]" + | PROTO(d, args, isva) -> + comprint "proto("; + print_decl n d; + print "("; + print_params args isva; + print ")"; + comprint ")" + + +and print_fields (flds : field_group list) = + if flds = [] then print " { } " + else begin + print " {"; + indent (); + List.iter + (fun fld -> print_field_group fld; print ";"; new_line ()) + flds; + unindent (); + print "} " + end + +and print_enum_items items = + if items = [] then print " { } " + else begin + print " {"; + indent (); + print_commas + true + (fun (id, exp, loc) -> print id; + if exp = NOTHING then () + else begin + space (); + print "= "; + print_expression exp + end) + items; + unindent (); + print "} "; + end + + +and print_onlytype (specs, dt) = + print_specifiers specs; + print_decl "" dt + +and print_name ((n, decl, attrs, _) : name) = + print_decl n decl; + space (); + print_attributes attrs + +and print_init_name ((n, i) : init_name) = + print_name n; + if i <> NO_INIT then begin + space (); + print "= "; + print_init_expression i + end + +and print_name_group (specs, names) = + print_specifiers specs; + print_commas false print_name names + +and print_field_group (specs, fields) = + print_specifiers specs; + print_commas false print_field fields + + +and print_field (name, widtho) = + print_name name; + (match widtho with + None -> () + | Some w -> print " : "; print_expression w) + +and print_init_name_group (specs, names) = + print_specifiers specs; + print_commas false print_init_name names + +and print_single_name (specs, name) = + print_specifiers specs; + print_name name + +and print_params (pars : single_name list) (ell : bool) = + print_commas false print_single_name pars; + if ell then print (if pars = [] then "..." else ", ...") else () + +and print_old_params pars ell = + print_commas false (fun id -> print id) pars; + if ell then print (if pars = [] then "..." else ", ...") else () + + +(* +** Expression printing +** Priorities +** 16 variables +** 15 . -> [] call() +** 14 ++, -- (post) +** 13 ++ -- (pre) ~ ! - + & *(cast) +** 12 * / % +** 11 + - +** 10 << >> +** 9 < <= > >= +** 8 == != +** 7 & +** 6 ^ +** 5 | +** 4 && +** 3 || +** 2 ? : +** 1 = ?= +** 0 , +*) +and get_operator exp = + match exp with + NOTHING -> ("", 16) + | UNARY (op, _) -> + (match op with + MINUS -> ("-", 13) + | PLUS -> ("+", 13) + | NOT -> ("!", 13) + | BNOT -> ("~", 13) + | MEMOF -> ("*", 13) + | ADDROF -> ("&", 13) + | PREINCR -> ("++", 13) + | PREDECR -> ("--", 13) + | POSINCR -> ("++", 14) + | POSDECR -> ("--", 14)) + | LABELADDR s -> ("", 16) (* Like a constant *) + | BINARY (op, _, _) -> + (match op with + MUL -> ("*", 12) + | DIV -> ("/", 12) + | MOD -> ("%", 12) + | ADD -> ("+", 11) + | SUB -> ("-", 11) + | SHL -> ("<<", 10) + | SHR -> (">>", 10) + | LT -> ("<", 9) + | LE -> ("<=", 9) + | GT -> (">", 9) + | GE -> (">=", 9) + | EQ -> ("==", 8) + | NE -> ("!=", 8) + | BAND -> ("&", 7) + | XOR -> ("^", 6) + | BOR -> ("|", 5) + | AND -> ("&&", 4) + | OR -> ("||", 3) + | ASSIGN -> ("=", 1) + | ADD_ASSIGN -> ("+=", 1) + | SUB_ASSIGN -> ("-=", 1) + | MUL_ASSIGN -> ("*=", 1) + | DIV_ASSIGN -> ("/=", 1) + | MOD_ASSIGN -> ("%=", 1) + | BAND_ASSIGN -> ("&=", 1) + | BOR_ASSIGN -> ("|=", 1) + | XOR_ASSIGN -> ("^=", 1) + | SHL_ASSIGN -> ("<<=", 1) + | SHR_ASSIGN -> (">>=", 1)) + | QUESTION _ -> ("", 2) + | CAST _ -> ("", 13) + | CALL _ -> ("", 15) + | COMMA _ -> ("", 0) + | CONSTANT _ -> ("", 16) + | VARIABLE name -> ("", 16) + | EXPR_SIZEOF exp -> ("", 16) + | TYPE_SIZEOF _ -> ("", 16) + | EXPR_ALIGNOF exp -> ("", 16) + | TYPE_ALIGNOF _ -> ("", 16) + | INDEX (exp, idx) -> ("", 15) + | MEMBEROF (exp, fld) -> ("", 15) + | MEMBEROFPTR (exp, fld) -> ("", 15) + | GNU_BODY _ -> ("", 17) + | EXPR_PATTERN _ -> ("", 16) (* sm: not sure about this *) + +and print_comma_exps exps = + print_commas false print_expression exps + +and print_init_expression (iexp: init_expression) : unit = + match iexp with + NO_INIT -> () + | SINGLE_INIT e -> print_expression e + | COMPOUND_INIT initexps -> + let doinitexp = function + NEXT_INIT, e -> print_init_expression e + | i, e -> + let rec doinit = function + NEXT_INIT -> () + | INFIELD_INIT (fn, i) -> print ("." ^ fn); doinit i + | ATINDEX_INIT (e, i) -> + print "["; + print_expression e; + print "]"; + doinit i + | ATINDEXRANGE_INIT (s, e) -> + print "["; + print_expression s; + print " ... "; + print_expression e; + print "]" + in + doinit i; print " = "; + print_init_expression e + in + print "{"; + print_commas false doinitexp initexps; + print "}" + +and print_expression (exp: expression) = print_expression_level 1 exp + +and print_expression_level (lvl: int) (exp : expression) = + let (txt, lvl') = get_operator exp in + let _ = if lvl > lvl' then print "(" else () in + let _ = match exp with + NOTHING -> () + | UNARY (op, exp') -> + (match op with + POSINCR | POSDECR -> + print_expression_level lvl' exp'; + print txt + | _ -> + print txt; space (); (* Print the space to avoid --5 *) + print_expression_level lvl' exp') + | LABELADDR l -> print ("&& " ^ l) + | BINARY (op, exp1, exp2) -> + (*if (op = SUB) && (lvl <= lvl') then print "(";*) + print_expression_level lvl' exp1; + space (); + print txt; + space (); + (*print_expression exp2 (if op = SUB then (lvl' + 1) else lvl');*) + print_expression_level (lvl' + 1) exp2 + (*if (op = SUB) && (lvl <= lvl') then print ")"*) + | QUESTION (exp1, exp2, exp3) -> + print_expression_level 2 exp1; + space (); + print "? "; + print_expression_level 2 exp2; + space (); + print ": "; + print_expression_level 2 exp3; + | CAST (typ, iexp) -> + print "("; + print_onlytype typ; + print ")"; + (* Always print parentheses. In a small number of cases when we print + * constants we don't need them *) + (match iexp with + SINGLE_INIT e -> print_expression_level 15 e + | COMPOUND_INIT _ -> (* print "("; *) + print_init_expression iexp + (* ; print ")" *) + | NO_INIT -> print "") + + | CALL (VARIABLE "__builtin_va_arg", [arg; TYPE_SIZEOF (bt, dt)]) -> + comprint "variable"; + print "__builtin_va_arg"; + print "("; + print_expression_level 1 arg; + print ","; + print_onlytype (bt, dt); + print ")" + | CALL (exp, args) -> + print_expression_level 16 exp; + print "("; + print_comma_exps args; + print ")" + | COMMA exps -> + print_comma_exps exps + | CONSTANT cst -> + (match cst with + CONST_INT i -> print i + | CONST_FLOAT r -> print r + | CONST_CHAR c -> print ("'" ^ escape_wstring c ^ "'") + | CONST_WCHAR c -> print ("L'" ^ escape_wstring c ^ "'") + | CONST_STRING s -> print_string s + | CONST_WSTRING ws -> print_wstring ws) + | VARIABLE name -> + comprint "variable"; + print name + | EXPR_SIZEOF exp -> + print "sizeof("; + print_expression_level 0 exp; + print ")" + | TYPE_SIZEOF (bt,dt) -> + print "sizeof("; + print_onlytype (bt, dt); + print ")" + | EXPR_ALIGNOF exp -> + print "__alignof__("; + print_expression_level 0 exp; + print ")" + | TYPE_ALIGNOF (bt,dt) -> + print "__alignof__("; + print_onlytype (bt, dt); + print ")" + | INDEX (exp, idx) -> + print_expression_level 16 exp; + print "["; + print_expression_level 0 idx; + print "]" + | MEMBEROF (exp, fld) -> + print_expression_level 16 exp; + print ("." ^ fld) + | MEMBEROFPTR (exp, fld) -> + print_expression_level 16 exp; + print ("->" ^ fld) + | GNU_BODY (blk) -> + print "("; + print_block blk; + print ")" + | EXPR_PATTERN (name) -> + print ("@expr(" ^ name ^ ") ") + in + if lvl > lvl' then print ")" else () + + +(* +** Statement printing +*) +and print_statement stat = + match stat with + NOP (loc) -> + setLoc(loc); + print ";"; + new_line () + | COMPUTATION (exp, loc) -> + setLoc(loc); + print_expression exp; + print ";"; + new_line () + | BLOCK (blk, loc) -> print_block blk + + | SEQUENCE (s1, s2, loc) -> + setLoc(loc); + print_statement s1; + print_statement s2; + | IF (exp, s1, s2, loc) -> + setLoc(loc); + print "if("; + print_expression_level 0 exp; + print ")"; + print_substatement s1; + (match s2 with + | NOP(_) -> () + | _ -> begin + print "else"; + print_substatement s2; + end) + | WHILE (exp, stat, loc) -> + setLoc(loc); + print "while("; + print_expression_level 0 exp; + print ")"; + print_substatement stat + | DOWHILE (exp, stat, loc) -> + setLoc(loc); + print "do"; + print_substatement stat; + print "while("; + print_expression_level 0 exp; + print ");"; + new_line (); + | FOR (fc1, exp2, exp3, stat, loc) -> + setLoc(loc); + print "for("; + (match fc1 with + FC_EXP exp1 -> print_expression_level 0 exp1; print ";" + | FC_DECL dec1 -> print_def dec1); + space (); + print_expression_level 0 exp2; + print ";"; + space (); + print_expression_level 0 exp3; + print ")"; + print_substatement stat + | BREAK (loc)-> + setLoc(loc); + print "break;"; new_line () + | CONTINUE (loc) -> + setLoc(loc); + print "continue;"; new_line () + | RETURN (exp, loc) -> + setLoc(loc); + print "return"; + if exp = NOTHING + then () + else begin + print " "; + print_expression_level 1 exp + end; + print ";"; + new_line () + | SWITCH (exp, stat, loc) -> + setLoc(loc); + print "switch("; + print_expression_level 0 exp; + print ")"; + print_substatement stat + | CASE (exp, stat, loc) -> + setLoc(loc); + unindent (); + print "case "; + print_expression_level 1 exp; + print ":"; + indent (); + print_substatement stat + | CASERANGE (expl, exph, stat, loc) -> + setLoc(loc); + unindent (); + print "case "; + print_expression expl; + print " ... "; + print_expression exph; + print ":"; + indent (); + print_substatement stat + | DEFAULT (stat, loc) -> + setLoc(loc); + unindent (); + print "default :"; + indent (); + print_substatement stat + | LABEL (name, stat, loc) -> + setLoc(loc); + print (name ^ ":"); + space (); + print_substatement stat + | GOTO (name, loc) -> + setLoc(loc); + print ("goto " ^ name ^ ";"); + new_line () + | COMPGOTO (exp, loc) -> + setLoc(loc); + print ("goto *"); print_expression exp; print ";"; new_line () + | DEFINITION d -> + print_def d + | ASM (attrs, tlist, details, loc) -> + setLoc(loc); + let print_asm_operand (cnstr, e) = + print_string cnstr; space (); print_expression_level 100 e + in + if !msvcMode then begin + print "__asm {"; + print_list (fun () -> new_line()) print tlist; (* templates *) + print "};" + end else begin + print "__asm__ "; + print_attributes attrs; + print "("; + print_list (fun () -> new_line()) print_string tlist; (* templates *) + begin + match details with + | None -> () + | Some { aoutputs = outs; ainputs = ins; aclobbers = clobs } -> + print ":"; space (); + print_commas false print_asm_operand outs; + if ins <> [] || clobs <> [] then begin + print ":"; space (); + print_commas false print_asm_operand ins; + if clobs <> [] then begin + print ":"; space (); + print_commas false print_string clobs + end; + end + end; + print ");" + end; + new_line () + | TRY_FINALLY (b, h, loc) -> + setLoc loc; + print "__try "; + print_block b; + print "__finally "; + print_block h + + | TRY_EXCEPT (b, e, h, loc) -> + setLoc loc; + print "__try "; + print_block b; + print "__except("; print_expression e; print ")"; + print_block h + +and print_block blk = + new_line(); + print "{"; + indent (); + if blk.blabels <> [] then begin + print "__label__ "; + print_commas false print blk.blabels; + print ";"; + new_line (); + end; + if blk.battrs <> [] then begin + List.iter print_attribute blk.battrs; + new_line (); + end; + List.iter print_statement blk.bstmts; + unindent (); + print "}"; + new_line () + +and print_substatement stat = + match stat with + IF _ + | SEQUENCE _ + | DOWHILE _ -> + new_line (); + print "{"; + indent (); + print_statement stat; + unindent (); + print "}"; + new_line (); + | BLOCK _ -> + print_statement stat + | _ -> + indent (); + print_statement stat; + unindent () + + +(* +** GCC Attributes +*) +and print_attribute (name,args) = + if args = [] then print ( + match name with + "restrict" -> "__restrict" + (* weimer: Fri Dec 7 17:12:35 2001 + * must not print 'restrict' and the code below does allows some + * plain 'restrict's to slip though! *) + | x -> x) + else begin + print name; + print "("; if name = "__attribute__" then print "("; + (match args with + [VARIABLE "aconst"] -> print "const" + | [VARIABLE "restrict"] -> print "__restrict" + | _ -> print_commas false (fun e -> print_expression e) args); + print ")"; if name = "__attribute__" then print ")" + end + +(* Print attributes. *) +and print_attributes attrs = + List.iter (fun a -> print_attribute a; space ()) attrs + +(* +** Declaration printing +*) +and print_defs defs = + let prev = ref false in + List.iter + (fun def -> + (match def with + DECDEF _ -> prev := false + | _ -> + if not !prev then force_new_line (); + prev := true); + print_def def) + defs + +and print_def def = + match def with + FUNDEF (proto, body, loc, _) -> + comprint "fundef"; + if !printCounters then begin + try + let fname = + match proto with + (_, (n, _, _, _)) -> n + in + print_def (DECDEF (([SpecType Tint], + [(fname ^ "__counter", JUSTBASE, [], cabslu), + NO_INIT]), loc)); + with Not_found -> print "/* can't print the counter */" + end; + setLoc(loc); + print_single_name proto; + print_block body; + force_new_line (); + + | DECDEF (names, loc) -> + comprint "decdef"; + setLoc(loc); + print_init_name_group names; + print ";"; + new_line () + + | TYPEDEF (names, loc) -> + comprint "typedef"; + setLoc(loc); + print_name_group names; + print ";"; + new_line (); + force_new_line () + + | ONLYTYPEDEF (specs, loc) -> + comprint "onlytypedef"; + setLoc(loc); + print_specifiers specs; + print ";"; + new_line (); + force_new_line () + + | GLOBASM (asm, loc) -> + setLoc(loc); + print "__asm__ ("; print_string asm; print ");"; + new_line (); + force_new_line () + + | PRAGMA (a,loc) -> + setLoc(loc); + force_new_line (); + print "#pragma "; + let oldwidth = !width in + width := 1000000; (* Do not wrap pragmas *) + print_expression a; + width := oldwidth; + force_new_line () + + | LINKAGE (n, loc, dl) -> + setLoc (loc); + force_new_line (); + print "extern "; print_string n; print_string " {"; + List.iter print_def dl; + print_string "}"; + force_new_line () + + | TRANSFORMER(srcdef, destdeflist, loc) -> + setLoc(loc); + print "@transform {"; + force_new_line(); + print "{"; + force_new_line(); + indent (); + print_def srcdef; + unindent(); + print "}"; + force_new_line(); + print "to {"; + force_new_line(); + indent(); + List.iter print_def destdeflist; + unindent(); + print "}"; + force_new_line() + + | EXPRTRANSFORMER(srcexpr, destexpr, loc) -> + setLoc(loc); + print "@transformExpr { "; + print_expression srcexpr; + print " } to { "; + print_expression destexpr; + print " }"; + force_new_line() + + +(* sm: print a comment if the printComments flag is set *) +and comprint (str : string) : unit = +begin + if (!printComments) then ( + print "/*"; + print str; + print "*/ " + ) + else + () +end + +(* sm: yield either the given string, or "", depending on printComments *) +and comstring (str : string) : string = +begin + if (!printComments) then + str + else + "" +end + + +(* print abstrac_syntax -> () +** Pretty printing the given abstract syntax program. +*) +let printFile (result : out_channel) ((fname, defs) : file) = + out := result; + print_defs defs; + flush () (* sm: should do this here *) + +let set_tab t = tab := t +let set_width w = width := w + diff --git a/cil/src/frontc/frontc.ml b/cil/src/frontc/frontc.ml new file mode 100644 index 00000000..459ae2c3 --- /dev/null +++ b/cil/src/frontc/frontc.ml @@ -0,0 +1,256 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +module E = Errormsg +open Trace +open Pretty + +(* Output management *) +let out : out_channel option ref = ref None +let close_me = ref false + +let close_output _ = + match !out with + None -> () + | Some o -> begin + flush o; + if !close_me then close_out o else (); + close_me := false + end + +let set_output filename = + close_output (); + (try out := Some (open_out filename) + with (Sys_error msg) -> + output_string stderr ("Error while opening output: " ^ msg); exit 1); + close_me := true + + (* Signal that we are in MS VC mode *) +let setMSVCMode () = + Cprint.msvcMode := true + +(* filename for patching *) +let patchFileName : string ref = ref "" (* by default do no patching *) + +(* patching file contents *) +let patchFile : Cabs.file option ref = ref None + +(* whether to print the patched CABS files *) +let printPatchedFiles : bool ref = ref false + +(* whether to print a file of prototypes after parsing *) +let doPrintProtos : bool ref = ref false + +(* this seems like something that should be built-in.. *) +let isNone (o : 'a option) : bool = +begin + match o with + | Some _ -> false + | None -> true +end + +(* +** Argument definition +*) +let args : (string * Arg.spec * string) list = +[ + "--cabsonly", Arg.String set_output, ": CABS output file name"; + "--printComments", Arg.Unit (fun _ -> Cprint.printComments := true), + ": print cabs tree structure in comments in cabs output"; + "--patchFile", Arg.String (fun pf -> patchFileName := pf), + ": name the file containing patching transformations"; + "--printPatched", Arg.Unit (fun _ -> printPatchedFiles := true), + ": print patched CABS files after patching, to *.patched"; + "--printProtos", Arg.Unit (fun _ -> doPrintProtos := true), + ": print prototypes to safec.proto.h after parsing"; +] + +exception ParseError of string +exception CabsOnly + +(* parse, and apply patching *) +let rec parse_to_cabs fname = +begin + (* parse the patch file if it isn't parsed already *) + if ((!patchFileName <> "") && (isNone !patchFile)) then ( + (* parse the patch file *) + patchFile := Some(parse_to_cabs_inner !patchFileName); + if !E.hadErrors then + (failwith "There were parsing errors in the patch file") + ); + + (* now parse the file we came here to parse *) + let cabs = parse_to_cabs_inner fname in + if !E.hadErrors then + E.s (E.error "There were parsing errors in %s\n" fname); + + (* and apply the patch file, return transformed file *) + let patched = match !patchFile with + + | Some(pf) -> ( + (* save old value of out so I can use it for debugging during patching *) + let oldOut = !out in + + (* reset out so we don't try to print the patch file to it *) + out := None; + + (trace "patch" (dprintf "newpatching %s\n" fname)); + let result = (Stats.time "newpatch" (Patch.applyPatch pf) cabs) in + + if (!printPatchedFiles) then begin + let outFname:string = fname ^ ".patched" in + (trace "patch" (dprintf "printing patched version of %s to %s\n" + fname outFname)); + let o = (open_out outFname) in + (Cprint.printFile o result); + (close_out o) + end; + + (* restore out *) + Cprint.flush (); + out := oldOut; + + result + ) + | None -> cabs + in + + (* print it ... *) + (match !out with + Some o -> begin + (trace "sm" (dprintf "writing the cabs output\n")); + output_string o ("/* Generated by Frontc */\n"); + Stats.time "printCABS" (Cprint.printFile o) patched; + close_output (); + raise CabsOnly + end + | None -> ()); + if !E.hadErrors then + raise Parsing.Parse_error; + + (* and return the patched source *) + patched +end + + +(* just parse *) +and parse_to_cabs_inner (fname : string) = + try + if !E.verboseFlag then ignore (E.log "Frontc is parsing %s\n" fname); + flush !E.logChannel; + E.hadErrors := false; + let lexbuf = Clexer.init fname in + let cabs = Stats.time "parse" (Cparser.file Clexer.initial) lexbuf in + Clexer.finish (); + (fname, cabs) + with (Sys_error msg) -> begin + ignore (E.log "Cannot open %s : %s\n" fname msg); + Clexer.finish (); + close_output (); + raise (ParseError("Cannot open " ^ fname ^ ": " ^ msg ^ "\n")) + end + | Parsing.Parse_error -> begin + ignore (E.log "Parsing error\n"); + Clexer.finish (); + close_output (); + raise (ParseError("Parse error")) + end + | e -> begin + ignore (E.log "Caught %s while parsing\n" (Printexc.to_string e)); + Clexer.finish (); + raise e + end + + +(* print to safec.proto.h the prototypes of all functions that are defined *) +let printPrototypes ((fname, file) : Cabs.file) : unit = +begin + (*ignore (E.log "file has %d defns\n" (List.length file));*) + + let chan = open_out "safec.proto.h" in + ignore (fprintf chan "/* generated prototypes file, %d defs */\n" (List.length file)); + Cprint.out := chan; + + let counter : int ref = ref 0 in + + let rec loop (d : Cabs.definition) = begin + match d with + | Cabs.FUNDEF(name, _, loc, _) -> ( + match name with + | (_, (funcname, Cabs.PROTO(_,_,_), _, _)) -> ( + incr counter; + ignore (fprintf chan "\n/* %s from %s:%d */\n" + funcname loc.Cabs.filename loc.Cabs.lineno); + flush chan; + Cprint.print_single_name name; + Cprint.print_unescaped_string ";"; + Cprint.force_new_line (); + Cprint.flush () + ) + | _ -> () + ) + + | _ -> () + end in + (List.iter loop file); + + ignore (fprintf chan "\n/* wrote %d prototypes */\n" !counter); + close_out chan; + ignore (E.log "printed %d prototypes from %d defns to safec.proto.h\n" + !counter (List.length file)) +end + + + +let parse fname = + (trace "sm" (dprintf "parsing %s to Cabs\n" fname)); + let cabs = parse_to_cabs fname in + (* Now (return a function that will) convert to CIL *) + fun _ -> + (trace "sm" (dprintf "converting %s from Cabs to CIL\n" fname)); + let cil = Stats.time "conv" Cabs2cil.convFile cabs in + if !doPrintProtos then (printPrototypes cabs); + cil + + + + + + + + diff --git a/cil/src/frontc/frontc.mli b/cil/src/frontc/frontc.mli new file mode 100644 index 00000000..50ad799c --- /dev/null +++ b/cil/src/frontc/frontc.mli @@ -0,0 +1,55 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + + (* Signal that we are in MS VC mode *) +val setMSVCMode: unit -> unit + + + (* Parse a file in *) +exception ParseError of string + + (* Raised when the front-end is requested to print the CABS and return *) +exception CabsOnly + + (* additional command line arguments *) +val args: (string * Arg.spec * string) list + + (* the main command to parse a file. Return a thunk that can be used to + * convert the AST to CIL. *) +val parse: string -> (unit -> Cil.file) + diff --git a/cil/src/frontc/lexerhack.ml b/cil/src/frontc/lexerhack.ml new file mode 100755 index 00000000..ecae28ef --- /dev/null +++ b/cil/src/frontc/lexerhack.ml @@ -0,0 +1,22 @@ + +module E = Errormsg + +(* We provide here a pointer to a function. It will be set by the lexer and + * used by the parser. In Ocaml lexers depend on parsers, so we we have put + * such functions in a separate module. *) +let add_identifier: (string -> unit) ref = + ref (fun _ -> E.s (E.bug "You called an uninitialized add_identifier")) + +let add_type: (string -> unit) ref = + ref (fun _ -> E.s (E.bug "You called an uninitialized add_type")) + +let push_context: (unit -> unit) ref = + ref (fun _ -> E.s (E.bug "You called an uninitialized push_context")) + +let pop_context: (unit -> unit) ref = + ref (fun _ -> E.s (E.bug "You called an uninitialized pop_context")) + + +(* Keep here the current pattern for formatparse *) +let currentPattern = ref "" + diff --git a/cil/src/frontc/patch.ml b/cil/src/frontc/patch.ml new file mode 100644 index 00000000..fcb4ba62 --- /dev/null +++ b/cil/src/frontc/patch.ml @@ -0,0 +1,837 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(* patch.ml *) +(* CABS file patching *) + +open Cabs +open Trace +open Pretty +open Cabsvisit + +(* binding of a unification variable to a syntactic construct *) +type binding = + | BSpecifier of string * spec_elem list + | BName of string * string + | BExpr of string * expression + +(* thrown when unification fails *) +exception NoMatch + +(* thrown when an attempt to find the associated binding fails *) +exception BadBind of string + +(* trying to isolate performance problems; will hide all the *) +(* potentially expensive debugging output behind "if verbose .." *) +let verbose : bool = true + + +(* raise NoMatch if x and y are not equal *) +let mustEq (x : 'a) (y : 'a) : unit = +begin + if (x <> y) then ( + if verbose then + (trace "patchDebug" (dprintf "mismatch by structural disequality\n")); + raise NoMatch + ) +end + +(* why isn't this in the core Ocaml library? *) +let identity x = x + + +let isPatternVar (s : string) : bool = +begin + ((String.length s) >= 1) && ((String.get s 0) = '@') +end + +(* 's' is actually "@name(blah)"; extract the 'blah' *) +let extractPatternVar (s : string) : string = + (*(trace "patch" (dprintf "extractPatternVar %s\n" s));*) + (String.sub s 6 ((String.length s) - 7)) + + +(* a few debugging printers.. *) +let printExpr (e : expression) = +begin + if (verbose && traceActive "patchDebug") then ( + Cprint.print_expression e; Cprint.force_new_line (); + Cprint.flush () + ) +end + +let printSpec (spec: spec_elem list) = +begin + if (verbose && traceActive "patchDebug") then ( + Cprint.print_specifiers spec; Cprint.force_new_line (); + Cprint.flush () + ) +end + +let printSpecs (pat : spec_elem list) (tgt : spec_elem list) = +begin + (printSpec pat); + (printSpec tgt) +end + +let printDecl (pat : name) (tgt : name) = +begin + if (verbose && traceActive "patchDebug") then ( + Cprint.print_name pat; Cprint.force_new_line (); + Cprint.print_name tgt; Cprint.force_new_line (); + Cprint.flush () + ) +end + +let printDeclType (pat : decl_type) (tgt : decl_type) = +begin + if (verbose && traceActive "patchDebug") then ( + Cprint.print_decl "__missing_field_name" pat; Cprint.force_new_line (); + Cprint.print_decl "__missing_field_name" tgt; Cprint.force_new_line (); + Cprint.flush () + ) +end + +let printDefn (d : definition) = +begin + if (verbose && traceActive "patchDebug") then ( + Cprint.print_def d; + Cprint.flush () + ) +end + + +(* class to describe how to modify the tree for subtitution *) +class substitutor (bindings : binding list) = object(self) + inherit nopCabsVisitor as super + + (* look in the binding list for a given name *) + method findBinding (name : string) : binding = + begin + try + (List.find + (fun b -> + match b with + | BSpecifier(n, _) -> n=name + | BName(n, _) -> n=name + | BExpr(n, _) -> n=name) + bindings) + with + Not_found -> raise (BadBind ("name not found: " ^ name)) + end + + method vexpr (e:expression) : expression visitAction = + begin + match e with + | EXPR_PATTERN(name) -> ( + match (self#findBinding name) with + | BExpr(_, expr) -> ChangeTo(expr) (* substitute bound expression *) + | _ -> raise (BadBind ("wrong type: " ^ name)) + ) + | _ -> DoChildren + end + + (* use of a name *) + method vvar (s:string) : string = + begin + if (isPatternVar s) then ( + let nameString = (extractPatternVar s) in + match (self#findBinding nameString) with + | BName(_, str) -> str (* substitute *) + | _ -> raise (BadBind ("wrong type: " ^ nameString)) + ) + else + s + end + + (* binding introduction of a name *) + method vname (k: nameKind) (spec: specifier) (n: name) : name visitAction = + begin + match n with (s (*variable name*), dtype, attrs, loc) -> ( + let replacement = (self#vvar s) in (* use replacer from above *) + if (s <> replacement) then + ChangeTo(replacement, dtype, attrs, loc) + else + DoChildren (* no replacement *) + ) + end + + method vspec (specList: specifier) : specifier visitAction = + begin + if verbose then (trace "patchDebug" (dprintf "substitutor: vspec\n")); + (printSpec specList); + + (* are any of the specifiers SpecPatterns? we have to check the entire *) + (* list, not just the head, because e.g. "typedef @specifier(foo)" has *) + (* "typedef" as the head of the specifier list *) + if (List.exists (fun elt -> match elt with + | SpecPattern(_) -> true + | _ -> false) + specList) then begin + (* yes, replace the existing list with one got by *) + (* replacing all occurrences of SpecPatterns *) + (trace "patchDebug" (dprintf "at least one spec pattern\n")); + ChangeTo + (List.flatten + (List.map + (* for each specifier element, yield the specifier list *) + (* to which it maps; then we'll flatten the final result *) + (fun elt -> + match elt with + | SpecPattern(name) -> ( + match (self#findBinding name) with + | BSpecifier(_, replacement) -> ( + (trace "patchDebug" (dprintf "replacing pattern %s\n" name)); + replacement + ) + | _ -> raise (BadBind ("wrong type: " ^ name)) + ) + | _ -> [elt] (* leave this one alone *) + ) + specList + ) + ) + end + else + (* none of the specifiers in specList are patterns *) + DoChildren + end + + method vtypespec (tspec: typeSpecifier) : typeSpecifier visitAction = + begin + match tspec with + | Tnamed(str) when (isPatternVar str) -> + ChangeTo(Tnamed(self#vvar str)) + | Tstruct(str, fields, extraAttrs) when (isPatternVar str) -> ( + (trace "patchDebug" (dprintf "substituting %s\n" str)); + ChangeDoChildrenPost(Tstruct((self#vvar str), fields, extraAttrs), identity) + ) + | Tunion(str, fields, extraAttrs) when (isPatternVar str) -> + (trace "patchDebug" (dprintf "substituting %s\n" str)); + ChangeDoChildrenPost(Tunion((self#vvar str), fields, extraAttrs), identity) + | _ -> DoChildren + end + +end + + +(* why can't I have forward declarations in the language?!! *) +let unifyExprFwd : (expression -> expression -> binding list) ref + = ref (fun e e -> []) + + +(* substitution for expressions *) +let substExpr (bindings : binding list) (expr : expression) : expression = +begin + if verbose then + (trace "patchDebug" (dprintf "substExpr with %d bindings\n" (List.length bindings))); + (printExpr expr); + + (* apply the transformation *) + let result = (visitCabsExpression (new substitutor bindings :> cabsVisitor) expr) in + (printExpr result); + + result +end + +let d_loc (_:unit) (loc: cabsloc) : doc = + text loc.filename ++ chr ':' ++ num loc.lineno + + +(* class to describe how to modify the tree when looking for places *) +(* to apply expression transformers *) +class exprTransformer (srcpattern : expression) (destpattern : expression) + (patchline : int) (srcloc : cabsloc) = object(self) + inherit nopCabsVisitor as super + + method vexpr (e:expression) : expression visitAction = + begin + (* see if the source pattern matches this subexpression *) + try ( + let bindings = (!unifyExprFwd srcpattern e) in + + (* match! *) + (trace "patch" (dprintf "expr match: patch line %d, src %a\n" + patchline d_loc srcloc)); + ChangeTo(substExpr bindings destpattern) + ) + + with NoMatch -> ( + (* doesn't apply *) + DoChildren + ) + end + + (* other constructs left unchanged *) +end + + +let unifyList (pat : 'a list) (tgt : 'a list) + (unifyElement : 'a -> 'a -> binding list) : binding list = +begin + if verbose then + (trace "patchDebug" (dprintf "unifyList (pat len %d, tgt len %d)\n" + (List.length pat) (List.length tgt))); + + (* walk down the lists *) + let rec loop pat tgt : binding list = + match pat, tgt with + | [], [] -> [] + | (pelt :: prest), (telt :: trest) -> + (unifyElement pelt telt) @ + (loop prest trest) + | _,_ -> ( + (* no match *) + if verbose then ( + (trace "patchDebug" (dprintf "mismatching list length\n")); + ); + raise NoMatch + ) + in + (loop pat tgt) +end + + +let gettime () : float = + (Unix.times ()).Unix.tms_utime + +let rec applyPatch (patchFile : file) (srcFile : file) : file = +begin + let patch : definition list = (snd patchFile) in + let srcFname : string = (fst srcFile) in + let src : definition list = (snd srcFile) in + + (trace "patchTime" (dprintf "applyPatch start: %f\n" (gettime ()))); + if (traceActive "patchDebug") then + Cprint.out := stdout (* hack *) + else (); + + (* more hackery *) + unifyExprFwd := unifyExpr; + + (* patch a single source definition, yield transformed *) + let rec patchDefn (patch : definition list) (d : definition) : definition list = + begin + match patch with + | TRANSFORMER(srcpattern, destpattern, loc) :: rest -> ( + if verbose then + (trace "patchDebug" + (dprintf "considering applying defn pattern at line %d to src at %a\n" + loc.lineno d_loc (get_definitionloc d))); + + (* see if the source pattern matches the definition 'd' we have *) + try ( + let bindings = (unifyDefn srcpattern d) in + + (* we have a match! apply the substitutions *) + (trace "patch" (dprintf "defn match: patch line %d, src %a\n" + loc.lineno d_loc (get_definitionloc d))); + + (List.map (fun destElt -> (substDefn bindings destElt)) destpattern) + ) + + with NoMatch -> ( + (* no match, continue down list *) + (*(trace "patch" (dprintf "no match\n"));*) + (patchDefn rest d) + ) + ) + + | EXPRTRANSFORMER(srcpattern, destpattern, loc) :: rest -> ( + if verbose then + (trace "patchDebug" + (dprintf "considering applying expr pattern at line %d to src at %a\n" + loc.lineno d_loc (get_definitionloc d))); + + (* walk around in 'd' looking for expressions to modify *) + let dList = (visitCabsDefinition + ((new exprTransformer srcpattern destpattern + loc.lineno (get_definitionloc d)) + :> cabsVisitor) + d + ) in + + (* recursively invoke myself to try additional patches *) + (* since visitCabsDefinition might return a list, I'll try my *) + (* addtional patches on every yielded definition, then collapse *) + (* all of them into a single list *) + (List.flatten (List.map (fun d -> (patchDefn rest d)) dList)) + ) + + | _ :: rest -> ( + (* not a transformer; just keep going *) + (patchDefn rest d) + ) + | [] -> ( + (* reached the end of the patch file with no match *) + [d] (* have to wrap it in a list ... *) + ) + end in + + (* transform all the definitions *) + let result : definition list = + (List.flatten (List.map (fun d -> (patchDefn patch d)) src)) in + + (*Cprint.print_defs result;*) + + if (traceActive "patchDebug") then ( + (* avoid flush bug? yes *) + Cprint.force_new_line (); + Cprint.flush () + ); + + (trace "patchTime" (dprintf "applyPatch finish: %f\n" (gettime ()))); + (srcFname, result) +end + + +(* given a definition pattern 'pat', and a target concrete defintion 'tgt', *) +(* determine if they can be unified; if so, return the list of bindings of *) +(* unification variables in pat; otherwise raise NoMatch *) +and unifyDefn (pat : definition) (tgt : definition) : binding list = +begin + match pat, tgt with + | DECDEF((pspecifiers, pdeclarators), _), + DECDEF((tspecifiers, tdeclarators), _) -> ( + if verbose then + (trace "patchDebug" (dprintf "unifyDefn of DECDEFs\n")); + (unifySpecifiers pspecifiers tspecifiers) @ + (unifyInitDeclarators pdeclarators tdeclarators) + ) + + | TYPEDEF((pspec, pdecl), _), + TYPEDEF((tspec, tdecl), _) -> ( + if verbose then + (trace "patchDebug" (dprintf "unifyDefn of TYPEDEFs\n")); + (unifySpecifiers pspec tspec) @ + (unifyDeclarators pdecl tdecl) + ) + + | ONLYTYPEDEF(pspec, _), + ONLYTYPEDEF(tspec, _) -> ( + if verbose then + (trace "patchDebug" (dprintf "unifyDefn of ONLYTYPEDEFs\n")); + (unifySpecifiers pspec tspec) + ) + + | _, _ -> ( + if verbose then + (trace "patchDebug" (dprintf "mismatching definitions\n")); + raise NoMatch + ) +end + +and unifySpecifier (pat : spec_elem) (tgt : spec_elem) : binding list = +begin + if verbose then + (trace "patchDebug" (dprintf "unifySpecifier\n")); + (printSpecs [pat] [tgt]); + + if (pat = tgt) then [] else + + match pat, tgt with + | SpecType(tspec1), SpecType(tspec2) -> + (unifyTypeSpecifier tspec1 tspec2) + | SpecPattern(name), _ -> + (* record that future occurrances of @specifier(name) will yield this specifier *) + if verbose then + (trace "patchDebug" (dprintf "found specifier match for %s\n" name)); + [BSpecifier(name, [tgt])] + | _,_ -> ( + (* no match *) + if verbose then ( + (trace "patchDebug" (dprintf "mismatching specifiers\n")); + ); + raise NoMatch + ) +end + +and unifySpecifiers (pat : spec_elem list) (tgt : spec_elem list) : binding list = +begin + if verbose then + (trace "patchDebug" (dprintf "unifySpecifiers\n")); + (printSpecs pat tgt); + + (* canonicalize the specifiers by sorting them *) + let pat' = (List.stable_sort compare pat) in + let tgt' = (List.stable_sort compare tgt) in + + (* if they are equal, they match with no further checking *) + if (pat' = tgt') then [] else + + (* walk down the lists; don't walk the sorted lists because the *) + (* pattern must always be last, if it occurs *) + let rec loop pat tgt : binding list = + match pat, tgt with + | [], [] -> [] + | [SpecPattern(name)], _ -> + (* final SpecPattern matches anything which comes after *) + (* record that future occurrences of @specifier(name) will yield this specifier *) + if verbose then + (trace "patchDebug" (dprintf "found specifier match for %s\n" name)); + [BSpecifier(name, tgt)] + | (pspec :: prest), (tspec :: trest) -> + (unifySpecifier pspec tspec) @ + (loop prest trest) + | _,_ -> ( + (* no match *) + if verbose then ( + (trace "patchDebug" (dprintf "mismatching specifier list length\n")); + ); + raise NoMatch + ) + in + (loop pat tgt) +end + +and unifyTypeSpecifier (pat: typeSpecifier) (tgt: typeSpecifier) : binding list = +begin + if verbose then + (trace "patchDebug" (dprintf "unifyTypeSpecifier\n")); + + if (pat = tgt) then [] else + + match pat, tgt with + | Tnamed(s1), Tnamed(s2) -> (unifyString s1 s2) + | Tstruct(name1, None, _), Tstruct(name2, None, _) -> + (unifyString name1 name2) + | Tstruct(name1, Some(fields1), _), Tstruct(name2, Some(fields2), _) -> + (* ignoring extraAttrs b/c we're just trying to come up with a list + * of substitutions, and there's no unify_attributes function, and + * I don't care at this time about checking that they are equal .. *) + (unifyString name1 name2) @ + (unifyList fields1 fields2 unifyField) + | Tunion(name1, None, _), Tstruct(name2, None, _) -> + (unifyString name1 name2) + | Tunion(name1, Some(fields1), _), Tunion(name2, Some(fields2), _) -> + (unifyString name1 name2) @ + (unifyList fields1 fields2 unifyField) + | Tenum(name1, None, _), Tenum(name2, None, _) -> + (unifyString name1 name2) + | Tenum(name1, Some(items1), _), Tenum(name2, Some(items2), _) -> + (mustEq items1 items2); (* enum items *) + (unifyString name1 name2) + | TtypeofE(exp1), TtypeofE(exp2) -> + (unifyExpr exp1 exp2) + | TtypeofT(spec1, dtype1), TtypeofT(spec2, dtype2) -> + (unifySpecifiers spec1 spec2) @ + (unifyDeclType dtype1 dtype2) + | _ -> ( + if verbose then (trace "patchDebug" (dprintf "mismatching typeSpecifiers\n")); + raise NoMatch + ) +end + +and unifyField (pat : field_group) (tgt : field_group) : binding list = +begin + match pat,tgt with (spec1, list1), (spec2, list2) -> ( + (unifySpecifiers spec1 spec2) @ + (unifyList list1 list2 unifyNameExprOpt) + ) +end + +and unifyNameExprOpt (pat : name * expression option) + (tgt : name * expression option) : binding list = +begin + match pat,tgt with + | (name1, None), (name2, None) -> (unifyName name1 name2) + | (name1, Some(exp1)), (name2, Some(exp2)) -> + (unifyName name1 name2) @ + (unifyExpr exp1 exp2) + | _,_ -> [] +end + +and unifyName (pat : name) (tgt : name) : binding list = +begin + match pat,tgt with (pstr, pdtype, pattrs, ploc), (tstr, tdtype, tattrs, tloc) -> + (mustEq pattrs tattrs); + (unifyString pstr tstr) @ + (unifyDeclType pdtype tdtype) +end + +and unifyInitDeclarators (pat : init_name list) (tgt : init_name list) : binding list = +begin + (* + if verbose then + (trace "patchDebug" (dprintf "unifyInitDeclarators, pat %d, tgt %d\n" + (List.length pat) (List.length tgt))); + *) + + match pat, tgt with + | ((pdecl, piexpr) :: prest), + ((tdecl, tiexpr) :: trest) -> + (unifyDeclarator pdecl tdecl) @ + (unifyInitExpr piexpr tiexpr) @ + (unifyInitDeclarators prest trest) + | [], [] -> [] + | _, _ -> ( + if verbose then + (trace "patchDebug" (dprintf "mismatching init declarators\n")); + raise NoMatch + ) +end + +and unifyDeclarators (pat : name list) (tgt : name list) : binding list = + (unifyList pat tgt unifyDeclarator) + +and unifyDeclarator (pat : name) (tgt : name) : binding list = +begin + if verbose then + (trace "patchDebug" (dprintf "unifyDeclarator\n")); + (printDecl pat tgt); + + match pat, tgt with + | (pname, pdtype, pattr, ploc), + (tname, tdtype, tattr, tloc) -> + (mustEq pattr tattr); + (unifyDeclType pdtype tdtype) @ + (unifyString pname tname) +end + +and unifyDeclType (pat : decl_type) (tgt : decl_type) : binding list = +begin + if verbose then + (trace "patchDebug" (dprintf "unifyDeclType\n")); + (printDeclType pat tgt); + + match pat, tgt with + | JUSTBASE, JUSTBASE -> [] + | PARENTYPE(pattr1, ptype, pattr2), + PARENTYPE(tattr1, ttype, tattr2) -> + (mustEq pattr1 tattr1); + (mustEq pattr2 tattr2); + (unifyDeclType ptype ttype) + | ARRAY(ptype, pattr, psz), + ARRAY(ttype, tattr, tsz) -> + (mustEq pattr tattr); + (unifyDeclType ptype ttype) @ + (unifyExpr psz tsz) + | PTR(pattr, ptype), + PTR(tattr, ttype) -> + (mustEq pattr tattr); + (unifyDeclType ptype ttype) + | PROTO(ptype, pformals, pva), + PROTO(ttype, tformals, tva) -> + (mustEq pva tva); + (unifyDeclType ptype ttype) @ + (unifySingleNames pformals tformals) + | _ -> ( + if verbose then + (trace "patchDebug" (dprintf "mismatching decl_types\n")); + raise NoMatch + ) +end + +and unifySingleNames (pat : single_name list) (tgt : single_name list) : binding list = +begin + if verbose then + (trace "patchDebug" (dprintf "unifySingleNames, pat %d, tgt %d\n" + (List.length pat) (List.length tgt))); + + match pat, tgt with + | [], [] -> [] + | (pspec, pdecl) :: prest, + (tspec, tdecl) :: trest -> + (unifySpecifiers pspec tspec) @ + (unifyDeclarator pdecl tdecl) @ + (unifySingleNames prest trest) + | _, _ -> ( + if verbose then + (trace "patchDebug" (dprintf "mismatching single_name lists\n")); + raise NoMatch + ) +end + +and unifyString (pat : string) (tgt : string) : binding list = +begin + (* equal? match with no further ado *) + if (pat = tgt) then [] else + + (* is the pattern a variable? *) + if (isPatternVar pat) then + (* pat is actually "@name(blah)"; extract the 'blah' *) + let varname = (extractPatternVar pat) in + + (* when substituted, this name becomes 'tgt' *) + if verbose then + (trace "patchDebug" (dprintf "found name match for %s\n" varname)); + [BName(varname, tgt)] + + else ( + if verbose then + (trace "patchDebug" (dprintf "mismatching names: %s and %s\n" pat tgt)); + raise NoMatch + ) +end + +and unifyExpr (pat : expression) (tgt : expression) : binding list = +begin + (* if they're equal, that's good enough *) + if (pat = tgt) then [] else + + (* shorter name *) + let ue = unifyExpr in + + (* because of the equality check above, I can omit some cases *) + match pat, tgt with + | UNARY(pop, pexpr), + UNARY(top, texpr) -> + (mustEq pop top); + (ue pexpr texpr) + | BINARY(pop, pexp1, pexp2), + BINARY(top, texp1, texp2) -> + (mustEq pop top); + (ue pexp1 texp1) @ + (ue pexp2 texp2) + | QUESTION(p1, p2, p3), + QUESTION(t1, t2, t3) -> + (ue p1 t1) @ + (ue p2 t2) @ + (ue p3 t3) + | CAST((pspec, ptype), piexpr), + CAST((tspec, ttype), tiexpr) -> + (mustEq ptype ttype); + (unifySpecifiers pspec tspec) @ + (unifyInitExpr piexpr tiexpr) + | CALL(pfunc, pargs), + CALL(tfunc, targs) -> + (ue pfunc tfunc) @ + (unifyExprs pargs targs) + | COMMA(pexprs), + COMMA(texprs) -> + (unifyExprs pexprs texprs) + | EXPR_SIZEOF(pexpr), + EXPR_SIZEOF(texpr) -> + (ue pexpr texpr) + | TYPE_SIZEOF(pspec, ptype), + TYPE_SIZEOF(tspec, ttype) -> + (mustEq ptype ttype); + (unifySpecifiers pspec tspec) + | EXPR_ALIGNOF(pexpr), + EXPR_ALIGNOF(texpr) -> + (ue pexpr texpr) + | TYPE_ALIGNOF(pspec, ptype), + TYPE_ALIGNOF(tspec, ttype) -> + (mustEq ptype ttype); + (unifySpecifiers pspec tspec) + | INDEX(parr, pindex), + INDEX(tarr, tindex) -> + (ue parr tarr) @ + (ue pindex tindex) + | MEMBEROF(pexpr, pfield), + MEMBEROF(texpr, tfield) -> + (mustEq pfield tfield); + (ue pexpr texpr) + | MEMBEROFPTR(pexpr, pfield), + MEMBEROFPTR(texpr, tfield) -> + (mustEq pfield tfield); + (ue pexpr texpr) + | GNU_BODY(pblock), + GNU_BODY(tblock) -> + (mustEq pblock tblock); + [] + | EXPR_PATTERN(name), _ -> + (* match, and contribute binding *) + if verbose then + (trace "patchDebug" (dprintf "found expr match for %s\n" name)); + [BExpr(name, tgt)] + | a, b -> + if (verbose && traceActive "patchDebug") then ( + (trace "patchDebug" (dprintf "mismatching expression\n")); + (printExpr a); + (printExpr b) + ); + raise NoMatch +end + +and unifyInitExpr (pat : init_expression) (tgt : init_expression) : binding list = +begin + (* + Cprint.print_init_expression pat; Cprint.force_new_line (); + Cprint.print_init_expression tgt; Cprint.force_new_line (); + Cprint.flush (); + *) + + match pat, tgt with + | NO_INIT, NO_INIT -> [] + | SINGLE_INIT(pe), SINGLE_INIT(te) -> + (unifyExpr pe te) + | COMPOUND_INIT(plist), + COMPOUND_INIT(tlist) -> ( + let rec loop plist tlist = + match plist, tlist with + | ((pwhat, piexpr) :: prest), + ((twhat, tiexpr) :: trest) -> + (mustEq pwhat twhat); + (unifyInitExpr piexpr tiexpr) @ + (loop prest trest) + | [], [] -> [] + | _, _ -> ( + if verbose then + (trace "patchDebug" (dprintf "mismatching compound init exprs\n")); + raise NoMatch + ) + in + (loop plist tlist) + ) + | _,_ -> ( + if verbose then + (trace "patchDebug" (dprintf "mismatching init exprs\n")); + raise NoMatch + ) +end + +and unifyExprs (pat : expression list) (tgt : expression list) : binding list = + (unifyList pat tgt unifyExpr) + + +(* given the list of bindings 'b', substitute them into 'd' to yield a new definition *) +and substDefn (bindings : binding list) (defn : definition) : definition = +begin + if verbose then + (trace "patchDebug" (dprintf "substDefn with %d bindings\n" (List.length bindings))); + (printDefn defn); + + (* apply the transformation *) + match (visitCabsDefinition (new substitutor bindings :> cabsVisitor) defn) with + | [d] -> d (* expect a singleton list *) + | _ -> (failwith "didn't get a singleton list where I expected one") +end + + +(* end of file *) diff --git a/cil/src/frontc/patch.mli b/cil/src/frontc/patch.mli new file mode 100644 index 00000000..4f32870e --- /dev/null +++ b/cil/src/frontc/patch.mli @@ -0,0 +1,42 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(* patch.mli *) +(* interface for patch.ml *) + +val applyPatch : Cabs.file -> Cabs.file -> Cabs.file diff --git a/cil/src/libmaincil.ml b/cil/src/libmaincil.ml new file mode 100644 index 00000000..952c0132 --- /dev/null +++ b/cil/src/libmaincil.ml @@ -0,0 +1,108 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* libmaincil *) +(* this is a replacement for maincil.ml, for the case when we're + * creating a C-callable library (libcil.a); all it does is register + * a couple of functions and initialize CIL *) + + +module E = Errormsg + +open Cil + + +(* print a Cil 'file' to stdout *) +let unparseToStdout (cil : file) : unit = +begin + dumpFile defaultCilPrinter stdout cil +end;; + +(* a visitor to unroll all types - may need to do some magic to keep attributes *) +class unrollVisitorClass = object (self) + inherit nopCilVisitor + + (* variable declaration *) + method vvdec (vi : varinfo) : varinfo visitAction = + begin + vi.vtype <- unrollTypeDeep vi.vtype; + (*ignore (E.log "varinfo for %s in file '%s' line %d byte %d\n" vi.vname vi.vdecl.file vi.vdecl.line vi.vdecl.byte);*) + SkipChildren + end + + (* global: need to unroll fields of compinfo *) + method vglob (g : global) : global list visitAction = + begin + match g with + GCompTag(ci, loc) as g -> + let doFieldinfo (fi : fieldinfo) : unit = + fi.ftype <- unrollTypeDeep fi.ftype + in begin + ignore(List.map doFieldinfo ci.cfields); + (*ChangeTo [g]*) + SkipChildren + end + | _ -> DoChildren + end +end;; + + +let unrollVisitor = new unrollVisitorClass;; + +(* open and parse a C file into a Cil 'file', unroll all typedefs *) +let parseOneFile (fname: string) : file = + let ast : file = Frontc.parse fname () in + begin + visitCilFile unrollVisitor ast; + ast + end +;; + +let getDummyTypes () : typ * typ = + ( TPtr(TVoid [], []), TInt(IInt, []) ) +;; + +(* register some functions - these may be called from C code *) +Callback.register "cil_parse" parseOneFile; +Callback.register "cil_unparse" unparseToStdout; +(* Callback.register "unroll_type_deep" unrollTypeDeep; *) +Callback.register "get_dummy_types" getDummyTypes; + +(* initalize CIL *) +initCIL (); + + diff --git a/cil/src/machdep.c b/cil/src/machdep.c new file mode 100644 index 00000000..11348653 --- /dev/null +++ b/cil/src/machdep.c @@ -0,0 +1,220 @@ +/* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "../config.h" + +#include + +#ifdef HAVE_STDLIB_H +#include +#endif + +#ifdef HAVE_WCHAR_H +#include +#endif + +#ifdef _GNUCC +#define LONGLONG long long +#define CONST_STRING_LITERALS "true" +#define VERSION __VERSION__ +#define VERSION_MAJOR __GNUC__ +#define VERSION_MINOR __GNUC_MINOR__ +#endif + +#ifdef _MSVC +#define LONGLONG __int64 +#define CONST_STRING_LITERALS "false" +#define VERSION "0" +#define VERSION_MAJOR 0 +#define VERSION_MINOR 0 +#endif + +/* The type for the machine dependency structure is generated from the + Makefile */ +int main() { + fprintf(stderr, "Generating machine dependency information for CIL\n"); + + printf("(* Generated by code in %s *)\n", __FILE__); + printf("\t version_major = %d;\n", VERSION_MAJOR); + printf("\t version_minor = %d;\n", VERSION_MINOR); + printf("\t version = \"%s\";\n", VERSION); + // Size of certain types + printf("\t sizeof_short = %d;\n", sizeof(short)); + printf("\t sizeof_int = %d;\n", sizeof(int)); + printf("\t sizeof_long = %d;\n", sizeof(long)); + printf("\t sizeof_longlong = %d;\n", sizeof(LONGLONG)); + printf("\t sizeof_ptr = %d;\n", sizeof(int *)); + printf("\t sizeof_enum = %d;\n", sizeof(enum e { ONE, TWO })); + printf("\t sizeof_float = %d;\n", sizeof(float)); + printf("\t sizeof_double = %d;\n", sizeof(double)); + printf("\t sizeof_longdouble = %d;\n", sizeof(long double)); + printf("\t sizeof_sizeof = %d;\n", sizeof(sizeof(int))); + printf("\t sizeof_wchar = %d;\n", sizeof(wchar_t)); + printf("\t sizeof_void = %d;\n", sizeof(void)); + printf("\t sizeof_fun = %d;\n", +#ifdef __GNUC__ + sizeof(main) +#else + 0 +#endif + ); + + // The alignment of a short + { + struct shortstruct { + char c; + short s; + }; + printf("\t alignof_short = %d;\n", + (int)(&((struct shortstruct*)0)->s)); + } + + // The alignment of an int + { + struct intstruct { + char c; + int i; + }; + printf("\t alignof_int = %d;\n", + (int)(&((struct intstruct*)0)->i)); + } + + // The alignment of a long + { + struct longstruct { + char c; + long l; + }; + printf("\t alignof_long = %d;\n", + (int)(&((struct longstruct*)0)->l)); + } + + // The alignment of long long + { + struct longlong { + char c; + LONGLONG ll; + }; + printf("\t alignof_longlong = %d;\n", + (int)(&((struct longlong*)0)->ll)); + } + + // The alignment of a ptr + { + struct ptrstruct { + char c; + int * p; + }; + printf("\t alignof_ptr = %d;\n", + (int)(&((struct ptrstruct*)0)->p)); + } + + // The alignment of an enum + { + struct enumstruct { + char c; + enum e2 { THREE, FOUR, FIVE } e; + }; + printf("\t alignof_enum = %d;\n", + (int)(&((struct enumstruct*)0)->e)); + } + + // The alignment of a float + { + struct floatstruct { + char c; + float f; + }; + printf("\t alignof_float = %d;\n", + (int)(&((struct floatstruct*)0)->f)); + } + + // The alignment of double + { + struct s1 { + char c; + double d; + }; + printf("\t alignof_double = %d;\n", + (int)(&((struct s1*)0)->d)); + } + + // The alignment of long double + { + struct s1 { + char c; + long double ld; + }; + printf("\t alignof_longdouble = %d;\n", + (int)(&((struct s1*)0)->ld)); + } + + printf("\t alignof_str = %d;\n", +#ifdef __GNUC__ + __alignof("a string") +#else + 0 +#endif + ); + + printf("\t alignof_fun = %d;\n", +#ifdef __GNUC__ + __alignof(main) +#else + 0 +#endif + ); + + // Whether char is unsigned + printf("\t char_is_unsigned = %s;\n", + ((char)0xff) > 0 ? "true" : "false"); + + + // Whether string literals contain constant characters + puts("\t const_string_literals = " CONST_STRING_LITERALS ";"); + + + // endianity + { + int e = 0x11223344; + printf("\t little_endian = %s;\n", + (0x44 == *(char*)&e) ? "true" : + ((0x11 == *(char*)&e) ? "false" : (exit(1), "false"))); + } + + exit(0); +} diff --git a/cil/src/main.ml b/cil/src/main.ml new file mode 100644 index 00000000..bbdb7309 --- /dev/null +++ b/cil/src/main.ml @@ -0,0 +1,288 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* maincil *) +(* this module is the program entry point for the 'cilly' program, *) +(* which reads a C program file, parses it, translates it to the CIL *) +(* intermediate language, and then renders that back into C *) + + +module F = Frontc +module C = Cil +module CK = Check +module E = Errormsg +open Pretty +open Trace + +type outfile = + { fname: string; + fchan: out_channel } +let outChannel : outfile option ref = ref None +let mergedChannel : outfile option ref = ref None + + +let parseOneFile (fname: string) : C.file = + (* PARSE and convert to CIL *) + if !Cilutil.printStages then ignore (E.log "Parsing %s\n" fname); + let cil = F.parse fname () in + + if (not !Epicenter.doEpicenter) then ( + (* sm: remove unused temps to cut down on gcc warnings *) + (* (Stats.time "usedVar" Rmtmps.removeUnusedTemps cil); *) + (trace "sm" (dprintf "removing unused temporaries\n")); + (Rmtmps.removeUnusedTemps cil) + ); + cil + +(** These are the statically-configured features. To these we append the + * features defined in Feature_config.ml (from Makefile) *) + +let makeCFGFeature : C.featureDescr = + { C.fd_name = "makeCFG"; + C.fd_enabled = Cilutil.makeCFG; + C.fd_description = "make the program look more like a CFG" ; + C.fd_extraopt = []; + C.fd_doit = (fun f -> + ignore (Partial.calls_end_basic_blocks f) ; + ignore (Partial.globally_unique_vids f) ; + Cil.iterGlobals f (fun glob -> match glob with + Cil.GFun(fd,_) -> Cil.prepareCFG fd ; + (* jc: blockinggraph depends on this "true" arg *) + ignore (Cil.computeCFGInfo fd true) + | _ -> ()) + ); + C.fd_post_check = true; + } + +let features : C.featureDescr list = + [ Epicenter.feature; + Simplify.feature; + Canonicalize.feature; + Callgraph.feature; + Logwrites.feature; + Heapify.feature1; + Heapify.feature2; + Oneret.feature; + makeCFGFeature; (* ww: make CFG *must* come before Partial *) + Partial.feature; + Simplemem.feature; + Sfi.feature; + Dataslicing.feature; + Logcalls.feature; + Ptranal.feature; + Liveness.feature; + ] + @ Feature_config.features + +let rec processOneFile (cil: C.file) = + begin + + if !Cilutil.doCheck then begin + ignore (E.log "First CIL check\n"); + ignore (CK.checkFile [] cil); + end; + + (* Scan all the features configured from the Makefile and, if they are + * enabled then run them on the current file *) + List.iter + (fun fdesc -> + if ! (fdesc.C.fd_enabled) then begin + if !E.verboseFlag then + ignore (E.log "Running CIL feature %s (%s)\n" + fdesc.C.fd_name fdesc.C.fd_description); + (* Run the feature, and see how long it takes. *) + Stats.time fdesc.C.fd_name + fdesc.C.fd_doit cil; + (* See if we need to do some checking *) + if !Cilutil.doCheck && fdesc.C.fd_post_check then begin + ignore (E.log "CIL check after %s\n" fdesc.C.fd_name); + ignore (CK.checkFile [] cil); + end + end) + features; + + + (match !outChannel with + None -> () + | Some c -> Stats.time "printCIL" + (C.dumpFile (!C.printerForMaincil) c.fchan c.fname) cil); + + if !E.hadErrors then + E.s (E.error "Error while processing file; see above for details."); + + end + +(***** MAIN *****) +let rec theMain () = + let usageMsg = "Usage: cilly [options] source-files" in + (* Processign of output file arguments *) + let openFile (what: string) (takeit: outfile -> unit) (fl: string) = + if !E.verboseFlag then + ignore (Printf.printf "Setting %s to %s\n" what fl); + (try takeit { fname = fl; + fchan = open_out fl } + with _ -> + raise (Arg.Bad ("Cannot open " ^ what ^ " file " ^ fl))) + in + let outName = ref "" in + (* sm: enabling this by default, since I think usually we + * want 'cilly' transformations to preserve annotations; I + * can easily add a command-line flag if someone sometimes + * wants these suppressed *) + C.print_CIL_Input := true; + + (*********** COMMAND LINE ARGUMENTS *****************) + (* Construct the arguments for the features configured from the Makefile *) + let blankLine = ("", Arg.Unit (fun _ -> ()), "") in + let featureArgs = + List.fold_right + (fun fdesc acc -> + if !(fdesc.C.fd_enabled) then + (* The feature is enabled by default *) + blankLine :: + ("--dont" ^ fdesc.C.fd_name, Arg.Clear(fdesc.C.fd_enabled), + " Disable " ^ fdesc.C.fd_description) :: + fdesc.C.fd_extraopt @ acc + else + (* Disabled by default *) + blankLine :: + ("--do" ^ fdesc.C.fd_name, Arg.Set(fdesc.C.fd_enabled), + " Enable " ^ fdesc.C.fd_description) :: + fdesc.C.fd_extraopt @ acc + ) + features + [blankLine] + in + let featureArgs = + ("", Arg.Unit (fun () -> ()), "\n\t\tCIL Features") :: featureArgs + in + + let argDescr = Ciloptions.options @ + [ + "--out", Arg.String (openFile "output" + (fun oc -> outChannel := Some oc)), + "the name of the output CIL file. The cilly script sets this for you."; + "--mergedout", Arg.String (openFile "merged output" + (fun oc -> mergedChannel := Some oc)), + "specify the name of the merged file"; + ] + @ F.args @ featureArgs in + begin + (* this point in the code is the program entry point *) + + Stats.reset (Stats.has_performance_counters ()); + + (* parse the command-line arguments *) + Arg.parse argDescr Ciloptions.recordFile usageMsg; + Cil.initCIL (); + + Ciloptions.fileNames := List.rev !Ciloptions.fileNames; + + if !Cilutil.testcil <> "" then begin + Testcil.doit !Cilutil.testcil + end else + (* parse each of the files named on the command line, to CIL *) + let files = List.map parseOneFile !Ciloptions.fileNames in + + (* if there's more than one source file, merge them together; *) + (* now we have just one CIL "file" to deal with *) + let one = + match files with + [one] -> one + | [] -> E.s (E.error "No arguments for CIL\n") + | _ -> + let merged = + Stats.time "merge" (Mergecil.merge files) + (if !outName = "" then "stdout" else !outName) in + if !E.hadErrors then + E.s (E.error "There were errors during merging\n"); + (* See if we must save the merged file *) + (match !mergedChannel with + None -> () + | Some mc -> begin + let oldpci = !C.print_CIL_Input in + C.print_CIL_Input := true; + Stats.time "printMerged" + (C.dumpFile !C.printerForMaincil mc.fchan mc.fname) merged; + C.print_CIL_Input := oldpci + end); + merged + in + + if !E.hadErrors then + E.s (E.error "Cabs2cil had some errors"); + + (* process the CIL file (merged if necessary) *) + processOneFile one + end +;; + (* Define a wrapper for main to + * intercept the exit *) +let failed = ref false + +let cleanup () = + if !E.verboseFlag || !Cilutil.printStats then + Stats.print stderr "Timings:\n"; + if !E.logChannel != stderr then + close_out (! E.logChannel); + (match ! outChannel with Some c -> close_out c.fchan | _ -> ()) + + +(* Without this handler, cilly.asm.exe will quit silently with return code 0 + when a segfault happens. *) +let handleSEGV code = + if !Cil.currentLoc == Cil.locUnknown then + E.log "**** Segmentation fault (possibly a stack overflow)\n" + else begin + E.log ("**** Segmentation fault (possibly a stack overflow) "^^ + "while processing %a\n") + Cil.d_loc !Cil.currentLoc + end; + exit code + +let _ = Sys.set_signal Sys.sigsegv (Sys.Signal_handle handleSEGV); + +;; + +begin + try + theMain (); + with F.CabsOnly -> (* this is OK *) () +end; +cleanup (); +exit (if !failed then 1 else 0) + diff --git a/cil/src/mergecil.ml b/cil/src/mergecil.ml new file mode 100644 index 00000000..dee519ed --- /dev/null +++ b/cil/src/mergecil.ml @@ -0,0 +1,1770 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* mergecil.ml *) +(* This module is responsible for merging multiple CIL source trees into + * a single, coherent CIL tree which contains the union of all the + * definitions in the source files. It effectively acts like a linker, + * but at the source code level instead of the object code level. *) + + +module P = Pretty +open Cil +module E = Errormsg +module H = Hashtbl +module A = Alpha +open Trace + +let debugMerge = false +let debugInlines = false + +let ignore_merge_conflicts = ref false + +(* Try to merge structure with the same name. However, do not complain if + * they are not the same *) +let mergeSynonyms = true + + +(** Whether to use path compression *) +let usePathCompression = false + +(* Try to merge definitions of inline functions. They can appear in multiple + * files and we would like them all to be the same. This can slow down the + * merger an order of magnitude !!! *) +let mergeInlines = true + +let mergeInlinesRepeat = mergeInlines && true + +let mergeInlinesWithAlphaConvert = mergeInlines && true + +(* when true, merge duplicate definitions of externally-visible functions; + * this uses a mechanism which is faster than the one for inline functions, + * but only probabilistically accurate *) +let mergeGlobals = true + + +(* Return true if 's' starts with the prefix 'p' *) +let prefix p s = + let lp = String.length p in + let ls = String.length s in + lp <= ls && String.sub s 0 lp = p + + + +(* A name is identified by the index of the file in which it occurs (starting + * at 0 with the first file) and by the actual name. We'll keep name spaces + * separate *) + +(* We define a data structure for the equivalence classes *) +type 'a node = + { nname: string; (* The actual name *) + nfidx: int; (* The file index *) + ndata: 'a; (* Data associated with the node *) + mutable nloc: (location * int) option; + (* location where defined and index within the file of the definition. + * If None then it means that this node actually DOES NOT appear in the + * given file. In rare occasions we need to talk in a given file about + * types that are not defined in that file. This happens with undefined + * structures but also due to cross-contamination of types in a few of + * the cases of combineType (see the definition of combineTypes). We + * try never to choose as representatives nodes without a definition. + * We also choose as representative the one that appears earliest *) + mutable nrep: 'a node; (* A pointer to another node in its class (one + * closer to the representative). The nrep node + * is always in an earlier file, except for the + * case where a name is undefined in one file + * and defined in a later file. If this pointer + * points to the node itself then this is the + * representative. *) + mutable nmergedSyns: bool (* Whether we have merged the synonyms for + * the node of this name *) + } + +let d_nloc () (lo: (location * int) option) : P.doc = + match lo with + None -> P.text "None" + | Some (l, idx) -> P.dprintf "Some(%d at %a)" idx d_loc l + +(* Make a node with a self loop. This is quite tricky. *) +let mkSelfNode (eq: (int * string, 'a node) H.t) (* The equivalence table *) + (syn: (string, 'a node) H.t) (* The synonyms table *) + (fidx: int) (name: string) (data: 'a) + (l: (location * int) option) = + let res = { nname = name; nfidx = fidx; ndata = data; nloc = l; + nrep = Obj.magic 1; nmergedSyns = false; } in + res.nrep <- res; (* Make the self cycle *) + H.add eq (fidx, name) res; (* Add it to the proper table *) + if mergeSynonyms && not (prefix "__anon" name) then + H.add syn name res; + res + +let debugFind = false + +(* Find the representative with or without path compression *) +let rec find (pathcomp: bool) (nd: 'a node) = + if debugFind then + ignore (E.log " find %s(%d)\n" nd.nname nd.nfidx); + if nd.nrep == nd then begin + if debugFind then + ignore (E.log " = %s(%d)\n" nd.nname nd.nfidx); + nd + end else begin + let res = find pathcomp nd.nrep in + if usePathCompression && pathcomp && nd.nrep != res then + nd.nrep <- res; (* Compress the paths *) + res + end + + +(* Union two nodes and return the new representative. We prefer as the + * representative a node defined earlier. We try not to use as + * representatives nodes that are not defined in their files. We return a + * function for undoing the union. Make sure that between the union and the + * undo you do not do path compression *) +let union (nd1: 'a node) (nd2: 'a node) : 'a node * (unit -> unit) = + (* Move to the representatives *) + let nd1 = find true nd1 in + let nd2 = find true nd2 in + if nd1 == nd2 then begin + (* It can happen that we are trying to union two nodes that are already + * equivalent. This is because between the time we check that two nodes + * are not already equivalent and the time we invoke the union operation + * we check type isomorphism which might change the equivalence classes *) +(* + ignore (warn "unioning already equivalent nodes for %s(%d)" + nd1.nname nd1.nfidx); +*) + nd1, fun x -> x + end else begin + let rep, norep = (* Choose the representative *) + if (nd1.nloc != None) = (nd2.nloc != None) then + (* They have the same defined status. Choose the earliest *) + if nd1.nfidx < nd2.nfidx then nd1, nd2 + else if nd1.nfidx > nd2.nfidx then nd2, nd1 + else (* In the same file. Choose the one with the earliest index *) begin + match nd1.nloc, nd2.nloc with + Some (_, didx1), Some (_, didx2) -> + if didx1 < didx2 then nd1, nd2 else + if didx1 > didx2 then nd2, nd1 + else begin + ignore (warn + "Merging two elements (%s and %s) in the same file (%d) with the same idx (%d) within the file" + nd1.nname nd2.nname nd1.nfidx didx1); + nd1, nd2 + end + | _, _ -> (* both none. Does not matter which one we choose. Should + * not happen though. *) + (* sm: it does happen quite a bit when, e.g. merging STLport with + * some client source; I'm disabling the warning since it supposedly + * is harmless anyway, so is useless noise *) + (* sm: re-enabling on claim it now will probably not happen *) + ignore (warn "Merging two undefined elements in the same file: %s and %s\n" nd1.nname nd2.nname); + nd1, nd2 + end + else (* One is defined, the other is not. Choose the defined one *) + if nd1.nloc != None then nd1, nd2 else nd2, nd1 + in + let oldrep = norep.nrep in + norep.nrep <- rep; + rep, (fun () -> norep.nrep <- oldrep) + end +(* +let union (nd1: 'a node) (nd2: 'a node) : 'a node * (unit -> unit) = + if nd1 == nd2 && nd1.nname = "!!!intEnumInfo!!!" then begin + ignore (warn "unioning two identical nodes for %s(%d)" + nd1.nname nd1.nfidx); + nd1, fun x -> x + end else + union nd1 nd2 +*) +(* Find the representative for a node and compress the paths in the process *) +let findReplacement + (pathcomp: bool) + (eq: (int * string, 'a node) H.t) + (fidx: int) + (name: string) : ('a * int) option = + if debugFind then + ignore (E.log "findReplacement for %s(%d)\n" name fidx); + try + let nd = H.find eq (fidx, name) in + if nd.nrep == nd then begin + if debugFind then + ignore (E.log " is a representative\n"); + None (* No replacement if this is the representative of its class *) + end else + let rep = find pathcomp nd in + if rep != rep.nrep then + E.s (bug "find does not return the representative\n"); + if debugFind then + ignore (E.log " RES = %s(%d)\n" rep.nname rep.nfidx); + Some (rep.ndata, rep.nfidx) + with Not_found -> begin + if debugFind then + ignore (E.log " not found in the map\n"); + None + end + +(* Make a node if one does not already exist. Otherwise return the + * representative *) +let getNode (eq: (int * string, 'a node) H.t) + (syn: (string, 'a node) H.t) + (fidx: int) (name: string) (data: 'a) + (l: (location * int) option) = + let debugGetNode = false in + if debugGetNode then + ignore (E.log "getNode(%s(%d), %a)\n" + name fidx d_nloc l); + try + let res = H.find eq (fidx, name) in + + (match res.nloc, l with + (* Maybe we have a better location now *) + None, Some _ -> res.nloc <- l + | Some (old_l, old_idx), Some (l, idx) -> + if old_idx != idx then + ignore (warn "Duplicate definition of node %s(%d) at indices %d(%a) and %d(%a)" + name fidx old_idx d_loc old_l idx d_loc l) + else + () + + | _, _ -> ()); + if debugGetNode then + ignore (E.log " node already found\n"); + find false res (* No path compression *) + with Not_found -> begin + let res = mkSelfNode eq syn fidx name data l in + if debugGetNode then + ignore (E.log " made a new one\n"); + res + end + + + +(* Dump a graph *) +let dumpGraph (what: string) (eq: (int * string, 'a node) H.t) : unit = + ignore (E.log "Equivalence graph for %s is:\n" what); + H.iter (fun (fidx, name) nd -> + ignore (E.log " %s(%d) %s-> " + name fidx (if nd.nloc = None then "(undef)" else "")); + if nd.nrep == nd then + ignore (E.log "*\n") + else + ignore (E.log " %s(%d)\n" nd.nrep.nname nd.nrep.nfidx )) + eq + + + + +(* For each name space we define a set of equivalence classes *) +let vEq: (int * string, varinfo node) H.t = H.create 111 (* Vars *) +let sEq: (int * string, compinfo node) H.t = H.create 111 (* Struct + union *) +let eEq: (int * string, enuminfo node) H.t = H.create 111 (* Enums *) +let tEq: (int * string, typeinfo node) H.t = H.create 111 (* Type names*) +let iEq: (int * string, varinfo node) H.t = H.create 111 (* Inlines *) + +(* Sometimes we want to merge synonyms. We keep some tables indexed by names. + * Each name is mapped to multiple exntries *) +let vSyn: (string, varinfo node) H.t = H.create 111 (* Not actually used *) +let iSyn: (string, varinfo node) H.t = H.create 111 (* Inlines *) +let sSyn: (string, compinfo node) H.t = H.create 111 +let eSyn: (string, enuminfo node) H.t = H.create 111 +let tSyn: (string, typeinfo node) H.t = H.create 111 + +(** A global environment for variables. Put in here only the non-static + * variables, indexed by their name. *) +let vEnv : (string, varinfo node) H.t = H.create 111 + + +(* A set of inline functions indexed by their printout ! *) +let inlineBodies : (P.doc, varinfo node) H.t = H.create 111 + +(** A number of alpha conversion tables. We ought to keep one table for each + * name space. Unfortunately, because of the way the C lexer works, type + * names must be different from variable names!! We one alpha table both for + * variables and types. *) +let vtAlpha : (string, location A.alphaTableData ref) H.t + = H.create 57 (* Variables and + * types *) +let sAlpha : (string, location A.alphaTableData ref) H.t + = H.create 57 (* Structures and + * unions have + * the same name + * space *) +let eAlpha : (string, location A.alphaTableData ref) H.t + = H.create 57 (* Enumerations *) + + +(** Keep track, for all global function definitions, of the names of the formal + * arguments. They might change during merging of function types if the + * prototype occurs after the function definition and uses different names. + * We'll restore the names at the end *) +let formalNames: (int * string, string list) H.t = H.create 111 + + +(* Accumulate here the globals in the merged file *) +let theFileTypes = ref [] +let theFile = ref [] + +(* add 'g' to the merged file *) +let mergePushGlobal (g: global) : unit = + pushGlobal g ~types:theFileTypes ~variables:theFile + +let mergePushGlobals gl = List.iter mergePushGlobal gl + + +(* The index of the current file being scanned *) +let currentFidx = ref 0 + +let currentDeclIdx = ref 0 (* The index of the definition in a file. This is + * maintained both in pass 1 and in pass 2. Make + * sure you count the same things in both passes. *) +(* Keep here the file names *) +let fileNames : (int, string) H.t = H.create 113 + + + +(* Remember the composite types that we have already declared *) +let emittedCompDecls: (string, bool) H.t = H.create 113 +(* Remember the variables also *) +let emittedVarDecls: (string, bool) H.t = H.create 113 + +(* also keep track of externally-visible function definitions; + * name maps to declaration, location, and semantic checksum *) +let emittedFunDefn: (string, fundec * location * int) H.t = H.create 113 +(* and same for variable definitions; name maps to GVar fields *) +let emittedVarDefn: (string, varinfo * init option * location) H.t = H.create 113 + +(** A mapping from the new names to the original names. Used in PASS2 when we + * rename variables. *) +let originalVarNames: (string, string) H.t = H.create 113 + +(* Initialize the module *) +let init () = + H.clear sAlpha; + H.clear eAlpha; + H.clear vtAlpha; + + H.clear vEnv; + + H.clear vEq; + H.clear sEq; + H.clear eEq; + H.clear tEq; + H.clear iEq; + + H.clear vSyn; + H.clear sSyn; + H.clear eSyn; + H.clear tSyn; + H.clear iSyn; + + theFile := []; + theFileTypes := []; + + H.clear formalNames; + H.clear inlineBodies; + + currentFidx := 0; + currentDeclIdx := 0; + H.clear fileNames; + + H.clear emittedVarDecls; + H.clear emittedCompDecls; + + H.clear emittedFunDefn; + H.clear emittedVarDefn; + + H.clear originalVarNames + + +(* Some enumerations have to be turned into an integer. We implement this by + * introducing a special enumeration type which we'll recognize later to be + * an integer *) +let intEnumInfo = + { ename = "!!!intEnumInfo!!!"; (* This is otherwise invalid *) + eitems = []; + eattr = []; + ereferenced = false; + } +(* And add it to the equivalence graph *) +let intEnumInfoNode = + getNode eEq eSyn 0 intEnumInfo.ename intEnumInfo + (Some (locUnknown, 0)) + + (* Combine the types. Raises the Failure exception with an error message. + * isdef says whether the new type is for a definition *) +type combineWhat = + CombineFundef (* The new definition is for a function definition. The old + * is for a prototype *) + | CombineFunarg (* Comparing a function argument type with an old prototype + * arg *) + | CombineFunret (* Comparing the return of a function with that from an old + * prototype *) + | CombineOther + + +let rec combineTypes (what: combineWhat) + (oldfidx: int) (oldt: typ) + (fidx: int) (t: typ) : typ = + match oldt, t with + | TVoid olda, TVoid a -> TVoid (addAttributes olda a) + | TInt (oldik, olda), TInt (ik, a) -> + let combineIK oldk k = + if oldk == k then oldk else + (* GCC allows a function definition to have a more precise integer + * type than a prototype that says "int" *) + if not !msvcMode && oldk = IInt && bitsSizeOf t <= 32 + && (what = CombineFunarg || what = CombineFunret) + then + k + else ( + let msg = + P.sprint ~width:80 + (P.dprintf + "(different integer types %a and %a)" + d_type oldt d_type t) in + raise (Failure msg) + ) + in + TInt (combineIK oldik ik, addAttributes olda a) + + | TFloat (oldfk, olda), TFloat (fk, a) -> + let combineFK oldk k = + if oldk == k then oldk else + (* GCC allows a function definition to have a more precise integer + * type than a prototype that says "double" *) + if not !msvcMode && oldk = FDouble && k = FFloat + && (what = CombineFunarg || what = CombineFunret) + then + k + else + raise (Failure "(different floating point types)") + in + TFloat (combineFK oldfk fk, addAttributes olda a) + + | TEnum (oldei, olda), TEnum (ei, a) -> + (* Matching enumerations always succeeds. But sometimes it maps both + * enumerations to integers *) + matchEnumInfo oldfidx oldei fidx ei; + TEnum (oldei, addAttributes olda a) + + + (* Strange one. But seems to be handled by GCC *) + | TEnum (oldei, olda) , TInt(IInt, a) -> TEnum(oldei, + addAttributes olda a) + + (* Strange one. But seems to be handled by GCC. Warning. Here we are + * leaking types from new to old *) + | TInt(IInt, olda), TEnum (ei, a) -> TEnum(ei, addAttributes olda a) + + | TComp (oldci, olda) , TComp (ci, a) -> + matchCompInfo oldfidx oldci fidx ci; + (* If we get here we were successful *) + TComp (oldci, addAttributes olda a) + + | TArray (oldbt, oldsz, olda), TArray (bt, sz, a) -> + let combbt = combineTypes CombineOther oldfidx oldbt fidx bt in + let combinesz = + match oldsz, sz with + None, Some _ -> sz + | Some _, None -> oldsz + | None, None -> oldsz + | Some oldsz', Some sz' -> + let samesz = + match constFold true oldsz', constFold true sz' with + Const(CInt64(oldi, _, _)), Const(CInt64(i, _, _)) -> oldi = i + | _, _ -> false + in + if samesz then oldsz else + raise (Failure "(different array sizes)") + in + TArray (combbt, combinesz, addAttributes olda a) + + | TPtr (oldbt, olda), TPtr (bt, a) -> + TPtr (combineTypes CombineOther oldfidx oldbt fidx bt, + addAttributes olda a) + + (* WARNING: In this case we are leaking types from new to old !! *) + | TFun (_, _, _, [Attr("missingproto",_)]), TFun _ -> t + + + | TFun _, TFun (_, _, _, [Attr("missingproto",_)]) -> oldt + + | TFun (oldrt, oldargs, oldva, olda), TFun (rt, args, va, a) -> + let newrt = + combineTypes + (if what = CombineFundef then CombineFunret else CombineOther) + oldfidx oldrt fidx rt + in + if oldva != va then + raise (Failure "(diferent vararg specifiers)"); + (* If one does not have arguments, believe the one with the + * arguments *) + let newargs = + if oldargs = None then args else + if args = None then oldargs else + let oldargslist = argsToList oldargs in + let argslist = argsToList args in + if List.length oldargslist <> List.length argslist then + raise (Failure "(different number of arguments)") + else begin + (* Go over the arguments and update the old ones with the + * adjusted types *) + Some + (List.map2 + (fun (on, ot, oa) (an, at, aa) -> + let n = if an <> "" then an else on in + let t = + combineTypes + (if what = CombineFundef then + CombineFunarg else CombineOther) + oldfidx ot fidx at + in + let a = addAttributes oa aa in + (n, t, a)) + oldargslist argslist) + end + in + TFun (newrt, newargs, oldva, addAttributes olda a) + + | TBuiltin_va_list olda, TBuiltin_va_list a -> + TBuiltin_va_list (addAttributes olda a) + + | TNamed (oldt, olda), TNamed (t, a) -> + matchTypeInfo oldfidx oldt fidx t; + (* If we get here we were able to match *) + TNamed(oldt, addAttributes olda a) + + (* Unroll first the new type *) + | _, TNamed (t, a) -> + let res = combineTypes what oldfidx oldt fidx t.ttype in + typeAddAttributes a res + + (* And unroll the old type as well if necessary *) + | TNamed (oldt, a), _ -> + let res = combineTypes what oldfidx oldt.ttype fidx t in + typeAddAttributes a res + + | _ -> ( + (* raise (Failure "(different type constructors)") *) + let msg:string = (P.sprint 1000 (P.dprintf "(different type constructors: %a vs. %a)" + d_type oldt d_type t)) in + raise (Failure msg) + ) + + +(* Match two compinfos and throw a Failure if they do not match *) +and matchCompInfo (oldfidx: int) (oldci: compinfo) + (fidx: int) (ci: compinfo) : unit = + if oldci.cstruct <> ci.cstruct then + raise (Failure "(different struct/union types)"); + (* See if we have a mapping already *) + (* Make the nodes if not already made. Actually return the + * representatives *) + let oldcinode = getNode sEq sSyn oldfidx oldci.cname oldci None in + let cinode = getNode sEq sSyn fidx ci.cname ci None in + if oldcinode == cinode then (* We already know they are the same *) + () + else begin + (* Replace with the representative data *) + let oldci = oldcinode.ndata in + let oldfidx = oldcinode.nfidx in + let ci = cinode.ndata in + let fidx = cinode.nfidx in + + let old_len = List.length oldci.cfields in + let len = List.length ci.cfields in + (* It is easy to catch here the case when the new structure is undefined + * and the old one was defined. We just reuse the old *) + (* More complicated is the case when the old one is not defined but the + * new one is. We still reuse the old one and we'll take care of defining + * it later with the new fields. + * GN: 7/10/04, I could not find when is "later", so I added it below *) + if len <> 0 && old_len <> 0 && old_len <> len then ( + let curLoc = !currentLoc in (* d_global blows this away.. *) + (trace "merge" (P.dprintf "different # of fields\n%d: %a\n%d: %a\n" + old_len d_global (GCompTag(oldci,locUnknown)) + len d_global (GCompTag(ci,locUnknown)) + )); + currentLoc := curLoc; + let msg = Printf.sprintf + "(different number of fields in %s and %s: %d != %d.)" + oldci.cname ci.cname old_len len in + raise (Failure msg) + ); + (* We check that they are defined in the same way. While doing this there + * might be recursion and we have to watch for going into an infinite + * loop. So we add the assumption that they are equal *) + let newrep, undo = union oldcinode cinode in + (* We check the fields but watch for Failure. We only do the check when + * the lengths are the same. Due to the code above this the other + * possibility is that one of the length is 0, in which case we reuse the + * old compinfo. *) + (* But what if the old one is the empty one ? *) + if old_len = len then begin + (try + List.iter2 + (fun oldf f -> + if oldf.fbitfield <> f.fbitfield then + raise (Failure "(different bitfield info)"); + if oldf.fattr <> f.fattr then + raise (Failure "(different field attributes)"); + (* Make sure the types are compatible *) + let newtype = + combineTypes CombineOther oldfidx oldf.ftype fidx f.ftype + in + (* Change the type in the representative *) + oldf.ftype <- newtype; + ) + oldci.cfields ci.cfields + with Failure reason -> begin + (* Our assumption was wrong. Forget the isomorphism *) + undo (); + let msg = + P.sprint ~width:80 + (P.dprintf + "\n\tFailed assumption that %s and %s are isomorphic %s@!%a@!%a" + (compFullName oldci) (compFullName ci) reason + dn_global (GCompTag(oldci,locUnknown)) + dn_global (GCompTag(ci,locUnknown))) + in + raise (Failure msg) + end) + end else begin + (* We will reuse the old one. One of them is empty. If the old one is + * empty, copy over the fields from the new one. Won't this result in + * all sorts of undefined types??? *) + if old_len = 0 then + oldci.cfields <- ci.cfields; + end; + (* We get here when we succeeded checking that they are equal, or one of + * them was empty *) + newrep.ndata.cattr <- addAttributes oldci.cattr ci.cattr; + () + end + +(* Match two enuminfos and throw a Failure if they do not match *) +and matchEnumInfo (oldfidx: int) (oldei: enuminfo) + (fidx: int) (ei: enuminfo) : unit = + (* Find the node for this enum, no path compression. *) + let oldeinode = getNode eEq eSyn oldfidx oldei.ename oldei None in + let einode = getNode eEq eSyn fidx ei.ename ei None in + if oldeinode == einode then (* We already know they are the same *) + () + else begin + (* Replace with the representative data *) + let oldei = oldeinode.ndata in + let ei = einode.ndata in + (* Try to match them. But if you cannot just make them both integers *) + try + (* We do not have a mapping. They better be defined in the same way *) + if List.length oldei.eitems <> List.length ei.eitems then + raise (Failure "(different number of enumeration elements)"); + (* We check that they are defined in the same way. This is a fairly + * conservative check. *) + List.iter2 + (fun (old_iname, old_iv, _) (iname, iv, _) -> + if old_iname <> iname then + raise (Failure "(different names for enumeration items)"); + let samev = + match constFold true old_iv, constFold true iv with + Const(CInt64(oldi, _, _)), Const(CInt64(i, _, _)) -> oldi = i + | _ -> false + in + if not samev then + raise (Failure "(different values for enumeration items)")) + oldei.eitems ei.eitems; + (* Set the representative *) + let newrep, _ = union oldeinode einode in + (* We get here if the enumerations match *) + newrep.ndata.eattr <- addAttributes oldei.eattr ei.eattr; + () + with Failure msg -> begin + (* Get here if you cannot merge two enumeration nodes *) + if oldeinode != intEnumInfoNode then begin + let _ = union oldeinode intEnumInfoNode in () + end; + if einode != intEnumInfoNode then begin + let _ = union einode intEnumInfoNode in () + end; + end + end + + +(* Match two typeinfos and throw a Failure if they do not match *) +and matchTypeInfo (oldfidx: int) (oldti: typeinfo) + (fidx: int) (ti: typeinfo) : unit = + if oldti.tname = "" || ti.tname = "" then + E.s (bug "matchTypeInfo for anonymous type\n"); + (* Find the node for this enum, no path compression. *) + let oldtnode = getNode tEq tSyn oldfidx oldti.tname oldti None in + let tnode = getNode tEq tSyn fidx ti.tname ti None in + if oldtnode == tnode then (* We already know they are the same *) + () + else begin + (* Replace with the representative data *) + let oldti = oldtnode.ndata in + let oldfidx = oldtnode.nfidx in + let ti = tnode.ndata in + let fidx = tnode.nfidx in + (* Check that they are the same *) + (try + ignore (combineTypes CombineOther oldfidx oldti.ttype fidx ti.ttype); + with Failure reason -> begin + let msg = + P.sprint ~width:80 + (P.dprintf + "\n\tFailed assumption that %s and %s are isomorphic %s" + oldti.tname ti.tname reason) in + raise (Failure msg) + end); + let _ = union oldtnode tnode in + () + end + +(* Scan all files and do two things *) +(* 1. Initialize the alpha renaming tables with the names of the globals so + * that when we come in the second pass to generate new names, we do not run + * into conflicts. *) +(* 2. For all declarations of globals unify their types. In the process + * construct a set of equivalence classes on type names, structure and + * enumeration tags *) +(* 3. We clean the referenced flags *) + +let rec oneFilePass1 (f:file) : unit = + H.add fileNames !currentFidx f.fileName; + if debugMerge || !E.verboseFlag then + ignore (E.log "Pre-merging (%d) %s\n" !currentFidx f.fileName); + currentDeclIdx := 0; + if f.globinitcalled || f.globinit <> None then + E.s (E.warn "Merging file %s has global initializer" f.fileName); + + (* We scan each file and we look at all global varinfo. We see if globals + * with the same name have been encountered before and we merge those types + * *) + let matchVarinfo (vi: varinfo) (l: location * int) = + ignore (Alpha.registerAlphaName vtAlpha None vi.vname !currentLoc); + (* Make a node for it and put it in vEq *) + let vinode = mkSelfNode vEq vSyn !currentFidx vi.vname vi (Some l) in + try + let oldvinode = find true (H.find vEnv vi.vname) in + let oldloc, _ = + match oldvinode.nloc with + None -> E.s (bug "old variable is undefined") + | Some l -> l + in + let oldvi = oldvinode.ndata in + (* There is an old definition. We must combine the types. Do this first + * because it might fail *) + let newtype = + try + combineTypes CombineOther + oldvinode.nfidx oldvi.vtype + !currentFidx vi.vtype; + with (Failure reason) -> begin + (* Go ahead *) + let f = if !ignore_merge_conflicts then warn else error in + ignore (f "Incompatible declaration for %s (from %s(%d)).@! Previous was at %a (from %s (%d)) %s " + vi.vname (H.find fileNames !currentFidx) !currentFidx + d_loc oldloc + (H.find fileNames oldvinode.nfidx) oldvinode.nfidx + reason); + raise Not_found + end + in + let newrep, _ = union oldvinode vinode in + (* We do not want to turn non-"const" globals into "const" one. That + * can happen if one file declares the variable a non-const while + * others declare it as "const". *) + if hasAttribute "const" (typeAttrs vi.vtype) != + hasAttribute "const" (typeAttrs oldvi.vtype) then begin + newrep.ndata.vtype <- typeRemoveAttributes ["const"] newtype; + end else begin + newrep.ndata.vtype <- newtype; + end; + (* clean up the storage. *) + let newstorage = + if vi.vstorage = oldvi.vstorage || vi.vstorage = Extern then + oldvi.vstorage + else if oldvi.vstorage = Extern then vi.vstorage + (* Sometimes we turn the NoStorage specifier into Static for inline + * functions *) + else if oldvi.vstorage = Static && + vi.vstorage = NoStorage then Static + else begin + ignore (warn "Inconsistent storage specification for %s. Now is %a and previous was %a at %a" + vi.vname d_storage vi.vstorage d_storage oldvi.vstorage + d_loc oldloc); + vi.vstorage + end + in + newrep.ndata.vstorage <- newstorage; + newrep.ndata.vattr <- addAttributes oldvi.vattr vi.vattr; + () + with Not_found -> (* Not present in the previous files. Remember it for + * later *) + H.add vEnv vi.vname vinode + + in + List.iter + (function + | GVarDecl (vi, l) | GVar (vi, _, l) -> + currentLoc := l; + incr currentDeclIdx; + vi.vreferenced <- false; + if vi.vstorage <> Static then begin + matchVarinfo vi (l, !currentDeclIdx); + end + + | GFun (fdec, l) -> + currentLoc := l; + incr currentDeclIdx; + (* Save the names of the formal arguments *) + let _, args, _, _ = splitFunctionTypeVI fdec.svar in + H.add formalNames (!currentFidx, fdec.svar.vname) + (List.map (fun (fn, _, _) -> fn) (argsToList args)); + fdec.svar.vreferenced <- false; + (* Force inline functions to be static. *) + (* GN: This turns out to be wrong. inline functions are external, + * unless specified to be static. *) + (* + if fdec.svar.vinline && fdec.svar.vstorage = NoStorage then + fdec.svar.vstorage <- Static; + *) + if fdec.svar.vstorage <> Static then begin + matchVarinfo fdec.svar (l, !currentDeclIdx) + end else begin + if fdec.svar.vinline && mergeInlines then + (* Just create the nodes for inline functions *) + ignore (getNode iEq iSyn !currentFidx + fdec.svar.vname fdec.svar (Some (l, !currentDeclIdx))) + end + (* Make nodes for the defined type and structure tags *) + | GType (t, l) -> + incr currentDeclIdx; + t.treferenced <- false; + if t.tname <> "" then (* The empty names are just for introducing + * undefined comp tags *) + ignore (getNode tEq tSyn !currentFidx t.tname t + (Some (l, !currentDeclIdx))) + else begin (* Go inside and clean the referenced flag for the + * declared tags *) + match t.ttype with + TComp (ci, _) -> + ci.creferenced <- false; + (* Create a node for it *) + ignore (getNode sEq sSyn !currentFidx ci.cname ci None) + + | TEnum (ei, _) -> + ei.ereferenced <- false; + ignore (getNode eEq eSyn !currentFidx ei.ename ei None); + + | _ -> E.s (bug "Anonymous Gtype is not TComp") + end + + | GCompTag (ci, l) -> + incr currentDeclIdx; + ci.creferenced <- false; + ignore (getNode sEq sSyn !currentFidx ci.cname ci + (Some (l, !currentDeclIdx))) + | GEnumTag (ei, l) -> + incr currentDeclIdx; + ei.ereferenced <- false; + ignore (getNode eEq eSyn !currentFidx ei.ename ei + (Some (l, !currentDeclIdx))) + + | _ -> ()) + f.globals + + +(* Try to merge synonyms. Do not give an error if they fail to merge *) +let doMergeSynonyms + (syn : (string, 'a node) H.t) + (eq : (int * string, 'a node) H.t) + (compare : int -> 'a -> int -> 'a -> unit) (* A comparison function that + * throws Failure if no match *) + : unit = + H.iter (fun n node -> + if not node.nmergedSyns then begin + (* find all the nodes for the same name *) + let all = H.find_all syn n in + let rec tryone (classes: 'a node list) (* A number of representatives + * for this name *) + (nd: 'a node) : 'a node list (* Returns an expanded set + * of classes *) = + nd.nmergedSyns <- true; + (* Compare in turn with all the classes we have so far *) + let rec compareWithClasses = function + [] -> [nd](* No more classes. Add this as a new class *) + | c :: restc -> + try + compare c.nfidx c.ndata nd.nfidx nd.ndata; + (* Success. Stop here the comparison *) + c :: restc + with Failure _ -> (* Failed. Try next class *) + c :: (compareWithClasses restc) + in + compareWithClasses classes + in + (* Start with an empty set of classes for this name *) + let _ = List.fold_left tryone [] all in + () + end) + syn + + +let matchInlines (oldfidx: int) (oldi: varinfo) + (fidx: int) (i: varinfo) = + let oldinode = getNode iEq iSyn oldfidx oldi.vname oldi None in + let inode = getNode iEq iSyn fidx i.vname i None in + if oldinode == inode then + () + else begin + (* Replace with the representative data *) + let oldi = oldinode.ndata in + let oldfidx = oldinode.nfidx in + let i = inode.ndata in + let fidx = inode.nfidx in + (* There is an old definition. We must combine the types. Do this first + * because it might fail *) + oldi.vtype <- + combineTypes CombineOther + oldfidx oldi.vtype fidx i.vtype; + (* We get here if we have success *) + (* Combine the attributes as well *) + oldi.vattr <- addAttributes oldi.vattr i.vattr; + (* Do not union them yet because we do not know that they are the same. + * We have checked only the types so far *) + () + end + +(************************************************************ + * + * PASS 2 + * + * + ************************************************************) + +(** Keep track of the functions we have used already in the file. We need + * this to avoid removing an inline function that has been used already. + * This can only occur if the inline function is defined after it is used + * already; a bad style anyway *) +let varUsedAlready: (string, unit) H.t = H.create 111 + +(** A visitor that renames uses of variables and types *) +class renameVisitorClass = object (self) + inherit nopCilVisitor + + (* This is either a global variable which we took care of, or a local + * variable. Must do its type and attributes. *) + method vvdec (vi: varinfo) = DoChildren + + (* This is a variable use. See if we must change it *) + method vvrbl (vi: varinfo) : varinfo visitAction = + if not vi.vglob then DoChildren else + if vi.vreferenced then begin + H.add varUsedAlready vi.vname (); + DoChildren + end else begin + match findReplacement true vEq !currentFidx vi.vname with + None -> DoChildren + | Some (vi', oldfidx) -> + if debugMerge then + ignore (E.log "Renaming use of var %s(%d) to %s(%d)\n" + vi.vname !currentFidx vi'.vname oldfidx); + vi'.vreferenced <- true; + H.add varUsedAlready vi'.vname (); + ChangeTo vi' + end + + + (* The use of a type. Change only those types whose underlying info + * is not a root. *) + method vtype (t: typ) = + match t with + TComp (ci, a) when not ci.creferenced -> begin + match findReplacement true sEq !currentFidx ci.cname with + None -> DoChildren + | Some (ci', oldfidx) -> + if debugMerge then + ignore (E.log "Renaming use of %s(%d) to %s(%d)\n" + ci.cname !currentFidx ci'.cname oldfidx); + ChangeTo (TComp (ci', visitCilAttributes (self :> cilVisitor) a)) + end + | TEnum (ei, a) when not ei.ereferenced -> begin + match findReplacement true eEq !currentFidx ei.ename with + None -> DoChildren + | Some (ei', _) -> + if ei' == intEnumInfo then + (* This is actually our friend intEnumInfo *) + ChangeTo (TInt(IInt, visitCilAttributes (self :> cilVisitor) a)) + else + ChangeTo (TEnum (ei', visitCilAttributes (self :> cilVisitor) a)) + end + + | TNamed (ti, a) when not ti.treferenced -> begin + match findReplacement true tEq !currentFidx ti.tname with + None -> DoChildren + | Some (ti', _) -> + ChangeTo (TNamed (ti', visitCilAttributes (self :> cilVisitor) a)) + end + + | _ -> DoChildren + + (* The Field offset might need to be changed to use new compinfo *) + method voffs = function + Field (f, o) -> begin + (* See if the compinfo was changed *) + if f.fcomp.creferenced then + DoChildren + else begin + match findReplacement true sEq !currentFidx f.fcomp.cname with + None -> DoChildren (* We did not replace it *) + | Some (ci', oldfidx) -> begin + (* First, find out the index of the original field *) + let rec indexOf (i: int) = function + [] -> + E.s (bug "Cannot find field %s in %s(%d)\n" + f.fname (compFullName f.fcomp) !currentFidx) + | f' :: rest when f' == f -> i + | _ :: rest -> indexOf (i + 1) rest + in + let index = indexOf 0 f.fcomp.cfields in + if List.length ci'.cfields <= index then + E.s (bug "Too few fields in replacement %s(%d) for %s(%d)\n" + (compFullName ci') oldfidx + (compFullName f.fcomp) !currentFidx); + let f' = List.nth ci'.cfields index in + ChangeDoChildrenPost (Field (f', o), fun x -> x) + end + end + end + | _ -> DoChildren + + method vinitoffs o = + (self#voffs o) (* treat initializer offsets same as lvalue offsets *) + +end + +let renameVisitor = new renameVisitorClass + + +(** A visitor that renames uses of inline functions that were discovered in + * pass 2 to be used before they are defined. This is like the renameVisitor + * except it only looks at the variables (thus it is a bit more efficient) + * and it also renames forward declarations of the inlines to be removed. *) + +class renameInlineVisitorClass = object (self) + inherit nopCilVisitor + + (* This is a variable use. See if we must change it *) + method vvrbl (vi: varinfo) : varinfo visitAction = + if not vi.vglob then DoChildren else + if vi.vreferenced then begin (* Already renamed *) + DoChildren + end else begin + match findReplacement true vEq !currentFidx vi.vname with + None -> DoChildren + | Some (vi', oldfidx) -> + if debugMerge then + ignore (E.log "Renaming var %s(%d) to %s(%d)\n" + vi.vname !currentFidx vi'.vname oldfidx); + vi'.vreferenced <- true; + ChangeTo vi' + end + + (* And rename some declarations of inlines to remove. We cannot drop this + * declaration (see small1/combineinline6) *) + method vglob = function + GVarDecl(vi, l) when vi.vinline -> begin + (* Get the original name *) + let origname = + try H.find originalVarNames vi.vname + with Not_found -> vi.vname + in + (* Now see if this must be replaced *) + match findReplacement true vEq !currentFidx origname with + None -> DoChildren + | Some (vi', _) -> ChangeTo [GVarDecl (vi', l)] + end + | _ -> DoChildren + +end +let renameInlinesVisitor = new renameInlineVisitorClass + + +(* sm: First attempt at a semantic checksum for function bodies. + * Ideally, two function's checksums would be equal only when their + * bodies were provably equivalent; but I'm using a much simpler and + * less accurate heuristic here. It should be good enough for the + * purpose I have in mind, which is doing duplicate removal of + * multiply-instantiated template functions. *) +let functionChecksum (dec: fundec) : int = +begin + (* checksum the structure of the statements (only) *) + let rec stmtListSum (lst : stmt list) : int = + (List.fold_left (fun acc s -> acc + (stmtSum s)) 0 lst) + and stmtSum (s: stmt) : int = + (* strategy is to just throw a lot of prime numbers into the + * computation in hopes of avoiding accidental collision.. *) + match s.skind with + | Instr(l) -> 13 + 67*(List.length l) + | Return(_) -> 17 + | Goto(_) -> 19 + | Break(_) -> 23 + | Continue(_) -> 29 + | If(_,b1,b2,_) -> 31 + 37*(stmtListSum b1.bstmts) + + 41*(stmtListSum b2.bstmts) + | Switch(_,b,_,_) -> 43 + 47*(stmtListSum b.bstmts) + (* don't look at stmt list b/c is not part of tree *) +(* + | Loop(b,_,_,_) -> 49 + 53*(stmtListSum b.bstmts) +*) + | While(_,b,_) -> 49 + 53*(stmtListSum b.bstmts) + | DoWhile(_,b,_) -> 49 + 53*(stmtListSum b.bstmts) + | For(_,_,_,b,_) -> 49 + 53*(stmtListSum b.bstmts) + | Block(b) -> 59 + 61*(stmtListSum b.bstmts) + | TryExcept (b, (il, e), h, _) -> + 67 + 83*(stmtListSum b.bstmts) + 97*(stmtListSum h.bstmts) + | TryFinally (b, h, _) -> + 103 + 113*(stmtListSum b.bstmts) + 119*(stmtListSum h.bstmts) + in + + (* disabled 2nd and 3rd measure because they appear to get different + * values, for the same code, depending on whether the code was just + * parsed into CIL or had previously been parsed into CIL, printed + * out, then re-parsed into CIL *) + let a,b,c,d,e = + (List.length dec.sformals), (* # formals *) + 0 (*(List.length dec.slocals)*), (* # locals *) + 0 (*dec.smaxid*), (* estimate of internal statement count *) + (List.length dec.sbody.bstmts), (* number of statements at outer level *) + (stmtListSum dec.sbody.bstmts) in (* checksum of statement structure *) + (*(trace "sm" (P.dprintf "sum: %s is %d %d %d %d %d\n"*) + (* dec.svar.vname a b c d e));*) + 2*a + 3*b + 5*c + 7*d + 11*e +end + + +(* sm: equality for initializers, etc.; this is like '=', except + * when we reach shared pieces (like references into the type + * structure), we use '==', to prevent circularity *) +(* update: that's no good; I'm using this to find things which + * are equal but from different CIL trees, so nothing will ever + * be '=='.. as a hack I'll just change those places to 'true', + * so these functions are not now checking proper equality.. + * places where equality is not complete are marked "INC" *) +let rec equalInits (x: init) (y: init) : bool = +begin + match x,y with + | SingleInit(xe), SingleInit(ye) -> (equalExps xe ye) + | CompoundInit(xt, xoil), CompoundInit(yt, yoil) -> + (*(xt == yt) &&*) (* INC *) (* types need to be identically equal *) + let rec equalLists xoil yoil : bool = + match xoil,yoil with + | ((xo,xi) :: xrest), ((yo,yi) :: yrest) -> + (equalOffsets xo yo) && + (equalInits xi yi) && + (equalLists xrest yrest) + | [], [] -> true + | _, _ -> false + in + (equalLists xoil yoil) + | _, _ -> false +end + +and equalOffsets (x: offset) (y: offset) : bool = +begin + match x,y with + | NoOffset, NoOffset -> true + | Field(xfi,xo), Field(yfi,yo) -> + (xfi.fname = yfi.fname) && (* INC: same fieldinfo name.. *) + (equalOffsets xo yo) + | Index(xe,xo), Index(ye,yo) -> + (equalExps xe ye) && + (equalOffsets xo yo) + | _,_ -> false +end + +and equalExps (x: exp) (y: exp) : bool = +begin + match x,y with + | Const(xc), Const(yc) -> xc = yc || (* safe to use '=' on literals *) + ( + (* CIL changes (unsigned)0 into 0U during printing.. *) + match xc,yc with + | CInt64(xv,_,_),CInt64(yv,_,_) -> + (Int64.to_int xv) = 0 && (* ok if they're both 0 *) + (Int64.to_int yv) = 0 + | _,_ -> false + ) + | Lval(xl), Lval(yl) -> (equalLvals xl yl) + | SizeOf(xt), SizeOf(yt) -> true (*INC: xt == yt*) (* identical types *) + | SizeOfE(xe), SizeOfE(ye) -> (equalExps xe ye) + | AlignOf(xt), AlignOf(yt) -> true (*INC: xt == yt*) + | AlignOfE(xe), AlignOfE(ye) -> (equalExps xe ye) + | UnOp(xop,xe,xt), UnOp(yop,ye,yt) -> + xop = yop && + (equalExps xe ye) && + true (*INC: xt == yt*) + | BinOp(xop,xe1,xe2,xt), BinOp(yop,ye1,ye2,yt) -> + xop = yop && + (equalExps xe1 ye1) && + (equalExps xe2 ye2) && + true (*INC: xt == yt*) + | CastE(xt,xe), CastE(yt,ye) -> + (*INC: xt == yt &&*) + (equalExps xe ye) + | AddrOf(xl), AddrOf(yl) -> (equalLvals xl yl) + | StartOf(xl), StartOf(yl) -> (equalLvals xl yl) + + (* initializers that go through CIL multiple times sometimes lose casts they + * had the first time; so allow a different of a cast *) + | CastE(xt,xe), ye -> + (equalExps xe ye) + | xe, CastE(yt,ye) -> + (equalExps xe ye) + + | _,_ -> false +end + +and equalLvals (x: lval) (y: lval) : bool = +begin + match x,y with + | (Var(xv),xo), (Var(yv),yo) -> + (* I tried, I really did.. the problem is I see these names + * before merging collapses them, so __T123 != __T456, + * so whatever *) + (*(xv.vname = vy.vname) && (* INC: same varinfo names.. *)*) + (equalOffsets xo yo) + + | (Mem(xe),xo), (Mem(ye),yo) -> + (equalExps xe ye) && + (equalOffsets xo yo) + | _,_ -> false +end + +let equalInitOpts (x: init option) (y: init option) : bool = +begin + match x,y with + | None,None -> true + | Some(xi), Some(yi) -> (equalInits xi yi) + | _,_ -> false +end + + + (* Now we go once more through the file and we rename the globals that we + * keep. We also scan the entire body and we replace references to the + * representative types or variables. We set the referenced flags once we + * have replaced the names. *) +let oneFilePass2 (f: file) = + if debugMerge || !E.verboseFlag then + ignore (E.log "Final merging phase (%d): %s\n" + !currentFidx f.fileName); + currentDeclIdx := 0; (* Even though we don't need it anymore *) + H.clear varUsedAlready; + H.clear originalVarNames; + (* If we find inline functions that are used before being defined, and thus + * before knowing that we can throw them away, then we mark this flag so + * that we can make another pass over the file *) + let repeatPass2 = ref false in + (* Keep a pointer to the contents of the file so far *) + let savedTheFile = !theFile in + + let processOneGlobal (g: global) : unit = + (* Process a varinfo. Reuse an old one, or rename it if necessary *) + let processVarinfo (vi: varinfo) (vloc: location) : varinfo = + if vi.vreferenced then + vi (* Already done *) + else begin + (* Maybe it is static. Rename it then *) + if vi.vstorage = Static then begin + let newName, _ = A.newAlphaName vtAlpha None vi.vname !currentLoc in + (* Remember the original name *) + H.add originalVarNames newName vi.vname; + if debugMerge then ignore (E.log "renaming %s at %a to %s\n" + vi.vname d_loc vloc newName); + vi.vname <- newName; + vi.vid <- newVID (); + vi.vreferenced <- true; + vi + end else begin + (* Find the representative *) + match findReplacement true vEq !currentFidx vi.vname with + None -> vi (* This is the representative *) + | Some (vi', _) -> (* Reuse some previous one *) + vi'.vreferenced <- true; (* Mark it as done already *) + vi'.vaddrof <- vi.vaddrof || vi'.vaddrof; + vi' + end + end + in + try + match g with + | GVarDecl (vi, l) as g -> + currentLoc := l; + incr currentDeclIdx; + let vi' = processVarinfo vi l in + if vi != vi' then (* Drop this declaration *) () + else if H.mem emittedVarDecls vi'.vname then (* No need to keep it *) + () + else begin + H.add emittedVarDecls vi'.vname true; (* Remember that we emitted + * it *) + mergePushGlobals (visitCilGlobal renameVisitor g) + end + + | GVar (vi, init, l) -> + currentLoc := l; + incr currentDeclIdx; + let vi' = processVarinfo vi l in + (* We must keep this definition even if we reuse this varinfo, + * because maybe the previous one was a declaration *) + H.add emittedVarDecls vi.vname true; (* Remember that we emitted it*) + + let emitIt:bool = (not mergeGlobals) || + try + let prevVar, prevInitOpt, prevLoc = + (H.find emittedVarDefn vi'.vname) in + (* previously defined; same initializer? *) + if (equalInitOpts prevInitOpt init.init) + || (init.init = None) then ( + (trace "mergeGlob" + (P.dprintf "dropping global var %s at %a in favor of the one at %a\n" + vi'.vname d_loc l d_loc prevLoc)); + false (* do not emit *) + ) + else if prevInitOpt = None then ( + (* We have an initializer, but the previous one didn't. + We should really convert the previous global from GVar + to GVarDecl, but that's not convenient to do here. *) + true + ) + else ( + (* Both GVars have initializers. *) + (E.s (error "global var %s at %a has different initializer than %a\n" + vi'.vname d_loc l d_loc prevLoc)); + ) + with Not_found -> ( + (* no previous definition *) + (H.add emittedVarDefn vi'.vname (vi', init.init, l)); + true (* emit it *) + ) + in + + if emitIt then + mergePushGlobals (visitCilGlobal renameVisitor (GVar(vi', init, l))) + + | GFun (fdec, l) as g -> + currentLoc := l; + incr currentDeclIdx; + (* We apply the renaming *) + fdec.svar <- processVarinfo fdec.svar l; + (* Get the original name. *) + let origname = + try H.find originalVarNames fdec.svar.vname + with Not_found -> fdec.svar.vname + in + (* Go in there and rename everything as needed *) + let fdec' = + match visitCilGlobal renameVisitor g with + [GFun(fdec', _)] -> fdec' + | _ -> E.s (unimp "renameVisitor for GFun returned something else") + in + let g' = GFun(fdec', l) in + (* Now restore the parameter names *) + let _, args, _, _ = splitFunctionTypeVI fdec'.svar in + let oldnames, foundthem = + try H.find formalNames (!currentFidx, origname), true + with Not_found -> begin + ignore (warnOpt "Cannot find %s in formalNames" origname); + [], false + end + in + if foundthem then begin + let argl = argsToList args in + if List.length oldnames <> List.length argl then + E.s (unimp "After merging the function has more arguments"); + List.iter2 + (fun oldn a -> if oldn <> "" then a.vname <- oldn) + oldnames fdec.sformals; + (* Reflect them in the type *) + setFormals fdec fdec.sformals + end; + (** See if we can remove this inline function *) + if fdec'.svar.vinline && mergeInlines then begin + let printout = + (* Temporarily turn of printing of lines *) + let oldprintln = !lineDirectiveStyle in + lineDirectiveStyle := None; + (* Temporarily set the name to all functions in the same way *) + let newname = fdec'.svar.vname in + fdec'.svar.vname <- "@@alphaname@@"; + (* If we must do alpha conversion then temporarily set the + * names of the local variables and formals in a standard way *) + let nameId = ref 0 in + let oldNames : string list ref = ref [] in + let renameOne (v: varinfo) = + oldNames := v.vname :: !oldNames; + incr nameId; + v.vname <- "___alpha" ^ string_of_int !nameId + in + let undoRenameOne (v: varinfo) = + match !oldNames with + n :: rest -> + oldNames := rest; + v.vname <- n + | _ -> E.s (bug "undoRenameOne") + in + (* Remember the original type *) + let origType = fdec'.svar.vtype in + if mergeInlinesWithAlphaConvert then begin + (* Rename the formals *) + List.iter renameOne fdec'.sformals; + (* Reflect in the type *) + setFormals fdec' fdec'.sformals; + (* Now do the locals *) + List.iter renameOne fdec'.slocals + end; + (* Now print it *) + let res = d_global () g' in + lineDirectiveStyle := oldprintln; + fdec'.svar.vname <- newname; + if mergeInlinesWithAlphaConvert then begin + (* Do the locals in reverse order *) + List.iter undoRenameOne (List.rev fdec'.slocals); + (* Do the formals in reverse order *) + List.iter undoRenameOne (List.rev fdec'.sformals); + (* Restore the type *) + fdec'.svar.vtype <- origType; + end; + res + in + (* Make a node for this inline function using the original name. *) + let inode = + getNode vEq vSyn !currentFidx origname fdec'.svar + (Some (l, !currentDeclIdx)) + in + if debugInlines then begin + ignore (E.log "getNode %s(%d) with loc=%a. declidx=%d\n" + inode.nname inode.nfidx + d_nloc inode.nloc + !currentDeclIdx); + ignore (E.log + "Looking for previous definition of inline %s(%d)\n" + origname !currentFidx); + end; + try + let oldinode = H.find inlineBodies printout in + if debugInlines then + ignore (E.log " Matches %s(%d)\n" + oldinode.nname oldinode.nfidx); + (* There is some other inline function with the same printout. + * We should reuse this, but watch for the case when the inline + * was already used. *) + if H.mem varUsedAlready fdec'.svar.vname then begin + if mergeInlinesRepeat then begin + repeatPass2 := true + end else begin + ignore (warn "Inline function %s because it is used before it is defined" fdec'.svar.vname); + raise Not_found + end + end; + let _ = union oldinode inode in + (* Clean up the vreferenced bit in the new inline, so that we + * can rename it. Reset the name to the original one so that + * we can find the replacement name. *) + fdec'.svar.vreferenced <- false; + fdec'.svar.vname <- origname; + () (* Drop this definition *) + with Not_found -> begin + if debugInlines then ignore (E.log " Not found\n"); + H.add inlineBodies printout inode; + mergePushGlobal g' + end + end else begin + (* either the function is not inline, or we're not attempting to + * merge inlines *) + if (mergeGlobals && + not fdec'.svar.vinline && + fdec'.svar.vstorage <> Static) then + begin + (* sm: this is a non-inline, non-static function. I want to + * consider dropping it if a same-named function has already + * been put into the merged file *) + let curSum = (functionChecksum fdec') in + (*(trace "mergeGlob" (P.dprintf "I see extern function %s, sum is %d\n"*) + (* fdec'.svar.vname curSum));*) + try + let prevFun, prevLoc, prevSum = + (H.find emittedFunDefn fdec'.svar.vname) in + (* previous was found *) + if (curSum = prevSum) then + (trace "mergeGlob" + (P.dprintf "dropping duplicate def'n of func %s at %a in favor of that at %a\n" + fdec'.svar.vname d_loc l d_loc prevLoc)) + else begin + (* the checksums differ, so print a warning but keep the + * older one to avoid a link error later. I think this is + * a reasonable approximation of what ld does. *) + (ignore (warn "def'n of func %s at %a (sum %d) conflicts with the one at %a (sum %d); keeping the one at %a.\n" + fdec'.svar.vname d_loc l curSum d_loc prevLoc + prevSum d_loc prevLoc)) + end + with Not_found -> begin + (* there was no previous definition *) + (mergePushGlobal g'); + (H.add emittedFunDefn fdec'.svar.vname (fdec', l, curSum)) + end + end else begin + (* not attempting to merge global functions, or it was static + * or inline *) + mergePushGlobal g' + end + end + + | GCompTag (ci, l) as g -> begin + currentLoc := l; + incr currentDeclIdx; + if ci.creferenced then + () + else begin + match findReplacement true sEq !currentFidx ci.cname with + None -> + (* A new one, we must rename it and keep the definition *) + (* Make sure this is root *) + (try + let nd = H.find sEq (!currentFidx, ci.cname) in + if nd.nrep != nd then + E.s (bug "Setting creferenced for struct %s(%d) which is not root!\n" + ci.cname !currentFidx); + with Not_found -> begin + E.s (bug "Setting creferenced for struct %s(%d) which is not in the sEq!\n" + ci.cname !currentFidx); + end); + let newname, _ = + A.newAlphaName sAlpha None ci.cname !currentLoc in + ci.cname <- newname; + ci.creferenced <- true; + ci.ckey <- H.hash (compFullName ci); + (* Now we should visit the fields as well *) + H.add emittedCompDecls ci.cname true; (* Remember that we + * emitted it *) + mergePushGlobals (visitCilGlobal renameVisitor g) + | Some (oldci, oldfidx) -> begin + (* We are not the representative. Drop this declaration + * because we'll not be using it. *) + () + end + end + end + | GEnumTag (ei, l) as g -> begin + currentLoc := l; + incr currentDeclIdx; + if ei.ereferenced then + () + else begin + match findReplacement true eEq !currentFidx ei.ename with + None -> (* We must rename it *) + let newname, _ = + A.newAlphaName eAlpha None ei.ename !currentLoc in + ei.ename <- newname; + ei.ereferenced <- true; + (* And we must rename the items to using the same name space + * as the variables *) + ei.eitems <- + List.map + (fun (n, i, loc) -> + let newname, _ = + A.newAlphaName vtAlpha None n !currentLoc in + newname, i, loc) + ei.eitems; + mergePushGlobals (visitCilGlobal renameVisitor g); + | Some (ei', _) -> (* Drop this since we are reusing it from + * before *) + () + end + end + | GCompTagDecl (ci, l) -> begin + currentLoc := l; (* This is here just to introduce an undefined + * structure. But maybe the structure was defined + * already. *) + (* Do not increment currentDeclIdx because it is not incremented in + * pass 1*) + if H.mem emittedCompDecls ci.cname then + () (* It was already declared *) + else begin + H.add emittedCompDecls ci.cname true; + (* Keep it as a declaration *) + mergePushGlobal g; + end + end + + | GEnumTagDecl (ei, l) -> + currentLoc := l; + (* Do not increment currentDeclIdx because it is not incremented in + * pass 1*) + (* Keep it as a declaration *) + mergePushGlobal g + + + | GType (ti, l) as g -> begin + currentLoc := l; + incr currentDeclIdx; + if ti.treferenced then + () + else begin + match findReplacement true tEq !currentFidx ti.tname with + None -> (* We must rename it and keep it *) + let newname, _ = + A.newAlphaName vtAlpha None ti.tname !currentLoc in + ti.tname <- newname; + ti.treferenced <- true; + mergePushGlobals (visitCilGlobal renameVisitor g); + | Some (ti', _) ->(* Drop this since we are reusing it from + * before *) + () + end + end + | g -> mergePushGlobals (visitCilGlobal renameVisitor g) + with e -> begin + let globStr:string = (P.sprint 1000 (P.dprintf + "error when merging global %a: %s" + d_global g (Printexc.to_string e))) in + ignore (E.log "%s\n" globStr); + (*"error when merging global: %s\n" (Printexc.to_string e);*) + mergePushGlobal (GText (P.sprint 80 + (P.dprintf "/* error at %t:" d_thisloc))); + mergePushGlobal g; + mergePushGlobal (GText ("*************** end of error*/")); + raise e + end + in + (* Now do the real PASS 2 *) + List.iter processOneGlobal f.globals; + (* See if we must re-visit the globals in this file because an inline that + * is being removed was used before we saw the definition and we decided to + * remove it *) + if mergeInlinesRepeat && !repeatPass2 then begin + if debugMerge || !E.verboseFlag then + ignore (E.log "Repeat final merging phase (%d): %s\n" + !currentFidx f.fileName); + (* We are going to rescan the globals we have added while processing this + * file. *) + let theseGlobals : global list ref = ref [] in + (* Scan a list of globals until we hit a given tail *) + let rec scanUntil (tail: 'a list) (l: 'a list) = + if tail == l then () + else + match l with + | [] -> E.s (bug "mergecil: scanUntil could not find the marker\n") + | g :: rest -> + theseGlobals := g :: !theseGlobals; + scanUntil tail rest + in + (* Collect in theseGlobals all the globals from this file *) + theseGlobals := []; + scanUntil savedTheFile !theFile; + (* Now reprocess them *) + theFile := savedTheFile; + List.iter (fun g -> + theFile := (visitCilGlobal renameInlinesVisitor g) @ !theFile) + !theseGlobals; + (* Now check if we have inlines that we could not remove + H.iter (fun name _ -> + if not (H.mem inlinesRemoved name) then + ignore (warn "Could not remove inline %s. I have no idea why!\n" + name)) + inlinesToRemove *) + end + + +let merge (files: file list) (newname: string) : file = + init (); + + (* Make the first pass over the files *) + currentFidx := 0; + List.iter (fun f -> oneFilePass1 f; incr currentFidx) files; + + (* Now maybe try to force synonyms to be equal *) + if mergeSynonyms then begin + doMergeSynonyms sSyn sEq matchCompInfo; + doMergeSynonyms eSyn eEq matchEnumInfo; + doMergeSynonyms tSyn tEq matchTypeInfo; + if mergeInlines then begin + (* Copy all the nodes from the iEq to vEq as well. This is needed + * because vEq will be used for variable renaming *) + H.iter (fun k n -> H.add vEq k n) iEq; + doMergeSynonyms iSyn iEq matchInlines; + end + end; + + (* Now maybe dump the graph *) + if debugMerge then begin + dumpGraph "type" tEq; + dumpGraph "struct and union" sEq; + dumpGraph "enum" eEq; + dumpGraph "variable" vEq; + if mergeInlines then dumpGraph "inline" iEq; + end; + (* Make the second pass over the files. This is when we start rewriting the + * file *) + currentFidx := 0; + List.iter (fun f -> oneFilePass2 f; incr currentFidx) files; + + (* Now reverse the result and return the resulting file *) + let rec revonto acc = function + [] -> acc + | x :: t -> revonto (x :: acc) t + in + let res = + { fileName = newname; + globals = revonto (revonto [] !theFile) !theFileTypes; + globinit = None; + globinitcalled = false;} in + init (); (* Make the GC happy *) + (* We have made many renaming changes and sometimes we have just guessed a + * name wrong. Make sure now that the local names are unique. *) + uniqueVarNames res; + res + + + + + diff --git a/cil/src/mergecil.mli b/cil/src/mergecil.mli new file mode 100644 index 00000000..a864c69a --- /dev/null +++ b/cil/src/mergecil.mli @@ -0,0 +1,42 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(** Set this to true to ignore the merge conflicts *) +val ignore_merge_conflicts: bool ref + +(** Merge a number of CIL files *) +val merge: Cil.file list -> string -> Cil.file diff --git a/cil/src/rmtmps.ml b/cil/src/rmtmps.ml new file mode 100644 index 00000000..b7dea931 --- /dev/null +++ b/cil/src/rmtmps.ml @@ -0,0 +1,778 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* rmtmps.ml *) +(* implementation for rmtmps.mli *) + +open Pretty +open Cil +module H = Hashtbl +module E = Errormsg +module U = Util + +(* Set on the command-line: *) +let keepUnused = ref false +let rmUnusedInlines = ref false + + +let trace = Trace.trace "rmtmps" + + + +(*********************************************************************** + * + * Clearing of "referenced" bits + * + *) + + +let clearReferencedBits file = + let considerGlobal global = + match global with + | GType (info, _) -> + trace (dprintf "clearing mark: %a\n" d_shortglobal global); + info.treferenced <- false + + | GEnumTag (info, _) + | GEnumTagDecl (info, _) -> + trace (dprintf "clearing mark: %a\n" d_shortglobal global); + info.ereferenced <- false + + | GCompTag (info, _) + | GCompTagDecl (info, _) -> + trace (dprintf "clearing mark: %a\n" d_shortglobal global); + info.creferenced <- false + + | GVar ({vname = name} as info, _, _) + | GVarDecl ({vname = name} as info, _) -> + trace (dprintf "clearing mark: %a\n" d_shortglobal global); + info.vreferenced <- false + + | GFun ({svar = info} as func, _) -> + trace (dprintf "clearing mark: %a\n" d_shortglobal global); + info.vreferenced <- false; + let clearMark local = + trace (dprintf "clearing mark: local %s\n" local.vname); + local.vreferenced <- false + in + List.iter clearMark func.slocals + + | _ -> + () + in + iterGlobals file considerGlobal + + +(*********************************************************************** + * + * Scanning and categorization of pragmas + * + *) + + +(* collections of names of things to keep *) +type collection = (string, unit) H.t +type keepers = { + typedefs : collection; + enums : collection; + structs : collection; + unions : collection; + defines : collection; + } + + +(* rapid transfer of control when we find a malformed pragma *) +exception Bad_pragma + +let ccureddeepcopystring = "ccureddeepcopy" +(* Save this length so we don't recompute it each time. *) +let ccureddeepcopystring_length = String.length ccureddeepcopystring + +(* CIL and CCured define several pragmas which prevent removal of + * various global symbols. Here we scan for those pragmas and build + * up collections of the corresponding symbols' names. + *) + +let categorizePragmas file = + + (* names of things which should be retained *) + let keepers = { + typedefs = H.create 0; + enums = H.create 0; + structs = H.create 0; + unions = H.create 0; + defines = H.create 1 + } in + + (* populate these name collections in light of each pragma *) + let considerPragma = + + let badPragma location pragma = + ignore (warnLoc location "Invalid argument to pragma %s" pragma) + in + + function + | GPragma (Attr ("cilnoremove" as directive, args), location) -> + (* a very flexible pragma: can retain typedefs, enums, + * structs, unions, or globals (functions or variables) *) + begin + let processArg arg = + try + match arg with + | AStr specifier -> + (* isolate and categorize one symbol name *) + let collection, name = + (* Two words denotes a typedef, enum, struct, or + * union, as in "type foo" or "enum bar". A + * single word denotes a global function or + * variable. *) + let whitespace = Str.regexp "[ \t]+" in + let words = Str.split whitespace specifier in + match words with + | ["type"; name] -> + keepers.typedefs, name + | ["enum"; name] -> + keepers.enums, name + | ["struct"; name] -> + keepers.structs, name + | ["union"; name] -> + keepers.unions, name + | [name] -> + keepers.defines, name + | _ -> + raise Bad_pragma + in + H.add collection name () + | _ -> + raise Bad_pragma + with Bad_pragma -> + badPragma location directive + in + List.iter processArg args + end + | GVarDecl (v, _) -> begin + (* Look for alias attributes, e.g. Linux modules *) + match filterAttributes "alias" v.vattr with + [] -> () (* ordinary prototype. *) + | [Attr("alias", [AStr othername])] -> + H.add keepers.defines othername () + | _ -> E.s (error "Bad alias attribute at %a" d_loc !currentLoc) + end + + (*** Begin CCured-specific checks: ***) + (* these pragmas indirectly require that we keep the function named in + -- the first arguments of boxmodelof and ccuredwrapperof, and + -- the third argument of ccureddeepcopy*. *) + | GPragma (Attr("ccuredwrapper" as directive, attribute :: _), location) -> + begin + match attribute with + | AStr name -> + H.add keepers.defines name () + | _ -> + badPragma location directive + end + | GPragma (Attr("ccuredvararg", funcname :: (ASizeOf t) :: _), location) -> + begin + match t with + | TComp(c,_) when c.cstruct -> (* struct *) + H.add keepers.structs c.cname () + | TComp(c,_) -> (* union *) + H.add keepers.unions c.cname () + | TNamed(ti,_) -> + H.add keepers.typedefs ti.tname () + | TEnum(ei, _) -> + H.add keepers.enums ei.ename () + | _ -> + () + end + | GPragma (Attr(directive, _ :: _ :: attribute :: _), location) + when String.length directive > ccureddeepcopystring_length + && (Str.first_chars directive ccureddeepcopystring_length) + = ccureddeepcopystring -> + begin + match attribute with + | AStr name -> + H.add keepers.defines name () + | _ -> + badPragma location directive + end + (** end CCured-specific stuff **) + | _ -> + () + in + iterGlobals file considerPragma; + keepers + + + +(*********************************************************************** + * + * Function body elimination from pragmas + * + *) + + +(* When performing global slicing, any functions not explicitly marked + * as pragma roots are reduced to mere declarations. This leaves one + * with a reduced source file that still compiles to object code, but + * which contains the bodies of only explicitly retained functions. + *) + +let amputateFunctionBodies keptGlobals file = + let considerGlobal = function + | GFun ({svar = {vname = name} as info}, location) + when not (H.mem keptGlobals name) -> + trace (dprintf "slicing: reducing to prototype: function %s\n" name); + GVarDecl (info, location) + | other -> + other + in + mapGlobals file considerGlobal + + + +(*********************************************************************** + * + * Root collection from pragmas + * + *) + + +let isPragmaRoot keepers = function + | GType ({tname = name}, _) -> + H.mem keepers.typedefs name + | GEnumTag ({ename = name}, _) + | GEnumTagDecl ({ename = name}, _) -> + H.mem keepers.enums name + | GCompTag ({cname = name; cstruct = structure}, _) + | GCompTagDecl ({cname = name; cstruct = structure}, _) -> + let collection = if structure then keepers.structs else keepers.unions in + H.mem collection name + | GVar ({vname = name}, _, _) + | GVarDecl ({vname = name}, _) + | GFun ({svar = {vname = name}}, _) -> + H.mem keepers.defines name + | _ -> + false + + + +(*********************************************************************** + * + * Common root collecting utilities + * + *) + + +let traceRoot reason global = + trace (dprintf "root (%s): %a@!" reason d_shortglobal global); + true + + +let traceNonRoot reason global = + trace (dprintf "non-root (%s): %a@!" reason d_shortglobal global); + false + + +let hasExportingAttribute funvar = + let rec isExportingAttribute = function + | Attr ("constructor", []) -> true + | Attr ("destructor", []) -> true + | _ -> false + in + List.exists isExportingAttribute funvar.vattr + + + +(*********************************************************************** + * + * Root collection from external linkage + * + *) + + +(* Exported roots are those global symbols which are visible to the + * linker and dynamic loader. For variables, this consists of + * anything that is not "static". For functions, this consists of: + * + * - functions bearing a "constructor" or "destructor" attribute + * - functions declared extern but not inline + * - functions declared neither inline nor static + * + * gcc incorrectly (according to C99) makes inline functions visible to + * the linker. So we can only remove inline functions on MSVC. + *) + +let isExportedRoot global = + let result, reason = match global with + | GVar ({vstorage = Static}, _, _) -> + false, "static variable" + | GVar _ -> + true, "non-static variable" + | GFun ({svar = v}, _) -> begin + if hasExportingAttribute v then + true, "constructor or destructor function" + else if v.vstorage = Static then + false, "static function" + else if v.vinline && v.vstorage != Extern + && (!msvcMode || !rmUnusedInlines) then + false, "inline function" + else + true, "other function" + end + | GVarDecl(v,_) when hasAttribute "alias" v.vattr -> + true, "has GCC alias attribute" + | _ -> + false, "neither function nor variable" + in + trace (dprintf "isExportedRoot %a -> %b, %s@!" + d_shortglobal global result reason); + result + + + +(*********************************************************************** + * + * Root collection for complete programs + * + *) + + +(* Exported roots are "main()" and functions bearing a "constructor" + * or "destructor" attribute. These are the only things which must be + * retained in a complete program. + *) + +let isCompleteProgramRoot global = + let result = match global with + | GFun ({svar = {vname = "main"; vstorage = vstorage}}, _) -> + vstorage <> Static + | GFun (fundec, _) + when hasExportingAttribute fundec.svar -> + true + | _ -> + false + in + trace (dprintf "complete program root -> %b for %a@!" result d_shortglobal global); + result + + +(*********************************************************************** + * + * Transitive reachability closure from roots + * + *) + + +(* This visitor recursively marks all reachable types and variables as used. *) +class markReachableVisitor + ((globalMap: (string, Cil.global) H.t), + (currentFunc: fundec option ref)) = object (self) + inherit nopCilVisitor + + method vglob = function + | GType (typeinfo, _) -> + typeinfo.treferenced <- true; + DoChildren + | GCompTag (compinfo, _) + | GCompTagDecl (compinfo, _) -> + compinfo.creferenced <- true; + DoChildren + | GEnumTag (enuminfo, _) + | GEnumTagDecl (enuminfo, _) -> + enuminfo.ereferenced <- true; + DoChildren + | GVar (varinfo, _, _) + | GVarDecl (varinfo, _) + | GFun ({svar = varinfo}, _) -> + varinfo.vreferenced <- true; + DoChildren + | _ -> + SkipChildren + + method vinst = function + Asm (_, tmpls, _, _, _, _) when !msvcMode -> + (* If we have inline assembly on MSVC, we cannot tell which locals + * are referenced. Keep thsem all *) + (match !currentFunc with + Some fd -> + List.iter (fun v -> + let vre = Str.regexp_string (Str.quote v.vname) in + if List.exists (fun tmp -> + try ignore (Str.search_forward vre tmp 0); true + with Not_found -> false) + tmpls + then + v.vreferenced <- true) fd.slocals + | _ -> assert false); + DoChildren + | _ -> DoChildren + + method vvrbl v = + if not v.vreferenced then + begin + let name = v.vname in + if v.vglob then + trace (dprintf "marking transitive use: global %s\n" name) + else + trace (dprintf "marking transitive use: local %s\n" name); + + (* If this is a global, we need to keep everything used in its + * definition and declarations. *) + if v.vglob then + begin + trace (dprintf "descending: global %s\n" name); + let descend global = + ignore (visitCilGlobal (self :> cilVisitor) global) + in + let globals = Hashtbl.find_all globalMap name in + List.iter descend globals + end + else + v.vreferenced <- true; + end; + SkipChildren + + method vexpr (e: exp) = + match e with + Const (CEnum (_, _, ei)) -> ei.ereferenced <- true; + DoChildren + | _ -> DoChildren + + method vtype typ = + let old : bool = + let visitAttrs attrs = + ignore (visitCilAttributes (self :> cilVisitor) attrs) + in + let visitType typ = + ignore (visitCilType (self :> cilVisitor) typ) + in + match typ with + | TEnum(e, attrs) -> + let old = e.ereferenced in + if not old then + begin + trace (dprintf "marking transitive use: enum %s\n" e.ename); + e.ereferenced <- true; + visitAttrs attrs; + visitAttrs e.eattr + end; + old + + | TComp(c, attrs) -> + let old = c.creferenced in + if not old then + begin + trace (dprintf "marking transitive use: compound %s\n" c.cname); + c.creferenced <- true; + + (* to recurse, we must ask explicitly *) + let recurse f = visitType f.ftype in + List.iter recurse c.cfields; + visitAttrs attrs; + visitAttrs c.cattr + end; + old + + | TNamed(ti, attrs) -> + let old = ti.treferenced in + if not old then + begin + trace (dprintf "marking transitive use: typedef %s\n" ti.tname); + ti.treferenced <- true; + + (* recurse deeper into the type referred-to by the typedef *) + (* to recurse, we must ask explicitly *) + visitType ti.ttype; + visitAttrs attrs + end; + old + + | _ -> + (* for anything else, just look inside it *) + false + in + if old then + SkipChildren + else + DoChildren +end + + +let markReachable file isRoot = + (* build a mapping from global names back to their definitions & + * declarations *) + let globalMap = Hashtbl.create 137 in + let considerGlobal global = + match global with + | GFun ({svar = info}, _) + | GVar (info, _, _) + | GVarDecl (info, _) -> + Hashtbl.add globalMap info.vname global + | _ -> + () + in + iterGlobals file considerGlobal; + + let currentFunc = ref None in + + (* mark everything reachable from the global roots *) + let visitor = new markReachableVisitor (globalMap, currentFunc) in + let visitIfRoot global = + if isRoot global then + begin + trace (dprintf "traversing root global: %a\n" d_shortglobal global); + (match global with + GFun(fd, _) -> currentFunc := Some fd + | _ -> currentFunc := None); + ignore (visitCilGlobal visitor global) + end + else + trace (dprintf "skipping non-root global: %a\n" d_shortglobal global) + in + iterGlobals file visitIfRoot + + +(********************************************************************** + * + * Marking and removing of unused labels + * + **********************************************************************) + +(* We keep only one label, preferably one that was not introduced by CIL. + * Scan a list of labels and return the data for the label that should be + * kept, and the remaining filtered list of labels *) +let labelsToKeep (ll: label list) : (string * location * bool) * label list = + let rec loop (sofar: string * location * bool) = function + [] -> sofar, [] + | l :: rest -> + let newlabel, keepl = + match l with + | Case _ | Default _ -> sofar, true + | Label (ln, lloc, isorig) -> begin + match isorig, sofar with + | false, ("", _, _) -> + (* keep this one only if we have no label so far *) + (ln, lloc, isorig), false + | false, _ -> sofar, false + | true, (_, _, false) -> + (* this is an original label; prefer it to temporary or + * missing labels *) + (ln, lloc, isorig), false + | true, _ -> sofar, false + end + in + let newlabel', rest' = loop newlabel rest in + newlabel', (if keepl then l :: rest' else rest') + in + loop ("", locUnknown, false) ll + +class markUsedLabels (labelMap: (string, unit) H.t) = object + inherit nopCilVisitor + + method vstmt (s: stmt) = + match s.skind with + Goto (dest, _) -> + let (ln, _, _), _ = labelsToKeep !dest.labels in + if ln = "" then + E.s (E.bug "rmtmps: destination of statement does not have labels"); + (* Mark it as used *) + H.replace labelMap ln (); + DoChildren + + | _ -> DoChildren + + (* No need to go into expressions or instructions *) + method vexpr _ = SkipChildren + method vinst _ = SkipChildren + method vtype _ = SkipChildren +end + +class removeUnusedLabels (labelMap: (string, unit) H.t) = object + inherit nopCilVisitor + + method vstmt (s: stmt) = + let (ln, lloc, lorig), lrest = labelsToKeep s.labels in + s.labels <- + (if ln <> "" && H.mem labelMap ln then (* We had labels *) + (Label(ln, lloc, lorig) :: lrest) + else + lrest); + DoChildren + + (* No need to go into expressions or instructions *) + method vexpr _ = SkipChildren + method vinst _ = SkipChildren + method vtype _ = SkipChildren +end + +(*********************************************************************** + * + * Removal of unused symbols + * + *) + + +(* regular expression matching names of uninteresting locals *) +let uninteresting = + let names = [ + (* Cil.makeTempVar *) + "__cil_tmp"; + + (* sm: I don't know where it comes from but these show up all over. *) + (* this doesn't seem to do what I wanted.. *) + "iter"; + + (* various macros in glibc's *) + "__result"; + "__s"; "__s1"; "__s2"; + "__s1_len"; "__s2_len"; + "__retval"; "__len"; + + (* various macros in glibc's *) + "__c"; "__res"; + + (* We remove the __malloc variables *) + ] in + + (* optional alpha renaming *) + let alpha = "\\(___[0-9]+\\)?" in + + let pattern = "\\(" ^ (String.concat "\\|" names) ^ "\\)" ^ alpha ^ "$" in + Str.regexp pattern + + +let removeUnmarked file = + let removedLocals = ref [] in + + let filterGlobal global = + match global with + (* unused global types, variables, and functions are simply removed *) + | GType ({treferenced = false}, _) + | GCompTag ({creferenced = false}, _) + | GCompTagDecl ({creferenced = false}, _) + | GEnumTag ({ereferenced = false}, _) + | GEnumTagDecl ({ereferenced = false}, _) + | GVar ({vreferenced = false}, _, _) + | GVarDecl ({vreferenced = false}, _) + | GFun ({svar = {vreferenced = false}}, _) -> + trace (dprintf "removing global: %a\n" d_shortglobal global); + false + + (* retained functions may wish to discard some unused locals *) + | GFun (func, _) -> + let rec filterLocal local = + if not local.vreferenced then + begin + (* along the way, record the interesting locals that were removed *) + let name = local.vname in + trace (dprintf "removing local: %s\n" name); + if not (Str.string_match uninteresting name 0) then + removedLocals := (func.svar.vname ^ "::" ^ name) :: !removedLocals; + end; + local.vreferenced + in + func.slocals <- List.filter filterLocal func.slocals; + (* We also want to remove unused labels. We do it all here, including + * marking the used labels *) + let usedLabels:(string, unit) H.t = H.create 13 in + ignore (visitCilBlock (new markUsedLabels usedLabels) func.sbody); + (* And now we scan again and we remove them *) + ignore (visitCilBlock (new removeUnusedLabels usedLabels) func.sbody); + true + + (* all other globals are retained *) + | _ -> + trace (dprintf "keeping global: %a\n" d_shortglobal global); + true + in + file.globals <- List.filter filterGlobal file.globals; + !removedLocals + + +(*********************************************************************** + * + * Exported interface + * + *) + + +type rootsFilter = global -> bool + +let isDefaultRoot = isExportedRoot + +let rec removeUnusedTemps ?(isRoot : rootsFilter = isDefaultRoot) file = + if !keepUnused || Trace.traceActive "disableTmpRemoval" then + Trace.trace "disableTmpRemoval" (dprintf "temp removal disabled\n") + else + begin + if !E.verboseFlag then + ignore (E.log "Removing unused temporaries\n" ); + + if Trace.traceActive "printCilTree" then + dumpFile defaultCilPrinter stdout "stdout" file; + + (* digest any pragmas that would create additional roots *) + let keepers = categorizePragmas file in + + (* if slicing, remove the bodies of non-kept functions *) + if !Cilutil.sliceGlobal then + amputateFunctionBodies keepers.defines file; + + (* build up the root set *) + let isRoot global = + isPragmaRoot keepers global || + isRoot global + in + + (* mark everything reachable from the global roots *) + clearReferencedBits file; + markReachable file isRoot; + + (* take out the trash *) + let removedLocals = removeUnmarked file in + + (* print which original source variables were removed *) + if false && removedLocals != [] then + let count = List.length removedLocals in + if count > 2000 then + ignore (E.warn "%d unused local variables removed" count) + else + ignore (E.warn "%d unused local variables removed:@!%a" + count (docList ~sep:(chr ',' ++ break) text) removedLocals) + end diff --git a/cil/src/rmtmps.mli b/cil/src/rmtmps.mli new file mode 100644 index 00000000..e29f0c6b --- /dev/null +++ b/cil/src/rmtmps.mli @@ -0,0 +1,82 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Ben Liblit + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* rmtmps.mli *) +(* remove unused things from cil files: *) +(* - local temporaries introduced but not used *) +(* - global declarations that are not used *) +(* - types that are not used *) +(* - labels that are not used (gn) *) + + +(* Some clients may wish to augment or replace the standard strategy + * for finding the initially reachable roots. The optional + * "isRoot" argument to Rmtmps.removeUnusedTemps grants this + * flexibility. If given, it should name a function which will return + * true if a given global should be treated as a retained root. + * + * Function Rmtmps.isDefaultRoot encapsulates the default root + * collection, which consists of those global variables and functions + * which are visible to the linker and runtime loader. A client's + * root filter can use this if the goal is to augment rather than + * replace the standard logic. Function Rmtmps.isExportedRoot is an + * alternate name for this same function. + * + * Function Rmtmps.isCompleteProgramRoot is an example of an alternate + * root collection. This function assumes that it is operating on a + * complete program rather than just one object file. It treats + * "main()" as a root, as well as any function carrying the + * "constructor" or "destructor" attribute. All other globals are + * candidates for removal, regardless of their linkage. + * + * Note that certain CIL- and CCured-specific pragmas induce + * additional global roots. This functionality is always present, and + * is not subject to replacement by "filterRoots". + *) + +type rootsFilter = Cil.global -> bool +val isDefaultRoot : rootsFilter +val isExportedRoot : rootsFilter +val isCompleteProgramRoot : rootsFilter + +(* process a complete Cil file *) +val removeUnusedTemps: ?isRoot:rootsFilter -> Cil.file -> unit + + +val keepUnused: bool ref (* Set this to true to turn off this module *) +val rmUnusedInlines: bool ref (* Delete unused inline funcs in gcc mode? *) diff --git a/cil/src/testcil.ml b/cil/src/testcil.ml new file mode 100644 index 00000000..0c0ef018 --- /dev/null +++ b/cil/src/testcil.ml @@ -0,0 +1,440 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* A test for CIL *) +open Pretty +open Cil +module E = Errormsg + +let lu = locUnknown + +(* If you have trouble try to reproduce the problem on a smaller type. Try + * limiting the maxNesting and integerKinds *) +let integerKinds = [ IChar; ISChar; IUChar; IInt; IUInt; IShort; IUShort; + ILong; IULong; ILongLong; IULongLong ] +let floatKinds = [ FFloat; FDouble ] + +let baseTypes = + (List.map (fun ik -> (1, fun _ -> TInt(ik, []))) integerKinds) + @ (List.map (fun fk -> (1, fun _ -> TFloat(fk, []))) floatKinds) + + +(* Make a random struct *) +let maxNesting = ref 3 (* Maximum number of levels for struct nesting *) +let maxFields = ref 8 (* The maximum number of fields in a struct *) +let useBitfields = ref false +let useZeroBitfields = ref true + + + +(* Collect here the globals *) +let globals: global list ref = ref [] +let addGlobal (g:global) = globals := g :: !globals +let getGlobals () = List.rev !globals + +(* Collect here the statements for main *) +let statements: stmt list ref = ref [] +let addStatement (s: stmt) = statements := s :: !statements +let getStatements () = List.rev !statements + +(* Keep here the main function *) +let main: fundec ref = ref dummyFunDec +let mainRetVal: varinfo ref = ref dummyFunDec.svar + +let assertId = ref 0 +let addAssert (b: exp) (extra: stmt list) : unit = + incr assertId; + addStatement (mkStmt (If(UnOp(LNot, b, intType), + mkBlock (extra @ + [mkStmt (Return (Some (integer !assertId), + lu))]), + mkBlock [], lu))) + +let addSetRetVal (b: exp) (extra: stmt list) : unit = + addStatement + (mkStmt (If(UnOp(LNot, b, intType), + mkBlock (extra @ + [mkStmtOneInstr (Set(var !mainRetVal, one, lu))]), + mkBlock [], lu))) + + +let printfFun: fundec = + let fdec = emptyFunction "printf" in + fdec.svar.vtype <- + TFun(intType, Some [ ("format", charPtrType, [])], true, []); + fdec + + +let memsetFun: fundec = + let fdec = emptyFunction "memset" in + fdec.svar.vtype <- + TFun(voidPtrType, Some [ ("start", voidPtrType, []); + ("v", intType, []); + ("len", uintType, [])], false, []); + fdec + +let checkOffsetFun: fundec = + let fdec = emptyFunction "checkOffset" in + fdec.svar.vtype <- + TFun(voidType, Some [ ("start", voidPtrType, []); + ("len", uintType, []); + ("expected_start", intType, []); + ("expected_width", intType, []); + ("name", charPtrType, []) ], false, []); + fdec + +let checkSizeOfFun: fundec = + let fdec = emptyFunction "checkSizeOf" in + fdec.svar.vtype <- + TFun(voidType, Some [ ("len", uintType, []); + ("expected", intType, []); + ("name", charPtrType, []) ], false, []); + fdec + + +let doPrintf format args = + mkStmtOneInstr (Call(None, Lval(var printfFun.svar), + (Const(CStr format)) :: args, lu)) + + +(* Select among the choices, each with a given weight *) +type 'a selection = int * (unit -> 'a) +let select (choices: 'a selection list) : 'a = + (* Find the total weight *) + let total = List.fold_left (fun sum (w, _) -> sum + w) 0 choices in + if total = 0 then E.s (E.bug "Total for choices = 0\n"); + (* Pick a random number *) + let thechoice = Random.int total in + (* Now get the choice *) + let rec loop thechoice = function + [] -> E.s (E.bug "Ran out of choices\n") + | (w, c) :: rest -> + if thechoice < w then c () else loop (thechoice - w) rest + in + loop thechoice choices + + +(* Generate a new name *) +let nameId = ref 0 +let newName (base: string) = + incr nameId; + base ^ (string_of_int !nameId) + + +(********** Testing of SIZEOF ***********) + +(* The current selection of types *) +let typeChoices : typ selection list ref = ref [] + +let baseTypeChoices : typ selection list ref = ref [] + + +let currentNesting = ref 0 +let mkCompType (iss: bool) = + if !currentNesting >= !maxNesting then (* Replace it with an int *) + select !baseTypeChoices + else begin + incr currentNesting; + let ci = + mkCompInfo iss (newName "comp") + (fun _ -> + let nrFields = 1 + (Random.int !maxFields) in + let rec mkFields (i: int) = + if i = nrFields then [] else begin + let ft = select !typeChoices in + let fname = "f" ^ string_of_int i in + let fname', width = + if not !useBitfields || not (isIntegralType ft) + || (Random.int 8 >= 6) then + fname, None + else begin + let tw = bitsSizeOf ft in (* Assume this works for TInt *) + let w = (if !useZeroBitfields then 0 else 1) + + Random.int (3 * tw / 4) in + (if w = 0 then "___missing_field_name" else fname), Some w + end + in + (fname', ft, width, [], lu) :: mkFields (i + 1) + end + in + mkFields 0) + [] + in + decr currentNesting; + (* Register it with the file *) + addGlobal (GCompTag(ci, lu)); + TComp(ci, []) + end + +(* Make a pointer type. They are all equal so make one to void *) +let mkPtrType () = TPtr(TVoid([]), []) + +(* Make an array type. *) +let mkArrayType () = + if !currentNesting >= !maxNesting then + select !baseTypeChoices + else begin + incr currentNesting; + let at = TArray(select !typeChoices, Some (integer (1 + (Random.int 32))), + []) in + decr currentNesting; + at + end + + +let testSizeOf () = + let doOne (i: int) = +(* ignore (E.log "doOne %d\n" i); *) + (* Make a random type *) + let t = select !typeChoices in + (* Create a global with that type *) + let g = makeGlobalVar (newName "g") t in + addGlobal (GVar(g, {init=None}, lu)); + addStatement (mkStmtOneInstr(Call(None, Lval(var memsetFun.svar), + [ mkAddrOrStartOf (var g); zero; + SizeOfE(Lval(var g))], lu))); + try +(* if i = 0 then ignore (E.log "0: %a\n" d_plaintype t); *) + let bsz = + try bitsSizeOf t (* This is what we are testing *) + with e -> begin + ignore (E.log "Exception %s caught while computing bitsSizeOf(%a)\n" + (Printexc.to_string e) d_type t); + raise (Failure "") + end + in +(* ignore (E.log "1 "); *) + if bsz mod 8 <> 0 then begin + ignore (E.log "bitsSizeOf did not return a multiple of 8\n"); + raise (Failure ""); + end; +(* ignore (E.log "2 "); *) + (* Check the offset of all fields in there *) + let rec checkOffsets (lv: lval) (lvt: typ) = + match lvt with + TComp(c, _) -> + List.iter + (fun f -> + if f.fname <> "___missing_field_name" then + checkOffsets (addOffsetLval (Field(f, NoOffset)) lv) f.ftype) + c.cfields + | TArray (bt, Some len, _) -> + let leni = + match isInteger len with + Some i64 -> Int64.to_int i64 + | None -> E.s (E.bug "Array length is not a constant") + in + let i = Random.int leni in + checkOffsets (addOffsetLval (Index(integer i, NoOffset)) lv) bt + + | _ -> (* Now a base type *) + let _, off = lv in + let start, width = bitsOffset t off in + let setLv (v: exp) = + match lvt with + TFloat (FFloat, _) -> + Set((Mem (mkCast (AddrOf lv) intPtrType), NoOffset), + v, lu) + | TFloat (FDouble, _) -> + Set((Mem (mkCast (AddrOf lv) + (TPtr(TInt(IULongLong, []), []))), NoOffset), + mkCast v (TInt(IULongLong, [])), lu) + + | (TPtr _ | TInt((IULongLong|ILongLong), _)) -> + Set(lv, mkCast v lvt, lu) + | _ -> Set(lv, v, lu) + in + let ucharPtrType = TPtr(TInt(IUChar, []), []) in + let s = + mkStmt (Instr ([ setLv mone; + Call(None, Lval(var checkOffsetFun.svar), + [ mkCast (mkAddrOrStartOf (var g)) + ucharPtrType; + SizeOfE (Lval(var g)); + integer start; + integer width; + (Const(CStr(sprint 80 + (d_lval () lv))))],lu); + setLv zero])) in + addStatement s + in + checkOffsets (var g) t; +(* ignore (E.log "3 ");*) + (* Now check the size of *) + let s = mkStmtOneInstr (Call(None, Lval(var checkSizeOfFun.svar), + [ SizeOfE (Lval (var g)); + integer (bitsSizeOf t); + mkString g.vname ], lu)) in + addStatement s; +(* ignore (E.log "10\n"); *) + with _ -> () + in + + (* Make the composite choices more likely *) + typeChoices := + [ (1, mkPtrType); + (5, mkArrayType); + (5, fun _ -> mkCompType true); + (5, fun _ -> mkCompType false); ] + @ baseTypes; + baseTypeChoices := baseTypes; + useBitfields := false; + maxFields := 4; + for i = 0 to 100 do + doOne i + done; + + (* Now test the bitfields. *) + typeChoices := [ (1, fun _ -> mkCompType true) ]; + baseTypeChoices := [(1, fun _ -> TInt(IInt, []))]; + useBitfields := true; + + for i = 0 to 100 do + doOne i + done; + + (* Now make it a bit more complicated *) + baseTypeChoices := + List.map (fun ik -> (1, fun _ -> TInt(ik, []))) + [IInt; ILong; IUInt; IULong ]; + useBitfields := true; + for i = 0 to 100 do + doOne i + done; + + (* An really complicated now *) + baseTypeChoices := baseTypes; + useBitfields := true; + for i = 0 to 100 do + doOne i + done; + + () + + +(* Now the main tester. Pass to it the name of a command "cmd" that when + * invoked will compile "testingcil.c" and run the result *) +let createFile () = + + assertId := 0; + nameId := 0; + + (* Start a new file *) + globals := []; + statements := []; + + (* Now make a main function *) + main := emptyFunction "main"; + !main.svar.vtype <- TFun(intType, None, false, []); + mainRetVal := makeGlobalVar "retval" intType; + + addGlobal (GVar(!mainRetVal, {init=None}, lu)); + addGlobal (GText("#include \"testcil.h\"\n")); + addStatement (mkStmtOneInstr(Set(var !mainRetVal, zero, lu))); + + (* Add prototype for printf *) + addGlobal (GVar(printfFun.svar, {init=None}, lu)); + addGlobal (GVar(memsetFun.svar, {init=None}, lu)); + + (* now fill in the composites and the code of main. For simplicity we add + * the statements of main in reverse order *) + + testSizeOf (); + + + (* Now add a return 0 at the end *) + addStatement (mkStmt (Return(Some (Lval(var !mainRetVal)), lu))); + + + (* Add main at the end *) + addGlobal (GFun(!main, lu)); + !main.sbody.bstmts <- getStatements (); + + (* Now build the CIL.file *) + let file = + { fileName = "testingcil.c"; + globals = getGlobals (); + globinit = None; + globinitcalled = false; + } + in + (* Print the file *) + let oc = open_out "testingcil.c" in + dumpFile defaultCilPrinter oc "testingcil.c" file; + close_out oc + + + + + +(* initialization code for the tester *) +let randomStateFile = "testcil.random" (* The name of a file where we store + * the state of the random number + * generator last time *) +let doit (command: string) = + while true do + (* Initialize the random no generator *) + begin + try + let randomFile = open_in randomStateFile in + (* The file exists so restore the Random state *) + Random.set_state (Marshal.from_channel randomFile); + ignore (E.log "!! Restoring Random state from %s\n" randomStateFile); + close_in randomFile; + (* Leave the file there until we succeed *) + with _ -> begin + (* The file does not exist *) + Random.self_init (); + (* Save the state of the generator *) + let randomFile = open_out randomStateFile in + Marshal.to_channel randomFile (Random.get_state()) [] ; + close_out randomFile; + end + end; + createFile (); + (* Now compile and run the file *) + ignore (E.log "Running %s\n" command); + let err = Sys.command command in + if err <> 0 then + E.s (E.bug "Failed to run the command: %s (errcode=%d)" command err) + else begin + ignore (E.log "Successfully ran one more round. Press CTRL-C to stop\n"); + (* Delete the file *) + Sys.remove randomStateFile + end + done + -- cgit