From a5f03d96eee482cd84861fc8cefff9eb451c0cad Mon Sep 17 00:00:00 2001 From: xleroy Date: Sun, 29 Mar 2009 09:47:11 +0000 Subject: Cleaned up configure script. Distribution of CIL as an expanded source tree with changes applied (instead of original .tar.gz + patches to be applied at config time). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1020 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- cil/src/ext/astslicer.ml | 454 +++++++++++ cil/src/ext/availexps.ml | 359 +++++++++ cil/src/ext/bitmap.ml | 224 ++++++ cil/src/ext/bitmap.mli | 50 ++ cil/src/ext/blockinggraph.ml | 769 ++++++++++++++++++ cil/src/ext/blockinggraph.mli | 40 + cil/src/ext/callgraph.ml | 250 ++++++ cil/src/ext/callgraph.mli | 123 +++ cil/src/ext/canonicalize.ml | 292 +++++++ cil/src/ext/canonicalize.mli | 48 ++ cil/src/ext/cfg.ml | 289 +++++++ cil/src/ext/cfg.mli | 36 + cil/src/ext/ciltools.ml | 228 ++++++ cil/src/ext/dataflow.ml | 466 +++++++++++ cil/src/ext/dataflow.mli | 151 ++++ cil/src/ext/dataslicing.ml | 462 +++++++++++ cil/src/ext/dataslicing.mli | 41 + cil/src/ext/deadcodeelim.ml | 173 ++++ cil/src/ext/dominators.ml | 241 ++++++ cil/src/ext/dominators.mli | 29 + cil/src/ext/epicenter.ml | 114 +++ cil/src/ext/heap.ml | 112 +++ cil/src/ext/heapify.ml | 250 ++++++ cil/src/ext/liveness.ml | 190 +++++ cil/src/ext/logcalls.ml | 268 +++++++ cil/src/ext/logcalls.mli | 41 + cil/src/ext/logwrites.ml | 139 ++++ cil/src/ext/oneret.ml | 187 +++++ cil/src/ext/oneret.mli | 44 ++ cil/src/ext/partial.ml | 851 ++++++++++++++++++++ cil/src/ext/pta/golf.ml | 1657 +++++++++++++++++++++++++++++++++++++++ cil/src/ext/pta/golf.mli | 83 ++ cil/src/ext/pta/olf.ml | 1108 ++++++++++++++++++++++++++ cil/src/ext/pta/olf.mli | 80 ++ cil/src/ext/pta/ptranal.ml | 597 ++++++++++++++ cil/src/ext/pta/ptranal.mli | 156 ++++ cil/src/ext/pta/setp.ml | 342 ++++++++ cil/src/ext/pta/setp.mli | 180 +++++ cil/src/ext/pta/steensgaard.ml | 1417 +++++++++++++++++++++++++++++++++ cil/src/ext/pta/steensgaard.mli | 71 ++ cil/src/ext/pta/uref.ml | 94 +++ cil/src/ext/pta/uref.mli | 65 ++ cil/src/ext/reachingdefs.ml | 511 ++++++++++++ cil/src/ext/sfi.ml | 337 ++++++++ cil/src/ext/simplemem.ml | 132 ++++ cil/src/ext/simplify.ml | 845 ++++++++++++++++++++ cil/src/ext/ssa.ml | 696 ++++++++++++++++ cil/src/ext/ssa.mli | 45 ++ cil/src/ext/stackoverflow.ml | 246 ++++++ cil/src/ext/stackoverflow.mli | 43 + cil/src/ext/usedef.ml | 188 +++++ 51 files changed, 15814 insertions(+) create mode 100644 cil/src/ext/astslicer.ml create mode 100644 cil/src/ext/availexps.ml create mode 100644 cil/src/ext/bitmap.ml create mode 100644 cil/src/ext/bitmap.mli create mode 100644 cil/src/ext/blockinggraph.ml create mode 100644 cil/src/ext/blockinggraph.mli create mode 100644 cil/src/ext/callgraph.ml create mode 100644 cil/src/ext/callgraph.mli create mode 100644 cil/src/ext/canonicalize.ml create mode 100644 cil/src/ext/canonicalize.mli create mode 100644 cil/src/ext/cfg.ml create mode 100644 cil/src/ext/cfg.mli create mode 100755 cil/src/ext/ciltools.ml create mode 100755 cil/src/ext/dataflow.ml create mode 100755 cil/src/ext/dataflow.mli create mode 100644 cil/src/ext/dataslicing.ml create mode 100644 cil/src/ext/dataslicing.mli create mode 100644 cil/src/ext/deadcodeelim.ml create mode 100755 cil/src/ext/dominators.ml create mode 100755 cil/src/ext/dominators.mli create mode 100644 cil/src/ext/epicenter.ml create mode 100644 cil/src/ext/heap.ml create mode 100644 cil/src/ext/heapify.ml create mode 100644 cil/src/ext/liveness.ml create mode 100644 cil/src/ext/logcalls.ml create mode 100644 cil/src/ext/logcalls.mli create mode 100644 cil/src/ext/logwrites.ml create mode 100644 cil/src/ext/oneret.ml create mode 100644 cil/src/ext/oneret.mli create mode 100644 cil/src/ext/partial.ml create mode 100644 cil/src/ext/pta/golf.ml create mode 100644 cil/src/ext/pta/golf.mli create mode 100644 cil/src/ext/pta/olf.ml create mode 100644 cil/src/ext/pta/olf.mli create mode 100644 cil/src/ext/pta/ptranal.ml create mode 100644 cil/src/ext/pta/ptranal.mli create mode 100644 cil/src/ext/pta/setp.ml create mode 100644 cil/src/ext/pta/setp.mli create mode 100644 cil/src/ext/pta/steensgaard.ml create mode 100644 cil/src/ext/pta/steensgaard.mli create mode 100644 cil/src/ext/pta/uref.ml create mode 100644 cil/src/ext/pta/uref.mli create mode 100644 cil/src/ext/reachingdefs.ml create mode 100755 cil/src/ext/sfi.ml create mode 100644 cil/src/ext/simplemem.ml create mode 100755 cil/src/ext/simplify.ml create mode 100644 cil/src/ext/ssa.ml create mode 100644 cil/src/ext/ssa.mli create mode 100644 cil/src/ext/stackoverflow.ml create mode 100644 cil/src/ext/stackoverflow.mli create mode 100755 cil/src/ext/usedef.ml (limited to 'cil/src/ext') diff --git a/cil/src/ext/astslicer.ml b/cil/src/ext/astslicer.ml new file mode 100644 index 00000000..ffba4827 --- /dev/null +++ b/cil/src/ext/astslicer.ml @@ -0,0 +1,454 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +open Cil +module E = Errormsg +(* + * Weimer: an AST Slicer for use in Daniel's Delta Debugging Algorithm. + *) +let debug = ref false + +(* + * This type encapsulates a mapping form program locations to names + * in our naming convention. + *) +type enumeration_info = { + statements : (stmt, string) Hashtbl.t ; + instructions : (instr, string) Hashtbl.t ; +} + +(********************************************************************** + * Enumerate 1 + * + * Given a cil file, enumerate all of the statement names in it using + * our naming scheme. + **********************************************************************) +let enumerate out (f : Cil.file) = + let st_ht = Hashtbl.create 32767 in + let in_ht = Hashtbl.create 32767 in + + let emit base i ht elt = + let str = Printf.sprintf "%s.%d" base !i in + Printf.fprintf out "%s\n" str ; + Hashtbl.add ht elt str ; + incr i + in + let emit_call base i str2 ht elt = + let str = Printf.sprintf "%s.%d" base !i in + Printf.fprintf out "%s - %s\n" str str2 ; + Hashtbl.add ht elt str ; + incr i + in + let descend base i = + let res = (Printf.sprintf "%s.%d" base !i),(ref 0) in + res + in + let rec doBlock b base i = + doStmtList b.bstmts base i + and doStmtList sl base i = + List.iter (fun s -> match s.skind with + | Instr(il) -> doIL il base i + | Return(_,_) + | Goto(_,_) + | Continue(_) + | Break(_) -> emit base i st_ht s + | If(e,b1,b2,_) -> + emit base i st_ht s ; + decr i ; + Printf.fprintf out "(\n" ; + let base',i' = descend base i in + doBlock b1 base' i' ; + Printf.fprintf out ") (\n" ; + let base'',i'' = descend base i in + doBlock b2 base'' i'' ; + Printf.fprintf out ")\n" ; + incr i + | Switch(_,b,_,_) +(* + | Loop(b,_,_,_) +*) + | While(_,b,_) + | DoWhile(_,b,_) + | For(_,_,_,b,_) + | Block(b) -> + emit base i st_ht s ; + decr i ; + let base',i' = descend base i in + Printf.fprintf out "(\n" ; + doBlock b base' i' ; + Printf.fprintf out ")\n" ; + incr i + | TryExcept _ | TryFinally _ -> + E.s (E.unimp "astslicer:enumerate") + ) sl + and doIL il base i = + List.iter (fun ins -> match ins with + | Set _ + | Asm _ -> emit base i in_ht ins + | Call(_,(Lval(Var(vi),NoOffset)),_,_) -> + emit_call base i vi.vname in_ht ins + | Call(_,f,_,_) -> emit_call base i "*" in_ht ins + ) il + in + let doGlobal g = match g with + | GFun(fd,_) -> + Printf.fprintf out "%s (\n" fd.svar.vname ; + let cur = ref 0 in + doBlock fd.sbody fd.svar.vname cur ; + Printf.fprintf out ")\n" ; + () + | _ -> () + in + List.iter doGlobal f.globals ; + { statements = st_ht ; + instructions = in_ht ; } + +(********************************************************************** + * Enumerate 2 + * + * Given a cil file and some enumeration information, do a log-calls-like + * transformation on it that prints out our names as you reach them. + **********************************************************************) +(* + * This is the visitor that handles annotations + *) +let print_it pfun name = + ((Call(None,Lval(Var(pfun),NoOffset), + [mkString (name ^ "\n")],locUnknown))) + +class enumVisitor pfun st_ht in_ht = object + inherit nopCilVisitor + method vinst i = + if Hashtbl.mem in_ht i then begin + let name = Hashtbl.find in_ht i in + let newinst = print_it pfun name in + ChangeTo([newinst ; i]) + end else + DoChildren + method vstmt s = + if Hashtbl.mem st_ht s then begin + let name = Hashtbl.find st_ht s in + let newinst = print_it pfun name in + let newstmt = mkStmtOneInstr newinst in + let newblock = mkBlock [newstmt ; s] in + let replace_with = mkStmt (Block(newblock)) in + ChangeDoChildrenPost(s,(fun i -> replace_with)) + end else + DoChildren + method vfunc f = + let newinst = print_it pfun f.svar.vname in + let newstmt = mkStmtOneInstr newinst in + let new_f = { f with sbody = { f.sbody with + bstmts = newstmt :: f.sbody.bstmts }} in + ChangeDoChildrenPost(new_f,(fun i -> i)) +end + +let annotate (f : Cil.file) ei = begin + (* Create a prototype for the logging function *) + let printfFun = + let fdec = emptyFunction "printf" in + let argf = makeLocalVar fdec "format" charConstPtrType in + fdec.svar.vtype <- TFun(intType, Some [ ("format", charConstPtrType, [])], + true, []); + fdec + in + let visitor = (new enumVisitor printfFun.svar ei.statements + ei.instructions) in + visitCilFileSameGlobals visitor f; + f +end + +(********************************************************************** + * STAGE 2 + * + * Perform a transitive-closure-like operation on the parts of the program + * that the user wants to keep. We use a CIL visitor to walk around + * and a number of hash tables to keep track of the things we want to keep. + **********************************************************************) +(* + * Hashtables: + * ws - wanted stmts + * wi - wanted instructions + * wt - wanted typeinfo + * wc - wanted compinfo + * we - wanted enuminfo + * wv - wanted varinfo + *) + +let mode = ref false (* was our parented wanted? *) +let finished = ref true (* set to false if we update something *) + +(* In the given hashtable, mark the given element was "wanted" *) +let update ht elt = + if Hashtbl.mem ht elt && (Hashtbl.find ht elt = true) then () + else begin + Hashtbl.add ht elt true ; + finished := false + end + +(* Handle a particular stage of the AST tree walk. Use "mode" (i.e., + * whether our parent was wanted) and the hashtable (which tells us whether + * the user had any special instructions for this element) to determine + * what do to. *) +let handle ht elt rep = + if !mode then begin + if Hashtbl.mem ht elt && (Hashtbl.find ht elt = false) then begin + (* our parent is Wanted but we were told to ignore this subtree, + * so we won't be wanted. *) + mode := false ; + ChangeDoChildrenPost(rep,(fun elt -> mode := true ; elt)) + end else begin + (* we were not told to ignore this subtree, and our parent is + * Wanted, so we will be Wanted too! *) + update ht elt ; + DoChildren + end + end else if Hashtbl.mem ht elt && (Hashtbl.find ht elt = true) then begin + (* our parent was not wanted but we were wanted, so turn the + * mode on for now *) + mode := true ; + ChangeDoChildrenPost(rep,(fun elt -> mode := false ; elt)) + end else + DoChildren + +let handle_no_default ht elt rep old_mode = + if Hashtbl.mem ht elt && (Hashtbl.find ht elt = true) then begin + (* our parent was not wanted but we were wanted, so turn the + * mode on for now *) + mode := true ; + ChangeDoChildrenPost(rep,(fun elt -> mode := old_mode ; elt)) + end else begin + mode := false ; + ChangeDoChildrenPost(rep,(fun elt -> mode := old_mode ; elt)) + end + +(* + * This is the visitor that handles elements (marks them as wanted) + *) +class transVisitor ws wi wt wc we wv = object + inherit nopCilVisitor + + method vvdec vi = handle_no_default wv vi vi !mode + method vvrbl vi = handle wv vi vi + method vinst i = handle wi i [i] + method vstmt s = handle ws s s + method vfunc f = handle wv f.svar f + method vglob g = begin + match g with + | GType(ti,_) -> handle wt ti [g] + | GCompTag(ci,_) + | GCompTagDecl(ci,_) -> handle wc ci [g] + | GEnumTag(ei,_) + | GEnumTagDecl(ei,_) -> handle we ei [g] + | GVarDecl(vi,_) + | GVar(vi,_,_) -> handle wv vi [g] + | GFun(f,_) -> handle wv f.svar [g] + | _ -> DoChildren + end + method vtype t = begin + match t with + | TNamed(ti,_) -> handle wt ti t + | TComp(ci,_) -> handle wc ci t + | TEnum(ei,_) -> handle we ei t + | _ -> DoChildren + end +end + +(********************************************************************** + * STAGE 3 + * + * Eliminate all of the elements from the program that are not marked + * "keep". + **********************************************************************) +(* + * This is the visitor that throws away elements + *) +let handle ht elt keep drop = + if (Hashtbl.mem ht elt) && (Hashtbl.find ht elt = true) then + (* DoChildren *) ChangeDoChildrenPost(keep,(fun a -> a)) + else + ChangeTo(drop) + +class dropVisitor ws wi wt wc we wv = object + inherit nopCilVisitor + + method vinst i = handle wi i [i] [] + method vstmt s = handle ws s s (mkStmt (Instr([]))) + method vglob g = begin + match g with + | GType(ti,_) -> handle wt ti [g] [] + | GCompTag(ci,_) + | GCompTagDecl(ci,_) -> handle wc ci [g] [] + | GEnumTag(ei,_) + | GEnumTagDecl(ei,_) -> handle we ei [g] [] + | GVarDecl(vi,_) + | GVar(vi,_,_) -> handle wv vi [g] [] + | GFun(f,l) -> + let new_locals = List.filter (fun vi -> + Hashtbl.mem wv vi && (Hashtbl.find wv vi = true)) f.slocals in + let new_fundec = { f with slocals = new_locals} in + handle wv f.svar [(GFun(new_fundec,l))] [] + | _ -> DoChildren + end +end + +(********************************************************************** + * STAGE 1 + * + * Mark up the file with user-given information about what to keep and + * what to drop. + **********************************************************************) +type mark = Wanted | Unwanted | Unspecified +(* Given a cil file and a list of strings, mark all of the given ASTSlicer + * points as wanted or unwanted. *) +let mark_file (f : Cil.file) (names : (string, mark) Hashtbl.t) = + let ws = Hashtbl.create 32767 in + let wi = Hashtbl.create 32767 in + let wt = Hashtbl.create 32767 in + let wc = Hashtbl.create 32767 in + let we = Hashtbl.create 32767 in + let wv = Hashtbl.create 32767 in + if !debug then Printf.printf "Applying user marks to file ...\n" ; + let descend base i = + let res = (Printf.sprintf "%s.%d" base !i),(ref 0) in + res + in + let check base i (default : mark) = + let str = Printf.sprintf "%s.%d" base !i in + if !debug then Printf.printf "Looking for [%s]\n" str ; + try Hashtbl.find names str + with _ -> default + in + let mark ht stmt wanted = match wanted with + Unwanted -> Hashtbl.replace ht stmt false + | Wanted -> Hashtbl.replace ht stmt true + | Unspecified -> () + in + let rec doBlock b base i default = + doStmtList b.bstmts base i default + and doStmtList sl base i default = + List.iter (fun s -> match s.skind with + | Instr(il) -> doIL il base i default + | Return(_,_) + | Goto(_,_) + | Continue(_) + | Break(_) -> + mark ws s (check base i default) ; incr i + | If(e,b1,b2,_) -> + let inside = check base i default in + mark ws s inside ; + let base',i' = descend base i in + doBlock b1 base' i' inside ; + let base'',i'' = descend base i in + doBlock b2 base'' i'' inside ; + incr i + | Switch(_,b,_,_) +(* + | Loop(b,_,_,_) +*) + | While(_,b,_) + | DoWhile(_,b,_) + | For(_,_,_,b,_) + | Block(b) -> + let inside = check base i default in + mark ws s inside ; + let base',i' = descend base i in + doBlock b base' i' inside ; + incr i + | TryExcept _ | TryFinally _ -> + E.s (E.unimp "astslicer: mark") + ) sl + and doIL il base i default = + List.iter (fun ins -> mark wi ins (check base i default) ; incr i) il + in + let doGlobal g = match g with + | GFun(fd,_) -> + let cur = ref 0 in + if Hashtbl.mem names fd.svar.vname then begin + if Hashtbl.find names fd.svar.vname = Wanted then begin + Hashtbl.replace wv fd.svar true ; + doBlock fd.sbody fd.svar.vname cur (Wanted); + end else begin + Hashtbl.replace wv fd.svar false ; + doBlock fd.sbody fd.svar.vname cur (Unspecified); + end + end else begin + doBlock fd.sbody fd.svar.vname cur (Unspecified); + end + | _ -> () + in + List.iter doGlobal f.globals ; + if !debug then begin + Hashtbl.iter (fun k v -> + ignore (Pretty.printf "want-s %b %a\n" v d_stmt k)) ws ; + Hashtbl.iter (fun k v -> + ignore (Pretty.printf "want-i %b %a\n" v d_instr k)) wi ; + Hashtbl.iter (fun k v -> + ignore (Pretty.printf "want-v %b %s\n" v k.vname)) wv ; + end ; + (* + * Now repeatedly mark all other things that must be kept. + *) + let visitor = (new transVisitor ws wi wt wc we wv) in + finished := false ; + if !debug then (Printf.printf "\nPerforming Transitive Closure\n\n" ); + while not !finished do + finished := true ; + visitCilFileSameGlobals visitor f + done ; + if !debug then begin + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-s %a\n" d_stmt k)) ws ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-i %a\n" d_instr k)) wi ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-t %s\n" k.tname)) wt ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-c %s\n" k.cname)) wc ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-e %s\n" k.ename)) we ; + Hashtbl.iter (fun k v -> + if v then ignore (Pretty.printf "want-v %s\n" k.vname)) wv ; + end ; + + (* + * Now drop everything we didn't need. + *) + if !debug then (Printf.printf "Dropping Unwanted Elements\n" ); + let visitor = (new dropVisitor ws wi wt wc we wv) in + visitCilFile visitor f diff --git a/cil/src/ext/availexps.ml b/cil/src/ext/availexps.ml new file mode 100644 index 00000000..28c22c0e --- /dev/null +++ b/cil/src/ext/availexps.ml @@ -0,0 +1,359 @@ +(* compute available expressions, although in a somewhat + non-traditional way. the abstract state is a mapping from + variable ids to expressions as opposed to a set of + expressions *) + +open Cil +open Pretty + +module E = Errormsg +module DF = Dataflow +module UD = Usedef +module IH = Inthash +module U = Util +module S = Stats + +let debug = ref false + +(* exp IH.t -> exp IH.t -> bool *) +let eh_equals eh1 eh2 = + if not(IH.length eh1 = IH.length eh2) + then false + else IH.fold (fun vid e b -> + if not b then b else + try let e2 = IH.find eh2 vid in + if not(Util.equals e e2) + then false + else true + with Not_found -> false) + eh1 true + +let eh_pretty () eh = line ++ seq line (fun (vid,e) -> + text "AE:vid:" ++ num vid ++ text ": " ++ + (d_exp () e)) (IH.tolist eh) + +(* the result must be the intersection of eh1 and eh2 *) +(* exp IH.t -> exp IH.t -> exp IH.t *) +let eh_combine eh1 eh2 = + if !debug then ignore(E.log "eh_combine: combining %a\n and\n %a\n" + eh_pretty eh1 eh_pretty eh2); + let eh' = IH.copy eh1 in (* eh' gets all of eh1 *) + IH.iter (fun vid e1 -> + try let e2l = IH.find_all eh2 vid in + if not(List.exists (fun e2 -> Util.equals e1 e2) e2l) + (* remove things from eh' that eh2 doesn't have *) + then let e1l = IH.find_all eh' vid in + let e1l' = List.filter (fun e -> not(Util.equals e e1)) e1l in + IH.remove_all eh' vid; + List.iter (fun e -> IH.add eh' vid e) e1l' + with Not_found -> + IH.remove_all eh' vid) eh1; + if !debug then ignore(E.log "with result %a\n" + eh_pretty eh'); + eh' + +(* On a memory write, kill expressions containing memory writes + * or variables whose address has been taken. *) +let exp_ok = ref false +class memReadOrAddrOfFinderClass = object(self) + inherit nopCilVisitor + + method vexpr e = match e with + Lval(Mem _, _) -> + exp_ok := true; + SkipChildren + | _ -> DoChildren + + method vvrbl vi = + if vi.vaddrof then + (exp_ok := true; + SkipChildren) + else DoChildren + +end + +let memReadOrAddrOfFinder = new memReadOrAddrOfFinderClass + +(* exp -> bool *) +let exp_has_mem_read e = + exp_ok := false; + ignore(visitCilExpr memReadOrAddrOfFinder e); + !exp_ok + +let eh_kill_mem eh = + IH.iter (fun vid e -> + if exp_has_mem_read e + then IH.remove eh vid) + eh + +(* need to kill exps containing a particular vi sometimes *) +let has_vi = ref false +class viFinderClass vi = object(self) + inherit nopCilVisitor + + method vvrbl vi' = + if vi.vid = vi'.vid + then (has_vi := true; SkipChildren) + else DoChildren + +end + +let exp_has_vi e vi = + let vis = new viFinderClass vi in + has_vi := false; + ignore(visitCilExpr vis e); + !has_vi + +let eh_kill_vi eh vi = + IH.iter (fun vid e -> + if exp_has_vi e vi + then IH.remove eh vid) + eh + +let varHash = IH.create 32 + +let eh_kill_addrof_or_global eh = + if !debug then ignore(E.log "eh_kill: in eh_kill\n"); + IH.iter (fun vid e -> + try let vi = IH.find varHash vid in + if vi.vaddrof + then begin + if !debug then ignore(E.log "eh_kill: %s has its address taken\n" + vi.vname); + IH.remove eh vid + end + else if vi.vglob + then begin + if !debug then ignore(E.log "eh_kill: %s is global\n" + vi.vname); + IH.remove eh vid + end + with Not_found -> ()) eh + +let eh_handle_inst i eh = match i with + (* if a pointer write, kill things with read in them. + also kill mappings from vars that have had their address taken, + and globals. + otherwise kill things with lv in them and add e *) + Set(lv,e,_) -> (match lv with + (Mem _, _) -> + (eh_kill_mem eh; + eh_kill_addrof_or_global eh; + eh) + | (Var vi, NoOffset) -> + (match e with + Lval(Var vi', NoOffset) -> (* ignore x = x *) + if vi'.vid = vi.vid then eh else + (IH.replace eh vi.vid e; + eh_kill_vi eh vi; + eh) + | _ -> + (IH.replace eh vi.vid e; + eh_kill_vi eh vi; + eh)) + | _ -> eh) (* do nothing for now. *) +| Call(Some(Var vi,NoOffset),_,_,_) -> + (IH.remove eh vi.vid; + eh_kill_vi eh vi; + eh_kill_mem eh; + eh_kill_addrof_or_global eh; + eh) +| Call(_,_,_,_) -> + (eh_kill_mem eh; + eh_kill_addrof_or_global eh; + eh) +| Asm(_,_,_,_,_,_) -> + let _,d = UD.computeUseDefInstr i in + (UD.VS.iter (fun vi -> + eh_kill_vi eh vi) d; + eh) + +let allExpHash = IH.create 128 + +module AvailableExps = + struct + + let name = "Available Expressions" + + let debug = debug + + (* mapping from var id to expression *) + type t = exp IH.t + + let copy = IH.copy + + let stmtStartData = IH.create 64 + + let pretty = eh_pretty + + let computeFirstPredecessor stm eh = + eh_combine (IH.copy allExpHash) eh + + let combinePredecessors (stm:stmt) ~(old:t) (eh:t) = + if S.time "eh_equals" (eh_equals old) eh then None else + Some(S.time "eh_combine" (eh_combine old) eh) + + let doInstr i eh = + let action = eh_handle_inst i in + DF.Post(action) + + let doStmt stm astate = DF.SDefault + + let doGuard c astate = DF.GDefault + + let filterStmt stm = true + + end + +module AE = DF.ForwardsDataFlow(AvailableExps) + +(* make an exp IH.t with everything in it, + * also, fill in varHash while we're here. + *) +class expCollectorClass = object(self) + inherit nopCilVisitor + + method vinst i = match i with + Set((Var vi,NoOffset),e,_) -> + let e2l = IH.find_all allExpHash vi.vid in + if not(List.exists (fun e2 -> Util.equals e e2) e2l) + then IH.add allExpHash vi.vid e; + DoChildren + | _ -> DoChildren + + method vvrbl vi = + (if not(IH.mem varHash vi.vid) + then + (if !debug && vi.vglob then ignore(E.log "%s is global\n" vi.vname); + if !debug && not(vi.vglob) then ignore(E.log "%s is not global\n" vi.vname); + IH.add varHash vi.vid vi)); + DoChildren + +end + +let expCollector = new expCollectorClass + +let make_all_exps fd = + IH.clear allExpHash; + IH.clear varHash; + ignore(visitCilFunction expCollector fd) + + + +(* set all statement data to allExpHash, make + * a list of statements + *) +let all_stmts = ref [] +class allExpSetterClass = object(self) + inherit nopCilVisitor + + method vstmt s = + all_stmts := s :: (!all_stmts); + IH.add AvailableExps.stmtStartData s.sid (IH.copy allExpHash); + DoChildren + +end + +let allExpSetter = new allExpSetterClass + +let set_all_exps fd = + IH.clear AvailableExps.stmtStartData; + ignore(visitCilFunction allExpSetter fd) + +(* + * Computes AEs for function fd. + * + * + *) +(*let iAEsHtbl = Hashtbl.create 128*) +let computeAEs fd = + try let slst = fd.sbody.bstmts in + let first_stm = List.hd slst in + S.time "make_all_exps" make_all_exps fd; + all_stmts := []; + (*S.time "set_all_exps" set_all_exps fd;*) + (*Hashtbl.clear iAEsHtbl;*) + (*IH.clear (IH.find AvailableExps.stmtStartData first_stm.sid);*) + IH.add AvailableExps.stmtStartData first_stm.sid (IH.create 4); + S.time "compute" AE.compute [first_stm](*(List.rev !all_stmts)*) + with Failure "hd" -> if !debug then ignore(E.log "fn w/ no stmts?\n") + | Not_found -> if !debug then ignore(E.log "no data for first_stm?\n") + + +(* get the AE data for a statement *) +let getAEs sid = + try Some(IH.find AvailableExps.stmtStartData sid) + with Not_found -> None + +(* get the AE data for an instruction list *) +let instrAEs il sid eh out = + (*if Hashtbl.mem iAEsHtbl (sid,out) + then Hashtbl.find iAEsHtbl (sid,out) + else*) + let proc_one hil i = + match hil with + [] -> let eh' = IH.copy eh in + let eh'' = eh_handle_inst i eh' in + (*if !debug then ignore(E.log "instrAEs: proc_one []: for %a\n data is %a\n" + d_instr i eh_pretty eh'');*) + eh''::hil + | eh'::ehrst as l -> + let eh' = IH.copy eh' in + let eh'' = eh_handle_inst i eh' in + (*if !debug then ignore(E.log "instrAEs: proc_one: for %a\n data is %a\n" + d_instr i eh_pretty eh'');*) + eh''::l + in + let folded = List.fold_left proc_one [eh] il in + (*let foldedout = List.tl (List.rev folded) in*) + let foldednotout = List.rev (List.tl folded) in + (*Hashtbl.add iAEsHtbl (sid,true) foldedout; + Hashtbl.add iAEsHtbl (sid,false) foldednotout;*) + (*if out then foldedout else*) foldednotout + +class aeVisitorClass = object(self) + inherit nopCilVisitor + + val mutable sid = -1 + + val mutable ae_dat_lst = [] + + val mutable cur_ae_dat = None + + method vstmt stm = + sid <- stm.sid; + match getAEs sid with + None -> + if !debug then ignore(E.log "aeVis: stm %d has no data\n" sid); + cur_ae_dat <- None; + DoChildren + | Some eh -> + match stm.skind with + Instr il -> + if !debug then ignore(E.log "aeVist: visit il\n"); + ae_dat_lst <- S.time "instrAEs" (instrAEs il stm.sid eh) false; + DoChildren + | _ -> + if !debug then ignore(E.log "aeVisit: visit non-il\n"); + cur_ae_dat <- None; + DoChildren + + method vinst i = + if !debug then ignore(E.log "aeVist: before %a, ae_dat_lst is %d long\n" + d_instr i (List.length ae_dat_lst)); + try + let data = List.hd ae_dat_lst in + cur_ae_dat <- Some(data); + ae_dat_lst <- List.tl ae_dat_lst; + if !debug then ignore(E.log "aeVisit: data is %a\n" eh_pretty data); + DoChildren + with Failure "hd" -> + if !debug then ignore(E.log "aeVis: il ae_dat_lst mismatch\n"); + DoChildren + + method get_cur_eh () = + match cur_ae_dat with + None -> getAEs sid + | Some eh -> Some eh + +end diff --git a/cil/src/ext/bitmap.ml b/cil/src/ext/bitmap.ml new file mode 100644 index 00000000..da1f8b99 --- /dev/null +++ b/cil/src/ext/bitmap.ml @@ -0,0 +1,224 @@ + + (* Imperative bitmaps *) +type t = { mutable nrWords : int; + mutable nrBits : int; (* This is 31 * nrWords *) + mutable bitmap : int array } + + + (* Enlarge a bitmap to contain at + * least newBits *) +let enlarge b newWords = + let newbitmap = + if newWords > b.nrWords then + let a = Array.create newWords 0 in + Array.blit b.bitmap 0 a 0 b.nrWords; + a + else + b.bitmap in + b.nrWords <- newWords; + b.nrBits <- (newWords lsl 5) - newWords; + b.bitmap <- newbitmap + + + (* Create a new empty bitmap *) +let make size = + let wrd = (size + 30) / 31 in + { nrWords = wrd; + nrBits = (wrd lsl 5) - wrd; + bitmap = Array.make wrd 0 + } + +let size t = t.nrBits + (* Make an initialized array *) +let init size how = + let wrd = (size + 30) / 31 in + let how' w = + let first = (w lsl 5) - w in + let last = min size (first + 31) in + let rec loop i acc = + if i >= last then acc + else + let acc' = acc lsl 1 in + if how i then loop (i + 1) (acc' lor 1) + else loop (i + 1) acc' + in + loop first 0 + in + { nrWords = wrd; + nrBits = (wrd lsl 5) - wrd; + bitmap = Array.init wrd how' + } + +let clone b = + { nrWords = b.nrWords; + nrBits = b.nrBits; + bitmap = Array.copy b.bitmap; + } + +let cloneEmpty b = + { nrWords = b.nrWords; + nrBits = b.nrBits; + bitmap = Array.make b.nrWords 0; + } + +let union b1 b2 = + begin + let n = b2.nrWords in + if b1.nrWords < n then enlarge b1 n else (); + let a1 = b1.bitmap in + let a2 = b2.bitmap in + let changed = ref false in + for i=0 to n - 1 do + begin + let t = a1.(i) in + let upd = t lor a2.(i) in + let _ = if upd <> t then changed := true else () in + Array.unsafe_set a1 i upd + end + done; + ! changed + end + (* lin += (lout - def) *) +let accLive lin lout def = + begin (* Need to enlarge def to lout *) + let n = lout.nrWords in + if def.nrWords < n then enlarge def n else (); + (* Need to enlarge lin to lout *) + if lin.nrWords < n then enlarge lin n else (); + let changed = ref false in + let alin = lin.bitmap in + let alout = lout.bitmap in + let adef = def.bitmap in + for i=0 to n - 1 do + begin + let old = alin.(i) in + let nw = old lor (alout.(i) land (lnot adef.(i))) in + alin.(i) <- nw; + changed := (old <> nw) || (!changed) + end + done; + !changed + end + + (* b1 *= b2 *) +let inters b1 b2 = + begin + let n = min b1.nrWords b2.nrWords in + let a1 = b1.bitmap in + let a2 = b2.bitmap in + for i=0 to n - 1 do + begin + a1.(i) <- a1.(i) land a2.(i) + end + done; + if n < b1.nrWords then + Array.fill a1 n (b1.nrWords - n) 0 + else + () + end + +let emptyInt b start = + let n = b.nrWords in + let a = b.bitmap in + let rec loop i = i >= n || (a.(i) = 0 && loop (i + 1)) + in + loop start + +let empty b = emptyInt b 0 + + (* b1 =? b2 *) +let equal b1 b2 = + begin + let n = min b1.nrWords b2.nrWords in + let a1 = b1.bitmap in + let a2 = b2.bitmap in + let res = ref true in + for i=0 to n - 1 do + begin + if a1.(i) != a2.(i) then res := false else () + end + done; + if !res then + if b1.nrWords > n then + emptyInt b1 n + else if b2.nrWords > n then + emptyInt b2 n + else + true + else + false + end + +let assign b1 b2 = + begin + let n = b2.nrWords in + if b1.nrWords < n then enlarge b1 n else (); + let a1 = b1.bitmap in + let a2 = b2.bitmap in + Array.blit a2 0 a1 0 n + end + + (* b1 -= b2 *) +let diff b1 b2 = + begin + let n = min b1.nrWords b2.nrWords in + let a1 = b1.bitmap in + let a2 = b2.bitmap in + for i=0 to n - 1 do + a1.(i) <- a1.(i) land (lnot a2.(i)) + done; + if n < b1.nrWords then + Array.fill a1 n (b1.nrWords - n) 0 + else + () + end + + + + +let get bmp i = + assert (i >= 0); + if i >= bmp.nrBits then enlarge bmp (i / 31 + 1) else (); + let wrd = i / 31 in + let msk = 1 lsl (i + wrd - (wrd lsl 5)) in + bmp.bitmap.(wrd) land msk != 0 + + +let set bmp i tv = + assert(i >= 0); + let wrd = i / 31 in + let msk = 1 lsl (i + wrd - (wrd lsl 5)) in + if i >= bmp.nrBits then enlarge bmp (wrd + 1) else (); + if tv then + bmp.bitmap.(wrd) <- bmp.bitmap.(wrd) lor msk + else + bmp.bitmap.(wrd) <- bmp.bitmap.(wrd) land (lnot msk) + + + + (* Iterate over all elements in a + * bitmap *) +let fold f bmp arg = + let a = bmp.bitmap in + let n = bmp.nrWords in + let rec allWords i bit arg = + if i >= n then + arg + else + let rec allBits msk bit left arg = + if left = 0 then + allWords (i + 1) bit arg + else + allBits ((lsr) msk 1) (bit + 1) (left - 1) + (if (land) msk 1 != 0 then f arg bit else arg) + in + allBits a.(i) bit 31 arg + in + allWords 0 0 arg + + +let iter f t = fold (fun x y -> f y) t () + +let toList bmp = fold (fun acc i -> i :: acc) bmp [] + +let card bmp = fold (fun acc _ -> acc + 1) bmp 0 diff --git a/cil/src/ext/bitmap.mli b/cil/src/ext/bitmap.mli new file mode 100644 index 00000000..5247e35d --- /dev/null +++ b/cil/src/ext/bitmap.mli @@ -0,0 +1,50 @@ + + (* Imperative bitmaps *) + +type t + (* Create a bitmap given the number + * of bits *) +val make : int -> t +val init : int -> (int -> bool) -> t (* Also initialize it *) + +val size : t -> int (* How much space it is reserved *) + + (* The cardinality of a set *) +val card : t -> int + + (* Make a copy of a bitmap *) +val clone : t -> t + +val cloneEmpty : t -> t (* An empty set with the same + * dimentions *) + +val set : t -> int -> bool -> unit +val get : t -> int -> bool + (* destructive union. The first + * element is updated. Returns true + * if any change was actually + * necessary *) +val union : t -> t -> bool + + (* accLive livein liveout def. Does + * liveIn += (liveout - def) *) +val accLive : t -> t -> t -> bool + + (* Copy the second argument onto the + * first *) +val assign : t -> t -> unit + + +val inters : t -> t -> unit +val diff : t -> t -> unit + + +val empty : t -> bool + +val equal : t -> t -> bool + +val toList : t -> int list + +val iter : (int -> unit) -> t -> unit +val fold : ('a -> int -> 'a) -> t -> 'a -> 'a + diff --git a/cil/src/ext/blockinggraph.ml b/cil/src/ext/blockinggraph.ml new file mode 100644 index 00000000..281678ae --- /dev/null +++ b/cil/src/ext/blockinggraph.ml @@ -0,0 +1,769 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +open Cil +open Pretty +module E = Errormsg + +let debug = false + +let fingerprintAll = true + + +type blockkind = + NoBlock + | BlockTrans + | BlockPoint + | EndPoint + +(* For each function we have a node *) +type node = +{ + nodeid: int; + name: string; + mutable scanned: bool; + mutable expand: bool; + mutable fptr: bool; + mutable stacksize: int; + mutable fds: fundec option; + mutable bkind: blockkind; + mutable origkind: blockkind; + mutable preds: node list; + mutable succs: node list; + mutable predstmts: (stmt * node) list; +} + +type blockpt = +{ + id: int; + point: stmt; + callfun: string; + infun: string; + mutable leadsto: blockpt list; +} + + +(* Fresh ids for each node. *) +let curNodeNum : int ref = ref 0 +let getFreshNodeNum () : int = + let num = !curNodeNum in + incr curNodeNum; + num + +(* Initialize a node. *) +let newNode (name: string) (fptr: bool) (mangle: bool) : node = + let id = getFreshNodeNum () in + { nodeid = id; name = if mangle then name ^ (string_of_int id) else name; + scanned = false; expand = false; + fptr = fptr; stacksize = 0; fds = None; + bkind = NoBlock; origkind = NoBlock; + preds = []; succs = []; predstmts = []; } + + +(* My type signature ignores attributes and function pointers. *) +let myTypeSig (t: typ) : typsig = + let rec removeFunPtrs (ts: typsig) : typsig = + match ts with + TSPtr (TSFun _, a) -> + TSPtr (TSBase voidType, a) + | TSPtr (base, a) -> + TSPtr (removeFunPtrs base, a) + | TSArray (base, e, a) -> + TSArray (removeFunPtrs base, e, a) + | TSFun (ret, args, v, a) -> + TSFun (removeFunPtrs ret, List.map removeFunPtrs args, v, a) + | _ -> ts + in + removeFunPtrs (typeSigWithAttrs (fun _ -> []) t) + + +(* We add a dummy function whose name is "@@functionPointer@@" that is called + * at all invocations of function pointers and itself calls all functions + * whose address is taken. *) +let functionPointerName = "@@functionPointer@@" + +(* We map names to nodes *) +let functionNodes: (string, node) Hashtbl.t = Hashtbl.create 113 +let getFunctionNode (n: string) : node = + Util.memoize + functionNodes + n + (fun _ -> newNode n false false) + +(* We map types to nodes for function pointers *) +let functionPtrNodes: (typsig, node) Hashtbl.t = Hashtbl.create 113 +let getFunctionPtrNode (t: typ) : node = + Util.memoize + functionPtrNodes + (myTypeSig t) + (fun _ -> newNode functionPointerName true true) + +let startNode: node = newNode "@@startNode@@" true false + + +(* +(** Dump the function call graph. *) +let dumpFunctionCallGraph (start: node) = + Hashtbl.iter (fun _ x -> x.scanned <- false) functionNodes; + let rec dumpOneNode (ind: int) (n: node) : unit = + output_string !E.logChannel "\n"; + for i = 0 to ind do + output_string !E.logChannel " " + done; + output_string !E.logChannel (n.name ^ " "); + begin + match n.bkind with + NoBlock -> () + | BlockTrans -> output_string !E.logChannel " " + | BlockPoint -> output_string !E.logChannel " " + | EndPoint -> output_string !E.logChannel " " + end; + if n.scanned then (* Already dumped *) + output_string !E.logChannel " " + else begin + n.scanned <- true; + List.iter (fun n -> if n.bkind <> EndPoint then dumpOneNode (ind + 1) n) + n.succs + end + in + dumpOneNode 0 start; + output_string !E.logChannel "\n\n" +*) + +let dumpFunctionCallGraphToFile () = + let channel = open_out "graph" in + let dumpNode _ (n: node) : unit = + let first = ref true in + let dumpSucc (n: node) : unit = + if !first then + first := false + else + output_string channel ","; + output_string channel n.name + in + output_string channel (string_of_int n.nodeid); + output_string channel ":"; + output_string channel (string_of_int n.stacksize); + output_string channel ":"; + if n.fds = None && not n.fptr then + output_string channel "x"; + output_string channel ":"; + output_string channel n.name; + output_string channel ":"; + List.iter dumpSucc n.succs; + output_string channel "\n"; + in + dumpNode () startNode; + Hashtbl.iter dumpNode functionNodes; + Hashtbl.iter dumpNode functionPtrNodes; + close_out channel + + +let addCall (callerNode: node) (calleeNode: node) (sopt: stmt option) = + if not (List.exists (fun n -> n.name = calleeNode.name) + callerNode.succs) then begin + if debug then + ignore (E.log "found call from %s to %s\n" + callerNode.name calleeNode.name); + callerNode.succs <- calleeNode :: callerNode.succs; + calleeNode.preds <- callerNode :: calleeNode.preds; + end; + match sopt with + Some s -> + if not (List.exists (fun (s', _) -> s' = s) calleeNode.predstmts) then + calleeNode.predstmts <- (s, callerNode) :: calleeNode.predstmts + | None -> () + + +class findCallsVisitor (host: node) : cilVisitor = object + inherit nopCilVisitor + + val mutable curStmt : stmt ref = ref (mkEmptyStmt ()) + + method vstmt s = + curStmt := s; + DoChildren + + method vinst i = + match i with + | Call(_,Lval(Var(vi),NoOffset),args,l) -> + addCall host (getFunctionNode vi.vname) (Some !curStmt); + SkipChildren + + | Call(_,e,_,l) -> (* Calling a function pointer *) + addCall host (getFunctionPtrNode (typeOf e)) (Some !curStmt); + SkipChildren + + | _ -> SkipChildren (* No calls in other instructions *) + + (* There are no calls in expressions and types *) + method vexpr e = SkipChildren + method vtype t = SkipChildren + +end + + +let endPt = { id = 0; point = mkEmptyStmt (); callfun = "end"; infun = "end"; + leadsto = []; } + +(* These values will be initialized for real in makeBlockingGraph. *) +let curId : int ref = ref 1 +let startName : string ref = ref "" +let blockingPoints : blockpt list ref = ref [] +let blockingPointsNew : blockpt Queue.t = Queue.create () +let blockingPointsHash : (int, blockpt) Hashtbl.t = Hashtbl.create 113 + +let getFreshNum () : int = + let num = !curId in + curId := !curId + 1; + num + +let getBlockPt (s: stmt) (cfun: string) (ifun: string) : blockpt = + try + Hashtbl.find blockingPointsHash s.sid + with Not_found -> + let num = getFreshNum () in + let bpt = { id = num; point = s; callfun = cfun; infun = ifun; + leadsto = []; } in + Hashtbl.add blockingPointsHash s.sid bpt; + blockingPoints := bpt :: !blockingPoints; + Queue.add bpt blockingPointsNew; + bpt + + +type action = + Process of stmt * node + | Next of stmt * node + | Return of node + +let getStmtNode (s: stmt) : node option = + match s.skind with + Instr instrs -> begin + let len = List.length instrs in + if len > 0 then + match List.nth instrs (len - 1) with + Call (_, Lval (Var vi, NoOffset), args, _) -> + Some (getFunctionNode vi.vname) + | Call (_, e, _, _) -> (* Calling a function pointer *) + Some (getFunctionPtrNode (typeOf e)) + | _ -> + None + else + None + end + | _ -> None + +let addBlockingPointEdge (bptFrom: blockpt) (bptTo: blockpt) : unit = + if not (List.exists (fun bpt -> bpt = bptTo) bptFrom.leadsto) then + bptFrom.leadsto <- bptTo :: bptFrom.leadsto + +let findBlockingPointEdges (bpt: blockpt) : unit = + let seenStmts = Hashtbl.create 117 in + let worklist = Queue.create () in + Queue.add (Next (bpt.point, getFunctionNode bpt.infun)) worklist; + while Queue.length worklist > 0 do + let act = Queue.take worklist in + match act with + Process (curStmt, curNode) -> begin + Hashtbl.add seenStmts curStmt.sid (); + match getStmtNode curStmt with + Some node -> begin + if debug then + ignore (E.log "processing node %s\n" node.name); + match node.bkind with + NoBlock -> + Queue.add (Next (curStmt, curNode)) worklist + | BlockTrans -> begin + let processFundec (fd: fundec) : unit = + let s = List.hd fd.sbody.bstmts in + if not (Hashtbl.mem seenStmts s.sid) then + let n = getFunctionNode fd.svar.vname in + Queue.add (Process (s, n)) worklist + in + match node.fds with + Some fd -> + processFundec fd + | None -> + List.iter + (fun n -> + match n.fds with + Some fd -> processFundec fd + | None -> E.s (bug "expected fundec")) + node.succs + end + | BlockPoint -> + addBlockingPointEdge bpt + (getBlockPt curStmt node.name curNode.name) + | EndPoint -> + addBlockingPointEdge bpt endPt + end + | _ -> + Queue.add (Next (curStmt, curNode)) worklist + end + | Next (curStmt, curNode) -> begin + match curStmt.Cil.succs with + [] -> + if debug then + ignore (E.log "hit end of %s\n" curNode.name); + Queue.add (Return curNode) worklist + | _ -> + List.iter (fun s -> + if not (Hashtbl.mem seenStmts s.sid) then + Queue.add (Process (s, curNode)) worklist) + curStmt.Cil.succs + end + | Return curNode when curNode.bkind = NoBlock -> + () + | Return curNode when curNode.name = !startName -> + addBlockingPointEdge bpt endPt + | Return curNode -> + List.iter (fun (s, n) -> if n.bkind <> NoBlock then + Queue.add (Next (s, n)) worklist) + curNode.predstmts; + List.iter (fun n -> if n.fptr then + Queue.add (Return n) worklist) + curNode.preds + done + +let markYieldPoints (n: node) : unit = + let rec markNode (n: node) : unit = + if n.bkind = NoBlock then + match n.origkind with + BlockTrans -> + if n.expand || n.fptr then begin + n.bkind <- BlockTrans; + List.iter markNode n.succs + end else begin + n.bkind <- BlockPoint + end + | _ -> + n.bkind <- n.origkind + in + Hashtbl.iter (fun _ n -> n.bkind <- NoBlock) functionNodes; + Hashtbl.iter (fun _ n -> n.bkind <- NoBlock) functionPtrNodes; + markNode n + +let makeBlockingGraph (start: node) = + let startStmt = + match start.fds with + Some fd -> List.hd fd.sbody.bstmts + | None -> E.s (bug "expected fundec") + in + curId := 1; + startName := start.name; + blockingPoints := [endPt]; + Queue.clear blockingPointsNew; + Hashtbl.clear blockingPointsHash; + ignore (getBlockPt startStmt start.name start.name); + while Queue.length blockingPointsNew > 0 do + let bpt = Queue.take blockingPointsNew in + findBlockingPointEdges bpt; + done + +let dumpBlockingGraph () = + List.iter + (fun bpt -> + if bpt.id < 2 then begin + ignore (E.log "bpt %d (%s): " bpt.id bpt.callfun) + end else begin + ignore (E.log "bpt %d (%s in %s): " bpt.id bpt.callfun bpt.infun) + end; + List.iter (fun bpt -> ignore (E.log "%d " bpt.id)) bpt.leadsto; + ignore (E.log "\n")) + !blockingPoints; + ignore (E.log "\n") + +let beforeFun = + makeGlobalVar "before_bg_node" + (TFun (voidType, Some [("node_idx", intType, []); + ("num_edges", intType, [])], + false, [])) + +let initFun = + makeGlobalVar "init_blocking_graph" + (TFun (voidType, Some [("num_nodes", intType, [])], + false, [])) + +let fingerprintVar = + let vi = makeGlobalVar "stack_fingerprint" intType in + vi.vstorage <- Extern; + vi + +let startNodeAddrs = + let vi = makeGlobalVar "start_node_addrs" (TPtr (voidPtrType, [])) in + vi.vstorage <- Extern; + vi + +let startNodeStacks = + let vi = makeGlobalVar "start_node_stacks" (TPtr (intType, [])) in + vi.vstorage <- Extern; + vi + +let startNodeAddrsArray = + makeGlobalVar "start_node_addrs_array" (TArray (voidPtrType, None, [])) + +let startNodeStacksArray = + makeGlobalVar "start_node_stacks_array" (TArray (intType, None, [])) + +let insertInstr (newInstr: instr) (s: stmt) : unit = + match s.skind with + Instr instrs -> + let rec insert (instrs: instr list) : instr list = + match instrs with + [] -> E.s (bug "instr list does not end with call\n") + | [Call _] -> newInstr :: instrs + | i :: rest -> i :: (insert rest) + in + s.skind <- Instr (insert instrs) + | _ -> + E.s (bug "instr stmt expected\n") + +let instrumentBlockingPoints () = + List.iter + (fun bpt -> + if bpt.id > 1 then + let arg1 = integer bpt.id in + let arg2 = integer (List.length bpt.leadsto) in + let call = Call (None, Lval (var beforeFun), + [arg1; arg2], locUnknown) in + insertInstr call bpt.point; + addCall (getFunctionNode bpt.infun) + (getFunctionNode beforeFun.vname) None) + !blockingPoints + + +let startNodes : node list ref = ref [] + +let makeAndDumpBlockingGraphs () : unit = + if List.length !startNodes > 1 then + E.s (unimp "We can't handle more than one start node right now.\n"); + List.iter + (fun n -> + markYieldPoints n; + (*dumpFunctionCallGraph n;*) + makeBlockingGraph n; + dumpBlockingGraph (); + instrumentBlockingPoints ()) + !startNodes + + +let pragmas : (string, int) Hashtbl.t = Hashtbl.create 13 + +let gatherPragmas (f: file) : unit = + List.iter + (function + GPragma (Attr ("stacksize", [AStr s; AInt n]), _) -> + Hashtbl.add pragmas s n + | _ -> ()) + f.globals + + +let blockingNodes : node list ref = ref [] + +let markBlockingFunctions () : unit = + let rec markFunction (n: node) : unit = + if debug then + ignore (E.log "marking %s\n" n.name); + if n.origkind = NoBlock then begin + n.origkind <- BlockTrans; + List.iter markFunction n.preds; + end + in + List.iter (fun n -> List.iter markFunction n.preds) !blockingNodes + +let hasFunctionTypeAttribute (n: string) (t: typ) : bool = + let _, _, _, a = splitFunctionType t in + hasAttribute n a + +let markVar (vi: varinfo) : unit = + let node = getFunctionNode vi.vname in + if node.origkind = NoBlock then begin + if hasAttribute "yield" vi.vattr then begin + node.origkind <- BlockPoint; + blockingNodes := node :: !blockingNodes; + end else if hasFunctionTypeAttribute "noreturn" vi.vtype then begin + node.origkind <- EndPoint; + end else if hasAttribute "expand" vi.vattr then begin + node.expand <- true; + end + end; + begin + try + node.stacksize <- Hashtbl.find pragmas node.name + with Not_found -> begin + match filterAttributes "stacksize" vi.vattr with + (Attr (_, [AInt n])) :: _ when n > node.stacksize -> + node.stacksize <- n + | _ -> () + end + end + +let makeFunctionCallGraph (f: Cil.file) : unit = + Hashtbl.clear functionNodes; + (* Scan the file and construct the control-flow graph *) + List.iter + (function + GFun(fdec, _) -> + let curNode = getFunctionNode fdec.svar.vname in + if fdec.svar.vaddrof then begin + addCall (getFunctionPtrNode fdec.svar.vtype) + curNode None; + end; + if hasAttribute "start" fdec.svar.vattr then begin + startNodes := curNode :: !startNodes; + end; + markVar fdec.svar; + curNode.fds <- Some fdec; + let vis = new findCallsVisitor curNode in + ignore (visitCilBlock vis fdec.sbody) + + | GVarDecl(vi, _) when isFunctionType vi.vtype -> + (* TODO: what if we take the addr of an extern? *) + markVar vi + + | _ -> ()) + f.globals + +let makeStartNodeLinks () : unit = + addCall startNode (getFunctionNode "main") None; + List.iter (fun n -> addCall startNode n None) !startNodes + +let funType (ret_t: typ) (args: (string * typ) list) = + TFun(ret_t, + Some (List.map (fun (n,t) -> (n, t, [])) args), + false, []) + +class instrumentClass = object + inherit nopCilVisitor + + val mutable curNode : node ref = ref (getFunctionNode "main") + val mutable seenRet : bool ref = ref false + + val mutable funId : int ref = ref 0 + + method vfunc (fdec: fundec) : fundec visitAction = begin + (* Remember the current function. *) + curNode := getFunctionNode fdec.svar.vname; + seenRet := false; + funId := Random.bits (); + (* Add useful locals. *) + ignore (makeLocalVar fdec "savesp" voidPtrType); + ignore (makeLocalVar fdec "savechunk" voidPtrType); + ignore (makeLocalVar fdec "savebottom" voidPtrType); + (* Add macro for function entry when we're done. *) + let addEntryNode (fdec: fundec) : fundec = + if not !seenRet then E.s (bug "didn't find a return statement"); + let node = getFunctionNode fdec.svar.vname in + if fingerprintAll || node.origkind <> NoBlock then begin + let fingerprintSet = + Set (var fingerprintVar, BinOp (BXor, Lval (var fingerprintVar), + integer !funId, intType), + locUnknown) + in + fdec.sbody.bstmts <- mkStmtOneInstr fingerprintSet :: fdec.sbody.bstmts + end; + let nodeFun = emptyFunction ("NODE_CALL_"^(string_of_int node.nodeid)) in + let nodeCall = Call (None, Lval (var nodeFun.svar), [], locUnknown) in + nodeFun.svar.vtype <- funType voidType []; + nodeFun.svar.vstorage <- Static; + fdec.sbody.bstmts <- mkStmtOneInstr nodeCall :: fdec.sbody.bstmts; + fdec + in + ChangeDoChildrenPost (fdec, addEntryNode) + end + + method vstmt (s: stmt) : stmt visitAction = begin + begin + match s.skind with + Instr instrs -> begin + let instrumentNode (callNode: node) : unit = + (* Make calls to macros. *) + let suffix = "_" ^ (string_of_int !curNode.nodeid) ^ + "_" ^ (string_of_int callNode.nodeid) + in + let beforeFun = emptyFunction ("BEFORE_CALL" ^ suffix) in + let beforeCall = Call (None, Lval (var beforeFun.svar), + [], locUnknown) in + beforeFun.svar.vtype <- funType voidType []; + beforeFun.svar.vstorage <- Static; + let afterFun = emptyFunction ("AFTER_CALL" ^ suffix) in + let afterCall = Call (None, Lval (var afterFun.svar), + [], locUnknown) in + afterFun.svar.vtype <- funType voidType []; + afterFun.svar.vstorage <- Static; + (* Insert instrumentation around call site. *) + let rec addCalls (is: instr list) : instr list = + match is with + [call] -> [beforeCall; call; afterCall] + | cur :: rest -> cur :: addCalls rest + | [] -> E.s (bug "expected list of non-zero length") + in + s.skind <- Instr (addCalls instrs) + in + (* If there's a call site here, instrument it. *) + let len = List.length instrs in + if len > 0 then begin + match List.nth instrs (len - 1) with + Call (_, Lval (Var vi, NoOffset), _, _) -> + (* + if (try String.sub vi.vname 0 10 <> "NODE_CALL_" + with Invalid_argument _ -> true) then +*) + instrumentNode (getFunctionNode vi.vname) + | Call (_, e, _, _) -> (* Calling a function pointer *) + instrumentNode (getFunctionPtrNode (typeOf e)) + | _ -> () + end; + DoChildren + end + | Cil.Return _ -> begin + if !seenRet then E.s (bug "found multiple returns"); + seenRet := true; + if fingerprintAll || !curNode.origkind <> NoBlock then begin + let fingerprintSet = + Set (var fingerprintVar, BinOp (BXor, Lval (var fingerprintVar), + integer !funId, intType), + locUnknown) + in + s.skind <- Block (mkBlock [mkStmtOneInstr fingerprintSet; + mkStmt s.skind]); + end; + SkipChildren + end + | _ -> DoChildren + end + end +end + +let makeStartNodeTable (globs: global list) : global list = + if List.length !startNodes = 0 then + globs + else + let addrInitInfo = { init = None } in + let stackInitInfo = { init = None } in + let rec processNode (nodes: node list) (i: int) = + match nodes with + node :: rest -> + let curGlobs, addrInit, stackInit = processNode rest (i + 1) in + let fd = + match node.fds with + Some fd -> fd + | None -> E.s (bug "expected fundec") + in + let stack = + makeGlobalVar ("NODE_STACK_" ^ (string_of_int node.nodeid)) intType + in + GVarDecl (fd.svar, locUnknown) :: curGlobs, + ((Index (integer i, NoOffset), SingleInit (mkAddrOf (var fd.svar))) :: + addrInit), + ((Index (integer i, NoOffset), SingleInit (Lval (var stack))) :: + stackInit) + | [] -> (GVarDecl (startNodeAddrs, locUnknown) :: + GVarDecl (startNodeStacks, locUnknown) :: + GVar (startNodeAddrsArray, addrInitInfo, locUnknown) :: + GVar (startNodeStacksArray, stackInitInfo, locUnknown) :: + []), + [Index (integer i, NoOffset), SingleInit zero], + [Index (integer i, NoOffset), SingleInit zero] + in + let newGlobs, addrInit, stackInit = processNode !startNodes 0 in + addrInitInfo.init <- + Some (CompoundInit (TArray (voidPtrType, None, []), addrInit)); + stackInitInfo.init <- + Some (CompoundInit (TArray (intType, None, []), stackInit)); + let file = { fileName = "startnode.h"; globals = newGlobs; + globinit = None; globinitcalled = false; } in + let channel = open_out file.fileName in + dumpFile defaultCilPrinter channel file; + close_out channel; + GText ("#include \"" ^ file.fileName ^ "\"") :: globs + +let instrumentProgram (f: file) : unit = + (* Add function prototypes. *) + f.globals <- makeStartNodeTable f.globals; + f.globals <- GText ("#include \"stack.h\"") :: + GVarDecl (initFun, locUnknown) :: + GVarDecl (beforeFun, locUnknown) :: + GVarDecl (fingerprintVar, locUnknown) :: + f.globals; + (* Add instrumentation to call sites. *) + visitCilFile ((new instrumentClass) :> cilVisitor) f; + (* Force creation of this node. *) + ignore (getFunctionNode beforeFun.vname); + (* Add initialization call to main(). *) + let mainNode = getFunctionNode "main" in + match mainNode.fds with + Some fdec -> + let arg1 = integer (List.length !blockingPoints) in + let initInstr = Call (None, Lval (var initFun), [arg1], locUnknown) in + let addrsInstr = + Set (var startNodeAddrs, StartOf (var startNodeAddrsArray), + locUnknown) + in + let stacksInstr = + Set (var startNodeStacks, StartOf (var startNodeStacksArray), + locUnknown) + in + let newStmt = + if List.length !startNodes = 0 then + mkStmtOneInstr initInstr + else + mkStmt (Instr [addrsInstr; stacksInstr; initInstr]) + in + fdec.sbody.bstmts <- newStmt :: fdec.sbody.bstmts; + addCall mainNode (getFunctionNode initFun.vname) None + | None -> + E.s (bug "expected main fundec") + + + +let feature : featureDescr = + { fd_name = "FCG"; + fd_enabled = ref false; + fd_description = "computing and printing a static call graph"; + fd_extraopt = []; + fd_doit = + (function (f : file) -> + Random.init 0; (* Use the same seed so that results are predictable. *) + gatherPragmas f; + makeFunctionCallGraph f; + makeStartNodeLinks (); + markBlockingFunctions (); + (* makeAndDumpBlockingGraphs (); *) + instrumentProgram f; + dumpFunctionCallGraphToFile ()); + fd_post_check = true; + } diff --git a/cil/src/ext/blockinggraph.mli b/cil/src/ext/blockinggraph.mli new file mode 100644 index 00000000..72f9ba7b --- /dev/null +++ b/cil/src/ext/blockinggraph.mli @@ -0,0 +1,40 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* This module finds and analyzes yield points. *) + +val feature: Cil.featureDescr diff --git a/cil/src/ext/callgraph.ml b/cil/src/ext/callgraph.ml new file mode 100644 index 00000000..58472ac6 --- /dev/null +++ b/cil/src/ext/callgraph.ml @@ -0,0 +1,250 @@ +(* callgraph.ml *) +(* code for callgraph.mli *) + +(* see copyright notice at end of this file *) + +open Cil +open Trace +open Printf +module P = Pretty +module IH = Inthash +module H = Hashtbl +module E = Errormsg + +(* ------------------- interface ------------------- *) +(* a call node describes the local calling structure for a + * single function: which functions it calls, and which + * functions call it *) +type callnode = { + (* An id *) + cnid: int; + + (* the function this node describes *) + cnInfo: nodeinfo; + + (* set of functions this one calls, indexed by the node id *) + cnCallees: callnode IH.t; + + (* set of functions that call this one , indexed by the node id *) + cnCallers: callnode IH.t; +} + +and nodeinfo = + NIVar of varinfo * bool ref + (* Node corresponding to a function. If the boolean + * is true, then the function is defined, otherwise + * it is external *) + + | NIIndirect of string (* Indirect nodes have a string associated to them. + * These strings must be invalid function names *) + * varinfo list ref + (* A list of functions that this indirect node might + * denote *) + +let nodeName (n: nodeinfo) : string = + match n with + NIVar (v, _) -> v.vname + | NIIndirect (n, _) -> n + +(* a call graph is a hashtable, mapping a function name to + * the node which describes that function's call structure *) +type callgraph = + (string, callnode) Hashtbl.t + +(* given the name of a function, retrieve its callnode; this will create a + * node if one doesn't already exist. Will use the given nodeinfo only when + * creating nodes. *) +let nodeId = ref 0 +let getNodeByName (cg: callgraph) (ni: nodeinfo) : callnode = + let name = nodeName ni in + try + H.find cg name + with Not_found -> ( + (* make a new node *) + let ret:callnode = { + cnInfo = ni; + cnid = !nodeId; + cnCallees = IH.create 5; + cnCallers = IH.create 5; + } + in + incr nodeId; + (* add it to the table, then return it *) + H.add cg name ret; + ret + ) + +(* Get the node for a variable *) +let getNodeForVar (cg: callgraph) (v: varinfo) : callnode = + getNodeByName cg (NIVar (v, ref false)) + +let getNodeForIndirect (cg: callgraph) (e: exp) : callnode = + getNodeByName cg (NIIndirect ("", ref [])) + + +(* Find the name of an indirect node that a function whose address is taken + * belongs *) +let markFunctionAddrTaken (cg: callgraph) (f: varinfo) : unit = + (* + ignore (E.log "markFunctionAddrTaken %s\n" f.vname); + *) + let n = getNodeForIndirect cg (AddrOf (Var f, NoOffset)) in + match n.cnInfo with + NIIndirect (_, r) -> r := f :: !r + | _ -> assert false + + + +class cgComputer (graph: callgraph) = object(self) + inherit nopCilVisitor + + (* the current function we're in, so when we visit a call node + * we know who is the caller *) + val mutable curFunc: callnode option = None + + + (* begin visiting a function definition *) + method vfunc (f:fundec) : fundec visitAction = begin + (trace "callgraph" (P.dprintf "entering function %s\n" f.svar.vname)); + let node = getNodeForVar graph f.svar in + (match node.cnInfo with + NIVar (v, r) -> r := true + | _ -> assert false); + curFunc <- (Some node); + DoChildren + end + + (* visit an instruction; we're only interested in calls *) + method vinst (i:instr) : instr list visitAction = begin + (*(trace "callgraph" (P.dprintf "visiting instruction: %a\n" dn_instr i));*) + let caller : callnode = + match curFunc with + None -> assert false + | Some c -> c + in + let callerName: string = nodeName caller.cnInfo in + (match i with + Call(_,f,_,_) -> ( + let callee: callnode = + match f with + | Lval(Var(vi),NoOffset) -> + (trace "callgraph" (P.dprintf "I see a call by %s to %s\n" + callerName vi.vname)); + getNodeForVar graph vi + + | _ -> + (trace "callgraph" (P.dprintf "indirect call: %a\n" + dn_instr i)); + getNodeForIndirect graph f + in + + (* add one entry to each node's appropriate list *) + IH.replace caller.cnCallees callee.cnid callee; + IH.replace callee.cnCallers caller.cnid caller + ) + + | _ -> ()); (* ignore other kinds instructions *) + + DoChildren + end + + method vexpr (e: exp) = + (match e with + AddrOf (Var fv, NoOffset) when isFunctionType fv.vtype -> + markFunctionAddrTaken graph fv + | _ -> ()); + + DoChildren +end + +let computeGraph (f:file) : callgraph = begin + let graph = H.create 37 in + let obj:cgComputer = new cgComputer graph in + + (* visit the whole file, computing the graph *) + visitCilFileSameGlobals (obj :> cilVisitor) f; + + + (* return the computed graph *) + graph +end + +let printGraph (out:out_channel) (g:callgraph) : unit = begin + let printEntry _ (n:callnode) : unit = + let name = nodeName n.cnInfo in + (Printf.fprintf out " %s" name) + in + + let printCalls (node:callnode) : unit = + (fprintf out " calls:"); + (IH.iter printEntry node.cnCallees); + (fprintf out "\n is called by:"); + (IH.iter printEntry node.cnCallers); + (fprintf out "\n") + in + + H.iter (fun (name: string) (node: callnode) -> + match node.cnInfo with + NIVar (v, def) -> + (fprintf out "%s (%s):\n" + v.vname (if !def then "defined" else "external")); + printCalls node + + | NIIndirect (n, funcs) -> + fprintf out "Indirect %s:\n" n; + fprintf out " possible aliases: "; + List.iter (fun a -> fprintf out "%s " a.vname) !funcs; + fprintf out "\n" + + ) + + g + end + +let doCallGraph = ref false + +let feature : featureDescr = + { fd_name = "callgraph"; + fd_enabled = doCallGraph; + fd_description = "generation of a static call graph"; + fd_extraopt = []; + fd_doit = + (function (f: file) -> + let graph:callgraph = computeGraph f in + printGraph stdout graph); + fd_post_check = false; + } + + +(* + * + * Copyright (c) 2001-2002 by + * George C. Necula necula@cs.berkeley.edu + * Scott McPeak smcpeak@cs.berkeley.edu + * Wes Weimer weimer@cs.berkeley.edu + * Ben Liblit liblit@cs.berkeley.edu + * + * All rights reserved. Permission to use, copy, modify and distribute + * this software for research purposes only is hereby granted, + * provided that the following conditions are met: + * 1. XSRedistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * DISCLAIMER: + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/callgraph.mli b/cil/src/ext/callgraph.mli new file mode 100644 index 00000000..bc760180 --- /dev/null +++ b/cil/src/ext/callgraph.mli @@ -0,0 +1,123 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(* callgraph.mli *) +(* compute a static call graph *) + +(* module maintainer: scott *) +(* see copyright notice at end of this file *) + + +(* ------------------ types ------------------- *) +(* a call node describes the local calling structure for a + * single function: which functions it calls, and which + * functions call it *) +type callnode = { + (* An id *) + cnid: int; + + (* the function this node describes *) + cnInfo: nodeinfo; + + (* set of functions this one calls, indexed by the node id *) + cnCallees: callnode Inthash.t; + + (* set of functions that call this one , indexed by the node id *) + cnCallers: callnode Inthash.t; +} + +and nodeinfo = + NIVar of Cil.varinfo * bool ref + (* Node corresponding to a function. If the boolean + * is true, then the function is defined, otherwise + * it is external *) + + | NIIndirect of string (* Indirect nodes have a string associated to them. + * These strings must be invalid function names *) + * Cil.varinfo list ref + (* A list of functions that this indirect node might + * denote *) + + +val nodeName: nodeinfo -> string + +(* a call graph is a hashtable, mapping a function name to + * the node which describes that function's call structure *) +type callgraph = + (string, callnode) Hashtbl.t + + +(* ----------------- functions ------------------- *) +(* given a CIL file, compute its static call graph *) +val computeGraph : Cil.file -> callgraph + +(* print the callgraph in a human-readable format to a channel *) +val printGraph : out_channel -> callgraph -> unit + + +val feature: Cil.featureDescr +(* + * + * Copyright (c) 2001-2002 by + * George C. Necula necula@cs.berkeley.edu + * Scott McPeak smcpeak@cs.berkeley.edu + * Wes Weimer weimer@cs.berkeley.edu + * Ben Liblit liblit@cs.berkeley.edu + * + * All rights reserved. Permission to use, copy, modify and distribute + * this software for research purposes only is hereby granted, + * provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * DISCLAIMER: + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/canonicalize.ml b/cil/src/ext/canonicalize.ml new file mode 100644 index 00000000..a75deeac --- /dev/null +++ b/cil/src/ext/canonicalize.ml @@ -0,0 +1,292 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + + +(************************************************************************ + * canonicalize performs several transformations to correct differences + * between C and C++, so that the output is (hopefully) valid C++ code. + * This is incomplete -- certain fixes which are necessary + * for some programs are not yet implemented. + * + * #1) C allows global variables to have multiple declarations and multiple + * (equivalent) definitions. This transformation removes all but one + * declaration and all but one definition. + * + * #2) Any variables that use C++ keywords as identifiers are renamed. + * + * #3) __inline is #defined to inline, and __restrict is #defined to nothing. + * + * #4) C allows function pointers with no specified arguments to be used on + * any argument list. To make C++ accept this code, we insert a cast + * from the function pointer to a type that matches the arguments. Of + * course, this does nothing to guarantee that the pointer actually has + * that type. + * + * #5) Makes casts from int to enum types explicit. (CIL changes enum + * constants to int constants, but doesn't use a cast.) + * + ************************************************************************) + +open Cil +module E = Errormsg +module H = Hashtbl + +(* For transformation #1. Stores all variable definitions in the file. *) +let varDefinitions: (varinfo, global) H.t = H.create 111 + + +class canonicalizeVisitor = object(self) + inherit nopCilVisitor + val mutable currentFunction: fundec = Cil.dummyFunDec; + + (* A hashtable to prevent duplicate declarations. *) + val alreadyDeclared: (varinfo, unit) H.t = H.create 111 + val alreadyDefined: (varinfo, unit) H.t = H.create 111 + + (* move variable declarations around *) + method vglob g = match g with + GVar(v, ({init = Some _} as inito), l) -> + (* A definition. May have been moved to an earlier position. *) + if H.mem alreadyDefined v then begin + ignore (E.warn "Duplicate definition of %s at %a.\n" + v.vname d_loc !currentLoc); + ChangeTo [] (* delete from here. *) + end else begin + H.add alreadyDefined v (); + if H.mem alreadyDeclared v then begin + (* Change the earlier declaration to Extern *) + let oldS = v.vstorage in + ignore (E.log "changing storage of %s from %a\n" + v.vname d_storage oldS); + v.vstorage <- Extern; + let newv = {v with vstorage = oldS} in + ChangeDoChildrenPost([GVar(newv, inito, l)], (fun g -> g) ) + end else + DoChildren + end + | GVar(v, {init=None}, l) + | GVarDecl(v, l) when not (isFunctionType v.vtype) -> begin + (* A declaration. May have been moved to an earlier position. *) + if H.mem alreadyDefined v || H.mem alreadyDeclared v then + ChangeTo [] (* delete from here. *) + else begin + H.add alreadyDeclared v (); + DoChildren + end + end + | GFun(f, l) -> + currentFunction <- f; + DoChildren + | _ -> + DoChildren + +(* #2. rename any identifiers whose names are C++ keywords *) + method vvdec v = + match v.vname with + | "bool" + | "catch" + | "cdecl" + | "class" + | "const_cast" + | "delete" + | "dynamic_cast" + | "explicit" + | "export" + | "false" + | "friend" + | "mutable" + | "namespace" + | "new" + | "operator" + | "pascal" + | "private" + | "protected" + | "public" + | "register" + | "reinterpret_cast" + | "static_cast" + | "template" + | "this" + | "throw" + | "true" + | "try" + | "typeid" + | "typename" + | "using" + | "virtual" + | "wchar_t"-> + v.vname <- v.vname ^ "__cil2cpp"; + DoChildren + | _ -> DoChildren + + method vinst i = +(* #5. If an assignment or function call uses expressions as enum values, + add an explicit cast. *) + match i with + Set (dest, exp, l) -> begin + let typeOfDest = typeOfLval dest in + match unrollType typeOfDest with + TEnum _ -> (* add an explicit cast *) + let newI = Set(dest, mkCast exp typeOfDest, l) in + ChangeTo [newI] + | _ -> SkipChildren + end + | Call (dest, f, args, l) -> begin + let rt, formals, isva, attrs = splitFunctionType (typeOf f) in + if isva then + SkipChildren (* ignore vararg functions *) + else + match formals with + Some formals' -> begin + let newArgs = try + (*Iterate over the arguments, looking for formals that + expect enum types, and insert casts where necessary. *) + List.map2 + (fun (actual: exp) (formalName, formalType, _) -> + match unrollType formalType with + TEnum _ -> mkCast actual formalType + | _ -> actual) + args + formals' + with Invalid_argument _ -> + E.s (error "Number of arguments to %a doesn't match type.\n" + d_exp f) + in + let newI = Call(dest, f, newArgs, l) in + ChangeTo [newI] + end + | None -> begin + (* #4. No arguments were specified for this type. To fix this, infer the + type from the arguments that are used n this instruction, and insert + a cast to that type.*) + match f with + Lval(Mem(fp), off) -> + let counter: int ref = ref 0 in + let newFormals = List.map + (fun (actual:exp) -> + incr counter; + let formalName = "a" ^ (string_of_int !counter) in + (formalName, typeOf actual, []))(* (name,type,attrs) *) + args in + let newFuncPtrType = + TPtr((TFun (rt, Some newFormals, false, attrs)), []) in + let newFuncPtr = Lval(Mem(mkCast fp newFuncPtrType), off) in + ChangeTo [Call(dest, newFuncPtr, args, l)] + | _ -> + ignore (warn "cppcanon: %a has no specified arguments, but it's not a function pointer." d_exp f); + SkipChildren + end + end + | _ -> SkipChildren + + method vinit i = +(* #5. If an initializer uses expressions as enum values, + add an explicit cast. *) + match i with + SingleInit e -> DoChildren (* we don't handle simple initializers here, + because we don't know what type is expected. + This should be done in vglob if needed. *) + | CompoundInit(t, initList) -> + let changed: bool ref = ref false in + let initList' = List.map + (* iterate over the list, adding casts for any expression that + is expected to be an enum type. *) + (function + (Field(fi, off), SingleInit e) -> begin + match unrollType fi.ftype with + TEnum _ -> (* add an explicit cast *) + let newE = mkCast e fi.ftype in + changed := true; + (Field(fi, off), SingleInit newE) + | _ -> (* not enum, no cast needed *) + (Field(fi, off), SingleInit e) + end + | other -> + (* This is a more complicated initializer, and I don't think + it can have type enum. It's children might, though. *) + other) + initList in + if !changed then begin + (* There may be other casts needed in other parts of the + initialization, so do the children too. *) + ChangeDoChildrenPost(CompoundInit(t, initList'), (fun x -> x)) + end else + DoChildren + + +(* #5. If a function returns an enum type, add an explicit cast to the + return type. *) + method vstmt stmt = + (match stmt.skind with + Return (Some exp, l) -> begin + let typeOfDest, _, _, _ = + splitFunctionType currentFunction.svar.vtype in + match unrollType typeOfDest with + TEnum _ -> + stmt.skind <- Return (Some (mkCast exp typeOfDest), l) + | _ -> () + end + | _ -> ()); + DoChildren +end (* class canonicalizeVisitor *) + + + +(* Entry point for this extension *) +let canonicalize (f:file) = + visitCilFile (new canonicalizeVisitor) f; + + (* #3. Finally, add some #defines to change C keywords to their C++ + equivalents: *) + f.globals <- + GText( "#ifdef __cplusplus\n" + ^" #define __restrict\n" (* "restrict" doesn't work *) + ^" #define __inline inline\n" + ^"#endif") + ::f.globals + + + +let feature : featureDescr = + { fd_name = "canonicalize"; + fd_enabled = ref false; + fd_description = "fixing some C-isms so that the result is C++ compliant."; + fd_extraopt = []; + fd_doit = canonicalize; + fd_post_check = true; + } diff --git a/cil/src/ext/canonicalize.mli b/cil/src/ext/canonicalize.mli new file mode 100644 index 00000000..37bc0d83 --- /dev/null +++ b/cil/src/ext/canonicalize.mli @@ -0,0 +1,48 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(************************************************************************ + * canonicalize performs several transformations to correct differences + * between C and C++, so that the output is (hopefully) valid C++ code. + * This is incomplete -- certain fixes which are necessary + * for some programs are not yet implemented. + * + * See canonicalize.ml for a list of changes. + * + ************************************************************************) + +val feature: Cil.featureDescr diff --git a/cil/src/ext/cfg.ml b/cil/src/ext/cfg.ml new file mode 100644 index 00000000..8b19c797 --- /dev/null +++ b/cil/src/ext/cfg.ml @@ -0,0 +1,289 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2003, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Simon Goldsmith + * S.P Rahul, Aman Bhargava + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* Authors: Aman Bhargava, S. P. Rahul *) +(* sfg: this stuff was stolen from optim.ml - the code to print the cfg as + a dot graph is mine *) + +open Pretty +open Cil +module E=Errormsg + +(* entry points: cfgFun, printCfgChannel, printCfgFilename *) + +(* known issues: + * -sucessors of if somehow end up with two edges each + *) + +(*------------------------------------------------------------*) +(* Notes regarding CFG computation: + 1) Initially only succs and preds are computed. sid's are filled in + later, in whatever order is suitable (e.g. for forward problems, reverse + depth-first postorder). + 2) If a stmt (return, break or continue) has no successors, then + function return must follow. + No predecessors means it is the start of the function + 3) We use the fact that initially all the succs and preds are assigned [] +*) + +(* Fill in the CFG info for the stmts in a block + next = succ of the last stmt in this block + break = succ of any Break in this block + cont = succ of any Continue in this block + None means the succ is the function return. It does not mean the break/cont + is invalid. We assume the validity has already been checked. +*) +(* At the end of CFG computation, + - numNodes = total number of CFG nodes + - length(nodeList) = numNodes +*) + +let numNodes = ref 0 (* number of nodes in the CFG *) +let nodeList : stmt list ref = ref [] (* All the nodes in a flat list *) (* ab: Added to change dfs from quadratic to linear *) +let start_id = ref 0 (* for unique ids across many functions *) + +(* entry point *) + +(** Compute a control flow graph for fd. Stmts in fd have preds and succs + filled in *) +let rec cfgFun (fd : fundec): int = + begin + numNodes := !start_id; + nodeList := []; + + cfgBlock fd.sbody None None None; + !numNodes - !start_id + end + + +and cfgStmts (ss: stmt list) + (next:stmt option) (break:stmt option) (cont:stmt option) = + match ss with + [] -> (); + | [s] -> cfgStmt s next break cont + | hd::tl -> + cfgStmt hd (Some (List.hd tl)) break cont; + cfgStmts tl next break cont + +and cfgBlock (blk: block) + (next:stmt option) (break:stmt option) (cont:stmt option) = + cfgStmts blk.bstmts next break cont + +(* Fill in the CFG info for a stmt + Meaning of next, break, cont should be clear from earlier comment +*) +and cfgStmt (s: stmt) (next:stmt option) (break:stmt option) (cont:stmt option) = + incr numNodes; + s.sid <- !numNodes; + nodeList := s :: !nodeList; (* Future traversals can be made in linear time. e.g. *) + if s.succs <> [] then + E.s (bug "CFG must be cleared before being computed!"); + let addSucc (n: stmt) = + if not (List.memq n s.succs) then + s.succs <- n::s.succs; + if not (List.memq s n.preds) then + n.preds <- s::n.preds + in + let addOptionSucc (n: stmt option) = + match n with + None -> () + | Some n' -> addSucc n' + in + let addBlockSucc (b: block) = + match b.bstmts with + [] -> addOptionSucc next + | hd::_ -> addSucc hd + in + match s.skind with + Instr _ -> addOptionSucc next + | Return _ -> () + | Goto (p,_) -> addSucc !p + | Break _ -> addOptionSucc break + | Continue _ -> addOptionSucc cont + | If (_, blk1, blk2, _) -> + (* The succs of If is [true branch;false branch] *) + addBlockSucc blk2; + addBlockSucc blk1; + cfgBlock blk1 next break cont; + cfgBlock blk2 next break cont + | Block b -> + addBlockSucc b; + cfgBlock b next break cont + | Switch(_,blk,l,_) -> + List.iter addSucc (List.rev l); (* Add successors in order *) + (* sfg: if there's no default, need to connect s->next *) + if not (List.exists + (fun stmt -> List.exists + (function Default _ -> true | _ -> false) + stmt.labels) + l) + then + addOptionSucc next; + cfgBlock blk next next cont +(* + | Loop(blk,_,_,_) -> +*) + | While(_,blk,_) + | DoWhile(_,blk,_) + | For(_,_,_,blk,_) -> + addBlockSucc blk; + cfgBlock blk (Some s) next (Some s) + (* Since all loops have terminating condition true, we don't put + any direct successor to stmt following the loop *) + | TryExcept _ | TryFinally _ -> + E.s (E.unimp "try/except/finally") + +(*------------------------------------------------------------*) + +(**************************************************************) +(* do something for all stmts in a fundec *) + +let rec forallStmts (todo) (fd : fundec) = + begin + fasBlock todo fd.sbody; + end + +and fasBlock (todo) (b : block) = + List.iter (fasStmt todo) b.bstmts + +and fasStmt (todo) (s : stmt) = + begin + ignore(todo s); + match s.skind with + | Block b -> fasBlock todo b + | If (_, tb, fb, _) -> (fasBlock todo tb; fasBlock todo fb) + | Switch (_, b, _, _) -> fasBlock todo b +(* + | Loop (b, _, _, _) -> fasBlock todo b +*) + | While (_, b, _) -> fasBlock todo b + | DoWhile (_, b, _) -> fasBlock todo b + | For (_, _, _, b, _) -> fasBlock todo b + | (Return _ | Break _ | Continue _ | Goto _ | Instr _) -> () + | TryExcept _ | TryFinally _ -> E.s (E.unimp "try/except/finally") + end +;; + +(**************************************************************) +(* printing the control flow graph - you have to compute it first *) + +let d_cfgnodename () (s : stmt) = + dprintf "%d" s.sid + +let d_cfgnodelabel () (s : stmt) = + let label = + begin + match s.skind with + | If (e, _, _, _) -> "if" (*sprint ~width:999 (dprintf "if %a" d_exp e)*) +(* + | Loop _ -> "loop" +*) + | While _ -> "while" + | DoWhile _ -> "dowhile" + | For _ -> "for" + | Break _ -> "break" + | Continue _ -> "continue" + | Goto _ -> "goto" + | Instr _ -> "instr" + | Switch _ -> "switch" + | Block _ -> "block" + | Return _ -> "return" + | TryExcept _ -> "try-except" + | TryFinally _ -> "try-finally" + end in + dprintf "%d: %s" s.sid label + +let d_cfgedge (src) () (dest) = + dprintf "%a -> %a" + d_cfgnodename src + d_cfgnodename dest + +let d_cfgnode () (s : stmt) = + dprintf "%a [label=\"%a\"]\n\t%a" + d_cfgnodename s + d_cfgnodelabel s + (d_list "\n\t" (d_cfgedge s)) s.succs + +(**********************************************************************) +(* entry points *) + +(** print control flow graph (in dot form) for fundec to channel *) +let printCfgChannel (chan : out_channel) (fd : fundec) = + let pnode (s:stmt) = fprintf chan "%a\n" d_cfgnode s in + begin + ignore (fprintf chan "digraph CFG_%s {\n" fd.svar.vname); + forallStmts pnode fd; + ignore(fprintf chan "}\n"); + end + +(** Print control flow graph (in dot form) for fundec to file *) +let printCfgFilename (filename : string) (fd : fundec) = + let chan = open_out filename in + begin + printCfgChannel chan fd; + close_out chan; + end + + +;; + +(**********************************************************************) + +let clearCFGinfo (fd : fundec) = + let clear s = + s.sid <- -1; + s.succs <- []; + s.preds <- []; + in + forallStmts clear fd + +let clearFileCFG (f : file) = + iterGlobals f (fun g -> + match g with GFun(fd,_) -> + clearCFGinfo fd + | _ -> ()) + +let computeFileCFG (f : file) = + iterGlobals f (fun g -> + match g with GFun(fd,_) -> + numNodes := cfgFun fd; + start_id := !start_id + !numNodes + | _ -> ()) diff --git a/cil/src/ext/cfg.mli b/cil/src/ext/cfg.mli new file mode 100644 index 00000000..19c51666 --- /dev/null +++ b/cil/src/ext/cfg.mli @@ -0,0 +1,36 @@ +(** Code to compute the control-flow graph of a function or file. + This will fill in the [preds] and [succs] fields of {!Cil.stmt} + + This is required for several other extensions, such as {!Dataflow}. +*) + +open Cil + + +(** Compute the CFG for an entire file, by calling cfgFun on each function. *) +val computeFileCFG: Cil.file -> unit + +(** clear the sid, succs, and preds fields of each statement. *) +val clearFileCFG: Cil.file -> unit + +(** Compute a control flow graph for fd. Stmts in fd have preds and succs + filled in *) +val cfgFun : fundec -> int + +(** clear the sid, succs, and preds fields of each statment in a function *) +val clearCFGinfo: Cil.fundec -> unit + +(** print control flow graph (in dot form) for fundec to channel *) +val printCfgChannel : out_channel -> fundec -> unit + +(** Print control flow graph (in dot form) for fundec to file *) +val printCfgFilename : string -> fundec -> unit + +(** Next statement id that will be assigned. *) +val start_id: int ref + +(** All of the nodes in a file. *) +val nodeList : stmt list ref + +(** number of nodes in the CFG *) +val numNodes : int ref diff --git a/cil/src/ext/ciltools.ml b/cil/src/ext/ciltools.ml new file mode 100755 index 00000000..78f1aafc --- /dev/null +++ b/cil/src/ext/ciltools.ml @@ -0,0 +1,228 @@ +open Cil + +(* Contributed by Nathan Cooprider *) + +let isOne e = + isInteger e = Some Int64.one + + +(* written by Zach *) +let is_volatile_tp tp = + List.exists (function (Attr("volatile",_)) -> true + | _ -> false) (typeAttrs tp) + +(* written by Zach *) +let is_volatile_vi vi = + let vi_vol = + List.exists (function (Attr("volatile",_)) -> true + | _ -> false) vi.vattr in + let typ_vol = is_volatile_tp vi.vtype in + vi_vol || typ_vol + +(***************************************************************************** + * A collection of useful functions that were not already in CIL as far as I + * could tell. However, I have been surprised before . . . + ****************************************************************************) + +type sign = Signed | Unsigned + +exception Not_an_integer + +(***************************************************************************** + * A bunch of functions for accessing integers. Originally written for + * somebody who didn't know CIL and just wanted to mess with it at the + * OCaml level. + ****************************************************************************) + +let unbox_int_type (ye : typ) : (int * sign) = + let tp = unrollType ye in + let s = + match tp with + TInt (i, _) -> + if (isSigned i) then + Signed + else + Unsigned + | _ -> raise Not_an_integer + in + (bitsSizeOf tp), s + +(* depricated. Use isInteger directly instead *) +let unbox_int_exp (e : exp) : int64 = + match isInteger e with + None -> raise Not_an_integer + | Some (x) -> x + +let box_int_to_exp (n : int64) (ye : typ) : exp = + let tp = unrollType ye in + match tp with + TInt (i, _) -> + kinteger64 i n + | _ -> raise Not_an_integer + +let cil_to_ocaml_int (e : exp) : (int64 * int * sign) = + let v, s = unbox_int_type (typeOf e) in + unbox_int_exp (e), v, s + +exception Weird_bitwidth + +(* (int64 * int * sign) : exp *) +let ocaml_int_to_cil v n s = + let char_size = bitsSizeOf charType in + let int_size = bitsSizeOf intType in + let short_size = bitsSizeOf (TInt(IShort,[]))in + let long_size = bitsSizeOf longType in + let longlong_size = bitsSizeOf (TInt(ILongLong,[])) in + let i = + match s with + Signed -> + if (n = char_size) then + ISChar + else if (n = int_size) then + IInt + else if (n = short_size) then + IShort + else if (n = long_size) then + ILong + else if (n = longlong_size) then + ILongLong + else + raise Weird_bitwidth + | Unsigned -> + if (n = char_size) then + IUChar + else if (n = int_size) then + IUInt + else if (n = short_size) then + IUShort + else if (n = long_size) then + IULong + else if (n = longlong_size) then + IULongLong + else + raise Weird_bitwidth + in + kinteger64 i v + +(***************************************************************************** + * a couple of type functions that I thought would be useful: + ****************************************************************************) + +let rec isCompositeType tp = + match tp with + TComp _ -> true + | TPtr(x, _) -> isCompositeType x + | TArray(x,_,_) -> isCompositeType x + | TFun(x,_,_,_) -> isCompositeType x + | TNamed (x,_) -> isCompositeType x.ttype + | _ -> false + +(** START OF deepHasAttribute ************************************************) +let visited = ref [] +class attribute_checker target rflag = object (self) + inherit nopCilVisitor + method vtype t = + match t with + TComp(cinfo, a) -> + if(not (List.exists (fun x -> cinfo.cname = x) !visited )) then begin + visited := cinfo.cname :: !visited; + List.iter + (fun f -> + if (hasAttribute target f.fattr) then + rflag := true + else + ignore(visitCilType (new attribute_checker target rflag) + f.ftype)) cinfo.cfields; + end; + DoChildren + | TNamed(t1, a) -> + if(not (List.exists (fun x -> t1.tname = x) !visited )) then begin + visited := t1.tname :: !visited; + ignore(visitCilType (new attribute_checker target rflag) t1.ttype); + end; + DoChildren + | _ -> + DoChildren + method vattr (Attr(name,params)) = + if (name = target) then rflag := true; + DoChildren +end + +let deepHasAttribute s t = + let found = ref false in + visited := []; + ignore(visitCilType (new attribute_checker s found) t); + !found +(** END OF deepHasAttribute **************************************************) + +(** Stuff from ptranal, slightly modified ************************************) + +(***************************************************************************** + * A transformation to make every instruction be in its own statement. + ****************************************************************************) + +class callBBVisitor = object + inherit nopCilVisitor + + method vstmt s = + match s.skind with + Instr(il) -> begin + if (List.length il > 1) then + let list_of_stmts = List.map (fun one_inst -> + mkStmtOneInstr one_inst) il in + let block = mkBlock list_of_stmts in + s.skind <- Block block; + ChangeTo(s) + else + SkipChildren + end + | _ -> DoChildren + + method vvdec _ = SkipChildren + method vexpr _ = SkipChildren + method vlval _ = SkipChildren + method vtype _ = SkipChildren +end + +let one_instruction_per_statement f = + let thisVisitor = new callBBVisitor in + visitCilFileSameGlobals thisVisitor f + +(***************************************************************************** + * A transformation that gives each variable a unique identifier. + ****************************************************************************) + +class vidVisitor = object + inherit nopCilVisitor + val count = ref 0 + + method vvdec vi = + vi.vid <- !count ; + incr count ; SkipChildren +end + +let globally_unique_vids f = + let thisVisitor = new vidVisitor in + visitCilFileSameGlobals thisVisitor f + +(** End of stuff from ptranal ************************************************) + +class sidVisitor = object + inherit nopCilVisitor + val count = ref 0 + + method vstmt s = + s.sid <- !count ; + incr count ; + DoChildren +end + +let globally_unique_sids f = + let thisVisitor = new sidVisitor in + visitCilFileSameGlobals thisVisitor f + +(** Comparing expressions without a Out_of_memory error **********************) + +let compare_exp x y = + compare x y + diff --git a/cil/src/ext/dataflow.ml b/cil/src/ext/dataflow.ml new file mode 100755 index 00000000..7f28f841 --- /dev/null +++ b/cil/src/ext/dataflow.ml @@ -0,0 +1,466 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +module IH = Inthash +module E = Errormsg + +open Cil +open Pretty + +(** A framework for data flow analysis for CIL code. Before using + this framework, you must initialize the Control-flow Graph for your + program, e.g using {!Cfg.computeFileCFG} *) + +type 't action = + Default (** The default action *) + | Done of 't (** Do not do the default action. Use this result *) + | Post of ('t -> 't) (** The default action, followed by the given + * transformer *) + +type 't stmtaction = + SDefault (** The default action *) + | SDone (** Do not visit this statement or its successors *) + | SUse of 't (** Visit the instructions and successors of this statement + as usual, but use the specified state instead of the + one that was passed to doStmt *) + +(* For if statements *) +type 't guardaction = + GDefault (** The default state *) + | GUse of 't (** Use this data for the branch *) + | GUnreachable (** The branch will never be taken. *) + + +(****************************************************************** + ********** + ********** FORWARDS + ********** + ********************************************************************) + +module type ForwardsTransfer = sig + val name: string (** For debugging purposes, the name of the analysis *) + + val debug: bool ref (** Whether to turn on debugging *) + + type t (** The type of the data we compute for each block start. May be + * imperative. *) + + val copy: t -> t + (** Make a deep copy of the data *) + + + val stmtStartData: t Inthash.t + (** For each statement id, the data at the start. Not found in the hash + * table means nothing is known about the state at this point. At the end + * of the analysis this means that the block is not reachable. *) + + val pretty: unit -> t -> Pretty.doc + (** Pretty-print the state *) + + val computeFirstPredecessor: Cil.stmt -> t -> t + (** Give the first value for a predecessors, compute the value to be set + * for the block *) + + val combinePredecessors: Cil.stmt -> old:t -> t -> t option + (** Take some old data for the start of a statement, and some new data for + * the same point. Return None if the combination is identical to the old + * data. Otherwise, compute the combination, and return it. *) + + val doInstr: Cil.instr -> t -> t action + (** The (forwards) transfer function for an instruction. The + * {!Cil.currentLoc} is set before calling this. The default action is to + * continue with the state unchanged. *) + + val doStmt: Cil.stmt -> t -> t stmtaction + (** The (forwards) transfer function for a statement. The {!Cil.currentLoc} + * is set before calling this. The default action is to do the instructions + * in this statement, if applicable, and continue with the successors. *) + + val doGuard: Cil.exp -> t -> t guardaction + (** Generate the successor to an If statement assuming the given expression + * is nonzero. Analyses that don't need guard information can return + * GDefault; this is equivalent to returning GUse of the input. + * A return value of GUnreachable indicates that this half of the branch + * will not be taken and should not be explored. This will be called + * twice per If, once for "then" and once for "else". + *) + + val filterStmt: Cil.stmt -> bool + (** Whether to put this statement in the worklist. This is called when a + * block would normally be put in the worklist. *) + +end + + +module ForwardsDataFlow = + functor (T : ForwardsTransfer) -> + struct + + (** Keep a worklist of statements to process. It is best to keep a queue, + * because this way it is more likely that we are going to process all + * predecessors of a statement before the statement itself. *) + let worklist: Cil.stmt Queue.t = Queue.create () + + (** We call this function when we have encountered a statement, with some + * state. *) + let reachedStatement (s: stmt) (d: T.t) : unit = + (** see if we know about it already *) + E.pushContext (fun _ -> dprintf "Reached statement %d with %a" + s.sid T.pretty d); + let newdata: T.t option = + try + let old = IH.find T.stmtStartData s.sid in + match T.combinePredecessors s ~old:old d with + None -> (* We are done here *) + if !T.debug then + ignore (E.log "FF(%s): reached stmt %d with %a\n implies the old state %a\n" + T.name s.sid T.pretty d T.pretty old); + None + | Some d' -> begin + (* We have changed the data *) + if !T.debug then + ignore (E.log "FF(%s): weaken data for block %d: %a\n" + T.name s.sid T.pretty d'); + Some d' + end + with Not_found -> (* was bottom before *) + let d' = T.computeFirstPredecessor s d in + if !T.debug then + ignore (E.log "FF(%s): set data for block %d: %a\n" + T.name s.sid T.pretty d'); + Some d' + in + E.popContext (); + match newdata with + None -> () + | Some d' -> + IH.replace T.stmtStartData s.sid d'; + if T.filterStmt s && + not (Queue.fold (fun exists s' -> exists || s'.sid = s.sid) + false + worklist) then + Queue.add s worklist + + + (** Get the two successors of an If statement *) + let ifSuccs (s:stmt) : stmt * stmt = + let fstStmt blk = match blk.bstmts with + [] -> Cil.dummyStmt + | fst::_ -> fst + in + match s.skind with + If(e, b1, b2, _) -> + let thenSucc = fstStmt b1 in + let elseSucc = fstStmt b2 in + let oneFallthrough () = + let fallthrough = + List.filter + (fun s' -> thenSucc != s' && elseSucc != s') + s.succs + in + match fallthrough with + [] -> E.s (bug "Bad CFG: missing fallthrough for If.") + | [s'] -> s' + | _ -> E.s (bug "Bad CFG: multiple fallthrough for If.") + in + (* If thenSucc or elseSucc is Cil.dummyStmt, it's an empty block. + So the successor is the statement after the if *) + let stmtOrFallthrough s' = + if s' == Cil.dummyStmt then + oneFallthrough () + else + s' + in + (stmtOrFallthrough thenSucc, + stmtOrFallthrough elseSucc) + + | _-> E.s (bug "ifSuccs on a non-If Statement.") + + (** Process a statement *) + let processStmt (s: stmt) : unit = + currentLoc := get_stmtLoc s.skind; + if !T.debug then + ignore (E.log "FF(%s).stmt %d at %t\n" T.name s.sid d_thisloc); + + (* It must be the case that the block has some data *) + let init: T.t = + try T.copy (IH.find T.stmtStartData s.sid) + with Not_found -> + E.s (E.bug "FF(%s): processing block without data" T.name) + in + + (** See what the custom says *) + match T.doStmt s init with + SDone -> () + | (SDefault | SUse _) as act -> begin + let curr = match act with + SDefault -> init + | SUse d -> d + | SDone -> E.s (bug "SDone") + in + (* Do the instructions in order *) + let handleInstruction (s: T.t) (i: instr) : T.t = + currentLoc := get_instrLoc i; + + (* Now handle the instruction itself *) + let s' = + let action = T.doInstr i s in + match action with + | Done s' -> s' + | Default -> s (* do nothing *) + | Post f -> f s + in + s' + in + + let after: T.t = + match s.skind with + Instr il -> + (* Handle instructions starting with the first one *) + List.fold_left handleInstruction curr il + + | Goto _ | Break _ | Continue _ | If _ + | TryExcept _ | TryFinally _ + | Switch _ | (*Loop _*) While _ | DoWhile _ | For _ + | Return _ | Block _ -> curr + in + currentLoc := get_stmtLoc s.skind; + + (* Handle If guards *) + let succsToReach = match s.skind with + If (e, _, _, _) -> begin + let not_e = UnOp(LNot, e, intType) in + let thenGuard = T.doGuard e after in + let elseGuard = T.doGuard not_e after in + if thenGuard = GDefault && elseGuard = GDefault then + (* this is the common case *) + s.succs + else begin + let doBranch succ guard = + match guard with + GDefault -> reachedStatement succ after + | GUse d -> reachedStatement succ d + | GUnreachable -> + if !T.debug then + ignore (E.log "FF(%s): Not exploring branch to %d\n" + T.name succ.sid); + + () + in + let thenSucc, elseSucc = ifSuccs s in + doBranch thenSucc thenGuard; + doBranch elseSucc elseGuard; + [] + end + end + | _ -> s.succs + in + (* Reach the successors *) + List.iter (fun s' -> reachedStatement s' after) succsToReach; + + end + + + + + (** Compute the data flow. Must have the CFG initialized *) + let compute (sources: stmt list) = + Queue.clear worklist; + List.iter (fun s -> Queue.add s worklist) sources; + + (** All initial stmts must have non-bottom data *) + List.iter (fun s -> + if not (IH.mem T.stmtStartData s.sid) then + E.s (E.error "FF(%s): initial stmt %d does not have data" + T.name s.sid)) + sources; + if !T.debug then + ignore (E.log "\nFF(%s): processing\n" + T.name); + let rec fixedpoint () = + if !T.debug && not (Queue.is_empty worklist) then + ignore (E.log "FF(%s): worklist= %a\n" + T.name + (docList (fun s -> num s.sid)) + (List.rev + (Queue.fold (fun acc s -> s :: acc) [] worklist))); + try + let s = Queue.take worklist in + processStmt s; + fixedpoint (); + with Queue.Empty -> + if !T.debug then + ignore (E.log "FF(%s): done\n\n" T.name) + in + fixedpoint () + + end + + + +(****************************************************************** + ********** + ********** BACKWARDS + ********** + ********************************************************************) +module type BackwardsTransfer = sig + val name: string (* For debugging purposes, the name of the analysis *) + + val debug: bool ref (** Whether to turn on debugging *) + + type t (** The type of the data we compute for each block start. In many + * presentations of backwards data flow analysis we maintain the + * data at the block end. This is not easy to do with JVML because + * a block has many exceptional ends. So we maintain the data for + * the statement start. *) + + val pretty: unit -> t -> Pretty.doc (** Pretty-print the state *) + + val stmtStartData: t Inthash.t + (** For each block id, the data at the start. This data structure must be + * initialized with the initial data for each block *) + + val combineStmtStartData: Cil.stmt -> old:t -> t -> t option + (** When the analysis reaches the start of a block, combine the old data + * with the one we have just computed. Return None if the combination is + * the same as the old data, otherwise return the combination. In the + * latter case, the predecessors of the statement are put on the working + * list. *) + + + val combineSuccessors: t -> t -> t + (** Take the data from two successors and combine it *) + + + val doStmt: Cil.stmt -> t action + (** The (backwards) transfer function for a branch. The {!Cil.currentLoc} is + * set before calling this. If it returns None, then we have some default + * handling. Otherwise, the returned data is the data before the branch + * (not considering the exception handlers) *) + + val doInstr: Cil.instr -> t -> t action + (** The (backwards) transfer function for an instruction. The + * {!Cil.currentLoc} is set before calling this. If it returns None, then we + * have some default handling. Otherwise, the returned data is the data + * before the branch (not considering the exception handlers) *) + + val filterStmt: Cil.stmt -> Cil.stmt -> bool + (** Whether to put this predecessor block in the worklist. We give the + * predecessor and the block whose predecessor we are (and whose data has + * changed) *) + +end + +module BackwardsDataFlow = + functor (T : BackwardsTransfer) -> + struct + + let getStmtStartData (s: stmt) : T.t = + try IH.find T.stmtStartData s.sid + with Not_found -> + E.s (E.bug "BF(%s): stmtStartData is not initialized for %d" + T.name s.sid) + + (** Process a statement and return true if the set of live return + * addresses on its entry has changed. *) + let processStmt (s: stmt) : bool = + if !T.debug then + ignore (E.log "FF(%s).stmt %d\n" T.name s.sid); + + + (* Find the state before the branch *) + currentLoc := get_stmtLoc s.skind; + let d: T.t = + match T.doStmt s with + Done d -> d + | (Default | Post _) as action -> begin + (* Do the default one. Combine the successors *) + let res = + match s.succs with + [] -> E.s (E.bug "You must doStmt for the statements with no successors") + | fst :: rest -> + List.fold_left (fun acc succ -> + T.combineSuccessors acc (getStmtStartData succ)) + (getStmtStartData fst) + rest + in + (* Now do the instructions *) + let res' = + match s.skind with + Instr il -> + (* Now scan the instructions in reverse order. This may + * Stack_overflow on very long blocks ! *) + let handleInstruction (i: instr) (s: T.t) : T.t = + currentLoc := get_instrLoc i; + (* First handle the instruction itself *) + let action = T.doInstr i s in + match action with + | Done s' -> s' + | Default -> s (* do nothing *) + | Post f -> f s + in + (* Handle instructions starting with the last one *) + List.fold_right handleInstruction il res + + | _ -> res + in + match action with + Post f -> f res' + | _ -> res' + end + in + + (* See if the state has changed. The only changes are that it may grow.*) + let s0 = getStmtStartData s in + + match T.combineStmtStartData s ~old:s0 d with + None -> (* The old data is good enough *) + false + + | Some d' -> + (* We have changed the data *) + if !T.debug then + ignore (E.log "BF(%s): set data for block %d: %a\n" + T.name s.sid T.pretty d'); + IH.replace T.stmtStartData s.sid d'; + true + + + (** Compute the data flow. Must have the CFG initialized *) + let compute (sinks: stmt list) = + let worklist: Cil.stmt Queue.t = Queue.create () in + List.iter (fun s -> Queue.add s worklist) sinks; + if !T.debug && not (Queue.is_empty worklist) then + ignore (E.log "\nBF(%s): processing\n" + T.name); + let rec fixedpoint () = + if !T.debug && not (Queue.is_empty worklist) then + ignore (E.log "BF(%s): worklist= %a\n" + T.name + (docList (fun s -> num s.sid)) + (List.rev + (Queue.fold (fun acc s -> s :: acc) [] worklist))); + try + let s = Queue.take worklist in + let changes = processStmt s in + if changes then begin + (* We must add all predecessors of block b, only if not already + * in and if the filter accepts them. *) + List.iter + (fun p -> + if not (Queue.fold (fun exists s' -> exists || p.sid = s'.sid) + false worklist) && + T.filterStmt p s then + Queue.add p worklist) + s.preds; + end; + fixedpoint (); + + with Queue.Empty -> + if !T.debug then + ignore (E.log "BF(%s): done\n\n" T.name) + in + fixedpoint (); + + end + + diff --git a/cil/src/ext/dataflow.mli b/cil/src/ext/dataflow.mli new file mode 100755 index 00000000..e72c5db0 --- /dev/null +++ b/cil/src/ext/dataflow.mli @@ -0,0 +1,151 @@ +(** A framework for data flow analysis for CIL code. Before using + this framework, you must initialize the Control-flow Graph for your + program, e.g using {!Cfg.computeFileCFG} *) + +type 't action = + Default (** The default action *) + | Done of 't (** Do not do the default action. Use this result *) + | Post of ('t -> 't) (** The default action, followed by the given + * transformer *) + +type 't stmtaction = + SDefault (** The default action *) + | SDone (** Do not visit this statement or its successors *) + | SUse of 't (** Visit the instructions and successors of this statement + as usual, but use the specified state instead of the + one that was passed to doStmt *) + +(* For if statements *) +type 't guardaction = + GDefault (** The default state *) + | GUse of 't (** Use this data for the branch *) + | GUnreachable (** The branch will never be taken. *) + + +(****************************************************************** + ********** + ********** FORWARDS + ********** + ********************************************************************) + +module type ForwardsTransfer = sig + val name: string (** For debugging purposes, the name of the analysis *) + + val debug: bool ref (** Whether to turn on debugging *) + + type t (** The type of the data we compute for each block start. May be + * imperative. *) + + val copy: t -> t + (** Make a deep copy of the data *) + + + val stmtStartData: t Inthash.t + (** For each statement id, the data at the start. Not found in the hash + * table means nothing is known about the state at this point. At the end + * of the analysis this means that the block is not reachable. *) + + val pretty: unit -> t -> Pretty.doc + (** Pretty-print the state *) + + val computeFirstPredecessor: Cil.stmt -> t -> t + (** Give the first value for a predecessors, compute the value to be set + * for the block *) + + val combinePredecessors: Cil.stmt -> old:t -> t -> t option + (** Take some old data for the start of a statement, and some new data for + * the same point. Return None if the combination is identical to the old + * data. Otherwise, compute the combination, and return it. *) + + val doInstr: Cil.instr -> t -> t action + (** The (forwards) transfer function for an instruction. The + * {!Cil.currentLoc} is set before calling this. The default action is to + * continue with the state unchanged. *) + + val doStmt: Cil.stmt -> t -> t stmtaction + (** The (forwards) transfer function for a statement. The {!Cil.currentLoc} + * is set before calling this. The default action is to do the instructions + * in this statement, if applicable, and continue with the successors. *) + + val doGuard: Cil.exp -> t -> t guardaction + (** Generate the successor to an If statement assuming the given expression + * is nonzero. Analyses that don't need guard information can return + * GDefault; this is equivalent to returning GUse of the input. + * A return value of GUnreachable indicates that this half of the branch + * will not be taken and should not be explored. This will be called + * twice per If, once for "then" and once for "else". + *) + + val filterStmt: Cil.stmt -> bool + (** Whether to put this statement in the worklist. This is called when a + * block would normally be put in the worklist. *) + +end + +module ForwardsDataFlow (T : ForwardsTransfer) : sig + val compute: Cil.stmt list -> unit + (** Fill in the T.stmtStartData, given a number of initial statements to + * start from. All of the initial statements must have some entry in + * T.stmtStartData (i.e., the initial data should not be bottom) *) +end + +(****************************************************************** + ********** + ********** BACKWARDS + ********** + ********************************************************************) +module type BackwardsTransfer = sig + val name: string (** For debugging purposes, the name of the analysis *) + + val debug: bool ref (** Whether to turn on debugging *) + + type t (** The type of the data we compute for each block start. In many + * presentations of backwards data flow analysis we maintain the + * data at the block end. This is not easy to do with JVML because + * a block has many exceptional ends. So we maintain the data for + * the statement start. *) + + val pretty: unit -> t -> Pretty.doc (** Pretty-print the state *) + + val stmtStartData: t Inthash.t + (** For each block id, the data at the start. This data structure must be + * initialized with the initial data for each block *) + + val combineStmtStartData: Cil.stmt -> old:t -> t -> t option + (** When the analysis reaches the start of a block, combine the old data + * with the one we have just computed. Return None if the combination is + * the same as the old data, otherwise return the combination. In the + * latter case, the predecessors of the statement are put on the working + * list. *) + + + val combineSuccessors: t -> t -> t + (** Take the data from two successors and combine it *) + + + val doStmt: Cil.stmt -> t action + (** The (backwards) transfer function for a branch. The {!Cil.currentLoc} is + * set before calling this. If it returns None, then we have some default + * handling. Otherwise, the returned data is the data before the branch + * (not considering the exception handlers) *) + + val doInstr: Cil.instr -> t -> t action + (** The (backwards) transfer function for an instruction. The + * {!Cil.currentLoc} is set before calling this. If it returns None, then we + * have some default handling. Otherwise, the returned data is the data + * before the branch (not considering the exception handlers) *) + + val filterStmt: Cil.stmt -> Cil.stmt -> bool + (** Whether to put this predecessor block in the worklist. We give the + * predecessor and the block whose predecessor we are (and whose data has + * changed) *) + +end + +module BackwardsDataFlow (T : BackwardsTransfer) : sig + val compute: Cil.stmt list -> unit + (** Fill in the T.stmtStartData, given a number of initial statements to + * start from (the sinks for the backwards data flow). All of the statements + * (not just the initial ones!) must have some entry in T.stmtStartData + * (i.e., the initial data should not be bottom) *) +end diff --git a/cil/src/ext/dataslicing.ml b/cil/src/ext/dataslicing.ml new file mode 100644 index 00000000..35390b40 --- /dev/null +++ b/cil/src/ext/dataslicing.ml @@ -0,0 +1,462 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2004, + * Jeremy Condit + * George C. Necula + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +open Cil +open Pretty +module E = Errormsg + +let debug = false + +let numRegions : int = 2 + +let newGlobals : global list ref = ref [] + +let curFundec : fundec ref = ref dummyFunDec +let curLocation : location ref = ref locUnknown + +let applyOption (fn : 'a -> 'b) (ao : 'a option) : 'b option = + match ao with + | Some a -> Some (fn a) + | None -> None + +let getRegion (attrs : attributes) : int = + try + match List.hd (filterAttributes "region" attrs) with + | Attr (_, [AInt i]) -> i + | _ -> E.s (bug "bad region attribute") + with Failure _ -> + 1 + +let checkRegion (i : int) (attrs : attributes) : bool = + (getRegion attrs) = i + +let regionField (i : int) : string = + "r" ^ (string_of_int i) + +let regionStruct (i : int) (name : string) : string = + name ^ "_r" ^ (string_of_int i) + +let foldRegions (fn : int -> 'a -> 'a) (base : 'a) : 'a = + let rec helper (i : int) : 'a = + if i <= numRegions then + fn i (helper (i + 1)) + else + base + in + helper 1 + +let rec getTypeName (t : typ) : string = + match t with + | TVoid _ -> "void" + | TInt _ -> "int" + | TFloat _ -> "float" + | TComp (cinfo, _) -> "comp_" ^ cinfo.cname + | TNamed (tinfo, _) -> "td_" ^ tinfo.tname + | TPtr (bt, _) -> "ptr_" ^ (getTypeName bt) + | TArray (bt, _, _) -> "array_" ^ (getTypeName bt) + | TFun _ -> "fn" + | _ -> E.s (unimp "typename") + +let isAllocFunction (fn : exp) : bool = + match fn with + | Lval (Var vinfo, NoOffset) when vinfo.vname = "malloc" -> true + | _ -> false + +let isExternalFunction (fn : exp) : bool = + match fn with + | Lval (Var vinfo, NoOffset) when vinfo.vstorage = Extern -> true + | _ -> false + +let types : (int * typsig, typ) Hashtbl.t = Hashtbl.create 113 +let typeInfos : (int * string, typeinfo) Hashtbl.t = Hashtbl.create 113 +let compInfos : (int * int, compinfo) Hashtbl.t = Hashtbl.create 113 +let varTypes : (typsig, typ) Hashtbl.t = Hashtbl.create 113 +let varCompInfos : (typsig, compinfo) Hashtbl.t = Hashtbl.create 113 + +let rec sliceCompInfo (i : int) (cinfo : compinfo) : compinfo = + try + Hashtbl.find compInfos (i, cinfo.ckey) + with Not_found -> + mkCompInfo cinfo.cstruct (regionStruct i cinfo.cname) + (fun cinfo' -> + Hashtbl.add compInfos (i, cinfo.ckey) cinfo'; + List.fold_right + (fun finfo rest -> + let t = sliceType i finfo.ftype in + if not (isVoidType t) then + (finfo.fname, t, finfo.fbitfield, + finfo.fattr, finfo.floc) :: rest + else + rest) + cinfo.cfields []) + cinfo.cattr + +and sliceTypeInfo (i : int) (tinfo : typeinfo) : typeinfo = + try + Hashtbl.find typeInfos (i, tinfo.tname) + with Not_found -> + let result = + { tinfo with tname = regionStruct i tinfo.tname; + ttype = sliceType i tinfo.ttype; } + in + Hashtbl.add typeInfos (i, tinfo.tname) result; + result + +and sliceType (i : int) (t : typ) : typ = + let ts = typeSig t in + try + Hashtbl.find types (i, ts) + with Not_found -> + let result = + match t with + | TVoid _ -> t + | TInt (_, attrs) -> if checkRegion i attrs then t else TVoid [] + | TFloat (_, attrs) -> if checkRegion i attrs then t else TVoid [] + | TComp (cinfo, attrs) -> TComp (sliceCompInfo i cinfo, attrs) + | TNamed (tinfo, attrs) -> TNamed (sliceTypeInfo i tinfo, attrs) + | TPtr (TVoid _, _) -> t (* Avoid discarding void*. *) + | TPtr (bt, attrs) -> + let bt' = sliceType i bt in + if not (isVoidType bt') then TPtr (bt', attrs) else TVoid [] + | TArray (bt, eo, attrs) -> + TArray (sliceType i bt, applyOption (sliceExp 1) eo, attrs) + | TFun (ret, args, va, attrs) -> + if checkRegion i attrs then + TFun (sliceTypeAll ret, + applyOption + (List.map (fun (aname, atype, aattrs) -> + (aname, sliceTypeAll atype, aattrs))) + args, + va, attrs) + else + TVoid [] + | TBuiltin_va_list _ -> t + | _ -> E.s (unimp "type %a" d_type t) + in + Hashtbl.add types (i, ts) result; + result + +and sliceTypeAll (t : typ) : typ = + begin + match t with + | TComp (cinfo, _) when hasAttribute "var_type_sliced" cinfo.cattr -> + E.s (bug "tried to slice twice") + | _ -> () + end; + let ts = typeSig t in + try + Hashtbl.find varTypes ts + with Not_found -> + let cinfo = + let name = ("var_" ^ (getTypeName t)) in + if debug then ignore (E.log "creating %s\n" name); + try + Hashtbl.find varCompInfos ts + with Not_found -> + mkCompInfo true name + (fun cinfo -> + Hashtbl.add varCompInfos ts cinfo; + foldRegions + (fun i rest -> + let t' = sliceType i t in + if not (isVoidType t') then + (regionField i, t', None, [], !curLocation) :: rest + else + rest) + []) + [Attr ("var_type_sliced", [])] + in + let t' = + if List.length cinfo.cfields > 1 then + begin + newGlobals := GCompTag (cinfo, !curLocation) :: !newGlobals; + TComp (cinfo, []) + end + else + t + in + Hashtbl.add varTypes ts t'; + t' + +and sliceLval (i : int) (lv : lval) : lval = + if debug then ignore (E.log "lval %a\n" d_lval lv); + let lh, offset = lv in + match lh with + | Var vinfo -> + let t = sliceTypeAll vinfo.vtype in + let offset' = + match t with + | TComp (cinfo, _) when hasAttribute "var_type_sliced" cinfo.cattr -> + Field (getCompField cinfo (regionField i), offset) + | _ -> offset + in + Var vinfo, offset' + | Mem e -> + Mem (sliceExp i e), offset + +and sliceExp (i : int) (e : exp) : exp = + if debug then ignore (E.log "exp %a\n" d_exp e); + match e with + | Const c -> Const c + | Lval lv -> Lval (sliceLval i lv) + | UnOp (op, e1, t) -> UnOp (op, sliceExp i e1, sliceType i t) + | BinOp (op, e1, e2, t) -> BinOp (op, sliceExp i e1, sliceExp i e2, + sliceType i t) + | CastE (t, e) -> sliceCast i t e + | AddrOf lv -> AddrOf (sliceLval i lv) + | StartOf lv -> StartOf (sliceLval i lv) + | SizeOf t -> SizeOf (sliceTypeAll t) + | _ -> E.s (unimp "exp %a" d_exp e) + +and sliceCast (i : int) (t : typ) (e : exp) : exp = + let te = typeOf e in + match t, te with + | TInt (k1, _), TInt (k2, attrs2) when k1 = k2 -> + (* Note: We strip off integer cast operations. *) + sliceExp (getRegion attrs2) e + | TInt (k1, _), TPtr _ -> + (* Note: We strip off integer cast operations. *) + sliceExp i e + | TPtr _, _ when isZero e -> + CastE (sliceType i t, sliceExp i e) + | TPtr (bt1, _), TPtr (bt2, _) when (typeSig bt1) = (typeSig bt2) -> + CastE (sliceType i t, sliceExp i e) + | _ -> + E.s (unimp "sketchy cast (%a) -> (%a)\n" d_type te d_type t) + +and sliceExpAll (e : exp) (l : location) : instr list * exp = + let t = typeOf e in + let t' = sliceTypeAll t in + match t' with + | TComp (cinfo, _) when hasAttribute "var_type_sliced" cinfo.cattr -> + let vinfo = makeTempVar !curFundec t in + let instrs = + foldRegions + (fun i rest -> + try + let finfo = getCompField cinfo (regionField i) in + if not (isVoidType finfo.ftype) then + Set ((Var vinfo, Field (finfo, NoOffset)), + sliceExp i e, l) :: rest + else + rest + with Not_found -> + rest) + [] + in + instrs, Lval (var vinfo) + | _ -> [], sliceExp 1 e + +let sliceVar (vinfo : varinfo) : unit = + if hasAttribute "var_sliced" vinfo.vattr then + E.s (bug "tried to slice a var twice"); + let t = sliceTypeAll vinfo.vtype in + if debug then ignore (E.log "setting %s type to %a\n" vinfo.vname d_type t); + vinfo.vattr <- addAttribute (Attr ("var_sliced", [])) vinfo.vattr; + vinfo.vtype <- t + +let sliceInstr (inst : instr) : instr list = + match inst with + | Set (lv, e, loc) -> + if debug then ignore (E.log "set %a %a\n" d_lval lv d_exp e); + let t = typeOfLval lv in + foldRegions + (fun i rest -> + if not (isVoidType (sliceType i t)) then + Set (sliceLval i lv, sliceExp i e, loc) :: rest + else + rest) + [] + | Call (ret, fn, args, l) when isAllocFunction fn -> + let lv = + match ret with + | Some lv -> lv + | None -> E.s (bug "malloc call has no return lval") + in + let t = typeOfLval lv in + foldRegions + (fun i rest -> + if not (isVoidType (sliceType i t)) then + Call (Some (sliceLval i lv), sliceExp 1 fn, + List.map (sliceExp i) args, l) :: rest + else + rest) + [] + | Call (ret, fn, args, l) when isExternalFunction fn -> + [Call (applyOption (sliceLval 1) ret, sliceExp 1 fn, + List.map (sliceExp 1) args, l)] + | Call (ret, fn, args, l) -> + let ret', set = + match ret with + | Some lv -> + let vinfo = makeTempVar !curFundec (typeOfLval lv) in + Some (var vinfo), [Set (lv, Lval (var vinfo), l)] + | None -> + None, [] + in + let instrs, args' = + List.fold_right + (fun arg (restInstrs, restArgs) -> + let instrs, arg' = sliceExpAll arg l in + instrs @ restInstrs, (arg' :: restArgs)) + args ([], []) + in + instrs @ (Call (ret', sliceExp 1 fn, args', l) :: set) + | _ -> E.s (unimp "inst %a" d_instr inst) + +let sliceReturnExp (eo : exp option) (l : location) : stmtkind = + match eo with + | Some e -> + begin + match sliceExpAll e l with + | [], e' -> Return (Some e', l) + | instrs, e' -> Block (mkBlock [mkStmt (Instr instrs); + mkStmt (Return (Some e', l))]) + end + | None -> Return (None, l) + +let rec sliceStmtKind (sk : stmtkind) : stmtkind = + match sk with + | Instr instrs -> Instr (List.flatten (List.map sliceInstr instrs)) + | Block b -> Block (sliceBlock b) + | If (e, b1, b2, l) -> If (sliceExp 1 e, sliceBlock b1, sliceBlock b2, l) + | Break l -> Break l + | Continue l -> Continue l + | Return (eo, l) -> sliceReturnExp eo l + | Switch (e, b, sl, l) -> Switch (sliceExp 1 e, sliceBlock b, + List.map sliceStmt sl, l) +(* + | Loop (b, l, so1, so2) -> Loop (sliceBlock b, l, + applyOption sliceStmt so1, + applyOption sliceStmt so2) +*) + | While (e, b, l) -> While (sliceExp 1 e, sliceBlock b, l) + | DoWhile (e, b, l) -> DoWhile (sliceExp 1 e, sliceBlock b, l) + | For (bInit, e, bIter, b, l) -> + For (sliceBlock bInit, sliceExp 1e, sliceBlock bIter, sliceBlock b, l) + | Goto _ -> sk + | _ -> E.s (unimp "statement") + +and sliceStmt (s : stmt) : stmt = + (* Note: We update statements destructively so that goto/switch work. *) + s.skind <- sliceStmtKind s.skind; + s + +and sliceBlock (b : block) : block = + ignore (List.map sliceStmt b.bstmts); + b + +let sliceFundec (fd : fundec) (l : location) : unit = + curFundec := fd; + curLocation := l; + ignore (sliceBlock fd.sbody); + curFundec := dummyFunDec; + curLocation := locUnknown + +let sliceGlobal (g : global) : unit = + match g with + | GType (tinfo, l) -> + newGlobals := + foldRegions (fun i rest -> GType (sliceTypeInfo i tinfo, l) :: rest) + !newGlobals + | GCompTag (cinfo, l) -> + newGlobals := + foldRegions (fun i rest -> GCompTag (sliceCompInfo i cinfo, l) :: rest) + !newGlobals + | GCompTagDecl (cinfo, l) -> + newGlobals := + foldRegions (fun i rest -> GCompTagDecl (sliceCompInfo i cinfo, l) :: + rest) + !newGlobals + | GFun (fd, l) -> + sliceFundec fd l; + newGlobals := GFun (fd, l) :: !newGlobals + | GVarDecl _ + | GVar _ -> + (* Defer processing of vars until end. *) + newGlobals := g :: !newGlobals + | _ -> + E.s (unimp "global %a\n" d_global g) + +let sliceGlobalVars (g : global) : unit = + match g with + | GFun (fd, l) -> + curFundec := fd; + curLocation := l; + List.iter sliceVar fd.slocals; + List.iter sliceVar fd.sformals; + setFunctionType fd (sliceType 1 fd.svar.vtype); + curFundec := dummyFunDec; + curLocation := locUnknown; + | GVar (vinfo, _, l) -> + curLocation := l; + sliceVar vinfo; + curLocation := locUnknown + | _ -> () + +class dropAttrsVisitor = object + inherit nopCilVisitor + + method vvrbl (vinfo : varinfo) = + vinfo.vattr <- dropAttribute "var_sliced" vinfo.vattr; + DoChildren + + method vglob (g : global) = + begin + match g with + | GCompTag (cinfo, _) -> + cinfo.cattr <- dropAttribute "var_type_sliced" cinfo.cattr; + | _ -> () + end; + DoChildren +end + +let sliceFile (f : file) : unit = + newGlobals := []; + List.iter sliceGlobal f.globals; + List.iter sliceGlobalVars f.globals; + f.globals <- List.rev !newGlobals; + visitCilFile (new dropAttrsVisitor) f + +let feature : featureDescr = + { fd_name = "DataSlicing"; + fd_enabled = ref false; + fd_description = "data slicing"; + fd_extraopt = []; + fd_doit = sliceFile; + fd_post_check = true; + } diff --git a/cil/src/ext/dataslicing.mli b/cil/src/ext/dataslicing.mli new file mode 100644 index 00000000..00606484 --- /dev/null +++ b/cil/src/ext/dataslicing.mli @@ -0,0 +1,41 @@ +(* + * + * Copyright (c) 2001-2002, + * Jeremy Condit + * George C. Necula + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* This feature implements data slicing. The user annotates base types + * and function types with region(i) annotations, and this transformation + * will separate the fields into parallel data structures accordingly. *) + +val feature: Cil.featureDescr diff --git a/cil/src/ext/deadcodeelim.ml b/cil/src/ext/deadcodeelim.ml new file mode 100644 index 00000000..e560e01d --- /dev/null +++ b/cil/src/ext/deadcodeelim.ml @@ -0,0 +1,173 @@ +(* Eliminate assignment instructions whose results are not + used *) + +open Cil +open Pretty + +module E = Errormsg +module RD = Reachingdefs +module UD = Usedef +module IH = Inthash +module S = Stats + +module IS = Set.Make( + struct + type t = int + let compare = compare + end) + +let debug = RD.debug + + +let usedDefsSet = ref IS.empty +(* put used def ids into usedDefsSet *) +(* assumes reaching definitions have already been computed *) +class usedDefsCollectorClass = object(self) + inherit RD.rdVisitorClass + + method add_defids iosh e u = + UD.VS.iter (fun vi -> + if IH.mem iosh vi.vid then + let ios = IH.find iosh vi.vid in + if !debug then ignore(E.log "DCE: IOS size for vname=%s at stmt=%d: %d\n" + vi.vname sid (RD.IOS.cardinal ios)); + RD.IOS.iter (function + Some(i) -> + if !debug then ignore(E.log "DCE: def %d used: %a\n" i d_plainexp e); + usedDefsSet := IS.add i (!usedDefsSet) + | None -> ()) ios + else if !debug then ignore(E.log "DCE: vid %d:%s not in stm:%d iosh at %a\n" + vi.vid vi.vname sid d_plainexp e)) u + + method vexpr e = + let u = UD.computeUseExp e in + match self#get_cur_iosh() with + Some(iosh) -> self#add_defids iosh e u; DoChildren + | None -> + if !debug then ignore(E.log "DCE: use but no rd data: %a\n" d_plainexp e); + DoChildren + + method vinst i = + let handle_inst iosh i = match i with + | Asm(_,_,slvl,_,_,_) -> List.iter (fun (s,lv) -> + match lv with (Var v, off) -> + if s.[0] = '+' then + self#add_defids iosh (Lval(Var v, off)) (UD.VS.singleton v) + | _ -> ()) slvl + | _ -> () + in + begin try + cur_rd_dat <- Some(List.hd rd_dat_lst); + rd_dat_lst <- List.tl rd_dat_lst + with Failure "hd" -> () + end; + match self#get_cur_iosh() with + Some iosh -> handle_inst iosh i; DoChildren + | None -> DoChildren + +end + +(*************************************************** + * Also need to find reads from volatiles + * uses two functions I've put in ciltools which + * are basically what Zach wrote, except one is for + * types and one is for vars. Another difference is + * they filter out pointers to volatiles. This + * handles DMA + ***************************************************) +class hasVolatile flag = object (self) + inherit nopCilVisitor + method vlval l = + let tp = typeOfLval l in + if (Ciltools.is_volatile_tp tp) then flag := true; + DoChildren + method vexpr e = + DoChildren +end + +let exp_has_volatile e = + let flag = ref false in + ignore (visitCilExpr (new hasVolatile flag) e); + !flag + (***************************************************) + +let removedCount = ref 0 +(* Filter out instructions whose definition ids are not + in usedDefsSet *) +class uselessInstrElim : cilVisitor = object(self) + inherit nopCilVisitor + + method vstmt stm = + + let test (i,(_,s,iosh)) = + match i with + Call _ -> true + | Set((Var vi,NoOffset),e,_) -> + if vi.vglob || (Ciltools.is_volatile_vi vi) || (exp_has_volatile e) then true else + let _, defd = UD.computeUseDefInstr i in + let rec loop n = + if n < 0 then false else + if IS.mem (n+s) (!usedDefsSet) + then true + else loop (n-1) + in + if loop (UD.VS.cardinal defd - 1) + then true + else (incr removedCount; false) + | _ -> true + in + + let filter il stmdat = + let rd_dat_lst = RD.instrRDs il stm.sid stmdat false in + let ildatlst = List.combine il rd_dat_lst in + let ildatlst' = List.filter test ildatlst in + let (newil,_) = List.split ildatlst' in + newil + in + + match RD.getRDs stm.sid with + None -> DoChildren + | Some(_,s,iosh) -> + match stm.skind with + Instr il -> + stm.skind <- Instr(filter il ((),s,iosh)); + SkipChildren + | _ -> DoChildren + +end + +(* until fixed point is reached *) +let elim_dead_code_fp (fd : fundec) : fundec = + (* fundec -> fundec *) + let rec loop fd = + usedDefsSet := IS.empty; + removedCount := 0; + S.time "reaching definitions" RD.computeRDs fd; + ignore(visitCilFunction (new usedDefsCollectorClass :> cilVisitor) fd); + let fd' = visitCilFunction (new uselessInstrElim) fd in + if !removedCount = 0 then fd' else loop fd' + in + loop fd + +(* just once *) +let elim_dead_code (fd : fundec) : fundec = + (* fundec -> fundec *) + usedDefsSet := IS.empty; + removedCount := 0; + S.time "reaching definitions" RD.computeRDs fd; + ignore(visitCilFunction (new usedDefsCollectorClass :> cilVisitor) fd); + let fd' = visitCilFunction (new uselessInstrElim) fd in + fd' + +class deadCodeElimClass : cilVisitor = object(self) + inherit nopCilVisitor + + method vfunc fd = + let fd' = elim_dead_code fd in + ChangeTo(fd') + +end + +let dce f = + if !debug then ignore(E.log "DCE: starting dead code elimination\n"); + visitCilFile (new deadCodeElimClass) f diff --git a/cil/src/ext/dominators.ml b/cil/src/ext/dominators.ml new file mode 100755 index 00000000..d838d23f --- /dev/null +++ b/cil/src/ext/dominators.ml @@ -0,0 +1,241 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(** Compute dominator information for the statements in a function *) +open Cil +open Pretty +module E = Errormsg +module H = Hashtbl +module U = Util +module IH = Inthash + +module DF = Dataflow + +let debug = false + +(* For each statement we maintain a set of statements that dominate it *) +module BS = Set.Make(struct + type t = Cil.stmt + let compare v1 v2 = Pervasives.compare v1.sid v2.sid + end) + + + + +(** Customization module for dominators *) +module DT = struct + let name = "dom" + + let debug = ref debug + + type t = BS.t + + (** For each statement in a function we keep the set of dominator blocks. + * Indexed by statement id *) + let stmtStartData: t IH.t = IH.create 17 + + let copy (d: t) = d + + let pretty () (d: t) = + dprintf "{%a}" + (docList (fun s -> dprintf "%d" s.sid)) + (BS.elements d) + + let computeFirstPredecessor (s: stmt) (d: BS.t) : BS.t = + (* Make sure we add this block to the set *) + BS.add s d + + let combinePredecessors (s: stmt) ~(old: BS.t) (d: BS.t) : BS.t option = + (* First, add this block to the data from the predecessor *) + let d' = BS.add s d in + if BS.subset old d' then + None + else + Some (BS.inter old d') + + let doInstr (i: instr) (d: t) = DF.Default + + let doStmt (s: stmt) (d: t) = DF.SDefault + + let doGuard condition _ = DF.GDefault + + + let filterStmt _ = true +end + + + +module Dom = DF.ForwardsDataFlow(DT) + +let getStmtDominators (data: BS.t IH.t) (s: stmt) : BS.t = + try IH.find data s.sid + with Not_found -> BS.empty (* Not reachable *) + + +let getIdom (idomInfo: stmt option IH.t) (s: stmt) = + try IH.find idomInfo s.sid + with Not_found -> + E.s (E.bug "Immediate dominator information not set for statement %d" + s.sid) + +(** Check whether one block dominates another. This assumes that the "idom" + * field has been computed. *) +let rec dominates (idomInfo: stmt option IH.t) (s1: stmt) (s2: stmt) = + s1 == s2 || + (let s2idom = getIdom idomInfo s2 in + match s2idom with + None -> false + | Some s2idom -> dominates idomInfo s1 s2idom) + + + + +let computeIDom (f: fundec) : stmt option IH.t = + (* We must prepare the CFG info first *) + prepareCFG f; + computeCFGInfo f false; + + IH.clear DT.stmtStartData; + let idomData: stmt option IH.t = IH.create 13 in + + let _ = + match f.sbody.bstmts with + [] -> () (* function has no body *) + | start :: _ -> begin + (* We start with only the start block *) + IH.add DT.stmtStartData start.sid (BS.singleton start); + + Dom.compute [start]; + + (* Dump the dominators information *) + if debug then + List.iter + (fun s -> + let sdoms = getStmtDominators DT.stmtStartData s in + if not (BS.mem s sdoms) then begin + (* It can be that the block is not reachable *) + if s.preds <> [] then + E.s (E.bug "Statement %d is not in its list of dominators" + s.sid); + end; + ignore (E.log "Dominators for %d: %a\n" s.sid + DT.pretty (BS.remove s sdoms))) + f.sallstmts; + + (* Now fill the immediate dominators for all nodes *) + let rec fillOneIdom (s: stmt) = + try + ignore (IH.find idomData s.sid) + (* Already set *) + with Not_found -> begin + (* Get the dominators *) + let sdoms = getStmtDominators DT.stmtStartData s in + (* Fill the idom for the dominators first *) + let idom = + BS.fold + (fun d (sofar: stmt option) -> + if d.sid = s.sid then + sofar (* Ignore the block itself *) + else begin + (* fill the idom information recursively *) + fillOneIdom d; + match sofar with + None -> Some d + | Some sofar' -> + (* See if d is dominated by sofar. We know that the + * idom information has been computed for both sofar + * and for d*) + if dominates idomData sofar' d then + Some d + else + sofar + end) + sdoms + None + in + IH.replace idomData s.sid idom + end + in + (* Scan all blocks and compute the idom *) + List.iter fillOneIdom f.sallstmts + end + in + idomData + + + +(** Compute the start of the natural loops. For each start, keep a list of + * origin of a back edge. The loop consists of the loop start and all + * predecessors of the origins of back edges, up to and including the loop + * start *) +let findNaturalLoops (f: fundec) + (idomData: stmt option IH.t) : (stmt * stmt list) list = + let loops = + List.fold_left + (fun acc b -> + (* Iterate over all successors, and see if they are among the + * dominators for this block *) + List.fold_left + (fun acc s -> + if dominates idomData s b then + (* s is the start of a natural loop *) + let rec addNaturalLoop = function + [] -> [(s, [b])] + | (s', backs) :: rest when s'.sid = s.sid -> + (s', b :: backs) :: rest + | l :: rest -> l :: addNaturalLoop rest + in + addNaturalLoop acc + else + acc) + acc + b.succs) + [] + f.sallstmts + in + + if debug then + ignore (E.log "Natural loops:\n%a\n" + (docList ~sep:line + (fun (s, backs) -> + dprintf " Start: %d, backs:%a" + s.sid + (docList (fun b -> num b.sid)) + backs)) + loops); + + loops diff --git a/cil/src/ext/dominators.mli b/cil/src/ext/dominators.mli new file mode 100755 index 00000000..0abf82e9 --- /dev/null +++ b/cil/src/ext/dominators.mli @@ -0,0 +1,29 @@ + + +(** Compute dominators using data flow analysis *) +(** Author: George Necula + 5/28/2004 + **) + +(** Invoke on a code after filling in the CFG info and it computes the + * immediate dominator information. We map each statement to its immediate + * dominator (None for the start statement, and for the unreachable + * statements). *) +val computeIDom: Cil.fundec -> Cil.stmt option Inthash.t + + +(** This is like Inthash.find but gives an error if the information is + * Not_found *) +val getIdom: Cil.stmt option Inthash.t -> Cil.stmt -> Cil.stmt option + +(** Check whether one statement dominates another. *) +val dominates: Cil.stmt option Inthash.t -> Cil.stmt -> Cil.stmt -> bool + + +(** Compute the start of the natural loops. This assumes that the "idom" + * field has been computed. For each start, keep a list of origin of a back + * edge. The loop consists of the loop start and all predecessors of the + * origins of back edges, up to and including the loop start *) +val findNaturalLoops: Cil.fundec -> + Cil.stmt option Inthash.t -> + (Cil.stmt * Cil.stmt list) list diff --git a/cil/src/ext/epicenter.ml b/cil/src/ext/epicenter.ml new file mode 100644 index 00000000..a8045e85 --- /dev/null +++ b/cil/src/ext/epicenter.ml @@ -0,0 +1,114 @@ +(* epicenter.ml *) +(* code for epicenter.mli *) + +(* module maintainer: scott *) +(* see copyright at end of this file *) + +open Callgraph +open Cil +open Trace +open Pretty +module H = Hashtbl +module IH = Inthash + +let sliceFile (f:file) (epicenter:string) (maxHops:int) : unit = + (* compute the static call graph *) + let graph:callgraph = (computeGraph f) in + + (* will accumulate here the set of names of functions already seen *) + let seen: (string, unit) H.t = (H.create 117) in + + (* when removing "unused" symbols, keep all seen functions *) + let isRoot : global -> bool = function + | GFun ({svar = {vname = vname}}, _) -> + H.mem seen vname + | _ -> + false + in + + (* recursive depth-first search through the call graph, finding + * all nodes within 'hops' hops of 'node' and marking them to + * to be retained *) + let rec dfs (node:callnode) (hops:int) : unit = + (* only recurse if we haven't already marked this node *) + if not (H.mem seen (nodeName node.cnInfo)) then + begin + (* add this node *) + H.add seen (nodeName node.cnInfo) (); + trace "epicenter" (dprintf "will keep %s\n" (nodeName node.cnInfo)); + + (* if we cannot do any more hops, stop *) + if (hops > 0) then + + (* recurse on all the node's callers and callees *) + let recurse _ (adjacent:callnode) : unit = + (dfs adjacent (hops - 1)) + in + IH.iter recurse node.cnCallees; + IH.iter recurse node.cnCallers + end + in + dfs (Hashtbl.find graph epicenter) maxHops; + + (* finally, throw away anything we haven't decided to keep *) + Cilutil.sliceGlobal := true; + Rmtmps.removeUnusedTemps ~isRoot:isRoot f + +let doEpicenter = ref false +let epicenterName = ref "" +let epicenterHops = ref 0 + +let feature : featureDescr = + { fd_name = "epicenter"; + fd_enabled = doEpicenter; + fd_description = "remove all functions except those within some number " ^ + "of hops (in the call graph) from a given function"; + fd_extraopt = + [ + ("--epicenter-name", + Arg.String (fun s -> epicenterName := s), + ": do an epicenter slice starting from function "); + ("--epicenter-hops", Arg.Int (fun n -> epicenterHops := n), + ": specify max # of hops for epicenter slice"); + ]; + + fd_doit = + (fun f -> + sliceFile f !epicenterName !epicenterHops); + + fd_post_check = true; + } + + +(* + * + * Copyright (c) 2001-2002 by + * George C. Necula necula@cs.berkeley.edu + * Scott McPeak smcpeak@cs.berkeley.edu + * Wes Weimer weimer@cs.berkeley.edu + * Ben Liblit liblit@cs.berkeley.edu + * + * All rights reserved. Permission to use, copy, modify and distribute + * this software for research purposes only is hereby granted, + * provided that the following conditions are met: + * 1. XSRedistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the authors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * DISCLAIMER: + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/heap.ml b/cil/src/ext/heap.ml new file mode 100644 index 00000000..10f48a04 --- /dev/null +++ b/cil/src/ext/heap.ml @@ -0,0 +1,112 @@ +(* See copyright notice at the end of the file *) + +(* The type of a heap (priority queue): keys are integers, data values + * are whatever you like *) +type ('a) t = { + elements : (int * ('a option)) array ; + mutable size : int ; (* current number of elements *) + capacity : int ; (* max number of elements *) +} + +let create size = { + elements = Array.create (size+1) (max_int,None) ; + size = 0 ; + capacity = size ; +} + +let clear heap = heap.size <- 0 + +let is_full heap = (heap.size = heap.capacity) + +let is_empty heap = (heap.size = 0) + +let insert heap prio elt = begin + if is_full heap then begin + raise (Invalid_argument "Heap.insert") + end ; + heap.size <- heap.size + 1 ; + let i = ref heap.size in + while ( fst heap.elements.(!i / 2) < prio ) do + heap.elements.(!i) <- heap.elements.(!i / 2) ; + i := (!i / 2) + done ; + heap.elements.(!i) <- (prio,Some(elt)) + end + +let examine_max heap = + if is_empty heap then begin + raise (Invalid_argument "Heap.examine_max") + end ; + match heap.elements.(1) with + p,Some(elt) -> p,elt + | p,None -> failwith "Heap.examine_max" + +let extract_max heap = begin + if is_empty heap then begin + raise (Invalid_argument "Heap.extract_max") + end ; + let max = heap.elements.(1) in + let last = heap.elements.(heap.size) in + heap.size <- heap.size - 1 ; + let i = ref 1 in + let break = ref false in + while (!i * 2 <= heap.size) && not !break do + let child = ref (!i * 2) in + + (* find smaller child *) + if (!child <> heap.size && + fst heap.elements.(!child+1) > fst heap.elements.(!child)) then begin + incr child + end ; + + (* percolate one level *) + if (fst last < fst heap.elements.(!child)) then begin + heap.elements.(!i) <- heap.elements.(!child) ; + i := !child + end else begin + break := true + end + done ; + heap.elements.(!i) <- last ; + match max with + p,Some(elt) -> p,elt + | p,None -> failwith "Heap.examine_min" + end + + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/heapify.ml b/cil/src/ext/heapify.ml new file mode 100644 index 00000000..a583181e --- /dev/null +++ b/cil/src/ext/heapify.ml @@ -0,0 +1,250 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* + * Heapify: a program transform that looks over functions, finds those + * that have local (stack) variables that contain arrays, puts all such + * local variables into a single heap allocated structure, changes all + * accesses to such variables into accesses to fields of that structure + * and frees the structure on return. + *) +open Cil + +(* utilities that should be in Cil.ml *) +(* sfg: this function appears to never be called *) +let mkSimpleField ci fn ft fl = + { fcomp = ci ; fname = fn ; ftype = ft ; fbitfield = None ; fattr = []; + floc = fl } + + +(* actual Heapify begins *) + +let heapifyNonArrays = ref false + +(* Does this local var contain an array? *) +let rec containsArray (t:typ) : bool = (* does this type contain an array? *) + match unrollType t with + TArray _ -> true + | TComp(ci, _) -> (* look at the types of the fields *) + List.exists (fun fi -> containsArray fi.ftype) ci.cfields + | _ -> + (* Ignore other types, including TInt and TPtr. We don't care whether + there are arrays in the base types of pointers; only about whether + this local variable itself needs to be moved to the heap. *) + false + + +class heapifyModifyVisitor big_struct big_struct_fields varlist free + (currentFunction: fundec) = object(self) + inherit nopCilVisitor (* visit lvalues and statements *) + method vlval l = match l with (* should we change this one? *) + Var(vi),vi_offset when List.mem_assoc vi varlist -> (* check list *) + let i = List.assoc vi varlist in (* find field offset *) + let big_struct_field = List.nth big_struct_fields i in + let new_lval = Mem(Lval(big_struct, NoOffset)), + Field(big_struct_field,vi_offset) in (* rewrite the lvalue *) + ChangeDoChildrenPost(new_lval, (fun l -> l)) + | _ -> DoChildren (* ignore other lvalues *) + method vstmt s = match s.skind with (* also rewrite the return *) + Return(None,loc) -> + let free_instr = Call(None,free,[Lval(big_struct,NoOffset)],loc) in + self#queueInstr [free_instr]; (* insert free_instr before the return *) + DoChildren + | Return(Some exp ,loc) -> + (* exp may depend on big_struct, so evaluate it before calling free. + * This becomes: tmp = exp; free(big_struct); return tmp; *) + let exp_new = visitCilExpr (self :> cilVisitor) exp in + let ret_tmp = makeTempVar currentFunction (typeOf exp_new) in + let eval_ret_instr = Set(var ret_tmp, exp_new, loc) in + let free_instr = Call(None,free,[Lval(big_struct,NoOffset)],loc) in + (* insert the instructions before the return *) + self#queueInstr [eval_ret_instr; free_instr]; + s.skind <- (Return(Some(Lval(var ret_tmp)), loc)); + DoChildren + | _ -> DoChildren (* ignore other statements *) +end + +class heapifyAnalyzeVisitor f alloc free = object + inherit nopCilVisitor (* only look at function bodies *) + method vglob gl = match gl with + GFun(fundec,funloc) -> + let counter = ref 0 in (* the number of local vars containing arrays *) + let varlist = ref [] in (* a list of (var,id) pairs, in reverse order *) + List.iter (fun vi -> + (* find all local vars with arrays. If the user requests it, + we also look for non-array vars whose address is taken. *) + if (containsArray vi.vtype) || (vi.vaddrof && !heapifyNonArrays) + then begin + varlist := (vi,!counter) :: !varlist ; (* add it to the list *) + incr counter (* put the next such var in the next slot *) + end + ) fundec.slocals ; + if (!varlist <> []) then begin (* some local vars contain arrays *) + let name = (fundec.svar.vname ^ "_heapify") in + let ci = mkCompInfo true name (* make a big structure *) + (fun _ -> List.rev_map (* reverse the list to fix the order *) + (* each local var becomes a field *) + (fun (vi,i) -> vi.vname,vi.vtype,None,[],vi.vdecl) !varlist) [] in + let vi = makeLocalVar fundec name (TPtr(TComp(ci,[]),[])) in + let modify = new heapifyModifyVisitor (Var(vi)) ci.cfields + !varlist free fundec in (* rewrite accesses to local vars *) + fundec.sbody <- visitCilBlock modify fundec.sbody ; + let alloc_stmt = mkStmt (* allocate the big struct on the heap *) + (Instr [Call(Some(Var(vi),NoOffset), alloc, + [SizeOf(TComp(ci,[]))],funloc)]) in + fundec.sbody.bstmts <- alloc_stmt :: fundec.sbody.bstmts ; + fundec.slocals <- List.filter (fun vi -> (* remove local vars *) + not (List.mem_assoc vi !varlist)) fundec.slocals ; + let typedec = (GCompTag(ci,funloc)) in (* declare the big struct *) + ChangeTo([typedec ; GFun(fundec,funloc)]) (* done! *) + end else + DoChildren (* ignore everything else *) + | _ -> DoChildren +end + +let heapify (f : file) (alloc : exp) (free : exp) = + visitCilFile (new heapifyAnalyzeVisitor f alloc free) f; + f + +(* heapify code ends here *) + +let default_heapify (f : file) = + let alloc_fun = emptyFunction "malloc" in + let free_fun = emptyFunction "free" in + let alloc_exp = (Lval((Var(alloc_fun.svar)),NoOffset)) in + let free_exp = (Lval((Var(free_fun.svar)),NoOffset)) in + ignore (heapify f alloc_exp free_exp) + +(* StackGuard clone *) + +class sgModifyVisitor restore_ra_stmt = object + inherit nopCilVisitor + method vstmt s = match s.skind with (* also rewrite the return *) + Return(_,loc) -> let new_block = mkBlock [restore_ra_stmt ; s] in + ChangeTo(mkStmt (Block(new_block))) + | _ -> DoChildren (* ignore other statements *) +end + +class sgAnalyzeVisitor f push pop get_ra set_ra = object + inherit nopCilVisitor + method vfunc fundec = + let needs_guarding = List.fold_left + (fun acc vi -> acc || containsArray vi.vtype) + false fundec.slocals in + if needs_guarding then begin + let ra_tmp = makeLocalVar fundec "return_address" voidPtrType in + let ra_exp = Lval(Var(ra_tmp),NoOffset) in + let save_ra_stmt = mkStmt (* save the current return address *) + (Instr [Call(Some(Var(ra_tmp),NoOffset), get_ra, [], locUnknown) ; + Call(None, push, [ra_exp], locUnknown)]) in + let restore_ra_stmt = mkStmt (* restore the old return address *) + (Instr [Call(Some(Var(ra_tmp),NoOffset), pop, [], locUnknown) ; + Call(None, set_ra, [ra_exp], locUnknown)]) in + let modify = new sgModifyVisitor restore_ra_stmt in + fundec.sbody <- visitCilBlock modify fundec.sbody ; + fundec.sbody.bstmts <- save_ra_stmt :: fundec.sbody.bstmts ; + ChangeTo(fundec) (* done! *) + end else DoChildren +end + +let stackguard (f : file) (push : exp) (pop : exp) + (get_ra : exp) (set_ra : exp) = + visitCilFileSameGlobals (new sgAnalyzeVisitor f push pop get_ra set_ra) f; + f + (* stackguard code ends *) + +let default_stackguard (f : file) = + let expify fundec = Lval(Var(fundec.svar),NoOffset) in + let push = expify (emptyFunction "stackguard_push") in + let pop = expify (emptyFunction "stackguard_pop") in + let get_ra = expify (emptyFunction "stackguard_get_ra") in + let set_ra = expify (emptyFunction "stackguard_set_ra") in + let global_decl = +"extern void * stackguard_get_ra(); +extern void stackguard_set_ra(void *new_ra); +/* You must provide an implementation for functions that get and set the + * return address. Such code is unfortunately architecture specific. + */ +struct stackguard_stack { + void * data; + struct stackguard_stack * next; +} * stackguard_stack; + +void stackguard_push(void *ra) { + void * old = stackguard_stack; + stackguard_stack = (struct stackguard_stack *) + malloc(sizeof(stackguard_stack)); + stackguard_stack->data = ra; + stackguard_stack->next = old; +} + +void * stackguard_pop() { + void * ret = stackguard_stack->data; + void * next = stackguard_stack->next; + free(stackguard_stack); + stackguard_stack->next = next; + return ret; +}" in + f.globals <- GText(global_decl) :: f.globals ; + ignore (stackguard f push pop get_ra set_ra ) + + +let feature1 : featureDescr = + { fd_name = "stackGuard"; + fd_enabled = Cilutil.doStackGuard; + fd_description = "instrument function calls and returns to maintain a separate stack for return addresses" ; + fd_extraopt = []; + fd_doit = (function (f: file) -> default_stackguard f); + fd_post_check = true; + } +let feature2 : featureDescr = + { fd_name = "heapify"; + fd_enabled = Cilutil.doHeapify; + fd_description = "move stack-allocated arrays to the heap" ; + fd_extraopt = [ + "--heapifyAll", Arg.Set heapifyNonArrays, + "When using heapify, move all local vars whose address is taken, not just arrays."; + ]; + fd_doit = (function (f: file) -> default_heapify f); + fd_post_check = true; + } + + + + + + diff --git a/cil/src/ext/liveness.ml b/cil/src/ext/liveness.ml new file mode 100644 index 00000000..72cd6073 --- /dev/null +++ b/cil/src/ext/liveness.ml @@ -0,0 +1,190 @@ + +(* Calculate which variables are live at + * each statememnt. + * + * + * + *) + +open Cil +open Pretty + +module DF = Dataflow +module UD = Usedef +module IH = Inthash +module E = Errormsg + +let debug = ref false + +let live_label = ref "" +let live_func = ref "" + +module VS = UD.VS + +let debug_print () vs = (VS.fold + (fun vi d -> + d ++ text "name: " ++ text vi.vname + ++ text " id: " ++ num vi.vid ++ text " ") + vs nil) ++ line + +let min_print () vs = (VS.fold + (fun vi d -> + d ++ text vi.vname + ++ text "(" ++ d_type () vi.vtype ++ text ")" + ++ text ",") + vs nil) ++ line + +let printer = ref debug_print + +module LiveFlow = struct + let name = "Liveness" + let debug = debug + type t = VS.t + + let pretty () vs = + let fn = !printer in + fn () vs + + let stmtStartData = IH.create 32 + + let combineStmtStartData (stm:stmt) ~(old:t) (now:t) = + if not(VS.compare old now = 0) + then Some(VS.union old now) + else None + + let combineSuccessors = VS.union + + let doStmt stmt = + if !debug then ignore(E.log "looking at: %a\n" d_stmt stmt); + match stmt.succs with + [] -> let u,d = UD.computeUseDefStmtKind stmt.skind in + if !debug then ignore(E.log "doStmt: no succs %d\n" stmt.sid); + DF.Done u + | _ -> + let handle_stm vs = match stmt.skind with + Instr _ -> vs + | s -> let u, d = UD.computeUseDefStmtKind s in + VS.union u (VS.diff vs d) + in + DF.Post handle_stm + + let doInstr i vs = + let transform vs' = + let u,d = UD.computeUseDefInstr i in + VS.union u (VS.diff vs' d) + in + DF.Post transform + + let filterStmt stm1 stm2 = true + +end + +module L = DF.BackwardsDataFlow(LiveFlow) + +let sink_stmts = ref [] +class sinkFinderClass = object(self) + inherit nopCilVisitor + + method vstmt s = match s.succs with + [] -> (sink_stmts := s :: (!sink_stmts); + DoChildren) + | _ -> DoChildren + +end + +(* gives list of return statements from a function *) +(* fundec -> stm list *) +let find_sinks fdec = + sink_stmts := []; + ignore(visitCilFunction (new sinkFinderClass) fdec); + !sink_stmts + +(* XXX: This does not compute the best ordering to + * give to the work-list algorithm. + *) +let all_stmts = ref [] +class nullAdderClass = object(self) + inherit nopCilVisitor + + method vstmt s = + all_stmts := s :: (!all_stmts); + IH.add LiveFlow.stmtStartData s.sid VS.empty; + DoChildren + +end + +let null_adder fdec = + ignore(visitCilFunction (new nullAdderClass) fdec); + !all_stmts + +let computeLiveness fdec = + IH.clear LiveFlow.stmtStartData; + UD.onlyNoOffsetsAreDefs := false; + all_stmts := []; + let a = null_adder fdec in + L.compute a + +let print_everything () = + let d = IH.fold (fun i vs d -> + d ++ num i ++ text ": " ++ LiveFlow.pretty () vs) + LiveFlow.stmtStartData nil in + ignore(printf "%t" (fun () -> d)) + +let match_label lbl = match lbl with + Label(str,_,b) -> + if !debug then ignore(E.log "Liveness: label seen: %s\n" str); + (*b && *)(String.compare str (!live_label) = 0) +| _ -> false + +class doFeatureClass = object(self) + inherit nopCilVisitor + + method vfunc fd = + if String.compare fd.svar.vname (!live_func) = 0 then + (Cfg.clearCFGinfo fd; + ignore(Cfg.cfgFun fd); + computeLiveness fd; + if String.compare (!live_label) "" = 0 then + (printer := min_print; + print_everything(); + SkipChildren) + else DoChildren) + else SkipChildren + + method vstmt s = + if List.exists match_label s.labels then try + let vs = IH.find LiveFlow.stmtStartData s.sid in + (printer := min_print; + ignore(printf "%a" LiveFlow.pretty vs); + SkipChildren) + with Not_found -> + if !debug then ignore(E.log "Liveness: stmt: %d not found\n" s.sid); + DoChildren + else + (if List.length s.labels = 0 then + if !debug then ignore(E.log "Liveness: no label at sid=%d\n" s.sid); + DoChildren) + +end + +let do_live_feature (f:file) = + visitCilFile (new doFeatureClass) f + +let feature = + { + fd_name = "Liveness"; + fd_enabled = ref false; + fd_description = "Spit out live variables at a label"; + fd_extraopt = [ + "--live_label", + Arg.String (fun s -> live_label := s), + "Output the variables live at this label"; + "--live_func", + Arg.String (fun s -> live_func := s), + "Output the variables live at each statement in this function."; + "--live_debug", + Arg.Unit (fun n -> debug := true), + "Print lots of debugging info";]; + fd_doit = do_live_feature; + fd_post_check = false + } diff --git a/cil/src/ext/logcalls.ml b/cil/src/ext/logcalls.ml new file mode 100644 index 00000000..0cdbc153 --- /dev/null +++ b/cil/src/ext/logcalls.ml @@ -0,0 +1,268 @@ +(** See copyright notice at the end of this file *) + +(** Add printf before each function call *) + +open Pretty +open Cil +open Trace +module E = Errormsg +module H = Hashtbl + +let i = ref 0 +let name = ref "" + +(* Switches *) +let printFunctionName = ref "printf" + +let addProto = ref false + +let printf: varinfo option ref = ref None +let makePrintfFunction () : varinfo = + match !printf with + Some v -> v + | None -> begin + let v = makeGlobalVar !printFunctionName + (TFun(voidType, Some [("format", charPtrType, [])], + true, [])) in + printf := Some v; + addProto := true; + v + end + +let mkPrint (format: string) (args: exp list) : instr = + let p: varinfo = makePrintfFunction () in + Call(None, Lval(var p), (mkString format) :: args, !currentLoc) + + +let d_string (fmt : ('a,unit,doc,string) format4) : 'a = + let f (d: doc) : string = + Pretty.sprint 200 d + in + Pretty.gprintf f fmt + +let currentFunc: string ref = ref "" + +class logCallsVisitorClass = object + inherit nopCilVisitor + + (* Watch for a declaration for our printer *) + + method vinst i = begin + match i with + | Call(lo,e,al,l) -> + let pre = mkPrint (d_string "call %a\n" d_exp e) [] in + let post = mkPrint (d_string "return from %a\n" d_exp e) [] in +(* + let str1 = prefix ^ + (Pretty.sprint 800 ( Pretty.dprintf "Calling %a(%a)\n" + d_exp e + (docList ~sep:(chr ',' ++ break ) (fun arg -> + try + match unrollType (typeOf arg) with + TInt _ | TEnum _ -> dprintf "%a = %%d" d_exp arg + | TFloat _ -> dprintf "%a = %%g" d_exp arg + | TVoid _ -> text "void" + | TComp _ -> text "comp" + | _ -> dprintf "%a = %%p" d_exp arg + with _ -> dprintf "%a = %%p" d_exp arg)) al)) in + let log_args = List.filter (fun arg -> + match unrollType (typeOf arg) with + TVoid _ | TComp _ -> false + | _ -> true) al in + let str2 = prefix ^ (Pretty.sprint 800 + ( Pretty.dprintf "Returned from %a\n" d_exp e)) in + let newinst str args = ((Call (None, Lval(var printfFun.svar), + ( [ (* one ; *) mkString str ] @ args), + locUnknown)) : instr )in + let ilist = ([ (newinst str1 log_args) ; i ; (newinst str2 []) ] : instr list) in + *) + ChangeTo [ pre; i; post ] + + | _ -> DoChildren + end + method vstmt (s : stmt) = begin + match s.skind with + Return _ -> + let pre = mkPrint (d_string "exit %s\n" !currentFunc) [] in + ChangeTo (mkStmt (Block (mkBlock [ mkStmtOneInstr pre; s ]))) + | _ -> DoChildren + +(* +(Some(e),l) -> + let str = prefix ^ Pretty.sprint 800 ( Pretty.dprintf + "Return(%%p) from %s\n" funstr ) in + let newinst = ((Call (None, Lval(var printfFun.svar), + ( [ (* one ; *) mkString str ; e ]), + locUnknown)) : instr )in + let new_stmt = mkStmtOneInstr newinst in + let slist = [ new_stmt ; s ] in + (ChangeTo(mkStmt(Block(mkBlock slist)))) + | Return(None,l) -> + let str = prefix ^ (Pretty.sprint 800 ( Pretty.dprintf + "Return void from %s\n" funstr)) in + let newinst = ((Call (None, Lval(var printfFun.svar), + ( [ (* one ; *) mkString str ]), + locUnknown)) : instr )in + let new_stmt = mkStmtOneInstr newinst in + let slist = [ new_stmt ; s ] in + (ChangeTo(mkStmt(Block(mkBlock slist)))) + | _ -> DoChildren +*) + end +end + +let logCallsVisitor = new logCallsVisitorClass + + +let logCalls (f: file) : unit = + + let doGlobal = function + | GVarDecl (v, _) when v.vname = !printFunctionName -> + if !printf = None then + printf := Some v + + | GFun (fdec, loc) -> + currentFunc := fdec.svar.vname; + (* do the body *) + ignore (visitCilFunction logCallsVisitor fdec); + (* Now add the entry instruction *) + let pre = mkPrint (d_string "enter %s\n" !currentFunc) [] in + fdec.sbody <- + mkBlock [ mkStmtOneInstr pre; + mkStmt (Block fdec.sbody) ] +(* + (* debugging 'anagram', it's really nice to be able to see the strings *) + (* inside fat pointers, even if it's a bit of a hassle and a hack here *) + let isFatCharPtr (cinfo:compinfo) = + cinfo.cname="wildp_char" || + cinfo.cname="fseqp_char" || + cinfo.cname="seqp_char" in + + (* Collect expressions that denote the actual arguments *) + let actargs = + (* make lvals out of args which pass test below *) + (List.map + (fun vi -> match unrollType vi.vtype with + | TComp(cinfo, _) when isFatCharPtr(cinfo) -> + (* access the _p field for these *) + (* luckily it's called "_p" in all three fat pointer variants *) + Lval(Var(vi), Field(getCompField cinfo "_p", NoOffset)) + | _ -> + Lval(var vi)) + + (* decide which args to pass *) + (List.filter + (fun vi -> match unrollType vi.vtype with + | TPtr(TInt(k, _), _) when isCharType(k) -> + !printPtrs || !printStrings + | TComp(cinfo, _) when isFatCharPtr(cinfo) -> + !printStrings + | TVoid _ | TComp _ -> false + | TPtr _ | TArray _ | TFun _ -> !printPtrs + | _ -> true) + fdec.sformals) + ) in + + (* make a format string for printing them *) + (* sm: expanded width to 200 because I want one per line *) + let formatstr = prefix ^ (Pretty.sprint 200 + (dprintf "entering %s(%a)\n" fdec.svar.vname + (docList ~sep:(chr ',' ++ break) + (fun vi -> match unrollType vi.vtype with + | TInt _ | TEnum _ -> dprintf "%s = %%d" vi.vname + | TFloat _ -> dprintf "%s = %%g" vi.vname + | TVoid _ -> dprintf "%s = (void)" vi.vname + | TComp(cinfo, _) -> ( + if !printStrings && isFatCharPtr(cinfo) then + dprintf "%s = \"%%s\"" vi.vname + else + dprintf "%s = (comp)" vi.vname + ) + | TPtr(TInt(k, _), _) when isCharType(k) -> ( + if (!printStrings) then + dprintf "%s = \"%%s\"" vi.vname + else if (!printPtrs) then + dprintf "%s = %%p" vi.vname + else + dprintf "%s = (str)" vi.vname + ) + | TPtr _ | TArray _ | TFun _ -> ( + if (!printPtrs) then + dprintf "%s = %%p" vi.vname + else + dprintf "%s = (ptr)" vi.vname + ) + | _ -> dprintf "%s = (?type?)" vi.vname)) + fdec.sformals)) in + + i := 0 ; + name := fdec.svar.vname ; + if !allInsts then ( + let thisVisitor = new verboseLogVisitor printfFun !name prefix in + fdec.sbody <- visitCilBlock thisVisitor fdec.sbody + ); + fdec.sbody.bstmts <- + mkStmt (Instr [Call (None, Lval(var printfFun.svar), + ( (* one :: *) mkString formatstr + :: actargs), + loc)]) :: fdec.sbody.bstmts + *) + | _ -> () + in + Stats.time "logCalls" (iterGlobals f) doGlobal; + if !addProto then begin + let p = makePrintfFunction () in + E.log "Adding prototype for call logging function %s\n" p.vname; + f.globals <- GVarDecl (p, locUnknown) :: f.globals + end + +let feature : featureDescr = + { fd_name = "logcalls"; + fd_enabled = Cilutil.logCalls; + fd_description = "generation of code to log function calls"; + fd_extraopt = [ + ("--logcallprintf", Arg.String (fun s -> printFunctionName := s), + "the name of the printf function to use"); + ("--logcalladdproto", Arg.Unit (fun s -> addProto := true), + "whether to add the prototype for the printf function") + ]; + fd_doit = logCalls; + fd_post_check = true + } + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/logcalls.mli b/cil/src/ext/logcalls.mli new file mode 100644 index 00000000..22a1e96a --- /dev/null +++ b/cil/src/ext/logcalls.mli @@ -0,0 +1,41 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(* A simple CIL transformer that inserts calls to a runtime function to log + * the call in each function *) +val feature: Cil.featureDescr diff --git a/cil/src/ext/logwrites.ml b/cil/src/ext/logwrites.ml new file mode 100644 index 00000000..3afd0679 --- /dev/null +++ b/cil/src/ext/logwrites.ml @@ -0,0 +1,139 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +open Pretty +open Cil +module E = Errormsg +module H = Hashtbl + +(* David Park at Stanford points out that you cannot take the address of a + * bitfield in GCC. *) + +(* Returns true if the given lvalue offset ends in a bitfield access. *) +let rec is_bitfield lo = match lo with + | NoOffset -> false + | Field(fi,NoOffset) -> not (fi.fbitfield = None) + | Field(_,lo) -> is_bitfield lo + | Index(_,lo) -> is_bitfield lo + +(* Return an expression that evaluates to the address of the given lvalue. + * For most lvalues, this is merely AddrOf(lv). However, for bitfields + * we do some offset gymnastics. + *) +let addr_of_lv (lh,lo) = + if is_bitfield lo then begin + (* we figure out what the address would be without the final bitfield + * access, and then we add in the offset of the bitfield from the + * beginning of its enclosing comp *) + let rec split_offset_and_bitfield lo = match lo with + | NoOffset -> failwith "logwrites: impossible" + | Field(fi,NoOffset) -> (NoOffset,fi) + | Field(e,lo) -> let a,b = split_offset_and_bitfield lo in + ((Field(e,a)),b) + | Index(e,lo) -> let a,b = split_offset_and_bitfield lo in + ((Index(e,a)),b) + in + let new_lv_offset, bf = split_offset_and_bitfield lo in + let new_lv = (lh, new_lv_offset) in + let enclosing_type = TComp(bf.fcomp, []) in + let bits_offset, bits_width = + bitsOffset enclosing_type (Field(bf,NoOffset)) in + let bytes_offset = bits_offset / 8 in + let lvPtr = mkCast ~e:(mkAddrOf (new_lv)) ~newt:(charPtrType) in + (BinOp(PlusPI, lvPtr, (integer bytes_offset), ulongType)) + end else (AddrOf (lh,lo)) + +class logWriteVisitor = object + inherit nopCilVisitor + (* Create a prototype for the logging function, but don't put it in the + * file *) + val printfFun = + let fdec = emptyFunction "syslog" in + fdec.svar.vtype <- TFun(intType, + Some [ ("prio", intType, []); + ("format", charConstPtrType, []) ], + true, []); + fdec + + method vinst (i: instr) : instr list visitAction = + match i with + Set(lv, e, l) -> begin + (* Check if we need to log *) + match lv with + (Var(v), off) when not v.vglob -> SkipChildren + | _ -> let str = Pretty.sprint 80 + (Pretty.dprintf "Write %%p to 0x%%08x at %%s:%%d (%a)\n" d_lval lv) + in + ChangeTo + [ Call((None), (Lval(Var(printfFun.svar),NoOffset)), + [ one ; + mkString str ; e ; addr_of_lv lv; + mkString l.file; + integer l.line], locUnknown); + i] + end + | Call(Some lv, f, args, l) -> begin + (* Check if we need to log *) + match lv with + (Var(v), off) when not v.vglob -> SkipChildren + | _ -> let str = Pretty.sprint 80 + (Pretty.dprintf "Write retval to 0x%%08x at %%s:%%d (%a)\n" d_lval lv) + in + ChangeTo + [ Call((None), (Lval(Var(printfFun.svar),NoOffset)), + [ one ; + mkString str ; AddrOf lv; + mkString l.file; + integer l.line], locUnknown); + i] + end + | _ -> SkipChildren + +end + +let feature : featureDescr = + { fd_name = "logwrites"; + fd_enabled = Cilutil.logWrites; + fd_description = "generation of code to log memory writes"; + fd_extraopt = []; + fd_doit = + (function (f: file) -> + let lwVisitor = new logWriteVisitor in + visitCilFileSameGlobals lwVisitor f); + fd_post_check = true; + } + diff --git a/cil/src/ext/oneret.ml b/cil/src/ext/oneret.ml new file mode 100644 index 00000000..b3ce4a10 --- /dev/null +++ b/cil/src/ext/oneret.ml @@ -0,0 +1,187 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* Make sure that there is exactly one Return statement in the whole body. + * Replace all the other returns with Goto. This is convenient if you later + * want to insert some finalizer code, since you have a precise place where + * to put it *) +open Cil +open Pretty + +module E = Errormsg + +let dummyVisitor = new nopCilVisitor + +let oneret (f: Cil.fundec) : unit = + let fname = f.svar.vname in + (* Get the return type *) + let retTyp = + match f.svar.vtype with + TFun(rt, _, _, _) -> rt + | _ -> E.s (E.bug "Function %s does not have a function type\n" + f.svar.vname) + in + (* Does it return anything ? *) + let hasRet = match unrollType retTyp with TVoid _ -> false | _ -> true in + + (* Memoize the return result variable. Use only if hasRet *) + let lastloc = ref locUnknown in + let retVar : varinfo option ref = ref None in + let getRetVar (x: unit) : varinfo = + match !retVar with + Some rv -> rv + | None -> begin + let rv = makeLocalVar f "__retres" retTyp in (* don't collide *) + retVar := Some rv; + rv + end + in + (* Remember if we have introduced goto's *) + let haveGoto = ref false in + (* Memoize the return statement *) + let retStmt : stmt ref = ref dummyStmt in + let getRetStmt (x: unit) : stmt = + if !retStmt == dummyStmt then begin + (* Must create a statement *) + let rv = + if hasRet then Some (Lval(Var (getRetVar ()), NoOffset)) else None + in + let sr = mkStmt (Return (rv, !lastloc)) in + retStmt := sr; + sr + end else + !retStmt + in + (* Now scan all the statements. Know if you are the main body of the + * function and be prepared to add new statements at the end *) + let rec scanStmts (mainbody: bool) = function + | [] when mainbody -> (* We are at the end of the function. Now it is + * time to add the return statement *) + let rs = getRetStmt () in + if !haveGoto then + rs.labels <- (Label("return_label", !lastloc, false)) :: rs.labels; + [rs] + + | [] -> [] + + | ({skind=Return (retval, l)} as s) :: rests -> + currentLoc := l; +(* + ignore (E.log "Fixing return(%a) at %a\n" + insert + (match retval with None -> text "None" + | Some e -> d_exp () e) + d_loc l); +*) + if hasRet && retval = None then + E.s (error "Found return without value in function %s\n" fname); + if not hasRet && retval <> None then + E.s (error "Found return in subroutine %s\n" fname); + (* Keep this statement because it might have labels. But change it to + * an instruction that sets the return value (if any). *) + s.skind <- begin + match retval with + Some rval -> Instr [Set((Var (getRetVar ()), NoOffset), rval, l)] + | None -> Instr [] + end; + (* See if this is the last statement in function *) + if mainbody && rests == [] then + s :: scanStmts mainbody rests + else begin + (* Add a Goto *) + let sgref = ref (getRetStmt ()) in + let sg = mkStmt (Goto (sgref, l)) in + haveGoto := true; + s :: sg :: (scanStmts mainbody rests) + end + + | ({skind=If(eb,t,e,l)} as s) :: rests -> + currentLoc := l; + s.skind <- If(eb, scanBlock false t, scanBlock false e, l); + s :: scanStmts mainbody rests +(* + | ({skind=Loop(b,l,lb1,lb2)} as s) :: rests -> + currentLoc := l; + s.skind <- Loop(scanBlock false b, l,lb1,lb2); + s :: scanStmts mainbody rests +*) + | ({skind=While(e,b,l)} as s) :: rests -> + currentLoc := l; + s.skind <- While(e, scanBlock false b, l); + s :: scanStmts mainbody rests + | ({skind=DoWhile(e,b,l)} as s) :: rests -> + currentLoc := l; + s.skind <- DoWhile(e, scanBlock false b, l); + s :: scanStmts mainbody rests + | ({skind=For(bInit,e,bIter,b,l)} as s) :: rests -> + currentLoc := l; + s.skind <- For(scanBlock false bInit, e, scanBlock false bIter, + scanBlock false b, l); + s :: scanStmts mainbody rests + | ({skind=Switch(e, b, cases, l)} as s) :: rests -> + currentLoc := l; + s.skind <- Switch(e, scanBlock false b, cases, l); + s :: scanStmts mainbody rests + | ({skind=Block b} as s) :: rests -> + s.skind <- Block (scanBlock false b); + s :: scanStmts mainbody rests + | ({skind=(Goto _ | Instr _ | Continue _ | Break _ + | TryExcept _ | TryFinally _)} as s) + :: rests -> s :: scanStmts mainbody rests + + and scanBlock (mainbody: bool) (b: block) = + { bstmts = scanStmts mainbody b.bstmts; battrs = b.battrs; } + + in + ignore (visitCilBlock dummyVisitor f.sbody) ; (* sets CurrentLoc *) + lastloc := !currentLoc ; (* last location in the function *) + f.sbody <- scanBlock true f.sbody + + +let feature : featureDescr = + { fd_name = "oneRet"; + fd_enabled = Cilutil.doOneRet; + fd_description = "make each function have at most one 'return'" ; + fd_extraopt = []; + fd_doit = (function (f: file) -> + Cil.iterGlobals f (fun glob -> match glob with + Cil.GFun(fd,_) -> oneret fd; + | _ -> ())); + fd_post_check = true; + } diff --git a/cil/src/ext/oneret.mli b/cil/src/ext/oneret.mli new file mode 100644 index 00000000..f98ab4d1 --- /dev/null +++ b/cil/src/ext/oneret.mli @@ -0,0 +1,44 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + + +(* Make sure that there is only one Return statement in the whole body. + * Replace all the other returns with Goto. Make sure that there is a return + * if the function is supposed to return something, and it is not declared to + * not return. *) +val oneret: Cil.fundec -> unit +val feature : Cil.featureDescr diff --git a/cil/src/ext/partial.ml b/cil/src/ext/partial.ml new file mode 100644 index 00000000..4beca3fc --- /dev/null +++ b/cil/src/ext/partial.ml @@ -0,0 +1,851 @@ +(* See copyright notice at the end of the file *) +(***************************************************************************** + * Partial Evaluation & Constant Folding + * + * Soundness Assumptions: + * (1) Whole program analysis. You may call functions that are not defined + * (e.g., library functions) but they may not call back. + * (2) An undefined function may not return the address of a function whose + * address is not already taken in the code I can see. + * (3) A function pointer call may only call a function that has its + * address visibly taken in the code I can see. + * + * (More assumptions in the comments below) + *****************************************************************************) +open Cil +open Pretty + +(***************************************************************************** + * A generic signature for Alias Analysis information. Used to compute the + * call graph and do symbolic execution. + ****************************************************************************) +module type AliasInfo = + sig + val can_have_the_same_value : Cil.exp -> Cil.exp -> bool + val resolve_function_pointer : Cil.exp -> (Cil.fundec list) + end + +(***************************************************************************** + * A generic signature for Symbolic Execution execution algorithms. Such + * algorithms are used below to perform constant folding and dead-code + * elimination. You write a "basic-block" symex algorithm, we'll make it + * a whole-program CFG-pruner. + ****************************************************************************) +module type Symex = + sig + type t (* the type of a symex algorithm state object *) + val empty : t (* all values unknown *) + val equal : t -> t -> bool (* are these the same? *) + val assign : t -> Cil.lval -> Cil.exp -> (Cil.exp * t) + (* incorporate an assignment, return the RHS *) + val unassign : t -> Cil.lval -> t + (* lose all information about the given lvalue: assume an + * unknown external value has been assigned to it *) + val assembly : t -> Cil.instr -> t (* handle ASM *) + val assume : t -> Cil.exp -> t (* incorporate an assumption *) + val evaluate : t -> Cil.exp -> Cil.exp (* symbolic evaluation *) + val join : (t list) -> t (* join a bunch of states *) + val call : t -> Cil.fundec -> (Cil.exp list) -> (Cil.exp list * t) + (* we are calling the given function with the given actuals *) + val return : t -> Cil.fundec -> t + (* we are returning from the given function *) + val call_to_unknown_function : t -> t + (* throw away information that may have been changed *) + val debug : t -> unit + end + +(***************************************************************************** + * A generic signature for whole-progam call graphs. + ****************************************************************************) +module type CallGraph = + sig + type t (* the type of a call graph *) + val compute : Cil.file -> t (* file for which we compute the graph *) + val can_call : t -> Cil.fundec -> (Cil.fundec list) + val can_be_called_by : t -> Cil.fundec -> (Cil.fundec list) + val fundec_of_varinfo : t -> Cil.varinfo -> Cil.fundec + end + +(***************************************************************************** + * My cheap-o Alias Analysis. Assume all expressions can have the same + * value and any function with its address taken can be the target of + * any function pointer. + * + * Soundness Assumptions: + * (1) Someone must call "find_all_functions_With_address_taken" before the + * results are valid. This is already done in the code below. + ****************************************************************************) +let all_functions_with_address_taken = ref [] +let find_all_functions_with_address_taken (f : Cil.file) = + iterGlobals f (fun g -> match g with + GFun(fd,_) -> if fd.svar.vaddrof then + all_functions_with_address_taken := fd :: + !all_functions_with_address_taken + | _ -> ()) + +module EasyAlias = + struct + let can_have_the_same_value e1 e2 = true + let resolve_function_pointer e1 = !all_functions_with_address_taken + end + +(***************************************************************************** + * My particular method for computing the Call Graph. + ****************************************************************************) +module EasyCallGraph = functor (A : AliasInfo) -> + struct + type callGraphNode = { + fd : Cil.fundec ; + mutable calledBy : Cil.fundec list ; + mutable calls : Cil.fundec list ; + } + type t = (Cil.varinfo, callGraphNode) Hashtbl.t + + let cgCreateNode cg fundec = + let newnode = { fd = fundec ; calledBy = [] ; calls = [] } in + Hashtbl.add cg fundec.svar newnode + + let cgFindNode cg svar = Hashtbl.find cg svar + + let cgAddEdge cg caller callee = + try + let n1 = cgFindNode cg caller in + let n2 = cgFindNode cg callee in + n1.calls <- n2.fd :: n1.calls ; + n1.calledBy <- n1.fd :: n1.calledBy + with _ -> () + + class callGraphVisitor cg = object + inherit nopCilVisitor + val the_fun = ref None + + method vinst i = + let _ = match i with + Call(_,Lval(Var(callee),NoOffset),_,_) -> begin + (* known function call *) + match !the_fun with + None -> failwith "callGraphVisitor: call outside of any function" + | Some(enclosing) -> cgAddEdge cg enclosing callee + end + | Call(_,e,_,_) -> begin + (* unknown function call *) + match !the_fun with + None -> failwith "callGraphVisitor: call outside of any function" + | Some(enclosing) -> let lst = A.resolve_function_pointer e in + List.iter (fun possible_target_fd -> + cgAddEdge cg enclosing possible_target_fd.svar) lst + end + | _ -> () + in SkipChildren + + method vfunc f = the_fun := Some(f.svar) ; DoChildren + end + + let compute (f : Cil.file) = + let cg = Hashtbl.create 511 in + iterGlobals f (fun g -> match g with + GFun(fd,_) -> cgCreateNode cg fd + | _ -> () + ) ; + visitCilFileSameGlobals (new callGraphVisitor cg) f ; + cg + + let can_call cg fd = + let n = cgFindNode cg fd.svar in n.calls + let can_be_called_by cg fd = + let n = cgFindNode cg fd.svar in n.calledBy + let fundec_of_varinfo cg vi = + let n = cgFindNode cg vi in n.fd + end (* END OF: module EasyCallGraph *) + +(***************************************************************************** + * Necula's Constant Folding Strategem (re-written to be applicative) + * + * Soundness Assumptions: + * (1) Inline assembly does not affect constant folding. + ****************************************************************************) +module OrderedInt = + struct + type t = int + let compare = compare + end +module IntMap = Map.Make(OrderedInt) + +module NeculaFolding = functor (A : AliasInfo) -> + struct + (* Register file. Maps identifiers of local variables to expressions. + * We also remember if the expression depends on memory or depends on + * variables that depend on memory *) + type reg = { + rvi : varinfo ; + rval : exp ; + rmem : bool + } + type t = reg IntMap.t + let empty = IntMap.empty + let equal t1 t2 = (compare t1 t2 = 0) (* use OCAML here *) + let dependsOnMem = ref false + (* Rewrite an expression based on the current register file *) + class rewriteExpClass (regFile : t) = object + inherit nopCilVisitor + method vexpr = function + | Lval (Var v, NoOffset) -> begin + try + let defined = (IntMap.find v.vid regFile) in + if (defined.rmem) then dependsOnMem := true; + (match defined.rval with + | Const(x) -> ChangeTo (defined.rval) + | _ -> DoChildren) + with Not_found -> DoChildren + end + | Lval (Mem _, _) -> dependsOnMem := true; DoChildren + | _ -> DoChildren + end + (* Rewrite an expression and return the new expression along with an + * indication of whether it depends on memory *) + let rewriteExp r (e: exp) : exp * bool = + dependsOnMem := false; + let e' = constFold true (visitCilExpr (new rewriteExpClass r) e) in + e', !dependsOnMem + let eval r e = + let new_e, depends = rewriteExp r e in + new_e + + let setMemory regFile = + (* Get a list of all mappings that depend on memory *) + let depids = ref [] in + IntMap.iter (fun id v -> if v.rmem then depids := id :: !depids) regFile; + (* And remove them from the register file *) + List.fold_left (fun acc id -> IntMap.remove id acc) regFile !depids + + let setRegister regFile (v: varinfo) ((e,b): exp * bool) = + IntMap.add v.vid { rvi = v ; rval = e ; rmem = b; } regFile + + let resetRegister regFile (id: int) = + IntMap.remove id regFile + + class findLval lv contains = object + inherit nopCilVisitor + method vlval l = + if l = lv then + (contains := true ; SkipChildren) + else + DoChildren + end + + let removeMappingsThatDependOn regFile l = + (* Get a list of all mappings that depend on l *) + let depids = ref [] in + IntMap.iter (fun id reg -> + let found = ref false in + ignore (visitCilExpr (new findLval l found) reg.rval) ; + if !found then + depids := id :: !depids + ) regFile ; + (* And remove them from the register file *) + List.fold_left (fun acc id -> IntMap.remove id acc) regFile !depids + + let assign r l e = + let (newe,b) = rewriteExp r e in + let r' = match l with + (Var v, NoOffset) -> + let r'' = setRegister r v (newe,b) in + removeMappingsThatDependOn r'' l + | (Mem _, _) -> setMemory r + | _ -> r + in newe, r' + + let unassign r l = + let r' = match l with + (Var v, NoOffset) -> + let r'' = resetRegister r v.vid in + removeMappingsThatDependOn r'' l + | (Mem _, _) -> setMemory r + | _ -> r + in r' + + let assembly r i = r (* no-op in Necula-world *) + let assume r e = r (* no-op in Necula-world *) + + let evaluate r e = + let (newe,_) = rewriteExp r e in + newe + + (* Join two symex states *) + let join2 (r1 : t) (r2 : t) = + let keep = ref [] in + IntMap.iter (fun id reg -> + try + let reg' = IntMap.find id r2 in + if reg'.rval = reg.rval && reg'.rmem = reg.rmem then + keep := (id,reg) :: !keep + with _ -> () + ) r1 ; + List.fold_left (fun acc (id,v) -> + IntMap.add id v acc) (IntMap.empty) !keep + + let join (lst : t list) = match lst with + [] -> failwith "empty list" + | r :: tl -> List.fold_left + (fun (acc : t) (elt : t) -> join2 acc elt) r tl + + let call r fd el = + let new_arg_list = ref [] in + let final_r = List.fold_left2 (fun r vi e -> + let newe, r' = assign r ((Var(vi),NoOffset)) e in + new_arg_list := newe :: !new_arg_list ; + r' + ) r fd.sformals el in + (List.rev !new_arg_list), final_r + + let return r fd = + let regFile = + List.fold_left (fun r vi -> IntMap.remove vi.vid r) r fd.sformals + in + (* Get a list of all globals *) + let depids = ref [] in + IntMap.iter (fun vid reg -> + if reg.rvi.vglob || reg.rvi.vaddrof then depids := vid :: !depids + ) regFile ; + (* And remove them from the register file *) + List.fold_left (fun acc id -> IntMap.remove id acc) regFile !depids + + + let call_to_unknown_function r = + setMemory r + + let debug r = + IntMap.iter (fun key reg -> + ignore (Pretty.printf "%s <- %a (%b)@!" reg.rvi.vname d_exp reg.rval reg.rmem) + ) r + end (* END OF: NeculaFolding *) + +(***************************************************************************** + * A transformation to make every function call end its statement. So + * { x=1; Foo(); y=1; } + * becomes at least: + * { { x=1; Foo(); } + * { y=1; } } + * But probably more like: + * { { x=1; } { Foo(); } { y=1; } } + ****************************************************************************) +let rec contains_call il = match il with + [] -> false + | Call(_) :: tl -> true + | _ :: tl -> contains_call tl + +class callBBVisitor = object + inherit nopCilVisitor + + method vstmt s = + match s.skind with + Instr(il) when contains_call il -> begin + let list_of_stmts = List.map (fun one_inst -> + mkStmtOneInstr one_inst) il in + let block = mkBlock list_of_stmts in + ChangeDoChildrenPost(s, (fun _ -> + s.skind <- Block(block) ; + s)) + end + | _ -> DoChildren + + method vvdec _ = SkipChildren + method vexpr _ = SkipChildren + method vlval _ = SkipChildren + method vtype _ = SkipChildren +end + +let calls_end_basic_blocks f = + let thisVisitor = new callBBVisitor in + visitCilFileSameGlobals thisVisitor f + +(***************************************************************************** + * A transformation that gives each variable a unique identifier. + ****************************************************************************) +class vidVisitor = object + inherit nopCilVisitor + val count = ref 0 + + method vvdec vi = + vi.vid <- !count ; + incr count ; SkipChildren +end + +let globally_unique_vids f = + let thisVisitor = new vidVisitor in + visitCilFileSameGlobals thisVisitor f + +(***************************************************************************** + * The Weimeric Partial Evaluation Data-Flow Engine + * + * This functor performs flow-sensitive, context-insensitive whole-program + * data-flow analysis with an eye toward partial evaluation and constant + * folding. + * + * Toposort the whole-program inter-procedural CFG to compute + * (1) the number of actual predecessors for each statement + * (2) the global toposort ordering + * + * Perform standard data-flow analysis (joins, etc) on the ICFG until you + * hit a fixed point. If this changed the structure of the ICFG (by + * removing an IF-branch or an empty function call), redo the whole thing. + * + * Soundness Assumptions: + * (1) A "call instruction" is the last thing in its statement. + * Use "calls_end_basic_blocks" to get this. cil/src/main.ml does + * this when you pass --makeCFG. + * (2) All variables have globally unique identifiers. + * Use "globally_unique_vids" to get this. cil/src/main.ml does + * this when you pass --makeCFG. + * (3) This may not be a strict soundness requirement, but I wrote this + * assuming that the input file has all switch/break/continue + * statements removed. + ****************************************************************************) +module MakePartial = + functor (S : Symex) -> + functor (C : CallGraph) -> + functor (A : AliasInfo) -> + struct + + let debug = false + + (* We keep this information about every statement. Ideally this should + * be put in the stmt itself, but CIL doesn't give us space. *) + type sinfo = { (* statement info *) + incoming_state : (int, S.t) Hashtbl.t ; + (* mapping from stmt.sid to Symex.state *) + reachable_preds : (int, bool) Hashtbl.t ; + (* basically a set of all of the stmt.sids that can really + * reach this statement *) + mutable last_used_state : S.t option ; + (* When we last did the Post() of this statement, what + * incoming state did we use? If our new incoming state is + * the same, we don't have to do it again. *) + mutable priority : int ; + (* Whole-program toposort priority. High means "do me first". + * The first stmt in "main()" will have the highest priority. + *) + } + let sinfo_ht = Hashtbl.create 511 + let clear_sinfo () = Hashtbl.clear sinfo_ht + + (* We construct sinfo nodes lazily: if you ask for one that isn't + * there, we build it. *) + let get_sinfo stmt = + try + Hashtbl.find sinfo_ht stmt.sid + with _ -> + let new_sinfo = { incoming_state = Hashtbl.create 3 ; + reachable_preds = Hashtbl.create 3 ; + last_used_state = None ; + priority = (-1) ; } in + Hashtbl.add sinfo_ht stmt.sid new_sinfo ; + new_sinfo + + (* Topological Sort is a DFS in which you assign a priority right as + * you finished visiting the children. While we're there we compute + * the actual number of unique predecessors for each statement. The CIL + * information may be out of date because we keep changing the CFG by + * removing IFs and whatnot. *) + let toposort_counter = ref 1 + let add_edge s1 s2 = + let si2 = get_sinfo s2 in + Hashtbl.replace si2.reachable_preds s1.sid true + + let rec toposort c stmt = + let si = get_sinfo stmt in + if si.priority >= 0 then + () (* already visited! *) + else begin + si.priority <- 0 ; (* currently visiting *) + (* handle function calls in this basic block *) + (match stmt.skind with + (Instr(il)) -> + List.iter (fun i -> + let fd_list = match i with + Call(_,Lval(Var(vi),NoOffset),_,_) -> + begin + try + let fd = C.fundec_of_varinfo c vi in + [fd] + with e -> [] (* calling external function *) + end + | Call(_,e,_,_) -> + A.resolve_function_pointer e + | _ -> [] + in + List.iter (fun fd -> + if List.length fd.sbody.bstmts > 0 then + let fun_stmt = List.hd fd.sbody.bstmts in + add_edge stmt fun_stmt ; + toposort c fun_stmt + ) fd_list + ) il + | _ -> ()); + List.iter (fun succ -> + add_edge stmt succ ; toposort c succ) stmt.succs ; + si.priority <- !toposort_counter ; + incr toposort_counter + end + + (* we set this to true whenever we eliminate an IF or otherwise + * change the CFG *) + let changed_cfg = ref false + + (* Partially evaluate / constant fold a statement. Basically this just + * asks the Symex algorithm to evaluate the RHS in the current state + * and then compute a new state that incorporates the assignment. + * + * However, we have special handling for ifs and calls. If we can + * evaluate an if predicate to a constant, we remove the if. + * + * If we are going to make a call to a function with an empty body, we + * remove the function call. *) + let partial_stmt c state stmt handle_funcall = + let result = match stmt.skind with + Instr(il) -> + let state = ref state in + let new_il = List.map (fun i -> + if debug then begin + ignore (Pretty.printf "Instr %a@!" d_instr i ) + end ; + match i with + | Set(l,e,loc) -> + let e', state' = S.assign !state l e in + state := state' ; + [Set(l,e',loc)] + | Call(lo,(Lval(Var(vi),NoOffset)),al,loc) -> + let result = begin + try + let fd = C.fundec_of_varinfo c vi in + begin + match fd.sbody.bstmts with + [] -> [] (* no point in making this call *) + | hd :: tl -> + let al', state' = S.call !state fd al in + handle_funcall stmt hd state' ; + let state'' = S.return state' fd in + state := state'' ; + [Call(lo,(Lval(Var(vi),NoOffset)),al',loc)] + end + with e -> + let state'' = S.call_to_unknown_function !state in + let al' = List.map (S.evaluate !state) al in + state := state'' ; + [Call(lo,(Lval(Var(vi),NoOffset)),al',loc)] + end in + (* handle return value *) + begin + match lo with + Some(lv) -> state := S.unassign !state lv + | _ -> () + end ; + result + | Call(lo,f,al,loc) -> + let al' = List.map (S.evaluate !state) al in + state := S.call_to_unknown_function !state ; + (match lo with + Some(lv) -> state := S.unassign !state lv + | None -> ()) ; + [Call(lo,f,al',loc)] + | Asm(_) -> state := S.assembly !state i ; [i] + ) il in + stmt.skind <- Instr(List.flatten new_il) ; + if debug then begin + ignore (Pretty.printf "New Stmt is %a@!" d_stmt stmt) ; + end ; + !state + + | If(e,b1,b2,loc) -> + let e' = S.evaluate state e in + (* Pretty.printf "%a evals to %a\n" d_exp e d_exp e' ; *) + + (* helper function to remove an IF branch *) + let remove b remains = begin + changed_cfg := true ; + (match b.bstmts with + | [] -> () + | hd :: tl -> + stmt.succs <- List.filter (fun succ -> succ.sid <> hd.sid) + stmt.succs + ) + end in + + if (e' = one) then begin + if b2.bstmts = [] && b2.battrs = [] then begin + stmt.skind <- Block(b1) ; + match b1.bstmts with + [] -> failwith "partial: completely empty if" + | hd :: tl -> stmt.succs <- [hd] + end else + stmt.skind <- Block( + { bstmts = + [ mkStmt (Block(b1)) ; + mkStmt (If(zero,b2,{bstmts=[];battrs=[];},loc)) ] ; + battrs = [] } ) ; + remove b2 b1 ; + state + end else if (e' = zero) then begin + if b1.bstmts = [] && b1.battrs = [] then begin + stmt.skind <- Block(b2) ; + match b2.bstmts with + [] -> failwith "partial: completely empty if" + | hd :: tl -> stmt.succs <- [hd] + end else + stmt.skind <- Block( + { bstmts = + [ mkStmt (Block(b2)) ; + mkStmt (If(zero,b1,{bstmts=[];battrs=[];},loc)) ] ; + battrs = [] } ) ; + remove b1 b2 ; + state + end else begin + stmt.skind <- If(e',b1,b2,loc) ; + state + end + + | Return(Some(e),loc) -> + let e' = S.evaluate state e in + stmt.skind <- Return(Some(e'),loc) ; + state + + | Block(b) -> + if debug && List.length stmt.succs > 1 then begin + ignore (Pretty.printf "(%a) has successors [%a]@!" + d_stmt stmt + (docList ~sep:(chr '@') (d_stmt ())) + stmt.succs) + end ; + state + + | _ -> state + in result + + (* + * This is the main conceptual entry-point for the partial evaluation + * data-flow functor. + *) + let dataflow (file : Cil.file) (* whole program *) + (c : C.t) (* control-flow graph *) + (initial_state : S.t) (* any assumptions? *) + (initial_stmt : Cil.stmt) (* entry point *) + = begin + (* count the total number of statements in the program *) + let num_stmts = ref 1 in + iterGlobals file (fun g -> match g with + GFun(fd,_) -> begin + match fd.smaxstmtid with + Some(i) -> if i > !num_stmts then num_stmts := i + | None -> () + end + | _ -> () + ) ; + (if debug then + Printf.printf "Dataflow: at most %d statements in program\n" !num_stmts); + + (* create a priority queue in which to store statements *) + let worklist = Heap.create !num_stmts in + + let finished = ref false in + let passes = ref 0 in + + (* add something to the work queue *) + let enqueue caller callee state = begin + let si = get_sinfo callee in + Hashtbl.replace si.incoming_state caller.sid state ; + Heap.insert worklist si.priority callee + end in + + (* we will be finished when we complete a round of data-flow that + * does not change the ICFG *) + while not !finished do + clear_sinfo () ; + incr passes ; + + (* we must recompute the ordering and the predecessor information + * because we may have changed it by removing IFs *) + (if debug then Printf.printf "Dataflow: Topological Sorting & Reachability\n" ); + toposort c initial_stmt ; + + let initial_si = get_sinfo initial_stmt in + Heap.insert worklist initial_si.priority initial_stmt ; + + while not (Heap.is_empty worklist) do + let (p,s) = Heap.extract_max worklist in + if debug then begin + ignore (Pretty.printf "Working on stmt %d (%a) %a@!" + s.sid + (docList ~sep:(chr ',' ++ break) (fun s -> dprintf "%d" s.sid)) + s.succs + d_stmt s) ; + flush stdout ; + end ; + let si = get_sinfo s in + + (* Even though this stmt is on the worklist, we may not have + * to do anything with it if the join of all of the incoming + * states is the same as the last state we used here. *) + let must_recompute, incoming_state = + begin + let list_of_incoming_states = ref [] in + Hashtbl.iter (fun true_pred_sid b -> + let this_pred_state = + try + Hashtbl.find si.incoming_state true_pred_sid + with _ -> + (* this occurs when we're evaluating a statement and we + * have not yet evaluated all of its predecessors (the + * first time we look at a loop head, say). We must be + * conservative. We'll come back later with better + * information (as we work toward the fix-point). *) + S.empty + in + if debug then begin + Printf.printf " Incoming State from %d\n" true_pred_sid ; + S.debug this_pred_state ; + flush stdout ; + end ; + list_of_incoming_states := this_pred_state :: + !list_of_incoming_states + ) si.reachable_preds ; + let merged_incoming_state = + if !list_of_incoming_states = [] then + (* this occurs when we're looking at the first statement + * in "main" -- it has no preds *) + initial_state + else + S.join !list_of_incoming_states + in + if debug then begin + Printf.printf " Merged State:\n" ; + S.debug merged_incoming_state ; + flush stdout ; + end ; + let must_recompute = match si.last_used_state with + None -> true + | Some(last) -> not (S.equal merged_incoming_state last) + in must_recompute, merged_incoming_state + end + in + if must_recompute then begin + si.last_used_state <- Some(incoming_state) ; + let outgoing_state = + (* partially evaluate and optimize the statement *) + partial_stmt c incoming_state s enqueue in + let fresh_succs = s.succs in + (* touch every successor so that we will reconsider it *) + List.iter (fun succ -> + enqueue s succ outgoing_state + ) fresh_succs ; + end else begin + if debug then begin + Printf.printf "No need to recompute.\n" + end + end + done ; + (if debug then Printf.printf "Dataflow: Pass %d Complete\n" !passes) ; + if !changed_cfg then begin + (if debug then Printf.printf "Dataflow: Restarting (CFG Changed)\n") ; + changed_cfg := false + end else + finished := true + done ; + (if debug then Printf.printf "Dataflow: Completed (%d passes)\n" !passes) + + end + + let simplify file c fd (assumptions : (Cil.lval * Cil.exp) list) = + let starting_state = List.fold_left (fun s (l,e) -> + let e',s' = S.assign s l e in + s' + ) S.empty assumptions in + dataflow file c starting_state (List.hd fd.sbody.bstmts) + + end + + +(* + * Currently our partial-eval optimizer is built out of basically nothing. + * The alias analysis is fake, the call grpah is cheap, and we're using + * George's old basic-block symex. Still, it works. + *) +(* Don't you love Functor application? *) +module BasicCallGraph = EasyCallGraph(EasyAlias) +module BasicSymex = NeculaFolding(EasyAlias) +module BasicPartial = MakePartial(BasicSymex)(BasicCallGraph)(EasyAlias) + +(* + * A very easy entry-point to partial evaluation/symbolic execution. + * You pass the Cil file and a list of assumptions (lvalue, exp pairs that + * should be treated as assignments that occur before the program starts). + * + * We partially evaluate and optimize starting from "main". The Cil.file + * is modified in place. + *) +let partial (f : Cil.file) (assumptions : (Cil.lval * Cil.exp) list) = + try + find_all_functions_with_address_taken f ; + let c = BasicCallGraph.compute f in + try + iterGlobals f (fun g -> match g with + GFun(fd,_) when fd.svar.vname = "main" -> + BasicPartial.simplify f c fd assumptions + | _ -> ()) ; + with e -> begin + Printf.printf "Error in DataFlow: %s\n" (Printexc.to_string e) ; + raise e + end + with e -> begin + Printf.printf "Error in Partial: %s\n" (Printexc.to_string e) ; + raise e + end + +let feature : featureDescr = + { fd_name = "partial"; + fd_enabled = Cilutil.doPartial; + fd_description = "interprocedural partial evaluation and constant folding" ; + fd_extraopt = []; + fd_doit = (function (f: file) -> + if not !Cilutil.makeCFG then begin + Errormsg.s (Errormsg.error "--dopartial: you must also specify --domakeCFG\n") + end ; + partial f [] ) ; + fd_post_check = false; + } + +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) diff --git a/cil/src/ext/pta/golf.ml b/cil/src/ext/pta/golf.ml new file mode 100644 index 00000000..5ea47ff1 --- /dev/null +++ b/cil/src/ext/pta/golf.ml @@ -0,0 +1,1657 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* Exceptions *) +(* *) +(***********************************************************************) + +exception Inconsistent (* raised if constraint system is inconsistent *) +exception WellFormed (* raised if types are not well-formed *) +exception NoContents +exception APFound (* raised if an alias pair is found, a control + flow exception *) + + +module U = Uref +module S = Setp +module H = Hashtbl +module Q = Queue + + +(** Subtyping kinds *) +type polarity = + Pos + | Neg + | Sub + +(** Path kinds, for CFL reachability *) +type pkind = + Positive + | Negative + | Match + | Seed + +(** Context kinds -- open or closed *) +type context = + Open + | Closed + +(* A configuration is a context (open or closed) coupled with a pair + of stamps representing a state in the cartesian product DFA. *) +type configuration = context * int * int + +module ConfigHash = +struct + type t = configuration + let equal t t' = t = t' + let hash t = Hashtbl.hash t +end + +module CH = H.Make (ConfigHash) + +type config_map = unit CH.t + +(** Generic bounds *) +type 'a bound = {index : int; info : 'a U.uref} + +(** For label paths. *) +type 'a path = { + kind : pkind; + reached_global : bool; + head : 'a U.uref; + tail : 'a U.uref +} + +module Bound = +struct + type 'a t = 'a bound + let compare (x : 'a t) (y : 'a t) = + if U.equal (x.info, y.info) then x.index - y.index + else Pervasives.compare (U.deref x.info) (U.deref y.info) +end + +module Path = +struct + type 'a t = 'a path + let compare (x : 'a t) (y : 'a t) = + if U.equal (x.head, y.head) then + begin + if U.equal (x.tail, y.tail) then + begin + if x.reached_global = y.reached_global then + Pervasives.compare x.kind y.kind + else Pervasives.compare x.reached_global y.reached_global + end + else Pervasives.compare (U.deref x.tail) (U.deref y.tail) + end + else Pervasives.compare (U.deref x.head) (U.deref y.head) +end + +module B = S.Make (Bound) + +module P = S.Make (Path) + +type 'a boundset = 'a B.t + +type 'a pathset = 'a P.t + +(** Constants, which identify elements in points-to sets *) +(** jk : I'd prefer to make this an 'a constant and specialize it to varinfo + for use with the Cil frontend, but for now, this will do *) +type constant = int * string * Cil.varinfo + +module Constant = +struct + type t = constant + let compare (xid, _, _) (yid, _, _) = xid - yid +end +module C = Set.Make (Constant) + +(** Sets of constants. Set union is used when two labels containing + constant sets are unified *) +type constantset = C.t + +type lblinfo = { + mutable l_name: string; + (** either empty or a singleton, the initial location for this label *) + loc : constantset; + (** Name of this label *) + l_stamp : int; + (** Unique integer for this label *) + mutable l_global : bool; + (** True if this location is globally accessible *) + mutable aliases: constantset; + (** Set of constants (tags) for checking aliases *) + mutable p_lbounds: lblinfo boundset; + (** Set of umatched (p) lower bounds *) + mutable n_lbounds: lblinfo boundset; + (** Set of unmatched (n) lower bounds *) + mutable p_ubounds: lblinfo boundset; + (** Set of umatched (p) upper bounds *) + mutable n_ubounds: lblinfo boundset; + (** Set of unmatched (n) upper bounds *) + mutable m_lbounds: lblinfo boundset; + (** Set of matched (m) lower bounds *) + mutable m_ubounds: lblinfo boundset; + (** Set of matched (m) upper bounds *) + + mutable m_upath: lblinfo pathset; + mutable m_lpath: lblinfo pathset; + mutable n_upath: lblinfo pathset; + mutable n_lpath: lblinfo pathset; + mutable p_upath: lblinfo pathset; + mutable p_lpath: lblinfo pathset; + + mutable l_seeded : bool; + mutable l_ret : bool; + mutable l_param : bool; +} + +(** Constructor labels *) +and label = lblinfo U.uref + +(** The type of lvalues. *) +type lvalue = { + l: label; + contents: tau +} + +and vinfo = { + v_stamp : int; + v_name : string; + + mutable v_hole : (int,unit) H.t; + mutable v_global : bool; + mutable v_mlbs : tinfo boundset; + mutable v_mubs : tinfo boundset; + mutable v_plbs : tinfo boundset; + mutable v_pubs : tinfo boundset; + mutable v_nlbs : tinfo boundset; + mutable v_nubs : tinfo boundset +} + +and rinfo = { + r_stamp : int; + rl : label; + points_to : tau; + mutable r_global: bool; +} + +and finfo = { + f_stamp : int; + fl : label; + ret : tau; + mutable args : tau list; + mutable f_global : bool; +} + +and pinfo = { + p_stamp : int; + ptr : tau; + lam : tau; + mutable p_global : bool; +} + +and tinfo = Var of vinfo + | Ref of rinfo + | Fun of finfo + | Pair of pinfo + +and tau = tinfo U.uref + +type tconstraint = Unification of tau * tau + | Leq of tau * (int * polarity) * tau + + +(** Association lists, used for printing recursive types. The first element + is a type that has been visited. The second element is the string + representation of that type (so far). If the string option is set, then + this type occurs within itself, and is associated with the recursive var + name stored in the option. When walking a type, add it to an association + list. + + Example : suppose we have the constraint 'a = ref('a). The type is unified + via cyclic unification, and would loop infinitely if we attempted to print + it. What we want to do is print the type u rv. ref(rv). This is accomplished + in the following manner: + + -- ref('a) is visited. It is not in the association list, so it is added + and the string "ref(" is stored in the second element. We recurse to print + the first argument of the constructor. + + -- In the recursive call, we see that 'a (or ref('a)) is already in the + association list, so the type is recursive. We check the string option, + which is None, meaning that this is the first recurrence of the type. We + create a new recursive variable, rv and set the string option to 'rv. Next, + we prepend u rv. to the string representation we have seen before, "ref(", + and return "rv" as the string representation of this type. + + -- The string so far is "u rv.ref(". The recursive call returns, and we + complete the type by printing the result of the call, "rv", and ")" + + In a type where the recursive variable appears twice, e.g. 'a = pair('a,'a), + the second time we hit 'a, the string option will be set, so we know to + reuse the same recursive variable name. +*) +type association = tau * string ref * string option ref + +module PathHash = +struct + type t = int list + let equal t t' = t = t' + let hash t = Hashtbl.hash t +end + +module PH = H.Make (PathHash) + +(***********************************************************************) +(* *) +(* Global Variables *) +(* *) +(***********************************************************************) + +(** Print the instantiations constraints. *) +let print_constraints : bool ref = ref false + +(** If true, print all constraints (including induced) and show + additional debug output. *) +let debug = ref false + +(** Just debug all the constraints (including induced) *) +let debug_constraints = ref false + +(** Debug smart alias queries *) +let debug_aliases = ref false + +let smart_aliases = ref false + +(** If true, make the flow step a no-op *) +let no_flow = ref false + +(** If true, disable subtyping (unification at all levels) *) +let no_sub = ref false + +(** If true, treat indexed edges as regular subtyping *) +let analyze_mono = ref true + +(** A list of equality constraints. *) +let eq_worklist : tconstraint Q.t = Q.create () + +(** A list of leq constraints. *) +let leq_worklist : tconstraint Q.t = Q.create () + +let path_worklist : (lblinfo path) Q.t = Q.create () + +let path_hash : (lblinfo path) PH.t = PH.create 32 + +(** A count of the constraints introduced from the AST. Used for debugging. *) +let toplev_count = ref 0 + +(** A hashtable containing stamp pairs of labels that must be aliased. *) +let cached_aliases : (int * int,unit) H.t = H.create 64 + +(** A hashtable mapping pairs of tau's to their join node. *) +let join_cache : (int * int, tau) H.t = H.create 64 + +(***********************************************************************) +(* *) +(* Utility Functions *) +(* *) +(***********************************************************************) + +let find = U.deref + +let die s = + Printf.printf "*******\nAssertion failed: %s\n*******\n" s; + assert false + +let fresh_appsite : (unit -> int) = + let appsite_index = ref 0 in + fun () -> + incr appsite_index; + !appsite_index + +(** Generate a unique integer. *) +let fresh_index : (unit -> int) = + let counter = ref 0 in + fun () -> + incr counter; + !counter + +let fresh_stamp : (unit -> int) = + let stamp = ref 0 in + fun () -> + incr stamp; + !stamp + +(** Return a unique integer representation of a tau *) +let get_stamp (t : tau) : int = + match find t with + Var v -> v.v_stamp + | Ref r -> r.r_stamp + | Pair p -> p.p_stamp + | Fun f -> f.f_stamp + +(** Negate a polarity. *) +let negate (p : polarity) : polarity = + match p with + Pos -> Neg + | Neg -> Pos + | Sub -> die "negate" + +(** Consistency checks for inferred types *) +let pair_or_var (t : tau) = + match find t with + Pair _ -> true + | Var _ -> true + | _ -> false + +let ref_or_var (t : tau) = + match find t with + Ref _ -> true + | Var _ -> true + | _ -> false + +let fun_or_var (t : tau) = + match find t with + Fun _ -> true + | Var _ -> true + | _ -> false + + + +(** Apply [f] structurally down [t]. Guaranteed to terminate, even if [t] + is recursive *) +let iter_tau f t = + let visited : (int,tau) H.t = H.create 4 in + let rec iter_tau' t = + if H.mem visited (get_stamp t) then () else + begin + f t; + H.add visited (get_stamp t) t; + match U.deref t with + Pair p -> + iter_tau' p.ptr; + iter_tau' p.lam + | Fun f -> + List.iter iter_tau' (f.args); + iter_tau' f.ret + | Ref r -> iter_tau' r.points_to + | _ -> () + end + in + iter_tau' t + +(* Extract a label's bounds according to [positive] and [upper]. *) +let get_bounds (p :polarity ) (upper : bool) (l : label) : lblinfo boundset = + let li = find l in + match p with + Pos -> if upper then li.p_ubounds else li.p_lbounds + | Neg -> if upper then li.n_ubounds else li.n_lbounds + | Sub -> if upper then li.m_ubounds else li.m_lbounds + +let equal_tau (t : tau) (t' : tau) = + get_stamp t = get_stamp t' + +let get_label_stamp (l : label) : int = + (find l).l_stamp + +(** Return true if [t] is global (treated monomorphically) *) +let get_global (t : tau) : bool = + match find t with + Var v -> v.v_global + | Ref r -> r.r_global + | Pair p -> p.p_global + | Fun f -> f.f_global + +let is_ret_label l = (find l).l_ret || (find l).l_global (* todo - check *) + +let is_param_label l = (find l).l_param || (find l).l_global + +let is_global_label l = (find l).l_global + +let is_seeded_label l = (find l).l_seeded + +let set_global_label (l : label) (b : bool) : unit = + assert ((not (is_global_label l)) || b); + (U.deref l).l_global <- b + +(** Aliases for set_global *) +let global_tau = get_global + + +(** Get_global for lvalues *) +let global_lvalue lv = get_global lv.contents + + + +(***********************************************************************) +(* *) +(* Printing Functions *) +(* *) +(***********************************************************************) + +let string_of_configuration (c, i, i') = + let context = match c with + Open -> "O" + | Closed -> "C" + in + Printf.sprintf "(%s,%d,%d)" context i i' + +let string_of_polarity p = + match p with + Pos -> "+" + | Neg -> "-" + | Sub -> "M" + +(** Convert a label to a string, short representation *) +let string_of_label (l : label) : string = + "\"" ^ (find l).l_name ^ "\"" + +(** Return true if the element [e] is present in the association list, + according to uref equality *) +let rec assoc_list_mem (e : tau) (l : association list) = + match l with + | [] -> None + | (h, s, so) :: t -> + if U.equal (h,e) then Some (s, so) else assoc_list_mem e t + +(** Given a tau, create a unique recursive variable name. This should always + return the same name for a given tau *) +let fresh_recvar_name (t : tau) : string = + match find t with + Pair p -> "rvp" ^ string_of_int p.p_stamp + | Ref r -> "rvr" ^ string_of_int r.r_stamp + | Fun f -> "rvf" ^ string_of_int f.f_stamp + | _ -> die "fresh_recvar_name" + + +(** Return a string representation of a tau, using association lists. *) +let string_of_tau (t : tau) : string = + let tau_map : association list ref = ref [] in + let rec string_of_tau' t = + match assoc_list_mem t !tau_map with + Some (s, so) -> (* recursive type. see if a var name has been set *) + begin + match !so with + None -> + let rv = fresh_recvar_name t in + s := "u " ^ rv ^ "." ^ !s; + so := Some rv; + rv + | Some rv -> rv + end + | None -> (* type's not recursive. Add it to the assoc list and cont. *) + let s = ref "" + and so : string option ref = ref None in + tau_map := (t, s, so) :: !tau_map; + begin + match find t with + Var v -> s := v.v_name; + | Pair p -> + assert (ref_or_var p.ptr); + assert (fun_or_var p.lam); + s := "{"; + s := !s ^ string_of_tau' p.ptr; + s := !s ^ ","; + s := !s ^ string_of_tau' p.lam; + s := !s ^"}" + | Ref r -> + assert (pair_or_var r.points_to); + s := "ref(|"; + s := !s ^ string_of_label r.rl; + s := !s ^ "|,"; + s := !s ^ string_of_tau' r.points_to; + s := !s ^ ")" + | Fun f -> + assert (pair_or_var f.ret); + let rec string_of_args = function + h :: [] -> + assert (pair_or_var h); + s := !s ^ string_of_tau' h + | h :: t -> + assert (pair_or_var h); + s := !s ^ string_of_tau' h ^ ","; + string_of_args t + | [] -> () + in + s := "fun(|"; + s := !s ^ string_of_label f.fl; + s := !s ^ "|,"; + s := !s ^ "<"; + if List.length f.args > 0 then string_of_args f.args + else s := !s ^ "void"; + s := !s ^">,"; + s := !s ^ string_of_tau' f.ret; + s := !s ^ ")" + end; + tau_map := List.tl !tau_map; + !s + in + string_of_tau' t + +(** Convert an lvalue to a string *) +let rec string_of_lvalue (lv : lvalue) : string = + let contents = string_of_tau lv.contents + and l = string_of_label lv.l in + assert (pair_or_var lv.contents); (* do a consistency check *) + Printf.sprintf "[%s]^(%s)" contents l + +let print_path (p : lblinfo path) : unit = + let string_of_pkind = function + Positive -> "p" + | Negative -> "n" + | Match -> "m" + | Seed -> "s" + in + Printf.printf + "%s --%s--> %s (%d) : " + (string_of_label p.head) + (string_of_pkind p.kind) + (string_of_label p.tail) + (PathHash.hash p) + +(** Print a list of tau elements, comma separated *) +let rec print_tau_list (l : tau list) : unit = + let rec print_t_strings = function + h :: [] -> print_endline h + | h :: t -> + print_string h; + print_string ", "; + print_t_strings t + | [] -> () + in + print_t_strings (List.map string_of_tau l) + +let print_constraint (c : tconstraint) = + match c with + Unification (t, t') -> + let lhs = string_of_tau t + and rhs = string_of_tau t' in + Printf.printf "%s == %s\n" lhs rhs + | Leq (t, (i, p), t') -> + let lhs = string_of_tau t + and rhs = string_of_tau t' in + Printf.printf "%s <={%d,%s} %s\n" lhs i (string_of_polarity p) rhs + +(***********************************************************************) +(* *) +(* Type Operations -- these do not create any constraints *) +(* *) +(***********************************************************************) + +(** Create an lvalue with label [lbl] and tau contents [t]. *) +let make_lval (lbl, t : label * tau) : lvalue = + {l = lbl; contents = t} + +let make_label_int (is_global : bool) (name :string) (vio : Cil.varinfo option) : label = + let locc = + match vio with + Some vi -> C.add (fresh_index (), name, vi) C.empty + | None -> C.empty + in + U.uref { + l_name = name; + l_global = is_global; + l_stamp = fresh_stamp (); + loc = locc; + aliases = locc; + p_ubounds = B.empty; + p_lbounds = B.empty; + n_ubounds = B.empty; + n_lbounds = B.empty; + m_ubounds = B.empty; + m_lbounds = B.empty; + m_upath = P.empty; + m_lpath = P.empty; + n_upath = P.empty; + n_lpath = P.empty; + p_upath = P.empty; + p_lpath = P.empty; + l_seeded = false; + l_ret = false; + l_param = false + } + +(** Create a new label with name [name]. Also adds a fresh constant + with name [name] to this label's aliases set. *) +let make_label (is_global : bool) (name : string) (vio : Cil.varinfo option) : label = + make_label_int is_global name vio + +(** Create a new label with an unspecified name and an empty alias set. *) +let fresh_label (is_global : bool) : label = + let index = fresh_index () in + make_label_int is_global ("l_" ^ string_of_int index) None + +(** Create a fresh bound (edge in the constraint graph). *) +let make_bound (i, a : int * label) : lblinfo bound = + {index = i; info = a} + +let make_tau_bound (i, a : int * tau) : tinfo bound = + {index = i; info = a} + +(** Create a fresh named variable with name '[name]. *) +let make_var (b: bool) (name : string) : tau = + U.uref (Var {v_name = ("'" ^ name); + v_hole = H.create 8; + v_stamp = fresh_index (); + v_global = b; + v_mlbs = B.empty; + v_mubs = B.empty; + v_plbs = B.empty; + v_pubs = B.empty; + v_nlbs = B.empty; + v_nubs = B.empty}) + +(** Create a fresh unnamed variable (name will be 'fv). *) +let fresh_var (is_global : bool) : tau = + make_var is_global ("fv" ^ string_of_int (fresh_index ())) + +(** Create a fresh unnamed variable (name will be 'fi). *) +let fresh_var_i (is_global : bool) : tau = + make_var is_global ("fi" ^ string_of_int (fresh_index())) + +(** Create a Fun constructor. *) +let make_fun (lbl, a, r : label * (tau list) * tau) : tau = + U.uref (Fun {fl = lbl; + f_stamp = fresh_index (); + f_global = false; + args = a; + ret = r }) + +(** Create a Ref constructor. *) +let make_ref (lbl,pt : label * tau) : tau = + U.uref (Ref {rl = lbl; + r_stamp = fresh_index (); + r_global = false; + points_to = pt}) + +(** Create a Pair constructor. *) +let make_pair (p,f : tau * tau) : tau = + U.uref (Pair {ptr = p; + p_stamp = fresh_index (); + p_global = false; + lam = f}) + +(** Copy the toplevel constructor of [t], putting fresh variables in each + argement of the constructor. *) +let copy_toplevel (t : tau) : tau = + match find t with + Pair _ -> make_pair (fresh_var_i false, fresh_var_i false) + | Ref _ -> make_ref (fresh_label false, fresh_var_i false) + | Fun f -> + let fresh_fn = fun _ -> fresh_var_i false in + make_fun (fresh_label false, + List.map fresh_fn f.args, fresh_var_i false) + | _ -> die "copy_toplevel" + + +let has_same_structure (t : tau) (t' : tau) = + match find t, find t' with + Pair _, Pair _ -> true + | Ref _, Ref _ -> true + | Fun _, Fun _ -> true + | Var _, Var _ -> true + | _ -> false + + +let pad_args (f, f' : finfo * finfo) : unit = + let padding = ref ((List.length f.args) - (List.length f'.args)) + in + if !padding == 0 then () + else + let to_pad = + if !padding > 0 then f' else (padding := -(!padding); f) + in + for i = 1 to !padding do + to_pad.args <- to_pad.args @ [fresh_var false] + done + + +let pad_args2 (fi, tlr : finfo * tau list ref) : unit = + let padding = ref (List.length fi.args - List.length !tlr) + in + if !padding == 0 then () + else + if !padding > 0 then + for i = 1 to !padding do + tlr := !tlr @ [fresh_var false] + done + else + begin + padding := -(!padding); + for i = 1 to !padding do + fi.args <- fi.args @ [fresh_var false] + done + end + +(***********************************************************************) +(* *) +(* Constraint Generation/ Resolution *) +(* *) +(***********************************************************************) + + +(** Make the type a global type *) +let set_global (t : tau) (b : bool) : unit = + let set_global_down t = + match find t with + Var v -> v.v_global <- true + | Ref r -> set_global_label r.rl true + | Fun f -> set_global_label f.fl true + | _ -> () + in + if !debug && b then Printf.printf "Set global: %s\n" (string_of_tau t); + assert ((not (get_global t)) || b); + if b then iter_tau set_global_down t; + match find t with + Var v -> v.v_global <- b + | Ref r -> r.r_global <- b + | Pair p -> p.p_global <- b + | Fun f -> f.f_global <- b + + +let rec unify_int (t, t' : tau * tau) : unit = + if equal_tau t t' then () + else + let ti, ti' = find t, find t' in + U.unify combine (t, t'); + match ti, ti' with + Var v, Var v' -> + set_global t' (v.v_global || get_global t'); + merge_vholes (v, v'); + merge_vlbs (v, v'); + merge_vubs (v, v') + | Var v, _ -> + set_global t' (v.v_global || get_global t'); + trigger_vhole v t'; + notify_vlbs t v; + notify_vubs t v + | _, Var v -> + set_global t (v.v_global || get_global t); + trigger_vhole v t; + notify_vlbs t' v; + notify_vubs t' v + | Ref r, Ref r' -> + set_global t (r.r_global || r'.r_global); + unify_ref (r, r') + | Fun f, Fun f' -> + set_global t (f.f_global || f'.f_global); + unify_fun (f, f') + | Pair p, Pair p' -> () + | _ -> raise Inconsistent +and notify_vlbs (t : tau) (vi : vinfo) : unit = + let notify p bounds = + List.iter + (fun b -> + add_constraint (Unification (b.info,copy_toplevel t)); + add_constraint (Leq (b.info, (b.index, p), t))) + bounds + in + notify Sub (B.elements vi.v_mlbs); + notify Pos (B.elements vi.v_plbs); + notify Neg (B.elements vi.v_nlbs) +and notify_vubs (t : tau) (vi : vinfo) : unit = + let notify p bounds = + List.iter + (fun b -> + add_constraint (Unification (b.info,copy_toplevel t)); + add_constraint (Leq (t, (b.index, p), b.info))) + bounds + in + notify Sub (B.elements vi.v_mubs); + notify Pos (B.elements vi.v_pubs); + notify Neg (B.elements vi.v_nubs) +and unify_ref (ri,ri' : rinfo * rinfo) : unit = + add_constraint (Unification (ri.points_to, ri'.points_to)) +and unify_fun (fi, fi' : finfo * finfo) : unit = + let rec union_args = function + _, [] -> false + | [], _ -> true + | h :: t, h' :: t' -> + add_constraint (Unification (h, h')); + union_args(t, t') + in + unify_label(fi.fl, fi'.fl); + add_constraint (Unification (fi.ret, fi'.ret)); + if union_args (fi.args, fi'.args) then fi.args <- fi'.args; +and unify_label (l, l' : label * label) : unit = + let pick_name (li, li' : lblinfo * lblinfo) = + if String.length li.l_name > 1 && String.sub (li.l_name) 0 2 = "l_" then + li.l_name <- li'.l_name + else () + in + let combine_label (li, li' : lblinfo *lblinfo) : lblinfo = + let rm_self b = not (li.l_stamp = get_label_stamp b.info) + in + pick_name (li, li'); + li.l_global <- li.l_global || li'.l_global; + li.aliases <- C.union li.aliases li'.aliases; + li.p_ubounds <- B.union li.p_ubounds li'.p_ubounds; + li.p_lbounds <- B.union li.p_lbounds li'.p_lbounds; + li.n_ubounds <- B.union li.n_ubounds li'.n_ubounds; + li.n_lbounds <- B.union li.n_lbounds li'.n_lbounds; + li.m_ubounds <- B.union li.m_ubounds (B.filter rm_self li'.m_ubounds); + li.m_lbounds <- B.union li.m_lbounds (B.filter rm_self li'.m_lbounds); + li.m_upath <- P.union li.m_upath li'.m_upath; + li.m_lpath<- P.union li.m_lpath li'.m_lpath; + li.n_upath <- P.union li.n_upath li'.n_upath; + li.n_lpath <- P.union li.n_lpath li'.n_lpath; + li.p_upath <- P.union li.p_upath li'.p_upath; + li.p_lpath <- P.union li.p_lpath li'.p_lpath; + li.l_seeded <- li.l_seeded || li'.l_seeded; + li.l_ret <- li.l_ret || li'.l_ret; + li.l_param <- li.l_param || li'.l_param; + li + in + if !debug_constraints then + Printf.printf "%s == %s\n" (string_of_label l) (string_of_label l'); + U.unify combine_label (l, l') +and merge_vholes (vi, vi' : vinfo * vinfo) : unit = + H.iter + (fun i -> fun _ -> H.replace vi'.v_hole i ()) + vi.v_hole +and merge_vlbs (vi, vi' : vinfo * vinfo) : unit = + vi'.v_mlbs <- B.union vi.v_mlbs vi'.v_mlbs; + vi'.v_plbs <- B.union vi.v_plbs vi'.v_plbs; + vi'.v_nlbs <- B.union vi.v_nlbs vi'.v_nlbs +and merge_vubs (vi, vi' : vinfo * vinfo) : unit = + vi'.v_mubs <- B.union vi.v_mubs vi'.v_mubs; + vi'.v_pubs <- B.union vi.v_pubs vi'.v_pubs; + vi'.v_nubs <- B.union vi.v_nubs vi'.v_nubs +and trigger_vhole (vi : vinfo) (t : tau) = + let add_self_loops (t : tau) : unit = + match find t with + Var v -> + H.iter + (fun i -> fun _ -> H.replace v.v_hole i ()) + vi.v_hole + | Ref r -> + H.iter + (fun i -> fun _ -> + leq_label (r.rl, (i, Pos), r.rl); + leq_label (r.rl, (i, Neg), r.rl)) + vi.v_hole + | Fun f -> + H.iter + (fun i -> fun _ -> + leq_label (f.fl, (i, Pos), f.fl); + leq_label (f.fl, (i, Neg), f.fl)) + vi.v_hole + | _ -> () + in + iter_tau add_self_loops t +(** Pick the representative info for two tinfo's. This function prefers the + first argument when both arguments are the same structure, but when + one type is a structure and the other is a var, it picks the structure. + All other actions (e.g., updating the info) is done in unify_int *) +and combine (ti, ti' : tinfo * tinfo) : tinfo = + match ti, ti' with + Var _, _ -> ti' + | _, _ -> ti +and leq_int (t, (i, p), t') : unit = + if equal_tau t t' then () + else + let ti, ti' = find t, find t' in + match ti, ti' with + Var v, Var v' -> + begin + match p with + Pos -> + v.v_pubs <- B.add (make_tau_bound (i, t')) v.v_pubs; + v'.v_plbs <- B.add (make_tau_bound (i, t)) v'.v_plbs + | Neg -> + v.v_nubs <- B.add (make_tau_bound (i, t')) v.v_nubs; + v'.v_nlbs <- B.add (make_tau_bound (i, t)) v'.v_nlbs + | Sub -> + v.v_mubs <- B.add (make_tau_bound (i, t')) v.v_mubs; + v'.v_mlbs <- B.add (make_tau_bound (i, t)) v'.v_mlbs + end + | Var v, _ -> + add_constraint (Unification (t, copy_toplevel t')); + add_constraint (Leq (t, (i, p), t')) + | _, Var v -> + add_constraint (Unification (t', copy_toplevel t)); + add_constraint (Leq (t, (i, p), t')) + | Ref r, Ref r' -> leq_ref (r, (i, p), r') + | Fun f, Fun f' -> add_constraint (Unification (t, t')) + | Pair pr, Pair pr' -> + add_constraint (Leq (pr.ptr, (i, p), pr'.ptr)); + add_constraint (Leq (pr.lam, (i, p), pr'.lam)) + | _ -> raise Inconsistent +and leq_ref (ri, (i, p), ri') : unit = + let add_self_loops (t : tau) : unit = + match find t with + Var v -> H.replace v.v_hole i () + | Ref r -> + leq_label (r.rl, (i, Pos), r.rl); + leq_label (r.rl, (i, Neg), r.rl) + | Fun f -> + leq_label (f.fl, (i, Pos), f.fl); + leq_label (f.fl, (i, Neg), f.fl) + | _ -> () + in + iter_tau add_self_loops ri.points_to; + add_constraint (Unification (ri.points_to, ri'.points_to)); + leq_label(ri.rl, (i, p), ri'.rl) +and leq_label (l,(i, p), l') : unit = + if !debug_constraints then + Printf.printf + "%s <={%d,%s} %s\n" + (string_of_label l) i (string_of_polarity p) (string_of_label l'); + let li, li' = find l, find l' in + match p with + Pos -> + li.l_ret <- true; + li.p_ubounds <- B.add (make_bound (i, l')) li.p_ubounds; + li'.p_lbounds <- B.add (make_bound (i, l)) li'.p_lbounds + | Neg -> + li'.l_param <- true; + li.n_ubounds <- B.add (make_bound (i, l')) li.n_ubounds; + li'.n_lbounds <- B.add (make_bound (i, l)) li'.n_lbounds + | Sub -> + if U.equal (l, l') then () + else + begin + li.m_ubounds <- B.add (make_bound(0, l')) li.m_ubounds; + li'.m_lbounds <- B.add (make_bound(0, l)) li'.m_lbounds + end +and add_constraint_int (c : tconstraint) (toplev : bool) = + if !debug_constraints && toplev then + begin + Printf.printf "%d:>" !toplev_count; + print_constraint c; + incr toplev_count + end + else + if !debug_constraints then print_constraint c else (); + begin + match c with + Unification _ -> Q.add c eq_worklist + | Leq _ -> Q.add c leq_worklist + end; + solve_constraints () +and add_constraint (c : tconstraint) = + add_constraint_int c false +and add_toplev_constraint (c : tconstraint) = + if !print_constraints && not !debug_constraints then + begin + Printf.printf "%d:>" !toplev_count; + incr toplev_count; + print_constraint c + end + else (); + add_constraint_int c true +and fetch_constraint () : tconstraint option = + try Some (Q.take eq_worklist) + with Q.Empty -> (try Some (Q.take leq_worklist) + with Q.Empty -> None) +(** The main solver loop. *) +and solve_constraints () : unit = + match fetch_constraint () with + Some c -> + begin + match c with + Unification (t, t') -> unify_int (t, t') + | Leq (t, (i, p), t') -> + if !no_sub then unify_int (t, t') + else + if !analyze_mono then leq_int (t, (0, Sub), t') + else leq_int (t, (i, p), t') + end; + solve_constraints () + | None -> () + + +(***********************************************************************) +(* *) +(* Interface Functions *) +(* *) +(***********************************************************************) + +(** Return the contents of the lvalue. *) +let rvalue (lv : lvalue) : tau = + lv.contents + +(** Dereference the rvalue. If it does not have enough structure to support + the operation, then the correct structure is added via new unification + constraints. *) +let rec deref (t : tau) : lvalue = + match U.deref t with + Pair p -> + begin + match U.deref p.ptr with + Var _ -> + let is_global = global_tau p.ptr in + let points_to = fresh_var is_global in + let l = fresh_label is_global in + let r = make_ref (l, points_to) + in + add_toplev_constraint (Unification (p.ptr, r)); + make_lval (l, points_to) + | Ref r -> make_lval (r.rl, r.points_to) + | _ -> raise WellFormed + end + | Var v -> + let is_global = global_tau t in + add_toplev_constraint + (Unification (t, make_pair (fresh_var is_global, + fresh_var is_global))); + deref t + | _ -> raise WellFormed + +(** Form the union of [t] and [t'], if it doesn't exist already. *) +let join (t : tau) (t' : tau) : tau = + try H.find join_cache (get_stamp t, get_stamp t') + with Not_found -> + let t'' = fresh_var false in + add_toplev_constraint (Leq (t, (0, Sub), t'')); + add_toplev_constraint (Leq (t', (0, Sub), t'')); + H.add join_cache (get_stamp t, get_stamp t') t''; + t'' + +(** Form the union of a list [tl], expected to be the initializers of some + structure or array type. *) +let join_inits (tl : tau list) : tau = + let t' = fresh_var false in + List.iter + (fun t -> add_toplev_constraint (Leq (t, (0, Sub), t'))) + tl; + t' + +(** Take the address of an lvalue. Does not add constraints. *) +let address (lv : lvalue) : tau = + make_pair (make_ref (lv.l, lv.contents), fresh_var false) + +(** For this version of golf, instantiation is handled at [apply] *) +let instantiate (lv : lvalue) (i : int) : lvalue = + lv + +(** Constraint generated from assigning [t] to [lv]. *) +let assign (lv : lvalue) (t : tau) : unit = + add_toplev_constraint (Leq (t, (0, Sub), lv.contents)) + +let assign_ret (i : int) (lv : lvalue) (t : tau) : unit = + add_toplev_constraint (Leq (t, (i, Pos), lv.contents)) + +(** Project out the first (ref) component or a pair. If the argument [t] has + no discovered structure, raise NoContents. *) +let proj_ref (t : tau) : tau = + match U.deref t with + Pair p -> p.ptr + | Var v -> raise NoContents + | _ -> raise WellFormed + +(* Project out the second (fun) component of a pair. If the argument [t] has + no discovered structure, create it on the fly by adding constraints. *) +let proj_fun (t : tau) : tau = + match U.deref t with + Pair p -> p.lam + | Var v -> + let p, f = fresh_var false, fresh_var false in + add_toplev_constraint (Unification (t, make_pair(p, f))); + f + | _ -> raise WellFormed + +let get_args (t : tau) : tau list = + match U.deref t with + Fun f -> f.args + | _ -> raise WellFormed + +let get_finfo (t : tau) : finfo = + match U.deref t with + Fun f -> f + | _ -> raise WellFormed + +(** Function type [t] is applied to the arguments [actuals]. Unifies the + actuals with the formals of [t]. If no functions have been discovered for + [t] yet, create a fresh one and unify it with t. The result is the return + value of the function plus the index of this application site. *) +let apply (t : tau) (al : tau list) : (tau * int) = + let i = fresh_appsite () in + let f = proj_fun t in + let actuals = ref al in + let fi,ret = + match U.deref f with + Fun fi -> fi, fi.ret + | Var v -> + let new_l, new_ret, new_args = + fresh_label false, fresh_var false, + List.map (function _ -> fresh_var false) !actuals + in + let new_fun = make_fun (new_l, new_args, new_ret) in + add_toplev_constraint (Unification (new_fun, f)); + (get_finfo new_fun, new_ret) + | _ -> raise WellFormed + in + pad_args2 (fi, actuals); + List.iter2 + (fun actual -> fun formal -> + add_toplev_constraint (Leq (actual,(i, Neg), formal))) + !actuals fi.args; + (ret, i) + +(** Create a new function type with name [name], list of formal arguments + [formals], and return value [ret]. Adds no constraints. *) +let make_function (name : string) (formals : lvalue list) (ret : tau) : tau = + let f = make_fun (make_label false name None, + List.map (fun x -> rvalue x) formals, + ret) + in + make_pair (fresh_var false, f) + +(** Create an lvalue. If [is_global] is true, the lvalue will be treated + monomorphically. *) +let make_lvalue (is_global : bool) (name : string) (vio : Cil.varinfo option) : lvalue = + if !debug && is_global then + Printf.printf "Making global lvalue : %s\n" name + else (); + make_lval (make_label is_global name vio, make_var is_global name) + +(** Create a fresh non-global named variable. *) +let make_fresh (name : string) : tau = + make_var false name + +(** The default type for constants. *) +let bottom () : tau = + make_var false "bottom" + +(** Unify the result of a function with its return value. *) +let return (t : tau) (t' : tau) = + add_toplev_constraint (Leq (t', (0, Sub), t)) + +(***********************************************************************) +(* *) +(* Query/Extract Solutions *) +(* *) +(***********************************************************************) + +let make_summary = leq_label + +let path_signature k l l' b : int list = + let ksig = + match k with + Positive -> 1 + | Negative -> 2 + | _ -> 3 + in + [ksig; + get_label_stamp l; + get_label_stamp l'; + if b then 1 else 0] + +let make_path (k, l, l', b) = + let psig = path_signature k l l' b in + if PH.mem path_hash psig then () + else + let new_path = {kind = k; head = l; tail = l'; reached_global = b} + and li, li' = find l, find l' in + PH.add path_hash psig new_path; + Q.add new_path path_worklist; + begin + match k with + Positive -> + li.p_upath <- P.add new_path li.p_upath; + li'.p_lpath <- P.add new_path li'.p_lpath + | Negative -> + li.n_upath <- P.add new_path li.n_upath; + li'.n_lpath <- P.add new_path li'.n_lpath + | _ -> + li.m_upath <- P.add new_path li.m_upath; + li'.m_lpath <- P.add new_path li'.m_lpath + end; + if !debug then + begin + print_string "Discovered path : "; + print_path new_path; + print_newline () + end + +let backwards_tabulate (l : label) : unit = + let rec loop () = + let rule1 p = + if !debug then print_endline "rule1"; + B.iter + (fun lb -> + make_path (Match, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).m_lbounds + and rule2 p = + if !debug then print_endline "rule2"; + B.iter + (fun lb -> + make_path (Negative, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).n_lbounds + and rule2m p = + if !debug then print_endline "rule2m"; + B.iter + (fun lb -> + make_path (Match, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).n_lbounds + and rule3 p = + if !debug then print_endline "rule3"; + B.iter + (fun lb -> + make_path (Positive, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).p_lbounds + and rule4 p = + if !debug then print_endline "rule4"; + B.iter + (fun lb -> + make_path(Negative, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).m_lbounds + and rule5 p = + if !debug then print_endline "rule5"; + B.iter + (fun lb -> + make_path (Positive, lb.info, p.tail, + p.reached_global || is_global_label p.head)) + (find p.head).m_lbounds + and rule6 p = + if !debug then print_endline "rule6"; + B.iter + (fun lb -> + if is_seeded_label lb.info then () + else + begin + (find lb.info).l_seeded <- true; (* set seeded *) + make_path (Seed, lb.info, lb.info, + is_global_label lb.info) + end) + (find p.head).p_lbounds + and rule7 p = + if !debug then print_endline "rule7"; + if not (is_ret_label p.tail && is_param_label p.head) then () + else + B.iter + (fun lb -> + B.iter + (fun ub -> + if lb.index = ub.index then + begin + if !debug then + Printf.printf "New summary : %s %s\n" + (string_of_label lb.info) + (string_of_label ub.info); + make_summary (lb.info, (0, Sub), ub.info); + (* rules 1, 4, and 5 *) + P.iter + (fun ubp -> (* rule 1 *) + make_path (Match, lb.info, ubp.tail, + ubp.reached_global)) + (find ub.info).m_upath; + P.iter + (fun ubp -> (* rule 4 *) + make_path (Negative, lb.info, ubp.tail, + ubp.reached_global)) + (find ub.info).n_upath; + P.iter + (fun ubp -> (* rule 5 *) + make_path (Positive, lb.info, ubp.tail, + ubp.reached_global)) + (find ub.info).p_upath + end) + (find p.tail).p_ubounds) + (find p.head).n_lbounds + in + let matched_backward_rules p = + rule1 p; + if p.reached_global then rule2m p else rule2 p; + rule3 p; + rule6 p; + rule7 p + and negative_backward_rules p = + rule2 p; + rule3 p; + rule4 p; + rule6 p; + rule7 p + and positive_backward_rules p = + rule3 p; + rule5 p; + rule6 p; + rule7 p + in (* loop *) + if Q.is_empty path_worklist then () + else + let p = Q.take path_worklist in + if !debug then + begin + print_string "Processing path: "; + print_path p; + print_newline () + end; + begin + match p.kind with + Positive -> + if is_global_label p.tail then matched_backward_rules p + else positive_backward_rules p + | Negative -> negative_backward_rules p + | _ -> matched_backward_rules p + end; + loop () + in (* backwards_tabulate *) + if !debug then + begin + Printf.printf "Tabulating for %s..." (string_of_label l); + if is_global_label l then print_string "(global)"; + print_newline () + end; + make_path (Seed, l, l, is_global_label l); + loop () + +let collect_ptsets (l : label) : constantset = (* todo -- cache aliases *) + let li = find l + and collect init s = + P.fold (fun x a -> C.union a (find x.head).aliases) s init + in + backwards_tabulate l; + collect (collect (collect li.aliases li.m_lpath) li.n_lpath) li.p_lpath + +let extract_ptlabel (lv : lvalue) : label option = + try + match find (proj_ref lv.contents) with + Var v -> None + | Ref r -> Some r.rl; + | _ -> raise WellFormed + with NoContents -> None + +let points_to_aux (t : tau) : constant list = + try + match find (proj_ref t) with + Var v -> [] + | Ref r -> C.elements (collect_ptsets r.rl) + | _ -> raise WellFormed + with NoContents -> [] + +let points_to_names (lv : lvalue) : string list = + List.map (fun (_, str, _) -> str) (points_to_aux lv.contents) + +let points_to (lv : lvalue) : Cil.varinfo list = + let rec get_vinfos l : Cil.varinfo list = match l with + | (_, _, h) :: t -> h :: get_vinfos t + | [] -> [] + in + get_vinfos (points_to_aux lv.contents) + +let epoints_to (t : tau) : Cil.varinfo list = + let rec get_vinfos l : Cil.varinfo list = match l with + | (_, _, h) :: t -> h :: get_vinfos t + | [] -> [] + in + get_vinfos (points_to_aux t) + +let smart_alias_query (l : label) (l' : label) : bool = + (* Set of dead configurations *) + let dead_configs : config_map = CH.create 16 in + (* the set of discovered configurations *) + let discovered : config_map = CH.create 16 in + let rec filter_match (i : int) = + B.filter (fun (b : lblinfo bound) -> i = b.index) + in + let rec simulate c l l' = + let config = (c, get_label_stamp l, get_label_stamp l') in + if U.equal (l, l') then + begin + if !debug then + Printf.printf + "%s and %s are aliased\n" + (string_of_label l) + (string_of_label l'); + raise APFound + end + else if CH.mem discovered config then () + else + begin + if !debug_aliases then + Printf.printf + "Exploring configuration %s\n" + (string_of_configuration config); + CH.add discovered config (); + B.iter + (fun lb -> simulate c lb.info l') + (get_bounds Sub false l); (* epsilon closure of l *) + B.iter + (fun lb -> simulate c l lb.info) + (get_bounds Sub false l'); (* epsilon closure of l' *) + B.iter + (fun lb -> + let matching = + filter_match lb.index (get_bounds Pos false l') + in + B.iter + (fun b -> simulate Closed lb.info b.info) + matching; + if is_global_label l' then (* positive self-loops on l' *) + simulate Closed lb.info l') + (get_bounds Pos false l); (* positive transitions on l *) + if is_global_label l then + B.iter + (fun lb -> simulate Closed l lb.info) + (get_bounds Pos false l'); (* positive self-loops on l *) + begin + match c with (* negative transitions on l, only if Open *) + Open -> + B.iter + (fun lb -> + let matching = + filter_match lb.index (get_bounds Neg false l') + in + B.iter + (fun b -> simulate Open lb.info b.info) + matching ; + if is_global_label l' then (* neg self-loops on l' *) + simulate Open lb.info l') + (get_bounds Neg false l); + if is_global_label l then + B.iter + (fun lb -> simulate Open l lb.info) + (get_bounds Neg false l') (* negative self-loops on l *) + | _ -> () + end; + (* if we got this far, then the configuration was not used *) + CH.add dead_configs config (); + end + in + try + begin + if H.mem cached_aliases (get_label_stamp l, get_label_stamp l') then + true + else + begin + simulate Open l l'; + if !debug then + Printf.printf + "%s and %s are NOT aliased\n" + (string_of_label l) + (string_of_label l'); + false + end + end + with APFound -> + CH.iter + (fun config -> fun _ -> + if not (CH.mem dead_configs config) then + H.add + cached_aliases + (get_label_stamp l, get_label_stamp l') + ()) + discovered; + true + +(** todo : uses naive alias query for now *) +let may_alias (t1 : tau) (t2 : tau) : bool = + try + let l1 = + match find (proj_ref t1) with + Ref r -> r.rl + | Var v -> raise NoContents + | _ -> raise WellFormed + and l2 = + match find (proj_ref t2) with + Ref r -> r.rl + | Var v -> raise NoContents + | _ -> raise WellFormed + in + not (C.is_empty (C.inter (collect_ptsets l1) (collect_ptsets l2))) + with NoContents -> false + +let alias_query (b : bool) (lvl : lvalue list) : int * int = + let naive_count = ref 0 in + let smart_count = ref 0 in + let lbls = List.map extract_ptlabel lvl in (* label option list *) + let ptsets = + List.map + (function + Some l -> collect_ptsets l + | None -> C.empty) + lbls in + let record_alias s lo s' lo' = + match lo, lo' with + Some l, Some l' -> + if !debug_aliases then + Printf.printf + "Checking whether %s and %s are aliased...\n" + (string_of_label l) + (string_of_label l'); + if C.is_empty (C.inter s s') then () + else + begin + incr naive_count; + if !smart_aliases && smart_alias_query l l' then + incr smart_count + end + | _ -> () + in + let rec check_alias sets labels = + match sets,labels with + s :: st, l :: lt -> + List.iter2 (record_alias s l) ptsets lbls; + check_alias st lt + | [], [] -> () + | _ -> die "check_alias" + in + check_alias ptsets lbls; + (!naive_count, !smart_count) + +let alias_frequency (lvl : (lvalue * bool) list) : int * int = + let extract_lbl (lv, b : lvalue * bool) = (lv.l, b) in + let naive_count = ref 0 in + let smart_count = ref 0 in + let lbls = List.map extract_lbl lvl in + let ptsets = + List.map + (fun (lbl, b) -> + if b then (find lbl).loc (* symbol access *) + else collect_ptsets lbl) + lbls in + let record_alias s (l, b) s' (l', b') = + if !debug_aliases then + Printf.printf + "Checking whether %s and %s are aliased...\n" + (string_of_label l) + (string_of_label l'); + if C.is_empty (C.inter s s') then () + else + begin + if !debug_aliases then + Printf.printf + "%s and %s are aliased naively...\n" + (string_of_label l) + (string_of_label l'); + incr naive_count; + if !smart_aliases then + if b || b' || smart_alias_query l l' then incr smart_count + else + Printf.printf + "%s and %s are not aliased by smart queries...\n" + (string_of_label l) + (string_of_label l'); + end + in + let rec check_alias sets labels = + match sets, labels with + s :: st, l :: lt -> + List.iter2 (record_alias s l) ptsets lbls; + check_alias st lt + | [], [] -> () + | _ -> die "check_alias" + in + check_alias ptsets lbls; + (!naive_count, !smart_count) + + +(** an interface for extracting abstract locations from this analysis *) + +type absloc = label + +let absloc_of_lvalue (l : lvalue) : absloc = l.l +let absloc_eq (a1, a2) = smart_alias_query a1 a2 +let absloc_print_name = ref true +let d_absloc () (p : absloc) = + let a = find p in + if !absloc_print_name then Pretty.dprintf "%s" a.l_name + else Pretty.dprintf "%d" a.l_stamp + +let phonyAddrOf (lv : lvalue) : lvalue = + make_lval (fresh_label true, address lv) + +(* transitive closure of points to, starting from l *) +let rec tauPointsTo (l : tau) : absloc list = + match find l with + Var _ -> [] + | Ref r -> r.rl :: tauPointsTo r.points_to + | _ -> [] + +let rec absloc_points_to (l : lvalue) : absloc list = + tauPointsTo l.contents + + +(** The following definitions are only introduced for the + compatability with Olf. *) + +exception UnknownLocation + +let finished_constraints () = () +let apply_undefined (_ : tau list) = (fresh_var true, 0) +let assign_undefined (_ : lvalue) = () + +let absloc_epoints_to = tauPointsTo diff --git a/cil/src/ext/pta/golf.mli b/cil/src/ext/pta/golf.mli new file mode 100644 index 00000000..569855c5 --- /dev/null +++ b/cil/src/ext/pta/golf.mli @@ -0,0 +1,83 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +type lvalue +type tau +type absloc + +(* only for compatability with Olf *) +exception UnknownLocation + +val debug : bool ref +val debug_constraints : bool ref +val debug_aliases : bool ref +val smart_aliases : bool ref +val finished_constraints : unit -> unit (* only for compatability with Olf *) +val print_constraints : bool ref +val no_flow : bool ref +val no_sub : bool ref +val analyze_mono : bool ref +val solve_constraints : unit -> unit +val rvalue : lvalue -> tau +val deref : tau -> lvalue +val join : tau -> tau -> tau +val join_inits : tau list -> tau +val address : lvalue -> tau +val instantiate : lvalue -> int -> lvalue +val assign : lvalue -> tau -> unit +val assign_ret : int -> lvalue -> tau -> unit +val apply : tau -> tau list -> (tau * int) +val apply_undefined : tau list -> (tau * int) (* only for compatability with Olf *) +val assign_undefined : lvalue -> unit (* only for compatability with Olf *) +val make_function : string -> lvalue list -> tau -> tau +val make_lvalue : bool -> string -> (Cil.varinfo option) -> lvalue +val bottom : unit -> tau +val return : tau -> tau -> unit +val make_fresh : string -> tau +val points_to_names : lvalue -> string list +val points_to : lvalue -> Cil.varinfo list +val epoints_to : tau -> Cil.varinfo list +val string_of_lvalue : lvalue -> string +val global_lvalue : lvalue -> bool +val alias_query : bool -> lvalue list -> int * int +val alias_frequency : (lvalue * bool) list -> int * int + +val may_alias : tau -> tau -> bool + +val absloc_points_to : lvalue -> absloc list +val absloc_epoints_to : tau -> absloc list +val absloc_of_lvalue : lvalue -> absloc +val absloc_eq : (absloc * absloc) -> bool +val d_absloc : unit -> absloc -> Pretty.doc +val phonyAddrOf : lvalue -> lvalue diff --git a/cil/src/ext/pta/olf.ml b/cil/src/ext/pta/olf.ml new file mode 100644 index 00000000..0d770028 --- /dev/null +++ b/cil/src/ext/pta/olf.ml @@ -0,0 +1,1108 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* Exceptions *) +(* *) +(***********************************************************************) + +exception Inconsistent (* raised if constraint system is inconsistent *) +exception WellFormed (* raised if types are not well-formed *) +exception NoContents +exception APFound (* raised if an alias pair is found, a control + flow exception *) +exception ReachedTop (* raised if top (from an undefined function) + flows to a c_absloc during the flow step *) +exception UnknownLocation + +let solve_constraints () = () (* only for compatability with Golf *) + +open Cil + +module U = Uref +module S = Setp +module H = Hashtbl +module Q = Queue + +(** Generic bounds *) +type 'a bound = {info : 'a U.uref} + +module Bound = +struct + type 'a t = 'a bound + let compare (x : 'a t) (y : 'a t) = + Pervasives.compare (U.deref x.info) (U.deref y.info) +end + +module B = S.Make (Bound) + +type 'a boundset = 'a B.t + +(** Abslocs, which identify elements in points-to sets *) +(** jk : I'd prefer to make this an 'a absloc and specialize it to + varinfo for use with the Cil frontend, but for now, this will do *) +type absloc = int * string * Cil.varinfo option + +module Absloc = +struct + type t = absloc + let compare (xid, _, _) (yid, _, _) = xid - yid +end + +module C = Set.Make (Absloc) + +(** Sets of abslocs. Set union is used when two c_abslocs containing + absloc sets are unified *) +type abslocset = C.t + +let d_absloc () (a: absloc) : Pretty.doc = + let i,s,_ = a in + Pretty.dprintf "<%d, %s>" i s + +type c_abslocinfo = { + mutable l_name: string; (** name of the location *) + loc : absloc; + l_stamp : int; + mutable l_top : bool; + mutable aliases : abslocset; + mutable lbounds : c_abslocinfo boundset; + mutable ubounds : c_abslocinfo boundset; + mutable flow_computed : bool +} +and c_absloc = c_abslocinfo U.uref + +(** The type of lvalues. *) +type lvalue = { + l: c_absloc; + contents: tau +} +and vinfo = { + v_stamp : int; + v_name : string; + mutable v_top : bool; + mutable v_lbounds : tinfo boundset; + mutable v_ubounds : tinfo boundset +} +and rinfo = { + r_stamp : int; + rl : c_absloc; + points_to : tau +} +and finfo = { + f_stamp : int; + fl : c_absloc; + ret : tau; + mutable args : tau list +} +and pinfo = { + p_stamp : int; + ptr : tau; + lam : tau +} +and tinfo = + Var of vinfo + | Ref of rinfo + | Fun of finfo + | Pair of pinfo +and tau = tinfo U.uref + +type tconstraint = + Unification of tau * tau + | Leq of tau * tau + +(** Association lists, used for printing recursive types. The first + element is a type that has been visited. The second element is the + string representation of that type (so far). If the string option is + set, then this type occurs within itself, and is associated with the + recursive var name stored in the option. When walking a type, add it + to an association list. + + Example: suppose we have the constraint 'a = ref('a). The type is + unified via cyclic unification, and would loop infinitely if we + attempted to print it. What we want to do is print the type u + rv. ref(rv). This is accomplished in the following manner: + + -- ref('a) is visited. It is not in the association list, so it is + added and the string "ref(" is stored in the second element. We + recurse to print the first argument of the constructor. + + -- In the recursive call, we see that 'a (or ref('a)) is already + in the association list, so the type is recursive. We check the + string option, which is None, meaning that this is the first + recurrence of the type. We create a new recursive variable, rv and + set the string option to 'rv. Next, we prepend u rv. to the string + representation we have seen before, "ref(", and return "rv" as the + string representation of this type. + + -- The string so far is "u rv.ref(". The recursive call returns, + and we complete the type by printing the result of the call, "rv", + and ")" + + In a type where the recursive variable appears twice, e.g. 'a = + pair('a,'a), the second time we hit 'a, the string option will be + set, so we know to reuse the same recursive variable name. +*) +type association = tau * string ref * string option ref + +(** The current state of the solver engine either adding more + constraints, or finished adding constraints and querying graph *) +type state = + AddingConstraints + | FinishedConstraints + +(***********************************************************************) +(* *) +(* Global Variables *) +(* *) +(***********************************************************************) + +(** A count of the constraints introduced from the AST. Used for + debugging. *) +let toplev_count = ref 0 + +let solver_state : state ref = ref AddingConstraints + +(** Print the instantiations constraints. *) +let print_constraints : bool ref = ref false + +(** If true, print all constraints (including induced) and show + additional debug output. *) +let debug = ref false + +(** Just debug all the constraints (including induced) *) +let debug_constraints = ref false + +(** Debug the flow step *) +let debug_flow_step = ref false + +(** Compatibility with GOLF *) +let debug_aliases = ref false +let smart_aliases = ref false +let no_flow = ref false +let analyze_mono = ref false + +(** If true, disable subtyping (unification at all levels) *) +let no_sub = ref false + +(** A list of equality constraints. *) +let eq_worklist : tconstraint Q.t = Q.create () + +(** A list of leq constraints. *) +let leq_worklist : tconstraint Q.t = Q.create () + +(** A hashtable containing stamp pairs of c_abslocs that must be aliased. *) +let cached_aliases : (int * int, unit) H.t = H.create 64 + +(** A hashtable mapping pairs of tau's to their join node. *) +let join_cache : (int * int, tau) H.t = H.create 64 + +(** *) +let label_prefix = "l_" + + +(***********************************************************************) +(* *) +(* Utility Functions *) +(* *) +(***********************************************************************) + +let starts_with s p = + let n = String.length p in + if String.length s < n then false + else String.sub s 0 n = p + + +let die s = + Printf.printf "*******\nAssertion failed: %s\n*******\n" s; + assert false + +let insist b s = + if not b then die s else () + + +let can_add_constraints () = + !solver_state = AddingConstraints + +let can_query_graph () = + !solver_state = FinishedConstraints + +let finished_constraints () = + insist (!solver_state = AddingConstraints) "inconsistent states"; + solver_state := FinishedConstraints + +let find = U.deref + +(** return the prefix of the list up to and including the first + element satisfying p. if no element satisfies p, return the empty + list *) +let rec keep_until p l = + match l with + [] -> [] + | x :: xs -> if p x then [x] else x :: keep_until p xs + + +(** Generate a unique integer. *) +let fresh_index : (unit -> int) = + let counter = ref 0 in + fun () -> + incr counter; + !counter + +let fresh_stamp : (unit -> int) = + let stamp = ref 0 in + fun () -> + incr stamp; + !stamp + +(** Return a unique integer representation of a tau *) +let get_stamp (t : tau) : int = + match find t with + Var v -> v.v_stamp + | Ref r -> r.r_stamp + | Pair p -> p.p_stamp + | Fun f -> f.f_stamp + +(** Consistency checks for inferred types *) +let pair_or_var (t : tau) = + match find t with + Pair _ -> true + | Var _ -> true + | _ -> false + +let ref_or_var (t : tau) = + match find t with + Ref _ -> true + | Var _ -> true + | _ -> false + +let fun_or_var (t : tau) = + match find t with + Fun _ -> true + | Var _ -> true + | _ -> false + + +(** Apply [f] structurally down [t]. Guaranteed to terminate, even if [t] + is recursive *) +let iter_tau f t = + let visited : (int, tau) H.t = H.create 4 in + let rec iter_tau' t = + if H.mem visited (get_stamp t) then () else + begin + f t; + H.add visited (get_stamp t) t; + match find t with + Pair p -> + iter_tau' p.ptr; + iter_tau' p.lam + | Fun f -> + List.iter iter_tau' f.args; + iter_tau' f.ret; + | Ref r -> iter_tau' r.points_to + | _ -> () + end + in + iter_tau' t + +let equal_absloc = function + (i, _, _), (i', _, _) -> i = i' + +let equal_c_absloc l l' = + (find l).l_stamp = (find l').l_stamp + +let equal_tau (t : tau) (t' : tau) = + get_stamp t = get_stamp t' + +let top_c_absloc l = + (find l).l_top + +let get_flow_computed l = + (find l).flow_computed + +let set_flow_computed l = + (find l).flow_computed <- true + +let rec top_tau (t : tau) = + match find t with + Pair p -> top_tau p.ptr || top_tau p.lam + | Ref r -> top_c_absloc r.rl + | Fun f -> top_c_absloc f.fl + | Var v -> v.v_top + +let get_c_absloc_stamp (l : c_absloc) : int = + (find l).l_stamp + +let set_top_c_absloc (l : c_absloc) (b: bool) : unit = + (find l).l_top <- b + +let get_aliases (l : c_absloc) = + if top_c_absloc l then raise ReachedTop + else (find l).aliases + +(***********************************************************************) +(* *) +(* Printing Functions *) +(* *) +(***********************************************************************) + +(** Convert a c_absloc to a string, short representation *) +let string_of_c_absloc (l : c_absloc) : string = + "\"" ^ + (find l).l_name ^ + if top_c_absloc l then "(top)" else "" ^ + "\"" + +(** Return true if the element [e] is present in the association list, + according to uref equality *) +let rec assoc_list_mem (e : tau) (l : association list) = + match l with + [] -> None + | (h, s, so) :: t -> + if U.equal (h, e) then Some (s, so) + else assoc_list_mem e t + +(** Given a tau, create a unique recursive variable name. This should + always return the same name for a given tau *) +let fresh_recvar_name (t : tau) : string = + match find t with + Pair p -> "rvp" ^ string_of_int p.p_stamp + | Ref r -> "rvr" ^ string_of_int r.r_stamp + | Fun f -> "rvf" ^ string_of_int f.f_stamp + | _ -> die "fresh_recvar_name" + + +(** Return a string representation of a tau, using association lists. *) +let string_of_tau (t : tau) : string = + let tau_map : association list ref = ref [] in + let rec string_of_tau' t = + match assoc_list_mem t !tau_map with + Some (s, so) -> (* recursive type. see if a var name has been set *) + begin + match !so with + None -> + let rv = fresh_recvar_name t in + s := "u " ^ rv ^ "." ^ !s; + so := Some rv; + rv + | Some rv -> rv + end + | None -> (* type's not recursive. Add it to the assoc list and cont. *) + let s = ref "" + and so : string option ref = ref None in + tau_map := (t, s, so) :: !tau_map; + begin + match find t with + Var v -> s := v.v_name + | Pair p -> + insist (ref_or_var p.ptr) "wellformed"; + insist (fun_or_var p.lam) "wellformed"; + s := "{"; + s := !s ^ string_of_tau' p.ptr; + s := !s ^ ","; + s := !s ^ string_of_tau' p.lam; + s := !s ^ "}" + | Ref r -> + insist (pair_or_var r.points_to) "wellformed"; + s := "ref(|"; + s := !s ^ string_of_c_absloc r.rl; + s := !s ^ "|,"; + s := !s ^ string_of_tau' r.points_to; + s := !s ^ ")" + | Fun f -> + let rec string_of_args = function + [] -> () + | h :: [] -> + insist (pair_or_var h) "wellformed"; + s := !s ^ string_of_tau' h + | h :: t -> + insist (pair_or_var h) "wellformed"; + s := !s ^ string_of_tau' h ^ ","; + string_of_args t + in + insist (pair_or_var f.ret) "wellformed"; + s := "fun(|"; + s := !s ^ string_of_c_absloc f.fl; + s := !s ^ "|,"; + s := !s ^ "<"; + if List.length f.args > 0 then string_of_args f.args + else s := !s ^ "void"; + s := !s ^ ">,"; + s := !s ^ string_of_tau' f.ret; + s := !s ^ ")" + end; + tau_map := List.tl !tau_map; + !s + in + string_of_tau' t + +(** Convert an lvalue to a string *) +let rec string_of_lvalue (lv : lvalue) : string = + let contents = string_of_tau lv.contents + and l = string_of_c_absloc lv.l + in + insist (pair_or_var lv.contents) "inconsistency at string_of_lvalue"; + (* do a consistency check *) + Printf.sprintf "[%s]^(%s)" contents l + +(** Print a list of tau elements, comma separated *) +let rec print_tau_list (l : tau list) : unit = + let rec print_t_strings = function + [] -> () + | h :: [] -> print_endline h + | h :: t -> + print_string h; + print_string ", "; + print_t_strings t + in + print_t_strings (List.map string_of_tau l) + +let print_constraint (c : tconstraint) = + match c with + Unification (t, t') -> + let lhs = string_of_tau t in + let rhs = string_of_tau t' in + Printf.printf "%s == %s\n" lhs rhs + | Leq (t, t') -> + let lhs = string_of_tau t in + let rhs = string_of_tau t' in + Printf.printf "%s <= %s\n" lhs rhs + +(***********************************************************************) +(* *) +(* Type Operations -- these do not create any constraints *) +(* *) +(***********************************************************************) + +(** Create an lvalue with c_absloc [lbl] and tau contents [t]. *) +let make_lval (loc, t : c_absloc * tau) : lvalue = + {l = loc; contents = t} + +let make_c_absloc_int (is_top : bool) (name : string) (vio : Cil.varinfo option) : c_absloc = + let my_absloc = (fresh_index (), name, vio) in + let locc = C.add my_absloc C.empty + in + U.uref { + l_name = name; + l_top = is_top; + l_stamp = fresh_stamp (); + loc = my_absloc; + aliases = locc; + ubounds = B.empty; + lbounds = B.empty; + flow_computed = false + } + +(** Create a new c_absloc with name [name]. Also adds a fresh absloc + with name [name] to this c_absloc's aliases set. *) +let make_c_absloc (is_top : bool) (name : string) (vio : Cil.varinfo option) = + make_c_absloc_int is_top name vio + +let fresh_c_absloc (is_top : bool) : c_absloc = + let index = fresh_index () in + make_c_absloc_int is_top (label_prefix ^ string_of_int index) None + +(** Create a fresh bound (edge in the constraint graph). *) +let make_bound (a : c_absloc) : c_abslocinfo bound = + {info = a} + +let make_tau_bound (t : tau) : tinfo bound = + {info = t} + +(** Create a fresh named variable with name '[name]. *) +let make_var (is_top : bool) (name : string) : tau = + U.uref (Var {v_name = ("'" ^ name); + v_top = is_top; + v_stamp = fresh_index (); + v_lbounds = B.empty; + v_ubounds = B.empty}) + +let fresh_var (is_top : bool) : tau = + make_var is_top ("fi" ^ string_of_int (fresh_index ())) + +(** Create a fresh unnamed variable (name will be 'fi). *) +let fresh_var_i (is_top : bool) : tau = + make_var is_top ("fi" ^ string_of_int (fresh_index ())) + +(** Create a Fun constructor. *) +let make_fun (lbl, a, r : c_absloc * (tau list) * tau) : tau = + U.uref (Fun {fl = lbl; + f_stamp = fresh_index (); + args = a; + ret = r}) + +(** Create a Ref constructor. *) +let make_ref (lbl, pt : c_absloc * tau) : tau = + U.uref (Ref {rl = lbl; + r_stamp = fresh_index (); + points_to = pt}) + +(** Create a Pair constructor. *) +let make_pair (p, f : tau * tau) : tau = + U.uref (Pair {ptr = p; + p_stamp = fresh_index (); + lam = f}) + +(** Copy the toplevel constructor of [t], putting fresh variables in each + argement of the constructor. *) +let copy_toplevel (t : tau) : tau = + match find t with + Pair _ -> make_pair (fresh_var_i false, fresh_var_i false) + | Ref _ -> make_ref (fresh_c_absloc false, fresh_var_i false) + | Fun f -> + make_fun (fresh_c_absloc false, + List.map (fun _ -> fresh_var_i false) f.args, + fresh_var_i false) + | _ -> die "copy_toplevel" + +let has_same_structure (t : tau) (t' : tau) = + match find t, find t' with + Pair _, Pair _ -> true + | Ref _, Ref _ -> true + | Fun _, Fun _ -> true + | Var _, Var _ -> true + | _ -> false + +let pad_args (fi, tlr : finfo * tau list ref) : unit = + let padding = List.length fi.args - List.length !tlr + in + if padding == 0 then () + else + if padding > 0 then + for i = 1 to padding do + tlr := !tlr @ [fresh_var false] + done + else + for i = 1 to -padding do + fi.args <- fi.args @ [fresh_var false] + done + +(***********************************************************************) +(* *) +(* Constraint Generation/ Resolution *) +(* *) +(***********************************************************************) + +let set_top (b : bool) (t : tau) : unit = + let set_top_down t = + match find t with + Var v -> v.v_top <- b + | Ref r -> set_top_c_absloc r.rl b + | Fun f -> set_top_c_absloc f.fl b + | Pair p -> () + in + iter_tau set_top_down t + +let rec unify_int (t, t' : tau * tau) : unit = + if equal_tau t t' then () + else + let ti, ti' = find t, find t' in + U.unify combine (t, t'); + match ti, ti' with + Var v, Var v' -> + set_top (v.v_top || v'.v_top) t'; + merge_v_lbounds (v, v'); + merge_v_ubounds (v, v') + | Var v, _ -> + set_top (v.v_top || top_tau t') t'; + notify_vlbounds t v; + notify_vubounds t v + | _, Var v -> + set_top (v.v_top || top_tau t) t; + notify_vlbounds t' v; + notify_vubounds t' v + | Ref r, Ref r' -> unify_ref (r, r') + | Fun f, Fun f' -> unify_fun (f, f') + | Pair p, Pair p' -> unify_pair (p, p') + | _ -> raise Inconsistent +and notify_vlbounds (t : tau) (vi : vinfo) : unit = + let notify bounds = + List.iter + (fun b -> + add_constraint (Unification (b.info, copy_toplevel t)); + add_constraint (Leq (b.info, t))) + bounds + in + notify (B.elements vi.v_lbounds) +and notify_vubounds (t : tau) (vi : vinfo) : unit = + let notify bounds = + List.iter + (fun b -> + add_constraint (Unification (b.info, copy_toplevel t)); + add_constraint (Leq (t, b.info))) + bounds + in + notify (B.elements vi.v_ubounds) +and unify_ref (ri, ri' : rinfo * rinfo) : unit = + unify_c_abslocs (ri.rl, ri'.rl); + add_constraint (Unification (ri.points_to, ri'.points_to)) +and unify_fun (fi, fi' : finfo * finfo) : unit = + let rec union_args = function + _, [] -> false + | [], _ -> true + | h :: t, h' :: t' -> + add_constraint (Unification (h, h')); + union_args(t, t') + in + unify_c_abslocs (fi.fl, fi'.fl); + add_constraint (Unification (fi.ret, fi'.ret)); + if union_args (fi.args, fi'.args) then fi.args <- fi'.args +and unify_pair (pi, pi' : pinfo * pinfo) : unit = + add_constraint (Unification (pi.ptr, pi'.ptr)); + add_constraint (Unification (pi.lam, pi'.lam)) +and unify_c_abslocs (l, l' : c_absloc * c_absloc) : unit = + let pick_name (li, li' : c_abslocinfo * c_abslocinfo) = + if starts_with li.l_name label_prefix then li.l_name <- li'.l_name + else () in + let combine_c_absloc (li, li' : c_abslocinfo * c_abslocinfo) : c_abslocinfo = + pick_name (li, li'); + li.l_top <- li.l_top || li'.l_top; + li.aliases <- C.union li.aliases li'.aliases; + li.ubounds <- B.union li.ubounds li'.ubounds; + li.lbounds <- B.union li.lbounds li'.lbounds; + li + in + if !debug_constraints then + Printf.printf + "%s == %s\n" + (string_of_c_absloc l) + (string_of_c_absloc l'); + U.unify combine_c_absloc (l, l') +and merge_v_lbounds (vi, vi' : vinfo * vinfo) : unit = + vi'.v_lbounds <- B.union vi.v_lbounds vi'.v_lbounds; +and merge_v_ubounds (vi, vi' : vinfo * vinfo) : unit = + vi'.v_ubounds <- B.union vi.v_ubounds vi'.v_ubounds; +(** Pick the representative info for two tinfo's. This function + prefers the first argument when both arguments are the same + structure, but when one type is a structure and the other is a + var, it picks the structure. All other actions (e.g., updating + the info) is done in unify_int *) +and combine (ti, ti' : tinfo * tinfo) : tinfo = + match ti, ti' with + Var _, _ -> ti' + | _, _ -> ti +and leq_int (t, t') : unit = + if equal_tau t t' then () + else + let ti, ti' = find t, find t' in + match ti, ti' with + Var v, Var v' -> + v.v_ubounds <- B.add (make_tau_bound t') v.v_ubounds; + v'.v_lbounds <- B.add (make_tau_bound t) v'.v_lbounds + | Var v, _ -> + add_constraint (Unification (t, copy_toplevel t')); + add_constraint (Leq (t, t')) + | _, Var v -> + add_constraint (Unification (t', copy_toplevel t)); + add_constraint (Leq (t, t')) + | Ref r, Ref r' -> leq_ref (r, r') + | Fun f, Fun f' -> + (* TODO: check, why not do subtyping here? *) + add_constraint (Unification (t, t')) + | Pair pr, Pair pr' -> + add_constraint (Leq (pr.ptr, pr'.ptr)); + add_constraint (Leq (pr.lam, pr'.lam)) + | _ -> raise Inconsistent +and leq_ref (ri, ri') : unit = + leq_c_absloc (ri.rl, ri'.rl); + add_constraint (Unification (ri.points_to, ri'.points_to)) +and leq_c_absloc (l, l') : unit = + let li, li' = find l, find l' in + if !debug_constraints then + Printf.printf + "%s <= %s\n" + (string_of_c_absloc l) + (string_of_c_absloc l'); + if U.equal (l, l') then () + else + begin + li.ubounds <- B.add (make_bound l') li.ubounds; + li'.lbounds <- B.add (make_bound l) li'.lbounds + end +and add_constraint_int (c : tconstraint) (toplev : bool) = + if !debug_constraints && toplev then + begin + Printf.printf "%d:>" !toplev_count; + print_constraint c; + incr toplev_count + end + else + if !debug_constraints then print_constraint c else (); + insist (can_add_constraints ()) + "can't add constraints after compute_results is called"; + begin + match c with + Unification _ -> Q.add c eq_worklist + | Leq _ -> Q.add c leq_worklist + end; + solve_constraints () (* solve online *) +and add_constraint (c : tconstraint) = + add_constraint_int c false +and add_toplev_constraint (c : tconstraint) = + if !print_constraints && not !debug_constraints then + begin + Printf.printf "%d:>" !toplev_count; + incr toplev_count; + print_constraint c + end + else (); + add_constraint_int c true +and fetch_constraint () : tconstraint option = + try Some (Q.take eq_worklist) + with Q.Empty -> + begin + try Some (Q.take leq_worklist) + with Q.Empty -> None + end +(** The main solver loop. *) +and solve_constraints () : unit = + match fetch_constraint () with + None -> () + | Some c -> + begin + match c with + Unification (t, t') -> unify_int (t, t') + | Leq (t, t') -> + if !no_sub then unify_int (t, t') + else leq_int (t, t') + end; + solve_constraints () + +(***********************************************************************) +(* *) +(* Interface Functions *) +(* *) +(***********************************************************************) + +(** Return the contents of the lvalue. *) +let rvalue (lv : lvalue) : tau = + lv.contents + +(** Dereference the rvalue. If it does not have enough structure to + support the operation, then the correct structure is added via new + unification constraints. *) +let rec deref (t : tau) : lvalue = + match find t with + Pair p -> + begin + match find p.ptr with + | Var _ -> + let is_top = top_tau p.ptr in + let points_to = fresh_var is_top in + let l = fresh_c_absloc is_top in + let r = make_ref (l, points_to) + in + add_toplev_constraint (Unification (p.ptr, r)); + make_lval (l, points_to) + | Ref r -> make_lval (r.rl, r.points_to) + | _ -> raise WellFormed + end + | Var v -> + let is_top = top_tau t in + add_toplev_constraint + (Unification (t, make_pair (fresh_var is_top, fresh_var is_top))); + deref t + | _ -> raise WellFormed + + +(** Form the union of [t] and [t'], if it doesn't exist already. *) +let join (t : tau) (t' : tau) : tau = + let s, s' = get_stamp t, get_stamp t' in + try H.find join_cache (s, s') + with Not_found -> + let t'' = fresh_var false in + add_toplev_constraint (Leq (t, t'')); + add_toplev_constraint (Leq (t', t'')); + H.add join_cache (s, s') t''; + t'' + +(** Form the union of a list [tl], expected to be the initializers of some + structure or array type. *) +let join_inits (tl : tau list) : tau = + let t' = fresh_var false in + List.iter (function t -> add_toplev_constraint (Leq (t, t'))) tl; + t' + +(** Take the address of an lvalue. Does not add constraints. *) +let address (lv : lvalue) : tau = + make_pair (make_ref (lv.l, lv.contents), fresh_var false ) + +(** No instantiation in this analysis *) +let instantiate (lv : lvalue) (i : int) : lvalue = + lv + +(** Constraint generated from assigning [t] to [lv]. *) +let assign (lv : lvalue) (t : tau) : unit = + add_toplev_constraint (Leq (t, lv.contents)) + +let assign_ret (i : int) (lv : lvalue) (t : tau) : unit = + add_toplev_constraint (Leq (t, lv.contents)) + +(** Project out the first (ref) component or a pair. If the argument + [t] has no discovered structure, raise NoContents. *) +let proj_ref (t : tau) : tau = + match find t with + Pair p -> p.ptr + | Var v -> raise NoContents + | _ -> raise WellFormed + +(* Project out the second (fun) component of a pair. If the argument + [t] has no discovered structure, create it on the fly by adding + constraints. *) +let proj_fun (t : tau) : tau = + match find t with + Pair p -> p.lam + | Var v -> + let p, f = fresh_var false, fresh_var false in + add_toplev_constraint (Unification (t, make_pair (p, f))); + f + | _ -> raise WellFormed + +let get_args (t : tau) : tau list = + match find t with + Fun f -> f.args + | _ -> raise WellFormed + +let get_finfo (t : tau) : finfo = + match find t with + Fun f -> f + | _ -> raise WellFormed + +(** Function type [t] is applied to the arguments [actuals]. Unifies + the actuals with the formals of [t]. If no functions have been + discovered for [t] yet, create a fresh one and unify it with + t. The result is the return value of the function plus the index + of this application site. + + For this analysis, the application site is always 0 *) +let apply (t : tau) (al : tau list) : (tau * int) = + let f = proj_fun t in + let actuals = ref al in + let fi, ret = + match find f with + Fun fi -> fi, fi.ret + | Var v -> + let new_l, new_ret, new_args = + fresh_c_absloc false, + fresh_var false, + List.map (function _ -> fresh_var false) !actuals + in + let new_fun = make_fun (new_l, new_args, new_ret) in + add_toplev_constraint (Unification (new_fun, f)); + (get_finfo new_fun, new_ret) + | _ -> raise WellFormed + in + pad_args (fi, actuals); + List.iter2 + (fun actual -> fun formal -> + add_toplev_constraint (Leq (actual, formal))) + !actuals fi.args; + (ret, 0) + +let make_undefined_lvalue () = + make_lval (make_c_absloc false "undefined" None, + make_var true "undefined") + +let make_undefined_rvalue () = + make_var true "undefined" + +let assign_undefined (lv : lvalue) : unit = + assign lv (make_undefined_rvalue ()) + +let apply_undefined (al : tau list) : (tau * int) = + List.iter + (fun actual -> assign (make_undefined_lvalue ()) actual) + al; + (fresh_var true, 0) + +(** Create a new function type with name [name], list of formal + arguments [formals], and return value [ret]. Adds no constraints. *) +let make_function (name : string) (formals : lvalue list) (ret : tau) : tau = + let f = make_fun (make_c_absloc false name None, + List.map (fun x -> rvalue x) formals, + ret) + in + make_pair (fresh_var false, f) + +(** Create an lvalue. *) +let make_lvalue (b : bool ) (name : string) (vio : Cil.varinfo option) = + make_lval (make_c_absloc false name vio, + make_var false name) + +(** Create a fresh named variable. *) +let make_fresh (name : string) : tau = + make_var false name + +(** The default type for abslocs. *) +let bottom () : tau = + make_var false "bottom" + +(** Unify the result of a function with its return value. *) +let return (t : tau) (t' : tau) = + add_toplev_constraint (Leq (t', t)) + +(***********************************************************************) +(* *) +(* Query/Extract Solutions *) +(* *) +(***********************************************************************) + +module IntHash = Hashtbl.Make (struct + type t = int + let equal x y = x = y + let hash x = x + end) + +(** todo : reached_top !! *) +let collect_ptset_fast (l : c_absloc) : abslocset = + let onpath : unit IntHash.t = IntHash.create 101 in + let path : c_absloc list ref = ref [] in + let compute_path (i : int) = + keep_until (fun l -> i = get_c_absloc_stamp l) !path in + let collapse_cycle (cycle : c_absloc list) = + match cycle with + l :: ls -> + List.iter (fun l' -> unify_c_abslocs (l, l')) ls; + C.empty + | [] -> die "collapse cycle" in + let rec flow_step (l : c_absloc) : abslocset = + let stamp = get_c_absloc_stamp l in + if IntHash.mem onpath stamp then (* already seen *) + collapse_cycle (compute_path stamp) + else + let li = find l in + IntHash.add onpath stamp (); + path := l :: !path; + B.iter + (fun lb -> li.aliases <- C.union li.aliases (flow_step lb.info)) + li.lbounds; + path := List.tl !path; + IntHash.remove onpath stamp; + li.aliases + in + insist (can_query_graph ()) "collect_ptset_fast can't query graph"; + if get_flow_computed l then get_aliases l + else + begin + set_flow_computed l; + flow_step l + end + +(** this is a quadratic flow step. keep it for debugging the fast + version above. *) +let collect_ptset_slow (l : c_absloc) : abslocset = + let onpath : unit IntHash.t = IntHash.create 101 in + let rec flow_step (l : c_absloc) : abslocset = + if top_c_absloc l then raise ReachedTop + else + let stamp = get_c_absloc_stamp l in + if IntHash.mem onpath stamp then C.empty + else + let li = find l in + IntHash.add onpath stamp (); + B.iter + (fun lb -> li.aliases <- C.union li.aliases (flow_step lb.info)) + li.lbounds; + li.aliases + in + insist (can_query_graph ()) "collect_ptset_slow can't query graph"; + if get_flow_computed l then get_aliases l + else + begin + set_flow_computed l; + flow_step l + end + +let collect_ptset = + collect_ptset_slow + (* if !debug_flow_step then collect_ptset_slow + else collect_ptset_fast *) + +let may_alias (t1 : tau) (t2 : tau) : bool = + let get_l (t : tau) : c_absloc = + match find (proj_ref t) with + Ref r -> r.rl + | Var v -> raise NoContents + | _ -> raise WellFormed + in + try + let l1 = get_l t1 + and l2 = get_l t2 in + equal_c_absloc l1 l2 || + not (C.is_empty (C.inter (collect_ptset l1) (collect_ptset l2))) + with + NoContents -> false + | ReachedTop -> raise UnknownLocation + +let points_to_aux (t : tau) : absloc list = + try + match find (proj_ref t) with + Var v -> [] + | Ref r -> C.elements (collect_ptset r.rl) + | _ -> raise WellFormed + with + NoContents -> [] + | ReachedTop -> raise UnknownLocation + +let points_to (lv : lvalue) : Cil.varinfo list = + let rec get_vinfos l : Cil.varinfo list = + match l with + [] -> [] + | (_, _, Some h) :: t -> h :: get_vinfos t + | (_, _, None) :: t -> get_vinfos t + in + get_vinfos (points_to_aux lv.contents) + +let epoints_to (t : tau) : Cil.varinfo list = + let rec get_vinfos l : Cil.varinfo list = match l with + [] -> [] + | (_, _, Some h) :: t -> h :: get_vinfos t + | (_, _, None) :: t -> get_vinfos t + in + get_vinfos (points_to_aux t) + +let points_to_names (lv : lvalue) : string list = + List.map (fun v -> v.vname) (points_to lv) + +let absloc_points_to (lv : lvalue) : absloc list = + points_to_aux lv.contents + +let absloc_epoints_to (t : tau) : absloc list = + points_to_aux t + +let absloc_of_lvalue (lv : lvalue) : absloc = + (find lv.l).loc + +let absloc_eq = equal_absloc diff --git a/cil/src/ext/pta/olf.mli b/cil/src/ext/pta/olf.mli new file mode 100644 index 00000000..43794825 --- /dev/null +++ b/cil/src/ext/pta/olf.mli @@ -0,0 +1,80 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +type lvalue +type tau +type absloc + +(** Raised if a pointer flows to an undefined function. + We assume that such a function can have any effect on the pointer's contents +*) +exception UnknownLocation + +val debug : bool ref +val debug_constraints : bool ref +val debug_aliases : bool ref +val smart_aliases : bool ref +val finished_constraints : unit -> unit +val print_constraints : bool ref +val no_flow : bool ref +val no_sub : bool ref +val analyze_mono : bool ref +val solve_constraints : unit -> unit (* only for compatability with Golf *) +val rvalue : lvalue -> tau +val deref : tau -> lvalue +val join : tau -> tau -> tau +val join_inits : tau list -> tau +val address : lvalue -> tau +val instantiate : lvalue -> int -> lvalue +val assign : lvalue -> tau -> unit +val assign_ret : int -> lvalue -> tau -> unit +val apply : tau -> tau list -> (tau * int) +val apply_undefined : tau list -> (tau * int) +val assign_undefined : lvalue -> unit +val make_function : string -> lvalue list -> tau -> tau +val make_lvalue : bool -> string -> (Cil.varinfo option) -> lvalue +val bottom : unit -> tau +val return : tau -> tau -> unit +val make_fresh : string -> tau +val points_to_names : lvalue -> string list +val points_to : lvalue -> Cil.varinfo list +val epoints_to : tau -> Cil.varinfo list +val string_of_lvalue : lvalue -> string +val may_alias : tau -> tau -> bool + +val absloc_points_to : lvalue -> absloc list +val absloc_epoints_to : tau -> absloc list +val absloc_of_lvalue : lvalue -> absloc +val absloc_eq : (absloc * absloc) -> bool +val d_absloc : unit -> absloc -> Pretty.doc diff --git a/cil/src/ext/pta/ptranal.ml b/cil/src/ext/pta/ptranal.ml new file mode 100644 index 00000000..c91bda81 --- /dev/null +++ b/cil/src/ext/pta/ptranal.ml @@ -0,0 +1,597 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +exception Bad_return +exception Bad_function + + +open Cil + +module H = Hashtbl + +module A = Olf +exception UnknownLocation = A.UnknownLocation + +type access = A.lvalue * bool + +type access_map = (lval, access) H.t + +(** a mapping from varinfo's back to fundecs *) +module VarInfoKey = +struct + type t = varinfo + let compare v1 v2 = v1.vid - v2.vid +end + +module F = Map.Make (VarInfoKey) + + +(***********************************************************************) +(* *) +(* Global Variables *) +(* *) +(***********************************************************************) + +let model_strings = ref false +let print_constraints = A.print_constraints +let debug_constraints = A.debug_constraints +let debug_aliases = A.debug_aliases +let smart_aliases = A.smart_aliases +let debug = A.debug +let analyze_mono = A.analyze_mono +let no_flow = A.no_flow +let no_sub = A.no_sub +let fun_ptrs_as_funs = ref false +let show_progress = ref false +let debug_may_aliases = ref false + +let found_undefined = ref false + +let conservative_undefineds = ref false + +let current_fundec : fundec option ref = ref None + +let fun_access_map : (fundec, access_map) H.t = H.create 64 + +(* A mapping from varinfos to fundecs *) +let fun_varinfo_map = ref F.empty + +let current_ret : A.tau option ref = ref None + +let lvalue_hash : (varinfo,A.lvalue) H.t = H.create 64 + +let expressions : (exp,A.tau) H.t = H.create 64 + +let lvalues : (lval,A.lvalue) H.t = H.create 64 + +let fresh_index : (unit -> int) = + let count = ref 0 in + fun () -> + incr count; + !count + +let alloc_names = [ + "malloc"; + "calloc"; + "realloc"; + "xmalloc"; + "__builtin_alloca"; + "alloca"; + "kmalloc" +] + +let all_globals : varinfo list ref = ref [] +let all_functions : fundec list ref = ref [] + + +(***********************************************************************) +(* *) +(* Utility Functions *) +(* *) +(***********************************************************************) + +let is_undefined_fun = function + Lval (lh, o) -> + if isFunctionType (typeOfLval (lh, o)) then + match lh with + Var v -> v.vstorage = Extern + | _ -> false + else false + | _ -> false + +let is_alloc_fun = function + Lval (lh, o) -> + if isFunctionType (typeOfLval (lh, o)) then + match lh with + Var v -> List.mem v.vname alloc_names + | _ -> false + else false + | _ -> false + +let next_alloc = function + Lval (Var v, o) -> + let name = Printf.sprintf "%s@%d" v.vname (fresh_index ()) + in + A.address (A.make_lvalue false name (Some v)) (* check *) + | _ -> raise Bad_return + +let is_effect_free_fun = function + Lval (lh, o) when isFunctionType (typeOfLval (lh, o)) -> + begin + match lh with + Var v -> + begin + try ("CHECK_" = String.sub v.vname 0 6) + with Invalid_argument _ -> false + end + | _ -> false + end + | _ -> false + + +(***********************************************************************) +(* *) +(* AST Traversal Functions *) +(* *) +(***********************************************************************) + +(* should do nothing, might need to worry about Index case *) +(* let analyzeOffset (o : offset ) : A.tau = A.bottom () *) + +let analyze_var_decl (v : varinfo ) : A.lvalue = + try H.find lvalue_hash v + with Not_found -> + let lv = A.make_lvalue false v.vname (Some v) + in + H.add lvalue_hash v lv; + lv + +let isFunPtrType (t : typ) : bool = + match t with + TPtr (t, _) -> isFunctionType t + | _ -> false + +let rec analyze_lval (lv : lval ) : A.lvalue = + let find_access (l : A.lvalue) (is_var : bool) : A.lvalue = + match !current_fundec with + None -> l + | Some f -> + let accesses = H.find fun_access_map f in + if H.mem accesses lv then l + else + begin + H.add accesses lv (l, is_var); + l + end in + let result = + match lv with + Var v, _ -> (* instantiate every syntactic occurrence of a function *) + let alv = + if isFunctionType (typeOfLval lv) then + A.instantiate (analyze_var_decl v) (fresh_index ()) + else analyze_var_decl v + in + find_access alv true + | Mem e, _ -> + (* assert (not (isFunctionType(typeOf(e))) ); *) + let alv = + if !fun_ptrs_as_funs && isFunPtrType (typeOf e) then + analyze_expr_as_lval e + else A.deref (analyze_expr e) + in + find_access alv false + in + H.replace lvalues lv result; + result +and analyze_expr_as_lval (e : exp) : A.lvalue = + match e with + Lval l -> analyze_lval l + | _ -> assert false (* todo -- other kinds of expressions? *) +and analyze_expr (e : exp ) : A.tau = + let result = + match e with + Const (CStr s) -> + if !model_strings then + A.address (A.make_lvalue + false + s + (Some (makeVarinfo false s charConstPtrType))) + else A.bottom () + | Const c -> A.bottom () + | Lval l -> A.rvalue (analyze_lval l) + | SizeOf _ -> A.bottom () + | SizeOfStr _ -> A.bottom () + | AlignOf _ -> A.bottom () + | UnOp (op, e, t) -> analyze_expr e + | BinOp (op, e, e', t) -> A.join (analyze_expr e) (analyze_expr e') + | CastE (t, e) -> analyze_expr e + | AddrOf l -> + if !fun_ptrs_as_funs && isFunctionType (typeOfLval l) then + A.rvalue (analyze_lval l) + else A.address (analyze_lval l) + | StartOf l -> A.address (analyze_lval l) + | AlignOfE _ -> A.bottom () + | SizeOfE _ -> A.bottom () + in + H.add expressions e result; + result + + +(* check *) +let rec analyze_init (i : init ) : A.tau = + match i with + SingleInit e -> analyze_expr e + | CompoundInit (t, oi) -> + A.join_inits (List.map (function (_, i) -> analyze_init i) oi) + +let analyze_instr (i : instr ) : unit = + match i with + Set (lval, rhs, l) -> + A.assign (analyze_lval lval) (analyze_expr rhs) + | Call (res, fexpr, actuals, l) -> + if not (isFunctionType (typeOf fexpr)) then + () (* todo : is this a varargs? *) + else if is_alloc_fun fexpr then + begin + if !debug then print_string "Found allocation function...\n"; + match res with + Some r -> A.assign (analyze_lval r) (next_alloc fexpr) + | None -> () + end + else if is_effect_free_fun fexpr then + List.iter (fun e -> ignore (analyze_expr e)) actuals + else (* todo : check to see if the thing is an undefined function *) + let fnres, site = + if is_undefined_fun fexpr & !conservative_undefineds then + A.apply_undefined (List.map analyze_expr actuals) + else + A.apply (analyze_expr fexpr) (List.map analyze_expr actuals) + in + begin + match res with + Some r -> + begin + A.assign_ret site (analyze_lval r) fnres; + found_undefined := true; + end + | None -> () + end + | Asm _ -> () + +let rec analyze_stmt (s : stmt ) : unit = + match s.skind with + Instr il -> List.iter analyze_instr il + | Return (eo, l) -> + begin + match eo with + Some e -> + begin + match !current_ret with + Some ret -> A.return ret (analyze_expr e) + | None -> raise Bad_return + end + | None -> () + end + | Goto (s', l) -> () (* analyze_stmt(!s') *) + | If (e, b, b', l) -> + (* ignore the expression e; expressions can't be side-effecting *) + analyze_block b; + analyze_block b' + | Switch (e, b, sl, l) -> + analyze_block b; + List.iter analyze_stmt sl +(* + | Loop (b, l, _, _) -> analyze_block b +*) + | While (_, b, _) -> analyze_block b + | DoWhile (_, b, _) -> analyze_block b + | For (bInit, _, bIter, b, _) -> + analyze_block bInit; + analyze_block bIter; + analyze_block b + | Block b -> analyze_block b + | TryFinally (b, h, _) -> + analyze_block b; + analyze_block h + | TryExcept (b, (il, _), h, _) -> + analyze_block b; + List.iter analyze_instr il; + analyze_block h + | Break l -> () + | Continue l -> () + + +and analyze_block (b : block ) : unit = + List.iter analyze_stmt b.bstmts + +let analyze_function (f : fundec ) : unit = + let oldlv = analyze_var_decl f.svar in + let ret = A.make_fresh (f.svar.vname ^ "_ret") in + let formals = List.map analyze_var_decl f.sformals in + let newf = A.make_function f.svar.vname formals ret in + if !show_progress then + Printf.printf "Analyzing function %s\n" f.svar.vname; + fun_varinfo_map := F.add f.svar f (!fun_varinfo_map); + current_fundec := Some f; + H.add fun_access_map f (H.create 8); + A.assign oldlv newf; + current_ret := Some ret; + analyze_block f.sbody + +let analyze_global (g : global ) : unit = + match g with + GVarDecl (v, l) -> () (* ignore (analyze_var_decl(v)) -- no need *) + | GVar (v, init, l) -> + all_globals := v :: !all_globals; + begin + match init.init with + Some i -> A.assign (analyze_var_decl v) (analyze_init i) + | None -> ignore (analyze_var_decl v) + end + | GFun (f, l) -> + all_functions := f :: !all_functions; + analyze_function f + | _ -> () + +let analyze_file (f : file) : unit = + iterGlobals f analyze_global + + +(***********************************************************************) +(* *) +(* High-level Query Interface *) +(* *) +(***********************************************************************) + +(* Same as analyze_expr, but no constraints. *) +let rec traverse_expr (e : exp) : A.tau = + H.find expressions e + +and traverse_expr_as_lval (e : exp) : A.lvalue = + match e with + | Lval l -> traverse_lval l + | _ -> assert false (* todo -- other kinds of expressions? *) + +and traverse_lval (lv : lval ) : A.lvalue = + H.find lvalues lv + +let may_alias (e1 : exp) (e2 : exp) : bool = + let tau1,tau2 = traverse_expr e1, traverse_expr e2 in + let result = A.may_alias tau1 tau2 in + if !debug_may_aliases then + begin + let doc1 = d_exp () e1 in + let doc2 = d_exp () e2 in + let s1 = Pretty.sprint ~width:30 doc1 in + let s2 = Pretty.sprint ~width:30 doc2 in + Printf.printf + "%s and %s may alias? %s\n" + s1 + s2 + (if result then "yes" else "no") + end; + result + +let resolve_lval (lv : lval) : varinfo list = + A.points_to (traverse_lval lv) + +let resolve_exp (e : exp) : varinfo list = + A.epoints_to (traverse_expr e) + +let resolve_funptr (e : exp) : fundec list = + let varinfos = A.epoints_to (traverse_expr e) in + List.fold_left + (fun fdecs -> fun vinf -> + try F.find vinf !fun_varinfo_map :: fdecs + with Not_found -> fdecs) + [] + varinfos + +let count_hash_elts h = + let result = ref 0 in + H.iter (fun _ -> fun _ -> incr result) lvalue_hash; + !result + +let compute_may_aliases (b : bool) : unit = + let rec compute_may_aliases_aux (exps : exp list) = + match exps with + [] -> () + | h :: t -> + ignore (List.map (may_alias h) t); + compute_may_aliases_aux t + and exprs : exp list ref = ref [] in + H.iter (fun e -> fun _ -> exprs := e :: !exprs) expressions; + compute_may_aliases_aux !exprs + + +let compute_results (show_sets : bool) : unit = + let total_pointed_to = ref 0 + and total_lvalues = H.length lvalue_hash + and counted_lvalues = ref 0 + and lval_elts : (string * (string list)) list ref = ref [] in + let print_result (name, set) = + let rec print_set s = + match s with + [] -> () + | h :: [] -> print_string h + | h :: t -> + print_string (h ^ ", "); + print_set t + and ptsize = List.length set in + total_pointed_to := !total_pointed_to + ptsize; + if ptsize > 0 then + begin + print_string (name ^ "(" ^ (string_of_int ptsize) ^ ") -> "); + print_set set; + print_newline () + end + in + (* Make the most pessimistic assumptions about globals if an + undefined function is present. Such a function can write to every + global variable *) + let hose_globals () : unit = + List.iter + (fun vd -> A.assign_undefined (analyze_var_decl vd)) + !all_globals + in + let show_progress_fn (counted : int ref) (total : int) : unit = + incr counted; + if !show_progress then + Printf.printf "Computed flow for %d of %d sets\n" !counted total + in + if !conservative_undefineds && !found_undefined then hose_globals (); + A.finished_constraints (); + if show_sets then + begin + print_endline "Computing points-to sets..."; + Hashtbl.iter + (fun vinf -> fun lv -> + show_progress_fn counted_lvalues total_lvalues; + try lval_elts := (vinf.vname, A.points_to_names lv) :: !lval_elts + with A.UnknownLocation -> ()) + lvalue_hash; + List.iter print_result !lval_elts; + Printf.printf + "Total number of things pointed to: %d\n" + !total_pointed_to + end; + if !debug_may_aliases then + begin + Printf.printf "Printing may alias relationships\n"; + compute_may_aliases true + end + +let print_types () : unit = + print_string "Printing inferred types of lvalues...\n"; + Hashtbl.iter + (fun vi -> fun lv -> + Printf.printf "%s : %s\n" vi.vname (A.string_of_lvalue lv)) + lvalue_hash + + + +(** Alias queries. For each function, gather sets of locals, formals, and + globals. Do n^2 work for each of these functions, reporting whether or not + each pair of values is aliased. Aliasing is determined by taking points-to + set intersections. +*) +let compute_aliases = compute_may_aliases + + +(***********************************************************************) +(* *) +(* Abstract Location Interface *) +(* *) +(***********************************************************************) + +type absloc = A.absloc + +let rec lvalue_of_varinfo (vi : varinfo) : A.lvalue = + H.find lvalue_hash vi + +let lvalue_of_lval = traverse_lval +let tau_of_expr = traverse_expr + +(** return an abstract location for a varinfo, resp. lval *) +let absloc_of_varinfo vi = + A.absloc_of_lvalue (lvalue_of_varinfo vi) + +let absloc_of_lval lv = + A.absloc_of_lvalue (lvalue_of_lval lv) + +let absloc_e_points_to e = + A.absloc_epoints_to (tau_of_expr e) + +let absloc_lval_aliases lv = + A.absloc_points_to (lvalue_of_lval lv) + +(* all abslocs that e transitively points to *) +let absloc_e_transitive_points_to (e : Cil.exp) : absloc list = + let rec lv_trans_ptsto (worklist : varinfo list) (acc : varinfo list) : absloc list = + match worklist with + [] -> List.map absloc_of_varinfo acc + | vi :: wklst'' -> + if List.mem vi acc then lv_trans_ptsto wklst'' acc + else + lv_trans_ptsto + (List.rev_append + (A.points_to (lvalue_of_varinfo vi)) + wklst'') + (vi :: acc) + in + lv_trans_ptsto (A.epoints_to (tau_of_expr e)) [] + +let absloc_eq a b = A.absloc_eq (a, b) + +let d_absloc: unit -> absloc -> Pretty.doc = A.d_absloc + + +let ptrAnalysis = ref false +let ptrResults = ref false +let ptrTypes = ref false + + + +(** Turn this into a CIL feature *) +let feature : featureDescr = { + fd_name = "ptranal"; + fd_enabled = ptrAnalysis; + fd_description = "alias analysis"; + fd_extraopt = [ + ("--ptr_may_aliases", + Arg.Unit (fun _ -> debug_may_aliases := true), + "Print out results of may alias queries"); + ("--ptr_unify", Arg.Unit (fun _ -> no_sub := true), + "Make the alias analysis unification-based"); + ("--ptr_model_strings", Arg.Unit (fun _ -> model_strings := true), + "Make the alias analysis model string constants"); + ("--ptr_conservative", + Arg.Unit (fun _ -> conservative_undefineds := true), + "Treat undefineds conservatively in alias analysis"); + ("--ptr_results", Arg.Unit (fun _ -> ptrResults := true), + "print the results of the alias analysis"); + ("--ptr_mono", Arg.Unit (fun _ -> analyze_mono := true), + "run alias analysis monomorphically"); + ("--ptr_types",Arg.Unit (fun _ -> ptrTypes := true), + "print inferred points-to analysis types") + ]; + fd_doit = (function (f: file) -> + analyze_file f; + compute_results !ptrResults; + if !ptrTypes then print_types ()); + fd_post_check = false (* No changes *) +} diff --git a/cil/src/ext/pta/ptranal.mli b/cil/src/ext/pta/ptranal.mli new file mode 100644 index 00000000..36eb7a54 --- /dev/null +++ b/cil/src/ext/pta/ptranal.mli @@ -0,0 +1,156 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* Flags *) +(* *) +(***********************************************************************) + +(** Print extra debugging info *) +val debug : bool ref + +(** Debug constraints (print all constraints) *) +val debug_constraints : bool ref + +(** Debug smart alias queries *) +val debug_aliases : bool ref + +(** Debug may alias queries *) +val debug_may_aliases : bool ref + +val smart_aliases : bool ref + +(** Print out the top level constraints *) +val print_constraints : bool ref + +(** Make the analysis monomorphic *) +val analyze_mono : bool ref + +(** Disable subtyping *) +val no_sub : bool ref + +(** Make the flow step a no-op *) +val no_flow : bool ref + +(** Show the progress of the flow step *) +val show_progress : bool ref + +(** Treat undefined functions conservatively *) +val conservative_undefineds : bool ref + +(***********************************************************************) +(* *) +(* Building the Points-to Graph *) +(* *) +(***********************************************************************) + +(** Analyze a file *) +val analyze_file : Cil.file -> unit + +(** Print the type of each lvalue in the program *) +val print_types : unit -> unit + +(***********************************************************************) +(* *) +(* High-level Query Interface *) +(* *) +(***********************************************************************) + +(** If undefined functions are analyzed conservatively, any of the + high-level queries may raise this exception *) +exception UnknownLocation + +val may_alias : Cil.exp -> Cil.exp -> bool + +val resolve_lval : Cil.lval -> (Cil.varinfo list) + +val resolve_exp : Cil.exp -> (Cil.varinfo list) + +val resolve_funptr : Cil.exp -> (Cil.fundec list) + +(***********************************************************************) +(* *) +(* Low-level Query Interface *) +(* *) +(***********************************************************************) + +(** type for abstract locations *) +type absloc + +(** Give an abstract location for a varinfo *) +val absloc_of_varinfo : Cil.varinfo -> absloc + +(** Give an abstract location for an Cil lvalue *) +val absloc_of_lval : Cil.lval -> absloc + +(** may the two abstract locations be aliased? *) +val absloc_eq : absloc -> absloc -> bool + +val absloc_e_points_to : Cil.exp -> absloc list +val absloc_e_transitive_points_to : Cil.exp -> absloc list + +val absloc_lval_aliases : Cil.lval -> absloc list + +(** Print a string representing an absloc, for debugging. *) +val d_absloc : unit -> absloc -> Pretty.doc + + +(***********************************************************************) +(* *) +(* Printing results *) +(* *) +(***********************************************************************) + +(** Compute points to sets for variables. If true is passed, print the sets. *) +val compute_results : bool -> unit + +(* + +Deprecated these. -- jk + +(** Compute alias relationships. If true is passed, print all alias pairs. *) + val compute_aliases : bool -> unit + +(** Compute alias frequncy *) +val compute_alias_frequency : unit -> unit + + +*) + +val compute_aliases : bool -> unit + + +val feature: Cil.featureDescr diff --git a/cil/src/ext/pta/setp.ml b/cil/src/ext/pta/setp.ml new file mode 100644 index 00000000..a39b9722 --- /dev/null +++ b/cil/src/ext/pta/setp.ml @@ -0,0 +1,342 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +(* $Id: setp.ml,v 1.3 2003-02-19 19:26:31 jkodumal Exp $ *) + +(* Sets over ordered types *) + +module type PolyOrderedType = + sig + type 'a t + val compare: 'a t -> 'a t -> int + end + +module type S = + sig + type 'a elt + type 'a t + val empty: 'a t + val is_empty: 'a t -> bool + val mem: 'a elt -> 'a t -> bool + val add: 'a elt -> 'a t -> 'a t + val singleton: 'a elt -> 'a t + val remove: 'a elt -> 'a t -> 'a t + val union: 'a t -> 'a t -> 'a t + val inter: 'a t -> 'a t -> 'a t + val diff: 'a t -> 'a t -> 'a t + val compare: 'a t -> 'a t -> int + val equal: 'a t -> 'a t -> bool + val subset: 'a t -> 'a t -> bool + val iter: ('a elt -> unit) -> 'a t -> unit + val fold: ('a elt -> 'b -> 'b) -> 'a t -> 'b -> 'b + val for_all: ('a elt -> bool) -> 'a t -> bool + val exists: ('a elt -> bool) -> 'a t -> bool + val filter: ('a elt -> bool) -> 'a t -> 'a t + val partition: ('a elt -> bool) -> 'a t -> 'a t * 'a t + val cardinal: 'a t -> int + val elements: 'a t -> 'a elt list + val min_elt: 'a t -> 'a elt + val max_elt: 'a t -> 'a elt + val choose: 'a t -> 'a elt + end + +module Make(Ord: PolyOrderedType) = + struct + type 'a elt = 'a Ord.t + type 'a t = Empty | Node of 'a t * 'a elt * 'a t * int + + (* Sets are represented by balanced binary trees (the heights of the + children differ by at most 2 *) + + let height = function + Empty -> 0 + | Node(_, _, _, h) -> h + + (* Creates a new node with left son l, value x and right son r. + l and r must be balanced and | height l - height r | <= 2. + Inline expansion of height for better speed. *) + + let create l x r = + let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h in + let hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in + Node(l, x, r, (if hl >= hr then hl + 1 else hr + 1)) + + (* Same as create, but performs one step of rebalancing if necessary. + Assumes l and r balanced. + Inline expansion of create for better speed in the most frequent case + where no rebalancing is required. *) + + let bal l x r = + let hl = match l with Empty -> 0 | Node(_,_,_,h) -> h in + let hr = match r with Empty -> 0 | Node(_,_,_,h) -> h in + if hl > hr + 2 then begin + match l with + Empty -> invalid_arg "Set.bal" + | Node(ll, lv, lr, _) -> + if height ll >= height lr then + create ll lv (create lr x r) + else begin + match lr with + Empty -> invalid_arg "Set.bal" + | Node(lrl, lrv, lrr, _)-> + create (create ll lv lrl) lrv (create lrr x r) + end + end else if hr > hl + 2 then begin + match r with + Empty -> invalid_arg "Set.bal" + | Node(rl, rv, rr, _) -> + if height rr >= height rl then + create (create l x rl) rv rr + else begin + match rl with + Empty -> invalid_arg "Set.bal" + | Node(rll, rlv, rlr, _) -> + create (create l x rll) rlv (create rlr rv rr) + end + end else + Node(l, x, r, (if hl >= hr then hl + 1 else hr + 1)) + + (* Same as bal, but repeat rebalancing until the final result + is balanced. *) + + let rec join l x r = + match bal l x r with + Empty -> invalid_arg "Set.join" + | Node(l', x', r', _) as t' -> + let d = height l' - height r' in + if d < -2 || d > 2 then join l' x' r' else t' + + (* Merge two trees l and r into one. + All elements of l must precede the elements of r. + Assumes | height l - height r | <= 2. *) + + let rec merge t1 t2 = + match (t1, t2) with + (Empty, t) -> t + | (t, Empty) -> t + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + bal l1 v1 (bal (merge r1 l2) v2 r2) + + (* Same as merge, but does not assume anything about l and r. *) + + let rec concat t1 t2 = + match (t1, t2) with + (Empty, t) -> t + | (t, Empty) -> t + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + join l1 v1 (join (concat r1 l2) v2 r2) + + (* Splitting *) + + let rec split x = function + Empty -> + (Empty, None, Empty) + | Node(l, v, r, _) -> + let c = Ord.compare x v in + if c = 0 then (l, Some v, r) + else if c < 0 then + let (ll, vl, rl) = split x l in (ll, vl, join rl v r) + else + let (lr, vr, rr) = split x r in (join l v lr, vr, rr) + + (* Implementation of the set operations *) + + let empty = Empty + + let is_empty = function Empty -> true | _ -> false + + let rec mem x = function + Empty -> false + | Node(l, v, r, _) -> + let c = Ord.compare x v in + c = 0 || mem x (if c < 0 then l else r) + + let rec add x = function + Empty -> Node(Empty, x, Empty, 1) + | Node(l, v, r, _) as t -> + let c = Ord.compare x v in + if c = 0 then t else + if c < 0 then bal (add x l) v r else bal l v (add x r) + + let singleton x = Node(Empty, x, Empty, 1) + + let rec remove x = function + Empty -> Empty + | Node(l, v, r, _) -> + let c = Ord.compare x v in + if c = 0 then merge l r else + if c < 0 then bal (remove x l) v r else bal l v (remove x r) + + let rec union s1 s2 = + match (s1, s2) with + (Empty, t2) -> t2 + | (t1, Empty) -> t1 + | (Node(l1, v1, r1, h1), Node(l2, v2, r2, h2)) -> + if h1 >= h2 then + if h2 = 1 then add v2 s1 else begin + let (l2, _, r2) = split v1 s2 in + join (union l1 l2) v1 (union r1 r2) + end + else + if h1 = 1 then add v1 s2 else begin + let (l1, _, r1) = split v2 s1 in + join (union l1 l2) v2 (union r1 r2) + end + + let rec inter s1 s2 = + match (s1, s2) with + (Empty, t2) -> Empty + | (t1, Empty) -> Empty + | (Node(l1, v1, r1, _), t2) -> + match split v1 t2 with + (l2, None, r2) -> + concat (inter l1 l2) (inter r1 r2) + | (l2, Some _, r2) -> + join (inter l1 l2) v1 (inter r1 r2) + + let rec diff s1 s2 = + match (s1, s2) with + (Empty, t2) -> Empty + | (t1, Empty) -> t1 + | (Node(l1, v1, r1, _), t2) -> + match split v1 t2 with + (l2, None, r2) -> + join (diff l1 l2) v1 (diff r1 r2) + | (l2, Some _, r2) -> + concat (diff l1 l2) (diff r1 r2) + + let rec compare_aux l1 l2 = + match (l1, l2) with + ([], []) -> 0 + | ([], _) -> -1 + | (_, []) -> 1 + | (Empty :: t1, Empty :: t2) -> + compare_aux t1 t2 + | (Node(Empty, v1, r1, _) :: t1, Node(Empty, v2, r2, _) :: t2) -> + let c = Ord.compare v1 v2 in + if c <> 0 then c else compare_aux (r1::t1) (r2::t2) + | (Node(l1, v1, r1, _) :: t1, t2) -> + compare_aux (l1 :: Node(Empty, v1, r1, 0) :: t1) t2 + | (t1, Node(l2, v2, r2, _) :: t2) -> + compare_aux t1 (l2 :: Node(Empty, v2, r2, 0) :: t2) + + let compare s1 s2 = + compare_aux [s1] [s2] + + let equal s1 s2 = + compare s1 s2 = 0 + + let rec subset s1 s2 = + match (s1, s2) with + Empty, _ -> + true + | _, Empty -> + false + | Node (l1, v1, r1, _), (Node (l2, v2, r2, _) as t2) -> + let c = Ord.compare v1 v2 in + if c = 0 then + subset l1 l2 && subset r1 r2 + else if c < 0 then + subset (Node (l1, v1, Empty, 0)) l2 && subset r1 t2 + else + subset (Node (Empty, v1, r1, 0)) r2 && subset l1 t2 + + let rec iter f = function + Empty -> () + | Node(l, v, r, _) -> iter f l; f v; iter f r + + let rec fold f s accu = + match s with + Empty -> accu + | Node(l, v, r, _) -> fold f l (f v (fold f r accu)) + + let rec for_all p = function + Empty -> true + | Node(l, v, r, _) -> p v && for_all p l && for_all p r + + let rec exists p = function + Empty -> false + | Node(l, v, r, _) -> p v || exists p l || exists p r + + let filter p s = + let rec filt accu = function + | Empty -> accu + | Node(l, v, r, _) -> + filt (filt (if p v then add v accu else accu) l) r in + filt Empty s + + let partition p s = + let rec part (t, f as accu) = function + | Empty -> accu + | Node(l, v, r, _) -> + part (part (if p v then (add v t, f) else (t, add v f)) l) r in + part (Empty, Empty) s + + let rec cardinal = function + Empty -> 0 + | Node(l, v, r, _) -> cardinal l + 1 + cardinal r + + let rec elements_aux accu = function + Empty -> accu + | Node(l, v, r, _) -> elements_aux (v :: elements_aux accu r) l + + let elements s = + elements_aux [] s + + let rec min_elt = function + Empty -> raise Not_found + | Node(Empty, v, r, _) -> v + | Node(l, v, r, _) -> min_elt l + + let rec max_elt = function + Empty -> raise Not_found + | Node(l, v, Empty, _) -> v + | Node(l, v, r, _) -> max_elt r + + let choose = min_elt + + end diff --git a/cil/src/ext/pta/setp.mli b/cil/src/ext/pta/setp.mli new file mode 100644 index 00000000..a3b30313 --- /dev/null +++ b/cil/src/ext/pta/setp.mli @@ -0,0 +1,180 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +(***********************************************************************) +(* *) +(* Objective Caml *) +(* *) +(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) +(* *) +(* Copyright 1996 Institut National de Recherche en Informatique et *) +(* en Automatique. All rights reserved. This file is distributed *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file ../LICENSE. *) +(* *) +(***********************************************************************) + +(* $Id: setp.mli,v 1.3 2003-02-19 19:26:31 jkodumal Exp $ *) + +(** Sets over ordered types. + + This module implements the set data structure, given a total ordering + function over the set elements. All operations over sets + are purely applicative (no side-effects). + The implementation uses balanced binary trees, and is therefore + reasonably efficient: insertion and membership take time + logarithmic in the size of the set, for instance. +*) + +module type PolyOrderedType = + sig + type 'a t + (** The type of the set elements. *) + val compare : 'a t -> 'a t -> int + (** A total ordering function over the set elements. + This is a two-argument function [f] such that + [f e1 e2] is zero if the elements [e1] and [e2] are equal, + [f e1 e2] is strictly negative if [e1] is smaller than [e2], + and [f e1 e2] is strictly positive if [e1] is greater than [e2]. + Example: a suitable ordering function is + the generic structural comparison function {!Pervasives.compare}. *) + end +(** Input signature of the functor {!Set.Make}. *) + +module type S = + sig + type 'a elt + (** The type of the set elements. *) + + type 'a t + (** The type of sets. *) + + val empty: 'a t + (** The empty set. *) + + val is_empty: 'a t -> bool + (** Test whether a set is empty or not. *) + + val mem: 'a elt -> 'a t -> bool + (** [mem x s] tests whether [x] belongs to the set [s]. *) + + val add: 'a elt -> 'a t -> 'a t + (** [add x s] returns a set containing all elements of [s], + plus [x]. If [x] was already in [s], [s] is returned unchanged. *) + + val singleton: 'a elt -> 'a t + (** [singleton x] returns the one-element set containing only [x]. *) + + val remove: 'a elt -> 'a t -> 'a t + (** [remove x s] returns a set containing all elements of [s], + except [x]. If [x] was not in [s], [s] is returned unchanged. *) + + val union: 'a t -> 'a t -> 'a t + (** Set union. *) + + val inter: 'a t -> 'a t -> 'a t + (** Set interseection. *) + + (** Set difference. *) + val diff: 'a t -> 'a t -> 'a t + + val compare: 'a t -> 'a t -> int + (** Total ordering between sets. Can be used as the ordering function + for doing sets of sets. *) + + val equal: 'a t -> 'a t -> bool + (** [equal s1 s2] tests whether the sets [s1] and [s2] are + equal, that is, contain equal elements. *) + + val subset: 'a t -> 'a t -> bool + (** [subset s1 s2] tests whether the set [s1] is a subset of + the set [s2]. *) + + val iter: ('a elt -> unit) -> 'a t -> unit + (** [iter f s] applies [f] in turn to all elements of [s]. + The order in which the elements of [s] are presented to [f] + is unspecified. *) + + val fold: ('a elt -> 'b -> 'b) -> 'a t -> 'b -> 'b + (** [fold f s a] computes [(f xN ... (f x2 (f x1 a))...)], + where [x1 ... xN] are the elements of [s]. + The order in which elements of [s] are presented to [f] is + unspecified. *) + + val for_all: ('a elt -> bool) -> 'a t -> bool + (** [for_all p s] checks if all elements of the set + satisfy the predicate [p]. *) + + val exists: ('a elt -> bool) -> 'a t -> bool + (** [exists p s] checks if at least one element of + the set satisfies the predicate [p]. *) + + val filter: ('a elt -> bool) -> 'a t -> 'a t + (** [filter p s] returns the set of all elements in [s] + that satisfy predicate [p]. *) + + val partition: ('a elt -> bool) -> 'a t -> 'a t * 'a t + (** [partition p s] returns a pair of sets [(s1, s2)], where + [s1] is the set of all the elements of [s] that satisfy the + predicate [p], and [s2] is the set of all the elements of + [s] that do not satisfy [p]. *) + + val cardinal: 'a t -> int + (** Return the number of elements of a set. *) + + val elements: 'a t -> 'a elt list + (** Return the list of all elements of the given set. + The returned list is sorted in increasing order with respect + to the ordering [Ord.compare], where [Ord] is the argument + given to {!Set.Make}. *) + + val min_elt: 'a t -> 'a elt + (** Return the smallest element of the given set + (with respect to the [Ord.compare] ordering), or raise + [Not_found] if the set is empty. *) + + val max_elt: 'a t -> 'a elt + (** Same as {!Set.S.min_elt}, but returns the largest element of the + given set. *) + + val choose: 'a t -> 'a elt + (** Return one element of the given set, or raise [Not_found] if + the set is empty. Which element is chosen is unspecified, + but equal elements will be chosen for equal sets. *) + end +(** Output signature of the functor {!Set.Make}. *) + +module Make (Ord : PolyOrderedType) : S with type 'a elt = 'a Ord.t +(** Functor building an implementation of the set structure + given a totally ordered type. *) diff --git a/cil/src/ext/pta/steensgaard.ml b/cil/src/ext/pta/steensgaard.ml new file mode 100644 index 00000000..63686934 --- /dev/null +++ b/cil/src/ext/pta/steensgaard.ml @@ -0,0 +1,1417 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* *) +(* This file is currently unused by CIL. It is included in the *) +(* distribution for reference only. *) +(* *) +(* *) +(***********************************************************************) + + +(***********************************************************************) +(* *) +(* Type Declarations *) +(* *) +(***********************************************************************) + +exception Inconsistent of string +exception Bad_cache +exception No_contents +exception Bad_proj +exception Bad_type_copy +exception Instantiation_cycle + +module U = Uref +module S = Setp +module H = Hashtbl +module Q = Queue + +(** Polarity kinds-- positive, negative, or nonpolar. *) +type polarity = Pos + | Neg + | Non + +(** Label bounds. The polymorphic type is a hack for recursive modules *) +type 'a bound = {index : int; info : 'a} + +(** The 'a type may in general contain urefs, which makes Pervasives.compare + incorrect. However, the bounds will always be correct because if two tau's + get unified, their cached instantiations will be re-entered into the + worklist, ensuring that any labels find the new bounds *) +module Bound = +struct + type 'a t = 'a bound + let compare (x : 'a t) (y : 'a t) = + Pervasives.compare x y +end + +module B = S.Make(Bound) + +type 'a boundset = 'a B.t + +(** Constants, which identify elements in points-to sets *) +type constant = int * string + +module Constant = +struct + type t = constant + + let compare ((xid,_) : t) ((yid,_) : t) = + Pervasives.compare xid yid +end + +module C = Set.Make(Constant) + +(** Sets of constants. Set union is used when two labels containing + constant sets are unified *) +type constantset = C.t + +type lblinfo = { + mutable l_name: string; + (** Name of this label *) + mutable aliases: constantset; + (** Set of constants (tags) for checking aliases *) + p_bounds: label boundset U.uref; + (** Set of umatched (p) lower bounds *) + n_bounds: label boundset U.uref; + (** Set of unmatched (n) lower bounds *) + mutable p_cached: bool; + (** Flag indicating whether all reachable p edges have been locally cached *) + mutable n_cached: bool; + (** Flag indicating whether all reachable n edges have been locally cached *) + mutable on_path: bool; + (** For cycle detection during reachability queries *) +} + +(** Constructor labels *) +and label = lblinfo U.uref + +(** The type of lvalues. *) +type lvalue = { + l: label; + contents: tau +} + +(** Data for variables. *) +and vinfo = { + v_name: string; + mutable v_global: bool; + v_cache: cache +} + +(** Data for ref constructors. *) +and rinfo = { + rl: label; + mutable r_global: bool; + points_to: tau; + r_cache: cache +} + +(** Data for fun constructors. *) +and finfo = { + fl: label; + mutable f_global: bool; + args: tau list ref; + ret: tau; + f_cache: cache +} + +(* Data for pairs. Note there is no label. *) +and pinfo = { + mutable p_global: bool; + ptr: tau; + lam: tau; + p_cache: cache +} + +(** Type constructors discovered by type inference *) +and tinfo = Wild + | Var of vinfo + | Ref of rinfo + | Fun of finfo + | Pair of pinfo + +(** The top-level points-to type. *) +and tau = tinfo U.uref + +(** The instantiation constraint cache. The index is used as a key. *) +and cache = (int,polarity * tau) H.t + +(* Type of semi-unification constraints *) +type su_constraint = Instantiation of tau * (int * polarity) * tau + | Unification of tau * tau + +(** Association lists, used for printing recursive types. The first element + is a type that has been visited. The second element is the string + representation of that type (so far). If the string option is set, then + this type occurs within itself, and is associated with the recursive var + name stored in the option. When walking a type, add it to an association + list. + + Example : suppose we have the constraint 'a = ref('a). The type is unified + via cyclic unification, and would loop infinitely if we attempted to print + it. What we want to do is print the type u rv. ref(rv). This is accomplished + in the following manner: + + -- ref('a) is visited. It is not in the association list, so it is added + and the string "ref(" is stored in the second element. We recurse to print + the first argument of the constructor. + + -- In the recursive call, we see that 'a (or ref('a)) is already in the + association list, so the type is recursive. We check the string option, + which is None, meaning that this is the first recurrence of the type. We + create a new recursive variable, rv and set the string option to 'rv. Next, + we prepend u rv. to the string representation we have seen before, "ref(", + and return "rv" as the string representation of this type. + + -- The string so far is "u rv.ref(". The recursive call returns, and we + complete the type by printing the result of the call, "rv", and ")" + + In a type where the recursive variable appears twice, e.g. 'a = pair('a,'a), + the second time we hit 'a, the string option will be set, so we know to + reuse the same recursive variable name. +*) +type association = tau * string ref * string option ref + +(***********************************************************************) +(* *) +(* Global Variables *) +(* *) +(***********************************************************************) + +(** Print the instantiations constraints (loops with cyclic structures). *) +let print_constraints : bool ref = ref false + +(** Solve constraints as they are introduced. If this is false, constraints + are solved in batch fashion at calls to solveConstraints. *) +let solve_online : bool ref = ref true + +(** If true, print all constraints (including induced) and show additional + debug output. *) +let debug = ref false +let debug_constraints = debug + +(** If true, print out extra verbose debug information (including contents + of label sets *) +let verbose_debug = ref false + + +(** If true, make the flow step a no-op *) +let no_flow = ref false + +let no_sub = ref false + +(** If true, do not add instantiation constraints *) +let analyze_mono = ref false + +(** A counter for generating unique integers. *) +let counter : int ref = ref 0 + +(** A list of equality constraints. *) +let eq_worklist : su_constraint Q.t = Q.create() + +(** A list of instantiation constraints. *) +let inst_worklist : su_constraint Q.t = Q.create() + +(***********************************************************************) +(* *) +(* Utility Functions *) +(* *) +(***********************************************************************) + +(** Consistency check for inferred types *) +let pair_or_var (t : tau) = + match (U.deref t) with + | Pair _ -> true + | Var _ -> true + | _ -> false + +let ref_or_var (t : tau) = + match (U.deref t) with + | Ref _ -> true + | Var _ -> true + | _ -> false + +let fun_or_var (t : tau) = + match (U.deref t) with + | Fun _ -> true + | Var _ -> true + | _ -> false + +(** Generate a unique integer. *) +let fresh_index () : int = + incr counter; + !counter + +(** Negate a polarity. *) +let negate (p : polarity) : polarity = + match p with + | Pos -> Neg + | Neg -> Pos + | Non -> Non + +(** Compute the least-upper-bounds of two polarities. *) +let lub (p,p' : polarity * polarity) : polarity = + match p with + | Pos -> + begin + match p' with + | Pos -> Pos + | _ -> Non + end + | Neg -> + begin + match p' with + | Neg -> Neg + | _ -> Non + end + | Non -> Non + +(** Extract the cache from a type *) +let get_cache (t : tau) : cache = + match U.deref t with + | Wild -> raise Bad_cache + | Var v -> v.v_cache + | Ref r -> r.r_cache + | Pair p -> p.p_cache + | Fun f -> f.f_cache + +(** Determine whether or not a type is global *) +let get_global (t : tau) : bool = + match U.deref t with + | Wild -> false + | Var v -> v.v_global + | Ref r -> r.r_global + | Pair p -> p.p_global + | Fun f -> f.f_global + +(** Return true if a type is monomorphic (global). *) +let global_tau = get_global + +let global_lvalue lv = get_global lv.contents + +(** Return true if e is a member of l (according to uref equality) *) +let rec ulist_mem e l = + match l with + | [] -> false + | h :: t -> if (U.equal(h,e)) then true else ulist_mem e t + +(** Convert a polarity to a string *) +let string_of_polarity p = + match p with + | Pos -> "+" + | Neg -> "-" + | Non -> "T" + +(** Convert a label to a string, short representation *) +let string_of_label2 (l : label) : string = + "\"" ^ (U.deref l).l_name ^ "\"" + +(** Convert a label to a string, long representation *) +let string_of_label (l : label ) : string = + let rec constset_to_string = function + | (_,s) :: [] -> s + | (_,s) :: t -> s ^ "," ^ (constset_to_string t) + | [] -> "" + in + let aliases = constset_to_string (C.elements ((U.deref l).aliases)) + in + if ( (aliases = "") || (not !verbose_debug)) + then string_of_label2 l + else aliases + +(** Return true if the element [e] is present in the association list *) +let rec assoc_list_mem (e : tau) (l : association list) = + match l with + | [] -> None + | (h,s,so) :: t -> + if (U.equal(h,e)) then (Some (s,so)) else assoc_list_mem e t + +(** Given a tau, create a unique recursive variable name. This should always + return the same name for a given tau *) +let fresh_recvar_name (t : tau) : string = + match U.deref t with + | Pair p -> "rvp" ^ string_of_int((Hashtbl.hash p)) + | Ref r -> "rvr" ^ string_of_int((Hashtbl.hash r)) + | Fun f -> "rvf" ^ string_of_int((Hashtbl.hash f)) + | _ -> raise (Inconsistent ("recvar_name")) + +(** Return a string representation of a tau, using association lists. *) +let string_of_tau (t : tau ) : string = + let tau_map : association list ref = ref [] in + let rec string_of_tau' t = + match (assoc_list_mem t (!tau_map)) with + | Some (s,so) -> (* recursive type. see if a var name has been set *) + begin + match (!so) with + | None -> + begin + let rv = fresh_recvar_name(t) in + s := "u " ^ rv ^ "." ^ (!s); + so := Some (rv); + rv + end + | Some rv -> rv + end + | None -> (* type's not recursive. Add it to the assoc list and cont. *) + let s = ref "" in + let so : string option ref = ref None in + begin + tau_map := (t,s,so) :: (!tau_map); + + (match (U.deref t) with + | Wild -> s := "_"; + | Var v -> s := v.v_name; + | Pair p -> + begin + assert (ref_or_var(p.ptr)); + assert (fun_or_var(p.lam)); + s := "{"; + s := (!s) ^ (string_of_tau' p.ptr); + s := (!s) ^ ","; + s := (!s) ^ (string_of_tau' p.lam); + s := (!s) ^"}" + + end + | Ref r -> + begin + assert(pair_or_var(r.points_to)); + s := "ref(|"; + s := (!s) ^ (string_of_label r.rl); + s := (!s) ^ "|,"; + s := (!s) ^ (string_of_tau' r.points_to); + s := (!s) ^ ")" + + end + | Fun f -> + begin + assert(pair_or_var(f.ret)); + let rec string_of_args = function + | h :: [] -> + begin + assert(pair_or_var(h)); + s := (!s) ^ (string_of_tau' h) + end + | h :: t -> + begin + assert(pair_or_var(h)); + s := (!s) ^ (string_of_tau' h) ^ ","; + string_of_args t + end + | [] -> () + in + s := "fun(|"; + s := (!s) ^ (string_of_label f.fl); + s := (!s) ^ "|,"; + s := (!s) ^ "<"; + if (List.length !(f.args) > 0) + then + string_of_args !(f.args) + else + s := (!s) ^ "void"; + s := (!s) ^">,"; + s := (!s) ^ (string_of_tau' f.ret); + s := (!s) ^ ")" + end); + tau_map := List.tl (!tau_map); + !s + end + in + string_of_tau' t + +(** Convert an lvalue to a string *) +let rec string_of_lvalue (lv : lvalue) : string = + let contents = (string_of_tau(lv.contents)) in + let l = (string_of_label lv.l) in + assert(pair_or_var(lv.contents)); + Printf.sprintf "[%s]^(%s)" contents l + +(** Print a list of tau elements, comma separated *) +let rec print_tau_list (l : tau list) : unit = + let t_strings = List.map string_of_tau l in + let rec print_t_strings = function + | h :: [] -> print_string h; print_newline(); + | h :: t -> print_string h; print_string ", "; print_t_strings t + | [] -> () + in + print_t_strings t_strings + +(** Print a constraint. *) +let print_constraint (c : su_constraint) = + match c with + | Unification (t,t') -> + let lhs = string_of_tau t in + let rhs = string_of_tau t' in + Printf.printf "%s == %s\n" lhs rhs + | Instantiation (t,(i,p),t') -> + let lhs = string_of_tau t in + let rhs = string_of_tau t' in + let index = string_of_int i in + let pol = string_of_polarity p in + Printf.printf "%s <={%s,%s} %s\n" lhs index pol rhs + +(* If [positive] is true, return the p-edge bounds, otherwise, return + the n-edge bounds. *) +let get_bounds (positive : bool) (l : label) : label boundset U.uref = + if (positive) then + (U.deref l).p_bounds + else + (U.deref l).n_bounds + +(** Used for cycle detection during the flow step. Returns true if the + label [l] is found on the current path. *) +let on_path (l : label) : bool = + (U.deref l).on_path + +(** Used for cycle detection during the flow step. Identifies [l] as being + on/off the current path. *) +let set_on_path (l : label) (b : bool) : unit = + (U.deref l).on_path <- b + +(** Make the type a global type *) +let set_global (t : tau) (b : bool) : bool = + if (!debug && b) + then + Printf.printf "Setting a new global : %s\n" (string_of_tau t); + begin + assert ( (not (get_global(t)) ) || b ); + (match U.deref t with + | Wild -> () + | Var v -> v.v_global <- b + | Ref r -> r.r_global <- b + | Pair p -> p.p_global <- b + | Fun f -> f.f_global <- b); + b + end + +(** Return a label's bounds as a string *) +let string_of_bounds (is_pos : bool) (l : label) : string = + let bounds = + if (is_pos) then + U.deref ((U.deref l).p_bounds) + else + U.deref ((U.deref l).n_bounds) + in + B.fold (fun b -> fun res -> res ^ (string_of_label2 b.info) ^ " " + ) bounds "" + +(***********************************************************************) +(* *) +(* Type Operations -- these do not create any constraints *) +(* *) +(***********************************************************************) + +let wild_val = U.uref Wild + +(** The wild (don't care) value. *) +let wild () : tau = + wild_val + +(** Create an lvalue with label [lbl] and tau contents [t]. *) +let make_lval (lbl,t : label * tau) : lvalue = + {l = lbl; contents = t} + +(** Create a new label with name [name]. Also adds a fresh constant + with name [name] to this label's aliases set. *) +let make_label (name : string) : label = + U.uref { + l_name = name; + aliases = (C.add (fresh_index(),name) C.empty); + p_bounds = U.uref (B.empty); + n_bounds = U.uref (B.empty); + p_cached = false; + n_cached = false; + on_path = false + } + +(** Create a new label with an unspecified name and an empty alias set. *) +let fresh_label () : label = + U.uref { + l_name = "l_" ^ (string_of_int (fresh_index())); + aliases = (C.empty); + p_bounds = U.uref (B.empty); + n_bounds = U.uref (B.empty); + p_cached = false; + n_cached = false; + on_path = false + } + +(** Create a fresh bound. *) +let make_bound (i,a : int * 'a) : 'a bound = + {index = i; info = a } + +(** Create a fresh named variable with name '[name]. *) +let make_var (b: bool) (name : string) : tau = + U.uref (Var {v_name = ("'" ^name); + v_global = b; + v_cache = H.create 4}) + +(** Create a fresh unnamed variable (name will be 'fv). *) +let fresh_var () : tau = + make_var false ("fv" ^ (string_of_int (fresh_index())) ) + +(** Create a fresh unnamed variable (name will be 'fi). *) +let fresh_var_i () : tau = + make_var false ("fi" ^ (string_of_int (fresh_index())) ) + +(** Create a Fun constructor. *) +let make_fun (lbl,a,r : label * (tau list) * tau) : tau = + U.uref (Fun {fl = lbl ; + f_global = false; + args = ref a; + ret = r; + f_cache = H.create 4}) + +(** Create a Ref constructor. *) +let make_ref (lbl,pt : label * tau) : tau = + U.uref (Ref {rl = lbl ; + r_global = false; + points_to = pt; + r_cache = H.create 4}) + +(** Create a Pair constructor. *) +let make_pair (p,f : tau * tau) : tau = + U.uref (Pair {ptr = p; + p_global = false; + lam = f; + p_cache = H.create 4}) + +(** Copy the toplevel constructor of [t], putting fresh variables in each + argement of the constructor. *) +let copy_toplevel (t : tau) : tau = + match U.deref t with + | Pair _ -> + make_pair (fresh_var_i(), fresh_var_i()) + | Ref _ -> + make_ref (fresh_label(),fresh_var_i()) + | Fun f -> + let fresh_fn = fun _ -> fresh_var_i() + in + make_fun (fresh_label(), List.map fresh_fn !(f.args) , fresh_var_i()) + | _ -> raise Bad_type_copy + +let pad_args (l,l' : (tau list ref) * (tau list ref)) : unit = + let padding = ref ((List.length (!l)) - (List.length (!l'))) + in + if (!padding == 0) then () + else + let to_pad = + if (!padding > 0) then l' else (padding := -(!padding);l) + in + for i = 1 to (!padding) do + to_pad := (!to_pad) @ [fresh_var()] + done + +(***********************************************************************) +(* *) +(* Constraint Generation/ Resolution *) +(* *) +(***********************************************************************) + +(** Returns true if the constraint has no effect, i.e. either the left-hand + side or the right-hand side is wild. *) +let wild_constraint (t,t' : tau * tau) : bool = + let ti,ti' = U.deref t, U.deref t' in + match ti,ti' with + | Wild, _ -> true + | _, Wild -> true + | _ -> false + +exception Cycle_found + +(** Cycle detection between instantiations. Returns true if there is a cycle + from t to t' *) +let exists_cycle (t,t' : tau * tau) : bool = + let visited : tau list ref = ref [] in + let rec exists_cycle' t = + if (ulist_mem t (!visited)) + then + begin (* + print_string "Instantiation cycle found :"; + print_tau_list (!visited); + print_newline(); + print_string (string_of_tau t); + print_newline(); *) + (* raise Instantiation_cycle *) + (* visited := List.tl (!visited) *) (* check *) + end + else + begin + visited := t :: (!visited); + if (U.equal(t,t')) + then raise Cycle_found + else + H.iter (fun _ -> fun (_,t'') -> + if (U.equal (t,t'')) then () + else + ignore (exists_cycle' t'') + ) (get_cache t) ; + visited := List.tl (!visited) + end + in + try + exists_cycle' t; + false + with + | Cycle_found -> true + +exception Subterm + +(** Returns true if [t'] is a proper subterm of [t] *) +let proper_subterm (t,t') = + let visited : tau list ref = ref [] in + let rec proper_subterm' t = + if (ulist_mem t (!visited)) + then () (* recursive type *) + else + if (U.equal (t,t')) + then raise Subterm + else + begin + visited := t :: (!visited); + ( + match (U.deref t) with + | Wild -> () + | Var _ -> () + | Ref r -> + proper_subterm' r.points_to + | Pair p -> + proper_subterm' p.ptr; + proper_subterm' p.lam + | Fun f -> + proper_subterm' f.ret; + List.iter (proper_subterm') !(f.args) + ); + visited := List.tl (!visited) + end + in + try + if (U.equal(t,t')) then false + else + begin + proper_subterm' t; + false + end + with + | Subterm -> true + +(** The extended occurs check. Search for a cycle of instantiations from [t] + to [t']. If such a cycle exists, check to see that [t'] is a proper subterm + of [t]. If it is, then return true *) +let eoc (t,t') : bool = + if (exists_cycle(t,t') && proper_subterm(t,t')) + then + begin + if (!debug) + then + Printf.printf "Occurs check : %s occurs within %s\n" (string_of_tau t') + (string_of_tau t) + else + (); + true + end + else + false + +(** Resolve an instantiation constraint *) +let rec instantiate_int (t,(i,p),t' : tau * (int * polarity) * tau) = + if ( wild_constraint(t,t') || (not (store(t,(i,p),t'))) || + U.equal(t,t') ) + then () + else + let ti,ti' = U.deref t, U.deref t' in + match ti,ti' with + | Ref r, Ref r' -> + instantiate_ref(r,(i,p),r') + | Fun f, Fun f' -> + instantiate_fun(f,(i,p),f') + | Pair pr, Pair pr' -> + begin + add_constraint_int (Instantiation (pr.ptr,(i,p),pr'.ptr)); + add_constraint_int (Instantiation (pr.lam,(i,p),pr'.lam)) + end + | Var v, _ -> () + | _,Var v' -> + if eoc(t,t') + then + add_constraint_int (Unification (t,t')) + else + begin + unstore(t,i); + add_constraint_int (Unification ((copy_toplevel t),t')); + add_constraint_int (Instantiation (t,(i,p),t')) + end + | _ -> raise (Inconsistent("instantiate")) + +(** Apply instantiations to the ref's label, and structurally down the type. + Contents of ref constructors are instantiated with polarity Non. *) +and instantiate_ref (ri,(i,p),ri') : unit = + add_constraint_int (Instantiation(ri.points_to,(i,Non),ri'.points_to)); + instantiate_label (ri.rl,(i,p),ri'.rl) + +(** Apply instantiations to the fun's label, and structurally down the type. + Flip the polarity for the function's args. If the lengths of the argument + lists don't match, extend the shorter list as necessary. *) +and instantiate_fun (fi,(i,p),fi') : unit = + pad_args (fi.args, fi'.args); + assert(List.length !(fi.args) == List.length !(fi'.args)); + add_constraint_int (Instantiation (fi.ret,(i,p),fi'.ret)); + List.iter2 (fun t ->fun t' -> + add_constraint_int (Instantiation(t,(i,negate p),t'))) + !(fi.args) !(fi'.args); + instantiate_label (fi.fl,(i,p),fi'.fl) + +(** Instantiate a label. Update the label's bounds with new flow edges. + *) +and instantiate_label (l,(i,p),l' : label * (int * polarity) * label) : unit = + if (!debug) then + Printf.printf "%s <= {%d,%s} %s\n" (string_of_label l) i + (string_of_polarity p) (string_of_label l'); + let li,li' = U.deref l, U.deref l' in + match p with + | Pos -> + U.update (li'.p_bounds, + B.add(make_bound (i,l)) (U.deref li'.p_bounds) + ) + | Neg -> + U.update (li.n_bounds, + B.add(make_bound (i,l')) (U.deref li.n_bounds) + ) + | Non -> + begin + U.update (li'.p_bounds, + B.add(make_bound (i,l)) (U.deref li'.p_bounds) + ); + U.update (li.n_bounds, + B.add(make_bound (i,l')) (U.deref li.n_bounds) + ) + end + +(** Resolve a unification constraint. Does the uref unification after grabbing + a copy of the information before the two infos are unified. The other + interesting feature of this function is the way 'globalness' is propagated. + If a non-global is unified with a global, the non-global becomes global. + If the ecr became global, there is a problem because none of its cached + instantiations know that the type became monomorphic. In this case, they + must be re-inserted via merge-cache. Merge-cache always reinserts cached + instantiations from the non-ecr type, i.e. the type that was 'killed' by the + unification. *) +and unify_int (t,t' : tau * tau) : unit = + if (wild_constraint(t,t') || U.equal(t,t')) + then () + else + let ti, ti' = U.deref t, U.deref t' in + begin + U.unify combine (t,t'); + match ti,ti' with + | Var v, _ -> + begin + if (set_global t' (v.v_global || (get_global t'))) + then (H.iter (merge_cache t') (get_cache t')) + else (); + H.iter (merge_cache t') v.v_cache + end + | _, Var v -> + begin + if (set_global t (v.v_global || (get_global t))) + then (H.iter (merge_cache t) (get_cache t)) + else (); + H.iter (merge_cache t) v.v_cache + end + | Ref r, Ref r' -> + begin + if (set_global t (r.r_global || r'.r_global)) + then (H.iter (merge_cache t) (get_cache t)) + else (); + H.iter (merge_cache t) r'.r_cache; + unify_ref(r,r') + end + | Fun f, Fun f' -> + begin + if (set_global t (f.f_global || f'.f_global)) + then (H.iter (merge_cache t) (get_cache t)) + else (); + H.iter (merge_cache t) f'.f_cache; + unify_fun (f,f'); + end + | Pair p, Pair p' -> + begin + if (set_global t (p.p_global || p'.p_global)) + then (H.iter (merge_cache t) (get_cache t)) + else (); + H.iter (merge_cache t) p'.p_cache; + add_constraint_int (Unification (p.ptr,p'.ptr)); + add_constraint_int (Unification (p.lam,p'.lam)) + end + | _ -> raise (Inconsistent("unify")) + end + +(** Unify the ref's label, and apply unification structurally down the type. *) +and unify_ref (ri,ri' : rinfo * rinfo) : unit = + add_constraint_int (Unification (ri.points_to,ri'.points_to)); + unify_label(ri.rl,ri'.rl) + +(** Unify the fun's label, and apply unification structurally down the type, + at arguments and return value. When combining two lists of different lengths, + always choose the longer list for the representative. *) +and unify_fun (li,li' : finfo * finfo) : unit = + let rec union_args = function + | _, [] -> false + | [], _ -> true + | h :: t, h' :: t' -> + add_constraint_int (Unification (h,h')); union_args(t,t') + in + begin + unify_label(li.fl,li'.fl); + add_constraint_int (Unification (li.ret,li'.ret)); + if (union_args(!(li.args),!(li'.args))) + then li.args := !(li'.args); + end + +(** Unify two labels, combining the set of constants denoting aliases. *) +and unify_label (l,l' : label * label) : unit = + let pick_name (li,li' : lblinfo * lblinfo) = + if ( (String.length li.l_name) > 1 && (String.sub (li.l_name) 0 2) = "l_") + then + li.l_name <- li'.l_name + else () + in + let combine_label (li,li' : lblinfo *lblinfo) : lblinfo = + let p_bounds = U.deref (li.p_bounds) in + let p_bounds' = U.deref (li'.p_bounds) in + let n_bounds = U.deref (li.n_bounds) in + let n_bounds' = U.deref (li'.n_bounds) in + begin + pick_name(li,li'); + li.aliases <- C.union (li.aliases) (li'.aliases); + U.update (li.p_bounds, (B.union p_bounds p_bounds')); + U.update (li.n_bounds, (B.union n_bounds n_bounds')); + li + end + in(* + if (!debug) then + begin + Printf.printf "Unifying %s with %s...\n" + (string_of_label l) (string_of_label l'); + Printf.printf "pbounds : %s\n" (string_of_bounds true l); + Printf.printf "nbounds : %s\n" (string_of_bounds false l); + Printf.printf "pbounds : %s\n" (string_of_bounds true l'); + Printf.printf "nbounds : %s\n\n" (string_of_bounds false l') + end; *) + U.unify combine_label (l,l') + (* if (!debug) then + begin + Printf.printf "pbounds : %s\n" (string_of_bounds true l); + Printf.printf "nbounds : %s\n" (string_of_bounds false l) + end *) + +(** Re-assert a cached instantiation constraint, since the old type was + killed by a unification *) +and merge_cache (rep : tau) (i : int) (p,t' : polarity * tau) : unit = + add_constraint_int (Instantiation (rep,(i,p),t')) + +(** Pick the representative info for two tinfo's. This function prefers the + first argument when both arguments are the same structure, but when + one type is a structure and the other is a var, it picks the structure. *) +and combine (ti,ti' : tinfo * tinfo) : tinfo = + match ti,ti' with + | Var _, _ -> ti' + | _,_ -> ti + +(** Add a new constraint induced by other constraints. *) +and add_constraint_int (c : su_constraint) = + if (!print_constraints && !debug) then print_constraint c else (); + begin + match c with + | Instantiation _ -> + Q.add c inst_worklist + | Unification _ -> + Q.add c eq_worklist + end; + if (!debug) then solve_constraints() else () + +(** Add a new constraint introduced through this module's interface (a + top-level constraint). *) +and add_constraint (c : su_constraint) = + begin + add_constraint_int (c); + if (!print_constraints && not (!debug)) then print_constraint c else (); + if (!solve_online) then solve_constraints() else () + end + + +(* Fetch constraints, preferring equalities. *) +and fetch_constraint () : su_constraint option = + if (Q.length eq_worklist > 0) + then + Some (Q.take eq_worklist) + else if (Q.length inst_worklist > 0) + then + Some (Q.take inst_worklist) + else + None + +(** Returns the target of a cached instantiation, if it exists. *) +and target (t,i,p : tau * int * polarity) : (polarity * tau) option = + let cache = get_cache t in + if (global_tau t) then Some (Non,t) + else + try + Some (H.find cache i) + with + | Not_found -> None + +(** Caches a new instantiation, or applies well-formedness. *) +and store ( t,(i,p),t' : tau * (int * polarity) * tau) : bool = + let cache = get_cache t in + match target(t,i,p) with + | Some (p'',t'') -> + if (U.equal (t',t'') && (lub(p,p'') = p'')) + then + false + else + begin + add_constraint_int (Unification (t',t'')); + H.replace cache i (lub(p,p''),t''); + (* add a new forced instantiation as well *) + if (lub(p,p'') = p'') + then () + else + begin + unstore(t,i); + add_constraint_int (Instantiation (t,(i,lub(p,p'')),t'')) + end; + false + end + | None -> + begin + H.add cache i (p,t'); + true + end + +(** Remove a cached instantiation. Used when type structure changes *) +and unstore (t,i : tau * int) = +let cache = get_cache t in + H.remove cache i + +(** The main solver loop. *) +and solve_constraints () : unit = + match fetch_constraint () with + | Some c -> + begin + (match c with + | Instantiation (t,(i,p),t') -> instantiate_int (t,(i,p),t') + | Unification (t,t') -> unify_int (t,t') + ); + solve_constraints() + end + | None -> () + + +(***********************************************************************) +(* *) +(* Interface Functions *) +(* *) +(***********************************************************************) + +(** Return the contents of the lvalue. *) +let rvalue (lv : lvalue) : tau = + lv.contents + +(** Dereference the rvalue. If it does not have enough structure to support + the operation, then the correct structure is added via new unification + constraints. *) +let rec deref (t : tau) : lvalue = + match U.deref t with + | Pair p -> + ( + match U.deref (p.ptr) with + | Var _ -> + begin + (* let points_to = make_pair(fresh_var(),fresh_var()) in *) + let points_to = fresh_var() in + let l = fresh_label() in + let r = make_ref(l,points_to) + in + add_constraint (Unification (p.ptr,r)); + make_lval(l, points_to) + end + | Ref r -> make_lval(r.rl, r.points_to) + | _ -> raise (Inconsistent("deref")) + ) + | Var v -> + begin + add_constraint (Unification (t,make_pair(fresh_var(),fresh_var()))); + deref t + end + | _ -> raise (Inconsistent("deref -- no top level pair")) + +(** Form the union of [t] and [t']. *) +let join (t : tau) (t' : tau) : tau = + let t'' = fresh_var() in + add_constraint (Unification (t,t'')); + add_constraint (Unification (t',t'')); + t'' + +(** Form the union of a list [tl], expected to be the initializers of some + structure or array type. *) +let join_inits (tl : tau list) : tau = + let t' = fresh_var() in + begin + List.iter (function t'' -> add_constraint (Unification(t',t''))) tl; + t' + end + +(** Take the address of an lvalue. Does not add constraints. *) +let address (lv : lvalue) : tau = + make_pair (make_ref (lv.l, lv.contents), fresh_var() ) + +(** Instantiate a type with index i. By default, uses positive polarity. + Adds an instantiation constraint. *) +let instantiate (lv : lvalue) (i : int) : lvalue = + if (!analyze_mono) then lv + else + begin + let l' = fresh_label () in + let t' = fresh_var_i () in + instantiate_label(lv.l,(i,Pos),l'); + add_constraint (Instantiation (lv.contents,(i,Pos),t')); + make_lval(l',t') (* check -- fresh label ?? *) + end + +(** Constraint generated from assigning [t] to [lv]. *) +let assign (lv : lvalue) (t : tau) : unit = + add_constraint (Unification (lv.contents,t)) + + +(** Project out the first (ref) component or a pair. If the argument [t] has + no discovered structure, raise No_contents. *) +let proj_ref (t : tau) : tau = + match U.deref t with + | Pair p -> p.ptr + | Var v -> raise No_contents + | _ -> raise Bad_proj + +(* Project out the second (fun) component of a pair. If the argument [t] has + no discovered structure, create it on the fly by adding constraints. *) +let proj_fun (t : tau) : tau = + match U.deref t with + | Pair p -> p.lam + | Var v -> + let p,f = fresh_var(), fresh_var() in + add_constraint (Unification (t,make_pair(p,f))); + f + | _ -> raise Bad_proj + +let get_args (t : tau) : tau list ref = + match U.deref t with + | Fun f -> f.args + | _ -> raise (Inconsistent("get_args")) + +(** Function type [t] is applied to the arguments [actuals]. Unifies the + actuals with the formals of [t]. If no functions have been discovered for + [t] yet, create a fresh one and unify it with t. The result is the return + value of the function. *) +let apply (t : tau) (al : tau list) : tau = + let f = proj_fun(t) in + let actuals = ref al in + let formals,ret = + match U.deref f with + | Fun fi -> (fi.args),fi.ret + | Var v -> + let new_l,new_ret,new_args = + fresh_label(), fresh_var (), + List.map (function _ -> fresh_var()) (!actuals) + in + let new_fun = make_fun(new_l,new_args,new_ret) in + add_constraint (Unification(new_fun,f)); + (get_args new_fun,new_ret) + | Ref _ -> raise (Inconsistent ("apply_ref")) + | Pair _ -> raise (Inconsistent ("apply_pair")) + | Wild -> raise (Inconsistent("apply_wild")) + in + pad_args(formals,actuals); + List.iter2 (fun actual -> fun formal -> + add_constraint (Unification (actual,formal)) + ) !actuals !formals; + ret + +(** Create a new function type with name [name], list of formal arguments + [formals], and return value [ret]. Adds no constraints. *) +let make_function (name : string) (formals : lvalue list) (ret : tau) : tau = + let + f = make_fun(make_label(name),List.map (fun x -> rvalue x) formals, ret) + in + make_pair(fresh_var(),f) + +(** Create an lvalue. If [is_global] is true, the lvalue will be treated + monomorphically. *) +let make_lvalue (is_global : bool) (name : string) : lvalue = + if (!debug && is_global) + then + Printf.printf "Making global lvalue : %s\n" name + else (); + make_lval(make_label(name), make_var is_global name) + + +(** Create a fresh non-global named variable. *) +let make_fresh (name : string) : tau = + make_var false (name) + +(** The default type for constants. *) +let bottom () : tau = + make_var false ("bottom") + +(** Unify the result of a function with its return value. *) +let return (t : tau) (t' : tau) = + add_constraint (Unification (t,t')) + + +(***********************************************************************) +(* *) +(* Query/Extract Solutions *) +(* *) +(***********************************************************************) + +(** Unify the data stored in two label bounds. *) +let combine_lbounds (s,s' : label boundset * label boundset) = + B.union s s' + +(** Truncates a list of urefs [l] to those elements up to and including the + first occurence of the specified element [elt]. *) +let truncate l elt = + let keep = ref true in + List.filter + (fun x -> + if (not (!keep)) + then + false + else + begin + if (U.equal(x,elt)) + then + keep := false + else (); + true + end + ) l + +let debug_cycle_bounds is_pos c = + let rec debug_cycle_bounds' = function + | h :: [] -> + Printf.printf "%s --> %s\n" (string_of_bounds is_pos h) + (string_of_label2 h) + | h :: t -> + begin + Printf.printf "%s --> %s\n" (string_of_bounds is_pos h) + (string_of_label2 h); + debug_cycle_bounds' t + end + | [] -> () + in + debug_cycle_bounds' c + +(** For debugging, print a cycle of instantiations *) +let debug_cycle (is_pos,c,l,p) = + let kind = if is_pos then "P" else "N" in + let rec string_of_cycle = function + | h :: [] -> string_of_label2 h + | [] -> "" + | h :: t -> Printf.sprintf "%s,%s" (string_of_label2 h) (string_of_cycle t) + in + Printf.printf "Collapsing %s cycle around %s:\n" kind (string_of_label2 l); + Printf.printf "Elements are: %s\n" (string_of_cycle c); + Printf.printf "Per-element bounds:\n"; + debug_cycle_bounds is_pos c; + Printf.printf "Full path is: %s" (string_of_cycle p); + print_newline() + +(** Compute pos or neg flow, depending on [is_pos]. Searches for cycles in the + instantiations (can these even occur?) and unifies either the positive or + negative edge sets for the labels on the cycle. Note that this does not + ever unify the labels themselves. The return is the new bounds of the + argument label *) +let rec flow (is_pos : bool) (path : label list) (l : label) : label boundset = + let collapse_cycle () = + let cycle = truncate path l in + debug_cycle (is_pos,cycle,l,path); + List.iter (fun x -> U.unify combine_lbounds + ((get_bounds is_pos x),get_bounds is_pos l) + ) cycle + in + if (on_path l) + then + begin + collapse_cycle (); + (* set_on_path l false; *) + B.empty + end + else + if ( (is_pos && (U.deref l).p_cached) || + ( (not is_pos) && (U.deref l).n_cached) ) then + begin + U.deref (get_bounds is_pos l) + end + else + begin + let newbounds = ref B.empty in + let base = get_bounds is_pos l in + set_on_path l true; + if (is_pos) then + (U.deref l).p_cached <- true + else + (U.deref l).n_cached <- true; + B.iter + (fun x -> + if (U.equal(x.info,l)) then () + else + (newbounds := + (B.union (!newbounds) (flow is_pos (l :: path) x.info))) + ) (U.deref base); + set_on_path l false; + U.update (base,(B.union (U.deref base) !newbounds)); + U.deref base + end + +(** Compute and cache any positive flow. *) +let pos_flow l : constantset = + let result = ref C.empty in + begin + ignore (flow true [] l); + B.iter (fun x -> result := C.union (!result) (U.deref(x.info)).aliases ) + (U.deref (get_bounds true l)); + !result + end + +(** Compute and cache any negative flow. *) +let neg_flow l : constantset = + let result = ref C.empty in + begin + ignore (flow false [] l); + B.iter (fun x -> result := C.union (!result) (U.deref(x.info)).aliases ) + (U.deref (get_bounds false l)); + !result + end + +(** Compute and cache any pos-neg flow. Assumes that both pos_flow and + neg_flow have been computed for the label [l]. *) +let pos_neg_flow(l : label) : constantset = + let result = ref C.empty in + begin + B.iter (fun x -> result := C.union (!result) (pos_flow x.info)) + (U.deref (get_bounds false l)); + !result + end + +(** Compute a points-to set by computing positive, then negative, then + positive-negative flow for a label. *) +let points_to_int (lv : lvalue) : constantset = + let visited_caches : cache list ref = ref [] in + let rec points_to_tau (t : tau) : constantset = + try + begin + match U.deref (proj_ref t) with + | Var v -> C.empty + | Ref r -> + begin + let pos = pos_flow r.rl in + let neg = neg_flow r.rl in + let interproc = C.union (pos_neg_flow r.rl) (C.union pos neg) + in + C.union ((U.deref(r.rl)).aliases) interproc + end + | _ -> raise (Inconsistent ("points_to")) + end + with + | No_contents -> + begin + match (U.deref t) with + | Var v -> rebuild_flow v.v_cache + | _ -> raise (Inconsistent ("points_to")) + end + and rebuild_flow (c : cache) : constantset = + if (List.mem c (!visited_caches) ) (* cyclic instantiations *) + then + begin + (* visited_caches := List.tl (!visited_caches); *) (* check *) + C.empty + end + else + begin + visited_caches := c :: (!visited_caches); + let result = ref (C.empty) in + H.iter (fun _ -> fun(p,t) -> + match p with + | Pos -> () + | _ -> result := C.union (!result) (points_to_tau t) + ) c; + visited_caches := List.tl (!visited_caches); + !result + end + in + if (!no_flow) then + (U.deref lv.l).aliases + else + points_to_tau (lv.contents) + +let points_to (lv : lvalue) : string list = + List.map snd (C.elements (points_to_int lv)) + +let alias_query (a_progress : bool) (lv : lvalue list) : int * int = + (0,0) (* todo *) +(* + let a_count = ref 0 in + let ptsets = List.map points_to_int lv in + let total_sets = List.length ptsets in + let counted_sets = ref 0 in + let record_alias s s' = + if (C.is_empty (C.inter s s')) + then () + else (incr a_count) + in + let rec check_alias = function + | h :: t -> + begin + List.iter (record_alias h) ptsets; + check_alias t + end + | [] -> () + in + check_alias ptsets; + !a_count +*) diff --git a/cil/src/ext/pta/steensgaard.mli b/cil/src/ext/pta/steensgaard.mli new file mode 100644 index 00000000..f009e7e0 --- /dev/null +++ b/cil/src/ext/pta/steensgaard.mli @@ -0,0 +1,71 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(***********************************************************************) +(* *) +(* *) +(* This file is currently unused by CIL. It is included in the *) +(* distribution for reference only. *) +(* *) +(* *) +(***********************************************************************) + +type lvalue +type tau +val debug : bool ref +val debug_constraints : bool ref +val print_constraints : bool ref +val no_flow : bool ref +val no_sub : bool ref +val analyze_mono : bool ref +val solve_online : bool ref +val solve_constraints : unit -> unit +val rvalue : lvalue -> tau +val deref : tau -> lvalue +val join : tau -> tau -> tau +val join_inits : tau list -> tau +val address : lvalue -> tau +val instantiate : lvalue -> int -> lvalue +val assign : lvalue -> tau -> unit +val apply : tau -> tau list -> tau +val make_function : string -> lvalue list -> tau -> tau +val make_lvalue : bool -> string -> lvalue +val bottom : unit -> tau +val return : tau -> tau -> unit +val make_fresh : string -> tau +val points_to : lvalue -> string list +val string_of_lvalue : lvalue -> string +val global_lvalue : lvalue -> bool +val alias_query : bool -> lvalue list -> int * int diff --git a/cil/src/ext/pta/uref.ml b/cil/src/ext/pta/uref.ml new file mode 100644 index 00000000..53f36400 --- /dev/null +++ b/cil/src/ext/pta/uref.ml @@ -0,0 +1,94 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +exception Bad_find + +type 'a urefC = + Ecr of 'a * int + | Link of 'a uref +and 'a uref = 'a urefC ref + +let rec find p = + match !p with + | Ecr _ -> p + | Link p' -> + let p'' = find p' + in p := Link p''; p'' + +let uref x = ref (Ecr(x,0)) + +let equal (p,p') = (find p == find p') + +let deref p = + match ! (find p) with + | Ecr (x,_) -> x + | _ -> raise Bad_find + +let update (p,x) = + let p' = find p + in + match !p' with + | Ecr (_,rank) -> p' := Ecr(x,rank) + | _ -> raise Bad_find + +let unify f (p,q) = + let p',q' = find p, find q in + match (!p',!q') with + | (Ecr(px,pr),Ecr(qx,qr)) -> + let x = f(px,qx) in + if (p' == q') then + p' := Ecr(x,pr) + else if pr == qr then + (q' := Ecr(x,qr+1); p' := Link q') + else if pr < qr then + (q' := Ecr(x,qr); p' := Link q') + else (* pr > qr *) + (p' := Ecr(x,pr); q' := Link p') + | _ -> raise Bad_find + +let union (p,q) = + let p',q' = find p, find q in + match (!p',!q') with + | (Ecr(px,pr),Ecr(qx,qr)) -> + if (p' == q') then + () + else if pr == qr then + (q' := Ecr(qx, qr+1); p' := Link q') + else if pr < qr then + p' := Link q' + else (* pr > qr *) + q' := Link p' + | _ -> raise Bad_find + + diff --git a/cil/src/ext/pta/uref.mli b/cil/src/ext/pta/uref.mli new file mode 100644 index 00000000..1dee5036 --- /dev/null +++ b/cil/src/ext/pta/uref.mli @@ -0,0 +1,65 @@ +(* + * + * Copyright (c) 2001-2002, + * John Kodumal + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +type 'a uref + +(** Union-find with union by rank and path compression + + This is an implementation of Tarjan's union-find data structure using + generics. The interface is analagous to standard references, with the + addition of a union operation which makes two references indistinguishable. + +*) + +val uref: 'a -> 'a uref + (** Create a new uref *) + +val equal: 'a uref * 'a uref -> bool + (** Test whether two urefs share the same equivalence class *) + +val deref: 'a uref -> 'a + (** Extract the contents of this reference *) + +val update: 'a uref * 'a -> unit + (** Update the value stored in this reference *) + +val unify: ('a * 'a -> 'a) -> 'a uref * 'a uref -> unit + (** [unify f (p,q)] unifies references [p] and [q], making them + indistinguishable. The contents of the reference are the result of + [f] *) + +val union: 'a uref * 'a uref -> unit + (** [unify (p,q)] unifies references [p] and [q], making them + indistinguishable. The contents of the reference are the contents of + one of the first or second arguments (unspecified) *) diff --git a/cil/src/ext/reachingdefs.ml b/cil/src/ext/reachingdefs.ml new file mode 100644 index 00000000..b6af37cb --- /dev/null +++ b/cil/src/ext/reachingdefs.ml @@ -0,0 +1,511 @@ +(* Calculate reaching definitions for each instruction. + * Determine when it is okay to replace some variables with + * expressions. + * + * After calling computeRDs on a fundec, + * ReachingDef.stmtStartData will contain a mapping from + * statement ids to data about which definitions reach each + * statement. ReachingDef.defIdStmtHash will contain a + * mapping from definition ids to the statement in which + * that definition takes place. + * + * instrRDs takes a list of instructions, and the + * definitions that reach the first instruction, and + * for each instruction figures out which definitions + * reach into or out of each instruction. + * + *) + +open Cil +open Pretty + +module E = Errormsg +module DF = Dataflow +module UD = Usedef +module IH = Inthash +module U = Util +module S = Stats + +let debug_fn = ref "" + +module IOS = + Set.Make(struct + type t = int option + let compare io1 io2 = + match io1, io2 with + Some i1, Some i2 -> Pervasives.compare i1 i2 + | Some i1, None -> 1 + | None, Some i2 -> -1 + | None, None -> 0 + end) + +let debug = ref false + +(* return the intersection of + Inthashes ih1 and ih2 *) +let ih_inter ih1 ih2 = + let ih' = IH.copy ih1 in + IH.iter (fun id vi -> + if not(IH.mem ih2 id) then + IH.remove ih' id else + ()) ih1; + ih' + +let ih_union ih1 ih2 = + let ih' = IH.copy ih1 in + IH.iter (fun id vi -> + if not(IH.mem ih' id) + then IH.add ih' id vi + else ()) ih2; + ih' + +(* Lookup varinfo in iosh. If the set contains None + or is not a singleton, return None, otherwise + return Some of the singleton *) +(* IOS.t IH.t -> varinfo -> int option *) +let iosh_singleton_lookup iosh vi = + if IH.mem iosh vi.vid then + let ios = IH.find iosh vi.vid in + if not (IOS.cardinal ios = 1) then None + else IOS.choose ios + else None + +(* IOS.t IH.t -> varinfo -> IOS.t *) +let iosh_lookup iosh vi = + if IH.mem iosh vi.vid + then Some(IH.find iosh vi.vid) + else None + +(* return Some(vid) if iosh contains defId. + return None otherwise *) +(* IOS.t IH.t -> int -> int option *) +let iosh_defId_find iosh defId = + (* int -> IOS.t -> int option -> int option*) + let get_vid vid ios io = + match io with + Some(i) -> Some(i) + | None -> + let there = IOS.exists + (function None -> false + | Some(i') -> defId = i') ios in + if there then Some(vid) else None + in + IH.fold get_vid iosh None + +(* The resulting iosh will contain the + union of the same entries from iosh1 and + iosh2. If iosh1 has an entry that iosh2 + does not, then the result will contain + None in addition to the things from the + entry in iosh1. *) +(* XXX this function is a performance bottleneck *) +let iosh_combine iosh1 iosh2 = + let iosh' = IH.copy iosh1 in + IH.iter (fun id ios1 -> + try let ios2 = IH.find iosh2 id in + let newset = IOS.union ios1 ios2 in + IH.replace iosh' id newset; + with Not_found -> + let newset = IOS.add None ios1 in + IH.replace iosh' id newset) iosh1; + IH.iter (fun id ios2 -> + if not(IH.mem iosh1 id) then + let newset = IOS.add None ios2 in + IH.add iosh' id newset) iosh2; + iosh' + + +(* determine if two IOS.t IH.t s are the same *) +let iosh_equals iosh1 iosh2 = +(* if IH.length iosh1 = 0 && not(IH.length iosh2 = 0) || + IH.length iosh2 = 0 && not(IH.length iosh1 = 0)*) + if not(IH.length iosh1 = IH.length iosh2) + then + (if !debug then ignore(E.log "iosh_equals: length not same\n"); + false) + else + IH.fold (fun vid ios b -> + if not b then b else + try let ios2 = IH.find iosh2 vid in + if not(IOS.compare ios ios2 = 0) then + (if !debug then ignore(E.log "iosh_equals: sets for vid %d not equal\n" vid); + false) + else true + with Not_found -> + (if !debug then ignore(E.log "iosh_equals: vid %d not in iosh2\n" vid); + false)) iosh1 true + +(* replace an entire set with a singleton. + if nothing was there just add the singleton *) +(* IOS.t IH.t -> int -> varinfo -> unit *) +let iosh_replace iosh i vi = + if IH.mem iosh vi.vid then + let newset = IOS.singleton (Some i) in + IH.replace iosh vi.vid newset + else + let newset = IOS.singleton (Some i) in + IH.add iosh vi.vid newset + +(* remove definitions that are killed. + add definitions that are gend *) +(* Takes the defs, the data, and a function for + obtaining the next def id *) +(* VS.t -> IOS.t IH.t -> (unit->int) -> unit *) +let proc_defs vs iosh f = + let pd vi = + let newi = f() in + (*if !debug then + ignore (E.log "proc_defs: genning %d\n" newi);*) + iosh_replace iosh newi vi + in + UD.VS.iter pd vs + +let idMaker () start = + let counter = ref start in + fun () -> + let ret = !counter in + counter := !counter + 1; + ret + +(* given reaching definitions into a list of + instructions, figure out the definitions that + reach in/out of each instruction *) +(* if out is true then calculate the definitions that + go out of each instruction, if it is false then + calculate the definitions reaching into each instruction *) +(* instr list -> int -> (varinfo IH.t * int) -> bool -> (varinfo IH.t * int) list *) +let iRDsHtbl = Hashtbl.create 128 +let instrRDs il sid (ivih, s, iosh) out = + if Hashtbl.mem iRDsHtbl (sid,out) then Hashtbl.find iRDsHtbl (sid,out) else + +(* let print_instr i (_,s', iosh') = *) +(* let d = d_instr () i ++ line in *) +(* fprint stdout 80 d; *) +(* flush stdout *) +(* in *) + + let proc_one hil i = + match hil with + | [] -> + let _, defd = UD.computeUseDefInstr i in + if UD.VS.is_empty defd + then ((*if !debug then print_instr i ((), s, iosh);*) + [((), s, iosh)]) + else + let iosh' = IH.copy iosh in + proc_defs defd iosh' (idMaker () s); + (*if !debug then + print_instr i ((), s + UD.VS.cardinal defd, iosh');*) + ((), s + UD.VS.cardinal defd, iosh')::hil + | (_, s', iosh')::hrst as l -> + let _, defd = UD.computeUseDefInstr i in + if UD.VS.is_empty defd + then + ((*if !debug then + print_instr i ((),s', iosh');*) + ((), s', iosh')::l) + else let iosh'' = IH.copy iosh' in + proc_defs defd iosh'' (idMaker () s'); + (*if !debug then + print_instr i ((), s' + UD.VS.cardinal defd, iosh'');*) + ((),s' + UD.VS.cardinal defd, iosh'')::l + in + let folded = List.fold_left proc_one [((),s,iosh)] il in + let foldedout = List.tl (List.rev folded) in + let foldednotout = List.rev (List.tl folded) in + Hashtbl.add iRDsHtbl (sid,true) foldedout; + Hashtbl.add iRDsHtbl (sid,false) foldednotout; + if out then foldedout else foldednotout + + + +(* The right hand side of an assignment is either + a function call or an expression *) +type rhs = RDExp of exp | RDCall of instr + +(* take the id number of a definition and return + the rhs of the definition if there is one. + Returns None if, for example, the definition is + caused by an assembly instruction *) +(* stmt IH.t -> (()*int*IOS.t IH.t) IH.t -> int -> (rhs * int * IOS.t IH.t) option *) +let rhsHtbl = IH.create 64 (* to avoid recomputation *) +let getDefRhs didstmh stmdat defId = + if IH.mem rhsHtbl defId then IH.find rhsHtbl defId else + let stm = + try IH.find didstmh defId + with Not_found -> E.s (E.error "getDefRhs: defId %d not found\n" defId) in + let (_,s,iosh) = + try IH.find stmdat stm.sid + with Not_found -> E.s (E.error "getDefRhs: sid %d not found \n" stm.sid) in + match stm.skind with + Instr il -> + let ivihl = instrRDs il stm.sid ((),s,iosh) true in (* defs that reach out of each instr *) + let ivihl_in = instrRDs il stm.sid ((),s,iosh) false in (* defs that reach into each instr *) + let iihl = List.combine (List.combine il ivihl) ivihl_in in + (try let ((i,(_,_,diosh)),(_,_,iosh_in)) = List.find (fun ((i,(_,_,iosh')),_) -> + match S.time "iosh_defId_find" (iosh_defId_find iosh') defId with + Some vid -> + (match i with + Set((Var vi',NoOffset),_,_) -> vi'.vid = vid (* _ -> NoOffset *) + | Call(Some(Var vi',NoOffset),_,_,_) -> vi'.vid = vid (* _ -> NoOffset *) + | Call(None,_,_,_) -> false + | Asm(_,_,sll,_,_,_) -> List.exists + (function (_,(Var vi',NoOffset)) -> vi'.vid = vid | _ -> false) sll + | _ -> false) + | None -> false) iihl in + (match i with + Set((lh,_),e,_) -> + (match lh with + Var(vi') -> + (IH.add rhsHtbl defId (Some(RDExp(e),stm.sid,iosh_in)); + Some(RDExp(e), stm.sid, iosh_in)) + | _ -> E.s (E.error "Reaching Defs getDefRhs: right vi not first\n")) + | Call(lvo,e,el,_) -> + (IH.add rhsHtbl defId (Some(RDCall(i),stm.sid,iosh_in)); + Some(RDCall(i), stm.sid, iosh_in)) + | Asm(a,sl,slvl,sel,sl',_) -> None) (* ? *) + with Not_found -> + (if !debug then ignore (E.log "getDefRhs: No instruction defines %d\n" defId); + IH.add rhsHtbl defId None; + None)) + | _ -> E.s (E.error "getDefRhs: defining statement not an instruction list %d\n" defId) + (*None*) + +let prettyprint didstmh stmdat () (_,s,iosh) = text "" + (*seq line (fun (vid,ios) -> + num vid ++ text ": " ++ + IOS.fold (fun io d -> match io with + None -> d ++ text "None " + | Some i -> + let stm = IH.find didstmh i in + match getDefRhs didstmh stmdat i with + None -> d ++ num i + | Some(RDExp(e),_,_) -> + d ++ num i ++ text " " ++ (d_exp () e) + | Some(RDCall(c),_,_) -> + d ++ num i ++ text " " ++ (d_instr () c)) + ios nil) + (IH.tolist iosh)*) + +module ReachingDef = + struct + + let name = "Reaching Definitions" + + let debug = debug + + (* Should the analysis calculate may-reach + or must-reach *) + let mayReach = ref false + + + (* An integer that tells the id number of + the first definition *) + (* Also a hash from variable ids to a set of + definition ids that reach this statement. + None means there is a path to this point on which + there is no definition of the variable *) + type t = (unit * int * IOS.t IH.t) + + let copy (_, i, iosh) = ((), i, IH.copy iosh) + + (* entries for starting statements must + be added before calling compute *) + let stmtStartData = IH.create 32 + + (* a mapping from definition ids to + the statement corresponding to that id *) + let defIdStmtHash = IH.create 32 + + (* mapping from statement ids to statements + for better performance of ok_to_replace *) + let sidStmtHash = IH.create 64 + + (* pretty printer *) + let pretty = prettyprint defIdStmtHash stmtStartData + + + (* The first id to use when computeFirstPredecessor + is next called *) + let nextDefId = ref 0 + + (* Count the number of variable definitions in + a statement *) + let num_defs stm = + match stm.skind with + Instr(il) -> List.fold_left (fun s i -> + let _, d = UD.computeUseDefInstr i in + s + UD.VS.cardinal d) 0 il + | _ -> let _, d = UD.computeUseDefStmtKind stm.skind in + UD.VS.cardinal d + + (* the first predecessor is just the data in along with + the id of the first definition of the statement, + which we get from nextDefId *) + let computeFirstPredecessor stm (_, s, iosh) = + let startDefId = max !nextDefId s in + let numds = num_defs stm in + let rec loop n = + if n < 0 + then () + else + (if !debug then + ignore (E.log "RD: defId %d -> stm %d\n" (startDefId + n) stm.sid); + IH.add defIdStmtHash (startDefId + n) stm; + loop (n-1)) + in + loop (numds - 1); + nextDefId := startDefId + numds; + ((), startDefId, IH.copy iosh) + + + let combinePredecessors (stm:stmt) ~(old:t) ((_, s, iosh):t) = + match old with (_, os, oiosh) -> + if S.time "iosh_equals" (iosh_equals oiosh) iosh then None else + Some((), os, S.time "iosh_combine" (iosh_combine oiosh) iosh) + + (* return an action that removes things that + are redefinied and adds the generated defs *) + let doInstr inst (_, s, iosh) = + let transform (_, s', iosh') = + let _, defd = UD.computeUseDefInstr inst in + proc_defs defd iosh' (idMaker () s'); + ((), s' + UD.VS.cardinal defd, iosh') + in + DF.Post transform + + (* all the work gets done at the instruction level *) + let doStmt stm (_, s, iosh) = + if not(IH.mem sidStmtHash stm.sid) then + IH.add sidStmtHash stm.sid stm; + if !debug then ignore(E.log "RD: looking at %a\n" d_stmt stm); + DF.SDefault + + let doGuard condition _ = DF.GDefault + + let filterStmt stm = true + +end + +module RD = DF.ForwardsDataFlow(ReachingDef) + +(* map all variables in vil to a set containing + None in iosh *) +(* IOS.t IH.t -> varinfo list -> () *) +let iosh_none_fill iosh vil = + List.iter (fun vi -> + IH.add iosh vi.vid (IOS.singleton None)) + vil + +(* Computes the reaching definitions for a + function. *) +(* Cil.fundec -> unit *) +let computeRDs fdec = + try + if compare fdec.svar.vname (!debug_fn) = 0 then + (debug := true; + ignore (E.log "%s =\n%a\n" (!debug_fn) d_block fdec.sbody)); + let bdy = fdec.sbody in + let slst = bdy.bstmts in + let _ = IH.clear ReachingDef.stmtStartData in + let _ = IH.clear ReachingDef.defIdStmtHash in + let _ = IH.clear rhsHtbl in + let _ = Hashtbl.clear iRDsHtbl in + let _ = ReachingDef.nextDefId := 0 in + let fst_stm = List.hd slst in + let fst_iosh = IH.create 32 in + let _ = UD.onlyNoOffsetsAreDefs := false in + (*let _ = iosh_none_fill fst_iosh fdec.sformals in*) + let _ = IH.add ReachingDef.stmtStartData fst_stm.sid ((), 0, fst_iosh) in + let _ = ReachingDef.computeFirstPredecessor fst_stm ((), 0, fst_iosh) in + if !debug then + ignore (E.log "computeRDs: fst_stm.sid=%d\n" fst_stm.sid); + RD.compute [fst_stm]; + if compare fdec.svar.vname (!debug_fn) = 0 then + debug := false + (* now ReachingDef.stmtStartData has the reaching def data in it *) + with Failure "hd" -> if compare fdec.svar.vname (!debug_fn) = 0 then + debug := false + +(* return the definitions that reach the statement + with statement id sid *) +let getRDs sid = + try + Some (IH.find ReachingDef.stmtStartData sid) + with Not_found -> + None +(* E.s (E.error "getRDs: sid %d not found\n" sid) *) + +let getDefIdStmt defid = + try + Some(IH.find ReachingDef.defIdStmtHash defid) + with Not_found -> + None + +let getStmt sid = + try Some(IH.find ReachingDef.sidStmtHash sid) + with Not_found -> None + +(* Pretty print the reaching definition data for + a function *) +let ppFdec fdec = + seq line (fun stm -> + let ivih = IH.find ReachingDef.stmtStartData stm.sid in + ReachingDef.pretty () ivih) fdec.sbody.bstmts + + +(* If this class is extended with a visitor on expressions, + then the current rd data is available at each expression *) +class rdVisitorClass = object (self) + inherit nopCilVisitor + + (* the statement being worked on *) + val mutable sid = -1 + + (* if a list of instructions is being processed, + then this is the corresponding list of + reaching definitions *) + val mutable rd_dat_lst = [] + + (* these are the reaching defs for the current + instruction if there is one *) + val mutable cur_rd_dat = None + + method vstmt stm = + sid <- stm.sid; + match getRDs sid with + None -> + if !debug then ignore(E.log "rdVis: stm %d had no data\n" sid); + cur_rd_dat <- None; + DoChildren + | Some(_,s,iosh) -> + match stm.skind with + Instr il -> + if !debug then ignore(E.log "rdVis: visit il\n"); + rd_dat_lst <- instrRDs il stm.sid ((),s,iosh) false; + DoChildren + | _ -> + if !debug then ignore(E.log "rdVis: visit non-il\n"); + cur_rd_dat <- None; + DoChildren + + method vinst i = + if !debug then ignore(E.log "rdVis: before %a, rd_dat_lst is %d long\n" + d_instr i (List.length rd_dat_lst)); + try + cur_rd_dat <- Some(List.hd rd_dat_lst); + rd_dat_lst <- List.tl rd_dat_lst; + DoChildren + with Failure "hd" -> + if !debug then ignore(E.log "rdVis: il rd_dat_lst mismatch\n"); + DoChildren + + method get_cur_iosh () = + match cur_rd_dat with + None -> (match getRDs sid with + None -> None + | Some(_,_,iosh) -> Some iosh) + | Some(_,_,iosh) -> Some iosh + +end + diff --git a/cil/src/ext/sfi.ml b/cil/src/ext/sfi.ml new file mode 100755 index 00000000..9886526c --- /dev/null +++ b/cil/src/ext/sfi.ml @@ -0,0 +1,337 @@ +(* + * + * Copyright (c) 2005, + * George C. Necula + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(** This is a module that inserts runtime checks for memory reads/writes and + * allocations *) + +open Pretty +open Cil +module E = Errormsg +module H = Hashtbl + +let doSfi = ref false +let doSfiReads = ref false +let doSfiWrites = ref true + +(* A number of functions to be skipped *) +let skipFunctions : (string, unit) H.t = H.create 13 +let mustSfiFunction (f: fundec) : bool = + not (H.mem skipFunctions f.svar.vname) + +(** Some functions are known to be allocators *) +type dataLocation = + InResult (* Interesting data is in the return value *) + | InArg of int (* in the nth argument. Starts from 1. *) + | InArgTimesArg of int * int (* (for size) data is the product of two + * arguments *) + | PointedToByArg of int (* pointed to by nth argument *) + +(** Compute the data based on the location and the actual argument list *) +let extractData (dl: dataLocation) (args: exp list) (res: lval option) : exp = + let getArg (n: int) = + try List.nth args (n - 1) (* Args are based at 1 *) + with _ -> E.s (E.bug "Cannot extract argument %d at %a" + n d_loc !currentLoc) + in + match dl with + InResult -> begin + match res with + None -> + E.s (E.bug "Cannot extract InResult data (at %a)" d_loc !currentLoc) + | Some r -> Lval r + end + | InArg n -> getArg n + | InArgTimesArg (n1, n2) -> + let a1 = getArg n1 in + let a2 = getArg n2 in + BinOp(Mult, mkCast ~e:a1 ~newt:longType, + mkCast ~e:a2 ~newt:longType, longType) + | PointedToByArg n -> + let a = getArg n in + Lval (mkMem a NoOffset) + + + +(* for each allocator, where is the length and where is the result *) +let allocators: (string, (dataLocation * dataLocation)) H.t = H.create 13 +let _ = + H.add allocators "malloc" (InArg 1, InResult); + H.add allocators "calloc" (InArgTimesArg (1, 2), InResult); + H.add allocators "realloc" (InArg 2, InResult) + +(* for each deallocator, where is the data being deallocated *) +let deallocators: (string, dataLocation) H.t = H.create 13 +let _= + H.add deallocators "free" (InArg 1); + H.add deallocators "realloc" (InArg 1) + +(* Returns true if the given lvalue offset ends in a bitfield access. *) +let rec is_bitfield lo = match lo with + | NoOffset -> false + | Field(fi,NoOffset) -> not (fi.fbitfield = None) + | Field(_,lo) -> is_bitfield lo + | Index(_,lo) -> is_bitfield lo + +(* Return an expression that evaluates to the address of the given lvalue. + * For most lvalues, this is merely AddrOf(lv). However, for bitfields + * we do some offset gymnastics. + *) +let addr_of_lv (lv: lval) = + let lh, lo = lv in + if is_bitfield lo then begin + (* we figure out what the address would be without the final bitfield + * access, and then we add in the offset of the bitfield from the + * beginning of its enclosing comp *) + let rec split_offset_and_bitfield lo = match lo with + | NoOffset -> failwith "logwrites: impossible" + | Field(fi,NoOffset) -> (NoOffset,fi) + | Field(e,lo) -> let a,b = split_offset_and_bitfield lo in + ((Field(e,a)),b) + | Index(e,lo) -> let a,b = split_offset_and_bitfield lo in + ((Index(e,a)),b) + in + let new_lv_offset, bf = split_offset_and_bitfield lo in + let new_lv = (lh, new_lv_offset) in + let enclosing_type = TComp(bf.fcomp, []) in + let bits_offset, bits_width = + bitsOffset enclosing_type (Field(bf,NoOffset)) in + let bytes_offset = bits_offset / 8 in + let lvPtr = mkCast ~e:(mkAddrOf (new_lv)) ~newt:(charPtrType) in + (BinOp(PlusPI, lvPtr, (integer bytes_offset), ulongType)) + end else + (mkAddrOf (lh,lo)) + + +let mustLogLval (forwrite: bool) (lv: lval) : bool = + match lv with + Var v, off -> (* Inside a variable. We assume the array offsets are fine *) + false + | Mem e, off -> + if forwrite && not !doSfiWrites then + false + else if not forwrite && not !doSfiReads then + false + + (* If this is an lval of function type, we do not log it *) + else if isFunctionType (typeOfLval lv) then + false + else + true + +(* Create prototypes for the logging functions *) +let mkProto (name: string) (args: (string * typ * attributes) list) = + let fdec = emptyFunction name in + fdec.svar.vtype <- TFun(voidType, + Some args, false, []); + fdec + + +let logReads = mkProto "logRead" [ ("addr", voidPtrType, []); + ("what", charPtrType, []); + ("file", charPtrType, []); + ("line", intType, []) ] +let callLogRead (lv: lval) = + let what = Pretty.sprint 80 (d_lval () lv) in + Call(None, + Lval(Var(logReads.svar),NoOffset), + [ addr_of_lv lv; mkString what; mkString !currentLoc.file; + integer !currentLoc.line], !currentLoc ) + +let logWrites = mkProto "logWrite" [ ("addr", voidPtrType, []); + ("what", charPtrType, []); + ("file", charPtrType, []); + ("line", intType, []) ] +let callLogWrite (lv: lval) = + let what = Pretty.sprint 80 (d_lval () lv) in + Call(None, + Lval(Var(logWrites.svar), NoOffset), + [ addr_of_lv lv; mkString what; mkString !currentLoc.file; + integer !currentLoc.line], !currentLoc ) + +let logStackFrame = mkProto "logStackFrame" [ ("func", charPtrType, []) ] +let callLogStack (fname: string) = + Call(None, + Lval(Var(logStackFrame.svar), NoOffset), + [ mkString fname; ], !currentLoc ) + +let logAlloc = mkProto "logAlloc" [ ("addr", voidPtrType, []); + ("size", intType, []); + ("file", charPtrType, []); + ("line", intType, []) ] +let callLogAlloc (szloc: dataLocation) + (resLoc: dataLocation) + (args: exp list) + (res: lval option) = + let sz = extractData szloc args res in + let res = extractData resLoc args res in + Call(None, + Lval(Var(logAlloc.svar), NoOffset), + [ res; sz; mkString !currentLoc.file; + integer !currentLoc.line ], !currentLoc ) + + +let logFree = mkProto "logFree" [ ("addr", voidPtrType, []); + ("file", charPtrType, []); + ("line", intType, []) ] +let callLogFree (dataloc: dataLocation) + (args: exp list) + (res: lval option) = + let data = extractData dataloc args res in + Call(None, + Lval(Var(logFree.svar), NoOffset), + [ data; mkString !currentLoc.file; + integer !currentLoc.line ], !currentLoc ) + +class sfiVisitorClass : Cil.cilVisitor = object (self) + inherit nopCilVisitor + + method vexpr (e: exp) : exp visitAction = + match e with + Lval lv when mustLogLval false lv -> (* A read *) + self#queueInstr [ callLogRead lv ]; + DoChildren + + | _ -> DoChildren + + + method vinst (i: instr) : instr list visitAction = + match i with + Set(lv, e, l) when mustLogLval true lv -> + self#queueInstr [ callLogWrite lv ]; + DoChildren + + | Call(lvo, f, args, l) -> + (* Instrument the write *) + (match lvo with + Some lv when mustLogLval true lv -> + self#queueInstr [ callLogWrite lv ] + | _ -> ()); + (* Do the expressions in the call, and then see if we need to + * instrument the function call *) + ChangeDoChildrenPost + ([i], + (fun il -> + currentLoc := l; + match f with + Lval (Var fv, NoOffset) -> begin + (* Is it an allocator? *) + try + let szloc, resloc = H.find allocators fv.vname in + il @ [callLogAlloc szloc resloc args lvo] + with Not_found -> begin + (* Is it a deallocator? *) + try + let resloc = H.find deallocators fv.vname in + il @ [ callLogFree resloc args lvo ] + with Not_found -> + il + end + end + | _ -> il)) + + | _ -> DoChildren + + method vfunc (fdec: fundec) = + (* Instead a stack log at the start of a function *) + ChangeDoChildrenPost + (fdec, + fun fdec -> + fdec.sbody <- + mkBlock + [ mkStmtOneInstr (callLogStack fdec.svar.vname); + mkStmt (Block fdec.sbody) ]; + fdec) + +end + +let doit (f: file) = + let sfiVisitor = new sfiVisitorClass in + let compileLoc (l: location) = function + ACons("inres", []) -> InResult + | ACons("inarg", [AInt n]) -> InArg n + | ACons("inargxarg", [AInt n1; AInt n2]) -> InArgTimesArg (n1, n2) + | ACons("pointedby", [AInt n]) -> PointedToByArg n + | _ -> E.warn "Invalid location at %a" d_loc l; + InResult + in + iterGlobals f + (fun glob -> + match glob with + GFun(fdec, _) when mustSfiFunction fdec -> + ignore (visitCilFunction sfiVisitor fdec) + | GPragma(Attr("sfiignore", al), l) -> + List.iter + (function AStr fn -> H.add skipFunctions fn () + | _ -> E.warn "Invalid argument in \"sfiignore\" pragma at %a" + d_loc l) + al + + | GPragma(Attr("sfialloc", al), l) -> begin + match al with + AStr fname :: locsz :: locres :: [] -> + H.add allocators fname (compileLoc l locsz, compileLoc l locres) + | _ -> E.warn "Invalid sfialloc pragma at %a" d_loc l + end + + | GPragma(Attr("sfifree", al), l) -> begin + match al with + AStr fname :: locwhat :: [] -> + H.add deallocators fname (compileLoc l locwhat) + | _ -> E.warn "Invalid sfifree pragma at %a" d_loc l + end + + + | _ -> ()); + (* Now add the prototypes for the instrumentation functions *) + f.globals <- + GVarDecl (logReads.svar, locUnknown) :: + GVarDecl (logWrites.svar, locUnknown) :: + GVarDecl (logStackFrame.svar, locUnknown) :: + GVarDecl (logAlloc.svar, locUnknown) :: + GVarDecl (logFree.svar, locUnknown) :: f.globals + + +let feature : featureDescr = + { fd_name = "sfi"; + fd_enabled = doSfi; + fd_description = "instrument memory operations"; + fd_extraopt = [ + "--sfireads", Arg.Set doSfiReads, "SFI for reads"; + "--sfiwrites", Arg.Set doSfiWrites, "SFI for writes"; + ]; + fd_doit = doit; + fd_post_check = true; + } + diff --git a/cil/src/ext/simplemem.ml b/cil/src/ext/simplemem.ml new file mode 100644 index 00000000..1b27815c --- /dev/null +++ b/cil/src/ext/simplemem.ml @@ -0,0 +1,132 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* + * Simplemem: Transform a program so that all memory expressions are + * "simple". Introduce well-typed temporaries to hold intermediate values + * for expressions that would normally involve more than one memory + * reference. + * + * If simplemem succeeds, each lvalue should contain only one Mem() + * constructor. + *) +open Cil + +(* current context: where should we put our temporaries? *) +let thefunc = ref None + +(* build up a list of assignments to temporary variables *) +let assignment_list = ref [] + +(* turn "int a[5][5]" into "int ** temp" *) +let rec array_to_pointer tau = + match unrollType tau with + TArray(dest,_,al) -> TPtr(array_to_pointer dest,al) + | _ -> tau + +(* create a temporary variable in the current function *) +let make_temp tau = + let tau = array_to_pointer tau in + match !thefunc with + Some(fundec) -> makeTempVar fundec ~name:("mem_") tau + | None -> failwith "simplemem: temporary needed outside a function" + +(* separate loffsets into "scalar addition parts" and "memory parts" *) +let rec separate_loffsets lo = + match lo with + NoOffset -> NoOffset, NoOffset + | Field(fi,rest) -> + let s,m = separate_loffsets rest in + Field(fi,s) , m + | Index(_) -> NoOffset, lo + +(* Recursively decompose the lvalue so that what is under a "Mem()" + * constructor is put into a temporary variable. *) +let rec handle_lvalue (lb,lo) = + let s,m = separate_loffsets lo in + match lb with + Var(vi) -> + handle_loffset (lb,s) m + | Mem(Lval(Var(_),NoOffset)) -> + (* special case to avoid generating "tmp = ptr;" *) + handle_loffset (lb,s) m + | Mem(e) -> + begin + let new_vi = make_temp (typeOf e) in + assignment_list := (Set((Var(new_vi),NoOffset),e,!currentLoc)) + :: !assignment_list ; + handle_loffset (Mem(Lval(Var(new_vi),NoOffset)),NoOffset) lo + end +and handle_loffset lv lo = + match lo with + NoOffset -> lv + | Field(f,o) -> handle_loffset (addOffsetLval (Field(f,NoOffset)) lv) o + | Index(exp,o) -> handle_loffset (addOffsetLval (Index(exp,NoOffset)) lv) o + +(* the transformation is implemented as a Visitor *) +class simpleVisitor = object + inherit nopCilVisitor + + method vfunc fundec = (* we must record the current context *) + thefunc := Some(fundec) ; + DoChildren + + method vlval lv = ChangeDoChildrenPost(lv, + (fun lv -> handle_lvalue lv)) + + method unqueueInstr () = + let result = List.rev !assignment_list in + assignment_list := [] ; + result +end + +(* Main entry point: apply the transformation to a file *) +let simplemem (f : file) = + try + visitCilFileSameGlobals (new simpleVisitor) f; + f + with e -> Printf.printf "Exception in Simplemem.simplemem: %s\n" + (Printexc.to_string e) ; raise e + +let feature : featureDescr = + { fd_name = "simpleMem"; + fd_enabled = Cilutil.doSimpleMem; + fd_description = "simplify all memory expressions" ; + fd_extraopt = []; + fd_doit = (function (f: file) -> ignore (simplemem f)) ; + fd_post_check = true; + } diff --git a/cil/src/ext/simplify.ml b/cil/src/ext/simplify.ml new file mode 100755 index 00000000..776d4916 --- /dev/null +++ b/cil/src/ext/simplify.ml @@ -0,0 +1,845 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * Sumit Gulwani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* This module simplifies the expressions in a program in the following ways: + +1. All expressions are either + + basic::= + Const _ + Addrof(Var v, NoOffset) + StartOf(Var v, NoOffset) + Lval(Var v, off), where v is a variable whose address is not taken + and off contains only "basic" + + exp::= + basic + Lval(Mem basic, NoOffset) + BinOp(bop, basic, basic) + UnOp(uop, basic) + CastE(t, basic) + + lval ::= + Mem basic, NoOffset + Var v, off, where v is a variable whose address is not taken and off + contains only "basic" + + - all sizeof and alignof are turned into constants + - accesses to variables whose address is taken is turned into "Mem" accesses + - same for accesses to arrays + - all field and index computations are turned into address arithmetic, + including bitfields. + +*) + + +open Pretty +open Cil +module E = Errormsg +module H = Hashtbl + +type taExp = exp (* Three address expression *) +type bExp = exp (* Basic expression *) + +let debug = true + +(* Whether to split structs *) +let splitStructs = ref true + +let onlyVariableBasics = ref false +let noStringConstantsBasics = ref false + +exception BitfieldAccess + +(* Turn an expression into a three address expression (and queue some + * instructions in the process) *) +let rec makeThreeAddress + (setTemp: taExp -> bExp) (* Given an expression save it into a temp and + * return that temp *) + (e: exp) : taExp = + match e with + SizeOf _ | SizeOfE _ | AlignOf _ | AlignOfE _ | SizeOfStr _ -> + constFold true e + | Const _ -> e + | AddrOf (Var _, NoOffset) -> e + | Lval lv -> Lval (simplifyLval setTemp lv) + | BinOp(bo, e1, e2, tres) -> + BinOp(bo, makeBasic setTemp e1, makeBasic setTemp e2, tres) + | UnOp(uo, e1, tres) -> + UnOp(uo, makeBasic setTemp e1, tres) + | CastE(t, e) -> + CastE(t, makeBasic setTemp e) + | AddrOf lv -> begin + match simplifyLval setTemp lv with + Mem a, NoOffset -> a + | _ -> (* This is impossible, because we are taking the address + * of v and simplifyLval should turn it into a Mem, except if the + * sizeof has failed. *) + E.s (bug "Simplify: makeThreeAddress for AddrOf(LV=%a, LVT=%a)" + d_lval lv d_type (typeOfLval lv)) + end + | StartOf lv -> + makeThreeAddress setTemp (AddrOf (addOffsetLval (Index(zero, NoOffset)) + lv)) + +(* Make a basic expression *) +and makeBasic (setTemp: taExp -> bExp) (e: exp) : bExp = + let dump = false (* !currentLoc.line = 395 *) in + if dump then + ignore (E.log "makeBasic %a\n" d_plainexp e); + (* Make it a three address expression first *) + let e' = makeThreeAddress setTemp e in + if dump then + ignore (E.log " e'= %a\n" d_plainexp e); + (* See if it is a basic one *) + match e' with + | Lval (Var _, _) -> e' + | Const _ | AddrOf (Var _, NoOffset) | StartOf (Var _, NoOffset) -> + if !onlyVariableBasics then setTemp e' else e' + | SizeOf _ | SizeOfE _ | AlignOf _ | AlignOfE _ | SizeOfStr _ -> + E.s (bug "Simplify: makeBasic found SizeOf: %a" d_exp e') + + (* We cannot make a function to be Basic, unless it actually is a variable + * already. If this is a function pointer the best we can do is to make + * the address of the function basic *) + | Lval (Mem a, NoOffset) when isFunctionType (typeOf e') -> + if dump then + ignore (E.log " a function type\n"); + let a' = makeBasic setTemp a in + Lval (Mem a', NoOffset) + + | _ -> setTemp e' (* Put it into a temporary otherwise *) + + +and simplifyLval + (setTemp: taExp -> bExp) + (lv: lval) : lval = + (* Add, watching for a zero *) + let add (e1: exp) (e2: exp) = + if isZero e2 then e1 else BinOp(PlusA, e1, e2, !upointType) + in + (* Convert an offset to an integer, and possibly a residual bitfield offset*) + let rec offsetToInt + (t: typ) (* The type of the host *) + (off: offset) : exp * offset = + match off with + NoOffset -> zero, NoOffset + | Field(fi, off') -> begin + let start = + try + let start, _ = bitsOffset t (Field(fi, NoOffset)) in + start + with SizeOfError (whystr, t') -> + E.s (E.bug "%a: Cannot compute sizeof: %s: %a" + d_loc !currentLoc whystr d_type t') + in + if start land 7 <> 0 then begin + (* We have a bitfield *) + assert (off' = NoOffset); + zero, Field(fi, off') + end else begin + let next, restoff = offsetToInt fi.ftype off' in + add (integer (start / 8)) next, restoff + end + end + | Index(ei, off') -> begin + let telem = match unrollType t with + TArray(telem, _, _) -> telem + | _ -> E.s (bug "Simplify: simplifyLval: index on a non-array") + in + let next, restoff = offsetToInt telem off' in + add + (BinOp(Mult, ei, SizeOf telem, !upointType)) + next, + restoff + end + in + let tres = TPtr(typeOfLval lv, []) in + match lv with + Mem a, off -> + let offidx, restoff = offsetToInt (typeOfLval (Mem a, NoOffset)) off in + let a' = + if offidx <> zero then + add (mkCast a !upointType) offidx + else + a + in + let a' = makeBasic setTemp a' in + Mem (mkCast a' tres), restoff + + | Var v, off when v.vaddrof -> (* We are taking this variable's address *) + let offidx, restoff = offsetToInt v.vtype off in + (* We cannot call makeBasic recursively here, so we must do it + * ourselves *) + let a = mkAddrOrStartOf (Var v, NoOffset) in + let a' = + if offidx = zero then a else + add (mkCast a !upointType) (makeBasic setTemp offidx) + in + let a' = setTemp a' in + Mem (mkCast a' tres), restoff + + | Var v, off -> + (Var v, simplifyOffset setTemp off) + + +(* Simplify an offset and make sure it has only three address expressions in + * indices *) +and simplifyOffset (setTemp: taExp -> bExp) = function + NoOffset -> NoOffset + | Field(fi, off) -> Field(fi, simplifyOffset setTemp off) + | Index(ei, off) -> + let ei' = makeBasic setTemp ei in + Index(ei', simplifyOffset setTemp off) + + + + +(** This is a visitor that will turn all expressions into three address code *) +class threeAddressVisitor (fi: fundec) = object (self) + inherit nopCilVisitor + + method private makeTemp (e1: exp) : exp = + let t = makeTempVar fi (typeOf e1) in + (* Add this instruction before the current statement *) + self#queueInstr [Set(var t, e1, !currentLoc)]; + Lval(var t) + + (* We'll ensure that this gets called only for top-level expressions + * inside functions. We must turn them into three address code. *) + method vexpr (e: exp) = + let e' = makeThreeAddress self#makeTemp e in + ChangeTo e' + + + (** We want the argument in calls to be simple variables *) + method vinst (i: instr) = + match i with + Call (someo, f, args, loc) -> + let someo' = + match someo with + Some lv -> Some (simplifyLval self#makeTemp lv) + | _ -> None + in + let f' = makeBasic self#makeTemp f in + let args' = List.map (makeBasic self#makeTemp) args in + ChangeTo [ Call (someo', f', args', loc) ] + | _ -> DoChildren + + (* This method will be called only on top-level "lvals" (those on the + * left of assignments and function calls) *) + method vlval (lv: lval) = + ChangeTo (simplifyLval self#makeTemp lv) +end + +(******************** + Next is an old version of the code that was splitting structs into + * variables. It was not working on variables that are arguments or returns + * of function calls. +(** This is a visitor that splits structured variables into separate + * variables. *) +let isStructType (t: typ): bool = + match unrollType t with + TComp (ci, _) -> ci.cstruct + | _ -> false + +(* Keep track of how we change the variables. For each variable id we keep a + * hash table that maps an offset (a sequence of fieldinfo) into a + * replacement variable. We also keep track of the splittable vars: those + * with structure type but whose address is not take and which do not appear + * as the argument to a Return *) +let splittableVars: (int, unit) H.t = H.create 13 +let replacementVars: (int * offset, varinfo) H.t = H.create 13 + +let findReplacement (fi: fundec) (v: varinfo) (off: offset) : varinfo = + try + H.find replacementVars (v.vid, off) + with Not_found -> begin + let t = typeOfLval (Var v, off) in + (* make a name for this variable *) + let rec mkName = function + | Field(fi, off) -> "_" ^ fi.fname ^ mkName off + | _ -> "" + in + let v' = makeTempVar fi ~name:(v.vname ^ mkName off ^ "_") t in + H.add replacementVars (v.vid, off) v'; + if debug then + ignore (E.log "Simplify: %s (%a) replace %a with %s\n" + fi.svar.vname + d_loc !currentLoc + d_lval (Var v, off) + v'.vname); + v' + end + + (* Now separate the offset into a sequence of field accesses and the + * rest of the offset *) +let rec separateOffset (off: offset): offset * offset = + match off with + NoOffset -> NoOffset, NoOffset + | Field(fi, off') when fi.fcomp.cstruct -> + let off1, off2 = separateOffset off' in + Field(fi, off1), off2 + | _ -> NoOffset, off + + +class splitStructVisitor (fi: fundec) = object (self) + inherit nopCilVisitor + + method vlval (lv: lval) = + match lv with + Var v, off when H.mem splittableVars v.vid -> + (* The type of this lval better not be a struct *) + if isStructType (typeOfLval lv) then + E.s (unimp "Simplify: found lval of struct type %a : %a\n" + d_lval lv d_type (typeOfLval lv)); + let off1, restoff = separateOffset off in + let lv' = + if off1 <> NoOffset then begin + (* This is a splittable variable and we have an offset that makes + * it a scalar. Find the replacement variable for this *) + let v' = findReplacement fi v off1 in + if restoff = NoOffset then + Var v', NoOffset + else (* We have some more stuff. Use Mem *) + Mem (mkAddrOrStartOf (Var v', NoOffset)), restoff + end else begin (* off1 = NoOffset *) + if restoff = NoOffset then + E.s (bug "Simplify: splitStructVisitor:lval") + else + simplifyLval + (fun e1 -> + let t = makeTempVar fi (typeOf e1) in + (* Add this instruction before the current statement *) + self#queueInstr [Set(var t, e1, !currentLoc)]; + Lval(var t)) + (Mem (mkAddrOrStartOf (Var v, NoOffset)), restoff) + end + in + ChangeTo lv' + + | _ -> DoChildren + + method vinst (i: instr) = + (* Accumulate to the list of instructions a number of assignments of + * non-splittable lvalues *) + let rec accAssignment (ci: compinfo) (dest: lval) (what: lval) + (acc: instr list) : instr list = + List.fold_left + (fun acc f -> + let dest' = addOffsetLval (Field(f, NoOffset)) dest in + let what' = addOffsetLval (Field(f, NoOffset)) what in + match unrollType f.ftype with + TComp(ci, _) when ci.cstruct -> + accAssignment ci dest' what' acc + | TArray _ -> (* We must copy the array *) + (Set((Mem (AddrOf dest'), NoOffset), + Lval (Mem (AddrOf what'), NoOffset), !currentLoc)) :: acc + | _ -> (* If the type of f is not a struct then leave this alone *) + (Set(dest', Lval what', !currentLoc)) :: acc) + acc + ci.cfields + in + let doAssignment (ci: compinfo) (dest: lval) (what: lval) : instr list = + let il' = accAssignment ci dest what [] in + List.concat (List.map (visitCilInstr (self :> cilVisitor)) il') + in + match i with + Set(((Var v, off) as lv), what, _) when H.mem splittableVars v.vid -> + let off1, restoff = separateOffset off in + if restoff <> NoOffset then (* This means that we are only assigning + * part of a replacement variable. Leave + * this alone because the vlval will take + * care of it *) + DoChildren + else begin + (* The type of the replacement has to be a structure *) + match unrollType (typeOfLval lv) with + TComp (ci, _) when ci.cstruct -> + (* The assigned thing better be an lvalue *) + let whatlv = + match what with + Lval lv -> lv + | _ -> E.s (unimp "Simplify: assigned struct is not lval") + in + ChangeTo (doAssignment ci (Var v, off) whatlv) + + | _ -> (* vlval will take care of it *) + DoChildren + end + + | Set(dest, Lval (Var v, off), _) when H.mem splittableVars v.vid -> + let off1, restoff = separateOffset off in + if restoff <> NoOffset then (* vlval will do this *) + DoChildren + else begin + (* The type of the replacement has to be a structure *) + match unrollType (typeOfLval dest) with + TComp (ci, _) when ci.cstruct -> + ChangeTo (doAssignment ci dest (Var v, off)) + + | _ -> (* vlval will take care of it *) + DoChildren + end + + | _ -> DoChildren + +end +*) + +(* Whether to split the arguments of functions *) +let splitArguments = true + +(* Whether we try to do the splitting all in one pass. The advantage is that + * it is faster and it generates nicer names *) +let lu = locUnknown + +(* Go over the code and split some temporary variables of stucture type into + * several separate variables. The hope is that the compiler will have an + * easier time to do standard optimizations with the resulting scalars *) +(* Unfortunately, implementing this turns out to be more complicated than I + * thought *) + +(** Iterate over the fields of a structured type. Returns the empty list if + * no splits. The offsets are in order in which they appear in the structure + * type. Along with the offset we pass a string that identifies the + * meta-component, and the type of that component. *) +let rec foldRightStructFields + (doit: offset -> string -> typ -> 'a) (* Invoked on non-struct fields *) + (off: offset) + (post: 'a list) (** A suffix to what you compute *) + (fields: fieldinfo list) : 'a list = + List.fold_right + (fun f post -> + let off' = addOffset (Field(f, NoOffset)) off in + match unrollType f.ftype with + TComp (comp, _) when comp.cstruct -> (* struct type: recurse *) + foldRightStructFields doit off' post comp.cfields + | _ -> + (doit off' f.fname f.ftype) :: post) + fields + post + + +let rec foldStructFields + (t: typ) + (doit: offset -> string -> typ -> 'a) + : 'a list = + match unrollType t with + TComp (comp, _) when comp.cstruct -> + foldRightStructFields doit NoOffset [] comp.cfields + | _ -> [] + + +(* Map a variable name to a list of component variables, along with the + * accessor offset. The fields are in the order in which they appear in the + * structure. *) +let newvars : (string, (offset * varinfo) list) H.t = H.create 13 + +(* Split a variable and return the replacements, in the proper order. If this + * variable is not split, then return just the variable. *) +let splitOneVar (v: varinfo) + (mknewvar: string -> typ -> varinfo) : varinfo list = + try + (* See if we have already split it *) + List.map snd (H.find newvars v.vname) + with Not_found -> begin + let vars: (offset * varinfo) list = + foldStructFields v.vtype + (fun off n t -> (* make a new one *) + let newname = v.vname ^ "_" ^ n in + let v'= mknewvar newname t in + (off, v')) + in + if vars = [] then + [ v ] + else begin + (* Now remember the newly created vars *) + H.add newvars v.vname vars; + List.map snd vars (* Return just the vars *) + end + end + + +(* A visitor that finds all locals that appear in a call or have their + * address taken *) +let dontSplitLocals : (string, bool) H.t = H.create 111 +class findVarsCantSplitClass : cilVisitor = object (self) + inherit nopCilVisitor + + (* expressions, to see the address being taken *) + method vexpr (e: exp) : exp visitAction = + match e with + AddrOf (Var v, NoOffset) -> + H.add dontSplitLocals v.vname true; SkipChildren + (* See if we take the address of the "_ms" field in a variable *) + | _ -> DoChildren + + + (* variables involved in call instructions *) + method vinst (i: instr) : instr list visitAction = + match i with + Call (res, f, args, _) -> + (match res with + Some (Var v, NoOffset) -> H.add dontSplitLocals v.vname true + | _ -> ()); + if not splitArguments then + List.iter (fun a -> + match a with + Lval (Var v, NoOffset) -> H.add dontSplitLocals v.vname true + | _ -> ()) args; + (* Now continue the visit *) + DoChildren + + | _ -> DoChildren + + (* Variables used in return should not be split *) + method vstmt (s: stmt) : stmt visitAction = + match s.skind with + Return (Some (Lval (Var v, NoOffset)), _) -> + H.add dontSplitLocals v.vname true; DoChildren + | Return (Some e, _) -> + DoChildren + | _ -> DoChildren + + method vtype t = SkipChildren + +end +let findVarsCantSplit = new findVarsCantSplitClass + +let isVar lv = + match lv with + (Var v, NoOffset) -> true + | _ -> false + + +class splitVarVisitorClass(func:fundec option) : cilVisitor = object (self) + inherit nopCilVisitor + + method private makeTemp (e1: exp) : exp = + let fi:fundec = match func with + Some f -> f + | None -> + E.s (bug "You can't create a temporary if you're not in a function.") + in + let t = makeTempVar fi (typeOf e1) in + (* Add this instruction before the current statement *) + self#queueInstr [Set(var t, e1, !currentLoc)]; + Lval(var t) + + + (* We must process the function types *) + method vtype t = + (* We invoke the visitor first and then we fix it *) + let postProcessFunType (t: typ) : typ = + match t with + TFun(rt, Some params, isva, a) -> + let rec loopParams = function + [] -> [] + | ((pn, pt, pa) :: rest) as params -> + let rest' = loopParams rest in + let res: (string * typ * attributes) list = + foldStructFields pt + (fun off n t -> + (* Careful with no-name parameters, or we end up with + * many parameters named _p ! *) + ((if pn <> "" then pn ^ n else ""), t, pa)) + in + if res = [] then (* Not a fat *) + if rest' == rest then + params (* No change at all. Try not to reallocate so that + * the visitor does not allocate. *) + else + (pn, pt, pa) :: rest' + else (* Some change *) + res @ rest' + in + let params' = loopParams params in + if params == params' then + t + else + TFun(rt, Some params', isva, a) + + | t -> t + in + if splitArguments then + ChangeDoChildrenPost(t, postProcessFunType) + else + SkipChildren + + (* Whenever we see a variable with a field access we try to replace it + * by its components *) + method vlval ((b, off) : lval) : lval visitAction = + try + match b, off with + Var v, (Field _ as off) -> + (* See if this variable has some splits.Might throw Not_found *) + let splits = H.find newvars v.vname in + (* Now find among the splits one that matches this offset. And + * return the remaining offset *) + let rec find = function + [] -> + E.s (E.bug "Cannot find component %a of %s\n" + (d_offset nil) off v.vname) + | (splitoff, splitvar) :: restsplits -> + let rec matches = function + Field(f1, rest1), Field(f2, rest2) + when f1.fname = f2.fname -> + matches (rest1, rest2) + | off, NoOffset -> + (* We found a match *) + (Var splitvar, off) + | NoOffset, restoff -> + ignore (warn "Found aggregate lval %a\n" + d_lval (b, off)); + find restsplits + + | _, _ -> (* We did not match this one; go on *) + find restsplits + in + matches (off, splitoff) + in + ChangeTo (find splits) + | _ -> DoChildren + with Not_found -> DoChildren + + (* Sometimes we pass the variable as a whole to a function or we + * assign it to something *) + method vinst (i: instr) : instr list visitAction = + match i with + (* Split into several instructions and then do children inside + * the rhs. Howver, v might appear in the rhs and if we + * duplicate the instruction we might get bad + * results. (e.g. test/small1/simplify_Structs2.c). So first copy + * the rhs to temp variables, then to v. + * + * Optimization: if the rhs is a variable, skip the temporary vars. + * Either the rhs = lhs, in which case this is all a nop, or it's not, + * in which case the rhs and lhs don't overlap.*) + + Set ((Var v, NoOffset), Lval lv, l) when H.mem newvars v.vname -> begin + let needTemps = not (isVar lv) in + let vars4v = H.find newvars v.vname in + if vars4v = [] then E.s (errorLoc l "No fields in split struct"); + ChangeTo + (List.map + (fun (off, newv) -> + let lv' = + visitCilLval (self :> cilVisitor) + (addOffsetLval off lv) in + (* makeTemp creates a temp var and puts (Lval lv') in it, + before any instructions in this ChangeTo list are handled.*) + let lv_tmp = if needTemps then + self#makeTemp (Lval lv') + else + (Lval lv') + in + Set((Var newv, NoOffset), lv_tmp, l)) + vars4v) + end + + | Set (lv, Lval (Var v, NoOffset), l) when H.mem newvars v.vname -> begin + (* Split->NonSplit assignment. no overlap between lhs and rhs + is possible*) + let vars4v = H.find newvars v.vname in + if vars4v = [] then E.s (errorLoc l "No fields in split struct"); + ChangeTo + (List.map + (fun (off, newv) -> + let lv' = + visitCilLval (self :> cilVisitor) + (addOffsetLval off lv) in + Set(lv', Lval (Var newv, NoOffset), l)) + vars4v) + end + + (* Split all function arguments in calls *) + | Call (ret, f, args, l) when splitArguments -> + (* Visit the children first and then see if we must change the + * arguments *) + let finishArgs = function + [Call (ret', f', args', l')] as i' -> + let mustChange = ref false in + let newargs = + (* Look for opportunities to split arguments. If we can + * split, we must split the original argument (in args). + * Otherwise, we use the result of processing children + * (in args'). *) + List.fold_right2 + (fun a a' acc -> + match a with + Lval (Var v, NoOffset) when H.mem newvars v.vname -> + begin + mustChange := true; + (List.map + (fun (_, newv) -> + Lval (Var newv, NoOffset)) + (H.find newvars v.vname)) + @ acc + end + | Lval lv -> begin + let newargs = + foldStructFields (typeOfLval lv) + (fun off n t -> + let lv' = addOffsetLval off lv in + Lval lv') in + if newargs = [] then + a' :: acc (* not a split var *) + else begin + mustChange := true; + newargs @ acc + end + end + | _ -> (* only lvals are split, right? *) + a' :: acc) + args args' + [] + in + if !mustChange then + [Call (ret', f', newargs, l')] + else + i' + | _ -> E.s (E.bug "splitVarVisitorClass: expecting call") + in + ChangeDoChildrenPost ([i], finishArgs) + + | _ -> DoChildren + + + method vfunc (func: fundec) : fundec visitAction = + H.clear newvars; + H.clear dontSplitLocals; + (* Visit the type of the function itself *) + if splitArguments then + func.svar.vtype <- visitCilType (self :> cilVisitor) func.svar.vtype; + + (* Go over the block and find the candidates *) + ignore (visitCilBlock findVarsCantSplit func.sbody); + + (* Now go over the formals and create the splits *) + if splitArguments then begin + (* Split all formals because we will split all arguments in function + * types *) + let newformals = + List.fold_right + (fun form acc -> + (* Process the type first *) + form.vtype <- + visitCilType (self : #cilVisitor :> cilVisitor) form.vtype; + let form' = + splitOneVar form + (fun s t -> makeLocalVar func ~insert:false s t) + in + (* Now it is a good time to check if we actually can split this + * one *) + if List.length form' > 1 && + H.mem dontSplitLocals form.vname then + ignore (warn "boxsplit: can't split formal \"%s\" in %s. Make sure you never take the address of a formal.\n" + form.vname func.svar.vname); + form' @ acc) + func.sformals [] + in + (* Now make sure we fix the type. *) + setFormals func newformals + end; + (* Now go over the locals and create the splits *) + List.iter + (fun l -> + (* Process the type of the local *) + l.vtype <- visitCilType (self :> cilVisitor) l.vtype; + (* Now see if we must split it *) + if not (H.mem dontSplitLocals l.vname) then begin + ignore (splitOneVar l (fun s t -> makeTempVar func ~name:s t)) + end) + func.slocals; + (* Now visit the body and change references to these variables *) + ignore (visitCilBlock (self :> cilVisitor) func.sbody); + H.clear newvars; + H.clear dontSplitLocals; + SkipChildren (* We are done with this function *) + + (* Try to catch the occurrences of the variable in a sizeof expression *) + method vexpr (e: exp) = + match e with + | SizeOfE (Lval(Var v, NoOffset)) -> begin + try + let splits = H.find newvars v.vname in + (* We cound here on no padding between the elements ! *) + ChangeTo + (List.fold_left + (fun acc (_, thisv) -> + BinOp(PlusA, SizeOfE(Lval(Var thisv, NoOffset)), + acc, uintType)) + zero + splits) + with Not_found -> DoChildren + end + | _ -> DoChildren +end + +let doGlobal = function + GFun(fi, _) -> + (* Visit the body and change all expressions into three address code *) + let v = new threeAddressVisitor fi in + fi.sbody <- visitCilBlock v fi.sbody; + if !splitStructs then begin + H.clear dontSplitLocals; + let splitVarVisitor = new splitVarVisitorClass (Some fi) in + ignore (visitCilFunction splitVarVisitor fi); + end + | GVarDecl(vi, _) when isFunctionType vi.vtype -> + (* we might need to split the args/return value in the function type. *) + if !splitStructs then begin + H.clear dontSplitLocals; + let splitVarVisitor = new splitVarVisitorClass None in + ignore (visitCilVarDecl splitVarVisitor vi); + end + | _ -> () + +let feature : featureDescr = + { fd_name = "simplify"; + fd_enabled = ref false; + fd_description = "compiles CIL to 3-address code"; + fd_extraopt = [ + ("--no-split-structs", Arg.Unit (fun _ -> splitStructs := false), + "do not split structured variables"); + ]; + fd_doit = (function f -> iterGlobals f doGlobal); + fd_post_check = true; +} + diff --git a/cil/src/ext/ssa.ml b/cil/src/ext/ssa.ml new file mode 100644 index 00000000..942c92b6 --- /dev/null +++ b/cil/src/ext/ssa.ml @@ -0,0 +1,696 @@ +module B=Bitmap +module E = Errormsg + +open Cil +open Pretty + +let debug = false + +(* Globalsread, Globalswritten should be closed under call graph *) + +module StringOrder = + struct + type t = string + let compare s1 s2 = + if s1 = s2 then 0 else + if s1 < s2 then -1 else 1 + end + +module StringSet = Set.Make (StringOrder) + +module IntOrder = + struct + type t = int + let compare i1 i2 = + if i1 = i2 then 0 else + if i1 < i2 then -1 else 1 + end + +module IntSet = Set.Make (IntOrder) + + +type cfgInfo = { + name: string; (* The function name *) + start : int; + size : int; + blocks: cfgBlock array; (** Dominating blocks must come first *) + successors: int list array; (* block indices *) + predecessors: int list array; + mutable nrRegs: int; + mutable regToVarinfo: varinfo array; (** Map register IDs to varinfo *) + } + +(** A block corresponds to a statement *) +and cfgBlock = { + bstmt: Cil.stmt; + + (* We abstract the statement as a list of def/use instructions *) + instrlist: instruction list; + mutable livevars: (reg * int) list; + (** For each variable ID that is live at the start of the block, the + * block whose definition reaches this point. If that block is the same + * as the current one, then the variable is a phi variable *) + mutable reachable: bool; + } + +and instruction = (reg list * reg list) + (* lhs variables, variables on rhs. *) + + +and reg = int + +type idomInfo = int array (* immediate dominator *) + +and dfInfo = (int list) array (* dominance frontier *) + +and oneSccInfo = { + nodes: int list; + headers: int list; + backEdges: (int*int) list; + } + +and sccInfo = oneSccInfo list + +(* Muchnick's Domin_Fast, 7.16 *) + +let compute_idom (flowgraph: cfgInfo): idomInfo = + let start = flowgraph.start in + let size = flowgraph.size in + let successors = flowgraph.successors in + let predecessors = flowgraph.predecessors in + let n0 = size in (* a new node (not in the flowgraph) *) + let idom = Array.make size (-1) in (* Make an array of immediate dominators *) + let nnodes = size + 1 in + let nodeSet = B.init nnodes (fun i -> true) in + + let ndfs = Array.create nnodes 0 in (* mapping from depth-first + * number to nodes. DForder + * starts at 1, with 0 used as + * an invalid entry *) + let parent = Array.create nnodes 0 in (* the parent in depth-first + * spanning tree *) + + (* A semidominator of w is the node v with the minimal DForder such + * that there is a path from v to w containing only nodes with the + * DForder larger than w. *) + let sdno = Array.create nnodes 0 in (* depth-first number of + * semidominator *) + + (* The set of nodes whose + * semidominator is ndfs(i) *) + let bucket = Array.init nnodes (fun _ -> B.cloneEmpty nodeSet) in + + (* The functions link and eval maintain a forest within the + * depth-first spanning tree. Ancestor is n0 is the node is a root in + * the forest. Label(v) is the node in the ancestor chain with the + * smallest depth-first number of its semidominator. Child and Size + * are used to keep the trees in the forest balanced *) + let ancestor = Array.create nnodes 0 in + let label = Array.create nnodes 0 in + let child = Array.create nnodes 0 in + let size = Array.create nnodes 0 in + + + let n = ref 0 in (* depth-first scan and numbering. + * Initialize data structures. *) + ancestor.(n0) <- n0; + label.(n0) <- n0; + let rec depthFirstSearchDom v = + incr n; + sdno.(v) <- !n; + ndfs.(!n) <- v; label.(v) <- v; + ancestor.(v) <- n0; (* All nodes are roots initially *) + child.(v) <- n0; size.(v) <- 1; + List.iter + (fun w -> + if sdno.(w) = 0 then begin + parent.(w) <- v; depthFirstSearchDom w + end) + successors.(v); + in + (* Determine the ancestor of v whose semidominator has the the minimal + * DFnumber. In the process, compress the paths in the forest. *) + let eval v = + let rec compress v = + if ancestor.(ancestor.(v)) <> n0 then + begin + compress ancestor.(v); + if sdno.(label.(ancestor.(v))) < sdno.(label.(v)) then + label.(v) <- label.(ancestor.(v)); + ancestor.(v) <- ancestor.(ancestor.(v)) + end + in + if ancestor.(v) = n0 then label.(v) + else begin + compress v; + if sdno.(label.(ancestor.(v))) >= sdno.(label.(v)) then + label.(v) + else label.(ancestor.(v)) + end + in + + let link v w = + let s = ref w in + while sdno.(label.(w)) < sdno.(label.(child.(!s))) do + if size.(!s) + size.(child.(child.(!s))) >= 2* size.(child.(!s)) then + (ancestor.(child.(!s)) <- !s; + child.(!s) <- child.(child.(!s))) + else + (size.(child.(!s)) <- size.(!s); + ancestor.(!s) <- child.(!s); s := child.(!s)); + done; + label.(!s) <- label.(w); + size.(v) <- size.(v) + size.(w); + if size.(v) < 2 * size.(w) then begin + let tmp = !s in + s := child.(v); + child.(v) <- tmp; + end; + while !s <> n0 do + ancestor.(!s) <- v; + s := child.(!s); + done; + in + (* Start now *) + depthFirstSearchDom start; + for i = !n downto 2 do + let w = ndfs.(i) in + List.iter (fun v -> + let u = eval v in + if sdno.(u) < sdno.(w) then sdno.(w) <- sdno.(u);) + predecessors.(w); + B.set bucket.(ndfs.(sdno.(w))) w true; + link parent.(w) w; + while not (B.empty bucket.(parent.(w))) do + let v = + match B.toList bucket.(parent.(w)) with + x :: _ -> x + | [] -> ignore(print_string "Error in dominfast");0 in + B.set bucket.(parent.(w)) v false; + let u = eval v in + idom.(v) <- if sdno.(u) < sdno.(v) then u else parent.(w); + done; + done; + + for i=2 to !n do + let w = ndfs.(i) in + if idom.(w) <> ndfs.(sdno.(w)) then begin + let newDom = idom.(idom.(w)) in + idom.(w) <- newDom; + end + done; + idom + + + + + +let dominance_frontier (flowgraph: cfgInfo) : dfInfo = + let idom = compute_idom flowgraph in + let size = flowgraph.size in + let children = Array.create size [] in + for i = 0 to size - 1 do + if (idom.(i) != -1) then children.(idom.(i)) <- i :: children.(idom.(i)); + done; + + let size = flowgraph.size in + let start = flowgraph.start in + let successors = flowgraph.successors in + + let df = Array.create size [] in + (* Compute the dominance frontier *) + + let bottom = Array.make size true in (* bottom of the dominator tree *) + for i = 0 to size - 1 do + if (i != start) && idom.(i) <> -1 then bottom.(idom.(i)) <- false; + done; + + let processed = Array.make size false in (* to record the nodes added to work_list *) + let workList = ref ([]) in (* to iterate in a bottom-up traversal of the dominator tree *) + for i = 0 to size - 1 do + if (bottom.(i)) then workList := i :: !workList; + done; + while (!workList != []) do + let x = List.hd !workList in + let update y = if idom.(y) <> x then df.(x) <- y::df.(x) in + (* compute local component *) + +(* We use whichPred instead of whichSucc because ultimately this info is + * needed by control dependence dag which is constructed from REVERSE + * dominance frontier *) + List.iter (fun succ -> update succ) successors.(x); + (* add on up component *) + List.iter (fun z -> List.iter (fun y -> update y) df.(z)) children.(x); + processed.(x) <- true; + workList := List.tl !workList; + if (x != start) then begin + let i = idom.(x) in + if i <> -1 && + (List.for_all (fun child -> processed.(child)) children.(i)) then workList := i :: !workList; + end; + done; + df + + +(* Computes for each register, the set of nodes that need a phi definition + * for the register *) + +let add_phi_functions_info (flowgraph: cfgInfo) : unit = + let df = dominance_frontier flowgraph in + let size = flowgraph.size in + let nrRegs = flowgraph.nrRegs in + + + let defs = Array.init size (fun i -> B.init nrRegs (fun j -> false)) in + for i = 0 to size-1 do + List.iter + (fun (lhs,rhs) -> + List.iter (fun (r: reg) -> B.set defs.(i) r true) lhs; + ) + flowgraph.blocks.(i).instrlist + done; + let iterCount = ref 0 in + let hasAlready = Array.create size 0 in + let work = Array.create size 0 in + let w = ref ([]) in + let dfPlus = Array.init nrRegs ( + fun i -> + let defIn = B.make size in + for j = 0 to size - 1 do + if B.get defs.(j) i then B.set defIn j true + done; + let res = ref [] in + incr iterCount; + B.iter (fun x -> work.(x) <- !iterCount; w := x :: !w;) defIn; + while (!w != []) do + let x = List.hd !w in + w := List.tl !w; + List.iter (fun y -> + if (hasAlready.(y) < !iterCount) then begin + res := y :: !res; + hasAlready.(y) <- !iterCount; + if (work.(y) < !iterCount) then begin + work.(y) <- !iterCount; + w := y :: !w; + end; + end; + ) df.(x) + done; + (* res := List.filter (fun blkId -> B.get liveIn.(blkId) i) !res; *) + !res + ) in + let result = Array.create size ([]) in + for i = 0 to nrRegs - 1 do + List.iter (fun node -> result.(node) <- i::result.(node);) dfPlus.(i) + done; +(* result contains for each node, the list of variables that need phi + * definition *) + for i = 0 to size-1 do + flowgraph.blocks.(i).livevars <- + List.map (fun r -> (r, i)) result.(i); + done + + + +(* add dominating definitions info *) + +let add_dom_def_info (f: cfgInfo): unit = + let blocks = f.blocks in + let start = f.start in + let size = f.size in + let nrRegs = f.nrRegs in + + let idom = compute_idom f in + let children = Array.create size [] in + for i = 0 to size - 1 do + if (idom.(i) != -1) then children.(idom.(i)) <- i :: children.(idom.(i)); + done; + + if debug then begin + ignore (E.log "Immediate dominators\n"); + for i = 0 to size - 1 do + ignore (E.log " block %d: idom=%d, children=%a\n" + i idom.(i) + (docList num) children.(i)); + done + end; + + (* For each variable, maintain a stack of blocks that define it. When you + * process a block, the top of the stack is the closest dominator that + * defines the variable *) + let s = Array.make nrRegs ([start]) in + + (* Search top-down in the idom tree *) + let rec search (x: int): unit = (* x is a graph node *) + (* Push the current block for the phi variables *) + List.iter + (fun ((r: reg), dr) -> + if x = dr then s.(r) <- x::s.(r)) + blocks.(x).livevars; + + (* Clear livevars *) + blocks.(x).livevars <- []; + + (* Compute livevars *) + for i = 0 to nrRegs-1 do + match s.(i) with + | [] -> assert false + | fst :: _ -> + blocks.(x).livevars <- (i, fst) :: blocks.(x).livevars + done; + + + (* Update s for the children *) + List.iter + (fun (lhs,rhs) -> + List.iter (fun (lreg: reg) -> s.(lreg) <- x::s.(lreg) ) lhs; + ) + blocks.(x).instrlist; + + + (* Go and do the children *) + List.iter search children.(x); + + (* Then we pop x, whenever it is on top of a stack *) + Array.iteri + (fun i istack -> + let rec dropX = function + [] -> [] + | x' :: rest when x = x' -> dropX rest + | l -> l + in + s.(i) <- dropX istack) + s; + in + search(start) + + + +let prune_cfg (f: cfgInfo): cfgInfo = + let size = f.size in + if size = 0 then f else + let reachable = Array.make size false in + let worklist = ref([f.start]) in + while (!worklist != []) do + let h = List.hd !worklist in + worklist := List.tl !worklist; + reachable.(h) <- true; + List.iter (fun s -> if (reachable.(s) = false) then worklist := s::!worklist; + ) f.successors.(h); + done; +(* + let dummyblock = { bstmt = mkEmptyStmt (); + instrlist = []; + livevars = [] } + in +*) + let successors = Array.init size (fun i -> List.filter (fun s -> reachable.(s)) f.successors.(i)) in + let predecessors = Array.init size (fun i -> List.filter (fun s -> reachable.(s)) f.predecessors.(i)) in + Array.iteri (fun i b -> b.reachable <- reachable.(i)) f.blocks; + let result: cfgInfo = + { name = f.name; + start = f.start; + size = f.size; + successors = successors; + predecessors = predecessors; + blocks = f.blocks; + nrRegs = f.nrRegs; + regToVarinfo = f.regToVarinfo; + } + in + result + + +let add_ssa_info (f: cfgInfo): unit = + let f = prune_cfg f in + let d_reg () (r: int) = + dprintf "%s(%d)" f.regToVarinfo.(r).vname r + in + if debug then begin + ignore (E.log "Doing SSA for %s. Initial data:\n" f.name); + Array.iteri (fun i b -> + ignore (E.log " block %d:\n succs=@[%a@]\n preds=@[%a@]\n instr=@[%a@]\n" + i + (docList num) f.successors.(i) + (docList num) f.predecessors.(i) + (docList ~sep:line (fun (lhs, rhs) -> + dprintf "%a := @[%a@]" + (docList (d_reg ())) lhs (docList (d_reg ())) rhs)) + b.instrlist)) + f.blocks; + end; + + add_phi_functions_info f; + add_dom_def_info f; + + if debug then begin + ignore (E.log "After SSA\n"); + Array.iter (fun b -> + ignore (E.log " block %d livevars: @[%a@]\n" + b.bstmt.sid + (docList (fun (i, fst) -> + dprintf "%a def at %d" d_reg i fst)) + b.livevars)) + f.blocks; + end + + +let set2list s = + let result = ref([]) in + IntSet.iter (fun element -> result := element::!result) s; + !result + + + + +let preorderDAG (nrNodes: int) (successors: (int list) array): int list = + let processed = Array.make nrNodes false in + let revResult = ref ([]) in + let predecessorsSet = Array.make nrNodes (IntSet.empty) in + for i = 0 to nrNodes -1 do + List.iter (fun s -> predecessorsSet.(s) <- IntSet.add i predecessorsSet.(s)) successors.(i); + done; + let predecessors = Array.init nrNodes (fun i -> set2list predecessorsSet.(i)) in + let workList = ref([]) in + for i = 0 to nrNodes - 1 do + if (predecessors.(i) = []) then workList := i::!workList; + done; + while (!workList != []) do + let x = List.hd !workList in + workList := List.tl !workList; + revResult := x::!revResult; + processed.(x) <- true; + List.iter (fun s -> + if (List.for_all (fun p -> processed.(p)) predecessors.(s)) then + workList := s::!workList; + ) successors.(x); + done; + List.rev !revResult + + +(* Muchnick Fig 7.12 *) +(* takes an SCC description as an input and returns prepares the appropriate SCC *) +let preorder (nrNodes: int) (successors: (int list) array) (r: int): oneSccInfo = + if debug then begin + ignore (E.log "Inside preorder \n"); + for i = 0 to nrNodes - 1 do + ignore (E.log "succ(%d) = %a" i (docList (fun i -> num i)) successors.(i)); + done; + end; + let i = ref(0) in + let j = ref(0) in + let pre = Array.make nrNodes (-1) in + let post = Array.make nrNodes (-1) in + let visit = Array.make nrNodes (false) in + let backEdges = ref ([]) in + let headers = ref(IntSet.empty) in + let rec depth_first_search_pp (x:int) = + visit.(x) <- true; + pre.(x) <- !j; + incr j; + List.iter (fun (y:int) -> + if (not visit.(y)) then + (depth_first_search_pp y) + else + if (post.(y) = -1) then begin + backEdges := (x,y)::!backEdges; + headers := IntSet.add y !headers; + end; + ) successors.(x); + post.(x) <- !i; + incr i; + in + depth_first_search_pp r; + let nodes = Array.make nrNodes (-1) in + for y = 0 to nrNodes - 1 do + if (pre.(y) != -1) then nodes.(pre.(y)) <- y; + done; + let nodeList = List.filter (fun i -> (i != -1)) (Array.to_list nodes) in + let result = { headers = set2list !headers; backEdges = !backEdges; nodes = nodeList; } in + result + + +exception Finished + + +let strong_components (f: cfgInfo) (debug: bool) = + let size = f.size in + let parent = Array.make size (-1) in + let color = Array.make size (-1) in + let finish = Array.make size (-1) in + let root = Array.make size (-1) in + +(* returns a list of SCC. Each SCC is a tuple of SCC root and SCC nodes *) + let dfs (successors: (int list) array) (order: int array) = + let time = ref(-1) in + let rec dfs_visit u = + color.(u) <- 1; + incr time; + (* d.(u) <- time; *) + List.iter (fun v -> + if color.(v) = 0 then (parent.(v) <- u; dfs_visit v) + ) successors.(u); + color.(u) <- 2; + incr time; + finish.(u) <- !time + in + for u = 0 to size - 1 do + color.(u) <- 0; (* white = 0, gray = 1, black = 2 *) + parent.(u) <- -1; (* nil = -1 *) + root.(u) <- 0; (* Is u a root? *) + done; + time := 0; + Array.iter (fun u -> + if (color.(u) = 0) then begin + root.(u) <- 1; + dfs_visit u; + end; + ) order; + in + + let simpleOrder = Array.init size (fun i -> i) in + dfs f.successors simpleOrder; + Array.sort (fun i j -> if (finish.(i) > finish.(j)) then -1 else 1) simpleOrder; + + dfs f.predecessors simpleOrder; +(* SCCs have been computed. (The trees represented by non-null parent edges + * represent the SCCS. We call the black nodes as the roots). Now put the + * result in the ouput format *) + let allScc = ref([]) in + for u = 0 to size - 1 do + if root.(u) = 1 then begin + let sccNodes = ref(IntSet.empty) in + let workList = ref([u]) in + while (!workList != []) do + let h=List.hd !workList in + workList := List.tl !workList; + sccNodes := IntSet.add h !sccNodes; + List.iter (fun s -> if parent.(s)=h then workList := s::!workList;) f.predecessors.(h); + done; + allScc := (u,!sccNodes)::!allScc; + if (debug) then begin + ignore (E.log "Got an SCC with root %d and nodes %a" u (docList num) (set2list !sccNodes)); + end; + end; + done; + !allScc + + +let stronglyConnectedComponents (f: cfgInfo) (debug: bool): sccInfo = + let size = f.size in + if (debug) then begin + ignore (E.log "size = %d\n" size); + for i = 0 to size - 1 do + ignore (E.log "Successors(%d): %a\n" i (docList (fun n -> num n)) f.successors.(i)); + done; + end; + + let allScc = strong_components f debug in + let all_sccArray = Array.of_list allScc in + + if (debug) then begin + ignore (E.log "Computed SCCs\n"); + for i = 0 to (Array.length all_sccArray) - 1 do + ignore(E.log "SCC #%d: " i); + let (_,sccNodes) = all_sccArray.(i) in + IntSet.iter (fun i -> ignore(E.log "%d, " i)) sccNodes; + ignore(E.log "\n"); + done; + end; + + + (* Construct sccId: Node -> Scc Id *) + let sccId = Array.make size (-1) in + Array.iteri (fun i (r,sccNodes) -> + IntSet.iter (fun n -> sccId.(n) <- i) sccNodes; + ) all_sccArray; + + if (debug) then begin + ignore (E.log "\nComputed SCC IDs: "); + for i = 0 to size - 1 do + ignore (E.log "SCCID(%d) = %d " i sccId.(i)); + done; + end; + + + (* Construct sccCFG *) + let nrScc = Array.length all_sccArray in + let successors = Array.make nrScc [] in + for x = 0 to nrScc - 1 do + successors.(x) <- + let s = ref(IntSet.empty) in + IntSet.iter (fun y -> + List.iter (fun z -> + let sy = sccId.(y) in + let sz = sccId.(z) in + if (not(sy = sz)) then begin + s := IntSet.add sz !s; + end + ) f.successors.(y) + ) (snd all_sccArray.(x)); + set2list !s + done; + + if (debug) then begin + ignore (E.log "\nComputed SCC CFG, which should be a DAG:"); + ignore (E.log "nrSccs = %d " nrScc); + for i = 0 to nrScc - 1 do + ignore (E.log "successors(%d) = [%a] " i (docList (fun j -> num j)) successors.(i)); + done; + end; + + + (* Order SCCs. The graph is a DAG here *) + let sccorder = preorderDAG nrScc successors in + + if (debug) then begin + ignore (E.log "\nComputed SCC Preorder: "); + ignore (E.log "Nodes in Preorder = [%a]" (docList (fun i -> num i)) sccorder); + end; + + (* Order nodes of each SCC. The graph is a SCC here.*) + let scclist = List.map (fun i -> + let successors = Array.create size [] in + for j = 0 to size - 1 do + successors.(j) <- List.filter (fun x -> IntSet.mem x (snd all_sccArray.(i))) f.successors.(j); + done; + preorder f.size successors (fst all_sccArray.(i)) + ) sccorder in + if (debug) then begin + ignore (E.log "Computed Preorder for Nodes of each SCC\n"); + List.iter (fun scc -> + ignore (E.log "BackEdges = %a \n" + (docList (fun (src,dest) -> dprintf "(%d,%d)" src dest)) + scc.backEdges);) + scclist; + end; + scclist + + + + + + + + + diff --git a/cil/src/ext/ssa.mli b/cil/src/ext/ssa.mli new file mode 100644 index 00000000..be244d81 --- /dev/null +++ b/cil/src/ext/ssa.mli @@ -0,0 +1,45 @@ +type cfgInfo = { + name: string; (* The function name *) + start : int; + size : int; + blocks: cfgBlock array; (** Dominating blocks must come first *) + successors: int list array; (* block indices *) + predecessors: int list array; + mutable nrRegs: int; + mutable regToVarinfo: Cil.varinfo array; (** Map register IDs to varinfo *) + } + +(** A block corresponds to a statement *) +and cfgBlock = { + bstmt: Cil.stmt; + + (* We abstract the statement as a list of def/use instructions *) + instrlist: instruction list; + mutable livevars: (reg * int) list; + (** For each variable ID that is live at the start of the block, the + * block whose definition reaches this point. If that block is the same + * as the current one, then the variable is a phi variable *) + mutable reachable: bool; + } + +and instruction = (reg list * reg list) + (* lhs variables, variables on rhs. *) + + +and reg = int + +type idomInfo = int array (* immediate dominator *) + +and dfInfo = (int list) array (* dominance frontier *) + +and oneSccInfo = { + nodes: int list; + headers: int list; + backEdges: (int*int) list; + } + +and sccInfo = oneSccInfo list + +val add_ssa_info: cfgInfo -> unit +val stronglyConnectedComponents: cfgInfo -> bool -> sccInfo +val prune_cfg: cfgInfo -> cfgInfo diff --git a/cil/src/ext/stackoverflow.ml b/cil/src/ext/stackoverflow.ml new file mode 100644 index 00000000..da2c4018 --- /dev/null +++ b/cil/src/ext/stackoverflow.ml @@ -0,0 +1,246 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) +module H = Hashtbl +open Cil +open Pretty +module E = Errormsg + +let debug = false + + +(* For each function we have a node *) +type node = { name: string; + mutable scanned: bool; + mutable mustcheck: bool; + mutable succs: node list } +(* We map names to nodes *) +let functionNodes: (string, node) H.t = H.create 113 +let getFunctionNode (n: string) : node = + Util.memoize + functionNodes + n + (fun _ -> { name = n; mustcheck = false; scanned = false; succs = [] }) + +(** Dump the function call graph. Assume that there is a main *) +let dumpGraph = true +let dumpFunctionCallGraph () = + H.iter (fun _ x -> x.scanned <- false) functionNodes; + let rec dumpOneNode (ind: int) (n: node) : unit = + output_string !E.logChannel "\n"; + for i = 0 to ind do + output_string !E.logChannel " " + done; + output_string !E.logChannel (n.name ^ " "); + if n.scanned then (* Already dumped *) + output_string !E.logChannel " " + else begin + n.scanned <- true; + List.iter (dumpOneNode (ind + 1)) n.succs + end + in + try + let main = H.find functionNodes "main" in + dumpOneNode 0 main + with Not_found -> begin + ignore (E.log + "I would like to dump the function graph but there is no main"); + end + +(* We add a dummy function whose name is "@@functionPointer@@" that is called + * at all invocations of function pointers and itself calls all functions + * whose address is taken. *) +let functionPointerName = "@@functionPointer@@" + +let checkSomeFunctions = ref false + +let init () = + H.clear functionNodes; + checkSomeFunctions := false + + +let addCall (caller: string) (callee: string) = + let callerNode = getFunctionNode caller in + let calleeNode = getFunctionNode callee in + if not (List.exists (fun n -> n.name = callee) callerNode.succs) then begin + if debug then + ignore (E.log "found call from %s to %s\n" caller callee); + callerNode.succs <- calleeNode :: callerNode.succs; + end; + () + + +class findCallsVisitor (host: string) : cilVisitor = object + inherit nopCilVisitor + + method vinst i = + match i with + | Call(_,Lval(Var(vi),NoOffset),_,l) -> + addCall host vi.vname; + SkipChildren + + | Call(_,e,_,l) -> (* Calling a function pointer *) + addCall host functionPointerName; + SkipChildren + + | _ -> SkipChildren (* No calls in other instructions *) + + (* There are no calls in expressions and types *) + method vexpr e = SkipChildren + method vtype t = SkipChildren + +end + +(* Now detect the cycles in the call graph. Do a depth first search of the + * graph (stack is the list of nodes already visited in the current path). + * Return true if we have found a cycle. *) +let rec breakCycles (stack: node list) (n: node) : bool = + if n.scanned then (* We have already scanned this node. There are no cycles + * going through this node *) + false + else if n.mustcheck then + (* We are reaching a node that we already know we much check. Return with + * no new cycles. *) + false + else if List.memq n stack then begin + (* We have found a cycle. Mark the node n to be checked and return *) + if debug then + ignore (E.log "Will place an overflow check in %s\n" n.name); + checkSomeFunctions := true; + n.mustcheck <- true; + n.scanned <- true; + true + end else begin + let res = List.exists (fun nd -> breakCycles (n :: stack) nd) n.succs in + n.scanned <- true; + if res && n.mustcheck then + false + else + res + end +let findCheckPlacement () = + H.iter (fun _ nd -> + if nd.name <> functionPointerName + && not nd.scanned && not nd.mustcheck then begin + ignore (breakCycles [] nd) + end) + functionNodes + +let makeFunctionCallGraph (f: Cil.file) : unit = + init (); + (* Scan the file and construct the control-flow graph *) + List.iter + (function + GFun(fdec, _) -> + if fdec.svar.vaddrof then + addCall functionPointerName fdec.svar.vname; + let vis = new findCallsVisitor fdec.svar.vname in + ignore (visitCilBlock vis fdec.sbody) + + | _ -> ()) + f.globals + +let makeAndDumpFunctionCallGraph (f: file) = + makeFunctionCallGraph f; + dumpFunctionCallGraph () + + +let addCheck (f: Cil.file) : unit = + makeFunctionCallGraph f; + findCheckPlacement (); + if !checkSomeFunctions then begin + (* Add a declaration for the stack threshhold variable. The program is + * stopped when the stack top is less than this value. *) + let stackThreshholdVar = makeGlobalVar "___stack_threshhold" !upointType in + stackThreshholdVar.vstorage <- Extern; + (* And the initialization function *) + let computeStackThreshhold = + makeGlobalVar "___compute_stack_threshhold" + (TFun(!upointType, Some [], false, [])) in + computeStackThreshhold.vstorage <- Extern; + (* And the failure function *) + let stackOverflow = + makeGlobalVar "___stack_overflow" + (TFun(voidType, Some [], false, [])) in + stackOverflow.vstorage <- Extern; + f.globals <- + GVar(stackThreshholdVar, {init=None}, locUnknown) :: + GVarDecl(computeStackThreshhold, locUnknown) :: + GVarDecl(stackOverflow, locUnknown) :: f.globals; + (* Now scan and instrument each function definition *) + List.iter + (function + GFun(fdec, l) -> + (* If this is main we must introduce the initialization of the + * bottomOfStack *) + let nd = getFunctionNode fdec.svar.vname in + if fdec.svar.vname = "main" then begin + if nd.mustcheck then + E.s (E.error "The \"main\" function is recursive!!"); + let loc = makeLocalVar fdec "__a_local" intType in + loc.vaddrof <- true; + fdec.sbody <- + mkBlock + [ mkStmtOneInstr + (Call (Some(var stackThreshholdVar), + Lval(var computeStackThreshhold), [], l)); + mkStmt (Block fdec.sbody) ] + end else if nd.mustcheck then begin + let loc = makeLocalVar fdec "__a_local" intType in + loc.vaddrof <- true; + fdec.sbody <- + mkBlock + [ mkStmt + (If(BinOp(Le, + CastE(!upointType, AddrOf (var loc)), + Lval(var stackThreshholdVar), intType), + mkBlock [mkStmtOneInstr + (Call(None, Lval(var stackOverflow), + [], l))], + mkBlock [], + l)); + mkStmt (Block fdec.sbody) ] + end else + () + + | _ -> ()) + f.globals; + () + end + + + + diff --git a/cil/src/ext/stackoverflow.mli b/cil/src/ext/stackoverflow.mli new file mode 100644 index 00000000..6ec02007 --- /dev/null +++ b/cil/src/ext/stackoverflow.mli @@ -0,0 +1,43 @@ +(* + * + * Copyright (c) 2001-2002, + * George C. Necula + * Scott McPeak + * Wes Weimer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of the contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *) + +(* This module inserts code to check for stack overflow. It saves the address + * of the top of the stack in "main" and then it picks one function *) + +val addCheck: Cil.file -> unit + +val makeAndDumpFunctionCallGraph: Cil.file -> unit diff --git a/cil/src/ext/usedef.ml b/cil/src/ext/usedef.ml new file mode 100755 index 00000000..57f226aa --- /dev/null +++ b/cil/src/ext/usedef.ml @@ -0,0 +1,188 @@ +(* MODIF: Loop constructor replaced by 3 constructors: While, DoWhile, For. *) + + +open Cil +open Pretty + +(** compute use/def information *) + +module VS = Set.Make (struct + type t = Cil.varinfo + let compare v1 v2 = Pervasives.compare v1.vid v2.vid + end) + +(** Set this global to how you want to handle function calls *) +let getUseDefFunctionRef: (exp -> VS.t * VS.t) ref = + ref (fun _ -> (VS.empty, VS.empty)) + +(** Say if you want to consider a variable use *) +let considerVariableUse: (varinfo -> bool) ref = + ref (fun _ -> true) + + +(** Say if you want to consider a variable def *) +let considerVariableDef: (varinfo -> bool) ref = + ref (fun _ -> true) + +(** Save if you want to consider a variable addrof as a use *) +let considerVariableAddrOfAsUse: (varinfo -> bool) ref = + ref (fun _ -> true) + +(* When this is true, only definitions of a variable without + an offset are counted as definitions. So: + a = 5; would be a definition, but + a[1] = 5; would not *) +let onlyNoOffsetsAreDefs: bool ref = ref false + +let varUsed: VS.t ref = ref VS.empty +let varDefs: VS.t ref = ref VS.empty + +class useDefVisitorClass : cilVisitor = object (self) + inherit nopCilVisitor + + (** this will be invoked on variable definitions only because we intercept + * all uses of variables in expressions ! *) + method vvrbl (v: varinfo) = + if (!considerVariableDef) v && + not(!onlyNoOffsetsAreDefs) then + varDefs := VS.add v !varDefs; + SkipChildren + + (** If onlyNoOffsetsAreDefs is true, then we need to see the + * varinfo in an lval along with the offset. Otherwise just + * DoChildren *) + method vlval (l: lval) = + if !onlyNoOffsetsAreDefs then + match l with + (Var vi, NoOffset) -> + if (!considerVariableDef) vi then + varDefs := VS.add vi !varDefs; + SkipChildren + | _ -> DoChildren + else DoChildren + + method vexpr = function + Lval (Var v, off) -> + ignore (visitCilOffset (self :> cilVisitor) off); + if (!considerVariableUse) v then + varUsed := VS.add v !varUsed; + SkipChildren (* So that we do not see the v *) + + | AddrOf (Var v, off) + | StartOf (Var v, off) -> + ignore (visitCilOffset (self :> cilVisitor) off); + if (!considerVariableAddrOfAsUse) v then + varUsed := VS.add v !varUsed; + SkipChildren + + | _ -> DoChildren + + (* For function calls, do the transitive variable read/defs *) + method vinst = function + Call (_, f, _, _) -> begin + (* we will call DoChildren to compute the use and def that appear in + * this instruction. We also add in the stuff computed by + * getUseDefFunctionRef *) + let use, def = !getUseDefFunctionRef f in + varUsed := VS.union !varUsed use; + varDefs := VS.union !varDefs def; + DoChildren; + end + | Asm(_,_,slvl,_,_,_) -> List.iter (fun (s,lv) -> + match lv with (Var v, off) -> + if s.[0] = '+' then + varUsed := VS.add v !varUsed; + | _ -> ()) slvl; + DoChildren + | _ -> DoChildren + +end + +let useDefVisitor = new useDefVisitorClass + +(** Compute the use information for an expression (accumulate to an existing + * set) *) +let computeUseExp ?(acc=VS.empty) (e: exp) : VS.t = + varUsed := acc; + ignore (visitCilExpr useDefVisitor e); + !varUsed + + +(** Compute the use/def information for an instruction *) +let computeUseDefInstr ?(acc_used=VS.empty) + ?(acc_defs=VS.empty) + (i: instr) : VS.t * VS.t = + varUsed := acc_used; + varDefs := acc_defs; + ignore (visitCilInstr useDefVisitor i); + !varUsed, !varDefs + + +(** Compute the use/def information for a statement kind. Do not descend into + * the nested blocks. *) +let computeUseDefStmtKind ?(acc_used=VS.empty) + ?(acc_defs=VS.empty) + (sk: stmtkind) : VS.t * VS.t = + varUsed := acc_used; + varDefs := acc_defs; + let ve e = ignore (visitCilExpr useDefVisitor e) in + let _ = + match sk with + Return (None, _) -> () + | Return (Some e, _) -> ve e + | If (e, _, _, _) -> ve e + | Break _ | Goto _ | Continue _ -> () +(* + | Loop (_, _, _, _) -> () +*) + | While _ | DoWhile _ | For _ -> () + | Switch (e, _, _, _) -> ve e + | Instr il -> + List.iter (fun i -> ignore (visitCilInstr useDefVisitor i)) il + | TryExcept _ | TryFinally _ -> () + | Block _ -> () + in + !varUsed, !varDefs + +(* Compute the use/def information for a statement kind. + DO descend into nested blocks *) +let rec computeDeepUseDefStmtKind ?(acc_used=VS.empty) + ?(acc_defs=VS.empty) + (sk: stmtkind) : VS.t * VS.t = + let handle_block b = + List.fold_left (fun (u,d) s -> + let u',d' = computeDeepUseDefStmtKind s.skind in + (VS.union u u', VS.union d d')) (VS.empty, VS.empty) + b.bstmts + in + varUsed := acc_used; + varDefs := acc_defs; + let ve e = ignore (visitCilExpr useDefVisitor e) in + match sk with + Return (None, _) -> !varUsed, !varDefs + | Return (Some e, _) -> + let _ = ve e in + !varUsed, !varDefs + | If (e, tb, fb, _) -> + let _ = ve e in + let u, d = !varUsed, !varDefs in + let u', d' = handle_block tb in + let u'', d'' = handle_block fb in + (VS.union (VS.union u u') u'', VS.union (VS.union d d') d'') + | Break _ | Goto _ | Continue _ -> !varUsed, !varDefs +(* + | Loop (b, _, _, _) -> handle_block b +*) + | While (_, b, _) -> handle_block b + | DoWhile (_, b, _) -> handle_block b + | For (_, _, _, b, _) -> handle_block b + | Switch (e, b, _, _) -> + let _ = ve e in + let u, d = !varUsed, !varDefs in + let u', d' = handle_block b in + (VS.union u u', VS.union d d') + | Instr il -> + List.iter (fun i -> ignore (visitCilInstr useDefVisitor i)) il; + !varUsed, !varDefs + | TryExcept _ | TryFinally _ -> !varUsed, !varDefs + | Block b -> handle_block b -- cgit