From a5f03d96eee482cd84861fc8cefff9eb451c0cad Mon Sep 17 00:00:00 2001 From: xleroy Date: Sun, 29 Mar 2009 09:47:11 +0000 Subject: Cleaned up configure script. Distribution of CIL as an expanded source tree with changes applied (instead of original .tar.gz + patches to be applied at config time). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1020 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- cil/doc/cil004.html | 350 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 cil/doc/cil004.html (limited to 'cil/doc/cil004.html') diff --git a/cil/doc/cil004.html b/cil/doc/cil004.html new file mode 100644 index 00000000..16fde392 --- /dev/null +++ b/cil/doc/cil004.html @@ -0,0 +1,350 @@ + + + + + + + + + + + + + +Compiling C to CIL + + + +Previous +Up +Next +
+ +

4  Compiling C to CIL

+In this section we try to describe a few of the many transformations that are +applied to a C program to convert it to CIL. The module that implements this +conversion is about 5000 lines of OCaml code. In contrast a simple program +transformation that instruments all functions to keep a shadow stack of the +true return address (thus preventing stack smashing) is only 70 lines of code. +This example shows that the analysis is so much simpler because it has to +handle only a few simple C constructs and also because it can leverage on CIL +infrastructure such as visitors and pretty-printers.
+
+In no particular order these are a few of the most significant ways in which +C programs are compiled into CIL: +
  1. +CIL will eliminate all declarations for unused entities. This means that +just because your hello world program includes stdio.h it does not mean +that your analysis has to handle all the ugly stuff from stdio.h.
    +
    +
  2. Type specifiers are interpreted and normalized: +
    
    +int long signed x;
    +signed long extern x;
    +long static int long y;
    +
    +// Some code that uses these declaration, so that CIL does not remove them
    +int main() { return x + y; }
    +
    +See the CIL output for this +code fragment
    +
    +
  3. Anonymous structure and union declarations are given a name. +
    
    + struct { int x; } s;
    +
    +See the CIL output for this +code fragment
    +
    +
  4. Nested structure tag definitions are pulled apart. This means that all +structure tag definitions can be found by a simple scan of the globals. +
    
    +struct foo {
    +   struct bar {
    +      union baz { 
    +          int x1; 
    +          double x2;
    +      } u1;
    +      int y;
    +   } s1;
    +   int z;
    +} f;
    +
    +See the CIL output for this +code fragment
    +
    +
  5. All structure, union, enumeration definitions and the type definitions +from inners scopes are moved to global scope (with appropriate renaming). This +facilitates moving around of the references to these entities. +
    
    +int main() {
    +  struct foo { 
    +        int x; } foo; 
    +  {
    +     struct foo { 
    +        double d;
    +     };
    +     return foo.x;
    +  }      
    +}
    +
    +See the CIL output for this +code fragment
    +
    +
  6. Prototypes are added for those functions that are called before being +defined. Furthermore, if a prototype exists but does not specify the type of +parameters that is fixed. But CIL will not be able to add prototypes for those +functions that are neither declared nor defined (but are used!). +
    
    +  int f();  // Prototype without arguments
    +  int f(double x) {
    +      return g(x);
    +  }
    +  int g(double x) {
    +     return x;
    +  } 
    +
    +See the CIL output for this +code fragment
    +
    +
  7. Array lengths are computed based on the initializers or by constant +folding. +
    
    +  int a1[] = {1,2,3};
    +  int a2[sizeof(int) >= 4 ? 8 : 16];
    +
    +See the CIL output for this +code fragment
    +
    +
  8. Enumeration tags are computed using constant folding: +
    
    +int main() {
    +  enum { 
    +     FIVE = 5, 
    +     SIX, SEVEN, 
    +     FOUR = FIVE - 1, 
    +     EIGHT = sizeof(double)
    +  } x = FIVE;
    + return x;
    +}
    +
    +
    +See the CIL output for this +code fragment
    +
    +
  9. Initializers are normalized to include specific initialization for the +missing elements: +
    
    +  int a1[5] = {1,2,3};
    +  struct foo { int x, y; } s1 = { 4 };
    +
    +See the CIL output for this +code fragment
    +
    +
  10. Initializer designators are interpreted and eliminated. Subobjects are +properly marked with braces. CIL implements +the whole ISO C99 specification for initializer (neither GCC nor MSVC do) and +a few GCC extensions. +
    
    +  struct foo { 
    +     int x, y; 
    +     int a[5];
    +     struct inner {
    +        int z;
    +     } inner;
    +  } s = { 0, .inner.z = 3, .a[1 ... 2] = 5, 4, y : 8 };
    +
    +See the CIL output for this +code fragment
    +
    +
  11. String initializers for arrays of characters are processed +
    
    +char foo[] = "foo plus bar";
    +
    +See the CIL output for this +code fragment
    +
    +
  12. String constants are concatenated +
    
    +char *foo = "foo " " plus " " bar ";
    +
    +See the CIL output for this +code fragment
    +
    +
  13. Initializers for local variables are turned into assignments. This is in +order to separate completely the declarative part of a function body from the +statements. This has the unfortunate effect that we have to drop the const +qualifier from local variables ! +
    
    +  int x = 5; 
    +  struct foo { int f1, f2; } a [] = {1, 2, 3, 4, 5 };
    +
    +See the CIL output for this +code fragment
    +
    +
  14. Local variables in inner scopes are pulled to function scope (with +appropriate renaming). Local scopes thus disappear. This makes it easy to find +and operate on all local variables in a function. +
    
    +  int x = 5; 
    +  int main() {
    +    int x = 6;
    +    { 
    +      int x = 7;
    +      return x;
    +    }
    +    return x;
    +  } 
    +
    +See the CIL output for this +code fragment
    +
    +
  15. Global declarations in local scopes are moved to global scope: +
    
    +  int x = 5; 
    +  int main() {
    +    int x = 6;
    +    { 
    +      static int x = 7;
    +      return x;
    +    }
    +    return x;
    +  } 
    +
    +See the CIL output for this +code fragment
    +
    +
  16. Return statements are added for functions that are missing them. If the +return type is not a base type then a return without a value is added. +The guaranteed presence of return statements makes it easy to implement a +transformation that inserts some code to be executed immediately before +returning from a function. +
    
    +  int foo() {
    +    int x = 5;
    +  } 
    +
    +See the CIL output for this +code fragment
    +
    +
  17. One of the most significant transformations is that expressions that +contain side-effects are separated into statements. +
    
    +   int x, f(int);
    +   return (x ++ + f(x));
    +
    +See the CIL output for this +code fragment
    +
    +Internally, the x ++ statement is turned into an assignment which the +pretty-printer prints like the original. CIL has only three forms of basic +statements: assignments, function calls and inline assembly.
    +
    +
  18. Shortcut evaluation of boolean expressions and the ?: operator are +compiled into explicit conditionals: +
    
    +  int x;
    +  int y = x ? 2 : 4;
    +  int z = x || y;
    +  // Here we duplicate the return statement
    +  if(x && y) { return 0; } else { return 1; }
    +  // To avoid excessive duplication, CIL uses goto's for 
    +  // statement that have more than 5 instructions
    +  if(x && y || z) { x ++; y ++; z ++; x ++; y ++; return z; }
    +
    +See the CIL output for this +code fragment
    +
    +
  19. GCC's conditional expression with missing operands are also compiled +into conditionals: +
    
    +  int f();;
    +  return f() ? : 4;
    +
    +See the CIL output for this +code fragment
    +
    +
  20. All forms of loops (while, for and do) are compiled +internally as a single while(1) looping construct with explicit break +statement for termination. For simple while loops the pretty printer is +able to print back the original: +
    
    +   int x, y;
    +   for(int i = 0; i<5; i++) {
    +      if(i == 5) continue;
    +      if(i == 4) break;
    +      i += 2;
    +   } 
    +   while(x < 5) {
    +     if(x == 3) continue;
    +     x ++;
    +   }
    +
    +See the CIL output for this +code fragment
    +
    +
  21. GCC's block expressions are compiled away. (That's right there is an +infinite loop in this code.) +
    
    +   int x = 5, y = x;
    +   int z = ({ x++; L: y -= x; y;});
    +   return ({ goto L; 0; });
    +
    +See the CIL output for this +code fragment
    +
    +
  22. CIL contains support for both MSVC and GCC inline assembly (both in one +internal construct)
    +
    +
  23. CIL compiles away the GCC extension that allows many kinds of constructs +to be used as lvalues: +
    
    +   int x, y, z;
    +   return &(x ? y : z) - & (x ++, x);
    +
    +See the CIL output for this +code fragment
    +
    +
  24. All types are computed and explicit casts are inserted for all +promotions and conversions that a compiler must insert:
    +
    +
  25. CIL will turn old-style function definition (without prototype) into +new-style definitions. This will make the compiler less forgiving when +checking function calls, and will catch for example cases when a function is +called with too few arguments. This happens in old-style code for the purpose +of implementing variable argument functions.
    +
    +
  26. Since CIL sees the source after preprocessing the code after CIL does +not contain the comments and the preprocessing directives.
    +
    +
  27. CIL will remove from the source file those type declarations, local +variables and inline functions that are not used in the file. This means that +your analysis does not have to see all the ugly stuff that comes from the +header files: +
    
    +#include <stdio.h>
    +
    +typedef int unused_type;
    +
    +static char unused_static (void) { return 0; }
    +
    +int main() {
    +  int unused_local;
    +  printf("Hello world\n"); // Only printf will be kept from stdio.h     
    +}
    +
    +See the CIL output for this +code fragment
+
+Previous +Up +Next + + -- cgit