From a5f03d96eee482cd84861fc8cefff9eb451c0cad Mon Sep 17 00:00:00 2001 From: xleroy Date: Sun, 29 Mar 2009 09:47:11 +0000 Subject: Cleaned up configure script. Distribution of CIL as an expanded source tree with changes applied (instead of original .tar.gz + patches to be applied at config time). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1020 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- cil/doc/cil016.html | 342 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 cil/doc/cil016.html (limited to 'cil/doc/cil016.html') diff --git a/cil/doc/cil016.html b/cil/doc/cil016.html new file mode 100644 index 00000000..3191a9d5 --- /dev/null +++ b/cil/doc/cil016.html @@ -0,0 +1,342 @@ + + + + + + + + + + + + + +Who Says C is Simple? + + + +Previous +Up +Next +
+ +

16  Who Says C is Simple?

+When I (George) started to write CIL I thought it was going to take two weeks. +Exactly a year has passed since then and I am still fixing bugs in it. This +gross underestimate was due to the fact that I thought parsing and making +sense of C is simple. You probably think the same. What I did not expect was +how many dark corners this language has, especially if you want to parse +real-world programs such as those written for GCC or if you are more ambitious +and you want to parse the Linux or Windows NT sources (both of these were +written without any respect for the standard and with the expectation that +compilers will be changed to accommodate the program).
+
+The following examples were actually encountered either in real programs or +are taken from the ISO C99 standard or from GCC's testcases. My first +reaction when I saw these was: Is this C? The second one was: What the hell does it mean?
+
+If you are contemplating doing program analysis for C on abstract-syntax +trees then your analysis ought to be able to handle these things. Or, you can +use CIL and let CIL translate them into clean C code.
+
+ +

16.1  Standard C

+
  1. Why does the following code return 0 for most values of x? (This +should be easy.) +
    
    +  int x;
    +  return x == (1 && x);
    +
    +See the CIL output for this +code fragment
    +
    +
  2. Why does the following code return 0 and not -1? (Answer: because +sizeof is unsigned, thus the result of the subtraction is unsigned, thus +the shift is logical.) +
    
    + return ((1 - sizeof(int)) >> 32);
    +
    +See the CIL output for this +code fragment
    +
    +
  3. Scoping rules can be tricky. This function returns 5. +
    
    +int x = 5;
    +int f() {
    +  int x = 3;
    +  {
    +    extern int x;
    +    return x;
    +  }
    +}
    +
    +See the CIL output for this +code fragment
    +
    +
  4. Functions and function pointers are implicitly converted to each other. +
    
    +int (*pf)(void);
    +int f(void) {
    +
    +   pf = &f; // This looks ok
    +   pf = ***f; // Dereference a function?
    +   pf(); // Invoke a function pointer?     
    +   (****pf)();  // Looks strange but Ok
    +   (***************f)(); // Also Ok             
    +}
    +
    +See the CIL output for this +code fragment
    +
    +
  5. Initializers with designators are one of the hardest parts of ISO C. +Neither MSVC nor GCC implements them fully. GCC comes close though. What are the +final values of i.nested.y and i.nested.z? (Answer: 2 and 6, +respectively.) +
    
    +struct { 
    +   int x; 
    +   struct { 
    +       int y, z; 
    +   } nested;
    +} i = { .nested.y = 5, 6, .x = 1, 2 };               
    +
    +See the CIL output for this +code fragment
    +
    +
  6. This is from c-torture. This function returns 1. +
    
    +typedef struct
    +{
    +  char *key;
    +  char *value;
    +} T1;
    +
    +typedef struct
    +{
    +  long type;
    +  char *value;
    +} T3;
    +
    +T1 a[] =
    +{
    +  {
    +    "",
    +    ((char *)&((T3) {1, (char *) 1}))
    +  }
    +};
    +int main() {
    +   T3 *pt3 = (T3*)a[0].value;
    +   return pt3->value;
    +}
    +
    +See the CIL output for this +code fragment
    +
    +
  7. Another one with constructed literals. This one is legal according to +the GCC documentation but somehow GCC chokes on it (it works in CIL though). This +code returns 2. +
    
    + return ((int []){1,2,3,4})[1];
    +
    +See the CIL output for this +code fragment
    +
    +
  8. In the example below there is one copy of “bar” and two copies of + “pbar” (static prototypes at block scope have file scope, while for all + other types they have block scope). +
    
    +  int foo() {
    +     static bar();
    +     static (*pbar)() = bar;
    +
    +  }
    +
    +  static bar() { 
    +    return 1;
    +  }
    +
    +  static (*pbar)() = 0;
    +
    +See the CIL output for this +code fragment
    +
    +
  9. Two years after heavy use of CIL, by us and others, I discovered a bug + in the parser. The return value of the following function depends on what + precedence you give to casts and unary minus: +
    
    +  unsigned long foo() {
    +    return (unsigned long) - 1 / 8;
    +  }
    +
    +See the CIL output for this +code fragment
    +
    +The correct interpretation is ((unsigned long) - 1) / 8, which is a + relatively large number, as opposed to (unsigned long) (- 1 / 8), which + is 0.
+ +

16.2  GCC ugliness

+
  1. GCC has generalized lvalues. You can take the address of a lot of +strange things: +
    
    +  int x, y, z;
    +  return &(x ? y : z) - & (x++, x);
    +
    +See the CIL output for this +code fragment
    +
    +
  2. GCC lets you omit the second component of a conditional expression. +
    
    +  extern int f();
    +  return f() ? : -1; // Returns the result of f unless it is 0
    +
    +See the CIL output for this +code fragment
    +
    +
  3. Computed jumps can be tricky. CIL compiles them away in a fairly clean +way but you are on your own if you try to jump into another function this way. +
    
    +static void *jtab[2]; // A jump table
    +static int doit(int x){
    + 
    +  static int jtab_init = 0;
    +  if(!jtab_init) { // Initialize the jump table
    +    jtab[0] = &&lbl1;
    +    jtab[1] = &&lbl2;
    +    jtab_init = 1;
    +  }
    +  goto *jtab[x]; // Jump through the table
    +lbl1:
    +  return 0;
    +lbl2:
    +  return 1;
    +}
    + 
    +int main(void){
    +  if (doit(0) != 0) exit(1);
    +  if (doit(1) != 1) exit(1);
    +  exit(0);
    +}
    +
    +See the CIL output for this +code fragment
    +
    +
  4. A cute little example that we made up. What is the returned value? +(Answer: 1.) +
    
    + return ({goto L; 0;}) && ({L: 5;});
    +
    +See the CIL output for this +code fragment
    +
    +
  5. extern inline is a strange feature of GNU C. Can you guess what the +following code computes? +
    
    +extern inline foo(void) { return 1; }
    +int firstuse(void) { return foo(); }
    +
    +// A second, incompatible definition of foo
    +int foo(void) { return 2; }
    +
    +int main() {
    +    return foo() + firstuse();
    +}
    +
    +See the CIL output for this +code fragment
    +
    +The answer depends on whether the optimizations are turned on. If they are, +then the answer is 3 (the first definition is inlined at all occurrences until +the second definition). If the optimizations are off, then the first +definition is ignored (treated like a prototype) and the answer is 4.
    +
    +CIL will misbehave on this example, if the optimizations are turned off (it + always returns 3).
    +
    +
  6. GCC allows you to cast an object of a type T into a union as long as the +union has a field of that type: +
    
    +union u { 
    +   int i; 
    +   struct s { 
    +      int i1, i2;
    +   } s;
    +};
    +
    +union u x = (union u)6;
    +
    +int main() {
    +  struct s y = {1, 2};
    +  union u  z = (union u)y;
    +}
    +
    +See the CIL output for this +code fragment
    +
    +
  7. GCC allows you to use the __mode__ attribute to specify the size +of the integer instead of the standard char, short and so on: +
    
    +int __attribute__ ((__mode__ (  __QI__ ))) i8;
    +int __attribute__ ((__mode__ (  __HI__ ))) i16;
    +int __attribute__ ((__mode__ (  __SI__ ))) i32;
    +int __attribute__ ((__mode__ (  __DI__ ))) i64;
    +
    +See the CIL output for this +code fragment
    +
    +
  8. The “alias” attribute on a function declaration tells the + linker to treat this declaration as another name for the specified + function. CIL will replace the declaration with a trampoline + function pointing to the specified target. +
    
    +    static int bar(int x, char y) {
    +      return x + y;
    +    }
    +
    +    //foo is considered another name for bar.
    +    int foo(int x, char y) __attribute__((alias("bar")));
    +
    +See the CIL output for this +code fragment
+ +

16.3  Microsoft VC ugliness

+This compiler has few extensions, so there is not much to say here. +
  1. +Why does the following code return 0 and not -1? (Answer: because of a +bug in Microsoft Visual C. It thinks that the shift is unsigned just because +the second operand is unsigned. CIL reproduces this bug when in MSVC mode.) +
    
    + return -3 >> (8 * sizeof(int));
    +

    +
    +
  2. Unnamed fields in a structure seem really strange at first. It seems +that Microsoft Visual C introduced this extension, then GCC picked it up (but +in the process implemented it wrongly: in GCC the field y overlaps with +x!). +
    
    +struct {
    +  int x;
    +  struct {
    +     int y, z;
    +     struct {
    +       int u, v;
    +     };
    + };
    +} a;
    +return a.x + a.y + a.z + a.u + a.v;
    +
    +See the CIL output for this +code fragment
+
+Previous +Up +Next + + -- cgit