PLSysSec / sys

Sys: A Static/Symbolic Tool for Finding Good Bugs in Good (Browser) Code
https://cseweb.ucsd.edu/~dstefan/pubs/brown:2020:sys.pdf
GNU General Public License v2.0
215 stars 41 forks source link

Uninit checker - false positive? #23

Open marcinguy opened 3 years ago

marcinguy commented 3 years ago

Sys output

Stack uninit bug
Name "gmed_n_91"
in
Name "gmed_n"
[UnName 2,UnName 12,UnName 33,UnName 34,UnName 85]
"/tmp/test.ll"
on line
7

C Code:

user@2d2d788ad909:~/src/sys$ cat /tmp/test.c 
#define NMAX    9 
#include <stdio.h>
typedef short Word16;

Word16 gmed_n(            /* o : the median value    */
        Word16 ind[],   /* i : input values        */
        Word16 n        /* i : number of inputs    */
         )
{
    Word16 i, j, ix = 0;
    Word16 max;
    Word16 medianIndex;
    Word16  tmp[NMAX];
    Word16  tmp2[NMAX];

    for (i = 0; i < n; i++)
    {
        *(tmp2 + i) = *(ind + i);
    }

    for (i = 0; i < n; i++)
    {
        max = -32767;
        for (j = 0; j < n; j++)
        {
            if (*(tmp2 + j) >= max)
            {
                max = *(tmp2 + j);
                ix = j;
            }
        }
        *(tmp2 + ix) = -32768;
        *(tmp + i) = ix;
    }

    medianIndex = *(tmp + (n >> 1));  /* account for complex addressing */

    return (*(ind + medianIndex)); <--- here should be uninit Bug per Sys
}

int main()
{
    short a[5]={1,32767,3,4,5};
    int i = 0;
    short res;
    res=gmed_n(a,5);
    printf("%i\n",res);
}

here is the bug:

return (*(ind + medianIndex)); <--- here should be uninit Bug per Sys

However tmp in the line above is referenced and assigned good values in for loop.

Any explanation why Sys flagged it? Using static and symbolic execution

Working on a real project and would like to understand Sys better.

Thanks in advance

LL for reference:

; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

@__const.main.a = private unnamed_addr constant [5 x i16] [i16 1, i16 32767, i16 3, i16 4, i16 5], align 2
@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1

; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i16 @gmed_n(i16*, i16 signext) #0 {
  %3 = alloca i16*, align 8
  %4 = alloca i16, align 2
  %5 = alloca i16, align 2
  %6 = alloca i16, align 2
  %7 = alloca i16, align 2
  %8 = alloca i16, align 2
  %9 = alloca i16, align 2
  %10 = alloca [9 x i16], align 16
  %11 = alloca [9 x i16], align 16
  store i16* %0, i16** %3, align 8
  store i16 %1, i16* %4, align 2
  store i16 0, i16* %7, align 2
  store i16 0, i16* %5, align 2
  br label %12

12:                                               ; preds = %30, %2
  %13 = load i16, i16* %5, align 2
  %14 = sext i16 %13 to i32
  %15 = load i16, i16* %4, align 2
  %16 = sext i16 %15 to i32
  %17 = icmp slt i32 %14, %16
  br i1 %17, label %18, label %33

18:                                               ; preds = %12
  %19 = load i16*, i16** %3, align 8
  %20 = load i16, i16* %5, align 2
  %21 = sext i16 %20 to i32
  %22 = sext i32 %21 to i64
  %23 = getelementptr inbounds i16, i16* %19, i64 %22
  %24 = load i16, i16* %23, align 2
  %25 = getelementptr inbounds [9 x i16], [9 x i16]* %11, i64 0, i64 0
  %26 = load i16, i16* %5, align 2
  %27 = sext i16 %26 to i32
  %28 = sext i32 %27 to i64
  %29 = getelementptr inbounds i16, i16* %25, i64 %28
  store i16 %24, i16* %29, align 2
  br label %30

30:                                               ; preds = %18
  %31 = load i16, i16* %5, align 2
  %32 = add i16 %31, 1
  store i16 %32, i16* %5, align 2
  br label %12

33:                                               ; preds = %12
  store i16 0, i16* %5, align 2
  br label %34

34:                                               ; preds = %82, %33
  %35 = load i16, i16* %5, align 2
  %36 = sext i16 %35 to i32
  %37 = load i16, i16* %4, align 2
  %38 = sext i16 %37 to i32
  %39 = icmp slt i32 %36, %38
  br i1 %39, label %40, label %85

40:                                               ; preds = %34
  store i16 -32767, i16* %8, align 2
  store i16 0, i16* %6, align 2
  br label %41

41:                                               ; preds = %67, %40
  %42 = load i16, i16* %6, align 2
  %43 = sext i16 %42 to i32
  %44 = load i16, i16* %4, align 2
  %45 = sext i16 %44 to i32
  %46 = icmp slt i32 %43, %45
  br i1 %46, label %47, label %70

47:                                               ; preds = %41
  %48 = getelementptr inbounds [9 x i16], [9 x i16]* %11, i64 0, i64 0
  %49 = load i16, i16* %6, align 2
  %50 = sext i16 %49 to i32
  %51 = sext i32 %50 to i64
  %52 = getelementptr inbounds i16, i16* %48, i64 %51
  %53 = load i16, i16* %52, align 2
  %54 = sext i16 %53 to i32
  %55 = load i16, i16* %8, align 2
  %56 = sext i16 %55 to i32
  %57 = icmp sge i32 %54, %56
  br i1 %57, label %58, label %66

58:                                               ; preds = %47
  %59 = getelementptr inbounds [9 x i16], [9 x i16]* %11, i64 0, i64 0
  %60 = load i16, i16* %6, align 2
  %61 = sext i16 %60 to i32
  %62 = sext i32 %61 to i64
  %63 = getelementptr inbounds i16, i16* %59, i64 %62
  %64 = load i16, i16* %63, align 2
  store i16 %64, i16* %8, align 2
  %65 = load i16, i16* %6, align 2
  store i16 %65, i16* %7, align 2
  br label %66

66:                                               ; preds = %58, %47
  br label %67

67:                                               ; preds = %66
  %68 = load i16, i16* %6, align 2
  %69 = add i16 %68, 1
  store i16 %69, i16* %6, align 2
  br label %41

70:                                               ; preds = %41
  %71 = getelementptr inbounds [9 x i16], [9 x i16]* %11, i64 0, i64 0
  %72 = load i16, i16* %7, align 2
  %73 = sext i16 %72 to i32
  %74 = sext i32 %73 to i64
  %75 = getelementptr inbounds i16, i16* %71, i64 %74
  store i16 -32768, i16* %75, align 2
  %76 = load i16, i16* %7, align 2
  %77 = getelementptr inbounds [9 x i16], [9 x i16]* %10, i64 0, i64 0
  %78 = load i16, i16* %5, align 2
  %79 = sext i16 %78 to i32
  %80 = sext i32 %79 to i64
  %81 = getelementptr inbounds i16, i16* %77, i64 %80
  store i16 %76, i16* %81, align 2
  br label %82

82:                                               ; preds = %70
  %83 = load i16, i16* %5, align 2
  %84 = add i16 %83, 1
  store i16 %84, i16* %5, align 2
  br label %34

85:                                               ; preds = %34
  %86 = getelementptr inbounds [9 x i16], [9 x i16]* %10, i64 0, i64 0
  %87 = load i16, i16* %4, align 2
  %88 = sext i16 %87 to i32
  %89 = ashr i32 %88, 1
  %90 = sext i32 %89 to i64
  %91 = getelementptr inbounds i16, i16* %86, i64 %90
  %92 = load i16, i16* %91, align 2
  store i16 %92, i16* %9, align 2
  %93 = load i16*, i16** %3, align 8
  %94 = load i16, i16* %9, align 2
  %95 = sext i16 %94 to i32
  %96 = sext i32 %95 to i64
  %97 = getelementptr inbounds i16, i16* %93, i64 %96
  %98 = load i16, i16* %97, align 2
  ret i16 %98
}

; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() #0 {
  %1 = alloca [5 x i16], align 2
  %2 = alloca i32, align 4
  %3 = alloca i16, align 2
  %4 = bitcast [5 x i16]* %1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 %4, i8* align 2 bitcast ([5 x i16]* @__const.main.a to i8*), i64 10, i1 false)
  store i32 0, i32* %2, align 4
  %5 = getelementptr inbounds [5 x i16], [5 x i16]* %1, i64 0, i64 0
  %6 = call signext i16 @gmed_n(i16* %5, i16 signext 5)
  store i16 %6, i16* %3, align 2
  %7 = load i16, i16* %3, align 2
  %8 = sext i16 %7 to i32
  %9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %8)
  ret i32 0
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1

declare dso_local i32 @printf(i8*, ...) #2

attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 9.0.1-16 "}
mlfbrown commented 3 years ago

Ah! It looks like you're encountering the fact that Sys skips code in order to find bugs in really big codebases, potentially leading to FPs. In this case, Sys has determined that it's possible, given no context, to end up with an uninit bug off of ind (if neither of the loops execute). True! But the function is never called in such a buggy way... There are a few approaches here, depending on what your goal is in using Sys:

  1. Compile at an optimization level that inlines the function. That solves the immediate problem.
  2. Use a full-program symex approach (e.g., something like KLEE won't have this problem---it has full program context). This works if your goal is to symex small programs, but won't work for big things like browsers.
  3. Develop a suppression heuristic. If this is a pattern of false positive you're running into in a bigger codebase, this could be the solution! Eg don't report bugs due to unentered loops (I've meant to do this for a while, see other issues)
  4. Enhance Sys with limited backwards symbolic execution, to prune obviously impossible reports like this one. This is also something we've been meaning to do for a while. The right approach will just depend on what you intend to do! There are many other possible approaches, too (e.g., automatically detect that this is small code and start from call sites, etc). But the approach, again, depends on your goals :)

EDITING TO ADD: one thing we also found very helpful was re-running the tool on debug builds of the browser (builds with assertions). If the bug didn't exist with the assertions turned on, it was likely not a bug they cared about

marcinguy commented 3 years ago

Thank you for taking the time to explain it in such detail. It helps to understand the project better.

vanhauser-thc commented 3 years ago

IMHO the assumption that the loops could be skipped is valid as n could be zero. That there is no callee that actually calls the function with that value is a hard analysis to tackle.

but still here is an issue I think - that an assumption was made which results in the identified vulnerability be detected. If there would be no loop it would not be a false positive. So the fix should be that if loops etc. are skipped because of non-concrete values then the message should maybe look like this: Stack uninit bug (assumption) to make it clear that this could be a false positive.

marcinguy commented 3 years ago

FYI Here seems like another false positive.

Got an uninit bug that p_quant_data_msb[0] could be uninitialized in a call to calc_diff_freq(), I don't see how however.

It seems it is properly assigned value in all conditions (if, else if, else)

Relevant code:

INT fdk_sacenc_ecDataPairEnc(HANDLE_FDK_BITSTREAM strm,
                             SHORT aaInData[][MAXBANDS],
                             SHORT aHistory[MAXBANDS],
                             const DATA_TYPE data_type, const INT setIdx,
                             const INT startBand, const INT dataBands,
                             const INT coarse_flag,
                             const INT independency_flag) {
.
.  
.
  SHORT quant_data_lsb[2][MAXBANDS];
  SHORT quant_data_msb[2][MAXBANDS];

  SHORT quant_data_hist_lsb[MAXBANDS];
  SHORT quant_data_hist_msb[MAXBANDS];

  SHORT data_diff_freq[2][MAXBANDS];
  SHORT data_diff_time[2][MAXBANDS + 2];

  SHORT *p_quant_data_msb[2];
  SHORT *p_quant_data_hist_msb = NULL;
.
.
.
 /* Split off LSB */
  if (splitLsb_flag) {
    split_lsb(aaInData[setIdx] + startBand, quant_offset, dataBands,
              quant_data_lsb[0], quant_data_msb[0]);

    split_lsb(aaInData[setIdx + 1] + startBand, quant_offset, dataBands,
              quant_data_lsb[1], quant_data_msb[1]);

    p_quant_data_msb[0] = quant_data_msb[0];
    p_quant_data_msb[1] = quant_data_msb[1];

    num_lsb_bits = 2 * dataBands;
  } else if (quant_offset != 0) {
    for (pb = 0; pb < dataBands; pb++) {
      quant_data_msb[0][pb] = aaInData[setIdx][startBand + pb] + quant_offset;
      quant_data_msb[1][pb] =
          aaInData[setIdx + 1][startBand + pb] + quant_offset;
    }

    p_quant_data_msb[0] = quant_data_msb[0];
    p_quant_data_msb[1] = quant_data_msb[1];

    num_lsb_bits = 0;
  } else {
    p_quant_data_msb[0] = aaInData[setIdx] + startBand;
    p_quant_data_msb[1] = aaInData[setIdx + 1] + startBand;

    num_lsb_bits = 0;
  }

  if (allowDiffTimeBack_flag) {
    if (splitLsb_flag) {
      split_lsb(aHistory + startBand, quant_offset, dataBands,
                quant_data_hist_lsb, quant_data_hist_msb);

      p_quant_data_hist_msb = quant_data_hist_msb;
    } else if (quant_offset != 0) {
      for (pb = 0; pb < dataBands; pb++) {
        quant_data_hist_msb[pb] = aHistory[startBand + pb] + quant_offset;
      }
      p_quant_data_hist_msb = quant_data_hist_msb;
    } else {
      p_quant_data_hist_msb = aHistory + startBand;
    }
  }

  /* Calculate frequency differences */
  calc_diff_freq(p_quant_data_msb[0], data_diff_freq[0], dataBands);

  calc_diff_freq(p_quant_data_msb[1], data_diff_freq[1], dataBands);
 .
 .
 .

Thanks,

mlfbrown commented 3 years ago

Guessing, right now without looking at IR, that it's related to #1 (I broke something in the insert/extract operations recently). Would it be possible to see the ll file?

marcinguy commented 3 years ago

Yes, sure. LL file (with debugs): https://github.com/marcinguy/public/blob/master/sacenc_nlc_enc.o.ll