honggyukim / uftrace

Function graph tracer for C/C++/Rust/Python
https://uftrace.github.io/slide/
GNU General Public License v2.0
1 star 0 forks source link

-mfentry and -mnop-mcount #6

Open honggyukim opened 5 years ago

honggyukim commented 5 years ago

-mfentry -mno-fentry

-mrecord-mcount -mno-record-mcount

-mnop-mcount -mno-nop-mcount

$ cd gcc/config
$ grep nop_mcount * -rIn
i386/i386.opt:929:Target Report Var(flag_nop_mcount)
i386/i386.c:4235:  if (flag_nop_mcount)
i386/i386.c:4238:  if (flag_nop_mcount && flag_pic)
i386/i386.c:41185:  if (flag_nop_mcount)
s390/s390.opt:298:Target Report Var(flag_nop_mcount)
s390/s390.c:12558:      if (flag_nop_mcount)
s390/s390.c:12568:      if (flag_nop_mcount)
s390/s390.c:12585:      if (flag_nop_mcount)
$ cat gcc/config/i386/i386.c
    ...
/* Emit to FILE the instruction carrying the local label "1:" for the
   profiler hook: a call to TARGET, or a 5-byte nop in its place when
   flag_nop_mcount is set.  */

static void
x86_print_call_or_nop (FILE *file, const char *target)
{
  if (!flag_nop_mcount)
    {
      fprintf (file, "1:\tcall\t%s\n", target);
      return;
    }

  /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
  fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.

   The text is emitted in this order:
     1. an endbr64/endbr32 instruction, if one was queued for the
        function entrance;
     2. unless NO_PROFILE_COUNTERS is defined, an instruction that puts
        the address of the counter label (LPREFIX "P" LABELNO) into a
        register;
     3. the profiler call itself, carrying the local label "1:";
     4. when flag_record_mcount is set, an entry in the __mcount_loc
        section that refers back to that "1:" label.

   The call target is MCOUNT_NAME_BEFORE_PROLOGUE when flag_fentry is
   set and MCOUNT_NAME otherwise.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
                                         : MCOUNT_NAME);

  if (cfun->machine->endbr_queued_at_entrance)
    fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      /* 64-bit PIC (non-PECOFF): indirect call through the GOT.  This
         path prints the call directly instead of going through
         x86_print_call_or_nop, so flag_nop_mcount is not consulted
         here.  */
      if (!TARGET_PECOFF && flag_pic)
        fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
        x86_print_call_or_nop (file, mcount_name);
    }
  else if (flag_pic)
    {
      /* 32-bit PIC: counter address and call target are both reached
         relative to %ebx.  As in the 64-bit PIC case, the call is
         printed directly and flag_nop_mcount is not consulted.  */
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
               LPREFIX, labelno);
#endif
      fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
      /* 32-bit non-PIC: absolute counter address, direct call or nop.  */
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
               LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, mcount_name);
    }

  if (flag_record_mcount)
    {
      /* Record the address of the "1:" label emitted above ("1b") as a
         pointer-sized entry in __mcount_loc, then return to the
         previous section.  */
      fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
    ...
honggyukim commented 5 years ago
$ grep MCOUNT_NAME gcc/config/i386/*
    ...
config/i386/i386.h:#define MCOUNT_NAME "_mcount"
config/i386/i386.h:#define MCOUNT_NAME_BEFORE_PROLOGUE "__fentry__"
    ...
config/i386/x86-64.h:#undef MCOUNT_NAME
config/i386/x86-64.h:#define MCOUNT_NAME "mcount"
honggyukim commented 5 years ago
$ cat gcc/config/arm/arm.h
    ...
/* Call the function profiler with a given profile label.  The Acorn
   compiler puts this BEFORE the prolog but gcc puts it afterwards.
   On the ARM the full profile code will look like:
        .data
        LP1
                .word   0
        .text
                mov     ip, lr
                bl      mcount
                .word   LP1

   profile_function() in final.c outputs the .data section, FUNCTION_PROFILER
   will output the .text section.

   The ``mov ip,lr'' seems like a good idea to stick with cc convention.
   ``prof'' doesn't seem to mind about this!

   Note - this version of the code is designed to work in both ARM and
   Thumb modes.

   This default is only defined when ARM_FUNCTION_PROFILER has not
   already been defined.  The macro expands to a braced block rather
   than a single expression.  The expansion writes the mov/bl pair with
   ARM_MCOUNT_NAME as the branch target, a newline, and then a
   UNITS_PER_WORD-sized integer referring to the internal label built
   from "LP" and LABELNO.  */
#ifndef ARM_FUNCTION_PROFILER
#define ARM_FUNCTION_PROFILER(STREAM, LABELNO)          \
{                                                       \
  char temp[20];                                        \
  rtx sym;                                              \
                                                        \
  asm_fprintf (STREAM, "\tmov\t%r, %r\n\tbl\t",         \
           IP_REGNUM, LR_REGNUM);                       \
  assemble_name (STREAM, ARM_MCOUNT_NAME);              \
  fputc ('\n', STREAM);                                 \
  ASM_GENERATE_INTERNAL_LABEL (temp, "LP", LABELNO);    \
  sym = gen_rtx_SYMBOL_REF (Pmode, temp);               \
  assemble_aligned_integer (UNITS_PER_WORD, sym);       \
}
#endif

/* FUNCTION_PROFILER dispatches to the ARM or Thumb profiler.  When a
   Thumb variant is provided, the choice is made on TARGET_ARM;
   otherwise the ARM variant is always used.

   Note that the if/else form has no braces or semicolons of its own, so
   it relies on ARM_FUNCTION_PROFILER expanding to a braced block
   directly followed by `else'.  THUMB_FUNCTION_PROFILER is not shown
   here; presumably it has a compatible shape -- verify before changing
   either macro.  */
#ifdef THUMB_FUNCTION_PROFILER
#define FUNCTION_PROFILER(STREAM, LABELNO)              \
  if (TARGET_ARM)                                       \
    ARM_FUNCTION_PROFILER (STREAM, LABELNO)             \
  else                                                  \
    THUMB_FUNCTION_PROFILER (STREAM, LABELNO)
#else
#define FUNCTION_PROFILER(STREAM, LABELNO)              \
    ARM_FUNCTION_PROFILER (STREAM, LABELNO)
#endif
    ...
honggyukim commented 5 years ago
$ cat gcc/config/arm/linux-elf.h
    ...
/* Call the function profiler with a given profile label.

   This replaces any earlier definition of ARM_FUNCTION_PROFILER.  It
   emits a single `bl mcount' instruction, with a "(PLT)" suffix on the
   symbol when both TARGET_ARM and NEED_PLT_RELOC are true.  LABELNO is
   accepted but not used: no counter-label word is emitted after the
   branch.  */
#undef  ARM_FUNCTION_PROFILER
#define ARM_FUNCTION_PROFILER(STREAM, LABELNO)                          \
{                                                                       \
  fprintf (STREAM, "\tbl\tmcount%s\n",                                  \
           (TARGET_ARM && NEED_PLT_RELOC) ? "(PLT)" : "");              \
}
    ...
honggyukim commented 5 years ago

profile_function() in final.c outputs the .data section, FUNCTION_PROFILER will output the .text section.

$ cat gcc/final.c
    ...
/* Emit the profiling code for the current function to FILE.

   Unless the target sets NO_PROFILE_COUNTERS to a nonzero value, a
   zero-initialized counter labelled "LP" <function number> is first
   placed in the data section.  Output then returns to the function's
   own section, where the target's FUNCTION_PROFILER emits the actual
   profiler call.

   On targets that define ASM_OUTPUT_REG_PUSH, the struct-value register
   and the static-chain register (when the function uses them and they
   are hard registers) are pushed before the profiler call and popped
   after it.  */
static void
profile_function (FILE *file ATTRIBUTE_UNUSED)
{
  /* Give NO_PROFILE_COUNTERS a value so it can be tested in an ordinary
     `if' below; targets that do not define it get counters.  */
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS    0
#endif
#ifdef ASM_OUTPUT_REG_PUSH
  rtx sval = NULL, chain = NULL;

  if (cfun->returns_struct)
    sval = targetm.calls.struct_value_rtx (TREE_TYPE (current_function_decl),
                                           true);
  if (cfun->static_chain_decl)
    chain = targetm.calls.static_chain (current_function_decl, true);
#endif /* ASM_OUTPUT_REG_PUSH */

  if (! NO_PROFILE_COUNTERS)
    {
      /* Emit the per-function counter: an aligned, labelled zero of
         LONG_TYPE_SIZE bits in the data section.  */
      int align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
      switch_to_section (data_section);
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      targetm.asm_out.internal_label (file, "LP", current_function_funcdef_no);
      assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
    }

  /* Back to the section holding the function's code.  */
  switch_to_section (current_function_section ());

#ifdef ASM_OUTPUT_REG_PUSH
  if (sval && REG_P (sval))
    ASM_OUTPUT_REG_PUSH (file, REGNO (sval));
  if (chain && REG_P (chain))
    ASM_OUTPUT_REG_PUSH (file, REGNO (chain));
#endif

  FUNCTION_PROFILER (file, current_function_funcdef_no);

  /* Pop in the reverse order of the pushes above.  */
#ifdef ASM_OUTPUT_REG_PUSH
  if (chain && REG_P (chain))
    ASM_OUTPUT_REG_POP (file, REGNO (chain));
  if (sval && REG_P (sval))
    ASM_OUTPUT_REG_POP (file, REGNO (sval));
#endif
}
    ...
honggyukim commented 5 years ago

-fprolog-pad=N,M