universal-ctags / ctags

A maintained ctags implementation
https://ctags.io
GNU General Public License v2.0
6.45k stars 620 forks source link

C/C++: extern "c" will cause parse error #2647

Open chongchai opened 3 years ago

chongchai commented 3 years ago

I find that definitions in an #else branch are not parsed when the branch is inside extern "C".

The sample code:

#ifdef __cplusplus
extern "C" {
#endif

#ifdef MAX
  void testMax1() {}
  #define NB1 11
#else
  void testMax2() {}
  #define NB2 22
#endif

#ifdef __cplusplus
}
#endif

#ifdef MAX
  void testMax3() {}
  #define NB3 33
#else
  void testMax4() {}
  #define NB4 44
#endif

I use the following command to parse:

ctags.exe --output-format=json --langmap=c:+.inc --kinds-c=+plz --fields=+nie -o - D:\tmp\test\testIfMacro.c

Result is:

{"_type": "tag", "name": "NB1", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  #define NB1 /", "file": true, "line": 7, "kind": "macro", "end": 7}
{"_type": "tag", "name": "NB3", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  #define NB3 /", "file": true, "line": 19, "kind": "macro", "end": 19}
{"_type": "tag", "name": "NB4", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  #define NB4 /", "file": true, "line": 22, "kind": "macro", "end": 22}
{"_type": "tag", "name": "testMax1", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  void testMax1() {}$/", "line": 6, "typeref": "typename:void", "kind": "function", "end": 6}
{"_type": "tag", "name": "testMax3", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  void testMax3() {}$/", "line": 18, "typeref": "typename:void", "kind": "function", "end": 18}
{"_type": "tag", "name": "testMax4", "path": "D:\\tmp\\test\\testIfMacro.c", "pattern": "/^  void testMax4() {}$/", "line": 21, "typeref": "typename:void", "kind": "function", "end": 21}

testMax2 and NB2 are not parsed, while testMax4 and NB4 are parsed. I think extern "C" causes this error. Can you support this situation?

masatake commented 3 years ago

I think extern "C" will cause this error. Can you support this situation?

It seems that your guess is correct. The following change ignores the code between #ifdef __cplusplus and #endif. @pragmaware, what do you think about my approach?

$ git diff | cat
diff --git a/parsers/cpreprocessor.c b/parsers/cpreprocessor.c
index 668171f0..4ad6087d 100644
--- a/parsers/cpreprocessor.c
+++ b/parsers/cpreprocessor.c
@@ -953,10 +953,31 @@ static void directivePragma (int c)
    Cpp.directive.state = DRCTV_NONE;
 }

+static bool isDefCondition (const int c, const char *condition)
+{
+   if (*condition == '\0')
+       return true;
+   else if (c == EOF)
+       return false;
+
+   if (*condition != '\0' && c == condition[0])
+   {
+       const int next = cppGetcFromUngetBufferOrFile ();
+       return isDefCondition (next, condition + 1);
+   }
+
+   return false;
+}
+
 static bool directiveIf (const int c)
 {
    DebugStatement ( const bool ignore0 = isIgnore (); )
-   const bool ignore = pushConditional ((bool) (c != '0'));
+   bool firstBranchChosen = true;
+
+   if (c == '0' || isDefCondition (c, "__cplusplus"))
+       firstBranchChosen = false;
+
+   const bool ignore = pushConditional (firstBranchChosen);

    Cpp.directive.state = DRCTV_NONE;
    DebugStatement ( debugCppNest (true, Cpp.directive.nestLevel);
$ u-ctags --output-format=json --langmap=c:+.inc --kinds-c=+plz --fields=+nie -o - /tmp/foo.c

u-ctags --output-format=json --langmap=c:+.inc --kinds-c=+plz --fields=+nie -o - /tmp/foo.c

{"_type": "tag", "name": "NB1", "path": "/tmp/foo.c", "pattern": "/^  #define NB1 /", "file": true, "line": 7, "kind": "macro", "end": 7}
{"_type": "tag", "name": "NB2", "path": "/tmp/foo.c", "pattern": "/^  #define NB2 /", "file": true, "line": 10, "kind": "macro", "end": 10}
{"_type": "tag", "name": "NB3", "path": "/tmp/foo.c", "pattern": "/^  #define NB3 /", "file": true, "line": 19, "kind": "macro", "end": 19}
{"_type": "tag", "name": "NB4", "path": "/tmp/foo.c", "pattern": "/^  #define NB4 /", "file": true, "line": 22, "kind": "macro", "end": 22}
{"_type": "tag", "name": "testMax1", "path": "/tmp/foo.c", "pattern": "/^  void testMax1() {}$/", "line": 6, "typeref": "typename:void", "kind": "function", "end": 6}
{"_type": "tag", "name": "testMax2", "path": "/tmp/foo.c", "pattern": "/^  void testMax2() {}$/", "line": 9, "typeref": "typename:void", "kind": "function", "end": 9}
{"_type": "tag", "name": "testMax3", "path": "/tmp/foo.c", "pattern": "/^  void testMax3() {}$/", "line": 18, "typeref": "typename:void", "kind": "function", "end": 18}
{"_type": "tag", "name": "testMax4", "path": "/tmp/foo.c", "pattern": "/^  void testMax4() {}$/", "line": 21, "typeref": "typename:void", "kind": "function", "end": 21}
pragmaware commented 3 years ago

Well, if it fixes the problem and doesn't break tests then it looks good.

However, I don't understand the relation between the #ifdef __cplusplus define and the #ifdef MAX one. They shouldn't influence each other in terms of choosing one or multiple branches... or am I missing something?

masatake commented 3 years ago

My analysis is that it is related to how c parser handles extern "C" { }.

$ cat /tmp/foo.c
cat /tmp/foo.c
extern "C" {

#ifdef MAX
  void testMax1() {}
  #define NB1 11
#else
  void testMax2() {}
  #define NB2 22
#endif

}
$ u-ctags --output-format=xref --kinds-c=+plz --fields=+nie -o - /tmp/foo.c

u-ctags --output-format=xref --kinds-c=+plz --fields=+nie -o - /tmp/foo.c

NB1              macro         5 /tmp/foo.c       #define NB1 11
testMax1         function      4 /tmp/foo.c       void testMax1() {}

$ cat /tmp/bar.c 
cat /tmp/bar.c 
// extern "C" {

#ifdef MAX
  void testMax1() {}
  #define NB1 11
#else
  void testMax2() {}
  #define NB2 22
#endif

// }
$ u-ctags --output-format=xref --kinds-c=+plz --fields=+nie -o - /tmp/bar.c

u-ctags --output-format=xref --kinds-c=+plz --fields=+nie -o - /tmp/bar.c

NB1              macro         5 /tmp/bar.c       #define NB1 11
NB2              macro         8 /tmp/bar.c       #define NB2 22
testMax1         function      4 /tmp/bar.c       void testMax1() {}
testMax2         function      7 /tmp/bar.c       void testMax2() {}

If the cpreprocessor parser suppresses the area from #ifdef __cplusplus to #endif, the C parser doesn't read the lines extern "C" { and }.

I have to add one more condition to the patch (https://github.com/universal-ctags/ctags/issues/2647#issuecomment-693859110). The condition is "if the client parser of cpreprocessor parser is c parser" or "if the client parser of cpreprocessor parser is not c++ parser".

pragmaware commented 3 years ago

Though I still don't understand why. In theory the behaviour of an #ifdef should not be influenced by any previous #ifdef... ... it seems to be more of a bug in the handling of the second #ifdef, not the first one...

masatake commented 3 years ago

I found that this has nothing to do with extern "C" { ... }. I can reproduce the behavior with struct s { ... }:

[yamato@control]~/var/ctags-github% cat /tmp/f.c
cat /tmp/f.c
struct s {
#ifdef X
  int i;
#else
  int j;
#endif  
};
[yamato@control]~/var/ctags-github% ./ctags -o - /tmp/f.c
./ctags -o - /tmp/f.c
i   /tmp/f.c    /^  int i;$/;"  m   language:C  struct:s    typeref:typename:int    file:
s   /tmp/f.c    /^struct s {$/;"    s   language:C  file:
[yamato@control]~/var/ctags-github% cat /tmp/g.c
cat /tmp/g.c

#ifdef X
  int i;
#else
  int j;
#endif  

[yamato@control]~/var/ctags-github% ./ctags -o - /tmp/g.c
./ctags -o - /tmp/g.c
i   /tmp/g.c    /^  int i;$/;"  v   language:C  typeref:typename:int
j   /tmp/g.c    /^  int j;$/;"  v   language:C  typeref:typename:int
[yamato@control]~/var/ctags-github% 
chongchai commented 3 years ago

I can add another test case for this behavior: a local variable in a function body, as follows:

[yklhard@gotpc12]$ cat a.c
void test() {
#ifdef X
  int i = 1;
#else
  int j = 2;
#endif 
}
[yklhard@gotpc12]$ ./ctags-src/ctags --kinds-c=+l -o - a.c
i   a.c /^  int i = 1;$/;"  l   function:test   typeref:typename:int    file:
test    a.c /^void test() {$/;" f   typeref:typename:void
[yklhard@gotpc12]$ cat b.c
#ifdef X
  int i = 1;
#else
  int j = 2;
#endif 
[yklhard@gotpc12]$ ./ctags-src/ctags --kinds-c=+l -o - b.c
i   b.c /^  int i = 1;$/;"  v   typeref:typename:int
j   b.c /^  int j = 2;$/;"  v   typeref:typename:int
ArcsinX commented 3 years ago

Seems that we should not call cppBeginStatement() here https://github.com/universal-ctags/ctags/blob/master/parsers/cxx/cxx_parser_block.c#L262 :