universal-ctags / ctags

A maintained ctags implementation
https://ctags.io
GNU General Public License v2.0
6.59k stars 629 forks source link

Elm: capturing signatures #1939

Open groteck opened 6 years ago

groteck commented 6 years ago

I would like to improve the Elm language parser so that it captures function signatures, but I'm not sure how to do it. If you can point me to some docs or examples, I can try to make a PR.

The actual output from elm functions is:

foo input.elm   /^foo a =$/;"   f   roles:def

I would like to have something like:

foo input.elm   /^foo a =$/;"   f   signature:(String -> String)    roles:def
masatake commented 6 years ago

I would like to see the input for the output because I don't know Elm well. Some hints for making a signature are in http://docs.ctags.io/en/latest/optlib.html#adding-custom-fields-to-the-tag-output. Good luck!

groteck commented 6 years ago

Sorry, the code example is: https://github.com/universal-ctags/ctags/blob/master/Units/simple-elm.d/input.elm#L28

The actual output is: https://github.com/universal-ctags/ctags/blob/master/Units/simple-elm.d/expected.tags#L14

My proposal is to add the signature:

foo input.elm   /^foo a =$/;"   f   signature:(Int -> Int)    roles:def
groteck commented 6 years ago

I got something working, and it looks promising:

Input:

foo : Int -> Int
foo a =
    a + 1

Optlib:

--mline-regex-Elm=/^([[:lower:]][[:alnum:]_]+)[[:blank:]]:((\n[[:blank:]].*)+|[[:blank:]].*)\n(([[:lower:]][[:alnum:]_]+).*)=/\1/f/{mgroup=2}{_field=signature:(\2)}{scope=clear}{exclusive}

Output:

foo input.elm /^foo : Int -> Int$/;"  f signature:( Int -> Int)

This also works with the multiline signatures like:

fooMultiLine :
    Int
    -> Int
fooMultiLine a =
    1 + a

Output:

fooMultiLine  input.elm /^fooMultiLine :$/;"  f signature:(

However, the output needs to be pre-formatted. Is there some way to remove the \n from multiline signatures?

masatake commented 6 years ago

The feature we need is a _transform flag that you can specify like this:

{mgroup=2}{_transform=removeNewline(\2)}{_field=signature:(\2)}

The flag is NOT implemented yet. I have a sketch that doesn't work with current code base:

commit 646288f0533b753c1cbaf1d1bb2c731cfc7f8a9a
Author: Masatake YAMATO <yamato@redhat.com>
Date:   Fri Sep 5 21:58:16 2014 +0900

    introduce transformer

    This patch introduces new flags: tr and transform.
    They can transform matched string in the way specified with
    its parameter.

    .e.g.

            --regex-<LANG>=../\1/{transform=upcase}
            --regex-<LANG>=../\1/{tr=^}

    With this flag, letters in \1 are converted to upper case.

    This feature is still in EXPERIMENTAL.

    Signed-off-by: Masatake YAMATO <yamato@redhat.com>

diff --git a/lregex.c b/lregex.c
index 78dca29a..7d0d2286 100644
--- a/lregex.c
+++ b/lregex.c
@@ -64,11 +64,18 @@ struct sKind {
 };

 enum pType { PTRN_TAG, PTRN_CALLBACK };
+enum trType {
+   TR_UPCASE           = 1 << 0,
+   TR_DOWNCASE         = 1 << 1,
+   TR_DOT2UNDERSCORE   = 1 << 2,
+   TR_SLASH2UNDERSCORE = 1 << 3,
+};

 typedef struct {
    regex_t *pattern;
    enum pType type;
    boolean exclusive;
+   unsigned int transformers;
    union {
        struct {
            char *name_pattern;
@@ -266,8 +273,129 @@ static void ptrn_flag_exclusive_long (const char* const s __unused__, const char
    ptrn_flag_exclusive_short ('x', data);
 }

+
+static void tr_upcase (vString *const input)
+{
+   char* str;
+
+   for (str = vStringValue (input); *str != '\0'; str++)
+       *str = toupper (*str);
+}
+
+static void tr_downcase (vString *const input)
+{
+   char* str;
+
+   for (str = vStringValue (input); *str != '\0'; str++)
+       *str = tolower (*str);
+}
+
+static void tr (vString *const input, char from, char to)
+{
+   char* str;
+
+   for (str = vStringValue (input); *str != '\0'; str++)
+       if (*str == from)
+           *str = to;
+
+}
+
+static void tr_dottounderscore (vString *const input)
+{
+   tr (input, '.', '_');
+
+}
+
+static void tr_slashtounderscore (vString *const input)
+{
+   tr (input, '/', '_');
+}
+
+struct trTable {
+   enum  trType type;
+   const char   key;
+   const char*  name;
+   void (* transform_in_place)                  (vString *const input);
+   vString *const (* transform_with_allocation) (vString *const input);
+   const char* const* help;
+};
+
+static struct trTable transformers[] = {
+   { TR_UPCASE,           '^', "upcase",            tr_upcase,            NULL, NULL },
+   { TR_DOWNCASE,         '_', "downcase",          tr_downcase,          NULL, NULL },
+   { TR_DOT2UNDERSCORE,   '.', "dottounderscore",   tr_dottounderscore,   NULL, NULL },
+   { TR_SLASH2UNDERSCORE, 's', "slashtounderscore", tr_slashtounderscore, NULL, NULL },
+};
+
+static void ptrn_flag_transform (const char* const s, const char* const trname, void* data)
+{
+   unsigned int i;
+   regexPattern *ptrn = data;
+   unsigned int old_state;
+
+   if (trname[0] == '\0')
+       return;
+
+   old_state = ptrn->transformers;
+   for (i = 0; i < COUNT (transformers); i++)
+   {
+       if (strcmp (transformers[i].name, trname) == 0)
+           ptrn->transformers |= transformers[i].type;
+   }
+
+   if (old_state == ptrn->transformers)
+       error (WARNING, "unknown transformer: %s", trname);
+}
+
+static void ptrn_flag_tr (const char* const s, const char* const keys, void* data)
+{
+   unsigned int i;
+   regexPattern *ptrn = data;
+   unsigned int old_state;
+
+   if (keys[0] == '\0')
+       return;
+
+   old_state = ptrn->transformers;
+   for (i = 0; i < COUNT (transformers); i++)
+   {
+       if (strchr (keys, transformers[i].key))
+           ptrn->transformers |= transformers[i].type;
+   }
+
+   if (old_state == ptrn->transformers)
+       error (WARNING, "no transformer key found in: %s", keys);
+}
+
+static vString* transform (vString* name, const regexPattern * const ptrn)
+{
+   int i;
+
+   if (!ptrn->transformers)
+       return name;
+
+   for (i = 0; i < COUNT (transformers); i++)
+   {
+       if (ptrn->transformers & transformers[i].type)
+       {
+           if (transformers[i].transform_in_place)
+               transformers[i].transform_in_place (name);
+           else if (transformers[i].transform_with_allocation)
+           {
+               vString *tmp = name;
+               name = transformers[i].transform_with_allocation(tmp);
+               vStringDelete (tmp);
+           }
+       }
+   }
+   return name;
+}
+
+
 static flagDefinition ptrnFlagDef[] = {
-   { 'x', "exclusive", ptrn_flag_exclusive_short, ptrn_flag_exclusive_long },
+   { 'x',  "exclusive",  ptrn_flag_exclusive_short, ptrn_flag_exclusive_long },
+   { '\0', "transform",  NULL,                      ptrn_flag_transform      },
+   { '\0', "tr",         NULL,                      ptrn_flag_tr             },
 };

 static regexPattern* addCompiledTagCommon (const langType language,
@@ -307,6 +435,8 @@ static void addCompiledTagPattern (

    ptrn  = addCompiledTagCommon(language, pattern);
    ptrn->type    = PTRN_TAG;
+   ptrn->exclusive = FALSE;
+   ptrn->transformers = 0;
    ptrn->u.tag.name_pattern = name;
    ptrn->u.tag.kind.enabled = TRUE;
    ptrn->u.tag.kind.letter  = kind;
@@ -506,12 +636,15 @@ static void matchTagPattern (const vString* const line,
        const regexPattern* const patbuf,
        const regmatch_t* const pmatch)
 {
-   vString *const name = substitute (vStringValue (line),
+   vString * name = substitute (vStringValue (line),
            patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
    vStringStripLeading (name);
    vStringStripTrailing (name);
    if (vStringLength (name) > 0)
+   {
+       name = transform (name, patbuf);
        makeRegexTag (name, &patbuf->u.tag.kind);
+   }
    else
        error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
            getInputFileName (), getInputLineNumber (),

I'm sorry that I have no time to work on this topic now. However, if you know C and are interested in ctags, you can start from the above patch.

BTW, the scope flag may not work with the multi-line parser.

masatake commented 6 years ago

I also planned to embed a PostScript- or Scheme-like interpreter and an eBPF-like backend(?) in ctags to allow users to define a transformer from the command line.

groteck commented 6 years ago

My C skills are limited, but I can give it a try :) I will create a PR with the one-line solution and then try to add the transform in a different one. Thanks!!!

masatake commented 5 years ago

At least two features are needed

  1. prepare the way to fill a common field from an optlib parser, and

  2. transforming a string

Item 1 must be resolved first. All the issues I'm currently working on are about fields.