adtools / amigaos-cross-toolchain

AmigaOS cross compiler for Linux / MacOSX / Windows
184 stars 48 forks source link

updated inline/macros.h file with latest state-of-the-art LPXX() macros #75

Closed jens-maus closed 7 years ago

jens-maus commented 7 years ago

This pull request bring in a larger bunch of LPXX() macros with a wider range of different possible applications for these kind of macros. This macros.h file is taken from the AmiSSL project (https://github.com/jens-maus/amissl/blob/master/include/inline/macros.h) and represents the latest state-of-the-art with LPXX() macro development.

sezero commented 7 years ago

With this patch applied, building something with WriteLUTPixelArray() fails:

$ cat test.c
#include <intuition/intuitionbase.h>
#include <proto/cybergraphics.h>
void FlipScreen (void) {
 WriteLUTPixelArray(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

$ m68k-amigaos-gcc -c test.c
test.c: In function `FlipScreen':
test.c:4: more than 15 operands in `asm'

FWIW, without the patch applied, cpp output for FlipScreen() looks like this:

void FlipScreen (void) {
 ({   APTR  _WriteLUTPixelArray_v1 = (   0  );    UWORD  _WriteLUTPixelArray_v2 = (    0  );      UWORD  _WriteLUTPixelArray_v3 = (    0  );      UWORD  _WriteLUTPixelArray_v4 = (    0  );      struct RastPort *  _WriteLUTPixelArray_v5 = (    0  );      APTR  _WriteLUTPixelArray_v6 = (    0  );   UWORD  _WriteLUTPixelArray_v7 = (    0  );      UWORD  _WriteLUTPixelArray_v8 = (    0  );      UWORD  _WriteLUTPixelArray_v9 = (    0  );      UWORD  _WriteLUTPixelArray_v10 = (    0  );     UBYTE  _WriteLUTPixelArray_v11 = (    0  );   {   register   ULONG  _WriteLUTPixelArray_re __asm("d0");   register struct Library *const _WriteLUTPixelArray_bn __asm("a6") = (struct Library*)(  CyberGfxBase  ); register   APTR  _n1 __asm("a0") = _WriteLUTPixelArray_v1; register   UWORD  _n2 __asm("d0") = _WriteLUTPixelArray_v2; register   UWORD  _n3 __asm("d1") = _WriteLUTPixelArray_v3; register   UWORD  _n4 __asm("d2") = _WriteLUTPixelArray_v4; register   struct RastPort *  _n5 __asm("a1") = _WriteLUTPixelArray_v5; register   APTR  _n6 __asm("a2") = _WriteLUTPixelArray_v6;  register   UWORD  _n7 __asm("d3") = _WriteLUTPixelArray_v7; register   UWORD  _n8 __asm("d4") = _WriteLUTPixelArray_v8; register   UWORD  _n9 __asm("d5") = _WriteLUTPixelArray_v9; register   UWORD  _n10 __asm("d6") = _WriteLUTPixelArray_v10;register   UBYTE  _n11 __asm("d7") = _WriteLUTPixelArray_v11;  __asm volatile ("jsr a6@(-""0xc6"":W)"  : "=r" (_WriteLUTPixelArray_re) : "r" (_WriteLUTPixelArray_bn), "rf"(_n1), "rf"(_n2), "rf"(_n3), "rf"(_n4), "rf"(_n5), "rf"(_n6), "rf"(_n7), "rf"(_n8), "rf"(_n9), "rf"(_n10), "rf"(_n11) : "d0", "d1", "a0", "a1", "fp0", "fp1", "cc", "memory");  _WriteLUTPixelArray_re; }   })  ;

And, wWith the patch applied, it looks like this:

void FlipScreen (void) {
 ({   APTR  _WriteLUTPixelArray_v1 = (   0  );    UWORD  _WriteLUTPixelArray_v2 = (    0  );      UWORD  _WriteLUTPixelArray_v3 = (    0  );      UWORD  _WriteLUTPixelArray_v4 = (    0  );      struct RastPort *  _WriteLUTPixelArray_v5 = (    0  );      APTR  _WriteLUTPixelArray_v6 = (    0  );   UWORD  _WriteLUTPixelArray_v7 = (    0  );      UWORD  _WriteLUTPixelArray_v8 = (    0  );      UWORD  _WriteLUTPixelArray_v9 = (    0  );      UWORD  _WriteLUTPixelArray_v10 = (    0  );     UBYTE  _WriteLUTPixelArray_v11 = (    0  );     ULONG  _WriteLUTPixelArray_re2 =  ({  register int _d1 __asm("d1");   register int _a0 __asm("a0");   register int _a1 __asm("a1");   register   ULONG  _WriteLUTPixelArray_re __asm("d0");   register void *const _WriteLUTPixelArray_bn __asm("a6") = (  CyberGfxBase  );   register   APTR  _n1 __asm("a0") = _WriteLUTPixelArray_v1;  register   UWORD  _n2 __asm("d0") = _WriteLUTPixelArray_v2; register   UWORD  _n3 __asm("d1") = _WriteLUTPixelArray_v3; register   UWORD  _n4 __asm("d2") = _WriteLUTPixelArray_v4; register   struct RastPort *  _n5 __asm("a1") = _WriteLUTPixelArray_v5; register   APTR  _n6 __asm("a2") = _WriteLUTPixelArray_v6;  register   UWORD  _n7 __asm("d3") = _WriteLUTPixelArray_v7; register   UWORD  _n8 __asm("d4") = _WriteLUTPixelArray_v8; register   UWORD  _n9 __asm("d5") = _WriteLUTPixelArray_v9; register   UWORD  _n10 __asm("d6") = _WriteLUTPixelArray_v10;   register   UBYTE  _n11 __asm("d7") = _WriteLUTPixelArray_v11;   __asm volatile ("jsr a6@(-""0xc6"":W)"  : "=r" (_WriteLUTPixelArray_re), "=r" (_d1), "=r" (_a0), "=r" (_a1) : "r" (_WriteLUTPixelArray_bn), "rf"(_n1), "rf"(_n2), "rf"(_n3), "rf"(_n4), "rf"(_n5), "rf"(_n6), "rf"(_n7), "rf"(_n8), "rf"(_n9), "rf"(_n10), "rf"(_n11) : "fp0", "fp1", "cc", "memory");  _WriteLUTPixelArray_re; }); _WriteLUTPixelArray_re2;    })  ;
}
cahirwpz commented 7 years ago

@sezero Does the test build correctly if you replace inline/macros.h and inline/stubs.h with macros.h and stubs.h from fd2pragma?

sezero commented 7 years ago

No, using those gives the same error for me.

cahirwpz commented 7 years ago

@jens-maus Does AmiSSL (and other projects you maintain) experience any issues with macros.h taken from fd2pragma (i.e. current one) ? Is there any good reason to switch to macros.h from fd2sfd (introduced by this PR)?

jens-maus commented 7 years ago

Well, the "issues" we are having with the macros.h from fd2pragma are that they are not complete. Especially AmiSSL is massively using a more wide range of LPXX() macros, especially some which return a function pointer and fd2pragma is not prepared for these kind of macros AFAIK. Just have a look at the additional LP() macros this pull request would introduce.

sezero commented 7 years ago

@jens-maus Can you not reproduce the error I described?

jens-maus commented 7 years ago

@sezero I can of course reproduce the issue but don't have a solution for it yet.

sezero commented 7 years ago

Changing max_recog_operands initialization from 14 to 15 in gcc/genconfig.c (so that the +1 from later on makes it 16) allows compilation to succeed; like so:

diff --git a/gcc/genconfig.c b/gcc/genconfig.c
index 7021b97..c495e99 100644
--- a/gcc/genconfig.c
+++ b/gcc/genconfig.c
@@ -327,8 +327,8 @@ main (argc, argv)
   printf ("/* Generated automatically by the program `genconfig'\n\
 from the machine description file `md'.  */\n\n");

-  /* Allow at least 15 operands for the sake of asm constructs.  */
-  max_recog_operands = 14;  /* We will add 1 later.  */
+  /* Allow at least 16 operands for the sake of asm constructs.  */
+  max_recog_operands = 15;  /* We will add 1 later.  */
   max_dup_operands = 1;

   /* Read the machine description.  */

Can you think of any ill effects from this change?

(Note: all original gcc before version 3.1 had it as 9, but the amigaos patch changes it, hence the error message with 15 and not 10. With gcc-3.1, it is change to allow 30.)

jens-maus commented 7 years ago

@sezero Nice finding, this definitly makes sense. However, I would propose to immediately bump it to something like 20 if this doesn't have any negative effects.

sezero commented 7 years ago

I will leave that decision to @cahirwpz

BTW, your updated macros.h shortens some asm output. E.g. this

    if (DeleteFile((const STRPTR) path) != 0)
        return 0;
    return -1;

gives the following difference in asm output:

 _Sys_unlink:
    pea a5@
    movel sp,a5
    movel a6,sp@-
    movel _DOSBase,a6
    movel a5@(8),d1
 #APP
    jsr a6@(-0x48:W)
 #NO_APP
    tstl d0
-   jne L20
-   moveq #-1,d0
-   jra L21
-   .even
-L20:
-   clrl d0
-L21:
+   seq d0
+   extbl d0
    movel sp@+,a6
    unlk a5
    rts

which is nice I guess.

cahirwpz commented 7 years ago

@sezero Thanks for finding out the culprit. I'll have a look at more recent versions of GCC to choose the right value. It's good to know that alternative macro set is improving code efficiency. Good job!

@jens-maus I'll redo the patch. The header files will get copied from fd2sfd instead of keeping them in my repo. GCC will be patched according to @sezero proposal.

jens-maus commented 7 years ago

@cahirwpz I also think that @sezero proposal to patch GCC is viable and should be applied. However, how do you want to use fd2sfd to generate macros.h? AFAIK fd2sfd doesn't provide the complete set of LP() macros as my patch is proposing. Or am I wrong?

cahirwpz commented 7 years ago

@jens-maus You're right! Could you send me your macros.h by e-mail. I'll compare it against fd2sfd's version and make my mind about it.

jens-maus commented 7 years ago

@cahirwpz It is essentially exactly the macros.h file we are using with AmiSSL (see here: https://github.com/jens-maus/amissl/blob/master/include/inline/macros.h)

jens-maus commented 7 years ago

For the sake of simplicity I create a pull request for the bump of max_recog_operands, as @sezero suggested. Feel free to modify it before merging it.

sezero commented 7 years ago

Noticed the following with gcc2 (both 2.95.3 and 2.95.4) :

$ cat test.c
#include <intuition/intuition.h>
#include <proto/intuition.h>
#include <proto/graphics.h>
extern struct IntuitionBase *IntuitionBase;
extern struct GfxBase *GfxBase;

struct Window * w3dWindow;
struct BitMap * w3dBitMap;
unsigned vid_OpenWindow(void)
{
  struct TagItem OpenWinTags[] = { {TAG_DONE,0} };
  w3dWindow = OpenWindowTagList(NULL, OpenWinTags);
  if (!w3dWindow) goto Duh;

  w3dBitMap = AllocBitMap(0,0,8,0,w3dWindow->RPort->BitMap);
  if (!w3dBitMap) goto Duh;
  return 1;
Duh:
  return 0;
}

m68k-amigaos-gcc -m68060 -O2 -S test.c

The asm output gives the following diff with old vs new macros.h:

--- test1.s
+++ test2.s
@@ -15,9 +15,8 @@
 #APP
    jsr a6@(-0x25e:W)
 #NO_APP
-   movel d0,_w3dWindow
    movel d0,a0
-   tstl a0
+   movel a0,_w3dWindow
    jeq L4
    movel a0@(50),a0
    movel _GfxBase,a6
@@ -29,8 +28,8 @@
 #APP
    jsr a6@(-0x396:W)
 #NO_APP
-   movel d0,_w3dBitMap
    movel d0,d1
+   movel d1,_w3dBitMap
    moveq #1,d0
    tstl d1
    jne L6

Note that 'testl' is gone. My asm knowledge is less than little, but is this OK?

tboeckel commented 7 years ago

The "tst.l a0" plus the following "jeq L4" is just replaced by the equivalent "move.l a0, _w3dWindow" plus the same "jeq L4". The move instruction will set the zero bit if the moved value was zero and the jeq instruction checks this zero bit and branches accordingly. There is no need for an additional explicit zero check. So this is ok from my point of view.

sezero commented 7 years ago

@tboeckel: I see, thanks.

cahirwpz commented 7 years ago

@jens-maus Please apply your changes to adtools/fd2sfd. Toolchain build script will take macros.h file from there.

cahirwpz commented 7 years ago

@jens-maus Oops... I meant adtools/fd2pragma. Ignore my previous comment. I've already done it.

cahirwpz commented 7 years ago

Implemented by 51a0799cac96117b61a7d43cbe032063d6141bf1