Closed jens-maus closed 7 years ago
With this patch applied, building something with WriteLUTPixelArray() fails:
$ cat test.c
#include <intuition/intuitionbase.h>
#include <proto/cybergraphics.h>
void FlipScreen (void) {
WriteLUTPixelArray(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
$ m68k-amigaos-gcc -c test.c
test.c: In function `FlipScreen':
test.c:4: more than 15 operands in `asm'
FWIW, without the patch applied, cpp output for FlipScreen() looks like this:
void FlipScreen (void) {
({ APTR _WriteLUTPixelArray_v1 = ( 0 ); UWORD _WriteLUTPixelArray_v2 = ( 0 ); UWORD _WriteLUTPixelArray_v3 = ( 0 ); UWORD _WriteLUTPixelArray_v4 = ( 0 ); struct RastPort * _WriteLUTPixelArray_v5 = ( 0 ); APTR _WriteLUTPixelArray_v6 = ( 0 ); UWORD _WriteLUTPixelArray_v7 = ( 0 ); UWORD _WriteLUTPixelArray_v8 = ( 0 ); UWORD _WriteLUTPixelArray_v9 = ( 0 ); UWORD _WriteLUTPixelArray_v10 = ( 0 ); UBYTE _WriteLUTPixelArray_v11 = ( 0 ); { register ULONG _WriteLUTPixelArray_re __asm("d0"); register struct Library *const _WriteLUTPixelArray_bn __asm("a6") = (struct Library*)( CyberGfxBase ); register APTR _n1 __asm("a0") = _WriteLUTPixelArray_v1; register UWORD _n2 __asm("d0") = _WriteLUTPixelArray_v2; register UWORD _n3 __asm("d1") = _WriteLUTPixelArray_v3; register UWORD _n4 __asm("d2") = _WriteLUTPixelArray_v4; register struct RastPort * _n5 __asm("a1") = _WriteLUTPixelArray_v5; register APTR _n6 __asm("a2") = _WriteLUTPixelArray_v6; register UWORD _n7 __asm("d3") = _WriteLUTPixelArray_v7; register UWORD _n8 __asm("d4") = _WriteLUTPixelArray_v8; register UWORD _n9 __asm("d5") = _WriteLUTPixelArray_v9; register UWORD _n10 __asm("d6") = _WriteLUTPixelArray_v10;register UBYTE _n11 __asm("d7") = _WriteLUTPixelArray_v11; __asm volatile ("jsr a6@(-""0xc6"":W)" : "=r" (_WriteLUTPixelArray_re) : "r" (_WriteLUTPixelArray_bn), "rf"(_n1), "rf"(_n2), "rf"(_n3), "rf"(_n4), "rf"(_n5), "rf"(_n6), "rf"(_n7), "rf"(_n8), "rf"(_n9), "rf"(_n10), "rf"(_n11) : "d0", "d1", "a0", "a1", "fp0", "fp1", "cc", "memory"); _WriteLUTPixelArray_re; } }) ;
And, wWith the patch applied, it looks like this:
void FlipScreen (void) {
({ APTR _WriteLUTPixelArray_v1 = ( 0 ); UWORD _WriteLUTPixelArray_v2 = ( 0 ); UWORD _WriteLUTPixelArray_v3 = ( 0 ); UWORD _WriteLUTPixelArray_v4 = ( 0 ); struct RastPort * _WriteLUTPixelArray_v5 = ( 0 ); APTR _WriteLUTPixelArray_v6 = ( 0 ); UWORD _WriteLUTPixelArray_v7 = ( 0 ); UWORD _WriteLUTPixelArray_v8 = ( 0 ); UWORD _WriteLUTPixelArray_v9 = ( 0 ); UWORD _WriteLUTPixelArray_v10 = ( 0 ); UBYTE _WriteLUTPixelArray_v11 = ( 0 ); ULONG _WriteLUTPixelArray_re2 = ({ register int _d1 __asm("d1"); register int _a0 __asm("a0"); register int _a1 __asm("a1"); register ULONG _WriteLUTPixelArray_re __asm("d0"); register void *const _WriteLUTPixelArray_bn __asm("a6") = ( CyberGfxBase ); register APTR _n1 __asm("a0") = _WriteLUTPixelArray_v1; register UWORD _n2 __asm("d0") = _WriteLUTPixelArray_v2; register UWORD _n3 __asm("d1") = _WriteLUTPixelArray_v3; register UWORD _n4 __asm("d2") = _WriteLUTPixelArray_v4; register struct RastPort * _n5 __asm("a1") = _WriteLUTPixelArray_v5; register APTR _n6 __asm("a2") = _WriteLUTPixelArray_v6; register UWORD _n7 __asm("d3") = _WriteLUTPixelArray_v7; register UWORD _n8 __asm("d4") = _WriteLUTPixelArray_v8; register UWORD _n9 __asm("d5") = _WriteLUTPixelArray_v9; register UWORD _n10 __asm("d6") = _WriteLUTPixelArray_v10; register UBYTE _n11 __asm("d7") = _WriteLUTPixelArray_v11; __asm volatile ("jsr a6@(-""0xc6"":W)" : "=r" (_WriteLUTPixelArray_re), "=r" (_d1), "=r" (_a0), "=r" (_a1) : "r" (_WriteLUTPixelArray_bn), "rf"(_n1), "rf"(_n2), "rf"(_n3), "rf"(_n4), "rf"(_n5), "rf"(_n6), "rf"(_n7), "rf"(_n8), "rf"(_n9), "rf"(_n10), "rf"(_n11) : "fp0", "fp1", "cc", "memory"); _WriteLUTPixelArray_re; }); _WriteLUTPixelArray_re2; }) ;
}
No, using those gives the same error for me.
@jens-maus Does AmiSSL (and other projects you maintain) experience any issues with macros.h
taken from fd2pragma
(i.e. current one) ? Is there any good reason to switch to macros.h
from fd2sfd
(introduced by this PR)?
Well, the "issues" we are having with the macros.h from fd2pragma are that they are not complete. Especially AmiSSL is massively using a more wide range of LPXX() macros, especially some which return a function pointer and fd2pragma is not prepared for these kind of macros AFAIK. Just have a look at the additional LP() macros this pull request would introduce.
@jens-maus Can you not reproduce the error I described?
@sezero I can of course reproduce the issue but don't have a solution for it yet.
Changing max_recog_operands initialization from 14 to 15 in gcc/genconfig.c (so that the +1 from later on makes it 16) allows compilation to succeed; like so:
diff --git a/gcc/genconfig.c b/gcc/genconfig.c
index 7021b97..c495e99 100644
--- a/gcc/genconfig.c
+++ b/gcc/genconfig.c
@@ -327,8 +327,8 @@ main (argc, argv)
printf ("/* Generated automatically by the program `genconfig'\n\
from the machine description file `md'. */\n\n");
- /* Allow at least 15 operands for the sake of asm constructs. */
- max_recog_operands = 14; /* We will add 1 later. */
+ /* Allow at least 16 operands for the sake of asm constructs. */
+ max_recog_operands = 15; /* We will add 1 later. */
max_dup_operands = 1;
/* Read the machine description. */
Can you think of any ill effects from this change?
(Note: all original gcc before version 3.1 had it as 9, but the amigaos patch changes it, hence the error message with 15 and not 10. With gcc-3.1, it is change to allow 30.)
@sezero Nice finding, this definitly makes sense. However, I would propose to immediately bump it to something like 20 if this doesn't have any negative effects.
I will leave that decision to @cahirwpz
BTW, your updated macros.h shortens some asm output. E.g. this
if (DeleteFile((const STRPTR) path) != 0)
return 0;
return -1;
gives the following difference in asm output:
_Sys_unlink:
pea a5@
movel sp,a5
movel a6,sp@-
movel _DOSBase,a6
movel a5@(8),d1
#APP
jsr a6@(-0x48:W)
#NO_APP
tstl d0
- jne L20
- moveq #-1,d0
- jra L21
- .even
-L20:
- clrl d0
-L21:
+ seq d0
+ extbl d0
movel sp@+,a6
unlk a5
rts
which is nice I guess.
@sezero Thanks for finding out the culprit. I'll have a look at more recent versions of GCC to choose the right value. It's good to know that alternative macro set is improving code efficiency. Good job!
@jens-maus I'll redo the patch. The header files will get copied from fd2sfd
instead of keeping them in my repo. GCC will be patched according to @sezero proposal.
@cahirwpz I also think that @sezero proposal to patch GCC is viable and should be applied. However, how do you want to use fd2sfd to generate macros.h? AFAIK fd2sfd doesn't provide the complete set of LP() macros as my patch is proposing. Or am I wrong?
@jens-maus You're right! Could you send me your macros.h
by e-mail. I'll compare it against fd2sfd's version and make my mind about it.
@cahirwpz It is essentially exactly the macros.h file we are using with AmiSSL (see here: https://github.com/jens-maus/amissl/blob/master/include/inline/macros.h)
For the sake of simplicity I create a pull request for the bump of max_recog_operands, as @sezero suggested. Feel free to modify it before merging it.
Noticed the following with gcc2 (both 2.95.3 and 2.95.4) :
$ cat test.c
#include <intuition/intuition.h>
#include <proto/intuition.h>
#include <proto/graphics.h>
extern struct IntuitionBase *IntuitionBase;
extern struct GfxBase *GfxBase;
struct Window * w3dWindow;
struct BitMap * w3dBitMap;
unsigned vid_OpenWindow(void)
{
struct TagItem OpenWinTags[] = { {TAG_DONE,0} };
w3dWindow = OpenWindowTagList(NULL, OpenWinTags);
if (!w3dWindow) goto Duh;
w3dBitMap = AllocBitMap(0,0,8,0,w3dWindow->RPort->BitMap);
if (!w3dBitMap) goto Duh;
return 1;
Duh:
return 0;
}
m68k-amigaos-gcc -m68060 -O2 -S test.c
The asm output gives the following diff with old vs new macros.h:
--- test1.s
+++ test2.s
@@ -15,9 +15,8 @@
#APP
jsr a6@(-0x25e:W)
#NO_APP
- movel d0,_w3dWindow
movel d0,a0
- tstl a0
+ movel a0,_w3dWindow
jeq L4
movel a0@(50),a0
movel _GfxBase,a6
@@ -29,8 +28,8 @@
#APP
jsr a6@(-0x396:W)
#NO_APP
- movel d0,_w3dBitMap
movel d0,d1
+ movel d1,_w3dBitMap
moveq #1,d0
tstl d1
jne L6
Note that 'testl' is gone. My asm knowledge is less than little, but is this OK?
The "tst.l a0" plus the following "jeq L4" is just replaced by the equivalent "move.l a0, _w3dWindow" plus the same "jeq L4". The move instruction will set the zero bit if the moved value was zero and the jeq instruction checks this zero bit and branches accordingly. There is no need for an additional explicit zero check. So this is ok from my point of view.
@tboeckel: I see, thanks.
@jens-maus Please apply your changes to adtools/fd2sfd. Toolchain build script will take macros.h
file from there.
@jens-maus Oops... I meant adtools/fd2pragma. Ignore my previous comment. I've already done it.
Implemented by 51a0799cac96117b61a7d43cbe032063d6141bf1
This pull request bring in a larger bunch of LPXX() macros with a wider range of different possible applications for these kind of macros. This macros.h file is taken from the AmiSSL project (https://github.com/jens-maus/amissl/blob/master/include/inline/macros.h) and represents the latest state-of-the-art with LPXX() macro development.