Re: "Mini" tags to reduce the number of op codes

Liste des GroupesRevenir à c arch 
Sujet : Re: "Mini" tags to reduce the number of op codes
De : cr88192 (at) *nospam* gmail.com (BGB)
Groupes : comp.arch
Date : 13. Apr 2024, 07:12:53
Autres entêtes
Organisation : A noiseless patient Spider
Message-ID : <uvd7p8$2s5mf$1@dont-email.me>
References : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
User-Agent : Mozilla Thunderbird
On 4/12/2024 10:17 PM, MitchAlsup1 wrote:
BGB wrote:
 
On 4/11/2024 8:40 PM, MitchAlsup1 wrote:
BGB wrote:
>
On 4/11/2024 6:06 PM, MitchAlsup1 wrote:
>
>
While I admit that <basically> anything bigger than 50-bits will be fine
as displacements, they are not fine for constants and especially FP
constants and many bit twiddling constants.
>
>
The number of cases where this comes up is not statistically significant enough to have a meaningful impact on performance.
>
Fraction of a percent edge-cases are not deal-breakers, as I see it.
>
Idle speculation::
>
     .globl    r8_erf                          ; -- Begin function r8_erf
     .type    r8_erf,@function
r8_erf:                                 ; @r8_erf
; %bb.0:
     add    sp,sp,#-128
     std    #4614300636657501161,[sp,88]    // a[0]
     std    #4645348406721991307,[sp,104]    // a[2]
     std    #4659275911028085274,[sp,112]    // a[3]
     std    #4595861367557309218,[sp,120]    // a[4]
     std    #4599171895595656694,[sp,40]    // p[0]
     std    #4593699784569291823,[sp,56]    // p[2]
     std    #4580293056851789237,[sp,64]    // p[3]
     std    #4559215111867327292,[sp,72]    // p[4]
     std    #4580359811580069319,[sp,80]    // p[4]
     std    #4612966212090462427,[sp]    // q[0]
     std    #4602930165995154489,[sp,16]    // q[2]
     std    #4588882433176075751,[sp,24]    // q[3]
     std    #4567531038595922641,[sp,32]    // q[4]
     fabs    r2,r1
     fcmp    r3,r2,#0x3EF00000        // thresh
     bnlt    r3,.LBB141_6
; %bb.1:
     fcmp    r3,r2,#4            // xabs <= 4.0
     bnlt    r3,.LBB141_7
; %bb.2:
     fcmp    r3,r2,#0x403A8B020C49BA5E    // xbig
     bngt    r3,.LBB141_11
; %bb.3:
     fmul    r3,r1,r1
     fdiv    r3,#1,r3
     mov    r4,#0x3F90B4FB18B485C7        // p[5]
     fmac    r4,r3,r4,#0x3FD38A78B9F065F6    // p[0]
     fadd    r5,r3,#0x40048C54508800DB    // q[0]
     fmac    r6,r3,r4,#0x3FD70FE40E2425B8    // p[1]
     fmac    r4,r3,r5,#0x3FFDF79D6855F0AD    // q[1]
     fmul    r4,r3,r4
     fmul    r6,r3,r6
     mov    r5,#2
     add    r7,sp,#40            // p[*]
     add    r8,sp,#0            // q[*]
LBB141_4:                              ; %._crit_edge11
                                        ; =>This Inner Loop Header: Depth=1
     vec    r9,{r4,r6}
     ldd    r10,[r7,r5<<3,0]        // p[*]
     ldd    r11,[r8,r5<<3,0]        // q[*]
     fadd    r6,r6,r10
     fadd    r4,r4,r11
     fmul    r4,r3,r4
     fmul    r6,r3,r6
     loop    ne,r5,#4,#1
; %bb.5:
     fadd    r5,r6,#0x3F4595FD0D71E33C    // p[4]
     fmul    r3,r3,r5
     fadd    r4,r4,#0x3F632147A014BAD1    // q[4]
     fdiv    r3,r3,r4
     fadd    r3,#0x3FE20DD750429B6D,-r3    // c[0]
     fdiv    r3,r3,r2
     br    .LBB141_10            // common tail
LBB141_6:                              ; %._crit_edge
     fmul    r3,r1,r1
     fcmp    r2,r2,#0x3C9FFE5AB7E8AD5E    // xsmall
     sra    r2,r2,<1:13>
     cvtsd    r4,#0
     mux    r2,r2,r3,r4
     mov    r3,#0x3FC7C7905A31C322        // a[4]
     fmac    r3,r2,r3,#0x400949FB3ED443E9    // a[0]
     fmac    r3,r2,r3,#0x405C774E4D365DA3    // a[1]
     ldd    r4,[sp,104]            // a[2]
     fmac    r3,r2,r3,r4
     fadd    r4,r2,#0x403799EE342FB2DE    // b[0]
     fmac    r4,r2,r4,#0x406E80C9D57E55B8    // b[1]
     fmac    r4,r2,r4,#0x40940A77529CADC8    // b[2]
     fmac    r3,r2,r3,#0x40A912C1535D121A    // a[3]
     fmul    r1,r3,r1
     fmac    r2,r2,r4,#0x40A63879423B87AD    // b[3]
     fdiv    r2,r1,r2
     mov    r1,r2
     add    sp,sp,#128
     ret                // 68
LBB141_7:
     fmul    r3,r2,#0x3E571E703C5F5815    // c[8]
     mov    r5,#0
     mov    r4,r2
LBB141_8:                              ; =>This Inner Loop Header: Depth=1
     vec    r6,{r3,r4}
     ldd    r7,[ip,r5<<3,.L__const.r8_erf.c]// c[*]
     fadd    r3,r3,r7
     fmul    r3,r2,r3
     ldd    r7,[ip,r5<<3,.L__const.r8_erf.d]// d[*]
     fadd    r4,r4,r7
     fmul    r4,r2,r4
     loop    ne,r5,#7,#1
; %bb.9:
     fadd    r3,r3,#0x4093395B7FD2FC8E    // c[7]
     fadd    r4,r4,#0x4093395B7FD35F61    // d[7]
     fdiv    r3,r3,r4
LBB141_10:                // common tail
     fmul    r4,r2,#0x41800000        // 16.0
     fmul    r4,r4,#0x3D800000        // 1/16.0
     cvtds    r4,r4                // (signed)double
     cvtsd    r4,r4                // (double)signed
     fadd    r5,r2,-r4
     fadd    r2,r2,r4
     fmul    r4,r4,-r4
     fexp    r4,r4                // exp()
     fmul    r2,r2,-r5
     fexp    r2,r2                // exp()
     fmul    r2,r4,r2
     fadd    r2,#0,-r2
     fmac    r2,r2,r3,#0x3F000000        // 0.5
     fadd    r2,r2,#0x3F000000        // 0.5
     pflt    r1,0,T
     fadd    r2,#0,-r2
     mov    r1,r2
     add    sp,sp,#128
     ret
LBB141_11:
     fcmp    r1,r1,#0
     sra    r1,r1,<1:13>
     cvtsd    r2,#-1                // (double)-1
     cvtsd    r3,#1                // (double)+1
     mux    r2,r1,r3,r2
     mov    r1,r2
     add    sp,sp,#128
     ret
Lfunc_end141:
     .size    r8_erf, .Lfunc_end141-r8_erf
                                        ; -- End function
 
These patterns seem rather unusual...
Don't really know the ABI.
 
Patterns don't really fit observations for typical compiler output though (mostly in the FP constants; in particular, ones that fall outside the scope of what can be exactly represented as Binary16 or similar are rare).
 You are N E V E R going to find the coefficients of a Chebyshev
polynomial to fit in a small FP container; excepting the very
occasional C0 or C1 term {which are mostly 1.0 and 0.0}
Some stats I have (for GLQuake):
   14.9% of constants are floating-point.
     10.99% are FP and can be expressed exactly as Binary16.
       7.3% as Fp5 (E3.F2)
       9.5% as Fp10 (S.E5.F6)
     1.3% can be expressed in Binary32
     2.7% need Binary64.
If scaled so that this is only FP constants:
   73.0% are Binary16
    8.7% are Binary32
   18.1% are Binary64
Granted, this is inexact, as the stat is based on pattern recognition rather than type. However, given that for Doom the total percentage of constants flagged as FP drops to around 1%, probably not too far off.
So, here, it seems it is common enough that the ability to load it into a register in 1 cycle is worthwhile, but not so much that I am all that worried about needing to spend an instruction to do so.
More so, when the 1 cycle spent on the constant load is overshadowed by the 6 cycles it takes to do a Binary64 FADD or FMUL (faster only exists for low-precision ops, or for Binary16/Binary32 SIMD).
Can also note, for integer immediate values:
   3RI Imm9un: 97% hit-rate
     2% turn into Jumbo Imm33s
     1% require a separate constant
   2RI Imm10un: 94% hit-rate
     4.4% turn into 2RI Imm16
     1.5% turn into Jumbo Imm33s
     0.1% require a separate constant
    Ld/St Disp9u: 96.4%
     0.18% are negative
     3.42%: Jumbo Disp33s
For RISC-V, the Imm12s case does result in a better hit rate for the basic instructions, albeit the fallback case is worse (LUI+ADD or a memory load).
Whereas, in my case, it is more a question of whether it ends up better to load the immediate into a register or to use a jumbo prefix (where the compiler may look forward and make a guess).
In a world where I could have the ability to directly store constants to memory, or glue full 64 bit constants onto any instruction, etc, it doesn't seem likely that this would have all that large of an impact on either program size or performance.
Though, at the moment (in ongoing compiler fiddling), I am not seeing much more evidence of unrelated / unreachable code being included in the binary. Seems like this optimization case may be used up.
This leaves roughly another 4% remaining...
I guess, will see how much of a fight this last 4% puts up...
Though, looks like I could in theory shave several kB off mostly by disabling the memcpy slide and making the limit for inline memcpy smaller and similar, but this comes at a performance impact (needs to go the slower route of "actually calling memcpy()" in more cases...). Will continue to look for other options.

Date Sujet#  Auteur
3 Apr 24 * "Mini" tags to reduce the number of op codes81Stephen Fuld
3 Apr 24 +* Re: "Mini" tags to reduce the number of op codes8Anton Ertl
15 Apr 24 i+* Re: "Mini" tags to reduce the number of op codes6MitchAlsup1
15 Apr 24 ii`* Re: "Mini" tags to reduce the number of op codes5Terje Mathisen
15 Apr 24 ii +- Re: "Mini" tags to reduce the number of op codes1Terje Mathisen
15 Apr 24 ii `* Re: "Mini" tags to reduce the number of op codes3MitchAlsup1
16 Apr 24 ii  `* Re: "Mini" tags to reduce the number of op codes2Terje Mathisen
16 Apr 24 ii   `- Re: "Mini" tags to reduce the number of op codes1MitchAlsup1
17 Apr 24 i`- Re: "Mini" tags to reduce the number of op codes1Stephen Fuld
3 Apr 24 +* Re: "Mini" tags to reduce the number of op codes3Thomas Koenig
17 Apr 24 i`* Re: "Mini" tags to reduce the number of op codes2Stephen Fuld
17 Apr 24 i `- Re: "Mini" tags to reduce the number of op codes1BGB-Alt
3 Apr 24 +* Re: "Mini" tags to reduce the number of op codes12BGB-Alt
3 Apr 24 i+* Re: "Mini" tags to reduce the number of op codes9MitchAlsup1
4 Apr 24 ii+* Re: "Mini" tags to reduce the number of op codes7Terje Mathisen
4 Apr 24 iii+* Re: "Mini" tags to reduce the number of op codes3Michael S
4 Apr 24 iiii`* Re: "Mini" tags to reduce the number of op codes2Terje Mathisen
4 Apr 24 iiii `- Re: "Mini" tags to reduce the number of op codes1Michael S
5 Apr 24 iii`* Re: "Mini" tags to reduce the number of op codes3BGB-Alt
5 Apr 24 iii `* Re: "Mini" tags to reduce the number of op codes2MitchAlsup1
5 Apr 24 iii  `- Re: "Mini" tags to reduce the number of op codes1BGB
17 Apr 24 ii`- Re: "Mini" tags to reduce the number of op codes1Stephen Fuld
3 Apr 24 i`* Re: "Mini" tags to reduce the number of op codes2MitchAlsup1
4 Apr 24 i `- Re: "Mini" tags to reduce the number of op codes1BGB
5 Apr 24 +* Re: "Mini" tags to reduce the number of op codes54John Savard
5 Apr 24 i+- Re: "Mini" tags to reduce the number of op codes1BGB-Alt
5 Apr 24 i`* Re: "Mini" tags to reduce the number of op codes52MitchAlsup1
7 Apr 24 i `* Re: "Mini" tags to reduce the number of op codes51John Savard
7 Apr 24 i  +* Re: "Mini" tags to reduce the number of op codes6MitchAlsup1
8 Apr 24 i  i`* Re: "Mini" tags to reduce the number of op codes5John Savard
8 Apr 24 i  i +* Re: "Mini" tags to reduce the number of op codes2Thomas Koenig
17 Apr 24 i  i i`- Re: "Mini" tags to reduce the number of op codes1John Savard
8 Apr 24 i  i `* Re: "Mini" tags to reduce the number of op codes2MitchAlsup1
17 Apr 24 i  i  `- Re: "Mini" tags to reduce the number of op codes1John Savard
7 Apr 24 i  `* Re: "Mini" tags to reduce the number of op codes44Thomas Koenig
7 Apr 24 i   `* Re: "Mini" tags to reduce the number of op codes43MitchAlsup1
8 Apr 24 i    `* Re: "Mini" tags to reduce the number of op codes42Thomas Koenig
8 Apr 24 i     +- Re: "Mini" tags to reduce the number of op codes1Anton Ertl
9 Apr 24 i     `* Re: "Mini" tags to reduce the number of op codes40Thomas Koenig
9 Apr 24 i      +* Re: "Mini" tags to reduce the number of op codes38BGB
9 Apr 24 i      i`* Re: "Mini" tags to reduce the number of op codes37MitchAlsup1
10 Apr 24 i      i `* Re: "Mini" tags to reduce the number of op codes36BGB-Alt
10 Apr 24 i      i  +* Re: "Mini" tags to reduce the number of op codes31MitchAlsup1
10 Apr 24 i      i  i+* Re: "Mini" tags to reduce the number of op codes23BGB
10 Apr 24 i      i  ii`* Re: "Mini" tags to reduce the number of op codes22MitchAlsup1
10 Apr 24 i      i  ii +* Re: "Mini" tags to reduce the number of op codes3BGB-Alt
10 Apr 24 i      i  ii i`* Re: "Mini" tags to reduce the number of op codes2MitchAlsup1
11 Apr 24 i      i  ii i `- Re: "Mini" tags to reduce the number of op codes1BGB
10 Apr 24 i      i  ii +- Re: "Mini" tags to reduce the number of op codes1BGB-Alt
11 Apr 24 i      i  ii +* Re: "Mini" tags to reduce the number of op codes16MitchAlsup1
11 Apr 24 i      i  ii i`* Re: "Mini" tags to reduce the number of op codes15Michael S
11 Apr 24 i      i  ii i `* Re: "Mini" tags to reduce the number of op codes14BGB
11 Apr 24 i      i  ii i  `* Re: "Mini" tags to reduce the number of op codes13MitchAlsup1
11 Apr 24 i      i  ii i   +* Re: "Mini" tags to reduce the number of op codes9BGB-Alt
12 Apr 24 i      i  ii i   i`* Re: "Mini" tags to reduce the number of op codes8MitchAlsup1
12 Apr 24 i      i  ii i   i `* Re: "Mini" tags to reduce the number of op codes7BGB
12 Apr 24 i      i  ii i   i  `* Re: "Mini" tags to reduce the number of op codes6MitchAlsup1
12 Apr 24 i      i  ii i   i   `* Re: "Mini" tags to reduce the number of op codes5BGB
13 Apr 24 i      i  ii i   i    +- Re: "Mini" tags to reduce the number of op codes1MitchAlsup1
13 Apr 24 i      i  ii i   i    `* Re: "Mini" tags to reduce the number of op codes3MitchAlsup1
13 Apr 24 i      i  ii i   i     +- Re: "Mini" tags to reduce the number of op codes1BGB
15 Apr 24 i      i  ii i   i     `- Re: "Mini" tags to reduce the number of op codes1BGB-Alt
12 Apr 24 i      i  ii i   `* Re: "Mini" tags to reduce the number of op codes3Michael S
12 Apr 24 i      i  ii i    +- Re: "Mini" tags to reduce the number of op codes1Michael S
15 Apr 24 i      i  ii i    `- Re: "Mini" tags to reduce the number of op codes1MitchAlsup1
11 Apr 24 i      i  ii `- Re: "Mini" tags to reduce the number of op codes1Terje Mathisen
11 Apr 24 i      i  i`* Re: "Mini" tags to reduce the number of op codes7Paul A. Clayton
11 Apr 24 i      i  i +- Re: "Mini" tags to reduce the number of op codes1BGB
11 Apr 24 i      i  i +* Re: "Mini" tags to reduce the number of op codes2BGB-Alt
12 Apr 24 i      i  i i`- Re: "Mini" tags to reduce the number of op codes1MitchAlsup1
12 Apr 24 i      i  i +* Re: "Mini" tags to reduce the number of op codes2MitchAlsup1
21 Apr 24 i      i  i i`- Re: "Mini" tags to reduce the number of op codes1Paul A. Clayton
21 Apr 24 i      i  i `- Re: "Mini" tags to reduce the number of op codes1Paul A. Clayton
10 Apr 24 i      i  `* Re: "Mini" tags to reduce the number of op codes4Chris M. Thomasson
10 Apr 24 i      i   `* Re: "Mini" tags to reduce the number of op codes3BGB
10 Apr 24 i      i    `* Re: "Mini" tags to reduce the number of op codes2Chris M. Thomasson
10 Apr 24 i      i     `- Re: "Mini" tags to reduce the number of op codes1BGB-Alt
13 Apr 24 i      `- Re: "Mini" tags to reduce the number of op codes1Brian G. Lucas
15 Apr 24 +- Re: "Mini" tags to reduce the number of op codes1MitchAlsup1
17 Apr 24 `* Re: "Mini" tags to reduce the number of op codes2Stephen Fuld
17 Apr 24  `- Re: "Mini" tags to reduce the number of op codes1MitchAlsup1

Haut de la page

Les messages affichés proviennent d'usenet.

NewsPortal