[[Arm]]

-Armv8.2-A has been supported since the GCC 7 series
 AArch64
 GCC has been updated to the latest revision of the procedure call standard (AAPCS64) to provide support for parameter passing when data types have been over-aligned.
 The ARMv8.3-A architecture is now supported. It can be used by specifying the -march=armv8.3-a option.
 The option -msign-return-address= is supported to enable return address protection using ARMv8.3-A Pointer Authentication Extensions. For more information on the arguments accepted by this option, please refer to AArch64-Options.
 The ARMv8.2-A architecture and the ARMv8.2-A 16-bit Floating-Point Extensions are now supported. They can be used by specifying the -march=armv8.2-a or -march=armv8.2-a+fp16 options. The 16-bit Floating-Point Extensions introduce new half-precision data processing floating-point instructions.



-FMA for HGEMM
 // Excerpt of the vfmaq_f16 intrinsic wrapper; the return-type line is not part of this excerpt.
 // Note the argument order: __a is passed to the builtin last, after __b and __c.
 // NOTE(review): presumably the result is __a + __b * __c per lane - confirm against the ACLE reference.
 vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
 {
   return __builtin_aarch64_fmav8hf (__b, __c, __a);
 }
-https://github.com/gcc-mirror/gcc/blob/87fb575328cc5d954b91672681aacfc383134b12/gcc/config/aarch64/arm_neon.h#L31225-L31230

-Compiler Explorer
-https://godbolt.org/z/obJxS9
 #include <arm_neon.h>
 #include <iostream>
 
 // Minimal demo of the vfmaq_f16 intrinsic: loads three 8-lane half-precision
 // vectors from one array, combines them with vfmaq_f16 and prints the 8 result lanes.
 int main(int argc, char** argv)
 {
     // 24 values = three consecutive groups of 8 lanes (loaded below as a, b and c).
     float16_t value[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, };
     float16_t resultWrite[8];
     float16x8_t a = vld1q_f16(value + 0);
     float16x8_t b = vld1q_f16(value + 8);
     float16x8_t c = vld1q_f16(value + 16);
     float16x8_t result = vfmaq_f16(a, b, c);
     vst1q_f16(resultWrite, result);
     // Streaming the array itself decays to a pointer and prints an address,
     // so print every lane instead, widened to float for the stream operator.
     for (int lane = 0; lane < 8; ++lane)
     {
         std::cout << static_cast<float>(resultWrite[lane]) << (lane + 1 < 8 ? ' ' : '\n');
     }
     std::cout << std::flush;
     return 0;
 }
-objdump
 $ objdump -D main.cpp.o  | grep -e fmla -e dump
 58c:   4e450c2a        fmla    v10.8h, v1.8h, v5.8h
 598:   4e440c2a        fmla    v10.8h, v1.8h, v4.8h
 5a8:   4e4a0c30        fmla    v16.8h, v1.8h, v10.8h
 5b4:   4e490c2a        fmla    v10.8h, v1.8h, v9.8h
 5c4:   4e480c2a        fmla    v10.8h, v1.8h, v8.8h
 5d4:   4e470c2a        fmla    v10.8h, v1.8h, v7.8h
 5e0:   4e420c20        fmla    v0.8h, v1.8h, v2.8h
 5e8:   4e430c2a        fmla    v10.8h, v1.8h, v3.8h
 608:   4e450c05        fmla    v5.8h, v0.8h, v5.8h
 60c:   4e450c25        fmla    v5.8h, v1.8h, v5.8h
 610:   4e450c45        fmla    v5.8h, v2.8h, v5.8h
 614:   4e450c65        fmla    v5.8h, v3.8h, v5.8h
 618:   4e450c85        fmla    v5.8h, v4.8h, v5.8h
 61c:   4e450ca5        fmla    v5.8h, v5.8h, v5.8h
 620:   4e450cc5        fmla    v5.8h, v6.8h, v5.8h
 624:   4e450ce5        fmla    v5.8h, v7.8h, v5.8h
 628:   4e450d05        fmla    v5.8h, v8.8h, v5.8h
 62c:   4e450d25        fmla    v5.8h, v9.8h, v5.8h
 630:   4e450d45        fmla    v5.8h, v10.8h, v5.8h
 634:   4e450d65        fmla    v5.8h, v11.8h, v5.8h
 638:   4e450d85        fmla    v5.8h, v12.8h, v5.8h
 63c:   4e450da5        fmla    v5.8h, v13.8h, v5.8h
 640:   4e450dc5        fmla    v5.8h, v14.8h, v5.8h
 644:   4e450de5        fmla    v5.8h, v15.8h, v5.8h
 648:   4e450c0a        fmla    v10.8h, v0.8h, v5.8h
 64c:   4e450c2a        fmla    v10.8h, v1.8h, v5.8h
 650:   4e450c4a        fmla    v10.8h, v2.8h, v5.8h
 654:   4e450c6a        fmla    v10.8h, v3.8h, v5.8h
 658:   4e450c8a        fmla    v10.8h, v4.8h, v5.8h
 65c:   4e450caa        fmla    v10.8h, v5.8h, v5.8h
 660:   4e450cca        fmla    v10.8h, v6.8h, v5.8h
 664:   4e450cea        fmla    v10.8h, v7.8h, v5.8h
 668:   4e450d0a        fmla    v10.8h, v8.8h, v5.8h
 66c:   4e450d2a        fmla    v10.8h, v9.8h, v5.8h
 670:   4e450d4a        fmla    v10.8h, v10.8h, v5.8h
 674:   4e450d6a        fmla    v10.8h, v11.8h, v5.8h
 678:   4e450d8a        fmla    v10.8h, v12.8h, v5.8h
 67c:   4e450daa        fmla    v10.8h, v13.8h, v5.8h
 680:   4e450dca        fmla    v10.8h, v14.8h, v5.8h
 684:   4e450dea        fmla    v10.8h, v15.8h, v5.8h

-analyzing
                      src2         src1  dst           dst,    src1,  src2
 4e460c2a 01001110010 00110 000011 00001 01010 fmla    v10.8h, v1.8h, v6.8h
 4e450c2a 01001110010 00101 000011 00001 01010 fmla    v10.8h, v1.8h, v5.8h
 4e440c2a 01001110010 00100 000011 00001 01010 fmla    v10.8h, v1.8h, v4.8h
 4e4a0c30 01001110010 01010 000011 00001 10000 fmla    v16.8h, v1.8h, v10.8h
 4e490c2a 01001110010 01001 000011 00001 01010 fmla    v10.8h, v1.8h, v9.8h
 4e480c2a 01001110010 01000 000011 00001 01010 fmla    v10.8h, v1.8h, v8.8h
 4e470c2a 01001110010 00111 000011 00001 01010 fmla    v10.8h, v1.8h, v7.8h
 4e420c20 01001110010 00010 000011 00001 00000 fmla    v0.8h, v1.8h, v2.8h
 4e430c2a 01001110010 00011 000011 00001 01010 fmla    v10.8h, v1.8h, v3.8h
 
 4e450c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v1.8h, v5.8h
 4e440c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v1.8h, v4.8h
 4e4a0c30 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v16.8h, v1.8h, v10.8h
 4e490c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v1.8h, v9.8h
 4e480c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v1.8h, v8.8h
 4e470c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v1.8h, v7.8h
 4e420c20 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v0.8h, v1.8h, v2.8h
 4e430c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v1.8h, v3.8h
 4e450c05 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v0.8h, v5.8h
 4e450c25 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v1.8h, v5.8h
 4e450c45 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v2.8h, v5.8h
 4e450c65 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v3.8h, v5.8h
 4e450c85 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v4.8h, v5.8h
 4e450ca5 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v5.8h, v5.8h
 4e450cc5 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v6.8h, v5.8h
 4e450ce5 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v7.8h, v5.8h
 4e450d05 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v8.8h, v5.8h
 4e450d25 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v9.8h, v5.8h
 4e450d45 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v10.8h, v5.8h
 4e450d65 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v11.8h, v5.8h
 4e450d85 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v12.8h, v5.8h
 4e450da5 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v13.8h, v5.8h
 4e450dc5 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v14.8h, v5.8h
 4e450de5 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v5.8h, v15.8h, v5.8h
 4e450c0a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v0.8h, v5.8h
 4e450c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v1.8h, v5.8h
 4e450c4a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v2.8h, v5.8h
 4e450c6a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v3.8h, v5.8h
 4e450c8a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v4.8h, v5.8h
 4e450caa 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v5.8h, v5.8h
 4e450cca 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v6.8h, v5.8h
 4e450cea 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v7.8h, v5.8h
 4e450d0a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v8.8h, v5.8h
 4e450d2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v9.8h, v5.8h
 4e450d4a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v10.8h, v5.8h
 4e450d6a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v11.8h, v5.8h
 4e450d8a 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v12.8h, v5.8h
 4e450daa 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v13.8h, v5.8h
 4e450dca 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v14.8h, v5.8h
 4e450dea 01001110010 xxxxx 000011 xxxxx xxxxx fmla    v10.8h, v15.8h, v5.8h
 
   401908:   4e490d0a    fmla    v10.8h, v8.8h, v9.8h
   40190c:   4e490d0a    fmla    v10.8h, v8.8h, v9.8h
   401910:   4e490cea    fmla    v10.8h, v7.8h, v9.8h
   401914:   4e490cca    fmla    v10.8h, v6.8h, v9.8h
   401918:   4e490caa    fmla    v10.8h, v5.8h, v9.8h

トップ   編集 差分 履歴 添付 複製 名前変更 リロード   新規 一覧 検索 最終更新   ヘルプ   最終更新のRSS