- 追加された行はこの色です。
- 削除された行はこの色です。
- Armv8.2 へ行く。
[[Arm]]
-Armv8.2-A is supported starting with the GCC 7 series.
AArch64
GCC has been updated to the latest revision of the procedure call standard (AAPCS64) to provide support for parameter passing when data types have been over-aligned.
The ARMv8.3-A architecture is now supported. It can be used by specifying the -march=armv8.3-a option.
The option -msign-return-address= is supported to enable return address protection using ARMv8.3-A Pointer Authentication Extensions. For more information on the arguments accepted by this option, please refer to AArch64-Options.
The ARMv8.2-A architecture and the ARMv8.2-A 16-bit Floating-Point Extensions are now supported. They can be used by specifying the -march=armv8.2-a or -march=armv8.2-a+fp16 options. The 16-bit Floating-Point Extensions introduce new half-precision data processing floating-point instructions.
-FMA for HGEMM
// Excerpt from GCC's arm_neon.h: half-precision fused multiply-add over 8 lanes.
// The intrinsic takes the accumulator first (__a), while the builtin is called
// with it last, hence the (__b, __c, __a) ordering below.
// NOTE(review): per the ACLE the result is __a + (__b * __c) per lane - confirm
// against the intrinsics reference.
vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
{
return __builtin_aarch64_fmav8hf (__b, __c, __a);
}
-https://github.com/gcc-mirror/gcc/blob/87fb575328cc5d954b91672681aacfc383134b12/gcc/config/aarch64/arm_neon.h#L31225-L31230
-Compiler Explorer
-https://godbolt.org/z/obJxS9
#include <arm_neon.h>
#include <iostream>
int main(int argc, char** argv)
{
float16_t value[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, 100.0f, };
float16_t resultWrite[8];
float16x8_t a = vld1q_f16(value + 0);
float16x8_t b = vld1q_f16(value + 8);
float16x8_t c = vld1q_f16(value + 16);
float16x8_t result = vfmaq_f16(a, b, c);
vst1q_f16(resultWrite, result);
std::cout << resultWrite << std::endl;
return 0;
}
-objdump
$ objdump -D main.cpp.o | grep -e fmla -e dump
58c: 4e450c2a fmla v10.8h, v1.8h, v5.8h
598: 4e440c2a fmla v10.8h, v1.8h, v4.8h
5a8: 4e4a0c30 fmla v16.8h, v1.8h, v10.8h
5b4: 4e490c2a fmla v10.8h, v1.8h, v9.8h
5c4: 4e480c2a fmla v10.8h, v1.8h, v8.8h
5d4: 4e470c2a fmla v10.8h, v1.8h, v7.8h
5e0: 4e420c20 fmla v0.8h, v1.8h, v2.8h
5e8: 4e430c2a fmla v10.8h, v1.8h, v3.8h
608: 4e450c05 fmla v5.8h, v0.8h, v5.8h
60c: 4e450c25 fmla v5.8h, v1.8h, v5.8h
610: 4e450c45 fmla v5.8h, v2.8h, v5.8h
614: 4e450c65 fmla v5.8h, v3.8h, v5.8h
618: 4e450c85 fmla v5.8h, v4.8h, v5.8h
61c: 4e450ca5 fmla v5.8h, v5.8h, v5.8h
620: 4e450cc5 fmla v5.8h, v6.8h, v5.8h
624: 4e450ce5 fmla v5.8h, v7.8h, v5.8h
628: 4e450d05 fmla v5.8h, v8.8h, v5.8h
62c: 4e450d25 fmla v5.8h, v9.8h, v5.8h
630: 4e450d45 fmla v5.8h, v10.8h, v5.8h
634: 4e450d65 fmla v5.8h, v11.8h, v5.8h
638: 4e450d85 fmla v5.8h, v12.8h, v5.8h
63c: 4e450da5 fmla v5.8h, v13.8h, v5.8h
640: 4e450dc5 fmla v5.8h, v14.8h, v5.8h
644: 4e450de5 fmla v5.8h, v15.8h, v5.8h
648: 4e450c0a fmla v10.8h, v0.8h, v5.8h
64c: 4e450c2a fmla v10.8h, v1.8h, v5.8h
650: 4e450c4a fmla v10.8h, v2.8h, v5.8h
654: 4e450c6a fmla v10.8h, v3.8h, v5.8h
658: 4e450c8a fmla v10.8h, v4.8h, v5.8h
65c: 4e450caa fmla v10.8h, v5.8h, v5.8h
660: 4e450cca fmla v10.8h, v6.8h, v5.8h
664: 4e450cea fmla v10.8h, v7.8h, v5.8h
668: 4e450d0a fmla v10.8h, v8.8h, v5.8h
66c: 4e450d2a fmla v10.8h, v9.8h, v5.8h
670: 4e450d4a fmla v10.8h, v10.8h, v5.8h
674: 4e450d6a fmla v10.8h, v11.8h, v5.8h
678: 4e450d8a fmla v10.8h, v12.8h, v5.8h
67c: 4e450daa fmla v10.8h, v13.8h, v5.8h
680: 4e450dca fmla v10.8h, v14.8h, v5.8h
684: 4e450dea fmla v10.8h, v15.8h, v5.8h
-analyzing
hex fixed src2(Rm) fixed src1(Rn) dst(Rd) asm: dst, src1, src2
4e460c2a 01001110010 00110 000011 00001 01010 fmla v10.8h, v1.8h, v6.8h
4e450c2a 01001110010 00101 000011 00001 01010 fmla v10.8h, v1.8h, v5.8h
4e440c2a 01001110010 00100 000011 00001 01010 fmla v10.8h, v1.8h, v4.8h
4e4a0c30 01001110010 01010 000011 00001 10000 fmla v16.8h, v1.8h, v10.8h
4e490c2a 01001110010 01001 000011 00001 01010 fmla v10.8h, v1.8h, v9.8h
4e480c2a 01001110010 01000 000011 00001 01010 fmla v10.8h, v1.8h, v8.8h
4e470c2a 01001110010 00111 000011 00001 01010 fmla v10.8h, v1.8h, v7.8h
4e420c20 01001110010 00010 000011 00001 00000 fmla v0.8h, v1.8h, v2.8h
4e430c2a 01001110010 00011 000011 00001 01010 fmla v10.8h, v1.8h, v3.8h
4e450c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v1.8h, v5.8h
4e440c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v1.8h, v4.8h
4e4a0c30 01001110010 xxxxx 000011 xxxxx xxxxx fmla v16.8h, v1.8h, v10.8h
4e490c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v1.8h, v9.8h
4e480c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v1.8h, v8.8h
4e470c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v1.8h, v7.8h
4e420c20 01001110010 xxxxx 000011 xxxxx xxxxx fmla v0.8h, v1.8h, v2.8h
4e430c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v1.8h, v3.8h
4e450c05 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v0.8h, v5.8h
4e450c25 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v1.8h, v5.8h
4e450c45 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v2.8h, v5.8h
4e450c65 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v3.8h, v5.8h
4e450c85 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v4.8h, v5.8h
4e450ca5 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v5.8h, v5.8h
4e450cc5 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v6.8h, v5.8h
4e450ce5 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v7.8h, v5.8h
4e450d05 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v8.8h, v5.8h
4e450d25 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v9.8h, v5.8h
4e450d45 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v10.8h, v5.8h
4e450d65 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v11.8h, v5.8h
4e450d85 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v12.8h, v5.8h
4e450da5 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v13.8h, v5.8h
4e450dc5 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v14.8h, v5.8h
4e450de5 01001110010 xxxxx 000011 xxxxx xxxxx fmla v5.8h, v15.8h, v5.8h
4e450c0a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v0.8h, v5.8h
4e450c2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v1.8h, v5.8h
4e450c4a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v2.8h, v5.8h
4e450c6a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v3.8h, v5.8h
4e450c8a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v4.8h, v5.8h
4e450caa 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v5.8h, v5.8h
4e450cca 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v6.8h, v5.8h
4e450cea 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v7.8h, v5.8h
4e450d0a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v8.8h, v5.8h
4e450d2a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v9.8h, v5.8h
4e450d4a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v10.8h, v5.8h
4e450d6a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v11.8h, v5.8h
4e450d8a 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v12.8h, v5.8h
4e450daa 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v13.8h, v5.8h
4e450dca 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v14.8h, v5.8h
4e450dea 01001110010 xxxxx 000011 xxxxx xxxxx fmla v10.8h, v15.8h, v5.8h
401908: 4e490d0a fmla v10.8h, v8.8h, v9.8h
40190c: 4e490d0a fmla v10.8h, v8.8h, v9.8h
401910: 4e490cea fmla v10.8h, v7.8h, v9.8h
401914: 4e490cca fmla v10.8h, v6.8h, v9.8h
401918: 4e490caa fmla v10.8h, v5.8h, v9.8h