Assembly Diffs
linux arm64
Diffs are based on 2,507,317 contexts (1,007,092 MinOpts, 1,500,225 FullOpts).
MISSED contexts: 1 (0.00%)
No diffs found.
Details
Context information
| Collection | Diffed contexts | MinOpts | FullOpts | Missed, base | Missed, diff |
|---|---|---|---|---|---|
| benchmarks.run.linux.arm64.checked.mch | 33,710 | 2,779 | 30,931 | 0 (0.00%) | 0 (0.00%) |
| benchmarks.run_pgo.linux.arm64.checked.mch | 152,904 | 61,290 | 91,614 | 0 (0.00%) | 0 (0.00%) |
| benchmarks.run_tiered.linux.arm64.checked.mch | 63,814 | 48,019 | 15,795 | 0 (0.00%) | 0 (0.00%) |
| coreclr_tests.run.linux.arm64.checked.mch | 625,275 | 382,318 | 242,957 | 1 (0.00%) | 1 (0.00%) |
| libraries.crossgen2.linux.arm64.checked.mch | 234,496 | 15 | 234,481 | 0 (0.00%) | 0 (0.00%) |
| libraries.pmi.linux.arm64.checked.mch | 295,745 | 6 | 295,739 | 0 (0.00%) | 0 (0.00%) |
| libraries_tests.run.linux.arm64.Release.mch | 744,166 | 490,908 | 253,258 | 0 (0.00%) | 0 (0.00%) |
| librariestestsnotieredcompilation.run.linux.arm64.Release.mch | 304,865 | 21,599 | 283,266 | 0 (0.00%) | 0 (0.00%) |
| realworld.run.linux.arm64.checked.mch | 33,309 | 151 | 33,158 | 0 (0.00%) | 0 (0.00%) |
| smoke_tests.nativeaot.linux.arm64.checked.mch | 19,033 | 7 | 19,026 | 0 (0.00%) | 0 (0.00%) |
| | 2,507,317 | 1,007,092 | 1,500,225 | 1 (0.00%) | 1 (0.00%) |
linux x64
Diffs are based on 2,517,908 contexts (991,070 MinOpts, 1,526,838 FullOpts).
MISSED contexts: 1 (0.00%)
Overall (-468 bytes)
| Collection | Base size (bytes) | Diff size (bytes) |
|---|---|---|
| coreclr_tests.run.linux.x64.checked.mch | 403,725,618 | -282 |
| libraries.pmi.linux.x64.checked.mch | 60,420,245 | -36 |
| libraries_tests.run.linux.x64.Release.mch | 337,126,106 | -88 |
| librariestestsnotieredcompilation.run.linux.x64.Release.mch | 132,558,776 | -62 |
MinOpts (-326 bytes)
| Collection | Base size (bytes) | Diff size (bytes) |
|---|---|---|
| coreclr_tests.run.linux.x64.checked.mch | 279,754,702 | -242 |
| libraries_tests.run.linux.x64.Release.mch | 183,759,693 | -84 |
FullOpts (-142 bytes)
| Collection | Base size (bytes) | Diff size (bytes) |
|---|---|---|
| coreclr_tests.run.linux.x64.checked.mch | 123,970,916 | -40 |
| libraries.pmi.linux.x64.checked.mch | 60,307,388 | -36 |
| libraries_tests.run.linux.x64.Release.mch | 153,366,413 | -4 |
| librariestestsnotieredcompilation.run.linux.x64.Release.mch | 121,941,008 | -62 |
Example diffs
coreclr_tests.run.linux.x64.checked.mch
-2 (-3.70%) : 291041.dasm - Runtime_90508:Test1(System.Runtime.Intrinsics.Vector128`1[double],double):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
@@ -21,21 +21,20 @@ G_M47826_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov bword ptr [rbp-0x08], rdi
;; size=22 bbWeight=1 PerfScore 4.75
G_M47826_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- vmovsd xmm0, qword ptr [rbp-0x10]
- vmovddup xmm0, xmm0
- vaddpd xmm0, xmm0, xmmword ptr [rbp+0x10]
+ vmovaps xmm0, xmmword ptr [rbp+0x10]
+ vaddpd xmm0, xmm0, qword ptr [rbp-0x10] {1to2}
mov rax, bword ptr [rbp-0x08]
; byrRegs +[rax]
vmovups xmmword ptr [rax], xmm0
mov rax, bword ptr [rbp-0x08]
- ;; size=26 bbWeight=1 PerfScore 13.00
+ ;; size=24 bbWeight=1 PerfScore 12.00
G_M47826_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 16
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 54, prolog size 13, PerfScore 19.50, instruction count 15, allocated bytes for code 54 (MethodHash=7638452d) for method Runtime_90508:Test1(System.Runtime.Intrinsics.Vector128`1[double],double):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
+; Total bytes of code 52, prolog size 13, PerfScore 18.50, instruction count 14, allocated bytes for code 55 (MethodHash=7638452d) for method Runtime_90508:Test1(System.Runtime.Intrinsics.Vector128`1[double],double):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
; ============================================================
Unwind Info:
-4 (-3.45%) : 82855.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (Tier0)
@@ -30,8 +30,7 @@ G_M43778_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; gcrRegs -[rdi]
call [<unknown method>]
vmovsd qword ptr [rbp-0x10], xmm0
- vmovsd xmm0, qword ptr [rbp-0x10]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x10]
vmovaps xmmword ptr [rbp-0x20], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x20]
vmovups xmmword ptr [rsp], xmm0
@@ -43,14 +42,14 @@ G_M43778_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
; gcrRegs -[rsi rdi]
nop
- ;; size=77 bbWeight=1 PerfScore 23.75
+ ;; size=73 bbWeight=1 PerfScore 22.75
G_M43778_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 48
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 116, prolog size 29, PerfScore 32.83, instruction count 26, allocated bytes for code 116 (MethodHash=06dc54fd) for method JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (Tier0)
+; Total bytes of code 112, prolog size 29, PerfScore 31.83, instruction count 25, allocated bytes for code 112 (MethodHash=06dc54fd) for method JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (Tier0)
; ============================================================
Unwind Info:
-40 (-3.12%) : 32934.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunBasicScenario():this (MinOpts)
@@ -86,8 +86,7 @@ G_M63628_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0xC8], xmm0
- vmovsd xmm0, qword ptr [rbp-0xC8]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0xC8]
vmovaps xmmword ptr [rbp-0xE0], xmm0
vmovaps xmm0, xmmword ptr [rbp-0xE0]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -110,8 +109,7 @@ G_M63628_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0xF8], xmm0
- vmovsd xmm0, qword ptr [rbp-0xF8]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0xF8]
vmovaps xmmword ptr [rbp-0x110], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x110]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -134,8 +132,7 @@ G_M63628_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0x128], xmm0
- vmovsd xmm0, qword ptr [rbp-0x128]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x128]
vmovaps xmmword ptr [rbp-0x140], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x140]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -149,19 +146,18 @@ G_M63628_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
vmovups xmmword ptr [rsp+0x10], xmm0
mov rdi, gword ptr [rbp-0x08]
; gcrRegs +[rdi]
- ;; size=346 bbWeight=1 PerfScore 100.75
-G_M63628_IG05: ; bbWeight=1, extend
mov rsi, 0xD1FFAB1E
; gcrRegs +[rsi]
call [<unknown method>]
; gcrRegs -[rsi rdi]
; gcr arg pop 0
+ ;; size=350 bbWeight=1 PerfScore 101.00
+G_M63628_IG05: ; bbWeight=1, extend
nop
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0x158], xmm0
- vmovsd xmm0, qword ptr [rbp-0x158]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x158]
vmovaps xmmword ptr [rbp-0x170], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x170]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -184,8 +180,7 @@ G_M63628_IG05: ; bbWeight=1, extend
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0x188], xmm0
- vmovsd xmm0, qword ptr [rbp-0x188]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x188]
vmovaps xmmword ptr [rbp-0x1A0], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x1A0]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -208,8 +203,7 @@ G_M63628_IG05: ; bbWeight=1, extend
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0x1B8], xmm0
- vmovsd xmm0, qword ptr [rbp-0x1B8]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x1B8]
vmovaps xmmword ptr [rbp-0x1D0], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x1D0]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -225,8 +219,6 @@ G_M63628_IG05: ; bbWeight=1, extend
; gcrRegs +[rdi]
mov rsi, 0xD1FFAB1E
; gcrRegs +[rsi]
- ;; size=355 bbWeight=1 PerfScore 100.75
-G_M63628_IG06: ; bbWeight=1, extend
call [<unknown method>]
; gcrRegs -[rsi rdi]
; gcr arg pop 0
@@ -234,8 +226,9 @@ G_M63628_IG06: ; bbWeight=1, extend
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0x1E8], xmm0
- vmovsd xmm0, qword ptr [rbp-0x1E8]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x1E8]
+ ;; size=356 bbWeight=1 PerfScore 104.75
+G_M63628_IG06: ; bbWeight=1, extend
vmovaps xmmword ptr [rbp-0x200], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x200]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -258,8 +251,7 @@ G_M63628_IG06: ; bbWeight=1, extend
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0x218], xmm0
- vmovsd xmm0, qword ptr [rbp-0x218]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x218]
vmovaps xmmword ptr [rbp-0x230], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x230]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -282,8 +274,7 @@ G_M63628_IG06: ; bbWeight=1, extend
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0x248], xmm0
- vmovsd xmm0, qword ptr [rbp-0x248]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x248]
vmovaps xmmword ptr [rbp-0x260], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x260]
vmovaps xmmword ptr [rbp-0x20], xmm0
@@ -303,17 +294,16 @@ G_M63628_IG06: ; bbWeight=1, extend
; gcrRegs -[rsi rdi]
; gcr arg pop 0
nop
- ;; size=370 bbWeight=1 PerfScore 103.75
-G_M63628_IG07: ; bbWeight=1, extend
call [<unknown method>]
; gcr arg pop 0
vmovsd qword ptr [rbp-0x278], xmm0
- vmovsd xmm0, qword ptr [rbp-0x278]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x278]
vmovaps xmmword ptr [rbp-0x290], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x290]
vmovaps xmmword ptr [rbp-0x20], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x20]
+ ;; size=377 bbWeight=1 PerfScore 105.50
+G_M63628_IG07: ; bbWeight=1, extend
vmovaps xmmword ptr [rbp-0x2A0], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x2A0]
vmovaps xmmword ptr [rbp-0xC0], xmm0
@@ -330,14 +320,14 @@ G_M63628_IG07: ; bbWeight=1, extend
; gcr arg pop 0
nop
nop
- ;; size=122 bbWeight=1 PerfScore 33.75
+ ;; size=70 bbWeight=1 PerfScore 17.75
G_M63628_IG08: ; bbWeight=1, epilog, nogc, extend
add rsp, 704
pop rbp
ret
;; size=9 bbWeight=1 PerfScore 1.75
-; Total bytes of code 1280, prolog size 64, PerfScore 361.33, instruction count 213, allocated bytes for code 1280 (MethodHash=3e480773) for method JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunBasicScenario():this (MinOpts)
+; Total bytes of code 1240, prolog size 64, PerfScore 351.33, instruction count 203, allocated bytes for code 1240 (MethodHash=3e480773) for method JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunBasicScenario():this (MinOpts)
; ============================================================
Unwind Info:
+4 (+2.25%) : 147578.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
@@ -38,7 +38,8 @@ G_M65260_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rdi]
vmovups xmm0, xmmword ptr [rax]
vmovaps xmmword ptr [rbp-0x20], xmm0
- vcvtudq2pd xmm0, xmmword ptr [rbp-0x20]
+ vmovaps xmm0, xmmword ptr [rbp-0x20]
+ vcvtudq2pd xmm0, xmm0
vmovaps xmmword ptr [rbp-0x30], xmm0
mov rax, gword ptr [rbp-0x08]
; gcrRegs +[rax]
@@ -71,14 +72,14 @@ G_M65260_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
; gcrRegs -[rdx rdi]
nop
- ;; size=145 bbWeight=1 PerfScore 56.25
+ ;; size=149 bbWeight=1 PerfScore 57.25
G_M65260_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 80
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 178, prolog size 23, PerfScore 64.08, instruction count 41, allocated bytes for code 181 (MethodHash=da8f0113) for method JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
+; Total bytes of code 182, prolog size 23, PerfScore 65.08, instruction count 42, allocated bytes for code 182 (MethodHash=da8f0113) for method JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
; ============================================================
Unwind Info:
+4 (+9.52%) : 126538.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
@@ -19,19 +19,20 @@ G_M19198_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov bword ptr [rbp-0x08], rdi
;; size=17 bbWeight=1 PerfScore 3.75
G_M19198_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- vcvtudq2pd xmm0, xmmword ptr [rbp+0x10]
+ vmovaps xmm0, xmmword ptr [rbp+0x10]
+ vcvtudq2pd xmm0, xmm0
mov rax, bword ptr [rbp-0x08]
; byrRegs +[rax]
vmovups xmmword ptr [rax], xmm0
mov rax, bword ptr [rbp-0x08]
- ;; size=19 bbWeight=1 PerfScore 10.00
+ ;; size=23 bbWeight=1 PerfScore 11.00
G_M19198_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 16
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 42, prolog size 13, PerfScore 15.50, instruction count 12, allocated bytes for code 42 (MethodHash=6d20b501) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
+; Total bytes of code 46, prolog size 13, PerfScore 16.50, instruction count 13, allocated bytes for code 46 (MethodHash=6d20b501) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
; ============================================================
Unwind Info:
+4 (+21.05%) : 452733.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
@@ -18,16 +18,17 @@ G_M19198_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M19198_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0080 {rdi}, byref
; byrRegs +[rdi]
- vcvtudq2pd xmm0, xmmword ptr [rsp+0x08]
+ vmovaps xmm0, xmmword ptr [rsp+0x08]
+ vcvtudq2pd xmm0, xmm0
vmovups xmmword ptr [rdi], xmm0
mov rax, rdi
; byrRegs +[rax]
- ;; size=15 bbWeight=1 PerfScore 8.25
+ ;; size=19 bbWeight=1 PerfScore 9.25
G_M19198_IG03: ; bbWeight=1, epilog, nogc, extend
ret
;; size=1 bbWeight=1 PerfScore 1.00
-; Total bytes of code 19, prolog size 3, PerfScore 10.25, instruction count 5, allocated bytes for code 19 (MethodHash=6d20b501) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
+; Total bytes of code 23, prolog size 3, PerfScore 11.25, instruction count 6, allocated bytes for code 23 (MethodHash=6d20b501) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
; ============================================================
Unwind Info:
libraries.pmi.linux.x64.checked.mch
-4 (-19.05%) : 35972.dasm - System.Runtime.Intrinsics.X86.Sse3:MoveAndDuplicate(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
@@ -18,17 +18,16 @@ G_M14019_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M14019_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0080 {rdi}, byref
; byrRegs +[rdi]
- vmovaps xmm0, xmmword ptr [rsp+0x08]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rsp+0x08]
vmovups xmmword ptr [rdi], xmm0
mov rax, rdi
; byrRegs +[rax]
- ;; size=17 bbWeight=1 PerfScore 6.25
+ ;; size=13 bbWeight=1 PerfScore 5.25
G_M14019_IG03: ; bbWeight=1, epilog, nogc, extend
ret
;; size=1 bbWeight=1 PerfScore 1.00
-; Total bytes of code 21, prolog size 3, PerfScore 8.25, instruction count 6, allocated bytes for code 21 (MethodHash=666cc93c) for method System.Runtime.Intrinsics.X86.Sse3:MoveAndDuplicate(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
+; Total bytes of code 17, prolog size 3, PerfScore 7.25, instruction count 5, allocated bytes for code 17 (MethodHash=666cc93c) for method System.Runtime.Intrinsics.X86.Sse3:MoveAndDuplicate(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
; ============================================================
Unwind Info:
-6 (-6.82%) : 254886.dasm - System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
@@ -32,15 +32,13 @@ G_M62030_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0008 {rbx}, byr
call [<unknown method>]
test eax, eax
je SHORT G_M62030_IG04
- vmovsd xmm0, qword ptr [rbp-0x10]
- vmovddup xmm0, xmm0
- vmovaps xmm1, xmmword ptr [rbp+0x10]
- vmulpd xmm0, xmm1, xmm0
+ vmovaps xmm0, xmmword ptr [rbp+0x10]
+ vmulpd xmm0, xmm0, qword ptr [rbp-0x10] {1to2}
vdivpd xmm0, xmm0, qword ptr [reloc @RWD00] {1to2}
vmovups xmmword ptr [rbx], xmm0
mov rax, rbx
; byrRegs +[rax]
- ;; size=54 bbWeight=1 PerfScore 31.25
+ ;; size=48 bbWeight=1 PerfScore 29.25
G_M62030_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 8
pop rbx
@@ -57,7 +55,7 @@ G_M62030_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {
RWD00 dq 400921FB54442D18h ; 3.14159265
-; Total bytes of code 88, prolog size 11, PerfScore 38.25, instruction count 26, allocated bytes for code 88 (MethodHash=c70f0db1) for method System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
+; Total bytes of code 82, prolog size 11, PerfScore 36.25, instruction count 24, allocated bytes for code 85 (MethodHash=c70f0db1) for method System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
; ============================================================
Unwind Info:
-6 (-5.94%) : 254882.dasm - System.Numerics.Tensors.TensorPrimitives+DegreesToRadiansOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
@@ -35,14 +35,12 @@ G_M11950_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0008 {rbx}, byr
call [<unknown method>]
test eax, eax
je SHORT G_M11950_IG04
- vmovsd xmm0, qword ptr [rbp-0x28]
- vmovddup xmm0, xmm0
- vmovaps xmm1, xmmword ptr [rbp-0x20]
- vdivpd xmm0, xmm1, xmm0
+ vmovaps xmm0, xmmword ptr [rbp-0x20]
+ vdivpd xmm0, xmm0, qword ptr [rbp-0x28] {1to2}
vmovups xmmword ptr [rbx], xmm0
mov rax, rbx
; byrRegs +[rax]
- ;; size=64 bbWeight=1 PerfScore 35.25
+ ;; size=58 bbWeight=1 PerfScore 33.25
G_M11950_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 40
pop rbx
@@ -59,7 +57,7 @@ G_M11950_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {
RWD00 dq 400921FB54442D18h ; 3.14159265
-; Total bytes of code 101, prolog size 14, PerfScore 41.50, instruction count 28, allocated bytes for code 101 (MethodHash=39d7d151) for method System.Numerics.Tensors.TensorPrimitives+DegreesToRadiansOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
+; Total bytes of code 95, prolog size 14, PerfScore 39.50, instruction count 26, allocated bytes for code 98 (MethodHash=39d7d151) for method System.Numerics.Tensors.TensorPrimitives+DegreesToRadiansOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
; ============================================================
Unwind Info:
-8 (-2.37%) : 28999.dasm - System.SpanHelpers:ReplaceValueType[double](byref,byref,double,double,ulong) (FullOpts)
@@ -135,11 +135,9 @@ G_M37632_IG14: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=000
cmp r14, 4
jae SHORT G_M37632_IG18
add r14, -2
- vmovsd xmm0, qword ptr [rbp-0x20]
- vmovddup xmm0, xmm0
- vmovsd xmm1, qword ptr [rbp-0x28]
- vmovddup xmm1, xmm1
- ;; size=61 bbWeight=0.50 PerfScore 7.62
+ vmovddup xmm0, qword ptr [rbp-0x20]
+ vmovddup xmm1, qword ptr [rbp-0x28]
+ ;; size=53 bbWeight=0.50 PerfScore 6.62
G_M37632_IG15: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=8008 {rbx r15}, byref, isz
vmovups xmm2, xmmword ptr [rbx+8*rax]
vcmppd xmm3, xmm0, xmm2, 0
@@ -194,7 +192,7 @@ G_M37632_IG21: ; bbWeight=0.50, epilog, nogc, extend
ret
;; size=14 bbWeight=0.50 PerfScore 2.12
-; Total bytes of code 338, prolog size 27, PerfScore 209.75, instruction count 97, allocated bytes for code 338 (MethodHash=f7696cff) for method System.SpanHelpers:ReplaceValueType[double](byref,byref,double,double,ulong) (FullOpts)
+; Total bytes of code 330, prolog size 27, PerfScore 208.75, instruction count 95, allocated bytes for code 330 (MethodHash=f7696cff) for method System.SpanHelpers:ReplaceValueType[double](byref,byref,double,double,ulong) (FullOpts)
; ============================================================
Unwind Info:
-4 (-1.63%) : 29050.dasm - System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
@@ -91,11 +91,10 @@ G_M58357_IG06: ; bbWeight=0.50, epilog, nogc, extend
;; size=5 bbWeight=0.50 PerfScore 1.25
G_M58357_IG07: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0082 {rcx rdi}, gcvars, byref
; byrRegs -[rdx] +[rcx rdi]
- vmovaps xmm1, xmm0
- vmovddup xmm1, xmm1
+ vmovddup xmm1, xmm0
lea rdx, bword ptr [rcx-0x10]
; byrRegs +[rdx]
- ;; size=12 bbWeight=0.50 PerfScore 0.88
+ ;; size=8 bbWeight=0.50 PerfScore 0.75
G_M58357_IG08: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0086 {rcx rdx rdi}, byref, isz
vcmppd xmm2, xmm1, xmmword ptr [rdi], 0
vmovmskpd rsi, xmm2
@@ -166,7 +165,7 @@ G_M58357_IG17: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byr
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
-; Total bytes of code 246, prolog size 7, PerfScore 140.75, instruction count 81, allocated bytes for code 246 (MethodHash=3f0f1c0a) for method System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
+; Total bytes of code 242, prolog size 7, PerfScore 140.62, instruction count 80, allocated bytes for code 242 (MethodHash=3f0f1c0a) for method System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
; ============================================================
Unwind Info:
+4 (+21.05%) : 35273.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
@@ -18,16 +18,17 @@ G_M19198_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M19198_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0080 {rdi}, byref
; byrRegs +[rdi]
- vcvtudq2pd xmm0, xmmword ptr [rsp+0x08]
+ vmovaps xmm0, xmmword ptr [rsp+0x08]
+ vcvtudq2pd xmm0, xmm0
vmovups xmmword ptr [rdi], xmm0
mov rax, rdi
; byrRegs +[rax]
- ;; size=15 bbWeight=1 PerfScore 8.25
+ ;; size=19 bbWeight=1 PerfScore 9.25
G_M19198_IG03: ; bbWeight=1, epilog, nogc, extend
ret
;; size=1 bbWeight=1 PerfScore 1.00
-; Total bytes of code 19, prolog size 3, PerfScore 10.25, instruction count 5, allocated bytes for code 19 (MethodHash=6d20b501) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
+; Total bytes of code 23, prolog size 3, PerfScore 11.25, instruction count 6, allocated bytes for code 23 (MethodHash=6d20b501) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
; ============================================================
Unwind Info:
libraries_tests.run.linux.x64.Release.mch
-12 (-10.08%) : 515847.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
@@ -28,29 +28,26 @@ G_M8981_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov gword ptr [rbp-0x08], rdi
;; size=42 bbWeight=1 PerfScore 9.00
G_M8981_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- vmovsd xmm0, qword ptr [rbp-0x10]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x10]
vmovups xmmword ptr [rsp], xmm0
lea rdi, [rbp-0x30]
call [<unknown method>]
- vmovsd xmm0, qword ptr [rbp-0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x18]
vmovups xmmword ptr [rsp], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x30]
vmovups xmmword ptr [rsp+0x10], xmm0
- vmovsd xmm0, qword ptr [rbp-0x20]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x20]
vmovups xmmword ptr [rsp+0x20], xmm0
call [<unknown method>]
nop
- ;; size=71 bbWeight=1 PerfScore 25.75
+ ;; size=59 bbWeight=1 PerfScore 22.75
G_M8981_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 96
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 119, prolog size 23, PerfScore 36.50, instruction count 29, allocated bytes for code 119 (MethodHash=b03cdcea) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
+; Total bytes of code 107, prolog size 23, PerfScore 33.50, instruction count 26, allocated bytes for code 107 (MethodHash=b03cdcea) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
; ============================================================
Unwind Info:
-12 (-10.08%) : 516247.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
@@ -28,29 +28,26 @@ G_M32366_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov gword ptr [rbp-0x08], rdi
;; size=42 bbWeight=1 PerfScore 9.00
G_M32366_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- vmovsd xmm0, qword ptr [rbp-0x10]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x10]
vmovups xmmword ptr [rsp], xmm0
lea rdi, [rbp-0x30]
call [<unknown method>]
- vmovsd xmm0, qword ptr [rbp-0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x18]
vmovups xmmword ptr [rsp], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x30]
vmovups xmmword ptr [rsp+0x10], xmm0
- vmovsd xmm0, qword ptr [rbp-0x20]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x20]
vmovups xmmword ptr [rsp+0x20], xmm0
call [<unknown method>]
nop
- ;; size=71 bbWeight=1 PerfScore 25.75
+ ;; size=59 bbWeight=1 PerfScore 22.75
G_M32366_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 96
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 119, prolog size 23, PerfScore 36.50, instruction count 29, allocated bytes for code 119 (MethodHash=d0478191) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
+; Total bytes of code 107, prolog size 23, PerfScore 33.50, instruction count 26, allocated bytes for code 107 (MethodHash=d0478191) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
; ============================================================
Unwind Info:
-12 (-10.08%) : 516260.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
@@ -28,29 +28,26 @@ G_M28892_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov gword ptr [rbp-0x08], rdi
;; size=42 bbWeight=1 PerfScore 9.00
G_M28892_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- vmovsd xmm0, qword ptr [rbp-0x10]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x10]
vmovups xmmword ptr [rsp], xmm0
lea rdi, [rbp-0x30]
call [<unknown method>]
- vmovsd xmm0, qword ptr [rbp-0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x18]
vmovups xmmword ptr [rsp], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x30]
vmovups xmmword ptr [rsp+0x10], xmm0
- vmovsd xmm0, qword ptr [rbp-0x20]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x20]
vmovups xmmword ptr [rsp+0x20], xmm0
call [<unknown method>]
nop
- ;; size=71 bbWeight=1 PerfScore 25.75
+ ;; size=59 bbWeight=1 PerfScore 22.75
G_M28892_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 96
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 119, prolog size 23, PerfScore 36.50, instruction count 29, allocated bytes for code 119 (MethodHash=05418f23) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
+; Total bytes of code 107, prolog size 23, PerfScore 33.50, instruction count 26, allocated bytes for code 107 (MethodHash=05418f23) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
; ============================================================
Unwind Info:
-8 (-1.14%) : 450788.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
@@ -129,8 +129,7 @@ G_M63258_IG06: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
;; size=73 bbWeight=1 PerfScore 31.50
G_M63258_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
; byrRegs -[rax]
- vmovsd xmm0, qword ptr [rbp-0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x18]
vmovaps xmmword ptr [rbp-0xC0], xmm0
mov rax, bword ptr [rbp-0x08]
; byrRegs +[rax]
@@ -167,7 +166,7 @@ G_M63258_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
vmovaps xmm0, xmmword ptr [rbp-0xE0]
vmovups xmmword ptr [rax+8*rcx-0x10], xmm0
jmp SHORT G_M63258_IG10
- ;; size=177 bbWeight=1 PerfScore 61.00
+ ;; size=173 bbWeight=1 PerfScore 60.00
G_M63258_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
; byrRegs -[rax]
mov rax, bword ptr [rbp-0x08]
@@ -177,8 +176,7 @@ G_M63258_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov rax, bword ptr [rbp-0x10]
vmovups xmm0, xmmword ptr [rax]
vmovups xmmword ptr [rsp+0x10], xmm0
- vmovsd xmm0, qword ptr [rbp-0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x18]
vmovups xmmword ptr [rsp+0x20], xmm0
lea rdi, [rbp-0xF0]
call [<unknown method>]
@@ -188,7 +186,7 @@ G_M63258_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
vmovaps xmm0, xmmword ptr [rbp-0xF0]
vmovups xmmword ptr [rax], xmm0
jmp SHORT G_M63258_IG10
- ;; size=73 bbWeight=1 PerfScore 28.50
+ ;; size=69 bbWeight=1 PerfScore 27.50
G_M63258_IG09: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
mov rax, bword ptr [rbp-0x08]
@@ -220,7 +218,7 @@ RWD00 dd G_M63258_IG10 - G_M63258_IG02
dd G_M63258_IG05 - G_M63258_IG02
-; Total bytes of code 699, prolog size 71, PerfScore 250.58, instruction count 142, allocated bytes for code 699 (MethodHash=938108e5) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
+; Total bytes of code 691, prolog size 71, PerfScore 248.58, instruction count 140, allocated bytes for code 691 (MethodHash=938108e5) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
; ============================================================
Unwind Info:
-8 (-1.03%) : 450781.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
@@ -136,8 +136,7 @@ G_M51506_IG06: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
;; size=85 bbWeight=1 PerfScore 33.00
G_M51506_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
- vmovsd xmm0, qword ptr [rbp-0x10]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x10]
vmovaps xmmword ptr [rbp-0xC0], xmm0
mov rax, bword ptr [rbp-0x08]
; byrRegs +[rax]
@@ -175,7 +174,7 @@ G_M51506_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
vmovaps xmm0, xmmword ptr [rbp-0xE0]
vmovups xmmword ptr [rax+8*rcx-0x10], xmm0
jmp G_M51506_IG10
- ;; size=198 bbWeight=1 PerfScore 63.00
+ ;; size=194 bbWeight=1 PerfScore 62.00
G_M51506_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
; byrRegs -[rax]
mov rax, bword ptr [rbp-0x08]
@@ -187,8 +186,7 @@ G_M51506_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
; byrRegs -[rax]
vmovaps xmm0, xmmword ptr [rbp-0x1A0]
vmovups xmmword ptr [rsp], xmm0
- vmovsd xmm0, qword ptr [rbp-0x10]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x10]
vmovups xmmword ptr [rsp+0x10], xmm0
lea rdi, [rbp-0xF0]
call [<unknown method>]
@@ -197,7 +195,7 @@ G_M51506_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
vmovaps xmm0, xmmword ptr [rbp-0xF0]
vmovups xmmword ptr [rax], xmm0
jmp SHORT G_M51506_IG10
- ;; size=85 bbWeight=1 PerfScore 30.00
+ ;; size=81 bbWeight=1 PerfScore 29.00
G_M51506_IG09: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
mov rax, bword ptr [rbp-0x08]
@@ -230,7 +228,7 @@ RWD00 dd G_M51506_IG10 - G_M51506_IG02
dd G_M51506_IG05 - G_M51506_IG02
-; Total bytes of code 775, prolog size 74, PerfScore 259.58, instruction count 146, allocated bytes for code 775 (MethodHash=8b4136cd) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
+; Total bytes of code 767, prolog size 74, PerfScore 257.58, instruction count 144, allocated bytes for code 767 (MethodHash=8b4136cd) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
; ============================================================
Unwind Info:
-4 (-0.70%) : 448590.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (Tier1)
@@ -61,9 +61,9 @@
; V49 tmp41 [V49,T14] ( 2, 0 ) simd32 -> mm0 single-def "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[double]>
; V50 tmp42 [V50,T15] ( 2, 0 ) simd32 -> mm1 single-def "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[double]>
;* V51 tmp43 [V51 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[double]>
-; V52 tmp44 [V52,T13] ( 3, 0 ) simd16 -> mm0 single-def "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
-; V53 tmp45 [V53,T16] ( 2, 0 ) simd16 -> mm1 single-def "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
-; V54 tmp46 [V54,T17] ( 2, 0 ) simd16 -> mm0 single-def "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
+; V52 tmp44 [V52,T13] ( 3, 0 ) simd16 -> mm2 single-def "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
+; V53 tmp45 [V53,T16] ( 2, 0 ) simd16 -> mm0 single-def "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
+; V54 tmp46 [V54,T17] ( 2, 0 ) simd16 -> mm1 single-def "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
;* V55 tmp47 [V55 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[double]>
;* V56 tmp48 [V56 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[double]>
;* V57 tmp49 [V57 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[double]>
@@ -285,26 +285,25 @@ G_M2600_IG12: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=B000 {r12 r13 r1
jmp SHORT G_M2600_IG16
;; size=28 bbWeight=0 PerfScore 0.00
G_M2600_IG13: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=B000 {r12 r13 r15}, byref, isz
- vmovddup xmm0, xmm2
- vmovups xmm1, xmmword ptr [r13]
- vaddpd xmm1, xmm1, xmmword ptr [r12]
- vmulpd xmm1, xmm1, xmm0
- vmovups xmm2, xmmword ptr [r13+8*rcx-0x10]
- vaddpd xmm2, xmm2, xmmword ptr [r12+8*rcx-0x10]
- vmulpd xmm0, xmm2, xmm0
- vmovups xmmword ptr [r15], xmm1
- vmovups xmmword ptr [r15+8*rcx-0x10], xmm0
+ vmovddup xmm2, xmm2
+ vmovups xmm0, xmmword ptr [r13]
+ vaddpd xmm0, xmm0, xmmword ptr [r12]
+ vmulpd xmm0, xmm0, xmm2
+ vmovups xmm1, xmmword ptr [r13+8*rcx-0x10]
+ vaddpd xmm1, xmm1, xmmword ptr [r12+8*rcx-0x10]
+ vmulpd xmm1, xmm1, xmm2
+ vmovups xmmword ptr [r15], xmm0
+ vmovups xmmword ptr [r15+8*rcx-0x10], xmm1
jmp SHORT G_M2600_IG16
;; size=52 bbWeight=0 PerfScore 0.00
G_M2600_IG14: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=B000 {r12 r13 r15}, byref, isz
vmovups xmm0, xmmword ptr [r13]
vaddpd xmm0, xmm0, xmmword ptr [r12]
- vmovaps xmm1, xmm2
- vmovddup xmm1, xmm1
+ vmovddup xmm1, xmm2
vmulpd xmm0, xmm0, xmm1
vmovups xmmword ptr [r15], xmm0
jmp SHORT G_M2600_IG16
- ;; size=31 bbWeight=0 PerfScore 0.00
+ ;; size=27 bbWeight=0 PerfScore 0.00
G_M2600_IG15: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=B000 {r12 r13 r15}, byref
vmovsd xmm0, qword ptr [r13]
vmovsd xmm1, qword ptr [r12]
@@ -353,7 +352,7 @@ RWD00 dd G_M2600_IG16 - G_M2600_IG02
dd G_M2600_IG11 - G_M2600_IG02
-; Total bytes of code 572, prolog size 19, PerfScore 63.68, instruction count 150, allocated bytes for code 572 (MethodHash=0ed2f5d7) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (Tier1)
+; Total bytes of code 568, prolog size 19, PerfScore 63.68, instruction count 149, allocated bytes for code 568 (MethodHash=0ed2f5d7) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (Tier1)
; ============================================================
Unwind Info:
librariestestsnotieredcompilation.run.linux.x64.Release.mch
-8 (-4.08%) : 191109.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (FullOpts)
@@ -99,11 +99,9 @@ G_M28892_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
vmovaps xmm2, xmmword ptr [rbp-0x30]
vmovaps xmmword ptr [rbp-0x20], xmm2
- vmovsd xmm2, qword ptr [rbp-0x08]
- vmovddup xmm2, xmm2
+ vmovddup xmm2, qword ptr [rbp-0x08]
vmovaps xmmword ptr [rbp-0x40], xmm2
- vmovsd xmm2, qword ptr [rbp-0x10]
- vmovddup xmm2, xmm2
+ vmovddup xmm2, qword ptr [rbp-0x10]
vmovaps xmmword ptr [rbp-0x50], xmm2
mov rax, qword ptr [rbp-0x40]
mov rcx, qword ptr [rbp-0x20]
@@ -126,14 +124,14 @@ G_M28892_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
vmovsd xmm2, qword ptr [rbp-0x80]
call [<unknown method>]
nop
- ;; size=148 bbWeight=1 PerfScore 55.75
+ ;; size=140 bbWeight=1 PerfScore 53.75
G_M28892_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 144
pop rbp
ret
;; size=9 bbWeight=1 PerfScore 1.75
-; Total bytes of code 196, prolog size 29, PerfScore 64.50, instruction count 45, allocated bytes for code 196 (MethodHash=05418f23) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (FullOpts)
+; Total bytes of code 188, prolog size 29, PerfScore 62.50, instruction count 43, allocated bytes for code 188 (MethodHash=05418f23) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (FullOpts)
; ============================================================
Unwind Info:
-8 (-4.08%) : 190732.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (FullOpts)
@@ -99,11 +99,9 @@ G_M8981_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
vmovaps xmm2, xmmword ptr [rbp-0x30]
vmovaps xmmword ptr [rbp-0x20], xmm2
- vmovsd xmm2, qword ptr [rbp-0x08]
- vmovddup xmm2, xmm2
+ vmovddup xmm2, qword ptr [rbp-0x08]
vmovaps xmmword ptr [rbp-0x40], xmm2
- vmovsd xmm2, qword ptr [rbp-0x10]
- vmovddup xmm2, xmm2
+ vmovddup xmm2, qword ptr [rbp-0x10]
vmovaps xmmword ptr [rbp-0x50], xmm2
mov rax, qword ptr [rbp-0x40]
mov rcx, qword ptr [rbp-0x20]
@@ -126,14 +124,14 @@ G_M8981_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
vmovsd xmm2, qword ptr [rbp-0x80]
call [<unknown method>]
nop
- ;; size=148 bbWeight=1 PerfScore 55.75
+ ;; size=140 bbWeight=1 PerfScore 53.75
G_M8981_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 144
pop rbp
ret
;; size=9 bbWeight=1 PerfScore 1.75
-; Total bytes of code 196, prolog size 29, PerfScore 64.50, instruction count 45, allocated bytes for code 196 (MethodHash=b03cdcea) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (FullOpts)
+; Total bytes of code 188, prolog size 29, PerfScore 62.50, instruction count 43, allocated bytes for code 188 (MethodHash=b03cdcea) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (FullOpts)
; ============================================================
Unwind Info:
-8 (-4.08%) : 191104.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (FullOpts)
@@ -99,11 +99,9 @@ G_M32366_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
vmovaps xmm2, xmmword ptr [rbp-0x30]
vmovaps xmmword ptr [rbp-0x20], xmm2
- vmovsd xmm2, qword ptr [rbp-0x08]
- vmovddup xmm2, xmm2
+ vmovddup xmm2, qword ptr [rbp-0x08]
vmovaps xmmword ptr [rbp-0x40], xmm2
- vmovsd xmm2, qword ptr [rbp-0x10]
- vmovddup xmm2, xmm2
+ vmovddup xmm2, qword ptr [rbp-0x10]
vmovaps xmmword ptr [rbp-0x50], xmm2
mov rax, qword ptr [rbp-0x40]
mov rcx, qword ptr [rbp-0x20]
@@ -126,14 +124,14 @@ G_M32366_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
vmovsd xmm2, qword ptr [rbp-0x80]
call [<unknown method>]
nop
- ;; size=148 bbWeight=1 PerfScore 55.75
+ ;; size=140 bbWeight=1 PerfScore 53.75
G_M32366_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 144
pop rbp
ret
;; size=9 bbWeight=1 PerfScore 1.75
-; Total bytes of code 196, prolog size 29, PerfScore 64.50, instruction count 45, allocated bytes for code 196 (MethodHash=d0478191) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (FullOpts)
+; Total bytes of code 188, prolog size 29, PerfScore 62.50, instruction count 43, allocated bytes for code 188 (MethodHash=d0478191) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (FullOpts)
; ============================================================
Unwind Info:
-11 (-2.36%) : 160695.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
@@ -219,7 +219,7 @@ G_M2600_IG09: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=0000
add rdx, rdi
jmp rdx
;; size=43 bbWeight=0.50 PerfScore 4.25
-G_M2600_IG10: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref
+G_M2600_IG10: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref, isz
vbroadcastsd ymm0, qword ptr [rbp-0x30]
vmovups ymm1, ymmword ptr [r15]
vaddpd ymm1, ymm1, ymmword ptr [r14]
@@ -229,8 +229,8 @@ G_M2600_IG10: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14
vmulpd ymm0, ymm2, ymm0
vmovups ymmword ptr [rbx], ymm1
vmovups ymmword ptr [rbx+8*rcx-0x20], ymm0
- jmp G_M2600_IG15
- ;; size=53 bbWeight=0.50 PerfScore 19.00
+ jmp SHORT G_M2600_IG15
+ ;; size=50 bbWeight=0.50 PerfScore 19.00
G_M2600_IG11: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref, isz
vmovups ymm1, ymmword ptr [r15]
vaddpd ymm1, ymm1, ymmword ptr [r14]
@@ -240,8 +240,7 @@ G_M2600_IG11: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14
jmp SHORT G_M2600_IG15
;; size=26 bbWeight=0.50 PerfScore 11.00
G_M2600_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref, isz
- vmovsd xmm0, qword ptr [rbp-0x30]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x30]
vmovups xmm1, xmmword ptr [r15]
vaddpd xmm1, xmm1, xmmword ptr [r14]
vmulpd xmm1, xmm1, xmm0
@@ -251,16 +250,15 @@ G_M2600_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14
vmovups xmmword ptr [rbx], xmm1
vmovups xmmword ptr [rbx+8*rcx-0x10], xmm0
jmp SHORT G_M2600_IG15
- ;; size=53 bbWeight=0.50 PerfScore 18.00
+ ;; size=49 bbWeight=0.50 PerfScore 17.50
G_M2600_IG13: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref, isz
vmovups xmm1, xmmword ptr [r15]
vaddpd xmm1, xmm1, xmmword ptr [r14]
- vmovsd xmm0, qword ptr [rbp-0x30]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x30]
vmulpd xmm0, xmm1, xmm0
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M2600_IG15
- ;; size=29 bbWeight=0.50 PerfScore 10.50
+ ;; size=25 bbWeight=0.50 PerfScore 10.00
G_M2600_IG14: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref
vmovsd xmm1, qword ptr [r15]
vaddsd xmm1, xmm1, qword ptr [r14]
@@ -308,7 +306,7 @@ RWD00 dd G_M2600_IG15 - G_M2600_IG02
dd G_M2600_IG10 - G_M2600_IG02
-; Total bytes of code 466, prolog size 22, PerfScore 104.38, instruction count 126, allocated bytes for code 466 (MethodHash=0ed2f5d7) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
+; Total bytes of code 455, prolog size 22, PerfScore 103.38, instruction count 124, allocated bytes for code 455 (MethodHash=0ed2f5d7) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
-8 (-1.82%) : 160690.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
@@ -237,8 +237,7 @@ G_M64136_IG11: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r1
jmp SHORT G_M64136_IG15
;; size=22 bbWeight=0.50 PerfScore 10.00
G_M64136_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref, isz
- vmovsd xmm0, qword ptr [rbp-0x30]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x30]
vmulpd xmm1, xmm0, xmmword ptr [r15]
vaddpd xmm1, xmm1, xmmword ptr [r14]
vmulpd xmm0, xmm0, xmmword ptr [r15+8*rcx-0x10]
@@ -246,15 +245,14 @@ G_M64136_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r1
vmovups xmmword ptr [rbx], xmm1
vmovups xmmword ptr [rbx+8*rcx-0x10], xmm0
jmp SHORT G_M64136_IG15
- ;; size=45 bbWeight=0.50 PerfScore 17.00
+ ;; size=41 bbWeight=0.50 PerfScore 16.50
G_M64136_IG13: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref, isz
- vmovsd xmm0, qword ptr [rbp-0x30]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x30]
vmulpd xmm0, xmm0, xmmword ptr [r15]
vaddpd xmm0, xmm0, xmmword ptr [r14]
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M64136_IG15
- ;; size=25 bbWeight=0.50 PerfScore 10.00
+ ;; size=21 bbWeight=0.50 PerfScore 9.50
G_M64136_IG14: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref
vmovsd xmm0, qword ptr [rbp-0x30]
vmulsd xmm0, xmm0, qword ptr [r15]
@@ -302,7 +300,7 @@ RWD00 dd G_M64136_IG15 - G_M64136_IG02
dd G_M64136_IG10 - G_M64136_IG02
-; Total bytes of code 439, prolog size 22, PerfScore 99.88, instruction count 120, allocated bytes for code 439 (MethodHash=9daf0577) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
+; Total bytes of code 431, prolog size 22, PerfScore 98.88, instruction count 118, allocated bytes for code 431 (MethodHash=9daf0577) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
-8 (-1.82%) : 160236.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
@@ -237,8 +237,7 @@ G_M64136_IG11: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r1
jmp SHORT G_M64136_IG15
;; size=22 bbWeight=0.50 PerfScore 10.00
G_M64136_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref, isz
- vmovsd xmm0, qword ptr [rbp-0x30]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x30]
vmulpd xmm1, xmm0, xmmword ptr [r15]
vaddpd xmm1, xmm1, xmmword ptr [r14]
vmulpd xmm0, xmm0, xmmword ptr [r15+8*rcx-0x10]
@@ -246,15 +245,14 @@ G_M64136_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r1
vmovups xmmword ptr [rbx], xmm1
vmovups xmmword ptr [rbx+8*rcx-0x10], xmm0
jmp SHORT G_M64136_IG15
- ;; size=45 bbWeight=0.50 PerfScore 17.00
+ ;; size=41 bbWeight=0.50 PerfScore 16.50
G_M64136_IG13: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref, isz
- vmovsd xmm0, qword ptr [rbp-0x30]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp-0x30]
vmulpd xmm0, xmm0, xmmword ptr [r15]
vaddpd xmm0, xmm0, xmmword ptr [r14]
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M64136_IG15
- ;; size=25 bbWeight=0.50 PerfScore 10.00
+ ;; size=21 bbWeight=0.50 PerfScore 9.50
G_M64136_IG14: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C008 {rbx r14 r15}, byref
vmovsd xmm0, qword ptr [rbp-0x30]
vmulsd xmm0, xmm0, qword ptr [r15]
@@ -302,7 +300,7 @@ RWD00 dd G_M64136_IG15 - G_M64136_IG02
dd G_M64136_IG10 - G_M64136_IG02
-; Total bytes of code 439, prolog size 22, PerfScore 99.88, instruction count 120, allocated bytes for code 439 (MethodHash=9daf0577) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
+; Total bytes of code 431, prolog size 22, PerfScore 98.88, instruction count 118, allocated bytes for code 431 (MethodHash=9daf0577) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
Details
Improvements/regressions per collection
Collection |
Contexts with diffs |
Improvements |
Regressions |
Same size |
Improvements (bytes) |
Regressions (bytes) |
benchmarks.run.linux.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
benchmarks.run_pgo.linux.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
benchmarks.run_tiered.linux.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
coreclr_tests.run.linux.x64.checked.mch |
78 |
69 |
9 |
0 |
-318 |
+36 |
libraries.crossgen2.linux.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
libraries.pmi.linux.x64.checked.mch |
7 |
6 |
1 |
0 |
-40 |
+4 |
libraries_tests.run.linux.x64.Release.mch |
9 |
9 |
0 |
0 |
-88 |
+0 |
librariestestsnotieredcompilation.run.linux.x64.Release.mch |
7 |
7 |
0 |
0 |
-62 |
+0 |
realworld.run.linux.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
smoke_tests.nativeaot.linux.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
|
101 |
91 |
10 |
0 |
-508 |
+40 |
Context information
Collection |
Diffed contexts |
MinOpts |
FullOpts |
Missed, base |
Missed, diff |
benchmarks.run.linux.x64.checked.mch |
36,217 |
3,549 |
32,668 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run_pgo.linux.x64.checked.mch |
160,327 |
63,207 |
97,120 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run_tiered.linux.x64.checked.mch |
65,386 |
50,236 |
15,150 |
0 (0.00%) |
0 (0.00%) |
coreclr_tests.run.linux.x64.checked.mch |
599,012 |
355,942 |
243,070 |
1 (0.00%) |
1 (0.00%) |
libraries.crossgen2.linux.x64.checked.mch |
234,329 |
15 |
234,314 |
0 (0.00%) |
0 (0.00%) |
libraries.pmi.linux.x64.checked.mch |
296,931 |
6 |
296,925 |
0 (0.00%) |
0 (0.00%) |
libraries_tests.run.linux.x64.Release.mch |
759,766 |
496,184 |
263,582 |
0 (0.00%) |
0 (0.00%) |
librariestestsnotieredcompilation.run.linux.x64.Release.mch |
305,443 |
21,912 |
283,531 |
0 (0.00%) |
0 (0.00%) |
realworld.run.linux.x64.checked.mch |
33,083 |
9 |
33,074 |
0 (0.00%) |
0 (0.00%) |
smoke_tests.nativeaot.linux.x64.checked.mch |
27,414 |
10 |
27,404 |
0 (0.00%) |
0 (0.00%) |
|
2,517,908 |
991,070 |
1,526,838 |
1 (0.00%) |
1 (0.00%) |
jit-analyze output
coreclr_tests.run.linux.x64.checked.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os linux -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 403725618 (overridden on cmd)
Total bytes of diff: 403725336 (overridden on cmd)
Total bytes of delta: -282 (-0.00 % of base)
diff is an improvement.
relative diff is an improvement.
Detail diffs
Top file regressions (bytes):
4 : 126532.dasm (1.55 % of base)
4 : 126536.dasm (1.55 % of base)
4 : 147578.dasm (2.25 % of base)
4 : 452733.dasm (21.05 % of base)
4 : 126535.dasm (1.55 % of base)
4 : 126538.dasm (9.52 % of base)
4 : 126539.dasm (1.45 % of base)
4 : 464819.dasm (0.97 % of base)
4 : 464821.dasm (0.97 % of base)
Top file improvements (bytes):
-40 : 32934.dasm (-3.12 % of base)
-40 : 32940.dasm (-3.12 % of base)
-40 : 32941.dasm (-0.83 % of base)
-4 : 116791.dasm (-1.32 % of base)
-4 : 119884.dasm (-1.34 % of base)
-4 : 137995.dasm (-1.97 % of base)
-4 : 137998.dasm (-1.97 % of base)
-4 : 138015.dasm (-1.97 % of base)
-4 : 141001.dasm (-2.00 % of base)
-4 : 174711.dasm (-0.17 % of base)
-4 : 174714.dasm (-0.17 % of base)
-4 : 174724.dasm (-0.17 % of base)
-4 : 174727.dasm (-0.17 % of base)
-4 : 174738.dasm (-0.17 % of base)
-4 : 181354.dasm (-0.46 % of base)
-4 : 33138.dasm (-1.55 % of base)
-4 : 40125.dasm (-2.61 % of base)
-4 : 432507.dasm (-1.44 % of base)
-4 : 82855.dasm (-3.45 % of base)
-4 : 116769.dasm (-1.32 % of base)
61 total files with Code Size differences (52 improved, 9 regressed), 0 unchanged.
Top method regressions (bytes):
4 (1.55 % of base) : 126535.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_Load():this (MinOpts)
4 (1.55 % of base) : 126536.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_LoadAligned():this (MinOpts)
4 (1.55 % of base) : 126532.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_UnsafeRead():this (MinOpts)
4 (1.45 % of base) : 126539.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (MinOpts)
4 (2.25 % of base) : 147578.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
4 (0.97 % of base) : 464821.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunStructFldScenario():this (FullOpts)
4 (0.97 % of base) : 464819.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunStructLclFldScenario():this (FullOpts)
4 (21.05 % of base) : 452733.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
4 (9.52 % of base) : 126538.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
Top method improvements (bytes):
-40 (-3.12 % of base) : 32934.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunBasicScenario():this (MinOpts)
-40 (-3.12 % of base) : 32940.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunGenericScenario():this (MinOpts)
-40 (-0.83 % of base) : 32941.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunReflectionScenario():this (MinOpts)
-4 (-0.46 % of base) : 181354.dasm - IntelHardwareIntrinsicTest._Sse3.Program:MoveAndDuplicate() (MinOpts)
-4 (-0.17 % of base) : 174717.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[byte](byte):System.Runtime.Intrinsics.Vector128`1[byte] (MinOpts)
-4 (-0.17 % of base) : 174711.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[double](double):System.Runtime.Intrinsics.Vector128`1[double] (MinOpts)
-4 (-0.17 % of base) : 174705.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[float](float):System.Runtime.Intrinsics.Vector128`1[float] (MinOpts)
-4 (-0.17 % of base) : 174727.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[int](int):System.Runtime.Intrinsics.Vector128`1[int] (MinOpts)
-4 (-0.17 % of base) : 174733.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[long](long):System.Runtime.Intrinsics.Vector128`1[long] (MinOpts)
-4 (-0.17 % of base) : 174721.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[short](short):System.Runtime.Intrinsics.Vector128`1[short] (MinOpts)
-4 (-0.17 % of base) : 174714.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[ubyte](ubyte):System.Runtime.Intrinsics.Vector128`1[ubyte] (MinOpts)
-4 (-0.17 % of base) : 174730.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[uint](uint):System.Runtime.Intrinsics.Vector128`1[uint] (MinOpts)
-4 (-0.17 % of base) : 174738.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[ulong](ulong):System.Runtime.Intrinsics.Vector128`1[ulong] (MinOpts)
-4 (-0.17 % of base) : 174724.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[ushort](ushort):System.Runtime.Intrinsics.Vector128`1[ushort] (MinOpts)
-4 (-1.44 % of base) : 432507.dasm - JIT.HardwareIntrinsics.General._Vector128.Program:CreateDouble() (FullOpts)
-4 (-2.61 % of base) : 40125.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (MinOpts)
-4 (-3.45 % of base) : 82855.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (Tier0)
-4 (-1.55 % of base) : 33138.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsVectorDouble:RunBasicScenario():this (MinOpts)
-4 (-1.97 % of base) : 138018.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_Load():this (Tier0)
-4 (-1.97 % of base) : 138015.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_UnsafeRead():this (Tier0)
Top method regressions (percentages):
4 (21.05 % of base) : 452733.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
4 (9.52 % of base) : 126538.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
4 (2.25 % of base) : 147578.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
4 (1.55 % of base) : 126535.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_Load():this (MinOpts)
4 (1.55 % of base) : 126536.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_LoadAligned():this (MinOpts)
4 (1.55 % of base) : 126532.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_UnsafeRead():this (MinOpts)
4 (1.45 % of base) : 126539.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (MinOpts)
4 (0.97 % of base) : 464821.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunStructFldScenario():this (FullOpts)
4 (0.97 % of base) : 464819.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunStructLclFldScenario():this (FullOpts)
Top method improvements (percentages):
-2 (-3.70 % of base) : 291041.dasm - Runtime_90508:Test1(System.Runtime.Intrinsics.Vector128`1[double],double):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
-4 (-3.45 % of base) : 82855.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (Tier0)
-40 (-3.12 % of base) : 32934.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunBasicScenario():this (MinOpts)
-40 (-3.12 % of base) : 32940.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunGenericScenario():this (MinOpts)
-4 (-2.61 % of base) : 40125.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (MinOpts)
-4 (-2.47 % of base) : 151241.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32+TestStruct:RunStructFldScenario(JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32):this (Tier0)
-4 (-2.07 % of base) : 151232.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32:RunBasicScenario_Load():this (Tier0)
-4 (-2.07 % of base) : 151224.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32:RunBasicScenario_UnsafeRead():this (Tier0)
-4 (-2.00 % of base) : 141001.dasm - JIT.HardwareIntrinsics.X86._Avx512F.SimpleUnaryOpConvTest__ConvertToVector512DoubleVector256UInt32+TestStruct:RunStructFldScenario(JIT.HardwareIntrinsics.X86._Avx512F.SimpleUnaryOpConvTest__ConvertToVector512DoubleVector256UInt32):this (Tier0)
-4 (-1.97 % of base) : 138018.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_Load():this (Tier0)
-4 (-1.97 % of base) : 138015.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_UnsafeRead():this (Tier0)
-4 (-1.97 % of base) : 137998.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleSingle:RunBasicScenario_Load():this (Tier0)
-4 (-1.97 % of base) : 137995.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleSingle:RunBasicScenario_UnsafeRead():this (Tier0)
-4 (-1.94 % of base) : 138023.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunClsVarScenario():this (Tier0)
-4 (-1.94 % of base) : 138003.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleSingle:RunClsVarScenario():this (Tier0)
-4 (-1.83 % of base) : 140992.dasm - JIT.HardwareIntrinsics.X86._Avx512F.SimpleUnaryOpConvTest__ConvertToVector512DoubleVector256UInt32:RunBasicScenario_Load():this (Tier0)
-4 (-1.83 % of base) : 140989.dasm - JIT.HardwareIntrinsics.X86._Avx512F.SimpleUnaryOpConvTest__ConvertToVector512DoubleVector256UInt32:RunBasicScenario_UnsafeRead():this (Tier0)
-4 (-1.74 % of base) : 130172.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32+TestStruct:RunStructFldScenario(JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32):this (MinOpts)
-4 (-1.55 % of base) : 33138.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsVectorDouble:RunBasicScenario():this (MinOpts)
-4 (-1.44 % of base) : 432507.dasm - JIT.HardwareIntrinsics.General._Vector128.Program:CreateDouble() (FullOpts)
libraries.pmi.linux.x64.checked.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os linux -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 60420245 (overridden on cmd)
Total bytes of diff: 60420209 (overridden on cmd)
Total bytes of delta: -36 (-0.00 % of base)
diff is an improvement.
relative diff is an improvement.
Detail diffs
Top file regressions (bytes):
4 : 35273.dasm (21.05 % of base)
Top file improvements (bytes):
-12 : 28912.dasm (-2.79 % of base)
-8 : 28999.dasm (-2.37 % of base)
-6 : 254886.dasm (-6.82 % of base)
-6 : 254882.dasm (-5.94 % of base)
-4 : 29050.dasm (-1.63 % of base)
-4 : 35972.dasm (-19.05 % of base)
7 total files with Code Size differences (6 improved, 1 regressed), 0 unchanged.
Top method regressions (bytes):
4 (21.05 % of base) : 35273.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
Top method improvements (bytes):
-12 (-2.79 % of base) : 28912.dasm - System.SpanHelpers:NonPackedContainsValueType[double](byref,double,int):ubyte (FullOpts)
-8 (-2.37 % of base) : 28999.dasm - System.SpanHelpers:ReplaceValueType[double](byref,byref,double,double,ulong) (FullOpts)
-6 (-5.94 % of base) : 254882.dasm - System.Numerics.Tensors.TensorPrimitives+DegreesToRadiansOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-6 (-6.82 % of base) : 254886.dasm - System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-4 (-19.05 % of base) : 35972.dasm - System.Runtime.Intrinsics.X86.Sse3:MoveAndDuplicate(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-4 (-1.63 % of base) : 29050.dasm - System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
Top method regressions (percentages):
4 (21.05 % of base) : 35273.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector128Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
Top method improvements (percentages):
-4 (-19.05 % of base) : 35972.dasm - System.Runtime.Intrinsics.X86.Sse3:MoveAndDuplicate(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-6 (-6.82 % of base) : 254886.dasm - System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-6 (-5.94 % of base) : 254882.dasm - System.Numerics.Tensors.TensorPrimitives+DegreesToRadiansOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-12 (-2.79 % of base) : 28912.dasm - System.SpanHelpers:NonPackedContainsValueType[double](byref,double,int):ubyte (FullOpts)
-8 (-2.37 % of base) : 28999.dasm - System.SpanHelpers:ReplaceValueType[double](byref,byref,double,double,ulong) (FullOpts)
-4 (-1.63 % of base) : 29050.dasm - System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
7 total methods with Code Size differences (6 improved, 1 regressed).
libraries_tests.run.linux.x64.Release.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os linux -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 337126106 (overridden on cmd)
Total bytes of diff: 337126018 (overridden on cmd)
Total bytes of delta: -88 (-0.00 % of base)
diff is an improvement.
relative diff is an improvement.
Detail diffs
Top file improvements (bytes):
-16 : 515752.dasm (-2.12 % of base)
-12 : 516247.dasm (-10.08 % of base)
-12 : 515847.dasm (-10.08 % of base)
-12 : 516260.dasm (-10.08 % of base)
-8 : 448704.dasm (-1.14 % of base)
-8 : 448559.dasm (-1.14 % of base)
-8 : 450788.dasm (-1.14 % of base)
-8 : 450781.dasm (-1.03 % of base)
-4 : 448590.dasm (-0.70 % of base)
9 total files with Code Size differences (9 improved, 0 regressed), 0 unchanged.
Top method improvements (bytes):
-16 (-2.12 % of base) : 515752.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Vector128DoubleEqualsNonCanonicalNaNTest():this (Instrumented Tier0)
-12 (-10.08 % of base) : 515847.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
-12 (-10.08 % of base) : 516247.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
-12 (-10.08 % of base) : 516260.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
-8 (-1.03 % of base) : 450781.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
-8 (-1.14 % of base) : 448704.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarSpanIntoSpan>g__VectorizedSmall8|234_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,double,byref,byref,ulong) (Tier0)
-8 (-1.14 % of base) : 448559.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
-8 (-1.14 % of base) : 450788.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
-4 (-0.70 % of base) : 448590.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (Tier1)
Top method improvements (percentages):
-12 (-10.08 % of base) : 515847.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
-12 (-10.08 % of base) : 516247.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
-12 (-10.08 % of base) : 516260.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
-16 (-2.12 % of base) : 515752.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Vector128DoubleEqualsNonCanonicalNaNTest():this (Instrumented Tier0)
-8 (-1.14 % of base) : 448704.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarSpanIntoSpan>g__VectorizedSmall8|234_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,double,byref,byref,ulong) (Tier0)
-8 (-1.14 % of base) : 448559.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
-8 (-1.14 % of base) : 450788.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
-8 (-1.03 % of base) : 450781.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
-4 (-0.70 % of base) : 448590.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (Tier1)
9 total methods with Code Size differences (9 improved, 0 regressed).
librariestestsnotieredcompilation.run.linux.x64.Release.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os linux -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 132558776 (overridden on cmd)
Total bytes of diff: 132558714 (overridden on cmd)
Total bytes of delta: -62 (-0.00 % of base)
diff is an improvement.
relative diff is an improvement.
Detail diffs
Top file improvements (bytes):
-11 : 160225.dasm (-2.36 % of base)
-11 : 160695.dasm (-2.36 % of base)
-8 : 160690.dasm (-1.82 % of base)
-8 : 190732.dasm (-4.08 % of base)
-8 : 160236.dasm (-1.82 % of base)
-8 : 191109.dasm (-4.08 % of base)
-8 : 191104.dasm (-4.08 % of base)
7 total files with Code Size differences (7 improved, 0 regressed), 0 unchanged.
Top method improvements (bytes):
-11 (-2.36 % of base) : 160225.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-11 (-2.36 % of base) : 160695.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-8 (-1.82 % of base) : 160690.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
-8 (-1.82 % of base) : 160236.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
-8 (-4.08 % of base) : 190732.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (FullOpts)
-8 (-4.08 % of base) : 191104.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (FullOpts)
-8 (-4.08 % of base) : 191109.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (FullOpts)
Top method improvements (percentages):
-8 (-4.08 % of base) : 190732.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (FullOpts)
-8 (-4.08 % of base) : 191104.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (FullOpts)
-8 (-4.08 % of base) : 191109.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (FullOpts)
-11 (-2.36 % of base) : 160225.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-11 (-2.36 % of base) : 160695.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-8 (-1.82 % of base) : 160690.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
-8 (-1.82 % of base) : 160236.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
7 total methods with Code Size differences (7 improved, 0 regressed).
osx arm64
Diffs are based on 2,270,868 contexts (932,669 MinOpts, 1,338,199 FullOpts).
MISSED contexts: 2 (0.00%)
No diffs found.
Details
Context information
Collection |
Diffed contexts |
MinOpts |
FullOpts |
Missed, base |
Missed, diff |
benchmarks.run.osx.arm64.checked.mch |
24,860 |
4 |
24,856 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run_pgo.osx.arm64.checked.mch |
83,817 |
48,088 |
35,729 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run_tiered.osx.arm64.checked.mch |
48,124 |
37,350 |
10,774 |
0 (0.00%) |
0 (0.00%) |
coreclr_tests.run.osx.arm64.checked.mch |
585,886 |
356,765 |
229,121 |
2 (0.00%) |
2 (0.00%) |
libraries.crossgen2.osx.arm64.checked.mch |
234,105 |
15 |
234,090 |
0 (0.00%) |
0 (0.00%) |
libraries.pmi.osx.arm64.checked.mch |
316,348 |
18 |
316,330 |
0 (0.00%) |
0 (0.00%) |
libraries_tests.run.osx.arm64.Release.mch |
643,885 |
468,829 |
175,056 |
0 (0.00%) |
0 (0.00%) |
librariestestsnotieredcompilation.run.osx.arm64.Release.mch |
302,301 |
21,597 |
280,704 |
0 (0.00%) |
0 (0.00%) |
realworld.run.osx.arm64.checked.mch |
31,542 |
3 |
31,539 |
0 (0.00%) |
0 (0.00%) |
|
2,270,868 |
932,669 |
1,338,199 |
2 (0.00%) |
2 (0.00%) |
windows arm64
Diffs are based on 2,341,108 contexts (938,449 MinOpts, 1,402,659 FullOpts).
MISSED contexts: 9 (0.00%)
No diffs found.
Details
Context information
Collection |
Diffed contexts |
MinOpts |
FullOpts |
Missed, base |
Missed, diff |
benchmarks.run.windows.arm64.checked.mch |
24,446 |
4 |
24,442 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run_pgo.windows.arm64.checked.mch |
96,121 |
48,079 |
48,042 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run_tiered.windows.arm64.checked.mch |
49,360 |
36,713 |
12,647 |
0 (0.00%) |
0 (0.00%) |
coreclr_tests.run.windows.arm64.checked.mch |
595,403 |
362,592 |
232,811 |
1 (0.00%) |
1 (0.00%) |
libraries.crossgen2.windows.arm64.checked.mch |
244,149 |
15 |
244,134 |
3 (0.00%) |
3 (0.00%) |
libraries.pmi.windows.arm64.checked.mch |
305,575 |
6 |
305,569 |
3 (0.00%) |
3 (0.00%) |
libraries_tests.run.windows.arm64.Release.mch |
651,606 |
469,431 |
182,175 |
0 (0.00%) |
0 (0.00%) |
librariestestsnotieredcompilation.run.windows.arm64.Release.mch |
317,077 |
21,598 |
295,479 |
0 (0.00%) |
0 (0.00%) |
realworld.run.windows.arm64.checked.mch |
33,211 |
3 |
33,208 |
0 (0.00%) |
0 (0.00%) |
smoke_tests.nativeaot.windows.arm64.checked.mch |
24,160 |
8 |
24,152 |
2 (0.01%) |
2 (0.01%) |
|
2,341,108 |
938,449 |
1,402,659 |
9 (0.00%) |
9 (0.00%) |
windows x64
Diffs are based on 2,512,209 contexts (997,391 MinOpts, 1,514,818 FullOpts).
MISSED contexts: 3 (0.00%)
Overall (-468 bytes)
Collection |
Base size (bytes) |
Diff size (bytes) |
coreclr_tests.run.windows.x64.checked.mch |
393,207,052 |
-332 |
libraries.crossgen2.windows.x64.checked.mch |
39,486,563 |
+0 |
libraries.pmi.windows.x64.checked.mch |
61,663,491 |
-28 |
libraries_tests.run.windows.x64.Release.mch |
282,129,292 |
-84 |
librariestestsnotieredcompilation.run.windows.x64.Release.mch |
137,066,325 |
-24 |
MinOpts (-358 bytes)
Collection |
Base size (bytes) |
Diff size (bytes) |
coreclr_tests.run.windows.x64.checked.mch |
272,788,854 |
-274 |
libraries_tests.run.windows.x64.Release.mch |
175,858,318 |
-84 |
FullOpts (-110 bytes)
Collection |
Base size (bytes) |
Diff size (bytes) |
coreclr_tests.run.windows.x64.checked.mch |
120,418,198 |
-58 |
libraries.crossgen2.windows.x64.checked.mch |
39,485,376 |
+0 |
libraries.pmi.windows.x64.checked.mch |
61,549,970 |
-28 |
librariestestsnotieredcompilation.run.windows.x64.Release.mch |
126,447,219 |
-24 |
Example diffs
coreclr_tests.run.windows.x64.checked.mch
-4 (-16.67%) : 425765.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
@@ -18,18 +18,17 @@ G_M32318_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M32318_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, byref
; byrRegs +[rcx rdx]
- vmovups xmm0, xmmword ptr [rdx]
- vcvtudq2pd ymm0, ymm0
+ vcvtudq2pd ymm0, ymmword ptr [rdx]
vmovups ymmword ptr [rcx], ymm0
mov rax, rcx
; byrRegs +[rax]
- ;; size=17 bbWeight=1 PerfScore 12.25
+ ;; size=13 bbWeight=1 PerfScore 11.25
G_M32318_IG03: ; bbWeight=1, epilog, nogc, extend
vzeroupper
ret
;; size=4 bbWeight=1 PerfScore 2.00
-; Total bytes of code 24, prolog size 3, PerfScore 15.25, instruction count 7, allocated bytes for code 24 (MethodHash=2e6381c1) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
+; Total bytes of code 20, prolog size 3, PerfScore 14.25, instruction count 6, allocated bytes for code 20 (MethodHash=2e6381c1) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
; ============================================================
Unwind Info:
-4 (-15.38%) : 424513.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
@@ -18,18 +18,17 @@ G_M49776_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M49776_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, byref
; byrRegs +[rcx rdx]
- vmovups ymm0, ymmword ptr [rdx]
- vcvtdq2pd zmm0, zmm0
+ vcvtdq2pd zmm0, ymmword ptr [rdx]
vmovups zmmword ptr [rcx], zmm0
mov rax, rcx
; byrRegs +[rax]
- ;; size=19 bbWeight=1 PerfScore 11.25
+ ;; size=15 bbWeight=1 PerfScore 9.25
G_M49776_IG03: ; bbWeight=1, epilog, nogc, extend
vzeroupper
ret
;; size=4 bbWeight=1 PerfScore 2.00
-; Total bytes of code 26, prolog size 3, PerfScore 14.25, instruction count 7, allocated bytes for code 26 (MethodHash=2f113d8f) for method System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
+; Total bytes of code 22, prolog size 3, PerfScore 12.25, instruction count 6, allocated bytes for code 22 (MethodHash=2f113d8f) for method System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
; ============================================================
Unwind Info:
-4 (-15.38%) : 424826.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[uint]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
@@ -18,18 +18,17 @@ G_M50789_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M50789_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, byref
; byrRegs +[rcx rdx]
- vmovups ymm0, ymmword ptr [rdx]
- vcvtudq2pd zmm0, zmm0
+ vcvtudq2pd zmm0, zmmword ptr [rdx]
vmovups zmmword ptr [rcx], zmm0
mov rax, rcx
; byrRegs +[rax]
- ;; size=19 bbWeight=1 PerfScore 11.25
+ ;; size=15 bbWeight=1 PerfScore 9.25
G_M50789_IG03: ; bbWeight=1, epilog, nogc, extend
vzeroupper
ret
;; size=4 bbWeight=1 PerfScore 2.00
-; Total bytes of code 26, prolog size 3, PerfScore 14.25, instruction count 7, allocated bytes for code 26 (MethodHash=1ec7399a) for method System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[uint]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
+; Total bytes of code 22, prolog size 3, PerfScore 12.25, instruction count 6, allocated bytes for code 22 (MethodHash=1ec7399a) for method System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[uint]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
; ============================================================
Unwind Info:
+4 (+1.50%) : 121601.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_LoadAligned():this (MinOpts)
@@ -61,7 +61,8 @@ G_M21378_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
mov rax, qword ptr [rbp-0x18]
vmovdqa xmm0, xmmword ptr [rax]
vmovaps xmmword ptr [rbp-0x30], xmm0
- vcvtudq2pd xmm0, xmmword ptr [rbp-0x30]
+ vmovaps xmm0, xmmword ptr [rbp-0x30]
+ vcvtudq2pd xmm0, xmm0
vmovaps xmmword ptr [rbp-0x40], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x40]
vmovaps xmmword ptr [rbp-0x10], xmm0
@@ -115,14 +116,14 @@ G_M21378_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; gcr arg pop 0
nop
nop
- ;; size=201 bbWeight=1 PerfScore 73.75
+ ;; size=205 bbWeight=1 PerfScore 74.75
G_M21378_IG05: ; bbWeight=1, epilog, nogc, extend
add rsp, 128
pop rbp
ret
;; size=9 bbWeight=1 PerfScore 1.75
-; Total bytes of code 266, prolog size 42, PerfScore 90.08, instruction count 63, allocated bytes for code 269 (MethodHash=9cdfac7d) for method JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_LoadAligned():this (MinOpts)
+; Total bytes of code 270, prolog size 42, PerfScore 91.08, instruction count 64, allocated bytes for code 270 (MethodHash=9cdfac7d) for method JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_LoadAligned():this (MinOpts)
; ============================================================
Unwind Info:
+4 (+1.50%) : 121600.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_Load():this (MinOpts)
@@ -61,7 +61,8 @@ G_M36494_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
mov rax, qword ptr [rbp-0x18]
vmovups xmm0, xmmword ptr [rax]
vmovaps xmmword ptr [rbp-0x30], xmm0
- vcvtudq2pd xmm0, xmmword ptr [rbp-0x30]
+ vmovaps xmm0, xmmword ptr [rbp-0x30]
+ vcvtudq2pd xmm0, xmm0
vmovaps xmmword ptr [rbp-0x40], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x40]
vmovaps xmmword ptr [rbp-0x10], xmm0
@@ -115,14 +116,14 @@ G_M36494_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; gcr arg pop 0
nop
nop
- ;; size=201 bbWeight=1 PerfScore 73.75
+ ;; size=205 bbWeight=1 PerfScore 74.75
G_M36494_IG05: ; bbWeight=1, epilog, nogc, extend
add rsp, 128
pop rbp
ret
;; size=9 bbWeight=1 PerfScore 1.75
-; Total bytes of code 266, prolog size 42, PerfScore 90.08, instruction count 63, allocated bytes for code 269 (MethodHash=28e07171) for method JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_Load():this (MinOpts)
+; Total bytes of code 270, prolog size 42, PerfScore 91.08, instruction count 64, allocated bytes for code 270 (MethodHash=28e07171) for method JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_Load():this (MinOpts)
; ============================================================
Unwind Info:
+4 (+2.21%) : 142132.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
@@ -39,7 +39,8 @@ G_M65260_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rcx]
vmovups xmm0, xmmword ptr [rax]
vmovaps xmmword ptr [rbp-0x10], xmm0
- vcvtudq2pd xmm0, xmmword ptr [rbp-0x10]
+ vmovaps xmm0, xmmword ptr [rbp-0x10]
+ vcvtudq2pd xmm0, xmm0
vmovaps xmmword ptr [rbp-0x20], xmm0
mov rax, gword ptr [rbp+0x10]
; gcrRegs +[rax]
@@ -73,14 +74,14 @@ G_M65260_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
call [<unknown method>]
; gcrRegs -[rcx r9]
nop
- ;; size=149 bbWeight=1 PerfScore 56.75
+ ;; size=153 bbWeight=1 PerfScore 57.75
G_M65260_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 96
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 181, prolog size 22, PerfScore 64.58, instruction count 42, allocated bytes for code 184 (MethodHash=da8f0113) for method JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
+; Total bytes of code 185, prolog size 22, PerfScore 65.58, instruction count 43, allocated bytes for code 185 (MethodHash=da8f0113) for method JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
; ============================================================
Unwind Info:
libraries.crossgen2.windows.x64.checked.mch
+0 (0.00%) : 36719.dasm - System.SpanHelpers:Fill[double](byref,ulong,double) (FullOpts)
@@ -14,7 +14,7 @@
; V02 arg2 [V02,T10] ( 18, 38 ) double -> mm2 single-def
; V03 loc0 [V03,T00] ( 23, 50 ) long -> rax
;* V04 loc1 [V04 ] ( 0, 0 ) double -> zero-ref ld-addr-op
-; V05 loc2 [V05,T11] ( 5, 9.50) simd16 -> mm0 ld-addr-op <System.Numerics.Vector`1[ubyte]>
+; V05 loc2 [V05,T11] ( 5, 9.50) simd16 -> mm2 ld-addr-op <System.Numerics.Vector`1[ubyte]>
; V06 loc3 [V06,T04] ( 5, 9.50) byref -> rcx single-def
; V07 loc4 [V07,T08] ( 4, 2 ) long -> rax
; V08 loc5 [V08,T06] ( 2, 4.50) long -> r8
@@ -37,7 +37,7 @@ G_M41871_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0002 {rcx}, byr
jb SHORT G_M41871_IG08
;; size=6 bbWeight=1 PerfScore 1.25
G_M41871_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0002 {rcx}, byref, isz
- movddup xmm0, xmm2
+ movddup xmm2, xmm2
lea rax, [8*rdx]
mov r8, rax
and r8, -32
@@ -46,8 +46,8 @@ G_M41871_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0002 {rcx},
jb SHORT G_M41871_IG05
;; size=28 bbWeight=0.50 PerfScore 1.75
G_M41871_IG04: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0002 {rcx}, byref, isz
- movups xmmword ptr [rcx+r10], xmm0
- movups xmmword ptr [rcx+r10+0x10], xmm0
+ movups xmmword ptr [rcx+r10], xmm2
+ movups xmmword ptr [rcx+r10+0x10], xmm2
add r10, 32
cmp r10, r8
jb SHORT G_M41871_IG04
@@ -55,10 +55,10 @@ G_M41871_IG04: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0002 {rcx}, byr
G_M41871_IG05: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0002 {rcx}, byref, isz
test al, 16
je SHORT G_M41871_IG06
- movups xmmword ptr [rcx+r10], xmm0
+ movups xmmword ptr [rcx+r10], xmm2
;; size=9 bbWeight=0.50 PerfScore 1.62
G_M41871_IG06: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0002 {rcx}, byref
- movups xmmword ptr [rcx+rax-0x10], xmm0
+ movups xmmword ptr [rcx+rax-0x10], xmm2
;; size=5 bbWeight=0.50 PerfScore 1.00
G_M41871_IG07: ; bbWeight=0.50, epilog, nogc, extend
ret
libraries.pmi.windows.x64.checked.mch
-4 (-16.67%) : 31897.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
@@ -18,18 +18,17 @@ G_M32318_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M32318_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, byref
; byrRegs +[rcx rdx]
- vmovups xmm0, xmmword ptr [rdx]
- vcvtudq2pd ymm0, ymm0
+ vcvtudq2pd ymm0, ymmword ptr [rdx]
vmovups ymmword ptr [rcx], ymm0
mov rax, rcx
; byrRegs +[rax]
- ;; size=17 bbWeight=1 PerfScore 12.25
+ ;; size=13 bbWeight=1 PerfScore 11.25
G_M32318_IG03: ; bbWeight=1, epilog, nogc, extend
vzeroupper
ret
;; size=4 bbWeight=1 PerfScore 2.00
-; Total bytes of code 24, prolog size 3, PerfScore 15.25, instruction count 7, allocated bytes for code 24 (MethodHash=2e6381c1) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
+; Total bytes of code 20, prolog size 3, PerfScore 14.25, instruction count 6, allocated bytes for code 20 (MethodHash=2e6381c1) for method System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
; ============================================================
Unwind Info:
-4 (-15.38%) : 31461.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
@@ -18,18 +18,17 @@ G_M49776_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M49776_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, byref
; byrRegs +[rcx rdx]
- vmovups ymm0, ymmword ptr [rdx]
- vcvtdq2pd zmm0, zmm0
+ vcvtdq2pd zmm0, ymmword ptr [rdx]
vmovups zmmword ptr [rcx], zmm0
mov rax, rcx
; byrRegs +[rax]
- ;; size=19 bbWeight=1 PerfScore 11.25
+ ;; size=15 bbWeight=1 PerfScore 9.25
G_M49776_IG03: ; bbWeight=1, epilog, nogc, extend
vzeroupper
ret
;; size=4 bbWeight=1 PerfScore 2.00
-; Total bytes of code 26, prolog size 3, PerfScore 14.25, instruction count 7, allocated bytes for code 26 (MethodHash=2f113d8f) for method System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
+; Total bytes of code 22, prolog size 3, PerfScore 12.25, instruction count 6, allocated bytes for code 22 (MethodHash=2f113d8f) for method System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
; ============================================================
Unwind Info:
-4 (-15.38%) : 31462.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[float]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
@@ -18,18 +18,17 @@ G_M14803_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=3 bbWeight=1 PerfScore 1.00
G_M14803_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, byref
; byrRegs +[rcx rdx]
- vmovups ymm0, ymmword ptr [rdx]
- vcvtps2pd zmm0, zmm0
+ vcvtps2pd zmm0, ymmword ptr [rdx]
vmovups zmmword ptr [rcx], zmm0
mov rax, rcx
; byrRegs +[rax]
- ;; size=19 bbWeight=1 PerfScore 11.25
+ ;; size=15 bbWeight=1 PerfScore 9.25
G_M14803_IG03: ; bbWeight=1, epilog, nogc, extend
vzeroupper
ret
;; size=4 bbWeight=1 PerfScore 2.00
-; Total bytes of code 26, prolog size 3, PerfScore 14.25, instruction count 7, allocated bytes for code 26 (MethodHash=e82ec62c) for method System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[float]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
+; Total bytes of code 22, prolog size 3, PerfScore 12.25, instruction count 6, allocated bytes for code 22 (MethodHash=e82ec62c) for method System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[float]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
; ============================================================
Unwind Info:
-2 (-2.30%) : 291601.dasm - System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
@@ -34,14 +34,13 @@ G_M62030_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0048 {rbx rsi},
; gcr arg pop 0
test eax, eax
je SHORT G_M62030_IG04
- vmovsd xmm0, qword ptr [rsp+0x20]
- vmovddup xmm0, xmm0
- vmulpd xmm0, xmm0, xmmword ptr [rbx]
+ vmovups xmm0, xmmword ptr [rbx]
+ vmulpd xmm0, xmm0, qword ptr [rsp+0x20] {1to2}
vdivpd xmm0, xmm0, qword ptr [reloc @RWD00] {1to2}
vmovups xmmword ptr [rsi], xmm0
mov rax, rsi
; byrRegs +[rax]
- ;; size=51 bbWeight=1 PerfScore 31.25
+ ;; size=49 bbWeight=1 PerfScore 30.25
G_M62030_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 40
pop rbx
@@ -59,7 +58,7 @@ G_M62030_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {
RWD00 dq 400921FB54442D18h ; 3.14159265
-; Total bytes of code 87, prolog size 9, PerfScore 37.25, instruction count 25, allocated bytes for code 87 (MethodHash=c70f0db1) for method System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
+; Total bytes of code 85, prolog size 9, PerfScore 36.25, instruction count 24, allocated bytes for code 85 (MethodHash=c70f0db1) for method System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
; ============================================================
Unwind Info:
-4 (-1.56%) : 25362.dasm - System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
@@ -88,11 +88,10 @@ G_M58357_IG06: ; bbWeight=0.50, epilog, nogc, extend
;; size=4 bbWeight=0.50 PerfScore 1.00
G_M58357_IG07: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, gcvars, byref
; byrRegs -[r8] +[rcx rdx]
- vmovaps xmm0, xmm1
- vmovddup xmm0, xmm0
+ vmovddup xmm0, xmm1
lea r8, bword ptr [rdx-0x10]
; byrRegs +[r8]
- ;; size=12 bbWeight=0.50 PerfScore 0.88
+ ;; size=8 bbWeight=0.50 PerfScore 0.75
G_M58357_IG08: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0106 {rcx rdx r8}, byref, isz
vcmppd xmm2, xmm0, xmmword ptr [rcx], 0
vmovmskpd r10, xmm2
@@ -161,7 +160,7 @@ G_M58357_IG17: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byr
ret
;; size=4 bbWeight=0.50 PerfScore 1.00
-; Total bytes of code 257, prolog size 3, PerfScore 138.75, instruction count 76, allocated bytes for code 257 (MethodHash=3f0f1c0a) for method System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
+; Total bytes of code 253, prolog size 3, PerfScore 138.62, instruction count 75, allocated bytes for code 253 (MethodHash=3f0f1c0a) for method System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
; ============================================================
Unwind Info:
+0 (0.00%) : 25311.dasm - System.SpanHelpers:ReplaceValueTypedouble (FullOpts)
@@ -17,9 +17,9 @@
; V06 loc1 [V06,T09] ( 5, 14 ) double -> mm0
; V07 loc2 [V07,T00] ( 11, 40.50) long -> r8
; V08 loc3 [V08,T07] ( 4, 5.50) long -> rax
-; V09 loc4 [V09,T15] ( 3, 5 ) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[double]>
-; V10 loc5 [V10,T16] ( 3, 5 ) simd16 -> mm1 <System.Runtime.Intrinsics.Vector128`1[double]>
-; V11 loc6 [V11,T10] ( 6, 13.50) simd16 -> mm2 <System.Runtime.Intrinsics.Vector128`1[double]>
+; V09 loc4 [V09,T15] ( 3, 5 ) simd16 -> mm2 <System.Runtime.Intrinsics.Vector128`1[double]>
+; V10 loc5 [V10,T16] ( 3, 5 ) simd16 -> mm3 <System.Runtime.Intrinsics.Vector128`1[double]>
+; V11 loc6 [V11,T10] ( 6, 13.50) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[double]>
;* V12 loc7 [V12 ] ( 0, 0 ) simd16 -> zero-ref <System.Runtime.Intrinsics.Vector128`1[double]>
;* V13 loc8 [V13 ] ( 0, 0 ) simd16 -> zero-ref <System.Runtime.Intrinsics.Vector128`1[double]>
; V14 loc9 [V14,T08] ( 4, 5.50) long -> rax
@@ -115,23 +115,23 @@ G_M37632_IG14: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=000
cmp rax, 4
jae SHORT G_M37632_IG18
add rax, -2
- vmovddup xmm0, xmm2
- vmovddup xmm1, xmm3
+ vmovddup xmm2, xmm2
+ vmovddup xmm3, xmm3
;; size=21 bbWeight=0.50 PerfScore 1.88
G_M37632_IG15: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, byref, isz
- vmovups xmm2, xmmword ptr [rcx+8*r8]
- vcmppd xmm3, xmm0, xmm2, 0
- vpternlogq xmm3, xmm1, xmm2, -54
- vmovups xmmword ptr [rdx+8*r8], xmm3
+ vmovups xmm0, xmmword ptr [rcx+8*r8]
+ vcmppd xmm1, xmm2, xmm0, 0
+ vpternlogq xmm1, xmm3, xmm0, -54
+ vmovups xmmword ptr [rdx+8*r8], xmm1
add r8, 2
cmp r8, rax
jb SHORT G_M37632_IG15
;; size=33 bbWeight=4 PerfScore 44.00
G_M37632_IG16: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0006 {rcx rdx}, byref
- vmovups xmm2, xmmword ptr [rcx+8*rax]
- vcmppd xmm0, xmm0, xmm2, 0
- vpternlogq xmm0, xmm1, xmm2, -54
- vmovups xmmword ptr [rdx+8*rax], xmm0
+ vmovups xmm0, xmmword ptr [rcx+8*rax]
+ vcmppd xmm1, xmm2, xmm0, 0
+ vpternlogq xmm1, xmm3, xmm0, -54
+ vmovups xmmword ptr [rdx+8*rax], xmm1
;; size=22 bbWeight=0.50 PerfScore 4.75
G_M37632_IG17: ; bbWeight=0.50, epilog, nogc, extend
vzeroupper
libraries_tests.run.windows.x64.Release.mch
-12 (-9.02%) : 464042.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
@@ -32,33 +32,30 @@ G_M32366_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov gword ptr [rbp+0x10], rcx
;; size=42 bbWeight=1 PerfScore 9.00
G_M32366_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- vmovsd xmm0, qword ptr [rbp+0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x18]
vmovaps xmmword ptr [rbp-0x20], xmm0
lea rdx, [rbp-0x20]
lea rcx, [rbp-0x10]
call [<unknown method>]
- vmovsd xmm0, qword ptr [rbp+0x20]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x20]
vmovaps xmmword ptr [rbp-0x30], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x10]
vmovaps xmmword ptr [rbp-0x40], xmm0
- vmovsd xmm0, qword ptr [rbp+0x28]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x28]
vmovaps xmmword ptr [rbp-0x50], xmm0
lea rcx, [rbp-0x30]
lea rdx, [rbp-0x40]
lea r8, [rbp-0x50]
call [<unknown method>]
nop
- ;; size=85 bbWeight=1 PerfScore 27.75
+ ;; size=73 bbWeight=1 PerfScore 24.75
G_M32366_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 112
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 133, prolog size 23, PerfScore 38.50, instruction count 33, allocated bytes for code 133 (MethodHash=d0478191) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
+; Total bytes of code 121, prolog size 23, PerfScore 35.50, instruction count 30, allocated bytes for code 121 (MethodHash=d0478191) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
; ============================================================
Unwind Info:
-12 (-9.02%) : 463763.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
@@ -32,33 +32,30 @@ G_M8981_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov gword ptr [rbp+0x10], rcx
;; size=42 bbWeight=1 PerfScore 9.00
G_M8981_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- vmovsd xmm0, qword ptr [rbp+0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x18]
vmovaps xmmword ptr [rbp-0x20], xmm0
lea rdx, [rbp-0x20]
lea rcx, [rbp-0x10]
call [<unknown method>]
- vmovsd xmm0, qword ptr [rbp+0x20]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x20]
vmovaps xmmword ptr [rbp-0x30], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x10]
vmovaps xmmword ptr [rbp-0x40], xmm0
- vmovsd xmm0, qword ptr [rbp+0x28]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x28]
vmovaps xmmword ptr [rbp-0x50], xmm0
lea rcx, [rbp-0x30]
lea rdx, [rbp-0x40]
lea r8, [rbp-0x50]
call [<unknown method>]
nop
- ;; size=85 bbWeight=1 PerfScore 27.75
+ ;; size=73 bbWeight=1 PerfScore 24.75
G_M8981_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 112
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 133, prolog size 23, PerfScore 38.50, instruction count 33, allocated bytes for code 133 (MethodHash=b03cdcea) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
+; Total bytes of code 121, prolog size 23, PerfScore 35.50, instruction count 30, allocated bytes for code 121 (MethodHash=b03cdcea) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
; ============================================================
Unwind Info:
-12 (-9.02%) : 464059.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
@@ -32,33 +32,30 @@ G_M28892_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov gword ptr [rbp+0x10], rcx
;; size=42 bbWeight=1 PerfScore 9.00
G_M28892_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- vmovsd xmm0, qword ptr [rbp+0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x18]
vmovaps xmmword ptr [rbp-0x20], xmm0
lea rdx, [rbp-0x20]
lea rcx, [rbp-0x10]
call [<unknown method>]
- vmovsd xmm0, qword ptr [rbp+0x20]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x20]
vmovaps xmmword ptr [rbp-0x30], xmm0
vmovaps xmm0, xmmword ptr [rbp-0x10]
vmovaps xmmword ptr [rbp-0x40], xmm0
- vmovsd xmm0, qword ptr [rbp+0x28]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x28]
vmovaps xmmword ptr [rbp-0x50], xmm0
lea rcx, [rbp-0x30]
lea rdx, [rbp-0x40]
lea r8, [rbp-0x50]
call [<unknown method>]
nop
- ;; size=85 bbWeight=1 PerfScore 27.75
+ ;; size=73 bbWeight=1 PerfScore 24.75
G_M28892_IG03: ; bbWeight=1, epilog, nogc, extend
add rsp, 112
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
-; Total bytes of code 133, prolog size 23, PerfScore 38.50, instruction count 33, allocated bytes for code 133 (MethodHash=05418f23) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
+; Total bytes of code 121, prolog size 23, PerfScore 35.50, instruction count 30, allocated bytes for code 121 (MethodHash=05418f23) for method System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
; ============================================================
Unwind Info:
-8 (-0.93%) : 405019.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
@@ -155,8 +155,7 @@ G_M63258_IG06: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
;; size=101 bbWeight=1 PerfScore 33.00
G_M63258_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
- vmovsd xmm0, qword ptr [rbp+0x20]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x20]
vmovaps xmmword ptr [rbp-0xA0], xmm0
mov rax, bword ptr [rbp+0x10]
; byrRegs +[rax]
@@ -199,7 +198,7 @@ G_M63258_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
vmovaps xmm0, xmmword ptr [rbp-0xC0]
vmovups xmmword ptr [rax+8*rcx-0x10], xmm0
jmp G_M63258_IG10
- ;; size=236 bbWeight=1 PerfScore 64.00
+ ;; size=232 bbWeight=1 PerfScore 63.00
G_M63258_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
; byrRegs -[rax]
mov rax, bword ptr [rbp+0x10]
@@ -209,8 +208,7 @@ G_M63258_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mov rax, bword ptr [rbp+0x18]
vmovups xmm0, xmmword ptr [rax]
vmovaps xmmword ptr [rbp-0x290], xmm0
- vmovsd xmm0, qword ptr [rbp+0x20]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x20]
vmovaps xmmword ptr [rbp-0x2A0], xmm0
lea rdx, [rbp-0x280]
lea r8, [rbp-0x290]
@@ -223,7 +221,7 @@ G_M63258_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
vmovaps xmm0, xmmword ptr [rbp-0xD0]
vmovups xmmword ptr [rax], xmm0
jmp SHORT G_M63258_IG10
- ;; size=101 bbWeight=1 PerfScore 30.00
+ ;; size=97 bbWeight=1 PerfScore 29.00
G_M63258_IG09: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
mov rax, bword ptr [rbp+0x10]
@@ -255,7 +253,7 @@ RWD00 dd G_M63258_IG10 - G_M63258_IG02
dd G_M63258_IG05 - G_M63258_IG02
-; Total bytes of code 862, prolog size 73, PerfScore 259.58, instruction count 159, allocated bytes for code 862 (MethodHash=938108e5) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
+; Total bytes of code 854, prolog size 73, PerfScore 257.58, instruction count 157, allocated bytes for code 854 (MethodHash=938108e5) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
; ============================================================
Unwind Info:
-8 (-0.93%) : 404156.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarSpanIntoSpan>g__VectorizedSmall8|234_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,double,byref,byref,ulong) (Tier0)
@@ -155,8 +155,7 @@ G_M55933_IG06: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
;; size=101 bbWeight=1 PerfScore 33.00
G_M55933_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
- vmovsd xmm0, qword ptr [rbp+0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x18]
vmovaps xmmword ptr [rbp-0xA0], xmm0
mov rax, bword ptr [rbp+0x10]
; byrRegs +[rax]
@@ -199,15 +198,14 @@ G_M55933_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
vmovaps xmm0, xmmword ptr [rbp-0xC0]
vmovups xmmword ptr [rax+8*rcx-0x10], xmm0
jmp G_M55933_IG10
- ;; size=236 bbWeight=1 PerfScore 64.00
+ ;; size=232 bbWeight=1 PerfScore 63.00
G_M55933_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
; byrRegs -[rax]
mov rax, bword ptr [rbp+0x10]
; byrRegs +[rax]
vmovups xmm0, xmmword ptr [rax]
vmovaps xmmword ptr [rbp-0x280], xmm0
- vmovsd xmm0, qword ptr [rbp+0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x18]
vmovaps xmmword ptr [rbp-0x290], xmm0
mov rax, bword ptr [rbp+0x20]
vmovups xmm0, xmmword ptr [rax]
@@ -223,7 +221,7 @@ G_M55933_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
vmovaps xmm0, xmmword ptr [rbp-0xD0]
vmovups xmmword ptr [rax], xmm0
jmp SHORT G_M55933_IG10
- ;; size=101 bbWeight=1 PerfScore 30.00
+ ;; size=97 bbWeight=1 PerfScore 29.00
G_M55933_IG09: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
mov rax, bword ptr [rbp+0x10]
@@ -255,7 +253,7 @@ RWD00 dd G_M55933_IG10 - G_M55933_IG02
dd G_M55933_IG05 - G_M55933_IG02
-; Total bytes of code 862, prolog size 73, PerfScore 259.58, instruction count 159, allocated bytes for code 862 (MethodHash=bcce2582) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarSpanIntoSpan>g__VectorizedSmall8|234_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,double,byref,byref,ulong) (Tier0)
+; Total bytes of code 854, prolog size 73, PerfScore 257.58, instruction count 157, allocated bytes for code 854 (MethodHash=bcce2582) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarSpanIntoSpan>g__VectorizedSmall8|234_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,double,byref,byref,ulong) (Tier0)
; ============================================================
Unwind Info:
-8 (-0.85%) : 405014.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
@@ -163,8 +163,7 @@ G_M51506_IG06: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
;; size=114 bbWeight=1 PerfScore 34.50
G_M51506_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
- vmovsd xmm0, qword ptr [rbp+0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x18]
vmovaps xmmword ptr [rbp-0xA0], xmm0
mov rax, bword ptr [rbp+0x10]
; byrRegs +[rax]
@@ -208,7 +207,7 @@ G_M51506_IG07: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
vmovaps xmm0, xmmword ptr [rbp-0xC0]
vmovups xmmword ptr [rax+8*rcx-0x10], xmm0
jmp G_M51506_IG10
- ;; size=256 bbWeight=1 PerfScore 66.00
+ ;; size=252 bbWeight=1 PerfScore 65.00
G_M51506_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
; byrRegs -[rax]
mov rax, bword ptr [rbp+0x10]
@@ -221,8 +220,7 @@ G_M51506_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
; byrRegs -[rax]
vmovaps xmm0, xmmword ptr [rbp-0x180]
vmovaps xmmword ptr [rbp-0x330], xmm0
- vmovsd xmm0, qword ptr [rbp+0x18]
- vmovddup xmm0, xmm0
+ vmovddup xmm0, qword ptr [rbp+0x18]
vmovaps xmmword ptr [rbp-0x340], xmm0
lea rdx, [rbp-0x330]
lea r8, [rbp-0x340]
@@ -233,7 +231,7 @@ G_M51506_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
vmovaps xmm0, xmmword ptr [rbp-0xD0]
vmovups xmmword ptr [rax], xmm0
jmp SHORT G_M51506_IG10
- ;; size=114 bbWeight=1 PerfScore 31.50
+ ;; size=110 bbWeight=1 PerfScore 30.50
G_M51506_IG09: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[rax]
mov rax, bword ptr [rbp+0x10]
@@ -266,7 +264,7 @@ RWD00 dd G_M51506_IG10 - G_M51506_IG02
dd G_M51506_IG05 - G_M51506_IG02
-; Total bytes of code 942, prolog size 73, PerfScore 268.58, instruction count 164, allocated bytes for code 942 (MethodHash=8b4136cd) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
+; Total bytes of code 934, prolog size 73, PerfScore 266.58, instruction count 162, allocated bytes for code 934 (MethodHash=8b4136cd) for method System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
; ============================================================
Unwind Info:
librariestestsnotieredcompilation.run.windows.x64.Release.mch
-4 (-1.23%) : 169036.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+InvertedBinaryOperator`2[System.Numerics.Tensors.TensorPrimitives+DivideOperator`1[double],double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
@@ -200,12 +200,11 @@ G_M18492_IG09: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0048 {rbx rs
jmp SHORT G_M18492_IG12
;; size=28 bbWeight=0.50 PerfScore 18.50
G_M18492_IG10: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0048 {rbx rsi}, byref, isz
- vmovaps xmm0, xmm6
- vmovddup xmm0, xmm0
+ vmovddup xmm0, xmm6
vdivpd xmm0, xmm0, xmmword ptr [rsi]
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M18492_IG12
- ;; size=18 bbWeight=0.50 PerfScore 10.12
+ ;; size=14 bbWeight=0.50 PerfScore 10.00
G_M18492_IG11: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0048 {rbx rsi}, byref
vdivsd xmm0, xmm6, qword ptr [rsi]
vmovsd qword ptr [rbx], xmm0
@@ -248,7 +247,7 @@ RWD00 dd G_M18492_IG12 - G_M18492_IG02
dd G_M18492_IG07 - G_M18492_IG02
-; Total bytes of code 324, prolog size 26, PerfScore 107.21, instruction count 83, allocated bytes for code 324 (MethodHash=b5efb7c3) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+InvertedBinaryOperator`2[System.Numerics.Tensors.TensorPrimitives+DivideOperator`1[double],double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
+; Total bytes of code 320, prolog size 26, PerfScore 107.08, instruction count 82, allocated bytes for code 320 (MethodHash=b5efb7c3) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+InvertedBinaryOperator`2[System.Numerics.Tensors.TensorPrimitives+DivideOperator`1[double],double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
-4 (-1.12%) : 168032.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+SubtractOperator`1[double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
@@ -206,12 +206,11 @@ G_M6707_IG09: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0048 {rbx rsi
;; size=36 bbWeight=0.50 PerfScore 10.50
G_M6707_IG10: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0048 {rbx rsi}, byref, isz
vmovups xmm0, xmmword ptr [rsi]
- vmovaps xmm1, xmm6
- vmovddup xmm1, xmm1
+ vmovddup xmm1, xmm6
vsubpd xmm0, xmm0, xmm1
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M6707_IG12
- ;; size=22 bbWeight=0.50 PerfScore 6.12
+ ;; size=18 bbWeight=0.50 PerfScore 6.00
G_M6707_IG11: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0048 {rbx rsi}, byref
vmovsd xmm0, qword ptr [rsi]
vsubsd xmm0, xmm0, xmm6
@@ -255,7 +254,7 @@ RWD00 dd G_M6707_IG12 - G_M6707_IG02
dd G_M6707_IG07 - G_M6707_IG02
-; Total bytes of code 356, prolog size 26, PerfScore 80.71, instruction count 90, allocated bytes for code 356 (MethodHash=89a5e5cc) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+SubtractOperator`1[double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
+; Total bytes of code 352, prolog size 26, PerfScore 80.58, instruction count 89, allocated bytes for code 352 (MethodHash=89a5e5cc) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+SubtractOperator`1[double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
-4 (-0.85%) : 168066.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
@@ -256,13 +256,12 @@ G_M64136_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rd
jmp SHORT G_M64136_IG15
;; size=38 bbWeight=0.50 PerfScore 15.50
G_M64136_IG13: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi r14}, byref, isz
- vmovaps xmm0, xmm6
- vmovddup xmm0, xmm0
+ vmovddup xmm0, xmm6
vmulpd xmm0, xmm0, xmmword ptr [r14]
vaddpd xmm0, xmm0, xmmword ptr [rdi]
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M64136_IG15
- ;; size=23 bbWeight=0.50 PerfScore 8.62
+ ;; size=19 bbWeight=0.50 PerfScore 8.50
G_M64136_IG14: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi r14}, byref
vmulsd xmm0, xmm6, qword ptr [r14]
vaddsd xmm0, xmm0, qword ptr [rdi]
@@ -314,7 +313,7 @@ RWD00 dd G_M64136_IG15 - G_M64136_IG02
dd G_M64136_IG10 - G_M64136_IG02
-; Total bytes of code 472, prolog size 31, PerfScore 112.33, instruction count 126, allocated bytes for code 472 (MethodHash=9daf0577) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
+; Total bytes of code 468, prolog size 31, PerfScore 112.21, instruction count 125, allocated bytes for code 468 (MethodHash=9daf0577) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
-4 (-0.85%) : 169035.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
@@ -256,13 +256,12 @@ G_M64136_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rd
jmp SHORT G_M64136_IG15
;; size=38 bbWeight=0.50 PerfScore 15.50
G_M64136_IG13: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi r14}, byref, isz
- vmovaps xmm0, xmm6
- vmovddup xmm0, xmm0
+ vmovddup xmm0, xmm6
vmulpd xmm0, xmm0, xmmword ptr [r14]
vaddpd xmm0, xmm0, xmmword ptr [rdi]
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M64136_IG15
- ;; size=23 bbWeight=0.50 PerfScore 8.62
+ ;; size=19 bbWeight=0.50 PerfScore 8.50
G_M64136_IG14: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi r14}, byref
vmulsd xmm0, xmm6, qword ptr [r14]
vaddsd xmm0, xmm0, qword ptr [rdi]
@@ -314,7 +313,7 @@ RWD00 dd G_M64136_IG15 - G_M64136_IG02
dd G_M64136_IG10 - G_M64136_IG02
-; Total bytes of code 472, prolog size 31, PerfScore 112.33, instruction count 126, allocated bytes for code 472 (MethodHash=9daf0577) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
+; Total bytes of code 468, prolog size 31, PerfScore 112.21, instruction count 125, allocated bytes for code 468 (MethodHash=9daf0577) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
-4 (-0.80%) : 168047.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
@@ -263,12 +263,11 @@ G_M2600_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi
G_M2600_IG13: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi r14}, byref, isz
vmovups xmm0, xmmword ptr [r14]
vaddpd xmm0, xmm0, xmmword ptr [rdi]
- vmovaps xmm1, xmm6
- vmovddup xmm1, xmm1
+ vmovddup xmm1, xmm6
vmulpd xmm0, xmm0, xmm1
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M2600_IG15
- ;; size=27 bbWeight=0.50 PerfScore 9.12
+ ;; size=23 bbWeight=0.50 PerfScore 9.00
G_M2600_IG14: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi r14}, byref
vmovsd xmm0, qword ptr [r14]
vaddsd xmm0, xmm0, qword ptr [rdi]
@@ -321,7 +320,7 @@ RWD00 dd G_M2600_IG15 - G_M2600_IG02
dd G_M2600_IG10 - G_M2600_IG02
-; Total bytes of code 499, prolog size 31, PerfScore 117.33, instruction count 133, allocated bytes for code 499 (MethodHash=0ed2f5d7) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
+; Total bytes of code 495, prolog size 31, PerfScore 117.21, instruction count 132, allocated bytes for code 495 (MethodHash=0ed2f5d7) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
-4 (-0.80%) : 169040.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
@@ -263,12 +263,11 @@ G_M2600_IG12: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi
G_M2600_IG13: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi r14}, byref, isz
vmovups xmm0, xmmword ptr [r14]
vaddpd xmm0, xmm0, xmmword ptr [rdi]
- vmovaps xmm1, xmm6
- vmovddup xmm1, xmm1
+ vmovddup xmm1, xmm6
vmulpd xmm0, xmm0, xmm1
vmovups xmmword ptr [rbx], xmm0
jmp SHORT G_M2600_IG15
- ;; size=27 bbWeight=0.50 PerfScore 9.12
+ ;; size=23 bbWeight=0.50 PerfScore 9.00
G_M2600_IG14: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=4088 {rbx rdi r14}, byref
vmovsd xmm0, qword ptr [r14]
vaddsd xmm0, xmm0, qword ptr [rdi]
@@ -321,7 +320,7 @@ RWD00 dd G_M2600_IG15 - G_M2600_IG02
dd G_M2600_IG10 - G_M2600_IG02
-; Total bytes of code 499, prolog size 31, PerfScore 117.33, instruction count 133, allocated bytes for code 499 (MethodHash=0ed2f5d7) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
+; Total bytes of code 495, prolog size 31, PerfScore 117.21, instruction count 132, allocated bytes for code 495 (MethodHash=0ed2f5d7) for method System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
; ============================================================
Unwind Info:
Details
Improvements/regressions per collection
Collection |
Contexts with diffs |
Improvements |
Regressions |
Same size |
Improvements (bytes) |
Regressions (bytes) |
aspnet.run.windows.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
benchmarks.run.windows.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
benchmarks.run_pgo.windows.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
benchmarks.run_tiered.windows.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
coreclr_tests.run.windows.x64.checked.mch |
84 |
77 |
7 |
0 |
-360 |
+28 |
libraries.crossgen2.windows.x64.checked.mch |
1 |
0 |
0 |
1 |
-0 |
+0 |
libraries.pmi.windows.x64.checked.mch |
8 |
7 |
0 |
1 |
-28 |
+0 |
libraries_tests.run.windows.x64.Release.mch |
8 |
8 |
0 |
0 |
-84 |
+0 |
librariestestsnotieredcompilation.run.windows.x64.Release.mch |
6 |
6 |
0 |
0 |
-24 |
+0 |
realworld.run.windows.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
smoke_tests.nativeaot.windows.x64.checked.mch |
0 |
0 |
0 |
0 |
-0 |
+0 |
|
107 |
98 |
7 |
2 |
-496 |
+28 |
Context information
Collection |
Diffed contexts |
MinOpts |
FullOpts |
Missed, base |
Missed, diff |
aspnet.run.windows.x64.checked.mch |
129,290 |
61,702 |
67,588 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run.windows.x64.checked.mch |
27,926 |
4 |
27,922 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run_pgo.windows.x64.checked.mch |
103,183 |
50,327 |
52,856 |
0 (0.00%) |
0 (0.00%) |
benchmarks.run_tiered.windows.x64.checked.mch |
53,511 |
36,872 |
16,639 |
0 (0.00%) |
0 (0.00%) |
coreclr_tests.run.windows.x64.checked.mch |
574,384 |
341,481 |
232,903 |
1 (0.00%) |
1 (0.00%) |
libraries.crossgen2.windows.x64.checked.mch |
243,730 |
15 |
243,715 |
1 (0.00%) |
1 (0.00%) |
libraries.pmi.windows.x64.checked.mch |
309,227 |
6 |
309,221 |
1 (0.00%) |
1 (0.00%) |
libraries_tests.run.windows.x64.Release.mch |
681,424 |
485,046 |
196,378 |
0 (0.00%) |
0 (0.00%) |
librariestestsnotieredcompilation.run.windows.x64.Release.mch |
320,536 |
21,924 |
298,612 |
0 (0.00%) |
0 (0.00%) |
realworld.run.windows.x64.checked.mch |
36,610 |
3 |
36,607 |
0 (0.00%) |
0 (0.00%) |
smoke_tests.nativeaot.windows.x64.checked.mch |
32,388 |
11 |
32,377 |
0 (0.00%) |
0 (0.00%) |
|
2,512,209 |
997,391 |
1,514,818 |
3 (0.00%) |
3 (0.00%) |
jit-analyze output
coreclr_tests.run.windows.x64.checked.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os windows -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 393207052 (overridden on cmd)
Total bytes of diff: 393206720 (overridden on cmd)
Total bytes of delta: -332 (-0.00 % of base)
diff is an improvement.
relative diff is an improvement.
Detail diffs
Top file regressions (bytes):
4 : 121601.dasm (1.50 % of base)
4 : 142132.dasm (2.21 % of base)
4 : 121597.dasm (1.50 % of base)
4 : 121604.dasm (1.44 % of base)
4 : 121600.dasm (1.50 % of base)
4 : 437518.dasm (1.02 % of base)
4 : 437520.dasm (1.02 % of base)
Top file improvements (bytes):
-40 : 37114.dasm (-2.73 % of base)
-40 : 37118.dasm (-2.73 % of base)
-40 : 37119.dasm (-0.83 % of base)
-4 : 114873.dasm (-1.30 % of base)
-4 : 132997.dasm (-1.98 % of base)
-4 : 145616.dasm (-2.30 % of base)
-4 : 145619.dasm (-2.30 % of base)
-4 : 167768.dasm (-0.17 % of base)
-4 : 167771.dasm (-0.17 % of base)
-4 : 183801.dasm (-0.49 % of base)
-4 : 430189.dasm (-1.31 % of base)
-4 : 43494.dasm (-2.67 % of base)
-4 : 111733.dasm (-8.33 % of base)
-4 : 114866.dasm (-8.33 % of base)
-4 : 132983.dasm (-1.86 % of base)
-4 : 424826.dasm (-15.38 % of base)
-4 : 425765.dasm (-16.67 % of base)
-4 : 430205.dasm (-1.31 % of base)
-4 : 432533.dasm (-0.85 % of base)
-4 : 111753.dasm (-8.33 % of base)
64 total files with Code Size differences (57 improved, 7 regressed), 0 unchanged.
Top method regressions (bytes):
4 (1.50 % of base) : 121600.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_Load():this (MinOpts)
4 (1.50 % of base) : 121601.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_LoadAligned():this (MinOpts)
4 (1.50 % of base) : 121597.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_UnsafeRead():this (MinOpts)
4 (1.44 % of base) : 121604.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (MinOpts)
4 (2.21 % of base) : 142132.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
4 (1.02 % of base) : 437520.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunStructFldScenario():this (FullOpts)
4 (1.02 % of base) : 437518.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunStructLclFldScenario():this (FullOpts)
Top method improvements (bytes):
-40 (-2.73 % of base) : 37114.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunBasicScenario():this (MinOpts)
-40 (-2.73 % of base) : 37118.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunGenericScenario():this (MinOpts)
-40 (-0.83 % of base) : 37119.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunReflectionScenario():this (MinOpts)
-4 (-0.49 % of base) : 183801.dasm - IntelHardwareIntrinsicTest._Sse3.Program:MoveAndDuplicate() (Instrumented Tier0)
-4 (-0.17 % of base) : 167768.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[byte](byte):System.Runtime.Intrinsics.Vector128`1[byte] (MinOpts)
-4 (-0.17 % of base) : 167762.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[double](double):System.Runtime.Intrinsics.Vector128`1[double] (MinOpts)
-4 (-0.17 % of base) : 167757.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[float](float):System.Runtime.Intrinsics.Vector128`1[float] (MinOpts)
-4 (-0.17 % of base) : 167777.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[int](int):System.Runtime.Intrinsics.Vector128`1[int] (MinOpts)
-4 (-0.17 % of base) : 167783.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[long](long):System.Runtime.Intrinsics.Vector128`1[long] (MinOpts)
-4 (-0.17 % of base) : 167771.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[short](short):System.Runtime.Intrinsics.Vector128`1[short] (MinOpts)
-4 (-0.17 % of base) : 167765.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[ubyte](ubyte):System.Runtime.Intrinsics.Vector128`1[ubyte] (MinOpts)
-4 (-0.17 % of base) : 167780.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[uint](uint):System.Runtime.Intrinsics.Vector128`1[uint] (MinOpts)
-4 (-0.17 % of base) : 167786.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[ulong](ulong):System.Runtime.Intrinsics.Vector128`1[ulong] (MinOpts)
-4 (-0.17 % of base) : 167774.dasm - IntelHardwareIntrinsicTest.General.Program:CreateVector128[ushort](ushort):System.Runtime.Intrinsics.Vector128`1[ushort] (MinOpts)
-4 (-2.67 % of base) : 43494.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (MinOpts)
-4 (-3.54 % of base) : 81097.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (Tier0)
-4 (-1.41 % of base) : 37238.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsVectorDouble:RunBasicScenario():this (MinOpts)
-4 (-1.31 % of base) : 430205.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_Load():this (FullOpts)
-4 (-1.98 % of base) : 132997.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_Load():this (Tier0)
-4 (-1.98 % of base) : 132994.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_UnsafeRead():this (Tier0)
Top method regressions (percentages):
4 (2.21 % of base) : 142132.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (Tier0)
4 (1.50 % of base) : 121600.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_Load():this (MinOpts)
4 (1.50 % of base) : 121601.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_LoadAligned():this (MinOpts)
4 (1.50 % of base) : 121597.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunBasicScenario_UnsafeRead():this (MinOpts)
4 (1.44 % of base) : 121604.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunLclVarScenario_UnsafeRead():this (MinOpts)
4 (1.02 % of base) : 437520.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunStructFldScenario():this (FullOpts)
4 (1.02 % of base) : 437518.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector128.SimpleUnaryOpConvTest__ConvertToVector128DoubleVector128UInt32:RunStructLclFldScenario():this (FullOpts)
Top method improvements (percentages):
-4 (-16.67 % of base) : 425765.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
-4 (-15.38 % of base) : 424511.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[float]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-4 (-15.38 % of base) : 424513.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-4 (-15.38 % of base) : 424826.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[uint]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-4 (-8.70 % of base) : 125331.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (Tier0)
-4 (-8.33 % of base) : 111733.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[float]):System.Runtime.Intrinsics.Vector512`1[double] (Tier0)
-4 (-8.33 % of base) : 111753.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (Tier0)
-4 (-8.33 % of base) : 114866.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[uint]):System.Runtime.Intrinsics.Vector512`1[double] (Tier0)
-2 (-3.92 % of base) : 281485.dasm - Runtime_90508:Test1(System.Runtime.Intrinsics.Vector128`1[double],double):System.Runtime.Intrinsics.Vector128`1[double] (Tier0)
-4 (-3.54 % of base) : 81097.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (Tier0)
-40 (-2.73 % of base) : 37114.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunBasicScenario():this (MinOpts)
-40 (-2.73 % of base) : 37118.dasm - JIT.HardwareIntrinsics.General._Vector128_1.VectorAs__AsDouble:RunGenericScenario():this (MinOpts)
-4 (-2.67 % of base) : 43494.dasm - JIT.HardwareIntrinsics.General._Vector128.VectorCreate__CreateDouble:RunBasicScenario():this (MinOpts)
-4 (-2.30 % of base) : 145616.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32:RunStructLclFldScenario():this (Tier0)
-4 (-2.30 % of base) : 145619.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32+TestStruct:RunStructFldScenario(JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32):this (Tier0)
-4 (-2.08 % of base) : 145610.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32:RunBasicScenario_Load():this (Tier0)
-4 (-2.08 % of base) : 145607.dasm - JIT.HardwareIntrinsics.X86._Avx512F_VL_Vector256.SimpleUnaryOpConvTest__ConvertToVector256DoubleVector128UInt32:RunBasicScenario_UnsafeRead():this (Tier0)
-4 (-1.98 % of base) : 132997.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_Load():this (Tier0)
-4 (-1.98 % of base) : 132994.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleInt32:RunBasicScenario_UnsafeRead():this (Tier0)
-4 (-1.98 % of base) : 132978.dasm - JIT.HardwareIntrinsics.X86._Avx512F.handwritten.SimpleUnaryOpTest__ConvertToVector512DoubleSingle:RunBasicScenario_Load():this (Tier0)
libraries.crossgen2.windows.x64.checked.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os windows -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 39486563 (overridden on cmd)
Total bytes of diff: 39486563 (overridden on cmd)
Total bytes of delta: 0 (0.00 % of base)
Detail diffs
0 total files with Code Size differences (0 improved, 0 regressed), 1 unchanged.
0 total methods with Code Size differences (0 improved, 0 regressed).
libraries.pmi.windows.x64.checked.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os windows -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 61663491 (overridden on cmd)
Total bytes of diff: 61663463 (overridden on cmd)
Total bytes of delta: -28 (-0.00 % of base)
diff is an improvement.
relative diff is an improvement.
Detail diffs
Top file improvements (bytes):
-6 : 291597.dasm (-6.12 % of base)
-4 : 25362.dasm (-1.56 % of base)
-4 : 31897.dasm (-16.67 % of base)
-4 : 31461.dasm (-15.38 % of base)
-4 : 31463.dasm (-15.38 % of base)
-4 : 31462.dasm (-15.38 % of base)
-2 : 291601.dasm (-2.30 % of base)
7 total files with Code Size differences (7 improved, 0 regressed), 1 unchanged.
Top method improvements (bytes):
-6 (-6.12 % of base) : 291597.dasm - System.Numerics.Tensors.TensorPrimitives+DegreesToRadiansOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-4 (-15.38 % of base) : 31462.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[float]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-4 (-15.38 % of base) : 31461.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-4 (-15.38 % of base) : 31463.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[uint]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-4 (-16.67 % of base) : 31897.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
-4 (-1.56 % of base) : 25362.dasm - System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
-2 (-2.30 % of base) : 291601.dasm - System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
Top method improvements (percentages):
-4 (-16.67 % of base) : 31897.dasm - System.Runtime.Intrinsics.X86.Avx512F+VL:ConvertToVector256Double(System.Runtime.Intrinsics.Vector128`1[uint]):System.Runtime.Intrinsics.Vector256`1[double] (FullOpts)
-4 (-15.38 % of base) : 31462.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[float]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-4 (-15.38 % of base) : 31461.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[int]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-4 (-15.38 % of base) : 31463.dasm - System.Runtime.Intrinsics.X86.Avx512F:ConvertToVector512Double(System.Runtime.Intrinsics.Vector256`1[uint]):System.Runtime.Intrinsics.Vector512`1[double] (FullOpts)
-6 (-6.12 % of base) : 291597.dasm - System.Numerics.Tensors.TensorPrimitives+DegreesToRadiansOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-2 (-2.30 % of base) : 291601.dasm - System.Numerics.Tensors.TensorPrimitives+RadiansToDegreesOperator`1[double]:Invoke(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[double] (FullOpts)
-4 (-1.56 % of base) : 25362.dasm - System.SpanHelpers:CountValueType[double](byref,double,int):int (FullOpts)
7 total methods with Code Size differences (7 improved, 0 regressed).
libraries_tests.run.windows.x64.Release.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os windows -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 282129292 (overridden on cmd)
Total bytes of diff: 282129208 (overridden on cmd)
Total bytes of delta: -84 (-0.00 % of base)
diff is an improvement.
relative diff is an improvement.
Detail diffs
Top file improvements (bytes):
-16 : 463658.dasm (-2.06 % of base)
-12 : 464059.dasm (-9.02 % of base)
-12 : 464042.dasm (-9.02 % of base)
-12 : 463763.dasm (-9.02 % of base)
-8 : 405014.dasm (-0.85 % of base)
-8 : 404125.dasm (-0.93 % of base)
-8 : 405019.dasm (-0.93 % of base)
-8 : 404156.dasm (-0.93 % of base)
8 total files with Code Size differences (8 improved, 0 regressed), 0 unchanged.
Top method improvements (bytes):
-16 (-2.06 % of base) : 463658.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Vector128DoubleEqualsNonCanonicalNaNTest():this (Instrumented Tier0)
-12 (-9.02 % of base) : 463763.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
-12 (-9.02 % of base) : 464042.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
-12 (-9.02 % of base) : 464059.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
-8 (-0.85 % of base) : 405014.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
-8 (-0.93 % of base) : 404156.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarSpanIntoSpan>g__VectorizedSmall8|234_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,double,byref,byref,ulong) (Tier0)
-8 (-0.93 % of base) : 404125.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
-8 (-0.93 % of base) : 405019.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
Top method improvements (percentages):
-12 (-9.02 % of base) : 463763.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:ExpDoubleTest(double,double,double):this (Tier0)
-12 (-9.02 % of base) : 464042.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Log2DoubleTest(double,double,double):this (Tier0)
-12 (-9.02 % of base) : 464059.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:LogDoubleTest(double,double,double):this (Tier0)
-16 (-2.06 % of base) : 463658.dasm - System.Runtime.Intrinsics.Tests.Vectors.Vector128Tests:Vector128DoubleEqualsNonCanonicalNaNTest():this (Instrumented Tier0)
-8 (-0.93 % of base) : 404156.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarSpanIntoSpan>g__VectorizedSmall8|234_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,double,byref,byref,ulong) (Tier0)
-8 (-0.93 % of base) : 404125.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
-8 (-0.93 % of base) : 405019.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanSpanScalarIntoSpan>g__VectorizedSmall8|233_6[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](byref,byref,double,byref,ulong) (Tier0)
-8 (-0.85 % of base) : 405014.dasm - System.Numerics.Tensors.TensorPrimitives:<InvokeSpanScalarIntoSpan>g__VectorizedSmall8|231_6[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+MultiplyOperator`1[double]](byref,double,byref,ulong) (Tier0)
8 total methods with Code Size differences (8 improved, 0 regressed).
librariestestsnotieredcompilation.run.windows.x64.Release.mch
To reproduce these diffs on Windows x64:
superpmi.py asmdiffs -target_os windows -target_arch x64 -arch x64
Summary of Code Size diffs:
(Lower is better)
Total bytes of base: 137066325 (overridden on cmd)
Total bytes of diff: 137066301 (overridden on cmd)
Total bytes of delta: -24 (-0.00 % of base)
diff is an improvement.
relative diff is an improvement.
Detail diffs
Top file improvements (bytes):
-4 : 168047.dasm (-0.80 % of base)
-4 : 169036.dasm (-1.23 % of base)
-4 : 169035.dasm (-0.85 % of base)
-4 : 168032.dasm (-1.12 % of base)
-4 : 168066.dasm (-0.85 % of base)
-4 : 169040.dasm (-0.80 % of base)
6 total files with Code Size differences (6 improved, 0 regressed), 0 unchanged.
Top method improvements (bytes):
-4 (-1.23 % of base) : 169036.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+InvertedBinaryOperator`2[System.Numerics.Tensors.TensorPrimitives+DivideOperator`1[double],double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-4 (-1.12 % of base) : 168032.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+SubtractOperator`1[double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-4 (-0.85 % of base) : 169035.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
-4 (-0.85 % of base) : 168066.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
-4 (-0.80 % of base) : 168047.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-4 (-0.80 % of base) : 169040.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
Top method improvements (percentages):
-4 (-1.23 % of base) : 169036.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+InvertedBinaryOperator`2[System.Numerics.Tensors.TensorPrimitives+DivideOperator`1[double],double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-4 (-1.12 % of base) : 168032.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+IdentityOperator`1[double],System.Numerics.Tensors.TensorPrimitives+SubtractOperator`1[double]](System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-4 (-0.85 % of base) : 169035.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
-4 (-0.85 % of base) : 168066.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanScalarSpanIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+MultiplyAddOperator`1[double]](System.ReadOnlySpan`1[double],double,System.ReadOnlySpan`1[double],System.Span`1[double]) (FullOpts)
-4 (-0.80 % of base) : 168047.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
-4 (-0.80 % of base) : 169040.dasm - System.Numerics.Tensors.TensorPrimitives:InvokeSpanSpanScalarIntoSpan[double,System.Numerics.Tensors.TensorPrimitives+AddMultiplyOperator`1[double]](System.ReadOnlySpan`1[double],System.ReadOnlySpan`1[double],double,System.Span`1[double]) (FullOpts)
6 total methods with Code Size differences (6 improved, 0 regressed).