Assembly Diffs

linux arm64

Diffs are based on 2,259,470 contexts (1,008,044 MinOpts, 1,251,426 FullOpts).

MISSED contexts: 159 (0.01%)

Overall (-41,352 bytes)

| Collection | Base size (bytes) | Diff size (bytes) |
|---|---:|---:|
| benchmarks.run_pgo.linux.arm64.checked.mch | 79,903,244 | -372 |
| benchmarks.run_tiered.linux.arm64.checked.mch | 22,276,872 | -220 |
| coreclr_tests.run.linux.arm64.checked.mch | 509,740,232 | -40,760 |

MinOpts (-41,352 bytes)

| Collection | Base size (bytes) | Diff size (bytes) |
|---|---:|---:|
| benchmarks.run_pgo.linux.arm64.checked.mch | 25,548,372 | -372 |
| benchmarks.run_tiered.linux.arm64.checked.mch | 17,338,964 | -220 |
| coreclr_tests.run.linux.arm64.checked.mch | 348,907,856 | -40,760 |

Example diffs

benchmarks.run_pgo.linux.arm64.checked.mch

-68 (-28.33%) : 24730.dasm - System.Buffers.ProbabilisticMap:IsCharBitSet(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)

@@ -5,97 +5,75 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) simd16 -> [fp+0x100] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V01 arg1 [V01 ] ( 1, 1 ) simd16 -> [fp+0xF0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V02 arg2 [V02 ] ( 1, 1 ) simd16 -> [fp+0xE0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V03 loc0 [V03 ] ( 1, 1 ) simd16 -> [fp+0xD0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V04 loc1 [V04 ] ( 1, 1 ) simd16 -> [fp+0xC0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V05 loc2 [V05 ] ( 1, 1 ) simd16 -> [fp+0xB0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V06 loc3 [V06 ] ( 1, 1 ) simd16 -> [fp+0xA0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V07 loc4 [V07 ] ( 1, 1 ) simd16 -> [fp+0x90] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V08 loc5 [V08 ] ( 1, 1 ) simd16 -> [fp+0x80] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V09 loc6 [V09 ] ( 1, 1 ) simd16 -> [fp+0x70] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V00 arg0 [V00 ] ( 1, 1 ) simd16 -> [fp+0xC0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V01 arg1 [V01 ] ( 1, 1 ) simd16 -> [fp+0xB0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V02 arg2 [V02 ] ( 1, 1 ) simd16 -> [fp+0xA0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V03 loc0 [V03 ] ( 1, 1 ) simd16 -> [fp+0x90] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V04 loc1 [V04 ] ( 1, 1 ) simd16 -> [fp+0x80] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V05 loc2 [V05 ] ( 1, 1 ) simd16 -> [fp+0x70] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V06 loc3 [V06 ] ( 1, 1 ) simd16 -> [fp+0x60] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V07 loc4 [V07 ] ( 1, 1 ) simd16 -> [fp+0x50] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V08 loc5 [V08 ] ( 1, 1 ) simd16 -> [fp+0x40] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V09 loc6 [V09 ] ( 1, 1 ) simd16 -> [fp+0x30] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;# V10 OutArgs [V10 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V11 tmp1 [V11 ] ( 1, 1 ) struct (32) [fp+0x50] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; TEMP_02 long -> [fp+0x48] -; TEMP_01 long -> [fp+0x40] -; TEMP_05 simd16 -> [fp+0x30] -; TEMP_04 simd16 -> [fp+0x20] -; TEMP_03 simd16 -> [fp+0x10]
+; V11 tmp1 [V11 ] ( 1, 1 ) struct (32) [fp+0x10] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;
-; Lcl frame size = 256
+; Lcl frame size = 192
G_M5123_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- stp fp, lr, [sp, #0xD1FFAB1E]!
+ stp fp, lr, [sp, #-0xD0]!
mov fp, sp
- str q0, [fp, #0xD1FFAB1E] // [V00 arg0] - str q1, [fp, #0xF0] // [V01 arg1] - str q2, [fp, #0xE0] // [V02 arg2]
+ str q0, [fp, #0xC0] // [V00 arg0] + str q1, [fp, #0xB0] // [V01 arg1] + str q2, [fp, #0xA0] // [V02 arg2]
;; size=20 bbWeight=1 PerfScore 4.50 G_M5123_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
- ldr q16, [fp, #0xE0] // [V02 arg2] - ushr v16.16b, v16.16b, #5 - str q16, [fp, #0xD0] // [V03 loc0]
+ ldr q0, [fp, #0xA0] // [V02 arg2] + ushr v0.16b, v0.16b, #5 + str q0, [fp, #0x90] // [V03 loc0]
ldr q0, [@RWD00]
- ldr q1, [fp, #0xD0] // [V03 loc0]
+ ldr q1, [fp, #0x90] // [V03 loc0]
movz x0, #0xD1FFAB1E // code for <unknown method> movk x0, #0xD1FFAB1E LSL #16 movk x0, #0xD1FFAB1E LSL #32 ldr x0, [x0] blr x0
- str q0, [fp, #0xC0] // [V04 loc1] - ldr q16, [fp, #0xE0] // [V02 arg2] - str q16, [fp, #0x30] // [TEMP_05] - ldr q16, [@RWD16] - str q16, [fp, #0x20] // [TEMP_04] - ldr q16, [fp, #0x30] // [TEMP_05] - ldr q17, [fp, #0x20] // [TEMP_04] - and v16.16b, v16.16b, v17.16b - str q16, [fp, #0xB0] // [V05 loc2] - stp xzr, xzr, [fp, #0x50] - stp xzr, xzr, [fp, #0x60] - add x0, fp, #80 // [V11 tmp1] - str x0, [fp, #0x48] // [TEMP_02] - ldr q0, [fp, #0xD1FFAB1E] // [V00 arg0] - ldr q1, [fp, #0xF0] // [V01 arg1] - movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x40] // [TEMP_01] - ldr x0, [fp, #0x48] // [TEMP_02] - ldr x1, [fp, #0x40] // [TEMP_01]
+ str q0, [fp, #0x80] // [V04 loc1] + ldr q0, [fp, #0xA0] // [V02 arg2] + ldr q1, [@RWD16] + and v0.16b, v0.16b, v1.16b + str q0, [fp, #0x70] // [V05 loc2] + stp xzr, xzr, [fp, #0x10] + stp xzr, xzr, [fp, #0x20] + add x0, fp, #16 // [V11 tmp1] + ldr q0, [fp, #0xC0] // [V00 arg0] + ldr q1, [fp, #0xB0] // [V01 arg1] + movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q16, [fp, #0x50] // [V11 tmp1] - str q16, [fp, #0x20] // [TEMP_04] - ldr q16, [fp, #0x60] // [V11 tmp1+0x10] - str q16, [fp, #0x30] // [TEMP_05] - ldr q16, [fp, #0xB0] // [V05 loc2] - str q16, [fp, #0x10] // [TEMP_03] - ldr q16, [fp, #0x20] // [TEMP_04] - ldr q17, [fp, #0x30] // [TEMP_05] - ldr q18, [fp, #0x10] // [TEMP_03] - tbl v16.16b, {v16.16b, v17.16b}, v18.16b - str q16, [fp, #0xA0] // [V06 loc3] - ldr q16, [fp, #0xA0] // [V06 loc3] - str q16, [fp, #0x10] // [TEMP_03] - ldr q16, [fp, #0xC0] // [V04 loc1] - str q16, [fp, #0x30] // [TEMP_05] - ldr q16, [fp, #0x10] // [TEMP_03] - ldr q17, [fp, #0x30] // [TEMP_05] - and v16.16b, v16.16b, v17.16b - cmeq v16.16b, v16.16b, #0 - mvn v0.16b, v16.16b - ;; size=212 bbWeight=1 PerfScore 73.00
+ ldr q0, [fp, #0x10] // [V11 tmp1] + ldr q16, [fp, #0x20] // [V11 tmp1+0x10] + ldr q17, [fp, #0x70] // [V05 loc2] + mov v1.16b, v16.16b + tbl v0.16b, {v0.16b, v1.16b}, v17.16b + str q0, [fp, #0x60] // [V06 loc3] + ldr q0, [fp, #0x60] // [V06 loc3] + ldr q16, [fp, #0x80] // [V04 loc1] + and v0.16b, v0.16b, v16.16b + cmeq v0.16b, v0.16b, #0 + mvn v0.16b, v0.16b + ;; size=144 bbWeight=1 PerfScore 46.50
G_M5123_IG03: ; bbWeight=1, epilog, nogc, extend
- ldp fp, lr, [sp], #0xD1FFAB1E
+ ldp fp, lr, [sp], #0xD0
ret lr ;; size=8 bbWeight=1 PerfScore 2.00 RWD00 dq 8040201008040201h, 8040201008040201h RWD16 dq 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh
-Total bytes of code 240, prolog size 8, PerfScore 79.50, instruction count 60, allocated bytes for code 240 (MethodHash=e056ebfc) for method System.Buffers.ProbabilisticMap:IsCharBitSet(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)
+Total bytes of code 172, prolog size 8, PerfScore 53.00, instruction count 43, allocated bytes for code 172 (MethodHash=e056ebfc) for method System.Buffers.ProbabilisticMap:IsCharBitSet(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)
; ============================================================ Unwind Info: @@ -106,7 +84,7 @@ Unwind Info: E bit : 0 X bit : 0 Vers : 0
- Function Length : 60 (0x0003c) Actual length = 240 (0x0000f0)
+ Function Length : 43 (0x0002b) Actual length = 172 (0x0000ac)
---- Epilog scopes ---- ---- Scope 0 Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) @@ -114,7 +92,7 @@ Unwind Info: ---- Unwind codes ---- E1 set_fp; mov fp, sp ---- Epilog start at index 1 ----
- A1 save_fplr_x #33 (0x21); stp fp, lr, [sp, #-272]!
+ 99 save_fplr_x #25 (0x19); stp fp, lr, [sp, #-208]!
E4 end E4 end

-152 (-26.57%) : 2878.dasm - System.Guid:FormatGuidVector128Utf8(System.Guid,ubyte):System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]] (Tier0)

@@ -5,34 +5,28 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) struct (16) [fp+0x1D0] do-not-enreg[SFA] multireg-arg ld-addr-op <System.Guid> -; V01 arg1 [V01 ] ( 1, 1 ) ubyte -> [fp+0x1CC] do-not-enreg[] -; V02 loc0 [V02 ] ( 1, 1 ) simd16 -> [fp+0x1B0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V03 loc1 [V03 ] ( 1, 1 ) simd16 -> [fp+0x1A0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V04 loc2 [V04 ] ( 1, 1 ) simd16 -> [fp+0x190] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V05 loc3 [V05 ] ( 1, 1 ) simd16 -> [fp+0x180] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V06 loc4 [V06 ] ( 1, 1 ) simd16 -> [fp+0x170] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V07 loc5 [V07 ] ( 1, 1 ) simd16 -> [fp+0x160] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V08 loc6 [V08 ] ( 1, 1 ) simd16 -> [fp+0x150] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V09 loc7 [V09 ] ( 1, 1 ) simd16 -> [fp+0x140] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V10 loc8 [V10 ] ( 1, 1 ) simd16 -> [fp+0x130] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V11 loc9 [V11 ] ( 1, 1 ) simd16 -> [fp+0x120] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V12 loc10 [V12 ] ( 1, 1 ) simd16 -> [fp+0x110] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V13 loc11 [V13 ] ( 1, 1 ) simd16 -> [fp+0x100] HFA(simd16) do-not-enreg[S] ld-addr-op <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V00 arg0 [V00 ] ( 1, 1 ) struct (16) [fp+0x190] do-not-enreg[SFA] multireg-arg ld-addr-op <System.Guid> +; V01 arg1 [V01 ] ( 1, 1 ) ubyte -> [fp+0x18C] do-not-enreg[] +; V02 loc0 [V02 ] ( 1, 1 ) simd16 -> [fp+0x170] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V03 loc1 [V03 ] ( 1, 1 ) simd16 -> [fp+0x160] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V04 loc2 [V04 ] ( 1, 1 ) simd16 -> [fp+0x150] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V05 loc3 [V05 ] ( 1, 1 ) simd16 -> [fp+0x140] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V06 loc4 [V06 ] ( 1, 1 ) simd16 -> [fp+0x130] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V07 loc5 [V07 ] ( 1, 1 ) simd16 -> [fp+0x120] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V08 loc6 [V08 ] ( 1, 1 ) simd16 -> [fp+0x110] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V09 loc7 [V09 ] ( 1, 1 ) simd16 -> [fp+0x100] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V10 loc8 [V10 ] ( 1, 1 ) simd16 -> [fp+0xF0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V11 loc9 [V11 ] ( 1, 1 ) simd16 -> [fp+0xE0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V12 loc10 [V12 ] ( 1, 1 ) simd16 -> [fp+0xD0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V13 loc11 [V13 ] ( 1, 1 ) simd16 -> [fp+0xC0] HFA(simd16) do-not-enreg[S] ld-addr-op <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;# V14 OutArgs [V14 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V15 tmp1 [V15 ] ( 1, 1 ) int -> [fp+0xFC] do-not-enreg[] -; V16 tmp2 [V16 ] ( 1, 1 ) struct (32) [fp+0xD8] HFA(simd16) do-not-enreg[SFR] multireg-ret "Return value temp for multireg return" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V17 tmp3 [V17 ] ( 1, 1 ) struct (48) [fp+0xA8] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V18 tmp4 [V18 ] ( 1, 1 ) struct (32) [fp+0x88] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V19 tmp5 [V19 ] ( 1, 1 ) struct (48) [fp+0x58] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; TEMP_01 int -> [fp+0x54] -; TEMP_03 long -> [fp+0x4C] -; TEMP_02 long -> [fp+0x44] -; TEMP_06 simd16 -> [fp+0x34] -; TEMP_05 simd16 -> [fp+0x24] -; TEMP_04 simd16 -> [fp+0x14]
+; V15 tmp1 [V15 ] ( 1, 1 ) int -> [fp+0xBC] do-not-enreg[] +; V16 tmp2 [V16 ] ( 1, 1 ) struct (32) [fp+0x98] HFA(simd16) do-not-enreg[SFR] multireg-ret "Return value temp for multireg return" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V17 tmp3 [V17 ] ( 1, 1 ) struct (48) [fp+0x68] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V18 tmp4 [V18 ] ( 1, 1 ) struct (32) [fp+0x48] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V19 tmp5 [V19 ] ( 1, 1 ) struct (48) [fp+0x18] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;
-; Lcl frame size = 464
+; Lcl frame size = 400
G_M63253_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref, nogc <-- Prolog IG stp fp, lr, [sp, #0xD1FFAB1E]! @@ -43,21 +37,17 @@ G_M63253_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 { ;; size=20 bbWeight=1 PerfScore 4.50 G_M63253_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz mov w0, #1
- str w0, [fp, #0xFC] // [V15 tmp1] - ldr w0, [fp, #0xFC] // [V15 tmp1] - str w0, [fp, #0x54] // [TEMP_01] - movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x4C] // [TEMP_03] - ldr w0, [fp, #0x54] // [TEMP_01] - ldr x1, [fp, #0x4C] // [TEMP_03]
+ str w0, [fp, #0xBC] // [V15 tmp1] + ldr w0, [fp, #0xBC] // [V15 tmp1] + movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q16, [@RWD00] - str q16, [fp, #0xD1FFAB1E] // [V02 loc0] - ldr q16, [fp, #0xD1FFAB1E] // [V00 arg0] - str q16, [fp, #0xD1FFAB1E] // [V03 loc1]
+ ldr q0, [@RWD00] + str q0, [fp, #0xD1FFAB1E] // [V02 loc0] + ldr q0, [fp, #0xD1FFAB1E] // [V00 arg0] + str q0, [fp, #0xD1FFAB1E] // [V03 loc1]
ldr q0, [fp, #0xD1FFAB1E] // [V03 loc1] ldr q1, [fp, #0xD1FFAB1E] // [V02 loc0] movz x0, #0xD1FFAB1E // code for <unknown method> @@ -65,124 +55,90 @@ G_M63253_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, movk x0, #0xD1FFAB1E LSL #32 ldr x0, [x0] blr x0
- str q0, [fp, #0xD8] // [V16 tmp2] - str q1, [fp, #0xE8] // [V16 tmp2+0x10] - ldr q16, [fp, #0xD8] // [V16 tmp2] - str q16, [fp, #0xD1FFAB1E] // [V04 loc2] - ldr q16, [fp, #0xE8] // [V16 tmp2+0x10] - str q16, [fp, #0xD1FFAB1E] // [V05 loc3] - ldr q16, [fp, #0xD1FFAB1E] // [V04 loc2] - mov v16.16b, v16.16b - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD16] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x34] // [TEMP_06] - ldr q17, [fp, #0x24] // [TEMP_05] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V04 loc2]
+ str q0, [fp, #0x98] // [V16 tmp2] + str q1, [fp, #0xA8] // [V16 tmp2+0x10] + ldr q0, [fp, #0x98] // [V16 tmp2] + str q0, [fp, #0xD1FFAB1E] // [V04 loc2] + ldr q0, [fp, #0xA8] // [V16 tmp2+0x10] + str q0, [fp, #0xD1FFAB1E] // [V05 loc3] + ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] + mov v0.16b, v0.16b + ldr q1, [@RWD16] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V04 loc2]
ldr w0, [fp, #0xD1FFAB1E] // [V01 arg1] uxtb w0, w0 cbz w0, G_M63253_IG04
- ldr q16, [fp, #0xD1FFAB1E] // [V04 loc2] - mov v16.16b, v16.16b - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [@RWD32] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [fp, #0x24] // [TEMP_05] - ldr q17, [fp, #0x34] // [TEMP_06] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V06 loc4] - ldr q16, [fp, #0xD1FFAB1E] // [V05 loc3] - mov v16.16b, v16.16b - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD48] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x34] // [TEMP_06] - ldr q17, [fp, #0x24] // [TEMP_05] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V07 loc5] - ldr q16, [@RWD64] - str q16, [fp, #0xD1FFAB1E] // [V09 loc7] - stp xzr, xzr, [fp, #0x88] - stp xzr, xzr, [fp, #0x98] - add x0, fp, #136 // [V18 tmp4] - str x0, [fp, #0x4C] // [TEMP_03]
+ ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] + mov v0.16b, v0.16b + ldr q1, [@RWD32] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V06 loc4] + ldr q0, [fp, #0xD1FFAB1E] // [V05 loc3] + mov v0.16b, v0.16b + ldr q1, [@RWD48] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V07 loc5] + ldr q0, [@RWD64] + str q0, [fp, #0xD1FFAB1E] // [V09 loc7] + stp xzr, xzr, [fp, #0x48] + stp xzr, xzr, [fp, #0x58] + add x0, fp, #72 // [V18 tmp4]
ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] ldr q1, [fp, #0xD1FFAB1E] // [V05 loc3]
- movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x44] // [TEMP_02] - ldr x0, [fp, #0x4C] // [TEMP_03] - ldr x1, [fp, #0x44] // [TEMP_02]
+ movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q16, [fp, #0x88] // [V18 tmp4] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x98] // [V18 tmp4+0x10] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD80] - str q16, [fp, #0x14] // [TEMP_04] - ldr q16, [fp, #0x24] // [TEMP_05] - ldr q17, [fp, #0x34] // [TEMP_06] - ldr q18, [fp, #0x14] // [TEMP_04] - tbl v16.16b, {v16.16b, v17.16b}, v18.16b - str q16, [fp, #0xD1FFAB1E] // [V10 loc8] - ldr q16, [fp, #0xD1FFAB1E] // [V10 loc8] - str q16, [fp, #0x14] // [TEMP_04] - ldr q16, [fp, #0xD1FFAB1E] // [V09 loc7] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [fp, #0x14] // [TEMP_04] - ldr q17, [fp, #0x34] // [TEMP_06] - orr v16.16b, v16.16b, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V08 loc6] - stp xzr, xzr, [fp, #0x58] - stp xzr, xzr, [fp, #0x68] - stp xzr, xzr, [fp, #0x78] - add x0, fp, #88 // [V19 tmp5] - str x0, [fp, #0x44] // [TEMP_02]
+ ldr q0, [fp, #0x48] // [V18 tmp4] + ldr q1, [fp, #0x58] // [V18 tmp4+0x10] + ldr q2, [@RWD80] + tbl v0.16b, {v0.16b, v1.16b}, v2.16b + str q0, [fp, #0xF0] // [V10 loc8] + ldr q0, [fp, #0xF0] // [V10 loc8] + ldr q1, [fp, #0xD1FFAB1E] // [V09 loc7] + orr v0.16b, v0.16b, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V08 loc6] + stp xzr, xzr, [fp, #0x18] + stp xzr, xzr, [fp, #0x28] + stp xzr, xzr, [fp, #0x38] + add x0, fp, #24 // [V19 tmp5]
ldr q0, [fp, #0xD1FFAB1E] // [V06 loc4] ldr q1, [fp, #0xD1FFAB1E] // [V07 loc5] ldr q2, [fp, #0xD1FFAB1E] // [V08 loc6]
- movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x4C] // [TEMP_03] - ldr x0, [fp, #0x44] // [TEMP_02] - ldr x1, [fp, #0x4C] // [TEMP_03]
+ movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q0, [fp, #0x58] // [V19 tmp5] - ldr q1, [fp, #0x68] // [V19 tmp5+0x10] - ldr q2, [fp, #0x78] // [V19 tmp5+0x20] - ;; size=452 bbWeight=1 PerfScore 154.50
+ ldr q0, [fp, #0x18] // [V19 tmp5] + ldr q1, [fp, #0x28] // [V19 tmp5+0x10] + ldr q2, [fp, #0x38] // [V19 tmp5+0x20] + ;; size=316 bbWeight=1 PerfScore 103.50
G_M63253_IG03: ; bbWeight=1, epilog, nogc, extend ldp fp, lr, [sp], #0xD1FFAB1E ret lr ;; size=8 bbWeight=1 PerfScore 2.00 G_M63253_IG04: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref
- movi v16.4s, #0 - str q16, [fp, #0xD1FFAB1E] // [V13 loc11] - stp xzr, xzr, [fp, #0xA8] - stp xzr, xzr, [fp, #0xB8] - stp xzr, xzr, [fp, #0xC8] - add x0, fp, #168 // [V17 tmp3] - str x0, [fp, #0x4C] // [TEMP_03]
+ movi v0.4s, #0 + str q0, [fp, #0xC0] // [V13 loc11] + stp xzr, xzr, [fp, #0x68] + stp xzr, xzr, [fp, #0x78] + stp xzr, xzr, [fp, #0x88]
...

-152 (-25.17%) : 31029.dasm - System.Guid:FormatGuidVector128Utf8(System.Guid,ubyte):System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]] (Instrumented Tier0)

@@ -5,34 +5,28 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) struct (16) [fp+0x1D0] do-not-enreg[SFA] multireg-arg ld-addr-op <System.Guid> -; V01 arg1 [V01 ] ( 1, 1 ) ubyte -> [fp+0x1CC] do-not-enreg[] -; V02 loc0 [V02 ] ( 1, 1 ) simd16 -> [fp+0x1B0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V03 loc1 [V03 ] ( 1, 1 ) simd16 -> [fp+0x1A0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V04 loc2 [V04 ] ( 1, 1 ) simd16 -> [fp+0x190] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V05 loc3 [V05 ] ( 1, 1 ) simd16 -> [fp+0x180] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V06 loc4 [V06 ] ( 1, 1 ) simd16 -> [fp+0x170] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V07 loc5 [V07 ] ( 1, 1 ) simd16 -> [fp+0x160] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V08 loc6 [V08 ] ( 1, 1 ) simd16 -> [fp+0x150] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V09 loc7 [V09 ] ( 1, 1 ) simd16 -> [fp+0x140] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V10 loc8 [V10 ] ( 1, 1 ) simd16 -> [fp+0x130] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V11 loc9 [V11 ] ( 1, 1 ) simd16 -> [fp+0x120] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V12 loc10 [V12 ] ( 1, 1 ) simd16 -> [fp+0x110] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V13 loc11 [V13 ] ( 1, 1 ) simd16 -> [fp+0x100] HFA(simd16) do-not-enreg[S] ld-addr-op <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V00 arg0 [V00 ] ( 1, 1 ) struct (16) [fp+0x190] do-not-enreg[SFA] multireg-arg ld-addr-op <System.Guid> +; V01 arg1 [V01 ] ( 1, 1 ) ubyte -> [fp+0x18C] do-not-enreg[] +; V02 loc0 [V02 ] ( 1, 1 ) simd16 -> [fp+0x170] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V03 loc1 [V03 ] ( 1, 1 ) simd16 -> [fp+0x160] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V04 loc2 [V04 ] ( 1, 1 ) simd16 -> [fp+0x150] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V05 loc3 [V05 ] ( 1, 1 ) simd16 -> [fp+0x140] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V06 loc4 [V06 ] ( 1, 1 ) simd16 -> [fp+0x130] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V07 loc5 [V07 ] ( 1, 1 ) simd16 -> [fp+0x120] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V08 loc6 [V08 ] ( 1, 1 ) simd16 -> [fp+0x110] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V09 loc7 [V09 ] ( 1, 1 ) simd16 -> [fp+0x100] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V10 loc8 [V10 ] ( 1, 1 ) simd16 -> [fp+0xF0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V11 loc9 [V11 ] ( 1, 1 ) simd16 -> [fp+0xE0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V12 loc10 [V12 ] ( 1, 1 ) simd16 -> [fp+0xD0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V13 loc11 [V13 ] ( 1, 1 ) simd16 -> [fp+0xC0] HFA(simd16) do-not-enreg[S] ld-addr-op <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;# V14 OutArgs [V14 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V15 tmp1 [V15 ] ( 1, 1 ) int -> [fp+0xFC] do-not-enreg[] -; V16 tmp2 [V16 ] ( 1, 1 ) struct (32) [fp+0xD8] HFA(simd16) do-not-enreg[SFR] multireg-ret "Return value temp for multireg return" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V17 tmp3 [V17 ] ( 1, 1 ) struct (48) [fp+0xA8] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V18 tmp4 [V18 ] ( 1, 1 ) struct (32) [fp+0x88] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V19 tmp5 [V19 ] ( 1, 1 ) struct (48) [fp+0x58] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; TEMP_01 int -> [fp+0x54] -; TEMP_03 long -> [fp+0x4C] -; TEMP_02 long -> [fp+0x44] -; TEMP_06 simd16 -> [fp+0x34] -; TEMP_05 simd16 -> [fp+0x24] -; TEMP_04 simd16 -> [fp+0x14]
+; V15 tmp1 [V15 ] ( 1, 1 ) int -> [fp+0xBC] do-not-enreg[] +; V16 tmp2 [V16 ] ( 1, 1 ) struct (32) [fp+0x98] HFA(simd16) do-not-enreg[SFR] multireg-ret "Return value temp for multireg return" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V17 tmp3 [V17 ] ( 1, 1 ) struct (48) [fp+0x68] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V18 tmp4 [V18 ] ( 1, 1 ) struct (32) [fp+0x48] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V19 tmp5 [V19 ] ( 1, 1 ) struct (48) [fp+0x18] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;
-; Lcl frame size = 464
+; Lcl frame size = 400
G_M63253_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref, nogc <-- Prolog IG stp fp, lr, [sp, #0xD1FFAB1E]! @@ -43,21 +37,17 @@ G_M63253_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 { ;; size=20 bbWeight=1 PerfScore 4.50 G_M63253_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz mov w0, #1
- str w0, [fp, #0xFC] // [V15 tmp1] - ldr w0, [fp, #0xFC] // [V15 tmp1] - str w0, [fp, #0x54] // [TEMP_01] - movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x4C] // [TEMP_03] - ldr w0, [fp, #0x54] // [TEMP_01] - ldr x1, [fp, #0x4C] // [TEMP_03]
+ str w0, [fp, #0xBC] // [V15 tmp1] + ldr w0, [fp, #0xBC] // [V15 tmp1] + movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q16, [@RWD00] - str q16, [fp, #0xD1FFAB1E] // [V02 loc0] - ldr q16, [fp, #0xD1FFAB1E] // [V00 arg0] - str q16, [fp, #0xD1FFAB1E] // [V03 loc1]
+ ldr q0, [@RWD00] + str q0, [fp, #0xD1FFAB1E] // [V02 loc0] + ldr q0, [fp, #0xD1FFAB1E] // [V00 arg0] + str q0, [fp, #0xD1FFAB1E] // [V03 loc1]
ldr q0, [fp, #0xD1FFAB1E] // [V03 loc1] ldr q1, [fp, #0xD1FFAB1E] // [V02 loc0] movz x0, #0xD1FFAB1E // code for <unknown method> @@ -65,101 +55,71 @@ G_M63253_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, movk x0, #0xD1FFAB1E LSL #32 ldr x0, [x0] blr x0
- str q0, [fp, #0xD8] // [V16 tmp2] - str q1, [fp, #0xE8] // [V16 tmp2+0x10] - ldr q16, [fp, #0xD8] // [V16 tmp2] - str q16, [fp, #0xD1FFAB1E] // [V04 loc2] - ldr q16, [fp, #0xE8] // [V16 tmp2+0x10] - str q16, [fp, #0xD1FFAB1E] // [V05 loc3] - ldr q16, [fp, #0xD1FFAB1E] // [V04 loc2] - mov v16.16b, v16.16b - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD16] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x34] // [TEMP_06] - ldr q17, [fp, #0x24] // [TEMP_05] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V04 loc2]
+ str q0, [fp, #0x98] // [V16 tmp2] + str q1, [fp, #0xA8] // [V16 tmp2+0x10] + ldr q0, [fp, #0x98] // [V16 tmp2] + str q0, [fp, #0xD1FFAB1E] // [V04 loc2] + ldr q0, [fp, #0xA8] // [V16 tmp2+0x10] + str q0, [fp, #0xD1FFAB1E] // [V05 loc3] + ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] + mov v0.16b, v0.16b + ldr q1, [@RWD16] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V04 loc2]
ldr w0, [fp, #0xD1FFAB1E] // [V01 arg1] uxtb w0, w0 cbz w0, G_M63253_IG04
- ldr q16, [fp, #0xD1FFAB1E] // [V04 loc2] - mov v16.16b, v16.16b - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [@RWD32] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [fp, #0x24] // [TEMP_05] - ldr q17, [fp, #0x34] // [TEMP_06] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V06 loc4] - ldr q16, [fp, #0xD1FFAB1E] // [V05 loc3] - mov v16.16b, v16.16b - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD48] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x34] // [TEMP_06] - ldr q17, [fp, #0x24] // [TEMP_05] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V07 loc5] - ldr q16, [@RWD64] - str q16, [fp, #0xD1FFAB1E] // [V09 loc7] - stp xzr, xzr, [fp, #0x88] - stp xzr, xzr, [fp, #0x98] - add x0, fp, #136 // [V18 tmp4] - str x0, [fp, #0x4C] // [TEMP_03]
+ ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] + mov v0.16b, v0.16b + ldr q1, [@RWD32] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V06 loc4] + ldr q0, [fp, #0xD1FFAB1E] // [V05 loc3] + mov v0.16b, v0.16b + ldr q1, [@RWD48] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V07 loc5] + ldr q0, [@RWD64] + str q0, [fp, #0xD1FFAB1E] // [V09 loc7] + stp xzr, xzr, [fp, #0x48] + stp xzr, xzr, [fp, #0x58] + add x0, fp, #72 // [V18 tmp4]
ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] ldr q1, [fp, #0xD1FFAB1E] // [V05 loc3]
- movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x44] // [TEMP_02] - ldr x0, [fp, #0x4C] // [TEMP_03] - ldr x1, [fp, #0x44] // [TEMP_02]
+ movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q16, [fp, #0x88] // [V18 tmp4] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x98] // [V18 tmp4+0x10] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD80] - str q16, [fp, #0x14] // [TEMP_04] - ldr q16, [fp, #0x24] // [TEMP_05] - ldr q17, [fp, #0x34] // [TEMP_06] - ldr q18, [fp, #0x14] // [TEMP_04]
+ ldr q16, [fp, #0x48] // [V18 tmp4] + ldr q17, [fp, #0x58] // [V18 tmp4+0x10] + ldr q18, [@RWD80]
tbl v16.16b, {v16.16b, v17.16b}, v18.16b
- str q16, [fp, #0xD1FFAB1E] // [V10 loc8] - ldr q16, [fp, #0xD1FFAB1E] // [V10 loc8] - str q16, [fp, #0x14] // [TEMP_04] - ldr q16, [fp, #0xD1FFAB1E] // [V09 loc7] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [fp, #0x14] // [TEMP_04] - ldr q17, [fp, #0x34] // [TEMP_06]
+ str q16, [fp, #0xF0] // [V10 loc8] + ldr q16, [fp, #0xF0] // [V10 loc8] + ldr q17, [fp, #0xD1FFAB1E] // [V09 loc7]
orr v16.16b, v16.16b, v17.16b str q16, [fp, #0xD1FFAB1E] // [V08 loc6] movz x0, #0xD1FFAB1E movk x0, #0xD1FFAB1E LSL #16 movk x0, #0xD1FFAB1E LSL #32 bl CORINFO_HELP_COUNTPROFILE32
- stp xzr, xzr, [fp, #0x58] - stp xzr, xzr, [fp, #0x68] - stp xzr, xzr, [fp, #0x78] - add x0, fp, #88 // [V19 tmp5] - str x0, [fp, #0x44] // [TEMP_02]
+ stp xzr, xzr, [fp, #0x18] + stp xzr, xzr, [fp, #0x28] + stp xzr, xzr, [fp, #0x38] + add x0, fp, #24 // [V19 tmp5]
ldr q0, [fp, #0xD1FFAB1E] // [V06 loc4] ldr q1, [fp, #0xD1FFAB1E] // [V07 loc5] ldr q2, [fp, #0xD1FFAB1E] // [V08 loc6]
- movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x4C] // [TEMP_03] - ldr x0, [fp, #0x44] // [TEMP_02] - ldr x1, [fp, #0x4C] // [TEMP_03]
+ movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q0, [fp, #0x58] // [V19 tmp5] - ldr q1, [fp, #0x68] // [V19 tmp5+0x10] - ldr q2, [fp, #0x78] // [V19 tmp5+0x20] - ;; size=468 bbWeight=1 PerfScore 157.00
+ ldr q0, [fp, #0x18] // [V19 tmp5] + ldr q1, [fp, #0x28] // [V19 tmp5+0x10] + ldr q2, [fp, #0x38] // [V19 tmp5+0x20] + ;; size=332 bbWeight=1 PerfScore 106.00
G_M63253_IG03: ; bbWeight=1, epilog, nogc, extend ldp fp, lr, [sp], #0xD1FFAB1E ret lr @@ -169,28 +129,24 @@ G_M63253_IG04: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 { movk x0, #0xD1FFAB1E LSL #16 movk x0, #0xD1FFAB1E LSL #32 bl CORINFO_HELP_COUNTPROFILE32
- movi v16.4s, #0 - str q16, [fp, #0xD1FFAB1E] // [V13 loc11] - stp xzr, xzr, [fp, #0xA8] - stp xzr, xzr, [fp, #0xB8] - stp xzr, xzr, [fp, #0xC8] - add x0, fp, #168 // [V17 tmp3] - str x0, [fp, #0x4C] // [TEMP_03]
+ movi v0.4s, #0 + str q0, [fp, #0xC0] // [V13 loc11]
...

benchmarks.run_tiered.linux.arm64.checked.mch

-68 (-28.33%) : 5669.dasm - System.Buffers.ProbabilisticMap:IsCharBitSet(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)

@@ -5,97 +5,75 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) simd16 -> [fp+0x100] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V01 arg1 [V01 ] ( 1, 1 ) simd16 -> [fp+0xF0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V02 arg2 [V02 ] ( 1, 1 ) simd16 -> [fp+0xE0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V03 loc0 [V03 ] ( 1, 1 ) simd16 -> [fp+0xD0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V04 loc1 [V04 ] ( 1, 1 ) simd16 -> [fp+0xC0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V05 loc2 [V05 ] ( 1, 1 ) simd16 -> [fp+0xB0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V06 loc3 [V06 ] ( 1, 1 ) simd16 -> [fp+0xA0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V07 loc4 [V07 ] ( 1, 1 ) simd16 -> [fp+0x90] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V08 loc5 [V08 ] ( 1, 1 ) simd16 -> [fp+0x80] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V09 loc6 [V09 ] ( 1, 1 ) simd16 -> [fp+0x70] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V00 arg0 [V00 ] ( 1, 1 ) simd16 -> [fp+0xC0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V01 arg1 [V01 ] ( 1, 1 ) simd16 -> [fp+0xB0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V02 arg2 [V02 ] ( 1, 1 ) simd16 -> [fp+0xA0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V03 loc0 [V03 ] ( 1, 1 ) simd16 -> [fp+0x90] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V04 loc1 [V04 ] ( 1, 1 ) simd16 -> [fp+0x80] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V05 loc2 [V05 ] ( 1, 1 ) simd16 -> [fp+0x70] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V06 loc3 [V06 ] ( 1, 1 ) simd16 -> [fp+0x60] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V07 loc4 [V07 ] ( 1, 1 ) simd16 -> [fp+0x50] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V08 loc5 [V08 ] ( 1, 1 ) simd16 -> [fp+0x40] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V09 loc6 [V09 ] ( 1, 1 ) simd16 -> [fp+0x30] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;# V10 OutArgs [V10 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V11 tmp1 [V11 ] ( 1, 1 ) struct (32) [fp+0x50] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; TEMP_02 long -> [fp+0x48] -; TEMP_01 long -> [fp+0x40] -; TEMP_05 simd16 -> [fp+0x30] -; TEMP_04 simd16 -> [fp+0x20] -; TEMP_03 simd16 -> [fp+0x10]
+; V11 tmp1 [V11 ] ( 1, 1 ) struct (32) [fp+0x10] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;
-; Lcl frame size = 256
+; Lcl frame size = 192
G_M5123_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- stp fp, lr, [sp, #0xD1FFAB1E]!
+ stp fp, lr, [sp, #-0xD0]!
mov fp, sp
- str q0, [fp, #0xD1FFAB1E] // [V00 arg0] - str q1, [fp, #0xF0] // [V01 arg1] - str q2, [fp, #0xE0] // [V02 arg2]
+ str q0, [fp, #0xC0] // [V00 arg0] + str q1, [fp, #0xB0] // [V01 arg1] + str q2, [fp, #0xA0] // [V02 arg2]
;; size=20 bbWeight=1 PerfScore 4.50 G_M5123_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz
- ldr q16, [fp, #0xE0] // [V02 arg2] - ushr v16.16b, v16.16b, #5 - str q16, [fp, #0xD0] // [V03 loc0]
+ ldr q0, [fp, #0xA0] // [V02 arg2] + ushr v0.16b, v0.16b, #5 + str q0, [fp, #0x90] // [V03 loc0]
ldr q0, [@RWD00]
- ldr q1, [fp, #0xD0] // [V03 loc0]
+ ldr q1, [fp, #0x90] // [V03 loc0]
movz x0, #0xD1FFAB1E // code for <unknown method> movk x0, #0xD1FFAB1E LSL #16 movk x0, #0xD1FFAB1E LSL #32 ldr x0, [x0] blr x0
- str q0, [fp, #0xC0] // [V04 loc1] - ldr q16, [fp, #0xE0] // [V02 arg2] - str q16, [fp, #0x30] // [TEMP_05] - ldr q16, [@RWD16] - str q16, [fp, #0x20] // [TEMP_04] - ldr q16, [fp, #0x30] // [TEMP_05] - ldr q17, [fp, #0x20] // [TEMP_04] - and v16.16b, v16.16b, v17.16b - str q16, [fp, #0xB0] // [V05 loc2] - stp xzr, xzr, [fp, #0x50] - stp xzr, xzr, [fp, #0x60] - add x0, fp, #80 // [V11 tmp1] - str x0, [fp, #0x48] // [TEMP_02] - ldr q0, [fp, #0xD1FFAB1E] // [V00 arg0] - ldr q1, [fp, #0xF0] // [V01 arg1] - movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x40] // [TEMP_01] - ldr x0, [fp, #0x48] // [TEMP_02] - ldr x1, [fp, #0x40] // [TEMP_01]
+ str q0, [fp, #0x80] // [V04 loc1] + ldr q0, [fp, #0xA0] // [V02 arg2] + ldr q1, [@RWD16] + and v0.16b, v0.16b, v1.16b + str q0, [fp, #0x70] // [V05 loc2] + stp xzr, xzr, [fp, #0x10] + stp xzr, xzr, [fp, #0x20] + add x0, fp, #16 // [V11 tmp1] + ldr q0, [fp, #0xC0] // [V00 arg0] + ldr q1, [fp, #0xB0] // [V01 arg1] + movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q16, [fp, #0x50] // [V11 tmp1] - str q16, [fp, #0x20] // [TEMP_04] - ldr q16, [fp, #0x60] // [V11 tmp1+0x10] - str q16, [fp, #0x30] // [TEMP_05] - ldr q16, [fp, #0xB0] // [V05 loc2] - str q16, [fp, #0x10] // [TEMP_03] - ldr q16, [fp, #0x20] // [TEMP_04] - ldr q17, [fp, #0x30] // [TEMP_05] - ldr q18, [fp, #0x10] // [TEMP_03] - tbl v16.16b, {v16.16b, v17.16b}, v18.16b - str q16, [fp, #0xA0] // [V06 loc3] - ldr q16, [fp, #0xA0] // [V06 loc3] - str q16, [fp, #0x10] // [TEMP_03] - ldr q16, [fp, #0xC0] // [V04 loc1] - str q16, [fp, #0x30] // [TEMP_05] - ldr q16, [fp, #0x10] // [TEMP_03] - ldr q17, [fp, #0x30] // [TEMP_05] - and v16.16b, v16.16b, v17.16b - cmeq v16.16b, v16.16b, #0 - mvn v0.16b, v16.16b - ;; size=212 bbWeight=1 PerfScore 73.00
+ ldr q0, [fp, #0x10] // [V11 tmp1] + ldr q16, [fp, #0x20] // [V11 tmp1+0x10] + ldr q17, [fp, #0x70] // [V05 loc2] + mov v1.16b, v16.16b + tbl v0.16b, {v0.16b, v1.16b}, v17.16b + str q0, [fp, #0x60] // [V06 loc3] + ldr q0, [fp, #0x60] // [V06 loc3] + ldr q16, [fp, #0x80] // [V04 loc1] + and v0.16b, v0.16b, v16.16b + cmeq v0.16b, v0.16b, #0 + mvn v0.16b, v0.16b + ;; size=144 bbWeight=1 PerfScore 46.50
G_M5123_IG03: ; bbWeight=1, epilog, nogc, extend
- ldp fp, lr, [sp], #0xD1FFAB1E
+ ldp fp, lr, [sp], #0xD0
ret lr ;; size=8 bbWeight=1 PerfScore 2.00 RWD00 dq 8040201008040201h, 8040201008040201h RWD16 dq 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh
-Total bytes of code 240, prolog size 8, PerfScore 79.50, instruction count 60, allocated bytes for code 240 (MethodHash=e056ebfc) for method System.Buffers.ProbabilisticMap:IsCharBitSet(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)
+Total bytes of code 172, prolog size 8, PerfScore 53.00, instruction count 43, allocated bytes for code 172 (MethodHash=e056ebfc) for method System.Buffers.ProbabilisticMap:IsCharBitSet(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)
; ============================================================ Unwind Info: @@ -106,7 +84,7 @@ Unwind Info: E bit : 0 X bit : 0 Vers : 0
- Function Length : 60 (0x0003c) Actual length = 240 (0x0000f0)
+ Function Length : 43 (0x0002b) Actual length = 172 (0x0000ac)
---- Epilog scopes ---- ---- Scope 0 Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) @@ -114,7 +92,7 @@ Unwind Info: ---- Unwind codes ---- E1 set_fp; mov fp, sp ---- Epilog start at index 1 ----
- A1 save_fplr_x #33 (0x21); stp fp, lr, [sp, #-272]!
+ 99 save_fplr_x #25 (0x19); stp fp, lr, [sp, #-208]!
E4 end E4 end

-152 (-26.57%) : 2378.dasm - System.Guid:FormatGuidVector128Utf8(System.Guid,ubyte):System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]] (Tier0)

@@ -5,34 +5,28 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) struct (16) [fp+0x1D0] do-not-enreg[SFA] multireg-arg ld-addr-op <System.Guid> -; V01 arg1 [V01 ] ( 1, 1 ) ubyte -> [fp+0x1CC] do-not-enreg[] -; V02 loc0 [V02 ] ( 1, 1 ) simd16 -> [fp+0x1B0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V03 loc1 [V03 ] ( 1, 1 ) simd16 -> [fp+0x1A0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V04 loc2 [V04 ] ( 1, 1 ) simd16 -> [fp+0x190] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V05 loc3 [V05 ] ( 1, 1 ) simd16 -> [fp+0x180] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V06 loc4 [V06 ] ( 1, 1 ) simd16 -> [fp+0x170] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V07 loc5 [V07 ] ( 1, 1 ) simd16 -> [fp+0x160] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V08 loc6 [V08 ] ( 1, 1 ) simd16 -> [fp+0x150] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V09 loc7 [V09 ] ( 1, 1 ) simd16 -> [fp+0x140] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V10 loc8 [V10 ] ( 1, 1 ) simd16 -> [fp+0x130] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V11 loc9 [V11 ] ( 1, 1 ) simd16 -> [fp+0x120] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V12 loc10 [V12 ] ( 1, 1 ) simd16 -> [fp+0x110] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V13 loc11 [V13 ] ( 1, 1 ) simd16 -> [fp+0x100] HFA(simd16) do-not-enreg[S] ld-addr-op <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V00 arg0 [V00 ] ( 1, 1 ) struct (16) [fp+0x190] do-not-enreg[SFA] multireg-arg ld-addr-op <System.Guid> +; V01 arg1 [V01 ] ( 1, 1 ) ubyte -> [fp+0x18C] do-not-enreg[] +; V02 loc0 [V02 ] ( 1, 1 ) simd16 -> [fp+0x170] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V03 loc1 [V03 ] ( 1, 1 ) simd16 -> [fp+0x160] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V04 loc2 [V04 ] ( 1, 1 ) simd16 -> [fp+0x150] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V05 loc3 [V05 ] ( 1, 1 ) simd16 -> [fp+0x140] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V06 loc4 [V06 ] ( 1, 1 ) simd16 -> [fp+0x130] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V07 loc5 [V07 ] ( 1, 1 ) simd16 -> [fp+0x120] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V08 loc6 [V08 ] ( 1, 1 ) simd16 -> [fp+0x110] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V09 loc7 [V09 ] ( 1, 1 ) simd16 -> [fp+0x100] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V10 loc8 [V10 ] ( 1, 1 ) simd16 -> [fp+0xF0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V11 loc9 [V11 ] ( 1, 1 ) simd16 -> [fp+0xE0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V12 loc10 [V12 ] ( 1, 1 ) simd16 -> [fp+0xD0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V13 loc11 [V13 ] ( 1, 1 ) simd16 -> [fp+0xC0] HFA(simd16) do-not-enreg[S] ld-addr-op <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;# V14 OutArgs [V14 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V15 tmp1 [V15 ] ( 1, 1 ) int -> [fp+0xFC] do-not-enreg[] -; V16 tmp2 [V16 ] ( 1, 1 ) struct (32) [fp+0xD8] HFA(simd16) do-not-enreg[SFR] multireg-ret "Return value temp for multireg return" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V17 tmp3 [V17 ] ( 1, 1 ) struct (48) [fp+0xA8] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V18 tmp4 [V18 ] ( 1, 1 ) struct (32) [fp+0x88] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V19 tmp5 [V19 ] ( 1, 1 ) struct (48) [fp+0x58] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; TEMP_01 int -> [fp+0x54] -; TEMP_03 long -> [fp+0x4C] -; TEMP_02 long -> [fp+0x44] -; TEMP_06 simd16 -> [fp+0x34] -; TEMP_05 simd16 -> [fp+0x24] -; TEMP_04 simd16 -> [fp+0x14]
+; V15 tmp1 [V15 ] ( 1, 1 ) int -> [fp+0xBC] do-not-enreg[] +; V16 tmp2 [V16 ] ( 1, 1 ) struct (32) [fp+0x98] HFA(simd16) do-not-enreg[SFR] multireg-ret "Return value temp for multireg return" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V17 tmp3 [V17 ] ( 1, 1 ) struct (48) [fp+0x68] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V18 tmp4 [V18 ] ( 1, 1 ) struct (32) [fp+0x48] HFA(simd16) do-not-enreg[XS] addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V19 tmp5 [V19 ] ( 1, 1 ) struct (48) [fp+0x18] HFA(simd16) do-not-enreg[XSR] multireg-ret addr-exposed ld-addr-op "NewObj constructor temp" <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;
-; Lcl frame size = 464
+; Lcl frame size = 400
G_M63253_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref, nogc <-- Prolog IG stp fp, lr, [sp, #0xD1FFAB1E]! @@ -43,21 +37,17 @@ G_M63253_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 { ;; size=20 bbWeight=1 PerfScore 4.50 G_M63253_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, isz mov w0, #1
- str w0, [fp, #0xFC] // [V15 tmp1] - ldr w0, [fp, #0xFC] // [V15 tmp1] - str w0, [fp, #0x54] // [TEMP_01] - movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x4C] // [TEMP_03] - ldr w0, [fp, #0x54] // [TEMP_01] - ldr x1, [fp, #0x4C] // [TEMP_03]
+ str w0, [fp, #0xBC] // [V15 tmp1] + ldr w0, [fp, #0xBC] // [V15 tmp1] + movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q16, [@RWD00] - str q16, [fp, #0xD1FFAB1E] // [V02 loc0] - ldr q16, [fp, #0xD1FFAB1E] // [V00 arg0] - str q16, [fp, #0xD1FFAB1E] // [V03 loc1]
+ ldr q0, [@RWD00] + str q0, [fp, #0xD1FFAB1E] // [V02 loc0] + ldr q0, [fp, #0xD1FFAB1E] // [V00 arg0] + str q0, [fp, #0xD1FFAB1E] // [V03 loc1]
ldr q0, [fp, #0xD1FFAB1E] // [V03 loc1] ldr q1, [fp, #0xD1FFAB1E] // [V02 loc0] movz x0, #0xD1FFAB1E // code for <unknown method> @@ -65,124 +55,90 @@ G_M63253_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, movk x0, #0xD1FFAB1E LSL #32 ldr x0, [x0] blr x0
- str q0, [fp, #0xD8] // [V16 tmp2] - str q1, [fp, #0xE8] // [V16 tmp2+0x10] - ldr q16, [fp, #0xD8] // [V16 tmp2] - str q16, [fp, #0xD1FFAB1E] // [V04 loc2] - ldr q16, [fp, #0xE8] // [V16 tmp2+0x10] - str q16, [fp, #0xD1FFAB1E] // [V05 loc3] - ldr q16, [fp, #0xD1FFAB1E] // [V04 loc2] - mov v16.16b, v16.16b - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD16] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x34] // [TEMP_06] - ldr q17, [fp, #0x24] // [TEMP_05] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V04 loc2]
+ str q0, [fp, #0x98] // [V16 tmp2] + str q1, [fp, #0xA8] // [V16 tmp2+0x10] + ldr q0, [fp, #0x98] // [V16 tmp2] + str q0, [fp, #0xD1FFAB1E] // [V04 loc2] + ldr q0, [fp, #0xA8] // [V16 tmp2+0x10] + str q0, [fp, #0xD1FFAB1E] // [V05 loc3] + ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] + mov v0.16b, v0.16b + ldr q1, [@RWD16] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V04 loc2]
ldr w0, [fp, #0xD1FFAB1E] // [V01 arg1] uxtb w0, w0 cbz w0, G_M63253_IG04
- ldr q16, [fp, #0xD1FFAB1E] // [V04 loc2] - mov v16.16b, v16.16b - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [@RWD32] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [fp, #0x24] // [TEMP_05] - ldr q17, [fp, #0x34] // [TEMP_06] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V06 loc4] - ldr q16, [fp, #0xD1FFAB1E] // [V05 loc3] - mov v16.16b, v16.16b - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD48] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x34] // [TEMP_06] - ldr q17, [fp, #0x24] // [TEMP_05] - tbl v16.16b, {v16.16b}, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V07 loc5] - ldr q16, [@RWD64] - str q16, [fp, #0xD1FFAB1E] // [V09 loc7] - stp xzr, xzr, [fp, #0x88] - stp xzr, xzr, [fp, #0x98] - add x0, fp, #136 // [V18 tmp4] - str x0, [fp, #0x4C] // [TEMP_03]
+ ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] + mov v0.16b, v0.16b + ldr q1, [@RWD32] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V06 loc4] + ldr q0, [fp, #0xD1FFAB1E] // [V05 loc3] + mov v0.16b, v0.16b + ldr q1, [@RWD48] + tbl v0.16b, {v0.16b}, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V07 loc5] + ldr q0, [@RWD64] + str q0, [fp, #0xD1FFAB1E] // [V09 loc7] + stp xzr, xzr, [fp, #0x48] + stp xzr, xzr, [fp, #0x58] + add x0, fp, #72 // [V18 tmp4]
ldr q0, [fp, #0xD1FFAB1E] // [V04 loc2] ldr q1, [fp, #0xD1FFAB1E] // [V05 loc3]
- movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x44] // [TEMP_02] - ldr x0, [fp, #0x4C] // [TEMP_03] - ldr x1, [fp, #0x44] // [TEMP_02]
+ movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q16, [fp, #0x88] // [V18 tmp4] - str q16, [fp, #0x24] // [TEMP_05] - ldr q16, [fp, #0x98] // [V18 tmp4+0x10] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [@RWD80] - str q16, [fp, #0x14] // [TEMP_04] - ldr q16, [fp, #0x24] // [TEMP_05] - ldr q17, [fp, #0x34] // [TEMP_06] - ldr q18, [fp, #0x14] // [TEMP_04] - tbl v16.16b, {v16.16b, v17.16b}, v18.16b - str q16, [fp, #0xD1FFAB1E] // [V10 loc8] - ldr q16, [fp, #0xD1FFAB1E] // [V10 loc8] - str q16, [fp, #0x14] // [TEMP_04] - ldr q16, [fp, #0xD1FFAB1E] // [V09 loc7] - str q16, [fp, #0x34] // [TEMP_06] - ldr q16, [fp, #0x14] // [TEMP_04] - ldr q17, [fp, #0x34] // [TEMP_06] - orr v16.16b, v16.16b, v17.16b - str q16, [fp, #0xD1FFAB1E] // [V08 loc6] - stp xzr, xzr, [fp, #0x58] - stp xzr, xzr, [fp, #0x68] - stp xzr, xzr, [fp, #0x78] - add x0, fp, #88 // [V19 tmp5] - str x0, [fp, #0x44] // [TEMP_02]
+ ldr q0, [fp, #0x48] // [V18 tmp4] + ldr q1, [fp, #0x58] // [V18 tmp4+0x10] + ldr q2, [@RWD80] + tbl v0.16b, {v0.16b, v1.16b}, v2.16b + str q0, [fp, #0xF0] // [V10 loc8] + ldr q0, [fp, #0xF0] // [V10 loc8] + ldr q1, [fp, #0xD1FFAB1E] // [V09 loc7] + orr v0.16b, v0.16b, v1.16b + str q0, [fp, #0xD1FFAB1E] // [V08 loc6] + stp xzr, xzr, [fp, #0x18] + stp xzr, xzr, [fp, #0x28] + stp xzr, xzr, [fp, #0x38] + add x0, fp, #24 // [V19 tmp5]
ldr q0, [fp, #0xD1FFAB1E] // [V06 loc4] ldr q1, [fp, #0xD1FFAB1E] // [V07 loc5] ldr q2, [fp, #0xD1FFAB1E] // [V08 loc6]
- movz x0, #0xD1FFAB1E // code for <unknown method> - movk x0, #0xD1FFAB1E LSL #16 - movk x0, #0xD1FFAB1E LSL #32 - ldr x0, [x0] - str x0, [fp, #0x4C] // [TEMP_03] - ldr x0, [fp, #0x44] // [TEMP_02] - ldr x1, [fp, #0x4C] // [TEMP_03]
+ movz x1, #0xD1FFAB1E // code for <unknown method> + movk x1, #0xD1FFAB1E LSL #16 + movk x1, #0xD1FFAB1E LSL #32 + ldr x1, [x1]
blr x1
- ldr q0, [fp, #0x58] // [V19 tmp5] - ldr q1, [fp, #0x68] // [V19 tmp5+0x10] - ldr q2, [fp, #0x78] // [V19 tmp5+0x20] - ;; size=452 bbWeight=1 PerfScore 154.50
+ ldr q0, [fp, #0x18] // [V19 tmp5] + ldr q1, [fp, #0x28] // [V19 tmp5+0x10] + ldr q2, [fp, #0x38] // [V19 tmp5+0x20] + ;; size=316 bbWeight=1 PerfScore 103.50
G_M63253_IG03: ; bbWeight=1, epilog, nogc, extend ldp fp, lr, [sp], #0xD1FFAB1E ret lr ;; size=8 bbWeight=1 PerfScore 2.00 G_M63253_IG04: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref
- movi v16.4s, #0 - str q16, [fp, #0xD1FFAB1E] // [V13 loc11] - stp xzr, xzr, [fp, #0xA8] - stp xzr, xzr, [fp, #0xB8] - stp xzr, xzr, [fp, #0xC8] - add x0, fp, #168 // [V17 tmp3] - str x0, [fp, #0x4C] // [TEMP_03]
+ movi v0.4s, #0 + str q0, [fp, #0xC0] // [V13 loc11] + stp xzr, xzr, [fp, #0x68] + stp xzr, xzr, [fp, #0x78] + stp xzr, xzr, [fp, #0x88]
...

coreclr_tests.run.linux.arm64.checked.mch

-44 (-42.31%) : 45771.dasm - System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128`1[byte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector128`1[byte]):System.Runtime.Intrinsics.Vector128`1[byte]

@@ -5,52 +5,36 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) simd16 -> [fp+0xA0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[byte]> -; V01 arg1 [V01 ] ( 1, 1 ) struct (48) [fp+0x70] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]]> -; V02 arg2 [V02 ] ( 1, 1 ) simd16 -> [fp+0x60] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[byte]>
+; V00 arg0 [V00 ] ( 1, 1 ) simd16 -> [fp+0x50] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[byte]> +; V01 arg1 [V01 ] ( 1, 1 ) struct (48) [fp+0x20] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]]> +; V02 arg2 [V02 ] ( 1, 1 ) simd16 -> [fp+0x10] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[byte]>
;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; TEMP_05 simd16 -> [fp+0x50] -; TEMP_04 simd16 -> [fp+0x40] -; TEMP_03 simd16 -> [fp+0x30] -; TEMP_02 simd16 -> [fp+0x20] -; TEMP_01 simd16 -> [fp+0x10]
;
-; Lcl frame size = 160
+; Lcl frame size = 80
G_M10198_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- stp fp, lr, [sp, #-0xB0]!
+ stp fp, lr, [sp, #-0x60]!
mov fp, sp
- str q0, [fp, #0xA0] // [V00 arg0] - str q1, [fp, #0x70] // [V01 arg1] - str q2, [fp, #0x80] // [V01 arg1+0x10] - str q3, [fp, #0x90] // [V01 arg1+0x20] - str q4, [fp, #0x60] // [V02 arg2]
+ str q0, [fp, #0x50] // [V00 arg0] + str q1, [fp, #0x20] // [V01 arg1] + str q2, [fp, #0x30] // [V01 arg1+0x10] + str q3, [fp, #0x40] // [V01 arg1+0x20] + str q4, [fp, #0x10] // [V02 arg2]
;; size=28 bbWeight=1 PerfScore 6.50 G_M10198_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- ldr q16, [fp, #0xA0] // [V00 arg0] - str q16, [fp, #0x50] // [TEMP_05] - ldr q16, [fp, #0x70] // [V01 arg1] - str q16, [fp, #0x40] // [TEMP_04] - ldr q16, [fp, #0x80] // [V01 arg1+0x10] - str q16, [fp, #0x30] // [TEMP_03] - ldr q16, [fp, #0x90] // [V01 arg1+0x20] - str q16, [fp, #0x20] // [TEMP_02] - ldr q16, [fp, #0x60] // [V02 arg2] - str q16, [fp, #0x10] // [TEMP_01] - ldr q16, [fp, #0x50] // [TEMP_05] - ldr q17, [fp, #0x40] // [TEMP_04] - ldr q18, [fp, #0x30] // [TEMP_03] - ldr q19, [fp, #0x20] // [TEMP_02] - ldr q20, [fp, #0x10] // [TEMP_01] - tbx v16.16b, {v17.16b, v18.16b, v19.16b}, v20.16b - mov v0.16b, v16.16b - ;; size=68 bbWeight=1 PerfScore 28.50
+ ldr q0, [fp, #0x50] // [V00 arg0] + ldr q16, [fp, #0x20] // [V01 arg1] + ldr q17, [fp, #0x30] // [V01 arg1+0x10] + ldr q18, [fp, #0x40] // [V01 arg1+0x20] + ldr q19, [fp, #0x10] // [V02 arg2] + tbx v0.16b, {v16.16b, v17.16b, v18.16b}, v19.16b + ;; size=24 bbWeight=1 PerfScore 13.00
G_M10198_IG03: ; bbWeight=1, epilog, nogc, extend
- ldp fp, lr, [sp], #0xB0
+ ldp fp, lr, [sp], #0x60
ret lr ;; size=8 bbWeight=1 PerfScore 2.00
-Total bytes of code 104, prolog size 8, PerfScore 37.00, instruction count 26, allocated bytes for code 104 (MethodHash=51c0d829) for method System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128`1[byte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector128`1[byte]):System.Runtime.Intrinsics.Vector128`1[byte] (Tier0)
+Total bytes of code 60, prolog size 8, PerfScore 21.50, instruction count 15, allocated bytes for code 60 (MethodHash=51c0d829) for method System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128`1[byte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector128`1[byte]):System.Runtime.Intrinsics.Vector128`1[byte] (Tier0)
; ============================================================ Unwind Info: @@ -61,7 +45,7 @@ Unwind Info: E bit : 0 X bit : 0 Vers : 0
- Function Length : 26 (0x0001a) Actual length = 104 (0x000068)
+ Function Length : 15 (0x0000f) Actual length = 60 (0x00003c)
---- Epilog scopes ---- ---- Scope 0 Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) @@ -69,7 +53,7 @@ Unwind Info: ---- Unwind codes ---- E1 set_fp; mov fp, sp ---- Epilog start at index 1 ----
- 95 save_fplr_x #21 (0x15); stp fp, lr, [sp, #-176]!
+ 8B save_fplr_x #11 (0x0B); stp fp, lr, [sp, #-96]!
E4 end E4 end

-44 (-42.31%) : 83975.dasm - System.Runtime.Intrinsics.Arm.AdvSimd:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64`1[ubyte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector64`1[ubyte]):System.Runtime.Intrinsics.Vector64`1[ubyte] (Tier0)

@@ -5,52 +5,36 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) simd8 -> [fp+0x88] HFA(simd8) do-not-enreg[S] <System.Runtime.Intrinsics.Vector64`1[ubyte]> -; V01 arg1 [V01 ] ( 1, 1 ) struct (48) [fp+0x58] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V02 arg2 [V02 ] ( 1, 1 ) simd8 -> [fp+0x50] HFA(simd8) do-not-enreg[S] <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V00 arg0 [V00 ] ( 1, 1 ) simd8 -> [fp+0x48] HFA(simd8) do-not-enreg[S] <System.Runtime.Intrinsics.Vector64`1[ubyte]> +; V01 arg1 [V01 ] ( 1, 1 ) struct (48) [fp+0x18] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V02 arg2 [V02 ] ( 1, 1 ) simd8 -> [fp+0x10] HFA(simd8) do-not-enreg[S] <System.Runtime.Intrinsics.Vector64`1[ubyte]>
;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; TEMP_02 simd8 -> [fp+0x48] -; TEMP_01 simd8 -> [fp+0x40] -; TEMP_05 simd16 -> [fp+0x30] -; TEMP_04 simd16 -> [fp+0x20] -; TEMP_03 simd16 -> [fp+0x10]
;
-; Lcl frame size = 128
+; Lcl frame size = 64
G_M54744_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- stp fp, lr, [sp, #-0x90]!
+ stp fp, lr, [sp, #-0x50]!
mov fp, sp
- str d0, [fp, #0x88] // [V00 arg0] - str q1, [fp, #0x58] // [V01 arg1] - str q2, [fp, #0x68] // [V01 arg1+0x10] - str q3, [fp, #0x78] // [V01 arg1+0x20] - str d4, [fp, #0x50] // [V02 arg2]
+ str d0, [fp, #0x48] // [V00 arg0] + str q1, [fp, #0x18] // [V01 arg1] + str q2, [fp, #0x28] // [V01 arg1+0x10] + str q3, [fp, #0x38] // [V01 arg1+0x20] + str d4, [fp, #0x10] // [V02 arg2]
;; size=28 bbWeight=1 PerfScore 6.50 G_M54744_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- ldr d16, [fp, #0x88] // [V00 arg0] - str d16, [fp, #0x48] // [TEMP_02] - ldr q16, [fp, #0x58] // [V01 arg1] - str q16, [fp, #0x30] // [TEMP_05] - ldr q16, [fp, #0x68] // [V01 arg1+0x10] - str q16, [fp, #0x20] // [TEMP_04] - ldr q16, [fp, #0x78] // [V01 arg1+0x20] - str q16, [fp, #0x10] // [TEMP_03] - ldr d16, [fp, #0x50] // [V02 arg2] - str d16, [fp, #0x40] // [TEMP_01] - ldr d16, [fp, #0x48] // [TEMP_02] - ldr q17, [fp, #0x30] // [TEMP_05] - ldr q18, [fp, #0x20] // [TEMP_04] - ldr q19, [fp, #0x10] // [TEMP_03] - ldr d20, [fp, #0x40] // [TEMP_01] - tbx v16.8b, {v17.16b, v18.16b, v19.16b}, v20.8b - mov v0.8b, v16.8b - ;; size=68 bbWeight=1 PerfScore 28.50
+ ldr d0, [fp, #0x48] // [V00 arg0] + ldr q16, [fp, #0x18] // [V01 arg1] + ldr q17, [fp, #0x28] // [V01 arg1+0x10] + ldr q18, [fp, #0x38] // [V01 arg1+0x20] + ldr d19, [fp, #0x10] // [V02 arg2] + tbx v0.8b, {v16.16b, v17.16b, v18.16b}, v19.8b + ;; size=24 bbWeight=1 PerfScore 13.00
G_M54744_IG03: ; bbWeight=1, epilog, nogc, extend
- ldp fp, lr, [sp], #0x90
+ ldp fp, lr, [sp], #0x50
ret lr ;; size=8 bbWeight=1 PerfScore 2.00
-Total bytes of code 104, prolog size 8, PerfScore 37.00, instruction count 26, allocated bytes for code 104 (MethodHash=cf242a27) for method System.Runtime.Intrinsics.Arm.AdvSimd:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64`1[ubyte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector64`1[ubyte]):System.Runtime.Intrinsics.Vector64`1[ubyte] (Tier0)
+Total bytes of code 60, prolog size 8, PerfScore 21.50, instruction count 15, allocated bytes for code 60 (MethodHash=cf242a27) for method System.Runtime.Intrinsics.Arm.AdvSimd:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64`1[ubyte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector64`1[ubyte]):System.Runtime.Intrinsics.Vector64`1[ubyte] (Tier0)
; ============================================================ Unwind Info: @@ -61,7 +45,7 @@ Unwind Info: E bit : 0 X bit : 0 Vers : 0
- Function Length : 26 (0x0001a) Actual length = 104 (0x000068)
+ Function Length : 15 (0x0000f) Actual length = 60 (0x00003c)
---- Epilog scopes ---- ---- Scope 0 Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) @@ -69,7 +53,7 @@ Unwind Info: ---- Unwind codes ---- E1 set_fp; mov fp, sp ---- Epilog start at index 1 ----
- 91 save_fplr_x #17 (0x11); stp fp, lr, [sp, #-144]!
+ 89 save_fplr_x #9 (0x09); stp fp, lr, [sp, #-80]!
E4 end E4 end

-44 (-42.31%) : 45732.dasm - System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128`1[ubyte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)

@@ -5,52 +5,36 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) simd16 -> [fp+0xA0] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> -; V01 arg1 [V01 ] ( 1, 1 ) struct (48) [fp+0x70] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V02 arg2 [V02 ] ( 1, 1 ) simd16 -> [fp+0x60] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V00 arg0 [V00 ] ( 1, 1 ) simd16 -> [fp+0x50] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]> +; V01 arg1 [V01 ] ( 1, 1 ) struct (48) [fp+0x20] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V02 arg2 [V02 ] ( 1, 1 ) simd16 -> [fp+0x10] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; TEMP_05 simd16 -> [fp+0x50] -; TEMP_04 simd16 -> [fp+0x40] -; TEMP_03 simd16 -> [fp+0x30] -; TEMP_02 simd16 -> [fp+0x20] -; TEMP_01 simd16 -> [fp+0x10]
;
-; Lcl frame size = 160
+; Lcl frame size = 80
G_M246_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- stp fp, lr, [sp, #-0xB0]!
+ stp fp, lr, [sp, #-0x60]!
mov fp, sp
- str q0, [fp, #0xA0] // [V00 arg0] - str q1, [fp, #0x70] // [V01 arg1] - str q2, [fp, #0x80] // [V01 arg1+0x10] - str q3, [fp, #0x90] // [V01 arg1+0x20] - str q4, [fp, #0x60] // [V02 arg2]
+ str q0, [fp, #0x50] // [V00 arg0] + str q1, [fp, #0x20] // [V01 arg1] + str q2, [fp, #0x30] // [V01 arg1+0x10] + str q3, [fp, #0x40] // [V01 arg1+0x20] + str q4, [fp, #0x10] // [V02 arg2]
;; size=28 bbWeight=1 PerfScore 6.50 G_M246_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- ldr q16, [fp, #0xA0] // [V00 arg0] - str q16, [fp, #0x50] // [TEMP_05] - ldr q16, [fp, #0x70] // [V01 arg1] - str q16, [fp, #0x40] // [TEMP_04] - ldr q16, [fp, #0x80] // [V01 arg1+0x10] - str q16, [fp, #0x30] // [TEMP_03] - ldr q16, [fp, #0x90] // [V01 arg1+0x20] - str q16, [fp, #0x20] // [TEMP_02] - ldr q16, [fp, #0x60] // [V02 arg2] - str q16, [fp, #0x10] // [TEMP_01] - ldr q16, [fp, #0x50] // [TEMP_05] - ldr q17, [fp, #0x40] // [TEMP_04] - ldr q18, [fp, #0x30] // [TEMP_03] - ldr q19, [fp, #0x20] // [TEMP_02] - ldr q20, [fp, #0x10] // [TEMP_01] - tbx v16.16b, {v17.16b, v18.16b, v19.16b}, v20.16b - mov v0.16b, v16.16b - ;; size=68 bbWeight=1 PerfScore 28.50
+ ldr q0, [fp, #0x50] // [V00 arg0] + ldr q16, [fp, #0x20] // [V01 arg1] + ldr q17, [fp, #0x30] // [V01 arg1+0x10] + ldr q18, [fp, #0x40] // [V01 arg1+0x20] + ldr q19, [fp, #0x10] // [V02 arg2] + tbx v0.16b, {v16.16b, v17.16b, v18.16b}, v19.16b + ;; size=24 bbWeight=1 PerfScore 13.00
G_M246_IG03: ; bbWeight=1, epilog, nogc, extend
- ldp fp, lr, [sp], #0xB0
+ ldp fp, lr, [sp], #0x60
ret lr ;; size=8 bbWeight=1 PerfScore 2.00
-Total bytes of code 104, prolog size 8, PerfScore 37.00, instruction count 26, allocated bytes for code 104 (MethodHash=4b78ff09) for method System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128`1[ubyte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)
+Total bytes of code 60, prolog size 8, PerfScore 21.50, instruction count 15, allocated bytes for code 60 (MethodHash=4b78ff09) for method System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128`1[ubyte],System.ValueTuple`3[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)
; ============================================================ Unwind Info: @@ -61,7 +45,7 @@ Unwind Info: E bit : 0 X bit : 0 Vers : 0
- Function Length : 26 (0x0001a) Actual length = 104 (0x000068)
+ Function Length : 15 (0x0000f) Actual length = 60 (0x00003c)
---- Epilog scopes ---- ---- Scope 0 Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) @@ -69,7 +53,7 @@ Unwind Info: ---- Unwind codes ---- E1 set_fp; mov fp, sp ---- Epilog start at index 1 ----
- 95 save_fplr_x #21 (0x15); stp fp, lr, [sp, #-176]!
+ 8B save_fplr_x #11 (0x0B); stp fp, lr, [sp, #-96]!
E4 end E4 end

-28 (-28.00%) : 45542.dasm - System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)

@@ -5,50 +5,38 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) struct (64) [fp+0x70] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> -; V01 arg1 [V01 ] ( 1, 1 ) simd16 -> [fp+0x60] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V00 arg0 [V00 ] ( 1, 1 ) struct (64) [fp+0x20] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]> +; V01 arg1 [V01 ] ( 1, 1 ) simd16 -> [fp+0x10] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; TEMP_05 simd16 -> [fp+0x50] -; TEMP_04 simd16 -> [fp+0x40] -; TEMP_03 simd16 -> [fp+0x30] -; TEMP_02 simd16 -> [fp+0x20] -; TEMP_01 simd16 -> [fp+0x10]
;
-; Lcl frame size = 160
+; Lcl frame size = 80
G_M37192_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- stp fp, lr, [sp, #-0xB0]!
+ stp fp, lr, [sp, #-0x60]!
mov fp, sp
- str q0, [fp, #0x70] // [V00 arg0] - str q1, [fp, #0x80] // [V00 arg0+0x10] - str q2, [fp, #0x90] // [V00 arg0+0x20] - str q3, [fp, #0xA0] // [V00 arg0+0x30] - str q4, [fp, #0x60] // [V01 arg1]
+ str q0, [fp, #0x20] // [V00 arg0] + str q1, [fp, #0x30] // [V00 arg0+0x10] + str q2, [fp, #0x40] // [V00 arg0+0x20] + str q3, [fp, #0x50] // [V00 arg0+0x30] + str q4, [fp, #0x10] // [V01 arg1]
;; size=28 bbWeight=1 PerfScore 6.50 G_M37192_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- ldr q16, [fp, #0x70] // [V00 arg0] - str q16, [fp, #0x50] // [TEMP_05] - ldr q16, [fp, #0x80] // [V00 arg0+0x10] - str q16, [fp, #0x40] // [TEMP_04] - ldr q16, [fp, #0x90] // [V00 arg0+0x20] - str q16, [fp, #0x30] // [TEMP_03] - ldr q16, [fp, #0xA0] // [V00 arg0+0x30] - str q16, [fp, #0x20] // [TEMP_02] - ldr q16, [fp, #0x60] // [V01 arg1] - str q16, [fp, #0x10] // [TEMP_01] - ldr q16, [fp, #0x50] // [TEMP_05] - ldr q17, [fp, #0x40] // [TEMP_04] - ldr q18, [fp, #0x30] // [TEMP_03] - ldr q19, [fp, #0x20] // [TEMP_02] - ldr q20, [fp, #0x10] // [TEMP_01] - tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v20.16b - ;; size=64 bbWeight=1 PerfScore 28.00
+ ldr q0, [fp, #0x20] // [V00 arg0] + ldr q16, [fp, #0x30] // [V00 arg0+0x10] + ldr q17, [fp, #0x40] // [V00 arg0+0x20] + ldr q18, [fp, #0x50] // [V00 arg0+0x30] + ldr q19, [fp, #0x10] // [V01 arg1] + mov v1.16b, v16.16b + mov v2.16b, v17.16b + mov v3.16b, v18.16b + tbl v0.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v19.16b + ;; size=36 bbWeight=1 PerfScore 14.50
G_M37192_IG03: ; bbWeight=1, epilog, nogc, extend
- ldp fp, lr, [sp], #0xB0
+ ldp fp, lr, [sp], #0x60
ret lr ;; size=8 bbWeight=1 PerfScore 2.00
-Total bytes of code 100, prolog size 8, PerfScore 36.50, instruction count 25, allocated bytes for code 100 (MethodHash=6ca86eb7) for method System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)
+Total bytes of code 72, prolog size 8, PerfScore 23.00, instruction count 18, allocated bytes for code 72 (MethodHash=6ca86eb7) for method System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]],System.Runtime.Intrinsics.Vector128`1[ubyte]):System.Runtime.Intrinsics.Vector128`1[ubyte] (Tier0)
; ============================================================ Unwind Info: @@ -59,7 +47,7 @@ Unwind Info: E bit : 0 X bit : 0 Vers : 0
- Function Length : 25 (0x00019) Actual length = 100 (0x000064)
+ Function Length : 18 (0x00012) Actual length = 72 (0x000048)
---- Epilog scopes ---- ---- Scope 0 Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) @@ -67,7 +55,7 @@ Unwind Info: ---- Unwind codes ---- E1 set_fp; mov fp, sp ---- Epilog start at index 1 ----
- 95 save_fplr_x #21 (0x15); stp fp, lr, [sp, #-176]!
+ 8B save_fplr_x #11 (0x0B); stp fp, lr, [sp, #-96]!
E4 end E4 end

-28 (-28.00%) : 83854.dasm - System.Runtime.Intrinsics.Arm.AdvSimd:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector64`1[byte]):System.Runtime.Intrinsics.Vector64`1[byte] (Tier0)

@@ -5,50 +5,38 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) struct (64) [fp+0x60] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]]> -; V01 arg1 [V01 ] ( 1, 1 ) simd8 -> [fp+0x58] HFA(simd8) do-not-enreg[S] <System.Runtime.Intrinsics.Vector64`1[byte]>
+; V00 arg0 [V00 ] ( 1, 1 ) struct (64) [fp+0x20] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]]> +; V01 arg1 [V01 ] ( 1, 1 ) simd8 -> [fp+0x18] HFA(simd8) do-not-enreg[S] <System.Runtime.Intrinsics.Vector64`1[byte]>
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; TEMP_01 simd8 -> [fp+0x50] -; TEMP_05 simd16 -> [fp+0x40] -; TEMP_04 simd16 -> [fp+0x30] -; TEMP_03 simd16 -> [fp+0x20] -; TEMP_02 simd16 -> [fp+0x10]
;
-; Lcl frame size = 144
+; Lcl frame size = 80
G_M38175_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- stp fp, lr, [sp, #-0xA0]!
+ stp fp, lr, [sp, #-0x60]!
mov fp, sp
- str q0, [fp, #0x60] // [V00 arg0] - str q1, [fp, #0x70] // [V00 arg0+0x10] - str q2, [fp, #0x80] // [V00 arg0+0x20] - str q3, [fp, #0x90] // [V00 arg0+0x30] - str d4, [fp, #0x58] // [V01 arg1]
+ str q0, [fp, #0x20] // [V00 arg0] + str q1, [fp, #0x30] // [V00 arg0+0x10] + str q2, [fp, #0x40] // [V00 arg0+0x20] + str q3, [fp, #0x50] // [V00 arg0+0x30] + str d4, [fp, #0x18] // [V01 arg1]
;; size=28 bbWeight=1 PerfScore 6.50 G_M38175_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- ldr q16, [fp, #0x60] // [V00 arg0] - str q16, [fp, #0x40] // [TEMP_05] - ldr q16, [fp, #0x70] // [V00 arg0+0x10] - str q16, [fp, #0x30] // [TEMP_04] - ldr q16, [fp, #0x80] // [V00 arg0+0x20] - str q16, [fp, #0x20] // [TEMP_03] - ldr q16, [fp, #0x90] // [V00 arg0+0x30] - str q16, [fp, #0x10] // [TEMP_02] - ldr d16, [fp, #0x58] // [V01 arg1] - str d16, [fp, #0x50] // [TEMP_01] - ldr q16, [fp, #0x40] // [TEMP_05] - ldr q17, [fp, #0x30] // [TEMP_04] - ldr q18, [fp, #0x20] // [TEMP_03] - ldr q19, [fp, #0x10] // [TEMP_02] - ldr d20, [fp, #0x50] // [TEMP_01] - tbl v0.8b, {v16.16b, v17.16b, v18.16b, v19.16b}, v20.8b - ;; size=64 bbWeight=1 PerfScore 28.00
+ ldr q0, [fp, #0x20] // [V00 arg0] + ldr q16, [fp, #0x30] // [V00 arg0+0x10] + ldr q17, [fp, #0x40] // [V00 arg0+0x20] + ldr q18, [fp, #0x50] // [V00 arg0+0x30] + ldr d19, [fp, #0x18] // [V01 arg1] + mov v1.16b, v16.16b + mov v2.16b, v17.16b + mov v3.16b, v18.16b + tbl v0.8b, {v0.16b, v1.16b, v2.16b, v3.16b}, v19.8b + ;; size=36 bbWeight=1 PerfScore 14.50
G_M38175_IG03: ; bbWeight=1, epilog, nogc, extend
- ldp fp, lr, [sp], #0xA0
+ ldp fp, lr, [sp], #0x60
ret lr ;; size=8 bbWeight=1 PerfScore 2.00
-Total bytes of code 100, prolog size 8, PerfScore 36.50, instruction count 25, allocated bytes for code 100 (MethodHash=b5596ae0) for method System.Runtime.Intrinsics.Arm.AdvSimd:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector64`1[byte]):System.Runtime.Intrinsics.Vector64`1[byte] (Tier0)
+Total bytes of code 72, prolog size 8, PerfScore 23.00, instruction count 18, allocated bytes for code 72 (MethodHash=b5596ae0) for method System.Runtime.Intrinsics.Arm.AdvSimd:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector64`1[byte]):System.Runtime.Intrinsics.Vector64`1[byte] (Tier0)
; ============================================================ Unwind Info: @@ -59,7 +47,7 @@ Unwind Info: E bit : 0 X bit : 0 Vers : 0
- Function Length : 25 (0x00019) Actual length = 100 (0x000064)
+ Function Length : 18 (0x00012) Actual length = 72 (0x000048)
---- Epilog scopes ---- ---- Scope 0 Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) @@ -67,7 +55,7 @@ Unwind Info: ---- Unwind codes ---- E1 set_fp; mov fp, sp ---- Epilog start at index 1 ----
- 93 save_fplr_x #19 (0x13); stp fp, lr, [sp, #-160]!
+ 8B save_fplr_x #11 (0x0B); stp fp, lr, [sp, #-96]!
E4 end E4 end

-28 (-28.00%) : 45575.dasm - System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector128`1[byte]):System.Runtime.Intrinsics.Vector128`1[byte] (Tier0)

@@ -5,50 +5,38 @@ ; partially interruptible ; Final local variable assignments ;
-; V00 arg0 [V00 ] ( 1, 1 ) struct (64) [fp+0x70] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]]> -; V01 arg1 [V01 ] ( 1, 1 ) simd16 -> [fp+0x60] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[byte]>
+; V00 arg0 [V00 ] ( 1, 1 ) struct (64) [fp+0x20] HFA(simd16) do-not-enreg[SFA] multireg-arg <System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]]> +; V01 arg1 [V01 ] ( 1, 1 ) simd16 -> [fp+0x10] HFA(simd16) do-not-enreg[S] <System.Runtime.Intrinsics.Vector128`1[byte]>
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; TEMP_05 simd16 -> [fp+0x50] -; TEMP_04 simd16 -> [fp+0x40] -; TEMP_03 simd16 -> [fp+0x30] -; TEMP_02 simd16 -> [fp+0x20] -; TEMP_01 simd16 -> [fp+0x10]
;
-; Lcl frame size = 160
+; Lcl frame size = 80
G_M22952_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- stp fp, lr, [sp, #-0xB0]!
+ stp fp, lr, [sp, #-0x60]!
mov fp, sp
- str q0, [fp, #0x70] // [V00 arg0] - str q1, [fp, #0x80] // [V00 arg0+0x10] - str q2, [fp, #0x90] // [V00 arg0+0x20] - str q3, [fp, #0xA0] // [V00 arg0+0x30] - str q4, [fp, #0x60] // [V01 arg1]
+ str q0, [fp, #0x20] // [V00 arg0] + str q1, [fp, #0x30] // [V00 arg0+0x10] + str q2, [fp, #0x40] // [V00 arg0+0x20] + str q3, [fp, #0x50] // [V00 arg0+0x30] + str q4, [fp, #0x10] // [V01 arg1]
;; size=28 bbWeight=1 PerfScore 6.50 G_M22952_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- ldr q16, [fp, #0x70] // [V00 arg0] - str q16, [fp, #0x50] // [TEMP_05] - ldr q16, [fp, #0x80] // [V00 arg0+0x10] - str q16, [fp, #0x40] // [TEMP_04] - ldr q16, [fp, #0x90] // [V00 arg0+0x20] - str q16, [fp, #0x30] // [TEMP_03] - ldr q16, [fp, #0xA0] // [V00 arg0+0x30] - str q16, [fp, #0x20] // [TEMP_02] - ldr q16, [fp, #0x60] // [V01 arg1] - str q16, [fp, #0x10] // [TEMP_01] - ldr q16, [fp, #0x50] // [TEMP_05] - ldr q17, [fp, #0x40] // [TEMP_04] - ldr q18, [fp, #0x30] // [TEMP_03] - ldr q19, [fp, #0x20] // [TEMP_02] - ldr q20, [fp, #0x10] // [TEMP_01] - tbl v0.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v20.16b - ;; size=64 bbWeight=1 PerfScore 28.00
+ ldr q0, [fp, #0x20] // [V00 arg0] + ldr q16, [fp, #0x30] // [V00 arg0+0x10] + ldr q17, [fp, #0x40] // [V00 arg0+0x20] + ldr q18, [fp, #0x50] // [V00 arg0+0x30] + ldr q19, [fp, #0x10] // [V01 arg1] + mov v1.16b, v16.16b + mov v2.16b, v17.16b + mov v3.16b, v18.16b + tbl v0.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v19.16b + ;; size=36 bbWeight=1 PerfScore 14.50
G_M22952_IG03: ; bbWeight=1, epilog, nogc, extend
- ldp fp, lr, [sp], #0xB0
+ ldp fp, lr, [sp], #0x60
ret lr ;; size=8 bbWeight=1 PerfScore 2.00
-Total bytes of code 100, prolog size 8, PerfScore 36.50, instruction count 25, allocated bytes for code 100 (MethodHash=601ea657) for method System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector128`1[byte]):System.Runtime.Intrinsics.Vector128`1[byte] (Tier0)
+Total bytes of code 72, prolog size 8, PerfScore 23.00, instruction count 18, allocated bytes for code 72 (MethodHash=601ea657) for method System.Runtime.Intrinsics.Arm.AdvSimd+Arm64:VectorTableLookup(System.ValueTuple`4[System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte],System.Runtime.Intrinsics.Vector128`1[byte]],System.Runtime.Intrinsics.Vector128`1[byte]):System.Runtime.Intrinsics.Vector128`1[byte] (Tier0)
; ============================================================ Unwind Info: @@ -59,7 +47,7 @@ Unwind Info: E bit : 0 X bit : 0 Vers : 0
- Function Length : 25 (0x00019) Actual length = 100 (0x000064)
+ Function Length : 18 (0x00012) Actual length = 72 (0x000048)
---- Epilog scopes ---- ---- Scope 0 Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) @@ -67,7 +55,7 @@ Unwind Info: ---- Unwind codes ---- E1 set_fp; mov fp, sp ---- Epilog start at index 1 ----
- 95 save_fplr_x #21 (0x15); stp fp, lr, [sp, #-176]!
+ 8B save_fplr_x #11 (0x0B); stp fp, lr, [sp, #-96]!
E4 end E4 end

Details

Improvements/regressions per collection

Collection Contexts with diffs Improvements Regressions Same size Improvements (bytes) Regressions (bytes)
benchmarks.run.linux.arm64.checked.mch 0 0 0 0 -0 +0
benchmarks.run_pgo.linux.arm64.checked.mch 3 3 0 0 -372 +0
benchmarks.run_tiered.linux.arm64.checked.mch 2 2 0 0 -220 +0
coreclr_tests.run.linux.arm64.checked.mch 266 266 0 0 -40,760 +0
libraries.crossgen2.linux.arm64.checked.mch 0 0 0 0 -0 +0
libraries.pmi.linux.arm64.checked.mch 0 0 0 0 -0 +0
libraries_tests.run.linux.arm64.Release.mch 0 0 0 0 -0 +0
librariestestsnotieredcompilation.run.linux.arm64.Release.mch 0 0 0 0 -0 +0
realworld.run.linux.arm64.checked.mch 0 0 0 0 -0 +0
smoke_tests.nativeaot.linux.arm64.checked.mch 0 0 0 0 -0 +0
271 271 0 0 -41,352 +0

Context information

Collection Diffed contexts MinOpts FullOpts Missed, base Missed, diff
benchmarks.run.linux.arm64.checked.mch 32,435 2,362 30,073 0 (0.00%) 0 (0.00%)
benchmarks.run_pgo.linux.arm64.checked.mch 152,737 60,751 91,986 14 (0.01%) 14 (0.01%)
benchmarks.run_tiered.linux.arm64.checked.mch 60,787 45,077 15,710 0 (0.00%) 0 (0.00%)
coreclr_tests.run.linux.arm64.checked.mch 626,684 383,548 243,136 12 (0.00%) 12 (0.00%)
libraries.crossgen2.linux.arm64.checked.mch 1,936 0 1,936 0 (0.00%) 0 (0.00%)
libraries.pmi.linux.arm64.checked.mch 295,687 6 295,681 3 (0.00%) 3 (0.00%)
libraries_tests.run.linux.arm64.Release.mch 750,983 494,543 256,440 128 (0.02%) 128 (0.02%)
librariestestsnotieredcompilation.run.linux.arm64.Release.mch 304,826 21,600 283,226 2 (0.00%) 2 (0.00%)
realworld.run.linux.arm64.checked.mch 33,343 157 33,186 0 (0.00%) 0 (0.00%)
smoke_tests.nativeaot.linux.arm64.checked.mch 52 0 52 0 (0.00%) 0 (0.00%)
2,259,470 1,008,044 1,251,426 159 (0.01%) 159 (0.01%)

jit-analyze output

benchmarks.run_pgo.linux.arm64.checked.mch

To reproduce these diffs on Windows x64: superpmi.py asmdiffs -target_os linux -target_arch arm64 -arch x64


Summary of Code Size diffs:
(Lower is better)

Total bytes of base: 79903244 (overridden on cmd)
Total bytes of diff: 79902872 (overridden on cmd)
Total bytes of delta: -372 (-0.00 % of base)
    relative diff is an improvement.

Detail diffs



0 total files with Code Size differences (0 improved, 0 regressed), 3 unchanged.

0 total methods with Code Size differences (0 improved, 0 regressed).


benchmarks.run_tiered.linux.arm64.checked.mch

To reproduce these diffs on Windows x64: superpmi.py asmdiffs -target_os linux -target_arch arm64 -arch x64


Summary of Code Size diffs:
(Lower is better)

Total bytes of base: 22276872 (overridden on cmd)
Total bytes of diff: 22276652 (overridden on cmd)
Total bytes of delta: -220 (-0.00 % of base)
    relative diff is an improvement.

Detail diffs



0 total files with Code Size differences (0 improved, 0 regressed), 2 unchanged.

0 total methods with Code Size differences (0 improved, 0 regressed).


coreclr_tests.run.linux.arm64.checked.mch

To reproduce these diffs on Windows x64: superpmi.py asmdiffs -target_os linux -target_arch arm64 -arch x64


Summary of Code Size diffs:
(Lower is better)

Total bytes of base: 509740232 (overridden on cmd)
Total bytes of diff: 509699472 (overridden on cmd)
Total bytes of delta: -40760 (-0.01 % of base)
    relative diff is an improvement.

Detail diffs



0 total files with Code Size differences (0 improved, 0 regressed), 72 unchanged.



linux x64

Diffs are based on 2,249,703 contexts (981,298 MinOpts, 1,268,405 FullOpts).

MISSED contexts: 134 (0.01%)

No diffs found.

Details

Context information

Collection Diffed contexts MinOpts FullOpts Missed, base Missed, diff
benchmarks.run.linux.x64.checked.mch 34,975 3,135 31,840 0 (0.00%) 0 (0.00%)
benchmarks.run_pgo.linux.x64.checked.mch 156,554 60,225 96,329 13 (0.01%) 13 (0.01%)
benchmarks.run_tiered.linux.x64.checked.mch 56,298 42,308 13,990 0 (0.00%) 0 (0.00%)
coreclr_tests.run.linux.x64.checked.mch 598,040 355,280 242,760 10 (0.00%) 10 (0.00%)
libraries.crossgen2.linux.x64.checked.mch 1,930 0 1,930 0 (0.00%) 0 (0.00%)
libraries.pmi.linux.x64.checked.mch 296,878 6 296,872 0 (0.00%) 0 (0.00%)
libraries_tests.run.linux.x64.Release.mch 766,353 498,383 267,970 111 (0.01%) 111 (0.01%)
libraries_tests_no_tiered_compilation.run.linux.x64.Release.mch 305,396 21,912 283,484 0 (0.00%) 0 (0.00%)
realworld.run.linux.x64.checked.mch 33,215 49 33,166 0 (0.00%) 0 (0.00%)
smoke_tests.nativeaot.linux.x64.checked.mch 64 0 64 0 (0.00%) 0 (0.00%)
2,249,703 981,298 1,268,405 134 (0.01%) 134 (0.01%)


windows x64

Diffs are based on 2,098,526 contexts (926,221 MinOpts, 1,172,305 FullOpts).

MISSED contexts: 138 (0.01%)

No diffs found.

Details

Context information

Collection Diffed contexts MinOpts FullOpts Missed, base Missed, diff
benchmarks.run.windows.x64.checked.mch 27,917 4 27,913 0 (0.00%) 0 (0.00%)
benchmarks.run_pgo.windows.x64.checked.mch 102,635 50,161 52,474 19 (0.02%) 19 (0.02%)
benchmarks.run_tiered.windows.x64.checked.mch 54,333 36,871 17,462 0 (0.00%) 0 (0.00%)
coreclr_tests.run.windows.x64.checked.mch 573,722 341,128 232,594 8 (0.00%) 8 (0.00%)
libraries.crossgen2.windows.x64.checked.mch 2,104 0 2,104 0 (0.00%) 0 (0.00%)
libraries.pmi.windows.x64.checked.mch 309,148 6 309,142 0 (0.00%) 0 (0.00%)
libraries_tests.run.windows.x64.Release.mch 671,224 476,124 195,100 111 (0.02%) 111 (0.02%)
libraries_tests_no_tiered_compilation.run.windows.x64.Release.mch 320,489 21,924 298,565 0 (0.00%) 0 (0.00%)
realworld.run.windows.x64.checked.mch 36,887 3 36,884 0 (0.00%) 0 (0.00%)
smoke_tests.nativeaot.windows.x64.checked.mch 67 0 67 0 (0.00%) 0 (0.00%)
2,098,526 926,221 1,172,305 138 (0.01%) 138 (0.01%)