【spec性能分析】iv冗余的sxtw

- 源码：spec 525 的 pixel.c，函数：x264_pixel_sad_16x16（由下面的宏体以 lx = ly = 16 展开得到）

```
static int name( uint8_t *pix1, int i_stride_pix1,  \
                 uint8_t *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_sum = 0;                                  \
    for( int y = 0; y < ly; y++ )                   \
    {                                               \
        for( int x = 0; x < lx; x++ )               \
        {                                           \
            i_sum += abs( pix1[x] - pix2[x] );      \
        }                                           \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_sum;                                   \
}
```

maple汇编（内层循环的 iv w5 为 32 位，每次迭代都要先用 sxtw 扩展为 64 位才能作为地址偏移）
```
1386   mov w4, w1
1387   mov w1, #0
1388   mov w6, #0
1389   sxtw  x7, w3
1390   sxtw  x8, w4
1391 .L.4__4:
1392   mov w5, #0   <===== "应改为 mov x5, #0，x5 为 iv"
1393 .L.4__2:
1394   sxtw  x3, w5   <====== "冗余"
1395   ldrb  w4, [x0,x3]
1396   ldrb  w3, [x2,x3]
1397   subs  w3, w4, w3
1398   cneg  w3, w3, MI
1399   add w1, w1, w3
1400   add w5, w5, #1
1401   cmp w5, #16
1402   blt .L.4__2
1403 .L.4__1:
1404   add x0, x0, x8
1405   add x2, x2, x7
1406   add w6, w6, #1
1407   cmp w6, #16
1408   blt .L.4__4
1409 .L.4__3:
1410   mov w0, w1
1411 .L.4__12:
1412   ret
```

maple IR
```
func &x264_pixel_sad_16x16 static (reg %7 <* u8>, reg %11 i32, reg %8 <* u8>, reg %10 i32) i32 {
1335   funcid 4
1336
1337 LOC 2 61
1338   regassign i32 %4 (constval i32 0)
1339   regassign i32 %5 (constval i32 0)
1340   regassign u64 %1 (cvt u64 i32 (regread i32 %10))
1341   regassign u64 %2 (cvt u64 i32 (regread i32 %11))
1342 @@4   regassign i32 %6 (constval i32 0)   <====== "%6为 iv 可直接变为 i64"
1343 @@2   regassign u64 %3 (cvt u64 i32 (regread i32 %6))    <====== "冗余"
1344   regassign i32 %4 (add i32 (
1345       regread i32 %4,
1346       abs i32 (sub i32 (
1347         iread u32 <* u8> 0 (add u64 (regread ptr %7, regread u64 %3)),
1348         iread u32 <* u8> 0 (add u64 (regread ptr %8, regread u64 %3))))))
1349   regassign i32 %6 (add i32 (regread i32 %6, constval i32 1))
1350   brtrue @@2 (lt i32 i32 (regread i32 %6, constval i32 16))
1351 @@1   regassign ptr %7 (add u64 (regread ptr %7, regread u64 %2))
1352   regassign ptr %8 (add u64 (regread ptr %8, regread u64 %1))
1353   regassign i32 %5 (add i32 (regread i32 %5, constval i32 1))
1354   brtrue @@4 (lt i32 i32 (regread i32 %5, constval i32 16))
1355 @@3   return (regread i32 %4)
1356 }
```

gcc汇编（对比：iv 直接使用 64 位寄存器 x5，循环内没有 sxtw）
```
18:   mov    x5, #0x0                        // #0
20:   ldrb   w4, [x7, x5]
      ldrb   w6, [x2, x5]
      subs   w4, w4, w6
      cneg   w4, w4, mi  // mi = first
      add    w0, w0, w4
      add    x5, x5, #0x1
      cmp    x5, #0x10
      b.ne   20
      add    x7, x7, x1
      add    x2, x2, x3
      subs   w8, w8, #0x1
      b.ne   18
```

GVP 方舟编译器/OpenArkCompiler

Content Risk Flag

Comments (4)

GVP方舟编译器/OpenArkCompiler

Content Risk Flag