• Pierre Langlois's avatar
    [arm][arm64] Do not allocate temp registers for the write barrier. · 3f1a59f4
    Pierre Langlois authored
    Improve code generation for stores with write barriers slightly by using the
    assembler's dedicated scratch registers (x16 and x17 on Arm64, ip on Arm)
    instead of allocating temporaries.
    
    To do this, we've done two things:
    
      - Use ip as a scratch register when loading page flags.
    
      - TurboAssembler::CallRecordWriteStub() now takes the offset of the slot
        that's written to rather than its address, removing the need to allocate a
        temporary register for it.
    
    In essence, we've gone from:
    
    ```
    ;; Do the store.
    stur x19, [x9, #15]
    ;; Check *destination* object page flags and jump out-of-line.
    and x4, x9, #0xfffffffffff80000
    ldr x4, [x4, #8]
    tbnz x4, #2, #+0x1e7c
    |     ;; Check *source* object page flags.
    | `-> and x4, x19, #0xfffffffffff80000
    |     ldr x4, [x4, #8]
    |,--- tbz x4, #1, #-0x1e80
    |     ;; Compute address of slot.
    |     add x5, x9, #0xf (15)
    |     ;; Setup arguments to RecordWrite
    |     stp x2, x3, [sp, #-32]!
    |     stp x4, lr, [sp, #16]
    |     stp x0, x1, [sp, #-16]!
    |     mov x0, x9 ;; Object address in x9
    |     mov x1, x5 ;; Slot address in x5
    |     movz x2, #0x0
    |     movz x3, #0x100000000
    |     ;; Call RecordWrite
    |     ldr x16, pc+2056
    |     blr x16
    ```
    
    Which allocates x4 and x5 as temporaries.
    
    To:
    
    ```
    stur x19, [x9, #15]
    and x16, x9, #0xfffffffffff80000 ;; Using x16 instead of allocating x4.
    ldr x16, [x16, #8]
    tbnz x16, #2, #+0x1e7c
    | `-> and x16, x19, #0xfffffffffff80000
    |     ldr x16, [x16, #8]
    |,--- tbz x16, #1, #-0x1e80
    |     stp x2, x3, [sp, #-32]!
    |     stp x4, lr, [sp, #16]
    |     stp x0, x1, [sp, #-16]!
    |     mov x0, x9            ;; Object address still in x9.
    |     add x1, x9, #0xf (15) ;; Compute the slot address directly.
    |     movz x2, #0x0
    |     movz x3, #0x100000000
    |     ldr x16, pc+2056
    |     blr x16
    ```
    
    Finally, `RecordWriteField()` does not need an extra scratch register anymore.
    
    Change-Id: Icb71310e7b8ab1ca83ced250851456166b337d00
    Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1505793
    Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
    Reviewed-by: Sigurd Schneider <sigurds@chromium.org>
    Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
    Cr-Commit-Position: refs/heads/master@{#61153}
    3f1a59f4
turbo-assembler-arm64-unittest.cc 5.97 KB