Skip to content

Performance regression in masked u128 field updates on x86_64 #155422

@camc314

Description

@camc314

Code

I tried this code:

/// Mask selecting the low 32 bits of a `u128`.
const LOW_MASK: u128 = 0xFFFF_FFFF;

/// 128-bit packed value whose low 64 bits carry two `u32` fields:
/// `start` in bits 0..32 and `end` in bits 32..64.
///
/// None of the setters below modify bits 64..128.
#[repr(transparent)]
pub struct Token(u128);

impl Token {
    /// Replaces bits 0..32 with `start` using mask-and-or arithmetic on the
    /// whole `u128`.
    #[inline(always)]
    fn set_start_mask(&mut self, start: u32) {
        self.0 &= !LOW_MASK;
        self.0 |= start as u128;
    }

    /// Replaces bits 32..64 with `end` using mask-and-or arithmetic on the
    /// whole `u128`.
    #[inline(always)]
    fn set_end_mask(&mut self, end: u32) {
        self.0 &= !(LOW_MASK << 32);
        self.0 |= (end as u128) << 32;
    }

    /// Replaces bits 0..32 with `start`.
    ///
    /// On little-endian targets this is a direct 32-bit store into the first
    /// four bytes of the value; on other targets it falls back to the same
    /// mask-and-or sequence as [`Token::set_start_mask`].
    #[inline(always)]
    fn set_start_ptr(&mut self, start: u32) {
        #[cfg(target_endian = "little")]
        // SAFETY: the pointer is derived from `&mut self.0`, so it is valid for
        // writes, exclusively borrowed, and aligned at least as strictly as
        // `u32`. On a little-endian target the first four bytes of the `u128`
        // are exactly bits 0..32.
        unsafe {
            core::ptr::from_mut(&mut self.0).cast::<u32>().write(start);
        }

        #[cfg(not(target_endian = "little"))]
        {
            self.0 &= !LOW_MASK;
            self.0 |= start as u128;
        }
    }

    /// Replaces bits 32..64 with `end`.
    ///
    /// On little-endian targets this is a direct 32-bit store at byte offset 4;
    /// on other targets it falls back to the same mask-and-or sequence as
    /// [`Token::set_end_mask`].
    #[inline(always)]
    fn set_end_ptr(&mut self, end: u32) {
        #[cfg(target_endian = "little")]
        // SAFETY: the pointer is derived from `&mut self.0`, so it is valid for
        // writes and exclusively borrowed. `add(1)` advances by four bytes,
        // which stays inside the 16-byte `u128` and keeps `u32` alignment. On a
        // little-endian target bytes 4..8 are exactly bits 32..64.
        unsafe {
            core::ptr::from_mut(&mut self.0).cast::<u32>().add(1).write(end);
        }

        #[cfg(not(target_endian = "little"))]
        {
            // The receiver is `self`; there is no `token` binding inside this
            // method, so the fallback must go through `self.0` to compile.
            self.0 &= !(LOW_MASK << 32);
            self.0 |= (end as u128) << 32;
        }
    }
}

#[unsafe(no_mangle)]
pub extern "C" fn hot_start_mask(token: &mut Token, start: u32) -> u128 {
    token.set_start_mask(start);
    token.0
}

#[unsafe(no_mangle)]
pub extern "C" fn hot_end_mask(token: &mut Token, end: u32) -> u128 {
    token.set_end_mask(end);
    token.0
}

#[unsafe(no_mangle)]
pub extern "C" fn hot_start_ptr(token: &mut Token, start: u32) -> u128 {
    token.set_start_ptr(start);
    token.0
}

#[unsafe(no_mangle)]
pub extern "C" fn hot_end_ptr(token: &mut Token, end: u32) -> u128 {
    token.set_end_ptr(end);
    token.0
}

I expected to see this happen:

hot_end_mask:
        mov     rdx, qword ptr [rdi + 8]
        mov     eax, dword ptr [rdi]
        shl     rsi, 32
        or      rax, rsi
        mov     qword ptr [rdi], rax
        ret

hot_end_ptr:
        mov     dword ptr [rdi + 4], esi
        mov     rdx, qword ptr [rdi + 8]
        mov     rax, qword ptr [rdi]
        ret

hot_start_mask:
        mov     rdx, qword ptr [rdi + 8]
        mov     ecx, dword ptr [rdi + 4]
        shl     rcx, 32
        mov     eax, esi
        or      rax, rcx
        mov     qword ptr [rdi], rax
        ret

hot_start_ptr:
        mov     dword ptr [rdi], esi
        mov     rdx, qword ptr [rdi + 8]
        mov     rax, qword ptr [rdi]
        ret

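Instead, this happened (the `_mask` variants now go through SSE registers and store the unchanged upper 64 bits back to memory; the `_ptr` variants are unchanged):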

hot_end_mask:
        pshufd  xmm0, xmmword ptr [rdi], 238
        mov     eax, dword ptr [rdi]
        movq    rdx, xmm0
        shl     rsi, 32
        or      rax, rsi
        movq    qword ptr [rdi + 8], xmm0
        mov     qword ptr [rdi], rax
        ret

hot_end_ptr:
        mov     dword ptr [rdi + 4], esi
        mov     rdx, qword ptr [rdi + 8]
        mov     rax, qword ptr [rdi]
        ret

.LCPI2_0:
        .long   0
        .long   4294967295
        .long   4294967295
        .long   4294967295
hot_start_mask:
        movdqa  xmm0, xmmword ptr [rdi]
        pshufd  xmm1, xmm0, 238
        pand    xmm0, xmmword ptr [rip + .LCPI2_0]
        movq    rdx, xmm1
        movq    rcx, xmm0
        mov     eax, esi
        or      rax, rcx
        movq    qword ptr [rdi + 8], xmm1
        mov     qword ptr [rdi], rax
        ret

hot_start_ptr:
        mov     dword ptr [rdi], esi
        mov     rdx, qword ptr [rdi + 8]
        mov     rax, qword ptr [rdi]
        ret

Version it worked on

It most recently worked on: 1.94

Version with regression

rustc --version --verbose:

1.95

References: oxc-project/oxc#21509

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
    C-bug — Category: This is a bug.
    I-prioritize — Issue: Indicates that prioritization has been requested for this issue.
    needs-triage — This issue may need triage. Remove it if it has been sufficiently triaged.
    regression-untriaged — Untriaged performance or correctness regression.

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions