汇编——SSE对齐(一. 未对齐情况)

SIMD是单指令多数据(Single Instruction, Multiple Data)的缩写,MMX最早实现了SIMD;SSE(Streaming SIMD Extensions,流式SIMD扩展)取代了MMX;后来AVX(Advanced Vector Extensions,高级向量扩展)又对SSE进行了扩展。如下代码展示了SSE处理未对齐内存的情况:

; sse_unaligned.asm
; Syntax: NASM (Intel operand order), target x86-64.
extern printf                           ; C library printf, called by the print helpers

section .data
    ; NOTE: no `align 16` is placed before these vectors; the surrounding
    ; article is about the unaligned case, and the code in main accesses them
    ; with the unaligned move forms (movups / movupd).

    ; Two packed single-precision vectors (4 x 32-bit float = 16 bytes each).
    spvector1   dd  1.1, 2.2, 3.3, 4.4
    spvector2   dd  1.1, 2.2, 2.2, 3.3

    ; Two packed double-precision vectors (2 x 64-bit double = 16 bytes each).
    dpvector1   dq  1.1, 2.2
    dpvector2   dq  3.3, 4.4

    ; printf format strings: newline (10) and NUL terminated.
    fmt1        db  "Single Precision Vector 1: %f, %f, %f, %f", 10, 0
    fmt2        db  "Single Precision Vector 2: %f, %f, %f, %f", 10, 0
    fmt3        db  "Sum of Single Precision Vector 1 and Vector 2: %f, %f, %f, %f", 10, 0
    fmt4        db  "Double Precision Vector 1: %f, %f", 10, 0
    fmt5        db  "Double Precision Vector 2: %f, %f", 10, 0
    fmt6        db  "Sum of Double Precision Vector 1 and Vector 2: %f, %f", 10, 0

section .bss
    spvector_res resd 4                 ; destination for the 4-float sum
    dpvector_res resq 4                 ; destination for the 2-double sum (only the first 16 bytes are stored to)

section .text
    global main
;-----------------------------------------------------------------------
; int main(void)
; ABI:   System V AMD64
; Prints the two single-precision vectors and their packed sum, then the
; two double-precision vectors and their packed sum. Every 128-bit load
; and store uses the unaligned move forms (movups / movupd).
; Out:   eax = 0 (process exit status)
; Calls: printspfp, printdpfp (rdi = format string, rsi = vector address)
;-----------------------------------------------------------------------
main:
        push    rbp                             ; also brings rsp back to 16-byte alignment for the calls below
        mov     rbp, rsp

        ; --- single precision --------------------------------------------
        lea     rsi, [rel spvector1]            ; RIP-relative: position-independent
        lea     rdi, [rel fmt1]
        call    printspfp

        lea     rsi, [rel spvector2]
        lea     rdi, [rel fmt2]
        call    printspfp

        ; xmm registers are caller-saved, so reload both operands after the calls.
        movups  xmm0, [rel spvector1]           ; unaligned load of 4 floats
        movups  xmm1, [rel spvector2]
        addps   xmm0, xmm1                      ; lane-wise sum of 4 floats
        movups  [rel spvector_res], xmm0        ; unaligned store of the result
        lea     rsi, [rel spvector_res]
        lea     rdi, [rel fmt3]
        call    printspfp

        ; --- double precision --------------------------------------------
        lea     rsi, [rel dpvector1]
        lea     rdi, [rel fmt4]
        call    printdpfp

        lea     rsi, [rel dpvector2]
        lea     rdi, [rel fmt5]
        call    printdpfp

        movupd  xmm0, [rel dpvector1]           ; unaligned load of 2 doubles
        movupd  xmm1, [rel dpvector2]
        addpd   xmm0, xmm1                      ; lane-wise sum of 2 doubles
        movupd  [rel dpvector_res], xmm0
        lea     rsi, [rel dpvector_res]
        lea     rdi, [rel fmt6]
        call    printdpfp

        ; Without this, eax would still hold the last printf return value
        ; and that would become the exit status.
        xor     eax, eax                        ; return 0
        leave
        ret

;-----------------------------------------------------------------------
; printspfp: print four consecutive single-precision floats via printf
; ABI:   System V AMD64
; In:    rdi = printf format string (passed through unchanged)
;        rsi = address of 4 consecutive 32-bit floats
; Out:   rax = whatever printf returns
; Clobb: xmm0-xmm3, plus everything printf is allowed to clobber
;-----------------------------------------------------------------------
printspfp:
        push        rbp                         ; keeps rsp 16-byte aligned at the call
        mov         rbp, rsp

        ; Fetch the four scalars; no alignment is required for movss.
        movss       xmm3, [rsi + 12]
        movss       xmm2, [rsi + 8]
        movss       xmm1, [rsi + 4]
        movss       xmm0, [rsi]

        ; Variadic float arguments are passed as doubles, so widen each one.
        cvtss2sd    xmm0, xmm0
        cvtss2sd    xmm1, xmm1
        cvtss2sd    xmm2, xmm2
        cvtss2sd    xmm3, xmm3

        mov         eax, 4                      ; variadic call: al = number of xmm argument registers used
        call        printf

        pop         rbp                         ; no locals were allocated, so rsp == rbp here
        ret

;-----------------------------------------------------------------------
; printdpfp: print two consecutive double-precision floats via printf
; ABI:   System V AMD64
; In:    rdi = printf format string (passed through unchanged)
;        rsi = address of 2 consecutive 64-bit doubles
; Out:   rax = whatever printf returns
; Clobb: xmm0, xmm1, plus everything printf is allowed to clobber
;-----------------------------------------------------------------------
printdpfp:
        push    rbp                             ; keeps rsp 16-byte aligned at the call
        mov     rbp, rsp

        movsd   xmm1, [rsi + 8]                 ; second double
        movsd   xmm0, [rsi]                     ; first double

        mov     eax, 2                          ; variadic call: al = number of xmm argument registers used
        call    printf

        pop     rbp                             ; no locals were allocated, so rsp == rbp here
        ret

需要注意的几个指令如下:
movups: 移动未对齐的打包单精度;(u:未对齐 unaligned;p:打包的 packed;s:单精度 single)
addps: 打包单精度相加;
movss: 移动标量单精度;(第一个s:标量 scalar;第二个s:单精度 single)
cvtss2sd: 将标量单精度转换为标量双精度;(d:双精度 double)

相关推荐

  1. 汇编——SSE对齐. 情况

    2024-04-02 01:06:02       31 阅读

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2024-04-02 01:06:02       94 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2024-04-02 01:06:02       100 阅读
  3. 在Django里面运行非项目文件

    2024-04-02 01:06:02       82 阅读
  4. Python语言-面向对象

    2024-04-02 01:06:02       91 阅读

热门阅读

  1. 【qt】打开图像、保存图像

    2024-04-02 01:06:02       28 阅读
  2. Stable Diffusion 的Web 用户界面简介

    2024-04-02 01:06:02       36 阅读
  3. 详解SPWM与SVPWM的原理、算法以及两者的区别

    2024-04-02 01:06:02       35 阅读
  4. 服了,一线城市的后端都卷成这样了吗!?

    2024-04-02 01:06:02       32 阅读