We recommend using Visual Studio 2017
This documentation is archived and is not being maintained.


Microsoft Specific

Generates the haddps instruction.

// Horizontal add of packed single-precision values (generates haddps).
//   a - the first operand
//   b - the second operand
__m128 _mm_hadd_ps(
   __m128 a,
   __m128 b
);

[in] a

The first operand.

[in] b

The second operand.




Intel SSE3

Header file <intrin.h>

The haddps instruction performs a horizontal add, meaning that adjacent elements in the same operand are added together. Each 128-bit argument is considered as four 32-bit floating-point elements, numbered from 0 to 3, with 3 being the high-order element. The result of the operation on operand a (A3, A2, A1, A0) and operand b (B3, B2, B1, B0) is (B3 + B2, B1 + B0, A3 + A2, A1 + A0).

This routine is only available as an intrinsic.

// processor: x86 with SSE3
// Execute the haddps instruction using the intrinsic
// _mm_hadd_ps

#include <stdio.h>
#include <intrin.h>

#pragma intrinsic ( _mm_hadd_ps )

// Demonstrates _mm_hadd_ps: loads two float arrays into XMM registers,
// adds adjacent elements of each operand, and prints the four sums.
// Returns 0.
int main(void)
{
    __m128 u, v, w;
    // 16-byte alignment is needed for the aligned loads (_mm_load_ps) below.
    // The 'f' suffix keeps the initializers as float constants and avoids
    // double-to-float truncation warnings.
    __declspec(align(16)) float a[4] = { 0.1f, 0.2f, 0.3f, 0.4f };
    __declspec(align(16)) float b[4] = { 0.0001f, 0.002f, 0.003f, 0.004f };

    printf_s("Loading floating-point values\n"
             "%5.3f %5.3f %5.3f %5.3f into XMM register.\n",
             a[0], a[1], a[2], a[3] );
    u = _mm_load_ps(a);
    printf_s("Loading floating-point values\n"
             "%5.3f %5.3f %5.3f %5.3f into XMM register.\n",
             b[0], b[1], b[2], b[3] );
    v = _mm_load_ps(b);

    printf_s("Calling _mm_hadd_ps to modify these values.\n");
    // Elements 0-1 of the result hold the pair sums of u (a[0]+a[1],
    // a[2]+a[3]); elements 2-3 hold the pair sums of v.
    w = _mm_hadd_ps ( u , v);

    printf_s("Result: %5.3f %5.3f %5.3f %5.3f\n", w.m128_f32[0],
             w.m128_f32[1], w.m128_f32[2], w.m128_f32[3] );

    return 0;
}
Loading floating-point values
0.100 0.200 0.300 0.400 into XMM register.
Loading floating-point values
0.000 0.002 0.003 0.004 into XMM register.
Calling _mm_hadd_ps to modify these values.
Result: 0.300 0.700 0.002 0.007