Export (0) Print
Expand All
Expand Minimize

_mm_mpsadbw_epu8

Microsoft Specific

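Emits the Streaming SIMD Extensions 4 (SSE4) instruction mpsadbw. This instruction computes eight sums of absolute differences, each taken between a group of four unsigned 8-bit integers from the first parameter and a fixed group of four unsigned 8-bit integers from the second parameter.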

// Emits the SSE4 mpsadbw instruction; the result holds eight 16-bit unsigned sums.
__m128i _mm_mpsadbw_epu8( 
   __m128i a,        // [in] sixteen 8-bit unsigned integers
   __m128i b,        // [in] sixteen 8-bit unsigned integers
   const int mask    // [in] constant selecting which integers are used
);

[in] a

A 128-bit parameter that contains sixteen 8-bit unsigned integers.

[in] b

A 128-bit parameter that contains sixteen 8-bit unsigned integers.

[in] mask

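A constant that specifies which integers to use in the calculation. Only its three least significant bits are used: bit 2 selects the starting offset into a, and bits 0 and 1 select the group of four integers in b.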

Returns a 128-bit result that contains eight 16-bit unsigned integers. The values of these integers can be computed as follows:

i = mask2 * 4
j = (mask1 * 2 + mask0) * 4
for (k = 0; k < 8; k = k + 1) {
        t0 = abs(a[i + k + 0] - b[j + 0])
        t1 = abs(a[i + k + 1] - b[j + 1])
        t2 = abs(a[i + k + 2] - b[j + 2])
        t3 = abs(a[i + k + 3] - b[j + 3])
        r[k] = t0 + t1 + t2 + t3
}

Intrinsic

Architecture

_mm_mpsadbw_epu8

x86, x64

Header file <smmintrin.h>

a[n] and b[n] indicate the nth ordered unsigned 8-bit integer of parameters a and b, where a[0] and b[0] are the lowest 8 bits. r[n] is the nth ordered unsigned 16-bit element of result r, where r[0] refers to the lowest 16 bits. mask0, mask1, and mask2 are the three least significant bits of parameter mask, where mask0 is the least significant bit.

Before you use this intrinsic, software must ensure that the processor supports the instruction.

#include <stdio.h>
#include <stdlib.h>
#include <smmintrin.h>

// Demonstrates _mm_mpsadbw_epu8 by comparing the intrinsic's result against
// a scalar reference computation of the same eight sums of absolute
// differences. Prints one "ResN should be <reference>: <intrinsic>" line per
// 16-bit result element and returns 0.
int main(void)
{
    // Input bytes, listed from element 0 (the lowest 8 bits) upward.
    static const unsigned char a_bytes[16] = {
        15, 60, 55, 31, 0, 1, 2, 4, 8, 16, 32, 64, 128, 255, 1, 17
    };
    static const unsigned char b_bytes[16] = {
        2, 4, 8, 64, 255, 0, 1, 16, 32, 64, 128, 255, 75, 31, 42, 11
    };

    // A mask value of 0101 (5): bit 2 is set, so reads from a start at
    // element 4; bits 0-1 are 01, so the group b[4..7] is used.
    const int mask = 5;

    // Offsets derived from the mask exactly as the instruction does, so the
    // reference values below stay correct if the mask is changed.
    const int a_offset = ((mask >> 2) & 1) * 4;
    const int b_offset = (mask & 3) * 4;

    __m128i a, b;
    __m128i final;
    int k, n;

    for (n = 0; n < 16; n++)
    {
        a.m128i_u8[n] = a_bytes[n];
        b.m128i_u8[n] = b_bytes[n];
    }

    __m128i res = _mm_mpsadbw_epu8(a, b, mask);

    // Scalar reference: result k is the sum of |a[a_offset + k + n] -
    // b[b_offset + n]| for n = 0..3. The highest a index touched is
    // 4 + 7 + 3 = 14, so all reads stay inside the sixteen elements.
    for (k = 0; k < 8; k++)
    {
        int sum = 0;

        for (n = 0; n < 4; n++)
        {
            sum += abs(a.m128i_u8[a_offset + k + n] - b.m128i_u8[b_offset + n]);
        }
        // At most 4 * 255 = 1020, which fits in 16 bits.
        final.m128i_u16[k] = (unsigned short)sum;
    }

    for (k = 0; k < 8; k++)
    {
        printf_s("Res%d should be %d: %d\n",
                    k, final.m128i_u16[k], res.m128i_u16[k]);
    }

    return 0;
}
Res0 should be 269: 269
Res1 should be 267: 267
Res2 should be 264: 264
Res3 should be 290: 290
Res4 should be 342: 342
Res5 should be 446: 446
Res6 should be 653: 653
Res7 should be 588: 588

Community Additions

ADD
Show:
© 2014 Microsoft