__m64_pmpyshr2, __m64_pmpyshr2u

Microsoft Specific

Emit the Parallel Multiply and Shift Right (pmpyshr) instruction.

__m64 __m64_pmpyshr2( 
   __m64 a, 
   __m64 b, 
   const int nBit 
);
__m64 __m64_pmpyshr2u( 
   __m64 a, 
   __m64 b, 
   const int nBit 
);

Parameters

  • [in] a
    An __m64 union containing an array of four 16-bit integers.

  • [in] b
    An __m64 union containing an array of four 16-bit integers.

  • [in] nBit
    The number of bits by which each product is shifted right; each shifted result is then truncated to 16 bits, giving four 16-bit values. Allowed values are 0, 7, 15, or 16.

Requirements

Intrinsic

Architecture

__m64_pmpyshr2

IPF

__m64_pmpyshr2u

IPF

Header file <intrin.h>

Remarks

__m64_pmpyshr2 performs a signed multiplication of the integers in a with the corresponding integers in b, and then a right shift of nBit bits; that is, it computes (a[i] * b[i]) >> nBit for each element index i. __m64_pmpyshr2u is the same, but performs an unsigned multiplication.

Example

// pmpyshr2.cpp
// processor: IPF
#include <stdio.h>
#include <intrin.h>

#pragma intrinsic(__m64_pmpyshr2, __m64_pmpyshr2u)

// Prints the four 16-bit elements starting at ia on one line, formatted as
// "{ e0, e1, e2, e3 }" with each element in "%#x" hexadecimal form.
// Each element is widened to int before being printed, so a negative
// element such as (__int16)0xffff is shown sign-extended (0xffffffff).
void print16(__int16* ia)
{
    const int e0 = ia[0];
    const int e1 = ia[1];
    const int e2 = ia[2];
    const int e3 = ia[3];

    printf_s("{ %#x, %#x, %#x, %#x }\n", e0, e1, e2, e3);
}

// Prints four results, each supplied as a high half (hi[k]) and a low
// half (lo[k]). For every element the hex digits of the high half are
// written immediately followed by the hex digits of the low half.
// NOTE(review): neither half is zero-padded and both are widened to int,
// so the printed digits are not a faithful 32-bit value (for example
// hi = 0x1900, lo = 0 prints as 0x19000). Left as-is to match the
// documented sample output.
void printresult(__int16* hi, __int16* lo)
{
    const int h0 = hi[0], l0 = lo[0];
    const int h1 = hi[1], l1 = lo[1];
    const int h2 = hi[2], l2 = lo[2];
    const int h3 = hi[3], l3 = lo[3];

    printf_s("{ 0x%x%x, 0x%x%x 0x%x%x 0x%x%x }\n",
             h0, l0, h1, l1, h2, l2, h3, l3);
}

// Demonstrates __m64_pmpyshr2 (signed) and __m64_pmpyshr2u (unsigned).
// Each intrinsic is called twice on the same operands: once with a shift
// of 0 to obtain the low 16 bits of every product and once with a shift
// of 16 to obtain the high 16 bits; the two halves are then printed
// side by side.
int main()
{
    // 0xffff is an int constant that does not fit in a signed 16-bit
    // element; the explicit cast avoids a narrowing conversion inside the
    // brace initializer while keeping the same stored bit pattern.
    __int16 a[4] = { (__int16)0xffff, 1, 8, 0x5000 };
    __int16 b[4] = { (__int16)0xffff, 2, 0x10, 0x5000 };
    __m64 m, n, result_lo, result_hi;

    printf_s("a: \n");
    print16(a);
    printf_s("b: \n");
    print16(b);

    // Pack the operand arrays into the 16-bit lanes of the __m64 values.
    for (int i = 0; i < 4; i++)
    {
       m.m64_i16[i] = a[i];
       n.m64_i16[i] = b[i];
    }

    // Signed multiply: shift 0 keeps the low half, shift 16 the high half.
    printf_s("__m64_pmpyshr2 computes :\n");
    result_lo = __m64_pmpyshr2(m, n, 0);
    result_hi = __m64_pmpyshr2(m, n, 16);
    printresult(result_hi.m64_i16, result_lo.m64_i16);

    // Unsigned multiply, same low/high split.
    printf_s("__m64_pmpyshr2u computes  :\n");
    result_lo = __m64_pmpyshr2u(m, n, 0);
    result_hi = __m64_pmpyshr2u(m, n, 16);
    printresult(result_hi.m64_i16, result_lo.m64_i16);

}
a: 
{ 0xffffffff, 0x1, 0x8, 0x5000 }
b: 
{ 0xffffffff, 0x2, 0x10, 0x5000 }
__m64_pmpyshr2 computes :
{ 0x01, 0x02 0x080 0x19000 }
__m64_pmpyshr2u computes  :
{ 0xfffffffe1, 0x02 0x080 0x19000 }

See Also

Reference

__m64

Compiler Intrinsics