__m64_pmpyshr2, __m64_pmpyshr2u
Visual Studio 2010
Microsoft Specific
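Emits the Parallel Multiply and Shift Right (pmpyshr2) instruction, which multiplies the corresponding 16-bit elements of a and b and shifts each product right by nBit bits. __m64_pmpyshr2 treats the elements as signed; __m64_pmpyshr2u treats them as unsigned.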
__m64 __m64_pmpyshr2( __m64 a, __m64 b, const int nBit ); __m64 __m64_pmpyshr2u( __m64 a, __m64 b, const int nBit );
// pmpyshr2.cpp
// processor: IPF
#include <stdio.h>
#include <intrin.h>
#pragma intrinsic(__m64_pmpyshr2, __m64_pmpyshr2u)
// Prints the four 16-bit elements pointed to by ia as a brace-enclosed,
// comma-separated list of hexadecimal values followed by a newline.
//
// Each element is widened to int before it is formatted with %#x, so an
// element with its top bit set is shown sign-extended (0xffff prints as
// 0xffffffff).
void print16(__int16* ia)
{
    const int first  = ia[0];
    const int second = ia[1];
    const int third  = ia[2];
    const int fourth = ia[3];

    printf_s("{ %#x, %#x, %#x, %#x }\n", first, second, third, fourth);
}
// Prints four results, each formed by writing the hexadecimal text of hi[k]
// immediately followed by the hexadecimal text of lo[k], for k = 0..3.
//
// The two halves are concatenated as text with no zero padding, and each
// half is widened to int before formatting. The printed digits are therefore
// not a fixed-width 32-bit value: a low half of 1 prints as "1", not "0001",
// and a half with its top bit set prints sign-extended.
void printresult(__int16* hi, __int16* lo)
{
    // Interleave the halves lane by lane: hi[0], lo[0], hi[1], lo[1], ...
    int halves[8];
    for (int lane = 0; lane < 4; lane++)
    {
        halves[2 * lane]     = hi[lane];
        halves[2 * lane + 1] = lo[lane];
    }

    printf_s("{ 0x%x%x, 0x%x%x 0x%x%x 0x%x%x }\n",
             halves[0], halves[1], halves[2], halves[3],
             halves[4], halves[5], halves[6], halves[7]);
}
// Demonstrates __m64_pmpyshr2 and __m64_pmpyshr2u on the same pair of
// four-element 16-bit vectors. Each intrinsic is called twice, with a shift
// of 0 and a shift of 16, and the two results are printed together as the
// low and high halves of each lane.
int main()
{
    // 0xffff is included so that the signed and unsigned variants produce
    // visibly different high halves for lane 0.
    __int16 a[4] = { 0xffff, 1, 8, 0x5000 };
    __int16 b[4] = { 0xffff, 2, 0x10, 0x5000 };
    __m64 m, n, result_lo, result_hi;

    printf_s("a: \n");
    print16(a);
    printf_s("b: \n");
    print16(b);

    // Copy the inputs into the 16-bit lanes of the __m64 operands.
    for (int i = 0; i < 4; i++)
    {
        m.m64_i16[i] = a[i];
        n.m64_i16[i] = b[i];
    }

    // Signed multiply: a shift of 0 gives the value stored in result_lo,
    // a shift of 16 gives the value stored in result_hi.
    printf_s("__m64_pmpyshr2 computes :\n");
    result_lo = __m64_pmpyshr2(m, n, 0);
    result_hi = __m64_pmpyshr2(m, n, 16);
    printresult(result_hi.m64_i16, result_lo.m64_i16);

    // Unsigned multiply, using the same two shift amounts.
    printf_s("__m64_pmpyshr2u computes :\n");
    result_lo = __m64_pmpyshr2u(m, n, 0);
    result_hi = __m64_pmpyshr2u(m, n, 16);
    printresult(result_hi.m64_i16, result_lo.m64_i16);

    return 0;
}
a:
{ 0xffffffff, 0x1, 0x8, 0x5000 }
b:
{ 0xffffffff, 0x2, 0x10, 0x5000 }
__m64_pmpyshr2 computes :
{ 0x01, 0x02 0x080 0x19000 }
__m64_pmpyshr2u computes :
{ 0xfffffffe1, 0x02 0x080 0x19000 }