SH-4 Inline Assembly Code Examples (Windows CE 5.0)

Windows CE 5.0
Send Feedback

The following sections provide examples of SH-4 inline assembly code used in common situations.

Pass Address of Floating Point Parameter

The following code example shows how to pass the addresses of float arguments to an __asm call, and then how to load each argument into a register within the __asm statement:

#include <stdio.h>

void __asm(const char *, ...);

// Compute x+y+z --> result
float add_trig(float x, float y, float z)
  float result;

      "fmov.s  @r4,    fr0    ; copy x to fr0 \n"
      "fmov.s  @r5,    fr1    ; copy y to fr1 \n"
      "fmov.s  @r6,    fr2    ; copy z to fr2 \n"
      "fadd  fr0,    fr1    ; compute x+y\n"
      "fadd  fr1,    fr2    ; compute x+y+z \n"
      "fmov.s  fr2,    @r7    ; store fr2 into result addr\n",
      &x,            // pointer to x passed in r4
      &y,            // pointer to y passed in r5
      &z,            // pointer to z passed in r6
      &result);        // pointer to result passed in r7 

  return result;


// Calls add_trig with 1.0f, 2.0f and 3.0f and prints the returned sum.
void main()
{
  float retval = add_trig(1.0f, 2.0f, 3.0f);
  printf("%f\n", retval);
}

Access Double Parameters from Integer Registers

The following code example shows how to access double parameters from integer registers and local argument stack space using inline assembly:

#include <stdio.h>

void __asm(const char *, ...);

// Compute x+y+z --> result
double add_trig(double x, double y, double z)
  double result;
    "lds    r4,    fpul      ; load lw-part of x to fpul \n"
    "fsts  fpul,  fr5      ; copy lw-part of x to fr5 \n"
    "lds    r5,    fpul      ; load hi-part of x to fpul \n"
    "fsts  fpul,  fr4      ; copy hi-part of x to fr4 \n"
    "lds    r6,    fpul      ; load lw-part of y to fpul \n"
    "fsts  fpul,  fr7      ; copy lw-part of y to fr7 \n"
    "lds    r7,    fpul      ; load hi-part of y to fpul \n"
    "fsts  fpul,  fr6      ; copy hi-part of y to fr6 \n"
    "mov.l  @(16,sp),r0      ; load lw-part of z to r0 \n"
    "lds    r0,    fpul    ; \n"
    "fsts  fpul,  fr9      ; copy lw-part of z to fr9 \n"
    "mov.l  @(20,sp),r0      ; load hi-part of z to r0 \n"
    "lds    r0,    fpul    ; \n"
    "fsts  fpul,  fr8      ; copy hi-part of z to fr8 \n"
    "mov    #8,    r0      ; prepare to mask the pr bit \n"
    "shll16  r0        ; \n"
    "sts    fpscr,r1      ; \n"
    "xor    r0,    r1      ; toggle pr bit \n"
    "lds    r1,    fpscr    ; turn-on pr bit \n"
    "fadd  dr4,    dr6      ; compute x+y \n"
    "fadd  dr6,    dr8      ; compute x+y+z \n"
    "xor    r0,    r1      ; toggle pr bit \n"
    "lds    r1,    fpscr    ; turn-off pr bit\n"
    "mov.l  @(24,sp), r0      ; load result address\n"
    "add    #4,    r0      ; increment result addr by 4\n"
    "fmov.s  fr8,    @r0      ; store hi-part into result addr\n"
    "fmov.s  fr9,    @-r0      ; store lw-part into result addr\n",
    x,              // passed in r4 and r5 
    y,              // passed in r6 and r7
    z,              // passed in @(16,sp) and @(20,sp)
    &result);          // passed in @(24,sp) 

  return result;

// Calls the double-precision add_trig with 1.0, 2.0 and 3.0 and prints the sum.
void main()
{
  double retval = add_trig(1.0, 2.0, 3.0);
  printf("%g\n", retval);
}

Compute Inner Product of Vectors

The following code example shows how to compute the inner product of two four-element vectors:

#include <stdio.h>

void __asm(const char *, ...);

// Compute an inner product of v1 and v2 vectors
float dot(float *v1, float *v2)

  float retval[1];

    "fmov.s @r4+, fr0    ; Load v1 vector into fr0..fr3\n"
    "fmov.s @r4+, fr1\n"
    "fmov.s @r4+, fr2\n"
    "fmov.s @r4+, fr3\n"
    "fmov.s @r5+, fr4    ; Load v2 vector into fr4..fr7\n"
    "fmov.s @r5+, fr5\n"
    "fmov.s @r5+, fr6\n"
    "fmov.s @r5+, fr7\n"
    "fipr  fv0, fv4  ; Do the operation\n"
    "fmov.s  fr7, @r6      ; Store the return value\n", 
    v1,            // passed in R4 
    v2,            // passed in R5
    retval);          // passed in R6

    return retval[0];

// Builds two four-element vectors, computes their inner product with dot(),
// and prints the result.
void main()
{
  float v1[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float v2[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  float retval;

  retval = dot(v1, v2);

  printf("retval=%f\n", retval);
}
// range

See Also

SH-4 Inline Assembly Language

Send Feedback on this topic to the authors

Feedback FAQs

© 2006 Microsoft Corporation. All rights reserved.