Double Complex

This code sample demonstrates how to use C, Streaming SIMD Extensions 2 (SSE2) and Streaming SIMD Extensions 3 (SSE3) intrinsics to multiply two complex numbers. The following output is typical of this code: 23.00+ -2.00i. Output may vary depending on your compiler version and the components of your computing platform.  

 SSE3 intrinsics do not run on processors from the Pentium® III family or earlier.

 

/*

 * Copyright (C) 2006 Intel Corporation.  All rights reserved.

 *

 * The information and source code contained herein is the exclusive

 * property  of Intel Corporation and may not be disclosed, examined,

 * or reproduced in  whole or in part without explicit written

 * authorization from the Company.

 *

 * [Description]

 * This code sample demonstrates the use of C in comparison with SSE2

 * and SSE3 intrinsics to multiply two complex numbers.

 *

 * [Compile]

 * icc double_complex.c (linux)

 * icl double_complex.c (windows)

 *

 * [Output]

 * Complex Product(C):    23.00+ -2.00i

 * Complex Product(SSE3): 23.00+ -2.00i

 * Complex Product(SSE2): 23.00+ -2.00i

 */

#include <stdio.h>

#include <pmmintrin.h>

// A complex number in rectangular form: real + img*i.
//
// Field order matters: the SIMD multiply routines in this file write their
// result through a (double *) cast of a complex_num, placing the lower
// vector element in `real` and the upper vector element in `img`.
typedef struct {

  double real;  // real part

  double img;   // imaginary part

} complex_num;

// Complex multiplication in plain C (scalar version).
//
//   x, y : the two factors, passed by value
//   z    : output; receives the product x * y
//
// With x = a + bi and y = c + di the product is (ac - bd) + (bc + da)i.
void multiply_C(complex_num x, complex_num y, complex_num *z)
{
  const double a = x.real;
  const double b = x.img;
  const double c = y.real;
  const double d = y.img;

  const double real_part = (a * c) - (b * d);
  const double imag_part = (b * c) + (d * a);

  z->real = real_part;
  z->img  = imag_part;
}

#if __INTEL_COMPILER

// Complex multiplication using SSE3 intrinsics.
//
//   x, y : the two factors, passed by value
//   z    : output; receives the product x * y
//
// Vector contents in the comments below are written [upper, lower].
void multiply_SSE3(complex_num x, complex_num y, complex_num *z)
{
  // Each component of x duplicated into both vector elements:
  //   x_re: [x.real, x.real]
  //   x_im: [x.img,  x.img]
  const __m128d x_re = _mm_loaddup_pd(&x.real);
  const __m128d x_im = _mm_loaddup_pd(&x.img);

  // y as a vector, and the same vector with its two elements swapped:
  //   y_vec:  [y.img,  y.real]
  //   y_swap: [y.real, y.img]
  const __m128d y_vec  = _mm_set_pd(y.img, y.real);
  const __m128d y_swap = _mm_shuffle_pd(y_vec, y_vec, 1);

  // Partial products:
  //   re_terms: [(x.real*y.img), (x.real*y.real)]
  //   im_terms: [(x.img*y.real), (x.img*y.img)]
  const __m128d re_terms = _mm_mul_pd(y_vec, x_re);
  const __m128d im_terms = _mm_mul_pd(y_swap, x_im);

  // Add in the upper element, subtract in the lower element:
  //   product: [((x.real*y.img)+(x.img*y.real)),
  //             ((x.real*y.real)-(x.img*y.img))]
  const __m128d product = _mm_addsub_pd(re_terms, im_terms);

  // Lower element lands in z->real, upper element in z->img.
  _mm_storeu_pd((double *)z, product);
}

#endif

#if __INTEL_COMPILER

// Complex multiplication using SSE2 intrinsics only.
//
//   x, y : the two factors, passed by value
//   z    : output; receives the product x * y
//
// Vector contents in the comments below are written [upper, lower].
void multiply_SSE2(complex_num x, complex_num y, complex_num *z)
{
  // Each component of x copied into both vector elements:
  //   x_re: [x.real, x.real]
  //   x_im: [x.img,  x.img]
  const __m128d x_re = _mm_load1_pd(&x.real);
  const __m128d x_im = _mm_load1_pd(&x.img);

  // y as a vector, and the same vector with its two elements swapped:
  //   y_vec:  [y.img,  y.real]
  //   y_swap: [y.real, y.img]
  const __m128d y_vec  = _mm_set_pd(y.img, y.real);
  const __m128d y_swap = _mm_shuffle_pd(y_vec, y_vec, 1);

  // Partial products:
  //   re_terms: [(x.real*y.img), (x.real*y.real)]
  //   im_terms: [(x.img*y.real), (x.img*y.img)]
  const __m128d re_terms = _mm_mul_pd(y_vec, x_re);
  const __m128d im_terms = _mm_mul_pd(y_swap, x_im);

  // Compute both the element-wise sum and difference; only one element of
  // each is needed.
  //   sum:  upper = (x.real*y.img)  + (x.img*y.real)   -> imaginary part
  //   diff: lower = (x.real*y.real) - (x.img*y.img)    -> real part
  const __m128d sum  = _mm_add_pd(re_terms, im_terms);
  const __m128d diff = _mm_sub_pd(re_terms, im_terms);

  // Selector 2 takes the lower element from diff and the upper element
  // from sum, giving [imaginary, real].
  const __m128d product = _mm_shuffle_pd(diff, sum, 2);

  // Lower element lands in z->real, upper element in z->img.
  _mm_storeu_pd((double *)z, product);
}

#endif

int main()

{

  complex_num a, b, c;

  // Initialize complex numbers

  a.real =  3;

  a.img  =  2;

  b.real =  5;

  b.img  = -4;

  // Output for each: 23.00+ -2.00i

  multiply_C(a, b, &c);

  printf("Complex Product(C):    %2.2f+ %2.2fi\n", c.real, c.img);

#if __INTEL_COMPILER

  multiply_SSE3(a, b, &c);

  printf("Complex Product(SSE3): %2.2f+ %2.2fi\n", c.real, c.img);

  multiply_SSE2(a, b, &c);

  printf("Complex Product(SSE2): %2.2f+ %2.2fi\n", c.real, c.img);

#endif

  return 0;

}