2

My current project requires a turbulence code that will use the velocity components of a fluid to advect a distribution of passive tracers.

I have written a code below that uses a structure of arrays (SoA) to define the fluid so that I can make FFTW calls to transform the velocity components later. The passive tracers are defined as array of structures (AoS) as this allows me to enable quick sort the tracers along any given direction.

#include<stdio.h>
#include<stddef.h>
#include<stdlib.h>
#include<complex.h>

// 2D fluid (SoA)
typedef struct { 
  ptrdiff_t n;
  ptrdiff_t size;
  double *u0;
  double *u1;
  ptrdiff_t numTracers;
} myfluid;

// Passive tracers (AoS)
typedef struct { 
  ptrdiff_t ID;
  double r0;
  double r1;
} mytracer;

void allocate_memory (myfluid *f, mytracer **t){
  f->u0 = malloc (f->size * sizeof(double));
  f->u1 = malloc (f->size * sizeof(double));

  *t  = malloc (f->numTracers * sizeof(mytracer));
}

void initialize (myfluid *f, mytracer *t) {
  // random velocity component in the range (-1, 1)
  srand48 (1);
  for (ptrdiff_t i = 0; i < f->size; i ++){
    f->u0[i] = -1. + 2 * drand48 ();
    f->u1[i] = -1. + 2 * drand48 ();
  }
  // random positions inside the unit square (0, 1)
  for (ptrdiff_t i = 0; i < f->numTracers; i ++){
    t[i].ID = i;
    t[i].r0 = drand48 ();
    t[i].r1 = drand48 ();
  }
}

int compare (const void *a, const void *b){
  mytracer *A = (mytracer *)a;
  mytracer *B = (mytracer *)b;
  return (A->r0 > B->r0) - (A->r0 < B->r0);
}

void printout (myfluid *f, mytracer *t) {
  printf("\n2D fluid\n");
  for (ptrdiff_t i = 0; i < f->n; i ++){
    for (ptrdiff_t j = 0; j < f->n; j ++){
      ptrdiff_t idx = j + i * f->n;
      printf ("(%.2lf, %.2lf)\t", f->u0[idx], f->u1[idx]);
    }
    printf ("\n");
  }

  printf("\nTracers\n");
  for (ptrdiff_t i = 0; i < f->numTracers; i ++){
    printf ("%td\t %.2lf\t %.2lf\n", t[i].ID, t[i].r0, t[i].r1);
  }
}
 
void deallocate_memory (myfluid *fluid, mytracer **t) {
  free (fluid->u0);
  free (fluid->u1);

  free (*t);
}

int main (int argc, char **argv){
  myfluid fluid;
  mytracer *tracer;

  fluid.n = 8;
  fluid.numTracers = 8;

  fluid.size = fluid.n * fluid.n;

  allocate_memory (&fluid, &tracer);

  initialize (&fluid, tracer);
  
  // sort the tracers along r0 direction
  qsort(tracer, fluid.numTracers, sizeof(mytracer), compare);

  printout (&fluid, tracer);

  deallocate_memory(&fluid, &tracer);

  return 0;
}

Is it possible to pass the tracers also as SoA, without losing the ability to quick sort them as shown here?

6
  • qsort() can only swap whole elements of a single array-like object around, so the answer is "no". Commented Jun 14, 2024 at 14:28
  • If you constructed an array of consecutive indices, you could sort that array of indices with the (non-standard) qsort_r function (like qsort but with a context pointer value that also gets passed to the comparison function), using a pointer to the SoA as context. Then you just have the problem of swapping the elements of each array according to the sorted array of indices. It's a bit tricky to do the swapping in-place because in general there will be multiple cyclic chains of elements to be rotated, but there are algorithms to deal with that sort of thing. Commented Jun 14, 2024 at 14:41
  • @ashwin, Why are .n, .size and .numTracers of type ptrdiff_t instead of type size_t? Commented Jun 14, 2024 at 15:15
  • Maybe you can incorporate an index array and sort that helper array instead: tracer[0], tracer[1], ... becomes tracer[helper[0]], tracer[helper[1]], ... keeping tracer[0], tracer[1], ... unmoved Commented Jun 14, 2024 at 15:17
  • @chux-ReinstateMonica, I did this because the fluid arrays will be sent to FFTW library for parallel transforms, which since version 3.x.x, demands that all of its MPI routines take ptrdiff_t arguments instead of int as for the serial FFTW. As per their documentation, ptrdiff_t is a standard C integer type which is (at least) 32 bits wide on a 32-bit machine and 64 bits wide on a 64-bit machine. This is to make it easy to specify very large parallel transforms on a 64-bit machine. Commented Jun 15, 2024 at 3:23

2 Answers 2

2

Here is a version using Glibc's qsort_r to sort an array of indices according to a comparison function that uses a pointer to the myfluid as context, and then permutes the arrays inside the structure in-place according to the sorted indices.

If qsort_r is not available, qsort could be used, but the context would need to be passed using global variables. FreeBSD's standard C library also includes the qsort_r function.

Note: There may be portability issues with qsort_r because some versions have the parameters of qsort_r in a different order and the parameters of the comparison function in a different order. See this answer by Schwern for the sordid history of this function and its relatives.

The permutation part of the code after the qsort_r call in the tracer_sort function is stolen from this answer by AnT stands with Russia.

#define _GNU_SOURCE // for glibc's qsort_r

#include<stdio.h>
#include<stddef.h>
#include<stdlib.h>
#include<complex.h>

// 2D fluid (SoA)
typedef struct { 
  ptrdiff_t n;
  ptrdiff_t size;
  double *u0;
  double *u1;
  ptrdiff_t numTracers;
  ptrdiff_t *t_ID;
  double *t_r0;
  double *t_r1;
} myfluid;

void allocate_memory (myfluid *f){
  f->u0 = malloc (f->size * sizeof(*f->u0));
  f->u1 = malloc (f->size * sizeof(*f->u1));
  f->t_ID = malloc (f->numTracers * sizeof(*f->t_ID));
  f->t_r0 = malloc (f->numTracers * sizeof(*f->t_r0));
  f->t_r1 = malloc (f->numTracers * sizeof(*f->t_r1));
}

void initialize (myfluid *f) {
  // random velocity component in the range (-1, 1)
  srand48 (1);
  for (ptrdiff_t i = 0; i < f->size; i ++){
    f->u0[i] = -1. + 2 * drand48 ();
    f->u1[i] = -1. + 2 * drand48 ();
  }
  // random positions inside the unit square (0, 1)
  for (ptrdiff_t i = 0; i < f->numTracers; i ++){
    f->t_ID[i] = i;
    f->t_r0[i] = drand48 ();
    f->t_r1[i] = drand48 ();
  }
}

// a and b are pointers into an array of indices of type ptrdiff_t
// context is a pointer to myfluid
int compare (const void *a, const void *b, void *context){
  const myfluid *f = context;
  double r0_a = f->t_r0[*(const ptrdiff_t *)a];
  double r0_b = f->t_r0[*(const ptrdiff_t *)b];

  return (r0_a > r0_b) - (r0_a < r0_b);
}

void printout (myfluid *f) {
  printf("\n2D fluid\n");
  for (ptrdiff_t i = 0; i < f->n; i ++){
    for (ptrdiff_t j = 0; j < f->n; j ++){
      ptrdiff_t idx = j + i * f->n;
      printf ("(%.2lf, %.2lf)\t", f->u0[idx], f->u1[idx]);
    }
    printf ("\n");
  }

  printf("\nTracers\n");
  for (ptrdiff_t i = 0; i < f->numTracers; i ++){
    printf ("%td\t %.2lf\t %.2lf\n", f->t_ID[i], f->t_r0[i], f->t_r1[i]);
  }
}

void tracer_sort(myfluid *fluid,
    int (*cmp)(const void *, const void *, void *)) {
  ptrdiff_t n = fluid->numTracers;
  ptrdiff_t index[n];

  // initialize index
  for (ptrdiff_t i = 0; i < n; i++) {
    index[i] = i;
  }
  // sort index
  qsort_r(index, n, sizeof(index[0]), cmp, fluid);
  // permute the tracer elements according to the index
  // this will clobber the index
  for (ptrdiff_t i_dst_first = 0; i_dst_first < n; i_dst_first++) {
    ptrdiff_t i_src = index[i_dst_first];

    if (i_src != i_dst_first) {
      // follow the permutation cycle, clobbering the index as we go
      ptrdiff_t i_dst = i_dst_first;
      ptrdiff_t t_ID = fluid->t_ID[i_dst];
      double t_r0 = fluid->t_r0[i_dst];
      double t_r1 = fluid->t_r1[i_dst];

      do {
        fluid->t_ID[i_dst] = fluid->t_ID[i_src];
        fluid->t_r0[i_dst] = fluid->t_r0[i_src];
        fluid->t_r1[i_dst] = fluid->t_r1[i_src];
        index[i_dst] = i_dst;

        i_dst = i_src;
        i_src = index[i_src];
      } while (i_src != i_dst_first);

      fluid->t_ID[i_dst] = t_ID;
      fluid->t_r0[i_dst] = t_r0;
      fluid->t_r1[i_dst] = t_r1;
      index[i_dst] = i_dst;
    }
  }
}
 
void deallocate_memory (myfluid *fluid) {
  free (fluid->u0);
  free (fluid->u1);
  free (fluid->t_ID);
  free (fluid->t_r0);
  free (fluid->t_r1);
}

int main (int argc, char **argv){
  myfluid fluid;

  fluid.n = 8;
  fluid.numTracers = 8;

  fluid.size = fluid.n * fluid.n;

  allocate_memory (&fluid);

  initialize (&fluid);

  // sort the tracers along r0 direction
  tracer_sort(&fluid, compare);

  printout (&fluid);

  deallocate_memory(&fluid);

  return 0;
}

Note: if variable length arrays (VLAs) are not supported, replace ptrdiff_t index[n]; in tracer_sort with dynamically allocated storage, which must be freed before returning from the function.

Note about complexity: The space complexity is O(n) due to the use of an auxiliary array of indices. The sorting of the indices has time complexity O(n log n) and the subsequent permutation of the various arrays according to the sorted indices has time complexity O(n), so the overall time complexity is O(n log n).

Sign up to request clarification or add additional context in comments.

4 Comments

Thanks a lot @Ian Abbott. I like your solution as it kept the time complexity same and increased only the space complexity from O(1) --> O(n). I will keep this in mind as my fluid structure swells to 3D with size 512 x 512 x 512, or even higher -- I am writing a parallel 3D turbulence solver.
I missed the portability issue with qsort_r that you pointed earlier. I am using macOS for coding at home, and my students with whom I will share this code are using linux! Wish qsort_r was standardized across platforms.
@ashwin The Austin Group have gone with the glibc qsort_r interface for the latest POSIX standard that was published a few days ago, but we shall have to see how macOS deals with it. FreeBSD's qsort_r changed to the glibc interface in FreeBSD 14.0.
@ashwin And of course the context can contain anything else you want, such as a vector to specify the sorting direction.
1

This is not directly possible with qsort, but you could map your structure of arrays to an array of structures, which you could sort with qsort, then map those values back into a structure of arrays. It seems roundabout because it is (and begs the question of why you're not simply storing an array of structures in the first place), but it is possible.

E.g.

#include <stdlib.h>
#include <stdio.h>

struct foo { 
    int a[10];
    int b[10];
};

struct pair { 
    int a;
    int b;
};

int cmp_pair(const void *a, const void *b) {
    const struct pair *c = a;
    const struct pair *d = b;

    if (c->a < d->a) return -1;
    if (c->a > d->a) return 1;
    if (c->b < d->b) return -1;
    if (c->b > d->b) return 1;

    return 0;
}

int main(void) {
    struct foo x = {
        {1, 5, 8, 2, 4, 5, 6, 7, 8, 0},
        {4, 8, 7, 6, 1, 2, 6, 0, 9, 2}
    };

    struct pair y[10];

    for (size_t i = 0; i < 10; i++) {
        y[i] = (struct pair){ .a = x.a[i], .b = x.b[i] };
    }

    qsort(y, 10, sizeof(struct pair), cmp_pair);

    for (size_t i = 0; i < 10; i++) {
        x.a[i] = y[i].a;
        x.b[i] = y[i].b;
    }

    for (size_t i = 0; i < 10; i++) {
        printf("%d, %d\n", x.a[i], x.b[i]);
    }
}

Output:

0, 2
1, 4
2, 6
4, 1
5, 2
5, 8
6, 6
7, 0
8, 7
8, 9

2 Comments

Thanks a lot @Chris. Will try this out. Quick question: Does the wrapper 'pair' increase the space complexity of this sorting? I am guessing it will, as you are copying the contents from 'foo' to 'pair' in the main (), right? If this is true, then this might present as a space constraint when my fluid structure becomes 3D with size 512 x 512 x 512, which is my eventual goal. I am doing all this for writing a 3D parallel turbulence solver.
Yes, this would be O(n) for space complexity.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.