3

I was making a simple homework in which I had to develop a software in C to find the two nearest points between 100 of them.

When I finished, I was curious to see how much time it would take to run it with a lot more points and with full VC++ optimizations enabled. I tried with 10000 and it took about 8~9 seconds. Then I was curious to see how much time C# and Java would take to do the same thing. As expected, C# took a little longer, 9~10 seconds; Java, however, took only ~400 milliseconds! Why does this happen?!

This is my code in C, C# and Java:

C:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <Windows.h>

long perfFrequency = 0;

typedef struct
{
    double X;
    double Y;
} Point;

double distance(Point p1, Point p2)
{
    return sqrt(pow(p1.X - p2.X, 2) + pow(p1.Y - p2.Y, 2));
}

double smallerDistance(Point *points, int size, Point *smallerA, Point  *smallerB)
{
    int i, j;
    double smaller = distance(points[0], points[1]);

    for (i = 0; i < size; i++)
    {
        for (j = i + 1; j < size; j++)
        {
            double dist = distance(points[i], points[j]);
            if (dist < smaller)
            {
                smaller= dist;
                *smallerA = points[i];
                *smallerB = points[j];
            }
        }
    }
    return smaller;
}

void main()
{
    // read size and points from file.
    int size;
    Point *points= (Point *)malloc(size * sizeof(Point));

    // just to make sure everything is ready before the benchmark begins    
    system("pause");

    Point smallerA, smallerB;
    if (!QueryPerformanceFrequency((LARGE_INTEGER *)&perfFrequency))
        printf("Couldn't query performance frequency.");

    long long start, end;   
    double smaller;
    QueryPerformanceCounter((LARGE_INTEGER *)&start);

    smaller= smallerDistance(points, size, &smallerA, &smallerB);

    QueryPerformanceCounter((LARGE_INTEGER *)&end);

    printf("The smaller distance is: %lf. The coordinates of the most close points are: (%lf, %lf) and (%lf, %lf). Time taken: %lfms\n",
        smaller, smallerA.X, smallerA.Y, smallerB.X, smallerB.Y, (end - start) * 1000.0 / perfFrequency);

}

C#:

using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Diagnostics;

namespace StructuredTest
{
    struct Point
    {
        public double X;
        public double Y;
    }

    class Program
    {
        static double Distance(Point p1, Point p2)
        {
            return Math.Sqrt(Math.Pow(p1.X - p2.X, 2) + Math.Pow(p1.Y - p2.Y, 2));
        }

        static double SmallerDistance(Point[] points, int size, out Point smallerA, out Point smallerB)
        {
            int i, j;
            double smaller = Distance(points[0], points[1]);
            smallerA = default(Point);
            smallerB = default(Point);

            for (i = 0; i < size; i++)
            {
                for (j = i + 1; j < size; j++)
                {
                    double dist = Distance(points[i], points[j]);
                    if (dist < smaller)
                    {
                        smaller = dist;
                        smallerA = points[i];
                        smallerB = points[j];
                    }
                }
            }

            return smaller;
        }

        static void Main(string[] args)
        {
            // read size and points from file 
            int size = int.Parse(file[0]);
            Point[] points= new Point[size];                   

            // make sure everything is ready
            Console.WriteLine("Press any key to continue...");
            Console.ReadKey(true);

            Point smallerA, smallerB;
            double smaller;

            Stopwatch sw = new Stopwatch();
            sw.Restart();

            smaller = SmallerDistance(points, size, out smallerA, out smallerB);

            sw.Stop();

            Console.WriteLine($"The smaller distance is: {smaller}. The coordinates of the most close points are: ({smallerA.X}, {smallerA.Y}) and " +
                $"({smallerB.X}, {smallerB.Y}). Time taken: {sw.ElapsedMilliseconds}ms.");

        }
    }
}

Java:

package structuredtest;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

class Point {

    public Point(double X, double Y) {
        this.X = X;
        this.Y = Y;
    }

    double X;
    double Y;
}

class Result {

    double distance;
    Point p1;
    Point p2;
}

public class StructuredTest {

    static double distance(Point p1, Point p2) {
        return Math.sqrt(Math.pow(p1.X - p2.X, 2) + Math.pow(p1.Y - p2.Y, 2));
    }

    static Result smallerDistance(Point[] points, int size) {
        int i, j;
        double smaller = distance(points[0], points[1]);
        Result r = new Result();

        for (i = 0; i < size; i++) {
            for (j = i + 1; j < size; j++) {
                double dist = distance(points[i], points[j]);
                if (dist < smaller) {
                    smaller = dist;
                    r.p1 = points[i];
                    r.p2 = points[j];
                }
            }
        }

        r.distance = smaller;
        return r;
    }

    public static void main(String[] args) throws IOException {
        // read size and points from file
        int size = Integer.parseInt(file[0]);
        Point[] points = new Point[size];

        // make sure everything is ready    
        System.out.println("Press any key to continue...");
        System.in.read();

        double start = System.nanoTime(), end;

        Result r = smallerDistance(points, size);

        end = System.nanoTime();

        System.out.println("The smaller distance is: " + r.distance + ". The most close points are: ("
                + r.p1.X + "," + r.p1.Y + ") and " + r.p2.X + "," + r.p2.Y + "). Time taken: " + (end - start) / 1000000 + "ms.");

    }

}

If java had beaten both C and C# by a small margin I wouldn't be surprised, but 20 times faster?!

The file is in the following format:

3 // number of points in the file. Note that there no comments in the actual file
(3.7098722472288, 4.49056397953787) // point (X,Y)
(8.90232811621332, 9.67982769279173)
(5.68254334818822, 1.71918922506136)
(6.22585901842366, 9.51660500242835)

Funny thing: At first, the file with 10000 points that I mentioned earlier, which I was using to benchmark, was actually just a 100 times copy paste of another file with 100 random points. Like this:

(Point 1)
(Point 2)
(Point 3)
(Point 1)
(Point 2)
(Point 3)

I thought that there was no need of generating 10000 random points because as the code has to run through all of the numbers anyway, it would make little difference (only more assignments). But then I decided to generate 10000 random points to see how they would react: both C and C# still ran in about the same time (~50ms increase); Java, on the other hand, had a increase of ~500ms.

Also, I think it is worth noting that java takes about 11 seconds when running inside NetBeans (even in "Run" mode, not "Debug").

And I also tried compiling as C++ instead of C, but it made no difference.

I'm using VS 2015 for both C and C#.

These are the settings for each language:

C:

x64
Optimization: Maximize Speed (/O2)
Intrinsic Functions: Yes (/Oi)
Favor Size or Speed: Favor fast code (/Ot)
Enable Fiber-Safe Optimizations: Yes (/GT)
Security Check: Disable Security Check (/GS-)
Floating point model: Fast (/fp:fast)
Everything else: default

C#:

x64
Release Mode
Optimize Code: Enabled
Check for arithmetic overflow: Disabled
.NET 4.5.2 

Java:

JRE/JDK 1.8
Default settings (if there are any)

EDIT:

Okay, I re-did the tests, following the suggestions:

First off, I used the Result class/struct in both C and C#. The reason I used it in java but not in C/C# is because java can't pass by reference. Second, I now repeat the test in the main() function. And thanks @Tony D for catching that bug! :)

I won't post the code because the changes are minor: simply implement exactly the java version in the other tests, that's what I did.

This time I tested with only 7000 Points (not 10000) and with only 30 iterations, because it is taking a long time to test and its quite late here.

The results didn't change much: C# took 5228ms in average, C 4424ms and Java 223ms. Java still wins by being 20 or more times faster.

Then I tried removing the calls to Math.Pow (simply changing for ((p1.X - p2.X) * (p1.X - p2.X)) + ((p1.Y - p2.Y) * (p1.Y - p2.Y))), then everything changed. The new results:

Java: 220ms average

C#: 195ms average

C: 195ms average

If I only checked that before :p

As I commented, I thought about doing that, but then decided that it would be better to test each compiler's ability to inline functions and optimize this kind of simple calls. However, when I obtained those strange results, I should've gone back and done this, but I got so nervous that I forgot about doing it.

Anyway, to be really honest, I'm surprised that the Java compiler was able to completely optimize that line of code while C#'s and C++'s weren't. Although I know about the corner case checks and the internal calls on C#, I find it really interesting that the Java compiler was able to notice that no corner-case checks were necessary in that code whatsoever.

15
  • 1
    Perhaps Java is optimising the Math.pow(p1.X - p2.X, 2) call to a multiplication: you could explicitly change it to a multiplication in the C version and see if that accounts for the performance difference. QueryPerformanceCounter is also famously unreliable, depending on your exact OS version and hardware, though if your machine is otherwise idle and it doesn't flip the program across cores you shouldn't be affected by the bugs. Commented Oct 30, 2015 at 3:56
  • 1
    Why does the java version have a Result class, but the C# and C++ versions do not? Are you sure you are comparing apples to apples? Is the output from these 3 programs equivalent? Commented Oct 30, 2015 at 3:57
  • 2
    Is this after the obligatory 40 sec warmup? Commented Oct 30, 2015 at 3:58
  • 2
    I've copied the program into VC++2012 - for 1000 points initialised with random numbers, it takes ~6.8ms on this PC. For 10k points, ~738ms. Release mode build with no care taken over other optimisation settings. (You have a bug: if not other points are closer together than points[0] and points[1], you never set *smallerA and *smallerB). Avoiding pow in favour of manual multiplication: ~700ms. Commented Oct 30, 2015 at 4:22
  • 2
    In fiddling with things, that which made the largest difference was changing sqrt(pow(p1.X - p2.X, 2) + pow(p1.Y - p2.Y, 2)) to sqrt((p1.X - p2.X)*(p1.X - p2.X) + (p1.Y - p2.Y)*(p1.Y - p2.Y)); made it take 1/10 the time. (10000 random points, 4500mS down to 322mS) this is with 2015 community edition Commented Oct 30, 2015 at 4:24

2 Answers 2

11

As explained here and checked here, in pure C there is no overload with integer power, like this one:

double pow(double base, int exponent );

It means that when you call pow in C, it is processed in a way similar to:

double pow(double base, double exponent) {
    return exp(log(base) * exponent);
}

Also there should be some check for the case of negative base and integer power, which is handled in special way. It is not a good idea to add conditions like if (exponent == 1.0) and if (exponent == 2.0) here, because it would slow down mathematical code that really uses pow for proper purposes. As a result, squaring becomes slower in twelve times or something like that.

In principle, the only reasonable way to optimize pow(x, 2) to x * x is to make compiler recognize such antipatterns and generate special code for them. It just happened so that your C and C# compilers with your settings are not able to do it, while Java compiler can do it. Note that it has nothing to do with inlining capabilities: just inlining exp(log(a) * b) would not make this code any faster.

As a conclusion, I'd like to note that no programmer good in writing mathematical code would ever write pow(x, 2) in his code.

Sign up to request clarification or add additional context in comments.

1 Comment

Also, note that using x*x allows it to be used in a constexpr in C++11 which is not true to cmath libraries.
0

Another thing I noticed is that in the C-Version, you have pointers to your "Point" structure. When you do the assignments *smallerA = points[i]; and *smallerB = points[j]; you dereference the target.

I.e. you don't change a pointer, as you do in the C# and Java codes. You copy the whole instance. That's twice as many move-operations.

1 Comment

The C# version uses a struct, and thus it also copies. You’re right about Java, though.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.