I was making a simple homework in which I had to develop a software in C to find the two nearest points between 100 of them.
When I finished, I was curious to see how much time it would take to run it with a lot more points and with full VC++ optimizations enabled. I tried with 10000 and it took about 8~9 seconds. Then I was curious to see how much time C# and Java would take to do the same thing. As expected, C# took a little longer, 9~10 seconds; Java, however, took only ~400 milliseconds! Why does this happen?!
This is my code in C, C# and Java:
C:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <Windows.h>
long perfFrequency = 0;
typedef struct
{
    double X;
    double Y;
} Point;
double distance(Point p1, Point p2)
{
    return sqrt(pow(p1.X - p2.X, 2) + pow(p1.Y - p2.Y, 2));
}
double smallerDistance(Point *points, int size, Point *smallerA, Point  *smallerB)
{
    int i, j;
    double smaller = distance(points[0], points[1]);
    for (i = 0; i < size; i++)
    {
        for (j = i + 1; j < size; j++)
        {
            double dist = distance(points[i], points[j]);
            if (dist < smaller)
            {
                smaller= dist;
                *smallerA = points[i];
                *smallerB = points[j];
            }
        }
    }
    return smaller;
}
void main()
{
    // read size and points from file.
    int size;
    Point *points= (Point *)malloc(size * sizeof(Point));
    // just to make sure everything is ready before the benchmark begins    
    system("pause");
    Point smallerA, smallerB;
    if (!QueryPerformanceFrequency((LARGE_INTEGER *)&perfFrequency))
        printf("Couldn't query performance frequency.");
    long long start, end;   
    double smaller;
    QueryPerformanceCounter((LARGE_INTEGER *)&start);
    smaller= smallerDistance(points, size, &smallerA, &smallerB);
    QueryPerformanceCounter((LARGE_INTEGER *)&end);
    printf("The smaller distance is: %lf. The coordinates of the most close points are: (%lf, %lf) and (%lf, %lf). Time taken: %lfms\n",
        smaller, smallerA.X, smallerA.Y, smallerB.X, smallerB.Y, (end - start) * 1000.0 / perfFrequency);
}
C#:
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Diagnostics;
namespace StructuredTest
{
    struct Point
    {
        public double X;
        public double Y;
    }
    class Program
    {
        static double Distance(Point p1, Point p2)
        {
            return Math.Sqrt(Math.Pow(p1.X - p2.X, 2) + Math.Pow(p1.Y - p2.Y, 2));
        }
        static double SmallerDistance(Point[] points, int size, out Point smallerA, out Point smallerB)
        {
            int i, j;
            double smaller = Distance(points[0], points[1]);
            smallerA = default(Point);
            smallerB = default(Point);
            for (i = 0; i < size; i++)
            {
                for (j = i + 1; j < size; j++)
                {
                    double dist = Distance(points[i], points[j]);
                    if (dist < smaller)
                    {
                        smaller = dist;
                        smallerA = points[i];
                        smallerB = points[j];
                    }
                }
            }
            return smaller;
        }
        static void Main(string[] args)
        {
            // read size and points from file 
            int size = int.Parse(file[0]);
            Point[] points= new Point[size];                   
            // make sure everything is ready
            Console.WriteLine("Press any key to continue...");
            Console.ReadKey(true);
            Point smallerA, smallerB;
            double smaller;
            Stopwatch sw = new Stopwatch();
            sw.Restart();
            smaller = SmallerDistance(points, size, out smallerA, out smallerB);
            sw.Stop();
            Console.WriteLine($"The smaller distance is: {smaller}. The coordinates of the most close points are: ({smallerA.X}, {smallerA.Y}) and " +
                $"({smallerB.X}, {smallerB.Y}). Time taken: {sw.ElapsedMilliseconds}ms.");
        }
    }
}
Java:
package structuredtest;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
class Point {
    public Point(double X, double Y) {
        this.X = X;
        this.Y = Y;
    }
    double X;
    double Y;
}
class Result {
    double distance;
    Point p1;
    Point p2;
}
public class StructuredTest {
    static double distance(Point p1, Point p2) {
        return Math.sqrt(Math.pow(p1.X - p2.X, 2) + Math.pow(p1.Y - p2.Y, 2));
    }
    static Result smallerDistance(Point[] points, int size) {
        int i, j;
        double smaller = distance(points[0], points[1]);
        Result r = new Result();
        for (i = 0; i < size; i++) {
            for (j = i + 1; j < size; j++) {
                double dist = distance(points[i], points[j]);
                if (dist < smaller) {
                    smaller = dist;
                    r.p1 = points[i];
                    r.p2 = points[j];
                }
            }
        }
        r.distance = smaller;
        return r;
    }
    public static void main(String[] args) throws IOException {
        // read size and points from file
        int size = Integer.parseInt(file[0]);
        Point[] points = new Point[size];
        // make sure everything is ready    
        System.out.println("Press any key to continue...");
        System.in.read();
        double start = System.nanoTime(), end;
        Result r = smallerDistance(points, size);
        end = System.nanoTime();
        System.out.println("The smaller distance is: " + r.distance + ". The most close points are: ("
                + r.p1.X + "," + r.p1.Y + ") and " + r.p2.X + "," + r.p2.Y + "). Time taken: " + (end - start) / 1000000 + "ms.");
    }
}
If java had beaten both C and C# by a small margin I wouldn't be surprised, but 20 times faster?!
The file is in the following format:
3 // number of points in the file. Note that there no comments in the actual file
(3.7098722472288, 4.49056397953787) // point (X,Y)
(8.90232811621332, 9.67982769279173)
(5.68254334818822, 1.71918922506136)
(6.22585901842366, 9.51660500242835)
Funny thing: At first, the file with 10000 points that I mentioned earlier, which I was using to benchmark, was actually just a 100 times copy paste of another file with 100 random points. Like this:
(Point 1)
(Point 2)
(Point 3)
(Point 1)
(Point 2)
(Point 3)
I thought that there was no need of generating 10000 random points because as the code has to run through all of the numbers anyway, it would make little difference (only more assignments). But then I decided to generate 10000 random points to see how they would react: both C and C# still ran in about the same time (~50ms increase); Java, on the other hand, had a increase of ~500ms.
Also, I think it is worth noting that java takes about 11 seconds when running inside NetBeans (even in "Run" mode, not "Debug").
And I also tried compiling as C++ instead of C, but it made no difference.
I'm using VS 2015 for both C and C#.
These are the settings for each language:
C:
x64
Optimization: Maximize Speed (/O2)
Intrinsic Functions: Yes (/Oi)
Favor Size or Speed: Favor fast code (/Ot)
Enable Fiber-Safe Optimizations: Yes (/GT)
Security Check: Disable Security Check (/GS-)
Floating point model: Fast (/fp:fast)
Everything else: default
C#:
x64
Release Mode
Optimize Code: Enabled
Check for arithmetic overflow: Disabled
.NET 4.5.2 
Java:
JRE/JDK 1.8
Default settings (if there are any)
EDIT:
Okay, I re-did the tests, following the suggestions:
First off, I used the Result class/struct in both C and C#. The reason I used it in java but not in C/C# is because java can't pass by reference. 
Second, I now repeat the test in the main() function.
And thanks @Tony D for catching that bug! :)
I won't post the code because the changes are minor: simply implement exactly the java version in the other tests, that's what I did.
This time I tested with only 7000 Points (not 10000) and with only 30 iterations, because it is taking a long time to test and its quite late here.
The results didn't change much: C# took 5228ms in average, C 4424ms and Java 223ms. Java still wins by being 20 or more times faster.
Then I tried removing the calls to Math.Pow (simply changing for ((p1.X - p2.X) * (p1.X - p2.X)) + ((p1.Y - p2.Y) * (p1.Y - p2.Y))), then everything changed. The new results:
Java: 220ms average
C#: 195ms average
C: 195ms average
If I only checked that before :p
As I commented, I thought about doing that, but then decided that it would be better to test each compiler's ability to inline functions and optimize this kind of simple calls. However, when I obtained those strange results, I should've gone back and done this, but I got so nervous that I forgot about doing it.
Anyway, to be really honest, I'm surprised that the Java compiler was able to completely optimize that line of code while C#'s and C++'s weren't. Although I know about the corner case checks and the internal calls on C#, I find it really interesting that the Java compiler was able to notice that no corner-case checks were necessary in that code whatsoever.

Math.pow(p1.X - p2.X, 2)call to a multiplication: you could explicitly change it to a multiplication in the C version and see if that accounts for the performance difference.QueryPerformanceCounteris also famously unreliable, depending on your exact OS version and hardware, though if your machine is otherwise idle and it doesn't flip the program across cores you shouldn't be affected by the bugs.Resultclass, but the C# and C++ versions do not? Are you sure you are comparing apples to apples? Is the output from these 3 programs equivalent?*smallerAand*smallerB). Avoiding pow in favour of manual multiplication: ~700ms.sqrt(pow(p1.X - p2.X, 2) + pow(p1.Y - p2.Y, 2))tosqrt((p1.X - p2.X)*(p1.X - p2.X) + (p1.Y - p2.Y)*(p1.Y - p2.Y));made it take 1/10 the time. (10000 random points, 4500mS down to 322mS) this is with 2015 community edition