I've got a method that parses a file.  I take all the words and add them to a SortedSet.  Every word holds a list of the Lines that contain that word.  Words are not plain strings but instances of a class I created:
/// <summary>
/// A word together with the list of lines recorded for it.
/// Instances are ordered by <see cref="WordStr"/> using an ordinal,
/// case-insensitive comparison.
/// </summary>
class Word : IComparable<Word>
{
    /// <summary>Creates a word with an empty <see cref="Lines"/> list.</summary>
    public Word()
    {
        Lines = new List<Line>();
    }

    /// <summary>The text of the word.</summary>
    public string WordStr { get; set; }

    /// <summary>The lines associated with this word.</summary>
    public List<Line> Lines { get; set; }

    /// <summary>
    /// Compares this word with <paramref name="other"/> by <see cref="WordStr"/>,
    /// ignoring case (ordinal).
    /// </summary>
    /// <param name="other">The word to compare against; may be null.</param>
    /// <returns>
    /// A negative value, zero, or a positive value when this word sorts before,
    /// equal to, or after <paramref name="other"/>. A null
    /// <paramref name="other"/> always sorts first.
    /// </returns>
    public int CompareTo(Word other)
    {
        // IComparable<T> convention: any instance is greater than null.
        // Without this guard, other.WordStr would throw NullReferenceException.
        if (object.ReferenceEquals(other, null))
        {
            return 1;
        }

        return string.Compare(this.WordStr, other.WordStr, StringComparison.OrdinalIgnoreCase);
    }
}
And the method that parses the file (I suspect I am not doing something properly here):
/// <summary>
/// Reads the file at <paramref name="path"/>, removes every character that is not
/// an ASCII letter, digit, whitespace or apostrophe, and records each
/// space-separated token of each non-empty line in <c>Words</c>, attaching the
/// line to the token's entry.
/// Returns without doing anything when the file does not exist or its
/// lower-cased path is already present in <c>Files</c>.
/// </summary>
/// <param name="path">Path of the text file to index.</param>
private void AddFile(string path)
    {
        Regex regex = new Regex("[^A-Za-z0-9\\s\\']");
        FileInfo fi = new FileInfo(path);
        string fileKey = path.ToLower(); // computed once; used for both the check and the insert
        if (!fi.Exists || Files.Contains(fileKey)) //File does not exist or file already indexed
        {
            return;
        }
        Files.Add(fileKey);

        // Dispose the reader deterministically so the file handle is released
        // as soon as the content has been read.
        string file;
        using (StreamReader sr = new StreamReader(path))
        {
            file = sr.ReadToEnd();
        }

        string saniFile = regex.Replace(file, "");
        string[] saniLines = saniFile.Split(new char[]{'\r', '\n'}, StringSplitOptions.RemoveEmptyEntries);

        // NOTE(review): empty lines are dropped by the split above, so lineNo counts
        // non-empty lines only, not physical line numbers in the file — confirm intent.
        int lineNo = 1;
        foreach (var l in saniLines)
        {
            Line line = new Line(l, path, lineNo);

            // NOTE(review): splitting on a single space yields empty tokens for
            // consecutive spaces and leaves tabs inside tokens — confirm intent.
            string[] words = l.Split(' ');
            foreach (var word in words)
            {
                Word probe = new Word();
                probe.WordStr = word;

                // One lookup through the set itself instead of two linear LINQ scans
                // over every stored word (which also lower-cased two strings per element).
                // Assumes Words is a SortedSet<Word> ordered by Word.CompareTo
                // (case-insensitive) — confirm against its declaration.
                Word existing;
                if (Words.TryGetValue(probe, out existing)) //Set already contains the word
                {
                    if (!existing.Lines.Contains(line))
                    {
                        existing.Lines.Add(line);
                    }
                }
                else
                {
                    probe.Lines.Add(line);
                    Words.Add(probe);
                }
            }
            lineNo++;
        }
    }
I have the exact same functionality working in C++ and it's orders of magnitude faster.  So is there something I am doing incorrectly?  What if I used a SortedDictionary instead of a SortedSet for the words?  Then the key could be the string that is the word and the value would be the list of lines that contain that word.  
For reference, a 618KB text file takes a few seconds to parse and index in C++. It's taking me minutes to do it in C#.


Change `foreach` to a `for` if you can. I was trying to do a lot of small things really fast (a few thousand in less than a second) and `foreach` added a great deal of overhead (causing it to run more than 1 second). I don't know if that would apply in a longer-running situation. But I do know that, in general, `for` will be faster than `foreach`. I've seen benchmarks on SO where it's upwards of 5x faster in some situations.