I'm looking for advice on how to make my code run faster. It's pretty quick right now when searching through 30 three-page PDFs, but I imagine that once there are thousands of files to search it will take longer than I'd like. I can change SearchOption.AllDirectories to SearchOption.TopDirectoryOnly. However, I've done some testing, and it seems that what takes the longest is searching inside the files, not enumerating the directory.
/// <summary>
/// Checks whether any page of a PDF file contains the given text.
/// </summary>
/// <param name="fileName">Path of the PDF file to search.</param>
/// <param name="searchText">
/// Text to look for. Compared with <see cref="string.Contains(string)"/>,
/// i.e. an ordinal, case-sensitive substring match on each page's extracted text.
/// </param>
/// <returns>
/// <paramref name="fileName"/> when at least one page contains
/// <paramref name="searchText"/>; <c>null</c> when no page matches or the
/// file does not exist.
/// </returns>
public string ReadPdfFile(string fileName, String searchText)
{
    if (!File.Exists(fileName))
    {
        return null;
    }

    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            // A new strategy is created for every page, as in the original code
            // (presumably so text from earlier pages is not carried over - confirm
            // against the iTextSharp documentation before sharing one instance).
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            string currentPageText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
            if (currentPageText.Contains(searchText))
            {
                // Callers only learn whether the file matched, so stop at the
                // first hit instead of extracting text from the remaining pages.
                return fileName;
            }
        }
    }
    finally
    {
        // Release the underlying file even when text extraction throws.
        pdfReader.Close();
    }

    return null;
}
/// <summary>
/// Click handler for the PDF search button. Lists every PDF under the syllabus
/// directory whose relative path contains the search text (case-insensitive)
/// or whose page text contains it (as decided by <c>ReadPdfFile</c>), and
/// renders the matches as hyperlinks inside <c>Panel1</c>.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void txtBoxSearchPDF_Click(object sender, EventArgs e)
{
    string searchText = txtBoxSearchString.Text;
    if (searchText == "")
    {
        lblNoSearchString.Visible = true;
        return;
    }

    lblNoSearchString.Visible = false;

    const string syllabusRoot = @"C:\schools\syllabus";
    const string syllabusBaseUrl = "https://mywebsite.com/search/syllabus/";
    char[] pathSeparators = { '\\', '/' };

    StringBuilder sb = new StringBuilder();
    foreach (string file in Directory.EnumerateFiles(syllabusRoot, "*.pdf", SearchOption.AllDirectories))
    {
        // Path below the syllabus root, without the leading separator; used
        // both as the link text and as the text the file-name match runs on.
        string relativePath = file.StartsWith(syllabusRoot, StringComparison.OrdinalIgnoreCase)
            ? file.Substring(syllabusRoot.Length)
            : file;
        relativePath = relativePath.TrimStart(pathSeparators);

        // The search text is matched literally. Compiling raw user input as a
        // regular expression throws on input such as "(" or "C++" and is
        // inconsistent with the literal match used for the page text.
        bool nameMatches = relativePath.IndexOf(searchText, StringComparison.OrdinalIgnoreCase) >= 0;

        // Each file is listed once when either its name or its content matches.
        // Checking the cheap name match first skips PDF parsing entirely for
        // files that are already known to be hits.
        if (!nameMatches && ReadPdfFile(file, searchText) == null)
        {
            continue;
        }

        // Build the URL segment by segment so sub-directory separators become
        // "/" and characters such as '#', '%' or '&' in names are escaped,
        // not only spaces.
        string[] segments = relativePath.Split(pathSeparators, StringSplitOptions.RemoveEmptyEntries);
        for (int i = 0; i < segments.Length; i++)
        {
            segments[i] = Uri.EscapeDataString(segments[i]);
        }
        string url = syllabusBaseUrl + string.Join("/", segments);

        // File names come from disk and are not trusted markup: encode them
        // and quote the attribute so a name cannot break out of the anchor.
        sb.AppendLine("<a href=\"" + System.Web.HttpUtility.HtmlAttributeEncode(url) + "\">"
            + System.Web.HttpUtility.HtmlEncode(relativePath) + "</a>");
        sb.AppendLine("<br>");
    }

    Panel1.Controls.Clear();
    if (sb.Length > 0)
    {
        Panel1.Attributes["style"] = "height: 222px;";
        Panel1.Controls.Add(new LiteralControl(sb.ToString()));
    }
    else
    {
        string noResults = "No results matched the specified search string.";
        Panel1.Attributes["style"] = "padding-left: 5px; height: 22px; padding-top: 2px;";
        Panel1.Controls.Add(new LiteralControl(noResults));
    }
}