I wrote this code earlier to parse Chase.com's online transactions page. It's written in WinForms.
`stepBtn` is a button that starts this. `wb` is a WebBrowser that has already navigated to the page.
Can you give me advice? I'm primarily looking for optimizations, but any other advice would be great.
/// <summary>
/// Scrapes the page currently loaded in <c>wb</c> for transaction dates, descriptions and
/// amounts, and appends one "date: description (amount)" entry per transaction to
/// <c>firstCheck</c>. If the three extracted lists differ in length an error dialog is shown
/// and nothing is added.
/// </summary>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void stepBtn_Click(object sender, EventArgs e)
{
    var page = wb.Document;
    if (page == null)
    {
        // Document is null until a page has been loaded; bail out instead of throwing.
        MessageBox.Show("No page is loaded in the browser yet.", Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    List<string> date = new List<string>();
    List<string> desc = new List<string>();
    // decimal rather than double: these are currency values and must not pick up
    // binary floating-point rounding.
    List<decimal> amt = new List<decimal>();

    // Build the MM/DD/YYYY matcher once instead of passing the pattern string for every span.
    Regex datePattern = new Regex(@"^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)\d\d$");

    // Dates: every <span> whose trimmed text is "Pending" or a date.
    foreach (HtmlElement span in page.GetElementsByTagName("span"))
    {
        string spanText = span.InnerText; // read once; each access goes through the DOM
        if (spanText == null)
        {
            continue;
        }

        spanText = spanText.Trim();
        if (spanText == "Pending" || datePattern.IsMatch(spanText))
        {
            date.Add(spanText);
        }
    }

    // Descriptions and amounts both live in <td> cells, so walk the cells a single time.
    // Each list is still filled in document order, exactly as with two separate passes.
    int cellIndex = 0;
    foreach (HtmlElement cell in page.GetElementsByTagName("td"))
    {
        string cellText = cell.InnerText;

        if (cell.GetAttribute("classname") == "cellStyle")
        {
            desc.Add(cellText);
        }

        // The first three <td> cells on the page are never treated as amounts.
        // NOTE(review): the original comment says this is meant to skip the present and
        // available balance, but the counter advances on every <td>, not only on dollar
        // cells - confirm this offset against the real page layout.
        // Amounts of 1000 or more are dropped on the assumption that no single purchase is
        // that large, which keeps balance figures out of the list.
        decimal value;
        if (cellIndex > 2 && TryParseDollarAmount(cellText, out value) && value < 1000)
        {
            amt.Add(value);
        }

        cellIndex++;
    }

    // Check to make sure all lists have the same length
    if (date.Count != amt.Count || amt.Count != desc.Count)
    {
        MessageBox.Show("The three arrays do not have the same length.\n\nDate: " + date.Count + "\nAmt: " + amt.Count + "\nDesc: " + desc.Count, Text, MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Output the arrays to the first checklist
    for (int i = 0; i < date.Count; i++)
    {
        firstCheck.Items.Add(date[i] + ": " + desc[i] + " (" + amt[i] + ")");
    }
}

/// <summary>
/// Tries to read a cell's text as a dollar amount of the form "$12.34".
/// </summary>
/// <param name="cellText">Raw cell text; may be null.</param>
/// <param name="amount">The parsed amount, or 0 when the method returns false.</param>
/// <returns>
/// True when the trimmed text is at least five characters long, starts with '$' and the
/// remainder is a valid number; false otherwise. Never throws on malformed text.
/// </returns>
private static bool TryParseDollarAmount(string cellText, out decimal amount)
{
    amount = 0m;
    if (cellText == null)
    {
        return false;
    }

    string trimmed = cellText.Trim();
    if (trimmed.Length < 5 || trimmed[0] != '$')
    {
        return false;
    }

    // Invariant culture: the page always uses '.' as the decimal separator, regardless of
    // the regional settings of the machine running this program.
    return decimal.TryParse(
        trimmed.Substring(1),
        System.Globalization.NumberStyles.Number,
        System.Globalization.CultureInfo.InvariantCulture,
        out amount);
}