I have a csv file like this:
[email protected], 01-05-2014
[email protected], 01-05-2014
[email protected], 01-05-2014
[email protected], 01-05-2014
I need to read the above csv file and extract domain name and also the count of emails address by domain name and date as well. All these things I need to insert into MySQL database but somehow I am stuck how to insert into MySQL database after iterating the list I got.
Query will be like this:
INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES (%s, %s, %s);
Below is the code
#!/usr/bin/python
import fileinput
import csv
import os
import sys
import MySQLdb
from collections import defaultdict
lst = defaultdict(list)
d_lst = defaultdict(list)
# ======================== Defined Functions ======================
def get_file_path(filename):
    currentdirpath = os.getcwd()  
    # get current working directory path
    filepath = os.path.join(currentdirpath, filename)
    return filepath
# ===========================================================
def read_CSV(filepath):
   domain_list = []
   domain_date_list = []
   sorted_domain_list_bydate = defaultdict(list)
   with open(filepath, 'rb') as csvfile:
       reader = csv.reader(csvfile)
       for row in reader:
          # insert the 1st & 2nd column of the CSV file into a set called input_list
           email = row[0].strip().lower()
           date  = row[1].strip()
           domain_date_list.append([date, email[ email.find("@") : ]])
           domain_list.append(email[ email.find("@") : ])
   for k, v in domain_date_list: 
         sorted_domain_list_bydate[k].append(v)
   # remove duplicates from domain list
   domain_list = list(set(domain_list))
   return sorted_domain_list_bydate, domain_list
# ===========================================================
def update_DB(lst):
    # open a database connection
    db = MySQLdb.connect(host="localhost", # your host, usually localhost
                         user="root", # your username
                          passwd="abcdef1234", # your password
                          db="test") # name of the data base
    cur = db.cursor() 
    a = []
    for k, v in lst.items():
        # now what should I do here?
        # this is what I am confuse
    db.commit()
    db.close()
# ==========================================================
# ======================= main program =======================================
path = get_file_path('emails.csv') 
[lst, d_lst] = read_CSV(path) # read the input file
update_DB(lst) # insert data into domains table
I am confuse in update_DB method.
lsthas sorted domain list by date.