I need to compare two large csv files.
But the thing is I have to iterate each line of file1 with all other lines of file2 and do some computation for different columns.
Part of my code that I tried in python:
import csv
def getOverlap(a,b):
    return max(0, min(a[1], b[1]) - max(a[0], b[0]))
masterlist = [row for row in c2]
for hosts_row in c1:
    chr1 = hosts_row[3]
    a1 = [int(hosts_row[4]),int(hosts_row[5])]
    found = False
    for master_row in masterlist:
        if hosts_row[7] == master_row[7]:
            c3.writerow(hosts_row)
            chr2 = master_row[3]
            b1 = [int(master_row[4]),int(master_row[5])]
            if getOverlap(a1,b1) != 0 and chr1 == chr2:
                c5.writerow(hosts_row)
            else:
                c6.writerow(hosts_row)
            found = True
            break
    if not found:
        c4.writerow(hosts_row)
        found2 = False
        for master_row2 in masterlist:
            chr2 = master_row[3]
            b1 = [int(master_row[4]),int(master_row[5])]
            if getOverlap(a1,b1) != 0 and chr1 == chr2:
                c7.writerow(hosts_row)
                found2 = True
                break
        if not found2:
            c8.writerow(hosts_row)
But it takes about 5 to 6 hours of running. Is there any quicker way for it. I have 16gb ram.