Skip to main content
# Set all csv attributes

    cache = {}
    in_file = open(sript_path + '/cp_updates/' + update_file, 'r')
    reader = csv.DictReader(in_file, delimiter= ',')
    out_file = open(sript_path + '/cp_updates/' + update_file + '.new', 'w')
    out_file.write("StockNumber,SKU,ChannelProfileID\n")
    writer = csv.DictWriter(out_file, fieldnames=('StockNumber', 'SKU', 'ChannelProfileID'), delimiter=',')
    check_file = open(sript_path + '/feeds/' + feed_file, 'r')
    ch_file_reader = csv.DictReader(check_file, delimiter=',')

    #loop through the csv's, find stock levels and update file

    for row in reader:
        #print row
        check_file.seek(0)
        found = False
        for ch_row in ch_file_reader:
            #if row['SKU'] not in cache:
            if ch_row['ProductCode'] == row[' Stock']:
                Stk_Lvl = int(ch_row[stk_lvl_header])
                if Stk_Lvl > 0:
                    res = 3746
                elif Stk_Lvl == 0:
                    res = 3745
                else:
                    res = " "
                found = True
                print ch_row
                print res
                    
                cache[row['SKU']] = res
        if not found:
            res = " "
            #print ch_row
            #print res
            cache[row['SKU']] = res     
        row['ChannelProfileID'] = cache[row['SKU']]
        writer.writerow(row) 

    
# Set all csv attributes

    cache = {}
    in_file = open(sript_path + '/cp_updates/' + update_file, 'r')
    reader = csv.DictReader(in_file, delimiter= ',')
    out_file = open(sript_path + '/cp_updates/' + update_file + '.new', 'w')
    out_file.write("StockNumber,SKU,ChannelProfileID\n")
    writer = csv.DictWriter(out_file, fieldnames=('StockNumber', 'SKU', 'ChannelProfileID'), delimiter=',')
    check_file = open(sript_path + '/feeds/' + feed_file, 'r')
    ch_file_reader = csv.DictReader(check_file, delimiter=',')

    #loop through the csv's, find stock levels and update file

    for row in reader:
        #print row
        check_file.seek(0)
        found = False
        for ch_row in ch_file_reader:
            #if row['SKU'] not in cache:
            if ch_row['ProductCode'] == row[' Stock']:
                Stk_Lvl = int(ch_row[stk_lvl_header])
                if Stk_Lvl > 0:
                    res = 3746
                elif Stk_Lvl == 0:
                    res = 3745
                else:
                    res = " "
                found = True
                print ch_row
                print res
                    
                cache[row['SKU']] = res
        if not found:
            res = " "
            #print ch_row
            #print res
            cache[row['SKU']] = res     
        row['ChannelProfileID'] = cache[row['SKU']]
        writer.writerow(row)
    
# Set all csv attributes

cache = {}
in_file = open(sript_path + '/cp_updates/' + update_file, 'r')
reader = csv.DictReader(in_file, delimiter= ',')
out_file = open(sript_path + '/cp_updates/' + update_file + '.new', 'w')
out_file.write("StockNumber,SKU,ChannelProfileID\n")
writer = csv.DictWriter(out_file, fieldnames=('StockNumber', 'SKU', 'ChannelProfileID'), delimiter=',')
check_file = open(sript_path + '/feeds/' + feed_file, 'r')
ch_file_reader = csv.DictReader(check_file, delimiter=',')

#loop through the csv's, find stock levels and update file

for row in reader:
    #print row
    check_file.seek(0)
    found = False
    for ch_row in ch_file_reader:
        #if row['SKU'] not in cache:
        if ch_row['ProductCode'] == row[' Stock']:
            Stk_Lvl = int(ch_row[stk_lvl_header])
            if Stk_Lvl > 0:
                res = 3746
            elif Stk_Lvl == 0:
                res = 3745
            else:
                res = " "
            found = True
            print ch_row
            print res
                
            cache[row['SKU']] = res
    if not found:
        res = " "
        #print ch_row
        #print res
        cache[row['SKU']] = res     
    row['ChannelProfileID'] = cache[row['SKU']]
    writer.writerow(row) 

    
deleted 284 characters in body; edited tags; edited title
Source Link
Jamal
  • 35.2k
  • 13
  • 134
  • 238

searching Searching for a value from one csvCSV file in another csvCSV file - Python

I am writing a script that takes one csvCSV file searches for a value in another csvCSV file then writes an output depending on the result it finds.

I have been using python's csv.Python's CSV Distreader and writer, I. I have it working, but it is very inefficient because it is looping through the 2 setssets of data until it finds a result.

this is what i have so far, thereThere are a few bits in the code which are specific to my setup (file locations etc), but I'm sure people can see around this;this.

# Set all csv atributesattributes

    cache = {}
    in_file = open(sript_path + '/cp_updates/' + update_file, 'r')
    reader = csv.DictReader(in_file, delimiter= ',')
    out_file = open(sript_path + '/cp_updates/' + update_file + '.new', 'w')
    out_file.write("StockNumber,SKU,ChannelProfileID\n")
    writer = csv.DictWriter(out_file, fieldnames=('StockNumber', 'SKU', 'ChannelProfileID'), delimiter=',')
    check_file = open(sript_path + '/feeds/' + feed_file, 'r')
    ch_file_reader = csv.DictReader(check_file, delimiter=',')

    #loop through the csv's, find stock levels and update file

    for row in reader:
        #print row
        check_file.seek(0)
        found = False
        for ch_row in ch_file_reader:
            #if row['SKU'] not in cache:
            if ch_row['ProductCode'] == row[' Stock']:
                Stk_Lvl = int(ch_row[stk_lvl_header])
                if Stk_Lvl > 0:
                    res = 3746
                elif Stk_Lvl == 0:
                    res = 3745
                else:
                    res = " "
                found = True
                print ch_row
                print res
                    
                cache[row['SKU']] = res
        if not found:
            res = " "
            #print ch_row
            #print res
            cache[row['SKU']] = res     
        row['ChannelProfileID'] = cache[row['SKU']]
        writer.writerow(row)
    

This is a few lines from my in_filein_file and also the outfile is the same structure, it. It just updates the ChannelProfileIDChannelProfileID depending on the results found;found.

"StockNumber","SKU","ChannelProfileID"
"10m_s-vid#APTIIAMZ","2VV-10",3746
"10m_s-vid#CSE","2VV-10",3746
"1RR-01#CSE","1RR-01",3746
"1RR-01#PCAWS","1RR-01",3746
"1m_s-vid_ext#APTIIAMZ","2VV-101",3746
"StockNumber","SKU","ChannelProfileID"
"10m_s-vid#APTIIAMZ","2VV-10",3746
"10m_s-vid#CSE","2VV-10",3746
"1RR-01#CSE","1RR-01",3746
"1RR-01#PCAWS","1RR-01",3746
"1m_s-vid_ext#APTIIAMZ","2VV-101",3746

This is a few line from the check_file;check_file:

ProductCode, Description, Supplier, CostPrice, RRPPrice, Stock, Manufacturer, SupplierProductCode, ManuCode, LeadTime
2VV-03,3MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.43,,930,CDL,2VV-03,2VV-03,1
2VV-05,5MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.54,,1935,CDL,2VV-05,2VV-05,1
2VV-10,10MTR BLACK SVHS M - M GOLD CABLE - B/Q 50,Cables Direct Ltd,0.86,,1991,CDL,2VV-10,2VV-10,1
ProductCode, Description, Supplier, CostPrice, RRPPrice, Stock, Manufacturer, SupplierProductCode, ManuCode, LeadTime
2VV-03,3MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.43,,930,CDL,2VV-03,2VV-03,1
2VV-05,5MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.54,,1935,CDL,2VV-05,2VV-05,1
2VV-10,10MTR BLACK SVHS M - M GOLD CABLE - B/Q 50,Cables Direct Ltd,0.86,,1991,CDL,2VV-10,2VV-10,1

So youYou can see it selects the first line from the in_filein_file, looks up the SKU in the check_file then writes the out_fileout_file in the same format as the in_file changing the ChannelProfileIDChannelProfileID depending what it finds in the Stock field of the check_filecheck_file, it then goes back to the first line in the check_filecheck_file and performs the same on the next line on the in_filein_file.

As I say this script is working and outputs exactly what I want, but I believe it is slow and inefficient due to having to keep loop through the check_filecheck_file until it finds a result.

What i'mI'm after is suggestions on how to improve the efficiency, I'm. I'm guessing there's a better way to find the data than to keep looping through the check_file!??

I am a beginner at python and this is my first post, so I apologize in advance for anything incorrect or unclear, I think I have posted according to forum rules and have clearly set out what my script is doing and the help I am after!

Thanks in advance for any help/suggestionscheck_file.

searching a value from one csv file in another csv file - Python

I am writing a script that takes one csv file searches a value in another csv file then writes an output depending on the result it finds.

I have been using python's csv. Distreader and writer, I have it working, but it is very inefficient because it is looping through the 2 sets of data until it finds a result.

this is what i have so far, there are a few bits in the code which are specific to my setup (file locations etc) but I'm sure people can see around this;

# Set all csv atributes

    cache = {}
    in_file = open(sript_path + '/cp_updates/' + update_file, 'r')
    reader = csv.DictReader(in_file, delimiter= ',')
    out_file = open(sript_path + '/cp_updates/' + update_file + '.new', 'w')
    out_file.write("StockNumber,SKU,ChannelProfileID\n")
    writer = csv.DictWriter(out_file, fieldnames=('StockNumber', 'SKU', 'ChannelProfileID'), delimiter=',')
    check_file = open(sript_path + '/feeds/' + feed_file, 'r')
    ch_file_reader = csv.DictReader(check_file, delimiter=',')

    #loop through the csv's, find stock levels and update file

    for row in reader:
        #print row
        check_file.seek(0)
        found = False
        for ch_row in ch_file_reader:
            #if row['SKU'] not in cache:
            if ch_row['ProductCode'] == row[' Stock']:
                Stk_Lvl = int(ch_row[stk_lvl_header])
                if Stk_Lvl > 0:
                    res = 3746
                elif Stk_Lvl == 0:
                    res = 3745
                else:
                    res = " "
                found = True
                print ch_row
                print res
                    
                cache[row['SKU']] = res
        if not found:
            res = " "
            #print ch_row
            #print res
            cache[row['SKU']] = res     
        row['ChannelProfileID'] = cache[row['SKU']]
        writer.writerow(row)
    

This is a few lines from my in_file and also the outfile is the same structure, it just updates the ChannelProfileID depending on the results found;

"StockNumber","SKU","ChannelProfileID"
"10m_s-vid#APTIIAMZ","2VV-10",3746
"10m_s-vid#CSE","2VV-10",3746
"1RR-01#CSE","1RR-01",3746
"1RR-01#PCAWS","1RR-01",3746
"1m_s-vid_ext#APTIIAMZ","2VV-101",3746

This is a few line from the check_file;

ProductCode, Description, Supplier, CostPrice, RRPPrice, Stock, Manufacturer, SupplierProductCode, ManuCode, LeadTime
2VV-03,3MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.43,,930,CDL,2VV-03,2VV-03,1
2VV-05,5MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.54,,1935,CDL,2VV-05,2VV-05,1
2VV-10,10MTR BLACK SVHS M - M GOLD CABLE - B/Q 50,Cables Direct Ltd,0.86,,1991,CDL,2VV-10,2VV-10,1

So you can see it selects the first line from the in_file, looks up the SKU in the check_file then writes the out_file in the same format as the in_file changing the ChannelProfileID depending what it finds in the Stock field of the check_file, it then goes back to the first line in the check_file and performs the same on the next line on the in_file.

As I say this script is working and outputs exactly what I want, but I believe is slow and inefficient due to having to keep loop through the check_file until it finds a result.

What i'm after is suggestions on how to improve the efficiency, I'm guessing there's a better way to find the data than keep looping through the check_file!??

I am a beginner at python and this is my first post, so I apologize in advance for anything incorrect or unclear, I think I have posted according to forum rules and have clearly set out what my script is doing and the help I am after!

Thanks in advance for any help/suggestions.

Searching for a value from one CSV file in another CSV file

I am writing a script that takes one CSV file searches for a value in another CSV file then writes an output depending on the result it finds.

I have been using Python's CSV Distreader and writer. I have it working, but it is very inefficient because it is looping through the 2 sets of data until it finds a result.

There are a few bits in the code which are specific to my setup (file locations etc), but I'm sure people can see around this.

# Set all csv attributes

    cache = {}
    in_file = open(sript_path + '/cp_updates/' + update_file, 'r')
    reader = csv.DictReader(in_file, delimiter= ',')
    out_file = open(sript_path + '/cp_updates/' + update_file + '.new', 'w')
    out_file.write("StockNumber,SKU,ChannelProfileID\n")
    writer = csv.DictWriter(out_file, fieldnames=('StockNumber', 'SKU', 'ChannelProfileID'), delimiter=',')
    check_file = open(sript_path + '/feeds/' + feed_file, 'r')
    ch_file_reader = csv.DictReader(check_file, delimiter=',')

    #loop through the csv's, find stock levels and update file

    for row in reader:
        #print row
        check_file.seek(0)
        found = False
        for ch_row in ch_file_reader:
            #if row['SKU'] not in cache:
            if ch_row['ProductCode'] == row[' Stock']:
                Stk_Lvl = int(ch_row[stk_lvl_header])
                if Stk_Lvl > 0:
                    res = 3746
                elif Stk_Lvl == 0:
                    res = 3745
                else:
                    res = " "
                found = True
                print ch_row
                print res
                    
                cache[row['SKU']] = res
        if not found:
            res = " "
            #print ch_row
            #print res
            cache[row['SKU']] = res     
        row['ChannelProfileID'] = cache[row['SKU']]
        writer.writerow(row)
    

This is a few lines from my in_file and also the outfile is the same structure. It just updates the ChannelProfileID depending on the results found.

"StockNumber","SKU","ChannelProfileID"
"10m_s-vid#APTIIAMZ","2VV-10",3746
"10m_s-vid#CSE","2VV-10",3746
"1RR-01#CSE","1RR-01",3746
"1RR-01#PCAWS","1RR-01",3746
"1m_s-vid_ext#APTIIAMZ","2VV-101",3746

This is a few line from the check_file:

ProductCode, Description, Supplier, CostPrice, RRPPrice, Stock, Manufacturer, SupplierProductCode, ManuCode, LeadTime
2VV-03,3MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.43,,930,CDL,2VV-03,2VV-03,1
2VV-05,5MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.54,,1935,CDL,2VV-05,2VV-05,1
2VV-10,10MTR BLACK SVHS M - M GOLD CABLE - B/Q 50,Cables Direct Ltd,0.86,,1991,CDL,2VV-10,2VV-10,1

You can see it selects the first line from the in_file, looks up the SKU in the check_file then writes the out_file in the same format as the in_file changing the ChannelProfileID depending what it finds in the Stock field of the check_file, it then goes back to the first line in the check_file and performs the same on the next line on the in_file.

As I say this script is working and outputs exactly what I want, but I believe it is slow and inefficient due to having to keep loop through the check_file until it finds a result.

What I'm after is suggestions on how to improve the efficiency. I'm guessing there's a better way to find the data than to keep looping through the check_file.

Tweeted twitter.com/#!/StackCodeReview/status/150265794020913153
Post Migrated Here from stackoverflow.com (revisions)
Source Link
gingebot
  • 73
  • 1
  • 1
  • 4

searching a value from one csv file in another csv file - Python

I am writing a script that takes one csv file searches a value in another csv file then writes an output depending on the result it finds.

I have been using python's csv. Distreader and writer, I have it working, but it is very inefficient because it is looping through the 2 sets of data until it finds a result.

this is what i have so far, there are a few bits in the code which are specific to my setup (file locations etc) but I'm sure people can see around this;

# Set all csv atributes

    cache = {}
    in_file = open(sript_path + '/cp_updates/' + update_file, 'r')
    reader = csv.DictReader(in_file, delimiter= ',')
    out_file = open(sript_path + '/cp_updates/' + update_file + '.new', 'w')
    out_file.write("StockNumber,SKU,ChannelProfileID\n")
    writer = csv.DictWriter(out_file, fieldnames=('StockNumber', 'SKU', 'ChannelProfileID'), delimiter=',')
    check_file = open(sript_path + '/feeds/' + feed_file, 'r')
    ch_file_reader = csv.DictReader(check_file, delimiter=',')

    #loop through the csv's, find stock levels and update file

    for row in reader:
        #print row
        check_file.seek(0)
        found = False
        for ch_row in ch_file_reader:
            #if row['SKU'] not in cache:
            if ch_row['ProductCode'] == row[' Stock']:
                Stk_Lvl = int(ch_row[stk_lvl_header])
                if Stk_Lvl > 0:
                    res = 3746
                elif Stk_Lvl == 0:
                    res = 3745
                else:
                    res = " "
                found = True
                print ch_row
                print res
                    
                cache[row['SKU']] = res
        if not found:
            res = " "
            #print ch_row
            #print res
            cache[row['SKU']] = res     
        row['ChannelProfileID'] = cache[row['SKU']]
        writer.writerow(row)
    

This is a few lines from my in_file and also the outfile is the same structure, it just updates the ChannelProfileID depending on the results found;

"StockNumber","SKU","ChannelProfileID"
"10m_s-vid#APTIIAMZ","2VV-10",3746
"10m_s-vid#CSE","2VV-10",3746
"1RR-01#CSE","1RR-01",3746
"1RR-01#PCAWS","1RR-01",3746
"1m_s-vid_ext#APTIIAMZ","2VV-101",3746

This is a few line from the check_file;

ProductCode, Description, Supplier, CostPrice, RRPPrice, Stock, Manufacturer, SupplierProductCode, ManuCode, LeadTime
2VV-03,3MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.43,,930,CDL,2VV-03,2VV-03,1
2VV-05,5MTR BLACK SVHS M - M GOLD CABLE - B/Q 100,Cables Direct Ltd,0.54,,1935,CDL,2VV-05,2VV-05,1
2VV-10,10MTR BLACK SVHS M - M GOLD CABLE - B/Q 50,Cables Direct Ltd,0.86,,1991,CDL,2VV-10,2VV-10,1

So you can see it selects the first line from the in_file, looks up the SKU in the check_file then writes the out_file in the same format as the in_file changing the ChannelProfileID depending what it finds in the Stock field of the check_file, it then goes back to the first line in the check_file and performs the same on the next line on the in_file.

As I say this script is working and outputs exactly what I want, but I believe is slow and inefficient due to having to keep loop through the check_file until it finds a result.

What i'm after is suggestions on how to improve the efficiency, I'm guessing there's a better way to find the data than keep looping through the check_file!??

I am a beginner at python and this is my first post, so I apologize in advance for anything incorrect or unclear, I think I have posted according to forum rules and have clearly set out what my script is doing and the help I am after!

Thanks in advance for any help/suggestions.