I have written a small program that reads a search_id from each row of a CSV file, submits it to a web form, and writes the scraped results (in this example, product name and price) to another CSV file.
import csv
# Read each search_id row from file1.csv and append the scraped results to
# file_2.csv as three separate rows: the id, the name, and the price series.
# NOTE: "rb" is the mode the Python 2 csv module expects for reading; the
# original "rb+a" mixed read and append flags and is not a valid mode.
with open("file1.csv", "rb") as file1_read:
    r = csv.reader(file1_read, delimiter=",")
    for search_id in r:  # search_id is one csv row (a list of cells)
        # -- Logic for web scraping here, omitted --
        # `name` and `price` are expected to be set by the omitted scraping code.
        with open("file_2.csv", "a") as file2_write:
            wr = csv.writer(file2_write, delimiter=",")
            wr.writerow(search_id)
            wr.writerow(name)
            wr.writerow(price)
Example – 2 rows of search_id produce 6 rows of output, with each price row spread over 3 columns:
id01
Coffee
$4 $5 $3
id02
Soda
$2 $3 $4
The reason I get three cells for "price" is because I'm scraping for a price series.
Now, I would like the output to look like this instead:
Coffee id01 $4
Coffee id01 $5
Coffee id01 $3
Soda id02 $2
Soda id02 $3
Soda id02 $4
Any ideas on how I can restructure the code to produce the output format above?
Update: Here is more of the code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import re
import csv
from time import sleep
from itertools import *
# Omitted code here, navigates to search form on web page.
# For every search key in file1.csv: type it into the web form, wait for the
# suggestion list, extract product names and prices with regexes, and append
# the results to file2.csv as three rows (key, product names, prices).
# NOTE: "rb" is the mode the Python 2 csv module expects for reading; the
# original "rb+a" mixed read and append flags and is not a valid mode.
with open("file1.csv", "rb") as file1_read:  # Open file1.csv containing column with searchkey (id).
    r = csv.reader(file1_read, delimiter=",")
    for search_id in r:  # one csv row (a list of cells); renamed from `id` to avoid shadowing the builtin
        searchkey = driver.find_element_by_name("searchkey")
        searchkey.clear()
        searchkey.send_keys(search_id)
        # Parenthesised form prints the same text on Python 2 and also parses on Python 3.
        print("\nSearching for %s ..." % search_id)
        sleep(9)  # Sleep timer, allow for retrieving results
        # Get list, variable how many items and what their contents may be.
        # Price and product names are extracted through regex.
        search_reply = driver.find_element_by_class_name("ac_results")
        # Raw strings keep the patterns unchanged while avoiding the invalid "\(" escape.
        price = re.findall(r"((?<=\()[0-9]*)", search_reply.text)  # digits right after each "("
        product_name = re.findall(r"(.*?)\(", search_reply.text)  # text before each "("
        with open("file2.csv", "a") as file2_write:  # Write to file2.csv.
            wr = csv.writer(file2_write, delimiter=",")
            wr.writerow(search_id)
            wr.writerow(product_name)
            wr.writerow(price)
driver.quit()