I have a webpage that lists some products. I need to visit each product page, read the table under the "Technical Details" tab, and collect the data from all products into one big table in Excel. I wrote the following code, but the Excel file it produces is blank. Where is it going wrong?
from urllib.parse import urljoin

import requests
import xlsxwriter
from bs4 import BeautifulSoup
def cpap_spider(url):
    """Crawl a product listing page and export each product it links to.

    Fetches ``url`` and looks for every ``<td class="name name2_padd">``
    cell. For each cell that contains a link, the link text is written as
    the product title into column 0 of worksheet row ``row_i``, the product
    page is passed to ``each_item``, and the product URL is printed.

    Relies on the module-level ``worksheet`` and ``row_i``. ``row_i`` is
    only read here; ``each_item`` is the function that advances it.

    Args:
        url: Address of the listing page. It is also used as the base for
            resolving relative product links.
    """
    source_code = requests.get(url)
    soup = BeautifulSoup(source_code.text, 'html.parser')
    for cell in soup.find_all('td', {'class': 'name name2_padd'}):
        # The href has to be read from the <a> inside the cell: asking the
        # <td> itself for 'href' yields None, which then breaks the request
        # for the product page.
        # NOTE(review): assumes each product cell wraps its link in an <a>
        # -- confirm against the page markup.
        anchor = cell.find('a', href=True)
        if anchor is None:
            # Nothing to follow for this cell; skip it instead of failing.
            continue
        # Product links may be relative; resolve them against the listing.
        href = urljoin(url, anchor['href'])
        # get_text() still works when the link holds nested tags or extra
        # whitespace, where .string would be None.
        title = anchor.get_text(strip=True)
        worksheet.write(row_i, 0, title)
        each_item(href)
        print(href)
def each_item(item_url):
    """Write one product's details table into the current worksheet row.

    Fetches ``item_url`` and reads the first ``<table class="width_table">``.
    In every table row the first cell is taken as the column label and the
    remaining cells as values. Each distinct label gets its own worksheet
    column: labels are recorded in the module-level ``cols_names`` list and
    written to header row 0 the first time they are seen. Values are written
    to row ``row_i`` in the label's column; if a row has several non-empty
    value cells, the last one wins because they share one worksheet cell.

    A single trailing ``:`` is removed from every cell's text. Column 0 is
    left for the product title, hence the ``+ 1`` column offset.

    When the page has no such table the function returns without advancing
    ``row_i``, so the next product reuses the same worksheet row.

    Args:
        item_url: Address of the product page to read.
    """
    global row_i
    source_code = requests.get(item_url)
    soup = BeautifulSoup(source_code.text, 'html.parser')
    table = soup.find('table', {'class': 'width_table'})
    if table is None:
        return

    for row in table.find_all('tr'):
        cells = []
        for cell in row.select('td'):
            text = cell.text.strip()
            if text.endswith(':'):
                text = text[:-1]
            cells.append(text)

        # A row without a label has no column to file its values under;
        # skip it rather than reuse the previous row's column (or fail on
        # an unassigned column index).
        if not cells or not cells[0]:
            continue

        label, values = cells[0], cells[1:]
        try:
            col_i = cols_names.index(label)
        except ValueError:
            # First time this label is seen: open a new column for it.
            cols_names.append(label)
            col_i = len(cols_names) - 1
            worksheet.write(0, col_i + 1, label)

        # Only value cells go into the data row; the label itself belongs
        # in the header row only.
        for value in values:
            if value:
                worksheet.write(row_i, col_i + 1, value)

    row_i += 1
# Column labels discovered so far; a label's index + 1 is its worksheet
# column (column 0 holds the product title).
cols_names = []
# No function declares this name global, so this module-level value is
# never read; kept so the module's globals stay unchanged.
cols_names_i = 0
# Next worksheet row to fill; row 0 is the header row.
row_i = 1

# xlsxwriter only writes the file when the workbook is closed. The context
# manager closes it even if the crawl raises part-way through, so whatever
# was collected up to that point is still saved to st.xlsx.
with xlsxwriter.Workbook('st.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    worksheet.write(0, 0, 'Title')
    cpap_spider('https://www.respshop.com/cpap-machines/manual/')
