I would like to scrape multiple URLs using Selenium. However, I still get only one URL scraped. What could be wrong with the code? Thank you!
def __init__(self):
    """Create the Chrome WebDriver used by this object and store it on ``self.driver``.

    Chrome is configured with the ``--no-sandbox`` and ``--headless``
    arguments, and with two experimental options: the ``enable-automation``
    switch is excluded and ``useAutomationExtension`` is set to ``False``.
    """
    chrome_opts = Options()

    # Command-line arguments, applied in the same order as before.
    for flag in ('--no-sandbox', "--headless"):
        chrome_opts.add_argument(flag)

    # Experimental options as (name, value) pairs, applied in order.
    experimental = (
        ("excludeSwitches", ["enable-automation"]),
        ('useAutomationExtension', False),
    )
    for opt_name, opt_value in experimental:
        chrome_opts.add_experimental_option(opt_name, opt_value)

    # NOTE(review): 'path' is passed as the first positional argument and
    # looks like a placeholder for the chromedriver location -- confirm.
    self.driver = webdriver.Chrome('path', options=chrome_opts)
def parse(self, response):
    """Collect listing links from every result page of every start URL.

    Each URL in ``start_urls`` is opened in ``self.driver``. On every result
    page the ``href`` of each ``.hoverdetail a`` element is appended to
    ``links``; the pager's "Next" anchor is then clicked, and this repeats
    until that anchor is not clickable within 10 seconds.

    The paging loop is nested *inside* the per-URL loop so that each start
    URL is fully paginated before the next one is loaded. If the paging loop
    runs after the ``for`` loop instead, only the last URL loaded is scraped.

    Args:
        response: Not used by the visible code.

    Raises:
        TimeoutException: If the pager label has not changed 10 seconds
            after "Next" was clicked (that wait is outside the ``try``).
    """
    start_urls = [
        'https://www.milieuproperties.com/search-results.aspx?paramb=ADVANCE%20SEARCH:%20Province%20(Western%20Cape),%20%20Area%20(Cape%20Town)',
        'https://www.milieuproperties.com/search-results-rent.aspx?paramb=ADVANCE%20SEARCH:%20Province%20(Western%20Cape),%20Rental%20To%20',
    ]

    # Locators are defined once so the three reads of the pager label cannot
    # drift apart. find_element(By, value) is used because the
    # find_element_by_* helpers no longer exist in current Selenium releases.
    pager_label = (By.CSS_SELECTOR, '#ContentPlaceHolder1_lvDataPager1>span')
    next_button = (
        By.XPATH,
        '//*[@id="ContentPlaceHolder1_lvDataPager1"]/a[text()="Next" and not(@class)]',
    )
    listing_anchor = (By.CSS_SELECTOR, '.hoverdetail a')

    # NOTE(review): `links` is only accumulated here; nothing in the visible
    # code returns or consumes it -- confirm against the rest of the method.
    links = []
    for url in start_urls:
        self.driver.get(url)
        current_page_number = self.driver.find_element(*pager_label).text

        # Walk every result page of THIS url before moving to the next one.
        while True:
            links.extend(
                link.get_attribute('href')
                for link in self.driver.find_elements(*listing_anchor)
            )
            try:
                elem = WebDriverWait(self.driver, 10).until(
                    EC.element_to_be_clickable(next_button)
                )
                elem.click()
            except TimeoutException:
                # No clickable "Next" anchor: stop paging this url and
                # continue with the next start URL.
                break

            # Block until the pager label differs from the one recorded
            # before the click, then remember the new label.
            WebDriverWait(self.driver, 10).until(
                lambda driver: driver.find_element(*pager_label).text != current_page_number
            )
            current_page_number = self.driver.find_element(*pager_label).text