I wrote this code over the last few days and learned a lot; it works as I expect. However, I suspect it's woefully inefficient:
"""Scrape the first three gallery pages of Game of Thrones wallpapers from
wall.alphacoders.com and save each full-size image into ./Wallpapers."""
import requests, bs4, os

# Pages to scrape: the original loop's endswith('page=3') check stopped after page 3.
LAST_PAGE = 3
SEARCH_URL = 'https://wall.alphacoders.com/search.php?search=Game+Of+Thrones&page=%s'


def scrape_image_links(session, gallery_url):
    """Return the image-page hrefs found on one gallery page.

    Raises requests.HTTPError on a bad response (as the original did for
    gallery pages via raise_for_status).
    """
    print('Opening gallary... %s' % (gallery_url))
    res = session.get(gallery_url)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'html.parser')
    links = []
    for box in soup.find_all('div', attrs={'class': 'boxgrid'}):
        for a in box.find_all('a'):
            href = a.get('href')
            links.append(href)
            print('Image found at... ' + href)
    return links


def download_image(session, href, dest_dir):
    """Follow one image-page link and stream the wallpaper into dest_dir."""
    print('Downloading image at... %s' % (str(href)))
    res = session.get('https://wall.alphacoders.com/' + str(href))
    # Was missing in the original: without this a 404/500 page would be
    # parsed as HTML and crash later with a confusing TypeError.
    res.raise_for_status()
    wallpaper_soup = bs4.BeautifulSoup(res.text, 'html.parser')
    image_src = wallpaper_soup.find(id='main_wallpaper')['src']
    # stream=True avoids buffering the entire image in memory before writing;
    # iter_content then yields it in chunks as the original intended.
    res = session.get(image_src, stream=True)
    res.raise_for_status()
    with open(os.path.join(dest_dir, os.path.basename(image_src)), 'wb') as image_file:
        for chunk in res.iter_content(12000):
            image_file.write(chunk)


def main():
    # NOTE(review): a cwd-relative path is fragile -- it only works when the
    # script is launched from the same directory every time; confirm or use
    # an absolute path.
    os.chdir('../../Desktop')
    os.makedirs('Wallpapers', exist_ok=True)
    dest_dir = os.path.join(os.getcwd(), 'Wallpapers')
    # One Session reuses the TCP/TLS connection for every request to the same
    # host -- the single biggest efficiency win over per-request requests.get.
    with requests.Session() as session:
        for page in range(1, LAST_PAGE + 1):
            for href in scrape_image_links(session, SEARCH_URL % (page,)):
                download_image(session, href, dest_dir)
    print('Done')


if __name__ == '__main__':
    main()
Aside from cutting the print statements and simply letting the program do its thing, how could I optimize this code to run more efficiently?