`requests` accepts a dict of query parameters and its responses expose a `.json()` method, so this can be written much more cleanly.
import logging
import time

import requests
def scrape_site(self):
    """Scrape every page of products from ``self.url`` into ``self.items``.

    Pages through the endpoint 250 items at a time, storing a slim record
    (title, first image URL, handle, variants) per product. Pagination
    stops when a page yields no products, or when a request/parse error
    occurs (logged with traceback).

    Returns:
        list[dict]: the accumulated product records (also kept on
        ``self.items``).
    """
    self.items = []
    page = 1
    with requests.Session() as session:
        while True:
            params = {'page': page, 'limit': 250}
            try:
                # NOTE(review): verify=False disables TLS certificate
                # checking — confirm this is intentional (proxy MITM?).
                r = session.get(
                    self.url,
                    params=params,
                    headers=self.headers,
                    proxies=self.proxy,
                    verify=False,
                    timeout=20,
                )
                r.raise_for_status()
                # Bug fix: the original tested `if not output` on the
                # parsed dict, but an empty page comes back as
                # {'products': []} — a truthy dict — so the loop never
                # terminated normally. Test the product list itself.
                products = r.json().get('products', [])
                if not products:
                    break
                for product in products:
                    self.items.append({
                        'title': product['title'],
                        'image': product['images'][0]['src'],
                        'handle': product['handle'],
                        'variants': product['variants'],
                    })
                logging.info('Successfully scraped page %s', page)
                page += 1
                time.sleep(1)  # polite per-page delay
            except Exception:
                # logging.exception keeps the traceback; the original
                # logging.error(e) discarded it.
                logging.exception('Failed while scraping page %s', page)
                break
    return self.items