MacOS_Parsers/Pars_Decathlon/parser (2).py

60 lines
2.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from json import load
from time import sleep
import cloudscraper
from os.path import abspath
# Parser class that works around Cloudflare protection
class Parser:
    """Page fetcher built on a cloudscraper session, with delays and retries.

    Settings come from a mapping with these keys:

    * ``proxy`` -- proxy URL used for both http and https; missing, null or
      empty means no proxy is configured.
    * ``request_delay`` -- seconds to wait before every ``parse`` call.
    * ``request_repeats`` -- maximum number of attempts per ``parse`` call.
    * ``request_repeat_delay`` -- seconds to wait between two attempts.
    * ``request_timeout`` -- optional; forwarded as ``timeout`` to each
      request. When absent, ``None`` is passed.
    """

    def __init__(self, json_data):
        """Store the request settings and create the scraper session.

        Args:
            json_data: mapping of settings, see the class docstring.

        Raises:
            KeyError: if one of the three ``request_*`` timing keys is missing.
        """
        # A missing, null or empty proxy entry all mean "no proxy"; testing
        # truthiness avoids turning a JSON null into the proxy URL "None".
        proxy = json_data.get("proxy")
        self.proxies = {"http": f"{proxy}", "https": f"{proxy}"} if proxy else None

        self.request_delay = json_data["request_delay"]
        self.request_repeats = json_data["request_repeats"]
        self.request_repeat_delay = json_data["request_repeat_delay"]
        self.request_timeout = json_data.get("request_timeout")

        # Scraper session that works around Cloudflare protection.
        self.scraper = cloudscraper.create_scraper()
        if self.proxies:
            self.scraper.proxies.update(self.proxies)

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'Accept-Language': 'pl-PL,pl;q=0.9,en-US;q=0.8,en;q=0.7',
            'Accept': 'text/html,application/xhtml+xml',
            'Referer': 'https://www.google.com/',
        }

    def parse(self, url, method="GET", return_type="text"):
        """Fetch ``url`` and return its body, retrying on failure.

        Args:
            url: address to request.
            method: ``"GET"`` issues a GET; any other value issues a POST.
            return_type: ``"text"`` returns the response text; any other
                value returns the decoded JSON body.

        Returns:
            The body of the first 200 response, or ``None`` when every
            attempt failed or the server answered 404.
        """
        sleep(self.request_delay)
        send = self.scraper.get if method == "GET" else self.scraper.post

        for attempt in range(self.request_repeats):
            # Pause before every retry, whatever made the previous attempt
            # fail, and never after the last attempt.
            if attempt:
                sleep(self.request_repeat_delay)

            try:
                response = send(
                    url, headers=self.headers, timeout=self.request_timeout
                )
            except Exception as error:
                print(f"Request Error: {error} - {url}")
                continue

            status = response.status_code
            if status != 200:
                print(f"bad response, status code -> {status} - {url}")
                if status == 404:
                    # Give up on a 404 instead of retrying.
                    break
                continue

            if return_type == "text":
                return response.text
            try:
                return response.json()
            except ValueError as error:
                # A 200 with a non-JSON body counts as a failed attempt
                # instead of raising out of parse().
                print(f"bad response, invalid JSON body -> {error} - {url}")

        return None
# Build a Parser object configured from request_settings.json
def get_parser():
    """Return a Parser configured from ``request_settings.json``.

    The file name is resolved with ``abspath``, i.e. relative to the current
    working directory, and decoded as UTF-8 JSON.
    """
    settings_path = abspath("request_settings.json")
    with open(settings_path, "r", encoding="utf-8") as settings_file:
        settings = load(settings_file)
    return Parser(settings)