from json import load
from os.path import abspath
from time import sleep

import cloudscraper


# Parser class that bypasses Cloudflare protection
class Parser:
    def __init__(self, json_data):
        # Use the same proxy for HTTP and HTTPS; disable proxying if none is set
        self.proxies = {
            "http": json_data["proxy"],
            "https": json_data["proxy"]
        } if json_data["proxy"] != "" else None
        self.request_delay = json_data["request_delay"]
        self.request_repeats = json_data["request_repeats"]
        self.request_repeat_delay = json_data["request_repeat_delay"]

        # Initialize the scraper session with Cloudflare bypass
        self.scraper = cloudscraper.create_scraper()
        if self.proxies:
            self.scraper.proxies.update(self.proxies)

        # Browser-like headers to reduce the chance of being blocked
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
            'Accept-Language': 'pl-PL,pl;q=0.9,en-US;q=0.8,en;q=0.7',
            'Accept': 'text/html,application/xhtml+xml',
            'Referer': 'https://www.google.com/'
        }

    def parse(self, url, method="GET", return_type="text"):
        """Fetch a URL, retrying up to request_repeats times.

        Returns the response body as text or parsed JSON, or None if
        every attempt failed.
        """
        sleep(self.request_delay)
        for _ in range(self.request_repeats):
            try:
                if method == "GET":
                    response = self.scraper.get(url, headers=self.headers)
                else:
                    response = self.scraper.post(url, headers=self.headers)
            except Exception as error:
                print(f"Request Error: {error} - {url}")
                sleep(self.request_repeat_delay)  # wait before retrying a failed request
                continue
            if response.status_code == 200:
                return response.text if return_type == "text" else response.json()
            print(f"bad response, status code -> {response.status_code} - {url}")
            if response.status_code == 404:
                break  # the page does not exist; retrying will not help
            sleep(self.request_repeat_delay)
        return None


# Build a Parser object with settings from request_settings.json
def get_parser():
    with open(abspath("request_settings.json"), "r", encoding="utf-8") as file:
        return Parser(load(file))
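

# --- Usage sketch (illustrative, not part of the original module) ---
# Assumes a request_settings.json next to this file with the keys read in
# __init__, e.g.:
#   {"proxy": "", "request_delay": 1, "request_repeats": 3, "request_repeat_delay": 5}
# The target URL below is a placeholder.
if __name__ == "__main__":
    parser = get_parser()
    html = parser.parse("https://example.com/")
    if html is not None:
        print(html[:200])  # first 200 characters of the fetched page
    else:
        print("all attempts failed")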