43 lines
2.0 KiB
Python
43 lines
2.0 KiB
Python
# config.py
|
|
import httpx
|
|
# 🔧 CONFIGURATION
|
|
# Scraper settings gathered in one module-level mapping.
# NOTE(review): how each key is consumed is not visible in this file;
# comments below describe only the values themselves.
CONFIG = {
    # --- Files -------------------------------------------------------------
    "input_file": "",
    # "{timestamp}" is stored as a literal placeholder; it is not filled here.
    "output_file": "C:\\Coding\\auto-scraper\\output\\phones_{timestamp}.xlsx",
    "log_file": "C:\\Coding\\auto-scraper\\logs\\scraper.log",

    # --- URL / domain selection ---------------------------------------------
    # A set, so membership checks are O(1) and duplicates are impossible.
    "excluded_domains": {
        "auto.drom.ru",
        "auto.ru",
        "autocompass-j.ru",
        "autocompass-v.ru",
        "avito.ru",
        "duckduckgo.com",
        "google.com",
        "sberauto.com",
        "sberleasing.ru",
    },
    "urls": [],
    "output_format": "both",

    # --- HTTP client ----------------------------------------------------------
    "http": {
        # 10.0 is the general timeout value, with connect overridden to 5.0.
        "timeout": httpx.Timeout(10.0, connect=5.0),
        "max_redirects": 5,
        "retry_attempts": 3,
        # Adjacent literals are concatenated into a single string.
        "user_agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        ),
        "headers": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "ru-RU,ru;q=0.9,en;q=0.8",
            "Referer": "https://yandex.ru/",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
        },
    },

    # --- Phone extraction -----------------------------------------------------
    "phone": {
        "patterns": [
            # Captures the value of an href="tel:..." attribute.
            'href=["\\\']tel:([^"\\\']+)["\\\']',
            # "tel" followed by ':' or '=', then 10+ characters drawn from
            # digits, '+', parentheses, '-', and whitespace.
            'tel["\\\']?\\s*[:=]\\s*["\\\']?([+()0-9\\-\\s]{10,})["\\\']?',
            # One of the label words, an optional ':' or '=', then the same
            # 10+ character run as above.
            '(?:телефон|phone|контакт)["\\\']?\\s*[:=]?\\s*["\\\']?([+()0-9\\-\\s]{10,})',
        ],
        "country_code": "7",
        "min_digits": 10,
        "max_digits": 12,
    },

    # --- Keyword lists --------------------------------------------------------
    # Both capitalizations are listed explicitly.
    "required_keywords": ["Краснодар", "краснодар"],
    "stop_keywords": ["аренда", "АРЕНДА", "Аренда", "2311373680", "autocompass"],

    # --- Browser / search -----------------------------------------------------
    "headless": False,
    "search_pages": 3,
    "workers": 3,
    # "{search}" and "{lr}" are literal placeholders in the template.
    "search_template_url": "https://ya.ru/search/?text={search}&lr={lr}",
    "region_lr": 35,
    "region_name": "Краснодар",
    "search_file": "search.txt",
    "profile_dir": "C:\\Users\\Дмитрий\\chrome_profile_yandex",
}