Compare commits
No commits in common. "30a9e4b3b68c6c394d269edfd4796992830eebb3" and "6325b485b2a5463e1149e19c8c4ef41cb2a14c0d" have entirely different histories.
30a9e4b3b6
...
6325b485b2
23
.gitignore
vendored
23
.gitignore
vendored
@ -1,23 +0,0 @@
|
|||||||
# Байт-код, оптимизированные / DLL-файлы
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
|
|
||||||
# Файлы конфигурации для venv
|
|
||||||
venv/
|
|
||||||
.env
|
|
||||||
|
|
||||||
# Файлы дистрибутивов и артефакты упаковки
|
|
||||||
dist/
|
|
||||||
*.egg-info/
|
|
||||||
|
|
||||||
# Файлы покрытия, кэши и прочие ненужные файлы
|
|
||||||
.coverage
|
|
||||||
*.py,cover
|
|
||||||
.cache/
|
|
||||||
|
|
||||||
# Файлы *.py,cover не несут никакого смысла
|
|
||||||
*.py,cover
|
|
||||||
|
|
||||||
# Контрольные точки в Jupyter notebook лучше оставить в стороне
|
|
||||||
.ipynb_checkpoints
|
|
||||||
|
|
||||||
72
bot.py
72
bot.py
@ -16,6 +16,7 @@ from aiogram.types import InlineKeyboardMarkup, InlineKeyboardButton, FSInputFil
|
|||||||
|
|
||||||
# Импорт только нужного
|
# Импорт только нужного
|
||||||
from config import CONFIG
|
from config import CONFIG
|
||||||
|
from link_collector import collect_links
|
||||||
from main import process_batch, save_to_excel, load_urls
|
from main import process_batch, save_to_excel, load_urls
|
||||||
|
|
||||||
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
|
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
|
||||||
@ -329,6 +330,76 @@ async def edit_search_callback(query: CallbackQuery):
|
|||||||
|
|
||||||
# ==================== ОСНОВНЫЕ ДЕЙСТВИЯ ====================
|
# ==================== ОСНОВНЫЕ ДЕЙСТВИЯ ====================
|
||||||
|
|
||||||
|
async def scrape_callback(query: CallbackQuery):
    """Handle the "scrape" callback: collect links, scrape them, report results.

    Flow, as implemented below:
      1. Acknowledge the callback and replace the message with a progress stub.
      2. Obtain links via ``collect_links()`` and normalise them with ``load_urls()``.
      3. Run ``process_batch()`` with a progress callback that updates the stub.
      4. Keep rows that have a truthy second field and a not-yet-seen third
         field, and append a "—" placeholder column to each kept row.
      5. Deliver the result either as an Excel file (``output_format == "excel"``)
         or as text; text longer than 3800 characters is sent as a ``.txt`` file.
      6. Finish by sending the main menu again.

    Args:
        query: The callback query that triggered the scrape.

    Side effects:
        Sets the module-level ``last_output_file`` when an Excel file is written.
    """
    # Declared up front instead of deep inside a branch so the write to the
    # module-level variable is visible at a glance.
    global last_output_file

    await query.answer("🚀 Запуск скрейпинга...")
    msg = await query.message.edit_text("⏳ <b>Выполняю полный скрейпинг...</b>", parse_mode="HTML")

    # Single-element list so simple_progress can mutate the timestamp in place.
    last = [time.time()]
    unique_phones = set()

    async def progress_cb(done: int, total: int):
        await simple_progress(msg, done, total, last, unique_phones)

    try:
        # NOTE(review): collect_links() is called synchronously; if it does
        # blocking work it will stall the event loop — confirm and consider
        # moving it to a worker thread.
        links = collect_links()
        urls = load_urls(links)
        if not urls:
            await msg.answer("❌ Нет ссылок для обработки", reply_markup=main_menu_keyboard())
            return

        # unique_phones is passed through so process_batch can fill it.
        raw = await process_batch(urls, progress_callback=progress_cb, unique_phones=unique_phones)

        # Explicit loop instead of a comprehension with a hidden set.add()
        # side effect: skip rows whose second field is falsy, keep only the
        # first row for each distinct third field.
        seen = set()
        results = []
        for row in raw:
            if not row[1] or row[2] in seen:
                continue
            seen.add(row[2])
            # presumably (origin, phone, domain, promo) — verify against process_batch
            o, p, d, promo = row
            results.append((o, p, d, promo, "—"))

        # Final progress refresh (unique_phones is already populated).
        await progress_cb(len(urls), len(urls))

        if not results:
            await query.message.answer("⚠️ Телефоны не найдены")
        else:
            fmt = CONFIG.get("output_format", "excel")

            if fmt == "excel":
                ts = datetime.now().strftime("%Y%m%d_%H%M%S")
                path = CONFIG["output_file"].format(timestamp=ts)
                save_to_excel(results, path)
                last_output_file = path

                promo_cnt = sum(1 for r in results if r[3])
                await query.message.answer(
                    f"✅ <b>Скрейпинг завершён!</b>\n"
                    f"📊 Обработано URL: {len(urls)}\n"
                    f"📞 Найдено телефонов: {len(results)}\n"
                    f"🎯 Из них promo: {promo_cnt}",
                    parse_mode="HTML"
                )
                await query.message.answer_document(FSInputFile(path), caption="📁 Результаты")
            else:
                # Text output.
                text = format_results(results, fmt)
                if not text:
                    await query.message.answer("⚠️ Нет данных для отображения")
                elif len(text) > 3800:
                    # Too long for a single message: ship it as a temporary file.
                    with tempfile.NamedTemporaryFile(
                        mode='w+', encoding='utf-8', suffix='.txt', delete=False
                    ) as f:
                        f.write(text)
                        path = f.name
                    caption = {
                        "phones": "📞 Номера",
                        "domains": "🌐 Домены",
                        "both": "📞+🌐 Результаты",
                    }.get(fmt, "Результаты")
                    try:
                        await query.message.answer_document(FSInputFile(path), caption=f"✅ {caption}")
                    finally:
                        # Remove the temp file even when sending fails, so
                        # failed uploads do not leave files behind.
                        Path(path).unlink()
                else:
                    labels = {"phones": "📞", "domains": "🌐", "both": "📞+🌐"}
                    await query.message.answer(
                        f"✅ <b>{labels.get(fmt, '')} Результаты:</b>\n\n{text}",
                        parse_mode="HTML"
                    )
    except Exception as e:
        # Top-level handler boundary: keep the traceback in the log, then
        # tell the user what went wrong.
        logging.exception("scrape_callback failed")
        await query.message.answer(f"❌ Ошибка: {e}")

    await query.message.answer("Что дальше?", reply_markup=main_menu_keyboard())
|
||||||
|
|
||||||
async def process_callback(query: CallbackQuery):
|
async def process_callback(query: CallbackQuery):
|
||||||
await query.answer()
|
await query.answer()
|
||||||
@ -445,6 +516,7 @@ def main_bot(token: str):
|
|||||||
|
|
||||||
dp.callback_query.register(menu_callback, F.data == "main_menu")
|
dp.callback_query.register(menu_callback, F.data == "main_menu")
|
||||||
dp.callback_query.register(status_handler, F.data == "status")
|
dp.callback_query.register(status_handler, F.data == "status")
|
||||||
|
dp.callback_query.register(scrape_callback, F.data == "scrape")
|
||||||
dp.callback_query.register(process_callback, F.data == "process")
|
dp.callback_query.register(process_callback, F.data == "process")
|
||||||
dp.callback_query.register(upload_links_callback, F.data == "upload_links")
|
dp.callback_query.register(upload_links_callback, F.data == "upload_links")
|
||||||
dp.callback_query.register(search_menu_callback, F.data == "search_menu")
|
dp.callback_query.register(search_menu_callback, F.data == "search_menu")
|
||||||
|
|||||||
@ -39,5 +39,5 @@ CONFIG = {'input_file': '',
|
|||||||
'search_template_url': 'https://ya.ru/search/?text={search}&lr={lr}',
|
'search_template_url': 'https://ya.ru/search/?text={search}&lr={lr}',
|
||||||
'region_lr': 35,
|
'region_lr': 35,
|
||||||
'region_name': 'Краснодар',
|
'region_name': 'Краснодар',
|
||||||
'search_file': 'search.txt'
|
'search_file': 'search.txt',
|
||||||
}
|
'profile_dir': 'C:\\Users\\Дмитрий\\chrome_profile_yandex'}
|
||||||
5
main.py
5
main.py
@ -23,6 +23,7 @@ import logging
|
|||||||
from httpx import PoolTimeout, ConnectTimeout, ReadTimeout, HTTPStatusError, RequestError
|
from httpx import PoolTimeout, ConnectTimeout, ReadTimeout, HTTPStatusError, RequestError
|
||||||
|
|
||||||
from config import CONFIG
|
from config import CONFIG
|
||||||
|
from link_collector import collect_links
|
||||||
|
|
||||||
# Suppress httpx info logs
|
# Suppress httpx info logs
|
||||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||||
@ -366,6 +367,10 @@ async def main():
|
|||||||
|
|
||||||
promo_only = args.promo_only
|
promo_only = args.promo_only
|
||||||
|
|
||||||
|
try:
|
||||||
|
collected_links = collect_links()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Ошибка в collect_links: {e}. Продолжаем без собранных ссылок.")
|
||||||
collected_links = []
|
collected_links = []
|
||||||
|
|
||||||
urls = load_urls(collected_links + args.urls)
|
urls = load_urls(collected_links + args.urls)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user