Compare commits
No commits in common. "30a9e4b3b68c6c394d269edfd4796992830eebb3" and "6325b485b2a5463e1149e19c8c4ef41cb2a14c0d" have entirely different histories.
30a9e4b3b6
...
6325b485b2
23
.gitignore
vendored
23
.gitignore
vendored
@ -1,23 +0,0 @@
|
||||
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# Virtual environment directory and local environment settings
venv/
.env

# Distribution files and packaging artifacts
dist/
*.egg-info/

# Coverage data, caches and other unneeded files
# (the *.py,cover pattern was previously listed twice; one entry is enough)
.coverage
*.py,cover
.cache/

# Jupyter notebook checkpoints are best left out of the repository
.ipynb_checkpoints
|
||||
|
||||
72
bot.py
72
bot.py
@ -16,6 +16,7 @@ from aiogram.types import InlineKeyboardMarkup, InlineKeyboardButton, FSInputFil
|
||||
|
||||
# Импорт только нужного
|
||||
from config import CONFIG
|
||||
from link_collector import collect_links
|
||||
from main import process_batch, save_to_excel, load_urls
|
||||
|
||||
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
|
||||
@ -329,6 +330,76 @@ async def edit_search_callback(query: CallbackQuery):
|
||||
|
||||
# ==================== ОСНОВНЫЕ ДЕЙСТВИЯ ====================
|
||||
|
||||
async def scrape_callback(query: CallbackQuery):
    """Run the full scrape pipeline for a callback button and report results.

    Steps, in order:
      1. Acknowledge the callback and replace the message with a progress stub.
      2. Collect links, load URLs and process them with ``process_batch``,
         forwarding progress to ``simple_progress``.
      3. Keep only rows with a truthy second field, de-duplicated by the
         third field, and append a placeholder column ("—") to each row.
      4. Deliver the result either as an Excel document or as text, depending
         on ``CONFIG["output_format"]``. Text longer than 3800 characters is
         sent as a temporary ``.txt`` document instead of an inline message.

    Any exception raised during steps 2-4 is logged and reported to the user;
    the "what next" menu is sent afterwards in every case except the early
    exit for an empty URL list (which already carries the main menu keyboard).

    Args:
        query: The callback query that triggered the scrape.
    """
    # Declared up front so the assignment further down is unambiguous.
    global last_output_file

    await query.answer("🚀 Запуск скрейпинга...")
    msg = await query.message.edit_text("⏳ <b>Выполняю полный скрейпинг...</b>", parse_mode="HTML")

    last = [time.time()]
    unique_phones = set()

    async def progress_cb(done: int, total: int):
        # Shares `last` and `unique_phones` with process_batch via closure.
        await simple_progress(msg, done, total, last, unique_phones)

    try:
        # NOTE(review): collect_links() is called synchronously inside this
        # coroutine; if it performs blocking work it will stall the event
        # loop for its whole duration — confirm and consider a worker thread.
        links = collect_links()
        urls = load_urls(links)
        if not urls:
            await msg.answer("❌ Нет ссылок для обработки", reply_markup=main_menu_keyboard())
            return

        raw = await process_batch(urls, progress_callback=progress_cb, unique_phones=unique_phones)

        # Keep the first row per r[2] value, and only rows whose r[1] is truthy.
        seen = set()
        unique = []
        for row in raw:
            if not row[1] or row[2] in seen:
                continue
            seen.add(row[2])
            unique.append(row)

        # Rows are expected to be 4-tuples; a fifth placeholder column is added.
        results = [(o, p, d, promo, "—") for o, p, d, promo in unique]

        # Final progress refresh (unique_phones is already populated).
        await progress_cb(len(urls), len(urls))

        if not results:
            await query.message.answer("⚠️ Телефоны не найдены")
        else:
            fmt = CONFIG.get("output_format", "excel")

            if fmt == "excel":
                ts = datetime.now().strftime("%Y%m%d_%H%M%S")
                path = CONFIG["output_file"].format(timestamp=ts)
                save_to_excel(results, path)
                last_output_file = path

                promo_cnt = sum(1 for r in results if r[3])
                await query.message.answer(
                    f"✅ <b>Скрейпинг завершён!</b>\n"
                    f"📊 Обработано URL: {len(urls)}\n"
                    f"📞 Найдено телефонов: {len(results)}\n"
                    f"🎯 Из них promo: {promo_cnt}",
                    parse_mode="HTML"
                )
                await query.message.answer_document(FSInputFile(path), caption="📁 Результаты")
            else:
                # Text output
                text = format_results(results, fmt)
                if not text:
                    await query.message.answer("⚠️ Нет данных для отображения")
                elif len(text) > 3800:
                    # Too long for an inline message: send as a document.
                    with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', suffix='.txt', delete=False) as f:
                        f.write(text)
                        path = f.name
                    # The file is closed (and flushed) before it is sent; the
                    # finally clause removes it even if sending fails.
                    try:
                        caption = {"phones": "📞 Номера", "domains": "🌐 Домены", "both": "📞+🌐 Результаты"}.get(fmt, "Результаты")
                        await query.message.answer_document(FSInputFile(path), caption=f"✅ {caption}")
                    finally:
                        Path(path).unlink(missing_ok=True)
                else:
                    labels = {"phones": "📞", "domains": "🌐", "both": "📞+🌐"}
                    await query.message.answer(
                        f"✅ <b>{labels.get(fmt, '')} Результаты:</b>\n\n{text}",
                        parse_mode="HTML"
                    )
    except Exception as e:
        # Top-level handler boundary: keep the traceback in the log instead of
        # discarding it, then tell the user what went wrong.
        logging.exception("scrape_callback failed")
        await query.message.answer(f"❌ Ошибка: {e}")

    await query.message.answer("Что дальше?", reply_markup=main_menu_keyboard())
|
||||
|
||||
async def process_callback(query: CallbackQuery):
|
||||
await query.answer()
|
||||
@ -445,6 +516,7 @@ def main_bot(token: str):
|
||||
|
||||
dp.callback_query.register(menu_callback, F.data == "main_menu")
|
||||
dp.callback_query.register(status_handler, F.data == "status")
|
||||
dp.callback_query.register(scrape_callback, F.data == "scrape")
|
||||
dp.callback_query.register(process_callback, F.data == "process")
|
||||
dp.callback_query.register(upload_links_callback, F.data == "upload_links")
|
||||
dp.callback_query.register(search_menu_callback, F.data == "search_menu")
|
||||
|
||||
@ -39,5 +39,5 @@ CONFIG = {'input_file': '',
|
||||
'search_template_url': 'https://ya.ru/search/?text={search}&lr={lr}',
|
||||
'region_lr': 35,
|
||||
'region_name': 'Краснодар',
|
||||
'search_file': 'search.txt'
|
||||
}
|
||||
'search_file': 'search.txt',
|
||||
'profile_dir': 'C:\\Users\\Дмитрий\\chrome_profile_yandex'}
|
||||
7
main.py
7
main.py
@ -23,6 +23,7 @@ import logging
|
||||
from httpx import PoolTimeout, ConnectTimeout, ReadTimeout, HTTPStatusError, RequestError
|
||||
|
||||
from config import CONFIG
|
||||
from link_collector import collect_links
|
||||
|
||||
# Suppress httpx info logs
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
@ -366,7 +367,11 @@ async def main():
|
||||
|
||||
promo_only = args.promo_only
|
||||
|
||||
collected_links = []
|
||||
try:
|
||||
collected_links = collect_links()
|
||||
except Exception as e:
|
||||
print(f"Ошибка в collect_links: {e}. Продолжаем без собранных ссылок.")
|
||||
collected_links = []
|
||||
|
||||
urls = load_urls(collected_links + args.urls)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user