Compare commits
4 Commits
6325b485b2
...
30a9e4b3b6
| Author | SHA1 | Date | |
|---|---|---|---|
| 30a9e4b3b6 | |||
| 50c9955302 | |||
| 8a206a34eb | |||
| d56dca8cd8 |
23
.gitignore
vendored
Normal file
23
.gitignore
vendored
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Байт-код, оптимизированные / DLL-файлы
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
|
||||||
|
# Файлы конфигурации для venv
|
||||||
|
venv/
|
||||||
|
.env
|
||||||
|
|
||||||
|
# Файлы дистрибутивов и артефакты упаковки
|
||||||
|
dist/
|
||||||
|
*.egg-info/
|
||||||
|
|
||||||
|
# Файлы покрытия, кэши и прочие ненужные файлы
|
||||||
|
.coverage
|
||||||
|
*.py,cover
|
||||||
|
.cache/
|
||||||
|
|
||||||
|
# Файлы *.py,cover не несут никакого смысла
|
||||||
|
*.py,cover
|
||||||
|
|
||||||
|
# Контрольные точки в Jupyter notebook лучше оставить в стороне
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
72
bot.py
72
bot.py
@ -16,7 +16,6 @@ from aiogram.types import InlineKeyboardMarkup, InlineKeyboardButton, FSInputFil
|
|||||||
|
|
||||||
# Импорт только нужного
|
# Импорт только нужного
|
||||||
from config import CONFIG
|
from config import CONFIG
|
||||||
from link_collector import collect_links
|
|
||||||
from main import process_batch, save_to_excel, load_urls
|
from main import process_batch, save_to_excel, load_urls
|
||||||
|
|
||||||
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
|
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
|
||||||
@ -330,76 +329,6 @@ async def edit_search_callback(query: CallbackQuery):
|
|||||||
|
|
||||||
# ==================== ОСНОВНЫЕ ДЕЙСТВИЯ ====================
|
# ==================== ОСНОВНЫЕ ДЕЙСТВИЯ ====================
|
||||||
|
|
||||||
async def scrape_callback(query: CallbackQuery):
|
|
||||||
await query.answer("🚀 Запуск скрейпинга...")
|
|
||||||
msg = await query.message.edit_text("⏳ <b>Выполняю полный скрейпинг...</b>", parse_mode="HTML")
|
|
||||||
|
|
||||||
last = [time.time()]
|
|
||||||
unique_phones = set()
|
|
||||||
|
|
||||||
async def progress_cb(done: int, total: int):
|
|
||||||
await simple_progress(msg, done, total, last, unique_phones)
|
|
||||||
|
|
||||||
try:
|
|
||||||
links = collect_links()
|
|
||||||
urls = load_urls(links)
|
|
||||||
if not urls:
|
|
||||||
await msg.answer("❌ Нет ссылок для обработки", reply_markup=main_menu_keyboard())
|
|
||||||
return
|
|
||||||
|
|
||||||
raw = await process_batch(urls, progress_callback=progress_cb, unique_phones=unique_phones) # Передаём unique_phones
|
|
||||||
|
|
||||||
seen = set()
|
|
||||||
unique = [r for r in raw if r[1] and r[2] not in seen and not seen.add(r[2])] # 🔥 Фикс
|
|
||||||
results = [(o, p, d, promo, "—") for o, p, d, promo in unique] # 🔥 Фикс распаковки
|
|
||||||
|
|
||||||
# Финальное обновление (unique_phones уже заполнен)
|
|
||||||
await progress_cb(len(urls), len(urls))
|
|
||||||
|
|
||||||
if results:
|
|
||||||
global last_output_file
|
|
||||||
fmt = CONFIG.get("output_format", "excel")
|
|
||||||
|
|
||||||
if fmt == "excel":
|
|
||||||
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
||||||
path = CONFIG["output_file"].format(timestamp=ts)
|
|
||||||
save_to_excel(results, path)
|
|
||||||
last_output_file = path
|
|
||||||
|
|
||||||
promo_cnt = sum(1 for r in results if r[3])
|
|
||||||
await query.message.answer(
|
|
||||||
f"✅ <b>Скрейпинг завершён!</b>\n"
|
|
||||||
f"📊 Обработано URL: {len(urls)}\n"
|
|
||||||
f"📞 Найдено телефонов: {len(results)}\n"
|
|
||||||
f"🎯 Из них promo: {promo_cnt}",
|
|
||||||
parse_mode="HTML"
|
|
||||||
)
|
|
||||||
await query.message.answer_document(FSInputFile(path), caption="📁 Результаты")
|
|
||||||
else:
|
|
||||||
# Текстовый вывод
|
|
||||||
text = format_results(results, fmt)
|
|
||||||
if text:
|
|
||||||
if len(text) > 3800:
|
|
||||||
with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', suffix='.txt', delete=False) as f:
|
|
||||||
f.write(text)
|
|
||||||
path = f.name
|
|
||||||
caption = {"phones": "📞 Номера", "domains": "🌐 Домены", "both": "📞+🌐 Результаты"}.get(fmt, "Результаты")
|
|
||||||
await query.message.answer_document(FSInputFile(path), caption=f"✅ {caption}")
|
|
||||||
Path(path).unlink()
|
|
||||||
else:
|
|
||||||
labels = {"phones": "📞", "domains": "🌐", "both": "📞+🌐"}
|
|
||||||
await query.message.answer(
|
|
||||||
f"✅ <b>{labels.get(fmt, '')} Результаты:</b>\n\n{text}",
|
|
||||||
parse_mode="HTML"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
await query.message.answer("⚠️ Нет данных для отображения")
|
|
||||||
else:
|
|
||||||
await query.message.answer("⚠️ Телефоны не найдены")
|
|
||||||
except Exception as e:
|
|
||||||
await query.message.answer(f"❌ Ошибка: {e}")
|
|
||||||
|
|
||||||
await query.message.answer("Что дальше?", reply_markup=main_menu_keyboard())
|
|
||||||
|
|
||||||
async def process_callback(query: CallbackQuery):
|
async def process_callback(query: CallbackQuery):
|
||||||
await query.answer()
|
await query.answer()
|
||||||
@ -516,7 +445,6 @@ def main_bot(token: str):
|
|||||||
|
|
||||||
dp.callback_query.register(menu_callback, F.data == "main_menu")
|
dp.callback_query.register(menu_callback, F.data == "main_menu")
|
||||||
dp.callback_query.register(status_handler, F.data == "status")
|
dp.callback_query.register(status_handler, F.data == "status")
|
||||||
dp.callback_query.register(scrape_callback, F.data == "scrape")
|
|
||||||
dp.callback_query.register(process_callback, F.data == "process")
|
dp.callback_query.register(process_callback, F.data == "process")
|
||||||
dp.callback_query.register(upload_links_callback, F.data == "upload_links")
|
dp.callback_query.register(upload_links_callback, F.data == "upload_links")
|
||||||
dp.callback_query.register(search_menu_callback, F.data == "search_menu")
|
dp.callback_query.register(search_menu_callback, F.data == "search_menu")
|
||||||
|
|||||||
@ -39,5 +39,5 @@ CONFIG = {'input_file': '',
|
|||||||
'search_template_url': 'https://ya.ru/search/?text={search}&lr={lr}',
|
'search_template_url': 'https://ya.ru/search/?text={search}&lr={lr}',
|
||||||
'region_lr': 35,
|
'region_lr': 35,
|
||||||
'region_name': 'Краснодар',
|
'region_name': 'Краснодар',
|
||||||
'search_file': 'search.txt',
|
'search_file': 'search.txt'
|
||||||
'profile_dir': 'C:\\Users\\Дмитрий\\chrome_profile_yandex'}
|
}
|
||||||
5
main.py
5
main.py
@ -23,7 +23,6 @@ import logging
|
|||||||
from httpx import PoolTimeout, ConnectTimeout, ReadTimeout, HTTPStatusError, RequestError
|
from httpx import PoolTimeout, ConnectTimeout, ReadTimeout, HTTPStatusError, RequestError
|
||||||
|
|
||||||
from config import CONFIG
|
from config import CONFIG
|
||||||
from link_collector import collect_links
|
|
||||||
|
|
||||||
# Suppress httpx info logs
|
# Suppress httpx info logs
|
||||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||||
@ -367,10 +366,6 @@ async def main():
|
|||||||
|
|
||||||
promo_only = args.promo_only
|
promo_only = args.promo_only
|
||||||
|
|
||||||
try:
|
|
||||||
collected_links = collect_links()
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Ошибка в collect_links: {e}. Продолжаем без собранных ссылок.")
|
|
||||||
collected_links = []
|
collected_links = []
|
||||||
|
|
||||||
urls = load_urls(collected_links + args.urls)
|
urls = load_urls(collected_links + args.urls)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user