Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions functions/telegram_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,89 @@ async def save_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
await update.message.reply_text(t('article_exists', user_lang))



# Upper bound on how many saved articles a single /export will include.
_EXPORT_ARTICLE_LIMIT = 1000

# Heading emoji per category; unknown categories fall back to the wrench.
_EXPORT_CATEGORY_EMOJI = {
    'ai': '🤖', 'security': '🔒', 'crypto': '💰', 'startups': '🚀',
    'hardware': '💻', 'software': '📱', 'tech': '🔧',
}
_EXPORT_DEFAULT_EMOJI = '🔧'

# Backslash-escape the punctuation that could open emphasis, code spans,
# links or raw HTML when the exported file is rendered.
_EXPORT_TEXT_ESCAPES = str.maketrans(
    {ch: '\\' + ch for ch in '\\`*_[]()<>'}
)

# Percent-encode the characters that would terminate or break out of the
# "(...)" destination of an inline Markdown link.
_EXPORT_URL_ESCAPES = str.maketrans(
    {'(': '%28', ')': '%29', '<': '%3C', '>': '%3E', ' ': '%20'}
)


def _escape_export_text(value) -> str:
    """Return *value* as single-line text with Markdown punctuation escaped."""
    # Collapsing whitespace keeps an embedded newline from ending the list
    # item early and starting attacker-controlled block-level Markdown.
    return " ".join(str(value).split()).translate(_EXPORT_TEXT_ESCAPES)


async def export_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle /export command - send the user's saved articles as a Markdown file.

    Replies with a "preparing" notice, loads up to ``_EXPORT_ARTICLE_LIMIT``
    saved articles for the calling user, groups them by category, renders a
    Markdown document and sends it back as an attached ``.md`` file. If the
    user has no saved articles, an "empty" notice is sent instead.

    Args:
        update: Incoming Telegram update; the reply is sent on
            ``update.message`` and the user is ``update.effective_user``.
        context: Handler context (unused).

    Article records are read with ``.get`` on the keys ``category``,
    ``title``, ``url``, ``saved_at`` and ``source``; missing or ``None``
    values fall back to defaults rather than raising.
    """
    from .user_storage import get_saved_articles, get_user_language
    from .translations import t
    import io
    import logging
    from collections import defaultdict
    from datetime import datetime

    telegram_id = update.effective_user.id
    user_lang = get_user_language(telegram_id)

    # Send waiting message
    await update.message.reply_text(t('export_preparing', user_lang), parse_mode='Markdown')

    articles = get_saved_articles(telegram_id, limit=_EXPORT_ARTICLE_LIMIT)

    if not articles:
        await update.message.reply_text(t('export_empty', user_lang), parse_mode='Markdown')
        return

    # One timestamp for both the document body and the filename, so they
    # cannot disagree when the export straddles a second/day boundary.
    now = datetime.now()

    # Group by category. `or 'tech'` also covers a stored None, which would
    # otherwise make sorted() fail when comparing None with str keys.
    by_category = defaultdict(list)
    for article in articles:
        by_category[article.get('category') or 'tech'].append(article)

    # Build markdown content
    md_lines = [
        "# LensAI - Saved Articles Export",
        f"Generated on: {now.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Total articles: {len(articles)}",
        "",
    ]

    for cat, items in sorted(by_category.items()):
        emoji = _EXPORT_CATEGORY_EMOJI.get(cat, _EXPORT_DEFAULT_EMOJI)
        cat_label = t(f'cat_{cat}', user_lang)
        md_lines.append(f"## {emoji} {cat_label}")
        md_lines.append("")

        for item in items:
            # Titles and sources come from external feeds / user input, so
            # they are escaped before being embedded in Markdown.
            title = _escape_export_text(item.get('title') or 'Untitled')
            source = _escape_export_text(item.get('source') or '')
            url = str(item.get('url') or '').strip()
            # str() guards against a non-string timestamp; the first ten
            # characters are the date part of an ISO-style value.
            saved_at = str(item.get('saved_at') or '')[:10]

            # Only real web links become clickable; anything else is
            # rendered as plain text.
            if url.startswith(('http://', 'https://')):
                line = f"- [{title}]({url.translate(_EXPORT_URL_ESCAPES)})"
            else:
                line = f"- {title}"

            meta = []
            if source:
                meta.append(f"Source: {source}")
            if saved_at:
                meta.append(f"Saved: {saved_at}")
            if meta:
                line += f" ({', '.join(meta)})"

            md_lines.append(line)

        # Blank line between category sections.
        md_lines.append("")

    md_text = "\n".join(md_lines)

    # Create file-like object; the `name` attribute supplies the filename
    # shown to the user for the uploaded document.
    bio = io.BytesIO(md_text.encode('utf-8'))
    bio.name = f"lensai_export_{now.strftime('%Y%m%d')}.md"

    # Send document
    try:
        await update.message.reply_document(
            document=bio,
            caption=t('export_caption', user_lang, count=len(articles)),
            parse_mode='Markdown'
        )
    except Exception:
        # Keep internal error details in the server log only; the user gets
        # a generic, localized message instead of the raw exception text.
        logging.getLogger(__name__).exception(
            "Failed to send export for user %s", telegram_id
        )
        await update.message.reply_text(
            t('ai_error', user_lang, error="Could not generate export file.")
        )
Comment on lines +753 to +754

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This broad except Exception as e: clause leaks internal error details to the user, which is a potential information disclosure risk. It's better to log the specific error for debugging and show a generic, user-friendly error message.

    except Exception as e:
        print(f"Error exporting for user {telegram_id}: {e}")
        await update.message.reply_text(t('ai_error', user_lang, error="Could not generate export file."))


async def clear_saved_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
"""Handle /clear_saved command."""
from .user_storage import clear_saved_articles, get_user_language
Expand Down Expand Up @@ -1693,6 +1776,7 @@ def create_bot_application() -> Application:
application.add_handler(CommandHandler("save", save_command))
application.add_handler(CommandHandler("clear_saved", clear_saved_command))
application.add_handler(CommandHandler("clear", clear_saved_command)) # Alias for /clear_saved
application.add_handler(CommandHandler("export", export_command))
application.add_handler(CommandHandler("search", search_command))
application.add_handler(CommandHandler("language", language_command))
application.add_handler(CommandHandler("filter", filter_command))
Expand Down
10 changes: 10 additions & 0 deletions functions/translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
'article_saved': "✅ Article saved! View with /saved",
'article_exists': "ℹ️ Article already saved!",
'cleared_saved': "🗑️ All saved articles cleared!",
'export_preparing': "⏳ **Preparing your export...**\n\nGathering all your saved articles...",
'export_empty': "🔖 **No articles to export!**\n\nYou haven't saved any articles yet. Use /save to add some first.",
'export_caption': "📚 **Your Saved Articles**\n\nHere is your export containing {count} articles.\nUse this file to read them later in any Markdown viewer or note-taking app.",

'ai_error': "❌ Sorry, I couldn't process that: {error}",
'refresh_limit': "✅ **You're all caught up!**\n\nI've generated 2 digests for you and you've seen the top news. Check back later for more updates!",

Expand Down Expand Up @@ -58,6 +62,7 @@
• /filter <category> - Filter by category (ai, security, crypto, startups, hardware, software, tech)
• /recap - Weekly recap of saved articles
• /clear_saved - Clear all saved articles
• /export - Export articles as Markdown file

⚙️ **Settings**
• /schedule - Set daily digest time
Expand Down Expand Up @@ -160,6 +165,10 @@
'article_saved': "✅ Статья сохранена! Смотреть: /saved",
'article_exists': "ℹ️ Статья уже сохранена!",
'cleared_saved': "🗑️ Все сохранённые статьи удалены!",
'export_preparing': "⏳ **Подготовка экспорта...**\n\nСобираю ваши сохранённые статьи...",
'export_empty': "🔖 **Нечего экспортировать!**\n\nУ вас пока нет сохранённых статей. Сначала добавьте их с помощью /save.",
'export_caption': "📚 **Ваши сохранённые статьи**\n\nВот ваш экспорт, содержащий {count} статей.\nИспользуйте этот файл для чтения в любом Markdown-приложении или заметках.",

'ai_error': "❌ Не удалось обработать запрос: {error}",
'refresh_limit': "✅ **Вы узнали всё главное!**\n\nЯ уже сделал для вас 2 уникальных дайджеста. Заходите позже за новыми новостями!",

Expand Down Expand Up @@ -191,6 +200,7 @@
• /filter <категория> - Фильтр по категории (ai, security, crypto, startups, hardware, software, tech)
• /recap - Недельный обзор сохранённых
• /clear_saved - Очистить все сохранённые
• /export - Экспорт статей в файл Markdown

⚙️ **Настройки**
• /schedule - Время ежедневного дайджеста
Expand Down