// ==================== 爆款视频抓取器 - 后台服务（修正版）====================

// Third-party sites to scrape (5 sources).
// Fields of each entry, as used elsewhere in this file:
//   id          - key used to look a source up by request.sourceId and to index per-source results
//   name        - label used in log messages and result summaries
//   url         - page that is opened in a background tab for scraping
//   needsLogin  - login-requirement flag; not read anywhere in this part of the file
//   description - short description string; not read anywhere in this part of the file
const SOURCES = [
    // Free, no login required
    {
        id: 'shortsmonkey',
        name: 'ShortsMonkey',
        url: 'https://www.shortsmonkey.com/zh/hot-today',
        needsLogin: false,
        description: '短视频热门榜'
    },
    {
        id: 'findoutliers',
        name: 'FindOutliers',
        url: 'https://www.findoutliers.com/youtube-video-outliers',
        needsLogin: false,
        description: '爆款视频发现（游客60个）'
    },
    // Free, but login required
    {
        id: 'aihot',
        name: 'AIHot Video',
        url: 'https://www.aihot.video/zh/hot-videos/short/week',
        needsLogin: true,
        description: 'AI热门视频分析'
    },
    {
        id: 'viewstats',
        name: 'ViewStats',
        url: 'https://www.viewstats.com/top-list?tab=videos&videoType=shorts',
        needsLogin: true,
        description: 'MrBeast的视频分析工具'
    },
    {
        id: 'tubelab',
        name: 'TubeLab',
        url: 'https://tubelab.net/ideas-finder',
        needsLogin: true,
        description: '爆款视频库'
    }
];

// Expose the configuration on the global scope (`self`) for use by other scripts.
self.SOURCES = SOURCES;

// Listen for messages from content scripts and dispatch on request.action:
//   'scrapeAll'    -> replies with the aggregated result of scrapeAllSources(),
//                     or { success: false, error } if the whole run fails
//   'getSources'   -> replies synchronously with the SOURCES array
//   'scrapeSingle' -> replies with { success: true, videos } for request.sourceId,
//                     or { success: false, error } on failure / unknown id
// The listener returns true only when the reply is sent asynchronously, which
// tells the runtime to keep the message channel open until sendResponse runs.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
    // Normalises any thrown value into a message string for the reply.
    const describeError = (error) => (error instanceof Error ? error.message : String(error));

    if (request.action === 'scrapeAll') {
        // The rejection handler is passed as the second argument to then() so
        // that a failure of the run always produces a reply; without it the
        // sender would never receive a response.
        scrapeAllSources().then(sendResponse, (error) => {
            console.error('Error scraping all sources:', error);
            sendResponse({ success: false, error: describeError(error) });
        });
        return true;
    }

    if (request.action === 'getSources') {
        // Answered synchronously, so the channel does not need to stay open.
        sendResponse(SOURCES);
        return false;
    }

    if (request.action === 'scrapeSingle') {
        const source = SOURCES.find((s) => s.id === request.sourceId);
        if (!source) {
            sendResponse({ success: false, error: 'Source not found' });
            return false;
        }

        scrapeSource(source).then(
            (videos) => {
                sendResponse({ success: true, videos });
            },
            (error) => {
                sendResponse({ success: false, error: describeError(error) });
            }
        );
        return true;
    }

    // Unknown actions are left for other listeners; nothing is sent.
    return false;
});

/**
 * Scrape every entry of SOURCES, one after another, and merge the results.
 *
 * A failure of one source is recorded and does not stop the remaining ones.
 * Videos are de-duplicated by videoId (the first occurrence is kept), and the
 * final result object is stored under the `lastScrape` key of
 * chrome.storage.local before being returned.
 *
 * @returns {Promise<Object>} Object with `videos`, `errors`, `sources`,
 *     `totalCount`, `timestamp` and `sourceList`.
 */
async function scrapeAllSources() {
    const collected = [];
    const failures = [];
    const perSource = {};

    for (const src of SOURCES) {
        const { id, name } = src;
        try {
            console.log(`Scraping ${name}...`);
            const found = await scrapeSource(src);
            perSource[id] = { name, count: found.length, success: true };
            collected.push(...found);
        } catch (err) {
            console.error(`Error scraping ${name}:`, err);
            const message = err.message;
            failures.push({ source: id, name, error: message });
            perSource[id] = { name, count: 0, success: false, error: message };
        }
    }

    // De-duplicate across sources, keeping the first video seen for each id.
    const knownIds = new Set();
    const deduped = collected.filter(({ videoId }) => {
        if (knownIds.has(videoId)) {
            return false;
        }
        knownIds.add(videoId);
        return true;
    });

    const results = {
        videos: deduped,
        errors: failures,
        sources: perSource,
        totalCount: deduped.length,
        timestamp: Date.now(),
        sourceList: SOURCES.map(({ id, name }) => ({ id, name }))
    };

    // Persist the latest run to extension storage.
    await chrome.storage.local.set({ lastScrape: results });

    return results;
}

/**
 * Scrape a single source by loading it in an inactive tab and running
 * extractVideoIdsFromPage inside that page.
 *
 * Steps: open source.url in a background tab, wait until the tab reports
 * status 'complete' (or 20 s pass), wait a further 5 s for client-side
 * rendering, inject the extractor, then close the tab.
 *
 * @param {{id: string, url: string}} source - Entry from SOURCES; `url` is
 *     opened and `id` is passed to the injected extractor.
 * @returns {Promise<Array>} The extractor's result, or [] when the injection
 *     produced no result.
 * @throws Rejects if the tab cannot be created or the script cannot be
 *     injected. Once the tab exists it is always closed before settling.
 */
async function scrapeSource(source) {
    const LOAD_TIMEOUT_MS = 20000;
    const RENDER_DELAY_MS = 5000;

    // Using the promise form means a failed tab creation rejects here instead
    // of handing an undefined tab to a callback and leaving the caller hanging.
    const tab = await chrome.tabs.create({ url: source.url, active: false });
    const tabId = tab.id;

    try {
        // Wait for the page load to complete, giving up after LOAD_TIMEOUT_MS.
        await new Promise((resolve) => {
            let timer;
            const onUpdated = (updatedTabId, info) => {
                if (updatedTabId === tabId && info.status === 'complete') {
                    finish();
                }
            };
            // Single exit point so the listener and the timer are both
            // released no matter which of them fires first.
            const finish = () => {
                clearTimeout(timer);
                chrome.tabs.onUpdated.removeListener(onUpdated);
                resolve();
            };
            chrome.tabs.onUpdated.addListener(onUpdated);
            timer = setTimeout(finish, LOAD_TIMEOUT_MS);
        });

        // Extra delay so content rendered after the load event is present.
        await new Promise((resolve) => setTimeout(resolve, RENDER_DELAY_MS));

        // Run the extractor inside the page to collect video IDs.
        const results = await chrome.scripting.executeScript({
            target: { tabId },
            func: extractVideoIdsFromPage,
            args: [source.id]
        });

        if (results && results[0] && results[0].result) {
            return results[0].result;
        }
        return [];
    } finally {
        // Closing the tab is best-effort: it may already be gone (e.g. closed
        // by the user), and that must not mask the scrape outcome.
        await chrome.tabs.remove(tabId).catch(() => { });
    }
}

/**
 * Collect 11-character video IDs found in the current document's HTML.
 *
 * This function is injected into the scraped page via
 * chrome.scripting.executeScript, so it must stay self-contained: it may only
 * use its argument and page globals such as `document`.
 *
 * @param {string} source - Value copied onto every returned item (the caller
 *     in this file passes the source's id).
 * @returns {Array<{videoId: string, source: string}>} Up to 100 unique IDs,
 *     ordered by pattern and then by position in the HTML.
 */
function extractVideoIdsFromPage(source) {
    const MAX_VIDEOS = 100;
    const videos = [];
    const seenIds = new Set();
    const html = document.documentElement.innerHTML;

    // URL / JSON shapes that carry a video ID. Patterns without a closing
    // delimiter end with a negative lookahead so that a longer token is not
    // truncated into a bogus 11-character ID.
    const patterns = [
        /watch\?v=([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])/g,
        /shorts\/([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])/g,
        /youtu\.be\/([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])/g,
        /\/video\/detail\/([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])/g,
        /"videoId":\s*"([a-zA-Z0-9_-]{11})"/g,
        /\/vi\/([a-zA-Z0-9_-]{11})\//g,
        /embed\/([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])/g,
        /v=([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])/g
    ];

    for (const pattern of patterns) {
        for (const match of html.matchAll(pattern)) {
            const videoId = match[1];
            if (seenIds.has(videoId)) {
                continue;
            }
            seenIds.add(videoId);
            videos.push({ videoId, source });

            // Stop scanning as soon as the cap is reached.
            if (videos.length >= MAX_VIDEOS) {
                return videos;
            }
        }
    }

    return videos;
}
