import { chromium } from 'playwright';
import path from 'path';
import fs from 'fs';

// CDP WebSocket endpoint of the already-running Chromium instance this script drives.
const URL_BROWSER = 'ws://localhost:9222/devtools/browser/716e665f-22f3-4c81-b68d-71f7ab9d72f6';
// NOTE(review): credentials are hard-coded in source; consider loading them from the
// environment or a secrets store instead of committing them.
const USERNAME = 'protest';
const MDP = 'Avalanche1';
// Base URL every relative forum link is resolved against.
const URL_FORUMS = 'https://www.avalancheassociation.ca/forums/';
// Topic list the crawl starts from, relative to URL_FORUMS.
const PATH_ENTREE = 'Topics.aspx?forum=173521&group=135525';
// Text appended after each post in the HTML that is rendered to PDF.
const SEPARATEUR_POST = '\n\n\n';

/**
 * Returns a random integer delay, used to pace page interactions.
 *
 * @param {number} [min=1000] - Lower bound in milliseconds (inclusive).
 * @param {number} [max=3000] - Upper bound in milliseconds (inclusive).
 * @returns {number} An integer in the range [min, max].
 */
function getRandomDelay(min = 1000, max = 3000) {
  return Math.floor(Math.random() * (max - min + 1)) + min;
}

/**
 * Turns arbitrary text (page title, link label) into a single safe path segment.
 *
 * Whitespace runs are collapsed, characters that are illegal in file names on
 * common file systems are replaced with `_`, and trailing dots/spaces are
 * stripped (which also neutralises `.` and `..`).
 *
 * @param {string | null | undefined} name - Raw text to sanitize.
 * @param {string} [fallback='untitled'] - Name returned when nothing usable remains.
 * @returns {string} A non-empty name without path separators.
 */
function toSafeFileName(name, fallback = 'untitled') {
  const cleaned = String(name ?? '')
    .replace(/\s+/g, ' ')
    .replace(/[<>:"/\\|?*\u0000-\u001f]/g, '_')
    .trim()
    .replace(/[. ]+$/, '');
  return cleaned === '' ? fallback : cleaned;
}

/**
 * Logs into the site through the login form on the home page.
 *
 * @param {import('playwright').Page} page - Page used to perform the login.
 * @returns {Promise<void>} Resolves once the post-login navigation has settled.
 */
async function login(page) {
  await page.goto('https://www.avalancheassociation.ca/');
  await page.waitForTimeout(getRandomDelay());
  await page.getByPlaceholder('Username').fill(USERNAME);
  await page.waitForTimeout(getRandomDelay());
  await page.getByPlaceholder('Password').fill(MDP);
  await page.waitForTimeout(getRandomDelay());
  await Promise.all([
    // 'networkidle' is the value Playwright documents; 'networkidle0' is the Puppeteer spelling.
    page.waitForNavigation({ waitUntil: 'networkidle' }),
    page.click('input[name="btn_submitLogin"]'),
  ]);
}

/**
 * Saves the thread currently open in `page`: downloads its attachments, then
 * concatenates the HTML of every post on every page of the thread and renders
 * the result to `data/forums/<title>/<title>.pdf`.
 *
 * @param {import('playwright').Page} page - Page already showing the first page of a thread.
 * @returns {Promise<void>}
 */
async function genererPDF(page) {
  const titreThread = await getPageTitle(page);
  // The title becomes both a directory and a file name, so it must be a safe path segment.
  const nomFichier = toSafeFileName(titreThread);
  const urlData = path.join('data', 'forums', nomFichier);
  console.log(titreThread);

  // NOTE(review): attachments are collected only from the page shown at this point,
  // i.e. before paging through the thread — confirm that later pages never carry any.
  await downloadAttachedDocuments(page, urlData);

  let cleanHTML = '';
  do {
    const postsLocator = page.locator('.FormTable1');
    const count = await postsLocator.count();
    for (let i = 0; i < count; i++) {
      const post = postsLocator.nth(i);
      // Strip the button bar and the poster's post counter from the live DOM before capturing.
      await post.evaluate((p) => {
        p.querySelectorAll('td[id*="tdButtonBar"], div.forumPosterPostCount')
          .forEach((s) => s.remove());
      });
      const html = await post.innerHTML();
      // Drop indentation and blank lines to keep the combined document compact.
      const compact = html
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line !== '')
        .join('\n');
      cleanHTML += compact + SEPARATEUR_POST;
    }
  } while (await goToNextPage(page));

  const pdfPage = await page.context().newPage();
  try {
    await pdfPage.setContent(cleanHTML);
    await pdfPage.pdf({ path: path.join(urlData, `${nomFichier}.pdf`), format: 'A4' });
  } finally {
    // Always release the scratch tab, even when rendering fails.
    await pdfPage.close();
  }
}

/**
 * Clicks the pager's "next page" control when it is present.
 *
 * @param {import('playwright').Page} page - Page showing a paginated list or thread.
 * @returns {Promise<boolean>} `true` if a next page was loaded, `false` if no pager control exists.
 */
async function goToNextPage(page) {
  await page.waitForTimeout(getRandomDelay());
  const nextPage = await page.$('#ctl00_PageContent_fnb_PostPager_pnlNextPage');
  if (nextPage === null) {
    return false;
  }
  await Promise.all([
    page.waitForNavigation({ waitUntil: 'load' }),
    nextPage.click(),
  ]);
  return true;
}

/**
 * Reads the text of the page summary title element.
 *
 * @param {import('playwright').Page} page - Page to read the title from.
 * @returns {Promise<string | undefined>} The trimmed title, or `undefined` when the
 *   element is missing or empty.
 */
async function getPageTitle(page) {
  const titre = await page.$('#ctl00_PageContent_PageSummaryTitle');
  if (titre === null) {
    return undefined;
  }
  const texte = await titre.textContent();
  return texte?.trim() || undefined;
}

/**
 * Downloads every file linked from the attachment panels of the current page.
 *
 * @param {import('playwright').Page} page - Page whose attachment links are clicked.
 * @param {string} urlDownloadFolder - Destination directory; created (with parents) if missing.
 * @returns {Promise<void>}
 */
async function downloadAttachedDocuments(page, urlDownloadFolder) {
  console.log('DL path: ', urlDownloadFolder);
  // recursive: creates missing parent directories and is a no-op when the folder exists.
  fs.mkdirSync(urlDownloadFolder, { recursive: true });

  const attachmentDivs = await page.locator('div[id*="pnlAttachments"]').all();
  for (const div of attachmentDivs) {
    const links = await div.locator('a').all();
    for (const link of links) {
      const hrefAttachement = await link.getAttribute('href');
      if (!hrefAttachement) {
        continue;
      }
      const texteLien = await link.textContent();
      // Start listening before clicking and await both together, so a failed click
      // does not leave a dangling rejected promise behind.
      const [download] = await Promise.all([
        page.waitForEvent('download'),
        link.click({ modifiers: ['Alt'] }),
      ]);
      // Prefer the link label as file name; fall back to the name suggested by the download.
      const fileName = toSafeFileName(
        texteLien,
        toSafeFileName(download.suggestedFilename(), 'attachment'),
      );
      const downloadPath = path.join(urlDownloadFolder, fileName);
      await download.saveAs(downloadPath);
      console.log(`Downloaded PDF: ${downloadPath}`);
    }
  }
}

/**
 * Entry point: attaches to the running browser, logs in, then walks every page
 * of the topic list and archives each thread in a temporary tab.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browser = await chromium.connectOverCDP(URL_BROWSER);
  try {
    const [context] = browser.contexts();
    if (context === undefined) {
      console.log('No contexts available');
      return;
    }
    const [page] = context.pages();
    if (page === undefined) {
      console.log('No pages found in the context');
      return;
    }

    await login(page);
    await page.goto(new URL(PATH_ENTREE, URL_FORUMS).href);
    // TODO: remove — this skips the first page of the topic list (presumably a
    // leftover from resuming an earlier run; confirm before deleting).
    await goToNextPage(page);

    // For each page of the topic list
    do {
      const liensThreads = page.locator('a[href*="Posts.aspx?topic="]');
      const count = await liensThreads.count();
      // For each thread: only even indexes are visited, which filters out the
      // companion link pointing at the thread's last page.
      for (let i = 0; i < count; i += 2) {
        const href = await liensThreads.nth(i).getAttribute('href');
        if (!href) {
          continue;
        }
        const newTab = await page.context().newPage();
        try {
          await newTab.waitForTimeout(getRandomDelay());
          await newTab.goto(new URL(href, URL_FORUMS).href);
          await genererPDF(newTab);
          await newTab.waitForTimeout(getRandomDelay());
        } finally {
          // Never leave the tab open in the user's browser when a thread fails.
          await newTab.close();
        }
      }
    } while (await goToNextPage(page));
  } finally {
    // For a browser reached via connectOverCDP this disconnects from it, which
    // also lets the Node process exit.
    await browser.close();
  }
}

await main();