Files
puppet-avalanche/forums.js
2025-10-16 08:24:51 -04:00

174 lines
4.8 KiB
JavaScript

import { chromium } from 'playwright';
import path from 'path';
import fs from 'fs';
// Runtime configuration.
// The CDP endpoint and the login credentials can be overridden through
// environment variables, so the script does not have to be edited every time
// the browser is relaunched (the GUID in the websocket URL is specific to one
// browser instance) and so real credentials do not need to live in source.
// The literals below are kept only as backward-compatible defaults.

// Websocket endpoint of the already-running browser to attach to over CDP.
const URL_BROWSER =
  process.env.AVALANCHE_URL_BROWSER ??
  'ws://localhost:9222/devtools/browser/716e665f-22f3-4c81-b68d-71f7ab9d72f6';
// Forum account name typed into the "Username" field.
const USERNAME = process.env.AVALANCHE_USERNAME ?? 'protest';
// Forum account password ("MDP" = mot de passe) typed into the "Password" field.
const MDP = process.env.AVALANCHE_MDP ?? 'Avalanche1';
// Base URL against which the forum listing and thread links are resolved.
const URL_FORUMS = "https://www.avalancheassociation.ca/forums/";
// Path, relative to URL_FORUMS, of the topic listing where the crawl starts.
const PATH_ENTREE = 'Topics.aspx?forum=173521&group=135525';
// HTML fragment appended after every post in the generated PDF document.
const SEPARATEUR_POST = "<br> <hr> <br>";
/**
 * Picks a pseudo-random whole number between `min` and `max`, both inclusive.
 * The callers in this file pass the result to `page.waitForTimeout`.
 *
 * @param {number} [min=1000] - Smallest value that can be returned.
 * @param {number} [max=3000] - Largest value that can be returned.
 * @returns {number} An integer in the range [min, max].
 */
function getRandomDelay(min = 1000, max = 3000) {
  // Number of distinct values in the inclusive range.
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
/**
 * Opens the site's home page and signs in with USERNAME / MDP, pausing for a
 * random delay between each step.
 *
 * @param {object} page - Playwright page used to perform the login.
 * @returns {Promise<void>} Resolves once the navigation triggered by the
 *   login button has reached the "networkidle" state.
 */
async function login(page) {
  await page.goto('https://www.avalancheassociation.ca/');
  await page.waitForTimeout(getRandomDelay());
  await page.getByPlaceholder('Username').fill(USERNAME);
  await page.waitForTimeout(getRandomDelay());
  await page.getByPlaceholder('Password').fill(MDP);
  await page.waitForTimeout(getRandomDelay());

  // Start waiting for the navigation BEFORE clicking so the event cannot be
  // missed. 'networkidle' is the value documented by Playwright; the previous
  // 'networkidle0' is the Puppeteer spelling.
  const navigation = page.waitForNavigation({ waitUntil: 'networkidle' });
  await Promise.all([
    navigation,
    page.click('input[name="btn_submitLogin"]'),
  ]);
}
/**
 * Archives the thread currently shown in `page`: downloads its attachments,
 * gathers the HTML of every post across all pages of the thread, and prints
 * the result to `data/forums/<title>/<title>.pdf`.
 *
 * @param {object} page - Playwright page already navigated to the first page
 *   of a thread.
 * @returns {Promise<void>}
 */
async function genererPDF(page) {
  // The title becomes both a directory and a file name, so normalise it:
  // collapse whitespace runs, trim, and replace characters that are path
  // separators or are not valid in file names on common platforms.
  // When the title is missing or empty, fall back to a fixed name instead of
  // producing a path built from "undefined".
  const titreBrut = (await getPageTitle(page)) ?? '';
  const titreThread =
    titreBrut.replace(/\s+/g, ' ').trim().replace(/[\\/:*?"<>|]/g, '_') || 'untitled';
  const urlData = 'data/forums/' + titreThread;
  console.log(titreThread);
  await downloadAttachedDocuments(page, urlData);

  let cleanHTML = '';
  do {
    const postsLocator = page.locator('.FormTable1');
    const count = await postsLocator.count();
    for (let i = 0; i < count; i++) {
      const post = postsLocator.nth(i);
      // Strip the button bar and the poster's post counter from the live DOM
      // before reading the post's HTML.
      await post.evaluate((p) => {
        p.querySelectorAll('td[id*="tdButtonBar"], div.forumPosterPostCount')
          .forEach((s) => s.remove());
      });
      const html = await post.innerHTML();
      // Drop empty segments between consecutive <br> tags.
      cleanHTML += html
        .split('<br>')
        .map((line) => line.trim())
        .filter((line) => line !== '')
        .join('<br>');
      cleanHTML += SEPARATEUR_POST;
    }
  } while (await goToNextPage(page));

  const pdfPage = await page.context().newPage();
  try {
    await pdfPage.setContent(cleanHTML);
    await pdfPage.pdf({ path: `${urlData}/${titreThread}.pdf`, format: 'A4' });
  } finally {
    // Always release the temporary tab, even when rendering fails.
    await pdfPage.close();
  }
}
/**
 * Waits a random delay, then clicks the pager's "next page" element if the
 * page contains one and waits for the resulting navigation to fire `load`.
 *
 * @param {object} page - Playwright page showing a paginated view.
 * @returns {Promise<boolean>} `true` when the element was found and clicked,
 *   `false` when it is not present on the page.
 */
async function goToNextPage(page) {
  await page.waitForTimeout(getRandomDelay());

  const nextButton = await page.$('#ctl00_PageContent_fnb_PostPager_pnlNextPage');
  if (nextButton === null) {
    return false;
  }

  // Register the navigation wait first, then trigger it with the click.
  const navigation = page.waitForNavigation({ waitUntil: 'load' });
  await Promise.all([navigation, nextButton.click()]);
  return true;
}
/**
 * Reads the text of the page's summary-title element.
 *
 * @param {object} page - Playwright page to query.
 * @returns {Promise<string|null|undefined>} The element's text content, or
 *   `undefined` when the element is not present on the page.
 */
async function getPageTitle(page) {
  const titleElement = await page.$('#ctl00_PageContent_PageSummaryTitle');
  return titleElement === null ? undefined : titleElement.textContent();
}
/**
 * Downloads every linked file found inside the page's attachment panels into
 * `urlDownloadFolder`, creating that folder (and any missing parents) first.
 *
 * @param {object} page - Playwright page showing a thread.
 * @param {string} urlDownloadFolder - Directory the files are saved into.
 * @returns {Promise<void>}
 */
async function downloadAttachedDocuments(page, urlDownloadFolder) {
  const attachmentDivs = await page.locator('div[id*="pnlAttachments"]').all();
  console.log('DL path: ', urlDownloadFolder);
  // `recursive` creates missing parent directories and is a no-op when the
  // folder already exists, so no prior existence check is needed.
  fs.mkdirSync(urlDownloadFolder, { recursive: true });

  for (const div of attachmentDivs) {
    // Every anchor inside an attachment panel is treated as a download link.
    const links = await div.locator('a').all();
    for (const link of links) {
      const hrefAttachement = await link.getAttribute('href');
      if (!hrefAttachement) {
        continue;
      }
      const linkText = (await link.textContent())?.trim();

      // Register the download listener before the click that triggers it.
      const downloadPromise = page.waitForEvent('download');
      await link.click({ modifiers: ['Alt'] });
      const download = await downloadPromise;

      // Prefer the link text as the file name; fall back to the name the
      // browser suggests when the link has no text. Path separators are
      // replaced so the file always lands inside the download folder.
      const rawName = linkText || download.suggestedFilename();
      const fileName = rawName.replace(/[\\/]/g, '_');
      const downloadPath = path.join(urlDownloadFolder, fileName);
      await download.saveAs(downloadPath);
      console.log(`Downloaded PDF: ${downloadPath}`);
    }
  }
}
// Entry point: attach to an already-running browser over CDP, log in with the
// first page of its first context, then walk the topic listing page by page
// and archive every thread in its own temporary tab.
const browser = await chromium.connectOverCDP(URL_BROWSER);
const contexts = browser.contexts();
try {
  const pages = contexts.length > 0 ? contexts[0].pages() : [];
  if (contexts.length === 0) {
    console.log('No contexts available');
  } else if (pages.length === 0) {
    console.log('No pages found in the context');
  } else {
    const page = pages[0];
    await login(page);
    await page.goto(URL_FORUMS + PATH_ENTREE);
    // TODO: remove — this advances the listing once before the loop starts,
    // so the first listing page is not processed.
    await goToNextPage(page);

    // For each listing page
    do {
      const liensThreads = page.locator('a[href*="Posts.aspx?topic="]');
      const count = await liensThreads.count();
      // For each thread: only even-indexed links are visited. Per the original
      // author's note, the odd-indexed match is the link to the thread's last
      // page.
      for (let i = 0; i < count; i += 2) {
        const href = await liensThreads.nth(i).getAttribute('href');
        if (href === null) {
          continue;
        }
        const newTab = await page.context().newPage();
        try {
          await newTab.waitForTimeout(getRandomDelay());
          // Resolve against the forum base so relative and absolute hrefs
          // both produce a valid URL.
          await newTab.goto(new URL(href, URL_FORUMS).href);
          await genererPDF(newTab);
          await newTab.waitForTimeout(getRandomDelay());
        } finally {
          // Do not leak the tab when archiving a thread fails.
          await newTab.close();
        }
      }
    } while (await goToNextPage(page));
  }
} finally {
  // Release the CDP connection when done. Per the Playwright documentation,
  // closing a browser that was connected to (not launched) disconnects from
  // it rather than terminating it.
  await browser.close();
}