Test
This commit is contained in:
173
forums.js
Normal file
173
forums.js
Normal file
@@ -0,0 +1,173 @@
|
||||
import { chromium } from 'playwright';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
|
||||
// Runtime configuration. Every connection/credential value can be overridden
// through an environment variable; the fallbacks are the values that were
// originally hard-coded here, so running without any variable set behaves as
// before.

// WebSocket endpoint handed to chromium.connectOverCDP().
// NOTE(review): the trailing GUID looks specific to one browser session —
// confirm, and set FORUMS_URL_BROWSER when attaching to another instance.
const URL_BROWSER = process.env.FORUMS_URL_BROWSER
  ?? 'ws://localhost:9222/devtools/browser/716e665f-22f3-4c81-b68d-71f7ab9d72f6';

// NOTE(review): these credentials are committed in clear text. Prefer
// supplying FORUMS_USERNAME / FORUMS_PASSWORD from the environment and
// removing the literal fallbacks once the deployment provides them.
const USERNAME = process.env.FORUMS_USERNAME ?? 'protest';
const MDP = process.env.FORUMS_PASSWORD ?? 'Avalanche1';

// Base URL of the forums; thread and topic-list paths are appended to it.
const URL_FORUMS = 'https://www.avalancheassociation.ca/forums/';
// Topic list the crawl starts from, relative to URL_FORUMS.
const PATH_ENTREE = 'Topics.aspx?forum=173521&group=135525';

// Markup inserted after every post in the HTML that is rendered to PDF.
const SEPARATEUR_POST = '<br> <hr> <br>';
|
||||
|
||||
|
||||
/**
 * Picks a random whole number of milliseconds in the inclusive range
 * [min, max]. Used to space out page interactions.
 *
 * @param {number} [min=1000] Smallest delay that can be returned.
 * @param {number} [max=3000] Largest delay that can be returned.
 * @returns {number} An integer between min and max, both included.
 */
function getRandomDelay(min = 1000, max = 3000) {
  // +1 so that max itself is reachable after flooring.
  const span = max - min + 1;
  return Math.floor(Math.random() * span) + min;
}
|
||||
|
||||
/**
 * Opens the association home page and signs in with USERNAME / MDP,
 * pausing a random delay between each interaction.
 *
 * @param {import('playwright').Page} page Tab used to perform the login.
 * @returns {Promise<void>} Resolves once the post-login navigation settles.
 */
async function login(page) {
  await page.goto('https://www.avalancheassociation.ca/');

  await page.waitForTimeout(getRandomDelay());
  await page.getByPlaceholder('Username').fill(USERNAME);

  await page.waitForTimeout(getRandomDelay());
  await page.getByPlaceholder('Password').fill(MDP);

  await page.waitForTimeout(getRandomDelay());
  await Promise.all([
    // Register the wait before clicking so the navigation cannot be missed.
    // 'networkidle' is Playwright's documented value ('networkidle0' is the
    // Puppeteer spelling).
    page.waitForNavigation({ waitUntil: 'networkidle' }),
    page.click('input[name="btn_submitLogin"]'),
  ]);
}
|
||||
|
||||
/**
 * Turns a thread title into a string that is safe to use as one directory or
 * file name: whitespace runs are collapsed, characters that are path
 * separators or illegal in common file systems become "_", and a missing or
 * blank title falls back to "sans-titre".
 *
 * @param {string | null | undefined} titre Raw title text.
 * @returns {string} A non-empty, single-segment name.
 */
function nettoyerTitreThread(titre) {
  const propre = (titre ?? '')
    .replace(/\s+/g, ' ')
    .replace(/[\\/:*?"<>|]/g, '_')
    .trim();
  return propre === '' ? 'sans-titre' : propre;
}

/**
 * Exports the thread currently open in `page`: downloads its attachments,
 * concatenates the cleaned HTML of every post on every page of the thread,
 * and prints that HTML to `data/forums/<title>/<title>.pdf`.
 *
 * @param {import('playwright').Page} page Tab already showing the first page
 *   of the thread. It is navigated through the thread's pages.
 * @returns {Promise<void>}
 */
async function genererPDF(page) {
  // The raw title may be absent, padded with whitespace or contain "/", ":"
  // etc., none of which can be used as a folder name as-is.
  const titreThread = nettoyerTitreThread(await getPageTitle(page));
  const urlData = `data/forums/${titreThread}`;
  let cleanHTML = '';

  console.log(titreThread);
  // NOTE(review): attachments are collected only from the page shown at this
  // point; later pages of the thread are not scanned for attachments.
  await downloadAttachedDocuments(page, urlData);

  do {
    const postsLocator = page.locator('.FormTable1');
    const count = await postsLocator.count();

    for (let i = 0; i < count; i++) {
      const post = postsLocator.nth(i);

      // Strip the button bar and the poster's post counter from the live DOM
      // before reading the post's markup.
      await post.evaluate((p) => {
        p.querySelectorAll('td[id*="tdButtonBar"], div.forumPosterPostCount')
          .forEach((s) => s.remove());
      });

      const html = await post.innerHTML();
      // Drop empty segments between consecutive <br> tags.
      cleanHTML += html
        .split('<br>')
        .map((line) => line.trim())
        .filter((line) => line !== '')
        .join('<br>');
      cleanHTML += SEPARATEUR_POST;
    }
  } while (await goToNextPage(page));

  const pdfPage = await page.context().newPage();
  try {
    await pdfPage.setContent(cleanHTML);
    await pdfPage.pdf({ path: `${urlData}/${titreThread}.pdf`, format: 'A4' });
  } finally {
    // Always release the scratch tab, even when rendering fails.
    await pdfPage.close();
  }
}
|
||||
|
||||
/**
 * Waits a random delay, then follows the pager's "next page" control if the
 * current page has one.
 *
 * @param {import('playwright').Page} page Tab showing a paginated list.
 * @returns {Promise<boolean>} true when a next-page control was found and the
 *   resulting navigation finished loading; false when there is no such
 *   control (no click is performed in that case).
 */
async function goToNextPage(page) {
  await page.waitForTimeout(getRandomDelay());

  const nextPage = await page.$('#ctl00_PageContent_fnb_PostPager_pnlNextPage');
  if (nextPage === null) {
    return false;
  }

  await Promise.all([
    // Start listening before the click so the navigation is not missed.
    page.waitForNavigation({ waitUntil: 'load' }),
    nextPage.click(),
  ]);
  return true;
}
|
||||
|
||||
/**
 * Reads the text of the page's summary-title element.
 *
 * @param {import('playwright').Page} page Tab to read the title from.
 * @returns {Promise<string | null | undefined>} The element's text content,
 *   or undefined when the page has no such element.
 */
async function getPageTitle(page) {
  const titre = await page.$('#ctl00_PageContent_PageSummaryTitle');
  if (titre === null) {
    // Made explicit: callers receive undefined when no title element exists.
    return undefined;
  }
  return titre.textContent();
}
|
||||
|
||||
|
||||
/**
 * Downloads every linked file found inside the page's attachment panels into
 * `urlDownloadFolder`, creating that folder (and any missing parents) first.
 *
 * @param {import('playwright').Page} page Tab showing the thread.
 * @param {string} urlDownloadFolder Directory the files are saved into.
 * @returns {Promise<void>}
 */
async function downloadAttachedDocuments(page, urlDownloadFolder) {
  const attachmentDivs = await page.locator('div[id*="pnlAttachments"]').all();
  console.log('DL path: ', urlDownloadFolder);

  // recursive: also creates missing parent folders and does not throw when
  // the folder already exists.
  fs.mkdirSync(urlDownloadFolder, { recursive: true });

  for (const div of attachmentDivs) {
    const links = await div.locator('a').all();

    for (const link of links) {
      const hrefAttachement = await link.getAttribute('href');
      if (!hrefAttachement) {
        continue;
      }
      const label = (await link.textContent())?.trim();

      // Subscribe to the download event before triggering it.
      const downloadPromise = page.waitForEvent('download');
      await link.click({ modifiers: ['Alt'] });
      const download = await downloadPromise;

      // Use the link text as file name; fall back to the name suggested by
      // the download when the link has no text. Path separators are replaced
      // so the file always lands directly inside urlDownloadFolder.
      const fileName = (label || download.suggestedFilename()).replace(/[\\/]/g, '_');
      const downloadPath = path.join(urlDownloadFolder, fileName);
      await download.saveAs(downloadPath);
      console.log(`Downloaded PDF: ${downloadPath}`);
    }
  }
}
|
||||
|
||||
|
||||
// Entry point: attach to the already-running browser over CDP, sign in from
// its first open tab, then walk every page of the topic list and export each
// thread from a temporary tab.
const browser = await chromium.connectOverCDP(URL_BROWSER);
const contexts = browser.contexts();
const pages = contexts.length > 0 ? contexts[0].pages() : [];

if (contexts.length === 0) {
  console.log('No contexts available');
} else if (pages.length === 0) {
  console.log('No pages found in the context');
} else {
  const page = pages[0];

  await login(page);
  await page.goto(URL_FORUMS + PATH_ENTREE);
  // TODO: remove — this skips the first page of the topic list before the
  // crawl starts.
  await goToNextPage(page);

  // For each page of the topic list.
  do {
    const liensThreads = page.locator('a[href*="Posts.aspx?topic="]');
    const count = await liensThreads.count();

    // For each thread. Only even-indexed links are followed: the odd-indexed
    // ones are the links to the thread's last page.
    for (let i = 0; i < count; i += 2) {
      const href = await liensThreads.nth(i).getAttribute('href');
      if (href === null) {
        continue;
      }

      const newTab = await page.context().newPage();
      try {
        await newTab.waitForTimeout(getRandomDelay());
        // Resolving against the forum base also copes with absolute or
        // root-relative hrefs.
        await newTab.goto(new URL(href, URL_FORUMS).href);
        await genererPDF(newTab);
        await newTab.waitForTimeout(getRandomDelay());
      } finally {
        // The browser outlives this script, so never leave the tab open.
        await newTab.close();
      }
    }
  } while (await goToNextPage(page));
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user