"""Split a saved "Print Invoice Set" HTML page into one PDF per invoice.

Every ``<form>`` whose id starts with ``frmInvoice-`` is rendered with
WeasyPrint, styled with all ``.css`` files found in ``css_folder``, and
written to ``<base_output_folder>/<bill-to name>/Invoice_<number>.pdf``.
"""
import os

from bs4 import BeautifulSoup
from weasyprint import CSS, HTML

# ---- Configuration ----
input_file = r"C:\Users\conne\Downloads\Print Invoice Set - Canadian Avalanche Association.html"
# Update this to the actual CSS folder path.
css_folder = r"C:\Users\conne\Downloads\Print Invoice Set - Canadian Avalanche Association_files"
base_output_folder = os.path.join("data", "invoices")
# ------------------------

# Characters that cannot appear in a Windows file name.
_INVALID_FILENAME_CHARS = '<>:"/\\|?*'


def _safe_folder_name(text, fallback="unknown"):
    """Keep only alphanumerics, spaces, ``_`` and ``-``; never return ""."""
    cleaned = "".join(c for c in text if c.isalnum() or c in " _-").strip()
    return cleaned or fallback


def _safe_filename_part(text, fallback="unknown"):
    """Replace path separators and other illegal filename characters with ``_``."""
    cleaned = "".join(
        "_" if (c in _INVALID_FILENAME_CHARS or ord(c) < 32) else c
        for c in text
    ).strip()
    return cleaned or fallback


def _labelled_cell_text(form, label, separator=""):
    """Return the text of the first ``<td>`` after the ``<th>`` containing *label*.

    Returns ``None`` when either the header cell or a following ``<td>`` is
    missing, so callers can decide how to handle incomplete invoices.
    """
    header = form.find("th", string=lambda s: s and label in s)
    if header is None:
        return None
    cell = header.find_next("td")
    if cell is None:
        return None
    return cell.get_text(separator=separator, strip=True)


os.makedirs(base_output_folder, exist_ok=True)

# Load HTML file
with open(input_file, "r", encoding="utf-8") as html_file:
    html = html_file.read()

soup = BeautifulSoup(html, "lxml")

# Find all invoices
invoices = soup.find_all("form", id=lambda x: x and x.startswith("frmInvoice-"))
print(f"Found {len(invoices)} invoices")

# Load all CSS files from folder. Sorted so the cascade order is the same on
# every run (os.listdir returns entries in arbitrary order).
css_files = [
    CSS(filename=os.path.join(css_folder, css_name))
    for css_name in sorted(os.listdir(css_folder))
    if css_name.lower().endswith(".css")
]

# Relative URLs inside the saved page resolve against the page's own folder.
base_url = os.path.dirname(input_file)

written_paths = set()  # normalised output paths produced during this run
skipped = 0

for form in invoices:
    # Join text fragments with newlines so the first line (the name) can be
    # separated from the rest of the address block.
    bill_to_text = _labelled_cell_text(form, "Bill to", separator="\n")
    if bill_to_text is None:
        skipped += 1
        print(f"Skipping {form.get('id')}: no 'Bill to' cell found")
        continue

    name = bill_to_text.split("\n")[0].strip()
    person_folder = os.path.join(base_output_folder, _safe_folder_name(name))
    os.makedirs(person_folder, exist_ok=True)

    invoice_number = _safe_filename_part(
        _labelled_cell_text(form, "Invoice #") or "unknown"
    )

    # Avoid silently overwriting a PDF written earlier in this same run
    # (e.g. several invoices whose number could not be determined).
    output_pdf = os.path.join(person_folder, f"Invoice_{invoice_number}.pdf")
    duplicate_index = 2
    while os.path.normcase(output_pdf) in written_paths:
        output_pdf = os.path.join(
            person_folder, f"Invoice_{invoice_number}_{duplicate_index}.pdf"
        )
        duplicate_index += 1
    written_paths.add(os.path.normcase(output_pdf))

    # Convert to PDF with CSS and base URL for relative paths
    HTML(string=str(form), base_url=base_url).write_pdf(
        output_pdf, stylesheets=css_files
    )

    print(f"Saved PDF → {output_pdf}")

if skipped:
    print(f"Skipped {skipped} invoice(s) without a 'Bill to' cell")
print("✅ Done! All invoices saved as PDFs under data/invoices/")