русский
Germany.ru → Foren → Архив Досок → Quatschecke

Господа программисты и иже с ними, помогите!

17.01.25 17:51
Re: Господа программисты и иже с ними, помогите!
 
Gwenette свой человек
Gwenette
in Antwort Trollinger 10.01.25 20:16

Пользуйтесь, друзья! (улыб)


from selenium import webdriver

from selenium.webdriver.common.by import By

import time

from dotenv import load_dotenv

import os


# Walk the forum's per-user search listing, open every linked message and
# store the extracted text in numbered files (messages_batch_<n>.txt).

# Read a local .env file (if any) so that os.getenv() below can see UserID.
load_dotenv()

UserID = os.getenv("UserID")
if not UserID:
    # Without an ID the listing URL would literally contain "UserID=None".
    raise SystemExit("UserID is not set (environment variable or .env file).")

base_url = f"https://foren.germany.ru/dosearchuser.pl?UserID={UserID}"
page = 1
max_pages = 1  # raise this to crawl more than the first listing page
batch_size = 100  # number of messages written per output file
current_batch = []
batch_number = 1
blockquote_selector = "div.foren-body blockquote"
paragraph_selector = "div.foren-body p"


def save_batch(messages, number, label="Batch"):
    """Write one batch of message texts to ``messages_batch_<number>.txt``.

    Args:
        messages: Message texts to store, in the order they were collected.
        number: 1-based batch number; used for the file name and to continue
            the running message numbering across files.
        label: Prefix of the confirmation line printed after writing.
    """
    offset = (number - 1) * batch_size
    with open(f"messages_batch_{number}.txt", "w", encoding="utf-8") as file:
        for idx, batch_message in enumerate(messages, start=1):
            file.write(f"Nachricht {idx + offset}: {batch_message}\n\n")
    print(f"{label} {number} mit {len(messages)} Nachrichten gespeichert.")


driver = webdriver.Chrome()
try:
    while page <= max_pages:
        print(f"Lade Seite {page}...")
        driver.get(f"{base_url}&page={page}")

        message_links = driver.find_elements(
            By.CSS_SELECTOR, "tr td a[href*='/m/']"
        )
        # get_attribute() may return None; such entries cannot be opened.
        hrefs = [link.get_attribute("href") for link in message_links]
        message_urls = [href for href in hrefs if href]

        if not message_urls:
            print(f"Keine Nachrichten-URLs auf Seite {page} gefunden.")
            break

        print(f"{len(message_urls)} Nachrichten-URLs auf Seite {page} gefunden.")

        # The "Weiter" link has to be looked up while the listing page is
        # still loaded: the loop below navigates the browser away from it.
        has_next_page = bool(driver.find_elements(By.LINK_TEXT, "Weiter"))

        for message_url in message_urls:
            driver.get(message_url)
            time.sleep(2)  # crude wait for the message page to finish rendering

            blockquote_elements = driver.find_elements(
                By.CSS_SELECTOR, blockquote_selector
            )
            blockquote = (
                blockquote_elements[0].text.strip() if blockquote_elements else ""
            )

            paragraph_elements = driver.find_elements(
                By.CSS_SELECTOR, paragraph_selector
            )
            paragraph_text = "\n".join(p.text.strip() for p in paragraph_elements)

            message_content = f"{blockquote}\n{paragraph_text}".strip()
            print("Nachrichtentext:")
            print(message_content)

            current_batch.append(message_content)
            if len(current_batch) >= batch_size:
                save_batch(current_batch, batch_number)
                current_batch = []
                batch_number += 1

        if not has_next_page:
            print("Keine weiteren Seiten vorhanden.")
            break

        page += 1
        time.sleep(1)  # short pause between listing pages

    # Flush whatever did not fill a complete batch.
    if current_batch:
        save_batch(current_batch, batch_number, label="Letzter Batch")

    print("Maximale Seitenanzahl erreicht oder keine weiteren Seiten vorhanden.")
finally:
    # Always close the browser, even when scraping fails midway.
    driver.quit()

The Times 03/Jan/2009 Chancellor on brink of second bailout for banks
 

Sprung zu