More logical Mastodon parsing order (fixes parsing pages with lots of <h3>)
This commit is contained in:
parent
933ce8555f
commit
2906946ee9
@@ -39,11 +39,11 @@ def get_mastodon_blocks(domain: str) -> dict:
|
||||
return {}
|
||||
|
||||
for header in doc.find_all("h3"):
|
||||
for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
|
||||
header_text = header.text
|
||||
if header_text in translations:
|
||||
header_text = translations[header_text]
|
||||
if header_text in blocks:
|
||||
header_text = header.text
|
||||
if header_text in translations:
|
||||
header_text = translations[header_text]
|
||||
if header_text in blocks:
|
||||
for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
|
||||
blocks[header_text].append(
|
||||
{
|
||||
"domain": line.find("span").text,
|
||||
|
Loading…
x
Reference in New Issue
Block a user