More logical Mastodon parsing order (fixes parsing pages with lots of <h3>)
This commit is contained in:
parent
933ce8555f
commit
2906946ee9
@@ -39,11 +39,11 @@ def get_mastodon_blocks(domain: str) -> dict:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
for header in doc.find_all("h3"):
|
for header in doc.find_all("h3"):
|
||||||
for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
|
|
||||||
header_text = header.text
|
header_text = header.text
|
||||||
if header_text in translations:
|
if header_text in translations:
|
||||||
header_text = translations[header_text]
|
header_text = translations[header_text]
|
||||||
if header_text in blocks:
|
if header_text in blocks:
|
||||||
|
for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
|
||||||
blocks[header_text].append(
|
blocks[header_text].append(
|
||||||
{
|
{
|
||||||
"domain": line.find("span").text,
|
"domain": line.find("span").text,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user