chore: readme a bit
This commit is contained in:
@@ -12,7 +12,10 @@ def clean_text(text: str) -> str:
|
||||
|
||||
|
||||
def extract_zh_paragraphs(soup: BeautifulSoup) -> List[str]:
|
||||
"""extract paragraphs from zh HTML"""
|
||||
"""
|
||||
most chinese raws are split with 2 br tags rather than
|
||||
by <p> elements so.. yeah
|
||||
"""
|
||||
if h1_tag := soup.find("h1"):
|
||||
h1_tag.decompose()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user