Skip to content

Commit

Permalink
replace internal links with local paths
Browse files: browse the repository at this point in the history
  • Loading branch information
L-M-Sherlock committed Nov 13, 2024
1 parent 67cfc57 commit 310914d
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions render.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,6 +15,20 @@ def replace_url(url: str) -> str:
return url


def process_content(content: str) -> str:
    """Return *content* with Zhihu redirect prefixes removed and hrefs rewritten.

    Two passes are applied, in this order:

    1. Every occurrence of the two Zhihu redirect fragments
       (``//link.zhihu.com/?target=https%3A`` and the ``http%3A`` variant)
       is deleted from the text.
    2. The value of every double-quoted ``href="..."`` attribute is passed
       through ``replace_url`` (defined elsewhere in this module) and the
       attribute is rebuilt around the returned value. Per the original
       comment, this is what swaps internal links for local paths.

    Args:
        content: HTML text to process.

    Returns:
        The processed HTML text.
    """
    # Pass 1: remove the Zhihu redirect fragments (https first, then http).
    for redirect_fragment in (
        "//link.zhihu.com/?target=https%3A",
        "//link.zhihu.com/?target=http%3A",
    ):
        content = content.replace(redirect_fragment, "")

    def _rewrite_href(match) -> str:
        # group(1) is the raw attribute value between the double quotes.
        return f'href="{replace_url(match.group(1))}"'

    # Pass 2: non-greedy match so each href value stops at its closing quote.
    return re.sub(r"href=\"(.*?)\"", _rewrite_href, content)


def extract_reference(html: str) -> str:
reference_regex = re.compile(
r'<sup[^>]*data-text="([^"]*)"[^>]*data-url="([^"]*)"[^>]*data-numero="([^"]*)"[^>]*>'
Expand Down Expand Up @@ -123,8 +137,7 @@ def extract_reference(html: str) -> str:
created_time_str = created_time.isoformat()
created_time_formatted = created_time.strftime("%Y年%m月%d日")

data["content"] = data["content"].replace("//link.zhihu.com/?target=https%3A", "")
data["content"] = data["content"].replace("//link.zhihu.com/?target=http%3A", "")
data["content"] = process_content(data["content"])

# Prepare the HTML content
html_content = (
Expand Down Expand Up @@ -233,8 +246,7 @@ def extract_reference(html: str) -> str:
created_time_str = created_time.isoformat()
created_time_formatted = created_time.strftime("%Y年%m月%d日")

data["content"] = data["content"].replace("//link.zhihu.com/?target=https%3A", "")
data["content"] = data["content"].replace("//link.zhihu.com/?target=http%3A", "")
data["content"] = process_content(data["content"])

# Prepare the HTML content
html_content = (
Expand Down

0 comments on commit 310914d

Please sign in to comment.