Skip to content

Commit

Permalink
Merge pull request #43 from rss-translator/next_version
Browse files Browse the repository at this point in the history
Next version
  • Loading branch information
versun authored Mar 30, 2024
2 parents 31714f3 + d363fe7 commit 993bea9
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions core/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,12 @@ def content_translate(original_content: str, target_language: str, engine: Trans
comments = soup.find_all(string=lambda text: isinstance(text, Comment))
[comment.extract() for comment in comments]

for element in soup.get_text():
for element in soup:
if text_handler.should_skip(element):
continue
#TODO 如果文字长度大于最大长度,就分段翻译,需要用chunk_translate

text = str(element)
text = element.get_text()
logging.info("Translate content: %s", text)
cached = Translated_Content.is_translated(text, target_language)

Expand Down
2 changes: 1 addition & 1 deletion templates/admin/base_site.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<div id="footer" style="text-align: center;">
<a href="https://rsstranslator.com" title="RSS Translator">RSS Translator</a> ·
<a href="https://afdian.net/a/versun" title="Donate">Donate</a> ·
Version: 2024.3.27
Version: 2024.3.30

<!-- <div class="social">-->

Expand Down
10 changes: 5 additions & 5 deletions utils/text_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,23 +56,23 @@ def group_chunks(split_chunks: dict, min_size: int, max_size: int,
return grouped_chunks

def should_skip(element):
    """Decide whether ``element`` should be left untranslated.

    Args:
        element: A parsed-HTML node exposing ``name`` and ``get_text()``
            (presumably a BeautifulSoup ``Tag`` or ``NavigableString`` --
            confirm against the caller). A plain ``str`` is accepted too,
            in which case the string itself is used as the text.

    Returns:
        bool: True if the element is a ``<code>`` tag, or if its stripped
        text is empty, starts with ``http``, looks like an e-mail address,
        or consists solely of digits and non-word characters. False
        otherwise.
    """
    # Code samples must be kept verbatim. getattr() keeps plain strings,
    # which have no ``name`` attribute, from raising here.
    if getattr(element, 'name', None) == 'code':
        return True

    get_text = getattr(element, 'get_text', None)
    text = get_text() if callable(get_text) else str(element)
    # Strip leading and trailing whitespace.
    text = text.strip()
    if not text:
        return True

    # Regular expressions that detect text which is a URL, an e-mail
    # address, or made up only of digits and symbols.
    skip_patterns = [
        r'^http',  # URL
        r'^[^@]+@[^@]+\.[^@]+$',  # e-mail address
        r'^[\d\W]+$',  # digits only, or digits mixed with symbols
    ]

    return any(re.match(pattern, text) for pattern in skip_patterns)
Expand Down

0 comments on commit 993bea9

Please sign in to comment.