From bd6c65237b0dc9e1521be71f9dbda649b383f7f4 Mon Sep 17 00:00:00 2001 From: Versun Date: Sat, 30 Mar 2024 10:23:41 +0800 Subject: [PATCH 1/2] fix error: content_translate: 'str' object has no attribute 'find_parent' --- core/tasks.py | 4 ++-- utils/text_handler.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/tasks.py b/core/tasks.py index 5cb9b89..d1fbcc4 100644 --- a/core/tasks.py +++ b/core/tasks.py @@ -307,12 +307,12 @@ def content_translate(original_content: str, target_language: str, engine: Trans comments = soup.find_all(string=lambda text: isinstance(text, Comment)) [comment.extract() for comment in comments] - for element in soup.get_text(): + for element in soup: if text_handler.should_skip(element): continue #TODO 如果文字长度大于最大长度,就分段翻译,需要用chunk_translate - text = str(element) + text = element.get_text() logging.info("Translate content: %s", text) cached = Translated_Content.is_translated(text, target_language) diff --git a/utils/text_handler.py b/utils/text_handler.py index 2a8f330..b8bd33f 100644 --- a/utils/text_handler.py +++ b/utils/text_handler.py @@ -56,11 +56,12 @@ def group_chunks(split_chunks: dict, min_size: int, max_size: int, return grouped_chunks def should_skip(element): - if element.find_parent('code'): + if element.name == 'code': return True # 去除两端的空白字符 - element = element.strip() - if not element: + text = element.get_text() + text = text.strip() + if not text: return True # 使用正则表达式来检查元素是否为数字、URL、电子邮件或包含特定符号 @@ -68,11 +69,10 @@ def should_skip(element): r'^http', # URL r'^[^@]+@[^@]+\.[^@]+$', # 电子邮件 r'^[\d\W]+$' # 纯数字或者数字和符号的组合 - r'.*' # 标签的内容 ] for pattern in skip_patterns: - if re.match(pattern, element): + if re.match(pattern, text): return True return False From d363fe756e189f6f26c601a78cd5437be9ee31a0 Mon Sep 17 00:00:00 2001 From: Versun Date: Sat, 30 Mar 2024 10:24:05 +0800 Subject: [PATCH 2/2] update version --- templates/admin/base_site.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/admin/base_site.html b/templates/admin/base_site.html index 717f935..331588d 100644 --- a/templates/admin/base_site.html +++ b/templates/admin/base_site.html @@ -4,7 +4,7 @@