From 2c4406bd6d37fd3295f767920253205b4a1c3792 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sat, 26 Oct 2024 14:50:22 +0100 Subject: [PATCH] translations: make sure we do not re-translate --- beetsplug/lyrics.py | 42 ++++++++++++++++++++++++++----------- test/plugins/test_lyrics.py | 15 ++++++++++--- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 472c1b7826..a9ee2666ac 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -733,6 +733,7 @@ def scrape(cls, html: str) -> str | None: class Translator(RequestHandler): TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate" LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$") + remove_translations = partial(re.compile(r" / [^\n]+").sub, "") _log: beets.logging.Logger api_key: str @@ -800,23 +801,45 @@ def append_translations(self, lines: Iterable[str]) -> list[str]: # only add the space between non-empty timestamps and texts return [" ".join(filter(None, p)) for p in zip(timestamps, texts)] - def translate(self, lyrics: str) -> str: + def translate(self, new_lyrics: str, old_lyrics: str) -> str: """Translate the given lyrics to the target language. + Check old lyrics for existing translations and return them if their + original text matches the new lyrics. This is to avoid translating + the same lyrics multiple times. + If the lyrics are already in the target language or not in any of of the source languages (if configured), they are returned as is. The footer with the source URL is preserved, if present. """ - lyrics_language = langdetect.detect(lyrics).upper() - if lyrics_language == self.to_language or ( - self.from_languages and lyrics_language not in self.from_languages + if ( + " / " in old_lyrics + and self.remove_translations(old_lyrics) == new_lyrics ): - return lyrics + self.info("🔵 Translations already exist") + return old_lyrics + + lyrics_language = langdetect.detect(new_lyrics).upper() + if lyrics_language == self.to_language: + self.info( + "🔵 Lyrics are already in the target language {}", + self.to_language, + ) + return new_lyrics + + if self.from_languages and lyrics_language not in self.from_languages: + self.info( + "🔵 Configuration {} does not permit translating from {}", + self.from_languages, + lyrics_language, + ) + return new_lyrics - lyrics, *url = lyrics.split("\n\nSource: ") + lyrics, *url = new_lyrics.split("\n\nSource: ") with self.handle_request(): translated_lines = self.append_translations(lyrics.splitlines()) + self.info("🟢 Translated lyrics to {}", self.to_language) return "\n\nSource: ".join(["\n".join(translated_lines), *url]) @@ -1054,12 +1077,7 @@ def add_item_lyrics(self, item: Item, write: bool) -> None: if lyrics := self.find_lyrics(item): self.info("🟢 Found lyrics: {0}", item) if translator := self.translator: - initial_lyrics = lyrics - if (lyrics := translator.translate(lyrics)) != initial_lyrics: - self.info( - "🟢 Added translation to {}", - self.config["translate_to"].get().upper(), - ) + lyrics = translator.translate(lyrics, item.lyrics) else: self.info("🔴 Lyrics not found: {}", item) lyrics = self.config["fallback"].get() diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index abf61923eb..328c7deab5 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -560,7 +560,7 @@ def callback(request, _): requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback) @pytest.mark.parametrize( - "initial_lyrics, expected", + "new_lyrics, old_lyrics, expected", [ pytest.param( """ @@ -569,6 +569,7 @@ def callback(request, _): My body wouldn't let me hide it (Hide it) No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold) Ridin' through the thunder, lightnin'""", + "", """ [Refrain: Doja Cat] / [Refrain : Doja Cat] Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir) @@ -584,6 +585,7 @@ def callback(request, _): [00:01.00] Some more synced lyrics Source: https://lrclib.net/api/123""", + "", """ [00:00.00] Some synced lyrics / Quelques paroles synchronisées [00:00:50] @@ -594,17 +596,24 @@ def callback(request, _): ), pytest.param( "Quelques paroles", + "", "Quelques paroles", id="already in the target language", ), + pytest.param( + "Some lyrics", + "Some lyrics / Some translation", + "Some lyrics / Some translation", + id="already translated", + ), ], ) - def test_translate(self, initial_lyrics, expected): + def test_translate(self, new_lyrics, old_lyrics, expected): plugin = lyrics.LyricsPlugin() bing = lyrics.Translator(plugin._log, "123", "FR", ["EN"]) assert bing.translate( - textwrap.dedent(initial_lyrics) + textwrap.dedent(new_lyrics), old_lyrics ) == textwrap.dedent(expected)