/
/
/
1"""Helpers for the Genius Lyrics provider."""
2
3import re
4
5from lyricsgenius.types import Song
6
7
def clean_song_title(song_title: str) -> str:
    """Return the song title with release/version metadata stripped.

    Metadata is recognised in three places: inside a parenthesised or
    bracketed group containing one of the known keywords, after a hyphen
    when the hyphen is followed by a four-digit number or a keyword, and
    as leftover trailing hyphens/brackets once the first two are removed.
    Matching of the keywords is case-insensitive.
    """
    # Words that mark a parenthesised/bracketed/hyphenated chunk as metadata.
    metadata_words = (
        r"(remaster(?:ed)?|anniversary|instrumental|live|edit(?:ion)?|"
        r"single(s)?|stereo|album|radio|version|feat(?:uring)?|mix|bonus)"
    )

    keyword_strippers = (
        # "( ... keyword ... )" or "[ ... keyword ... ]" anywhere in the title.
        re.compile(
            rf"[\(\[][^\)\]]*\b({metadata_words})\b[^\)\]]*[\)\]]", re.IGNORECASE
        ),
        # "- 2011 ..." or "- keyword ..." running through to the end of the title.
        re.compile(rf"(\s*-\s*(\d{{4}}|{metadata_words}).*)$", re.IGNORECASE),
    )

    title = song_title
    for stripper in keyword_strippers:
        title = stripper.sub("", title)

    # A hyphen left hanging at the end no longer introduces anything.
    title = re.sub(r"\s*-\s*$", "", title).strip()

    # Drop a trailing run of stray brackets/spaces that follows whitespace.
    title = re.sub(r"\s[\(\[\{\]\)\}\s]+$", "", title)
    return title.strip()
29
30
def cleanup_lyrics(song: Song) -> str:
    """Return ``song.lyrics`` with known non-lyric boilerplate removed.

    The following pieces of text are removed when present:

    * a leading ``"<digits> Contributor... Lyrics"`` header,
    * a trailing ``"Embed"`` marker,
    * a trailing copy of ``song.pyongs_count`` (checked after ``"Embed"``
      has been removed),
    * ``"See <artist> LiveGet tickets as low as $<digits>"``,
    * ``"You might also like"`` when it is not followed by whitespace.

    :param song: Object providing ``lyrics``, ``pyongs_count`` and ``artist``.
    :return: The cleaned lyrics; an empty string if ``song.lyrics`` is empty
        or ``None``.
    """
    # Header: digits at the very start, "Contributor", then anything (lazily,
    # newlines included) up to the first " Lyrics".
    lyrics = re.sub(
        r"^(\d+) Contributor(.*?) Lyrics", "", song.lyrics or "", flags=re.DOTALL
    )

    # Trailing markers are removed as exact suffixes. str.rstrip() must not be
    # used here: its argument is a *set of characters*, so rstrip("Embed")
    # would also eat lyric text ending in any of "E", "m", "b", "e", "d".
    trailers = ["Embed"]
    if song.pyongs_count is not None:
        # Skipping None avoids stripping the literal text "None".
        trailers.append(str(song.pyongs_count))
    for trailer in trailers:
        if trailer and lyrics.endswith(trailer):
            lyrics = lyrics[: -len(trailer)]

    # The artist name is data, not a pattern: escape it so regex
    # metacharacters in the name (e.g. "$", "(", ")") match literally instead
    # of raising re.error or silently failing to match.
    ticket_ad = rf"See {re.escape(song.artist)} LiveGet tickets as low as \$\d+"
    lyrics = re.sub(ticket_ad, "", lyrics)

    # Only remove "You might also like" when it is glued to the next token;
    # followed by whitespace it is left alone.
    return re.sub(r"You might also like(?!\s)", "", lyrics)
50