/
/
/
1"""All logic for metadata retrieval."""
2
3from __future__ import annotations
4
5import asyncio
6import collections
7import logging
8import os
9import random
10import urllib.parse
11from base64 import b64encode
12from contextlib import suppress
13from time import time
14from typing import TYPE_CHECKING, cast
15from uuid import uuid4
16
17import aiofiles
18from aiohttp import web
19from music_assistant_models.config_entries import ConfigEntry, ConfigValueOption, ConfigValueType
20from music_assistant_models.enums import (
21 AlbumType,
22 ConfigEntryType,
23 ImageType,
24 MediaType,
25 ProviderFeature,
26 ProviderType,
27)
28from music_assistant_models.errors import MediaNotFoundError, ProviderUnavailableError
29from music_assistant_models.helpers import get_global_cache_value
30from music_assistant_models.media_items import (
31 Album,
32 Artist,
33 Audiobook,
34 BrowseFolder,
35 ItemMapping,
36 MediaItemImage,
37 MediaItemType,
38 Playlist,
39 Podcast,
40 Track,
41)
42from music_assistant_models.unique_list import UniqueList
43
44from music_assistant.constants import (
45 CONF_LANGUAGE,
46 DB_TABLE_ARTISTS,
47 DB_TABLE_PLAYLISTS,
48 VARIOUS_ARTISTS_MBID,
49 VARIOUS_ARTISTS_NAME,
50 VERBOSE_LOG_LEVEL,
51)
52from music_assistant.helpers.api import api_command
53from music_assistant.helpers.compare import compare_strings
54from music_assistant.helpers.images import create_collage, get_image_thumb
55from music_assistant.helpers.security import is_safe_path
56from music_assistant.helpers.throttle_retry import Throttler
57from music_assistant.models.core_controller import CoreController
58from music_assistant.models.music_provider import MusicProvider
59
60if TYPE_CHECKING:
61 from music_assistant_models.config_entries import CoreConfig
62
63 from music_assistant import MusicAssistant
64 from music_assistant.models.metadata_provider import MetadataProvider
65 from music_assistant.providers.musicbrainz import MusicbrainzProvider
66
# Supported metadata locales, mapping the locale code to a human readable
# language name. The names are shown as the titles of the language options and
# are also accepted (case-insensitively) by `set_preferred_language`.
# The order matters: loose language/region matching picks the first hit.
LOCALES = {
    "af_ZA": "Afrikaans",
    "ar_AE": "Arabic (United Arab Emirates)",
    "ar_EG": "Arabic (Egypt)",
    "ar_SA": "Arabic (Saudi Arabia)",
    "bg_BG": "Bulgarian",
    "cs_CZ": "Czech",
    "zh_CN": "Chinese",
    "hr_HR": "Croatian",
    "da_DK": "Danish",
    "de_DE": "German",
    "el_GR": "Greek",
    "en_AU": "English (AU)",
    "en_US": "English (US)",
    "en_GB": "English (UK)",
    "es_ES": "Spanish",
    "et_EE": "Estonian",
    "fi_FI": "Finnish",
    "fr_FR": "French",
    "hu_HU": "Hungarian",
    "is_IS": "Icelandic",
    "it_IT": "Italian",
    "lt_LT": "Lithuanian",
    "lv_LV": "Latvian",
    "ja_JP": "Japanese",
    "ko_KR": "Korean",
    "nl_NL": "Dutch",
    "nb_NO": "Norwegian Bokmål",
    "pl_PL": "Polish",
    "pt_PT": "Portuguese",
    "ro_RO": "Romanian",
    "ru_RU": "Russian",
    "sk_SK": "Slovak",
    "sl_SI": "Slovenian",
    "sr_RS": "Serbian",
    "sv_SE": "Swedish",
    "tr_TR": "Turkish",
    "uk_UA": "Ukrainian",
}
106
# Locale that is used as long as no preferred language has been configured.
DEFAULT_LANGUAGE = "en_US"
# Minimum age (in seconds) of the last metadata refresh of an item before
# its metadata is collected again (unless a refresh is forced).
REFRESH_INTERVAL_ARTISTS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_ALBUMS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_TRACKS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_AUDIOBOOKS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_PODCASTS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_PLAYLISTS = 14 * 24 * 60 * 60  # 14 days
PERIODIC_SCAN_INTERVAL = 6 * 60 * 60  # 6 hours
# Config key of the switch that enables lookups on online metadata providers.
CONF_ENABLE_ONLINE_METADATA = "enable_online_metadata"
116
117
118class MetaDataController(CoreController):
119 """Several helpers to search and store metadata for mediaitems."""
120
121 domain: str = "metadata"
122 config: CoreConfig
123
124 def __init__(self, mass: MusicAssistant) -> None:
125 """Initialize class."""
126 super().__init__(mass)
127 self.cache = self.mass.cache
128 self._pref_lang: str | None = None
129 self.manifest.name = "Metadata controller"
130 self.manifest.description = (
131 "Music Assistant's core controller which handles all metadata for music."
132 )
133 self.manifest.icon = "book-information-variant"
134 self._lookup_jobs: MetadataLookupQueue = MetadataLookupQueue(100)
135 self._lookup_task: asyncio.Task[None] | None = None
136 self._throttler = Throttler(1, 30)
137
138 async def get_config_entries(
139 self,
140 action: str | None = None,
141 values: dict[str, ConfigValueType] | None = None,
142 ) -> tuple[ConfigEntry, ...]:
143 """Return all Config Entries for this core module (if any)."""
144 return (
145 ConfigEntry(
146 key=CONF_LANGUAGE,
147 type=ConfigEntryType.STRING,
148 label="Preferred language",
149 required=False,
150 default_value=DEFAULT_LANGUAGE,
151 description="Preferred language for metadata.\n\n"
152 "Note that English will always be used as fallback when content "
153 "in your preferred language is not available.",
154 options=[ConfigValueOption(value, key) for key, value in LOCALES.items()],
155 ),
156 ConfigEntry(
157 key=CONF_ENABLE_ONLINE_METADATA,
158 type=ConfigEntryType.BOOLEAN,
159 label="Enable metadata retrieval from online metadata providers",
160 required=False,
161 default_value=True,
162 description="Enable online metadata lookups.\n\n"
163 "This will allow Music Assistant to fetch additional metadata from (enabled) "
164 "metadata providers, such as The Audio DB and Fanart.tv.\n\n"
165 "Note that these online sources are only queried when no information is already "
166 "available from local files or the music providers and local artwork/metadata "
167 "will always have preference over online sources so consider metadata from online "
168 "sources as complementary only.\n\n"
169 "The retrieval of additional rich metadata is a process that is executed slowly "
170 "in the background to not overload these free services with requests. "
171 "You can speedup the process by storing the images and other metadata locally.",
172 ),
173 )
174
175 async def setup(self, config: CoreConfig) -> None:
176 """Async initialize of module."""
177 # wait for dependencies to be ready (streams and music)
178 await self.mass.streams.initialized.wait()
179 await self.mass.music.initialized.wait()
180
181 self.config = config
182 if not self.logger.isEnabledFor(VERBOSE_LOG_LEVEL):
183 # silence PIL logger
184 logging.getLogger("PIL").setLevel(logging.WARNING)
185 # make sure that our directory with collage images exists
186 self._collage_images_dir = os.path.join(self.mass.cache_path, "collage_images")
187 if not await asyncio.to_thread(os.path.exists, self._collage_images_dir):
188 await asyncio.to_thread(os.mkdir, self._collage_images_dir)
189 self.mass.streams.register_dynamic_route("/imageproxy", self.handle_imageproxy)
190 # the lookup task is used to process metadata lookup jobs
191 self._lookup_task = self.mass.create_task(self._process_metadata_lookup_jobs())
192 # just run the scan for missing metadata once at startup
193 # background scan for missing metadata
194 self.mass.call_later(300, self._scan_missing_metadata)
195 # migrate theaudiodb images to new url
196 # they updated their cdn url to r2.theaudiodb.com
197 # TODO: remove this after 2.7 release
198 query = (
199 "UPDATE artists SET metadata = "
200 "REPLACE (metadata, 'https://www.theaudiodb.com', 'https://r2.theaudiodb.com') "
201 "WHERE artists.metadata LIKE '%https://www.theaudiodb.com%'"
202 )
203 if self.mass.music.database:
204 await self.mass.music.database.execute(query)
205 await self.mass.music.database.commit()
206
207 async def close(self) -> None:
208 """Handle logic on server stop."""
209 if self._lookup_task and not self._lookup_task.done():
210 self._lookup_task.cancel()
211 self.mass.streams.unregister_dynamic_route("/imageproxy")
212
213 @property
214 def providers(self) -> list[MetadataProvider]:
215 """Return all loaded/running MetadataProviders."""
216 return cast("list[MetadataProvider]", self.mass.get_providers(ProviderType.METADATA))
217
218 @property
219 def preferred_language(self) -> str:
220 """Return preferred language for metadata (as 2 letter language code 'en')."""
221 return self.locale.split("_")[0]
222
223 @property
224 def locale(self) -> str:
225 """Return preferred language for metadata (as full locale code 'en_EN')."""
226 value = self.mass.config.get_raw_core_config_value(
227 self.domain, CONF_LANGUAGE, DEFAULT_LANGUAGE
228 )
229 return str(value)
230
231 @api_command("metadata/set_default_preferred_language")
232 def set_default_preferred_language(self, lang: str) -> None:
233 """
234 Set the default preferred language.
235
236 Reasoning behind this is that the backend can not make a wise choice for the default,
237 so relies on some external source that knows better to set this info, like the frontend
238 or a streaming provider.
239 Can only be set once (by this call or the user).
240 """
241 if self.mass.config.get_raw_core_config_value(self.domain, CONF_LANGUAGE):
242 return # already set
243 self.set_preferred_language(lang)
244
245 @api_command("metadata/set_preferred_language")
246 def set_preferred_language(self, lang: str) -> None:
247 """
248 Set the preferred language.
249
250 Note that this will not modify any existing metadata,
251 but will be used for future lookups.
252 """
253 # prefer exact match
254 if lang in LOCALES:
255 self.mass.config.set_raw_core_config_value(self.domain, CONF_LANGUAGE, lang)
256 return
257 # try strict matching on either locale code or region
258 lang = lang.lower().replace("-", "_")
259 for locale_code, lang_name in LOCALES.items():
260 if lang in (locale_code.lower(), lang_name.lower()):
261 self.mass.config.set_raw_core_config_value(self.domain, CONF_LANGUAGE, locale_code)
262 return
263 # attempt loose match on language code or region code
264 for lang_part in (lang[:2], lang[:-2]):
265 for locale_code in tuple(LOCALES):
266 language_code, region_code = locale_code.lower().split("_", 1)
267 if lang_part in (language_code, region_code):
268 self.mass.config.set_raw_core_config_value(
269 self.domain, CONF_LANGUAGE, locale_code
270 )
271 return
272 # if we reach this point, we couldn't match the language
273 self.logger.warning("%s is not a valid language", lang)
274
275 @api_command("metadata/update_metadata")
276 async def update_metadata(
277 self, item: str | MediaItemType, force_refresh: bool = False
278 ) -> MediaItemType:
279 """Get/update extra/enhanced metadata for/on given MediaItem."""
280 async with self.cache.handle_refresh(force_refresh):
281 if isinstance(item, str):
282 retrieved_item = await self.mass.music.get_item_by_uri(item)
283 if isinstance(retrieved_item, BrowseFolder):
284 raise TypeError("Cannot update metadata on a BrowseFolder item.")
285 item = retrieved_item
286
287 if item.provider != "library":
288 # this shouldn't happen but just in case.
289 raise RuntimeError("Metadata can only be updated for library items")
290
291 # just in case it was in the queue, prevent duplicate lookups
292 if item.uri:
293 self._lookup_jobs.pop(item.uri)
294 async with self._throttler:
295 if item.media_type == MediaType.ARTIST:
296 await self._update_artist_metadata(
297 cast("Artist", item), force_refresh=force_refresh
298 )
299 if item.media_type == MediaType.ALBUM:
300 await self._update_album_metadata(
301 cast("Album", item), force_refresh=force_refresh
302 )
303 if item.media_type == MediaType.TRACK:
304 await self._update_track_metadata(
305 cast("Track", item), force_refresh=force_refresh
306 )
307 if item.media_type == MediaType.PLAYLIST:
308 await self._update_playlist_metadata(
309 cast("Playlist", item), force_refresh=force_refresh
310 )
311 if item.media_type == MediaType.AUDIOBOOK:
312 await self._update_audiobook_metadata(
313 cast("Audiobook", item), force_refresh=force_refresh
314 )
315 if item.media_type == MediaType.PODCAST:
316 await self._update_podcast_metadata(
317 cast("Podcast", item), force_refresh=force_refresh
318 )
319 return item
320
321 def schedule_update_metadata(self, uri: str) -> None:
322 """Schedule metadata update for given MediaItem uri."""
323 if "library" not in uri:
324 return
325 if self._lookup_jobs.exists(uri):
326 return
327 with suppress(asyncio.QueueFull):
328 self._lookup_jobs.put_nowait(uri)
329
330 async def get_image_data_for_item(
331 self,
332 media_item: MediaItemType,
333 img_type: ImageType = ImageType.THUMB,
334 size: int = 0,
335 ) -> bytes | None:
336 """Get image data for given MedaItem."""
337 img_path = await self.get_image_url_for_item(
338 media_item=media_item,
339 img_type=img_type,
340 )
341 if not img_path:
342 return None
343 thumbnail = await self.get_thumbnail(img_path, provider="builtin", size=size)
344
345 return cast("bytes", thumbnail)
346
347 async def get_image_url_for_item(
348 self,
349 media_item: MediaItemType | ItemMapping,
350 img_type: ImageType = ImageType.THUMB,
351 resolve: bool = True,
352 ) -> str | None:
353 """Get url to image for given media media_item."""
354 if not media_item:
355 return None
356
357 if isinstance(media_item, ItemMapping):
358 # Check if the ItemMapping already has an image - avoid expensive API call
359 if media_item.image and media_item.image.type == img_type:
360 if media_item.image.remotely_accessible and resolve:
361 return self.get_image_url(media_item.image)
362 if not media_item.image.remotely_accessible:
363 return media_item.image.path
364
365 # Only retrieve full item if we don't have the image we need
366 if not media_item.uri:
367 return None
368 retrieved_item = await self.mass.music.get_item_by_uri(media_item.uri)
369 if isinstance(retrieved_item, BrowseFolder):
370 return None # can not happen, but guard for type checker
371 media_item = retrieved_item
372
373 if media_item and media_item.metadata.images:
374 for img in media_item.metadata.images:
375 if img.type != img_type:
376 continue
377 if not img.remotely_accessible and not resolve:
378 # ignore image if its not remotely accessible and we don't allow resolving
379 continue
380 return self.get_image_url(img, prefer_proxy=not img.remotely_accessible)
381
382 # retry with track's album
383 if isinstance(media_item, Track) and media_item.album:
384 return await self.get_image_url_for_item(media_item.album, img_type, resolve)
385
386 # try artist instead for albums
387 if isinstance(media_item, Album) and media_item.artists:
388 return await self.get_image_url_for_item(media_item.artists[0], img_type, resolve)
389
390 # last resort: track artist(s)
391 if isinstance(media_item, Track) and media_item.artists:
392 for artist in media_item.artists:
393 return await self.get_image_url_for_item(artist, img_type, resolve)
394
395 return None
396
397 def get_image_url(
398 self,
399 image: MediaItemImage,
400 size: int = 0,
401 prefer_proxy: bool = False,
402 image_format: str | None = None,
403 prefer_stream_server: bool = False,
404 ) -> str:
405 """Get (proxied) URL for MediaItemImage."""
406 if image_format is None:
407 image_format = "png" if image.path.lower().endswith(".png") else "jpg"
408 if not image.remotely_accessible or prefer_proxy or size:
409 # return imageproxy url for images that need to be resolved
410 # the original path is double encoded
411 encoded_url = urllib.parse.quote_plus(urllib.parse.quote_plus(image.path))
412 base_url = (
413 self.mass.streams.base_url if prefer_stream_server else self.mass.webserver.base_url
414 )
415 return (
416 f"{base_url}/imageproxy?provider={image.provider}"
417 f"&size={size}&fmt={image_format}&path={encoded_url}"
418 )
419 return image.path
420
421 async def get_thumbnail(
422 self,
423 path: str,
424 provider: str,
425 size: int | None = None,
426 base64: bool = False,
427 image_format: str | None = None,
428 ) -> bytes | str:
429 """Get/create thumbnail image for path (image url or local path)."""
430 if not self.mass.get_provider(provider) and not path.startswith("http"):
431 raise ProviderUnavailableError
432 if image_format is None:
433 image_format = "png" if path.lower().endswith(".png") else "jpg"
434 if provider == "builtin" and path.startswith("/collage/"):
435 # special case for collage images
436 collage_rel = path.split("/collage/")[-1]
437 if not is_safe_path(collage_rel):
438 raise FileNotFoundError("Invalid collage path")
439 path = os.path.join(self._collage_images_dir, collage_rel)
440 thumbnail_bytes = await get_image_thumb(
441 self.mass, path, size=size, provider=provider, image_format=image_format
442 )
443 if base64:
444 enc_image = b64encode(thumbnail_bytes).decode()
445 return f"data:image/{image_format};base64,{enc_image}"
446 return thumbnail_bytes
447
448 async def handle_imageproxy(self, request: web.Request) -> web.Response:
449 """Handle request for image proxy."""
450 path = request.query["path"]
451 provider = request.query.get("provider", "builtin")
452 if provider in ("url", "file", "http"):
453 # temporary for backwards compatibility
454 provider = "builtin"
455 size = int(request.query.get("size", "0"))
456 image_format = request.query.get("fmt", None)
457 if image_format is None:
458 image_format = "png" if path.lower().endswith(".png") else "jpg"
459 if not self.mass.get_provider(provider) and not path.startswith("http"):
460 return web.Response(status=404)
461 if "%" in path:
462 # assume (double) encoded url, decode it
463 path = urllib.parse.unquote_plus(path)
464 try:
465 image_data = await self.get_thumbnail(
466 path, size=size, provider=provider, image_format=image_format
467 )
468 # we set the cache header to 1 year (forever)
469 # assuming that images do not/rarely change
470 return web.Response(
471 body=image_data,
472 headers={"Cache-Control": "max-age=31536000", "Access-Control-Allow-Origin": "*"},
473 content_type=f"image/{image_format}",
474 )
475 except Exception as err:
476 # broadly catch all exceptions here to ensure we dont crash the request handler
477 if isinstance(err, FileNotFoundError):
478 self.logger.log(VERBOSE_LOG_LEVEL, "Image not found: %s", path)
479 else:
480 self.logger.warning(
481 "Error while fetching image %s: %s",
482 path,
483 str(err),
484 exc_info=err if self.logger.isEnabledFor(10) else None,
485 )
486 return web.Response(status=404)
487
488 async def create_collage_image(
489 self,
490 images: list[MediaItemImage],
491 filename: str,
492 fanart: bool = False,
493 ) -> MediaItemImage | None:
494 """Create collage thumb/fanart image for (in-library) playlist."""
495 if (len(images) < 8 and fanart) or len(images) < 3:
496 # require at least some images otherwise this does not make a lot of sense
497 return None
498 # limit to 50 images to prevent we're going OOM
499 if len(images) > 50:
500 images = random.sample(images, 50)
501 else:
502 random.shuffle(images)
503 try:
504 # create collage thumb from playlist tracks
505 # if playlist has no default image (e.g. a local playlist)
506 dimensions = (2500, 1750) if fanart else (1500, 1500)
507 img_data = await create_collage(self.mass, images, dimensions)
508 # always overwrite existing path
509 file_path = os.path.join(self._collage_images_dir, filename)
510 async with aiofiles.open(file_path, "wb") as _file:
511 await _file.write(img_data)
512 del img_data
513 return MediaItemImage(
514 type=ImageType.FANART if fanart else ImageType.THUMB,
515 path=f"/collage/{filename}",
516 provider="builtin",
517 remotely_accessible=False,
518 )
519 except Exception as err:
520 self.logger.warning(
521 "Error while creating playlist image: %s",
522 str(err),
523 exc_info=err if self.logger.isEnabledFor(10) else None,
524 )
525 return None
526
527 @api_command("metadata/get_track_lyrics")
528 async def get_track_lyrics(
529 self,
530 track: Track,
531 ) -> tuple[str | None, str | None]:
532 """
533 Get lyrics for given track from metadata providers.
534
535 Returns a tuple of (lyrics, lrc_lyrics) if found.
536 """
537 if track.metadata and track.metadata.lyrics:
538 return track.metadata.lyrics, track.metadata.lrc_lyrics
539
540 if track.provider == "library":
541 # try to update metadata first
542 await self._update_track_metadata(track, force_refresh=False)
543 return track.metadata.lyrics, track.metadata.lrc_lyrics
544
545 # prefer lyrics from the track's own provider
546 track_provider = self.mass.get_provider(track.provider, provider_type=MusicProvider)
547 if track_provider and ProviderFeature.LYRICS in track_provider.supported_features:
548 full_track = await self.mass.music.tracks.get_provider_item(
549 track.item_id, track.provider
550 )
551 if full_track.metadata and full_track.metadata.lyrics:
552 return full_track.metadata.lyrics, full_track.metadata.lrc_lyrics
553
554 # fallback to other metadata providers
555 for provider in self.providers:
556 if ProviderFeature.LYRICS not in provider.supported_features:
557 continue
558 if (metadata := await provider.get_track_metadata(track)) and (
559 metadata.lyrics or metadata.lrc_lyrics
560 ):
561 return metadata.lyrics, metadata.lrc_lyrics
562 return None, None
563
564 async def _update_artist_metadata(self, artist: Artist, force_refresh: bool = False) -> None:
565 """Get/update rich metadata for an artist."""
566 # collect metadata from all (online) music + metadata providers
567 # NOTE: we only do/allow this every REFRESH_INTERVAL
568 needs_refresh = (time() - (artist.metadata.last_refresh or 0)) > REFRESH_INTERVAL_ARTISTS
569 if not (force_refresh or needs_refresh):
570 return
571
572 self.logger.debug("Updating metadata for Artist %s", artist.name)
573 unique_keys: set[str] = set()
574
575 # collect (local) metadata from all local providers
576 local_provs = get_global_cache_value("non_streaming_providers")
577 if TYPE_CHECKING:
578 local_provs = cast("set[str]", local_provs)
579
580 # collect metadata from all [music] providers
581 # note that we sort the providers by priority so that we always
582 # prefer local providers over online providers
583 for prov_mapping in sorted(
584 artist.provider_mappings, key=lambda x: x.priority, reverse=True
585 ):
586 prov = self.mass.get_provider(
587 prov_mapping.provider_instance, provider_type=MusicProvider
588 )
589 if prov is None:
590 continue
591 # prefer domain for streaming providers as the catalog is the same across instances
592 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
593 if prov_key in unique_keys:
594 continue
595 unique_keys.add(prov_key)
596 with suppress(MediaNotFoundError):
597 prov_item = await self.mass.music.artists.get_provider_item(
598 prov_mapping.item_id, prov_mapping.provider_instance
599 )
600 artist.metadata.update(prov_item.metadata)
601
602 # The musicbrainz ID is mandatory for all metadata lookups
603 if not artist.mbid:
604 # TODO: Use a global cache/proxy for the MB lookups to save on API calls
605 if mbid := await self._get_artist_mbid(artist):
606 artist.mbid = mbid
607
608 # collect metadata from all (online)[metadata] providers
609 # TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls
610 if self.config.get_value(CONF_ENABLE_ONLINE_METADATA) and artist.mbid:
611 for provider in self.providers:
612 if ProviderFeature.ARTIST_METADATA not in provider.supported_features:
613 continue
614 if metadata := await provider.get_artist_metadata(artist):
615 artist.metadata.update(metadata)
616 self.logger.debug(
617 "Fetched metadata for Artist %s on provider %s",
618 artist.name,
619 provider.name,
620 )
621 # update final item in library database
622 # set timestamp, used to determine when this function was last called
623 artist.metadata.last_refresh = int(time())
624 await self.mass.music.artists.update_item_in_library(artist.item_id, artist)
625
626 async def _update_album_metadata(self, album: Album, force_refresh: bool = False) -> None:
627 """Get/update rich metadata for an album."""
628 # collect metadata from all (online) music + metadata providers
629 # NOTE: we only do/allow this every REFRESH_INTERVAL
630 needs_refresh = (time() - (album.metadata.last_refresh or 0)) > REFRESH_INTERVAL_ALBUMS
631 if not (force_refresh or needs_refresh):
632 return
633
634 self.logger.debug("Updating metadata for Album %s", album.name)
635
636 # collect metadata from all [music] providers
637 # note that we sort the providers by priority so that we always
638 # prefer local providers over online providers
639 unique_keys: set[str] = set()
640 for prov_mapping in sorted(album.provider_mappings, key=lambda x: x.priority, reverse=True):
641 prov = self.mass.get_provider(
642 prov_mapping.provider_instance, provider_type=MusicProvider
643 )
644 if prov is None:
645 continue
646 # prefer domain for streaming providers as the catalog is the same across instances
647 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
648 if prov_key in unique_keys:
649 continue
650 unique_keys.add(prov_key)
651 with suppress(MediaNotFoundError):
652 prov_item = await self.mass.music.albums.get_provider_item(
653 prov_mapping.item_id, prov_mapping.provider_instance
654 )
655 album.metadata.update(prov_item.metadata)
656 if album.year is None and prov_item.year:
657 album.year = prov_item.year
658 if album.album_type == AlbumType.UNKNOWN:
659 album.album_type = prov_item.album_type
660
661 # collect metadata from all (online) [metadata] providers
662 # TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls
663 if self.config.get_value(CONF_ENABLE_ONLINE_METADATA):
664 for provider in self.providers:
665 if ProviderFeature.ALBUM_METADATA not in provider.supported_features:
666 continue
667 if metadata := await provider.get_album_metadata(album):
668 album.metadata.update(metadata)
669 self.logger.debug(
670 "Fetched metadata for Album %s on provider %s",
671 album.name,
672 provider.name,
673 )
674 # update final item in library database
675 # set timestamp, used to determine when this function was last called
676 album.metadata.last_refresh = int(time())
677 await self.mass.music.albums.update_item_in_library(album.item_id, album)
678
679 async def _update_track_metadata(self, track: Track, force_refresh: bool = False) -> None:
680 """Get/update rich metadata for a track."""
681 # collect metadata from all (online) music + metadata providers
682 # NOTE: we only do/allow this every REFRESH_INTERVAL
683 needs_refresh = (time() - (track.metadata.last_refresh or 0)) > REFRESH_INTERVAL_TRACKS
684 if not (force_refresh or needs_refresh):
685 return
686
687 self.logger.debug("Updating metadata for Track %s", track.name)
688
689 # collect metadata from all [music] providers
690 # note that we sort the providers by priority so that we always
691 # prefer local providers over online providers
692 unique_keys: set[str] = set()
693 for prov_mapping in sorted(track.provider_mappings, key=lambda x: x.priority, reverse=True):
694 prov = self.mass.get_provider(
695 prov_mapping.provider_instance, provider_type=MusicProvider
696 )
697 if prov is None:
698 continue
699 # prefer domain for streaming providers as the catalog is the same across instances
700 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
701 if prov_key in unique_keys:
702 continue
703 unique_keys.add(prov_key)
704 with suppress(MediaNotFoundError):
705 prov_item = await self.mass.music.tracks.get_provider_item(
706 prov_mapping.item_id, prov_mapping.provider_instance
707 )
708 track.metadata.update(prov_item.metadata)
709
710 # collect metadata from all [metadata] providers
711 # Only fetch metadata from these sources if force_refresh is set OR
712 # if the track needs a refresh (based on REFRESH_INTERVAL_TRACKS) AND
713 # online metadata is enabled.
714 if (force_refresh or needs_refresh) and self.config.get_value(CONF_ENABLE_ONLINE_METADATA):
715 for provider in self.providers:
716 if ProviderFeature.TRACK_METADATA not in provider.supported_features:
717 continue
718
719 if metadata := await provider.get_track_metadata(track):
720 track.metadata.update(metadata)
721 self.logger.debug(
722 "Fetched metadata for Track %s on provider %s",
723 track.name,
724 provider.name,
725 )
726 # set timestamp, used to determine when this function was last called
727 track.metadata.last_refresh = int(time())
728 # update final item in library database
729 await self.mass.music.tracks.update_item_in_library(track.item_id, track)
730
731 async def _update_playlist_metadata(
732 self, playlist: Playlist, force_refresh: bool = False
733 ) -> None:
734 """Get/update rich metadata for a playlist."""
735 # collect metadata + create collage images
736 # NOTE: we only do/allow this every REFRESH_INTERVAL
737 needs_refresh = (
738 time() - (playlist.metadata.last_refresh or 0)
739 ) > REFRESH_INTERVAL_PLAYLISTS
740 if not (force_refresh or needs_refresh):
741 return
742 self.logger.debug("Updating metadata for Playlist %s", playlist.name)
743 playlist.metadata.genres = set()
744 all_playlist_tracks_images: list[MediaItemImage] = []
745 playlist_genres: dict[str, int] = {}
746 # retrieve metadata for the playlist from the tracks (such as genres etc.)
747 # TODO: retrieve style/mood ?
748 async for track in self.mass.music.playlists.tracks(playlist.item_id, playlist.provider):
749 if (
750 track.image
751 and track.image not in all_playlist_tracks_images
752 and (
753 track.image.provider in ("url", "builtin", "http")
754 or self.mass.get_provider(track.image.provider)
755 )
756 ):
757 all_playlist_tracks_images.append(track.image)
758 if track.metadata.genres:
759 genres = track.metadata.genres
760 elif (
761 isinstance(track, Track)
762 and track.album
763 and isinstance(track.album, Album)
764 and track.album.metadata.genres
765 ):
766 genres = track.album.metadata.genres
767 else:
768 genres = set()
769 for genre in genres:
770 if genre not in playlist_genres:
771 playlist_genres[genre] = 0
772 playlist_genres[genre] += 1
773 await asyncio.sleep(0) # yield to eventloop
774
775 playlist_genres_filtered = {genre for genre, count in playlist_genres.items() if count > 5}
776 playlist_genres_filtered = set(list(playlist_genres_filtered)[:8])
777 playlist.metadata.genres.update(playlist_genres_filtered)
778 # create collage images
779 cur_images: list[MediaItemImage] = playlist.metadata.images or []
780 new_images = []
781 # thumb image
782 thumb_image = next((x for x in cur_images if x.type == ImageType.THUMB), None)
783 if not thumb_image or self._collage_images_dir in thumb_image.path:
784 img_filename = thumb_image.path if thumb_image else f"{uuid4().hex}_thumb.jpg"
785 if collage_thumb_image := await self.create_collage_image(
786 all_playlist_tracks_images, img_filename
787 ):
788 new_images.append(collage_thumb_image)
789 elif thumb_image:
790 # just use old image
791 new_images.append(thumb_image)
792 # fanart image
793 fanart_image = next((x for x in cur_images if x.type == ImageType.FANART), None)
794 if not fanart_image or self._collage_images_dir in fanart_image.path:
795 img_filename = fanart_image.path if fanart_image else f"{uuid4().hex}_fanart.jpg"
796 if collage_fanart_image := await self.create_collage_image(
797 all_playlist_tracks_images, img_filename, fanart=True
798 ):
799 new_images.append(collage_fanart_image)
800 elif fanart_image:
801 # just use old image
802 new_images.append(fanart_image)
803 playlist.metadata.images = UniqueList(new_images) if new_images else None
804 # set timestamp, used to determine when this function was last called
805 playlist.metadata.last_refresh = int(time())
806 # update final item in library database
807 await self.mass.music.playlists.update_item_in_library(playlist.item_id, playlist)
808
async def _update_audiobook_metadata(
    self, audiobook: Audiobook, force_refresh: bool = False
) -> None:
    """Refresh the stored metadata of an audiobook from its music providers.

    Nothing happens unless ``force_refresh`` is set or the previous refresh is
    more than REFRESH_INTERVAL_AUDIOBOOKS seconds old. Afterwards the refresh
    timestamp is set and the audiobook is written back to the library.
    """
    last_refreshed = audiobook.metadata.last_refresh or 0
    refresh_due = (time() - last_refreshed) > REFRESH_INTERVAL_AUDIOBOOKS
    if not force_refresh and not refresh_due:
        return

    self.logger.debug("Updating metadata for Audiobook %s", audiobook.name)

    # visit the provider mappings from highest to lowest priority so that
    # local providers are preferred over online providers
    mappings_by_priority = sorted(
        audiobook.provider_mappings, key=lambda mapping: mapping.priority, reverse=True
    )
    handled_keys: set[str] = set()
    for mapping in mappings_by_priority:
        provider = self.mass.get_provider(mapping.provider_instance, provider_type=MusicProvider)
        if provider is None:
            continue
        # the catalog of a streaming provider is the same across instances,
        # so those are de-duplicated per domain instead of per instance
        if provider.is_streaming_provider:
            dedupe_key = provider.domain
        else:
            dedupe_key = provider.instance_id
        if dedupe_key in handled_keys:
            continue
        handled_keys.add(dedupe_key)
        try:
            remote_item = await self.mass.music.audiobooks.get_provider_item(
                mapping.item_id, mapping.provider_instance
            )
            audiobook.metadata.update(remote_item.metadata)
            # only fill in fields that are still empty on the library item
            if audiobook.publisher is None and remote_item.publisher:
                audiobook.publisher = remote_item.publisher
            if not audiobook.authors and remote_item.authors:
                audiobook.authors = remote_item.authors
            if not audiobook.narrators and remote_item.narrators:
                audiobook.narrators = remote_item.narrators
            if not audiobook.duration and remote_item.duration:
                audiobook.duration = remote_item.duration
        except MediaNotFoundError:
            # item not found for this mapping: move on to the next one
            continue

    # record when this function last ran, then persist the final item
    audiobook.metadata.last_refresh = int(time())
    await self.mass.music.audiobooks.update_item_in_library(audiobook.item_id, audiobook)
858
async def _update_podcast_metadata(self, podcast: Podcast, force_refresh: bool = False) -> None:
    """Refresh the stored metadata of a podcast from its music providers.

    Nothing happens unless ``force_refresh`` is set or the previous refresh is
    more than REFRESH_INTERVAL_PODCASTS seconds old. Afterwards the refresh
    timestamp is set and the podcast is written back to the library.
    """
    last_refreshed = podcast.metadata.last_refresh or 0
    refresh_due = (time() - last_refreshed) > REFRESH_INTERVAL_PODCASTS
    if not force_refresh and not refresh_due:
        return

    self.logger.debug("Updating metadata for Podcast %s", podcast.name)

    # visit the provider mappings from highest to lowest priority so that
    # local providers are preferred over online providers
    mappings_by_priority = sorted(
        podcast.provider_mappings, key=lambda mapping: mapping.priority, reverse=True
    )
    handled_keys: set[str] = set()
    for mapping in mappings_by_priority:
        provider = self.mass.get_provider(mapping.provider_instance, provider_type=MusicProvider)
        if provider is None:
            continue
        # the catalog of a streaming provider is the same across instances,
        # so those are de-duplicated per domain instead of per instance
        if provider.is_streaming_provider:
            dedupe_key = provider.domain
        else:
            dedupe_key = provider.instance_id
        if dedupe_key in handled_keys:
            continue
        handled_keys.add(dedupe_key)
        try:
            remote_item = await self.mass.music.podcasts.get_provider_item(
                mapping.item_id, mapping.provider_instance
            )
            podcast.metadata.update(remote_item.metadata)
            # only fill in fields that are still empty on the library item
            if podcast.publisher is None and remote_item.publisher:
                podcast.publisher = remote_item.publisher
            if not podcast.total_episodes and remote_item.total_episodes:
                podcast.total_episodes = remote_item.total_episodes
        except MediaNotFoundError:
            # item not found for this mapping: move on to the next one
            continue

    # record when this function last ran, then persist the final item
    podcast.metadata.last_refresh = int(time())
    await self.mass.music.podcasts.update_item_in_library(podcast.item_id, podcast)
900
async def _get_artist_mbid(self, artist: Artist) -> str | None:
    """Resolve the MusicBrainz id of an artist, or return None if no match is found.

    An id already present on the artist is returned as-is and the
    "Various Artists" name maps to its fixed id. Otherwise the musicbrainz
    provider is queried, in this order: by provider mapping URL, by reference
    track, by reference album and finally by a name-based track search.
    """
    if artist.mbid:
        return artist.mbid
    if compare_strings(artist.name, VARIOUS_ARTISTS_NAME):
        return VARIOUS_ARTISTS_MBID

    provider = self.mass.get_provider("musicbrainz")
    if not provider:
        return None
    musicbrainz: MusicbrainzProvider = cast("MusicbrainzProvider", provider)
    if TYPE_CHECKING:
        assert isinstance(musicbrainz, MusicbrainzProvider)

    # first attempt: resource URL (e.g. streaming provider share URL)
    for mapping in artist.provider_mappings:
        resource_url = mapping.url
        if not resource_url or not resource_url.startswith("http"):
            continue
        found = await musicbrainz.get_artist_details_by_resource_url(resource_url)
        if found:
            return found.id

    # collect the artist's albums and tracks as reference material for the lookup
    lookup_albums = await self.mass.music.artists.albums(
        artist.item_id, artist.provider, in_library_only=False
    )
    lookup_tracks = await self.mass.music.artists.tracks(
        artist.item_id, artist.provider, in_library_only=False
    )

    # (strict) match on reference track(s), using recording id
    for track in lookup_tracks:
        found = await musicbrainz.get_artist_details_by_track(artist.name, track)
        if found:
            return found.id

    # (strict) match on reference album(s), using releasegroup id
    for album in lookup_albums:
        found = await musicbrainz.get_artist_details_by_album(artist.name, album)
        if found:
            return found.id

    # last resort: track matching by name (only possible for tracks with an album)
    for track in lookup_tracks:
        if track.album:
            search_result = await musicbrainz.search(
                artistname=artist.name,
                albumname=track.album.name,
                trackname=track.name,
                trackversion=track.version,
            )
            if search_result:
                return search_result[0].id

    # every strategy failed: log what was tried and give up
    album_names = "/".join(album.name for album in lookup_albums) or "none"
    track_names = "/".join(track.name for track in lookup_tracks) or "none"
    self.logger.debug(
        "Unable to get musicbrainz ID for artist %s\n"
        " - using lookup-album(s): %s\n"
        " - using lookup-track(s): %s\n",
        artist.name,
        album_names,
        track_names,
    )
    return None
961
async def _process_metadata_lookup_jobs(self) -> None:
    """Task to process metadata lookup jobs.

    Waits 60 seconds before handling the first job, then loops forever:
    takes the next item URI from ``self._lookup_jobs``, resolves the item and
    updates its metadata. A failure for a single item is logged (or ignored
    when the item is not found) and never stops the loop.
    """
    # postpone the lookup for a while to allow the system to start up and providers initialized
    await asyncio.sleep(60)
    while True:
        item_uri = await self._lookup_jobs.get()
        # lazy %-style arguments: the message is only formatted if DEBUG is enabled
        self.logger.debug("Processing metadata lookup for %s", item_uri)
        try:
            item = await self.mass.music.get_item_by_uri(item_uri)
            await self.update_metadata(cast("MediaItemType", item))
        except MediaNotFoundError:
            # this can happen when the item is removed from the library
            pass
        except Exception as err:
            # boundary of the background task: report the error and keep going
            self.logger.error(
                "Error while updating metadata for %s: %s",
                item_uri,
                str(err),
                # attach the traceback only when debug logging is active
                exc_info=err if self.logger.isEnabledFor(logging.DEBUG) else None,
            )
982
async def _scan_missing_metadata(self) -> None:
    """Scanner for (missing) metadata, runs periodically in the background.

    Schedules a metadata update for library artists (limit=5, random order)
    that were never refreshed and have no images, and for library playlists
    (limit=5, random order) that were never refreshed or whose last refresh is
    older than REFRESH_INTERVAL_PLAYLISTS seconds. At the end the scan
    re-schedules itself to run again after PERIODIC_SCAN_INTERVAL.
    """
    # Scan for missing artist images
    self.logger.debug("Start lookup for missing artist images...")
    # filter: no refresh timestamp yet AND images missing or an empty list
    query = (
        f"json_extract({DB_TABLE_ARTISTS}.metadata,'$.last_refresh') ISNULL "
        f"AND (json_extract({DB_TABLE_ARTISTS}.metadata,'$.images') ISNULL "
        f"OR json_extract({DB_TABLE_ARTISTS}.metadata,'$.images') = '[]')"
    )
    for artist in await self.mass.music.artists.get_library_items_by_query(
        limit=5, order_by="random", extra_query_parts=[query]
    ):
        if artist.uri:
            self.schedule_update_metadata(artist.uri)
        # NOTE(review): assumed to pause once per artist (loop level) - confirm
        await asyncio.sleep(30)

    # Force refresh playlist metadata every refresh interval
    # this will e.g. update the playlist image and genres if the tracks have changed
    # playlists last refreshed before this timestamp are considered stale
    timestamp = int(time() - REFRESH_INTERVAL_PLAYLISTS)
    query = (
        f"json_extract({DB_TABLE_PLAYLISTS}.metadata,'$.last_refresh') ISNULL "
        f"OR json_extract({DB_TABLE_PLAYLISTS}.metadata,'$.last_refresh') < {timestamp}"
    )
    for playlist in await self.mass.music.playlists.get_library_items_by_query(
        limit=5, order_by="random", extra_query_parts=[query]
    ):
        if playlist.uri:
            self.schedule_update_metadata(playlist.uri)
        # NOTE(review): assumed to pause once per playlist (loop level) - confirm
        await asyncio.sleep(30)

    # reschedule next scan
    self.mass.call_later(PERIODIC_SCAN_INTERVAL, self._scan_missing_metadata)
1015
1016
class MetadataLookupQueue(asyncio.Queue[str]):
    """Representation of a queue for metadata lookups.

    An ``asyncio.Queue`` of strings that ignores an item which is already
    waiting in the queue and allows removing a pending item by value.
    """

    def _init(self, maxlen: int) -> None:
        """Create the backing deque; called by ``asyncio.Queue.__init__``.

        ``maxlen`` receives the queue's ``maxsize``. asyncio treats a maxsize
        of zero or less as "unbounded", whereas ``deque(maxlen=0)`` silently
        discards every appended item and a negative maxlen raises ValueError,
        so non-positive values are mapped to an unbounded deque.
        """
        self._queue: collections.deque[str] = collections.deque(
            maxlen=maxlen if maxlen > 0 else None
        )

    def _put(self, item: str) -> None:
        """Append item to the queue unless it is already queued."""
        if item not in self._queue:
            self._queue.append(item)

    def pop(self, item: str) -> None:
        """Remove item from queue; does nothing when the item is not queued."""
        # deque.remove raises ValueError for a missing value
        with suppress(ValueError):
            self._queue.remove(item)

    def exists(self, item: str) -> bool:
        """Check if item exists in queue."""
        return item in self._queue
1035