/
/
/
1"""All logic for metadata retrieval."""
2
3from __future__ import annotations
4
5import asyncio
6import collections
7import logging
8import os
9import random
10import urllib.parse
11from base64 import b64encode
12from contextlib import suppress
13from time import time
14from typing import TYPE_CHECKING, cast
15from uuid import uuid4
16
17import aiofiles
18from aiohttp import web
19from music_assistant_models.config_entries import ConfigEntry, ConfigValueOption, ConfigValueType
20from music_assistant_models.enums import (
21 AlbumType,
22 ConfigEntryType,
23 ImageType,
24 MediaType,
25 ProviderFeature,
26 ProviderType,
27)
28from music_assistant_models.errors import MediaNotFoundError, ProviderUnavailableError
29from music_assistant_models.helpers import get_global_cache_value
30from music_assistant_models.media_items import (
31 Album,
32 Artist,
33 Audiobook,
34 BrowseFolder,
35 ItemMapping,
36 MediaItemImage,
37 MediaItemType,
38 Playlist,
39 Podcast,
40 Track,
41)
42from music_assistant_models.unique_list import UniqueList
43
44from music_assistant.constants import (
45 CONF_LANGUAGE,
46 DB_TABLE_ARTISTS,
47 DB_TABLE_PLAYLISTS,
48 VARIOUS_ARTISTS_MBID,
49 VARIOUS_ARTISTS_NAME,
50 VERBOSE_LOG_LEVEL,
51)
52from music_assistant.helpers.api import api_command
53from music_assistant.helpers.compare import compare_strings
54from music_assistant.helpers.images import create_collage, get_image_thumb
55from music_assistant.helpers.security import is_safe_path
56from music_assistant.helpers.throttle_retry import Throttler
57from music_assistant.models.core_controller import CoreController
58from music_assistant.models.music_provider import MusicProvider
59
60if TYPE_CHECKING:
61 from music_assistant_models.config_entries import CoreConfig
62
63 from music_assistant import MusicAssistant
64 from music_assistant.models.metadata_provider import MetadataProvider
65 from music_assistant.providers.musicbrainz import MusicbrainzProvider
66
# Supported metadata locales: locale code -> human readable label.
# The labels are offered as options of the language config entry and are also
# accepted (case-insensitively) as input by `set_preferred_language`.
LOCALES = {
    "af_ZA": "Afrikaans",
    "ar_AE": "Arabic (United Arab Emirates)",
    "ar_EG": "Arabic (Egypt)",
    "ar_SA": "Arabic (Saudi Arabia)",
    "bg_BG": "Bulgarian",
    "cs_CZ": "Czech",
    "zh_CN": "Chinese",
    "hr_HR": "Croatian",
    "da_DK": "Danish",
    "de_DE": "German",
    "el_GR": "Greek",
    "en_AU": "English (AU)",
    "en_US": "English (US)",
    "en_GB": "English (UK)",
    "es_ES": "Spanish",
    "et_EE": "Estonian",
    "fi_FI": "Finnish",
    "fr_FR": "French",
    "hu_HU": "Hungarian",
    "is_IS": "Icelandic",
    "it_IT": "Italian",
    "lt_LT": "Lithuanian",
    "lv_LV": "Latvian",
    "ja_JP": "Japanese",
    "ko_KR": "Korean",
    "nl_NL": "Dutch",
    "nb_NO": "Norwegian Bokmål",
    "pl_PL": "Polish",
    "pt_PT": "Portuguese",
    "ro_RO": "Romanian",
    "ru_RU": "Russian",
    "sk_SK": "Slovak",
    "sl_SI": "Slovenian",
    "sr_RS": "Serbian",
    "sv_SE": "Swedish",
    "tr_TR": "Turkish",
    "uk_UA": "Ukrainian",
}
106
# locale that is used when no preferred language has been configured
DEFAULT_LANGUAGE = "en_US"
# minimum time (in seconds) between two metadata refreshes, per media type
REFRESH_INTERVAL_ARTISTS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_ALBUMS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_TRACKS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_AUDIOBOOKS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_PODCASTS = 90 * 24 * 60 * 60  # 90 days
REFRESH_INTERVAL_PLAYLISTS = 14 * 24 * 60 * 60  # 14 days
PERIODIC_SCAN_INTERVAL = 6 * 60 * 60  # 6 hours
# config key of the toggle for lookups at online metadata providers
CONF_ENABLE_ONLINE_METADATA = "enable_online_metadata"
116
117
118class MetaDataController(CoreController):
119 """Several helpers to search and store metadata for mediaitems."""
120
121 domain: str = "metadata"
122 config: CoreConfig
123
124 def __init__(self, mass: MusicAssistant) -> None:
125 """Initialize class."""
126 super().__init__(mass)
127 self.cache = self.mass.cache
128 self._pref_lang: str | None = None
129 self.manifest.name = "Metadata controller"
130 self.manifest.description = (
131 "Music Assistant's core controller which handles all metadata for music."
132 )
133 self.manifest.icon = "book-information-variant"
134 self._lookup_jobs: MetadataLookupQueue = MetadataLookupQueue(100)
135 self._lookup_task: asyncio.Task[None] | None = None
136 self._throttler = Throttler(1, 30)
137
138 async def get_config_entries(
139 self,
140 action: str | None = None,
141 values: dict[str, ConfigValueType] | None = None,
142 ) -> tuple[ConfigEntry, ...]:
143 """Return all Config Entries for this core module (if any)."""
144 return (
145 ConfigEntry(
146 key=CONF_LANGUAGE,
147 type=ConfigEntryType.STRING,
148 label="Preferred language",
149 required=False,
150 default_value=DEFAULT_LANGUAGE,
151 description="Preferred language for metadata.\n\n"
152 "Note that English will always be used as fallback when content "
153 "in your preferred language is not available.",
154 options=[ConfigValueOption(value, key) for key, value in LOCALES.items()],
155 ),
156 ConfigEntry(
157 key=CONF_ENABLE_ONLINE_METADATA,
158 type=ConfigEntryType.BOOLEAN,
159 label="Enable metadata retrieval from online metadata providers",
160 required=False,
161 default_value=True,
162 description="Enable online metadata lookups.\n\n"
163 "This will allow Music Assistant to fetch additional metadata from (enabled) "
164 "metadata providers, such as The Audio DB and Fanart.tv.\n\n"
165 "Note that these online sources are only queried when no information is already "
166 "available from local files or the music providers and local artwork/metadata "
167 "will always have preference over online sources so consider metadata from online "
168 "sources as complementary only.\n\n"
169 "The retrieval of additional rich metadata is a process that is executed slowly "
170 "in the background to not overload these free services with requests. "
171 "You can speedup the process by storing the images and other metadata locally.",
172 ),
173 )
174
175 async def setup(self, config: CoreConfig) -> None:
176 """Async initialize of module."""
177 self.config = config
178 if not self.logger.isEnabledFor(VERBOSE_LOG_LEVEL):
179 # silence PIL logger
180 logging.getLogger("PIL").setLevel(logging.WARNING)
181 # make sure that our directory with collage images exists
182 self._collage_images_dir = os.path.join(self.mass.cache_path, "collage_images")
183 if not await asyncio.to_thread(os.path.exists, self._collage_images_dir):
184 await asyncio.to_thread(os.mkdir, self._collage_images_dir)
185 self.mass.streams.register_dynamic_route("/imageproxy", self.handle_imageproxy)
186 # the lookup task is used to process metadata lookup jobs
187 self._lookup_task = self.mass.create_task(self._process_metadata_lookup_jobs())
188 # just run the scan for missing metadata once at startup
189 # background scan for missing metadata
190 self.mass.call_later(300, self._scan_missing_metadata)
191 # migrate theaudiodb images to new url
192 # they updated their cdn url to r2.theaudiodb.com
193 # TODO: remove this after 2.7 release
194 query = (
195 "UPDATE artists SET metadata = "
196 "REPLACE (metadata, 'https://www.theaudiodb.com', 'https://r2.theaudiodb.com') "
197 "WHERE artists.metadata LIKE '%https://www.theaudiodb.com%'"
198 )
199 if self.mass.music.database:
200 await self.mass.music.database.execute(query)
201 await self.mass.music.database.commit()
202
203 async def close(self) -> None:
204 """Handle logic on server stop."""
205 if self._lookup_task and not self._lookup_task.done():
206 self._lookup_task.cancel()
207 self.mass.streams.unregister_dynamic_route("/imageproxy")
208
209 @property
210 def providers(self) -> list[MetadataProvider]:
211 """Return all loaded/running MetadataProviders."""
212 return cast("list[MetadataProvider]", self.mass.get_providers(ProviderType.METADATA))
213
214 @property
215 def preferred_language(self) -> str:
216 """Return preferred language for metadata (as 2 letter language code 'en')."""
217 return self.locale.split("_")[0]
218
219 @property
220 def locale(self) -> str:
221 """Return preferred language for metadata (as full locale code 'en_EN')."""
222 value = self.mass.config.get_raw_core_config_value(
223 self.domain, CONF_LANGUAGE, DEFAULT_LANGUAGE
224 )
225 return str(value)
226
227 @api_command("metadata/set_default_preferred_language")
228 def set_default_preferred_language(self, lang: str) -> None:
229 """
230 Set the default preferred language.
231
232 Reasoning behind this is that the backend can not make a wise choice for the default,
233 so relies on some external source that knows better to set this info, like the frontend
234 or a streaming provider.
235 Can only be set once (by this call or the user).
236 """
237 if self.mass.config.get_raw_core_config_value(self.domain, CONF_LANGUAGE):
238 return # already set
239 self.set_preferred_language(lang)
240
241 @api_command("metadata/set_preferred_language")
242 def set_preferred_language(self, lang: str) -> None:
243 """
244 Set the preferred language.
245
246 Note that this will not modify any existing metadata,
247 but will be used for future lookups.
248 """
249 # prefer exact match
250 if lang in LOCALES:
251 self.mass.config.set_raw_core_config_value(self.domain, CONF_LANGUAGE, lang)
252 return
253 # try strict matching on either locale code or region
254 lang = lang.lower().replace("-", "_")
255 for locale_code, lang_name in LOCALES.items():
256 if lang in (locale_code.lower(), lang_name.lower()):
257 self.mass.config.set_raw_core_config_value(self.domain, CONF_LANGUAGE, locale_code)
258 return
259 # attempt loose match on language code or region code
260 for lang_part in (lang[:2], lang[:-2]):
261 for locale_code in tuple(LOCALES):
262 language_code, region_code = locale_code.lower().split("_", 1)
263 if lang_part in (language_code, region_code):
264 self.mass.config.set_raw_core_config_value(
265 self.domain, CONF_LANGUAGE, locale_code
266 )
267 return
268 # if we reach this point, we couldn't match the language
269 self.logger.warning("%s is not a valid language", lang)
270
271 @api_command("metadata/update_metadata")
272 async def update_metadata(
273 self, item: str | MediaItemType, force_refresh: bool = False
274 ) -> MediaItemType:
275 """Get/update extra/enhanced metadata for/on given MediaItem."""
276 async with self.cache.handle_refresh(force_refresh):
277 if isinstance(item, str):
278 retrieved_item = await self.mass.music.get_item_by_uri(item)
279 if isinstance(retrieved_item, BrowseFolder):
280 raise TypeError("Cannot update metadata on a BrowseFolder item.")
281 item = retrieved_item
282
283 if item.provider != "library":
284 # this shouldn't happen but just in case.
285 raise RuntimeError("Metadata can only be updated for library items")
286
287 # just in case it was in the queue, prevent duplicate lookups
288 if item.uri:
289 self._lookup_jobs.pop(item.uri)
290 async with self._throttler:
291 if item.media_type == MediaType.ARTIST:
292 await self._update_artist_metadata(
293 cast("Artist", item), force_refresh=force_refresh
294 )
295 if item.media_type == MediaType.ALBUM:
296 await self._update_album_metadata(
297 cast("Album", item), force_refresh=force_refresh
298 )
299 if item.media_type == MediaType.TRACK:
300 await self._update_track_metadata(
301 cast("Track", item), force_refresh=force_refresh
302 )
303 if item.media_type == MediaType.PLAYLIST:
304 await self._update_playlist_metadata(
305 cast("Playlist", item), force_refresh=force_refresh
306 )
307 if item.media_type == MediaType.AUDIOBOOK:
308 await self._update_audiobook_metadata(
309 cast("Audiobook", item), force_refresh=force_refresh
310 )
311 if item.media_type == MediaType.PODCAST:
312 await self._update_podcast_metadata(
313 cast("Podcast", item), force_refresh=force_refresh
314 )
315 return item
316
317 def schedule_update_metadata(self, uri: str) -> None:
318 """Schedule metadata update for given MediaItem uri."""
319 if "library" not in uri:
320 return
321 if self._lookup_jobs.exists(uri):
322 return
323 with suppress(asyncio.QueueFull):
324 self._lookup_jobs.put_nowait(uri)
325
326 async def get_image_data_for_item(
327 self,
328 media_item: MediaItemType,
329 img_type: ImageType = ImageType.THUMB,
330 size: int = 0,
331 ) -> bytes | None:
332 """Get image data for given MedaItem."""
333 img_path = await self.get_image_url_for_item(
334 media_item=media_item,
335 img_type=img_type,
336 )
337 if not img_path:
338 return None
339 thumbnail = await self.get_thumbnail(img_path, provider="builtin", size=size)
340
341 return cast("bytes", thumbnail)
342
343 async def get_image_url_for_item(
344 self,
345 media_item: MediaItemType | ItemMapping,
346 img_type: ImageType = ImageType.THUMB,
347 resolve: bool = True,
348 ) -> str | None:
349 """Get url to image for given media media_item."""
350 if not media_item:
351 return None
352
353 if isinstance(media_item, ItemMapping):
354 # Check if the ItemMapping already has an image - avoid expensive API call
355 if media_item.image and media_item.image.type == img_type:
356 if media_item.image.remotely_accessible and resolve:
357 return self.get_image_url(media_item.image)
358 if not media_item.image.remotely_accessible:
359 return media_item.image.path
360
361 # Only retrieve full item if we don't have the image we need
362 if not media_item.uri:
363 return None
364 retrieved_item = await self.mass.music.get_item_by_uri(media_item.uri)
365 if isinstance(retrieved_item, BrowseFolder):
366 return None # can not happen, but guard for type checker
367 media_item = retrieved_item
368
369 if media_item and media_item.metadata.images:
370 for img in media_item.metadata.images:
371 if img.type != img_type:
372 continue
373 if not img.remotely_accessible and not resolve:
374 # ignore image if its not remotely accessible and we don't allow resolving
375 continue
376 return self.get_image_url(img, prefer_proxy=not img.remotely_accessible)
377
378 # retry with track's album
379 if isinstance(media_item, Track) and media_item.album:
380 return await self.get_image_url_for_item(media_item.album, img_type, resolve)
381
382 # try artist instead for albums
383 if isinstance(media_item, Album) and media_item.artists:
384 return await self.get_image_url_for_item(media_item.artists[0], img_type, resolve)
385
386 # last resort: track artist(s)
387 if isinstance(media_item, Track) and media_item.artists:
388 for artist in media_item.artists:
389 return await self.get_image_url_for_item(artist, img_type, resolve)
390
391 return None
392
393 def get_image_url(
394 self,
395 image: MediaItemImage,
396 size: int = 0,
397 prefer_proxy: bool = False,
398 image_format: str | None = None,
399 prefer_stream_server: bool = False,
400 ) -> str:
401 """Get (proxied) URL for MediaItemImage."""
402 if image_format is None:
403 image_format = "png" if image.path.lower().endswith(".png") else "jpg"
404 if not image.remotely_accessible or prefer_proxy or size:
405 # return imageproxy url for images that need to be resolved
406 # the original path is double encoded
407 encoded_url = urllib.parse.quote_plus(urllib.parse.quote_plus(image.path))
408 base_url = (
409 self.mass.streams.base_url if prefer_stream_server else self.mass.webserver.base_url
410 )
411 return (
412 f"{base_url}/imageproxy?provider={image.provider}"
413 f"&size={size}&fmt={image_format}&path={encoded_url}"
414 )
415 return image.path
416
417 async def get_thumbnail(
418 self,
419 path: str,
420 provider: str,
421 size: int | None = None,
422 base64: bool = False,
423 image_format: str | None = None,
424 ) -> bytes | str:
425 """Get/create thumbnail image for path (image url or local path)."""
426 if not self.mass.get_provider(provider) and not path.startswith("http"):
427 raise ProviderUnavailableError
428 if image_format is None:
429 image_format = "png" if path.lower().endswith(".png") else "jpg"
430 if provider == "builtin" and path.startswith("/collage/"):
431 # special case for collage images
432 collage_rel = path.split("/collage/")[-1]
433 if not is_safe_path(collage_rel):
434 raise FileNotFoundError("Invalid collage path")
435 path = os.path.join(self._collage_images_dir, collage_rel)
436 thumbnail_bytes = await get_image_thumb(
437 self.mass, path, size=size, provider=provider, image_format=image_format
438 )
439 if base64:
440 enc_image = b64encode(thumbnail_bytes).decode()
441 return f"data:image/{image_format};base64,{enc_image}"
442 return thumbnail_bytes
443
444 async def handle_imageproxy(self, request: web.Request) -> web.Response:
445 """Handle request for image proxy."""
446 path = request.query["path"]
447 provider = request.query.get("provider", "builtin")
448 if provider in ("url", "file", "http"):
449 # temporary for backwards compatibility
450 provider = "builtin"
451 size = int(request.query.get("size", "0"))
452 image_format = request.query.get("fmt", None)
453 if image_format is None:
454 image_format = "png" if path.lower().endswith(".png") else "jpg"
455 if not self.mass.get_provider(provider) and not path.startswith("http"):
456 return web.Response(status=404)
457 if "%" in path:
458 # assume (double) encoded url, decode it
459 path = urllib.parse.unquote_plus(path)
460 try:
461 image_data = await self.get_thumbnail(
462 path, size=size, provider=provider, image_format=image_format
463 )
464 # we set the cache header to 1 year (forever)
465 # assuming that images do not/rarely change
466 return web.Response(
467 body=image_data,
468 headers={"Cache-Control": "max-age=31536000", "Access-Control-Allow-Origin": "*"},
469 content_type=f"image/{image_format}",
470 )
471 except Exception as err:
472 # broadly catch all exceptions here to ensure we dont crash the request handler
473 if isinstance(err, FileNotFoundError):
474 self.logger.log(VERBOSE_LOG_LEVEL, "Image not found: %s", path)
475 else:
476 self.logger.warning(
477 "Error while fetching image %s: %s",
478 path,
479 str(err),
480 exc_info=err if self.logger.isEnabledFor(10) else None,
481 )
482 return web.Response(status=404)
483
484 async def create_collage_image(
485 self,
486 images: list[MediaItemImage],
487 filename: str,
488 fanart: bool = False,
489 ) -> MediaItemImage | None:
490 """Create collage thumb/fanart image for (in-library) playlist."""
491 if (len(images) < 8 and fanart) or len(images) < 3:
492 # require at least some images otherwise this does not make a lot of sense
493 return None
494 # limit to 50 images to prevent we're going OOM
495 if len(images) > 50:
496 images = random.sample(images, 50)
497 else:
498 random.shuffle(images)
499 try:
500 # create collage thumb from playlist tracks
501 # if playlist has no default image (e.g. a local playlist)
502 dimensions = (2500, 1750) if fanart else (1500, 1500)
503 img_data = await create_collage(self.mass, images, dimensions)
504 # always overwrite existing path
505 file_path = os.path.join(self._collage_images_dir, filename)
506 async with aiofiles.open(file_path, "wb") as _file:
507 await _file.write(img_data)
508 del img_data
509 return MediaItemImage(
510 type=ImageType.FANART if fanart else ImageType.THUMB,
511 path=f"/collage/{filename}",
512 provider="builtin",
513 remotely_accessible=False,
514 )
515 except Exception as err:
516 self.logger.warning(
517 "Error while creating playlist image: %s",
518 str(err),
519 exc_info=err if self.logger.isEnabledFor(10) else None,
520 )
521 return None
522
523 @api_command("metadata/get_track_lyrics")
524 async def get_track_lyrics(
525 self,
526 track: Track,
527 ) -> tuple[str | None, str | None]:
528 """
529 Get lyrics for given track from metadata providers.
530
531 Returns a tuple of (lyrics, lrc_lyrics) if found.
532 """
533 if track.metadata and track.metadata.lyrics:
534 return track.metadata.lyrics, track.metadata.lrc_lyrics
535
536 if track.provider == "library":
537 # try to update metadata first
538 await self._update_track_metadata(track, force_refresh=False)
539 return track.metadata.lyrics, track.metadata.lrc_lyrics
540
541 # prefer lyrics from the track's own provider
542 track_provider = self.mass.get_provider(track.provider, provider_type=MusicProvider)
543 if track_provider and ProviderFeature.LYRICS in track_provider.supported_features:
544 full_track = await self.mass.music.tracks.get_provider_item(
545 track.item_id, track.provider
546 )
547 if full_track.metadata and full_track.metadata.lyrics:
548 return full_track.metadata.lyrics, full_track.metadata.lrc_lyrics
549
550 # fallback to other metadata providers
551 for provider in self.providers:
552 if ProviderFeature.LYRICS not in provider.supported_features:
553 continue
554 if (metadata := await provider.get_track_metadata(track)) and (
555 metadata.lyrics or metadata.lrc_lyrics
556 ):
557 return metadata.lyrics, metadata.lrc_lyrics
558 return None, None
559
560 async def _update_artist_metadata(self, artist: Artist, force_refresh: bool = False) -> None:
561 """Get/update rich metadata for an artist."""
562 # collect metadata from all (online) music + metadata providers
563 # NOTE: we only do/allow this every REFRESH_INTERVAL
564 needs_refresh = (time() - (artist.metadata.last_refresh or 0)) > REFRESH_INTERVAL_ARTISTS
565 if not (force_refresh or needs_refresh):
566 return
567
568 self.logger.debug("Updating metadata for Artist %s", artist.name)
569 unique_keys: set[str] = set()
570
571 # collect (local) metadata from all local providers
572 local_provs = get_global_cache_value("non_streaming_providers")
573 if TYPE_CHECKING:
574 local_provs = cast("set[str]", local_provs)
575
576 # collect metadata from all [music] providers
577 # note that we sort the providers by priority so that we always
578 # prefer local providers over online providers
579 for prov_mapping in sorted(
580 artist.provider_mappings, key=lambda x: x.priority, reverse=True
581 ):
582 prov = self.mass.get_provider(
583 prov_mapping.provider_instance, provider_type=MusicProvider
584 )
585 if prov is None:
586 continue
587 # prefer domain for streaming providers as the catalog is the same across instances
588 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
589 if prov_key in unique_keys:
590 continue
591 unique_keys.add(prov_key)
592 with suppress(MediaNotFoundError):
593 prov_item = await self.mass.music.artists.get_provider_item(
594 prov_mapping.item_id, prov_mapping.provider_instance
595 )
596 artist.metadata.update(prov_item.metadata)
597
598 # The musicbrainz ID is mandatory for all metadata lookups
599 if not artist.mbid:
600 # TODO: Use a global cache/proxy for the MB lookups to save on API calls
601 if mbid := await self._get_artist_mbid(artist):
602 artist.mbid = mbid
603
604 # collect metadata from all (online)[metadata] providers
605 # TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls
606 if self.config.get_value(CONF_ENABLE_ONLINE_METADATA) and artist.mbid:
607 for provider in self.providers:
608 if ProviderFeature.ARTIST_METADATA not in provider.supported_features:
609 continue
610 if metadata := await provider.get_artist_metadata(artist):
611 artist.metadata.update(metadata)
612 self.logger.debug(
613 "Fetched metadata for Artist %s on provider %s",
614 artist.name,
615 provider.name,
616 )
617 # update final item in library database
618 # set timestamp, used to determine when this function was last called
619 artist.metadata.last_refresh = int(time())
620 await self.mass.music.artists.update_item_in_library(artist.item_id, artist)
621
622 async def _update_album_metadata(self, album: Album, force_refresh: bool = False) -> None:
623 """Get/update rich metadata for an album."""
624 # collect metadata from all (online) music + metadata providers
625 # NOTE: we only do/allow this every REFRESH_INTERVAL
626 needs_refresh = (time() - (album.metadata.last_refresh or 0)) > REFRESH_INTERVAL_ALBUMS
627 if not (force_refresh or needs_refresh):
628 return
629
630 self.logger.debug("Updating metadata for Album %s", album.name)
631
632 # collect metadata from all [music] providers
633 # note that we sort the providers by priority so that we always
634 # prefer local providers over online providers
635 unique_keys: set[str] = set()
636 for prov_mapping in sorted(album.provider_mappings, key=lambda x: x.priority, reverse=True):
637 prov = self.mass.get_provider(
638 prov_mapping.provider_instance, provider_type=MusicProvider
639 )
640 if prov is None:
641 continue
642 # prefer domain for streaming providers as the catalog is the same across instances
643 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
644 if prov_key in unique_keys:
645 continue
646 unique_keys.add(prov_key)
647 with suppress(MediaNotFoundError):
648 prov_item = await self.mass.music.albums.get_provider_item(
649 prov_mapping.item_id, prov_mapping.provider_instance
650 )
651 album.metadata.update(prov_item.metadata)
652 if album.year is None and prov_item.year:
653 album.year = prov_item.year
654 if album.album_type == AlbumType.UNKNOWN:
655 album.album_type = prov_item.album_type
656
657 # collect metadata from all (online) [metadata] providers
658 # TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls
659 if self.config.get_value(CONF_ENABLE_ONLINE_METADATA):
660 for provider in self.providers:
661 if ProviderFeature.ALBUM_METADATA not in provider.supported_features:
662 continue
663 if metadata := await provider.get_album_metadata(album):
664 album.metadata.update(metadata)
665 self.logger.debug(
666 "Fetched metadata for Album %s on provider %s",
667 album.name,
668 provider.name,
669 )
670 # update final item in library database
671 # set timestamp, used to determine when this function was last called
672 album.metadata.last_refresh = int(time())
673 await self.mass.music.albums.update_item_in_library(album.item_id, album)
674
675 async def _update_track_metadata(self, track: Track, force_refresh: bool = False) -> None:
676 """Get/update rich metadata for a track."""
677 # collect metadata from all (online) music + metadata providers
678 # NOTE: we only do/allow this every REFRESH_INTERVAL
679 needs_refresh = (time() - (track.metadata.last_refresh or 0)) > REFRESH_INTERVAL_TRACKS
680 if not (force_refresh or needs_refresh):
681 return
682
683 self.logger.debug("Updating metadata for Track %s", track.name)
684
685 # collect metadata from all [music] providers
686 # note that we sort the providers by priority so that we always
687 # prefer local providers over online providers
688 unique_keys: set[str] = set()
689 for prov_mapping in sorted(track.provider_mappings, key=lambda x: x.priority, reverse=True):
690 prov = self.mass.get_provider(
691 prov_mapping.provider_instance, provider_type=MusicProvider
692 )
693 if prov is None:
694 continue
695 # prefer domain for streaming providers as the catalog is the same across instances
696 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
697 if prov_key in unique_keys:
698 continue
699 unique_keys.add(prov_key)
700 with suppress(MediaNotFoundError):
701 prov_item = await self.mass.music.tracks.get_provider_item(
702 prov_mapping.item_id, prov_mapping.provider_instance
703 )
704 track.metadata.update(prov_item.metadata)
705
706 # collect metadata from all [metadata] providers
707 # Only fetch metadata from these sources if force_refresh is set OR
708 # if the track needs a refresh (based on REFRESH_INTERVAL_TRACKS) AND
709 # online metadata is enabled.
710 if (force_refresh or needs_refresh) and self.config.get_value(CONF_ENABLE_ONLINE_METADATA):
711 for provider in self.providers:
712 if ProviderFeature.TRACK_METADATA not in provider.supported_features:
713 continue
714
715 if metadata := await provider.get_track_metadata(track):
716 track.metadata.update(metadata)
717 self.logger.debug(
718 "Fetched metadata for Track %s on provider %s",
719 track.name,
720 provider.name,
721 )
722 # set timestamp, used to determine when this function was last called
723 track.metadata.last_refresh = int(time())
724 # update final item in library database
725 await self.mass.music.tracks.update_item_in_library(track.item_id, track)
726
727 async def _update_playlist_metadata(
728 self, playlist: Playlist, force_refresh: bool = False
729 ) -> None:
730 """Get/update rich metadata for a playlist."""
731 # collect metadata + create collage images
732 # NOTE: we only do/allow this every REFRESH_INTERVAL
733 needs_refresh = (
734 time() - (playlist.metadata.last_refresh or 0)
735 ) > REFRESH_INTERVAL_PLAYLISTS
736 if not (force_refresh or needs_refresh):
737 return
738 self.logger.debug("Updating metadata for Playlist %s", playlist.name)
739 playlist.metadata.genres = set()
740 all_playlist_tracks_images: list[MediaItemImage] = []
741 playlist_genres: dict[str, int] = {}
742 # retrieve metadata for the playlist from the tracks (such as genres etc.)
743 # TODO: retrieve style/mood ?
744 async for track in self.mass.music.playlists.tracks(playlist.item_id, playlist.provider):
745 if (
746 track.image
747 and track.image not in all_playlist_tracks_images
748 and (
749 track.image.provider in ("url", "builtin", "http")
750 or self.mass.get_provider(track.image.provider)
751 )
752 ):
753 all_playlist_tracks_images.append(track.image)
754 if track.metadata.genres:
755 genres = track.metadata.genres
756 elif (
757 isinstance(track, Track)
758 and track.album
759 and isinstance(track.album, Album)
760 and track.album.metadata.genres
761 ):
762 genres = track.album.metadata.genres
763 else:
764 genres = set()
765 for genre in genres:
766 if genre not in playlist_genres:
767 playlist_genres[genre] = 0
768 playlist_genres[genre] += 1
769 await asyncio.sleep(0) # yield to eventloop
770
771 playlist_genres_filtered = {genre for genre, count in playlist_genres.items() if count > 5}
772 playlist_genres_filtered = set(list(playlist_genres_filtered)[:8])
773 playlist.metadata.genres.update(playlist_genres_filtered)
774 # create collage images
775 cur_images: list[MediaItemImage] = playlist.metadata.images or []
776 new_images = []
777 # thumb image
778 thumb_image = next((x for x in cur_images if x.type == ImageType.THUMB), None)
779 if not thumb_image or self._collage_images_dir in thumb_image.path:
780 img_filename = thumb_image.path if thumb_image else f"{uuid4().hex}_thumb.jpg"
781 if collage_thumb_image := await self.create_collage_image(
782 all_playlist_tracks_images, img_filename
783 ):
784 new_images.append(collage_thumb_image)
785 elif thumb_image:
786 # just use old image
787 new_images.append(thumb_image)
788 # fanart image
789 fanart_image = next((x for x in cur_images if x.type == ImageType.FANART), None)
790 if not fanart_image or self._collage_images_dir in fanart_image.path:
791 img_filename = fanart_image.path if fanart_image else f"{uuid4().hex}_fanart.jpg"
792 if collage_fanart_image := await self.create_collage_image(
793 all_playlist_tracks_images, img_filename, fanart=True
794 ):
795 new_images.append(collage_fanart_image)
796 elif fanart_image:
797 # just use old image
798 new_images.append(fanart_image)
799 playlist.metadata.images = UniqueList(new_images) if new_images else None
800 # set timestamp, used to determine when this function was last called
801 playlist.metadata.last_refresh = int(time())
802 # update final item in library database
803 await self.mass.music.playlists.update_item_in_library(playlist.item_id, playlist)
804
async def _update_audiobook_metadata(
    self, audiobook: Audiobook, force_refresh: bool = False
) -> None:
    """
    Refresh the metadata of an audiobook from its provider mappings.

    The refresh is skipped unless ``force_refresh`` is set or the stored
    ``last_refresh`` timestamp is older than REFRESH_INTERVAL_AUDIOBOOKS.
    Afterwards the audiobook (with a new ``last_refresh``) is written back
    to the library.

    :param audiobook: The (library) audiobook to update in place.
    :param force_refresh: Refresh even if the interval has not elapsed yet.
    """
    previous_refresh = audiobook.metadata.last_refresh or 0
    is_stale = (time() - previous_refresh) > REFRESH_INTERVAL_AUDIOBOOKS
    if not force_refresh and not is_stale:
        # refreshed recently enough and nobody forced it
        return

    self.logger.debug("Updating metadata for Audiobook %s", audiobook.name)

    # walk the mappings from highest to lowest priority, so details from
    # higher-priority providers are applied first
    mappings_by_priority = sorted(
        audiobook.provider_mappings, key=lambda x: x.priority, reverse=True
    )
    seen_keys: set[str] = set()
    for mapping in mappings_by_priority:
        provider = self.mass.get_provider(
            mapping.provider_instance, provider_type=MusicProvider
        )
        if provider is None:
            continue
        # streaming providers share one catalog across instances, so they are
        # de-duplicated on domain; other providers on their instance id
        if provider.is_streaming_provider:
            dedupe_key = provider.domain
        else:
            dedupe_key = provider.instance_id
        if dedupe_key in seen_keys:
            continue
        seen_keys.add(dedupe_key)
        try:
            remote_item = await self.mass.music.audiobooks.get_provider_item(
                mapping.item_id, mapping.provider_instance
            )
            audiobook.metadata.update(remote_item.metadata)
            # only fill in the basic details that are still missing
            if audiobook.publisher is None and remote_item.publisher:
                audiobook.publisher = remote_item.publisher
            if not audiobook.authors and remote_item.authors:
                audiobook.authors = remote_item.authors
            if not audiobook.narrators and remote_item.narrators:
                audiobook.narrators = remote_item.narrators
            if not audiobook.duration and remote_item.duration:
                audiobook.duration = remote_item.duration
        except MediaNotFoundError:
            # item not found for this mapping: move on to the next one
            continue

    # stamp the refresh time (used to decide when to refresh again)
    # and persist the final item in the library database
    audiobook.metadata.last_refresh = int(time())
    await self.mass.music.audiobooks.update_item_in_library(audiobook.item_id, audiobook)
854
async def _update_podcast_metadata(self, podcast: Podcast, force_refresh: bool = False) -> None:
    """
    Refresh the metadata of a podcast from its provider mappings.

    The refresh is skipped unless ``force_refresh`` is set or the stored
    ``last_refresh`` timestamp is older than REFRESH_INTERVAL_PODCASTS.
    Afterwards the podcast (with a new ``last_refresh``) is written back
    to the library.

    :param podcast: The (library) podcast to update in place.
    :param force_refresh: Refresh even if the interval has not elapsed yet.
    """
    previous_refresh = podcast.metadata.last_refresh or 0
    is_stale = (time() - previous_refresh) > REFRESH_INTERVAL_PODCASTS
    if not force_refresh and not is_stale:
        # refreshed recently enough and nobody forced it
        return

    self.logger.debug("Updating metadata for Podcast %s", podcast.name)

    # walk the mappings from highest to lowest priority, so details from
    # higher-priority providers are applied first
    mappings_by_priority = sorted(
        podcast.provider_mappings, key=lambda x: x.priority, reverse=True
    )
    seen_keys: set[str] = set()
    for mapping in mappings_by_priority:
        provider = self.mass.get_provider(
            mapping.provider_instance, provider_type=MusicProvider
        )
        if provider is None:
            continue
        # streaming providers share one catalog across instances, so they are
        # de-duplicated on domain; other providers on their instance id
        if provider.is_streaming_provider:
            dedupe_key = provider.domain
        else:
            dedupe_key = provider.instance_id
        if dedupe_key in seen_keys:
            continue
        seen_keys.add(dedupe_key)
        try:
            remote_item = await self.mass.music.podcasts.get_provider_item(
                mapping.item_id, mapping.provider_instance
            )
            podcast.metadata.update(remote_item.metadata)
            # only fill in the basic details that are still missing
            if podcast.publisher is None and remote_item.publisher:
                podcast.publisher = remote_item.publisher
            if not podcast.total_episodes and remote_item.total_episodes:
                podcast.total_episodes = remote_item.total_episodes
        except MediaNotFoundError:
            # item not found for this mapping: move on to the next one
            continue

    # stamp the refresh time (used to decide when to refresh again)
    # and persist the final item in the library database
    podcast.metadata.last_refresh = int(time())
    await self.mass.music.podcasts.update_item_in_library(podcast.item_id, podcast)
896
async def _get_artist_mbid(self, artist: Artist) -> str | None:
    """
    Resolve the MusicBrainz id of an artist.

    Lookup order: the id already stored on the artist, the fixed id for
    "Various Artists", the provider mapping URLs, the artist's reference
    tracks, its reference albums and finally a name based search.

    :param artist: The artist to resolve.
    :return: The MusicBrainz id, or None if the musicbrainz provider is
        not available or none of the lookups matched.
    """
    if artist.mbid:
        return artist.mbid
    if compare_strings(artist.name, VARIOUS_ARTISTS_NAME):
        return VARIOUS_ARTISTS_MBID

    provider = self.mass.get_provider("musicbrainz")
    if not provider:
        return None
    mb_prov: MusicbrainzProvider = cast("MusicbrainzProvider", provider)
    if TYPE_CHECKING:
        assert isinstance(mb_prov, MusicbrainzProvider)

    # step 1: resource URL (e.g. streaming provider share URL)
    for mapping in artist.provider_mappings:
        if not mapping.url or not mapping.url.startswith("http"):
            continue
        by_url = await mb_prov.get_artist_details_by_resource_url(mapping.url)
        if by_url:
            return by_url.id

    # collect the albums and tracks used as reference for the next steps
    ref_albums = await self.mass.music.artists.albums(
        artist.item_id, artist.provider, in_library_only=False
    )
    ref_tracks = await self.mass.music.artists.tracks(
        artist.item_id, artist.provider, in_library_only=False
    )

    # step 2: (strict) match on reference track(s), using recording id
    for track in ref_tracks:
        by_track = await mb_prov.get_artist_details_by_track(artist.name, track)
        if by_track:
            return by_track.id

    # step 3: (strict) match on reference album(s), using releasegroup id
    for album in ref_albums:
        by_album = await mb_prov.get_artist_details_by_album(artist.name, album)
        if by_album:
            return by_album.id

    # step 4 (last resort): track matching by name, needs the track's album
    for track in ref_tracks:
        if not track.album:
            continue
        search_result = await mb_prov.search(
            artistname=artist.name,
            albumname=track.album.name,
            trackname=track.name,
            trackversion=track.version,
        )
        if search_result:
            return search_result[0].id

    # every lookup failed: log what was tried
    albums_summary = "/".join(x.name for x in ref_albums) or "none"
    tracks_summary = "/".join(x.name for x in ref_tracks) or "none"
    self.logger.debug(
        "Unable to get musicbrainz ID for artist %s\n"
        " - using lookup-album(s): %s\n"
        " - using lookup-track(s): %s\n",
        artist.name,
        albums_summary,
        tracks_summary,
    )
    return None
957
async def _process_metadata_lookup_jobs(self) -> None:
    """
    Task to process metadata lookup jobs.

    Runs forever: takes item URIs from the lookup queue one at a time,
    resolves each to a media item and updates its metadata. A failure on
    one item is logged and never stops the loop.
    """
    # postpone the lookup for a while to allow the system to start up and providers initialized
    await asyncio.sleep(60)
    while True:
        item_uri = await self._lookup_jobs.get()
        # lazy %-style arguments: the message is only formatted when debug logging is on
        self.logger.debug("Processing metadata lookup for %s", item_uri)
        try:
            item = await self.mass.music.get_item_by_uri(item_uri)
            await self.update_metadata(cast("MediaItemType", item))
        except MediaNotFoundError:
            # this can happen when the item is removed from the library
            continue
        except Exception as err:
            # top-level boundary of a background task: log and move on to the next job;
            # the traceback is only attached when debug logging is enabled
            self.logger.error(
                "Error while updating metadata for %s: %s",
                item_uri,
                err,
                exc_info=err if self.logger.isEnabledFor(logging.DEBUG) else None,
            )
978
async def _scan_missing_metadata(self) -> None:
    """
    Scan the library for items that need a metadata lookup.

    Each run schedules a metadata update for up to 5 random artists that
    were never refreshed and have no images, and for up to 5 random
    playlists whose last refresh is missing or older than
    REFRESH_INTERVAL_PLAYLISTS. It sleeps 30 seconds after every item and
    finally reschedules itself after PERIODIC_SCAN_INTERVAL.
    """
    # pass 1: artists without images that were never refreshed
    self.logger.debug("Start lookup for missing artist images...")
    artist_filter = (
        f"json_extract({DB_TABLE_ARTISTS}.metadata,'$.last_refresh') ISNULL "
        f"AND (json_extract({DB_TABLE_ARTISTS}.metadata,'$.images') ISNULL "
        f"OR json_extract({DB_TABLE_ARTISTS}.metadata,'$.images') = '[]')"
    )
    artists = await self.mass.music.artists.get_library_items_by_query(
        limit=5, order_by="random", extra_query_parts=[artist_filter]
    )
    for artist in artists:
        if artist_uri := artist.uri:
            self.schedule_update_metadata(artist_uri)
        # pause after every item, whether or not it was scheduled
        await asyncio.sleep(30)

    # pass 2: playlists are refreshed every refresh interval, this will
    # e.g. update the playlist image and genres if the tracks have changed
    refresh_before = int(time() - REFRESH_INTERVAL_PLAYLISTS)
    playlist_filter = (
        f"json_extract({DB_TABLE_PLAYLISTS}.metadata,'$.last_refresh') ISNULL "
        f"OR json_extract({DB_TABLE_PLAYLISTS}.metadata,'$.last_refresh') < {refresh_before}"
    )
    playlists = await self.mass.music.playlists.get_library_items_by_query(
        limit=5, order_by="random", extra_query_parts=[playlist_filter]
    )
    for playlist in playlists:
        if playlist_uri := playlist.uri:
            self.schedule_update_metadata(playlist_uri)
        await asyncio.sleep(30)

    # queue the next run of this scanner
    self.mass.call_later(PERIODIC_SCAN_INTERVAL, self._scan_missing_metadata)
1011
1012
class MetadataLookupQueue(asyncio.Queue[str]):
    """
    Representation of a queue for metadata lookups.

    A FIFO queue of strings backed by a deque that silently ignores an
    item which is already waiting in the queue. Items can also be looked
    up and removed by value.
    """

    def _init(self, maxlen: int) -> None:
        """Create the underlying deque; called by the base class with its maxsize."""
        # asyncio.Queue treats maxsize <= 0 as "unbounded". Passing that straight
        # to deque would either drop every item (maxlen=0) or raise (negative),
        # so translate it to an unbounded deque instead.
        self._queue: collections.deque[str] = collections.deque(
            maxlen=maxlen if maxlen > 0 else None
        )

    def _put(self, item: str) -> None:
        """Append item to the queue, unless it is already queued."""
        if item not in self._queue:
            self._queue.append(item)

    def pop(self, item: str) -> None:
        """Remove item from queue (no-op if the item is not queued)."""
        # deque.remove raises ValueError when the item is absent
        with suppress(ValueError):
            self._queue.remove(item)

    def exists(self, item: str) -> bool:
        """Check if item exists in queue."""
        return item in self._queue
1031