/
/
/
1"""All logic for metadata retrieval."""
2
3from __future__ import annotations
4
5import asyncio
6import collections
7import logging
8import os
9import pathlib
10import random
11import urllib.parse
12from base64 import b64encode
13from contextlib import suppress
14from time import time
15from typing import TYPE_CHECKING, cast
16from uuid import uuid4
17
18import aiofiles
19from aiohttp import web
20from music_assistant_models.config_entries import ConfigEntry, ConfigValueOption, ConfigValueType
21from music_assistant_models.enums import (
22 AlbumType,
23 ConfigEntryType,
24 ImageType,
25 MediaType,
26 ProviderFeature,
27 ProviderType,
28)
29from music_assistant_models.errors import MediaNotFoundError, ProviderUnavailableError
30from music_assistant_models.helpers import get_global_cache_value
31from music_assistant_models.media_items import (
32 Album,
33 Artist,
34 Audiobook,
35 BrowseFolder,
36 ItemMapping,
37 MediaItemImage,
38 MediaItemType,
39 Playlist,
40 Podcast,
41 Track,
42)
43from music_assistant_models.unique_list import UniqueList
44
45from music_assistant.constants import (
46 CONF_LANGUAGE,
47 DB_TABLE_ARTISTS,
48 DB_TABLE_PLAYLISTS,
49 VARIOUS_ARTISTS_MBID,
50 VARIOUS_ARTISTS_NAME,
51 VERBOSE_LOG_LEVEL,
52)
53from music_assistant.helpers.api import api_command
54from music_assistant.helpers.compare import compare_strings
55from music_assistant.helpers.images import create_collage, get_image_data, get_image_thumb
56from music_assistant.helpers.security import is_safe_path
57from music_assistant.helpers.throttle_retry import Throttler
58from music_assistant.models.core_controller import CoreController
59from music_assistant.models.music_provider import MusicProvider
60
61if TYPE_CHECKING:
62 from music_assistant_models.config_entries import CoreConfig
63
64 from music_assistant import MusicAssistant
65 from music_assistant.models.metadata_provider import MetadataProvider
66 from music_assistant.providers.musicbrainz import MusicbrainzProvider
67
68
69def _detect_image_format(path: str) -> str:
70 """Detect image format from file path extension, defaulting to jpg."""
71 match pathlib.PurePath(path).suffix.lower():
72 case ".svg":
73 return "svg"
74 case ".png":
75 return "png"
76 case _:
77 return "jpg"
78
79
80LOCALES = {
81 "af_ZA": "African",
82 "ar_AE": "Arabic (United Arab Emirates)",
83 "ar_EG": "Arabic (Egypt)",
84 "ar_SA": "Saudi Arabia",
85 "bg_BG": "Bulgarian",
86 "cs_CZ": "Czech",
87 "zh_CN": "Chinese",
88 "hr_HR": "Croatian",
89 "da_DK": "Danish",
90 "de_DE": "German",
91 "el_GR": "Greek",
92 "en_AU": "English (AU)",
93 "en_US": "English (US)",
94 "en_GB": "English (UK)",
95 "es_ES": "Spanish",
96 "et_EE": "Estonian",
97 "fi_FI": "Finnish",
98 "fr_FR": "French",
99 "hu_HU": "Hungarian",
100 "is_IS": "Icelandic",
101 "it_IT": "Italian",
102 "lt_LT": "Lithuanian",
103 "lv_LV": "Latvian",
104 "ja_JP": "Japanese",
105 "ko_KR": "Korean",
106 "nl_NL": "Dutch",
107 "nb_NO": "Norwegian Bokmål",
108 "pl_PL": "Polish",
109 "pt_PT": "Portuguese",
110 "ro_RO": "Romanian",
111 "ru_RU": "Russian",
112 "sk_SK": "Slovak",
113 "sl_SI": "Slovenian",
114 "sr_RS": "Serbian",
115 "sv_SE": "Swedish",
116 "tr_TR": "Turkish",
117 "uk_UA": "Ukrainian",
118}
119
120DEFAULT_LANGUAGE = "en_US"
121REFRESH_INTERVAL_ARTISTS = 60 * 60 * 24 * 90 # 90 days
122REFRESH_INTERVAL_ALBUMS = 60 * 60 * 24 * 90 # 90 days
123REFRESH_INTERVAL_TRACKS = 60 * 60 * 24 * 90 # 90 days
124REFRESH_INTERVAL_AUDIOBOOKS = 60 * 60 * 24 * 90 # 90 days
125REFRESH_INTERVAL_PODCASTS = 60 * 60 * 24 * 90 # 90 days
126REFRESH_INTERVAL_PLAYLISTS = 60 * 60 * 24 * 14 # 14 days
127PERIODIC_SCAN_INTERVAL = 60 * 60 * 6 # 6 hours
128CONF_ENABLE_ONLINE_METADATA = "enable_online_metadata"
129
130
131class MetaDataController(CoreController):
132 """Several helpers to search and store metadata for mediaitems."""
133
134 domain: str = "metadata"
135 config: CoreConfig
136
137 def __init__(self, mass: MusicAssistant) -> None:
138 """Initialize class."""
139 super().__init__(mass)
140 self.cache = self.mass.cache
141 self._pref_lang: str | None = None
142 self.manifest.name = "Metadata controller"
143 self.manifest.description = (
144 "Music Assistant's core controller which handles all metadata for music."
145 )
146 self.manifest.icon = "book-information-variant"
147 self._lookup_jobs: MetadataLookupQueue = MetadataLookupQueue(100)
148 self._lookup_task: asyncio.Task[None] | None = None
149 self._throttler = Throttler(1, 30)
150
151 async def get_config_entries(
152 self,
153 action: str | None = None,
154 values: dict[str, ConfigValueType] | None = None,
155 ) -> tuple[ConfigEntry, ...]:
156 """Return all Config Entries for this core module (if any)."""
157 return (
158 ConfigEntry(
159 key=CONF_LANGUAGE,
160 type=ConfigEntryType.STRING,
161 label="Preferred language",
162 required=False,
163 default_value=DEFAULT_LANGUAGE,
164 description="Preferred language for metadata.\n\n"
165 "Note that English will always be used as fallback when content "
166 "in your preferred language is not available.",
167 options=[ConfigValueOption(value, key) for key, value in LOCALES.items()],
168 ),
169 ConfigEntry(
170 key=CONF_ENABLE_ONLINE_METADATA,
171 type=ConfigEntryType.BOOLEAN,
172 label="Enable metadata retrieval from online metadata providers",
173 required=False,
174 default_value=True,
175 description="Enable online metadata lookups.\n\n"
176 "This will allow Music Assistant to fetch additional metadata from (enabled) "
177 "metadata providers, such as The Audio DB and Fanart.tv.\n\n"
178 "Note that these online sources are only queried when no information is already "
179 "available from local files or the music providers and local artwork/metadata "
180 "will always have preference over online sources so consider metadata from online "
181 "sources as complementary only.\n\n"
182 "The retrieval of additional rich metadata is a process that is executed slowly "
183 "in the background to not overload these free services with requests. "
184 "You can speedup the process by storing the images and other metadata locally.",
185 ),
186 )
187
188 async def setup(self, config: CoreConfig) -> None:
189 """Async initialize of module."""
190 # wait for dependencies to be ready (streams and music)
191 await self.mass.streams.initialized.wait()
192 await self.mass.music.initialized.wait()
193
194 self.config = config
195 if not self.logger.isEnabledFor(VERBOSE_LOG_LEVEL):
196 # silence PIL logger
197 logging.getLogger("PIL").setLevel(logging.WARNING)
198 # make sure that our directory with collage images exists
199 self._collage_images_dir = os.path.join(self.mass.cache_path, "collage_images")
200 if not await asyncio.to_thread(os.path.exists, self._collage_images_dir):
201 await asyncio.to_thread(os.mkdir, self._collage_images_dir)
202 self.mass.streams.register_dynamic_route("/imageproxy", self.handle_imageproxy)
203 # the lookup task is used to process metadata lookup jobs
204 self._lookup_task = self.mass.create_task(self._process_metadata_lookup_jobs())
205 # just run the scan for missing metadata once at startup
206 # background scan for missing metadata
207 self.mass.call_later(300, self._scan_missing_metadata)
208 # migrate theaudiodb images to new url
209 # they updated their cdn url to r2.theaudiodb.com
210 # TODO: remove this after 2.7 release
211 query = (
212 "UPDATE artists SET metadata = "
213 "REPLACE (metadata, 'https://www.theaudiodb.com', 'https://r2.theaudiodb.com') "
214 "WHERE artists.metadata LIKE '%https://www.theaudiodb.com%'"
215 )
216 if self.mass.music.database:
217 await self.mass.music.database.execute(query)
218 await self.mass.music.database.commit()
219
220 async def close(self) -> None:
221 """Handle logic on server stop."""
222 if self._lookup_task and not self._lookup_task.done():
223 self._lookup_task.cancel()
224 self.mass.streams.unregister_dynamic_route("/imageproxy")
225
226 @property
227 def providers(self) -> list[MetadataProvider]:
228 """Return all loaded/running MetadataProviders."""
229 return cast("list[MetadataProvider]", self.mass.get_providers(ProviderType.METADATA))
230
231 @property
232 def preferred_language(self) -> str:
233 """Return preferred language for metadata (as 2 letter language code 'en')."""
234 return self.locale.split("_")[0]
235
236 @property
237 def locale(self) -> str:
238 """Return preferred language for metadata (as full locale code 'en_EN')."""
239 value = self.mass.config.get_raw_core_config_value(
240 self.domain, CONF_LANGUAGE, DEFAULT_LANGUAGE
241 )
242 return str(value)
243
244 @api_command("metadata/set_default_preferred_language")
245 def set_default_preferred_language(self, lang: str) -> None:
246 """
247 Set the default preferred language.
248
249 Reasoning behind this is that the backend can not make a wise choice for the default,
250 so relies on some external source that knows better to set this info, like the frontend
251 or a streaming provider.
252 Can only be set once (by this call or the user).
253 """
254 if self.mass.config.get_raw_core_config_value(self.domain, CONF_LANGUAGE):
255 return # already set
256 self.set_preferred_language(lang)
257
258 @api_command("metadata/set_preferred_language")
259 def set_preferred_language(self, lang: str) -> None:
260 """
261 Set the preferred language.
262
263 Note that this will not modify any existing metadata,
264 but will be used for future lookups.
265 """
266 # prefer exact match
267 if lang in LOCALES:
268 self.mass.config.set_raw_core_config_value(self.domain, CONF_LANGUAGE, lang)
269 return
270 # try strict matching on either locale code or region
271 lang = lang.lower().replace("-", "_")
272 for locale_code, lang_name in LOCALES.items():
273 if lang in (locale_code.lower(), lang_name.lower()):
274 self.mass.config.set_raw_core_config_value(self.domain, CONF_LANGUAGE, locale_code)
275 return
276 # attempt loose match on language code or region code
277 for lang_part in (lang[:2], lang[:-2]):
278 for locale_code in tuple(LOCALES):
279 language_code, region_code = locale_code.lower().split("_", 1)
280 if lang_part in (language_code, region_code):
281 self.mass.config.set_raw_core_config_value(
282 self.domain, CONF_LANGUAGE, locale_code
283 )
284 return
285 # if we reach this point, we couldn't match the language
286 self.logger.warning("%s is not a valid language", lang)
287
288 @api_command("metadata/update_metadata")
289 async def update_metadata(
290 self, item: str | MediaItemType, force_refresh: bool = False
291 ) -> MediaItemType:
292 """Get/update extra/enhanced metadata for/on given MediaItem."""
293 async with self.cache.handle_refresh(force_refresh):
294 if isinstance(item, str):
295 retrieved_item = await self.mass.music.get_item_by_uri(item)
296 if isinstance(retrieved_item, BrowseFolder):
297 raise TypeError("Cannot update metadata on a BrowseFolder item.")
298 item = retrieved_item
299
300 if item.provider != "library":
301 # this shouldn't happen but just in case.
302 raise RuntimeError("Metadata can only be updated for library items")
303
304 # just in case it was in the queue, prevent duplicate lookups
305 if item.uri:
306 self._lookup_jobs.pop(item.uri)
307 async with self._throttler:
308 if item.media_type == MediaType.ARTIST:
309 await self._update_artist_metadata(
310 cast("Artist", item), force_refresh=force_refresh
311 )
312 if item.media_type == MediaType.ALBUM:
313 await self._update_album_metadata(
314 cast("Album", item), force_refresh=force_refresh
315 )
316 if item.media_type == MediaType.TRACK:
317 await self._update_track_metadata(
318 cast("Track", item), force_refresh=force_refresh
319 )
320 if item.media_type == MediaType.PLAYLIST:
321 await self._update_playlist_metadata(
322 cast("Playlist", item), force_refresh=force_refresh
323 )
324 if item.media_type == MediaType.AUDIOBOOK:
325 await self._update_audiobook_metadata(
326 cast("Audiobook", item), force_refresh=force_refresh
327 )
328 if item.media_type == MediaType.PODCAST:
329 await self._update_podcast_metadata(
330 cast("Podcast", item), force_refresh=force_refresh
331 )
332 return item
333
334 def schedule_update_metadata(self, uri: str) -> None:
335 """Schedule metadata update for given MediaItem uri."""
336 if "library" not in uri:
337 return
338 if self._lookup_jobs.exists(uri):
339 return
340 with suppress(asyncio.QueueFull):
341 self._lookup_jobs.put_nowait(uri)
342
343 async def get_image_data_for_item(
344 self,
345 media_item: MediaItemType,
346 img_type: ImageType = ImageType.THUMB,
347 size: int = 0,
348 ) -> bytes | None:
349 """Get image data for given MedaItem."""
350 img_path = await self.get_image_url_for_item(
351 media_item=media_item,
352 img_type=img_type,
353 )
354 if not img_path:
355 return None
356 thumbnail = await self.get_thumbnail(img_path, provider="builtin", size=size)
357
358 return cast("bytes", thumbnail)
359
360 async def get_image_url_for_item(
361 self,
362 media_item: MediaItemType | ItemMapping,
363 img_type: ImageType = ImageType.THUMB,
364 resolve: bool = True,
365 ) -> str | None:
366 """Get url to image for given media media_item."""
367 if not media_item:
368 return None
369
370 if isinstance(media_item, ItemMapping):
371 # Check if the ItemMapping already has an image - avoid expensive API call
372 if media_item.image and media_item.image.type == img_type:
373 if media_item.image.remotely_accessible and resolve:
374 return self.get_image_url(media_item.image)
375 if not media_item.image.remotely_accessible:
376 return media_item.image.path
377
378 # Only retrieve full item if we don't have the image we need
379 if not media_item.uri:
380 return None
381 retrieved_item = await self.mass.music.get_item_by_uri(media_item.uri)
382 if isinstance(retrieved_item, BrowseFolder):
383 return None # can not happen, but guard for type checker
384 media_item = retrieved_item
385
386 if media_item and media_item.metadata.images:
387 for img in media_item.metadata.images:
388 if img.type != img_type:
389 continue
390 if not img.remotely_accessible and not resolve:
391 # ignore image if its not remotely accessible and we don't allow resolving
392 continue
393 return self.get_image_url(img, prefer_proxy=not img.remotely_accessible)
394
395 # retry with track's album
396 if isinstance(media_item, Track) and media_item.album:
397 return await self.get_image_url_for_item(media_item.album, img_type, resolve)
398
399 # try artist instead for albums
400 if isinstance(media_item, Album) and media_item.artists:
401 return await self.get_image_url_for_item(media_item.artists[0], img_type, resolve)
402
403 # last resort: track artist(s)
404 if isinstance(media_item, Track) and media_item.artists:
405 for artist in media_item.artists:
406 return await self.get_image_url_for_item(artist, img_type, resolve)
407
408 return None
409
410 def get_image_url(
411 self,
412 image: MediaItemImage,
413 size: int = 0,
414 prefer_proxy: bool = False,
415 image_format: str | None = None,
416 prefer_stream_server: bool = False,
417 ) -> str:
418 """Get (proxied) URL for MediaItemImage."""
419 if image_format is None:
420 image_format = _detect_image_format(image.path)
421 if image_format == "svg":
422 # SVGs don't need resizing
423 size = 0
424 if not image.remotely_accessible or prefer_proxy or size:
425 # return imageproxy url for images that need to be resolved
426 # the original path is double encoded
427 encoded_url = urllib.parse.quote_plus(urllib.parse.quote_plus(image.path))
428 base_url = (
429 self.mass.streams.base_url if prefer_stream_server else self.mass.webserver.base_url
430 )
431 return (
432 f"{base_url}/imageproxy?provider={image.provider}"
433 f"&size={size}&fmt={image_format}&path={encoded_url}"
434 )
435 return image.path
436
437 async def get_thumbnail(
438 self,
439 path: str,
440 provider: str,
441 size: int | None = None,
442 base64: bool = False,
443 image_format: str | None = None,
444 ) -> bytes | str:
445 """Get/create thumbnail image for path (image url or local path)."""
446 if not self.mass.get_provider(provider) and not path.startswith("http"):
447 raise ProviderUnavailableError
448 if image_format is None:
449 image_format = _detect_image_format(path)
450 if provider == "builtin" and path.startswith("/collage/"):
451 # special case for collage images
452 collage_rel = path.split("/collage/")[-1]
453 if not is_safe_path(collage_rel):
454 raise FileNotFoundError("Invalid collage path")
455 path = os.path.join(self._collage_images_dir, collage_rel)
456 if image_format == "svg":
457 svg_bytes = await get_image_data(self.mass, path, provider)
458 if base64:
459 enc_image = b64encode(svg_bytes).decode()
460 return f"data:image/svg+xml;base64,{enc_image}"
461 return svg_bytes
462 thumbnail_bytes = await get_image_thumb(
463 self.mass, path, size=size, provider=provider, image_format=image_format
464 )
465 if base64:
466 enc_image = b64encode(thumbnail_bytes).decode()
467 return f"data:image/{image_format};base64,{enc_image}"
468 return thumbnail_bytes
469
470 async def handle_imageproxy(self, request: web.Request) -> web.Response:
471 """Handle request for image proxy."""
472 path = request.query["path"]
473 provider = request.query.get("provider", "builtin")
474 if provider in ("url", "file", "http"):
475 # temporary for backwards compatibility
476 provider = "builtin"
477 size = int(request.query.get("size", "0"))
478 image_format = request.query.get("fmt", None)
479 if image_format is None:
480 image_format = _detect_image_format(path)
481 if not self.mass.get_provider(provider) and not path.startswith("http"):
482 return web.Response(status=404)
483 if "%" in path:
484 # assume (double) encoded url, decode it
485 path = urllib.parse.unquote_plus(path)
486 try:
487 image_data = await self.get_thumbnail(
488 path, size=size, provider=provider, image_format=image_format
489 )
490 # we set the cache header to 1 year (forever)
491 # assuming that images do not/rarely change
492 content_type = "image/svg+xml" if image_format == "svg" else f"image/{image_format}"
493 return web.Response(
494 body=image_data,
495 headers={"Cache-Control": "max-age=31536000", "Access-Control-Allow-Origin": "*"},
496 content_type=content_type,
497 )
498 except Exception as err:
499 # broadly catch all exceptions here to ensure we dont crash the request handler
500 if isinstance(err, FileNotFoundError):
501 self.logger.log(VERBOSE_LOG_LEVEL, "Image not found: %s", path)
502 else:
503 self.logger.warning(
504 "Error while fetching image %s: %s",
505 path,
506 str(err),
507 exc_info=err if self.logger.isEnabledFor(10) else None,
508 )
509 return web.Response(status=404)
510
511 async def create_collage_image(
512 self,
513 images: list[MediaItemImage],
514 filename: str,
515 fanart: bool = False,
516 ) -> MediaItemImage | None:
517 """Create collage thumb/fanart image for (in-library) playlist."""
518 if (len(images) < 8 and fanart) or len(images) < 3:
519 # require at least some images otherwise this does not make a lot of sense
520 return None
521 # limit to 50 images to prevent we're going OOM
522 if len(images) > 50:
523 images = random.sample(images, 50)
524 else:
525 random.shuffle(images)
526 try:
527 # create collage thumb from playlist tracks
528 # if playlist has no default image (e.g. a local playlist)
529 dimensions = (2500, 1750) if fanart else (1500, 1500)
530 img_data = await create_collage(self.mass, images, dimensions)
531 # always overwrite existing path
532 file_path = os.path.join(self._collage_images_dir, filename)
533 async with aiofiles.open(file_path, "wb") as _file:
534 await _file.write(img_data)
535 del img_data
536 return MediaItemImage(
537 type=ImageType.FANART if fanart else ImageType.THUMB,
538 path=f"/collage/{filename}",
539 provider="builtin",
540 remotely_accessible=False,
541 )
542 except Exception as err:
543 self.logger.warning(
544 "Error while creating playlist image: %s",
545 str(err),
546 exc_info=err if self.logger.isEnabledFor(10) else None,
547 )
548 return None
549
550 @api_command("metadata/get_track_lyrics")
551 async def get_track_lyrics(
552 self,
553 track: Track,
554 ) -> tuple[str | None, str | None]:
555 """
556 Get lyrics for given track from metadata providers.
557
558 Returns a tuple of (lyrics, lrc_lyrics) if found.
559 """
560 if track.metadata and track.metadata.lyrics:
561 return track.metadata.lyrics, track.metadata.lrc_lyrics
562
563 if track.provider == "library":
564 # try to update metadata first
565 await self._update_track_metadata(track, force_refresh=False)
566 return track.metadata.lyrics, track.metadata.lrc_lyrics
567
568 # prefer lyrics from the track's own provider
569 track_provider = self.mass.get_provider(track.provider, provider_type=MusicProvider)
570 if track_provider and ProviderFeature.LYRICS in track_provider.supported_features:
571 full_track = await self.mass.music.tracks.get_provider_item(
572 track.item_id, track.provider
573 )
574 if full_track.metadata and full_track.metadata.lyrics:
575 return full_track.metadata.lyrics, full_track.metadata.lrc_lyrics
576
577 # fallback to other metadata providers
578 for provider in self.providers:
579 if ProviderFeature.LYRICS not in provider.supported_features:
580 continue
581 if (metadata := await provider.get_track_metadata(track)) and (
582 metadata.lyrics or metadata.lrc_lyrics
583 ):
584 return metadata.lyrics, metadata.lrc_lyrics
585 return None, None
586
587 async def _update_artist_metadata(self, artist: Artist, force_refresh: bool = False) -> None:
588 """Get/update rich metadata for an artist."""
589 # collect metadata from all (online) music + metadata providers
590 # NOTE: we only do/allow this every REFRESH_INTERVAL
591 needs_refresh = (time() - (artist.metadata.last_refresh or 0)) > REFRESH_INTERVAL_ARTISTS
592 if not (force_refresh or needs_refresh):
593 return
594
595 self.logger.debug("Updating metadata for Artist %s", artist.name)
596 unique_keys: set[str] = set()
597
598 # collect (local) metadata from all local providers
599 local_provs = get_global_cache_value("non_streaming_providers")
600 if TYPE_CHECKING:
601 local_provs = cast("set[str]", local_provs)
602
603 # collect metadata from all [music] providers
604 # note that we sort the providers by priority so that we always
605 # prefer local providers over online providers
606 for prov_mapping in sorted(
607 artist.provider_mappings, key=lambda x: x.priority, reverse=True
608 ):
609 prov = self.mass.get_provider(
610 prov_mapping.provider_instance, provider_type=MusicProvider
611 )
612 if prov is None:
613 continue
614 # prefer domain for streaming providers as the catalog is the same across instances
615 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
616 if prov_key in unique_keys:
617 continue
618 unique_keys.add(prov_key)
619 with suppress(MediaNotFoundError):
620 prov_item = await self.mass.music.artists.get_provider_item(
621 prov_mapping.item_id, prov_mapping.provider_instance
622 )
623 artist.metadata.update(prov_item.metadata)
624
625 # The musicbrainz ID is mandatory for all metadata lookups
626 if not artist.mbid:
627 # TODO: Use a global cache/proxy for the MB lookups to save on API calls
628 if mbid := await self._get_artist_mbid(artist):
629 artist.mbid = mbid
630
631 # collect metadata from all (online)[metadata] providers
632 # TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls
633 if self.config.get_value(CONF_ENABLE_ONLINE_METADATA) and artist.mbid:
634 for provider in self.providers:
635 if ProviderFeature.ARTIST_METADATA not in provider.supported_features:
636 continue
637 if metadata := await provider.get_artist_metadata(artist):
638 artist.metadata.update(metadata)
639 self.logger.debug(
640 "Fetched metadata for Artist %s on provider %s",
641 artist.name,
642 provider.name,
643 )
644 # update final item in library database
645 # set timestamp, used to determine when this function was last called
646 artist.metadata.last_refresh = int(time())
647 await self.mass.music.artists.update_item_in_library(artist.item_id, artist)
648
649 async def _update_album_metadata(self, album: Album, force_refresh: bool = False) -> None:
650 """Get/update rich metadata for an album."""
651 # collect metadata from all (online) music + metadata providers
652 # NOTE: we only do/allow this every REFRESH_INTERVAL
653 needs_refresh = (time() - (album.metadata.last_refresh or 0)) > REFRESH_INTERVAL_ALBUMS
654 if not (force_refresh or needs_refresh):
655 return
656
657 self.logger.debug("Updating metadata for Album %s", album.name)
658
659 # collect metadata from all [music] providers
660 # note that we sort the providers by priority so that we always
661 # prefer local providers over online providers
662 unique_keys: set[str] = set()
663 for prov_mapping in sorted(album.provider_mappings, key=lambda x: x.priority, reverse=True):
664 prov = self.mass.get_provider(
665 prov_mapping.provider_instance, provider_type=MusicProvider
666 )
667 if prov is None:
668 continue
669 # prefer domain for streaming providers as the catalog is the same across instances
670 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
671 if prov_key in unique_keys:
672 continue
673 unique_keys.add(prov_key)
674 with suppress(MediaNotFoundError):
675 prov_item = await self.mass.music.albums.get_provider_item(
676 prov_mapping.item_id, prov_mapping.provider_instance
677 )
678 album.metadata.update(prov_item.metadata)
679 if album.year is None and prov_item.year:
680 album.year = prov_item.year
681 if album.album_type == AlbumType.UNKNOWN:
682 album.album_type = prov_item.album_type
683
684 # collect metadata from all (online) [metadata] providers
685 # TODO: Utilize a global (cloud) cache for metadata lookups to save on API calls
686 if self.config.get_value(CONF_ENABLE_ONLINE_METADATA):
687 for provider in self.providers:
688 if ProviderFeature.ALBUM_METADATA not in provider.supported_features:
689 continue
690 if metadata := await provider.get_album_metadata(album):
691 album.metadata.update(metadata)
692 self.logger.debug(
693 "Fetched metadata for Album %s on provider %s",
694 album.name,
695 provider.name,
696 )
697 # update final item in library database
698 # set timestamp, used to determine when this function was last called
699 album.metadata.last_refresh = int(time())
700 await self.mass.music.albums.update_item_in_library(album.item_id, album)
701
702 async def _update_track_metadata(self, track: Track, force_refresh: bool = False) -> None:
703 """Get/update rich metadata for a track."""
704 # collect metadata from all (online) music + metadata providers
705 # NOTE: we only do/allow this every REFRESH_INTERVAL
706 needs_refresh = (time() - (track.metadata.last_refresh or 0)) > REFRESH_INTERVAL_TRACKS
707 if not (force_refresh or needs_refresh):
708 return
709
710 self.logger.debug("Updating metadata for Track %s", track.name)
711
712 # collect metadata from all [music] providers
713 # note that we sort the providers by priority so that we always
714 # prefer local providers over online providers
715 unique_keys: set[str] = set()
716 for prov_mapping in sorted(track.provider_mappings, key=lambda x: x.priority, reverse=True):
717 prov = self.mass.get_provider(
718 prov_mapping.provider_instance, provider_type=MusicProvider
719 )
720 if prov is None:
721 continue
722 # prefer domain for streaming providers as the catalog is the same across instances
723 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
724 if prov_key in unique_keys:
725 continue
726 unique_keys.add(prov_key)
727 with suppress(MediaNotFoundError):
728 prov_item = await self.mass.music.tracks.get_provider_item(
729 prov_mapping.item_id, prov_mapping.provider_instance
730 )
731 track.metadata.update(prov_item.metadata)
732
733 # collect metadata from all [metadata] providers
734 # Only fetch metadata from these sources if force_refresh is set OR
735 # if the track needs a refresh (based on REFRESH_INTERVAL_TRACKS) AND
736 # online metadata is enabled.
737 if (force_refresh or needs_refresh) and self.config.get_value(CONF_ENABLE_ONLINE_METADATA):
738 for provider in self.providers:
739 if ProviderFeature.TRACK_METADATA not in provider.supported_features:
740 continue
741
742 if metadata := await provider.get_track_metadata(track):
743 track.metadata.update(metadata)
744 self.logger.debug(
745 "Fetched metadata for Track %s on provider %s",
746 track.name,
747 provider.name,
748 )
749 # set timestamp, used to determine when this function was last called
750 track.metadata.last_refresh = int(time())
751 # update final item in library database
752 await self.mass.music.tracks.update_item_in_library(track.item_id, track)
753
754 async def _update_playlist_metadata(
755 self, playlist: Playlist, force_refresh: bool = False
756 ) -> None:
757 """Get/update rich metadata for a playlist."""
758 # collect metadata + create collage images
759 # NOTE: we only do/allow this every REFRESH_INTERVAL
760 needs_refresh = (
761 time() - (playlist.metadata.last_refresh or 0)
762 ) > REFRESH_INTERVAL_PLAYLISTS
763 if not (force_refresh or needs_refresh):
764 return
765 self.logger.debug("Updating metadata for Playlist %s", playlist.name)
766 playlist.metadata.genres = set()
767 all_playlist_tracks_images: list[MediaItemImage] = []
768 playlist_genres: dict[str, int] = {}
769 # retrieve metadata for the playlist from the tracks (such as genres etc.)
770 # TODO: retrieve style/mood ?
771 async for track in self.mass.music.playlists.tracks(playlist.item_id, playlist.provider):
772 if (
773 track.image
774 and track.image not in all_playlist_tracks_images
775 and (
776 track.image.provider in ("url", "builtin", "http")
777 or self.mass.get_provider(track.image.provider)
778 )
779 ):
780 all_playlist_tracks_images.append(track.image)
781 if track.metadata.genres:
782 genres = track.metadata.genres
783 elif (
784 isinstance(track, Track)
785 and track.album
786 and isinstance(track.album, Album)
787 and track.album.metadata.genres
788 ):
789 genres = track.album.metadata.genres
790 else:
791 genres = set()
792 for genre in genres:
793 if genre not in playlist_genres:
794 playlist_genres[genre] = 0
795 playlist_genres[genre] += 1
796 await asyncio.sleep(0) # yield to eventloop
797
798 playlist_genres_filtered = {genre for genre, count in playlist_genres.items() if count > 5}
799 playlist_genres_filtered = set(list(playlist_genres_filtered)[:8])
800 playlist.metadata.genres.update(playlist_genres_filtered)
801 # create collage images
802 cur_images: list[MediaItemImage] = playlist.metadata.images or []
803 new_images = []
804 # thumb image
805 thumb_image = next((x for x in cur_images if x.type == ImageType.THUMB), None)
806 if not thumb_image or self._collage_images_dir in thumb_image.path:
807 img_filename = thumb_image.path if thumb_image else f"{uuid4().hex}_thumb.jpg"
808 if collage_thumb_image := await self.create_collage_image(
809 all_playlist_tracks_images, img_filename
810 ):
811 new_images.append(collage_thumb_image)
812 elif thumb_image:
813 # just use old image
814 new_images.append(thumb_image)
815 # fanart image
816 fanart_image = next((x for x in cur_images if x.type == ImageType.FANART), None)
817 if not fanart_image or self._collage_images_dir in fanart_image.path:
818 img_filename = fanart_image.path if fanart_image else f"{uuid4().hex}_fanart.jpg"
819 if collage_fanart_image := await self.create_collage_image(
820 all_playlist_tracks_images, img_filename, fanart=True
821 ):
822 new_images.append(collage_fanart_image)
823 elif fanart_image:
824 # just use old image
825 new_images.append(fanart_image)
826 playlist.metadata.images = UniqueList(new_images) if new_images else None
827 # set timestamp, used to determine when this function was last called
828 playlist.metadata.last_refresh = int(time())
829 # update final item in library database
830 await self.mass.music.playlists.update_item_in_library(playlist.item_id, playlist)
831
832 async def _update_audiobook_metadata(
833 self, audiobook: Audiobook, force_refresh: bool = False
834 ) -> None:
835 """Get/update rich metadata for an audiobook."""
836 # collect metadata from all (online) music + metadata providers
837 # NOTE: we only do/allow this every REFRESH_INTERVAL
838 needs_refresh = (
839 time() - (audiobook.metadata.last_refresh or 0)
840 ) > REFRESH_INTERVAL_AUDIOBOOKS
841 if not (force_refresh or needs_refresh):
842 return
843
844 self.logger.debug("Updating metadata for Audiobook %s", audiobook.name)
845
846 # collect metadata from all [music] providers
847 # note that we sort the providers by priority so that we always
848 # prefer local providers over online providers
849 unique_keys: set[str] = set()
850 for prov_mapping in sorted(
851 audiobook.provider_mappings, key=lambda x: x.priority, reverse=True
852 ):
853 prov = self.mass.get_provider(
854 prov_mapping.provider_instance, provider_type=MusicProvider
855 )
856 if prov is None:
857 continue
858 # prefer domain for streaming providers as the catalog is the same across instances
859 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
860 if prov_key in unique_keys:
861 continue
862 unique_keys.add(prov_key)
863 with suppress(MediaNotFoundError):
864 prov_item = await self.mass.music.audiobooks.get_provider_item(
865 prov_mapping.item_id, prov_mapping.provider_instance
866 )
867 audiobook.metadata.update(prov_item.metadata)
868 if audiobook.publisher is None and prov_item.publisher:
869 audiobook.publisher = prov_item.publisher
870 if not audiobook.authors and prov_item.authors:
871 audiobook.authors = prov_item.authors
872 if not audiobook.narrators and prov_item.narrators:
873 audiobook.narrators = prov_item.narrators
874 if not audiobook.duration and prov_item.duration:
875 audiobook.duration = prov_item.duration
876
877 # update final item in library database
878 # set timestamp, used to determine when this function was last called
879 audiobook.metadata.last_refresh = int(time())
880 await self.mass.music.audiobooks.update_item_in_library(audiobook.item_id, audiobook)
881
882 async def _update_podcast_metadata(self, podcast: Podcast, force_refresh: bool = False) -> None:
883 """Get/update rich metadata for a podcast."""
884 # collect metadata from all (online) music + metadata providers
885 # NOTE: we only do/allow this every REFRESH_INTERVAL
886 needs_refresh = (time() - (podcast.metadata.last_refresh or 0)) > REFRESH_INTERVAL_PODCASTS
887 if not (force_refresh or needs_refresh):
888 return
889
890 self.logger.debug("Updating metadata for Podcast %s", podcast.name)
891
892 # collect metadata from all [music] providers
893 # note that we sort the providers by priority so that we always
894 # prefer local providers over online providers
895 unique_keys: set[str] = set()
896 for prov_mapping in sorted(
897 podcast.provider_mappings, key=lambda x: x.priority, reverse=True
898 ):
899 prov = self.mass.get_provider(
900 prov_mapping.provider_instance, provider_type=MusicProvider
901 )
902 if prov is None:
903 continue
904 # prefer domain for streaming providers as the catalog is the same across instances
905 prov_key = prov.domain if prov.is_streaming_provider else prov.instance_id
906 if prov_key in unique_keys:
907 continue
908 unique_keys.add(prov_key)
909 with suppress(MediaNotFoundError):
910 prov_item = await self.mass.music.podcasts.get_provider_item(
911 prov_mapping.item_id, prov_mapping.provider_instance
912 )
913 podcast.metadata.update(prov_item.metadata)
914 if podcast.publisher is None and prov_item.publisher:
915 podcast.publisher = prov_item.publisher
916 if not podcast.total_episodes and prov_item.total_episodes:
917 podcast.total_episodes = prov_item.total_episodes
918
919 # update final item in library database
920 # set timestamp, used to determine when this function was last called
921 podcast.metadata.last_refresh = int(time())
922 await self.mass.music.podcasts.update_item_in_library(podcast.item_id, podcast)
923
924 async def _get_artist_mbid(self, artist: Artist) -> str | None:
925 """Fetch musicbrainz id by performing search using the artist name, albums and tracks."""
926 if artist.mbid:
927 return artist.mbid
928 if compare_strings(artist.name, VARIOUS_ARTISTS_NAME):
929 return VARIOUS_ARTISTS_MBID
930
931 musicbrainz_provider = self.mass.get_provider("musicbrainz")
932 if not musicbrainz_provider:
933 return None
934 musicbrainz: MusicbrainzProvider = cast("MusicbrainzProvider", musicbrainz_provider)
935 if TYPE_CHECKING:
936 assert isinstance(musicbrainz, MusicbrainzProvider)
937 # first try with resource URL (e.g. streaming provider share URL)
938 for prov_mapping in artist.provider_mappings:
939 if prov_mapping.url and prov_mapping.url.startswith("http"):
940 if mb_artist := await musicbrainz.get_artist_details_by_resource_url(
941 prov_mapping.url
942 ):
943 return mb_artist.id
944
945 # start lookup of musicbrainz id using artist name, albums and tracks
946 ref_albums = await self.mass.music.artists.albums(
947 artist.item_id, artist.provider, in_library_only=False
948 )
949 ref_tracks = await self.mass.music.artists.tracks(
950 artist.item_id, artist.provider, in_library_only=False
951 )
952 # try with (strict) ref track(s), using recording id
953 for ref_track in ref_tracks:
954 if mb_artist := await musicbrainz.get_artist_details_by_track(artist.name, ref_track):
955 return mb_artist.id
956 # try with (strict) ref album(s), using releasegroup id
957 for ref_album in ref_albums:
958 if mb_artist := await musicbrainz.get_artist_details_by_album(artist.name, ref_album):
959 return mb_artist.id
960 # last restort: track matching by name
961 for ref_track in ref_tracks:
962 if not ref_track.album:
963 continue
964 if result := await musicbrainz.search(
965 artistname=artist.name,
966 albumname=ref_track.album.name,
967 trackname=ref_track.name,
968 trackversion=ref_track.version,
969 ):
970 return result[0].id
971
972 # lookup failed
973 ref_albums_str = "/".join(x.name for x in ref_albums) or "none"
974 ref_tracks_str = "/".join(x.name for x in ref_tracks) or "none"
975 self.logger.debug(
976 "Unable to get musicbrainz ID for artist %s\n"
977 " - using lookup-album(s): %s\n"
978 " - using lookup-track(s): %s\n",
979 artist.name,
980 ref_albums_str,
981 ref_tracks_str,
982 )
983 return None
984
985 async def _process_metadata_lookup_jobs(self) -> None:
986 """Task to process metadata lookup jobs."""
987 # postpone the lookup for a while to allow the system to start up and providers initialized
988 await asyncio.sleep(60)
989 while True:
990 item_uri = await self._lookup_jobs.get()
991 self.logger.debug(f"Processing metadata lookup for {item_uri}")
992 try:
993 item = await self.mass.music.get_item_by_uri(item_uri)
994 await self.update_metadata(cast("MediaItemType", item))
995 except MediaNotFoundError:
996 # this can happen when the item is removed from the library
997 pass
998 except Exception as err:
999 self.logger.error(
1000 "Error while updating metadata for %s: %s",
1001 item_uri,
1002 str(err),
1003 exc_info=err if self.logger.isEnabledFor(10) else None,
1004 )
1005
1006 async def _scan_missing_metadata(self) -> None:
1007 """Scanner for (missing) metadata, runs periodically in the background."""
1008 # Scan for missing artist images
1009 self.logger.debug("Start lookup for missing artist images...")
1010 query = (
1011 f"json_extract({DB_TABLE_ARTISTS}.metadata,'$.last_refresh') ISNULL "
1012 f"AND (json_extract({DB_TABLE_ARTISTS}.metadata,'$.images') ISNULL "
1013 f"OR json_extract({DB_TABLE_ARTISTS}.metadata,'$.images') = '[]')"
1014 )
1015 for artist in await self.mass.music.artists.get_library_items_by_query(
1016 limit=5, order_by="random", extra_query_parts=[query]
1017 ):
1018 if artist.uri:
1019 self.schedule_update_metadata(artist.uri)
1020 await asyncio.sleep(30)
1021
1022 # Force refresh playlist metadata every refresh interval
1023 # this will e.g. update the playlist image and genres if the tracks have changed
1024 timestamp = int(time() - REFRESH_INTERVAL_PLAYLISTS)
1025 query = (
1026 f"json_extract({DB_TABLE_PLAYLISTS}.metadata,'$.last_refresh') ISNULL "
1027 f"OR json_extract({DB_TABLE_PLAYLISTS}.metadata,'$.last_refresh') < {timestamp}"
1028 )
1029 for playlist in await self.mass.music.playlists.get_library_items_by_query(
1030 limit=5, order_by="random", extra_query_parts=[query]
1031 ):
1032 if playlist.uri:
1033 self.schedule_update_metadata(playlist.uri)
1034 await asyncio.sleep(30)
1035
1036 # reschedule next scan
1037 self.mass.call_later(PERIODIC_SCAN_INTERVAL, self._scan_missing_metadata)
1038
1039
1040class MetadataLookupQueue(asyncio.Queue[str]):
1041 """Representation of a queue for metadata lookups."""
1042
1043 def _init(self, maxlen: int) -> None:
1044 self._queue: collections.deque[str] = collections.deque(maxlen=maxlen)
1045
1046 def _put(self, item: str) -> None:
1047 if item not in self._queue:
1048 self._queue.append(item)
1049
1050 def pop(self, item: str) -> None:
1051 """Remove item from queue."""
1052 if self.exists(item):
1053 self._queue.remove(item)
1054
1055 def exists(self, item: str) -> bool:
1056 """Check if item exists in queue."""
1057 return item in self._queue
1058