music-assistant-server

36.8 KBPY
provider.py
36.8 KB956 lines • python
1"""Internet Archive music provider implementation."""
2
3from __future__ import annotations
4
5import contextlib
6import re
7from collections.abc import AsyncGenerator
8from typing import TYPE_CHECKING, Any
9
10import aiohttp
11from music_assistant_models.enums import MediaType, ProviderFeature
12from music_assistant_models.errors import InvalidDataError, MediaNotFoundError
13from music_assistant_models.media_items import (
14    Album,
15    Artist,
16    Audiobook,
17    MediaItemChapter,
18    Podcast,
19    PodcastEpisode,
20    ProviderMapping,
21    SearchResults,
22    Track,
23)
24from music_assistant_models.unique_list import UniqueList
25
26from music_assistant.constants import UNKNOWN_ARTIST
27from music_assistant.controllers.cache import use_cache
28from music_assistant.helpers.throttle_retry import ThrottlerManager, throttle_with_retries
29from music_assistant.models.music_provider import MusicProvider
30
31from .helpers import InternetArchiveClient, clean_text, extract_year, parse_duration
32from .parsers import (
33    add_item_image,
34    artist_exists,
35    create_artist,
36    create_provider_mapping,
37    create_title_from_identifier,
38    doc_to_album,
39    doc_to_audiobook,
40    doc_to_podcast,
41    doc_to_track,
42    is_audiobook_content,
43    is_likely_album,
44    is_podcast_content,
45)
46from .streaming import InternetArchiveStreaming
47
48if TYPE_CHECKING:
49    from music_assistant_models.config_entries import ProviderConfig
50    from music_assistant_models.provider import ProviderManifest
51    from music_assistant_models.streamdetails import StreamDetails
52
53    from music_assistant import MusicAssistant
54
55
56class InternetArchiveProvider(MusicProvider):
57    """Implementation of Internet Archive music provider."""
58
59    def __init__(
60        self,
61        mass: MusicAssistant,
62        manifest: ProviderManifest,
63        config: ProviderConfig,
64        supported_features: set[ProviderFeature],
65    ) -> None:
66        """Initialize the provider."""
67        super().__init__(mass, manifest, config, supported_features)
68        self.throttler = ThrottlerManager(
69            rate_limit=10, period=60, retry_attempts=5, initial_backoff=5
70        )
71        self.client = InternetArchiveClient(mass)
72        self.streaming = InternetArchiveStreaming(self)
73
74    @property
75    def is_streaming_provider(self) -> bool:
76        """Return True if provider is a streaming provider."""
77        return True
78
79    @throttle_with_retries
80    async def _get_json(self, url: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
81        """Make a GET request and return JSON response with throttling."""
82        return await self.client._get_json(url, params)
83
84    @throttle_with_retries
85    async def _search(self, **kwargs: Any) -> dict[str, Any]:
86        """Throttled search wrapper."""
87        return await self.client.search(**kwargs)
88
89    @throttle_with_retries
90    async def _get_metadata(self, identifier: str) -> dict[str, Any]:
91        """Throttled metadata wrapper."""
92        return await self.client.get_metadata(identifier)
93
94    @throttle_with_retries
95    @use_cache(expiration=86400 * 30)  # 30 days - file listings are static
96    async def _get_audio_files(self, identifier: str) -> list[dict[str, Any]]:
97        """Throttled audio files wrapper."""
98        return await self.client.get_audio_files(identifier)
99
100    @use_cache(86400 * 7)  # 7 days
101    async def search(
102        self,
103        search_query: str,
104        media_types: list[MediaType],
105        limit: int = 5,
106    ) -> SearchResults:
107        """
108        Perform search on Internet Archive.
109
110        Uses multiple search strategies to maximize result coverage with
111        proper result accumulation and broader search patterns.
112
113        Args:
114            search_query: The search term to look for
115            media_types: List of media types to search for
116            limit: Maximum number of results to return per media type
117
118        Returns:
119            SearchResults object containing found items
120        """
121        if not search_query.strip():
122            return SearchResults()
123
124        # Adjust search intensity based on what's being requested
125        rows_per_strategy = min(limit * 2, 16) if len(media_types) > 1 else min(limit * 2, 100)
126
127        # Collect results in separate lists
128        tracks: list[Track] = []
129        albums: list[Album] = []
130        artists: list[Artist] = []
131        audiobooks: list[Audiobook] = []
132        podcasts: list[Podcast] = []
133
134        # Track processed identifiers to avoid duplicates across strategies
135        processed_ids: set[str] = set()
136
137        # Build search strategies based on requested media types
138        search_strategies = []
139
140        # For music searches: focus on title and creator
141        if any(mt in media_types for mt in [MediaType.TRACK, MediaType.ALBUM, MediaType.ARTIST]):
142            search_strategies.extend(
143                [
144                    (f"creator:({search_query}) AND mediatype:audio", "downloads desc"),
145                    (f"title:({search_query}) AND mediatype:audio", "downloads desc"),
146                    (f"subject:({search_query}) AND mediatype:audio", "downloads desc"),
147                ]
148            )
149
150        # For audiobooks: search within audiobook collections, still limit to audio
151        if MediaType.AUDIOBOOK in media_types:
152            audiobook_query = f"{search_query} AND collection:(librivoxaudio OR audio_bookspoetry) AND mediatype:audio"  # noqa: E501
153            search_strategies.append((audiobook_query, "downloads desc"))
154
155        # For podcasts: search within podcast collections
156        if MediaType.PODCAST in media_types:
157            podcast_query = f"{search_query} AND collection:podcasts AND mediatype:audio"
158            search_strategies.append((podcast_query, "downloads desc"))
159
160        for strategy_idx, (strategy_query, sort_order) in enumerate(search_strategies):
161            self.logger.debug("Trying search strategy %d: %s", strategy_idx + 1, strategy_query)
162
163            try:
164                search_response = await self._search(
165                    query=strategy_query,
166                    rows=rows_per_strategy,
167                    sort=sort_order,
168                )
169
170                response_data = search_response.get("response", {})
171                docs = response_data.get("docs", [])
172                self.logger.debug(
173                    "Strategy %d '%s' found %d raw results",
174                    strategy_idx + 1,
175                    strategy_query,
176                    len(docs),
177                )
178
179                # Process results and extract different media types
180                strategy_processed = 0
181                strategy_skipped = 0
182
183                for doc in docs:
184                    try:
185                        identifier = doc.get("identifier")
186                        if not identifier or identifier in processed_ids:
187                            strategy_skipped += 1
188                            continue
189
190                        # Track this identifier to avoid duplicates
191                        processed_ids.add(identifier)
192
193                        await self._process_search_result(
194                            doc, tracks, albums, artists, audiobooks, podcasts, media_types
195                        )
196                        strategy_processed += 1
197
198                        # Check if we have enough results across all types
199                        if self._has_sufficient_results(
200                            tracks, albums, artists, audiobooks, podcasts, media_types, limit
201                        ):
202                            self.logger.debug(
203                                "Sufficient results found after strategy %d, stopping search",
204                                strategy_idx + 1,
205                            )
206                            break
207
208                    except (InvalidDataError, KeyError) as err:
209                        self.logger.debug("Skipping invalid search result: %s", err)
210                        strategy_skipped += 1
211                        continue
212
213                self.logger.debug(
214                    "Strategy %d '%s': processed %d new items, skipped %d items. "
215                    "Running totals - tracks: %d, albums: %d, artists: %d, "
216                    "audiobooks: %d, podcasts: %d",
217                    strategy_idx + 1,
218                    strategy_query,
219                    strategy_processed,
220                    strategy_skipped,
221                    len(tracks),
222                    len(albums),
223                    len(artists),
224                    len(audiobooks),
225                    len(podcasts),
226                )
227
228                # If we have sufficient results, stop trying more strategies
229                if self._has_sufficient_results(
230                    tracks, albums, artists, audiobooks, podcasts, media_types, limit
231                ):
232                    break
233
234            except Exception as err:
235                self.logger.warning("Search strategy %d failed: %s", strategy_idx + 1, err)
236                continue
237
238        # Log final results for debugging
239        self.logger.debug(
240            "Search for '%s' completed. Final results - tracks: %d, albums: %d, "
241            "artists: %d, audiobooks: %d, podcasts: %d (processed %d unique items)",
242            search_query,
243            len(tracks),
244            len(albums),
245            len(artists),
246            len(audiobooks),
247            len(podcasts),
248            len(processed_ids),
249        )
250
251        return SearchResults(
252            tracks=tracks[:limit] if MediaType.TRACK in media_types else [],
253            albums=albums[:limit] if MediaType.ALBUM in media_types else [],
254            artists=artists[:limit] if MediaType.ARTIST in media_types else [],
255            audiobooks=audiobooks[:limit] if MediaType.AUDIOBOOK in media_types else [],
256            podcasts=podcasts[:limit] if MediaType.PODCAST in media_types else [],
257        )
258
259    def _has_sufficient_results(
260        self,
261        tracks: list[Track],
262        albums: list[Album],
263        artists: list[Artist],
264        audiobooks: list[Audiobook],
265        podcasts: list[Podcast],
266        media_types: list[MediaType],
267        limit: int,
268    ) -> bool:
269        """Check if we have sufficient results for all requested media types."""
270        return (
271            (MediaType.TRACK not in media_types or len(tracks) >= limit)
272            and (MediaType.ALBUM not in media_types or len(albums) >= limit)
273            and (MediaType.ARTIST not in media_types or len(artists) >= limit)
274            and (MediaType.AUDIOBOOK not in media_types or len(audiobooks) >= limit)
275            and (MediaType.PODCAST not in media_types or len(podcasts) >= limit)
276        )
277
278    async def _process_search_result(
279        self,
280        doc: dict[str, Any],
281        tracks: list[Track],
282        albums: list[Album],
283        artists: list[Artist],
284        audiobooks: list[Audiobook],
285        podcasts: list[Podcast],
286        media_types: list[MediaType],
287    ) -> None:
288        """
289        Process a single search result document from Internet Archive.
290
291        Determines the appropriate media type and creates corresponding objects.
292        Uses improved heuristics to classify items as tracks, albums, or audiobooks.
293        """
294        identifier = doc.get("identifier")
295        if not identifier:
296            raise InvalidDataError("Missing identifier in search result")
297
298        title = clean_text(doc.get("title"))
299        creator = clean_text(doc.get("creator"))
300
301        # Be lenient - allow items without title if they have identifier
302        if not title and not identifier:
303            raise InvalidDataError("Missing both title and identifier in search result")
304
305        # Use identifier as fallback title if needed
306        if not title:
307            title = create_title_from_identifier(identifier)
308
309        # Determine what type of item this is
310        mediatype = doc.get("mediatype", "")
311        collection = doc.get("collection", [])
312        if isinstance(collection, str):
313            collection = [collection]
314
315        # Check if this is audiobook content using improved detection
316        if is_audiobook_content(doc) and MediaType.AUDIOBOOK in media_types:
317            audiobook = doc_to_audiobook(
318                doc, self.domain, self.instance_id, self.client.get_item_url
319            )
320            if audiobook:
321                audiobooks.append(audiobook)
322            return  # Don't process as other media types
323
324        # Check if this is podcast content
325        if is_podcast_content(doc) and MediaType.PODCAST in media_types:
326            podcast = doc_to_podcast(doc, self.domain, self.instance_id, self.client.get_item_url)
327            if podcast:
328                podcasts.append(podcast)
329            return  # Don't process as other media types
330
331        # For etree items, usually each item is an album (concert)
332        if mediatype == "etree" or "etree" in collection:
333            if MediaType.ALBUM in media_types:
334                album = doc_to_album(doc, self.domain, self.instance_id, self.client.get_item_url)
335                if album:
336                    albums.append(album)
337
338            if MediaType.ARTIST in media_types and creator:
339                artist = create_artist(creator, self.domain, self.instance_id)
340                if artist and not artist_exists(artist, artists):
341                    artists.append(artist)
342
343        elif mediatype == "audio":
344            # Use heuristics to determine album vs track without expensive API calls
345            if is_likely_album(doc):
346                if MediaType.ALBUM in media_types:
347                    album = doc_to_album(
348                        doc, self.domain, self.instance_id, self.client.get_item_url
349                    )
350                    if album:
351                        albums.append(album)
352            elif MediaType.TRACK in media_types:
353                track = doc_to_track(doc, self.domain, self.instance_id, self.client.get_item_url)
354                if track:
355                    tracks.append(track)
356
357            if MediaType.ARTIST in media_types and creator:
358                artist = create_artist(creator, self.domain, self.instance_id)
359                if artist and not artist_exists(artist, artists):
360                    artists.append(artist)
361
362    @use_cache(expiration=86400 * 60)  # Cache for 60 days - artist "tracks" change infrequently
363    async def get_track(self, prov_track_id: str) -> Track:
364        """Get full track details by id."""
365        metadata = await self._get_metadata(prov_track_id)
366        item_metadata = metadata.get("metadata", {})
367
368        title = clean_text(item_metadata.get("title"))
369        creator = clean_text(item_metadata.get("creator"))
370
371        if not title:
372            raise MediaNotFoundError(f"Track {prov_track_id} not found or invalid")
373
374        track = Track(
375            item_id=prov_track_id,
376            provider=self.instance_id,
377            name=title,
378            provider_mappings={
379                create_provider_mapping(
380                    prov_track_id, self.domain, self.instance_id, self.client.get_item_url
381                )
382            },
383        )
384
385        # Add artist
386        if creator:
387            track.artists = UniqueList([create_artist(creator, self.domain, self.instance_id)])
388        else:
389            track.artists = UniqueList(
390                [create_artist(UNKNOWN_ARTIST, self.domain, self.instance_id)]
391            )
392
393        # Add duration from first audio file
394        try:
395            audio_files = await self._get_audio_files(prov_track_id)
396            if audio_files and audio_files[0].get("length"):
397                duration = parse_duration(audio_files[0]["length"])
398                if duration:
399                    track.duration = duration
400        except (TimeoutError, aiohttp.ClientError) as err:
401            self.logger.debug("Network error getting duration for track %s: %s", prov_track_id, err)
402        except (KeyError, ValueError, TypeError) as err:
403            self.logger.debug("Could not parse duration for track %s: %s", prov_track_id, err)
404
405        # Add metadata
406        if description := clean_text(item_metadata.get("description")):
407            track.metadata.description = description
408
409        # Add thumbnail
410        add_item_image(track, prov_track_id, self.instance_id)
411
412        return track
413
414    @use_cache(expiration=86400 * 60)  # Cache for 60 days - album catalogs change infrequently
415    async def get_album(self, prov_album_id: str) -> Album:
416        """Get full album details by id."""
417        metadata = await self._get_metadata(prov_album_id)
418        item_metadata = metadata.get("metadata", {})
419
420        title = clean_text(item_metadata.get("title"))
421        creator = clean_text(item_metadata.get("creator"))
422
423        if not title:
424            raise MediaNotFoundError(f"Album {prov_album_id} not found or invalid")
425
426        album = Album(
427            item_id=prov_album_id,
428            provider=self.instance_id,
429            name=title,
430            provider_mappings={
431                create_provider_mapping(
432                    prov_album_id, self.domain, self.instance_id, self.client.get_item_url
433                )
434            },
435        )
436
437        # Add artist
438        if creator:
439            album.artists = UniqueList([create_artist(creator, self.domain, self.instance_id)])
440        else:
441            album.artists = UniqueList(
442                [create_artist(UNKNOWN_ARTIST, self.domain, self.instance_id)]
443            )
444
445        # Add metadata
446        if date := extract_year(item_metadata.get("date")):
447            album.year = date
448
449        if description := clean_text(item_metadata.get("description")):
450            album.metadata.description = description
451
452        # Add thumbnail
453        add_item_image(album, prov_album_id, self.instance_id)
454
455        return album
456
457    @use_cache(expiration=86400 * 60)  # Cache for 60 days - artist catalogs change infrequently
458    async def get_artist(self, prov_artist_id: str) -> Artist:
459        """
460        Get full artist details by id.
461
462        Args:
463            prov_artist_id: Provider-specific artist identifier (artist name)
464
465        Returns:
466            Artist object
467        """
468        # Artist IDs are just the creator names
469        return Artist(
470            item_id=prov_artist_id,
471            provider=self.instance_id,
472            name=prov_artist_id,
473            provider_mappings={
474                ProviderMapping(
475                    item_id=prov_artist_id,
476                    provider_domain=self.domain,
477                    provider_instance=self.instance_id,
478                )
479            },
480        )
481
482    @use_cache(expiration=86400 * 30)  # Cache for 30 days - audiobook catalogs change infrequently
483    async def get_audiobook(self, prov_audiobook_id: str) -> Audiobook:
484        """Get full audiobook details by id."""
485        metadata = await self._get_metadata(prov_audiobook_id)
486        item_metadata = metadata.get("metadata", {})
487
488        title = clean_text(item_metadata.get("title"))
489        creator = clean_text(item_metadata.get("creator"))
490
491        if not title:
492            raise MediaNotFoundError(f"Audiobook {prov_audiobook_id} not found or invalid")
493
494        audiobook = Audiobook(
495            item_id=prov_audiobook_id,
496            provider=self.instance_id,
497            name=title,
498            provider_mappings={
499                create_provider_mapping(
500                    prov_audiobook_id, self.domain, self.instance_id, self.client.get_item_url
501                )
502            },
503        )
504
505        # Add author/narrator
506        if creator:
507            author_list = [creator]
508            audiobook.authors = UniqueList(author_list)
509
510        # Add metadata
511        if description := clean_text(item_metadata.get("description")):
512            audiobook.metadata.description = description
513
514        # Add thumbnail
515        add_item_image(audiobook, prov_audiobook_id, self.instance_id)
516
517        # Calculate duration and chapters
518        try:
519            total_duration, chapters = await self._calculate_audiobook_duration_and_chapters(
520                prov_audiobook_id
521            )
522            audiobook.duration = total_duration
523            if len(chapters) > 1:
524                audiobook.metadata.chapters = chapters
525
526        except Exception as err:
527            self.logger.warning(
528                f"Could not process audio files for audiobook {prov_audiobook_id}: {err}"
529            )
530            audiobook.duration = 0
531            audiobook.metadata.chapters = []
532
533        return audiobook
534
535    async def get_album_tracks(self, prov_album_id: str) -> list[Track]:
536        """Get album tracks for given album id."""
537        metadata = await self._get_metadata(prov_album_id)
538        item_metadata = metadata.get("metadata", {})
539        audio_files = await self._get_audio_files(prov_album_id)
540        tracks = []
541
542        # Pre-create album artist to avoid duplicates
543        album_artist = clean_text(item_metadata.get("creator"))
544        album_artist_normalized = album_artist.lower() if album_artist else ""
545        album_artist_obj = None
546        if album_artist:
547            album_artist_obj = create_artist(album_artist, self.domain, self.instance_id)
548        else:
549            album_artist_obj = create_artist(UNKNOWN_ARTIST, self.domain, self.instance_id)
550
551        for i, file_info in enumerate(audio_files, 1):
552            filename = file_info.get("name", "")
553
554            # Use file's title if available, otherwise clean up filename
555            track_name = file_info.get("title", filename)
556            if not track_name or track_name == filename:
557                track_name = filename.rsplit(".", 1)[0] if "." in filename else filename
558
559            # Try to extract track number from file metadata first, then filename
560            track_number = self._extract_track_number(file_info, track_name, i)
561
562            track = Track(
563                item_id=f"{prov_album_id}#{filename}",
564                provider=self.instance_id,
565                name=track_name,
566                track_number=track_number,
567                provider_mappings={
568                    ProviderMapping(
569                        item_id=f"{prov_album_id}#{filename}",
570                        provider_domain=self.domain,
571                        provider_instance=self.instance_id,
572                        url=self.client.get_download_url(prov_album_id, filename),
573                        available=True,
574                    )
575                },
576            )
577
578            # Add file-specific artist if available, otherwise use album artist
579            file_artist = file_info.get("artist") or file_info.get("creator")
580            if file_artist:
581                file_artist_cleaned = clean_text(file_artist)
582                file_artist_normalized = file_artist_cleaned.lower()
583                # Check if this is the same as album artist to avoid duplicates (case-insensitive)
584                if album_artist_normalized and file_artist_normalized == album_artist_normalized:
585                    track.artists = UniqueList([album_artist_obj])
586                else:
587                    track.artists = UniqueList(
588                        [create_artist(file_artist_cleaned, self.domain, self.instance_id)]
589                    )
590            else:
591                # Use pre-created album artist object
592                track.artists = UniqueList([album_artist_obj])
593
594            # Add duration if available
595            if duration_str := file_info.get("length"):
596                if duration := parse_duration(duration_str):
597                    track.duration = duration
598
599            # Add genre if available
600            if genre := file_info.get("genre"):
601                track.metadata.genres = {clean_text(genre)}
602
603            tracks.append(track)
604
605        return tracks
606
607    def _extract_track_number(
608        self, file_info: dict[str, Any], track_name: str, fallback: int
609    ) -> int:
610        """Extract track number from file metadata or filename."""
611        track_number = None
612
613        if "track" in file_info:
614            with contextlib.suppress(ValueError, AttributeError):
615                track_number = int(str(file_info["track"]).split("/")[0])
616
617        if track_number is None:
618            # Fallback to filename parsing
619            track_num_match = re.search(r"^(\d+)[\s\-_.]*(.+)", track_name)
620            track_number = int(track_num_match.group(1)) if track_num_match else fallback
621
622        return track_number
623
624    @use_cache(expiration=86400 * 30)  # Cache for 30 days - artist catalogs change infrequently
625    async def get_artist_albums(self, prov_artist_id: str) -> list[Album]:
626        """
627        Get albums for a specific artist.
628
629        Uses metadata heuristics to determine likely albums without expensive
630        API calls for better performance.
631
632        Args:
633            prov_artist_id: Provider-specific artist identifier (artist name)
634
635        Returns:
636            List of Album objects by the artist
637        """
638        albums: list[Album] = []
639        page = 0
640        page_size = 200  # IA's maximum
641
642        while len(albums) < 1000:  # Reasonable upper limit
643            search_response = await self._search(
644                query=f'creator:"{prov_artist_id}" AND (format:"VBR MP3" OR format:"FLAC" \
645        OR format:"Ogg Vorbis")',
646                sort="downloads desc",
647                rows=page_size,
648                page=page,
649            )
650
651            docs = search_response.get("response", {}).get("docs", [])
652            if not docs:
653                break
654
655            for doc in docs:
656                try:
657                    # Use metadata heuristics instead of expensive API calls
658                    # to determine if item is an album
659                    if is_likely_album(doc):
660                        album = doc_to_album(
661                            doc, self.domain, self.instance_id, self.client.get_item_url
662                        )
663                        if album:
664                            albums.append(album)
665                except (KeyError, ValueError, TypeError) as err:
666                    self.logger.debug(
667                        "Skipping invalid album for artist %s: %s", prov_artist_id, err
668                    )
669                    continue
670                except (TimeoutError, aiohttp.ClientError) as err:
671                    self.logger.debug(
672                        "Network error processing album for artist %s: %s", prov_artist_id, err
673                    )
674                    continue
675                except Exception as err:
676                    self.logger.exception(
677                        "Unexpected error processing album for artist %s: %s", prov_artist_id, err
678                    )
679                    continue
680            page += 1
681        return albums
682
683    @use_cache(expiration=86400 * 7)  # Cache for 1 week
684    async def get_artist_toptracks(self, prov_artist_id: str) -> list[Track]:
685        """
686        Get top tracks for a specific artist.
687
688        Uses the same search as get_artist_albums but filters for single tracks.
689
690        Args:
691            prov_artist_id: Provider-specific artist identifier (artist name)
692
693        Returns:
694            List of Track objects representing the artist's top tracks
695        """
696        tracks = []
697        search_response = await self._search(
698            query=(
699                f'creator:"{prov_artist_id}" AND '
700                f'(format:"VBR MP3" OR format:"FLAC" OR format:"Ogg Vorbis")'
701            ),
702            rows=25,  # Limit for "top" tracks
703            sort="downloads desc",
704        )
705
706        response_data = search_response.get("response", {})
707        docs = response_data.get("docs", [])
708
709        for doc in docs:
710            try:
711                # Only include items that are NOT classified as albums
712                if not is_likely_album(doc):
713                    track = doc_to_track(
714                        doc, self.domain, self.instance_id, self.client.get_item_url
715                    )
716                    if track:
717                        tracks.append(track)
718            except (KeyError, ValueError, TypeError) as err:
719                self.logger.debug("Skipping invalid track for artist %s: %s", prov_artist_id, err)
720                continue
721            except (TimeoutError, aiohttp.ClientError) as err:
722                self.logger.debug(
723                    "Network error processing track for artist %s: %s", prov_artist_id, err
724                )
725                continue
726            except Exception as err:
727                self.logger.exception(
728                    "Unexpected error processing track for artist %s: %s", prov_artist_id, err
729                )
730                continue
731
732            if len(tracks) >= 25:
733                break
734
735        return tracks
736
737    async def get_stream_details(self, item_id: str, media_type: MediaType) -> StreamDetails:
738        """
739        Get streamdetails for a track or audiobook.
740
741        Delegates to the streaming handler for proper multi-file support.
742
743        Args:
744            item_id: Provider-specific item identifier
745            media_type: The type of media being requested
746
747        Returns:
748            StreamDetails object configured for the specific item type
749
750        Raises:
751            MediaNotFoundError: If no audio files are found for the item
752        """
753        return await self.streaming.get_stream_details(item_id, media_type)
754
755    async def _calculate_audiobook_duration_and_chapters(
756        self, item_id: str
757    ) -> tuple[int, list[MediaItemChapter]]:
758        """Calculate duration and chapters for audiobooks."""
759        audio_files = await self._get_audio_files(item_id)
760        total_duration = 0
761        chapters = []
762        current_position = 0.0
763
764        for i, file_info in enumerate(audio_files, 1):
765            chapter_duration = parse_duration(file_info.get("length", "0")) or 0
766            total_duration += chapter_duration
767
768            chapter_name = file_info.get("title") or file_info.get("name", f"Chapter {i}")
769            chapter = MediaItemChapter(
770                position=i,
771                name=clean_text(chapter_name),
772                start=current_position,
773                end=current_position + chapter_duration if chapter_duration > 0 else None,
774            )
775            chapters.append(chapter)
776            current_position += chapter_duration
777
778        return total_duration, chapters
779
780    async def get_audio_stream(
781        self, streamdetails: StreamDetails, seek_position: int = 0
782    ) -> AsyncGenerator[bytes, None]:
783        """Get audio stream from Internet Archive."""
784        # Use sock_read=None to allow long audiobook chapters to stream fully
785        timeout = aiohttp.ClientTimeout(sock_read=None, total=None)
786
787        if streamdetails.media_type == MediaType.AUDIOBOOK and isinstance(streamdetails.data, dict):
788            chapter_urls = streamdetails.data.get("chapters", [])
789            chapters_data = streamdetails.data.get("chapters_data", [])
790
791            # Calculate which chapter to start from based on seek_position
792            seek_position_ms = seek_position * 1000
793            start_chapter = 0
794
795            if seek_position > 0 and chapters_data:
796                accumulated_duration_ms = 0
797
798                for i, chapter_data in enumerate(chapters_data):
799                    chapter_duration_ms = (
800                        parse_duration(chapter_data.get("length", "0")) or 0
801                    ) * 1000
802
803                    if accumulated_duration_ms + chapter_duration_ms > seek_position_ms:
804                        start_chapter = i
805                        break
806                    accumulated_duration_ms += chapter_duration_ms
807
808            # Stream chapters starting from calculated position
809            chapters_yielded = False
810            for i in range(start_chapter, len(chapter_urls)):
811                chapter_url = chapter_urls[i]
812
813                try:
814                    async with self.mass.http_session.get(chapter_url, timeout=timeout) as response:
815                        response.raise_for_status()
816                        async for chunk in response.content.iter_chunked(8192):
817                            chapters_yielded = True
818                            yield chunk
819                except Exception as e:
820                    self.logger.error(f"Chapter {i + 1} streaming failed: {e}")
821                    continue
822
823            # If no chapters succeeded, raise an error instead of silent failure
824            if not chapters_yielded:
825                raise MediaNotFoundError(
826                    f"Failed to stream any chapters for audiobook {streamdetails.item_id}"
827                )
828
829        else:
830            # Handle single files
831            audio_files = await self._get_audio_files(streamdetails.item_id)
832            if audio_files:
833                download_url = self.client.get_download_url(
834                    streamdetails.item_id, audio_files[0]["name"]
835                )
836                async with self.mass.http_session.get(download_url, timeout=timeout) as response:
837                    response.raise_for_status()
838                    async for chunk in response.content.iter_chunked(8192):
839                        yield chunk
840
841    @use_cache(expiration=86400 * 7)  # Cache for 1 week
842    async def get_podcast(self, prov_podcast_id: str) -> Podcast:
843        """Get full podcast details by id."""
844        metadata = await self._get_metadata(prov_podcast_id)
845        item_metadata = metadata.get("metadata", {})
846
847        title = clean_text(item_metadata.get("title"))
848        creator = clean_text(item_metadata.get("creator"))
849
850        if not title:
851            raise MediaNotFoundError(f"Podcast {prov_podcast_id} not found or invalid")
852
853        podcast = Podcast(
854            item_id=prov_podcast_id,
855            provider=self.instance_id,
856            name=title,
857            provider_mappings={
858                create_provider_mapping(
859                    prov_podcast_id, self.domain, self.instance_id, self.client.get_item_url
860                )
861            },
862        )
863
864        # Add publisher/creator
865        if creator:
866            podcast.publisher = creator
867
868        # Add metadata
869        if description := clean_text(item_metadata.get("description")):
870            podcast.metadata.description = description
871
872        # Add thumbnail
873        add_item_image(podcast, prov_podcast_id, self.instance_id)
874
875        # Calculate total episodes
876        try:
877            audio_files = await self._get_audio_files(prov_podcast_id)
878            podcast.total_episodes = len(audio_files)
879        except Exception as err:
880            self.logger.warning(f"Could not get episode count for podcast {prov_podcast_id}: {err}")
881            podcast.total_episodes = None
882
883        return podcast
884
885    async def get_podcast_episodes(
886        self, prov_podcast_id: str
887    ) -> AsyncGenerator[PodcastEpisode, None]:
888        """Get podcast episodes for given podcast id."""
889        metadata = await self._get_metadata(prov_podcast_id)
890        item_metadata = metadata.get("metadata", {})
891        audio_files = await self._get_audio_files(prov_podcast_id)
892
893        # Create podcast reference for episodes
894        podcast = Podcast(
895            item_id=prov_podcast_id,
896            provider=self.instance_id,
897            name=clean_text(item_metadata.get("title", prov_podcast_id)),
898            provider_mappings={
899                create_provider_mapping(
900                    prov_podcast_id, self.domain, self.instance_id, self.client.get_item_url
901                )
902            },
903        )
904
905        for i, file_info in enumerate(audio_files, 1):
906            filename = file_info.get("name", "")
907
908            # Use file's title if available, otherwise clean up filename
909            episode_name = file_info.get("title", filename)
910            if not episode_name or episode_name == filename:
911                episode_name = filename.rsplit(".", 1)[0] if "." in filename else filename
912
913            # Try to extract episode number from file metadata first, then filename
914            episode_number = self._extract_track_number(file_info, episode_name, i)
915
916            episode = PodcastEpisode(
917                item_id=f"{prov_podcast_id}#{filename}",
918                provider=self.instance_id,
919                name=episode_name,
920                position=episode_number,
921                podcast=podcast,
922                provider_mappings={
923                    ProviderMapping(
924                        item_id=f"{prov_podcast_id}#{filename}",
925                        provider_domain=self.domain,
926                        provider_instance=self.instance_id,
927                        url=self.client.get_download_url(prov_podcast_id, filename),
928                        available=True,
929                    )
930                },
931            )
932
933            # Add duration if available
934            if duration_str := file_info.get("length"):
935                if duration := parse_duration(duration_str):
936                    episode.duration = duration
937
938            # Add episode metadata
939            if description := file_info.get("description"):
940                episode.metadata.description = clean_text(description)
941
942            yield episode
943
944    async def get_podcast_episode(self, prov_episode_id: str) -> PodcastEpisode:
945        """Get single podcast episode by id."""
946        if "#" not in prov_episode_id:
947            raise MediaNotFoundError(f"Invalid episode ID format: {prov_episode_id}")
948
949        podcast_id, _ = prov_episode_id.split("#", 1)
950
951        async for episode in self.get_podcast_episodes(podcast_id):
952            if episode.item_id == prov_episode_id:
953                return episode
954
955        raise MediaNotFoundError(f"Episode {prov_episode_id} not found")
956