music-assistant-server

6.7 KBPY
playlists.py
6.7 KB194 lines • python
1"""Helpers for parsing (online and offline) playlists."""
2
3from __future__ import annotations
4
5import configparser
6import logging
7from dataclasses import dataclass
8from typing import TYPE_CHECKING
9from urllib.parse import urlparse
10
11from aiohttp import ClientTimeout, client_exceptions
12from music_assistant_models.errors import InvalidDataError
13
14from music_assistant.helpers.util import detect_charset
15
16if TYPE_CHECKING:
17    from music_assistant.mass import MusicAssistant
18
19
LOGGER = logging.getLogger(__name__)
# Content types that identify a response as an HLS playlist.
HLS_CONTENT_TYPES = (
    # https://tools.ietf.org/html/draft-pantos-http-live-streaming-19#section-10
    "application/vnd.apple.mpegurl",
    # Additional informal types used by Mozilla gecko are deliberately not
    # included, as they don't reliably indicate HLS streams. For reference:
    # https://github.com/mozilla/gecko-dev/blob/c4c1adbae87bf2d128c39832d72498550ee1b4b8/dom/media/DecoderTraits.cpp#L47-L52
)
27
28
class IsHLSPlaylist(InvalidDataError):
    """Raised when the playlist belongs to an HLS stream and should not be parsed."""
31
32
33@dataclass
34class PlaylistItem:
35    """Playlist item."""
36
37    path: str
38    length: str | None = None
39    title: str | None = None
40    stream_info: dict[str, str] | None = None
41    key: str | None = None
42
43    @property
44    def is_url(self) -> bool:
45        """Validate the URL can be parsed and at least has scheme + netloc."""
46        result = urlparse(self.path)
47        return all([result.scheme, result.netloc])
48
49
def _parse_hls_attributes(attribute_text: str) -> dict[str, str]:
    """Split an HLS attribute list into a mapping of name to raw value.

    Commas inside double-quoted values do not separate attributes (for
    example ``CODECS="mp4a.40.2,avc1.4d401e"``), and only the first ``=`` of
    each attribute separates the name from the value. Values are returned
    exactly as written, including any surrounding double quotes. Parts
    without an ``=`` are ignored.
    """
    parts: list[str] = []
    start = 0
    in_quotes = False
    for index, char in enumerate(attribute_text):
        if char == '"':
            in_quotes = not in_quotes
        elif char == "," and not in_quotes:
            parts.append(attribute_text[start:index])
            start = index + 1
    parts.append(attribute_text[start:])

    attributes: dict[str, str] = {}
    for part in parts:
        name, separator, value = part.strip().partition("=")
        if separator:
            attributes[name] = value
    return attributes


def parse_m3u(m3u_data: str) -> list[PlaylistItem]:
    """Lightweight M3U/M3U8 parser for playlist URL extraction.

    This parser returns a flat list of playlist items with basic metadata.
    Supports HLS master playlist tags (#EXT-X-STREAM-INF, #EXT-X-KEY) for
    stream selection and quality sorting, but does not preserve segment-level
    details or playlist structure.

    Per item, ``length`` and ``title`` come from the preceding #EXTINF line
    (a length of "-1" is reported as None), ``stream_info`` holds the raw
    attributes of the preceding #EXT-X-STREAM-INF line (quoted values keep
    their quotes) and ``key`` is the URI of the most recent #EXT-X-KEY line.
    The key stays in effect for all following items until another #EXT-X-KEY
    line replaces it or clears it with METHOD=NONE.

    Based on https://github.com/dvndrsn/M3uParser/blob/master/m3uparser.py
    """
    # NOTE(review): this Mozilla gecko link appears to relate to HLS content
    # types rather than to this parser; kept for reference:
    # https://github.com/mozilla/gecko-dev/blob/c4c1adbae87bf2d128c39832d72498550ee1b4b8/dom/media/DecoderTraits.cpp#L47-L52
    extinf_tag = "#EXTINF:"
    stream_inf_tag = "#EXT-X-STREAM-INF:"
    key_tag = "#EXT-X-KEY:"

    playlist: list[PlaylistItem] = []

    length: str | None = None
    title: str | None = None
    stream_info: dict[str, str] | None = None
    key: str | None = None

    for raw_line in m3u_data.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith(extinf_tag):
            # Get length and title from #EXTINF line ("<length>,<title>")
            duration, separator, name = line[len(extinf_tag) :].partition(",")
            if not separator:
                # malformed #EXTINF without a title part: ignore it
                continue
            duration = duration.strip()
            length = None if duration == "-1" else duration
            title = name.strip()
        elif line.startswith(stream_inf_tag):
            # HLS master playlist variant stream properties (BANDWIDTH, RESOLUTION, etc.)
            # https://datatracker.ietf.org/doc/html/draft-pantos-http-live-streaming-19#section-10
            stream_info = _parse_hls_attributes(line[len(stream_inf_tag) :])
        elif line.startswith(key_tag):
            # Extract encryption key URI from master/media playlist
            key_attributes = _parse_hls_attributes(line[len(key_tag) :])
            if key_attributes.get("METHOD") == "NONE":
                # METHOD=NONE means no encryption, so explicitly clear the key
                key = None
            elif "URI" in key_attributes:
                # only the URI itself, without quotes or trailing attributes (IV etc.)
                key = key_attributes["URI"].strip('"')
        elif line.startswith("#"):
            # Ignore other extensions
            continue
        else:
            # apparently VLC manages to encode spaces in filenames
            filepath = line.replace("%20", " ")
            # replace Windows directory separators
            filepath = filepath.replace("\\", "/")
            playlist.append(
                PlaylistItem(
                    path=filepath, length=length, title=title, stream_info=stream_info, key=key
                )
            )
            # reset the song variables so it doesn't use the same EXTINF more than once
            # (the key is kept on purpose: it applies until the next #EXT-X-KEY)
            length = None
            title = None
            stream_info = None

    return playlist
119
120
def parse_pls(pls_data: str) -> list[PlaylistItem]:
    """Parse (only) filenames/urls from pls playlist file.

    Reads the ``[playlist]`` section and returns one item for every
    ``File<N>`` option with N from 1 up to ``NumberOfEntries``; missing
    numbers are skipped. ``Title<N>`` and ``Length<N>`` are attached when
    present, and a missing, empty or "-1" length is reported as None.

    Raises:
        InvalidDataError: if the data can not be parsed, has no
            ``[playlist]`` section or has no valid ``NumberOfEntries``.
    """
    # Interpolation must be disabled: playlist values routinely contain "%"
    # (e.g. percent-encoded URLs such as "my%20song.mp3"), which the default
    # BasicInterpolation rejects with an InterpolationSyntaxError as soon as
    # the value is read.
    pls_parser = configparser.ConfigParser(strict=False, interpolation=None)
    try:
        pls_parser.read_string(pls_data, "playlist")
    except configparser.Error as err:
        raise InvalidDataError("Can't parse playlist") from err

    if "playlist" not in pls_parser:
        raise InvalidDataError("Invalid playlist")

    try:
        num_entries = pls_parser.getint("playlist", "NumberOfEntries")
    except (configparser.NoOptionError, ValueError) as err:
        raise InvalidDataError("Invalid NumberOfEntries in playlist") from err

    playlist_section = pls_parser["playlist"]

    playlist: list[PlaylistItem] = []
    for entry in range(1, num_entries + 1):
        itempath = playlist_section.get(f"File{entry}")
        if itempath is None:
            # entry numbers may have gaps
            continue
        length = playlist_section.get(f"Length{entry}")
        playlist.append(
            PlaylistItem(
                length=length if length and length != "-1" else None,
                title=playlist_section.get(f"Title{entry}"),
                path=itempath,
            )
        )
    return playlist
154
155
async def fetch_playlist(
    mass: MusicAssistant, url: str, raise_on_hls: bool = True
) -> list[PlaylistItem]:
    """Fetch and parse an online m3u or pls playlist.

    At most the first 64 KiB of the response are read, with a total request
    timeout of 5 seconds. The data is parsed as PLS when the URL path ends
    with "pls" or the data contains a "[playlist]" section, otherwise as M3U.

    Args:
        mass: MusicAssistant instance providing the shared HTTP session.
        url: Location of the playlist.
        raise_on_hls: When True (default), raise IsHLSPlaylist instead of
            parsing if the data contains HLS tags (#EXT-X-VERSION or
            #EXT-X-STREAM-INF). When False, HLS playlists are parsed like
            any other M3U playlist.

    Raises:
        IsHLSPlaylist: the playlist is an HLS playlist and raise_on_hls is set.
        InvalidDataError: the playlist could not be fetched, decoded or
            parsed, or it contains no items.
    """
    try:
        async with mass.http_session.get(
            url, allow_redirects=True, timeout=ClientTimeout(total=5)
        ) as resp:
            try:
                raw_data = await resp.content.read(64 * 1024)
                # NOTE: resp.charset is not reliable (and often absent); it is
                # used when the server declares one, otherwise we detect the
                # charset ourselves
                encoding = resp.charset or await detect_charset(raw_data)
                playlist_data = raw_data.decode(encoding, errors="replace")
            except (LookupError, ValueError, UnicodeDecodeError) as err:
                # LookupError: the declared/detected charset is unknown to Python
                msg = f"Could not decode playlist {url}"
                raise InvalidDataError(msg) from err
    except TimeoutError as err:
        msg = f"Timeout while fetching playlist {url}"
        raise InvalidDataError(msg) from err
    except client_exceptions.ClientError as err:
        msg = f"Error while fetching playlist {url}"
        raise InvalidDataError(msg) from err

    # raise_on_hls must guard both HLS markers; previously a playlist with
    # #EXT-X-STREAM-INF raised even when the caller passed raise_on_hls=False
    is_hls = "#EXT-X-VERSION:" in playlist_data or "#EXT-X-STREAM-INF:" in playlist_data
    if raise_on_hls and is_hls:
        raise IsHLSPlaylist

    if urlparse(url).path.endswith("pls") or "[playlist]" in playlist_data:
        playlist = parse_pls(playlist_data)
    else:
        playlist = parse_m3u(playlist_data)

    if not playlist:
        msg = f"Empty playlist {url}"
        raise InvalidDataError(msg)

    return playlist
194