music-assistant-server

23.5 KB•PY

fades.py

23.5 KB • 610 lines • python

1"""Smart Fades - Audio fade implementations."""
2
3from __future__ import annotations
4
5import logging
6from abc import ABC, abstractmethod
7from typing import TYPE_CHECKING
8
9import aiofiles
10import numpy as np
11import numpy.typing as npt
12import shortuuid
13
14from music_assistant.constants import VERBOSE_LOG_LEVEL
15from music_assistant.controllers.streams.smart_fades.filters import (
16    CrossfadeFilter,
17    Filter,
18    FrequencySweepFilter,
19    TimeStretchFilter,
20    TrimFilter,
21)
22from music_assistant.helpers.process import communicate
23from music_assistant.helpers.util import remove_file
24from music_assistant.models.smart_fades import (
25    SmartFadesAnalysis,
26)
27
28if TYPE_CHECKING:
29    from music_assistant_models.media_items import AudioFormat
30
31SMART_CROSSFADE_DURATION = 45
32
33
34class SmartFade(ABC):
35    """Abstract base class for Smart Fades."""
36
37    filters: list[Filter]
38
39    def __init__(self, logger: logging.Logger) -> None:
40        """Initialize SmartFade base class."""
41        self.filters = []
42        self.logger = logger
43
44    @abstractmethod
45    def _build(self) -> None:
46        """Build the smart fades filter chain."""
47        ...
48
49    def _get_ffmpeg_filters(
50        self,
51        input_fadein_label: str = "[1]",
52        input_fadeout_label: str = "[0]",
53    ) -> list[str]:
54        """Get FFmpeg filters for smart fades."""
55        if not self.filters:
56            self._build()
57        filters = []
58        _cur_fadein_label = input_fadein_label
59        _cur_fadeout_label = input_fadeout_label
60        for audio_filter in self.filters:
61            filter_strings = audio_filter.apply(_cur_fadein_label, _cur_fadeout_label)
62            filters.extend(filter_strings)
63            _cur_fadein_label = f"[{audio_filter.output_fadein_label}]"
64            _cur_fadeout_label = f"[{audio_filter.output_fadeout_label}]"
65        return filters
66
67    async def apply(
68        self,
69        fade_out_part: bytes,
70        fade_in_part: bytes,
71        pcm_format: AudioFormat,
72    ) -> bytes:
73        """Apply the smart fade to the given PCM audio parts."""
74        # Write the fade_out_part to a temporary file
75        fadeout_filename = f"/tmp/{shortuuid.random(20)}.pcm"  # noqa: S108
76        async with aiofiles.open(fadeout_filename, "wb") as outfile:
77            await outfile.write(fade_out_part)
78
79        args = [
80            "ffmpeg",
81            "-hide_banner",
82            "-loglevel",
83            "error",
84            # Input 1: fadeout part (as file)
85            "-acodec",
86            pcm_format.content_type.name.lower(),  # e.g., "pcm_f32le" not just "f32le"
87            "-ac",
88            str(pcm_format.channels),
89            "-ar",
90            str(pcm_format.sample_rate),
91            "-channel_layout",
92            "mono" if pcm_format.channels == 1 else "stereo",
93            "-f",
94            pcm_format.content_type.value,
95            "-i",
96            fadeout_filename,
97            # Input 2: fade_in part (stdin)
98            "-acodec",
99            pcm_format.content_type.name.lower(),
100            "-ac",
101            str(pcm_format.channels),
102            "-ar",
103            str(pcm_format.sample_rate),
104            "-channel_layout",
105            "mono" if pcm_format.channels == 1 else "stereo",
106            "-f",
107            pcm_format.content_type.value,
108            "-i",
109            "-",
110        ]
111        smart_fade_filters = self._get_ffmpeg_filters()
112        self.logger.debug(
113            "Applying smartfade: %s",
114            self,
115        )
116        args.extend(
117            [
118                "-filter_complex",
119                ";".join(smart_fade_filters),
120                # Output format specification - must match input codec format
121                "-acodec",
122                pcm_format.content_type.name.lower(),
123                "-ac",
124                str(pcm_format.channels),
125                "-ar",
126                str(pcm_format.sample_rate),
127                "-channel_layout",
128                "mono" if pcm_format.channels == 1 else "stereo",
129                "-f",
130                pcm_format.content_type.value,
131                "-",
132            ]
133        )
134        self.logger.log(VERBOSE_LOG_LEVEL, "FFmpeg command args: %s", " ".join(args))
135
136        try:
137            # Execute the enhanced smart fade with full buffer
138            _, raw_crossfade_output, stderr = await communicate(args, fade_in_part)
139
140            if raw_crossfade_output:
141                return raw_crossfade_output
142            stderr_msg = stderr.decode() if stderr else "(no stderr output)"
143            raise RuntimeError(f"Smart crossfade failed. FFmpeg stderr: {stderr_msg}")
144        finally:
145            # Always cleanup temp file, even if ffmpeg fails
146            await remove_file(fadeout_filename)
147
148    def __repr__(self) -> str:
149        """Return string representation of SmartFade showing the filter chain."""
150        if not self.filters:
151            return f"<{self.__class__.__name__}: 0 filters>"
152
153        chain = " â ".join(repr(f) for f in self.filters)
154        return f"<{self.__class__.__name__}: {len(self.filters)} filters> {chain}"
155
156
157class SmartCrossFade(SmartFade):
158    """Smart fades class that implements a Smart Fade mode."""
159
160    # Only apply time stretching if BPM difference is < this %
161    time_stretch_bpm_percentage_threshold: float = 5.0
162
163    def __init__(
164        self,
165        logger: logging.Logger,
166        fade_out_analysis: SmartFadesAnalysis,
167        fade_in_analysis: SmartFadesAnalysis,
168    ) -> None:
169        """Initialize SmartFades with analysis data.
170
171        Args:
172            fade_out_analysis: Analysis data for the outgoing track
173            fade_in_analysis: Analysis data for the incoming track
174            logger: Optional logger for debug output
175        """
176        self.fade_out_analysis = fade_out_analysis
177        self.fade_in_analysis = fade_in_analysis
178        super().__init__(logger)
179
180    def _build(self) -> None:
181        """Build the smart fades filter chain."""
182        # Calculate tempo factor for time stretching
183        bpm_ratio = self.fade_in_analysis.bpm / self.fade_out_analysis.bpm
184        bpm_diff_percent = abs(1.0 - bpm_ratio) * 100
185
186        # Extrapolate downbeats for better bar calculation
187        self.extrapolated_fadeout_downbeats = extrapolate_downbeats(
188            self.fade_out_analysis.downbeats,
189            tempo_factor=1.0,
190            bpm=self.fade_out_analysis.bpm,
191        )
192
193        # Additional verbose logging to debug rare failures
194        self.logger.log(
195            VERBOSE_LOG_LEVEL,
196            "SmartCrossFade build: fade_out: %s, fade_in: %s",
197            self.fade_out_analysis,
198            self.fade_in_analysis,
199        )
200
201        # Calculate optimal crossfade bars that fit in available buffer
202        crossfade_bars = self._calculate_optimal_crossfade_bars()
203
204        # Calculate beat positions for the selected bar count
205        fadein_start_pos = self._calculate_optimal_fade_timing(crossfade_bars)
206
207        # Calculate initial crossfade duration (may be adjusted later for downbeat alignment)
208        crossfade_duration = self._calculate_crossfade_duration(crossfade_bars=crossfade_bars)
209
210        # Add time stretch filter if needed
211        if (
212            0.1 < bpm_diff_percent <= self.time_stretch_bpm_percentage_threshold
213            and crossfade_bars > 4
214        ):
215            self.filters.append(TimeStretchFilter(logger=self.logger, stretch_ratio=bpm_ratio))
216            # Re-extrapolate downbeats with actual tempo factor for time-stretched audio
217            self.extrapolated_fadeout_downbeats = extrapolate_downbeats(
218                self.fade_out_analysis.downbeats,
219                tempo_factor=bpm_ratio,
220                bpm=self.fade_out_analysis.bpm,
221            )
222
223        # Check if we would have enough audio after beat alignment for the crossfade
224        if fadein_start_pos and fadein_start_pos + crossfade_duration <= SMART_CROSSFADE_DURATION:
225            self.filters.append(TrimFilter(logger=self.logger, fadein_start_pos=fadein_start_pos))
226        else:
227            self.logger.log(
228                VERBOSE_LOG_LEVEL,
229                "Skipping beat alignment: not enough audio after trim (%.1fs + %.1fs > %.1fs)",
230                fadein_start_pos,
231                crossfade_duration,
232                SMART_CROSSFADE_DURATION,
233            )
234
235        # Adjust crossfade duration to align with outgoing track's downbeats
236        crossfade_duration = self._adjust_crossfade_to_downbeats(
237            crossfade_duration=crossfade_duration,
238            fadein_start_pos=fadein_start_pos,
239        )
240
241        # 90 BPM -> 1500Hz, 140 BPM -> 2500Hz
242        avg_bpm = (self.fade_out_analysis.bpm + self.fade_in_analysis.bpm) / 2
243        crossover_freq = int(np.clip(1500 + (avg_bpm - 90) * 20, 1500, 2500))
244
245        # Adjust for BPM mismatch
246        if abs(bpm_ratio - 1.0) > 0.3:
247            crossover_freq = int(crossover_freq * 0.85)
248
249        # For shorter fades, use exp/exp curves to avoid abruptness
250        if crossfade_bars < 8:
251            fadeout_curve = "exponential"
252            fadein_curve = "exponential"
253        # For long fades, use log/linear curves
254        else:
255            # Use logarithmic curve to give the next track more space
256            fadeout_curve = "logarithmic"
257            # Use linear curve for transition, predictable and not too abrupt
258            fadein_curve = "linear"
259
260        # Create lowpass filter on the outgoing track (unfiltered â low-pass)
261        # Extended lowpass effect to gradually remove bass frequencies
262        fadeout_eq_duration = min(max(crossfade_duration * 2.5, 8.0), SMART_CROSSFADE_DURATION)
263        # The crossfade always happens at the END of the buffer
264        fadeout_eq_start = max(0, SMART_CROSSFADE_DURATION - fadeout_eq_duration)
265        fadeout_sweep = FrequencySweepFilter(
266            logger=self.logger,
267            sweep_type="lowpass",
268            target_freq=crossover_freq,
269            duration=fadeout_eq_duration,
270            start_time=fadeout_eq_start,
271            sweep_direction="fade_in",
272            poles=1,
273            curve_type=fadeout_curve,
274            stream_type="fadeout",
275        )
276        self.filters.append(fadeout_sweep)
277
278        # Create high pass filter on the incoming track (high-pass â unfiltered)
279        # Quicker highpass removal to avoid lingering vocals after crossfade
280        fadein_eq_duration = crossfade_duration / 1.5
281        fadein_sweep = FrequencySweepFilter(
282            logger=self.logger,
283            sweep_type="highpass",
284            target_freq=crossover_freq,
285            duration=fadein_eq_duration,
286            start_time=0,
287            sweep_direction="fade_out",
288            poles=1,
289            curve_type=fadein_curve,
290            stream_type="fadein",
291        )
292        self.filters.append(fadein_sweep)
293
294        # Add final crossfade filter
295        crossfade_filter = CrossfadeFilter(
296            logger=self.logger, crossfade_duration=crossfade_duration
297        )
298        self.filters.append(crossfade_filter)
299
300    def _calculate_crossfade_duration(self, crossfade_bars: int) -> float:
301        """Calculate final crossfade duration based on musical bars and BPM."""
302        # Calculate crossfade duration based on incoming track's BPM
303        beats_per_bar = 4
304        seconds_per_beat = 60.0 / self.fade_in_analysis.bpm
305        musical_duration = crossfade_bars * beats_per_bar * seconds_per_beat
306
307        # Apply buffer constraint
308        actual_duration = min(musical_duration, SMART_CROSSFADE_DURATION)
309
310        # Log if we had to constrain the duration
311        if musical_duration > SMART_CROSSFADE_DURATION:
312            self.logger.log(
313                VERBOSE_LOG_LEVEL,
314                "Constraining crossfade duration from %.1fs to %.1fs (buffer limit)",
315                musical_duration,
316                actual_duration,
317            )
318
319        return actual_duration
320
321    def _calculate_optimal_crossfade_bars(self) -> int:
322        """Calculate optimal crossfade bars that fit in available buffer."""
323        bpm_in = self.fade_in_analysis.bpm
324        bpm_out = self.fade_out_analysis.bpm
325        bpm_diff_percent = abs(1.0 - bpm_in / bpm_out) * 100
326
327        # Calculate ideal bars based on BPM compatibility
328        ideal_bars = 10 if bpm_diff_percent <= self.time_stretch_bpm_percentage_threshold else 6
329
330        # Reduce bars until it fits in the fadein buffer
331        for bars in [ideal_bars, 8, 6, 4, 2, 1]:
332            if bars > ideal_bars:
333                continue
334
335            fadein_start_pos = self._calculate_optimal_fade_timing(bars)
336            if fadein_start_pos is None:
337                continue
338
339            # Calculate what the duration would be
340            test_duration = self._calculate_crossfade_duration(crossfade_bars=bars)
341
342            # Check if it fits in fadein buffer
343            fadein_buffer = SMART_CROSSFADE_DURATION - fadein_start_pos
344            if test_duration <= fadein_buffer:
345                if bars < ideal_bars:
346                    self.logger.log(
347                        VERBOSE_LOG_LEVEL,
348                        "Reduced crossfade from %d to %d bars (fadein buffer=%.1fs, needed=%.1fs)",
349                        ideal_bars,
350                        bars,
351                        fadein_buffer,
352                        test_duration,
353                    )
354                return bars
355
356        # Fall back to 1 bar if nothing else fits
357        return 1
358
359    def _calculate_optimal_fade_timing(self, crossfade_bars: int) -> float | None:
360        """Calculate beat positions for alignment."""
361        beats_per_bar = 4
362
363        def calculate_beat_positions(
364            fade_out_beats: npt.NDArray[np.float64],
365            fade_in_beats: npt.NDArray[np.float64],
366            num_beats: int,
367        ) -> float | None:
368            """Calculate start positions from beat arrays."""
369            if len(fade_out_beats) < num_beats or len(fade_in_beats) < num_beats:
370                return None
371
372            fade_in_slice = fade_in_beats[:num_beats]
373            return float(fade_in_slice[0])
374
375        # Try downbeats first for most musical timing
376        downbeat_positions = calculate_beat_positions(
377            self.extrapolated_fadeout_downbeats, self.fade_in_analysis.downbeats, crossfade_bars
378        )
379        if downbeat_positions:
380            return downbeat_positions
381
382        # Try regular beats if downbeats insufficient
383        required_beats = crossfade_bars * beats_per_bar
384        beat_positions = calculate_beat_positions(
385            self.fade_out_analysis.beats, self.fade_in_analysis.beats, required_beats
386        )
387        if beat_positions:
388            return beat_positions
389
390        # Fallback: No beat alignment possible
391        self.logger.log(VERBOSE_LOG_LEVEL, "No beat alignment possible (insufficient beats)")
392        return None
393
394    def _adjust_crossfade_to_downbeats(
395        self,
396        crossfade_duration: float,
397        fadein_start_pos: float | None,
398    ) -> float:
399        """Adjust crossfade duration to align with outgoing track's downbeats."""
400        # If we don't have downbeats or beat alignment is disabled, return original duration
401        if len(self.extrapolated_fadeout_downbeats) == 0 or fadein_start_pos is None:
402            return crossfade_duration
403
404        # Calculate where the crossfade would start in the buffer
405        ideal_start_pos = SMART_CROSSFADE_DURATION - crossfade_duration
406
407        # Debug logging
408        self.logger.log(
409            VERBOSE_LOG_LEVEL,
410            "Downbeat adjustment - ideal_start=%.2fs (buffer=%.1fs - crossfade=%.2fs), "
411            "fadein_start=%.2fs",
412            ideal_start_pos,
413            SMART_CROSSFADE_DURATION,
414            crossfade_duration,
415            fadein_start_pos,
416        )
417
418        # Find the closest downbeats (earlier and later)
419        earlier_downbeat = None
420        later_downbeat = None
421
422        for downbeat in self.extrapolated_fadeout_downbeats:
423            if downbeat <= ideal_start_pos:
424                earlier_downbeat = downbeat
425            elif downbeat > ideal_start_pos and later_downbeat is None:
426                later_downbeat = downbeat
427                break
428
429        # Try earlier downbeat first (longer crossfade)
430        if earlier_downbeat is not None:
431            adjusted_duration = float(SMART_CROSSFADE_DURATION - earlier_downbeat)
432            if fadein_start_pos + adjusted_duration <= SMART_CROSSFADE_DURATION:
433                if abs(adjusted_duration - crossfade_duration) > 0.1:
434                    self.logger.log(
435                        VERBOSE_LOG_LEVEL,
436                        "Adjusted crossfade duration from %.2fs to %.2fs to align with "
437                        "downbeat at %.2fs (earlier)",
438                        crossfade_duration,
439                        adjusted_duration,
440                        earlier_downbeat,
441                    )
442                return adjusted_duration
443
444        # Try later downbeat (shorter crossfade)
445        if later_downbeat is not None:
446            adjusted_duration = float(SMART_CROSSFADE_DURATION - later_downbeat)
447            if fadein_start_pos + adjusted_duration <= SMART_CROSSFADE_DURATION:
448                if abs(adjusted_duration - crossfade_duration) > 0.1:
449                    self.logger.log(
450                        VERBOSE_LOG_LEVEL,
451                        "Adjusted crossfade duration from %.2fs to %.2fs to align with "
452                        "downbeat at %.2fs (later)",
453                        crossfade_duration,
454                        adjusted_duration,
455                        later_downbeat,
456                    )
457                return adjusted_duration
458
459        # If no suitable downbeat found, return original duration
460        self.logger.log(
461            VERBOSE_LOG_LEVEL,
462            "Could not adjust crossfade duration to downbeats, using original %.2fs",
463            crossfade_duration,
464        )
465        return crossfade_duration
466
467
468class StandardCrossFade(SmartFade):
469    """Standard crossfade class that implements a standard crossfade mode."""
470
471    def __init__(self, logger: logging.Logger, crossfade_duration: float = 10.0) -> None:
472        """Initialize StandardCrossFade with crossfade duration."""
473        self.crossfade_duration = crossfade_duration
474        super().__init__(logger)
475
476    def _build(self) -> None:
477        """Build the standard crossfade filter chain."""
478        self.filters = [
479            CrossfadeFilter(logger=self.logger, crossfade_duration=self.crossfade_duration),
480        ]
481
482    async def apply(
483        self, fade_out_part: bytes, fade_in_part: bytes, pcm_format: AudioFormat
484    ) -> bytes:
485        """Apply the standard crossfade to the given PCM audio parts."""
486        # We need to override the default apply here, since standard crossfade only needs to be
487        # applied to the overlapping parts, not the full buffers.
488        crossfade_size = int(pcm_format.pcm_sample_size * self.crossfade_duration)
489        # Pre-crossfade: outgoing track minus the crossfaded portion
490        pre_crossfade = fade_out_part[:-crossfade_size]
491        # Post-crossfade: incoming track minus the crossfaded portion
492        post_crossfade = fade_in_part[crossfade_size:]
493        # Adjust portions to exact crossfade size
494        adjusted_fade_in_part = fade_in_part[:crossfade_size]
495        adjusted_fade_out_part = fade_out_part[-crossfade_size:]
496        # Adjust the duration to match actual sizes
497        self.crossfade_duration = min(
498            len(adjusted_fade_in_part) / pcm_format.pcm_sample_size,
499            len(adjusted_fade_out_part) / pcm_format.pcm_sample_size,
500        )
501        # Crossfaded portion: user's configured duration
502        crossfaded_section = await super().apply(
503            adjusted_fade_out_part, adjusted_fade_in_part, pcm_format
504        )
505        # Full result: everything concatenated
506        return pre_crossfade + crossfaded_section + post_crossfade
507
508
509# HELPER METHODS
510def get_bpm_diff_percentage(bpm1: float, bpm2: float) -> float:
511    """Calculate BPM difference percentage between two BPM values."""
512    return abs(1.0 - bpm1 / bpm2) * 100
513
514
515def extrapolate_downbeats(
516    downbeats: npt.NDArray[np.float64],
517    tempo_factor: float,
518    buffer_size: float = SMART_CROSSFADE_DURATION,
519    bpm: float | None = None,
520) -> npt.NDArray[np.float64]:
521    """Extrapolate downbeats based on actual intervals when detection is incomplete.
522
523    This is needed when we want to perform beat alignment in an 'atmospheric' outro
524    that does not have any detected downbeats.
525
526    Args:
527        downbeats: Array of detected downbeat positions in seconds
528        tempo_factor: Tempo adjustment factor for time stretching
529        buffer_size: Maximum buffer size in seconds
530        bpm: Optional BPM for validation when extrapolating with only 2 downbeats
531    """
532    # Handle case with exactly 2 downbeats (with BPM validation)
533    if len(downbeats) == 2 and bpm is not None:
534        interval = float(downbeats[1] - downbeats[0])
535
536        # Expected interval for this BPM (assuming 4/4 time signature)
537        expected_interval = (60.0 / bpm) * 4
538
539        # Only extrapolate if interval matches BPM within 15% tolerance
540        if abs(interval - expected_interval) / expected_interval < 0.15:
541            # Adjust detected downbeats for time stretching first
542            adjusted_downbeats = downbeats / tempo_factor
543            last_downbeat = adjusted_downbeats[-1]
544
545            # If the last downbeat is close to the buffer end, no extrapolation needed
546            if last_downbeat >= buffer_size - 5:
547                return adjusted_downbeats
548
549            # Adjust the interval for time stretching
550            adjusted_interval = interval / tempo_factor
551
552            # Extrapolate forward from last adjusted downbeat using adjusted interval
553            extrapolated = []
554            current_pos = last_downbeat + adjusted_interval
555            max_extrapolation_distance = 125.0  # Don't extrapolate more than 25s
556
557            while (
558                current_pos < buffer_size
559                and (current_pos - last_downbeat) <= max_extrapolation_distance
560            ):
561                extrapolated.append(current_pos)
562                current_pos += adjusted_interval
563
564            if extrapolated:
565                # Combine adjusted detected downbeats and extrapolated downbeats
566                return np.concatenate([adjusted_downbeats, np.array(extrapolated)])
567
568            return adjusted_downbeats
569        # else: interval doesn't match BPM, fall through to return original
570
571    if len(downbeats) < 2:
572        # Need at least 2 downbeats to extrapolate
573        return downbeats / tempo_factor
574
575    # Adjust detected downbeats for time stretching first
576    adjusted_downbeats = downbeats / tempo_factor
577    last_downbeat = adjusted_downbeats[-1]
578
579    # If the last downbeat is close to the buffer end, no extrapolation needed
580    if last_downbeat >= buffer_size - 5:
581        return adjusted_downbeats
582
583    # Calculate intervals from ORIGINAL downbeats (before time stretching)
584    intervals = np.diff(downbeats)
585    median_interval = float(np.median(intervals))
586    std_interval = float(np.std(intervals))
587
588    # Only extrapolate if intervals are consistent (low standard deviation)
589    if std_interval > 0.2:
590        return adjusted_downbeats
591
592    # Adjust the interval for time stretching
593    # When slowing down (tempo_factor < 1.0), intervals get longer
594    adjusted_interval = median_interval / tempo_factor
595
596    # Extrapolate forward from last adjusted downbeat using adjusted interval
597    extrapolated = []
598    current_pos = last_downbeat + adjusted_interval
599    max_extrapolation_distance = 25.0  # Don't extrapolate more than 25s
600
601    while current_pos < buffer_size and (current_pos - last_downbeat) <= max_extrapolation_distance:
602        extrapolated.append(current_pos)
603        current_pos += adjusted_interval
604
605    if extrapolated:
606        # Combine adjusted detected downbeats and extrapolated downbeats
607        return np.concatenate([adjusted_downbeats, np.array(extrapolated)])
608
609    return adjusted_downbeats
610

1"""Smart Fades - Audio fade implementations.""" 2 3from __future__ import annotations 4 5import logging 6from abc import ABC, abstractmethod 7from typing import TYPE_CHECKING 8 9import aiofiles 10import numpy as np 11import numpy.typing as npt 12import shortuuid 13 14from music_assistant.constants import VERBOSE_LOG_LEVEL 15from music_assistant.controllers.streams.smart_fades.filters import ( 16 CrossfadeFilter, 17 Filter, 18 FrequencySweepFilter, 19 TimeStretchFilter, 20 TrimFilter, 21) 22from music_assistant.helpers.process import communicate 23from music_assistant.helpers.util import remove_file 24from music_assistant.models.smart_fades import ( 25 SmartFadesAnalysis, 26) 27 28if TYPE_CHECKING: 29 from music_assistant_models.media_items import AudioFormat 30 31SMART_CROSSFADE_DURATION = 45 32 33 34class SmartFade(ABC): 35 """Abstract base class for Smart Fades.""" 36 37 filters: list[Filter] 38 39 def __init__(self, logger: logging.Logger) -> None: 40 """Initialize SmartFade base class.""" 41 self.filters = [] 42 self.logger = logger 43 44 @abstractmethod 45 def _build(self) -> None: 46 """Build the smart fades filter chain.""" 47 ... 48 49 def _get_ffmpeg_filters( 50 self, 51 input_fadein_label: str = "[1]", 52 input_fadeout_label: str = "[0]", 53 ) -> list[str]: 54 """Get FFmpeg filters for smart fades.""" 55 if not self.filters: 56 self._build() 57 filters = [] 58 _cur_fadein_label = input_fadein_label 59 _cur_fadeout_label = input_fadeout_label 60 for audio_filter in self.filters: 61 filter_strings = audio_filter.apply(_cur_fadein_label, _cur_fadeout_label) 62 filters.extend(filter_strings) 63 _cur_fadein_label = f"[{audio_filter.output_fadein_label}]" 64 _cur_fadeout_label = f"[{audio_filter.output_fadeout_label}]" 65 return filters 66 67 async def apply( 68 self, 69 fade_out_part: bytes, 70 fade_in_part: bytes, 71 pcm_format: AudioFormat, 72 ) -> bytes: 73 """Apply the smart fade to the given PCM audio parts.""" 74 # Write the fade_out_part to a temporary file 75 fadeout_filename = f"/tmp/{shortuuid.random(20)}.pcm" # noqa: S108 76 async with aiofiles.open(fadeout_filename, "wb") as outfile: 77 await outfile.write(fade_out_part) 78 79 args = [ 80 "ffmpeg", 81 "-hide_banner", 82 "-loglevel", 83 "error", 84 # Input 1: fadeout part (as file) 85 "-acodec", 86 pcm_format.content_type.name.lower(), # e.g., "pcm_f32le" not just "f32le" 87 "-ac", 88 str(pcm_format.channels), 89 "-ar", 90 str(pcm_format.sample_rate), 91 "-channel_layout", 92 "mono" if pcm_format.channels == 1 else "stereo", 93 "-f", 94 pcm_format.content_type.value, 95 "-i", 96 fadeout_filename, 97 # Input 2: fade_in part (stdin) 98 "-acodec", 99 pcm_format.content_type.name.lower(), 100 "-ac", 101 str(pcm_format.channels), 102 "-ar", 103 str(pcm_format.sample_rate), 104 "-channel_layout", 105 "mono" if pcm_format.channels == 1 else "stereo", 106 "-f", 107 pcm_format.content_type.value, 108 "-i", 109 "-", 110 ] 111 smart_fade_filters = self._get_ffmpeg_filters() 112 self.logger.debug( 113 "Applying smartfade: %s", 114 self, 115 ) 116 args.extend( 117 [ 118 "-filter_complex", 119 ";".join(smart_fade_filters), 120 # Output format specification - must match input codec format 121 "-acodec", 122 pcm_format.content_type.name.lower(), 123 "-ac", 124 str(pcm_format.channels), 125 "-ar", 126 str(pcm_format.sample_rate), 127 "-channel_layout", 128 "mono" if pcm_format.channels == 1 else "stereo", 129 "-f", 130 pcm_format.content_type.value, 131 "-", 132 ] 133 ) 134 self.logger.log(VERBOSE_LOG_LEVEL, "FFmpeg command args: %s", " ".join(args)) 135 136 try: 137 # Execute the enhanced smart fade with full buffer 138 _, raw_crossfade_output, stderr = await communicate(args, fade_in_part) 139 140 if raw_crossfade_output: 141 return raw_crossfade_output 142 stderr_msg = stderr.decode() if stderr else "(no stderr output)" 143 raise RuntimeError(f"Smart crossfade failed. FFmpeg stderr: {stderr_msg}") 144 finally: 145 # Always cleanup temp file, even if ffmpeg fails 146 await remove_file(fadeout_filename) 147 148 def __repr__(self) -> str: 149 """Return string representation of SmartFade showing the filter chain.""" 150 if not self.filters: 151 return f"<{self.__class__.__name__}: 0 filters>" 152 153 chain = " â ".join(repr(f) for f in self.filters) 154 return f"<{self.__class__.__name__}: {len(self.filters)} filters> {chain}" 155 156 157class SmartCrossFade(SmartFade): 158 """Smart fades class that implements a Smart Fade mode.""" 159 160 # Only apply time stretching if BPM difference is < this % 161 time_stretch_bpm_percentage_threshold: float = 5.0 162 163 def __init__( 164 self, 165 logger: logging.Logger, 166 fade_out_analysis: SmartFadesAnalysis, 167 fade_in_analysis: SmartFadesAnalysis, 168 ) -> None: 169 """Initialize SmartFades with analysis data. 170 171 Args: 172 fade_out_analysis: Analysis data for the outgoing track 173 fade_in_analysis: Analysis data for the incoming track 174 logger: Optional logger for debug output 175 """ 176 self.fade_out_analysis = fade_out_analysis 177 self.fade_in_analysis = fade_in_analysis 178 super().__init__(logger) 179 180 def _build(self) -> None: 181 """Build the smart fades filter chain.""" 182 # Calculate tempo factor for time stretching 183 bpm_ratio = self.fade_in_analysis.bpm / self.fade_out_analysis.bpm 184 bpm_diff_percent = abs(1.0 - bpm_ratio) * 100 185 186 # Extrapolate downbeats for better bar calculation 187 self.extrapolated_fadeout_downbeats = extrapolate_downbeats( 188 self.fade_out_analysis.downbeats, 189 tempo_factor=1.0, 190 bpm=self.fade_out_analysis.bpm, 191 ) 192 193 # Additional verbose logging to debug rare failures 194 self.logger.log( 195 VERBOSE_LOG_LEVEL, 196 "SmartCrossFade build: fade_out: %s, fade_in: %s", 197 self.fade_out_analysis, 198 self.fade_in_analysis, 199 ) 200 201 # Calculate optimal crossfade bars that fit in available buffer 202 crossfade_bars = self._calculate_optimal_crossfade_bars() 203 204 # Calculate beat positions for the selected bar count 205 fadein_start_pos = self._calculate_optimal_fade_timing(crossfade_bars) 206 207 # Calculate initial crossfade duration (may be adjusted later for downbeat alignment) 208 crossfade_duration = self._calculate_crossfade_duration(crossfade_bars=crossfade_bars) 209 210 # Add time stretch filter if needed 211 if ( 212 0.1 < bpm_diff_percent <= self.time_stretch_bpm_percentage_threshold 213 and crossfade_bars > 4 214 ): 215 self.filters.append(TimeStretchFilter(logger=self.logger, stretch_ratio=bpm_ratio)) 216 # Re-extrapolate downbeats with actual tempo factor for time-stretched audio 217 self.extrapolated_fadeout_downbeats = extrapolate_downbeats( 218 self.fade_out_analysis.downbeats, 219 tempo_factor=bpm_ratio, 220 bpm=self.fade_out_analysis.bpm, 221 ) 222 223 # Check if we would have enough audio after beat alignment for the crossfade 224 if fadein_start_pos and fadein_start_pos + crossfade_duration <= SMART_CROSSFADE_DURATION: 225 self.filters.append(TrimFilter(logger=self.logger, fadein_start_pos=fadein_start_pos)) 226 else: 227 self.logger.log( 228 VERBOSE_LOG_LEVEL, 229 "Skipping beat alignment: not enough audio after trim (%.1fs + %.1fs > %.1fs)", 230 fadein_start_pos, 231 crossfade_duration, 232 SMART_CROSSFADE_DURATION, 233 ) 234 235 # Adjust crossfade duration to align with outgoing track's downbeats 236 crossfade_duration = self._adjust_crossfade_to_downbeats( 237 crossfade_duration=crossfade_duration, 238 fadein_start_pos=fadein_start_pos, 239 ) 240 241 # 90 BPM -> 1500Hz, 140 BPM -> 2500Hz 242 avg_bpm = (self.fade_out_analysis.bpm + self.fade_in_analysis.bpm) / 2 243 crossover_freq = int(np.clip(1500 + (avg_bpm - 90) * 20, 1500, 2500)) 244 245 # Adjust for BPM mismatch 246 if abs(bpm_ratio - 1.0) > 0.3: 247 crossover_freq = int(crossover_freq * 0.85) 248 249 # For shorter fades, use exp/exp curves to avoid abruptness 250 if crossfade_bars < 8: 251 fadeout_curve = "exponential" 252 fadein_curve = "exponential" 253 # For long fades, use log/linear curves 254 else: 255 # Use logarithmic curve to give the next track more space 256 fadeout_curve = "logarithmic" 257 # Use linear curve for transition, predictable and not too abrupt 258 fadein_curve = "linear" 259 260 # Create lowpass filter on the outgoing track (unfiltered â low-pass) 261 # Extended lowpass effect to gradually remove bass frequencies 262 fadeout_eq_duration = min(max(crossfade_duration * 2.5, 8.0), SMART_CROSSFADE_DURATION) 263 # The crossfade always happens at the END of the buffer 264 fadeout_eq_start = max(0, SMART_CROSSFADE_DURATION - fadeout_eq_duration) 265 fadeout_sweep = FrequencySweepFilter( 266 logger=self.logger, 267 sweep_type="lowpass", 268 target_freq=crossover_freq, 269 duration=fadeout_eq_duration, 270 start_time=fadeout_eq_start, 271 sweep_direction="fade_in", 272 poles=1, 273 curve_type=fadeout_curve, 274 stream_type="fadeout", 275 ) 276 self.filters.append(fadeout_sweep) 277 278 # Create high pass filter on the incoming track (high-pass â unfiltered) 279 # Quicker highpass removal to avoid lingering vocals after crossfade 280 fadein_eq_duration = crossfade_duration / 1.5 281 fadein_sweep = FrequencySweepFilter( 282 logger=self.logger, 283 sweep_type="highpass", 284 target_freq=crossover_freq, 285 duration=fadein_eq_duration, 286 start_time=0, 287 sweep_direction="fade_out", 288 poles=1, 289 curve_type=fadein_curve, 290 stream_type="fadein", 291 ) 292 self.filters.append(fadein_sweep) 293 294 # Add final crossfade filter 295 crossfade_filter = CrossfadeFilter( 296 logger=self.logger, crossfade_duration=crossfade_duration 297 ) 298 self.filters.append(crossfade_filter) 299 300 def _calculate_crossfade_duration(self, crossfade_bars: int) -> float: 301 """Calculate final crossfade duration based on musical bars and BPM.""" 302 # Calculate crossfade duration based on incoming track's BPM 303 beats_per_bar = 4 304 seconds_per_beat = 60.0 / self.fade_in_analysis.bpm 305 musical_duration = crossfade_bars * beats_per_bar * seconds_per_beat 306 307 # Apply buffer constraint 308 actual_duration = min(musical_duration, SMART_CROSSFADE_DURATION) 309 310 # Log if we had to constrain the duration 311 if musical_duration > SMART_CROSSFADE_DURATION: 312 self.logger.log( 313 VERBOSE_LOG_LEVEL, 314 "Constraining crossfade duration from %.1fs to %.1fs (buffer limit)", 315 musical_duration, 316 actual_duration, 317 ) 318 319 return actual_duration 320 321 def _calculate_optimal_crossfade_bars(self) -> int: 322 """Calculate optimal crossfade bars that fit in available buffer.""" 323 bpm_in = self.fade_in_analysis.bpm 324 bpm_out = self.fade_out_analysis.bpm 325 bpm_diff_percent = abs(1.0 - bpm_in / bpm_out) * 100 326 327 # Calculate ideal bars based on BPM compatibility 328 ideal_bars = 10 if bpm_diff_percent <= self.time_stretch_bpm_percentage_threshold else 6 329 330 # Reduce bars until it fits in the fadein buffer 331 for bars in [ideal_bars, 8, 6, 4, 2, 1]: 332 if bars > ideal_bars: 333 continue 334 335 fadein_start_pos = self._calculate_optimal_fade_timing(bars) 336 if fadein_start_pos is None: 337 continue 338 339 # Calculate what the duration would be 340 test_duration = self._calculate_crossfade_duration(crossfade_bars=bars) 341 342 # Check if it fits in fadein buffer 343 fadein_buffer = SMART_CROSSFADE_DURATION - fadein_start_pos 344 if test_duration <= fadein_buffer: 345 if bars < ideal_bars: 346 self.logger.log( 347 VERBOSE_LOG_LEVEL, 348 "Reduced crossfade from %d to %d bars (fadein buffer=%.1fs, needed=%.1fs)", 349 ideal_bars, 350 bars, 351 fadein_buffer, 352 test_duration, 353 ) 354 return bars 355 356 # Fall back to 1 bar if nothing else fits 357 return 1 358 359 def _calculate_optimal_fade_timing(self, crossfade_bars: int) -> float | None: 360 """Calculate beat positions for alignment.""" 361 beats_per_bar = 4 362 363 def calculate_beat_positions( 364 fade_out_beats: npt.NDArray[np.float64], 365 fade_in_beats: npt.NDArray[np.float64], 366 num_beats: int, 367 ) -> float | None: 368 """Calculate start positions from beat arrays.""" 369 if len(fade_out_beats) < num_beats or len(fade_in_beats) < num_beats: 370 return None 371 372 fade_in_slice = fade_in_beats[:num_beats] 373 return float(fade_in_slice[0]) 374 375 # Try downbeats first for most musical timing 376 downbeat_positions = calculate_beat_positions( 377 self.extrapolated_fadeout_downbeats, self.fade_in_analysis.downbeats, crossfade_bars 378 ) 379 if downbeat_positions: 380 return downbeat_positions 381 382 # Try regular beats if downbeats insufficient 383 required_beats = crossfade_bars * beats_per_bar 384 beat_positions = calculate_beat_positions( 385 self.fade_out_analysis.beats, self.fade_in_analysis.beats, required_beats 386 ) 387 if beat_positions: 388 return beat_positions 389 390 # Fallback: No beat alignment possible 391 self.logger.log(VERBOSE_LOG_LEVEL, "No beat alignment possible (insufficient beats)") 392 return None 393 394 def _adjust_crossfade_to_downbeats( 395 self, 396 crossfade_duration: float, 397 fadein_start_pos: float | None, 398 ) -> float: 399 """Adjust crossfade duration to align with outgoing track's downbeats.""" 400 # If we don't have downbeats or beat alignment is disabled, return original duration 401 if len(self.extrapolated_fadeout_downbeats) == 0 or fadein_start_pos is None: 402 return crossfade_duration 403 404 # Calculate where the crossfade would start in the buffer 405 ideal_start_pos = SMART_CROSSFADE_DURATION - crossfade_duration 406 407 # Debug logging 408 self.logger.log( 409 VERBOSE_LOG_LEVEL, 410 "Downbeat adjustment - ideal_start=%.2fs (buffer=%.1fs - crossfade=%.2fs), " 411 "fadein_start=%.2fs", 412 ideal_start_pos, 413 SMART_CROSSFADE_DURATION, 414 crossfade_duration, 415 fadein_start_pos, 416 ) 417 418 # Find the closest downbeats (earlier and later) 419 earlier_downbeat = None 420 later_downbeat = None 421 422 for downbeat in self.extrapolated_fadeout_downbeats: 423 if downbeat <= ideal_start_pos: 424 earlier_downbeat = downbeat 425 elif downbeat > ideal_start_pos and later_downbeat is None: 426 later_downbeat = downbeat 427 break 428 429 # Try earlier downbeat first (longer crossfade) 430 if earlier_downbeat is not None: 431 adjusted_duration = float(SMART_CROSSFADE_DURATION - earlier_downbeat) 432 if fadein_start_pos + adjusted_duration <= SMART_CROSSFADE_DURATION: 433 if abs(adjusted_duration - crossfade_duration) > 0.1: 434 self.logger.log( 435 VERBOSE_LOG_LEVEL, 436 "Adjusted crossfade duration from %.2fs to %.2fs to align with " 437 "downbeat at %.2fs (earlier)", 438 crossfade_duration, 439 adjusted_duration, 440 earlier_downbeat, 441 ) 442 return adjusted_duration 443 444 # Try later downbeat (shorter crossfade) 445 if later_downbeat is not None: 446 adjusted_duration = float(SMART_CROSSFADE_DURATION - later_downbeat) 447 if fadein_start_pos + adjusted_duration <= SMART_CROSSFADE_DURATION: 448 if abs(adjusted_duration - crossfade_duration) > 0.1: 449 self.logger.log( 450 VERBOSE_LOG_LEVEL, 451 "Adjusted crossfade duration from %.2fs to %.2fs to align with " 452 "downbeat at %.2fs (later)", 453 crossfade_duration, 454 adjusted_duration, 455 later_downbeat, 456 ) 457 return adjusted_duration 458 459 # If no suitable downbeat found, return original duration 460 self.logger.log( 461 VERBOSE_LOG_LEVEL, 462 "Could not adjust crossfade duration to downbeats, using original %.2fs", 463 crossfade_duration, 464 ) 465 return crossfade_duration 466 467 468class StandardCrossFade(SmartFade): 469 """Standard crossfade class that implements a standard crossfade mode.""" 470 471 def __init__(self, logger: logging.Logger, crossfade_duration: float = 10.0) -> None: 472 """Initialize StandardCrossFade with crossfade duration.""" 473 self.crossfade_duration = crossfade_duration 474 super().__init__(logger) 475 476 def _build(self) -> None: 477 """Build the standard crossfade filter chain.""" 478 self.filters = [ 479 CrossfadeFilter(logger=self.logger, crossfade_duration=self.crossfade_duration), 480 ] 481 482 async def apply( 483 self, fade_out_part: bytes, fade_in_part: bytes, pcm_format: AudioFormat 484 ) -> bytes: 485 """Apply the standard crossfade to the given PCM audio parts.""" 486 # We need to override the default apply here, since standard crossfade only needs to be 487 # applied to the overlapping parts, not the full buffers. 488 crossfade_size = int(pcm_format.pcm_sample_size * self.crossfade_duration) 489 # Pre-crossfade: outgoing track minus the crossfaded portion 490 pre_crossfade = fade_out_part[:-crossfade_size] 491 # Post-crossfade: incoming track minus the crossfaded portion 492 post_crossfade = fade_in_part[crossfade_size:] 493 # Adjust portions to exact crossfade size 494 adjusted_fade_in_part = fade_in_part[:crossfade_size] 495 adjusted_fade_out_part = fade_out_part[-crossfade_size:] 496 # Adjust the duration to match actual sizes 497 self.crossfade_duration = min( 498 len(adjusted_fade_in_part) / pcm_format.pcm_sample_size, 499 len(adjusted_fade_out_part) / pcm_format.pcm_sample_size, 500 ) 501 # Crossfaded portion: user's configured duration 502 crossfaded_section = await super().apply( 503 adjusted_fade_out_part, adjusted_fade_in_part, pcm_format 504 ) 505 # Full result: everything concatenated 506 return pre_crossfade + crossfaded_section + post_crossfade 507 508 509# HELPER METHODS 510def get_bpm_diff_percentage(bpm1: float, bpm2: float) -> float: 511 """Calculate BPM difference percentage between two BPM values.""" 512 return abs(1.0 - bpm1 / bpm2) * 100 513 514 515def extrapolate_downbeats( 516 downbeats: npt.NDArray[np.float64], 517 tempo_factor: float, 518 buffer_size: float = SMART_CROSSFADE_DURATION, 519 bpm: float | None = None, 520) -> npt.NDArray[np.float64]: 521 """Extrapolate downbeats based on actual intervals when detection is incomplete. 522 523 This is needed when we want to perform beat alignment in an 'atmospheric' outro 524 that does not have any detected downbeats. 525 526 Args: 527 downbeats: Array of detected downbeat positions in seconds 528 tempo_factor: Tempo adjustment factor for time stretching 529 buffer_size: Maximum buffer size in seconds 530 bpm: Optional BPM for validation when extrapolating with only 2 downbeats 531 """ 532 # Handle case with exactly 2 downbeats (with BPM validation) 533 if len(downbeats) == 2 and bpm is not None: 534 interval = float(downbeats[1] - downbeats[0]) 535 536 # Expected interval for this BPM (assuming 4/4 time signature) 537 expected_interval = (60.0 / bpm) * 4 538 539 # Only extrapolate if interval matches BPM within 15% tolerance 540 if abs(interval - expected_interval) / expected_interval < 0.15: 541 # Adjust detected downbeats for time stretching first 542 adjusted_downbeats = downbeats / tempo_factor 543 last_downbeat = adjusted_downbeats[-1] 544 545 # If the last downbeat is close to the buffer end, no extrapolation needed 546 if last_downbeat >= buffer_size - 5: 547 return adjusted_downbeats 548 549 # Adjust the interval for time stretching 550 adjusted_interval = interval / tempo_factor 551 552 # Extrapolate forward from last adjusted downbeat using adjusted interval 553 extrapolated = [] 554 current_pos = last_downbeat + adjusted_interval 555 max_extrapolation_distance = 125.0 # Don't extrapolate more than 25s 556 557 while ( 558 current_pos < buffer_size 559 and (current_pos - last_downbeat) <= max_extrapolation_distance 560 ): 561 extrapolated.append(current_pos) 562 current_pos += adjusted_interval 563 564 if extrapolated: 565 # Combine adjusted detected downbeats and extrapolated downbeats 566 return np.concatenate([adjusted_downbeats, np.array(extrapolated)]) 567 568 return adjusted_downbeats 569 # else: interval doesn't match BPM, fall through to return original 570 571 if len(downbeats) < 2: 572 # Need at least 2 downbeats to extrapolate 573 return downbeats / tempo_factor 574 575 # Adjust detected downbeats for time stretching first 576 adjusted_downbeats = downbeats / tempo_factor 577 last_downbeat = adjusted_downbeats[-1] 578 579 # If the last downbeat is close to the buffer end, no extrapolation needed 580 if last_downbeat >= buffer_size - 5: 581 return adjusted_downbeats 582 583 # Calculate intervals from ORIGINAL downbeats (before time stretching) 584 intervals = np.diff(downbeats) 585 median_interval = float(np.median(intervals)) 586 std_interval = float(np.std(intervals)) 587 588 # Only extrapolate if intervals are consistent (low standard deviation) 589 if std_interval > 0.2: 590 return adjusted_downbeats 591 592 # Adjust the interval for time stretching 593 # When slowing down (tempo_factor < 1.0), intervals get longer 594 adjusted_interval = median_interval / tempo_factor 595 596 # Extrapolate forward from last adjusted downbeat using adjusted interval 597 extrapolated = [] 598 current_pos = last_downbeat + adjusted_interval 599 max_extrapolation_distance = 25.0 # Don't extrapolate more than 25s 600 601 while current_pos < buffer_size and (current_pos - last_downbeat) <= max_extrapolation_distance: 602 extrapolated.append(current_pos) 603 current_pos += adjusted_interval 604 605 if extrapolated: 606 # Combine adjusted detected downbeats and extrapolated downbeats 607 return np.concatenate([adjusted_downbeats, np.array(extrapolated)]) 608 609 return adjusted_downbeats 610