mirror of
https://github.com/androidx/media.git
synced 2025-04-30 06:46:50 +08:00
Update skip silence algorithm
The updated algorithm has two main improvements: - The silence padding is not constant but a ratio of the original silence (up to a defined max) to more naturally represent the original gaps. - The silence is not instantly going to zero, but uses a ramp down and up for a smooth volume transition and also retains a small percentage of the original noise for more natural "silences" that still contain some background noise. #minor-release Issue: google/ExoPlayer#7423 PiperOrigin-RevId: 602322442 (cherry picked from commit bb533332f4b6f46ac9d5ca17cb7943bd1bdb7dd2)
This commit is contained in:
parent
826f20dbbd
commit
dfe47219f9
@ -38,6 +38,13 @@ This release includes the following changes since the
|
|||||||
* Extract audio types from TS descriptors and map them to role flags,
|
* Extract audio types from TS descriptors and map them to role flags,
|
||||||
allowing users to make better-informed audio track selections
|
allowing users to make better-informed audio track selections
|
||||||
([#973](https://github.com/androidx/media/pull/973)).
|
([#973](https://github.com/androidx/media/pull/973)).
|
||||||
|
* Audio:
|
||||||
|
* Improve silence skipping algorithm with smooth volume ramp, retained
|
||||||
|
minimal silence and more natural silence durations
|
||||||
|
([#7423](https://github.com/google/ExoPlayer/issues/7423)).
|
||||||
|
* Video:
|
||||||
|
* Text:
|
||||||
|
* Metadata:
|
||||||
* Image:
|
* Image:
|
||||||
* Add support for DASH thumbnails. Grid images are cropped and individual
|
* Add support for DASH thumbnails. Grid images are cropped and individual
|
||||||
thumbnails are provided to `ImageOutput` close to their presentation
|
thumbnails are provided to `ImageOutput` close to their presentation
|
||||||
|
@ -15,14 +15,16 @@
|
|||||||
*/
|
*/
|
||||||
package androidx.media3.exoplayer.audio;
|
package androidx.media3.exoplayer.audio;
|
||||||
|
|
||||||
|
import static androidx.media3.common.util.Assertions.checkArgument;
|
||||||
|
import static androidx.media3.common.util.Assertions.checkState;
|
||||||
import static java.lang.Math.min;
|
import static java.lang.Math.min;
|
||||||
import static java.lang.annotation.ElementType.TYPE_USE;
|
import static java.lang.annotation.ElementType.TYPE_USE;
|
||||||
|
|
||||||
import androidx.annotation.IntDef;
|
import androidx.annotation.IntDef;
|
||||||
import androidx.media3.common.C;
|
import androidx.media3.common.C;
|
||||||
|
import androidx.media3.common.Format;
|
||||||
import androidx.media3.common.audio.AudioProcessor;
|
import androidx.media3.common.audio.AudioProcessor;
|
||||||
import androidx.media3.common.audio.BaseAudioProcessor;
|
import androidx.media3.common.audio.BaseAudioProcessor;
|
||||||
import androidx.media3.common.util.Assertions;
|
|
||||||
import androidx.media3.common.util.UnstableApi;
|
import androidx.media3.common.util.UnstableApi;
|
||||||
import androidx.media3.common.util.Util;
|
import androidx.media3.common.util.Util;
|
||||||
import java.lang.annotation.Documented;
|
import java.lang.annotation.Documented;
|
||||||
@ -39,103 +41,212 @@ import java.nio.ByteBuffer;
|
|||||||
public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The default value for {@link #SilenceSkippingAudioProcessor(long, long, short)
|
* Default fraction of the original silence to keep. Between [0, 1]. 1 means keep all silence. 0
|
||||||
* minimumSilenceDurationUs}.
|
* means remove all silence.
|
||||||
*/
|
*/
|
||||||
public static final long DEFAULT_MINIMUM_SILENCE_DURATION_US = 150_000;
|
public static final float DEFAULT_SILENCE_RETENTION_RATIO = 0.2f;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The default value for {@link #SilenceSkippingAudioProcessor(long, long, short)
|
* Default volume percentage to keep.
|
||||||
* paddingSilenceUs}.
|
*
|
||||||
|
* <p>Even when modifying the volume to a mute state, it is ideal to decrease the volume instead
|
||||||
|
* of making the volume zero. Completely silent audio sounds like playback has stopped, while
|
||||||
|
* decreased volume sounds like very light background noise at a recording studio.
|
||||||
*/
|
*/
|
||||||
public static final long DEFAULT_PADDING_SILENCE_US = 20_000;
|
public static final int DEFAULT_MIN_VOLUME_TO_KEEP_PERCENTAGE = 10;
|
||||||
|
|
||||||
/**
|
/** Default absolute level below which an individual PCM sample is classified as silent. */
|
||||||
* The default value for {@link #SilenceSkippingAudioProcessor(long, long, short)
|
|
||||||
* silenceThresholdLevel}.
|
|
||||||
*/
|
|
||||||
public static final short DEFAULT_SILENCE_THRESHOLD_LEVEL = 1024;
|
public static final short DEFAULT_SILENCE_THRESHOLD_LEVEL = 1024;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default minimum duration of audio that must be below {@code silenceThresholdLevel} before
|
||||||
|
* silence starts being trimmed. Specified in microseconds.
|
||||||
|
*/
|
||||||
|
public static final long DEFAULT_MINIMUM_SILENCE_DURATION_US = 100_000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default maximum silence to keep in microseconds. This maximum is applied after {@code
|
||||||
|
* silenceRetentionRatio}.
|
||||||
|
*/
|
||||||
|
public static final long DEFAULT_MAX_SILENCE_TO_KEEP_DURATION_US = 2_000_000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Specify silence behaviour via {@code silenceRetentionRatio} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated public static final long DEFAULT_PADDING_SILENCE_US = 20_000;
|
||||||
|
|
||||||
/** Trimming states. */
|
/** Trimming states. */
|
||||||
@Documented
|
@Documented
|
||||||
@Retention(RetentionPolicy.SOURCE)
|
@Retention(RetentionPolicy.SOURCE)
|
||||||
@Target(TYPE_USE)
|
@Target(TYPE_USE)
|
||||||
@IntDef({
|
@IntDef({
|
||||||
STATE_NOISY,
|
STATE_NOISY,
|
||||||
STATE_MAYBE_SILENT,
|
STATE_SHORTENING_SILENCE,
|
||||||
STATE_SILENT,
|
|
||||||
})
|
})
|
||||||
private @interface State {}
|
private @interface State {}
|
||||||
|
|
||||||
/** State when the input is not silent. */
|
/** State when the input is not silent. */
|
||||||
private static final int STATE_NOISY = 0;
|
private static final int STATE_NOISY = 0;
|
||||||
|
|
||||||
/** State when the input may be silent but we haven't read enough yet to know. */
|
/**
|
||||||
private static final int STATE_MAYBE_SILENT = 1;
|
* State when the input has been silent less than or equal to {@link #maxSilenceToKeepDurationUs}
|
||||||
|
* and the silence is being shortened according to {@link #calculateShortenedSilenceLength(int)}.
|
||||||
|
*/
|
||||||
|
private static final int STATE_SHORTENING_SILENCE = 1;
|
||||||
|
|
||||||
/** State when the input is silent. */
|
/** Ways to change the volume of silence. */
|
||||||
private static final int STATE_SILENT = 2;
|
@Documented
|
||||||
|
@Retention(RetentionPolicy.SOURCE)
|
||||||
|
@Target(TYPE_USE)
|
||||||
|
@IntDef({
|
||||||
|
FADE_OUT,
|
||||||
|
MUTE,
|
||||||
|
FADE_IN,
|
||||||
|
DO_NOT_CHANGE_VOLUME,
|
||||||
|
})
|
||||||
|
private @interface VolumeChangeType {}
|
||||||
|
|
||||||
private final long minimumSilenceDurationUs;
|
private static final int FADE_OUT = 0;
|
||||||
private final long paddingSilenceUs;
|
private static final int MUTE = 1;
|
||||||
private final short silenceThresholdLevel;
|
private static final int FADE_IN = 2;
|
||||||
private int bytesPerFrame;
|
private static final int DO_NOT_CHANGE_VOLUME = 3;
|
||||||
private boolean enabled;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Buffers audio data that may be classified as silence while in {@link #STATE_MAYBE_SILENT}. If
|
* Used with {@code minVolumeToKeepPercentageWhenMuting} to avoid round off errors. An alternative
|
||||||
* the input becomes noisy before the buffer has filled, it will be output. Otherwise, the buffer
|
* to this would be to use floats, but integer math is probably faster than floats.
|
||||||
* contents will be dropped and the state will transition to {@link #STATE_SILENT}.
|
*/
|
||||||
|
private static final int AVOID_TRUNCATION_FACTOR = 1000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fraction of the original silence to keep. Between [0, 1]. 1 means keep all silence. 0 means
|
||||||
|
* remove all silence.
|
||||||
|
*/
|
||||||
|
private final float silenceRetentionRatio;
|
||||||
|
|
||||||
|
/** Absolute level below which an individual PCM sample is classified as silent. */
|
||||||
|
private final short silenceThresholdLevel;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Volume percentage to keep. Even when modifying the volume to a mute state, it is ideal to
|
||||||
|
* decrease the volume instead of making the volume zero. Completely silent audio sounds like
|
||||||
|
* playback has stopped, while decreased volume sounds like very light background noise from a
|
||||||
|
* recording studio.
|
||||||
|
*/
|
||||||
|
private final int minVolumeToKeepPercentageWhenMuting;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Duration of audio that must be below {@link #silenceThresholdLevel} before silence starts being
|
||||||
|
* trimmed. Specified in microseconds.
|
||||||
|
*/
|
||||||
|
private final long minimumSilenceDurationUs;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum silence to keep in microseconds. This maximum is applied after {@link
|
||||||
|
* #silenceRetentionRatio}.
|
||||||
|
*/
|
||||||
|
private final long maxSilenceToKeepDurationUs;
|
||||||
|
|
||||||
|
private AudioFormat inputFormat;
|
||||||
|
private int bytesPerFrame;
|
||||||
|
private boolean enabled;
|
||||||
|
private @State int state;
|
||||||
|
private long skippedFrames;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of frames of silence that have been output since the last noise. Used to enforce {@link
|
||||||
|
* #maxSilenceToKeepDurationUs}.
|
||||||
|
*/
|
||||||
|
private int outputSilenceFramesSinceNoise = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Buffers audio data that may be classified as silence while in {@link
|
||||||
|
* #STATE_SHORTENING_SILENCE}. If the input becomes noisy before the buffer has filled, it will be
|
||||||
|
* output without shortening. Otherwise, the buffer will be output when filled as shortened
|
||||||
|
* silence and emptied.
|
||||||
*/
|
*/
|
||||||
private byte[] maybeSilenceBuffer;
|
private byte[] maybeSilenceBuffer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stores the latest part of the input while silent. It will be output as padding if the next
|
* An index into {@link #maybeSilenceBuffer} pointing to the location where silence that has not
|
||||||
* input is noisy.
|
* been output starts.
|
||||||
*/
|
*/
|
||||||
private byte[] paddingBuffer;
|
private int maybeSilenceBufferStartIndex = 0;
|
||||||
|
|
||||||
private @State int state;
|
/**
|
||||||
private int maybeSilenceBufferSize;
|
* A count of the number of bytes of content in {@link #maybeSilenceBuffer}. The count starts at
|
||||||
private int paddingSize;
|
* {@link #maybeSilenceBufferStartIndex}, and the bytes counted may wrap around to the start of
|
||||||
private boolean hasOutputNoise;
|
* the buffer. The count will never be greater than {@link #maybeSilenceBuffer}'s length.
|
||||||
private long skippedFrames;
|
*/
|
||||||
|
private int maybeSilenceBufferContentsSize = 0;
|
||||||
|
|
||||||
|
/** Used to hold a subset of the contents of {@link #maybeSilenceBuffer} for convenience. */
|
||||||
|
// TODO: This processor can probably be more efficient if this array is not used. Operations like
|
||||||
|
// modifyVolume() can be applied to a non-contiguous contents, the code is just more complex.
|
||||||
|
private byte[] contiguousOutputBuffer;
|
||||||
|
|
||||||
/** Creates a new silence skipping audio processor. */
|
/** Creates a new silence skipping audio processor. */
|
||||||
public SilenceSkippingAudioProcessor() {
|
public SilenceSkippingAudioProcessor() {
|
||||||
this(
|
this(
|
||||||
DEFAULT_MINIMUM_SILENCE_DURATION_US,
|
DEFAULT_MINIMUM_SILENCE_DURATION_US,
|
||||||
DEFAULT_PADDING_SILENCE_US,
|
DEFAULT_SILENCE_RETENTION_RATIO,
|
||||||
|
DEFAULT_MAX_SILENCE_TO_KEEP_DURATION_US,
|
||||||
|
DEFAULT_MIN_VOLUME_TO_KEEP_PERCENTAGE,
|
||||||
DEFAULT_SILENCE_THRESHOLD_LEVEL);
|
DEFAULT_SILENCE_THRESHOLD_LEVEL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new silence skipping audio processor.
|
* @deprecated Use {@link #SilenceSkippingAudioProcessor(long, float, long, int, short)} instead.
|
||||||
*
|
|
||||||
* @param minimumSilenceDurationUs The minimum duration of audio that must be below {@code
|
|
||||||
* silenceThresholdLevel} to classify that part of audio as silent, in microseconds.
|
|
||||||
* @param paddingSilenceUs The duration of silence by which to extend non-silent sections, in
|
|
||||||
* microseconds. The value must not exceed {@code minimumSilenceDurationUs}.
|
|
||||||
* @param silenceThresholdLevel The absolute level below which an individual PCM sample is
|
|
||||||
* classified as silent.
|
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public SilenceSkippingAudioProcessor(
|
public SilenceSkippingAudioProcessor(
|
||||||
long minimumSilenceDurationUs, long paddingSilenceUs, short silenceThresholdLevel) {
|
long minimumSilenceDurationUs, long paddingSilenceUs, short silenceThresholdLevel) {
|
||||||
Assertions.checkArgument(paddingSilenceUs <= minimumSilenceDurationUs);
|
this(
|
||||||
this.minimumSilenceDurationUs = minimumSilenceDurationUs;
|
minimumSilenceDurationUs,
|
||||||
this.paddingSilenceUs = paddingSilenceUs;
|
/* silenceRetentionRatio= */ (float) paddingSilenceUs / minimumSilenceDurationUs,
|
||||||
this.silenceThresholdLevel = silenceThresholdLevel;
|
/* maxSilenceToKeepDurationUs= */ minimumSilenceDurationUs,
|
||||||
|
/* minVolumeToKeepPercentageWhenMuting= */ 0,
|
||||||
maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY;
|
silenceThresholdLevel);
|
||||||
paddingBuffer = Util.EMPTY_BYTE_ARRAY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets whether to skip silence in the input. This method may only be called after draining data
|
* Creates a new silence trimming audio processor.
|
||||||
* through the processor. The value returned by {@link #isActive()} may change, and the processor
|
|
||||||
* must be {@link #flush() flushed} before queueing more data.
|
|
||||||
*
|
*
|
||||||
* @param enabled Whether to skip silence in the input.
|
* @param minimumSilenceDurationUs Duration of audio that must be below {@code
|
||||||
|
* silenceThresholdLevel} before silence starts being trimmed, in microseconds.
|
||||||
|
* @param silenceRetentionRatio Fraction of the original silence to keep. Between [0, 1]. 1 means
|
||||||
|
* keep all silence. 0 means remove all silence.
|
||||||
|
* @param maxSilenceToKeepDurationUs Maximum silence to keep in microseconds. This maximum is
|
||||||
|
* applied after {@code silenceRetentionRatio}.
|
||||||
|
* @param minVolumeToKeepPercentageWhenMuting Volume percentage to keep. Even when modifying the
|
||||||
|
* volume to a mute state, it is ideal to decrease the volume instead of making the volume
|
||||||
|
* zero. Completely silent audio sounds like playback has stopped, while decreased volume
|
||||||
|
* sounds like very light background noise from a recording studio.
|
||||||
|
* @param silenceThresholdLevel Absolute level below which an individual PCM sample is classified
|
||||||
|
* as silent.
|
||||||
|
*/
|
||||||
|
public SilenceSkippingAudioProcessor(
|
||||||
|
long minimumSilenceDurationUs,
|
||||||
|
float silenceRetentionRatio,
|
||||||
|
long maxSilenceToKeepDurationUs,
|
||||||
|
int minVolumeToKeepPercentageWhenMuting,
|
||||||
|
short silenceThresholdLevel) {
|
||||||
|
checkArgument(silenceRetentionRatio >= 0f && silenceRetentionRatio <= 1f);
|
||||||
|
this.minimumSilenceDurationUs = minimumSilenceDurationUs;
|
||||||
|
this.silenceRetentionRatio = silenceRetentionRatio;
|
||||||
|
this.maxSilenceToKeepDurationUs = maxSilenceToKeepDurationUs;
|
||||||
|
this.minVolumeToKeepPercentageWhenMuting = minVolumeToKeepPercentageWhenMuting;
|
||||||
|
this.silenceThresholdLevel = silenceThresholdLevel;
|
||||||
|
inputFormat = AudioFormat.NOT_SET;
|
||||||
|
maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY;
|
||||||
|
contiguousOutputBuffer = Util.EMPTY_BYTE_ARRAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets whether to shorten silence in the input. This method may only be called after draining
|
||||||
|
* data through the processor. The value returned by {@link #isActive()} may change, and the
|
||||||
|
* processor must be {@link #flush() flushed} before queueing more data.
|
||||||
|
*
|
||||||
|
* @param enabled Whether to shorten silence in the input.
|
||||||
*/
|
*/
|
||||||
public void setEnabled(boolean enabled) {
|
public void setEnabled(boolean enabled) {
|
||||||
this.enabled = enabled;
|
this.enabled = enabled;
|
||||||
@ -149,20 +260,20 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
|||||||
return skippedFrames;
|
return skippedFrames;
|
||||||
}
|
}
|
||||||
|
|
||||||
// AudioProcessor implementation.
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AudioFormat onConfigure(AudioFormat inputAudioFormat)
|
protected AudioFormat onConfigure(AudioFormat inputAudioFormat)
|
||||||
throws UnhandledAudioFormatException {
|
throws UnhandledAudioFormatException {
|
||||||
if (inputAudioFormat.encoding != C.ENCODING_PCM_16BIT) {
|
if (inputAudioFormat.encoding != C.ENCODING_PCM_16BIT) {
|
||||||
throw new UnhandledAudioFormatException(inputAudioFormat);
|
throw new UnhandledAudioFormatException(inputAudioFormat);
|
||||||
}
|
}
|
||||||
return enabled ? inputAudioFormat : AudioFormat.NOT_SET;
|
this.inputFormat = inputAudioFormat;
|
||||||
|
bytesPerFrame = inputAudioFormat.channelCount * 2;
|
||||||
|
return inputAudioFormat;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isActive() {
|
public boolean isActive() {
|
||||||
return enabled;
|
return inputFormat.sampleRate != Format.NO_VALUE && enabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -172,11 +283,8 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
|||||||
case STATE_NOISY:
|
case STATE_NOISY:
|
||||||
processNoisy(inputBuffer);
|
processNoisy(inputBuffer);
|
||||||
break;
|
break;
|
||||||
case STATE_MAYBE_SILENT:
|
case STATE_SHORTENING_SILENCE:
|
||||||
processMaybeSilence(inputBuffer);
|
shortenSilenceSilenceUntilNoise(inputBuffer);
|
||||||
break;
|
|
||||||
case STATE_SILENT:
|
|
||||||
processSilence(inputBuffer);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException();
|
throw new IllegalStateException();
|
||||||
@ -185,48 +293,43 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onQueueEndOfStream() {
|
public void onQueueEndOfStream() {
|
||||||
if (maybeSilenceBufferSize > 0) {
|
// The maybeSilenceBuffer is only written to in the STATE_SHORTENING_SILENCE state, and
|
||||||
// We haven't received enough silence to transition to the silent state, so output the buffer
|
// is always completely flushed before leaving the STATE_SHORTENING_SILENCE.
|
||||||
// and switch back to the noisy state.
|
if (maybeSilenceBufferContentsSize > 0) {
|
||||||
output(maybeSilenceBuffer, maybeSilenceBufferSize);
|
// There are bytes in the buffer, so the final chunk of shortened silence will be output to
|
||||||
maybeSilenceBufferSize = 0;
|
// simulate a transition back to the noisy state and the end of output.
|
||||||
state = STATE_NOISY;
|
outputShortenedSilenceBuffer(/* shouldTransitionToNoisyState= */ true);
|
||||||
}
|
outputSilenceFramesSinceNoise = 0;
|
||||||
if (!hasOutputNoise) {
|
|
||||||
skippedFrames += paddingSize / bytesPerFrame;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onFlush() {
|
public void onFlush() {
|
||||||
if (enabled) {
|
if (isActive()) {
|
||||||
bytesPerFrame = inputAudioFormat.bytesPerFrame;
|
// Divide by 2 to allow the buffer to be split into two bytesPerFrame aligned parts.
|
||||||
int maybeSilenceBufferSize = durationUsToFrames(minimumSilenceDurationUs) * bytesPerFrame;
|
int maybeSilenceBufferSize =
|
||||||
|
alignToBytePerFrameBoundary(durationUsToFrames(minimumSilenceDurationUs) / 2) * 2;
|
||||||
if (maybeSilenceBuffer.length != maybeSilenceBufferSize) {
|
if (maybeSilenceBuffer.length != maybeSilenceBufferSize) {
|
||||||
maybeSilenceBuffer = new byte[maybeSilenceBufferSize];
|
maybeSilenceBuffer = new byte[maybeSilenceBufferSize];
|
||||||
}
|
contiguousOutputBuffer = new byte[maybeSilenceBufferSize];
|
||||||
paddingSize = durationUsToFrames(paddingSilenceUs) * bytesPerFrame;
|
|
||||||
if (paddingBuffer.length != paddingSize) {
|
|
||||||
paddingBuffer = new byte[paddingSize];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
state = STATE_NOISY;
|
state = STATE_NOISY;
|
||||||
skippedFrames = 0;
|
skippedFrames = 0;
|
||||||
maybeSilenceBufferSize = 0;
|
outputSilenceFramesSinceNoise = 0;
|
||||||
hasOutputNoise = false;
|
maybeSilenceBufferStartIndex = 0;
|
||||||
|
maybeSilenceBufferContentsSize = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onReset() {
|
public void onReset() {
|
||||||
enabled = false;
|
enabled = false;
|
||||||
paddingSize = 0;
|
inputFormat = AudioFormat.NOT_SET;
|
||||||
maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY;
|
maybeSilenceBuffer = Util.EMPTY_BYTE_ARRAY;
|
||||||
paddingBuffer = Util.EMPTY_BYTE_ARRAY;
|
contiguousOutputBuffer = Util.EMPTY_BYTE_ARRAY;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Internal methods.
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_NOISY},
|
* Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_NOISY},
|
||||||
* updating the state if needed.
|
* updating the state if needed.
|
||||||
@ -239,9 +342,9 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
|||||||
int noiseLimit = findNoiseLimit(inputBuffer);
|
int noiseLimit = findNoiseLimit(inputBuffer);
|
||||||
if (noiseLimit == inputBuffer.position()) {
|
if (noiseLimit == inputBuffer.position()) {
|
||||||
// The buffer contains the start of possible silence.
|
// The buffer contains the start of possible silence.
|
||||||
state = STATE_MAYBE_SILENT;
|
state = STATE_SHORTENING_SILENCE;
|
||||||
} else {
|
} else {
|
||||||
inputBuffer.limit(noiseLimit);
|
inputBuffer.limit(min(noiseLimit, inputBuffer.capacity()));
|
||||||
output(inputBuffer);
|
output(inputBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -251,72 +354,349 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Incrementally processes new input from {@code inputBuffer} while in {@link
|
* Incrementally processes new input from {@code inputBuffer} while in {@link
|
||||||
* #STATE_MAYBE_SILENT}, updating the state if needed.
|
* #STATE_SHORTENING_SILENCE}, updating the state if needed.
|
||||||
|
*
|
||||||
|
* <p>If the amount of silence is less than {@link #minimumSilenceDurationUs}, then {@link
|
||||||
|
* #DO_NOT_CHANGE_VOLUME} is used to output the silence.
|
||||||
|
*
|
||||||
|
* <p>If the amount of silence is more than {@link #minimumSilenceDurationUs}, then the following
|
||||||
|
* will be output:
|
||||||
|
*
|
||||||
|
* <ul>
|
||||||
|
* <li>Half a buffer full of silence using {@link #FADE_OUT}. This padding has no
|
||||||
|
* discontinuities.
|
||||||
|
* <li>A number of bytes between 0 and ({@link #maxSilenceToKeepDurationUs} - padding). This will
|
||||||
|
* have discontinuities, that are imperceptible due to {@linkplain #MUTE muting} the
|
||||||
|
* content.
|
||||||
|
* <li>If the silence length is over {@link #maxSilenceToKeepDurationUs} - a half buffer (for
|
||||||
|
* padding that will be applied later) then the silence begins to be thrown away entirely.
|
||||||
|
* <li>A final silence with a length of a half buffer will be used with a {@link #FADE_IN}. This
|
||||||
|
* padding has no discontinuities. It will transition with no discontinuities back to the
|
||||||
|
* {@link #STATE_NOISY}.
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <p>Transitions to {@link #STATE_NOISY} if noise is encountered. It writes to {@link
|
||||||
|
* #maybeSilenceBuffer} in contiguous blocks. If the silence available is enough to wrap around
|
||||||
|
* the end of the buffer then the buffer is filled from {@link #maybeSilenceBufferStartIndex} to
|
||||||
|
* the buffer's end, and the beginning of the buffer is filled upon the next call to this method.
|
||||||
*/
|
*/
|
||||||
private void processMaybeSilence(ByteBuffer inputBuffer) {
|
private void shortenSilenceSilenceUntilNoise(ByteBuffer inputBuffer) {
|
||||||
|
checkState(maybeSilenceBufferStartIndex < maybeSilenceBuffer.length);
|
||||||
|
|
||||||
int limit = inputBuffer.limit();
|
int limit = inputBuffer.limit();
|
||||||
int noisePosition = findNoisePosition(inputBuffer);
|
int noisePosition = findNoisePosition(inputBuffer);
|
||||||
int maybeSilenceInputSize = noisePosition - inputBuffer.position();
|
int silenceInputSize = noisePosition - inputBuffer.position();
|
||||||
int maybeSilenceBufferRemaining = maybeSilenceBuffer.length - maybeSilenceBufferSize;
|
|
||||||
if (noisePosition < limit && maybeSilenceInputSize < maybeSilenceBufferRemaining) {
|
int indexToWriteTo;
|
||||||
// The maybe silence buffer isn't full, so output it and switch back to the noisy state.
|
int contiguousBufferRemaining;
|
||||||
output(maybeSilenceBuffer, maybeSilenceBufferSize);
|
if (maybeSilenceBufferStartIndex + maybeSilenceBufferContentsSize < maybeSilenceBuffer.length) {
|
||||||
maybeSilenceBufferSize = 0;
|
// ^0---^start---^end---^length
|
||||||
state = STATE_NOISY;
|
contiguousBufferRemaining =
|
||||||
|
maybeSilenceBuffer.length
|
||||||
|
- (maybeSilenceBufferContentsSize + maybeSilenceBufferStartIndex);
|
||||||
|
indexToWriteTo = maybeSilenceBufferStartIndex + maybeSilenceBufferContentsSize;
|
||||||
} else {
|
} else {
|
||||||
// Fill as much of the maybe silence buffer as possible.
|
// The bytes have wrapped around. ^0---^end---^start---^length
|
||||||
int bytesToWrite = min(maybeSilenceInputSize, maybeSilenceBufferRemaining);
|
int amountInUpperPartOfBuffer = maybeSilenceBuffer.length - maybeSilenceBufferStartIndex;
|
||||||
inputBuffer.limit(inputBuffer.position() + bytesToWrite);
|
indexToWriteTo = maybeSilenceBufferContentsSize - amountInUpperPartOfBuffer;
|
||||||
inputBuffer.get(maybeSilenceBuffer, maybeSilenceBufferSize, bytesToWrite);
|
contiguousBufferRemaining = maybeSilenceBufferStartIndex - indexToWriteTo;
|
||||||
maybeSilenceBufferSize += bytesToWrite;
|
}
|
||||||
if (maybeSilenceBufferSize == maybeSilenceBuffer.length) {
|
|
||||||
// We've reached a period of silence, so skip it, taking in to account padding for both
|
boolean noiseFound = noisePosition < limit;
|
||||||
// the noisy to silent transition and any future silent to noisy transition.
|
// Fill as much of the silence buffer as possible.
|
||||||
if (hasOutputNoise) {
|
int bytesOfInput = min(silenceInputSize, contiguousBufferRemaining);
|
||||||
output(maybeSilenceBuffer, paddingSize);
|
inputBuffer.limit(inputBuffer.position() + bytesOfInput);
|
||||||
skippedFrames += (maybeSilenceBufferSize - paddingSize * 2) / bytesPerFrame;
|
inputBuffer.get(maybeSilenceBuffer, indexToWriteTo, bytesOfInput);
|
||||||
|
maybeSilenceBufferContentsSize += bytesOfInput;
|
||||||
|
|
||||||
|
checkState(maybeSilenceBufferContentsSize <= maybeSilenceBuffer.length);
|
||||||
|
|
||||||
|
boolean shouldTransitionToNoisyState =
|
||||||
|
noiseFound
|
||||||
|
&&
|
||||||
|
/* The silence before the noise is not enough to fill the remaining buffer. */
|
||||||
|
silenceInputSize < contiguousBufferRemaining;
|
||||||
|
|
||||||
|
outputShortenedSilenceBuffer(shouldTransitionToNoisyState);
|
||||||
|
|
||||||
|
if (shouldTransitionToNoisyState) {
|
||||||
|
state = STATE_NOISY;
|
||||||
|
outputSilenceFramesSinceNoise = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore the limit.
|
||||||
|
inputBuffer.limit(limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** See {@link #shortenSilenceSilenceUntilNoise}. */
|
||||||
|
private void outputShortenedSilenceBuffer(boolean shouldTransitionToNoisyState) {
|
||||||
|
int sizeBeforeOutput = maybeSilenceBufferContentsSize;
|
||||||
|
int bytesToOutput;
|
||||||
|
@VolumeChangeType int volumeChangeType;
|
||||||
|
int bytesConsumed;
|
||||||
|
// Only output when buffer is full or transitioning to noisy state.
|
||||||
|
if (maybeSilenceBufferContentsSize == maybeSilenceBuffer.length
|
||||||
|
|| shouldTransitionToNoisyState) {
|
||||||
|
if (outputSilenceFramesSinceNoise == 0) {
|
||||||
|
// This is the beginning of a silence chunk so keep MINIMUM_SILENCE_DURATION_US / 2 of the
|
||||||
|
// silence.
|
||||||
|
if (shouldTransitionToNoisyState) {
|
||||||
|
volumeChangeType = DO_NOT_CHANGE_VOLUME;
|
||||||
|
bytesToOutput = maybeSilenceBufferContentsSize;
|
||||||
|
outputSilence(bytesToOutput, volumeChangeType);
|
||||||
|
bytesConsumed = bytesToOutput;
|
||||||
} else {
|
} else {
|
||||||
skippedFrames += (maybeSilenceBufferSize - paddingSize) / bytesPerFrame;
|
checkState(maybeSilenceBufferContentsSize >= maybeSilenceBuffer.length / 2);
|
||||||
|
// Keep this block a tad simpler by always outputting exactly buffer size / 2, to avoid
|
||||||
|
// needing to add the shortening code here.
|
||||||
|
volumeChangeType = FADE_OUT;
|
||||||
|
bytesToOutput = maybeSilenceBuffer.length / 2;
|
||||||
|
outputSilence(bytesToOutput, volumeChangeType);
|
||||||
|
bytesConsumed = bytesToOutput;
|
||||||
}
|
}
|
||||||
updatePaddingBuffer(inputBuffer, maybeSilenceBuffer, maybeSilenceBufferSize);
|
} else if (shouldTransitionToNoisyState) {
|
||||||
maybeSilenceBufferSize = 0;
|
volumeChangeType = FADE_IN;
|
||||||
state = STATE_SILENT;
|
|
||||||
|
int bytesRemainingAfterOutputtingHalfMin =
|
||||||
|
maybeSilenceBufferContentsSize - maybeSilenceBuffer.length / 2;
|
||||||
|
|
||||||
|
bytesConsumed = bytesRemainingAfterOutputtingHalfMin + maybeSilenceBuffer.length / 2;
|
||||||
|
int shortenedSilenceLength =
|
||||||
|
calculateShortenedSilenceLength(bytesRemainingAfterOutputtingHalfMin);
|
||||||
|
|
||||||
|
// For simplicity we fade in over the shortened silence and the half buffer of padding.
|
||||||
|
// This acts to increase the padding a bit which only helps (probably imperceptibly)
|
||||||
|
// the sound quality.
|
||||||
|
bytesToOutput = maybeSilenceBuffer.length / 2 + shortenedSilenceLength;
|
||||||
|
outputSilence(bytesToOutput, volumeChangeType);
|
||||||
|
} else {
|
||||||
|
volumeChangeType = MUTE;
|
||||||
|
// Output as much as possible while still keeping half the buffer full so that half the
|
||||||
|
// min silence can be output later as padding.
|
||||||
|
bytesConsumed = maybeSilenceBufferContentsSize - maybeSilenceBuffer.length / 2;
|
||||||
|
|
||||||
|
bytesToOutput = calculateShortenedSilenceLength(bytesConsumed);
|
||||||
|
outputSilence(bytesToOutput, volumeChangeType);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restore the limit.
|
checkState(
|
||||||
inputBuffer.limit(limit);
|
bytesConsumed % bytesPerFrame == 0,
|
||||||
|
"bytesConsumed is not aligned to frame size: %s" + bytesConsumed);
|
||||||
|
|
||||||
|
checkState((sizeBeforeOutput >= bytesToOutput));
|
||||||
|
|
||||||
|
maybeSilenceBufferContentsSize -= bytesConsumed;
|
||||||
|
maybeSilenceBufferStartIndex += bytesConsumed;
|
||||||
|
// The start index might wrap back around to the start of the buffer.
|
||||||
|
maybeSilenceBufferStartIndex %= maybeSilenceBuffer.length;
|
||||||
|
|
||||||
|
outputSilenceFramesSinceNoise += bytesToOutput / bytesPerFrame;
|
||||||
|
skippedFrames += (bytesConsumed - bytesToOutput) / bytesPerFrame;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Incrementally processes new input from {@code inputBuffer} while in {@link #STATE_SILENT},
|
* Returns the appropriate size that a given number of bytes of silence should be shortened to. It
|
||||||
* updating the state if needed.
|
* calculates this using the {@link #outputSilenceFramesSinceNoise} and the {@link
|
||||||
|
* #silenceRetentionRatio}. The {@link #silenceRetentionRatio} multiplied by {@code
|
||||||
|
* silenceToShortenBytes} is returned until a max outputted silence length is hit, and then only
|
||||||
|
* the remaining silence between the current {@link #outputSilenceFramesSinceNoise} and {@link
|
||||||
|
* #maxSilenceToKeepDurationUs} is returned.
|
||||||
*/
|
*/
|
||||||
private void processSilence(ByteBuffer inputBuffer) {
|
private int calculateShortenedSilenceLength(int silenceToShortenBytes) {
|
||||||
int limit = inputBuffer.limit();
|
// Cap the kept silence so that the total silence output stays below maxSilenceToKeepDurationUs.
|
||||||
int noisyPosition = findNoisePosition(inputBuffer);
|
int bytesNeededToReachMax =
|
||||||
inputBuffer.limit(noisyPosition);
|
(durationUsToFrames(maxSilenceToKeepDurationUs) - outputSilenceFramesSinceNoise)
|
||||||
skippedFrames += inputBuffer.remaining() / bytesPerFrame;
|
* bytesPerFrame
|
||||||
updatePaddingBuffer(inputBuffer, paddingBuffer, paddingSize);
|
- maybeSilenceBuffer.length / 2;
|
||||||
if (noisyPosition < limit) {
|
|
||||||
// Output the padding, which may include previous input as well as new input, then transition
|
|
||||||
// back to the noisy state.
|
|
||||||
output(paddingBuffer, paddingSize);
|
|
||||||
state = STATE_NOISY;
|
|
||||||
|
|
||||||
// Restore the limit.
|
checkState(bytesNeededToReachMax >= 0);
|
||||||
inputBuffer.limit(limit);
|
|
||||||
}
|
return alignToBytePerFrameBoundary(
|
||||||
|
min(silenceToShortenBytes * silenceRetentionRatio + .5f, bytesNeededToReachMax));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copies {@code length} elements from {@code data} to populate a new output buffer from the
|
* Method used to avoid rounding errors while calculating output and skipped frames. The given
|
||||||
* processor.
|
* {@code value} is decreased to the nearest value that is divisible by {@link #bytesPerFrame}.
|
||||||
*/
|
*/
|
||||||
private void output(byte[] data, int length) {
|
private int alignToBytePerFrameBoundary(int value) {
|
||||||
replaceOutputBuffer(length).put(data, 0, length).flip();
|
return (value / bytesPerFrame) * bytesPerFrame;
|
||||||
if (length > 0) {
|
}
|
||||||
hasOutputNoise = true;
|
|
||||||
|
/**
|
||||||
|
* Method used to avoid rounding errors while calculating output and skipped frames. The given
|
||||||
|
* {@code value} is decreased to the nearest value that is divisible by {@link #bytesPerFrame}.
|
||||||
|
*/
|
||||||
|
private int alignToBytePerFrameBoundary(float value) {
|
||||||
|
return alignToBytePerFrameBoundary((int) value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Copies elements from {@code data} to populate a new output buffer from the processor. */
|
||||||
|
private void outputRange(byte[] data, int size, @VolumeChangeType int rampType) {
|
||||||
|
checkArgument(
|
||||||
|
size % bytesPerFrame == 0, "byteOutput size is not aligned to frame size " + size);
|
||||||
|
|
||||||
|
modifyVolume(data, size, rampType);
|
||||||
|
replaceOutputBuffer(size).put(data, 0, size).flip();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copies {@code sizeToOutput} elements from the {@link #maybeSilenceBuffer} to {@link
|
||||||
|
* #contiguousOutputBuffer}. The contents of {@link #maybeSilenceBuffer} can wrap around from the
|
||||||
|
* end of the buffer and back to the beginning. The {@link #contiguousOutputBuffer} content always
|
||||||
|
* start from index 0.
|
||||||
|
*
|
||||||
|
* @param rampType This parameter is used to determine which part of the {@link
|
||||||
|
* #maybeSilenceBuffer} contents need to be kept. For {@link #FADE_IN} the end of the contents
|
||||||
|
* is always kept. Otherwise the beginning of the contents are always kept.
|
||||||
|
*/
|
||||||
|
private void outputSilence(int sizeToOutput, @VolumeChangeType int rampType) {
|
||||||
|
if (sizeToOutput == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
checkArgument(maybeSilenceBufferContentsSize >= sizeToOutput);
|
||||||
|
|
||||||
|
if (rampType == FADE_IN) {
|
||||||
|
// Keeps the end of the buffer because we are padding the start of the next chunk of noise.
|
||||||
|
if (maybeSilenceBufferStartIndex + maybeSilenceBufferContentsSize
|
||||||
|
<= maybeSilenceBuffer.length) {
|
||||||
|
// ^0---^start---^end---^length
|
||||||
|
System.arraycopy(
|
||||||
|
maybeSilenceBuffer,
|
||||||
|
maybeSilenceBufferStartIndex + maybeSilenceBufferContentsSize - sizeToOutput,
|
||||||
|
contiguousOutputBuffer,
|
||||||
|
0,
|
||||||
|
sizeToOutput);
|
||||||
|
} else {
|
||||||
|
// ^0---^end--^start---^length
|
||||||
|
int sizeInUpperPartOfArray = maybeSilenceBuffer.length - maybeSilenceBufferStartIndex;
|
||||||
|
int sizeInLowerPartOfArray = maybeSilenceBufferContentsSize - sizeInUpperPartOfArray;
|
||||||
|
if (sizeInLowerPartOfArray >= sizeToOutput) {
|
||||||
|
// We just need the lower part of the array.
|
||||||
|
System.arraycopy(
|
||||||
|
maybeSilenceBuffer,
|
||||||
|
sizeInLowerPartOfArray - sizeToOutput,
|
||||||
|
contiguousOutputBuffer,
|
||||||
|
0,
|
||||||
|
sizeToOutput);
|
||||||
|
} else {
|
||||||
|
int sizeToOutputInUpperPart = sizeToOutput - sizeInLowerPartOfArray;
|
||||||
|
System.arraycopy(
|
||||||
|
maybeSilenceBuffer,
|
||||||
|
maybeSilenceBuffer.length - sizeToOutputInUpperPart,
|
||||||
|
contiguousOutputBuffer,
|
||||||
|
0,
|
||||||
|
sizeToOutputInUpperPart);
|
||||||
|
|
||||||
|
// Copy everything from lower part. DO_NOT_CHANGE_VOLUME (which keeps everything) and
|
||||||
|
// MUTE (where the content that is kept only provides background noise).
|
||||||
|
System.arraycopy(
|
||||||
|
maybeSilenceBuffer,
|
||||||
|
0,
|
||||||
|
contiguousOutputBuffer,
|
||||||
|
sizeToOutputInUpperPart,
|
||||||
|
sizeInLowerPartOfArray);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (maybeSilenceBufferStartIndex + sizeToOutput <= maybeSilenceBuffer.length) {
|
||||||
|
// ^0---^start---^end---^length
|
||||||
|
System.arraycopy(
|
||||||
|
maybeSilenceBuffer,
|
||||||
|
maybeSilenceBufferStartIndex,
|
||||||
|
contiguousOutputBuffer,
|
||||||
|
0,
|
||||||
|
sizeToOutput);
|
||||||
|
} else {
|
||||||
|
// ^0---^end (of content to output now)---^start---^length
|
||||||
|
int sizeToCopyInUpperPartOfArray = maybeSilenceBuffer.length - maybeSilenceBufferStartIndex;
|
||||||
|
// Copy the upper part of the array.
|
||||||
|
System.arraycopy(
|
||||||
|
maybeSilenceBuffer,
|
||||||
|
maybeSilenceBufferStartIndex,
|
||||||
|
contiguousOutputBuffer,
|
||||||
|
0,
|
||||||
|
sizeToCopyInUpperPartOfArray);
|
||||||
|
int amountToCopyFromLowerPartOfArray = sizeToOutput - sizeToCopyInUpperPartOfArray;
|
||||||
|
System.arraycopy(
|
||||||
|
maybeSilenceBuffer,
|
||||||
|
0,
|
||||||
|
contiguousOutputBuffer,
|
||||||
|
sizeToCopyInUpperPartOfArray,
|
||||||
|
amountToCopyFromLowerPartOfArray);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
checkArgument(
|
||||||
|
sizeToOutput % bytesPerFrame == 0,
|
||||||
|
"sizeToOutput is not aligned to frame size: " + sizeToOutput);
|
||||||
|
checkState(maybeSilenceBufferStartIndex < maybeSilenceBuffer.length);
|
||||||
|
|
||||||
|
outputRange(contiguousOutputBuffer, sizeToOutput, rampType);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Modifies the amplitude of the samples in {@code sampleBuffer} based on the given {@link
|
||||||
|
* VolumeChangeType}.
|
||||||
|
*/
|
||||||
|
private void modifyVolume(byte[] sampleBuffer, int size, @VolumeChangeType int volumeChangeType) {
|
||||||
|
if (volumeChangeType == DO_NOT_CHANGE_VOLUME) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int idx = 0; idx < size; idx += 2) {
|
||||||
|
byte mostSignificantByte = sampleBuffer[idx + 1];
|
||||||
|
byte leastSignificantByte = sampleBuffer[idx];
|
||||||
|
int sample = twoByteSampleToInt(mostSignificantByte, leastSignificantByte);
|
||||||
|
|
||||||
|
int volumeModificationPercentage;
|
||||||
|
if (volumeChangeType == FADE_OUT) {
|
||||||
|
volumeModificationPercentage =
|
||||||
|
calculateFadeOutPercentage(/* value= */ idx, /* max= */ size - 1);
|
||||||
|
} else if (volumeChangeType == FADE_IN) {
|
||||||
|
volumeModificationPercentage =
|
||||||
|
calculateFadeInPercentage(/* value= */ idx, /* max= */ size - 1);
|
||||||
|
} else {
|
||||||
|
volumeModificationPercentage = minVolumeToKeepPercentageWhenMuting;
|
||||||
|
}
|
||||||
|
|
||||||
|
sample = (sample * volumeModificationPercentage) / 100;
|
||||||
|
sampleIntToTwoBigEndianBytes(sampleBuffer, idx, sample);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int calculateFadeOutPercentage(int value, int max) {
|
||||||
|
return ((minVolumeToKeepPercentageWhenMuting - 100) * ((AVOID_TRUNCATION_FACTOR * value) / max))
|
||||||
|
/ AVOID_TRUNCATION_FACTOR
|
||||||
|
+ 100;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int calculateFadeInPercentage(int value, int max) {
|
||||||
|
return (minVolumeToKeepPercentageWhenMuting
|
||||||
|
+ ((100 - minVolumeToKeepPercentageWhenMuting) * (AVOID_TRUNCATION_FACTOR * value) / max)
|
||||||
|
/ AVOID_TRUNCATION_FACTOR);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int twoByteSampleToInt(byte mostSignificantByte, byte leastSignificantByte) {
|
||||||
|
return ((leastSignificantByte & 0xFF) | mostSignificantByte << 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes {@code sample} as two bytes into {@code byteArray} at {@code startIndex}, least
* significant byte first (despite the method name), clamped to the signed 16-bit range.
|
||||||
|
*/
|
||||||
|
private static void sampleIntToTwoBigEndianBytes(byte[] byteArray, int startIndex, int sample) {
|
||||||
|
// Avoid 16-bit-integer overflow when writing back the manipulated data.
|
||||||
|
if (sample >= Short.MAX_VALUE) {
|
||||||
|
byteArray[startIndex] = (byte) 0xFF;
|
||||||
|
byteArray[startIndex + 1] = (byte) 0x7F;
|
||||||
|
} else if (sample <= Short.MIN_VALUE) {
|
||||||
|
byteArray[startIndex] = (byte) 0x00;
|
||||||
|
byteArray[startIndex + 1] = (byte) 0x80;
|
||||||
|
} else {
|
||||||
|
byteArray[startIndex] = (byte) (sample & 0xFF);
|
||||||
|
byteArray[startIndex + 1] = (byte) (sample >> 8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -324,36 +704,14 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
|||||||
* Copies remaining bytes from {@code data} to populate a new output buffer from the processor.
|
* Copies remaining bytes from {@code data} to populate a new output buffer from the processor.
|
||||||
*/
|
*/
|
||||||
private void output(ByteBuffer data) {
|
private void output(ByteBuffer data) {
|
||||||
int length = data.remaining();
|
replaceOutputBuffer(data.remaining()).put(data).flip();
|
||||||
replaceOutputBuffer(length).put(data).flip();
|
|
||||||
if (length > 0) {
|
|
||||||
hasOutputNoise = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Fills {@link #paddingBuffer} using data from {@code input}, plus any additional buffered data
|
|
||||||
* at the end of {@code buffer} (up to its {@code size}) required to fill it, advancing the input
|
|
||||||
* position.
|
|
||||||
*/
|
|
||||||
private void updatePaddingBuffer(ByteBuffer input, byte[] buffer, int size) {
|
|
||||||
int fromInputSize = min(input.remaining(), paddingSize);
|
|
||||||
int fromBufferSize = paddingSize - fromInputSize;
|
|
||||||
System.arraycopy(
|
|
||||||
/* src= */ buffer,
|
|
||||||
/* srcPos= */ size - fromBufferSize,
|
|
||||||
/* dest= */ paddingBuffer,
|
|
||||||
/* destPos= */ 0,
|
|
||||||
/* length= */ fromBufferSize);
|
|
||||||
input.position(input.limit() - fromInputSize);
|
|
||||||
input.get(paddingBuffer, fromBufferSize, fromInputSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of input frames corresponding to {@code durationUs} microseconds of audio.
|
* Returns the number of input frames corresponding to {@code durationUs} microseconds of audio.
|
||||||
*/
|
*/
|
||||||
private int durationUsToFrames(long durationUs) {
|
private int durationUsToFrames(long durationUs) {
|
||||||
return (int) ((durationUs * inputAudioFormat.sampleRate) / C.MICROS_PER_SECOND);
|
return (int) ((durationUs * inputFormat.sampleRate) / C.MICROS_PER_SECOND);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -362,8 +720,8 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
|||||||
*/
|
*/
|
||||||
private int findNoisePosition(ByteBuffer buffer) {
|
private int findNoisePosition(ByteBuffer buffer) {
|
||||||
// The input is in ByteOrder.nativeOrder(), which is little endian on Android.
|
// The input is in ByteOrder.nativeOrder(), which is little endian on Android.
|
||||||
for (int i = buffer.position(); i < buffer.limit(); i += 2) {
|
for (int i = buffer.position() + 1; i < buffer.limit(); i += 2) {
|
||||||
if (Math.abs(buffer.getShort(i)) > silenceThresholdLevel) {
|
if (isNoise(buffer.get(i), buffer.get(i - 1))) {
|
||||||
// Round to the start of the frame.
|
// Round to the start of the frame.
|
||||||
return bytesPerFrame * (i / bytesPerFrame);
|
return bytesPerFrame * (i / bytesPerFrame);
|
||||||
}
|
}
|
||||||
@ -377,12 +735,21 @@ public final class SilenceSkippingAudioProcessor extends BaseAudioProcessor {
|
|||||||
*/
|
*/
|
||||||
private int findNoiseLimit(ByteBuffer buffer) {
|
private int findNoiseLimit(ByteBuffer buffer) {
|
||||||
// The input is in ByteOrder.nativeOrder(), which is little endian on Android.
|
// The input is in ByteOrder.nativeOrder(), which is little endian on Android.
|
||||||
for (int i = buffer.limit() - 2; i >= buffer.position(); i -= 2) {
|
for (int i = buffer.limit() - 1; i >= buffer.position(); i -= 2) {
|
||||||
if (Math.abs(buffer.getShort(i)) > silenceThresholdLevel) {
|
if (isNoise(buffer.get(i), buffer.get(i - 1))) {
|
||||||
// Return the start of the next frame.
|
// Return the start of the next frame.
|
||||||
return bytesPerFrame * (i / bytesPerFrame) + bytesPerFrame;
|
return bytesPerFrame * (i / bytesPerFrame) + bytesPerFrame;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return buffer.position();
|
return buffer.position();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether the given two bytes represent a short signed PCM value that is greater than {@link
|
||||||
|
* #silenceThresholdLevel}.
|
||||||
|
*/
|
||||||
|
private boolean isNoise(byte mostSignificantByte, byte leastSignificantByte) {
|
||||||
|
return Math.abs(twoByteSampleToInt(mostSignificantByte, leastSignificantByte))
|
||||||
|
> silenceThresholdLevel;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,7 @@ import static java.lang.Short.MAX_VALUE;
|
|||||||
import androidx.media3.common.C;
|
import androidx.media3.common.C;
|
||||||
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
|
import androidx.media3.common.audio.AudioProcessor.AudioFormat;
|
||||||
import androidx.test.ext.junit.runners.AndroidJUnit4;
|
import androidx.test.ext.junit.runners.AndroidJUnit4;
|
||||||
|
import com.google.common.collect.Range;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
import java.nio.ShortBuffer;
|
import java.nio.ShortBuffer;
|
||||||
@ -86,7 +87,7 @@ public final class SilenceSkippingAudioProcessorTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void skipInSilentSignal_skipsEverything() throws Exception {
|
public void skipInSilentSignal_skipsEverything() throws Exception {
|
||||||
// Given a signal with only noise.
|
// Given a signal with only silence.
|
||||||
InputBufferProvider inputBufferProvider =
|
InputBufferProvider inputBufferProvider =
|
||||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||||
TEST_SIGNAL_SILENCE_DURATION_MS, /* noiseDurationMs= */ 0, TEST_SIGNAL_FRAME_COUNT);
|
TEST_SIGNAL_SILENCE_DURATION_MS, /* noiseDurationMs= */ 0, TEST_SIGNAL_FRAME_COUNT);
|
||||||
@ -99,14 +100,15 @@ public final class SilenceSkippingAudioProcessorTest {
|
|||||||
long totalOutputFrames =
|
long totalOutputFrames =
|
||||||
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
||||||
|
|
||||||
// The entire signal is skipped.
|
// The entire signal is skipped except for the DEFAULT_MAX_SILENCE_TO_KEEP_DURATION_US.
|
||||||
assertThat(totalOutputFrames).isEqualTo(0);
|
assertThat(totalOutputFrames).isEqualTo(2000);
|
||||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(TEST_SIGNAL_FRAME_COUNT);
|
assertThat(silenceSkippingAudioProcessor.getSkippedFrames())
|
||||||
|
.isEqualTo(TEST_SIGNAL_FRAME_COUNT - 2000);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void skipInNoisySignal_skipsNothing() throws Exception {
|
public void skipInNoisySignal_skipsNothing() throws Exception {
|
||||||
// Given a signal with only silence.
|
// Given a signal with only noise.
|
||||||
InputBufferProvider inputBufferProvider =
|
InputBufferProvider inputBufferProvider =
|
||||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||||
/* silenceDurationMs= */ 0, TEST_SIGNAL_NOISE_DURATION_MS, TEST_SIGNAL_FRAME_COUNT);
|
/* silenceDurationMs= */ 0, TEST_SIGNAL_NOISE_DURATION_MS, TEST_SIGNAL_FRAME_COUNT);
|
||||||
@ -126,6 +128,30 @@ public final class SilenceSkippingAudioProcessorTest {
|
|||||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(0);
|
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void skipInNoisySignalWithShortSilences_skipsNothing() throws Exception {
|
||||||
|
// Given a signal with only noise.
|
||||||
|
InputBufferProvider inputBufferProvider =
|
||||||
|
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||||
|
/* silenceDurationMs= */ 30,
|
||||||
|
TEST_SIGNAL_NOISE_DURATION_MS - 30,
|
||||||
|
TEST_SIGNAL_FRAME_COUNT);
|
||||||
|
|
||||||
|
// When processing the entire signal.
|
||||||
|
SilenceSkippingAudioProcessor silenceSkippingAudioProcessor =
|
||||||
|
new SilenceSkippingAudioProcessor();
|
||||||
|
silenceSkippingAudioProcessor.setEnabled(true);
|
||||||
|
silenceSkippingAudioProcessor.configure(AUDIO_FORMAT);
|
||||||
|
silenceSkippingAudioProcessor.flush();
|
||||||
|
assertThat(silenceSkippingAudioProcessor.isActive()).isTrue();
|
||||||
|
long totalOutputFrames =
|
||||||
|
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
||||||
|
|
||||||
|
// None of the signal is skipped.
|
||||||
|
assertThat(totalOutputFrames).isEqualTo(TEST_SIGNAL_FRAME_COUNT);
|
||||||
|
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(0);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void skipInAlternatingTestSignal_hasCorrectOutputAndSkippedFrameCounts() throws Exception {
|
public void skipInAlternatingTestSignal_hasCorrectOutputAndSkippedFrameCounts() throws Exception {
|
||||||
// Given a signal that alternates between silence and noise.
|
// Given a signal that alternates between silence and noise.
|
||||||
@ -145,10 +171,10 @@ public final class SilenceSkippingAudioProcessorTest {
|
|||||||
long totalOutputFrames =
|
long totalOutputFrames =
|
||||||
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
process(silenceSkippingAudioProcessor, inputBufferProvider, INPUT_BUFFER_SIZE);
|
||||||
|
|
||||||
// The output consists of 50000 frames of noise, plus 20 frames of padding at the start and 99 *
|
// The output has 50000 frames of noise, plus 50 * 0.2 * 1000 padding (plus rounding errors).
|
||||||
// 40 frames of padding after that.
|
assertThat(totalOutputFrames).isIn(Range.closed(60000L - 500L, 60000L + 500L));
|
||||||
assertThat(totalOutputFrames).isEqualTo(50000 + (20 + 99 * 40));
|
assertThat(silenceSkippingAudioProcessor.getSkippedFrames())
|
||||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(50000 - (20 + 99 * 40));
|
.isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -171,10 +197,10 @@ public final class SilenceSkippingAudioProcessorTest {
|
|||||||
long totalOutputFrames =
|
long totalOutputFrames =
|
||||||
process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 80);
|
process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 80);
|
||||||
|
|
||||||
// The output consists of 50000 frames of noise, plus 20 frames of padding at the start and 99 *
|
// The output has 50000 frames of noise, plus 50 * 0.2 * 1000 padding (plus rounding errors).
|
||||||
// 40 frames of padding after that.
|
assertThat(totalOutputFrames).isIn(Range.closed(60000L - 500L, 60000L + 500L));
|
||||||
assertThat(totalOutputFrames).isEqualTo(50000 + (20 + 99 * 40));
|
assertThat(silenceSkippingAudioProcessor.getSkippedFrames())
|
||||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(50000 - (20 + 99 * 40));
|
.isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -197,14 +223,14 @@ public final class SilenceSkippingAudioProcessorTest {
|
|||||||
long totalOutputFrames =
|
long totalOutputFrames =
|
||||||
process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 120);
|
process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 120);
|
||||||
|
|
||||||
// The output consists of 50000 frames of noise, plus 20 frames of padding at the start and 99 *
|
// The output has 50000 frames of noise, plus 50 * 0.2 * 1000 padding (plus rounding errors).
|
||||||
// 40 frames of padding after that.
|
assertThat(totalOutputFrames).isIn(Range.closed(60000L - 500L, 60000L + 500L));
|
||||||
assertThat(totalOutputFrames).isEqualTo(50000 + (20 + 99 * 40));
|
assertThat(silenceSkippingAudioProcessor.getSkippedFrames())
|
||||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(50000 - (20 + 99 * 40));
|
.isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void customPaddingValue_hasCorrectOutputAndSkippedFrameCounts() throws Exception {
|
public void customSilenceRetentionValue_hasCorrectOutputAndSkippedFrameCounts() throws Exception {
|
||||||
// Given a signal that alternates between silence and noise.
|
// Given a signal that alternates between silence and noise.
|
||||||
InputBufferProvider inputBufferProvider =
|
InputBufferProvider inputBufferProvider =
|
||||||
getInputBufferProviderForAlternatingSilenceAndNoise(
|
getInputBufferProviderForAlternatingSilenceAndNoise(
|
||||||
@ -212,11 +238,13 @@ public final class SilenceSkippingAudioProcessorTest {
|
|||||||
TEST_SIGNAL_NOISE_DURATION_MS,
|
TEST_SIGNAL_NOISE_DURATION_MS,
|
||||||
TEST_SIGNAL_FRAME_COUNT);
|
TEST_SIGNAL_FRAME_COUNT);
|
||||||
|
|
||||||
// When processing the entire signal with a larger than normal padding silence.
|
// When processing the entire signal with a smaller than normal retention ratio.
|
||||||
SilenceSkippingAudioProcessor silenceSkippingAudioProcessor =
|
SilenceSkippingAudioProcessor silenceSkippingAudioProcessor =
|
||||||
new SilenceSkippingAudioProcessor(
|
new SilenceSkippingAudioProcessor(
|
||||||
SilenceSkippingAudioProcessor.DEFAULT_MINIMUM_SILENCE_DURATION_US,
|
SilenceSkippingAudioProcessor.DEFAULT_MINIMUM_SILENCE_DURATION_US,
|
||||||
/* paddingSilenceUs= */ 21_000,
|
/* silenceRetentionRatio= */ 0.05f,
|
||||||
|
SilenceSkippingAudioProcessor.DEFAULT_MAX_SILENCE_TO_KEEP_DURATION_US,
|
||||||
|
SilenceSkippingAudioProcessor.DEFAULT_MIN_VOLUME_TO_KEEP_PERCENTAGE,
|
||||||
SilenceSkippingAudioProcessor.DEFAULT_SILENCE_THRESHOLD_LEVEL);
|
SilenceSkippingAudioProcessor.DEFAULT_SILENCE_THRESHOLD_LEVEL);
|
||||||
silenceSkippingAudioProcessor.setEnabled(true);
|
silenceSkippingAudioProcessor.setEnabled(true);
|
||||||
silenceSkippingAudioProcessor.configure(AUDIO_FORMAT);
|
silenceSkippingAudioProcessor.configure(AUDIO_FORMAT);
|
||||||
@ -225,10 +253,10 @@ public final class SilenceSkippingAudioProcessorTest {
|
|||||||
long totalOutputFrames =
|
long totalOutputFrames =
|
||||||
process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 120);
|
process(silenceSkippingAudioProcessor, inputBufferProvider, /* inputBufferSize= */ 120);
|
||||||
|
|
||||||
// The output consists of 50000 frames of noise, plus 21 frames of padding at the start and 99 *
|
// The output has 50000 frames of noise, plus 50 * 0.05 * 1000 padding (plus rounding errors).
|
||||||
// 42 frames of padding after that.
|
assertThat(totalOutputFrames).isIn(Range.closed(52500L - 500L, 52500L + 500L));
|
||||||
assertThat(totalOutputFrames).isEqualTo(50000 + (21 + 99 * 42));
|
assertThat(silenceSkippingAudioProcessor.getSkippedFrames())
|
||||||
assertThat(silenceSkippingAudioProcessor.getSkippedFrames()).isEqualTo(50000 - (21 + 99 * 42));
|
.isEqualTo(TEST_SIGNAL_FRAME_COUNT - totalOutputFrames);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
Loading…
x
Reference in New Issue
Block a user