Handle media item (Effects/Format) changes in AudioSamplePipeline.

On a MediaItem change, the input Format (and the Effects to apply) may
differ from those of the previous item. The AudioProcessingPipeline must
therefore be reconfigured to determine which processing is active and what
the AudioFormat of its output data is. If that output AudioFormat differs
from the one the encoder was configured with, additional AudioProcessor
instances must be added so that the encoder can still accept the audio buffers.

PiperOrigin-RevId: 544338451
This commit is contained in:
samrobinson 2023-06-29 13:29:41 +00:00 committed by Tianyi Feng
parent 32bd5dd718
commit f60f79bb10
12 changed files with 733 additions and 155 deletions

View File

@ -170,7 +170,12 @@ public final class AudioProcessingPipeline {
} }
} }
/** Returns the {@link AudioFormat} currently being output. */ /**
* Returns the {@link AudioFormat} of data being output through {@link #getOutput()}.
*
* @return The {@link AudioFormat} currently being output, or {@link AudioFormat#NOT_SET} if no
* {@linkplain #configure(AudioFormat) configuration} has been {@linkplain #flush() applied}.
*/
public AudioFormat getOutputAudioFormat() { public AudioFormat getOutputAudioFormat() {
return outputAudioFormat; return outputAudioFormat;
} }

View File

@ -637,110 +637,38 @@ sample:
presentationTimeUs = 967709 presentationTimeUs = 967709
sample: sample:
trackIndex = 1 trackIndex = 1
dataHashCode = -1759454975 dataHashCode = 1916158593
size = 440 size = 5164
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 992653 presentationTimeUs = 992653
sample: sample:
trackIndex = 1 trackIndex = 1
dataHashCode = -1759454975 dataHashCode = -1950877403
size = 440 size = 1120
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 995147 presentationTimeUs = 1021927
sample: sample:
trackIndex = 1 trackIndex = 1
dataHashCode = -1759454975 dataHashCode = 1590174306
size = 440
isKeyFrame = true
presentationTimeUs = 997641
sample:
trackIndex = 1
dataHashCode = -1759454975
size = 440
isKeyFrame = true
presentationTimeUs = 1000136
sample:
trackIndex = 1
dataHashCode = -1759454975
size = 440
isKeyFrame = true
presentationTimeUs = 1002630
sample:
trackIndex = 1
dataHashCode = -1759454975
size = 440
isKeyFrame = true
presentationTimeUs = 1005124
sample:
trackIndex = 1
dataHashCode = -1759454975
size = 440
isKeyFrame = true
presentationTimeUs = 1007619
sample:
trackIndex = 1
dataHashCode = -1759454975
size = 440
isKeyFrame = true
presentationTimeUs = 1010113
sample:
trackIndex = 1
dataHashCode = -1759454975
size = 440
isKeyFrame = true
presentationTimeUs = 1012607
sample:
trackIndex = 1
dataHashCode = -1759454975
size = 440
isKeyFrame = true
presentationTimeUs = 1015102
sample:
trackIndex = 1
dataHashCode = -2121773972
size = 440
isKeyFrame = true
presentationTimeUs = 1017596
sample:
trackIndex = 1
dataHashCode = 1517597454
size = 440
isKeyFrame = true
presentationTimeUs = 1020090
sample:
trackIndex = 1
dataHashCode = 1952035733
size = 448
isKeyFrame = true
presentationTimeUs = 1022585
sample:
trackIndex = 1
dataHashCode = -1861807761
size = 604
isKeyFrame = true
presentationTimeUs = 1025124
sample:
trackIndex = 1
dataHashCode = -1460926592
size = 880 size = 880
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1028548 presentationTimeUs = 1028276
sample: sample:
trackIndex = 1 trackIndex = 1
dataHashCode = 1194085269 dataHashCode = -432400310
size = 532 size = 1588
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1033537 presentationTimeUs = 1033265
sample: sample:
trackIndex = 1 trackIndex = 1
dataHashCode = -493118955 dataHashCode = -270222999
size = 2296 size = 832
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1036553 presentationTimeUs = 1042267
sample: sample:
trackIndex = 1 trackIndex = 1
dataHashCode = 90420756 dataHashCode = -1941634508
size = 4568 size = 5028
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1049569 presentationTimeUs = 1046984
released = true released = true

View File

@ -66,68 +66,74 @@ sample:
presentationTimeUs = 872766 presentationTimeUs = 872766
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -949798077 dataHashCode = 992130724
size = 9424 size = 2580
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 970680 presentationTimeUs = 970680
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -1275686831 dataHashCode = 2042155098
size = 8088 size = 6482
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1077528 presentationTimeUs = 999931
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -1360039206 dataHashCode = 1417355469
size = 8858 size = 8556
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1169229 presentationTimeUs = 1073424
sample:
trackIndex = 0
dataHashCode = -2107697498
size = 8754
isKeyFrame = true
presentationTimeUs = 1170430
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 736072795 dataHashCode = 736072795
size = 8908 size = 8908
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1269659 presentationTimeUs = 1269682
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -1913553170 dataHashCode = -1913553170
size = 9208 size = 9208
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1370657 presentationTimeUs = 1370680
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 13583718 dataHashCode = 13583718
size = 8968 size = 8968
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1475056 presentationTimeUs = 1475079
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -1444602526 dataHashCode = -1444602526
size = 8588 size = 8588
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1576734 presentationTimeUs = 1576757
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = -1693065958 dataHashCode = -1693065958
size = 8778 size = 8778
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1674104 presentationTimeUs = 1674126
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 2071205641 dataHashCode = 2071205641
size = 8736 size = 8736
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1773628 presentationTimeUs = 1773650
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 1433538831 dataHashCode = 1433538831
size = 8636 size = 8636
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1872675 presentationTimeUs = 1872698
sample: sample:
trackIndex = 0 trackIndex = 0
dataHashCode = 992130724 dataHashCode = 992130724
size = 2580 size = 2580
isKeyFrame = true isKeyFrame = true
presentationTimeUs = 1970589 presentationTimeUs = 1970612
released = true released = true

View File

@ -0,0 +1,139 @@
format 0:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
sample:
trackIndex = 0
dataHashCode = 2042155098
size = 6482
isKeyFrame = true
presentationTimeUs = 0
sample:
trackIndex = 0
dataHashCode = 1417355469
size = 8556
isKeyFrame = true
presentationTimeUs = 73492
sample:
trackIndex = 0
dataHashCode = -2107697498
size = 8754
isKeyFrame = true
presentationTimeUs = 170498
sample:
trackIndex = 0
dataHashCode = 736072795
size = 8908
isKeyFrame = true
presentationTimeUs = 269750
sample:
trackIndex = 0
dataHashCode = -1913553170
size = 9208
isKeyFrame = true
presentationTimeUs = 370748
sample:
trackIndex = 0
dataHashCode = 13583718
size = 8968
isKeyFrame = true
presentationTimeUs = 475147
sample:
trackIndex = 0
dataHashCode = -1444602526
size = 8588
isKeyFrame = true
presentationTimeUs = 576825
sample:
trackIndex = 0
dataHashCode = -1693065958
size = 8778
isKeyFrame = true
presentationTimeUs = 674195
sample:
trackIndex = 0
dataHashCode = 2071205641
size = 8736
isKeyFrame = true
presentationTimeUs = 773718
sample:
trackIndex = 0
dataHashCode = 1433538831
size = 8636
isKeyFrame = true
presentationTimeUs = 872766
sample:
trackIndex = 0
dataHashCode = 992130724
size = 2580
isKeyFrame = true
presentationTimeUs = 970680
sample:
trackIndex = 0
dataHashCode = 2002762528
size = 6450
isKeyFrame = true
presentationTimeUs = 999931
sample:
trackIndex = 0
dataHashCode = -657557348
size = 9288
isKeyFrame = true
presentationTimeUs = 1073061
sample:
trackIndex = 0
dataHashCode = 1101737790
size = 8026
isKeyFrame = true
presentationTimeUs = 1178367
sample:
trackIndex = 0
dataHashCode = 942749699
size = 9580
isKeyFrame = true
presentationTimeUs = 1269365
sample:
trackIndex = 0
dataHashCode = 1696779430
size = 9176
isKeyFrame = true
presentationTimeUs = 1377981
sample:
trackIndex = 0
dataHashCode = -1146784158
size = 7898
isKeyFrame = true
presentationTimeUs = 1482018
sample:
trackIndex = 0
dataHashCode = -108739018
size = 9244
isKeyFrame = true
presentationTimeUs = 1571564
sample:
trackIndex = 0
dataHashCode = 1527742727
size = 8614
isKeyFrame = true
presentationTimeUs = 1676371
sample:
trackIndex = 0
dataHashCode = -771072152
size = 8408
isKeyFrame = true
presentationTimeUs = 1774036
sample:
trackIndex = 0
dataHashCode = -307907100
size = 10212
isKeyFrame = true
presentationTimeUs = 1869365
sample:
trackIndex = 0
dataHashCode = -2024887086
size = 1312
isKeyFrame = true
presentationTimeUs = 1985147
released = true

View File

@ -0,0 +1,109 @@
format 0:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 2
sampleRate = 48000
pcmEncoding = 2
sample:
trackIndex = 0
dataHashCode = -226169394
size = 15528
isKeyFrame = true
presentationTimeUs = 0
sample:
trackIndex = 0
dataHashCode = -1654106973
size = 17436
isKeyFrame = true
presentationTimeUs = 80875
sample:
trackIndex = 0
dataHashCode = 1078555669
size = 19384
isKeyFrame = true
presentationTimeUs = 171687
sample:
trackIndex = 0
dataHashCode = -1783495042
size = 8904
isKeyFrame = true
presentationTimeUs = 272645
sample:
trackIndex = 0
dataHashCode = 1369234850
size = 5688
isKeyFrame = true
presentationTimeUs = 319020
sample:
trackIndex = 0
dataHashCode = 340823895
size = 14108
isKeyFrame = true
presentationTimeUs = 348645
sample:
trackIndex = 0
dataHashCode = 591641755
size = 18624
isKeyFrame = true
presentationTimeUs = 422125
sample:
trackIndex = 0
dataHashCode = -909124577
size = 19056
isKeyFrame = true
presentationTimeUs = 519125
sample:
trackIndex = 0
dataHashCode = -1644849415
size = 19392
isKeyFrame = true
presentationTimeUs = 618375
sample:
trackIndex = 0
dataHashCode = 660809049
size = 20044
isKeyFrame = true
presentationTimeUs = 719375
sample:
trackIndex = 0
dataHashCode = -601805369
size = 19524
isKeyFrame = true
presentationTimeUs = 823770
sample:
trackIndex = 0
dataHashCode = -1348998373
size = 18696
isKeyFrame = true
presentationTimeUs = 925458
sample:
trackIndex = 0
dataHashCode = 1191266277
size = 19108
isKeyFrame = true
presentationTimeUs = 1022833
sample:
trackIndex = 0
dataHashCode = -1278785599
size = 19016
isKeyFrame = true
presentationTimeUs = 1122354
sample:
trackIndex = 0
dataHashCode = -809901361
size = 18800
isKeyFrame = true
presentationTimeUs = 1221395
sample:
trackIndex = 0
dataHashCode = 163546833
size = 5616
isKeyFrame = true
presentationTimeUs = 1319312
sample:
trackIndex = 0
dataHashCode = 743627
size = 4
isKeyFrame = true
presentationTimeUs = 1348562
released = true

View File

@ -0,0 +1,109 @@
format 0:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 2
sampleRate = 48000
pcmEncoding = 2
sample:
trackIndex = 0
dataHashCode = -226169394
size = 15528
isKeyFrame = true
presentationTimeUs = 0
sample:
trackIndex = 0
dataHashCode = -1654106973
size = 17436
isKeyFrame = true
presentationTimeUs = 80875
sample:
trackIndex = 0
dataHashCode = 1078555669
size = 19384
isKeyFrame = true
presentationTimeUs = 171687
sample:
trackIndex = 0
dataHashCode = -1783495042
size = 8904
isKeyFrame = true
presentationTimeUs = 272645
sample:
trackIndex = 0
dataHashCode = 1369234850
size = 5688
isKeyFrame = true
presentationTimeUs = 319020
sample:
trackIndex = 0
dataHashCode = 711439073
size = 14040
isKeyFrame = true
presentationTimeUs = 348645
sample:
trackIndex = 0
dataHashCode = -555451391
size = 20216
isKeyFrame = true
presentationTimeUs = 421770
sample:
trackIndex = 0
dataHashCode = 1700905759
size = 17472
isKeyFrame = true
presentationTimeUs = 527062
sample:
trackIndex = 0
dataHashCode = -416119019
size = 20856
isKeyFrame = true
presentationTimeUs = 618062
sample:
trackIndex = 0
dataHashCode = -2060733789
size = 19976
isKeyFrame = true
presentationTimeUs = 726687
sample:
trackIndex = 0
dataHashCode = 212454167
size = 17192
isKeyFrame = true
presentationTimeUs = 830729
sample:
trackIndex = 0
dataHashCode = 956501205
size = 20124
isKeyFrame = true
presentationTimeUs = 920270
sample:
trackIndex = 0
dataHashCode = 299859135
size = 18748
isKeyFrame = true
presentationTimeUs = 1025083
sample:
trackIndex = 0
dataHashCode = 636945085
size = 18304
isKeyFrame = true
presentationTimeUs = 1122729
sample:
trackIndex = 0
dataHashCode = -754285165
size = 22232
isKeyFrame = true
presentationTimeUs = 1218062
sample:
trackIndex = 0
dataHashCode = -1967816889
size = 2856
isKeyFrame = true
presentationTimeUs = 1333854
sample:
trackIndex = 0
dataHashCode = 1012987
size = 4
isKeyFrame = true
presentationTimeUs = 1348729
released = true

View File

@ -0,0 +1,97 @@
format 0:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 2
sampleRate = 48000
pcmEncoding = 2
sample:
trackIndex = 0
dataHashCode = -278103001
size = 19200
isKeyFrame = true
presentationTimeUs = 0
sample:
trackIndex = 0
dataHashCode = 1522105084
size = 19200
isKeyFrame = true
presentationTimeUs = 100000
sample:
trackIndex = 0
dataHashCode = 932319027
size = 19200
isKeyFrame = true
presentationTimeUs = 200000
sample:
trackIndex = 0
dataHashCode = 325000240
size = 9336
isKeyFrame = true
presentationTimeUs = 300000
sample:
trackIndex = 0
dataHashCode = -717358295
size = 19196
isKeyFrame = true
presentationTimeUs = 348625
sample:
trackIndex = 0
dataHashCode = -877978019
size = 19200
isKeyFrame = true
presentationTimeUs = 448604
sample:
trackIndex = 0
dataHashCode = 903565693
size = 19200
isKeyFrame = true
presentationTimeUs = 548604
sample:
trackIndex = 0
dataHashCode = -1424006305
size = 19200
isKeyFrame = true
presentationTimeUs = 648604
sample:
trackIndex = 0
dataHashCode = -1450884853
size = 19200
isKeyFrame = true
presentationTimeUs = 748604
sample:
trackIndex = 0
dataHashCode = 1566382623
size = 19200
isKeyFrame = true
presentationTimeUs = 848604
sample:
trackIndex = 0
dataHashCode = -510424155
size = 19200
isKeyFrame = true
presentationTimeUs = 948604
sample:
trackIndex = 0
dataHashCode = 13706341
size = 19200
isKeyFrame = true
presentationTimeUs = 1048604
sample:
trackIndex = 0
dataHashCode = -1421600077
size = 19200
isKeyFrame = true
presentationTimeUs = 1148604
sample:
trackIndex = 0
dataHashCode = 722747579
size = 19200
isKeyFrame = true
presentationTimeUs = 1248604
sample:
trackIndex = 0
dataHashCode = 326119
size = 4
isKeyFrame = true
presentationTimeUs = 1348604
released = true

View File

@ -19,10 +19,12 @@ package androidx.media3.transformer;
import static androidx.media3.common.util.Assertions.checkArgument; import static androidx.media3.common.util.Assertions.checkArgument;
import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.common.util.Assertions.checkState; import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.common.util.Assertions.checkStateNotNull;
import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DIRECT; import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DIRECT;
import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED; import static androidx.media3.decoder.DecoderInputBuffer.BUFFER_REPLACEMENT_MODE_DISABLED;
import static java.lang.Math.min; import static java.lang.Math.min;
import android.util.Pair;
import androidx.annotation.Nullable; import androidx.annotation.Nullable;
import androidx.media3.common.C; import androidx.media3.common.C;
import androidx.media3.common.Format; import androidx.media3.common.Format;
@ -30,7 +32,11 @@ import androidx.media3.common.MimeTypes;
import androidx.media3.common.audio.AudioProcessingPipeline; import androidx.media3.common.audio.AudioProcessingPipeline;
import androidx.media3.common.audio.AudioProcessor; import androidx.media3.common.audio.AudioProcessor;
import androidx.media3.common.audio.AudioProcessor.AudioFormat; import androidx.media3.common.audio.AudioProcessor.AudioFormat;
import androidx.media3.common.audio.ChannelMixingAudioProcessor;
import androidx.media3.common.audio.ChannelMixingMatrix;
import androidx.media3.common.audio.SonicAudioProcessor;
import androidx.media3.common.audio.SpeedChangingAudioProcessor; import androidx.media3.common.audio.SpeedChangingAudioProcessor;
import androidx.media3.common.util.NullableType;
import androidx.media3.common.util.Util; import androidx.media3.common.util.Util;
import androidx.media3.decoder.DecoderInputBuffer; import androidx.media3.decoder.DecoderInputBuffer;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
@ -38,6 +44,7 @@ import java.nio.ByteBuffer;
import java.nio.ByteOrder; import java.nio.ByteOrder;
import java.util.Queue; import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.atomic.AtomicReference;
import org.checkerframework.dataflow.qual.Pure; import org.checkerframework.dataflow.qual.Pure;
/** Pipeline to process, re-encode and mux raw audio samples. */ /** Pipeline to process, re-encode and mux raw audio samples. */
@ -49,12 +56,14 @@ import org.checkerframework.dataflow.qual.Pure;
private final SilentAudioGenerator silentAudioGenerator; private final SilentAudioGenerator silentAudioGenerator;
private final Queue<DecoderInputBuffer> availableInputBuffers; private final Queue<DecoderInputBuffer> availableInputBuffers;
private final Queue<DecoderInputBuffer> pendingInputBuffers; private final Queue<DecoderInputBuffer> pendingInputBuffers;
private final AudioProcessingPipeline audioProcessingPipeline;
private final Codec encoder; private final Codec encoder;
private final AudioFormat encoderInputAudioFormat; private final AudioFormat encoderInputAudioFormat;
private final DecoderInputBuffer encoderInputBuffer; private final DecoderInputBuffer encoderInputBuffer;
private final DecoderInputBuffer encoderOutputBuffer; private final DecoderInputBuffer encoderOutputBuffer;
private final AtomicReference<@NullableType Pair<EditedMediaItem, @NullableType Format>>
pendingMediaItem;
private boolean receivedFirstMediaItemCallback;
private AudioProcessingPipeline audioProcessingPipeline;
private long encoderTotalInputBytes; private long encoderTotalInputBytes;
private volatile boolean queueEndOfStreamAfterSilence; private volatile boolean queueEndOfStreamAfterSilence;
@ -64,13 +73,13 @@ import org.checkerframework.dataflow.qual.Pure;
Format firstAssetLoaderInputFormat, Format firstAssetLoaderInputFormat,
Format firstPipelineInputFormat, Format firstPipelineInputFormat,
TransformationRequest transformationRequest, TransformationRequest transformationRequest,
boolean flattenForSlowMotion, EditedMediaItem firstEditedMediaItem,
ImmutableList<AudioProcessor> audioProcessors,
Codec.EncoderFactory encoderFactory, Codec.EncoderFactory encoderFactory,
MuxerWrapper muxerWrapper, MuxerWrapper muxerWrapper,
FallbackListener fallbackListener) FallbackListener fallbackListener)
throws ExportException { throws ExportException {
super(firstAssetLoaderInputFormat, muxerWrapper); super(firstAssetLoaderInputFormat, muxerWrapper);
checkArgument(firstPipelineInputFormat.pcmEncoding != Format.NO_VALUE);
availableInputBuffers = new ConcurrentLinkedDeque<>(); availableInputBuffers = new ConcurrentLinkedDeque<>();
ByteBuffer emptyBuffer = ByteBuffer.allocateDirect(0).order(ByteOrder.nativeOrder()); ByteBuffer emptyBuffer = ByteBuffer.allocateDirect(0).order(ByteOrder.nativeOrder());
@ -80,36 +89,21 @@ import org.checkerframework.dataflow.qual.Pure;
availableInputBuffers.add(inputBuffer); availableInputBuffers.add(inputBuffer);
} }
pendingInputBuffers = new ConcurrentLinkedDeque<>(); pendingInputBuffers = new ConcurrentLinkedDeque<>();
encoderInputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED); encoderInputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED);
encoderOutputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED); encoderOutputBuffer = new DecoderInputBuffer(BUFFER_REPLACEMENT_MODE_DISABLED);
pendingMediaItem = new AtomicReference<>();
checkArgument(firstPipelineInputFormat.pcmEncoding != Format.NO_VALUE);
AudioFormat inputAudioFormat = new AudioFormat(firstPipelineInputFormat); AudioFormat inputAudioFormat = new AudioFormat(firstPipelineInputFormat);
silentAudioGenerator = new SilentAudioGenerator(inputAudioFormat); silentAudioGenerator = new SilentAudioGenerator(inputAudioFormat);
audioProcessingPipeline =
configureProcessing(
/* editedMediaItem= */ firstEditedMediaItem,
/* trackFormat= */ firstPipelineInputFormat,
/* inputAudioFormat= */ inputAudioFormat,
/* requiredOutputAudioFormat= */ AudioFormat.NOT_SET);
AudioFormat outputAudioFormat = audioProcessingPipeline.getOutputAudioFormat();
checkState(!outputAudioFormat.equals(AudioFormat.NOT_SET));
if (flattenForSlowMotion && firstAssetLoaderInputFormat.metadata != null) { encoderInputAudioFormat = outputAudioFormat;
audioProcessors =
new ImmutableList.Builder<AudioProcessor>()
.add(
new SpeedChangingAudioProcessor(
new SegmentSpeedProvider(firstAssetLoaderInputFormat.metadata)))
.addAll(audioProcessors)
.build();
}
audioProcessingPipeline = new AudioProcessingPipeline(audioProcessors);
try {
encoderInputAudioFormat = audioProcessingPipeline.configure(inputAudioFormat);
} catch (AudioProcessor.UnhandledAudioFormatException unhandledAudioFormatException) {
throw ExportException.createForAudioProcessing(
unhandledAudioFormatException, inputAudioFormat);
}
audioProcessingPipeline.flush();
Format requestedEncoderFormat = Format requestedEncoderFormat =
new Format.Builder() new Format.Builder()
.setSampleMimeType( .setSampleMimeType(
@ -153,16 +147,22 @@ import org.checkerframework.dataflow.qual.Pure;
if (isLast) { if (isLast) {
queueEndOfStreamAfterSilence = true; queueEndOfStreamAfterSilence = true;
} }
} else {
checkState(MimeTypes.isAudio(trackFormat.sampleMimeType));
checkState(trackFormat.pcmEncoding != Format.NO_VALUE);
}
if (!receivedFirstMediaItemCallback) {
receivedFirstMediaItemCallback = true;
return; return;
} }
checkState(MimeTypes.isAudio(trackFormat.sampleMimeType)); pendingMediaItem.set(Pair.create(editedMediaItem, trackFormat));
checkState(trackFormat.pcmEncoding != Format.NO_VALUE);
} }
@Override @Override
@Nullable @Nullable
public DecoderInputBuffer getInputBuffer() { public DecoderInputBuffer getInputBuffer() {
if (shouldGenerateSilence()) { if (shouldGenerateSilence() || pendingMediaItem.get() != null) {
return null; return null;
} }
return availableInputBuffers.peek(); return availableInputBuffers.peek();
@ -170,6 +170,7 @@ import org.checkerframework.dataflow.qual.Pure;
@Override @Override
public boolean queueInputBuffer() { public boolean queueInputBuffer() {
checkState(pendingMediaItem.get() == null);
DecoderInputBuffer inputBuffer = availableInputBuffers.remove(); DecoderInputBuffer inputBuffer = availableInputBuffers.remove();
pendingInputBuffers.add(inputBuffer); pendingInputBuffers.add(inputBuffer);
return true; return true;
@ -218,10 +219,34 @@ import org.checkerframework.dataflow.qual.Pure;
return encoder.isEnded(); return encoder.isEnded();
} }
/**
* Reconfigures audio processing based on the pending {@linkplain #onMediaItemChanged media item
* change}.
*
* <p>Before reconfiguration, all pending buffers must be fully processed and drained to the
* encoder. End of stream buffers must still be consumed, but without {@linkplain
* #queueEndOfStreamToEncoder() queueing end of stream to the encoder}, so that the encoder can
* keep accepting data after the change.
*
* <p>The replacement {@link AudioProcessingPipeline} is required to output the format the encoder
* was configured with. The pending media item is cleared only after the new pipeline has been
* configured successfully.
*
* <p>Must only be called while a media item change is pending; this is asserted on entry.
*
* @throws ExportException If audio processing can not be configured for the pending media item.
*/
private void reconfigureProcessingForPendingMediaItem() throws ExportException {
Pair<EditedMediaItem, @NullableType Format> pendingChange =
checkStateNotNull(pendingMediaItem.get());
// If the change carries no track format, use the silent audio generator's format as the input.
AudioFormat pendingAudioFormat =
pendingChange.second != null
? new AudioFormat(pendingChange.second)
: silentAudioGenerator.audioFormat;
// The encoder is not reconfigured, so the new pipeline must output the encoder's input format.
audioProcessingPipeline =
configureProcessing(
/* editedMediaItem= */ pendingChange.first,
/* trackFormat= */ pendingChange.second,
/* inputAudioFormat= */ pendingAudioFormat,
/* requiredOutputAudioFormat= */ encoderInputAudioFormat);
// Clearing the pending item last keeps getInputBuffer() returning null until the new pipeline
// is in place.
pendingMediaItem.set(null);
}
/** /**
* Attempts to pass input data to the encoder. * Attempts to pass input data to the encoder.
* *
* @return Whether it may be possible to feed more data immediately by calling this method again. * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data.
*/ */
private boolean feedEncoderFromInput() throws ExportException { private boolean feedEncoderFromInput() throws ExportException {
if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) { if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) {
@ -234,6 +259,10 @@ import org.checkerframework.dataflow.qual.Pure;
} }
if (pendingInputBuffers.isEmpty()) { if (pendingInputBuffers.isEmpty()) {
if (pendingMediaItem.get() != null) {
reconfigureProcessingForPendingMediaItem();
return true;
}
// Only read volatile variable queueEndOfStreamAfterSilence if there is a chance that end of // Only read volatile variable queueEndOfStreamAfterSilence if there is a chance that end of
// stream should be queued. // stream should be queued.
if (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence) { if (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence) {
@ -244,7 +273,9 @@ import org.checkerframework.dataflow.qual.Pure;
DecoderInputBuffer pendingInputBuffer = pendingInputBuffers.element(); DecoderInputBuffer pendingInputBuffer = pendingInputBuffers.element();
if (pendingInputBuffer.isEndOfStream()) { if (pendingInputBuffer.isEndOfStream()) {
queueEndOfStreamToEncoder(); if (pendingMediaItem.get() == null) {
queueEndOfStreamToEncoder();
}
removePendingInputBuffer(); removePendingInputBuffer();
return false; return false;
} }
@ -260,7 +291,7 @@ import org.checkerframework.dataflow.qual.Pure;
/** /**
* Attempts to feed audio processor output data to the encoder. * Attempts to feed audio processor output data to the encoder.
* *
* @return Whether it may be possible to feed more data immediately by calling this method again. * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data.
*/ */
private boolean feedEncoderFromProcessingPipeline() throws ExportException { private boolean feedEncoderFromProcessingPipeline() throws ExportException {
if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) { if (!encoder.maybeDequeueInputBuffer(encoderInputBuffer)) {
@ -268,9 +299,12 @@ import org.checkerframework.dataflow.qual.Pure;
} }
ByteBuffer processingPipelineOutputBuffer = audioProcessingPipeline.getOutput(); ByteBuffer processingPipelineOutputBuffer = audioProcessingPipeline.getOutput();
if (!processingPipelineOutputBuffer.hasRemaining()) { if (!processingPipelineOutputBuffer.hasRemaining()) {
if (audioProcessingPipeline.isEnded()) { if (audioProcessingPipeline.isEnded()) {
if (pendingMediaItem.get() != null) {
reconfigureProcessingForPendingMediaItem();
return true;
}
queueEndOfStreamToEncoder(); queueEndOfStreamToEncoder();
} }
return false; return false;
@ -283,7 +317,7 @@ import org.checkerframework.dataflow.qual.Pure;
/** /**
* Attempts to feed input data to the {@link AudioProcessingPipeline}. * Attempts to feed input data to the {@link AudioProcessingPipeline}.
* *
* @return Whether it may be possible to feed more data immediately by calling this method again. * @return Whether the {@link AudioSamplePipeline} may be able to continue processing data.
*/ */
private boolean feedProcessingPipelineFromInput() { private boolean feedProcessingPipelineFromInput() {
if (shouldGenerateSilence()) { if (shouldGenerateSilence()) {
@ -295,7 +329,8 @@ import org.checkerframework.dataflow.qual.Pure;
if (pendingInputBuffers.isEmpty()) { if (pendingInputBuffers.isEmpty()) {
// Only read volatile variable queueEndOfStreamAfterSilence if there is a chance that end of // Only read volatile variable queueEndOfStreamAfterSilence if there is a chance that end of
// stream should be queued. // stream should be queued.
if (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence) { if (pendingMediaItem.get() != null
|| (!silentAudioGenerator.hasRemaining() && queueEndOfStreamAfterSilence)) {
audioProcessingPipeline.queueEndOfStream(); audioProcessingPipeline.queueEndOfStream();
} }
return false; return false;
@ -370,4 +405,57 @@ import org.checkerframework.dataflow.qual.Pure;
private boolean shouldGenerateSilence() { private boolean shouldGenerateSilence() {
return silentAudioGenerator.hasRemaining() && pendingInputBuffers.isEmpty(); return silentAudioGenerator.hasRemaining() && pendingInputBuffers.isEmpty();
} }
/**
* Builds, configures and flushes an {@link AudioProcessingPipeline} for a media item.
*
* <p>The processors are added in this order:
*
* <ol>
* <li>A {@link SpeedChangingAudioProcessor}, only if {@code editedMediaItem.flattenForSlowMotion}
* is set and {@code trackFormat} is non-null with non-null metadata.
* <li>The processors in {@code editedMediaItem.effects.audioProcessors}.
* <li>If {@code requiredOutputAudioFormat} is set: a {@link SonicAudioProcessor} targeting the
* required sample rate, followed (when the required channel count is at most 2) by a {@link
* ChannelMixingAudioProcessor} with matrices for mono and stereo input.
* </ol>
*
* @param editedMediaItem The {@link EditedMediaItem} whose effects and slow motion flag are used.
* @param trackFormat The {@link Format} of the track, or {@code null} if there is none.
* @param inputAudioFormat The {@link AudioFormat} the pipeline is configured with as input.
* @param requiredOutputAudioFormat The {@link AudioFormat} the pipeline must output, or {@link
* AudioFormat#NOT_SET} if the output format is unconstrained.
* @return The configured and flushed {@link AudioProcessingPipeline}.
* @throws ExportException If the pipeline can not be configured for {@code inputAudioFormat}, or
* if its output format does not equal a set {@code requiredOutputAudioFormat}.
*/
private static AudioProcessingPipeline configureProcessing(
EditedMediaItem editedMediaItem,
@Nullable Format trackFormat,
AudioFormat inputAudioFormat,
AudioFormat requiredOutputAudioFormat)
throws ExportException {
ImmutableList.Builder<AudioProcessor> audioProcessors = new ImmutableList.Builder<>();
// Slow motion flattening needs the track metadata, so it is skipped when none is available.
if (editedMediaItem.flattenForSlowMotion
&& trackFormat != null
&& trackFormat.metadata != null) {
audioProcessors.add(
new SpeedChangingAudioProcessor(new SegmentSpeedProvider(trackFormat.metadata)));
}
audioProcessors.addAll(editedMediaItem.effects.audioProcessors);
// Ensure the output from the AudioProcessingPipeline matches what the encoder is configured to
// receive. These processors are appended after the media item's own effects.
if (!requiredOutputAudioFormat.equals(AudioFormat.NOT_SET)) {
SonicAudioProcessor sampleRateChanger = new SonicAudioProcessor();
sampleRateChanger.setOutputSampleRateHz(requiredOutputAudioFormat.sampleRate);
audioProcessors.add(sampleRateChanger);
// TODO(b/262706549): Handle channel mixing with AudioMixer.
if (requiredOutputAudioFormat.channelCount <= 2) {
// ChannelMixingMatrix.create only has defaults for mono/stereo input/output.
ChannelMixingAudioProcessor channelCountChanger = new ChannelMixingAudioProcessor();
channelCountChanger.putChannelMixingMatrix(
ChannelMixingMatrix.create(
/* inputChannelCount= */ 1, requiredOutputAudioFormat.channelCount));
channelCountChanger.putChannelMixingMatrix(
ChannelMixingMatrix.create(
/* inputChannelCount= */ 2, requiredOutputAudioFormat.channelCount));
audioProcessors.add(channelCountChanger);
}
}
AudioProcessingPipeline audioProcessingPipeline =
new AudioProcessingPipeline(audioProcessors.build());
try {
AudioFormat outputAudioFormat = audioProcessingPipeline.configure(inputAudioFormat);
// Thrown inside the try block on purpose: the catch below wraps a format mismatch in an
// ExportException in the same way as a configuration failure.
if (!requiredOutputAudioFormat.equals(AudioFormat.NOT_SET)
&& !outputAudioFormat.equals(requiredOutputAudioFormat)) {
throw new AudioProcessor.UnhandledAudioFormatException(
"Audio format can not be modified to match existing downstream format",
inputAudioFormat);
}
} catch (AudioProcessor.UnhandledAudioFormatException unhandledAudioFormatException) {
throw ExportException.createForAudioProcessing(
unhandledAudioFormatException, inputAudioFormat);
}
// Flushing applies the configuration, so the returned pipeline is ready to use.
audioProcessingPipeline.flush();
return audioProcessingPipeline;
}
} }

View File

@ -26,7 +26,9 @@ import java.util.concurrent.atomic.AtomicLong;
/* package */ final class SilentAudioGenerator { /* package */ final class SilentAudioGenerator {
private static final int DEFAULT_BUFFER_SIZE_FRAMES = 1024; private static final int DEFAULT_BUFFER_SIZE_FRAMES = 1024;
private final AudioFormat audioFormat; /** The {@link AudioFormat} of the silent audio generated. */
public final AudioFormat audioFormat;
private final ByteBuffer internalBuffer; private final ByteBuffer internalBuffer;
private final AtomicLong remainingBytesToOutput; private final AtomicLong remainingBytesToOutput;

View File

@ -555,8 +555,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
addedTrackInfo.firstAssetLoaderInputFormat, addedTrackInfo.firstAssetLoaderInputFormat,
/* firstPipelineInputFormat= */ firstAssetLoaderOutputFormat, /* firstPipelineInputFormat= */ firstAssetLoaderOutputFormat,
transformationRequest, transformationRequest,
firstEditedMediaItem.flattenForSlowMotion, firstEditedMediaItem,
firstEditedMediaItem.effects.audioProcessors,
encoderFactory, encoderFactory,
muxerWrapper, muxerWrapper,
fallbackListener); fallbackListener);

View File

@ -19,6 +19,7 @@ package androidx.media3.transformer;
import static androidx.media3.common.util.Assertions.checkNotNull; import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.transformer.TestUtil.ASSET_URI_PREFIX; import static androidx.media3.transformer.TestUtil.ASSET_URI_PREFIX;
import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW;
import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW_STEREO_48000KHZ;
import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO;
import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S;
import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_STEREO; import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_STEREO;
@ -43,7 +44,6 @@ import java.nio.file.Files;
import java.nio.file.Paths; import java.nio.file.Paths;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
@ -233,7 +233,7 @@ public final class SequenceExportTest {
} }
@Test @Test
public void start_concatenateSameAudioItem_completesSuccessfully() throws Exception { public void concatenateTwoAudioItems_withSameFormat_completesSuccessfully() throws Exception {
Transformer transformer = Transformer transformer =
createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW);
@ -253,7 +253,8 @@ public final class SequenceExportTest {
} }
@Test @Test
public void start_concatenateSameAudioItemWithEffects_completesSuccessfully() throws Exception { public void concatenateTwoAudioItems_withSameFormatAndSameEffects_completesSuccessfully()
throws Exception {
Transformer transformer = Transformer transformer =
createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW); MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW);
@ -277,8 +278,7 @@ public final class SequenceExportTest {
} }
@Test @Test
@Ignore("Handle MediaItem effects changes (See [internal: b/274093424]).") public void concatenateTwoAudioItems_withSameFormatAndDiffEffects_completesSuccessfully()
public void start_concatenateSameAudioItemWithDifferentEffects_completesSuccessfully()
throws Exception { throws Exception {
Transformer transformer = Transformer transformer =
createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build(); createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
@ -313,4 +313,99 @@ public final class SequenceExportTest {
checkNotNull(testMuxerHolder.testMuxer), checkNotNull(testMuxerHolder.testMuxer),
getDumpFileName(FILE_AUDIO_RAW + ".high_pitch_then_low_pitch")); getDumpFileName(FILE_AUDIO_RAW + ".high_pitch_then_low_pitch"));
} }
/**
 * Exports a sequence made of {@code FILE_AUDIO_RAW_STEREO_48000KHZ} followed by {@code
 * FILE_AUDIO_RAW}, with no effects on either item, and compares the muxed output against the
 * corresponding dump file.
 */
@Test
public void concatenateTwoAudioItems_withDiffFormat_completesSuccessfully() throws Exception {
  Transformer transformer =
      createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
  MediaItem firstMediaItem =
      MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ);
  MediaItem secondMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW);
  EditedMediaItem firstItem = new EditedMediaItem.Builder(firstMediaItem).build();
  EditedMediaItem secondItem = new EditedMediaItem.Builder(secondMediaItem).build();
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence(ImmutableList.of(firstItem, secondItem));
  Composition composition = new Composition.Builder(ImmutableList.of(sequence)).build();

  transformer.start(composition, outputPath);
  TransformerTestRunner.runLooper(transformer);

  // The muxer is only read once the looper has run, as in the original ordering.
  DumpFileAsserts.assertOutput(
      context,
      checkNotNull(testMuxerHolder.testMuxer),
      getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "_then_sample.wav"));
}
/**
 * Exports a sequence made of {@code FILE_AUDIO_RAW_STEREO_48000KHZ} followed by {@code
 * FILE_AUDIO_RAW}, where both items use the very same {@link Effects} instance (a pitch change
 * of 2), and compares the muxed output against the corresponding dump file.
 */
@Test
public void concatenateTwoAudioItems_withDiffFormatAndSameEffects_completesSuccessfully()
    throws Exception {
  Transformer transformer =
      createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
  // A single Effects object is intentionally shared by both items.
  Effects sharedHighPitchEffects =
      new Effects(
          ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 2f)),
          /* videoEffects= */ ImmutableList.of());
  MediaItem firstMediaItem =
      MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ);
  EditedMediaItem firstItem =
      new EditedMediaItem.Builder(firstMediaItem).setEffects(sharedHighPitchEffects).build();
  MediaItem secondMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW);
  EditedMediaItem secondItem =
      new EditedMediaItem.Builder(secondMediaItem).setEffects(sharedHighPitchEffects).build();
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence(ImmutableList.of(firstItem, secondItem));
  Composition composition = new Composition.Builder(ImmutableList.of(sequence)).build();

  transformer.start(composition, outputPath);
  TransformerTestRunner.runLooper(transformer);

  DumpFileAsserts.assertOutput(
      context,
      checkNotNull(testMuxerHolder.testMuxer),
      getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "-high_pitch_then_sample.wav-high_pitch"));
}
/**
 * Exports a sequence made of {@code FILE_AUDIO_RAW_STEREO_48000KHZ} with a pitch change of 2
 * followed by {@code FILE_AUDIO_RAW} with a pitch change of 0.5, and compares the muxed output
 * against the corresponding dump file.
 */
@Test
public void concatenateTwoAudioItems_withDiffFormatAndDiffEffects_completesSuccessfully()
    throws Exception {
  Transformer transformer =
      createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
  // First item: pitch raised.
  Effects highPitchEffects =
      new Effects(
          ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 2f)),
          /* videoEffects= */ ImmutableList.of());
  EditedMediaItem firstItem =
      new EditedMediaItem.Builder(
              MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW_STEREO_48000KHZ))
          .setEffects(highPitchEffects)
          .build();
  // Second item: pitch lowered.
  Effects lowPitchEffects =
      new Effects(
          ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 0.5f)),
          /* videoEffects= */ ImmutableList.of());
  EditedMediaItem secondItem =
      new EditedMediaItem.Builder(MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW))
          .setEffects(lowPitchEffects)
          .build();
  EditedMediaItemSequence sequence =
      new EditedMediaItemSequence(ImmutableList.of(firstItem, secondItem));
  Composition composition = new Composition.Builder(ImmutableList.of(sequence)).build();

  transformer.start(composition, outputPath);
  TransformerTestRunner.runLooper(transformer);

  DumpFileAsserts.assertOutput(
      context,
      checkNotNull(testMuxerHolder.testMuxer),
      getDumpFileName(FILE_AUDIO_RAW_STEREO_48000KHZ + "-high_pitch_then_sample.wav-low_pitch"));
}
} }

View File

@ -153,6 +153,7 @@ public final class TestUtil {
public static final String FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S = public static final String FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S =
"mp4/sample_with_increasing_timestamps_320w_240h.mp4"; "mp4/sample_with_increasing_timestamps_320w_240h.mp4";
public static final String FILE_AUDIO_RAW = "wav/sample.wav"; public static final String FILE_AUDIO_RAW = "wav/sample.wav";
// Asset path of a raw (WAV) audio test file. NOTE(review): the name suggests a stereo file with
// a sample rate of 48000; the "KHZ" suffix presumably should read "HZ" - confirm before renaming.
public static final String FILE_AUDIO_RAW_STEREO_48000KHZ = "wav/sample_rf64.wav";
public static final String FILE_WITH_SUBTITLES = "mkv/sample_with_srt.mkv"; public static final String FILE_WITH_SUBTITLES = "mkv/sample_with_srt.mkv";
public static final String FILE_WITH_SEF_SLOW_MOTION = "mp4/sample_sef_slow_motion.mp4"; public static final String FILE_WITH_SEF_SLOW_MOTION = "mp4/sample_sef_slow_motion.mp4";
public static final String FILE_AUDIO_UNSUPPORTED_BY_DECODER = "amr/sample_wb.amr"; public static final String FILE_AUDIO_UNSUPPORTED_BY_DECODER = "amr/sample_wb.amr";
@ -168,8 +169,8 @@ public final class TestUtil {
public static void createEncodersAndDecoders() { public static void createEncodersAndDecoders() {
ShadowMediaCodec.CodecConfig codecConfig = ShadowMediaCodec.CodecConfig codecConfig =
new ShadowMediaCodec.CodecConfig( new ShadowMediaCodec.CodecConfig(
/* inputBufferSize= */ 10_000, /* inputBufferSize= */ 100_000,
/* outputBufferSize= */ 10_000, /* outputBufferSize= */ 100_000,
/* codec= */ (in, out) -> out.put(in)); /* codec= */ (in, out) -> out.put(in));
addCodec( addCodec(
MimeTypes.AUDIO_AAC, MimeTypes.AUDIO_AAC,
@ -194,8 +195,8 @@ public final class TestUtil {
ShadowMediaCodec.CodecConfig throwingCodecConfig = ShadowMediaCodec.CodecConfig throwingCodecConfig =
new ShadowMediaCodec.CodecConfig( new ShadowMediaCodec.CodecConfig(
/* inputBufferSize= */ 10_000, /* inputBufferSize= */ 100_000,
/* outputBufferSize= */ 10_000, /* outputBufferSize= */ 100_000,
new ShadowMediaCodec.CodecConfig.Codec() { new ShadowMediaCodec.CodecConfig.Codec() {
@Override @Override