Add dump tests for concatenating 2 audio items.

Audio-only tests now use RAW audio where possible. RAW audio is
passed through the Robolectric decoders/encoders and can be handled
accurately by the AudioProcessor instances.

PiperOrigin-RevId: 541648853
This commit is contained in:
samrobinson 2023-06-19 16:19:31 +01:00 committed by Ian Baker
parent 8d8c514d12
commit 6d648f8bdb
5 changed files with 348 additions and 229 deletions

View File

@ -1,207 +0,0 @@
format 0:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68, longitude=-74.5, Mp4Timestamp: creation time=3547558895, modification time=3547558895, timescale=1000]
container metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68, longitude=-74.5, Mp4Timestamp: creation time=3547558895, modification time=3547558895, timescale=1000]
sample:
trackIndex = 0
dataHashCode = 915609509
size = 792
isKeyFrame = true
presentationTimeUs = 0
sample:
trackIndex = 0
dataHashCode = -1580893866
size = 678
isKeyFrame = true
presentationTimeUs = 8979
sample:
trackIndex = 0
dataHashCode = -31547651
size = 304
isKeyFrame = true
presentationTimeUs = 16666
sample:
trackIndex = 0
dataHashCode = 1415140636
size = 460
isKeyFrame = true
presentationTimeUs = 20113
sample:
trackIndex = 0
dataHashCode = 1721060815
size = 850
isKeyFrame = true
presentationTimeUs = 25328
sample:
trackIndex = 0
dataHashCode = 1707913464
size = 446
isKeyFrame = true
presentationTimeUs = 34965
sample:
trackIndex = 0
dataHashCode = -776771764
size = 852
isKeyFrame = true
presentationTimeUs = 40022
sample:
trackIndex = 0
dataHashCode = -609146892
size = 368
isKeyFrame = true
presentationTimeUs = 49682
sample:
trackIndex = 0
dataHashCode = -2044977387
size = 1166
isKeyFrame = true
presentationTimeUs = 53854
sample:
trackIndex = 0
dataHashCode = -753877175
size = 506
isKeyFrame = true
presentationTimeUs = 67074
sample:
trackIndex = 0
dataHashCode = 1491046836
size = 578
isKeyFrame = true
presentationTimeUs = 72811
sample:
trackIndex = 0
dataHashCode = 621394572
size = 668
isKeyFrame = true
presentationTimeUs = 79365
sample:
trackIndex = 0
dataHashCode = -58393202
size = 268
isKeyFrame = true
presentationTimeUs = 86938
sample:
trackIndex = 0
dataHashCode = 1253593269
size = 318
isKeyFrame = true
presentationTimeUs = 89977
sample:
trackIndex = 0
dataHashCode = -1544714160
size = 424
isKeyFrame = true
presentationTimeUs = 93582
sample:
trackIndex = 0
dataHashCode = -2038565545
size = 294
isKeyFrame = true
presentationTimeUs = 98390
sample:
trackIndex = 0
dataHashCode = 803611858
size = 394
isKeyFrame = true
presentationTimeUs = 101723
sample:
trackIndex = 0
dataHashCode = 890682839
size = 812
isKeyFrame = true
presentationTimeUs = 106190
sample:
trackIndex = 0
dataHashCode = 1798765816
size = 332
isKeyFrame = true
presentationTimeUs = 115396
sample:
trackIndex = 0
dataHashCode = -155329417
size = 250
isKeyFrame = true
presentationTimeUs = 119160
sample:
trackIndex = 0
dataHashCode = 2061435630
size = 304
isKeyFrame = true
presentationTimeUs = 121995
sample:
trackIndex = 0
dataHashCode = -667770092
size = 1318
isKeyFrame = true
presentationTimeUs = 125442
sample:
trackIndex = 0
dataHashCode = 1947321516
size = 224
isKeyFrame = true
presentationTimeUs = 140385
sample:
trackIndex = 0
dataHashCode = 1744495738
size = 446
isKeyFrame = true
presentationTimeUs = 142925
sample:
trackIndex = 0
dataHashCode = 801488010
size = 838
isKeyFrame = true
presentationTimeUs = 147981
sample:
trackIndex = 0
dataHashCode = -867204691
size = 520
isKeyFrame = true
presentationTimeUs = 157482
sample:
trackIndex = 0
dataHashCode = 1994555264
size = 230
isKeyFrame = true
presentationTimeUs = 163378
sample:
trackIndex = 0
dataHashCode = -748724753
size = 380
isKeyFrame = true
presentationTimeUs = 165986
sample:
trackIndex = 0
dataHashCode = -1557661843
size = 692
isKeyFrame = true
presentationTimeUs = 170294
sample:
trackIndex = 0
dataHashCode = 461522726
size = 270
isKeyFrame = true
presentationTimeUs = 178140
sample:
trackIndex = 0
dataHashCode = 1058760091
size = 238
isKeyFrame = true
presentationTimeUs = 181201
sample:
trackIndex = 0
dataHashCode = 1541647596
size = 722
isKeyFrame = true
presentationTimeUs = 183900
sample:
trackIndex = 0
dataHashCode = -2107816707
size = 2062
isKeyFrame = true
presentationTimeUs = 192086
released = true

View File

@ -0,0 +1,127 @@
format 0:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
sample:
trackIndex = 0
dataHashCode = -85819864
size = 8820
isKeyFrame = true
presentationTimeUs = 0
sample:
trackIndex = 0
dataHashCode = 566487491
size = 8820
isKeyFrame = true
presentationTimeUs = 100000
sample:
trackIndex = 0
dataHashCode = -1256531710
size = 8820
isKeyFrame = true
presentationTimeUs = 200000
sample:
trackIndex = 0
dataHashCode = 793455796
size = 8820
isKeyFrame = true
presentationTimeUs = 300000
sample:
trackIndex = 0
dataHashCode = -268235582
size = 8820
isKeyFrame = true
presentationTimeUs = 400000
sample:
trackIndex = 0
dataHashCode = -8136122
size = 8820
isKeyFrame = true
presentationTimeUs = 500000
sample:
trackIndex = 0
dataHashCode = 1750866613
size = 8820
isKeyFrame = true
presentationTimeUs = 600000
sample:
trackIndex = 0
dataHashCode = -1100753636
size = 8820
isKeyFrame = true
presentationTimeUs = 700000
sample:
trackIndex = 0
dataHashCode = 507833230
size = 8820
isKeyFrame = true
presentationTimeUs = 800000
sample:
trackIndex = 0
dataHashCode = 1472467506
size = 8820
isKeyFrame = true
presentationTimeUs = 900000
sample:
trackIndex = 0
dataHashCode = -85819864
size = 8820
isKeyFrame = true
presentationTimeUs = 1000000
sample:
trackIndex = 0
dataHashCode = 566487491
size = 8820
isKeyFrame = true
presentationTimeUs = 1100000
sample:
trackIndex = 0
dataHashCode = -1256531710
size = 8820
isKeyFrame = true
presentationTimeUs = 1200000
sample:
trackIndex = 0
dataHashCode = 793455796
size = 8820
isKeyFrame = true
presentationTimeUs = 1300000
sample:
trackIndex = 0
dataHashCode = -268235582
size = 8820
isKeyFrame = true
presentationTimeUs = 1400000
sample:
trackIndex = 0
dataHashCode = -8136122
size = 8820
isKeyFrame = true
presentationTimeUs = 1500000
sample:
trackIndex = 0
dataHashCode = 1750866613
size = 8820
isKeyFrame = true
presentationTimeUs = 1600000
sample:
trackIndex = 0
dataHashCode = -1100753636
size = 8820
isKeyFrame = true
presentationTimeUs = 1700000
sample:
trackIndex = 0
dataHashCode = 507833230
size = 8820
isKeyFrame = true
presentationTimeUs = 1800000
sample:
trackIndex = 0
dataHashCode = 1472467506
size = 8820
isKeyFrame = true
presentationTimeUs = 1900000
released = true

View File

@ -0,0 +1,133 @@
format 0:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
sample:
trackIndex = 0
dataHashCode = 2042155098
size = 6482
isKeyFrame = true
presentationTimeUs = 0
sample:
trackIndex = 0
dataHashCode = 1417355469
size = 8556
isKeyFrame = true
presentationTimeUs = 73492
sample:
trackIndex = 0
dataHashCode = -2107697498
size = 8754
isKeyFrame = true
presentationTimeUs = 170498
sample:
trackIndex = 0
dataHashCode = 736072795
size = 8908
isKeyFrame = true
presentationTimeUs = 269750
sample:
trackIndex = 0
dataHashCode = -1913553170
size = 9208
isKeyFrame = true
presentationTimeUs = 370748
sample:
trackIndex = 0
dataHashCode = 13583718
size = 8968
isKeyFrame = true
presentationTimeUs = 475147
sample:
trackIndex = 0
dataHashCode = -1444602526
size = 8588
isKeyFrame = true
presentationTimeUs = 576825
sample:
trackIndex = 0
dataHashCode = -1693065958
size = 8778
isKeyFrame = true
presentationTimeUs = 674195
sample:
trackIndex = 0
dataHashCode = 2071205641
size = 8736
isKeyFrame = true
presentationTimeUs = 773718
sample:
trackIndex = 0
dataHashCode = 1433538831
size = 8636
isKeyFrame = true
presentationTimeUs = 872766
sample:
trackIndex = 0
dataHashCode = -949798077
size = 9424
isKeyFrame = true
presentationTimeUs = 970680
sample:
trackIndex = 0
dataHashCode = -1275686831
size = 8088
isKeyFrame = true
presentationTimeUs = 1077528
sample:
trackIndex = 0
dataHashCode = -1360039206
size = 8858
isKeyFrame = true
presentationTimeUs = 1169229
sample:
trackIndex = 0
dataHashCode = 736072795
size = 8908
isKeyFrame = true
presentationTimeUs = 1269659
sample:
trackIndex = 0
dataHashCode = -1913553170
size = 9208
isKeyFrame = true
presentationTimeUs = 1370657
sample:
trackIndex = 0
dataHashCode = 13583718
size = 8968
isKeyFrame = true
presentationTimeUs = 1475056
sample:
trackIndex = 0
dataHashCode = -1444602526
size = 8588
isKeyFrame = true
presentationTimeUs = 1576734
sample:
trackIndex = 0
dataHashCode = -1693065958
size = 8778
isKeyFrame = true
presentationTimeUs = 1674104
sample:
trackIndex = 0
dataHashCode = 2071205641
size = 8736
isKeyFrame = true
presentationTimeUs = 1773628
sample:
trackIndex = 0
dataHashCode = 1433538831
size = 8636
isKeyFrame = true
presentationTimeUs = 1872675
sample:
trackIndex = 0
dataHashCode = 992130724
size = 2580
isKeyFrame = true
presentationTimeUs = 1970589
released = true

View File

@ -18,9 +18,11 @@ package androidx.media3.transformer;
import static androidx.media3.common.util.Assertions.checkNotNull;
import static androidx.media3.transformer.TestUtil.ASSET_URI_PREFIX;
import static androidx.media3.transformer.TestUtil.FILE_AUDIO_RAW;
import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO;
import static androidx.media3.transformer.TestUtil.FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S;
import static androidx.media3.transformer.TestUtil.createEncodersAndDecoders;
import static androidx.media3.transformer.TestUtil.createPitchChangingAudioProcessor;
import static androidx.media3.transformer.TestUtil.createTransformerBuilder;
import static androidx.media3.transformer.TestUtil.getDumpFileName;
import static androidx.media3.transformer.TestUtil.removeEncodersAndDecoders;
@ -40,12 +42,20 @@ import java.nio.file.Files;
import java.nio.file.Paths;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
/**
* End-to-end test for exporting a single {@link EditedMediaItemSequence} containing multiple {@link
* EditedMediaItem} instances with {@link Transformer}.
*
* <p>Video tracks cannot be processed by Robolectric: the muxer's audio/video interleaving means
* it waits for more audio samples before writing video samples. Robolectric decoders (currently)
* just copy input buffers to the output. Audio timestamps are computed based on the amount of data
* passed through (see [internal: b/178685617]), so they are much smaller than expected because
* they are based on encoded samples. As a result, tests using input files with both video and
* audio must either remove or transmux the video.
*/
@RunWith(AndroidJUnit4.class)
public final class SequenceExportTest {
@ -97,8 +107,7 @@ public final class SequenceExportTest {
Transformer transformer =
createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_VIDEO);
SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor();
sonicAudioProcessor.setPitch(2f);
SonicAudioProcessor sonicAudioProcessor = createPitchChangingAudioProcessor(/* pitch= */ 2f);
Effect videoEffect = RgbFilter.createGrayscaleFilter();
Effects effects =
new Effects(ImmutableList.of(sonicAudioProcessor), ImmutableList.of(videoEffect));
@ -172,11 +181,11 @@ public final class SequenceExportTest {
Transformer transformer =
createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_VIDEO);
EditedMediaItem noAudioEditedMediaItem =
EditedMediaItem videoOnlyMediaItem =
new EditedMediaItem.Builder(mediaItem).setRemoveAudio(true).build();
EditedMediaItem audioEditedMediaItem = new EditedMediaItem.Builder(mediaItem).build();
EditedMediaItem audioVideoMediaItem = new EditedMediaItem.Builder(mediaItem).build();
EditedMediaItemSequence sequence =
new EditedMediaItemSequence(ImmutableList.of(noAudioEditedMediaItem, audioEditedMediaItem));
new EditedMediaItemSequence(ImmutableList.of(videoOnlyMediaItem, audioVideoMediaItem));
Composition composition =
new Composition.Builder(ImmutableList.of(sequence))
.experimentalSetForceAudioTrack(true)
@ -198,8 +207,7 @@ public final class SequenceExportTest {
Transformer transformer =
createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_VIDEO);
SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor();
sonicAudioProcessor.setPitch(2f);
SonicAudioProcessor sonicAudioProcessor = createPitchChangingAudioProcessor(/* pitch= */ 2f);
Effects effects =
new Effects(ImmutableList.of(sonicAudioProcessor), /* videoEffects= */ ImmutableList.of());
EditedMediaItem noAudioEditedMediaItem =
@ -224,22 +232,11 @@ public final class SequenceExportTest {
}
@Test
public void start_concatenateSameAudioItemWithEffects_completesSuccessfully() throws Exception {
public void start_concatenateSameAudioItem_completesSuccessfully() throws Exception {
Transformer transformer =
createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_VIDEO);
SonicAudioProcessor sonicAudioProcessor = new SonicAudioProcessor();
sonicAudioProcessor.setPitch(2f);
Effects effects =
new Effects(ImmutableList.of(sonicAudioProcessor), /* videoEffects= */ ImmutableList.of());
// The video track must be removed in order for the export to end. Indeed, the
// Robolectric decoder just copies the input buffers to the output and the audio timestamps are
// therefore computed based on the encoded samples (see [internal: b/178685617]). As a result,
// the audio timestamps are much smaller than they should be and the muxer waits for more audio
// samples before writing video samples.
EditedMediaItem editedMediaItem =
new EditedMediaItem.Builder(mediaItem).setEffects(effects).setRemoveVideo(true).build();
MediaItem audioOnlyMediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW);
EditedMediaItem editedMediaItem = new EditedMediaItem.Builder(audioOnlyMediaItem).build();
EditedMediaItemSequence editedMediaItemSequence =
new EditedMediaItemSequence(ImmutableList.of(editedMediaItem, editedMediaItem));
Composition composition =
@ -251,6 +248,68 @@ public final class SequenceExportTest {
DumpFileAsserts.assertOutput(
context,
checkNotNull(testMuxerHolder.testMuxer),
getDumpFileName(FILE_AUDIO_VIDEO + ".concatenated_audio_high_pitch"));
getDumpFileName(FILE_AUDIO_RAW + ".concatenated"));
}
@Test
public void start_concatenateSameAudioItemWithEffects_completesSuccessfully() throws Exception {
  Transformer transformer =
      createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
  // The same raw-audio item, with its pitch doubled, is placed twice in a single sequence.
  Effects doubledPitchEffects =
      new Effects(
          ImmutableList.of(createPitchChangingAudioProcessor(/* pitch= */ 2f)),
          /* videoEffects= */ ImmutableList.of());
  EditedMediaItem rawAudioItem =
      new EditedMediaItem.Builder(MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW))
          .setEffects(doubledPitchEffects)
          .build();
  EditedMediaItemSequence repeatedItemSequence =
      new EditedMediaItemSequence(ImmutableList.of(rawAudioItem, rawAudioItem));
  Composition composition =
      new Composition.Builder(ImmutableList.of(repeatedItemSequence)).build();

  transformer.start(composition, outputPath);
  TransformerTestRunner.runLooper(transformer);

  // Compare what the test muxer received against the recorded dump file.
  String expectedDumpFile = getDumpFileName(FILE_AUDIO_RAW + ".concatenated_high_pitch");
  DumpFileAsserts.assertOutput(
      context, checkNotNull(testMuxerHolder.testMuxer), expectedDumpFile);
}
@Test
@Ignore("Handle MediaItem effects changes (See [internal: b/274093424]).")
public void start_concatenateSameAudioItemWithDifferentEffects_completesSuccessfully()
    throws Exception {
  Transformer transformer =
      createTransformerBuilder(testMuxerHolder, /* enableFallback= */ false).build();
  MediaItem rawAudioItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW);

  // First entry: the raw audio with its pitch doubled.
  SonicAudioProcessor raisePitchProcessor = createPitchChangingAudioProcessor(/* pitch= */ 2f);
  Effects raisedPitchEffects =
      new Effects(ImmutableList.of(raisePitchProcessor), /* videoEffects= */ ImmutableList.of());
  EditedMediaItem raisedPitchItem =
      new EditedMediaItem.Builder(rawAudioItem)
          .setRemoveVideo(true)
          .setEffects(raisedPitchEffects)
          .build();

  // Second entry: the same raw audio with its pitch halved.
  SonicAudioProcessor lowerPitchProcessor = createPitchChangingAudioProcessor(/* pitch= */ 0.5f);
  Effects loweredPitchEffects =
      new Effects(ImmutableList.of(lowerPitchProcessor), /* videoEffects= */ ImmutableList.of());
  EditedMediaItem loweredPitchItem =
      new EditedMediaItem.Builder(rawAudioItem)
          .setRemoveVideo(true)
          .setEffects(loweredPitchEffects)
          .build();

  Composition composition =
      new Composition.Builder(
              ImmutableList.of(
                  new EditedMediaItemSequence(
                      ImmutableList.of(raisedPitchItem, loweredPitchItem))))
          .build();

  transformer.start(composition, outputPath);
  TransformerTestRunner.runLooper(transformer);

  // Compare what the test muxer received against the recorded dump file.
  String expectedDumpFile = getDumpFileName(FILE_AUDIO_RAW + ".high_pitch_then_low_pitch");
  DumpFileAsserts.assertOutput(
      context, checkNotNull(testMuxerHolder.testMuxer), expectedDumpFile);
}
}

View File

@ -24,6 +24,7 @@ import androidx.annotation.Nullable;
import androidx.media3.common.C;
import androidx.media3.common.Format;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.audio.SonicAudioProcessor;
import androidx.media3.common.util.UnstableApi;
import androidx.media3.common.util.Util;
import androidx.media3.test.utils.FakeClock;
@ -239,6 +240,12 @@ public final class TestUtil {
new DefaultEncoderFactory.Builder(context).setEnableFallback(enableFallback).build());
}
/**
 * Creates a new {@link SonicAudioProcessor} and sets its pitch.
 *
 * @param pitch The value passed to {@link SonicAudioProcessor#setPitch(float)}.
 * @return The newly created processor, with the pitch applied.
 */
public static SonicAudioProcessor createPitchChangingAudioProcessor(float pitch) {
  SonicAudioProcessor pitchProcessor = new SonicAudioProcessor();
  pitchProcessor.setPitch(pitch);
  return pitchProcessor;
}
/**
 * Builds the dump file name for {@code originalFileName} by joining the dump output directory,
 * a {@code '/'}, the given name, a {@code '.'} and the dump file extension.
 *
 * @param originalFileName The name to embed between the directory and the extension.
 * @return The assembled dump file name.
 */
public static String getDumpFileName(String originalFileName) {
  return new StringBuilder()
      .append(DUMP_FILE_OUTPUT_DIRECTORY)
      .append('/')
      .append(originalFileName)
      .append('.')
      .append(DUMP_FILE_EXTENSION)
      .toString();
}