Allow raw input with any PCM encoding (pre-effects) in AudioGraphInput.

AudioGraphInput now accepts a range of PCM input encodings, as long as the
effects provided convert the audio to 16-bit integer PCM.

As part of this, add a workaround to DefaultCodec to ensure the PCM encoding
of the raw audio decoder's output format is correct, and remove parameterized
tests that are not valid.

PiperOrigin-RevId: 631404152
This commit is contained in:
samrobinson 2024-05-07 06:36:02 -07:00 committed by Copybara-Service
parent 16df05ec29
commit 9942255894
9 changed files with 55 additions and 804 deletions

View File

@ -1,328 +0,0 @@
format audio:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
metadata = entries=[xyz: latitude=40.68, longitude=-74.5, Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
format video:
id = 1
sampleMimeType = video/avc
codecs = avc1.64001F
maxInputSize = 36722
width = 1080
height = 720
frameRate = 29.970028
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
metadata = entries=[xyz: latitude=40.68, longitude=-74.5, Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
container metadata:
entry = Mp4Timestamp: creation time=0, modification time=0, timescale=1000
entry = Mp4Timestamp: creation time=0, modification time=0, timescale=1000
entry = xyz: latitude=40.68, longitude=-74.5
entry = xyz: latitude=40.68, longitude=-74.5
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 413799331
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1108115494
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1885521440
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1112600185
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -377617286
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1174897446
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -652005356
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1841655008
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1815238174
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1745464927
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -969133005
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 943244167
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 981350607
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1886507040
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -502968940
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1607535982
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1557286731
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1932764288
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -480251183
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1508860307
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1713708904
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 2107276453
size = 2282
isKeyFrame = true
sample:
trackType = video
dataHashCode = -770308242
size = 36692
isKeyFrame = true
presentationTimeUs = 0
sample:
trackType = video
dataHashCode = -732087136
size = 5312
isKeyFrame = false
presentationTimeUs = 66733
sample:
trackType = video
dataHashCode = 468156717
size = 599
isKeyFrame = false
presentationTimeUs = 33366
sample:
trackType = video
dataHashCode = 1150349584
size = 7735
isKeyFrame = false
presentationTimeUs = 200200
sample:
trackType = video
dataHashCode = 1443582006
size = 987
isKeyFrame = false
presentationTimeUs = 133466
sample:
trackType = video
dataHashCode = -310585145
size = 673
isKeyFrame = false
presentationTimeUs = 100100
sample:
trackType = video
dataHashCode = 807460688
size = 523
isKeyFrame = false
presentationTimeUs = 166833
sample:
trackType = video
dataHashCode = 1936487090
size = 6061
isKeyFrame = false
presentationTimeUs = 333666
sample:
trackType = video
dataHashCode = -32297181
size = 992
isKeyFrame = false
presentationTimeUs = 266933
sample:
trackType = video
dataHashCode = 1529616406
size = 623
isKeyFrame = false
presentationTimeUs = 233566
sample:
trackType = video
dataHashCode = 1949198785
size = 421
isKeyFrame = false
presentationTimeUs = 300300
sample:
trackType = video
dataHashCode = -147880287
size = 4899
isKeyFrame = false
presentationTimeUs = 433766
sample:
trackType = video
dataHashCode = 1369083472
size = 568
isKeyFrame = false
presentationTimeUs = 400400
sample:
trackType = video
dataHashCode = 965782073
size = 620
isKeyFrame = false
presentationTimeUs = 367033
sample:
trackType = video
dataHashCode = -261176150
size = 5450
isKeyFrame = false
presentationTimeUs = 567233
sample:
trackType = video
dataHashCode = -1830836678
size = 1051
isKeyFrame = false
presentationTimeUs = 500500
sample:
trackType = video
dataHashCode = 1767407540
size = 874
isKeyFrame = false
presentationTimeUs = 467133
sample:
trackType = video
dataHashCode = 918440283
size = 781
isKeyFrame = false
presentationTimeUs = 533866
sample:
trackType = video
dataHashCode = -1408463661
size = 4725
isKeyFrame = false
presentationTimeUs = 700700
sample:
trackType = video
dataHashCode = 1569455924
size = 1022
isKeyFrame = false
presentationTimeUs = 633966
sample:
trackType = video
dataHashCode = -1723778407
size = 790
isKeyFrame = false
presentationTimeUs = 600600
sample:
trackType = video
dataHashCode = 1578275472
size = 610
isKeyFrame = false
presentationTimeUs = 667333
sample:
trackType = video
dataHashCode = 1989768395
size = 2751
isKeyFrame = false
presentationTimeUs = 834166
sample:
trackType = video
dataHashCode = -1215674502
size = 745
isKeyFrame = false
presentationTimeUs = 767433
sample:
trackType = video
dataHashCode = -814473606
size = 621
isKeyFrame = false
presentationTimeUs = 734066
sample:
trackType = video
dataHashCode = 498370894
size = 505
isKeyFrame = false
presentationTimeUs = 800800
sample:
trackType = video
dataHashCode = -1051506468
size = 1268
isKeyFrame = false
presentationTimeUs = 967633
sample:
trackType = video
dataHashCode = -1025604144
size = 880
isKeyFrame = false
presentationTimeUs = 900900
sample:
trackType = video
dataHashCode = -913586520
size = 530
isKeyFrame = false
presentationTimeUs = 867533
sample:
trackType = video
dataHashCode = 1340459242
size = 568
isKeyFrame = false
presentationTimeUs = 934266
released = true

View File

@ -1,440 +0,0 @@
format audio:
averageBitrate = 131072
sampleMimeType = audio/mp4a-latm
channelCount = 2
sampleRate = 44100
pcmEncoding = 2
format video:
id = 1
sampleMimeType = video/avc
codecs = avc1.64001F
maxInputSize = 36722
width = 1080
height = 720
frameRate = 29.970028
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8
metadata = entries=[xyz: latitude=40.68, longitude=-74.5, Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
container metadata:
entry = Mp4Timestamp: creation time=0, modification time=0, timescale=1000
entry = xyz: latitude=40.68, longitude=-74.5
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = -1355500415
size = 412
isKeyFrame = true
sample:
trackType = video
dataHashCode = -770308242
size = 36692
isKeyFrame = true
presentationTimeUs = 0
sample:
trackType = video
dataHashCode = -732087136
size = 5312
isKeyFrame = false
presentationTimeUs = 66733
sample:
trackType = video
dataHashCode = 468156717
size = 599
isKeyFrame = false
presentationTimeUs = 33366
sample:
trackType = video
dataHashCode = 1150349584
size = 7735
isKeyFrame = false
presentationTimeUs = 200200
sample:
trackType = video
dataHashCode = 1443582006
size = 987
isKeyFrame = false
presentationTimeUs = 133466
sample:
trackType = video
dataHashCode = -310585145
size = 673
isKeyFrame = false
presentationTimeUs = 100100
sample:
trackType = video
dataHashCode = 807460688
size = 523
isKeyFrame = false
presentationTimeUs = 166833
sample:
trackType = video
dataHashCode = 1936487090
size = 6061
isKeyFrame = false
presentationTimeUs = 333666
sample:
trackType = video
dataHashCode = -32297181
size = 992
isKeyFrame = false
presentationTimeUs = 266933
sample:
trackType = video
dataHashCode = 1529616406
size = 623
isKeyFrame = false
presentationTimeUs = 233566
sample:
trackType = video
dataHashCode = 1949198785
size = 421
isKeyFrame = false
presentationTimeUs = 300300
sample:
trackType = video
dataHashCode = -147880287
size = 4899
isKeyFrame = false
presentationTimeUs = 433766
sample:
trackType = video
dataHashCode = 1369083472
size = 568
isKeyFrame = false
presentationTimeUs = 400400
sample:
trackType = video
dataHashCode = 965782073
size = 620
isKeyFrame = false
presentationTimeUs = 367033
sample:
trackType = video
dataHashCode = -261176150
size = 5450
isKeyFrame = false
presentationTimeUs = 567233
sample:
trackType = video
dataHashCode = -1830836678
size = 1051
isKeyFrame = false
presentationTimeUs = 500500
sample:
trackType = video
dataHashCode = 1767407540
size = 874
isKeyFrame = false
presentationTimeUs = 467133
sample:
trackType = video
dataHashCode = 918440283
size = 781
isKeyFrame = false
presentationTimeUs = 533866
sample:
trackType = video
dataHashCode = -1408463661
size = 4725
isKeyFrame = false
presentationTimeUs = 700700
sample:
trackType = video
dataHashCode = 1569455924
size = 1022
isKeyFrame = false
presentationTimeUs = 633966
sample:
trackType = video
dataHashCode = -1723778407
size = 790
isKeyFrame = false
presentationTimeUs = 600600
sample:
trackType = video
dataHashCode = 1578275472
size = 610
isKeyFrame = false
presentationTimeUs = 667333
sample:
trackType = video
dataHashCode = 1989768395
size = 2751
isKeyFrame = false
presentationTimeUs = 834166
sample:
trackType = video
dataHashCode = -1215674502
size = 745
isKeyFrame = false
presentationTimeUs = 767433
sample:
trackType = video
dataHashCode = -814473606
size = 621
isKeyFrame = false
presentationTimeUs = 734066
sample:
trackType = video
dataHashCode = 498370894
size = 505
isKeyFrame = false
presentationTimeUs = 800800
sample:
trackType = video
dataHashCode = -1051506468
size = 1268
isKeyFrame = false
presentationTimeUs = 967633
sample:
trackType = video
dataHashCode = -1025604144
size = 880
isKeyFrame = false
presentationTimeUs = 900900
sample:
trackType = video
dataHashCode = -913586520
size = 530
isKeyFrame = false
presentationTimeUs = 867533
sample:
trackType = video
dataHashCode = 1340459242
size = 568
isKeyFrame = false
presentationTimeUs = 934266
released = true

View File

@ -37,112 +37,112 @@ sample:
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -678086945
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -1880516932
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 143968770
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 472444105
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 79747624
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 228396668
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 196615182
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -932953854
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -1617231932
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -568537823
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -188600433
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 391469691
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -1831929805
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 637762178
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 283314062
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -1143099212
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 1693519821
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -270204830
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -678661135
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = 1579425647
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1742602241
dataHashCode = -114252150
size = 4096
isKeyFrame = true
sample:
trackType = audio
dataHashCode = 1340921537
dataHashCode = -363894499
size = 2282
isKeyFrame = true
sample:

View File

@ -64,9 +64,7 @@ import java.util.Objects;
/** Returns whether an {@link AudioFormat} is valid as an input format. */
public static boolean isInputAudioFormatValid(AudioFormat format) {
// AudioGraphInput assumes PCM_16BIT -- see, for example, the automatic format conversions
// in AudioGraphInput.configureProcessing.
if (format.encoding != C.ENCODING_PCM_16BIT) {
if (format.encoding == Format.NO_VALUE) {
return false;
}
if (format.sampleRate == Format.NO_VALUE) {

View File

@ -105,6 +105,8 @@ import java.util.concurrent.atomic.AtomicReference;
// APP configuration not active until flush called. getOutputAudioFormat based on active config.
audioProcessingPipeline.flush();
outputAudioFormat = audioProcessingPipeline.getOutputAudioFormat();
checkArgument(
outputAudioFormat.encoding == C.ENCODING_PCM_16BIT, /* errorMessage= */ outputAudioFormat);
startTimeUs = new AtomicLong(C.TIME_UNSET);
}

View File

@ -364,7 +364,11 @@ public final class DefaultCodec implements Codec {
// The raw audio decoder incorrectly sets the channel count for output format to stereo.
if (isDecoder && Objects.equals(configurationFormat.sampleMimeType, MimeTypes.AUDIO_RAW)) {
outputFormat =
outputFormat.buildUpon().setChannelCount(configurationFormat.channelCount).build();
outputFormat
.buildUpon()
.setChannelCount(configurationFormat.channelCount)
.setPcmEncoding(configurationFormat.pcmEncoding)
.build();
}
}
return false;

View File

@ -51,15 +51,6 @@ public class AudioGraphTest {
private static final AudioFormat SURROUND_50000 =
new AudioFormat(/* sampleRate= */ 50_000, /* channelCount= */ 6, C.ENCODING_PCM_16BIT);
@Test
public void floatPcmFormat_isNotValidInputFormat() {
assertThat(
AudioGraph.isInputAudioFormatValid(
new AudioFormat(
/* sampleRate= */ 44_100, /* channelCount= */ 1, C.ENCODING_PCM_FLOAT)))
.isFalse();
}
@Test
public void silentItem_outputsCorrectAmountOfBytes() throws Exception {
AudioGraph audioGraph =

View File

@ -70,6 +70,7 @@ import androidx.media3.common.Format;
import androidx.media3.common.MediaItem;
import androidx.media3.common.MimeTypes;
import androidx.media3.common.audio.SonicAudioProcessor;
import androidx.media3.common.audio.ToInt16PcmAudioProcessor;
import androidx.media3.effect.Contrast;
import androidx.media3.effect.Presentation;
import androidx.media3.effect.ScaleAndRotateTransformation;
@ -542,6 +543,29 @@ public final class MediaItemExportTest {
getDumpFileName(/* originalFileName= */ FILE_AUDIO_RAW, /* modifications...= */ "48000hz"));
}
@Test
public void start_withRawBigEndianAudioInput_completesSuccessfully() throws Exception {
CapturingMuxer.Factory muxerFactory = new CapturingMuxer.Factory(/* handleAudioAsPcm= */ true);
ToInt16PcmAudioProcessor toInt16PcmAudioProcessor = new ToInt16PcmAudioProcessor();
Transformer transformer =
createTransformerBuilder(muxerFactory, /* enableFallback= */ false).build();
MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + "mp4/sample_twos_pcm.mp4");
EditedMediaItem editedMediaItem =
new EditedMediaItem.Builder(mediaItem)
.setEffects(createAudioEffects(toInt16PcmAudioProcessor))
.build();
transformer.start(editedMediaItem, outputDir.newFile().getPath());
TransformerTestRunner.runLooper(transformer);
DumpFileAsserts.assertOutput(
context,
muxerFactory.getCreatedMuxer(),
getDumpFileName(
/* originalFileName= */ "mp4/sample_twos_pcm.mp4", /* modifications...= */ "toInt16"));
}
@Test
public void start_singleMediaItemAndTransmux_ignoresTransmux() throws Exception {
CapturingMuxer.Factory muxerFactory = new CapturingMuxer.Factory(/* handleAudioAsPcm= */ true);

View File

@ -70,7 +70,7 @@ public final class ParameterizedItemExportTest {
FILE_AUDIO_AMR_NB);
private static final ImmutableList<String> AUDIO_VIDEO_ASSETS =
ImmutableList.of(FILE_AUDIO_RAW_VIDEO, "mp4/sample_twos_pcm.mp4", FILE_AUDIO_VIDEO);
ImmutableList.of(FILE_AUDIO_RAW_VIDEO, FILE_AUDIO_VIDEO);
private static final ImmutableList<String> VIDEO_ONLY_ASSETS = ImmutableList.of(FILE_VIDEO_ONLY);