From b293cf2a76b6181fd8c4261c74d5a705f24eb739 Mon Sep 17 00:00:00 2001 From: Oliver Woodman Date: Tue, 17 Nov 2015 15:50:20 +0000 Subject: [PATCH] Parse QuickTime variant audio sample entries. Issue: #958 --- .../extractor/mp4/Mp4ExtractorTest.java | 13 ++-- .../android/exoplayer/extractor/mp4/Atom.java | 1 + .../exoplayer/extractor/mp4/AtomParsers.java | 70 +++++++++++++++---- .../extractor/mp4/FragmentedMp4Extractor.java | 2 +- .../exoplayer/extractor/mp4/Mp4Extractor.java | 35 +++++++++- 5 files changed, 97 insertions(+), 24 deletions(-) diff --git a/library/src/androidTest/java/com/google/android/exoplayer/extractor/mp4/Mp4ExtractorTest.java b/library/src/androidTest/java/com/google/android/exoplayer/extractor/mp4/Mp4ExtractorTest.java index 7b2d9acf0b..a7a572d3cd 100644 --- a/library/src/androidTest/java/com/google/android/exoplayer/extractor/mp4/Mp4ExtractorTest.java +++ b/library/src/androidTest/java/com/google/android/exoplayer/extractor/mp4/Mp4ExtractorTest.java @@ -70,6 +70,9 @@ public final class Mp4ExtractorTest extends TestCase { private static final byte[] AUDIO_MDHD_PAYLOAD = getByteArray("00000000cf6c4889cf6c488a0000ac4400a3e40055c40000"); + /** String of hexadecimal bytes for an ftyp payload with major_brand mp41 and minor_version 0. */ + private static final byte[] FTYP_PAYLOAD = getByteArray("6d70343100000000"); + /** String of hexadecimal bytes containing an mvhd payload from an AVC/AAC video. */ private static final byte[] MVHD_PAYLOAD = getByteArray( "00000000cf6c4888cf6c48880000025800023ad40001000001000000000000000000000000010000000000" @@ -88,7 +91,7 @@ public final class Mp4ExtractorTest extends TestCase { /** Indices of key-frames. */ private static final boolean[] SAMPLE_IS_SYNC = {true, false, false, false, true, true}; /** Indices of video frame chunk offsets. 
*/ - private static final int[] CHUNK_OFFSETS = {1200, 2120, 3120, 4120}; + private static final int[] CHUNK_OFFSETS = {1208, 2128, 3128, 4128}; /** Numbers of video frames in each chunk. */ private static final int[] SAMPLES_IN_CHUNK = {2, 2, 1, 1}; /** The mdat box must be large enough to avoid reading chunk sample data out of bounds. */ @@ -368,7 +371,7 @@ public final class Mp4ExtractorTest extends TestCase { /** Gets a valid MP4 file with audio/video tracks and synchronization data. */ private static byte[] getTestMp4File(boolean mp4vFormat) { return Mp4Atom.serialize( - atom(Atom.TYPE_ftyp, EMPTY), + atom(Atom.TYPE_ftyp, FTYP_PAYLOAD), atom(Atom.TYPE_moov, atom(Atom.TYPE_mvhd, MVHD_PAYLOAD), atom(Atom.TYPE_trak, @@ -400,13 +403,13 @@ public final class Mp4ExtractorTest extends TestCase { atom(Atom.TYPE_stsc, getStsc()), atom(Atom.TYPE_stsz, getStsz()), atom(Atom.TYPE_stco, getStco())))))), - atom(Atom.TYPE_mdat, getMdat(mp4vFormat ? 1168 : 1158, !mp4vFormat))); + atom(Atom.TYPE_mdat, getMdat(mp4vFormat ? 1176 : 1166, !mp4vFormat))); } /** Gets a valid MP4 file with audio/video tracks and without a synchronization table. */ private static byte[] getTestMp4FileWithoutSynchronizationData(boolean mp4vFormat) { return Mp4Atom.serialize( - atom(Atom.TYPE_ftyp, EMPTY), + atom(Atom.TYPE_ftyp, FTYP_PAYLOAD), atom(Atom.TYPE_moov, atom(Atom.TYPE_mvhd, MVHD_PAYLOAD), atom(Atom.TYPE_trak, @@ -436,7 +439,7 @@ public final class Mp4ExtractorTest extends TestCase { atom(Atom.TYPE_stsc, getStsc()), atom(Atom.TYPE_stsz, getStsz()), atom(Atom.TYPE_stco, getStco())))))), - atom(Atom.TYPE_mdat, getMdat(mp4vFormat ? 1112 : 1102, !mp4vFormat))); + atom(Atom.TYPE_mdat, getMdat(mp4vFormat ? 1120 : 1110, !mp4vFormat))); } private static Mp4Atom atom(int type, Mp4Atom... 
containedMp4Atoms) { diff --git a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/Atom.java b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/Atom.java index f9a40c244e..3a4acb00ae 100644 --- a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/Atom.java +++ b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/Atom.java @@ -53,6 +53,7 @@ import java.util.List; public static final int TYPE_d263 = Util.getIntegerCodeForString("d263"); public static final int TYPE_mdat = Util.getIntegerCodeForString("mdat"); public static final int TYPE_mp4a = Util.getIntegerCodeForString("mp4a"); + public static final int TYPE_wave = Util.getIntegerCodeForString("wave"); public static final int TYPE_ac_3 = Util.getIntegerCodeForString("ac-3"); public static final int TYPE_dac3 = Util.getIntegerCodeForString("dac3"); public static final int TYPE_ec_3 = Util.getIntegerCodeForString("ec-3"); diff --git a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/AtomParsers.java b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/AtomParsers.java index f41653befe..106e7d22df 100644 --- a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/AtomParsers.java +++ b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/AtomParsers.java @@ -42,9 +42,10 @@ import java.util.List; * * @param trak Atom to parse. * @param mvhd Movie header atom, used to get the timescale. + * @param isQuickTime True for QuickTime media. False otherwise. * @return A {@link Track} instance, or {@code null} if the track's type isn't supported. 
*/ - public static Track parseTrak(Atom.ContainerAtom trak, Atom.LeafAtom mvhd) { + public static Track parseTrak(Atom.ContainerAtom trak, Atom.LeafAtom mvhd, boolean isQuickTime) { Atom.ContainerAtom mdia = trak.getContainerAtomOfType(Atom.TYPE_mdia); int trackType = parseHdlr(mdia.getLeafAtomOfType(Atom.TYPE_hdlr).data); if (trackType != Track.TYPE_soun && trackType != Track.TYPE_vide && trackType != Track.TYPE_text @@ -66,7 +67,7 @@ import java.util.List; Pair mdhdData = parseMdhd(mdia.getLeafAtomOfType(Atom.TYPE_mdhd).data); StsdData stsdData = parseStsd(stbl.getLeafAtomOfType(Atom.TYPE_stsd).data, tkhdData.id, - durationUs, tkhdData.rotationDegrees, mdhdData.second); + durationUs, tkhdData.rotationDegrees, mdhdData.second, isQuickTime); Pair edtsData = parseEdts(trak.getContainerAtomOfType(Atom.TYPE_edts)); return stsdData.mediaFormat == null ? null : new Track(tkhdData.id, trackType, mdhdData.first, movieTimescale, durationUs, @@ -429,10 +430,11 @@ import java.util.List; * @param durationUs The duration of the track in microseconds. * @param rotationDegrees The rotation of the track in degrees. * @param language The language of the track. + * @param isQuickTime True for QuickTime media. False otherwise. * @return An object containing the parsed data. 
*/ private static StsdData parseStsd(ParsableByteArray stsd, int trackId, long durationUs, - int rotationDegrees, String language) { + int rotationDegrees, String language, boolean isQuickTime) { stsd.setPosition(Atom.FULL_HEADER_SIZE); int numberOfEntries = stsd.readInt(); StsdData out = new StsdData(numberOfEntries); @@ -452,7 +454,7 @@ import java.util.List; || childAtomType == Atom.TYPE_dtsc || childAtomType == Atom.TYPE_dtse || childAtomType == Atom.TYPE_dtsh || childAtomType == Atom.TYPE_dtsl) { parseAudioSampleEntry(stsd, childAtomType, childStartPosition, childAtomSize, trackId, - durationUs, language, out, i); + durationUs, language, isQuickTime, out, i); } else if (childAtomType == Atom.TYPE_TTML) { out.mediaFormat = MediaFormat.createTextFormat(Integer.toString(trackId), MimeTypes.APPLICATION_TTML, MediaFormat.NO_VALUE, durationUs, language); @@ -695,14 +697,31 @@ import java.util.List; } private static void parseAudioSampleEntry(ParsableByteArray parent, int atomType, int position, - int size, int trackId, long durationUs, String language, StsdData out, int entryIndex) { + int size, int trackId, long durationUs, String language, boolean isQuickTime, StsdData out, + int entryIndex) { parent.setPosition(position + Atom.HEADER_SIZE); - parent.skipBytes(16); + + int quickTimeSoundDescriptionVersion = 0; + if (isQuickTime) { + parent.skipBytes(8); + quickTimeSoundDescriptionVersion = parent.readUnsignedShort(); + parent.skipBytes(6); + } else { + parent.skipBytes(16); + } + int channelCount = parent.readUnsignedShort(); int sampleSize = parent.readUnsignedShort(); parent.skipBytes(4); int sampleRate = parent.readUnsignedFixedPoint1616(); + if (quickTimeSoundDescriptionVersion > 0) { + parent.skipBytes(16); + if (quickTimeSoundDescriptionVersion == 2) { + parent.skipBytes(20); + } + } + // If the atom type determines a MIME type, set it immediately. 
String mimeType = null; if (atomType == Atom.TYPE_ac_3) { @@ -716,17 +735,22 @@ import java.util.List; } byte[] initializationData = null; - int childPosition = parent.getPosition(); - while (childPosition - position < size) { - parent.setPosition(childPosition); - int childStartPosition = parent.getPosition(); + int childAtomPosition = parent.getPosition(); + while (childAtomPosition - position < size) { + parent.setPosition(childAtomPosition); int childAtomSize = parent.readInt(); Assertions.checkArgument(childAtomSize > 0, "childAtomSize should be positive"); int childAtomType = parent.readInt(); if (atomType == Atom.TYPE_mp4a || atomType == Atom.TYPE_enca) { + int esdsAtomPosition = -1; if (childAtomType == Atom.TYPE_esds) { + esdsAtomPosition = childAtomPosition; + } else if (isQuickTime && childAtomType == Atom.TYPE_wave) { + esdsAtomPosition = findEsdsPosition(parent, childAtomPosition, childAtomSize); + } + if (esdsAtomPosition != -1) { Pair mimeTypeAndInitializationData = - parseEsdsFromParent(parent, childStartPosition); + parseEsdsFromParent(parent, esdsAtomPosition); mimeType = mimeTypeAndInitializationData.first; initializationData = mimeTypeAndInitializationData.second; if (MimeTypes.AUDIO_AAC.equals(mimeType)) { @@ -738,18 +762,18 @@ import java.util.List; channelCount = audioSpecificConfig.second; } } else if (childAtomType == Atom.TYPE_sinf) { - out.trackEncryptionBoxes[entryIndex] = parseSinfFromParent(parent, childStartPosition, + out.trackEncryptionBoxes[entryIndex] = parseSinfFromParent(parent, childAtomPosition, childAtomSize); } } else if (atomType == Atom.TYPE_ac_3 && childAtomType == Atom.TYPE_dac3) { // TODO: Choose the right AC-3 track based on the contents of dac3/dec3. // TODO: Add support for encryption (by setting out.trackEncryptionBoxes). 
- parent.setPosition(Atom.HEADER_SIZE + childStartPosition); + parent.setPosition(Atom.HEADER_SIZE + childAtomPosition); out.mediaFormat = Ac3Util.parseAnnexFAc3Format(parent, Integer.toString(trackId), durationUs, language); return; } else if (atomType == Atom.TYPE_ec_3 && childAtomType == Atom.TYPE_dec3) { - parent.setPosition(Atom.HEADER_SIZE + childStartPosition); + parent.setPosition(Atom.HEADER_SIZE + childAtomPosition); out.mediaFormat = Ac3Util.parseAnnexFEAc3Format(parent, Integer.toString(trackId), durationUs, language); return; @@ -761,7 +785,7 @@ import java.util.List; language); return; } - childPosition += childAtomSize; + childAtomPosition += childAtomSize; } // If the media type was not recognized, ignore the track. @@ -775,6 +799,22 @@ import java.util.List; language); } + /** Returns the position of the esds box within a parent, or -1 if no esds box is found. */ + private static int findEsdsPosition(ParsableByteArray parent, int position, int size) { + int childAtomPosition = parent.getPosition(); + while (childAtomPosition - position < size) { + parent.setPosition(childAtomPosition); + int childAtomSize = parent.readInt(); + Assertions.checkArgument(childAtomSize > 0, "childAtomSize should be positive"); + int childType = parent.readInt(); + if (childType == Atom.TYPE_esds) { + return childAtomPosition; + } + childAtomPosition += childAtomSize; + } + return -1; + } + /** Returns codec-specific initialization data contained in an esds box. 
*/ private static Pair parseEsdsFromParent(ParsableByteArray parent, int position) { parent.setPosition(position + Atom.HEADER_SIZE + 4); diff --git a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/FragmentedMp4Extractor.java b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/FragmentedMp4Extractor.java index 7c721d9e0d..8a00dd1872 100644 --- a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/FragmentedMp4Extractor.java +++ b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/FragmentedMp4Extractor.java @@ -305,7 +305,7 @@ public final class FragmentedMp4Extractor implements Extractor { ContainerAtom mvex = moov.getContainerAtomOfType(Atom.TYPE_mvex); extendsDefaults = parseTrex(mvex.getLeafAtomOfType(Atom.TYPE_trex).data); track = AtomParsers.parseTrak(moov.getContainerAtomOfType(Atom.TYPE_trak), - moov.getLeafAtomOfType(Atom.TYPE_mvhd)); + moov.getLeafAtomOfType(Atom.TYPE_mvhd), false); checkState(track != null); trackOutput.format(track.mediaFormat); } diff --git a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/Mp4Extractor.java b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/Mp4Extractor.java index 2ddccab99c..a1f01b0cb8 100644 --- a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/Mp4Extractor.java +++ b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/Mp4Extractor.java @@ -25,6 +25,7 @@ import com.google.android.exoplayer.extractor.mp4.Atom.ContainerAtom; import com.google.android.exoplayer.util.Assertions; import com.google.android.exoplayer.util.NalUnitUtil; import com.google.android.exoplayer.util.ParsableByteArray; +import com.google.android.exoplayer.util.Util; import java.io.IOException; import java.util.ArrayList; @@ -42,6 +43,9 @@ public final class Mp4Extractor implements Extractor, SeekMap { private static final int STATE_READING_ATOM_PAYLOAD = 2; private static final int STATE_READING_SAMPLE = 3; + // Brand stored in 
the ftyp atom for QuickTime media. + private static final int BRAND_QUICKTIME = Util.getIntegerCodeForString("qt "); + /** * When seeking within the source, if the offset is greater than or equal to this value (or the * offset is negative), the source will be reloaded. @@ -68,6 +72,7 @@ public final class Mp4Extractor implements Extractor, SeekMap { // Extractor outputs. private ExtractorOutput extractorOutput; private Mp4Track[] tracks; + private boolean isQuickTime; public Mp4Extractor() { atomHeader = new ParsableByteArray(Atom.LONG_HEADER_SIZE); @@ -210,7 +215,9 @@ public final class Mp4Extractor implements Extractor, SeekMap { boolean seekRequired = false; if (atomData != null) { input.readFully(atomData.data, atomHeaderBytesRead, (int) atomPayloadSize); - if (!containerAtoms.isEmpty()) { + if (atomType == Atom.TYPE_ftyp) { + isQuickTime = processFtypAtom(atomData); + } else if (!containerAtoms.isEmpty()) { containerAtoms.peek().add(new Atom.LeafAtom(atomType, atomData)); } } else { @@ -240,6 +247,27 @@ public final class Mp4Extractor implements Extractor, SeekMap { return seekRequired; } + /** + * Process an ftyp atom to determine whether the media is QuickTime. + * + * @param atomData The ftyp atom data. + * @return True if the media is QuickTime. False otherwise. + */ + private static boolean processFtypAtom(ParsableByteArray atomData) { + atomData.setPosition(Atom.HEADER_SIZE); + int majorBrand = atomData.readInt(); + if (majorBrand == BRAND_QUICKTIME) { + return true; + } + atomData.skipBytes(4); // minor_version + while (atomData.bytesLeft() > 0) { + if (atomData.readInt() == BRAND_QUICKTIME) { + return true; + } + } + return false; + } + /** Updates the stored track metadata to reflect the contents of the specified moov atom. 
*/ private void processMoovAtom(ContainerAtom moov) { List tracks = new ArrayList<>(); @@ -250,7 +278,8 @@ public final class Mp4Extractor implements Extractor, SeekMap { continue; } - Track track = AtomParsers.parseTrak(atom, moov.getLeafAtomOfType(Atom.TYPE_mvhd)); + Track track = AtomParsers.parseTrak(atom, moov.getLeafAtomOfType(Atom.TYPE_mvhd), + isQuickTime); if (track == null) { continue; } @@ -387,7 +416,7 @@ public final class Mp4Extractor implements Extractor, SeekMap { || atom == Atom.TYPE_stsd || atom == Atom.TYPE_stts || atom == Atom.TYPE_stss || atom == Atom.TYPE_ctts || atom == Atom.TYPE_elst || atom == Atom.TYPE_stsc || atom == Atom.TYPE_stsz || atom == Atom.TYPE_stco || atom == Atom.TYPE_co64 - || atom == Atom.TYPE_tkhd; + || atom == Atom.TYPE_tkhd || atom == Atom.TYPE_ftyp; } /** Returns whether the extractor should parse a container atom with type {@code atom}. */