Widen support for fMP4 streams.

- Add support for parsing avc3 boxes.
- Make the workaround for signed sample offsets in trun boxes always enabled.
- Generalize remaining workaround into a flag, to make it easy to add additional workarounds going forward without changing the API.
- Fix DataSourceStream bug where read wouldn't return -1 after having fully read a segment whose spec length was unbounded.
This commit is contained in:
Oliver Woodman 2014-07-07 15:55:01 +01:00
parent b398c594fa
commit 43b7efa986
4 changed files with 45 additions and 31 deletions

View File

@ -21,6 +21,7 @@ import java.util.List;
/* package */ abstract class Atom { /* package */ abstract class Atom {
public static final int TYPE_avc1 = 0x61766331; public static final int TYPE_avc1 = 0x61766331;
public static final int TYPE_avc3 = 0x61766333;
public static final int TYPE_esds = 0x65736473; public static final int TYPE_esds = 0x65736473;
public static final int TYPE_mdat = 0x6D646174; public static final int TYPE_mdat = 0x6D646174;
public static final int TYPE_mfhd = 0x6D666864; public static final int TYPE_mfhd = 0x6D666864;

View File

@ -49,6 +49,15 @@ import java.util.UUID;
*/ */
public final class FragmentedMp4Extractor { public final class FragmentedMp4Extractor {
/**
* Flag to work around an issue in some video streams where every frame is marked as a sync frame.
* The workaround overrides the sync frame flags in the stream, forcing them to false except for
* the first sample in each segment.
* <p>
* This flag does nothing if the stream is not a video stream.
*/
public static final int WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME = 1;
/** /**
* An attempt to read from the input stream returned 0 bytes of data. * An attempt to read from the input stream returned 0 bytes of data.
*/ */
@ -97,6 +106,7 @@ public final class FragmentedMp4Extractor {
static { static {
HashSet<Integer> parsedAtoms = new HashSet<Integer>(); HashSet<Integer> parsedAtoms = new HashSet<Integer>();
parsedAtoms.add(Atom.TYPE_avc1); parsedAtoms.add(Atom.TYPE_avc1);
parsedAtoms.add(Atom.TYPE_avc3);
parsedAtoms.add(Atom.TYPE_esds); parsedAtoms.add(Atom.TYPE_esds);
parsedAtoms.add(Atom.TYPE_hdlr); parsedAtoms.add(Atom.TYPE_hdlr);
parsedAtoms.add(Atom.TYPE_mdat); parsedAtoms.add(Atom.TYPE_mdat);
@ -140,7 +150,7 @@ public final class FragmentedMp4Extractor {
CONTAINER_TYPES = Collections.unmodifiableSet(atomContainerTypes); CONTAINER_TYPES = Collections.unmodifiableSet(atomContainerTypes);
} }
private final boolean enableSmoothStreamingWorkarounds; private final int workaroundFlags;
// Parser state // Parser state
private final ParsableByteArray atomHeader; private final ParsableByteArray atomHeader;
@ -172,16 +182,15 @@ public final class FragmentedMp4Extractor {
private TrackFragment fragmentRun; private TrackFragment fragmentRun;
public FragmentedMp4Extractor() { public FragmentedMp4Extractor() {
this(false); this(0);
} }
/** /**
* @param enableSmoothStreamingWorkarounds Set to true if this extractor will be used to parse * @param workaroundFlags Flags to allow parsing of faulty streams.
* SmoothStreaming streams. This will enable workarounds for SmoothStreaming violations of * {@link #WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME} is currently the only flag defined.
* the ISO base media file format (ISO 14496-12). Set to false otherwise.
*/ */
public FragmentedMp4Extractor(boolean enableSmoothStreamingWorkarounds) { public FragmentedMp4Extractor(int workaroundFlags) {
this.enableSmoothStreamingWorkarounds = enableSmoothStreamingWorkarounds; this.workaroundFlags = workaroundFlags;
parserState = STATE_READING_ATOM_HEADER; parserState = STATE_READING_ATOM_HEADER;
atomHeader = new ParsableByteArray(ATOM_HEADER_SIZE); atomHeader = new ParsableByteArray(ATOM_HEADER_SIZE);
containerAtoms = new Stack<ContainerAtom>(); containerAtoms = new Stack<ContainerAtom>();
@ -466,7 +475,7 @@ public final class FragmentedMp4Extractor {
private void onMoofContainerAtomRead(ContainerAtom moof) { private void onMoofContainerAtomRead(ContainerAtom moof) {
fragmentRun = new TrackFragment(); fragmentRun = new TrackFragment();
parseMoof(track, extendsDefaults, moof, fragmentRun, enableSmoothStreamingWorkarounds); parseMoof(track, extendsDefaults, moof, fragmentRun, workaroundFlags);
sampleIndex = 0; sampleIndex = 0;
lastSyncSampleIndex = 0; lastSyncSampleIndex = 0;
pendingSeekSyncSampleIndex = 0; pendingSeekSyncSampleIndex = 0;
@ -572,11 +581,12 @@ public final class FragmentedMp4Extractor {
int childStartPosition = stsd.getPosition(); int childStartPosition = stsd.getPosition();
int childAtomSize = stsd.readInt(); int childAtomSize = stsd.readInt();
int childAtomType = stsd.readInt(); int childAtomType = stsd.readInt();
if (childAtomType == Atom.TYPE_avc1 || childAtomType == Atom.TYPE_encv) { if (childAtomType == Atom.TYPE_avc1 || childAtomType == Atom.TYPE_avc3
Pair<MediaFormat, TrackEncryptionBox> avc1 = || childAtomType == Atom.TYPE_encv) {
parseAvc1FromParent(stsd, childStartPosition, childAtomSize); Pair<MediaFormat, TrackEncryptionBox> avc =
mediaFormat = avc1.first; parseAvcFromParent(stsd, childStartPosition, childAtomSize);
trackEncryptionBoxes[i] = avc1.second; mediaFormat = avc.first;
trackEncryptionBoxes[i] = avc.second;
} else if (childAtomType == Atom.TYPE_mp4a || childAtomType == Atom.TYPE_enca) { } else if (childAtomType == Atom.TYPE_mp4a || childAtomType == Atom.TYPE_enca) {
Pair<MediaFormat, TrackEncryptionBox> mp4a = Pair<MediaFormat, TrackEncryptionBox> mp4a =
parseMp4aFromParent(stsd, childStartPosition, childAtomSize); parseMp4aFromParent(stsd, childStartPosition, childAtomSize);
@ -588,7 +598,7 @@ public final class FragmentedMp4Extractor {
return Pair.create(mediaFormat, trackEncryptionBoxes); return Pair.create(mediaFormat, trackEncryptionBoxes);
} }
private static Pair<MediaFormat, TrackEncryptionBox> parseAvc1FromParent(ParsableByteArray parent, private static Pair<MediaFormat, TrackEncryptionBox> parseAvcFromParent(ParsableByteArray parent,
int position, int size) { int position, int size) {
parent.setPosition(position + ATOM_HEADER_SIZE); parent.setPosition(position + ATOM_HEADER_SIZE);
@ -695,7 +705,7 @@ public final class FragmentedMp4Extractor {
int childAtomSize = parent.readInt(); int childAtomSize = parent.readInt();
int childAtomType = parent.readInt(); int childAtomType = parent.readInt();
if (childAtomType == Atom.TYPE_frma) { if (childAtomType == Atom.TYPE_frma) {
parent.readInt(); // dataFormat. Expect TYPE_avc1 (video) or TYPE_mp4a (audio). parent.readInt(); // dataFormat.
} else if (childAtomType == Atom.TYPE_schm) { } else if (childAtomType == Atom.TYPE_schm) {
parent.skip(4); parent.skip(4);
parent.readInt(); // schemeType. Expect cenc parent.readInt(); // schemeType. Expect cenc
@ -774,11 +784,11 @@ public final class FragmentedMp4Extractor {
} }
private static void parseMoof(Track track, DefaultSampleValues extendsDefaults, private static void parseMoof(Track track, DefaultSampleValues extendsDefaults,
ContainerAtom moof, TrackFragment out, boolean enableSmoothStreamingWorkarounds) { ContainerAtom moof, TrackFragment out, int workaroundFlags) {
// TODO: Consider checking that the sequence number returned by parseMfhd is as expected. // TODO: Consider checking that the sequence number returned by parseMfhd is as expected.
parseMfhd(moof.getLeafAtomOfType(Atom.TYPE_mfhd).getData()); parseMfhd(moof.getLeafAtomOfType(Atom.TYPE_mfhd).getData());
parseTraf(track, extendsDefaults, moof.getContainerAtomOfType(Atom.TYPE_traf), parseTraf(track, extendsDefaults, moof.getContainerAtomOfType(Atom.TYPE_traf),
out, enableSmoothStreamingWorkarounds); out, workaroundFlags);
} }
/** /**
@ -796,7 +806,7 @@ public final class FragmentedMp4Extractor {
* Parses a traf atom (defined in 14496-12). * Parses a traf atom (defined in 14496-12).
*/ */
private static void parseTraf(Track track, DefaultSampleValues extendsDefaults, private static void parseTraf(Track track, DefaultSampleValues extendsDefaults,
ContainerAtom traf, TrackFragment out, boolean enableSmoothStreamingWorkarounds) { ContainerAtom traf, TrackFragment out, int workaroundFlags) {
LeafAtom saiz = traf.getLeafAtomOfType(Atom.TYPE_saiz); LeafAtom saiz = traf.getLeafAtomOfType(Atom.TYPE_saiz);
if (saiz != null) { if (saiz != null) {
parseSaiz(saiz.getData(), out); parseSaiz(saiz.getData(), out);
@ -809,8 +819,7 @@ public final class FragmentedMp4Extractor {
out.setSampleDescriptionIndex(fragmentHeader.sampleDescriptionIndex); out.setSampleDescriptionIndex(fragmentHeader.sampleDescriptionIndex);
LeafAtom trun = traf.getLeafAtomOfType(Atom.TYPE_trun); LeafAtom trun = traf.getLeafAtomOfType(Atom.TYPE_trun);
parseTrun(track, fragmentHeader, decodeTime, enableSmoothStreamingWorkarounds, trun.getData(), parseTrun(track, fragmentHeader, decodeTime, workaroundFlags, trun.getData(), out);
out);
LeafAtom uuid = traf.getLeafAtomOfType(Atom.TYPE_uuid); LeafAtom uuid = traf.getLeafAtomOfType(Atom.TYPE_uuid);
if (uuid != null) { if (uuid != null) {
parseUuid(uuid.getData(), out); parseUuid(uuid.getData(), out);
@ -895,8 +904,7 @@ public final class FragmentedMp4Extractor {
* @param out The {@link TrackFragment} into which parsed data should be placed. * @param out The {@link TrackFragment} into which parsed data should be placed.
*/ */
private static void parseTrun(Track track, DefaultSampleValues defaultSampleValues, private static void parseTrun(Track track, DefaultSampleValues defaultSampleValues,
long decodeTime, boolean enableSmoothStreamingWorkarounds, ParsableByteArray trun, long decodeTime, int workaroundFlags, ParsableByteArray trun, TrackFragment out) {
TrackFragment out) {
trun.setPosition(ATOM_HEADER_SIZE); trun.setPosition(ATOM_HEADER_SIZE);
int fullAtom = trun.readInt(); int fullAtom = trun.readInt();
int version = parseFullAtomVersion(fullAtom); int version = parseFullAtomVersion(fullAtom);
@ -926,6 +934,9 @@ public final class FragmentedMp4Extractor {
long timescale = track.timescale; long timescale = track.timescale;
long cumulativeTime = decodeTime; long cumulativeTime = decodeTime;
boolean workaroundEveryVideoFrameIsSyncFrame = track.type == Track.TYPE_VIDEO
&& ((workaroundFlags & WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME)
== WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME);
for (int i = 0; i < numberOfEntries; i++) { for (int i = 0; i < numberOfEntries; i++) {
// Use trun values if present, otherwise tfhd, otherwise trex. // Use trun values if present, otherwise tfhd, otherwise trex.
int sampleDuration = sampleDurationsPresent ? trun.readUnsignedIntToInt() int sampleDuration = sampleDurationsPresent ? trun.readUnsignedIntToInt()
@ -934,11 +945,14 @@ public final class FragmentedMp4Extractor {
int sampleFlags = (i == 0 && firstSampleFlagsPresent) ? firstSampleFlags int sampleFlags = (i == 0 && firstSampleFlagsPresent) ? firstSampleFlags
: sampleFlagsPresent ? trun.readInt() : defaultSampleValues.flags; : sampleFlagsPresent ? trun.readInt() : defaultSampleValues.flags;
if (sampleCompositionTimeOffsetsPresent) { if (sampleCompositionTimeOffsetsPresent) {
// Fragmented mp4 streams packaged for smooth streaming violate the BMFF spec by specifying
// the sample offset as a signed integer in conjunction with a box version of 0.
int sampleOffset; int sampleOffset;
if (version == 0 && !enableSmoothStreamingWorkarounds) { if (version == 0) {
sampleOffset = trun.readUnsignedIntToInt(); // The BMFF spec (ISO 14496-12) states that sample offsets should be unsigned integers in
// version 0 trun boxes, however a significant number of streams violate the spec and use
// signed integers instead. It's safe to always parse sample offsets as signed integers
// here, because unsigned integers will still be parsed correctly (unless their top bit is
// set, which is never true in practice because sample offsets are always small).
sampleOffset = trun.readInt();
} else { } else {
sampleOffset = trun.readInt(); sampleOffset = trun.readInt();
} }
@ -947,9 +961,7 @@ public final class FragmentedMp4Extractor {
sampleDecodingTimeTable[i] = (int) ((cumulativeTime * 1000) / timescale); sampleDecodingTimeTable[i] = (int) ((cumulativeTime * 1000) / timescale);
sampleSizeTable[i] = sampleSize; sampleSizeTable[i] = sampleSize;
boolean isSync = ((sampleFlags >> 16) & 0x1) == 0; boolean isSync = ((sampleFlags >> 16) & 0x1) == 0;
if (track.type == Track.TYPE_VIDEO && enableSmoothStreamingWorkarounds && i != 0) { if (workaroundEveryVideoFrameIsSyncFrame && i != 0) {
// Fragmented mp4 streams packaged for smooth streaming violate the BMFF spec by indicating
// that every sample is a sync frame, when this is not actually the case.
isSync = false; isSync = false;
} }
if (isSync) { if (isSync) {

View File

@ -110,7 +110,8 @@ public class SmoothStreamingChunkSource implements ChunkSource {
MediaFormat mediaFormat = getMediaFormat(streamElement, trackIndex); MediaFormat mediaFormat = getMediaFormat(streamElement, trackIndex);
int trackType = streamElement.type == StreamElement.TYPE_VIDEO ? Track.TYPE_VIDEO int trackType = streamElement.type == StreamElement.TYPE_VIDEO ? Track.TYPE_VIDEO
: Track.TYPE_AUDIO; : Track.TYPE_AUDIO;
FragmentedMp4Extractor extractor = new FragmentedMp4Extractor(true); FragmentedMp4Extractor extractor = new FragmentedMp4Extractor(
FragmentedMp4Extractor.WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME);
extractor.setTrack(new Track(trackIndex, trackType, streamElement.timeScale, mediaFormat, extractor.setTrack(new Track(trackIndex, trackType, streamElement.timeScale, mediaFormat,
trackEncryptionBoxes)); trackEncryptionBoxes));
if (protectionElement != null) { if (protectionElement != null) {

View File

@ -176,7 +176,7 @@ public final class DataSourceStream implements Loadable, NonBlockingInputStream
*/ */
private int read(ByteBuffer target, byte[] targetArray, int targetArrayOffset, private int read(ByteBuffer target, byte[] targetArray, int targetArrayOffset,
ReadHead readHead, int readLength) { ReadHead readHead, int readLength) {
if (readHead.position == dataSpec.length) { if (isEndOfStream()) {
return -1; return -1;
} }
int bytesToRead = (int) Math.min(loadPosition - readHead.position, readLength); int bytesToRead = (int) Math.min(loadPosition - readHead.position, readLength);