Improve DASH manifest parsing.

- Parse attributes that may appear on either the AdaptationSet or its
  child Representation elements at both levels, using the AdaptationSet
  value as the default when a Representation does not specify its own.
- Correctly infer TYPE_TEXT for Representation elements whose mimeType
  is application/mp4 and whose codecs attribute indicates a known text
  codec type.

Issue: #689
This commit is contained in:
Oliver Woodman 2015-08-11 18:12:43 +01:00
parent d6152dc36a
commit dbaeecc4da
5 changed files with 93 additions and 65 deletions

View File

@ -272,7 +272,7 @@ public class DashRendererBuilder implements RendererBuilder {
List<String> codecs = new ArrayList<>(); List<String> codecs = new ArrayList<>();
for (int i = 0; i < audioRepresentations.size(); i++) { for (int i = 0; i < audioRepresentations.size(); i++) {
Format format = audioRepresentations.get(i).format; Format format = audioRepresentations.get(i).format;
audioTrackNameList.add(format.id + " (" + format.numChannels + "ch, " + audioTrackNameList.add(format.id + " (" + format.audioChannels + "ch, " +
format.audioSamplingRate + "Hz)"); format.audioSamplingRate + "Hz)");
audioChunkSourceList.add(new DashChunkSource(manifestFetcher, audioAdaptationSetIndex, audioChunkSourceList.add(new DashChunkSource(manifestFetcher, audioAdaptationSetIndex,
new int[] {i}, audioDataSource, audioEvaluator, LIVE_EDGE_LATENCY_MS, new int[] {i}, audioDataSource, audioEvaluator, LIVE_EDGE_LATENCY_MS,

View File

@ -69,7 +69,7 @@ public class Format {
/** /**
* The number of audio channels, or -1 if unknown or not applicable. * The number of audio channels, or -1 if unknown or not applicable.
*/ */
public final int numChannels; public final int audioChannels;
/** /**
* The audio sampling rate in Hz, or -1 if unknown or not applicable. * The audio sampling rate in Hz, or -1 if unknown or not applicable.
@ -131,20 +131,20 @@ public class Format {
* @param height The height of the video in pixels, or -1 if unknown or not applicable. * @param height The height of the video in pixels, or -1 if unknown or not applicable.
* @param frameRate The frame rate of the video in frames per second, or -1 if unknown or not * @param frameRate The frame rate of the video in frames per second, or -1 if unknown or not
* applicable. * applicable.
* @param numChannels The number of audio channels, or -1 if unknown or not applicable. * @param audioChannels The number of audio channels, or -1 if unknown or not applicable.
* @param audioSamplingRate The audio sampling rate in Hz, or -1 if unknown or not applicable. * @param audioSamplingRate The audio sampling rate in Hz, or -1 if unknown or not applicable.
* @param bitrate The average bandwidth of the format in bits per second. * @param bitrate The average bandwidth of the format in bits per second.
* @param language The language of the format. * @param language The language of the format.
* @param codecs The codecs used to decode the format. * @param codecs The codecs used to decode the format.
*/ */
public Format(String id, String mimeType, int width, int height, float frameRate, int numChannels, public Format(String id, String mimeType, int width, int height, float frameRate,
int audioSamplingRate, int bitrate, String language, String codecs) { int audioChannels, int audioSamplingRate, int bitrate, String language, String codecs) {
this.id = Assertions.checkNotNull(id); this.id = Assertions.checkNotNull(id);
this.mimeType = mimeType; this.mimeType = mimeType;
this.width = width; this.width = width;
this.height = height; this.height = height;
this.frameRate = frameRate; this.frameRate = frameRate;
this.numChannels = numChannels; this.audioChannels = audioChannels;
this.audioSamplingRate = audioSamplingRate; this.audioSamplingRate = audioSamplingRate;
this.bitrate = bitrate; this.bitrate = bitrate;
this.language = language; this.language = language;

View File

@ -184,14 +184,17 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
protected AdaptationSet parseAdaptationSet(XmlPullParser xpp, String baseUrl, long periodStartMs, protected AdaptationSet parseAdaptationSet(XmlPullParser xpp, String baseUrl, long periodStartMs,
long periodDurationMs, SegmentBase segmentBase) throws XmlPullParserException, IOException { long periodDurationMs, SegmentBase segmentBase) throws XmlPullParserException, IOException {
int id = parseInt(xpp, "id", -1); int id = parseInt(xpp, "id", -1);
int contentType = parseContentType(xpp);
String mimeType = xpp.getAttributeValue(null, "mimeType"); String mimeType = xpp.getAttributeValue(null, "mimeType");
String codecs = xpp.getAttributeValue(null, "codecs");
int width = parseInt(xpp, "width", -1);
int height = parseInt(xpp, "height", -1);
float frameRate = parseFrameRate(xpp, -1);
int audioChannels = -1;
int audioSamplingRate = parseInt(xpp, "audioSamplingRate", -1);
String language = xpp.getAttributeValue(null, "lang"); String language = xpp.getAttributeValue(null, "lang");
int contentType = parseAdaptationSetType(xpp.getAttributeValue(null, "contentType"));
if (contentType == AdaptationSet.TYPE_UNKNOWN) {
contentType = parseAdaptationSetTypeFromMimeType(xpp.getAttributeValue(null, "mimeType"));
}
ContentProtectionsBuilder contentProtectionsBuilder = new ContentProtectionsBuilder(); ContentProtectionsBuilder contentProtectionsBuilder = new ContentProtectionsBuilder();
List<Representation> representations = new ArrayList<>(); List<Representation> representations = new ArrayList<>();
@ -203,15 +206,16 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
contentProtectionsBuilder.addAdaptationSetProtection(parseContentProtection(xpp)); contentProtectionsBuilder.addAdaptationSetProtection(parseContentProtection(xpp));
} else if (isStartTag(xpp, "ContentComponent")) { } else if (isStartTag(xpp, "ContentComponent")) {
language = checkLanguageConsistency(language, xpp.getAttributeValue(null, "lang")); language = checkLanguageConsistency(language, xpp.getAttributeValue(null, "lang"));
contentType = checkAdaptationSetTypeConsistency(contentType, contentType = checkContentTypeConsistency(contentType, parseContentType(xpp));
parseAdaptationSetType(xpp.getAttributeValue(null, "contentType")));
} else if (isStartTag(xpp, "Representation")) { } else if (isStartTag(xpp, "Representation")) {
Representation representation = parseRepresentation(xpp, baseUrl, periodStartMs, Representation representation = parseRepresentation(xpp, baseUrl, periodStartMs,
periodDurationMs, mimeType, language, segmentBase, contentProtectionsBuilder); periodDurationMs, mimeType, codecs, width, height, frameRate, audioChannels,
audioSamplingRate, language, segmentBase, contentProtectionsBuilder);
contentProtectionsBuilder.endRepresentation(); contentProtectionsBuilder.endRepresentation();
contentType = checkAdaptationSetTypeConsistency(contentType, contentType = checkContentTypeConsistency(contentType, getContentType(representation));
parseAdaptationSetTypeFromMimeType(representation.format.mimeType));
representations.add(representation); representations.add(representation);
} else if (isStartTag(xpp, "AudioChannelConfiguration")) {
audioChannels = parseAudioChannelConfiguration(xpp);
} else if (isStartTag(xpp, "SegmentBase")) { } else if (isStartTag(xpp, "SegmentBase")) {
segmentBase = parseSegmentBase(xpp, baseUrl, (SingleSegmentBase) segmentBase); segmentBase = parseSegmentBase(xpp, baseUrl, (SingleSegmentBase) segmentBase);
} else if (isStartTag(xpp, "SegmentList")) { } else if (isStartTag(xpp, "SegmentList")) {
@ -232,7 +236,8 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
return new AdaptationSet(id, contentType, representations, contentProtections); return new AdaptationSet(id, contentType, representations, contentProtections);
} }
protected int parseAdaptationSetType(String contentType) { protected int parseContentType(XmlPullParser xpp) {
String contentType = xpp.getAttributeValue(null, "contentType");
return TextUtils.isEmpty(contentType) ? AdaptationSet.TYPE_UNKNOWN return TextUtils.isEmpty(contentType) ? AdaptationSet.TYPE_UNKNOWN
: MimeTypes.BASE_TYPE_AUDIO.equals(contentType) ? AdaptationSet.TYPE_AUDIO : MimeTypes.BASE_TYPE_AUDIO.equals(contentType) ? AdaptationSet.TYPE_AUDIO
: MimeTypes.BASE_TYPE_VIDEO.equals(contentType) ? AdaptationSet.TYPE_VIDEO : MimeTypes.BASE_TYPE_VIDEO.equals(contentType) ? AdaptationSet.TYPE_VIDEO
@ -240,12 +245,25 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
: AdaptationSet.TYPE_UNKNOWN; : AdaptationSet.TYPE_UNKNOWN;
} }
protected int parseAdaptationSetTypeFromMimeType(String mimeType) { protected int getContentType(Representation representation) {
return TextUtils.isEmpty(mimeType) ? AdaptationSet.TYPE_UNKNOWN String mimeType = representation.format.mimeType;
: MimeTypes.isAudio(mimeType) ? AdaptationSet.TYPE_AUDIO if (TextUtils.isEmpty(mimeType)) {
: MimeTypes.isVideo(mimeType) ? AdaptationSet.TYPE_VIDEO return AdaptationSet.TYPE_UNKNOWN;
: MimeTypes.isText(mimeType) || MimeTypes.isTtml(mimeType) ? AdaptationSet.TYPE_TEXT } else if (MimeTypes.isVideo(mimeType)) {
: AdaptationSet.TYPE_UNKNOWN; return AdaptationSet.TYPE_VIDEO;
} else if (MimeTypes.isAudio(mimeType)) {
return AdaptationSet.TYPE_AUDIO;
} else if (MimeTypes.isText(mimeType) || MimeTypes.APPLICATION_TTML.equals(mimeType)) {
return AdaptationSet.TYPE_TEXT;
} else if (MimeTypes.APPLICATION_MP4.equals(mimeType)) {
// The representation uses mp4 but does not contain video or audio. Use codecs to determine
// whether the container holds text.
String codecs = representation.format.codecs;
if ("stpp".equals(codecs) || "wvtt".equals(codecs)) {
return AdaptationSet.TYPE_TEXT;
}
}
return AdaptationSet.TYPE_UNKNOWN;
} }
/** /**
@ -293,40 +311,30 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
// Representation parsing. // Representation parsing.
protected Representation parseRepresentation(XmlPullParser xpp, String baseUrl, protected Representation parseRepresentation(XmlPullParser xpp, String baseUrl,
long periodStartMs, long periodDurationMs, String mimeType, String language, long periodStartMs, long periodDurationMs, String adaptationSetMimeType,
SegmentBase segmentBase, ContentProtectionsBuilder contentProtectionsBuilder) String adaptationSetCodecs, int adaptationSetWidth, int adaptationSetHeight,
float adaptationSetFrameRate, int adaptationSetAudioChannels,
int adaptationSetAudioSamplingRate, String adaptationSetLanguage, SegmentBase segmentBase,
ContentProtectionsBuilder contentProtectionsBuilder)
throws XmlPullParserException, IOException { throws XmlPullParserException, IOException {
String id = xpp.getAttributeValue(null, "id"); String id = xpp.getAttributeValue(null, "id");
int bandwidth = parseInt(xpp, "bandwidth"); int bandwidth = parseInt(xpp, "bandwidth");
int audioSamplingRate = parseInt(xpp, "audioSamplingRate");
int width = parseInt(xpp, "width");
int height = parseInt(xpp, "height");
float frameRate = -1; String mimeType = parseString(xpp, "mimeType", adaptationSetMimeType);
String frameRateAttribute = xpp.getAttributeValue(null, "frameRate"); String codecs = parseString(xpp, "codecs", adaptationSetCodecs);
if (frameRateAttribute != null) { int width = parseInt(xpp, "width", adaptationSetWidth);
Matcher frameRateMatcher = FRAME_RATE_PATTERN.matcher(frameRateAttribute); int height = parseInt(xpp, "height", adaptationSetHeight);
if (frameRateMatcher.matches()) { float frameRate = parseFrameRate(xpp, adaptationSetFrameRate);
int numerator = Integer.parseInt(frameRateMatcher.group(1)); int audioChannels = adaptationSetAudioChannels;
String denominatorString = frameRateMatcher.group(2); int audioSamplingRate = parseInt(xpp, "audioSamplingRate", adaptationSetAudioSamplingRate);
if (!TextUtils.isEmpty(denominatorString)) { String language = adaptationSetLanguage;
frameRate = (float) numerator / Integer.parseInt(denominatorString);
} else {
frameRate = numerator;
}
}
}
mimeType = parseString(xpp, "mimeType", mimeType);
String codecs = parseString(xpp, "codecs", null);
int numChannels = -1;
do { do {
xpp.next(); xpp.next();
if (isStartTag(xpp, "BaseURL")) { if (isStartTag(xpp, "BaseURL")) {
baseUrl = parseBaseUrl(xpp, baseUrl); baseUrl = parseBaseUrl(xpp, baseUrl);
} else if (isStartTag(xpp, "AudioChannelConfiguration")) { } else if (isStartTag(xpp, "AudioChannelConfiguration")) {
numChannels = Integer.parseInt(xpp.getAttributeValue(null, "value")); audioChannels = parseAudioChannelConfiguration(xpp);
} else if (isStartTag(xpp, "SegmentBase")) { } else if (isStartTag(xpp, "SegmentBase")) {
segmentBase = parseSegmentBase(xpp, baseUrl, (SingleSegmentBase) segmentBase); segmentBase = parseSegmentBase(xpp, baseUrl, (SingleSegmentBase) segmentBase);
} else if (isStartTag(xpp, "SegmentList")) { } else if (isStartTag(xpp, "SegmentList")) {
@ -339,15 +347,15 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
} }
} while (!isEndTag(xpp, "Representation")); } while (!isEndTag(xpp, "Representation"));
Format format = buildFormat(id, mimeType, width, height, frameRate, numChannels, Format format = buildFormat(id, mimeType, width, height, frameRate, audioChannels,
audioSamplingRate, bandwidth, language, codecs); audioSamplingRate, bandwidth, language, codecs);
return buildRepresentation(periodStartMs, periodDurationMs, contentId, -1, format, return buildRepresentation(periodStartMs, periodDurationMs, contentId, -1, format,
segmentBase != null ? segmentBase : new SingleSegmentBase(baseUrl)); segmentBase != null ? segmentBase : new SingleSegmentBase(baseUrl));
} }
protected Format buildFormat(String id, String mimeType, int width, int height, float frameRate, protected Format buildFormat(String id, String mimeType, int width, int height, float frameRate,
int numChannels, int audioSamplingRate, int bandwidth, String language, String codecs) { int audioChannels, int audioSamplingRate, int bandwidth, String language, String codecs) {
return new Format(id, mimeType, width, height, frameRate, numChannels, audioSamplingRate, return new Format(id, mimeType, width, height, frameRate, audioChannels, audioSamplingRate,
bandwidth, language, codecs); bandwidth, language, codecs);
} }
@ -540,6 +548,17 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
return new RangedUri(baseUrl, urlText, rangeStart, rangeLength); return new RangedUri(baseUrl, urlText, rangeStart, rangeLength);
} }
// AudioChannelConfiguration parsing.
/**
 * Parses an AudioChannelConfiguration element.
 * <p>
 * Reads the element's "value" attribute, then advances the parser until it reaches the element's
 * end tag.
 *
 * @param xpp The parser, on the start tag of an AudioChannelConfiguration element.
 * @return The result of parsing the "value" attribute as an integer.
 * @throws XmlPullParserException If advancing the parser fails.
 * @throws IOException If advancing the parser fails.
 */
protected int parseAudioChannelConfiguration(XmlPullParser xpp)
    throws XmlPullParserException, IOException {
  int channelCount = parseInt(xpp, "value");
  // Always advance at least once, then keep going until this element's end tag is reached.
  xpp.next();
  while (!isEndTag(xpp, "AudioChannelConfiguration")) {
    xpp.next();
  }
  return channelCount;
}
// Utility methods. // Utility methods.
/** /**
@ -564,8 +583,8 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
} }
/** /**
* Checks two adaptation set types for consistency, returning the consistent type, or throwing an * Checks two adaptation set content types for consistency, returning the consistent type, or
* {@link IllegalStateException} if the types are inconsistent. * throwing an {@link IllegalStateException} if the types are inconsistent.
* <p> * <p>
* Two types are consistent if they are equal, or if one is {@link AdaptationSet#TYPE_UNKNOWN}. * Two types are consistent if they are equal, or if one is {@link AdaptationSet#TYPE_UNKNOWN}.
* Where one of the types is {@link AdaptationSet#TYPE_UNKNOWN}, the other is returned. * Where one of the types is {@link AdaptationSet#TYPE_UNKNOWN}, the other is returned.
@ -574,7 +593,7 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
* @param secondType The second type. * @param secondType The second type.
* @return The consistent type. * @return The consistent type.
*/ */
private static int checkAdaptationSetTypeConsistency(int firstType, int secondType) { private static int checkContentTypeConsistency(int firstType, int secondType) {
if (firstType == AdaptationSet.TYPE_UNKNOWN) { if (firstType == AdaptationSet.TYPE_UNKNOWN) {
return secondType; return secondType;
} else if (secondType == AdaptationSet.TYPE_UNKNOWN) { } else if (secondType == AdaptationSet.TYPE_UNKNOWN) {
@ -598,6 +617,24 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
return xpp.getEventType() == XmlPullParser.START_TAG; return xpp.getEventType() == XmlPullParser.START_TAG;
} }
/**
 * Parses the "frameRate" attribute of the element the parser is currently on.
 * <p>
 * The attribute is matched against {@code FRAME_RATE_PATTERN}. The first group is read as the
 * numerator and the second group, if non-empty, as the denominator.
 *
 * @param xpp The parser from which the attribute is read.
 * @param defaultValue The value to return if the attribute is absent, does not match the
 *     pattern, or specifies a zero denominator.
 * @return The parsed frame rate, or {@code defaultValue}.
 */
protected static float parseFrameRate(XmlPullParser xpp, float defaultValue) {
  String frameRateAttribute = xpp.getAttributeValue(null, "frameRate");
  if (frameRateAttribute == null) {
    return defaultValue;
  }
  Matcher frameRateMatcher = FRAME_RATE_PATTERN.matcher(frameRateAttribute);
  if (!frameRateMatcher.matches()) {
    return defaultValue;
  }
  int numerator = Integer.parseInt(frameRateMatcher.group(1));
  String denominatorString = frameRateMatcher.group(2);
  if (TextUtils.isEmpty(denominatorString)) {
    // No denominator given: the attribute is a whole number of frames per second.
    return numerator;
  }
  int denominator = Integer.parseInt(denominatorString);
  if (denominator == 0) {
    // Float division by zero yields Infinity or NaN instead of failing, so a malformed value
    // such as "30/0" is treated as unparseable rather than propagated into the format.
    return defaultValue;
  }
  return (float) numerator / denominator;
}
protected static long parseDuration(XmlPullParser xpp, String name, long defaultValue) { protected static long parseDuration(XmlPullParser xpp, String name, long defaultValue) {
String value = xpp.getAttributeValue(null, name); String value = xpp.getAttributeValue(null, name);
if (value == null) { if (value == null) {

View File

@ -392,10 +392,10 @@ public class SmoothStreamingChunkSource implements ChunkSource {
csd = Arrays.asList(trackElement.csd); csd = Arrays.asList(trackElement.csd);
} else { } else {
csd = Collections.singletonList(CodecSpecificDataUtil.buildAacAudioSpecificConfig( csd = Collections.singletonList(CodecSpecificDataUtil.buildAacAudioSpecificConfig(
trackFormat.audioSamplingRate, trackFormat.numChannels)); trackFormat.audioSamplingRate, trackFormat.audioChannels));
} }
MediaFormat format = MediaFormat.createAudioFormat(mimeType, MediaFormat.NO_VALUE, MediaFormat format = MediaFormat.createAudioFormat(mimeType, MediaFormat.NO_VALUE,
trackFormat.numChannels, trackFormat.audioSamplingRate, csd); trackFormat.audioChannels, trackFormat.audioSamplingRate, csd);
return format; return format;
} else if (streamElement.type == StreamElement.TYPE_TEXT) { } else if (streamElement.type == StreamElement.TYPE_TEXT) {
return MediaFormat.createTextFormat(trackFormat.mimeType, trackFormat.language); return MediaFormat.createTextFormat(trackFormat.mimeType, trackFormat.language);

View File

@ -50,6 +50,7 @@ public final class MimeTypes {
public static final String TEXT_VTT = BASE_TYPE_TEXT + "/vtt"; public static final String TEXT_VTT = BASE_TYPE_TEXT + "/vtt";
public static final String APPLICATION_MP4 = BASE_TYPE_APPLICATION + "/mp4";
public static final String APPLICATION_ID3 = BASE_TYPE_APPLICATION + "/id3"; public static final String APPLICATION_ID3 = BASE_TYPE_APPLICATION + "/id3";
public static final String APPLICATION_EIA608 = BASE_TYPE_APPLICATION + "/eia-608"; public static final String APPLICATION_EIA608 = BASE_TYPE_APPLICATION + "/eia-608";
public static final String APPLICATION_SUBRIP = BASE_TYPE_APPLICATION + "/x-subrip"; public static final String APPLICATION_SUBRIP = BASE_TYPE_APPLICATION + "/x-subrip";
@ -113,14 +114,4 @@ public final class MimeTypes {
return getTopLevelType(mimeType).equals(BASE_TYPE_APPLICATION); return getTopLevelType(mimeType).equals(BASE_TYPE_APPLICATION);
} }
/**
* Whether the mimeType is {@link #APPLICATION_TTML}.
*
* @param mimeType The mimeType to test.
* @return Whether the mimeType is {@link #APPLICATION_TTML}.
*/
public static boolean isTtml(String mimeType) {
return mimeType.equals(APPLICATION_TTML);
}
} }