Improve DASH manifest parsing.

- Parse all attributes that may exist in either the AdaptationSet or
  in the child Representation elements at both levels.
- Correctly infer TYPE_TEXT for Representation elements whose mimeType
  is application/mp4 and whose codecs attribute indicates a known text
  codec type.

Issue: #689
This commit is contained in:
Oliver Woodman 2015-08-11 18:12:43 +01:00
parent d6152dc36a
commit dbaeecc4da
5 changed files with 93 additions and 65 deletions

View File

@ -272,7 +272,7 @@ public class DashRendererBuilder implements RendererBuilder {
List<String> codecs = new ArrayList<>();
for (int i = 0; i < audioRepresentations.size(); i++) {
Format format = audioRepresentations.get(i).format;
audioTrackNameList.add(format.id + " (" + format.numChannels + "ch, " +
audioTrackNameList.add(format.id + " (" + format.audioChannels + "ch, " +
format.audioSamplingRate + "Hz)");
audioChunkSourceList.add(new DashChunkSource(manifestFetcher, audioAdaptationSetIndex,
new int[] {i}, audioDataSource, audioEvaluator, LIVE_EDGE_LATENCY_MS,

View File

@ -69,7 +69,7 @@ public class Format {
/**
* The number of audio channels, or -1 if unknown or not applicable.
*/
public final int numChannels;
public final int audioChannels;
/**
* The audio sampling rate in Hz, or -1 if unknown or not applicable.
@ -131,20 +131,20 @@ public class Format {
* @param height The height of the video in pixels, or -1 if unknown or not applicable.
* @param frameRate The frame rate of the video in frames per second, or -1 if unknown or not
* applicable.
* @param numChannels The number of audio channels, or -1 if unknown or not applicable.
* @param audioChannels The number of audio channels, or -1 if unknown or not applicable.
* @param audioSamplingRate The audio sampling rate in Hz, or -1 if unknown or not applicable.
* @param bitrate The average bandwidth of the format in bits per second.
* @param language The language of the format.
* @param codecs The codecs used to decode the format.
*/
public Format(String id, String mimeType, int width, int height, float frameRate, int numChannels,
int audioSamplingRate, int bitrate, String language, String codecs) {
public Format(String id, String mimeType, int width, int height, float frameRate,
int audioChannels, int audioSamplingRate, int bitrate, String language, String codecs) {
this.id = Assertions.checkNotNull(id);
this.mimeType = mimeType;
this.width = width;
this.height = height;
this.frameRate = frameRate;
this.numChannels = numChannels;
this.audioChannels = audioChannels;
this.audioSamplingRate = audioSamplingRate;
this.bitrate = bitrate;
this.language = language;

View File

@ -184,14 +184,17 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
protected AdaptationSet parseAdaptationSet(XmlPullParser xpp, String baseUrl, long periodStartMs,
long periodDurationMs, SegmentBase segmentBase) throws XmlPullParserException, IOException {
int id = parseInt(xpp, "id", -1);
int contentType = parseContentType(xpp);
String mimeType = xpp.getAttributeValue(null, "mimeType");
String codecs = xpp.getAttributeValue(null, "codecs");
int width = parseInt(xpp, "width", -1);
int height = parseInt(xpp, "height", -1);
float frameRate = parseFrameRate(xpp, -1);
int audioChannels = -1;
int audioSamplingRate = parseInt(xpp, "audioSamplingRate", -1);
String language = xpp.getAttributeValue(null, "lang");
int contentType = parseAdaptationSetType(xpp.getAttributeValue(null, "contentType"));
if (contentType == AdaptationSet.TYPE_UNKNOWN) {
contentType = parseAdaptationSetTypeFromMimeType(xpp.getAttributeValue(null, "mimeType"));
}
ContentProtectionsBuilder contentProtectionsBuilder = new ContentProtectionsBuilder();
List<Representation> representations = new ArrayList<>();
@ -203,15 +206,16 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
contentProtectionsBuilder.addAdaptationSetProtection(parseContentProtection(xpp));
} else if (isStartTag(xpp, "ContentComponent")) {
language = checkLanguageConsistency(language, xpp.getAttributeValue(null, "lang"));
contentType = checkAdaptationSetTypeConsistency(contentType,
parseAdaptationSetType(xpp.getAttributeValue(null, "contentType")));
contentType = checkContentTypeConsistency(contentType, parseContentType(xpp));
} else if (isStartTag(xpp, "Representation")) {
Representation representation = parseRepresentation(xpp, baseUrl, periodStartMs,
periodDurationMs, mimeType, language, segmentBase, contentProtectionsBuilder);
periodDurationMs, mimeType, codecs, width, height, frameRate, audioChannels,
audioSamplingRate, language, segmentBase, contentProtectionsBuilder);
contentProtectionsBuilder.endRepresentation();
contentType = checkAdaptationSetTypeConsistency(contentType,
parseAdaptationSetTypeFromMimeType(representation.format.mimeType));
contentType = checkContentTypeConsistency(contentType, getContentType(representation));
representations.add(representation);
} else if (isStartTag(xpp, "AudioChannelConfiguration")) {
audioChannels = parseAudioChannelConfiguration(xpp);
} else if (isStartTag(xpp, "SegmentBase")) {
segmentBase = parseSegmentBase(xpp, baseUrl, (SingleSegmentBase) segmentBase);
} else if (isStartTag(xpp, "SegmentList")) {
@ -232,7 +236,8 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
return new AdaptationSet(id, contentType, representations, contentProtections);
}
protected int parseAdaptationSetType(String contentType) {
protected int parseContentType(XmlPullParser xpp) {
String contentType = xpp.getAttributeValue(null, "contentType");
return TextUtils.isEmpty(contentType) ? AdaptationSet.TYPE_UNKNOWN
: MimeTypes.BASE_TYPE_AUDIO.equals(contentType) ? AdaptationSet.TYPE_AUDIO
: MimeTypes.BASE_TYPE_VIDEO.equals(contentType) ? AdaptationSet.TYPE_VIDEO
@ -240,12 +245,25 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
: AdaptationSet.TYPE_UNKNOWN;
}
protected int parseAdaptationSetTypeFromMimeType(String mimeType) {
return TextUtils.isEmpty(mimeType) ? AdaptationSet.TYPE_UNKNOWN
: MimeTypes.isAudio(mimeType) ? AdaptationSet.TYPE_AUDIO
: MimeTypes.isVideo(mimeType) ? AdaptationSet.TYPE_VIDEO
: MimeTypes.isText(mimeType) || MimeTypes.isTtml(mimeType) ? AdaptationSet.TYPE_TEXT
: AdaptationSet.TYPE_UNKNOWN;
/**
 * Infers the adaptation set content type of a representation from its format.
 * <p>
 * The mimeType is checked first. If it is {@link MimeTypes#APPLICATION_MP4}, the container type
 * alone does not identify the content, so the codecs string is inspected for a known text codec.
 *
 * @param representation The representation whose format is inspected.
 * @return {@link AdaptationSet#TYPE_VIDEO}, {@link AdaptationSet#TYPE_AUDIO} or
 *     {@link AdaptationSet#TYPE_TEXT} if the type could be inferred, or
 *     {@link AdaptationSet#TYPE_UNKNOWN} otherwise.
 */
protected int getContentType(Representation representation) {
  String mimeType = representation.format.mimeType;
  if (TextUtils.isEmpty(mimeType)) {
    return AdaptationSet.TYPE_UNKNOWN;
  } else if (MimeTypes.isVideo(mimeType)) {
    return AdaptationSet.TYPE_VIDEO;
  } else if (MimeTypes.isAudio(mimeType)) {
    return AdaptationSet.TYPE_AUDIO;
  } else if (MimeTypes.isText(mimeType) || MimeTypes.APPLICATION_TTML.equals(mimeType)) {
    return AdaptationSet.TYPE_TEXT;
  } else if (MimeTypes.APPLICATION_MP4.equals(mimeType)) {
    // The representation uses mp4 but does not contain video or audio. Use codecs to determine
    // whether the container holds text.
    String codecs = representation.format.codecs;
    // A codecs value may carry a '.'-delimited suffix after the sample entry code (for example
    // "stpp.ttml.im1t"), so accept both the bare code and the code followed by a suffix.
    if (codecs != null
        && (codecs.equals("stpp") || codecs.startsWith("stpp.")
            || codecs.equals("wvtt") || codecs.startsWith("wvtt."))) {
      return AdaptationSet.TYPE_TEXT;
    }
  }
  return AdaptationSet.TYPE_UNKNOWN;
}
/**
@ -293,40 +311,30 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
// Representation parsing.
protected Representation parseRepresentation(XmlPullParser xpp, String baseUrl,
long periodStartMs, long periodDurationMs, String mimeType, String language,
SegmentBase segmentBase, ContentProtectionsBuilder contentProtectionsBuilder)
long periodStartMs, long periodDurationMs, String adaptationSetMimeType,
String adaptationSetCodecs, int adaptationSetWidth, int adaptationSetHeight,
float adaptationSetFrameRate, int adaptationSetAudioChannels,
int adaptationSetAudioSamplingRate, String adaptationSetLanguage, SegmentBase segmentBase,
ContentProtectionsBuilder contentProtectionsBuilder)
throws XmlPullParserException, IOException {
String id = xpp.getAttributeValue(null, "id");
int bandwidth = parseInt(xpp, "bandwidth");
int audioSamplingRate = parseInt(xpp, "audioSamplingRate");
int width = parseInt(xpp, "width");
int height = parseInt(xpp, "height");
float frameRate = -1;
String frameRateAttribute = xpp.getAttributeValue(null, "frameRate");
if (frameRateAttribute != null) {
Matcher frameRateMatcher = FRAME_RATE_PATTERN.matcher(frameRateAttribute);
if (frameRateMatcher.matches()) {
int numerator = Integer.parseInt(frameRateMatcher.group(1));
String denominatorString = frameRateMatcher.group(2);
if (!TextUtils.isEmpty(denominatorString)) {
frameRate = (float) numerator / Integer.parseInt(denominatorString);
} else {
frameRate = numerator;
}
}
}
String mimeType = parseString(xpp, "mimeType", adaptationSetMimeType);
String codecs = parseString(xpp, "codecs", adaptationSetCodecs);
int width = parseInt(xpp, "width", adaptationSetWidth);
int height = parseInt(xpp, "height", adaptationSetHeight);
float frameRate = parseFrameRate(xpp, adaptationSetFrameRate);
int audioChannels = adaptationSetAudioChannels;
int audioSamplingRate = parseInt(xpp, "audioSamplingRate", adaptationSetAudioSamplingRate);
String language = adaptationSetLanguage;
mimeType = parseString(xpp, "mimeType", mimeType);
String codecs = parseString(xpp, "codecs", null);
int numChannels = -1;
do {
xpp.next();
if (isStartTag(xpp, "BaseURL")) {
baseUrl = parseBaseUrl(xpp, baseUrl);
} else if (isStartTag(xpp, "AudioChannelConfiguration")) {
numChannels = Integer.parseInt(xpp.getAttributeValue(null, "value"));
audioChannels = parseAudioChannelConfiguration(xpp);
} else if (isStartTag(xpp, "SegmentBase")) {
segmentBase = parseSegmentBase(xpp, baseUrl, (SingleSegmentBase) segmentBase);
} else if (isStartTag(xpp, "SegmentList")) {
@ -339,15 +347,15 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
}
} while (!isEndTag(xpp, "Representation"));
Format format = buildFormat(id, mimeType, width, height, frameRate, numChannels,
Format format = buildFormat(id, mimeType, width, height, frameRate, audioChannels,
audioSamplingRate, bandwidth, language, codecs);
return buildRepresentation(periodStartMs, periodDurationMs, contentId, -1, format,
segmentBase != null ? segmentBase : new SingleSegmentBase(baseUrl));
}
protected Format buildFormat(String id, String mimeType, int width, int height, float frameRate,
int numChannels, int audioSamplingRate, int bandwidth, String language, String codecs) {
return new Format(id, mimeType, width, height, frameRate, numChannels, audioSamplingRate,
int audioChannels, int audioSamplingRate, int bandwidth, String language, String codecs) {
return new Format(id, mimeType, width, height, frameRate, audioChannels, audioSamplingRate,
bandwidth, language, codecs);
}
@ -540,6 +548,17 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
return new RangedUri(baseUrl, urlText, rangeStart, rangeLength);
}
// AudioChannelConfiguration parsing.
/**
 * Parses an AudioChannelConfiguration element.
 * <p>
 * Reads the element's "value" attribute as an integer and then advances the parser until it is
 * positioned on the element's end tag, skipping anything nested inside.
 *
 * @param xpp The parser, positioned on the AudioChannelConfiguration start tag.
 * @return The result of parsing the "value" attribute. How a missing attribute is handled is
 *     decided by the single-name {@code parseInt} helper, which is not shown here.
 * @throws XmlPullParserException If an error occurs advancing the parser.
 * @throws IOException If an error occurs reading from the underlying stream.
 */
protected int parseAudioChannelConfiguration(XmlPullParser xpp)
    throws XmlPullParserException, IOException {
  int channelCount = parseInt(xpp, "value");
  // Always consume at least one event, then keep going until the matching end tag is reached.
  xpp.next();
  while (!isEndTag(xpp, "AudioChannelConfiguration")) {
    xpp.next();
  }
  return channelCount;
}
// Utility methods.
/**
@ -564,8 +583,8 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
}
/**
* Checks two adaptation set types for consistency, returning the consistent type, or throwing an
* {@link IllegalStateException} if the types are inconsistent.
* Checks two adaptation set content types for consistency, returning the consistent type, or
* throwing an {@link IllegalStateException} if the types are inconsistent.
* <p>
* Two types are consistent if they are equal, or if one is {@link AdaptationSet#TYPE_UNKNOWN}.
* Where one of the types is {@link AdaptationSet#TYPE_UNKNOWN}, the other is returned.
@ -574,7 +593,7 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
* @param secondType The second type.
* @return The consistent type.
*/
private static int checkAdaptationSetTypeConsistency(int firstType, int secondType) {
private static int checkContentTypeConsistency(int firstType, int secondType) {
if (firstType == AdaptationSet.TYPE_UNKNOWN) {
return secondType;
} else if (secondType == AdaptationSet.TYPE_UNKNOWN) {
@ -598,6 +617,24 @@ public class MediaPresentationDescriptionParser extends DefaultHandler
return xpp.getEventType() == XmlPullParser.START_TAG;
}
/**
 * Parses the "frameRate" attribute of the element the parser is currently positioned on.
 * <p>
 * The attribute is matched against {@code FRAME_RATE_PATTERN}. Group 1 is read as the numerator
 * and group 2, if non-empty, as the denominator. A zero denominator is not special-cased, so the
 * floating point division then yields an infinite or NaN result.
 *
 * @param xpp The parser from which the attribute is read.
 * @param defaultValue The value to return if the attribute is absent or does not match the
 *     pattern.
 * @return The parsed frame rate, or {@code defaultValue}.
 */
protected static float parseFrameRate(XmlPullParser xpp, float defaultValue) {
  String attributeValue = xpp.getAttributeValue(null, "frameRate");
  if (attributeValue == null) {
    return defaultValue;
  }
  Matcher matcher = FRAME_RATE_PATTERN.matcher(attributeValue);
  if (!matcher.matches()) {
    return defaultValue;
  }
  int numerator = Integer.parseInt(matcher.group(1));
  String denominator = matcher.group(2);
  // With no denominator the rate is a whole number; otherwise it is a ratio.
  return TextUtils.isEmpty(denominator)
      ? numerator
      : (float) numerator / Integer.parseInt(denominator);
}
protected static long parseDuration(XmlPullParser xpp, String name, long defaultValue) {
String value = xpp.getAttributeValue(null, name);
if (value == null) {

View File

@ -392,10 +392,10 @@ public class SmoothStreamingChunkSource implements ChunkSource {
csd = Arrays.asList(trackElement.csd);
} else {
csd = Collections.singletonList(CodecSpecificDataUtil.buildAacAudioSpecificConfig(
trackFormat.audioSamplingRate, trackFormat.numChannels));
trackFormat.audioSamplingRate, trackFormat.audioChannels));
}
MediaFormat format = MediaFormat.createAudioFormat(mimeType, MediaFormat.NO_VALUE,
trackFormat.numChannels, trackFormat.audioSamplingRate, csd);
trackFormat.audioChannels, trackFormat.audioSamplingRate, csd);
return format;
} else if (streamElement.type == StreamElement.TYPE_TEXT) {
return MediaFormat.createTextFormat(trackFormat.mimeType, trackFormat.language);

View File

@ -50,6 +50,7 @@ public final class MimeTypes {
public static final String TEXT_VTT = BASE_TYPE_TEXT + "/vtt";
public static final String APPLICATION_MP4 = BASE_TYPE_APPLICATION + "/mp4";
public static final String APPLICATION_ID3 = BASE_TYPE_APPLICATION + "/id3";
public static final String APPLICATION_EIA608 = BASE_TYPE_APPLICATION + "/eia-608";
public static final String APPLICATION_SUBRIP = BASE_TYPE_APPLICATION + "/x-subrip";
@ -113,14 +114,4 @@ public final class MimeTypes {
return getTopLevelType(mimeType).equals(BASE_TYPE_APPLICATION);
}
/**
* Whether the mimeType is {@link #APPLICATION_TTML}.
*
* @param mimeType The mimeType to test.
* @return Whether the mimeType is {@link #APPLICATION_TTML}.
*/
public static boolean isTtml(String mimeType) {
return mimeType.equals(APPLICATION_TTML);
}
}