From f7ffeb75cf3681ba4557a5f02f4c60f28e40bc5f Mon Sep 17 00:00:00 2001 From: Oliver Woodman Date: Tue, 11 Aug 2015 18:18:19 +0100 Subject: [PATCH] Parse pixel aspect ratio from SPS for use when there's no pasp atom. Issue: #697 --- .../exoplayer/extractor/mp4/AtomParsers.java | 45 +++- .../exoplayer/extractor/ts/H264Reader.java | 118 +--------- .../exoplayer/util/CodecSpecificDataUtil.java | 207 +++++++++++------- 3 files changed, 170 insertions(+), 200 deletions(-) diff --git a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/AtomParsers.java b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/AtomParsers.java index 1eabba2566..d978b41c45 100644 --- a/library/src/main/java/com/google/android/exoplayer/extractor/mp4/AtomParsers.java +++ b/library/src/main/java/com/google/android/exoplayer/extractor/mp4/AtomParsers.java @@ -22,6 +22,7 @@ import com.google.android.exoplayer.util.Assertions; import com.google.android.exoplayer.util.CodecSpecificDataUtil; import com.google.android.exoplayer.util.MimeTypes; import com.google.android.exoplayer.util.NalUnitUtil; +import com.google.android.exoplayer.util.ParsableBitArray; import com.google.android.exoplayer.util.ParsableByteArray; import com.google.android.exoplayer.util.Util; @@ -370,6 +371,7 @@ import java.util.List; parent.skipBytes(24); int width = parent.readUnsignedShort(); int height = parent.readUnsignedShort(); + boolean pixelWidthHeightRatioFromPasp = false; float pixelWidthHeightRatio = 1; parent.skipBytes(50); @@ -389,9 +391,12 @@ import java.util.List; if (childAtomType == Atom.TYPE_avcC) { Assertions.checkState(mimeType == null); mimeType = MimeTypes.VIDEO_H264; - Pair, Integer> avcCData = parseAvcCFromParent(parent, childStartPosition); - initializationData = avcCData.first; - out.nalUnitLengthFieldLength = avcCData.second; + AvcCData avcCData = parseAvcCFromParent(parent, childStartPosition); + initializationData = avcCData.initializationData; + out.nalUnitLengthFieldLength = avcCData.nalUnitLengthFieldLength; + if (!pixelWidthHeightRatioFromPasp) { + pixelWidthHeightRatio = avcCData.pixelWidthAspectRatio; + } } else if (childAtomType == Atom.TYPE_hvcC) { Assertions.checkState(mimeType == null); mimeType = MimeTypes.VIDEO_H265; @@ -412,6 +417,7 @@ import java.util.List; parseSinfFromParent(parent, childStartPosition, childAtomSize); } else if (childAtomType == Atom.TYPE_pasp) { pixelWidthHeightRatio = parsePaspFromParent(parent, childStartPosition); + pixelWidthHeightRatioFromPasp = true; } childPosition += childAtomSize; } @@ -425,8 +431,7 @@ import java.util.List; width, height, pixelWidthHeightRatio, initializationData); } - private static Pair, Integer> parseAvcCFromParent(ParsableByteArray parent, - int position) { + private static AvcCData parseAvcCFromParent(ParsableByteArray parent, int position) { parent.setPosition(position + Atom.HEADER_SIZE + 4); // Start of the AVCDecoderConfigurationRecord (defined in 14496-15) int nalUnitLengthFieldLength = (parent.readUnsignedByte() & 0x3) + 1; @@ -434,8 +439,7 @@ import java.util.List; throw new IllegalStateException(); } List initializationData = new ArrayList<>(); - // TODO: We should try and parse these using CodecSpecificDataUtil.parseSpsNalUnit, and - // expose the AVC profile and level somewhere useful; Most likely in MediaFormat. + float pixelWidthAspectRatio = 1; int numSequenceParameterSets = parent.readUnsignedByte() & 0x1F; for (int j = 0; j < numSequenceParameterSets; j++) { initializationData.add(NalUnitUtil.parseChildNalUnit(parent)); @@ -444,7 +448,17 @@ import java.util.List; for (int j = 0; j < numPictureParameterSets; j++) { initializationData.add(NalUnitUtil.parseChildNalUnit(parent)); } - return Pair.create(initializationData, nalUnitLengthFieldLength); + + if (numSequenceParameterSets > 0) { + // Parse the first sequence parameter set to obtain pixelWidthAspectRatio. + ParsableBitArray spsDataBitArray = new ParsableBitArray(initializationData.get(0)); + // Skip the NAL header consisting of the nalUnitLengthField and the type (1 byte). + spsDataBitArray.setPosition(8 * (nalUnitLengthFieldLength + 1)); + pixelWidthAspectRatio = CodecSpecificDataUtil.parseSpsNalUnit(spsDataBitArray) + .pixelWidthAspectRatio; + } + + return new AvcCData(initializationData, nalUnitLengthFieldLength, pixelWidthAspectRatio); } private static Pair, Integer> parseHvcCFromParent(ParsableByteArray parent, @@ -705,4 +719,19 @@ import java.util.List; } + private static final class AvcCData { + + public final List initializationData; + public final int nalUnitLengthFieldLength; + public final float pixelWidthAspectRatio; + + public AvcCData(List initializationData, int nalUnitLengthFieldLength, + float pixelWidthAspectRatio) { + this.initializationData = initializationData; + this.nalUnitLengthFieldLength = nalUnitLengthFieldLength; + this.pixelWidthAspectRatio = pixelWidthAspectRatio; + } + + } + } diff --git a/library/src/main/java/com/google/android/exoplayer/extractor/ts/H264Reader.java b/library/src/main/java/com/google/android/exoplayer/extractor/ts/H264Reader.java index 731254dbde..8be76ec411 100644 --- a/library/src/main/java/com/google/android/exoplayer/extractor/ts/H264Reader.java +++ b/library/src/main/java/com/google/android/exoplayer/extractor/ts/H264Reader.java @@ -18,13 +18,13 @@ package com.google.android.exoplayer.extractor.ts; import com.google.android.exoplayer.C; import com.google.android.exoplayer.MediaFormat; import com.google.android.exoplayer.extractor.TrackOutput; +import com.google.android.exoplayer.util.CodecSpecificDataUtil; +import com.google.android.exoplayer.util.CodecSpecificDataUtil.SpsData; import com.google.android.exoplayer.util.MimeTypes; import com.google.android.exoplayer.util.NalUnitUtil; import com.google.android.exoplayer.util.ParsableBitArray; import com.google.android.exoplayer.util.ParsableByteArray; -import android.util.Log; - import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -34,8 +34,6 @@ import java.util.List; */ /* package */ final class H264Reader extends ElementaryStreamReader { - private static final String TAG = "H264Reader"; - private static final int FRAME_TYPE_I = 2; private static final int FRAME_TYPE_ALL_I = 7; @@ -205,121 +203,19 @@ import java.util.List; initializationData.add(spsData); initializationData.add(ppsData); - // Unescape and then parse the SPS unit. + // Unescape and parse the SPS unit. NalUnitUtil.unescapeStream(sps.nalData, sps.nalLength); ParsableBitArray bitArray = new ParsableBitArray(sps.nalData); bitArray.skipBits(32); // NAL header - int profileIdc = bitArray.readBits(8); - bitArray.skipBits(16); // constraint bits (6), reserved (2) and level_idc (8) - bitArray.readUnsignedExpGolombCodedInt(); // seq_parameter_set_id - - int chromaFormatIdc = 1; // Default is 4:2:0 - if (profileIdc == 100 || profileIdc == 110 || profileIdc == 122 || profileIdc == 244 - || profileIdc == 44 || profileIdc == 83 || profileIdc == 86 || profileIdc == 118 - || profileIdc == 128 || profileIdc == 138) { - chromaFormatIdc = bitArray.readUnsignedExpGolombCodedInt(); - if (chromaFormatIdc == 3) { - bitArray.skipBits(1); // separate_colour_plane_flag - } - bitArray.readUnsignedExpGolombCodedInt(); // bit_depth_luma_minus8 - bitArray.readUnsignedExpGolombCodedInt(); // bit_depth_chroma_minus8 - bitArray.skipBits(1); // qpprime_y_zero_transform_bypass_flag - boolean seqScalingMatrixPresentFlag = bitArray.readBit(); - if (seqScalingMatrixPresentFlag) { - int limit = (chromaFormatIdc != 3) ? 8 : 12; - for (int i = 0; i < limit; i++) { - boolean seqScalingListPresentFlag = bitArray.readBit(); - if (seqScalingListPresentFlag) { - skipScalingList(bitArray, i < 6 ? 16 : 64); - } - } - } - } - - bitArray.readUnsignedExpGolombCodedInt(); // log2_max_frame_num_minus4 - long picOrderCntType = bitArray.readUnsignedExpGolombCodedInt(); - if (picOrderCntType == 0) { - bitArray.readUnsignedExpGolombCodedInt(); // log2_max_pic_order_cnt_lsb_minus4 - } else if (picOrderCntType == 1) { - bitArray.skipBits(1); // delta_pic_order_always_zero_flag - bitArray.readSignedExpGolombCodedInt(); // offset_for_non_ref_pic - bitArray.readSignedExpGolombCodedInt(); // offset_for_top_to_bottom_field - long numRefFramesInPicOrderCntCycle = bitArray.readUnsignedExpGolombCodedInt(); - for (int i = 0; i < numRefFramesInPicOrderCntCycle; i++) { - bitArray.readUnsignedExpGolombCodedInt(); // offset_for_ref_frame[i] - } - } - bitArray.readUnsignedExpGolombCodedInt(); // max_num_ref_frames - bitArray.skipBits(1); // gaps_in_frame_num_value_allowed_flag - - int picWidthInMbs = bitArray.readUnsignedExpGolombCodedInt() + 1; - int picHeightInMapUnits = bitArray.readUnsignedExpGolombCodedInt() + 1; - boolean frameMbsOnlyFlag = bitArray.readBit(); - int frameHeightInMbs = (2 - (frameMbsOnlyFlag ? 1 : 0)) * picHeightInMapUnits; - if (!frameMbsOnlyFlag) { - bitArray.skipBits(1); // mb_adaptive_frame_field_flag - } - - bitArray.skipBits(1); // direct_8x8_inference_flag - int frameWidth = picWidthInMbs * 16; - int frameHeight = frameHeightInMbs * 16; - boolean frameCroppingFlag = bitArray.readBit(); - if (frameCroppingFlag) { - int frameCropLeftOffset = bitArray.readUnsignedExpGolombCodedInt(); - int frameCropRightOffset = bitArray.readUnsignedExpGolombCodedInt(); - int frameCropTopOffset = bitArray.readUnsignedExpGolombCodedInt(); - int frameCropBottomOffset = bitArray.readUnsignedExpGolombCodedInt(); - int cropUnitX, cropUnitY; - if (chromaFormatIdc == 0) { - cropUnitX = 1; - cropUnitY = 2 - (frameMbsOnlyFlag ? 1 : 0); - } else { - int subWidthC = (chromaFormatIdc == 3) ? 1 : 2; - int subHeightC = (chromaFormatIdc == 1) ? 2 : 1; - cropUnitX = subWidthC; - cropUnitY = subHeightC * (2 - (frameMbsOnlyFlag ? 1 : 0)); - } - frameWidth -= (frameCropLeftOffset + frameCropRightOffset) * cropUnitX; - frameHeight -= (frameCropTopOffset + frameCropBottomOffset) * cropUnitY; - } - - float pixelWidthHeightRatio = 1; - boolean vuiParametersPresentFlag = bitArray.readBit(); - if (vuiParametersPresentFlag) { - boolean aspectRatioInfoPresentFlag = bitArray.readBit(); - if (aspectRatioInfoPresentFlag) { - int aspectRatioIdc = bitArray.readBits(8); - if (aspectRatioIdc == NalUnitUtil.EXTENDED_SAR) { - int sarWidth = bitArray.readBits(16); - int sarHeight = bitArray.readBits(16); - if (sarWidth != 0 && sarHeight != 0) { - pixelWidthHeightRatio = (float) sarWidth / sarHeight; - } - } else if (aspectRatioIdc < NalUnitUtil.ASPECT_RATIO_IDC_VALUES.length) { - pixelWidthHeightRatio = NalUnitUtil.ASPECT_RATIO_IDC_VALUES[aspectRatioIdc]; - } else { - Log.w(TAG, "Unexpected aspect_ratio_idc value: " + aspectRatioIdc); - } - } - } + SpsData parsedSpsData = CodecSpecificDataUtil.parseSpsNalUnit(bitArray); + // Construct and output the format. output.format(MediaFormat.createVideoFormat(MimeTypes.VIDEO_H264, MediaFormat.NO_VALUE, - C.UNKNOWN_TIME_US, frameWidth, frameHeight, pixelWidthHeightRatio, initializationData)); + C.UNKNOWN_TIME_US, parsedSpsData.width, parsedSpsData.height, + parsedSpsData.pixelWidthAspectRatio, initializationData)); hasOutputFormat = true; } - private void skipScalingList(ParsableBitArray bitArray, int size) { - int lastScale = 8; - int nextScale = 8; - for (int i = 0; i < size; i++) { - if (nextScale != 0) { - int deltaScale = bitArray.readSignedExpGolombCodedInt(); - nextScale = (lastScale + deltaScale + 256) % 256; - } - lastScale = (nextScale == 0) ? lastScale : nextScale; - } - } - /** * A buffer specifically for IFR units that can be used to parse the IFR's slice type. */ diff --git a/library/src/main/java/com/google/android/exoplayer/util/CodecSpecificDataUtil.java b/library/src/main/java/com/google/android/exoplayer/util/CodecSpecificDataUtil.java index fa212d2a19..1ec1cbf9d1 100644 --- a/library/src/main/java/com/google/android/exoplayer/util/CodecSpecificDataUtil.java +++ b/library/src/main/java/com/google/android/exoplayer/util/CodecSpecificDataUtil.java @@ -15,8 +15,7 @@ */ package com.google.android.exoplayer.util; -import android.annotation.SuppressLint; -import android.media.MediaCodecInfo.CodecProfileLevel; +import android.util.Log; import android.util.Pair; import java.util.ArrayList; @@ -27,6 +26,23 @@ import java.util.List; */ public final class CodecSpecificDataUtil { + /** + * Holds data parsed from a sequence parameter set NAL unit. + */ + public static final class SpsData { + + public final int width; + public final int height; + public final float pixelWidthAspectRatio; + + public SpsData(int width, int height, float pixelWidthAspectRatio) { + this.width = width; + this.height = height; + this.pixelWidthAspectRatio = pixelWidthAspectRatio; + } + + } + private static final byte[] NAL_START_CODE = new byte[] {0, 0, 0, 1}; private static final int[] AUDIO_SPECIFIC_CONFIG_SAMPLING_RATE_TABLE = new int[] { @@ -37,7 +53,7 @@ public final class CodecSpecificDataUtil { 0, 1, 2, 3, 4, 5, 6, 8 }; - private static final int SPS_NAL_UNIT_TYPE = 7; + private static final String TAG = "CodecSpecificDataUtil"; private CodecSpecificDataUtil() {} @@ -188,88 +204,117 @@ public final class CodecSpecificDataUtil { /** * Parses an SPS NAL unit. * - * @param spsNalUnit The NAL unit. - * @return A pair consisting of AVC profile and level constants, as defined in - * {@link CodecProfileLevel}. Null if the input data was not an SPS NAL unit. + * @param bitArray A {@link ParsableBitArray} containing the SPS data. The position must to set + * to the start of the data (i.e. the first bit of the profile_idc field). + * @return A parsed representation of the SPS data. */ - public static Pair parseSpsNalUnit(byte[] spsNalUnit) { - // SPS NAL unit: - // - Start prefix (4 bytes) - // - Forbidden zero bit (1 bit) - // - NAL ref idx (2 bits) - // - NAL unit type (5 bits) - // - Profile idc (8 bits) - // - Constraint bits (3 bits) - // - Reserved bits (5 bits) - // - Level idx (8 bits) - if (isNalStartCode(spsNalUnit, 0) && spsNalUnit.length == 8 - && (spsNalUnit[5] & 0x1F) == SPS_NAL_UNIT_TYPE) { - return Pair.create(parseAvcProfile(spsNalUnit), parseAvcLevel(spsNalUnit)); + public static SpsData parseSpsNalUnit(ParsableBitArray bitArray) { + int profileIdc = bitArray.readBits(8); + bitArray.skipBits(16); // constraint bits (6), reserved (2) and level_idc (8) + bitArray.readUnsignedExpGolombCodedInt(); // seq_parameter_set_id + + int chromaFormatIdc = 1; // Default is 4:2:0 + if (profileIdc == 100 || profileIdc == 110 || profileIdc == 122 || profileIdc == 244 + || profileIdc == 44 || profileIdc == 83 || profileIdc == 86 || profileIdc == 118 + || profileIdc == 128 || profileIdc == 138) { + chromaFormatIdc = bitArray.readUnsignedExpGolombCodedInt(); + if (chromaFormatIdc == 3) { + bitArray.skipBits(1); // separate_colour_plane_flag + } + bitArray.readUnsignedExpGolombCodedInt(); // bit_depth_luma_minus8 + bitArray.readUnsignedExpGolombCodedInt(); // bit_depth_chroma_minus8 + bitArray.skipBits(1); // qpprime_y_zero_transform_bypass_flag + boolean seqScalingMatrixPresentFlag = bitArray.readBit(); + if (seqScalingMatrixPresentFlag) { + int limit = (chromaFormatIdc != 3) ? 8 : 12; + for (int i = 0; i < limit; i++) { + boolean seqScalingListPresentFlag = bitArray.readBit(); + if (seqScalingListPresentFlag) { + skipScalingList(bitArray, i < 6 ? 16 : 64); + } + } + } } - return null; + + bitArray.readUnsignedExpGolombCodedInt(); // log2_max_frame_num_minus4 + long picOrderCntType = bitArray.readUnsignedExpGolombCodedInt(); + if (picOrderCntType == 0) { + bitArray.readUnsignedExpGolombCodedInt(); // log2_max_pic_order_cnt_lsb_minus4 + } else if (picOrderCntType == 1) { + bitArray.skipBits(1); // delta_pic_order_always_zero_flag + bitArray.readSignedExpGolombCodedInt(); // offset_for_non_ref_pic + bitArray.readSignedExpGolombCodedInt(); // offset_for_top_to_bottom_field + long numRefFramesInPicOrderCntCycle = bitArray.readUnsignedExpGolombCodedInt(); + for (int i = 0; i < numRefFramesInPicOrderCntCycle; i++) { + bitArray.readUnsignedExpGolombCodedInt(); // offset_for_ref_frame[i] + } + } + bitArray.readUnsignedExpGolombCodedInt(); // max_num_ref_frames + bitArray.skipBits(1); // gaps_in_frame_num_value_allowed_flag + + int picWidthInMbs = bitArray.readUnsignedExpGolombCodedInt() + 1; + int picHeightInMapUnits = bitArray.readUnsignedExpGolombCodedInt() + 1; + boolean frameMbsOnlyFlag = bitArray.readBit(); + int frameHeightInMbs = (2 - (frameMbsOnlyFlag ? 1 : 0)) * picHeightInMapUnits; + if (!frameMbsOnlyFlag) { + bitArray.skipBits(1); // mb_adaptive_frame_field_flag + } + + bitArray.skipBits(1); // direct_8x8_inference_flag + int frameWidth = picWidthInMbs * 16; + int frameHeight = frameHeightInMbs * 16; + boolean frameCroppingFlag = bitArray.readBit(); + if (frameCroppingFlag) { + int frameCropLeftOffset = bitArray.readUnsignedExpGolombCodedInt(); + int frameCropRightOffset = bitArray.readUnsignedExpGolombCodedInt(); + int frameCropTopOffset = bitArray.readUnsignedExpGolombCodedInt(); + int frameCropBottomOffset = bitArray.readUnsignedExpGolombCodedInt(); + int cropUnitX, cropUnitY; + if (chromaFormatIdc == 0) { + cropUnitX = 1; + cropUnitY = 2 - (frameMbsOnlyFlag ? 1 : 0); + } else { + int subWidthC = (chromaFormatIdc == 3) ? 1 : 2; + int subHeightC = (chromaFormatIdc == 1) ? 2 : 1; + cropUnitX = subWidthC; + cropUnitY = subHeightC * (2 - (frameMbsOnlyFlag ? 1 : 0)); + } + frameWidth -= (frameCropLeftOffset + frameCropRightOffset) * cropUnitX; + frameHeight -= (frameCropTopOffset + frameCropBottomOffset) * cropUnitY; + } + + float pixelWidthHeightRatio = 1; + boolean vuiParametersPresentFlag = bitArray.readBit(); + if (vuiParametersPresentFlag) { + boolean aspectRatioInfoPresentFlag = bitArray.readBit(); + if (aspectRatioInfoPresentFlag) { + int aspectRatioIdc = bitArray.readBits(8); + if (aspectRatioIdc == NalUnitUtil.EXTENDED_SAR) { + int sarWidth = bitArray.readBits(16); + int sarHeight = bitArray.readBits(16); + if (sarWidth != 0 && sarHeight != 0) { + pixelWidthHeightRatio = (float) sarWidth / sarHeight; + } + } else if (aspectRatioIdc < NalUnitUtil.ASPECT_RATIO_IDC_VALUES.length) { + pixelWidthHeightRatio = NalUnitUtil.ASPECT_RATIO_IDC_VALUES[aspectRatioIdc]; + } else { + Log.w(TAG, "Unexpected aspect_ratio_idc value: " + aspectRatioIdc); + } + } + } + + return new SpsData(frameWidth, frameHeight, pixelWidthHeightRatio); } - @SuppressLint("InlinedApi") - private static int parseAvcProfile(byte[] data) { - int profileIdc = data[6] & 0xFF; - switch (profileIdc) { - case 0x42: - return CodecProfileLevel.AVCProfileBaseline; - case 0x4d: - return CodecProfileLevel.AVCProfileMain; - case 0x58: - return CodecProfileLevel.AVCProfileExtended; - case 0x64: - return CodecProfileLevel.AVCProfileHigh; - case 0x6e: - return CodecProfileLevel.AVCProfileHigh10; - case 0x7a: - return CodecProfileLevel.AVCProfileHigh422; - case 0xf4: - return CodecProfileLevel.AVCProfileHigh444; - default: - return 0; - } - } - - @SuppressLint("InlinedApi") - private static int parseAvcLevel(byte[] data) { - int levelIdc = data[8] & 0xFF; - switch (levelIdc) { - case 9: - return CodecProfileLevel.AVCLevel1b; - case 10: - return CodecProfileLevel.AVCLevel1; - case 11: - return CodecProfileLevel.AVCLevel11; - case 12: - return CodecProfileLevel.AVCLevel12; - case 13: - return CodecProfileLevel.AVCLevel13; - case 20: - return CodecProfileLevel.AVCLevel2; - case 21: - return CodecProfileLevel.AVCLevel21; - case 22: - return CodecProfileLevel.AVCLevel22; - case 30: - return CodecProfileLevel.AVCLevel3; - case 31: - return CodecProfileLevel.AVCLevel31; - case 32: - return CodecProfileLevel.AVCLevel32; - case 40: - return CodecProfileLevel.AVCLevel4; - case 41: - return CodecProfileLevel.AVCLevel41; - case 42: - return CodecProfileLevel.AVCLevel42; - case 50: - return CodecProfileLevel.AVCLevel5; - case 51: - return CodecProfileLevel.AVCLevel51; - default: - return 0; + private static void skipScalingList(ParsableBitArray bitArray, int size) { + int lastScale = 8; + int nextScale = 8; + for (int i = 0; i < size; i++) { + if (nextScale != 0) { + int deltaScale = bitArray.readSignedExpGolombCodedInt(); + nextScale = (lastScale + deltaScale + 256) % 256; + } + lastScale = (nextScale == 0) ? lastScale : nextScale; } }