diff --git a/libraries/container/src/main/java/androidx/media3/container/NalUnitUtil.java b/libraries/container/src/main/java/androidx/media3/container/NalUnitUtil.java index 874ade6988..1ded85e362 100644 --- a/libraries/container/src/main/java/androidx/media3/container/NalUnitUtil.java +++ b/libraries/container/src/main/java/androidx/media3/container/NalUnitUtil.java @@ -15,6 +15,7 @@ */ package androidx.media3.container; +import static com.google.common.math.DoubleMath.log2; import static java.lang.Math.max; import static java.lang.Math.min; @@ -26,8 +27,11 @@ import androidx.media3.common.MimeTypes; import androidx.media3.common.util.Assertions; import androidx.media3.common.util.Log; import androidx.media3.common.util.UnstableApi; +import com.google.common.collect.ImmutableList; +import java.math.RoundingMode; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.List; /** Utility methods for handling H.264/AVC and H.265/HEVC NAL units. */ @UnstableApi @@ -59,6 +63,15 @@ public final class NalUnitUtil { /** Prefix NAL unit. */ public static final int NAL_UNIT_TYPE_PREFIX = 14; + /** H265 video parameter set. */ + public static final int H265_NAL_UNIT_TYPE_VPS = 32; + + /** H265 sequence parameter set. */ + public static final int H265_NAL_UNIT_TYPE_SPS = 33; + + /** H265 SEI. */ + public static final int H265_NAL_UNIT_TYPE_PREFIX_SEI = 39; + /** Holds data parsed from a H.264 sequence parameter set NAL unit. */ public static final class SpsData { @@ -127,18 +140,193 @@ public final class NalUnitUtil { } } - /** Holds data parsed from a H.265 sequence parameter set NAL unit. */ - public static final class H265SpsData { + /** Holds data parsed from a H.265 NAL unit header. 
*/ + public static final class H265NalHeader { + + public final int nalUnitType; + public final int layerId; + public final int temporalId; + + public H265NalHeader(int nalUnitType, int layerId, int temporalId) { + this.nalUnitType = nalUnitType; + this.layerId = layerId; + this.temporalId = temporalId; + } + } + + /** Holds data that maps NAL unit header layer ID to the internal layer info specified in VPS. */ + public static final class H265LayerInfo { + /** Internal layer ID used within VPS. */ + public final int layerIdInVps; + + /** Indicates the view ID (>= 0) for the multiview case. */ + public final int viewId; + + public H265LayerInfo(int layerIdInVps, int viewId) { + this.layerIdInVps = layerIdInVps; + this.viewId = viewId; + } + } + + /** Holds data parsed from a H.265 profile_tier_level() of either VPS or SPS. */ + public static final class H265ProfileTierLevel { public final int generalProfileSpace; public final boolean generalTierFlag; public final int generalProfileIdc; public final int generalProfileCompatibilityFlags; + public final int[] constraintBytes; + public final int generalLevelIdc; + + public H265ProfileTierLevel( + int generalProfileSpace, + boolean generalTierFlag, + int generalProfileIdc, + int generalProfileCompatibilityFlags, + int[] constraintBytes, + int generalLevelIdc) { + this.generalProfileSpace = generalProfileSpace; + this.generalTierFlag = generalTierFlag; + this.generalProfileIdc = generalProfileIdc; + this.generalProfileCompatibilityFlags = generalProfileCompatibilityFlags; + this.constraintBytes = constraintBytes; + this.generalLevelIdc = generalLevelIdc; + } + } + + /** Holds a list of H.265 profile_tier_level()s and a corresponding list of indices. */ + public static final class H265ProfileTierLevelsAndIndices { + /** The list of profile_tier_level()s that can be referenced by each layer. */ + public final ImmutableList profileTierLevels; + + /** + * The list of indices to the {@code profileTierLevels}. 
For each layer available in the L-HEVC + * bitstream (identified by the layer ID in VPS), an index to the profileTierLevels list is set. + */ + public final int[] indices; + + public H265ProfileTierLevelsAndIndices( + List profileTierLevels, int[] indices) { + this.profileTierLevels = ImmutableList.copyOf(profileTierLevels); + this.indices = indices; + } + } + + /** Holds data parsed from a H.265 rep_format() of VPS extension - vps_extension(). */ + public static final class H265RepFormat { + + public final int chromaFormatIdc; + public final int bitDepthLumaMinus8; + public final int bitDepthChromaMinus8; + public final int width; + public final int height; + + public H265RepFormat( + int chromaFormatIdc, + int bitDepthLumaMinus8, + int bitDepthChromaMinus8, + int width, + int height) { + this.chromaFormatIdc = chromaFormatIdc; + this.bitDepthLumaMinus8 = bitDepthLumaMinus8; + this.bitDepthChromaMinus8 = bitDepthChromaMinus8; + this.width = width; + this.height = height; + } + } + + /** Holds a list of H.265 rep_format()s and a corresponding list of indices. */ + public static final class H265RepFormatsAndIndices { + + /** The list of rep_format()s that can be referenced by each layer. */ + public final ImmutableList repFormats; + + /** + * The list of indices to the {@code repFormats}; for each layer available in the L-HEVC + * bitstream (identified by the layer ID in VPS), an index to the repFormats list is set. + */ + public final int[] indices; + + public H265RepFormatsAndIndices(List repFormats, int[] indices) { + this.repFormats = ImmutableList.copyOf(repFormats); + this.indices = indices; + } + } + + /** Holds data parsed from a H.265 video_signal_info() of vps_vui() of vps_extension(). 
*/ + public static final class H265VideoSignalInfo { + + public final @C.ColorSpace int colorSpace; + public final @C.ColorRange int colorRange; + public final @C.ColorTransfer int colorTransfer; + + public H265VideoSignalInfo( + @C.ColorSpace int colorSpace, + @C.ColorRange int colorRange, + @C.ColorTransfer int colorTransfer) { + this.colorSpace = colorSpace; + this.colorRange = colorRange; + this.colorTransfer = colorTransfer; + } + } + + /** Holds a list of H.265 video_signal_info()s and a corresponding list of indices. */ + public static final class H265VideoSignalInfosAndIndices { + + /** The list of video_signal_info()s that can be referenced by each layer. */ + public final ImmutableList videoSignalInfos; + + /** + * The list of indices to the {@code videoSignalInfos}; for each layer available in the L-HEVC + * bitstream (identified by the layer ID in VPS), an index to the videoSignalInfos list is set. + */ + public final int[] indices; + + public H265VideoSignalInfosAndIndices( + List videoSignalInfos, int[] indices) { + this.videoSignalInfos = ImmutableList.copyOf(videoSignalInfos); + this.indices = indices; + } + } + + /** Holds data parsed from a H.265 video parameter set NAL unit. */ + public static final class H265VpsData { + + public final H265NalHeader nalHeader; + + public final ImmutableList layerInfos; + + /** The list of profile_tier_level()s and corresponding list of indices. */ + public final H265ProfileTierLevelsAndIndices profileTierLevelsAndIndices; + + /** The list of rep_format()s and corresponding list of indices. */ + @Nullable public final H265RepFormatsAndIndices repFormatsAndIndices; + + /** The list of video_signal_info()s and corresponding list of indices. 
*/ + @Nullable public final H265VideoSignalInfosAndIndices videoSignalInfosAndIndices; + + public H265VpsData( + H265NalHeader nalHeader, + @Nullable List layerInfos, + H265ProfileTierLevelsAndIndices profileTierLevelsAndIndices, + @Nullable H265RepFormatsAndIndices repFormatsAndIndices, + @Nullable H265VideoSignalInfosAndIndices videoSignalInfosAndIndices) { + this.nalHeader = nalHeader; + this.layerInfos = layerInfos != null ? ImmutableList.copyOf(layerInfos) : ImmutableList.of(); + this.profileTierLevelsAndIndices = profileTierLevelsAndIndices; + this.repFormatsAndIndices = repFormatsAndIndices; + this.videoSignalInfosAndIndices = videoSignalInfosAndIndices; + } + } + + /** Holds data parsed from a H.265 sequence parameter set NAL unit. */ + public static final class H265SpsData { + + public final H265NalHeader nalHeader; + @Nullable public final H265ProfileTierLevel profileTierLevel; public final int chromaFormatIdc; public final int bitDepthLumaMinus8; public final int bitDepthChromaMinus8; - public final int[] constraintBytes; - public final int generalLevelIdc; public final int seqParameterSetId; public final int width; public final int height; @@ -149,15 +337,11 @@ public final class NalUnitUtil { public final @C.ColorTransfer int colorTransfer; public H265SpsData( - int generalProfileSpace, - boolean generalTierFlag, - int generalProfileIdc, - int generalProfileCompatibilityFlags, + H265NalHeader nalHeader, + @Nullable H265ProfileTierLevel profileTierLevel, int chromaFormatIdc, int bitDepthLumaMinus8, int bitDepthChromaMinus8, - int[] constraintBytes, - int generalLevelIdc, int seqParameterSetId, int width, int height, @@ -166,15 +350,11 @@ public final class NalUnitUtil { @C.ColorSpace int colorSpace, @C.ColorRange int colorRange, @C.ColorTransfer int colorTransfer) { - this.generalProfileSpace = generalProfileSpace; - this.generalTierFlag = generalTierFlag; - this.generalProfileIdc = generalProfileIdc; - this.generalProfileCompatibilityFlags = 
generalProfileCompatibilityFlags; + this.nalHeader = nalHeader; + this.profileTierLevel = profileTierLevel; this.chromaFormatIdc = chromaFormatIdc; this.bitDepthLumaMinus8 = bitDepthLumaMinus8; this.bitDepthChromaMinus8 = bitDepthChromaMinus8; - this.constraintBytes = constraintBytes; - this.generalLevelIdc = generalLevelIdc; this.seqParameterSetId = seqParameterSetId; this.width = width; this.height = height; @@ -231,9 +411,7 @@ public final class NalUnitUtil { 2f }; - private static final int H264_NAL_UNIT_TYPE_SEI = 6; // Supplemental enhancement information - private static final int H264_NAL_UNIT_TYPE_SPS = 7; // Sequence parameter set - private static final int H265_NAL_UNIT_TYPE_PREFIX_SEI = 39; + private static final int INVALID_ID = -1; // Invalid ID. private static final Object scratchEscapePositionsLock = new Object(); @@ -306,7 +484,7 @@ public final class NalUnitUtil { while (offset + 1 < length) { int value = data.get(offset) & 0xFF; if (consecutiveZeros == 3) { - if (value == 1 && (data.get(offset + 1) & 0x1F) == H264_NAL_UNIT_TYPE_SPS) { + if (value == 1 && (data.get(offset + 1) & 0x1F) == NAL_UNIT_TYPE_SPS) { // Copy from this NAL unit onwards to the start of the buffer. ByteBuffer offsetData = data.duplicate(); offsetData.position(offset - 3); @@ -338,7 +516,7 @@ public final class NalUnitUtil { */ public static boolean isNalUnitSei(@Nullable String mimeType, byte nalUnitHeaderFirstByte) { return (MimeTypes.VIDEO_H264.equals(mimeType) - && (nalUnitHeaderFirstByte & 0x1F) == H264_NAL_UNIT_TYPE_SEI) + && (nalUnitHeaderFirstByte & 0x1F) == NAL_UNIT_TYPE_SEI) || (MimeTypes.VIDEO_H265.equals(mimeType) && ((nalUnitHeaderFirstByte & 0x7E) >> 1) == H265_NAL_UNIT_TYPE_PREFIX_SEI); } @@ -584,89 +762,642 @@ public final class NalUnitUtil { maxNumReorderFrames); } + /** + * Parses a H.265 VPS NAL unit using the syntax defined in ITU-T Recommendation H.265 (2019) + * subsections 7.3.2.1 and F.7.3.2.1. 
+ * + * @param nalData A buffer containing escaped VPS data. + * @param nalOffset The offset of the NAL unit header in {@code nalData}. + * @param nalLimit The limit of the NAL unit in {@code nalData}. + * @return A parsed representation of the VPS data. + */ + public static H265VpsData parseH265VpsNalUnit(byte[] nalData, int nalOffset, int nalLimit) { + ParsableNalUnitBitArray data = new ParsableNalUnitBitArray(nalData, nalOffset, nalLimit); + H265NalHeader nalHeader = parseH265NalHeader(data); + return parseH265VpsNalUnitPayload(data, nalHeader); + } + + /** + * Parses and returns a H.265 NAL unit header using the syntax defined in ITU-T Recommendation + * H.265 (2019) subsection 7.3.1.2. This function updates the ParsableNalUnitBitArray data with + * new byte and bit offsets that point to the end of the nal_unit_header(). + */ + private static H265NalHeader parseH265NalHeader(ParsableNalUnitBitArray data) { + // For HEVC and L-HEVC, the layer ID and temporal layer ID can be extracted from the 2 byte + // nal_unit_header(). + data.skipBit(); // forbidden_zero_bit + int nalUnitType = data.readBits(6); // nal_unit_type + int layerId = data.readBits(6); // nuh_layer_id + int temporalId = data.readBits(3) - 1; // nuh_temporal_id_plus1 + return new H265NalHeader(nalUnitType, layerId, temporalId); + } + + /** + * Parses a H.265 VPS NAL unit payload (excluding the NAL unit header) using the syntax defined in + * ITU-T Recommendation H.265 (2019) subsections 7.3.2.1 and F.7.3.2.1. 
+ */ + private static H265VpsData parseH265VpsNalUnitPayload( + ParsableNalUnitBitArray data, H265NalHeader nalHeader) { + data.skipBits(4); // vps_video_parameter_set_id + boolean baseLayerInternalFlag = data.readBit(); // vps_base_layer_internal_flag + boolean baseLayerAvailableFlag = data.readBit(); // vps_base_layer_available_flag + int maxLayers = data.readBits(6) + 1; // vps_max_layers_minus1 + + int maxSubLayersMinus1 = data.readBits(3); // vps_max_sub_layers_minus1 + data.skipBits(17); // vps_temporal_id_nesting_flag, vps_reserved_0xffff_16bits + + H265ProfileTierLevel profileTierLevel = + parseH265ProfileTierLevel( + data, + /* profilePresentFlag= */ true, + maxSubLayersMinus1, + /* prevProfileTierLevel= */ null); + + // for (i = vps_sub_layer_ordering_info_present_flag ? 0 : vps_max_sub_layers_minus1; ...) + for (int i = data.readBit() ? 0 : maxSubLayersMinus1; i <= maxSubLayersMinus1; i++) { + data.readUnsignedExpGolombCodedInt(); // vps_max_dec_pic_buffering_minus1[i] + data.readUnsignedExpGolombCodedInt(); // vps_max_num_reorder_pics[i] + data.readUnsignedExpGolombCodedInt(); // vps_max_latency_increase_plus1[i] + } + + int maxLayerId = data.readBits(6); // vps_max_layer_id + int numLayerSets = data.readUnsignedExpGolombCodedInt() + 1; // vps_num_layer_sets_minus1 + + ImmutableList profileTierLevels = ImmutableList.of(profileTierLevel); + H265ProfileTierLevelsAndIndices baseLayerProfileTierLevelsAndIndices = + new H265ProfileTierLevelsAndIndices(profileTierLevels, new int[1]); + + // Given that the first layer set is dedicated for the base layer, numLayerSets must be greater + // than 1 for the L-HEVC case. + boolean haveEnoughLayerSets = maxLayers >= 2 && numLayerSets >= 2; + // This implementation only supports the case where the base layer is included within the L-HEVC + // bitstream. 
+ boolean baseLayerIncluded = baseLayerInternalFlag && baseLayerAvailableFlag; + // The NAL unit header's layer ID, nuh_layer_id, (which doesn't need to be sequential) better + // have enough range to cover the specified max number of layers. + boolean haveLargeEnoughMaxLayerIdInNuh = maxLayerId + 1 >= maxLayers; + if (!haveEnoughLayerSets || !baseLayerIncluded || !haveLargeEnoughMaxLayerIdInNuh) { + // Fallback to single layer HEVC. + return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + + // Define each layer set information: list of layer IDs, number of layers, and max layer ID. + int[][] layerSetLayerIdList = new int[numLayerSets][maxLayerId + 1]; + int[] numLayersInIdList = new int[numLayerSets]; + int[] layerSetMaxLayerId = new int[numLayerSets]; + // The first layer set is comprised of only the base layer. + layerSetLayerIdList[0][0] = 0; + numLayersInIdList[0] = 1; + layerSetMaxLayerId[0] = 0; + // Define other layer sets. + for (int i = 1; i < numLayerSets; i++) { + int n = 0; + for (int j = 0; j <= maxLayerId; j++) { + if (data.readBit()) { // layer_id_included_flag[i][j] + layerSetLayerIdList[i][n++] = j; + layerSetMaxLayerId[i] = j; + } + numLayersInIdList[i] = n; + } + } + + if (data.readBit()) { // vps_timing_info_present_flag + data.skipBits(64); // vps_num_units_in_tick, vps_time_scale + if (data.readBit()) { // vps_poc_proportional_to_timing_flag + data.readUnsignedExpGolombCodedInt(); // vps_num_ticks_poc_diff_one_minus1 + } + int numHrdParameters = data.readUnsignedExpGolombCodedInt(); // vps_num_hrd_parameters + for (int i = 0; i < numHrdParameters; i++) { + data.readUnsignedExpGolombCodedInt(); // hrd_layer_set_idx[i] + skipH265HrdParameters( + data, /* commonInfPresentFlag= */ i == 0 || data.readBit(), maxSubLayersMinus1); + } + } + + // For L-HEVC, vps_extension() needs to be parsed. 
+ if (!data.readBit()) { // vps_extension_flag + // If no vps_extension(), then fallback to single layer HEVC. + return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + + data.byteAlign(); + + // Parsing vps_extension(). + H265ProfileTierLevel baseLayerProfileTierLevel = + parseH265ProfileTierLevel( + data, /* profilePresentFlag= */ false, maxSubLayersMinus1, profileTierLevel); + + boolean splittingFlag = data.readBit(); // splitting_flag + boolean[] scalabilityMaskFlag = new boolean[16]; + int numScalabilityTypes = 0; + for (int i = 0; i < 16; i++) { + scalabilityMaskFlag[i] = data.readBit(); // scalability_mask_flag[i] + if (scalabilityMaskFlag[i]) { + numScalabilityTypes++; + } + } + // As listed in Table F.1 of the spec, numScalabilityTypes indicates the number of different + // scalability dimensions. If there is no scalability dimension, then we simply have a + // single-layer HEVC. Of the 16 scalability dimensions, dimension 1 is used for multiview; + // currently only the multiview case is supported. + if (numScalabilityTypes == 0 || !scalabilityMaskFlag[1]) { + return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + + int[] dimensionIdLenMinus1 = new int[numScalabilityTypes]; + for (int i = 0; i < numScalabilityTypes - (splittingFlag ? 
1 : 0); i++) { + dimensionIdLenMinus1[i] = data.readBits(3); // dimension_id_len_minus1[i] + } + int[] dimBitOffset = new int[numScalabilityTypes + 1]; + if (splittingFlag) { + for (int i = 1; i < numScalabilityTypes; i++) { + for (int j = 0; j < i; j++) { + dimBitOffset[i] += dimensionIdLenMinus1[j] + 1; + } + } + dimBitOffset[numScalabilityTypes] = 6; + } + + int[][] dimensionId = new int[maxLayers][numScalabilityTypes]; + // Get layerIdInNuh that maps the layer ID used in this VPS to the NAL unit header's parsed + // layer ID - nuh_layer_id. + int[] layerIdInNuh = new int[maxLayers]; + layerIdInNuh[0] = 0; + boolean nuhLayerIdPresentFlag = data.readBit(); // vps_nuh_layer_id_present_flag + for (int i = 1; i < maxLayers; i++) { + if (nuhLayerIdPresentFlag) { + layerIdInNuh[i] = data.readBits(6); // layer_id_in_nuh[i] + } else { + layerIdInNuh[i] = i; + } + if (!splittingFlag) { + for (int j = 0; j < numScalabilityTypes; j++) { + dimensionId[i][j] = data.readBits(dimensionIdLenMinus1[j] + 1); // dimension_id[i][j] + } + } else { + for (int j = 0; j < numScalabilityTypes; j++) { + dimensionId[i][j] = + (layerIdInNuh[i] & ((1 << dimBitOffset[j + 1]) - 1)) >> dimBitOffset[j]; + } + } + } + + // Derive viewOrderIdx[] and numViews following (F-3) in subsection F.7.4.3.1.1. + int[] viewOrderIdx = new int[maxLayerId + 1]; + int numViews = 1; + for (int i = 0; i < maxLayers; i++) { + viewOrderIdx[layerIdInNuh[i]] = C.INDEX_UNSET; + // for (smIdx = 0, j = 0; smIdx < 16; ...) + for (int scalabilityMaskFlagIndex = 0, j = 0; + scalabilityMaskFlagIndex < 16; + scalabilityMaskFlagIndex++) { + if (scalabilityMaskFlag[scalabilityMaskFlagIndex]) { + if (scalabilityMaskFlagIndex == 1) { // multiview + // Note that viewOrderIdx is expected to be an index as it is used to access + // view_id_val[]; however, dimensionId[i][j] is not expected to follow the index + // constraint. 
It is up to the encoder to ensure that the dimensionId[i][j] is + // consistent with the use of viewOrderIdx. + viewOrderIdx[layerIdInNuh[i]] = dimensionId[i][j]; + } + j++; + } + } + if (i > 0) { + boolean newView = true; + for (int j = 0; j < i; j++) { + if (viewOrderIdx[layerIdInNuh[i]] == viewOrderIdx[layerIdInNuh[j]]) { + newView = false; + break; + } + } + if (newView) { + numViews++; + } + } + } + + int viewIdLen = data.readBits(4); // view_id_len + if (numViews < 2 || viewIdLen == 0) { + // This means all views have viewId of 0, so no multiview effect. + return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + int[] viewIdVals = new int[numViews]; + for (int i = 0; i < numViews; i++) { + viewIdVals[i] = data.readBits(viewIdLen); // view_id_val[i] + } + + int[] layerIdInVps = new int[maxLayerId + 1]; + for (int i = 0; i < maxLayers; i++) { + layerIdInVps[min(layerIdInNuh[i], maxLayerId)] = i; + } + ImmutableList.Builder layerInfosBuilder = ImmutableList.builder(); + for (int i = 0; i <= maxLayerId; i++) { + int viewIdValIdx = min(viewOrderIdx[i], numViews - 1); + int viewIdVal = viewIdValIdx >= 0 ? viewIdVals[viewIdValIdx] : INVALID_ID; + layerInfosBuilder.add(new H265LayerInfo(layerIdInVps[i], viewIdVal)); + } + ImmutableList layerInfos = layerInfosBuilder.build(); + + if (layerInfos.get(0).viewId == INVALID_ID) { + // The base layer must be the primary view; fallback to single layer HEVC. 
+ return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + int secondaryViewLayerId = INVALID_ID; + for (int i = 1; i <= maxLayerId; i++) { + if (layerInfos.get(i).viewId != INVALID_ID) { + secondaryViewLayerId = i; + break; + } + } + if (secondaryViewLayerId == INVALID_ID) { + // No secondary view defined; fallback to single layer HEVC. + return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + + // Derive H.265 layer dependency structure following (F-4) in subsection F.7.4.3.1.1. + boolean[][] directDependencyFlag = new boolean[maxLayers][maxLayers]; + boolean[][] dependencyFlag = new boolean[maxLayers][maxLayers]; + for (int i = 1; i < maxLayers; i++) { + for (int j = 0; j < i; j++) { + directDependencyFlag[i][j] = + dependencyFlag[i][j] = data.readBit(); // direct_dependency_flag[i][j] + } + } + for (int i = 1; i < maxLayers; i++) { + for (int j = 0; j < maxLayers - 1; j++) { + for (int k = 0; k < i; k++) { + if (dependencyFlag[i][k] && dependencyFlag[k][j]) { + dependencyFlag[i][j] = true; + break; + } + } + } + } + + // Derive numDirectRefLayers following (F-5) in subsection F.7.4.3.1.1. + int[] numDirectRefLayers = new int[maxLayerId + 1]; + for (int i = 0; i < maxLayers; i++) { + int d = 0; + for (int j = 0; j < i; j++) { + d += directDependencyFlag[i][j] ? 1 : 0; + } + numDirectRefLayers[layerIdInNuh[i]] = d; + } + // Derive numIndependentLayers following (F-6) in subsection F.7.4.3.1.1. + int numIndependentLayers = 0; + for (int i = 0; i < maxLayers; i++) { + if (numDirectRefLayers[layerIdInNuh[i]] == 0) { + numIndependentLayers++; + } + } + if (numIndependentLayers > 1) { + // Current implementation only supports one independent layer - the base layer. 
+ return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + + // Since only one independent layer is expected, num_add_layer_sets is implied to be 0. + + int[] subLayersVpsMaxMinus1 = new int[maxLayers]; + int[] maxSubLayersInLayerSet = new int[numLayerSets]; + if (data.readBit()) { // vps_sub_layers_max_minus1_present_flag + for (int i = 0; i < maxLayers; i++) { + subLayersVpsMaxMinus1[i] = data.readBits(3); // sub_layers_vps_max_minus1[i] + } + } else { + Arrays.fill(subLayersVpsMaxMinus1, 0, maxLayers, maxSubLayersMinus1); + } + for (int i = 0; i < numLayerSets; i++) { + int layerSetMaxSubLayersMinus1 = 0; // maxSlMinus1 + for (int k = 0; k < numLayersInIdList[i]; k++) { + int layerId = layerSetLayerIdList[i][k]; + layerSetMaxSubLayersMinus1 = + max( + layerSetMaxSubLayersMinus1, + subLayersVpsMaxMinus1[layerInfos.get(layerId).layerIdInVps]); + } + maxSubLayersInLayerSet[i] = layerSetMaxSubLayersMinus1 + 1; + } + + if (data.readBit()) { // max_tid_ref_present_flag + for (int i = 0; i < maxLayers - 1; i++) { + for (int j = i + 1; j < maxLayers; j++) { + if (directDependencyFlag[j][i]) { + data.skipBits(3); // max_tid_il_ref_pics_plus1[i][j] + } + } + } + } + data.skipBit(); // default_ref_layers_active_flag + + // Get profile_tier_level()s needed for non-base layer. 
+ int numProfileTierLevels = data.readUnsignedExpGolombCodedInt() + 1; + ImmutableList.Builder profileTierLevelsBuilder = ImmutableList.builder(); + profileTierLevelsBuilder.add(profileTierLevel); + if (numProfileTierLevels > 1) { + profileTierLevelsBuilder.add(baseLayerProfileTierLevel); + H265ProfileTierLevel prevProfileTierLevel = baseLayerProfileTierLevel; + for (int i = 2; i < numProfileTierLevels; i++) { + H265ProfileTierLevel nextProfileTierLevel = + parseH265ProfileTierLevel( + data, + /* profilePresentFlag= */ data.readBit(), + maxSubLayersMinus1, + prevProfileTierLevel); + profileTierLevelsBuilder.add(nextProfileTierLevel); + prevProfileTierLevel = nextProfileTierLevel; + } + } + profileTierLevels = profileTierLevelsBuilder.build(); + + // Define output layer sets. + + int numOutputLayerSets = numLayerSets + data.readUnsignedExpGolombCodedInt(); // num_add_olss + if (numOutputLayerSets > numLayerSets) { + // Current implementation only supports the output layer set being the same as the layer set. + // Fallback to single layer HEVC if the constraint is not met. 
+ return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + + int defaultOutputLayerIdc = data.readBits(2); // default_output_layer_idc + + boolean[][] outputLayerFlag = new boolean[numOutputLayerSets][maxLayerId + 1]; + int[] numOutputLayersInOutputLayerSet = new int[numOutputLayerSets]; + int[] olsHighestOutputLayerId = new int[numOutputLayerSets]; + for (int i = 0; i < numLayerSets; i++) { + numOutputLayersInOutputLayerSet[i] = 0; + olsHighestOutputLayerId[i] = layerSetMaxLayerId[i]; + if (defaultOutputLayerIdc == 0) { + Arrays.fill(outputLayerFlag[i], 0, numLayersInIdList[i], true); + numOutputLayersInOutputLayerSet[i] = numLayersInIdList[i]; + } else if (defaultOutputLayerIdc == 1) { + int highestLayerId = layerSetMaxLayerId[i]; + for (int j = 0; j < numLayersInIdList[i]; j++) { + outputLayerFlag[i][j] = layerSetLayerIdList[i][j] == highestLayerId; + } + numOutputLayersInOutputLayerSet[i] = 1; + } else { + outputLayerFlag[0][0] = true; + numOutputLayersInOutputLayerSet[0] = 1; + } + } + + int[] profileTierLevelIndices = new int[maxLayerId + 1]; + boolean[][] necessaryLayerFlag = new boolean[numOutputLayerSets][maxLayerId + 1]; + int targetOutputLayerSetIdx = 0; + + for (int i = 1; i < numOutputLayerSets; i++) { + if (defaultOutputLayerIdc == 2) { + for (int j = 0; j < numLayersInIdList[i]; j++) { + outputLayerFlag[i][j] = data.readBit(); // output_layer_flag[i][j] + numOutputLayersInOutputLayerSet[i] += outputLayerFlag[i][j] ? 1 : 0; + if (outputLayerFlag[i][j]) { + olsHighestOutputLayerId[i] = layerSetLayerIdList[i][j]; + } + } + } + + // Look for the first output layer set that includes the base layer (primary view) and the + // layer with the secondary view. 
+ if (targetOutputLayerSetIdx == 0 && layerSetLayerIdList[i][0] == 0 && outputLayerFlag[i][0]) { + for (int j = 1; j < numLayersInIdList[i]; j++) { + if (layerSetLayerIdList[i][j] == secondaryViewLayerId + && outputLayerFlag[i][secondaryViewLayerId]) { + targetOutputLayerSetIdx = i; + } + } + } + + for (int j = 0; j < numLayersInIdList[i]; j++) { + if (numProfileTierLevels > 1) { + necessaryLayerFlag[i][j] = outputLayerFlag[i][j]; + int bitLen = log2(numProfileTierLevels, RoundingMode.CEILING); + if (!necessaryLayerFlag[i][j]) { + int currLayerIdInVps = layerInfos.get(layerSetLayerIdList[i][j]).layerIdInVps; + for (int k = 0; k < j; k++) { + int refLayerIdInVps = layerInfos.get(layerSetLayerIdList[i][k]).layerIdInVps; + if (dependencyFlag[currLayerIdInVps][refLayerIdInVps]) { + necessaryLayerFlag[i][j] = true; + break; + } + } + } + if (necessaryLayerFlag[i][j]) { + if (targetOutputLayerSetIdx > 0 && i == targetOutputLayerSetIdx) { + // Only store the information needed for the target output layer set. + profileTierLevelIndices[j] = data.readBits(bitLen); // profile_tier_level_idx[i][j] + } else { + data.skipBits(bitLen); // profile_tier_level_idx[i][j] + } + } + } + } + if (numOutputLayersInOutputLayerSet[i] == 1 + && numDirectRefLayers[olsHighestOutputLayerId[i]] > 0) { + data.skipBit(); // alt_output_layer_flag[i] + } + } + + if (targetOutputLayerSetIdx == 0) { + // The selected target output layer set only contains the base layer. 
+ return new H265VpsData( + nalHeader, + /* layerInfos= */ null, + baseLayerProfileTierLevelsAndIndices, + /* repFormatsAndIndices= */ null, + /* videoSignalInfosAndIndices= */ null); + } + + H265RepFormatsAndIndices repFormatsAndIndices = parseH265RepFormatsAndIndices(data, maxLayers); + + data.skipBits(2); // max_one_active_ref_layer_flag, vps_poc_lsb_aligned_flag + for (int i = 1; i < maxLayers; i++) { + if (numDirectRefLayers[layerIdInNuh[i]] == 0) { + data.skipBit(); // poc_lsb_not_present_flag[i] + } + } + + skipH265DpbSize( + data, numOutputLayerSets, maxSubLayersInLayerSet, numLayersInIdList, necessaryLayerFlag); + + skipToH265VuiPresentFlagAfterDpbSize(data, maxLayers, directDependencyFlag); + + H265VideoSignalInfosAndIndices videoSignalInfosAndIndices = null; + if (data.readBit()) { // vps_vui_present_flag + data.byteAlign(); + videoSignalInfosAndIndices = + parseH265VideoSignalInfosAndIndices( + data, maxLayers, numLayerSets, maxSubLayersInLayerSet); + } + + return new H265VpsData( + nalHeader, + layerInfos, + new H265ProfileTierLevelsAndIndices(profileTierLevels, profileTierLevelIndices), + repFormatsAndIndices, + videoSignalInfosAndIndices); + } + /** * Parses a H.265 SPS NAL unit using the syntax defined in ITU-T Recommendation H.265 (2019) - * subsection 7.3.2.2.1. + * subsections 7.3.2.2.1 and F.7.3.2.2.1. * * @param nalData A buffer containing escaped SPS data. * @param nalOffset The offset of the NAL unit header in {@code nalData}. * @param nalLimit The limit of the NAL unit in {@code nalData}. + * @param vpsData The VPS that the SPS refers to or {@code null} if unavailable. * @return A parsed representation of the SPS data. 
*/ - public static H265SpsData parseH265SpsNalUnit(byte[] nalData, int nalOffset, int nalLimit) { - return parseH265SpsNalUnitPayload(nalData, nalOffset + 2, nalLimit); + public static H265SpsData parseH265SpsNalUnit( + byte[] nalData, int nalOffset, int nalLimit, @Nullable H265VpsData vpsData) { + H265NalHeader nalHeader = + parseH265NalHeader(new ParsableNalUnitBitArray(nalData, nalOffset, nalLimit)); + // The nal_unit_header() - nalHeader - is comprised of 2 bytes. + return parseH265SpsNalUnitPayload(nalData, nalOffset + 2, nalLimit, nalHeader, vpsData); } /** * Parses a H.265 SPS NAL unit payload (excluding the NAL unit header) using the syntax defined in - * ITU-T Recommendation H.265 (2019) subsection 7.3.2.2.1. + * ITU-T Recommendation H.265 (2019) subsections 7.3.2.2.1 and F.7.3.2.2.1. * * @param nalData A buffer containing escaped SPS data. * @param nalOffset The offset of the NAL unit payload in {@code nalData}. * @param nalLimit The limit of the NAL unit in {@code nalData}. + * @param nalHeader The parsed representation of the NAL header. + * @param vpsData The VPS that the SPS refers to or {@code null} if unavailable. * @return A parsed representation of the SPS data. */ public static H265SpsData parseH265SpsNalUnitPayload( - byte[] nalData, int nalOffset, int nalLimit) { + byte[] nalData, + int nalOffset, + int nalLimit, + H265NalHeader nalHeader, + @Nullable H265VpsData vpsData) { ParsableNalUnitBitArray data = new ParsableNalUnitBitArray(nalData, nalOffset, nalLimit); data.skipBits(4); // sps_video_parameter_set_id + // Represents sps_max_sub_layers_minus1 (when nuh_layer_id == 0) or + // sps_ext_or_max_sub_layers_minus1 (when nuh_layer_id != 0). 
int maxSubLayersMinus1 = data.readBits(3); - data.skipBit(); // sps_temporal_id_nesting_flag - int generalProfileSpace = data.readBits(2); - boolean generalTierFlag = data.readBit(); - int generalProfileIdc = data.readBits(5); - int generalProfileCompatibilityFlags = 0; - for (int i = 0; i < 32; i++) { - if (data.readBit()) { - generalProfileCompatibilityFlags |= (1 << i); + boolean multiLayerExtSpsFlag = nalHeader.layerId != 0 && maxSubLayersMinus1 == 7; + + int layerIdInVps = 0; + if (vpsData != null && !vpsData.layerInfos.isEmpty()) { + int layerId = min(nalHeader.layerId, vpsData.layerInfos.size() - 1); + layerIdInVps = vpsData.layerInfos.get(layerId).layerIdInVps; + } + @Nullable H265ProfileTierLevel profileTierLevel = null; + if (!multiLayerExtSpsFlag) { + data.skipBit(); // sps_temporal_id_nesting_flag + profileTierLevel = + parseH265ProfileTierLevel( + data, + /* profilePresentFlag= */ true, + maxSubLayersMinus1, + /* prevProfileTierLevel= */ null); + } else if (vpsData != null) { + int profileTierLevelIdx = vpsData.profileTierLevelsAndIndices.indices[layerIdInVps]; + if (vpsData.profileTierLevelsAndIndices.profileTierLevels.size() > profileTierLevelIdx) { + profileTierLevel = + vpsData.profileTierLevelsAndIndices.profileTierLevels.get(profileTierLevelIdx); } } - int[] constraintBytes = new int[6]; - for (int i = 0; i < constraintBytes.length; ++i) { - constraintBytes[i] = data.readBits(8); - } - int generalLevelIdc = data.readBits(8); - int toSkip = 0; - for (int i = 0; i < maxSubLayersMinus1; i++) { - if (data.readBit()) { // sub_layer_profile_present_flag[i] - toSkip += 89; - } - if (data.readBit()) { // sub_layer_level_present_flag[i] - toSkip += 8; - } - } - data.skipBits(toSkip); - if (maxSubLayersMinus1 > 0) { - data.skipBits(2 * (8 - maxSubLayersMinus1)); - } + int seqParameterSetId = data.readUnsignedExpGolombCodedInt(); - int chromaFormatIdc = data.readUnsignedExpGolombCodedInt(); - if (chromaFormatIdc == 3) { - data.skipBit(); // 
separate_colour_plane_flag + int chromaFormatIdc = 0; + int frameWidth = 0; + int frameHeight = 0; + int bitDepthLumaMinus8 = 0; + int bitDepthChromaMinus8 = 0; + int spsRepFormatIdx = C.INDEX_UNSET; + if (multiLayerExtSpsFlag) { + if (data.readBit()) { // update_rep_format_flag + spsRepFormatIdx = data.readBits(8); // sps_rep_format_idx + } + if (vpsData != null && vpsData.repFormatsAndIndices != null) { + if (spsRepFormatIdx == C.INDEX_UNSET) { + spsRepFormatIdx = vpsData.repFormatsAndIndices.indices[layerIdInVps]; + } + if (spsRepFormatIdx != C.INDEX_UNSET + && vpsData.repFormatsAndIndices.repFormats.size() > spsRepFormatIdx) { + H265RepFormat repFormat = vpsData.repFormatsAndIndices.repFormats.get(spsRepFormatIdx); + chromaFormatIdc = repFormat.chromaFormatIdc; + frameWidth = repFormat.width; + frameHeight = repFormat.height; + bitDepthLumaMinus8 = repFormat.bitDepthLumaMinus8; + bitDepthChromaMinus8 = repFormat.bitDepthChromaMinus8; + } + } + } else { + chromaFormatIdc = data.readUnsignedExpGolombCodedInt(); + if (chromaFormatIdc == 3) { + data.skipBit(); // separate_colour_plane_flag + } + frameWidth = data.readUnsignedExpGolombCodedInt(); + frameHeight = data.readUnsignedExpGolombCodedInt(); + if (data.readBit()) { // conformance_window_flag + int confWinLeftOffset = data.readUnsignedExpGolombCodedInt(); + int confWinRightOffset = data.readUnsignedExpGolombCodedInt(); + int confWinTopOffset = data.readUnsignedExpGolombCodedInt(); + int confWinBottomOffset = data.readUnsignedExpGolombCodedInt(); + frameWidth = + applyConformanceWindowToWidth( + frameWidth, chromaFormatIdc, confWinLeftOffset, confWinRightOffset); + frameHeight = + applyConformanceWindowToHeight( + frameHeight, chromaFormatIdc, confWinTopOffset, confWinBottomOffset); + } + bitDepthLumaMinus8 = data.readUnsignedExpGolombCodedInt(); + bitDepthChromaMinus8 = data.readUnsignedExpGolombCodedInt(); } - int frameWidth = data.readUnsignedExpGolombCodedInt(); - int frameHeight = 
data.readUnsignedExpGolombCodedInt(); - if (data.readBit()) { // conformance_window_flag - int confWinLeftOffset = data.readUnsignedExpGolombCodedInt(); - int confWinRightOffset = data.readUnsignedExpGolombCodedInt(); - int confWinTopOffset = data.readUnsignedExpGolombCodedInt(); - int confWinBottomOffset = data.readUnsignedExpGolombCodedInt(); - // H.265/HEVC (2014) Table 6-1 - int subWidthC = chromaFormatIdc == 1 || chromaFormatIdc == 2 ? 2 : 1; - int subHeightC = chromaFormatIdc == 1 ? 2 : 1; - frameWidth -= subWidthC * (confWinLeftOffset + confWinRightOffset); - frameHeight -= subHeightC * (confWinTopOffset + confWinBottomOffset); - } - int bitDepthLumaMinus8 = data.readUnsignedExpGolombCodedInt(); - int bitDepthChromaMinus8 = data.readUnsignedExpGolombCodedInt(); int log2MaxPicOrderCntLsbMinus4 = data.readUnsignedExpGolombCodedInt(); int maxNumReorderPics = -1; - // for (i = sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers_minus1; ...) - for (int i = data.readBit() ? 0 : maxSubLayersMinus1; i <= maxSubLayersMinus1; i++) { - data.readUnsignedExpGolombCodedInt(); // sps_max_dec_pic_buffering_minus1[i] - // sps_max_num_reorder_pics[i] - maxNumReorderPics = max(data.readUnsignedExpGolombCodedInt(), maxNumReorderPics); - data.readUnsignedExpGolombCodedInt(); // sps_max_latency_increase_plus1[i] + if (!multiLayerExtSpsFlag) { + // for (i = sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers_minus1; ...) + for (int i = data.readBit() ? 
0 : maxSubLayersMinus1; i <= maxSubLayersMinus1; i++) { + data.readUnsignedExpGolombCodedInt(); // sps_max_dec_pic_buffering_minus1[i] + // sps_max_num_reorder_pics[i] + maxNumReorderPics = max(data.readUnsignedExpGolombCodedInt(), maxNumReorderPics); + data.readUnsignedExpGolombCodedInt(); // sps_max_latency_increase_plus1[i] + } } data.readUnsignedExpGolombCodedInt(); // log2_min_luma_coding_block_size_minus3 data.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_luma_coding_block_size @@ -674,10 +1405,16 @@ public final class NalUnitUtil { data.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_luma_transform_block_size data.readUnsignedExpGolombCodedInt(); // max_transform_hierarchy_depth_inter data.readUnsignedExpGolombCodedInt(); // max_transform_hierarchy_depth_intra - // if (scaling_list_enabled_flag) { if (sps_scaling_list_data_present_flag) {...}} - boolean scalingListEnabled = data.readBit(); - if (scalingListEnabled && data.readBit()) { - skipH265ScalingList(data); + if (data.readBit()) { // scaling_list_enabled_flag + boolean inferScalingListFlag = false; + if (multiLayerExtSpsFlag) { + inferScalingListFlag = data.readBit(); // sps_infer_scaling_list_flag + } + if (inferScalingListFlag) { + data.skipBits(6); // sps_scaling_list_ref_layer_id + } else if (data.readBit()) { // sps_scaling_list_data_present_flag + skipH265ScalingList(data); + } } data.skipBits(2); // amp_enabled_flag (1), sample_adaptive_offset_enabled_flag (1) if (data.readBit()) { // pcm_enabled_flag @@ -687,7 +1424,7 @@ public final class NalUnitUtil { data.readUnsignedExpGolombCodedInt(); // log2_diff_max_min_pcm_luma_coding_block_size data.skipBit(); // pcm_loop_filter_disabled_flag } - skipShortTermReferencePictureSets(data); + skipH265ShortTermReferencePictureSets(data); if (data.readBit()) { // long_term_ref_pics_present_flag int numLongTermRefPicsSps = data.readUnsignedExpGolombCodedInt(); for (int i = 0; i < numLongTermRefPicsSps; i++) { @@ -732,6 +1469,15 @@ public final 
class NalUnitUtil { colorTransfer = ColorInfo.isoTransferCharacteristicsToColorTransfer(transferCharacteristics); } + } else if (vpsData != null && vpsData.videoSignalInfosAndIndices != null) { + int videoSignalInfoIdx = vpsData.videoSignalInfosAndIndices.indices[layerIdInVps]; + if (vpsData.videoSignalInfosAndIndices.videoSignalInfos.size() > videoSignalInfoIdx) { + H265VideoSignalInfo videoSignalInfo = + vpsData.videoSignalInfosAndIndices.videoSignalInfos.get(videoSignalInfoIdx); + colorSpace = videoSignalInfo.colorSpace; + colorRange = videoSignalInfo.colorRange; + colorTransfer = videoSignalInfo.colorTransfer; + } } if (data.readBit()) { // chroma_loc_info_present_flag data.readUnsignedExpGolombCodedInt(); // chroma_sample_loc_type_top_field @@ -746,15 +1492,11 @@ public final class NalUnitUtil { } return new H265SpsData( - generalProfileSpace, - generalTierFlag, - generalProfileIdc, - generalProfileCompatibilityFlags, + nalHeader, + profileTierLevel, chromaFormatIdc, bitDepthLumaMinus8, bitDepthChromaMinus8, - constraintBytes, - generalLevelIdc, seqParameterSetId, frameWidth, frameHeight, @@ -895,6 +1637,353 @@ public final class NalUnitUtil { return limit; } + /** + * Skips all bits in hrd_parameters() defined in ITU-T Recommendation H.265 (2019) subsection + * E.2.2. This function updates the ParsableNalUnitBitArray data with new byte and bit offsets + * that point to the end of the hrd_parameters(). 
+ */ + private static void skipH265HrdParameters( + ParsableNalUnitBitArray data, boolean commonInfPresentFlag, int maxSubLayersMinus1) { + boolean nalHrdParametersPresentFlag = false; + boolean vclHrdParametersPresentFlag = false; + boolean subPicHrdParametersPresentFlag = false; + if (commonInfPresentFlag) { + nalHrdParametersPresentFlag = data.readBit(); // nal_hrd_parameters_present_flag + vclHrdParametersPresentFlag = data.readBit(); // vcl_hrd_parameters_present_flag + if (nalHrdParametersPresentFlag || vclHrdParametersPresentFlag) { + subPicHrdParametersPresentFlag = data.readBit(); // sub_pic_hrd_params_present_flag + if (subPicHrdParametersPresentFlag) { + // tick_divisor_minus2, du_cpb_removal_delay_increment_length_minus1, + // sub_pic_cpb_params_in_pic_timing_sei_flag, dpb_output_delay_du_length_minus1 + data.skipBits(19); + } + data.skipBits(8); // bit_rate_scale, cpb_size_scale + if (subPicHrdParametersPresentFlag) { + data.skipBits(4); // cpb_size_du_scale + } + // initial_cpb_removal_delay_length_minus1, au_cpb_removal_delay_length_minus1, + // dpb_output_delay_length_minus1 + data.skipBits(15); + } + } + for (int i = 0; i <= maxSubLayersMinus1; i++) { + boolean fixedPicRateGeneralFlag = data.readBit(); // fixed_pic_rate_general_flag[i] + boolean fixedPicRateWithinCvsFlag = fixedPicRateGeneralFlag; + boolean lowDelayHrdFlag = false; + int cpbCntMinus1 = 0; + if (!fixedPicRateGeneralFlag) { + fixedPicRateWithinCvsFlag = data.readBit(); // fixed_pic_rate_within_cvs_flag[i] + } + if (fixedPicRateWithinCvsFlag) { + data.readUnsignedExpGolombCodedInt(); // elemental_duration_in_tc_minus1[i] + } else { + lowDelayHrdFlag = data.readBit(); // low_delay_hrd_flag[i] + } + if (!lowDelayHrdFlag) { + cpbCntMinus1 = data.readUnsignedExpGolombCodedInt(); // cpb_cnt_minus1[i] + } + int numSubLayerHrdParameters = 0; + numSubLayerHrdParameters += nalHrdParametersPresentFlag ? 1 : 0; + numSubLayerHrdParameters += vclHrdParametersPresentFlag ? 
1 : 0; + for (int j = 0; j < numSubLayerHrdParameters; j++) { + for (int k = 0; k <= cpbCntMinus1; k++) { + data.readUnsignedExpGolombCodedInt(); // bit_rate_value_minus1[k] + data.readUnsignedExpGolombCodedInt(); // cpb_size_value_minus1[k] + if (subPicHrdParametersPresentFlag) { + data.readUnsignedExpGolombCodedInt(); // cpb_size_du_value_minus1[k] + data.readUnsignedExpGolombCodedInt(); // bit_rate_du_value_minus1[k] + } + data.skipBit(); // cbr_flag[k] + } + } + } + } + + /** + * Parses a H.265 profile_tier_level() using the syntax defined in ITU-T Recommendation H.265 + * (2019) subsection 7.3.3. This function updates the ParsableNalUnitBitArray data with new byte + * and bit offsets that point to the end of parsing the profile_tier_level(). + */ + private static H265ProfileTierLevel parseH265ProfileTierLevel( + ParsableNalUnitBitArray data, + boolean profilePresentFlag, + int maxSubLayersMinus1, + @Nullable H265ProfileTierLevel prevProfileTierLevel) { + int generalProfileSpace = 0; + boolean generalTierFlag = false; + int generalProfileIdc = 0; + int generalProfileCompatibilityFlags = 0; + int[] constraintBytes = new int[6]; + if (profilePresentFlag) { + generalProfileSpace = data.readBits(2); // general_profile_space + generalTierFlag = data.readBit(); // general_tier_flag + generalProfileIdc = data.readBits(5); // general_profile_idc + generalProfileCompatibilityFlags = 0; + for (int i = 0; i < 32; i++) { + if (data.readBit()) { + generalProfileCompatibilityFlags |= (1 << i); // general_profile_compatibility_flag[i] + } + } + for (int i = 0; i < constraintBytes.length; ++i) { + constraintBytes[i] = data.readBits(8); + } + } else if (prevProfileTierLevel != null) { + generalProfileSpace = prevProfileTierLevel.generalProfileSpace; + generalTierFlag = prevProfileTierLevel.generalTierFlag; + generalProfileIdc = prevProfileTierLevel.generalProfileIdc; + generalProfileCompatibilityFlags = prevProfileTierLevel.generalProfileCompatibilityFlags; + constraintBytes = 
prevProfileTierLevel.constraintBytes; + } + int generalLevelIdc = data.readBits(8); // general_level_idc + + // Skip to the end of profile_tier_level(). + int toSkip = 0; + for (int i = 0; i < maxSubLayersMinus1; i++) { + if (data.readBit()) { // sub_layer_profile_present_flag[i] + toSkip += 88; + } + if (data.readBit()) { // sub_layer_level_present_flag[i] + toSkip += 8; + } + } + data.skipBits(toSkip); + if (maxSubLayersMinus1 > 0) { + data.skipBits(2 * (8 - maxSubLayersMinus1)); // reserved_zero_2bits + } + + return new H265ProfileTierLevel( + generalProfileSpace, + generalTierFlag, + generalProfileIdc, + generalProfileCompatibilityFlags, + constraintBytes, + generalLevelIdc); + } + + // Applies the conformance window offsets to the width following H.265/HEVC (2014) Table 6-1. + private static int applyConformanceWindowToWidth( + int width, int chromaFormatIdc, int offsetLeft, int offsetRight) { + int subWidthC = chromaFormatIdc == 1 || chromaFormatIdc == 2 ? 2 : 1; + return width - subWidthC * (offsetLeft + offsetRight); + } + + // Applies the conformance window offsets to the height following H.265/HEVC (2014) Table 6-1. + private static int applyConformanceWindowToHeight( + int height, int chromaFormatIdc, int offsetTop, int offsetBottom) { + int subHeightC = chromaFormatIdc == 1 ? 2 : 1; + return height - subHeightC * (offsetTop + offsetBottom); + } + + /** + * Parses H.265 rep_format()s and corresponding indices (vps_rep_format_idx[]) within + * vps_extension(). This function updates the ParsableNalUnitBitArray data with new byte and bit + * offsets that point to the end of parsing the rep_format()s and indices. 
+ */ + private static H265RepFormatsAndIndices parseH265RepFormatsAndIndices( + ParsableNalUnitBitArray data, int maxLayers) { + int numRepFormats = data.readUnsignedExpGolombCodedInt() + 1; + ImmutableList.Builder repFormats = + ImmutableList.builderWithExpectedSize(numRepFormats); + int[] repFormatIndices = new int[maxLayers]; + for (int i = 0; i < numRepFormats; i++) { + // rep_format() + repFormats.add(parseH265RepFormat(data)); + } + if (numRepFormats > 1 && data.readBit()) { // rep_format_idx_present_flag + int bitLen = log2(numRepFormats, RoundingMode.CEILING); + // Here, vps_base_layer_internal_flag == true as we only support the case where the base layer + // is included within the L-HEVC bitstream; hence the index i starts from 1. + for (int i = 1; i < maxLayers; i++) { + repFormatIndices[i] = data.readBits(bitLen); // vps_rep_format_idx[i] + } + } else { + for (int i = 1; i < maxLayers; i++) { + repFormatIndices[i] = min(i, numRepFormats - 1); + } + } + return new H265RepFormatsAndIndices(repFormats.build(), repFormatIndices); + } + + /** + * Parses a H.265 rep_format() using the syntax defined in ITU-T Recommendation H.265 (2019) + * subsection F.7.3.2.1.2. This function updates the ParsableNalUnitBitArray data with new byte + * and bit offsets that point to the end of the parsing of the rep_format(). 
+ */ + private static H265RepFormat parseH265RepFormat(ParsableNalUnitBitArray data) { + int frameWidth = data.readBits(16); // pic_width_vps_in_luma_samples + int frameHeight = data.readBits(16); // pic_height_vps_in_luma_samples + int chromaFormatIdc = 0; + int bitDepthLumaMinus8 = 0; + int bitDepthChromaMinus8 = 0; + if (data.readBit()) { // chroma_and_bit_depth_vps_present_flag + chromaFormatIdc = data.readBits(2); // chroma_format_vps_idc + if (chromaFormatIdc == 3) { + data.skipBit(); // separate_colour_plane_vps_flag + } + bitDepthLumaMinus8 = data.readBits(4); // bit_depth_vps_luma_minus8 + bitDepthChromaMinus8 = data.readBits(4); // bit_depth_vps_chroma_minus8 + } + if (data.readBit()) { // conformance_window_vps_flag + int confWinLeftOffset = data.readUnsignedExpGolombCodedInt(); // conf_win_vps_left_offset + int confWinRightOffset = data.readUnsignedExpGolombCodedInt(); // conf_win_vps_right_offset + int confWinTopOffset = data.readUnsignedExpGolombCodedInt(); // conf_win_vps_top_offset + int confWinBottomOffset = data.readUnsignedExpGolombCodedInt(); // conf_win_vps_bottom_offset + frameWidth = + applyConformanceWindowToWidth( + frameWidth, chromaFormatIdc, confWinLeftOffset, confWinRightOffset); + frameHeight = + applyConformanceWindowToHeight( + frameHeight, chromaFormatIdc, confWinTopOffset, confWinBottomOffset); + } + return new H265RepFormat( + chromaFormatIdc, bitDepthLumaMinus8, bitDepthChromaMinus8, frameWidth, frameHeight); + } + + /** + * Skips H.265 dpb_size() within vps_extension(). This function updates the + * ParsableNalUnitBitArray data with new byte and bit offsets that point to the end of the parsing + * of the dpb_size(). 
+ */ + private static void skipH265DpbSize( + ParsableNalUnitBitArray data, + int numOutputLayerSets, + int[] maxSubLayersInLayerSet, + int[] numLayersInIdList, + boolean[][] necessaryLayerFlag) { + for (int i = 1; i < numOutputLayerSets; i++) { + boolean subLayerFlagInfoPresentFlag = data.readBit(); // sub_layer_flag_info_present_flag[i] + for (int j = 0; j < maxSubLayersInLayerSet[i]; j++) { + boolean subLayerDpbInfoPresentFlag; + if (j > 0 && subLayerFlagInfoPresentFlag) { + subLayerDpbInfoPresentFlag = data.readBit(); // sub_layer_dpb_info_present_flag[i][j] + } else { + subLayerDpbInfoPresentFlag = j == 0; + } + if (subLayerDpbInfoPresentFlag) { + for (int k = 0; k < numLayersInIdList[i]; k++) { + // Note that here we assume that vps_base_layer_internal_flag is always true. + if (necessaryLayerFlag[i][k]) { + data.readUnsignedExpGolombCodedInt(); // max_vps_dec_pic_buffering_minus1[i][k][j] + } + } + data.readUnsignedExpGolombCodedInt(); // max_vps_num_reorder_pics[i][j] + data.readUnsignedExpGolombCodedInt(); // max_vps_latency_increase_plus1[i][j] + } + } + } + } + + /** + * Skips up to (not including) vps_vui_present_flag starting right after dbp_size() within H.265 + * vps_extension(). This function updates the ParsableNalUnitBitArray data with new byte and bit + * offsets that point to vps_vui_present_flag. + */ + private static void skipToH265VuiPresentFlagAfterDpbSize( + ParsableNalUnitBitArray data, int maxLayers, boolean[][] directDependencyFlag) { + int directDepTypeLen = data.readUnsignedExpGolombCodedInt() + 2; // direct_dep_type_len_minus2 + if (data.readBit()) { // direct_dependency_all_layers_flag + data.skipBits(directDepTypeLen); // direct_dependency_all_layers_type + } else { + // Here, vps_base_layer_internal_flag == true as we only support the case where the base layer + // is included within the L-HEVC bitstream; hence the index i starts from 1 and j from 0. 
+ for (int i = 1; i < maxLayers; i++) { + for (int j = 0; j < i; j++) { + if (directDependencyFlag[i][j]) { + data.skipBits(directDepTypeLen); // direct_dependency_type[i][j] + } + } + } + } + int nonVuiExtensionLen = data.readUnsignedExpGolombCodedInt(); // vps_non_vui_extension_length + for (int i = 1; i <= nonVuiExtensionLen; i++) { + data.skipBits(8); // vps_non_vui_extension_data_byte + } + } + + /** + * Parses H.265 video_signal_info()s and corresponding indices (vps_video_signal_info_idx[]) + * within vps_vui() that is within vps_extension(). This function updates the + * ParsableNalUnitBitArray data with new byte and bit offsets that point to the end of the parsing + * of the video_signal_info()s and indices. + */ + private static H265VideoSignalInfosAndIndices parseH265VideoSignalInfosAndIndices( + ParsableNalUnitBitArray data, int maxLayers, int numLayerSets, int[] maxSubLayersInLayerSet) { + boolean crossLayerIrapAlignedFlag = true; + if (!data.readBit()) { // cross_layer_pic_type_aligned_flag + crossLayerIrapAlignedFlag = data.readBit(); // cross_layer_irap_aligned_flag + } + if (crossLayerIrapAlignedFlag) { + data.skipBit(); // all_layers_idr_aligned_flag + } + + boolean bitRatePresentVpsFlag = data.readBit(); // bit_rate_present_vps_flag + boolean picRatePresentVpsFlag = data.readBit(); // pic_rate_present_vps_flag + if (bitRatePresentVpsFlag || picRatePresentVpsFlag) { + // Here, vps_base_layer_internal_flag == true as we only support the case where the base layer + // is included within the L-HEVC bitstream; hence the index i starts from 0. 
+ for (int i = 0; i < numLayerSets; i++) { + for (int j = 0; j < maxSubLayersInLayerSet[i]; j++) { + boolean bitRatePresentFlag = false; + boolean picRatePresentFlag = false; + if (bitRatePresentVpsFlag) { + bitRatePresentFlag = data.readBit(); // bit_rate_present_flag[i][j] + } + if (picRatePresentVpsFlag) { + picRatePresentFlag = data.readBit(); // pic_rate_present_flag[i][j] + } + if (bitRatePresentFlag) { + data.skipBits(32); // avg_bit_rate[i][j], max_bit_rate[i][j] + } + if (picRatePresentFlag) { + data.skipBits(18); // constant_pic_rate_idc[i][j], avg_pic_rate[i][j] + } + } + } + } + int numVideoSignalInfos = maxLayers; + boolean videoSignalInfoIdxPresentFlag = data.readBit(); // video_signal_info_idx_present_flag + if (videoSignalInfoIdxPresentFlag) { + numVideoSignalInfos = data.readBits(4) + 1; // vps_num_video_signal_info_minus1 + } + ImmutableList.Builder videoSignalInfos = + ImmutableList.builderWithExpectedSize(numVideoSignalInfos); + int[] videoSignalInfoIdices = new int[maxLayers]; + for (int i = 0; i < numVideoSignalInfos; i++) { + // video_signal_info() + videoSignalInfos.add(parseH265VideoSignalInfo(data)); + } + if (videoSignalInfoIdxPresentFlag && numVideoSignalInfos > 1) { + // Here, vps_base_layer_internal_flag == true as we only support the case where the base layer + // is included within the L-HEVC bitstream; hence the index i starts from 0. + for (int i = 0; i < maxLayers; i++) { + videoSignalInfoIdices[i] = data.readBits(4); // vps_video_signal_info_idx[i] + } + } + return new H265VideoSignalInfosAndIndices(videoSignalInfos.build(), videoSignalInfoIdices); + } + + /** + * Parses a H.265 video_signal_info() using the syntax defined in ITU-T Recommendation H.265 + * (2019) subsection F.7.3.2.1.5. This function updates the ParsableNalUnitBitArray data with new + * byte and bit offsets that point to the end of parsing the video_signal_info(). 
+ */ + private static H265VideoSignalInfo parseH265VideoSignalInfo(ParsableNalUnitBitArray data) { + data.skipBits(3); // video_vps_format + @C.ColorRange + int colorRange = + data.readBit() ? C.COLOR_RANGE_FULL : C.COLOR_RANGE_LIMITED; // video_full_range_vps_flag + @C.ColorSpace + int colorSpace = + ColorInfo.isoColorPrimariesToColorSpace(data.readBits(8)); // colour_primaries_vps + @C.ColorTransfer + int colorTransfer = + ColorInfo.isoTransferCharacteristicsToColorTransfer( + data.readBits(8)); // transfer_characteristics_vps + data.skipBits(8); // matrix_coeffs_vps + + return new H265VideoSignalInfo(colorSpace, colorRange, colorTransfer); + } + private static void skipScalingList(ParsableNalUnitBitArray bitArray, int size) { int lastScale = 8; int nextScale = 8; @@ -944,12 +2033,12 @@ public final class NalUnitUtil { } /** - * Skips any short term reference picture sets contained in a SPS. + * Skips any short term reference picture sets contained in a H.265 SPS. * *

Note: The st_ref_pic_set parsing in this method is simplified for the case where they're * contained in a SPS, and would need generalizing for use elsewhere. */ - private static void skipShortTermReferencePictureSets(ParsableNalUnitBitArray bitArray) { + private static void skipH265ShortTermReferencePictureSets(ParsableNalUnitBitArray bitArray) { int numShortTermRefPicSets = bitArray.readUnsignedExpGolombCodedInt(); // As this method applies in a SPS, each short term reference picture set only accesses data // from the previous one. This is because RefRpsIdx = stRpsIdx - (delta_idx_minus1 + 1), and diff --git a/libraries/container/src/main/java/androidx/media3/container/ParsableNalUnitBitArray.java b/libraries/container/src/main/java/androidx/media3/container/ParsableNalUnitBitArray.java index 79d3bf8734..1eed8ff4be 100644 --- a/libraries/container/src/main/java/androidx/media3/container/ParsableNalUnitBitArray.java +++ b/libraries/container/src/main/java/androidx/media3/container/ParsableNalUnitBitArray.java @@ -92,6 +92,13 @@ public final class ParsableNalUnitBitArray { assertValidOffset(); } + /** Skips bits until at a byte alignment; if already byte aligned, then simply returns. */ + public void byteAlign() { + if (bitOffset > 0) { + skipBits(8 - bitOffset); + } + } + /** * Returns whether it's possible to read {@code n} bits starting from the current offset. The * offset is not modified. 
diff --git a/libraries/container/src/test/java/androidx/media3/container/NalUnitUtilTest.java b/libraries/container/src/test/java/androidx/media3/container/NalUnitUtilTest.java index e80baedae0..c768fcbecc 100644 --- a/libraries/container/src/test/java/androidx/media3/container/NalUnitUtilTest.java +++ b/libraries/container/src/test/java/androidx/media3/container/NalUnitUtilTest.java @@ -20,6 +20,7 @@ import static com.google.common.truth.Truth.assertThat; import androidx.media3.common.util.Util; import androidx.test.ext.junit.runners.AndroidJUnit4; +import com.google.common.collect.ImmutableList; import java.nio.ByteBuffer; import java.util.Arrays; import org.junit.Test; @@ -37,6 +38,58 @@ public final class NalUnitUtilTest { 0x91, 0x00, 0x00, 0x7E, 0xA0); private static final int SPS_TEST_DATA_OFFSET = 3; + // Below are H.265 VPS and SPS samples obtained from the "24-bit big endian raw audio LPCM + // (MP4,H265,raw)" clip in the ExoPlayer sample. + private static final byte[] H265_VPS_TEST_DATA = + createByteArray( + 0x40, 0x01, 0x0C, 0x01, 0xFF, 0xFF, 0x22, 0x20, 0x00, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x03, 0x00, 0x99, 0x2C, 0x09); + private static final byte[] H265_SPS_TEST_DATA = + createByteArray( + 0x42, 0x01, 0x01, 0x22, 0x20, 0x00, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x03, 0x00, 0x99, 0xA0, 0x01, 0xE0, 0x20, 0x02, 0x1C, 0x4D, 0x94, 0xBB, 0xB4, 0xA3, 0x32, + 0xAA, 0xC0, 0x5A, 0x84, 0x89, 0x04, 0x8A, 0x00, 0x00, 0x07, 0xD2, 0x00, 0x00, 0xBB, 0x80, + 0xE4, 0x68, 0x7C, 0x9C, 0x00, 0x01, 0x2E, 0x1F, 0x80, 0x00, 0x21, 0xFD, 0x30, 0x00, 0x02, + 0x5C, 0x3F, 0x00, 0x00, 0x43, 0xFA, 0x62); + + // Below are MV-HEVC VPS and SPS samples obtained from the two sample clips in b/40937818. 
+ private static final byte[] H265_VPS_TEST_DATA_2VIEWS = + createByteArray( + 0x40, 0x01, 0x0C, 0x11, 0xFF, 0xFF, 0x01, 0x60, 0x00, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x03, 0x00, 0x78, 0x15, 0xC1, 0x5B, 0x00, 0x20, 0x00, 0x28, 0x24, 0xC1, + 0x97, 0x06, 0x02, 0x00, 0x00, 0x03, 0x00, 0xBF, 0x80, 0x00, 0x00, 0x03, 0x00, 0x00, 0x78, + 0x8D, 0x07, 0x80, 0x04, 0x40, 0xA0, 0x1E, 0x5C, 0x52, 0xBF, 0x48); + private static final byte[] H265_SPS_TEST_DATA_2VIEWS_VIEW_0 = + createByteArray( + 0x42, 0x01, 0x01, 0x01, 0x60, 0x00, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x03, 0x00, 0x78, 0xA0, 0x03, 0xC0, 0x80, 0x11, 0x07, 0xCB, 0x88, 0x15, 0xEE, 0x45, 0x95, + 0x4D, 0x40, 0x40, 0x40, 0x40, 0x20); + private static final byte[] H265_SPS_TEST_DATA_2VIEWS_VIEW_1 = + createByteArray(0x42, 0x09, 0x0E, 0x82, 0x2E, 0x45, 0x8A, 0xA0, 0x05, 0x01); + + private static final byte[] H265_VPS_TEST_DATA_2VIEWS_HDR = + createByteArray( + 0x40, 0x01, 0x0C, 0x11, 0xFF, 0xFF, 0x02, 0x20, 0x00, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x03, 0x00, 0x99, 0x98, 0xA3, 0x41, 0x5C, 0x00, 0x00, 0x0F, 0xA4, 0x00, + 0x03, 0xA9, 0x83, 0xFF, 0x99, 0x20, 0x00, 0x21, 0x16, 0x93, 0x93, 0x11, 0x00, 0x00, 0x03, + 0x00, 0x5E, 0xC4, 0x00, 0x00, 0x03, 0x00, 0x00, 0x4C, 0xC6, 0x87, 0x80, 0x04, 0x38, 0x52, + 0x24, 0x31, 0x8A, 0x3B, 0xA4, 0x80); + private static final byte[] H265_SPS_TEST_DATA_2VIEWS_HDR_VIEW_0 = + createByteArray( + 0x42, 0x01, 0x01, 0x02, 0x20, 0x00, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x03, 0x00, 0x99, 0xA0, 0x01, 0xE0, 0x20, 0x02, 0x1C, 0x4D, 0x94, 0x62, 0x8D, 0x92, 0x42, + 0x97, 0x55, 0x58, 0x43, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF9, 0x18, 0x82, 0x8D, + 0x08, 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x91, 0x88, 0x28, 0xD0, 0x87, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF9, 0x2B, 0xC0, 0x41, 0x40, 0x00, 0x00, 0xFA, 0x40, + 0x00, 0x3A, 0x98, 0x3C, 0x24, 0x82, 0x4D, 0xC0, 0x00, 0x26, 0x25, 0xA0, 0x00, 0x13, 
0x12, + 0xDF, 0xC4, 0xC7, 0x8F, 0x40); + private static final byte[] H265_SPS_TEST_DATA_2VIEWS_HDR_VIEW_1 = + createByteArray( + 0x42, 0x09, 0x0E, 0x85, 0x92, 0x42, 0x96, 0xAA, 0xAC, 0x21, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFC, 0x8C, 0x41, 0x46, 0x84, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC8, 0xC4, 0x14, 0x68, 0x43, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC, 0x95, 0xA8, + 0x18); + @Test public void findNalUnit() { byte[] data = buildTestData(); @@ -143,6 +196,172 @@ public final class NalUnitUtilTest { assertThat(data.maxNumReorderFrames).isEqualTo(1); } + @Test + public void parseH265VpsAndSpsNalUnits() { + NalUnitUtil.H265VpsData vpsData = + NalUnitUtil.parseH265VpsNalUnit( + H265_VPS_TEST_DATA, /* nalOffset= */ 0, H265_VPS_TEST_DATA.length); + assertThat(vpsData.nalHeader.layerId).isEqualTo(0); + assertThat(vpsData.layerInfos).isEmpty(); + ImmutableList profileTierLevels = + vpsData.profileTierLevelsAndIndices.profileTierLevels; + assertThat(profileTierLevels).hasSize(1); + assertThat(profileTierLevels.get(0).generalProfileIdc).isEqualTo(2); + assertThat(profileTierLevels.get(0).generalProfileCompatibilityFlags).isEqualTo(4); + assertThat(profileTierLevels.get(0).generalLevelIdc).isEqualTo(153); + + NalUnitUtil.H265SpsData spsData = + NalUnitUtil.parseH265SpsNalUnit( + H265_SPS_TEST_DATA, /* nalOffset= */ 0, H265_SPS_TEST_DATA.length, vpsData); + assertThat(spsData.nalHeader.layerId).isEqualTo(0); + assertThat(spsData.profileTierLevel.generalProfileIdc).isEqualTo(2); + assertThat(spsData.profileTierLevel.generalProfileCompatibilityFlags).isEqualTo(4); + assertThat(spsData.profileTierLevel.generalLevelIdc).isEqualTo(153); + assertThat(spsData.chromaFormatIdc).isEqualTo(1); + assertThat(spsData.width).isEqualTo(3840); + assertThat(spsData.height).isEqualTo(2160); + assertThat(spsData.bitDepthLumaMinus8).isEqualTo(2); + assertThat(spsData.bitDepthChromaMinus8).isEqualTo(2); + assertThat(spsData.colorSpace).isEqualTo(6); + 
assertThat(spsData.colorRange).isEqualTo(2); + assertThat(spsData.colorTransfer).isEqualTo(7); + } + + @Test + public void parseH265VpsAndSpsNalUnits2Views() { + NalUnitUtil.H265VpsData vpsData = + NalUnitUtil.parseH265VpsNalUnit( + H265_VPS_TEST_DATA_2VIEWS, /* nalOffset= */ 0, H265_VPS_TEST_DATA_2VIEWS.length); + assertThat(vpsData.nalHeader.layerId).isEqualTo(0); + ImmutableList layerInfos = vpsData.layerInfos; + assertThat(layerInfos).hasSize(2); + assertThat(layerInfos.get(0).layerIdInVps).isEqualTo(0); + assertThat(layerInfos.get(0).viewId).isEqualTo(0); + assertThat(layerInfos.get(1).layerIdInVps).isEqualTo(1); + assertThat(layerInfos.get(1).viewId).isEqualTo(1); + ImmutableList profileTierLevels = + vpsData.profileTierLevelsAndIndices.profileTierLevels; + assertThat(profileTierLevels).hasSize(3); + assertThat(profileTierLevels.get(0).generalProfileIdc).isEqualTo(1); + assertThat(profileTierLevels.get(0).generalProfileCompatibilityFlags).isEqualTo(6); + assertThat(profileTierLevels.get(0).generalLevelIdc).isEqualTo(120); + assertThat(profileTierLevels.get(1).generalProfileIdc).isEqualTo(1); + assertThat(profileTierLevels.get(1).generalProfileCompatibilityFlags).isEqualTo(6); + assertThat(profileTierLevels.get(1).generalLevelIdc).isEqualTo(0); + assertThat(profileTierLevels.get(2).generalProfileIdc).isEqualTo(6); + assertThat(profileTierLevels.get(2).generalProfileCompatibilityFlags).isEqualTo(64); + assertThat(profileTierLevels.get(2).generalLevelIdc).isEqualTo(120); + ImmutableList repFormats = vpsData.repFormatsAndIndices.repFormats; + assertThat(repFormats).hasSize(1); + assertThat(repFormats.get(0).chromaFormatIdc).isEqualTo(1); + assertThat(repFormats.get(0).width).isEqualTo(1920); + assertThat(repFormats.get(0).height).isEqualTo(1080); + assertThat(repFormats.get(0).bitDepthLumaMinus8).isEqualTo(0); + assertThat(repFormats.get(0).bitDepthChromaMinus8).isEqualTo(0); + + NalUnitUtil.H265SpsData spsDataView0 = + NalUnitUtil.parseH265SpsNalUnit( + 
H265_SPS_TEST_DATA_2VIEWS_VIEW_0, + /* nalOffset= */ 0, + H265_SPS_TEST_DATA_2VIEWS_VIEW_0.length, + vpsData); + assertThat(spsDataView0.nalHeader.layerId).isEqualTo(0); + assertThat(spsDataView0.profileTierLevel.generalProfileIdc).isEqualTo(1); + assertThat(spsDataView0.profileTierLevel.generalProfileCompatibilityFlags).isEqualTo(6); + assertThat(spsDataView0.profileTierLevel.generalLevelIdc).isEqualTo(120); + assertThat(spsDataView0.chromaFormatIdc).isEqualTo(1); + assertThat(spsDataView0.width).isEqualTo(1920); + assertThat(spsDataView0.height).isEqualTo(1080); + assertThat(spsDataView0.bitDepthLumaMinus8).isEqualTo(0); + assertThat(spsDataView0.bitDepthChromaMinus8).isEqualTo(0); + assertThat(spsDataView0.colorSpace).isEqualTo(1); + assertThat(spsDataView0.colorRange).isEqualTo(2); + assertThat(spsDataView0.colorTransfer).isEqualTo(3); + + NalUnitUtil.H265SpsData spsDataView1 = + NalUnitUtil.parseH265SpsNalUnit( + H265_SPS_TEST_DATA_2VIEWS_VIEW_1, + /* nalOffset= */ 0, + H265_SPS_TEST_DATA_2VIEWS_VIEW_1.length, + vpsData); + assertThat(spsDataView1.nalHeader.layerId).isEqualTo(1); + assertThat(spsDataView1.profileTierLevel.generalProfileIdc).isEqualTo(6); + assertThat(spsDataView1.profileTierLevel.generalProfileCompatibilityFlags).isEqualTo(64); + assertThat(spsDataView1.profileTierLevel.generalLevelIdc).isEqualTo(120); + assertThat(spsDataView1.chromaFormatIdc).isEqualTo(1); + assertThat(spsDataView1.width).isEqualTo(1920); + assertThat(spsDataView1.height).isEqualTo(1080); + assertThat(spsDataView1.bitDepthLumaMinus8).isEqualTo(0); + assertThat(spsDataView1.bitDepthChromaMinus8).isEqualTo(0); + } + + @Test + public void parseH265VpsAndSpsNalUnits2ViewsHdr() { + NalUnitUtil.H265VpsData vpsData = + NalUnitUtil.parseH265VpsNalUnit( + H265_VPS_TEST_DATA_2VIEWS_HDR, + /* nalOffset= */ 0, + H265_VPS_TEST_DATA_2VIEWS_HDR.length); + assertThat(vpsData.nalHeader.layerId).isEqualTo(0); + ImmutableList layerInfos = vpsData.layerInfos; + 
assertThat(layerInfos).hasSize(2); + assertThat(layerInfos.get(0).layerIdInVps).isEqualTo(0); + assertThat(layerInfos.get(0).viewId).isEqualTo(0); + assertThat(layerInfos.get(1).layerIdInVps).isEqualTo(1); + assertThat(layerInfos.get(1).viewId).isEqualTo(1); + ImmutableList profileTierLevels = + vpsData.profileTierLevelsAndIndices.profileTierLevels; + assertThat(profileTierLevels).hasSize(3); + assertThat(profileTierLevels.get(0).generalProfileIdc).isEqualTo(2); + assertThat(profileTierLevels.get(0).generalProfileCompatibilityFlags).isEqualTo(4); + assertThat(profileTierLevels.get(0).generalLevelIdc).isEqualTo(153); + assertThat(profileTierLevels.get(1).generalProfileIdc).isEqualTo(2); + assertThat(profileTierLevels.get(1).generalProfileCompatibilityFlags).isEqualTo(4); + assertThat(profileTierLevels.get(1).generalLevelIdc).isEqualTo(153); + assertThat(profileTierLevels.get(2).generalProfileIdc).isEqualTo(6); + assertThat(profileTierLevels.get(2).generalProfileCompatibilityFlags).isEqualTo(68); + assertThat(profileTierLevels.get(2).generalLevelIdc).isEqualTo(153); + ImmutableList repFormats = vpsData.repFormatsAndIndices.repFormats; + assertThat(repFormats).hasSize(1); + assertThat(repFormats.get(0).chromaFormatIdc).isEqualTo(1); + assertThat(repFormats.get(0).width).isEqualTo(3840); + assertThat(repFormats.get(0).height).isEqualTo(2160); + assertThat(repFormats.get(0).bitDepthLumaMinus8).isEqualTo(2); + assertThat(repFormats.get(0).bitDepthChromaMinus8).isEqualTo(2); + + NalUnitUtil.H265SpsData spsDataView0 = + NalUnitUtil.parseH265SpsNalUnit( + H265_SPS_TEST_DATA_2VIEWS_HDR_VIEW_0, + /* nalOffset= */ 0, + H265_SPS_TEST_DATA_2VIEWS_HDR_VIEW_0.length, + vpsData); + assertThat(spsDataView0.nalHeader.layerId).isEqualTo(0); + assertThat(spsDataView0.profileTierLevel.generalProfileIdc).isEqualTo(2); + assertThat(spsDataView0.profileTierLevel.generalProfileCompatibilityFlags).isEqualTo(4); + assertThat(spsDataView0.profileTierLevel.generalLevelIdc).isEqualTo(153); + 
assertThat(spsDataView0.chromaFormatIdc).isEqualTo(1); + assertThat(spsDataView0.width).isEqualTo(3840); + assertThat(spsDataView0.height).isEqualTo(2160); + assertThat(spsDataView0.bitDepthLumaMinus8).isEqualTo(2); + assertThat(spsDataView0.bitDepthChromaMinus8).isEqualTo(2); + + NalUnitUtil.H265SpsData spsDataView1 = + NalUnitUtil.parseH265SpsNalUnit( + H265_SPS_TEST_DATA_2VIEWS_HDR_VIEW_1, + /* nalOffset= */ 0, + H265_SPS_TEST_DATA_2VIEWS_HDR_VIEW_1.length, + vpsData); + assertThat(spsDataView1.nalHeader.layerId).isEqualTo(1); + assertThat(spsDataView1.profileTierLevel.generalProfileIdc).isEqualTo(6); + assertThat(spsDataView1.profileTierLevel.generalProfileCompatibilityFlags).isEqualTo(68); + assertThat(spsDataView1.profileTierLevel.generalLevelIdc).isEqualTo(153); + assertThat(spsDataView1.chromaFormatIdc).isEqualTo(1); + assertThat(spsDataView1.width).isEqualTo(3840); + assertThat(spsDataView1.height).isEqualTo(2160); + assertThat(spsDataView1.bitDepthLumaMinus8).isEqualTo(2); + assertThat(spsDataView1.bitDepthChromaMinus8).isEqualTo(2); + } + @Test public void unescapeDoesNotModifyBuffersWithoutStartCodes() { assertUnescapeDoesNotModify(""); @@ -185,15 +404,18 @@ public final class NalUnitUtilTest { -128, 28, 120, 1, -57, 0, 56, -15 }; + NalUnitUtil.H265NalHeader nalHeader = + new NalUnitUtil.H265NalHeader(NalUnitUtil.H265_NAL_UNIT_TYPE_SPS, 0, 0); NalUnitUtil.H265SpsData spsData = - NalUnitUtil.parseH265SpsNalUnitPayload(spsNalUnitPayload, 0, spsNalUnitPayload.length); + NalUnitUtil.parseH265SpsNalUnitPayload( + spsNalUnitPayload, 0, spsNalUnitPayload.length, nalHeader, null); - assertThat(spsData.constraintBytes).isEqualTo(new int[] {144, 0, 0, 0, 0, 0}); - assertThat(spsData.generalLevelIdc).isEqualTo(150); - assertThat(spsData.generalProfileCompatibilityFlags).isEqualTo(4); - assertThat(spsData.generalProfileIdc).isEqualTo(2); - assertThat(spsData.generalProfileSpace).isEqualTo(0); - assertThat(spsData.generalTierFlag).isFalse(); + 
assertThat(spsData.profileTierLevel.constraintBytes).isEqualTo(new int[] {144, 0, 0, 0, 0, 0}); + assertThat(spsData.profileTierLevel.generalLevelIdc).isEqualTo(150); + assertThat(spsData.profileTierLevel.generalProfileCompatibilityFlags).isEqualTo(4); + assertThat(spsData.profileTierLevel.generalProfileIdc).isEqualTo(2); + assertThat(spsData.profileTierLevel.generalProfileSpace).isEqualTo(0); + assertThat(spsData.profileTierLevel.generalTierFlag).isFalse(); assertThat(spsData.height).isEqualTo(2160); assertThat(spsData.pixelWidthHeightRatio).isEqualTo(1); assertThat(spsData.seqParameterSetId).isEqualTo(0); @@ -214,15 +436,18 @@ public final class NalUnitUtilTest { 8 }; + NalUnitUtil.H265NalHeader nalHeader = + new NalUnitUtil.H265NalHeader(NalUnitUtil.H265_NAL_UNIT_TYPE_SPS, 0, 0); NalUnitUtil.H265SpsData spsData = - NalUnitUtil.parseH265SpsNalUnitPayload(spsNalUnitPayload, 0, spsNalUnitPayload.length); + NalUnitUtil.parseH265SpsNalUnitPayload( + spsNalUnitPayload, 0, spsNalUnitPayload.length, nalHeader, null); - assertThat(spsData.constraintBytes).isEqualTo(new int[] {0, 0, 0, 0, 0, 0}); - assertThat(spsData.generalLevelIdc).isEqualTo(150); - assertThat(spsData.generalProfileCompatibilityFlags).isEqualTo(6); - assertThat(spsData.generalProfileIdc).isEqualTo(2); - assertThat(spsData.generalProfileSpace).isEqualTo(0); - assertThat(spsData.generalTierFlag).isFalse(); + assertThat(spsData.profileTierLevel.constraintBytes).isEqualTo(new int[] {0, 0, 0, 0, 0, 0}); + assertThat(spsData.profileTierLevel.generalLevelIdc).isEqualTo(150); + assertThat(spsData.profileTierLevel.generalProfileCompatibilityFlags).isEqualTo(6); + assertThat(spsData.profileTierLevel.generalProfileIdc).isEqualTo(2); + assertThat(spsData.profileTierLevel.generalProfileSpace).isEqualTo(0); + assertThat(spsData.profileTierLevel.generalTierFlag).isFalse(); assertThat(spsData.width).isEqualTo(1080); assertThat(spsData.height).isEqualTo(1920); assertThat(spsData.pixelWidthHeightRatio).isEqualTo(1); diff 
--git a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java index 67536efed4..345ae420ec 100644 --- a/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java +++ b/libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java @@ -467,7 +467,10 @@ import com.google.common.collect.ImmutableMap; byte[] spsNalDataWithStartCode = initializationData.get(1); NalUnitUtil.H265SpsData spsData = NalUnitUtil.parseH265SpsNalUnit( - spsNalDataWithStartCode, NAL_START_CODE.length, spsNalDataWithStartCode.length); + spsNalDataWithStartCode, + NAL_START_CODE.length, + spsNalDataWithStartCode.length, + /* vpsData= */ null); formatBuilder.setPixelWidthHeightRatio(spsData.pixelWidthHeightRatio); formatBuilder.setHeight(spsData.height).setWidth(spsData.width); formatBuilder.setColorInfo( @@ -479,14 +482,16 @@ import com.google.common.collect.ImmutableMap; .setChromaBitdepth(spsData.bitDepthChromaMinus8 + 8) .build()); - formatBuilder.setCodecs( - CodecSpecificDataUtil.buildHevcCodecString( - spsData.generalProfileSpace, - spsData.generalTierFlag, - spsData.generalProfileIdc, - spsData.generalProfileCompatibilityFlags, - spsData.constraintBytes, - spsData.generalLevelIdc)); + if (spsData.profileTierLevel != null) { + formatBuilder.setCodecs( + CodecSpecificDataUtil.buildHevcCodecString( + spsData.profileTierLevel.generalProfileSpace, + spsData.profileTierLevel.generalTierFlag, + spsData.profileTierLevel.generalProfileIdc, + spsData.profileTierLevel.generalProfileCompatibilityFlags, + spsData.profileTierLevel.constraintBytes, + spsData.profileTierLevel.generalLevelIdc)); + } } /** diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/HevcConfig.java b/libraries/extractor/src/main/java/androidx/media3/extractor/HevcConfig.java index 29317967e6..d7cb7816a4 100644 --- 
a/libraries/extractor/src/main/java/androidx/media3/extractor/HevcConfig.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/HevcConfig.java @@ -89,7 +89,7 @@ public final class HevcConfig { if (nalUnitType == SPS_NAL_UNIT_TYPE && j == 0) { NalUnitUtil.H265SpsData spsData = NalUnitUtil.parseH265SpsNalUnit( - buffer, bufferPosition, bufferPosition + nalUnitLength); + buffer, bufferPosition, bufferPosition + nalUnitLength, null); width = spsData.width; height = spsData.height; bitdepthLuma = spsData.bitDepthLumaMinus8 + 8; @@ -100,14 +100,16 @@ public final class HevcConfig { pixelWidthHeightRatio = spsData.pixelWidthHeightRatio; maxNumReorderPics = spsData.maxNumReorderPics; - codecs = - CodecSpecificDataUtil.buildHevcCodecString( - spsData.generalProfileSpace, - spsData.generalTierFlag, - spsData.generalProfileIdc, - spsData.generalProfileCompatibilityFlags, - spsData.constraintBytes, - spsData.generalLevelIdc); + if (spsData.profileTierLevel != null) { + codecs = + CodecSpecificDataUtil.buildHevcCodecString( + spsData.profileTierLevel.generalProfileSpace, + spsData.profileTierLevel.generalTierFlag, + spsData.profileTierLevel.generalProfileIdc, + spsData.profileTierLevel.generalProfileCompatibilityFlags, + spsData.profileTierLevel.constraintBytes, + spsData.profileTierLevel.generalLevelIdc); + } } bufferPosition += nalUnitLength; data.skipBytes(nalUnitLength); diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/ts/H265Reader.java b/libraries/extractor/src/main/java/androidx/media3/extractor/ts/H265Reader.java index 51b4a007bf..cdfa0b9c68 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/ts/H265Reader.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/ts/H265Reader.java @@ -246,17 +246,20 @@ public final class H265Reader implements ElementaryStreamReader { // Skip the 3-byte NAL unit start code synthesised by the NalUnitTargetBuffer constructor. 
NalUnitUtil.H265SpsData spsData = - NalUnitUtil.parseH265SpsNalUnit(sps.nalData, /* nalOffset= */ 3, sps.nalLength); - - String codecs = - CodecSpecificDataUtil.buildHevcCodecString( - spsData.generalProfileSpace, - spsData.generalTierFlag, - spsData.generalProfileIdc, - spsData.generalProfileCompatibilityFlags, - spsData.constraintBytes, - spsData.generalLevelIdc); + NalUnitUtil.parseH265SpsNalUnit( + sps.nalData, /* nalOffset= */ 3, sps.nalLength, /* vpsData= */ null); + @Nullable String codecs = null; + if (spsData.profileTierLevel != null) { + codecs = + CodecSpecificDataUtil.buildHevcCodecString( + spsData.profileTierLevel.generalProfileSpace, + spsData.profileTierLevel.generalTierFlag, + spsData.profileTierLevel.generalProfileIdc, + spsData.profileTierLevel.generalProfileCompatibilityFlags, + spsData.profileTierLevel.constraintBytes, + spsData.profileTierLevel.generalLevelIdc); + } return new Format.Builder() .setId(formatId) .setSampleMimeType(MimeTypes.VIDEO_H265) diff --git a/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java b/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java index 6db416fb23..de96d83fff 100644 --- a/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java +++ b/libraries/muxer/src/main/java/androidx/media3/muxer/Boxes.java @@ -1202,7 +1202,7 @@ import java.util.List; NalUnitUtil.H265SpsData h265SpsData = NalUnitUtil.parseH265SpsNalUnit( - spsArray, /* nalOffset= */ 0, /* nalLimit= */ spsArray.length); + spsArray, /* nalOffset= */ 0, /* nalLimit= */ spsArray.length, /* vpsData= */ null); byte chromaFormat = (byte) (0xFC | h265SpsData.chromaFormatIdc); // First 6 bits reserved byte bitDepthLumaMinus8 =