Improve frame rate calculation by using media duration from mdhd box

- Added logic to parse media duration from the `mdhd` box for accurate frame rate calculation.
- Falls back to the track duration from `tkhd` when `mdhd` contains invalid or missing data.
- Avoids incorrect frame rate calculations in MP4 files with an edit list (`elst`) box.
- Adds frame rate calculations for partially fragmented MP4 files.
- Verified accuracy with tools like `mediainfo` and `ffprobe`.

Issue: androidx/media#1531

**Note**: The slight difference in frame rate values in dump files that aren’t MP4s with an edit list or fragmented MP4s isn’t due to differences in `tkhd` and `mdhd` duration values (which should be identical for non-edited or non-fragmented files). Rather, it’s because they are calculated using different timescales. The `mvhd` box defines a global movie timescale, which is used for the track's `tkhd` duration. Meanwhile, each track’s `mdhd` box defines its own timescale specific to its content type, which we now use for more accurate frame rate calculation.

PiperOrigin-RevId: 676046744
This commit is contained in:
rohks 2024-09-18 10:39:48 -07:00 committed by Copybara-Service
parent 8799bf4bfe
commit ecb0024a0b
54 changed files with 98 additions and 54 deletions

View File

@ -40,6 +40,9 @@
* Fix preroll sample handling for non-keyframe media start positions when
processing edit lists in MP4 files
([#1659](https://github.com/google/ExoPlayer/issues/1659)).
* Improved frame rate calculation by using media duration from the `mdhd`
box in `Mp4Extractor` and `FragmentedMp4Extractor`
([#1531](https://github.com/androidx/media/issues/1531)).
* DataSource:
* Audio:
* Video:

View File

@ -203,6 +203,7 @@ public class DefaultSsChunkSource implements SsChunkSource {
streamElement.timescale,
C.TIME_UNSET,
manifest.durationUs,
/* mediaDurationUs= */ manifest.durationUs,
format,
Track.TRANSFORMATION_NONE,
trackEncryptionBoxes,

View File

@ -351,8 +351,7 @@ public final class BoxParser {
checkNotNull(mdia.getContainerBoxOfType(Mp4Box.TYPE_minf))
.getContainerBoxOfType(Mp4Box.TYPE_stbl));
Pair<Long, String> mdhdData =
parseMdhd(checkNotNull(mdia.getLeafBoxOfType(Mp4Box.TYPE_mdhd)).data);
MdhdData mdhdData = parseMdhd(checkNotNull(mdia.getLeafBoxOfType(Mp4Box.TYPE_mdhd)).data);
LeafBox stsd = stbl.getLeafBoxOfType(Mp4Box.TYPE_stsd);
if (stsd == null) {
throw ParserException.createForMalformedContainer(
@ -363,7 +362,7 @@ public final class BoxParser {
stsd.data,
tkhdData.id,
tkhdData.rotationDegrees,
mdhdData.second,
mdhdData.language,
drmInitData,
isQuickTime);
@Nullable long[] editListDurations = null;
@ -383,9 +382,10 @@ public final class BoxParser {
: new Track(
tkhdData.id,
trackType,
mdhdData.first,
mdhdData.timescale,
movieTimescale,
durationUs,
mdhdData.mediaDurationUs,
stsdData.format,
stsdData.requiredSampleTransformation,
stsdData.trackEncryptionBoxes,
@ -431,6 +431,12 @@ public final class BoxParser {
/* durationUs= */ 0);
}
if (track.type == C.TRACK_TYPE_VIDEO && track.mediaDurationUs > 0) {
float frameRate = sampleCount / (track.mediaDurationUs / 1000000f);
Format format = track.format.buildUpon().setFrameRate(frameRate).build();
track = track.copyWithFormat(format);
}
// Entries are byte offsets of chunks.
boolean chunkOffsetsAreLongs = false;
@Nullable LeafBox chunkOffsetsAtom = stblBox.getLeafBoxOfType(Mp4Box.TYPE_stco);
@ -927,23 +933,30 @@ public final class BoxParser {
* Parses an mdhd atom (defined in ISO/IEC 14496-12).
*
* @param mdhd The mdhd atom to decode.
* @return A pair consisting of the media timescale defined as the number of time units that pass
* in one second, and the language code.
* @return An {@link MdhdData} object containing the parsed data.
*/
private static Pair<Long, String> parseMdhd(ParsableByteArray mdhd) {
private static MdhdData parseMdhd(ParsableByteArray mdhd) {
mdhd.setPosition(Mp4Box.HEADER_SIZE);
int fullAtom = mdhd.readInt();
int version = parseFullBoxVersion(fullAtom);
mdhd.skipBytes(version == 0 ? 8 : 16);
long timescale = mdhd.readUnsignedInt();
mdhd.skipBytes(version == 0 ? 4 : 8);
long mediaDuration = version == 0 ? mdhd.readUnsignedInt() : mdhd.readUnsignedLongToLong();
long mediaDurationUs;
if (mediaDuration == 0) {
// 0 duration normally indicates that the file is fully fragmented (i.e. all of the media
// samples are in fragments). Treat as unknown.
mediaDurationUs = C.TIME_UNSET;
} else {
mediaDurationUs = Util.scaleLargeTimestamp(mediaDuration, C.MICROS_PER_SECOND, timescale);
}
int languageCode = mdhd.readUnsignedShort();
String language =
""
+ (char) (((languageCode >> 10) & 0x1F) + 0x60)
+ (char) (((languageCode >> 5) & 0x1F) + 0x60)
+ (char) ((languageCode & 0x1F) + 0x60);
return Pair.create(timescale, language);
return new MdhdData(timescale, mediaDurationUs, language);
}
/**
@ -2408,6 +2421,19 @@ public final class BoxParser {
}
}
/** Data parsed from mdhd box: the media timescale, media duration and language. */
private static final class MdhdData {
// Media timescale, i.e. the number of time units that pass in one second.
private final long timescale;
// Media duration in microseconds, or C.TIME_UNSET when the mdhd duration field is 0.
private final long mediaDurationUs;
// Three-character language code decoded from the packed 16-bit mdhd language field.
private final String language;
public MdhdData(long timescale, long mediaDurationUs, String language) {
this.timescale = timescale;
this.mediaDurationUs = mediaDurationUs;
this.language = language;
}
}
/** Data parsed from vexu box. */
/* package */ static final class VexuData {
@Nullable private final EyesData eyesData;

View File

@ -738,7 +738,9 @@ public final class Mp4Extractor implements Extractor, SeekMap {
roleFlags |=
firstVideoTrackIndex == C.INDEX_UNSET ? C.ROLE_FLAG_MAIN : C.ROLE_FLAG_ALTERNATE;
}
if (trackDurationUs > 0 && trackSampleTable.sampleCount > 0) {
if (track.format.frameRate == Format.NO_VALUE
&& trackDurationUs > 0
&& trackSampleTable.sampleCount > 0) {
float frameRate = trackSampleTable.sampleCount / (trackDurationUs / 1000000f);
formatBuilder.setFrameRate(frameRate);
}

View File

@ -64,6 +64,9 @@ public final class Track {
/** The duration of the track in microseconds, or {@link C#TIME_UNSET} if unknown. */
public final long durationUs;
/** The duration of the media in microseconds, or {@link C#TIME_UNSET} if unknown. */
public final long mediaDurationUs;
/** The format. */
public final Format format;
@ -93,6 +96,7 @@ public final class Track {
long timescale,
long movieTimescale,
long durationUs,
long mediaDurationUs,
Format format,
@Transformation int sampleTransformation,
@Nullable TrackEncryptionBox[] sampleDescriptionEncryptionBoxes,
@ -104,6 +108,7 @@ public final class Track {
this.timescale = timescale;
this.movieTimescale = movieTimescale;
this.durationUs = durationUs;
this.mediaDurationUs = mediaDurationUs;
this.format = format;
this.sampleTransformation = sampleTransformation;
this.sampleDescriptionEncryptionBoxes = sampleDescriptionEncryptionBoxes;
@ -133,6 +138,7 @@ public final class Track {
timescale,
movieTimescale,
durationUs,
mediaDurationUs,
format,
sampleTransformation,
sampleDescriptionEncryptionBoxes,
@ -148,6 +154,7 @@ public final class Track {
timescale,
movieTimescale,
durationUs,
mediaDurationUs,
format,
sampleTransformation,
sampleDescriptionEncryptionBoxes,

View File

@ -45,6 +45,7 @@ public class FragmentedMp4ExtractorNoSniffingTest {
/* timescale= */ 30_000,
/* movieTimescale= */ 1000,
/* durationUs= */ C.TIME_UNSET,
/* mediaDurationUs= */ C.TIME_UNSET,
new Format.Builder().setSampleMimeType(MimeTypes.VIDEO_H264).build(),
/* sampleTransformation= */ Track.TRANSFORMATION_NONE,
/* sampleDescriptionEncryptionBoxes= */ null,

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1280
height = 720
frameRate = 13.31
frameRate = 13.32
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 32.57
frameRate = 28.03
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -13,6 +13,7 @@ track 0:
maxNumReorderSamples = 2
width = 1080
height = 720
frameRate = 29.97
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8

View File

@ -13,6 +13,7 @@ track 0:
maxNumReorderSamples = 2
width = 1080
height = 720
frameRate = 29.97
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8

View File

@ -13,6 +13,7 @@ track 0:
maxNumReorderSamples = 2
width = 1080
height = 720
frameRate = 29.97
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8

View File

@ -13,6 +13,7 @@ track 0:
maxNumReorderSamples = 2
width = 1080
height = 720
frameRate = 29.97
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1080
frameRate = 30.17
frameRate = 30.00
rotationDegrees = 90
colorInfo:
colorSpace = 6

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 2
width = 1920
height = 1440
frameRate = 29.18
frameRate = 28.81
rotationDegrees = 90
colorInfo:
colorRange = 1

View File

@ -65,7 +65,7 @@ track 2:
maxNumReorderSamples = 0
width = 1920
height = 1080
frameRate = 30.00
frameRate = 29.98
colorInfo:
colorSpace = 2
colorRange = 1

View File

@ -17,7 +17,7 @@ track 0:
maxNumReorderSamples = 0
width = 1920
height = 1080
frameRate = 30.00
frameRate = 29.98
rotationDegrees = 90
colorInfo:
colorSpace = 2

View File

@ -6,7 +6,7 @@ format video:
maxNumReorderSamples = 2
width = 704
height = 576
frameRate = 1.04
frameRate = 1.00
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8

View File

@ -6,7 +6,7 @@ format video:
maxNumReorderSamples = 2
width = 320
height = 240
frameRate = 29.52
frameRate = 30.47
colorInfo:
lumaBitdepth = 8
chromaBitdepth = 8