Add support for reading location data in MP4 extractor

The geodata is read from the "©xyz" atom inside the "udta" box, which is where MediaMuxer writes it. See:
https://cs.android.com/android/platform/superproject/+/master:frameworks/av/media/libstagefright/MPEG4Writer.cpp;drc=master;l=5588

PiperOrigin-RevId: 515095127
This commit is contained in:
sheenachhabra 2023-03-08 19:33:09 +00:00 committed by tonihei
parent c0a8b944a6
commit 2f01f9c53b
19 changed files with 194 additions and 42 deletions

View File

@ -0,0 +1,93 @@
/*
* Copyright (C) 2023 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.extractor.metadata.mp4;
import android.os.Parcel;
import android.os.Parcelable;
import androidx.annotation.Nullable;
import androidx.media3.common.Metadata;
import androidx.media3.common.util.UnstableApi;
import com.google.common.primitives.Floats;
/**
 * Stores MP4 location data.
 *
 * <p>Instances are immutable. Equality follows {@link Float#compare(float, float)} semantics so
 * that it stays consistent with {@link #hashCode()}: {@code 0.0f} and {@code -0.0f} are treated as
 * different values, and {@code NaN} is equal to itself.
 */
@UnstableApi
public final class Mp4LocationData implements Metadata.Entry {

  /** The latitude value. */
  public final float latitude;

  /** The longitude value. */
  public final float longitude;

  /**
   * Creates an instance.
   *
   * @param latitude The latitude value.
   * @param longitude The longitude value.
   */
  public Mp4LocationData(float latitude, float longitude) {
    this.latitude = latitude;
    this.longitude = longitude;
  }

  /** Restores an instance from a parcel written by {@link #writeToParcel(Parcel, int)}. */
  private Mp4LocationData(Parcel in) {
    // Read order must mirror the write order in writeToParcel.
    latitude = in.readFloat();
    longitude = in.readFloat();
  }

  @Override
  public boolean equals(@Nullable Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    Mp4LocationData other = (Mp4LocationData) obj;
    // Float.compare (rather than ==) keeps equals consistent with hashCode, which hashes the bit
    // representation: with ==, 0.0f and -0.0f would be equal yet produce different hash codes.
    return Float.compare(latitude, other.latitude) == 0
        && Float.compare(longitude, other.longitude) == 0;
  }

  @Override
  public int hashCode() {
    int result = 17;
    result = 31 * result + Floats.hashCode(latitude);
    result = 31 * result + Floats.hashCode(longitude);
    return result;
  }

  @Override
  public String toString() {
    return "xyz: latitude=" + latitude + " longitude=" + longitude;
  }

  // Parcelable implementation.

  @Override
  public int describeContents() {
    return 0;
  }

  @Override
  public void writeToParcel(Parcel dest, int flags) {
    // Write order must mirror the read order in the Parcel constructor.
    dest.writeFloat(latitude);
    dest.writeFloat(longitude);
  }

  /** Creates {@link Mp4LocationData} instances from parcels. */
  public static final Parcelable.Creator<Mp4LocationData> CREATOR =
      new Parcelable.Creator<Mp4LocationData>() {

        @Override
        public Mp4LocationData createFromParcel(Parcel in) {
          return new Mp4LocationData(in);
        }

        @Override
        public Mp4LocationData[] newArray(int size) {
          return new Mp4LocationData[size];
        }
      };
}

View File

@ -347,6 +347,9 @@ import java.util.List;
// Four-character code "meta" (bytes 0x6d 0x65 0x74 0x61 in ASCII).
@SuppressWarnings("ConstantCaseForConstants")
public static final int TYPE_meta = 0x6d657461;
// Byte 0xa9 (the copyright sign in ISO-8859-1) followed by ASCII "xyz" (0x78 0x79 0x7a).
@SuppressWarnings("ConstantCaseForConstants")
public static final int TYPE_xyz = 0xa978797a;
// Four-character code "smta" (bytes 0x73 0x6d 0x74 0x61 in ASCII).
@SuppressWarnings("ConstantCaseForConstants")
public static final int TYPE_smta = 0x736d7461;

View File

@ -42,6 +42,7 @@ import androidx.media3.extractor.ExtractorUtil;
import androidx.media3.extractor.GaplessInfoHolder;
import androidx.media3.extractor.HevcConfig;
import androidx.media3.extractor.OpusUtil;
import androidx.media3.extractor.metadata.mp4.Mp4LocationData;
import androidx.media3.extractor.metadata.mp4.SmtaMetadataEntry;
import androidx.media3.extractor.mp4.Atom.LeafAtom;
import com.google.common.base.Function;
@ -58,6 +59,26 @@ import org.checkerframework.checker.nullness.compatqual.NullableType;
@SuppressWarnings("ConstantField")
/* package */ final class AtomParsers {
/**
 * Holder for the metadata found while parsing a udta atom.
 *
 * <p>Each field is {@code null} when no metadata was obtained for the corresponding sub atom.
 */
public static final class UdtaInfo {

  /** Metadata obtained from the meta sub atom, if any. */
  @Nullable public final Metadata metaMetadata;

  /** Metadata obtained from the smta sub atom, if any. */
  @Nullable public final Metadata smtaMetadata;

  /** Location metadata obtained from the xyz sub atom, if any. */
  @Nullable public final Metadata xyzMetadata;

  /**
   * Creates an instance.
   *
   * @param metaMetadata Metadata from the meta sub atom, or {@code null}.
   * @param smtaMetadata Metadata from the smta sub atom, or {@code null}.
   * @param xyzMetadata Location metadata from the xyz sub atom, or {@code null}.
   */
  public UdtaInfo(
      @Nullable Metadata metaMetadata,
      @Nullable Metadata smtaMetadata,
      @Nullable Metadata xyzMetadata) {
    this.xyzMetadata = xyzMetadata;
    this.smtaMetadata = smtaMetadata;
    this.metaMetadata = metaMetadata;
  }
}
private static final String TAG = "AtomParsers";
@SuppressWarnings("ConstantCaseForConstants")
@ -157,15 +178,15 @@ import org.checkerframework.checker.nullness.compatqual.NullableType;
* Parses a udta atom.
*
* @param udtaAtom The udta (user data) atom to decode.
* @return A {@link Pair} containing the metadata from the meta child atom as first value (if
* any), and the metadata from the smta child atom as second value (if any).
* @return A {@link UdtaInfo} containing the metadata extracted from the meta, smta and xyz child
* atoms (if present).
*/
public static Pair<@NullableType Metadata, @NullableType Metadata> parseUdta(
Atom.LeafAtom udtaAtom) {
public static UdtaInfo parseUdta(Atom.LeafAtom udtaAtom) {
ParsableByteArray udtaData = udtaAtom.data;
udtaData.setPosition(Atom.HEADER_SIZE);
@Nullable Metadata metaMetadata = null;
@Nullable Metadata smtaMetadata = null;
@Nullable Metadata xyzMetadata = null;
while (udtaData.bytesLeft() >= Atom.HEADER_SIZE) {
int atomPosition = udtaData.getPosition();
int atomSize = udtaData.readInt();
@ -176,10 +197,12 @@ import org.checkerframework.checker.nullness.compatqual.NullableType;
} else if (atomType == Atom.TYPE_smta) {
udtaData.setPosition(atomPosition);
smtaMetadata = parseSmta(udtaData, atomPosition + atomSize);
} else if (atomType == Atom.TYPE_xyz) {
xyzMetadata = parseXyz(udtaData);
}
udtaData.setPosition(atomPosition + atomSize);
}
return Pair.create(metaMetadata, smtaMetadata);
return new UdtaInfo(metaMetadata, smtaMetadata, xyzMetadata);
}
/**
@ -760,6 +783,27 @@ import org.checkerframework.checker.nullness.compatqual.NullableType;
return entries.isEmpty() ? null : new Metadata(entries);
}
/**
 * Parses the location metadata from the xyz atom.
 *
 * <p>The data is read as a 16-bit string length, a 16-bit language code (skipped), and then the
 * location string itself, which looks like {@code "+35.1345-15.1020/"}. The last sign character
 * in the string marks where the latitude ends and the longitude begins, and the final character
 * of the string is dropped before parsing the longitude.
 *
 * @param xyzBox The data to read from, positioned at the 16-bit length field that precedes the
 *     location string.
 * @return The location metadata, or {@code null} if the data is truncated or cannot be parsed.
 */
@Nullable
private static Metadata parseXyz(ParsableByteArray xyzBox) {
  // The string is preceded by a 2 byte length and a 2 byte language code.
  if (xyzBox.bytesLeft() < 4) {
    return null;
  }
  int length = xyzBox.readShort();
  xyzBox.skipBytes(2); // language code.
  // readShort() is signed, so a corrupt length can be negative; it can also claim more bytes than
  // are available. Reject both here rather than letting readString throw out of the parser. An
  // empty string can never contain a latitude/longitude pair, so it is rejected as well.
  if (length <= 0 || length > xyzBox.bytesLeft()) {
    return null;
  }
  String location = xyzBox.readString(length);
  // The location string looks like "+35.1345-15.1020/".
  int plusSignIndex = location.lastIndexOf('+');
  int minusSignIndex = location.lastIndexOf('-');
  int latitudeEndIndex = max(plusSignIndex, minusSignIndex);
  try {
    float latitude = Float.parseFloat(location.substring(0, latitudeEndIndex));
    float longitude =
        Float.parseFloat(location.substring(latitudeEndIndex, location.length() - 1));
    return new Metadata(new Mp4LocationData(latitude, longitude));
  } catch (IndexOutOfBoundsException | NumberFormatException exception) {
    // Invalid input, e.g. no sign character found or a non-numeric component.
    return null;
  }
}
/**
* Parses metadata from a Samsung smta atom.
*

View File

@ -23,7 +23,6 @@ import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.lang.annotation.ElementType.TYPE_USE;
import android.util.Pair;
import androidx.annotation.IntDef;
import androidx.annotation.Nullable;
import androidx.media3.common.C;
@ -58,7 +57,6 @@ import java.lang.annotation.Target;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import org.checkerframework.checker.nullness.compatqual.NullableType;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
/** Extracts data from the MP4 container format. */
@ -494,14 +492,15 @@ public final class Mp4Extractor implements Extractor, SeekMap {
// Process metadata.
@Nullable Metadata udtaMetaMetadata = null;
@Nullable Metadata smtaMetadata = null;
@Nullable Metadata xyzMetadata = null;
boolean isQuickTime = fileType == FILE_TYPE_QUICKTIME;
GaplessInfoHolder gaplessInfoHolder = new GaplessInfoHolder();
@Nullable Atom.LeafAtom udta = moov.getLeafAtomOfType(Atom.TYPE_udta);
if (udta != null) {
Pair<@NullableType Metadata, @NullableType Metadata> udtaMetadata =
AtomParsers.parseUdta(udta);
udtaMetaMetadata = udtaMetadata.first;
smtaMetadata = udtaMetadata.second;
AtomParsers.UdtaInfo udtaInfo = AtomParsers.parseUdta(udta);
udtaMetaMetadata = udtaInfo.metaMetadata;
smtaMetadata = udtaInfo.smtaMetadata;
xyzMetadata = udtaInfo.xyzMetadata;
if (udtaMetaMetadata != null) {
gaplessInfoHolder.setFromMetadata(udtaMetaMetadata);
}
@ -562,7 +561,8 @@ public final class Mp4Extractor implements Extractor, SeekMap {
mdtaMetadata,
formatBuilder,
smtaMetadata,
slowMotionMetadataEntries.isEmpty() ? null : new Metadata(slowMotionMetadataEntries));
slowMotionMetadataEntries.isEmpty() ? null : new Metadata(slowMotionMetadataEntries),
xyzMetadata);
mp4Track.trackOutput.format(formatBuilder.build());
if (track.type == C.TRACK_TYPE_VIDEO && firstVideoTrackIndex == C.INDEX_UNSET) {

View File

@ -1,10 +1,10 @@
seekMap:
isSeekable = true
duration = 1024000
getPosition(0) = [[timeUs=0, position=48]]
getPosition(1) = [[timeUs=0, position=48]]
getPosition(512000) = [[timeUs=0, position=48]]
getPosition(1024000) = [[timeUs=0, position=48]]
getPosition(0) = [[timeUs=0, position=2269]]
getPosition(1) = [[timeUs=0, position=2269]]
getPosition(512000) = [[timeUs=0, position=2269]]
getPosition(1024000) = [[timeUs=0, position=2269]]
numberOfTracks = 2
track 0:
total output bytes = 89876
@ -17,6 +17,7 @@ track 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
@ -152,7 +153,7 @@ track 1:
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample 0:

View File

@ -1,10 +1,10 @@
seekMap:
isSeekable = true
duration = 1024000
getPosition(0) = [[timeUs=0, position=48]]
getPosition(1) = [[timeUs=0, position=48]]
getPosition(512000) = [[timeUs=0, position=48]]
getPosition(1024000) = [[timeUs=0, position=48]]
getPosition(0) = [[timeUs=0, position=2269]]
getPosition(1) = [[timeUs=0, position=2269]]
getPosition(512000) = [[timeUs=0, position=2269]]
getPosition(1024000) = [[timeUs=0, position=2269]]
numberOfTracks = 2
track 0:
total output bytes = 89876
@ -17,6 +17,7 @@ track 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
@ -152,7 +153,7 @@ track 1:
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample 0:

View File

@ -1,10 +1,10 @@
seekMap:
isSeekable = true
duration = 1024000
getPosition(0) = [[timeUs=0, position=48]]
getPosition(1) = [[timeUs=0, position=48]]
getPosition(512000) = [[timeUs=0, position=48]]
getPosition(1024000) = [[timeUs=0, position=48]]
getPosition(0) = [[timeUs=0, position=2269]]
getPosition(1) = [[timeUs=0, position=2269]]
getPosition(512000) = [[timeUs=0, position=2269]]
getPosition(1024000) = [[timeUs=0, position=2269]]
numberOfTracks = 2
track 0:
total output bytes = 89876
@ -17,6 +17,7 @@ track 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
@ -152,7 +153,7 @@ track 1:
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample 0:

View File

@ -1,10 +1,10 @@
seekMap:
isSeekable = true
duration = 1024000
getPosition(0) = [[timeUs=0, position=48]]
getPosition(1) = [[timeUs=0, position=48]]
getPosition(512000) = [[timeUs=0, position=48]]
getPosition(1024000) = [[timeUs=0, position=48]]
getPosition(0) = [[timeUs=0, position=2269]]
getPosition(1) = [[timeUs=0, position=2269]]
getPosition(512000) = [[timeUs=0, position=2269]]
getPosition(1024000) = [[timeUs=0, position=2269]]
numberOfTracks = 2
track 0:
total output bytes = 89876
@ -17,6 +17,7 @@ track 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
@ -152,7 +153,7 @@ track 1:
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample 0:

View File

@ -1,10 +1,10 @@
seekMap:
isSeekable = true
duration = 1024000
getPosition(0) = [[timeUs=0, position=48]]
getPosition(1) = [[timeUs=0, position=48]]
getPosition(512000) = [[timeUs=0, position=48]]
getPosition(1024000) = [[timeUs=0, position=48]]
getPosition(0) = [[timeUs=0, position=2269]]
getPosition(1) = [[timeUs=0, position=2269]]
getPosition(512000) = [[timeUs=0, position=2269]]
getPosition(1024000) = [[timeUs=0, position=2269]]
numberOfTracks = 2
track 0:
total output bytes = 89876
@ -17,6 +17,7 @@ track 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
@ -152,7 +153,7 @@ track 1:
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample 0:

View File

@ -6,6 +6,7 @@ format 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
@ -14,7 +15,7 @@ format 1:
channelCount = 1
sampleRate = 48000
pcmEncoding = 2
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
sample:
trackIndex = 1
dataHashCode = 1868041800

View File

@ -6,6 +6,7 @@ format 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
@ -18,7 +19,7 @@ format 1:
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample:

View File

@ -3,7 +3,7 @@ format 0:
channelCount = 1
sampleRate = 44100
pcmEncoding = 2
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
sample:
trackIndex = 0
dataHashCode = 915609509

View File

@ -6,6 +6,7 @@ format 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B
@ -18,7 +19,7 @@ format 1:
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample:

View File

@ -6,6 +6,7 @@ format 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B

View File

@ -7,7 +7,7 @@ format 0:
channelCount = 1
sampleRate = 44100
language = und
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0]]
metadata = entries=[TSSE: description=null: values=[Lavf56.1.0], xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 2, hash 5F7
sample:

View File

@ -6,6 +6,7 @@ format 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B

View File

@ -6,6 +6,7 @@ format 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B

View File

@ -6,6 +6,7 @@ format 0:
width = 1080
height = 720
frameRate = 29.970028
metadata = entries=[xyz: latitude=40.68 longitude=-74.5]
initializationData:
data = length 29, hash 4746B5D9
data = length 10, hash 7A0D0F2B