Prototype frame extraction based on analyzer mode

Make ByteBufferGlEffect public.
Build a speed test and an end-to-end test that verify
frames can be copied to a CPU-accessible ByteBuffer.

PiperOrigin-RevId: 670213343
dancho 2024-09-02 07:32:00 -07:00 committed by Copybara-Service
parent 0843444a34
commit 207684ca66
6 changed files with 218 additions and 54 deletions
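
For context, a minimal sketch of the flow this commit enables, mirroring the new end-to-end test further down. `videoUri` and `outputPath` are placeholder values, and `FrameCountingByteBufferProcessor` is the test processor added in AndroidTestUtil below; in analyzer mode the muxer output is discarded:

```java
// Sketch only: wire a ByteBufferGlEffect into an analyzer-mode Transformer.
Context context = ApplicationProvider.getApplicationContext();
Transformer transformer = ExperimentalAnalyzerModeFactory.buildAnalyzer(context);
FrameCountingByteBufferProcessor processor = new FrameCountingByteBufferProcessor();
EditedMediaItem editedMediaItem =
    new EditedMediaItem.Builder(MediaItem.fromUri(videoUri))
        .setEffects(
            new Effects(
                /* audioProcessors= */ ImmutableList.of(),
                /* videoEffects= */ ImmutableList.of(new ByteBufferGlEffect<>(processor))))
        .build();
// outputPath is required by the API, but analyzer mode writes no real output.
transformer.start(editedMediaItem, outputPath);
```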

libraries/common/src/main/java/androidx/media3/common/util/GlUtil.java

@@ -35,6 +35,7 @@ import android.opengl.GLUtils;
 import android.opengl.Matrix;
 import androidx.annotation.IntRange;
 import androidx.annotation.Nullable;
+import androidx.annotation.RequiresApi;
 import androidx.media3.common.C;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
@@ -897,6 +898,10 @@ public final class GlUtil {
    *
    * <p>This is a non-blocking call which reads the data asynchronously.
    *
+   * <p>Requires API 24: This method must call the version of {@link GLES30#glReadPixels(int, int,
+   * int, int, int, int, int)} which accepts an integer offset as the last parameter. This version
+   * of glReadPixels is not available in the Java {@link GLES30} wrapper until API 24.
+   *
    * <p>HDR support is not yet implemented.
    *
    * @param readFboId The framebuffer that holds pixel data.
@@ -904,6 +909,7 @@ public final class GlUtil {
    * @param height The image height.
    * @param bufferId The pixel buffer object to read into.
    */
+  @RequiresApi(24)
   public static void schedulePixelBufferRead(int readFboId, int width, int height, int bufferId)
       throws GlException {
     focusFramebufferUsingCurrentContext(readFboId, width, height);
@@ -940,10 +946,13 @@ public final class GlUtil {
    * <p>This call blocks until the pixel buffer data from the last {@link #schedulePixelBufferRead}
    * call is available.
    *
+   * <p>Requires API 24: see {@link #schedulePixelBufferRead}.
+   *
    * @param bufferId The pixel buffer object.
    * @param size The size of the pixel buffer object's data store to be mapped.
    * @return The {@link ByteBuffer} that holds pixel data.
    */
+  @RequiresApi(24)
   public static ByteBuffer mapPixelBufferObject(int bufferId, int size) throws GlException {
     GLES20.glBindBuffer(GLES30.GL_PIXEL_PACK_BUFFER, bufferId);
     checkGlError();
@@ -971,8 +980,11 @@ public final class GlUtil {
    * <p>When this method returns, the pixel buffer object {@code bufferId} can be reused by {@link
    * #schedulePixelBufferRead}.
    *
+   * <p>Requires API 24: see {@link #schedulePixelBufferRead}.
+   *
    * @param bufferId The pixel buffer object.
    */
+  @RequiresApi(24)
   public static void unmapPixelBufferObject(int bufferId) throws GlException {
     GLES30.glBindBuffer(GLES30.GL_PIXEL_PACK_BUFFER, bufferId);
     GlUtil.checkGlError();
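
Taken together, these three GlUtil methods form a pixel-buffer-object round trip. A minimal sketch of the intended calling sequence on a thread with a current GL context (API 24+), assuming an RGBA framebuffer and a PBO id obtained elsewhere; each call may throw GlUtil.GlException:

```java
// Assumed inputs: readFboId (framebuffer holding pixel data), bufferId (a GL pixel
// buffer object), and the image width/height. RGBA_8888 means 4 bytes per pixel.
int size = width * height * 4;

// 1. Non-blocking: schedule the GPU -> PBO copy.
GlUtil.schedulePixelBufferRead(readFboId, width, height, bufferId);

// ... do other GL work here so the copy can complete in the background ...

// 2. Blocking: map the PBO into a CPU-accessible ByteBuffer once the copy is done.
ByteBuffer pixelData = GlUtil.mapPixelBufferObject(bufferId, size);

// ... read pixelData ...

// 3. Unmap; bufferId may now be reused by the next schedulePixelBufferRead.
GlUtil.unmapPixelBufferObject(bufferId);
```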

libraries/effect/src/main/java/androidx/media3/effect/ByteBufferConcurrentEffect.java

@@ -17,8 +17,10 @@ package androidx.media3.effect;
 import static androidx.media3.common.util.Assertions.checkNotNull;
 import static androidx.media3.common.util.Assertions.checkState;
+import static androidx.media3.common.util.Util.SDK_INT;
 import static com.google.common.util.concurrent.Futures.immediateFailedFuture;

+import android.opengl.GLES20;
 import androidx.media3.common.C;
 import androidx.media3.common.GlObjectsProvider;
 import androidx.media3.common.GlTextureInfo;
@@ -110,6 +112,7 @@ import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
             new GlRect(effectInputTexture.width, effectInputTexture.height));
         TexturePixelBuffer texturePixelBuffer = new TexturePixelBuffer(effectInputTexture);
+        texturePixelBuffer.schedulePixelBufferRead(pixelBufferObjectProvider);
         unmappedPixelBuffers.add(texturePixelBuffer);
         return Util.transformFutureAsync(
             texturePixelBuffer.imageSettableFuture,
@@ -124,7 +127,7 @@
       throws VideoFrameProcessingException {
     try {
       TexturePixelBuffer oldestRunningFrame = checkNotNull(mappedPixelBuffers.poll());
-      oldestRunningFrame.unmapAndRecycle();
+      oldestRunningFrame.unmapAndRecycle(pixelBufferObjectProvider);
     } catch (GlUtil.GlException e) {
       throw new VideoFrameProcessingException(e);
     }
@@ -167,10 +170,10 @@
   private void unmapAndRecyclePixelBuffers() throws GlUtil.GlException {
     TexturePixelBuffer texturePixelBuffer;
     while ((texturePixelBuffer = unmappedPixelBuffers.poll()) != null) {
-      texturePixelBuffer.unmapAndRecycle();
+      texturePixelBuffer.unmapAndRecycle(pixelBufferObjectProvider);
     }
     while ((texturePixelBuffer = mappedPixelBuffers.poll()) != null) {
-      texturePixelBuffer.unmapAndRecycle();
+      texturePixelBuffer.unmapAndRecycle(pixelBufferObjectProvider);
     }
   }
@@ -188,32 +191,61 @@
    * Manages the lifecycle of a {@link PixelBufferObjectInfo} which is mapped to a {@link
    * GlTextureInfo}.
    */
-  private final class TexturePixelBuffer {
-    public final int width;
-    public final int height;
-    public final PixelBufferObjectInfo pixelBufferObjectInfo;
+  private static final class TexturePixelBuffer {
     public final SettableFuture<ByteBufferGlEffect.Image> imageSettableFuture;
+    private final GlTextureInfo textureInfo;
+
+    private @MonotonicNonNull PixelBufferObjectInfo pixelBufferObjectInfo;
     private boolean mapped;

-    public TexturePixelBuffer(GlTextureInfo textureInfo) throws GlUtil.GlException {
-      width = textureInfo.width;
-      height = textureInfo.height;
-      int pixelBufferSize = texturePixelBufferSize(textureInfo);
-      pixelBufferObjectInfo = pixelBufferObjectProvider.getPixelBufferObject(pixelBufferSize);
-      GlUtil.schedulePixelBufferRead(textureInfo.fboId, width, height, pixelBufferObjectInfo.id);
+    public TexturePixelBuffer(GlTextureInfo textureInfo) {
+      this.textureInfo = textureInfo;
       imageSettableFuture = SettableFuture.create();
     }

+    public void schedulePixelBufferRead(PixelBufferObjectProvider pixelBufferObjectProvider)
+        throws GlUtil.GlException {
+      int pixelBufferSize = texturePixelBufferSize(textureInfo);
+      pixelBufferObjectInfo = pixelBufferObjectProvider.getPixelBufferObject(pixelBufferSize);
+      if (SDK_INT >= 24) {
+        GlUtil.schedulePixelBufferRead(
+            textureInfo.fboId, textureInfo.width, textureInfo.height, pixelBufferObjectInfo.id);
+      }
+    }
+
     public void map() throws GlUtil.GlException {
-      ByteBuffer byteBuffer =
-          GlUtil.mapPixelBufferObject(pixelBufferObjectInfo.id, pixelBufferObjectInfo.size);
-      imageSettableFuture.set(new ByteBufferGlEffect.Image(width, height, byteBuffer));
+      checkNotNull(pixelBufferObjectInfo);
+      ByteBuffer byteBuffer;
+      if (SDK_INT >= 24) {
+        byteBuffer =
+            GlUtil.mapPixelBufferObject(pixelBufferObjectInfo.id, pixelBufferObjectInfo.size);
+      } else {
+        // Asynchronous OpenGL reading isn't supported. Fall back to blocking glReadPixels.
+        int pixelBufferSize = texturePixelBufferSize(textureInfo);
+        byteBuffer = ByteBuffer.allocateDirect(pixelBufferSize);
+        GlUtil.focusFramebufferUsingCurrentContext(
+            textureInfo.fboId, textureInfo.width, textureInfo.height);
+        GlUtil.checkGlError();
+        GLES20.glReadPixels(
+            /* x= */ 0,
+            /* y= */ 0,
+            textureInfo.width,
+            textureInfo.height,
+            GLES20.GL_RGBA,
+            GLES20.GL_UNSIGNED_BYTE,
+            byteBuffer);
+        GlUtil.checkGlError();
+      }
+      imageSettableFuture.set(
+          new ByteBufferGlEffect.Image(textureInfo.width, textureInfo.height, byteBuffer));
       mapped = true;
     }

-    public void unmapAndRecycle() throws GlUtil.GlException {
-      if (mapped) {
+    public void unmapAndRecycle(PixelBufferObjectProvider pixelBufferObjectProvider)
+        throws GlUtil.GlException {
+      checkNotNull(pixelBufferObjectInfo);
+      if (mapped && SDK_INT >= 24) {
         GlUtil.unmapPixelBufferObject(pixelBufferObjectInfo.id);
       }
+      pixelBufferObjectProvider.recycle(pixelBufferObjectInfo);
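
The interesting part of the new map() is the API-level split: asynchronous PBO mapping on API 24+, blocking glReadPixels otherwise. The fallback path, extracted here as a self-contained sketch for clarity (the helper name is ours; the real code inlines this and sizes the buffer with texturePixelBufferSize):

```java
// Pre-API-24 fallback from map() above: a blocking glReadPixels into a direct
// ByteBuffer. Assumes RGBA_8888, i.e. 4 bytes per pixel.
private static ByteBuffer blockingReadPixels(GlTextureInfo textureInfo)
    throws GlUtil.GlException {
  ByteBuffer byteBuffer = ByteBuffer.allocateDirect(textureInfo.width * textureInfo.height * 4);
  GlUtil.focusFramebufferUsingCurrentContext(
      textureInfo.fboId, textureInfo.width, textureInfo.height);
  GlUtil.checkGlError();
  GLES20.glReadPixels(
      /* x= */ 0,
      /* y= */ 0,
      textureInfo.width,
      textureInfo.height,
      GLES20.GL_RGBA,
      GLES20.GL_UNSIGNED_BYTE,
      byteBuffer);
  GlUtil.checkGlError();
  return byteBuffer;
}
```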

libraries/effect/src/main/java/androidx/media3/effect/ByteBufferGlEffect.java

@@ -39,7 +39,7 @@ import java.util.concurrent.Future;
  * available such as another GPU context, FPGAs, or NPUs.
  */
 @UnstableApi
-/* package */ class ByteBufferGlEffect<T> implements GlEffect {
+public class ByteBufferGlEffect<T> implements GlEffect {

   private static final int DEFAULT_QUEUE_SIZE = 6;
   private static final int DEFAULT_PENDING_PIXEL_BUFFER_QUEUE_SIZE = 1;
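
With the class now public, apps can plug in their own ByteBufferGlEffect.Processor<T>. Based only on the methods exercised in this commit, the per-frame flow is: configure(width, height) picks the scaled frame size, getScaledRegion(presentationTimeUs) picks the region copied to the CPU, processImage(image, presentationTimeUs) runs the analysis and returns a future of T, and finishProcessingAndBlend(outputFrame, presentationTimeUs, result) lets the result be drawn back onto the GPU frame. Constructing the effect is then one line, e.g. with the frame-counting processor added below:

```java
// A ByteBufferGlEffect is parameterized by the processor's result type; here Integer.
GlEffect frameCounter = new ByteBufferGlEffect<>(new FrameCountingByteBufferProcessor());
```

DEFAULT_QUEUE_SIZE = 6 appears to bound how many frames can be in flight between the GPU copy and processImage at a time.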

libraries/transformer/src/androidTest/java/androidx/media3/transformer/AndroidTestUtil.java

@@ -26,6 +26,7 @@ import static androidx.media3.common.util.Assertions.checkState;
 import static androidx.media3.common.util.Util.SDK_INT;
 import static androidx.media3.test.utils.TestUtil.retrieveTrackFormat;
 import static com.google.common.truth.Truth.assertThat;
+import static com.google.common.util.concurrent.Futures.immediateFuture;
 import static org.junit.Assume.assumeFalse;

 import android.content.Context;
@@ -43,10 +44,13 @@ import androidx.media3.common.Format;
 import androidx.media3.common.GlObjectsProvider;
 import androidx.media3.common.GlTextureInfo;
 import androidx.media3.common.MimeTypes;
+import androidx.media3.common.util.GlRect;
 import androidx.media3.common.util.GlUtil;
 import androidx.media3.common.util.Log;
 import androidx.media3.common.util.MediaFormatUtil;
+import androidx.media3.common.util.Size;
 import androidx.media3.common.util.Util;
+import androidx.media3.effect.ByteBufferGlEffect;
 import androidx.media3.effect.DefaultGlObjectsProvider;
 import androidx.media3.effect.GlEffect;
 import androidx.media3.effect.GlShaderProgram;
@@ -57,6 +61,7 @@ import androidx.media3.test.utils.BitmapPixelTestUtil;
 import androidx.media3.test.utils.VideoDecodingWrapper;
 import com.google.common.base.Ascii;
 import com.google.common.collect.ImmutableList;
+import com.google.common.util.concurrent.ListenableFuture;
 import com.google.errorprone.annotations.CanIgnoreReturnValue;
 import java.io.File;
 import java.io.FileWriter;
@@ -1088,6 +1093,47 @@ public final class AndroidTestUtil {
     }
   }

+  /**
+   * Implementation of {@link ByteBufferGlEffect.Processor} that counts how many frames are copied
+   * to CPU memory.
+   */
+  public static final class FrameCountingByteBufferProcessor
+      implements ByteBufferGlEffect.Processor<Integer> {
+    public final AtomicInteger frameCount;
+
+    private int width;
+    private int height;
+
+    public FrameCountingByteBufferProcessor() {
+      frameCount = new AtomicInteger();
+    }
+
+    @Override
+    public Size configure(int inputWidth, int inputHeight) {
+      width = inputWidth;
+      height = inputHeight;
+      return new Size(width, height);
+    }
+
+    @Override
+    public GlRect getScaledRegion(long presentationTimeUs) {
+      return new GlRect(width, height);
+    }
+
+    @Override
+    public ListenableFuture<Integer> processImage(
+        ByteBufferGlEffect.Image image, long presentationTimeUs) {
+      return immediateFuture(frameCount.incrementAndGet());
+    }
+
+    @Override
+    public void finishProcessingAndBlend(
+        GlTextureInfo outputFrame, long presentationTimeUs, Integer result) {}
+
+    @Override
+    public void release() {}
+  }
+
   /**
    * Writes the summary of a test run to the application cache file.
    *
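
FrameCountingByteBufferProcessor only counts frames; a real analyzer would read the RGBA bytes inside processImage. A hedged sketch of what that body could look like, where getByteBuffer() is a hypothetical accessor standing in for whatever read access ByteBufferGlEffect.Image actually exposes (the commit only shows its (width, height, byteBuffer) constructor):

```java
// Hypothetical: average Rec. 601 luma over one frame, inside processImage.
// image.getByteBuffer() is an assumed accessor; width/height are the values
// saved in configure(). Layout assumed to be tightly packed RGBA_8888.
ByteBuffer pixels = image.getByteBuffer(); // Assumed API.
long lumaSum = 0;
for (int i = 0; i + 3 < pixels.remaining(); i += 4) {
  int r = pixels.get(i) & 0xFF;
  int g = pixels.get(i + 1) & 0xFF;
  int b = pixels.get(i + 2) & 0xFF; // Byte i + 3 is alpha.
  lumaSum += (299 * r + 587 * g + 114 * b) / 1000; // Integer Rec. 601 luma.
}
float averageLuma = (float) lumaSum / (width * height);
```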

libraries/transformer/src/androidTest/java/androidx/media3/transformer/TransformerEndToEndTest.java

@@ -71,6 +71,7 @@ import androidx.media3.common.audio.SpeedProvider;
 import androidx.media3.common.util.GlUtil;
 import androidx.media3.common.util.Util;
 import androidx.media3.datasource.DataSourceBitmapLoader;
+import androidx.media3.effect.ByteBufferGlEffect;
 import androidx.media3.effect.Contrast;
 import androidx.media3.effect.DefaultGlObjectsProvider;
 import androidx.media3.effect.DefaultVideoFrameProcessor;
@@ -88,6 +89,7 @@ import androidx.media3.test.utils.FakeExtractorOutput;
 import androidx.media3.test.utils.FakeTrackOutput;
 import androidx.media3.test.utils.TestSpeedProvider;
 import androidx.media3.test.utils.TestUtil;
+import androidx.media3.transformer.AndroidTestUtil.FrameCountingByteBufferProcessor;
 import androidx.media3.transformer.AssetLoader.CompositionSettings;
 import androidx.test.core.app.ApplicationProvider;
 import androidx.test.ext.junit.runners.AndroidJUnit4;
@@ -1641,6 +1643,35 @@ public class TransformerEndToEndTest {
     assertThat(result.exportResult.fileSizeBytes).isEqualTo(C.LENGTH_UNSET);
   }

+  @Test
+  public void extractFrames_usingAnalyzerMode_completesSuccessfully() throws Exception {
+    assumeFormatsSupported(
+        context,
+        testId,
+        /* inputFormat= */ MP4_ASSET_WITH_INCREASING_TIMESTAMPS_320W_240H_15S.videoFormat,
+        /* outputFormat= */ null);
+    Transformer transformer = ExperimentalAnalyzerModeFactory.buildAnalyzer(context);
+    FrameCountingByteBufferProcessor frameCountingProcessor =
+        new FrameCountingByteBufferProcessor();
+    // Analysis must be added to item effects because composition effects are not applied to
+    // single input video.
+    EditedMediaItem editedMediaItem =
+        new EditedMediaItem.Builder(
+                MediaItem.fromUri(
+                    Uri.parse(MP4_ASSET_WITH_INCREASING_TIMESTAMPS_320W_240H_15S.uri)))
+            .setEffects(
+                new Effects(
+                    ImmutableList.of(),
+                    ImmutableList.of(new ByteBufferGlEffect<>(frameCountingProcessor))))
+            .build();
+
+    new TransformerAndroidTestRunner.Builder(context, transformer)
+        .build()
+        .run(testId, editedMediaItem);
+
+    assertThat(frameCountingProcessor.frameCount.get()).isEqualTo(932);
+  }
+
   @Test
   public void transcode_withOutputVideoMimeTypeAv1_completesSuccessfully() throws Exception {
     assumeFormatsSupported(
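
The speed test in the next file combines analyzer mode with a decoder tuned for speed. The relevant setup, lifted from the analyzeVideoWithConfiguredOperatingRate helper at the end of this diff:

```java
// From the TranscodeSpeedTest helper below: an analyzer-mode Transformer whose
// decoder is configured to run as fast as possible rather than at real-time rate.
Context context = ApplicationProvider.getApplicationContext();
Codec.DecoderFactory decoderFactory =
    new DefaultDecoderFactory.Builder(context).setShouldConfigureOperatingRate(true).build();
AssetLoader.Factory assetLoaderFactory =
    new DefaultAssetLoaderFactory(context, decoderFactory, Clock.DEFAULT);
Transformer transformer =
    ExperimentalAnalyzerModeFactory.buildAnalyzer(context)
        .buildUpon()
        .setAssetLoaderFactory(assetLoaderFactory)
        .build();
```

The asserted numbers are self-consistent: 1350 frames from a 45-second clip is 30 fps of source video, so the required throughput of at least 350 fps means analyzing more than 11x faster than real time.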

libraries/transformer/src/androidTest/java/androidx/media3/transformer/TranscodeSpeedTest.java

@@ -32,8 +32,10 @@ import androidx.media3.common.MediaItem;
 import androidx.media3.common.MimeTypes;
 import androidx.media3.common.util.Clock;
 import androidx.media3.common.util.Util;
+import androidx.media3.effect.ByteBufferGlEffect;
 import androidx.media3.effect.Presentation;
-import androidx.media3.transformer.AndroidTestUtil;
+import androidx.media3.transformer.AndroidTestUtil.ForceEncodeEncoderFactory;
+import androidx.media3.transformer.AndroidTestUtil.FrameCountingByteBufferProcessor;
 import androidx.media3.transformer.AssetLoader;
 import androidx.media3.transformer.Codec;
 import androidx.media3.transformer.DefaultAssetLoaderFactory;
@@ -78,7 +80,7 @@ public class TranscodeSpeedTest {
     Transformer transformer =
         new Transformer.Builder(context)
             .setVideoMimeType(MimeTypes.VIDEO_H264)
-            .setEncoderFactory(new AndroidTestUtil.ForceEncodeEncoderFactory(context))
+            .setEncoderFactory(new ForceEncodeEncoderFactory(context))
             .build();
     MediaItem mediaItem =
         MediaItem.fromUri(Uri.parse(MP4_LONG_ASSET_WITH_INCREASING_TIMESTAMPS.uri))
@@ -151,6 +153,44 @@
     assertThat(result.throughputFps).isAtLeast(isHighPerformance ? 400 : 20);
   }

+  @Test
+  public void extractFrames_onHighPerformanceDevice_usingAnalyzerMode_completesWithHighThroughput()
+      throws Exception {
+    assumeTrue(
+        Ascii.toLowerCase(Util.MODEL).contains("pixel")
+            && (Ascii.toLowerCase(Util.MODEL).contains("6")
+                || Ascii.toLowerCase(Util.MODEL).contains("7")
+                || Ascii.toLowerCase(Util.MODEL).contains("8")
+                || Ascii.toLowerCase(Util.MODEL).contains("fold")
+                || Ascii.toLowerCase(Util.MODEL).contains("tablet")));
+    // Pixel 6 is usually quick, unless it's on API 33. See b/358519058.
+    assumeFalse(Util.SDK_INT == 33 && Ascii.toLowerCase(Util.MODEL).contains("pixel 6"));
+    FrameCountingByteBufferProcessor frameCountingProcessor =
+        new FrameCountingByteBufferProcessor();
+    MediaItem mediaItem =
+        MediaItem.fromUri(Uri.parse(MP4_LONG_ASSET_WITH_INCREASING_TIMESTAMPS.uri))
+            .buildUpon()
+            .setClippingConfiguration(
+                new MediaItem.ClippingConfiguration.Builder().setEndPositionMs(45_000L).build())
+            .build();
+    EditedMediaItem editedMediaItem =
+        new EditedMediaItem.Builder(mediaItem)
+            .setRemoveAudio(true)
+            .setEffects(
+                new Effects(
+                    /* audioProcessors= */ ImmutableList.of(),
+                    ImmutableList.of(
+                        Presentation.createForHeight(240),
+                        new ByteBufferGlEffect<>(frameCountingProcessor))))
+            .build();
+
+    ExportTestResult result = analyzeVideoWithConfiguredOperatingRate(testId, editedMediaItem);
+
+    assertThat(frameCountingProcessor.frameCount.get()).isEqualTo(1350);
+    float throughputFps = 1000f * frameCountingProcessor.frameCount.get() / result.elapsedTimeMs;
+    assertThat(throughputFps).isAtLeast(350);
+  }
+
   @Test
   public void
       analyzeVideo_onHighPerformanceDevice_withConfiguredOperatingRate_completesWithHighThroughput()
@@ -165,13 +205,22 @@
     // Pixel 6 is usually quick, unless it's on API 33. See b/358519058.
     assumeFalse(Util.SDK_INT == 33 && Ascii.toLowerCase(Util.MODEL).contains("pixel 6"));
     AtomicInteger videoFramesSeen = new AtomicInteger(/* initialValue= */ 0);
+    MediaItem mediaItem =
+        MediaItem.fromUri(Uri.parse(MP4_LONG_ASSET_WITH_INCREASING_TIMESTAMPS.uri))
+            .buildUpon()
+            .setClippingConfiguration(
+                new MediaItem.ClippingConfiguration.Builder().setEndPositionMs(45_000L).build())
+            .build();
+    EditedMediaItem editedMediaItem =
+        new EditedMediaItem.Builder(mediaItem)
+            .setRemoveAudio(true)
+            .setEffects(
+                new Effects(
+                    /* audioProcessors= */ ImmutableList.of(),
+                    ImmutableList.of(createFrameCountingEffect(videoFramesSeen))))
+            .build();

-    ExportTestResult result =
-        analyzeVideoWithConfiguredOperatingRate(
-            testId,
-            Uri.parse(MP4_LONG_ASSET_WITH_INCREASING_TIMESTAMPS.uri),
-            /* durationMs= */ 45_000,
-            videoFramesSeen);
+    ExportTestResult result = analyzeVideoWithConfiguredOperatingRate(testId, editedMediaItem);

     int expectedFrameCount = 1350;
     checkState(videoFramesSeen.get() == expectedFrameCount);
@@ -189,36 +238,11 @@
         /* inputFormat= */ MP4_LONG_ASSET_WITH_INCREASING_TIMESTAMPS.videoFormat,
         /* outputFormat= */ null);
     AtomicInteger videoFramesSeen = new AtomicInteger(/* initialValue= */ 0);
-
-    analyzeVideoWithConfiguredOperatingRate(
-        testId,
-        Uri.parse(MP4_LONG_ASSET_WITH_INCREASING_TIMESTAMPS.uri),
-        /* durationMs= */ 15_000,
-        videoFramesSeen);
-
-    int expectedFrameCount = 450;
-    assertThat(videoFramesSeen.get()).isEqualTo(expectedFrameCount);
-  }
-
-  private static ExportTestResult analyzeVideoWithConfiguredOperatingRate(
-      String testId, Uri mediaUri, long durationMs, AtomicInteger videoFramesSeen)
-      throws Exception {
-    Context context = ApplicationProvider.getApplicationContext();
-    videoFramesSeen.set(0);
-    Codec.DecoderFactory decoderFactory =
-        new DefaultDecoderFactory.Builder(context).setShouldConfigureOperatingRate(true).build();
-    AssetLoader.Factory assetLoaderFactory =
-        new DefaultAssetLoaderFactory(context, decoderFactory, Clock.DEFAULT);
-    Transformer transformer =
-        ExperimentalAnalyzerModeFactory.buildAnalyzer(context)
-            .buildUpon()
-            .setAssetLoaderFactory(assetLoaderFactory)
-            .build();
     MediaItem mediaItem =
-        MediaItem.fromUri(mediaUri)
+        MediaItem.fromUri(Uri.parse(MP4_LONG_ASSET_WITH_INCREASING_TIMESTAMPS.uri))
             .buildUpon()
             .setClippingConfiguration(
-                new MediaItem.ClippingConfiguration.Builder().setEndPositionMs(durationMs).build())
+                new MediaItem.ClippingConfiguration.Builder().setEndPositionMs(15_000L).build())
             .build();
     EditedMediaItem editedMediaItem =
         new EditedMediaItem.Builder(mediaItem)
@@ -229,6 +253,25 @@
                     ImmutableList.of(createFrameCountingEffect(videoFramesSeen))))
             .build();

+    analyzeVideoWithConfiguredOperatingRate(testId, editedMediaItem);
+
+    int expectedFrameCount = 450;
+    assertThat(videoFramesSeen.get()).isEqualTo(expectedFrameCount);
+  }
+
+  private static ExportTestResult analyzeVideoWithConfiguredOperatingRate(
+      String testId, EditedMediaItem editedMediaItem) throws Exception {
+    Context context = ApplicationProvider.getApplicationContext();
+    Codec.DecoderFactory decoderFactory =
+        new DefaultDecoderFactory.Builder(context).setShouldConfigureOperatingRate(true).build();
+    AssetLoader.Factory assetLoaderFactory =
+        new DefaultAssetLoaderFactory(context, decoderFactory, Clock.DEFAULT);
+    Transformer transformer =
+        ExperimentalAnalyzerModeFactory.buildAnalyzer(context)
+            .buildUpon()
+            .setAssetLoaderFactory(assetLoaderFactory)
+            .build();
+    return new TransformerAndroidTestRunner.Builder(context, transformer)
+        .build()
+        .run(testId, editedMediaItem);