Add frame processor based on MediaPipe to demo

PiperOrigin-RevId: 446432695
This commit is contained in:
andrewlewis 2022-05-04 13:48:56 +01:00 committed by Ian Baker
parent 9ca0f7862a
commit e6518126ac
9 changed files with 333 additions and 5 deletions

View File

@ -0,0 +1,22 @@
# Build targets for a demo MediaPipe graph.
# See README.md for instructions on using MediaPipe in the demo.
load("//mediapipe/java/com/google/mediapipe:mediapipe_aar.bzl", "mediapipe_aar")
load(
"//mediapipe/framework/tool:mediapipe_graph.bzl",
"mediapipe_binary_graph",
)
mediapipe_aar(
name = "edge_detector_mediapipe_aar",
calculators = [
"//mediapipe/calculators/image:luminance_calculator",
"//mediapipe/calculators/image:sobel_edges_calculator",
],
)
mediapipe_binary_graph(
name = "edge_detector_binary_graph",
graph = "edge_detector_mediapipe_graph.pbtxt",
output_name = "edge_detector_mediapipe_graph.binarypb",
)

View File

@ -6,4 +6,60 @@ example by removing audio or video.
See the [demos README](../README.md) for instructions on how to build and run
this demo.
## MediaPipe frame processing demo
Building the demo app with [MediaPipe][] integration enabled requires some extra
manual steps.
1. Follow the
[instructions](https://google.github.io/mediapipe/getting_started/install.html)
to install MediaPipe.
1. Copy the Transformer demo's build configuration and MediaPipe graph text
protocol buffer under the MediaPipe source tree. This makes it easy to
[build an AAR][] with bazel by reusing MediaPipe's workspace.
```sh
cd "<path to MediaPipe checkout>"
MEDIAPIPE_ROOT="$(pwd)"
MEDIAPIPE_TRANSFORMER_ROOT="${MEDIAPIPE_ROOT}/mediapipe/java/com/google/mediapipe/transformer"
cd "<path to the transformer demo (containing this readme)>"
TRANSFORMER_DEMO_ROOT="$(pwd)"
mkdir -p "${MEDIAPIPE_TRANSFORMER_ROOT}"
mkdir -p "${TRANSFORMER_DEMO_ROOT}/libs"
cp ${TRANSFORMER_DEMO_ROOT}/BUILD.bazel ${MEDIAPIPE_TRANSFORMER_ROOT}/BUILD
cp ${TRANSFORMER_DEMO_ROOT}/src/withMediaPipe/assets/edge_detector_mediapipe_graph.pbtxt \
${MEDIAPIPE_TRANSFORMER_ROOT}
```
1. Build the AAR and the binary proto for the demo's MediaPipe graph, then copy
them to Transformer.
```sh
cd ${MEDIAPIPE_ROOT}
bazel build -c opt --strip=ALWAYS \
--host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
--fat_apk_cpu=arm64-v8a,armeabi-v7a \
--legacy_whole_archive=0 \
--features=-legacy_whole_archive \
--copt=-fvisibility=hidden \
--copt=-ffunction-sections \
--copt=-fdata-sections \
--copt=-fstack-protector \
--copt=-Oz \
--copt=-fomit-frame-pointer \
--copt=-DABSL_MIN_LOG_LEVEL=2 \
--linkopt=-Wl,--gc-sections,--strip-all \
mediapipe/java/com/google/mediapipe/transformer:edge_detector_mediapipe_aar.aar
cp bazel-bin/mediapipe/java/com/google/mediapipe/transformer/edge_detector_mediapipe_aar.aar \
${TRANSFORMER_DEMO_ROOT}/libs
bazel build mediapipe/java/com/google/mediapipe/transformer:edge_detector_binary_graph
cp bazel-bin/mediapipe/java/com/google/mediapipe/transformer/edge_detector_mediapipe_graph.binarypb \
${TRANSFORMER_DEMO_ROOT}/src/withMediaPipe/assets
```
1. Select the `withMediaPipe` build variant in Android Studio, then build and
run the demo app and select a MediaPipe-based effect.
[Transformer]: https://exoplayer.dev/transforming-media.html
[MediaPipe]: https://google.github.io/mediapipe/
[build an AAR]: https://google.github.io/mediapipe/getting_started/android_archive_library.html

View File

@ -45,6 +45,17 @@ android {
// This demo app isn't indexed and doesn't have translations.
disable 'GoogleAppIndexingWarning','MissingTranslation'
}
flavorDimensions "mediaPipe"
productFlavors {
noMediaPipe {
dimension "mediaPipe"
}
withMediaPipe {
dimension "mediaPipe"
}
}
}
dependencies {
@ -59,4 +70,11 @@ dependencies {
implementation project(modulePrefix + 'lib-exoplayer-dash')
implementation project(modulePrefix + 'lib-transformer')
implementation project(modulePrefix + 'lib-ui')
// For MediaPipe and its dependencies:
withMediaPipeImplementation fileTree(dir: 'libs', include: ['*.aar'])
withMediaPipeImplementation 'com.google.flogger:flogger:latest.release'
withMediaPipeImplementation 'com.google.flogger:flogger-system-backend:latest.release'
withMediaPipeImplementation 'com.google.code.findbugs:jsr305:latest.release'
withMediaPipeImplementation 'com.google.protobuf:protobuf-javalite:3.19.1'
}

View File

@ -92,9 +92,14 @@ public final class ConfigurationActivity extends AppCompatActivity {
"MP4 with HDR (HDR10) H265 video (encoding may fail)",
};
private static final String[] DEMO_EFFECTS = {
"Dizzy crop", "Periodic vignette", "3D spin", "Overlay logo & timer", "Zoom in start"
"Dizzy crop",
"Edge detector (Media Pipe)",
"Periodic vignette",
"3D spin",
"Overlay logo & timer",
"Zoom in start",
};
private static final int PERIODIC_VIGNETTE_INDEX = 1;
private static final int PERIODIC_VIGNETTE_INDEX = 2;
private static final String SAME_AS_INPUT_OPTION = "same as input";
private static final float HALF_DIAGONAL = 1f / (float) Math.sqrt(2);

View File

@ -31,6 +31,7 @@ import android.view.ViewGroup;
import android.widget.TextView;
import android.widget.Toast;
import androidx.annotation.Nullable;
import androidx.annotation.StringRes;
import androidx.appcompat.app.AppCompatActivity;
import androidx.media3.common.C;
import androidx.media3.common.MediaItem;
@ -41,6 +42,7 @@ import androidx.media3.exoplayer.util.DebugTextViewHelper;
import androidx.media3.transformer.DefaultEncoderFactory;
import androidx.media3.transformer.EncoderSelector;
import androidx.media3.transformer.GlEffect;
import androidx.media3.transformer.GlFrameProcessor;
import androidx.media3.transformer.ProgressHolder;
import androidx.media3.transformer.TransformationException;
import androidx.media3.transformer.TransformationRequest;
@ -54,6 +56,7 @@ import com.google.common.base.Ticker;
import com.google.common.collect.ImmutableList;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
@ -249,6 +252,29 @@ public final class TransformerActivity extends AppCompatActivity {
effects.add(MatrixTransformationFactory.createDizzyCropEffect());
}
if (selectedEffects[1]) {
try {
Class<?> clazz =
Class.forName("androidx.media3.demo.transformer.MediaPipeFrameProcessor");
Constructor<?> constructor =
clazz.getConstructor(String.class, String.class, String.class);
effects.add(
() -> {
try {
return (GlFrameProcessor)
constructor.newInstance(
/* graphName= */ "edge_detector_mediapipe_graph.binarypb",
/* inputStreamName= */ "input_video",
/* outputStreamName= */ "output_video");
} catch (Exception e) {
showToast(R.string.no_media_pipe_error);
throw new RuntimeException("Failed to load MediaPipe processor", e);
}
});
} catch (Exception e) {
showToast(R.string.no_media_pipe_error);
}
}
if (selectedEffects[2]) {
effects.add(
() ->
new PeriodicVignetteFrameProcessor(
@ -260,13 +286,13 @@ public final class TransformerActivity extends AppCompatActivity {
ConfigurationActivity.PERIODIC_VIGNETTE_OUTER_RADIUS),
bundle.getFloat(ConfigurationActivity.PERIODIC_VIGNETTE_OUTER_RADIUS)));
}
if (selectedEffects[2]) {
if (selectedEffects[3]) {
effects.add(MatrixTransformationFactory.createSpin3dEffect());
}
if (selectedEffects[3]) {
if (selectedEffects[4]) {
effects.add(BitmapOverlayFrameProcessor::new);
}
if (selectedEffects[4]) {
if (selectedEffects[5]) {
effects.add(MatrixTransformationFactory.createZoomInTransition());
}
transformerBuilder.setVideoFrameEffects(effects.build());
@ -363,6 +389,10 @@ public final class TransformerActivity extends AppCompatActivity {
}
}
private void showToast(@StringRes int messageResource) {
Toast.makeText(getApplicationContext(), getString(messageResource), Toast.LENGTH_LONG).show();
}
private final class DemoDebugViewProvider implements Transformer.DebugViewProvider {
@Nullable

View File

@ -31,6 +31,7 @@
<string name="hdr_editing" translatable="false">[Experimental] HDR editing</string>
<string name="select_demo_effects" translatable="false">Add demo effects</string>
<string name="periodic_vignette_options" translatable="false">Periodic vignette options</string>
<string name="no_media_pipe_error" translatable="false">Failed to load MediaPipe processor. Check the README for instructions.</string>
<string name="transform" translatable="false">Transform</string>
<string name="debug_preview" translatable="false">Debug preview:</string>
<string name="debug_preview_not_available" translatable="false">No debug preview available.</string>

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
~ Copyright 2022 The Android Open Source Project
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<manifest package="androidx.media3.demo.transformer">
<uses-sdk />
</manifest>

View File

@ -0,0 +1,13 @@
# Demo MediaPipe graph that shows edges using a SobelEdgesCalculator.
input_stream: "input_video"
output_stream: "output_video"
node: {
calculator: "LuminanceCalculator"
input_stream: "input_video"
output_stream: "luma_video"
}
node: {
calculator: "SobelEdgesCalculator"
input_stream: "luma_video"
output_stream: "output_video"
}

View File

@ -0,0 +1,164 @@
/*
* Copyright 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.demo.transformer;
import static androidx.media3.common.util.Assertions.checkState;
import static androidx.media3.common.util.Assertions.checkStateNotNull;
import android.content.Context;
import android.opengl.EGL14;
import android.opengl.GLES20;
import android.util.Size;
import androidx.media3.common.util.ConditionVariable;
import androidx.media3.common.util.GlProgram;
import androidx.media3.common.util.GlUtil;
import androidx.media3.common.util.LibraryLoader;
import androidx.media3.transformer.GlFrameProcessor;
import com.google.mediapipe.components.FrameProcessor;
import com.google.mediapipe.framework.AppTextureFrame;
import com.google.mediapipe.framework.TextureFrame;
import com.google.mediapipe.glutil.EglManager;
import java.io.IOException;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
/**
* Runs a MediaPipe graph on input frames. The implementation is currently limited to graphs that
* can immediately produce one output frame per input frame.
*/
/* package */ final class MediaPipeFrameProcessor implements GlFrameProcessor {
private static final LibraryLoader LOADER =
new LibraryLoader("mediapipe_jni") {
@Override
protected void loadLibrary(String name) {
System.loadLibrary(name);
}
};
private static final String COPY_VERTEX_SHADER_NAME = "vertex_shader_copy_es2.glsl";
private static final String COPY_FRAGMENT_SHADER_NAME = "shaders/fragment_shader_copy_es2.glsl";
private final String graphName;
private final String inputStreamName;
private final String outputStreamName;
private final ConditionVariable frameProcessorConditionVariable;
private @MonotonicNonNull FrameProcessor frameProcessor;
private int inputWidth;
private int inputHeight;
private int inputTexId;
private @MonotonicNonNull GlProgram glProgram;
private @MonotonicNonNull TextureFrame outputFrame;
private @MonotonicNonNull RuntimeException frameProcessorPendingError;
/**
* Creates a new frame processor that wraps a MediaPipe graph.
*
* @param graphName Name of a MediaPipe graph asset to load.
* @param inputStreamName Name of the input video stream in the graph.
* @param outputStreamName Name of the input video stream in the graph.
*/
public MediaPipeFrameProcessor(
String graphName, String inputStreamName, String outputStreamName) {
checkState(LOADER.isAvailable());
this.graphName = graphName;
this.inputStreamName = inputStreamName;
this.outputStreamName = outputStreamName;
frameProcessorConditionVariable = new ConditionVariable();
}
@Override
public void initialize(Context context, int inputTexId, int inputWidth, int inputHeight)
throws IOException {
this.inputTexId = inputTexId;
this.inputWidth = inputWidth;
this.inputHeight = inputHeight;
glProgram = new GlProgram(context, COPY_VERTEX_SHADER_NAME, COPY_FRAGMENT_SHADER_NAME);
EglManager eglManager = new EglManager(EGL14.eglGetCurrentContext());
frameProcessor =
new FrameProcessor(
context, eglManager.getNativeContext(), graphName, inputStreamName, outputStreamName);
// Unblock drawFrame when there is an output frame or an error.
frameProcessor.setConsumer(
frame -> {
outputFrame = frame;
frameProcessorConditionVariable.open();
});
frameProcessor.setAsynchronousErrorListener(
error -> {
frameProcessorPendingError = error;
frameProcessorConditionVariable.open();
});
}
@Override
public Size getOutputSize() {
return new Size(inputWidth, inputHeight);
}
@Override
public void drawFrame(long presentationTimeUs) {
frameProcessorConditionVariable.close();
// Pass the input frame to MediaPipe.
AppTextureFrame appTextureFrame = new AppTextureFrame(inputTexId, inputWidth, inputHeight);
appTextureFrame.setTimestamp(presentationTimeUs);
checkStateNotNull(frameProcessor).onNewFrame(appTextureFrame);
// Wait for output to be passed to the consumer.
try {
frameProcessorConditionVariable.block();
} catch (InterruptedException e) {
// Propagate the interrupted flag so the next blocking operation will throw.
// TODO(b/230469581): The next processor that runs will not have valid input due to returning
// early here. This could be fixed by checking for interruption in the outer loop that runs
// through the frame processors.
Thread.currentThread().interrupt();
return;
}
if (frameProcessorPendingError != null) {
throw new IllegalStateException(frameProcessorPendingError);
}
// Copy from MediaPipe's output texture to the current output.
try {
checkStateNotNull(glProgram).use();
glProgram.setSamplerTexIdUniform(
"uTexSampler", checkStateNotNull(outputFrame).getTextureName(), /* texUnitIndex= */ 0);
glProgram.setBufferAttribute(
"aFramePosition",
GlUtil.getNormalizedCoordinateBounds(),
GlUtil.RECTANGLE_VERTICES_COUNT);
glProgram.setBufferAttribute(
"aTexSamplingCoord",
GlUtil.getTextureCoordinateBounds(),
GlUtil.RECTANGLE_VERTICES_COUNT);
glProgram.bindAttributesAndUniforms();
GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, /* first= */ 0, /* count= */ 4);
GlUtil.checkGlError();
} finally {
checkStateNotNull(outputFrame).release();
}
}
@Override
public void release() {
checkStateNotNull(frameProcessor).close();
}
}