はじめに

このチュートリアルでは、OpenCV のディープラーニングモジュールを使用して Android デバイス上でディープラーニングネットワークを実行する方法を学ぶ。チュートリアルは Android Studio 2022.2.1 向けに書かれている。

要件

https://developer.android.com/studio から Android Studio をダウンロードしてインストールする。
https://github.com/opencv/opencv/releases から最新のビルド済み OpenCV for Android リリースを入手し、展開する(例えば opencv-4.X.Y-android-sdk.zip、最低でもバージョン4.9が必要)。
https://github.com/chuanqi305/MobileNet-SSD から MobileNet 物体検出モデルをダウンロードする。設定ファイル deploy.prototxt とモデルの重み mobilenet_iter_73000.caffemodel が必要である(後述のコードは R.raw.deploy と R.raw.mobilenet_iter_73000 としてこれらを参照する)。

空の Android Studio プロジェクトを作成し OpenCV 依存関係を追加する

OpenCVによるAndroid開発チュートリアルを使用してプロジェクトを初期化し、OpenCV を追加する。

アプリを作成する

このサンプルはカメラから画像を取得し、それをディープネットワークに渡し、矩形・クラス識別子・[0, 1] の範囲の信頼度値の集合を受け取る。

まず、処理されたフレームを表示するために必要なウィジェットを追加する必要がある。app/src/main/res/layout/activity_main.xml を変更する:
<?xml version="1.0" encoding="utf-8"?>
<!-- Layout of the main screen: one camera preview hosted in a FrameLayout. -->
<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <!-- Camera preview sized to match its parent; MainActivity looks it up as R.id.CameraView. -->
    <org.opencv.android.JavaCameraView
        android:id="@+id/CameraView"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:visibility="visible" />

</FrameLayout>
app/src/main/AndroidManifest.xml を変更して全画面モードを有効にし、正しい画面の向きを設定し、カメラの使用を許可する:
<?xml version="1.0" encoding="utf-8"?>

<!-- Application manifest: declares the launcher activity and the camera permission/features. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android">

<application

android:label="@string/app_name">

<!-- Launcher activity; screenOrientation below fixes it to landscape. -->
<activity

android:exported="true"

android:name=".MainActivity"

android:screenOrientation="landscape"> 

<!-- MAIN + LAUNCHER makes this activity the entry point shown in the app launcher. -->
<intent-filter>

<action android:name="android.intent.action.MAIN" />

<category android:name="android.intent.category.LAUNCHER" />

</intent-filter>

</activity>

</application>



<!-- Allow the application to use the camera. -->
<uses-permission android:name="android.permission.CAMERA"/>

<!-- Camera hardware features used by the sample; all are declared as not required. -->
<uses-feature android:name="android.hardware.camera" android:required="false"/>

<uses-feature android:name="android.hardware.camera.autofocus" android:required="false"/>

<uses-feature android:name="android.hardware.camera.front" android:required="false"/>

<uses-feature android:name="android.hardware.camera.front.autofocus" android:required="false"/>

</manifest>
app/src/main/java/com/example/myapplication/MainActivity.java の内容を置き換え、必要であればカスタムパッケージ名を設定する:

package com.example.myapplication;

import android.content.Context;
import android.content.res.AssetManager;
import android.os.Bundle;
import android.util.Log;
import android.widget.Toast;

import org.opencv.android.CameraActivity;
import org.opencv.android.CameraBridgeViewBase;
import org.opencv.android.CameraBridgeViewBase.CvCameraViewFrame;
import org.opencv.android.CameraBridgeViewBase.CvCameraViewListener2;
import org.opencv.android.OpenCVLoader;
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfByte;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.Net;
import org.opencv.dnn.Dnn;
import org.opencv.imgproc.Imgproc;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
 
/**
 * Camera activity that feeds every preview frame through a Caffe object-detection
 * network loaded from the application's raw resources and draws the detected boxes
 * and labels onto the frame before it is displayed.
 */
public class MainActivity extends CameraActivity implements CvCameraViewListener2 {

    private static final String TAG = "OpenCV-MobileNet";

    // Human-readable labels, indexed by the class id reported by the network.
    private static final String[] classNames = {"background",
            "aeroplane", "bicycle", "bird", "boat",
            "bottle", "bus", "car", "cat", "chair",
            "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant",
            "sheep", "sofa", "train", "tvmonitor"};

    // Size the frame is resized to before it is handed to the network.
    private static final int IN_WIDTH = 300;
    private static final int IN_HEIGHT = 300;
    // Input normalization passed to blobFromImage: mean to subtract and scale factor (~1/127.5).
    private static final double IN_SCALE_FACTOR = 0.007843;
    private static final double MEAN_VAL = 127.5;
    // Detections with a confidence at or below this value are not drawn.
    private static final double THRESHOLD = 0.2;

    private MatOfByte            mConfigBuffer;
    private MatOfByte            mModelBuffer;
    private Net                  net;
    private CameraBridgeViewBase mOpenCvCameraView;

    @Override
    public void onResume() {
        super.onResume();
        if (mOpenCvCameraView != null) {
            mOpenCvCameraView.enableView();
        }
    }

    /**
     * Initializes OpenCV, loads the network from raw resources and wires up the camera view.
     * Returns early, leaving the camera view unset, when OpenCV or the model cannot be loaded.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);

        if (OpenCVLoader.initLocal()) {
            Log.i(TAG, "OpenCV loaded successfully");
        } else {
            Log.e(TAG, "OpenCV initialization failed!");
            (Toast.makeText(this, "OpenCV initialization failed!", Toast.LENGTH_LONG)).show();
            return;
        }

        // Load a network.
        mModelBuffer = loadFileFromResource(R.raw.mobilenet_iter_73000);
        mConfigBuffer = loadFileFromResource(R.raw.deploy);
        if (mModelBuffer == null || mConfigBuffer == null) {
            // The network cannot be built without both buffers; loadFileFromResource has
            // already shown a toast, so just stop here instead of passing null to readNet.
            Log.e(TAG, "Failed to load model from resources");
            return;
        }
        Log.i(TAG, "Model files loaded successfully");

        net = Dnn.readNet("caffe", mModelBuffer, mConfigBuffer);
        Log.i(TAG, "Network loaded successfully");

        setContentView(R.layout.activity_main);

        // Set up camera listener.
        mOpenCvCameraView = (CameraBridgeViewBase) findViewById(R.id.CameraView);
        mOpenCvCameraView.setVisibility(CameraBridgeViewBase.VISIBLE);
        mOpenCvCameraView.setCvCameraViewListener(this);
    }

    @Override
    public void onPause() {
        super.onPause();
        if (mOpenCvCameraView != null) {
            mOpenCvCameraView.disableView();
        }
    }

    /** Returns the single camera view owned by this activity (null until onCreate succeeds). */
    @Override
    protected List<? extends CameraBridgeViewBase> getCameraViewList() {
        return Collections.singletonList(mOpenCvCameraView);
    }

    @Override
    public void onDestroy() {
        super.onDestroy();
        if (mOpenCvCameraView != null) {
            mOpenCvCameraView.disableView();
        }

        // The buffers are still null when onCreate bailed out early.
        if (mModelBuffer != null) {
            mModelBuffer.release();
        }
        if (mConfigBuffer != null) {
            mConfigBuffer.release();
        }
    }

    @Override
    public void onCameraViewStarted(int width, int height) {
    }

    /**
     * Runs the detector on one camera frame and draws the results onto it.
     *
     * @param inputFrame frame delivered by the camera view
     * @return the RGB frame with boxes and labels drawn for every detection whose
     *         confidence exceeds {@code THRESHOLD}
     */
    @Override
    public Mat onCameraFrame(CvCameraViewFrame inputFrame) {
        // Get a new frame
        Log.d(TAG, "handle new frame!");
        Mat frame = inputFrame.rgba();
        Imgproc.cvtColor(frame, frame, Imgproc.COLOR_RGBA2RGB);

        // Forward image through network.
        Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR,
                new Size(IN_WIDTH, IN_HEIGHT),
                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), /*swapRB*/false, /*crop*/false);
        net.setInput(blob);
        Mat rawDetections = net.forward();

        int cols = frame.cols();
        int rows = frame.rows();

        // View the output as one row of 7 values per detection.
        Mat detections = rawDetections.reshape(1, (int) rawDetections.total() / 7);

        for (int i = 0; i < detections.rows(); ++i) {
            double confidence = detections.get(i, 2)[0];
            if (confidence > THRESHOLD) {
                int classId = (int) detections.get(i, 1)[0];

                // Box corners are fractions of the frame size; scale them to pixels.
                int left   = (int) (detections.get(i, 3)[0] * cols);
                int top    = (int) (detections.get(i, 4)[0] * rows);
                int right  = (int) (detections.get(i, 5)[0] * cols);
                int bottom = (int) (detections.get(i, 6)[0] * rows);

                // Draw rectangle around detected object.
                Imgproc.rectangle(frame, new Point(left, top), new Point(right, bottom),
                        new Scalar(0, 255, 0));

                // Guard the lookup so an unexpected class id cannot crash the camera callback.
                String className = (classId >= 0 && classId < classNames.length)
                        ? classNames[classId]
                        : "class " + classId;
                String label = className + ": " + confidence;
                int[] baseLine = new int[1];
                Size labelSize = Imgproc.getTextSize(label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);

                // Draw background for label.
                Imgproc.rectangle(frame, new Point(left, top - labelSize.height),
                        new Point(left + labelSize.width, top + baseLine[0]),
                        new Scalar(255, 255, 255), Imgproc.FILLED);
                // Write class name and confidence.
                Imgproc.putText(frame, label, new Point(left, top),
                        Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0));
            }
        }

        // Release the per-frame Mats explicitly so their native memory is returned right
        // away instead of whenever the Java wrappers happen to be garbage collected.
        detections.release();
        rawDetections.release();
        blob.release();

        return frame;
    }

    @Override
    public void onCameraViewStopped() {}

    /**
     * Reads a raw application resource completely into memory.
     *
     * @param id identifier of the raw resource (an {@code R.raw.*} value)
     * @return the resource content wrapped in a {@link MatOfByte}, or {@code null} when
     *         reading fails (the error is logged and shown to the user as a toast)
     */
    private MatOfByte loadFileFromResource(int id) {
        byte[] buffer;
        // try-with-resources closes the stream even when reading throws.
        try (InputStream is = getResources().openRawResource(id)) {
            // available() is only an estimate, so it is used as a capacity hint and the
            // stream is read until end-of-stream instead of with a single read() call.
            ByteArrayOutputStream content = new ByteArrayOutputStream(Math.max(is.available(), 8192));
            byte[] chunk = new byte[8192];
            int bytesRead;
            while ((bytesRead = is.read(chunk)) != -1) {
                content.write(chunk, 0, bytesRead);
            }
            buffer = content.toByteArray();
        } catch (IOException e) {
            Log.e(TAG, "Failed to load model from resources! Exception thrown: " + e, e);
            (Toast.makeText(this, "Failed to load model from resources!", Toast.LENGTH_LONG)).show();
            return null;
        }

        return new MatOfByte(buffer);
    }
}

ダウンロードした deploy.prototxt と mobilenet_iter_73000.caffemodel を app/src/main/res/raw フォルダに配置する。OpenCV DNN モジュールは主にファイルから ML および DNN モデルを読み込むように設計されている。最近の Android では追加の権限なしにファイルへのアクセスを許可していないが、リソースからバイト列を読み込む Java API を提供している。このサンプルでは、ファイルではなくメモリ内バッファからモデルを初期化する代替の DNN API を使用する。次の関数はリソースからモデルファイルを読み込み、OpenCV Java API に適した MatOfByte(C++ の世界での std::vector<char> に相当)オブジェクトに変換する:

 /**
  * Reads a raw application resource completely into memory.
  *
  * @param id identifier of the raw resource (an {@code R.raw.*} value)
  * @return the resource content wrapped in a {@link MatOfByte}, or {@code null} when
  *         reading fails (the error is logged and shown to the user as a toast)
  */
 private MatOfByte loadFileFromResource(int id) {
     byte[] buffer;
     // try-with-resources closes the stream even when reading throws.
     try (InputStream is = getResources().openRawResource(id)) {
         // available() is only an estimate, so it is used as a capacity hint and the
         // stream is read until end-of-stream instead of with a single read() call.
         ByteArrayOutputStream content = new ByteArrayOutputStream(Math.max(is.available(), 8192));
         byte[] chunk = new byte[8192];
         int bytesRead;
         while ((bytesRead = is.read(chunk)) != -1) {
             content.write(chunk, 0, bytesRead);
         }
         buffer = content.toByteArray();
     } catch (IOException e) {
         Log.e(TAG, "Failed to load model from resources! Exception thrown: " + e, e);
         (Toast.makeText(this, "Failed to load model from resources!", Toast.LENGTH_LONG)).show();
         return null;
     }

     return new MatOfByte(buffer);
 }

そして、ネットワークの初期化は次の行で行われる:

        // Read the weights and the network description from raw resources into memory.
        mModelBuffer = loadFileFromResource(R.raw.mobilenet_iter_73000);
        mConfigBuffer = loadFileFromResource(R.raw.deploy);
        if (mModelBuffer == null || mConfigBuffer == null) {
            // The network cannot be built without both buffers, so stop here
            // instead of passing null to readNet.
            Log.e(TAG, "Failed to load model from resources");
            return;
        }
        Log.i(TAG, "Model files loaded successfully");

        // Build the network from the in-memory buffers rather than from files.
        net = Dnn.readNet("caffe", mModelBuffer, mConfigBuffer);
        Log.i(TAG, "Network loaded successfully");

リソースに関する Android のドキュメントも参照すること。

DNN モデルの入力がどのように準備され、推論結果がどのように解釈されるかを見てみる:

        // Forward image through network.
        Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR,
                new Size(IN_WIDTH, IN_HEIGHT),
                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), /*swapRB*/false, /*crop*/false);
        net.setInput(blob);
        Mat rawDetections = net.forward();

        int cols = frame.cols();
        int rows = frame.rows();

        // View the output as one row of 7 values per detection.
        Mat detections = rawDetections.reshape(1, (int) rawDetections.total() / 7);

        for (int i = 0; i < detections.rows(); ++i) {
            double confidence = detections.get(i, 2)[0];
            if (confidence > THRESHOLD) {
                int classId = (int) detections.get(i, 1)[0];

                // Box corners are fractions of the frame size; scale them to pixels.
                int left   = (int) (detections.get(i, 3)[0] * cols);
                int top    = (int) (detections.get(i, 4)[0] * rows);
                int right  = (int) (detections.get(i, 5)[0] * cols);
                int bottom = (int) (detections.get(i, 6)[0] * rows);

                // Draw rectangle around detected object.
                Imgproc.rectangle(frame, new Point(left, top), new Point(right, bottom),
                        new Scalar(0, 255, 0));

                // Guard the lookup so an unexpected class id cannot crash the camera callback.
                String className = (classId >= 0 && classId < classNames.length)
                        ? classNames[classId]
                        : "class " + classId;
                String label = className + ": " + confidence;
                int[] baseLine = new int[1];
                Size labelSize = Imgproc.getTextSize(label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);

                // Draw background for label.
                Imgproc.rectangle(frame, new Point(left, top - labelSize.height),
                        new Point(left + labelSize.width, top + baseLine[0]),
                        new Scalar(255, 255, 255), Imgproc.FILLED);
                // Write class name and confidence.
                Imgproc.putText(frame, label, new Point(left, top),
                        Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0));
            }
        }

        // Release the per-frame Mats explicitly so their native memory is returned right
        // away instead of whenever the Java wrappers happen to be garbage collected.
        detections.release();
        rawDetections.release();
        blob.release();

Dnn.blobFromImage はカメラフレームをニューラルネットワークの入力テンソルに変換する。リサイズと統計的な正規化(平均値の減算とスケーリング)が適用される。ネットワーク出力テンソルの各行は7個の値からなり、検出された1つの物体に関する情報が次の順序で含まれる: 画像のインデックス(このサンプルでは使用しない)、クラスID、[0, 1] の範囲の信頼度、左・上・右・下のボックス座標。すべての座標は [0, 1] の範囲にあり、描画前に画像サイズにスケーリングする必要がある。

アプリケーションを起動して楽しもう！


原著者	Dmitry Kurtaev
互換性	OpenCV >= 4.9

目次

はじめに

要件

空の Android Studio プロジェクトを作成し OpenCV 依存関係を追加する

アプリを作成する