修复不正确的boundingBox坐标?

我正在使用Google ML Kit和CameraX为Android(Java)开发一个对象检测应用程序。我还使用了一个TensorFlow模型,该模型可在此处找到。我的问题是boundingBox的坐标稍微有点错位,如下图所示。请忽略它被检测为铲子这一事实,我目前的问题只关注屏幕上绘制的图形与图像中物体没有对齐。

这是用于绘制GraphicOverlay的以下类;

DrawGraphic.java;

public class DrawGraphic extends View {

    Paint borderPaint, textPaint;
    // Bounding box from the detector, in analysis-image coordinates.
    Rect rect;
    String text;

    // Needed by getMappingMatrix() to map analysis coordinates onto the view.
    ImageProxy imageProxy;
    PreviewView previewView;


    public DrawGraphic(Context context, Rect rect, String text, ImageProxy imageProxy, PreviewView previewView) {
        super(context);
        this.rect = rect;
        this.text = text;
        // FIX: these two fields were never assigned, so onDraw() worked with
        // null imageProxy/previewView and could not compute the mapping.
        this.imageProxy = imageProxy;
        this.previewView = previewView;

        borderPaint = new Paint();
        borderPaint.setColor(Color.WHITE);
        borderPaint.setStrokeWidth(10f);
        borderPaint.setStyle(Paint.Style.STROKE);

        textPaint = new Paint();
        textPaint.setColor(Color.WHITE);
        textPaint.setStrokeWidth(50f);
        textPaint.setTextSize(32f);
        textPaint.setStyle(Paint.Style.FILL);
    }

    @Override
    protected void onDraw(Canvas canvas) {
        super.onDraw(canvas);
        // FIX: the mapping matrix was applied twice (setMatrix followed by
        // concat of the same matrix), doubling the transform and shifting the
        // box. Apply it exactly once.
        canvas.concat(getMappingMatrix(imageProxy, previewView));
        canvas.drawText(text, rect.centerX(), rect.centerY(), textPaint);
        // FIX: drawRect expects (left, top, right, bottom); the original passed
        // bottom as top, producing an inverted (empty) rect.
        canvas.drawRect(rect.left, rect.top, rect.right, rect.bottom, borderPaint);
        // FIX: removed two stray unused local declarations that shadowed the
        // fields of the same name.
    }

    /**
     * Builds a matrix that maps the analyzer image's crop rect onto the
     * PreviewView, compensating for the image rotation in 90-degree steps.
     */
    Matrix getMappingMatrix(ImageProxy imageProxy, PreviewView previewView) {
        Rect cropRect = imageProxy.getCropRect();
        int rotationDegrees = imageProxy.getImageInfo().getRotationDegrees();
        Matrix matrix = new Matrix();

        // Four corners of the crop rect, clockwise from top-left (x, y pairs).
        float[] source = {
                cropRect.left,
                cropRect.top,
                cropRect.right,
                cropRect.top,
                cropRect.right,
                cropRect.bottom,
                cropRect.left,
                cropRect.bottom
        };

        // Four corners of the destination view, clockwise from top-left.
        float[] destination = {
                0f,
                0f,
                previewView.getWidth(),
                0f,
                previewView.getWidth(),
                previewView.getHeight(),
                0f,
                previewView.getHeight()
        };

        // Rotate the destination quad: shift the corner list by one (x, y)
        // vertex for every 90 degrees of image rotation.
        int vertexSize = 2;

        int shiftOffset = rotationDegrees / 90 * vertexSize;
        float[] tempArray = destination.clone();
        for (int toIndex = 0; toIndex < source.length; toIndex++) {
            int fromIndex = (toIndex + shiftOffset) % source.length;
            destination[toIndex] = tempArray[fromIndex];
        }
        matrix.setPolyToPoly(source, 0, destination, 0, 4);
        return matrix;
    }
}

MainActivity.java

public class MainActivity extends AppCompatActivity {

    private static final int PERMISSIONS_REQUEST = 1;

    private static final String PERMISSION_CAMERA = Manifest.permission.CAMERA;

    public static final Size DESIRED_PREVIEW_SIZE = new Size(640, 480);

    private PreviewView previewView;

    ActivityMainBinding binding;

    // FIX: built once instead of on every analyzed frame. The original code
    // created a new LocalModel, options object and ObjectDetector inside the
    // analyzer callback, leaking one detector per frame.
    private ObjectDetector objectDetector;

    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        binding = ActivityMainBinding.inflate(getLayoutInflater());
        setContentView(binding.getRoot());

        previewView = findViewById(R.id.previewView);

        // Configure the custom TFLite model and detector a single time.
        LocalModel localModel =
                new LocalModel.Builder()
                        .setAssetFilePath("mobilenet_v1_0.75_192_quantized_1_metadata_1.tflite")
                        .build();

        CustomObjectDetectorOptions customObjectDetectorOptions =
                new CustomObjectDetectorOptions.Builder(localModel)
                        .setDetectorMode(CustomObjectDetectorOptions.STREAM_MODE)
                        .enableClassification()
                        .setClassificationConfidenceThreshold(0.5f)
                        .setMaxPerObjectLabelCount(3)
                        .build();

        objectDetector = ObjectDetection.getClient(customObjectDetectorOptions);

        if (hasPermission()) {
            // Start CameraX
            startCamera();
        } else {
            requestPermission();
        }
    }

    @SuppressLint("UnsafeOptInUsageError")
    private void startCamera() {
        ListenableFuture<ProcessCameraProvider> cameraProviderFuture = ProcessCameraProvider.getInstance(this);

        cameraProviderFuture.addListener(() -> {
            // Camera provider is now guaranteed to be available
            try {
                ProcessCameraProvider cameraProvider = cameraProviderFuture.get();

                // Set up the view finder use case to display camera preview
                Preview preview = new Preview.Builder().build();

                // Choose the camera by requiring a lens facing
                CameraSelector cameraSelector = new CameraSelector.Builder()
                        .requireLensFacing(CameraSelector.LENS_FACING_BACK)
                        .build();

                // Image Analysis
                ImageAnalysis imageAnalysis =
                        new ImageAnalysis.Builder()
                                .setTargetResolution(DESIRED_PREVIEW_SIZE)
                                .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
                                .build();

                imageAnalysis.setAnalyzer(ContextCompat.getMainExecutor(this), imageProxy -> {
                    // Rotation the analyzer image must be corrected by.
                    int rotationDegrees = imageProxy.getImageInfo().getRotationDegrees();
                    Log.v("ImageAnalysis_degrees", String.valueOf(rotationDegrees));

                    @SuppressLint("UnsafeExperimentalUsageError") Image mediaImage = imageProxy.getImage();
                    if (mediaImage == null) {
                        // FIX: the original leaked the frame when getImage()
                        // returned null; the proxy must always be closed.
                        imageProxy.close();
                        return;
                    }

                    InputImage image =
                            InputImage.fromMediaImage(mediaImage, rotationDegrees);

                    // Pass the image to the (pre-built) ML Kit detector.
                    objectDetector.process(image)
                            .addOnSuccessListener(detectedObjects -> {
                                // FIX: pass the frame through so DrawGraphic can
                                // map image coordinates onto the PreviewView
                                // (the 5-argument DrawGraphic constructor).
                                getObjectResults(detectedObjects, imageProxy);
                                Log.d("TAG", "onSuccess" + detectedObjects.size());
                                for (DetectedObject detectedObject : detectedObjects) {
                                    Rect boundingBox = detectedObject.getBoundingBox();

                                    Integer trackingId = detectedObject.getTrackingId();
                                    for (DetectedObject.Label label : detectedObject.getLabels()) {
                                        String text = label.getText();
                                        int index = label.getIndex();
                                        float confidence = label.getConfidence();
                                    }
                                }
                            })
                            .addOnFailureListener(e -> Log.e("TAG", e.getLocalizedMessage()))
                            .addOnCompleteListener(result -> imageProxy.close());
                });

                // Connect the preview use case to the previewView
                preview.setSurfaceProvider(
                        previewView.getSurfaceProvider());

                // Attach use cases to the camera with the same lifecycle owner
                if (cameraProvider != null) {
                    Camera camera = cameraProvider.bindToLifecycle(
                            this,
                            cameraSelector,
                            imageAnalysis,
                            preview);
                }

            } catch (ExecutionException | InterruptedException e) {
                e.printStackTrace();
            }

        }, ContextCompat.getMainExecutor(this));
    }

    /**
     * Replaces the previous overlay (child index 1 of the parent layout) with a
     * DrawGraphic for each detected object.
     */
    private void getObjectResults(List<DetectedObject> detectedObjects, ImageProxy imageProxy) {
        for (DetectedObject object : detectedObjects) {
            if (binding.parentlayout.getChildCount() > 1) {
                binding.parentlayout.removeViewAt(1);
            }
            Rect rect = object.getBoundingBox();
            String text = "Undefined";
            if (object.getLabels().size() != 0) {
                text = object.getLabels().get(0).getText();
            }

            // FIX: the original called a non-existent 3-argument constructor;
            // DrawGraphic requires the frame and the preview view to build its
            // coordinate-mapping matrix.
            DrawGraphic drawGraphic = new DrawGraphic(this, rect, text, imageProxy, previewView);
            binding.parentlayout.addView(drawGraphic);
        }
    }

    private boolean hasPermission() {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
            return checkSelfPermission(PERMISSION_CAMERA) == PackageManager.PERMISSION_GRANTED;
        } else {
            // Permissions are granted at install time below API 23.
            return true;
        }
    }

    private void requestPermission() {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
            if (shouldShowRequestPermissionRationale(PERMISSION_CAMERA)) {
                Toast.makeText(
                        this,
                        "Camera permission is required for this demo",
                        Toast.LENGTH_LONG)
                        .show();
            }
            requestPermissions(new String[]{PERMISSION_CAMERA}, PERMISSIONS_REQUEST);
        }
    }

    @Override
    public void onRequestPermissionsResult(
            final int requestCode, final String[] permissions, final int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode == PERMISSIONS_REQUEST) {
            if (allPermissionsGranted(grantResults)) {
                // Start CameraX
                startCamera();
            } else {
                requestPermission();
            }
        }
    }

    private static boolean allPermissionsGranted(final int[] grantResults) {
        for (int result : grantResults) {
            if (result != PackageManager.PERMISSION_GRANTED) {
                return false;
            }
        }
        return true;
    }
}

所有这些都引出了我的问题:为什么boundingBox的坐标会稍微有些偏移。如需任何补充信息,我会应要求提供。


解决方案

如模型说明中所述;

图像数据:ByteBuffer大小为192 x 192 x 3 x像素深度,其中,对于浮点模型,像素深度为4,对于量化模型,为1。

确保media.Image具有相同的分辨率。如果您提供的是不同的图像数据,这可能会导致错误的边界框和检测。这很可能就是为什么它一开始就被检测为铲子的原因。 您可以将ImageAnalysis配置设置为以此分辨率向您发送图像,或者在将其作为模型的输入之前必须调整图像的大小。

请记住,输出的边界框坐标是基于192 x 192的输入图像的。现在,您需要将这些坐标转换为预览视图(PreviewView)的坐标。为此有很多解决方案,例如可以参考此处给出的方法。

相关文章