Fixing incorrect boundingBox coordinates?
I am developing an object detection app for Android (Java) using Google ML Kit and CameraX. I am also using a TensorFlow model, which can be found here. My problem is that the coordinates of my boundingBox are slightly misaligned, as shown in the image below. Please ignore the fact that it is being detected as a shovel; my question is currently focused on getting the graphic drawn on screen to line up with the captured image.
Here is the class used to draw the GraphicOverlay:
DrawGraphic.java:
public class DrawGraphic extends View {

    Paint borderPaint, textPaint;
    Rect rect;
    String text;
    ImageProxy imageProxy;
    PreviewView previewView;

    public DrawGraphic(Context context, Rect rect, String text, ImageProxy imageProxy, PreviewView previewView) {
        super(context);
        this.rect = rect;
        this.text = text;
        // Keep references to the frame and the preview so onDraw() can build the mapping matrix.
        this.imageProxy = imageProxy;
        this.previewView = previewView;

        borderPaint = new Paint();
        borderPaint.setColor(Color.WHITE);
        borderPaint.setStrokeWidth(10f);
        borderPaint.setStyle(Paint.Style.STROKE);

        textPaint = new Paint();
        textPaint.setColor(Color.WHITE);
        textPaint.setStrokeWidth(50f);
        textPaint.setTextSize(32f);
        textPaint.setStyle(Paint.Style.FILL);
    }

    @Override
    protected void onDraw(Canvas canvas) {
        super.onDraw(canvas);
        // Apply the image-to-view mapping once; calling both setMatrix() and
        // concat() with the same matrix would apply it twice.
        canvas.concat(getMappingMatrix(imageProxy, previewView));
        canvas.drawText(text, rect.centerX(), rect.centerY(), textPaint);
        // drawRect() expects (left, top, right, bottom).
        canvas.drawRect(rect.left, rect.top, rect.right, rect.bottom, borderPaint);
    }

    Matrix getMappingMatrix(ImageProxy imageProxy, PreviewView previewView) {
        Rect cropRect = imageProxy.getCropRect();
        int rotationDegrees = imageProxy.getImageInfo().getRotationDegrees();
        Matrix matrix = new Matrix();

        // Four corners of the crop rect, clockwise from the top left.
        float[] source = {
                cropRect.left, cropRect.top,
                cropRect.right, cropRect.top,
                cropRect.right, cropRect.bottom,
                cropRect.left, cropRect.bottom
        };

        // Four corners of the PreviewView, clockwise from the top left.
        float[] destination = {
                0f, 0f,
                previewView.getWidth(), 0f,
                previewView.getWidth(), previewView.getHeight(),
                0f, previewView.getHeight()
        };

        // Shift the destination corners to compensate for the frame rotation.
        int vertexSize = 2;
        int shiftOffset = rotationDegrees / 90 * vertexSize;
        float[] tempArray = destination.clone();
        for (int toIndex = 0; toIndex < source.length; toIndex++) {
            int fromIndex = (toIndex + shiftOffset) % source.length;
            destination[toIndex] = tempArray[fromIndex];
        }
        matrix.setPolyToPoly(source, 0, destination, 0, 4);
        return matrix;
    }
}
MainActivity.java:
public class MainActivity extends AppCompatActivity {

    private static final int PERMISSIONS_REQUEST = 1;
    private static final String PERMISSION_CAMERA = Manifest.permission.CAMERA;
    public static final Size DESIRED_PREVIEW_SIZE = new Size(640, 480);

    private PreviewView previewView;
    ActivityMainBinding binding;

    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        binding = ActivityMainBinding.inflate(getLayoutInflater());
        setContentView(binding.getRoot());
        previewView = findViewById(R.id.previewView);
        if (hasPermission()) {
            // Start CameraX
            startCamera();
        } else {
            requestPermission();
        }
    }

    @SuppressLint("UnsafeOptInUsageError")
    private void startCamera() {
        ListenableFuture<ProcessCameraProvider> cameraProviderFuture = ProcessCameraProvider.getInstance(this);
        cameraProviderFuture.addListener(() -> {
            // Camera provider is now guaranteed to be available
            try {
                ProcessCameraProvider cameraProvider = cameraProviderFuture.get();

                // Set up the view finder use case to display the camera preview
                Preview preview = new Preview.Builder().build();

                // Choose the camera by requiring a lens facing
                CameraSelector cameraSelector = new CameraSelector.Builder()
                        .requireLensFacing(CameraSelector.LENS_FACING_BACK)
                        .build();

                // Image analysis
                ImageAnalysis imageAnalysis =
                        new ImageAnalysis.Builder()
                                .setTargetResolution(DESIRED_PREVIEW_SIZE)
                                .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
                                .build();

                imageAnalysis.setAnalyzer(ContextCompat.getMainExecutor(this), imageProxy -> {
                    // Rotation of the frame relative to the device's natural orientation
                    int rotationDegrees = imageProxy.getImageInfo().getRotationDegrees();
                    Log.v("ImageAnalysis_degrees", String.valueOf(rotationDegrees));

                    @SuppressLint("UnsafeExperimentalUsageError")
                    Image mediaImage = imageProxy.getImage();
                    if (mediaImage != null) {
                        InputImage image =
                                InputImage.fromMediaImage(mediaImage, imageProxy.getImageInfo().getRotationDegrees());

                        // Pass the image to the ML Kit Vision API
                        LocalModel localModel =
                                new LocalModel.Builder()
                                        .setAssetFilePath("mobilenet_v1_0.75_192_quantized_1_metadata_1.tflite")
                                        .build();

                        CustomObjectDetectorOptions customObjectDetectorOptions =
                                new CustomObjectDetectorOptions.Builder(localModel)
                                        .setDetectorMode(CustomObjectDetectorOptions.STREAM_MODE)
                                        .enableClassification()
                                        .setClassificationConfidenceThreshold(0.5f)
                                        .setMaxPerObjectLabelCount(3)
                                        .build();

                        ObjectDetector objectDetector =
                                ObjectDetection.getClient(customObjectDetectorOptions);

                        objectDetector.process(image)
                                .addOnSuccessListener(detectedObjects -> {
                                    getObjectResults(detectedObjects, imageProxy);
                                    Log.d("TAG", "onSuccess" + detectedObjects.size());
                                    for (DetectedObject detectedObject : detectedObjects) {
                                        Rect boundingBox = detectedObject.getBoundingBox();
                                        Integer trackingId = detectedObject.getTrackingId();
                                        for (DetectedObject.Label label : detectedObject.getLabels()) {
                                            String text = label.getText();
                                            int index = label.getIndex();
                                            float confidence = label.getConfidence();
                                        }
                                    }
                                })
                                .addOnFailureListener(e -> Log.e("TAG", e.getLocalizedMessage()))
                                .addOnCompleteListener(result -> imageProxy.close());
                    }
                });

                // Connect the preview use case to the previewView
                preview.setSurfaceProvider(previewView.getSurfaceProvider());

                // Attach use cases to the camera with the same lifecycle owner
                if (cameraProvider != null) {
                    Camera camera = cameraProvider.bindToLifecycle(
                            this,
                            cameraSelector,
                            imageAnalysis,
                            preview);
                }
            } catch (ExecutionException | InterruptedException e) {
                e.printStackTrace();
            }
        }, ContextCompat.getMainExecutor(this));
    }

    private void getObjectResults(List<DetectedObject> detectedObjects, ImageProxy imageProxy) {
        for (DetectedObject object : detectedObjects) {
            if (binding.parentlayout.getChildCount() > 1) {
                binding.parentlayout.removeViewAt(1);
            }
            Rect rect = object.getBoundingBox();
            String text = "Undefined";
            if (object.getLabels().size() != 0) {
                text = object.getLabels().get(0).getText();
            }
            // Pass the frame and the preview so DrawGraphic can map image
            // coordinates into view coordinates.
            DrawGraphic drawGraphic = new DrawGraphic(this, rect, text, imageProxy, previewView);
            binding.parentlayout.addView(drawGraphic);
        }
    }

    private boolean hasPermission() {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
            return checkSelfPermission(PERMISSION_CAMERA) == PackageManager.PERMISSION_GRANTED;
        } else {
            return true;
        }
    }

    private void requestPermission() {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
            if (shouldShowRequestPermissionRationale(PERMISSION_CAMERA)) {
                Toast.makeText(
                        this,
                        "Camera permission is required for this demo",
                        Toast.LENGTH_LONG)
                        .show();
            }
            requestPermissions(new String[]{PERMISSION_CAMERA}, PERMISSIONS_REQUEST);
        }
    }

    @Override
    public void onRequestPermissionsResult(
            final int requestCode, final String[] permissions, final int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode == PERMISSIONS_REQUEST) {
            if (allPermissionsGranted(grantResults)) {
                // Start CameraX
                startCamera();
            } else {
                requestPermission();
            }
        }
    }

    private static boolean allPermissionsGranted(final int[] grantResults) {
        for (int result : grantResults) {
            if (result != PackageManager.PERMISSION_GRANTED) {
                return false;
            }
        }
        return true;
    }
}
All of this leads to my question: why is the boundingBox slightly off? Any further information needed to clarify this question will be provided on request.
Solution
As stated in the model description:
"Make sure the image data: ByteBuffer is of size 192 x 192 x 3 x PIXEL_DEPTH, where PIXEL_DEPTH is 4 for a float model and 1 for a quantized model."
Make sure your media.Image has that same resolution. Feeding the model image data of a different size can lead to wrong bounding boxes and detections, and is most likely also why it gets detected as a shovel in the first place.
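To see what resolution the analyzer actually receives, a quick log line inside your existing analyzer (using the imageProxy you already have in scope) will expose the mismatch:

    // Sanity check: compare the analyzed frame size against the model's input size.
    Log.d("TAG", "analyzed frame: " + imageProxy.getWidth() + "x" + imageProxy.getHeight()
            + " (model expects 192x192)");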
You can either configure your ImageAnalysis to send you images at this resolution, or resize the image before feeding it to the model as input.
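A minimal sketch of the resize approach, assuming you already convert the frame to a Bitmap somewhere (the helper name toModelInput and its Bitmap parameter are illustrative, not from the original code). Note that setTargetResolution(new Size(192, 192)) is only a hint to CameraX, which picks the nearest supported size, so resizing is the more reliable option:

    private static final int MODEL_INPUT_SIZE = 192;

    // Illustrative helper: scale a camera frame to the model's expected input
    // size before handing it to ML Kit, so the model sees a 192 x 192 image.
    private InputImage toModelInput(Bitmap frame, int rotationDegrees) {
        Bitmap resized = Bitmap.createScaledBitmap(
                frame, MODEL_INPUT_SIZE, MODEL_INPUT_SIZE, /* filter= */ true);
        return InputImage.fromBitmap(resized, rotationDegrees);
    }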
Keep in mind that the output bounding box will then be relative to the 192 x 192 image, so you still need to transform those coordinates into the coordinate system of your preview view. There are many solutions for that, but you can use this.
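As a rough sketch of that transform (assuming the box is reported in 192 x 192 input coordinates and the preview fills the whole view; it ignores any letterboxing from the PreviewView scale type, which the linked solution handles properly):

    // Illustrative: map a bounding box from 192 x 192 model coordinates into
    // PreviewView pixel coordinates with a plain per-axis scale.
    private RectF mapToPreview(Rect box, PreviewView previewView) {
        float scaleX = previewView.getWidth() / 192f;
        float scaleY = previewView.getHeight() / 192f;
        return new RectF(
                box.left * scaleX,
                box.top * scaleY,
                box.right * scaleX,
                box.bottom * scaleY);
    }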