Skip to content

Commit 3a60cde

Browse files
authored
feat: Read ID cards (#571)
1 parent 4dc2acd commit 3a60cde

File tree

6 files changed

+217
-53
lines changed

6 files changed

+217
-53
lines changed

app/ios/CameraView.swift

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,37 @@
11
// CameraView.swift
22
// SwiftUI camera preview with frame capture callback
33

4+
import UIKit
45
import SwiftUI
56
import AVFoundation
67

78
/// SwiftUI wrapper around `CameraViewController`.
/// Delivers each captured frame together with the Vision-normalized region of
/// interest (ROI) the MRZ scanner should read, via `frameHandler`.
struct CameraView: UIViewControllerRepresentable {
    /// Called per captured frame with the upright image and the ROI rectangle.
    var frameHandler: (UIImage, CGRect) -> Void
    /// Minimum interval between delivered frames, in seconds.
    var captureInterval: TimeInterval = 0.5
    /// Debug flag: draws the green ROI rectangle on top of the preview.
    /// Set this value in LiveMRZScannerView.swift.
    var showOverlay: Bool = true

    func makeUIViewController(context: Context) -> CameraViewController {
        let viewController = CameraViewController()
        viewController.frameHandler = frameHandler
        viewController.captureInterval = captureInterval
        viewController.showOverlay = showOverlay
        return viewController
    }

    func updateUIViewController(_ uiViewController: CameraViewController, context: Context) {
        // Keep the controller in sync when SwiftUI re-renders with a new flag.
        uiViewController.showOverlay = showOverlay
    }
}
2025

2126
class CameraViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
22-
var frameHandler: ((UIImage) -> Void)?
27+
var frameHandler: ((UIImage, CGRect) -> Void)?
2328
var captureInterval: TimeInterval = 0.5
29+
var showOverlay: Bool = false
2430
private let session = AVCaptureSession()
2531
private let videoOutput = AVCaptureVideoDataOutput()
2632
private var lastCaptureTime = Date(timeIntervalSince1970: 0)
2733
private var previewLayer: AVCaptureVideoPreviewLayer?
34+
private var roiOverlay: UIView? = nil
2835

2936
override func viewDidLoad() {
3037
super.viewDidLoad()
@@ -45,12 +52,81 @@ class CameraViewController: UIViewController, AVCaptureVideoDataOutputSampleBuff
4552
if let previewLayer = previewLayer {
4653
view.layer.addSublayer(previewLayer)
4754
}
55+
// ROI overlay - for debugging
56+
if showOverlay && roiOverlay == nil {
57+
let overlay = UIView()
58+
overlay.layer.borderColor = UIColor.green.cgColor
59+
overlay.layer.borderWidth = 2.0
60+
overlay.backgroundColor = UIColor.clear
61+
overlay.isUserInteractionEnabled = false
62+
view.addSubview(overlay)
63+
roiOverlay = overlay
64+
}
4865
session.startRunning()
4966
}
5067

68+
/// Computes, in view coordinates, the green guide box where the MRZ is
/// expected: the bottom 25% of a Lottie frame sized at 130% of the visible
/// preview, nudged up by 15% of the visible height.
/// Returns `.zero` until the preview layer exists or layout is degenerate.
private func calculateGreenBoxFrame() -> CGRect {
    guard let previewLayer = previewLayer else { return .zero }
    let videoRect = previewLayer.layerRectConverted(fromMetadataOutputRect: CGRect(x: 0, y: 0, width: 1, height: 1))
    let visibleRect = videoRect.intersection(view.bounds)
    // A null/empty intersection (no overlap yet) would poison the math below.
    guard !visibleRect.isNull, !visibleRect.isEmpty else { return .zero }

    // Lottie animation frame: 130% of the visible preview in both dimensions.
    let lottieWidth = visibleRect.width * 1.3
    let lottieHeight = visibleRect.height * 1.3

    // The MRZ band is the bottom 25% of the Lottie animation.
    let boxHeight = lottieHeight * 0.25

    // Center the box horizontally and keep it within the visible width.
    // NOTE(review): boxX is measured from x == 0, not visibleRect.minX; with a
    // 130% width it always clamps to 0 today — confirm if the preview can be inset.
    let boxX = max(0, (visibleRect.width - lottieWidth) / 2)
    let boxWidth = min(lottieWidth, visibleRect.width)

    // Vertical offset to move the ROI up by 15% of the visible height.
    let verticalOffset = visibleRect.height * 0.15

    // Keep the green box within the visible area.
    let maxY = visibleRect.maxY - verticalOffset
    let minY = visibleRect.minY
    // Was: max(minY, min(maxY - boxHeight, maxY - boxHeight)) — the inner min
    // compared identical operands, so it simplifies to this.
    let boxY = max(minY, maxY - boxHeight)

    return CGRect(x: boxX, y: boxY, width: boxWidth, height: boxHeight)
}
95+
96+
/// The green-box ROI converted to Vision's normalized image coordinates:
/// bottom-left origin, all components clamped to [0, 1].
/// Returns `.zero` until the preview layer exists or layout is degenerate.
var roiInImageCoordinates: CGRect {
    guard let previewLayer = previewLayer else { return .zero }
    let videoRect = previewLayer.layerRectConverted(fromMetadataOutputRect: CGRect(x: 0, y: 0, width: 1, height: 1))
    // Guard against division by zero before layout has produced a real rect.
    guard videoRect.width > 0, videoRect.height > 0 else { return .zero }
    let greenBox = calculateGreenBoxFrame()

    // Map greenBox to normalized coordinates within videoRect (top-left origin).
    let normX = (greenBox.minX - videoRect.minX) / videoRect.width
    let normY = (greenBox.minY - videoRect.minY) / videoRect.height
    let normWidth = greenBox.width / videoRect.width
    let normHeight = greenBox.height / videoRect.height

    // Vision's regionOfInterest must stay within the unit square (0,0)-(1,1).
    let clampedX = max(0, min(1, normX))
    let clampedY = max(0, min(1, normY))
    let clampedWidth = max(0, min(1 - clampedX, normWidth))
    let clampedHeight = max(0, min(1 - clampedY, normHeight))

    // Vision expects (0,0) at the bottom-left, so flip the Y axis.
    let roiYVision = 1.0 - clampedY - clampedHeight
    let roi = CGRect(x: clampedX, y: roiYVision, width: clampedWidth, height: clampedHeight)

    #if DEBUG
    // This property is evaluated for every captured frame; keep the log debug-only.
    print("[CameraViewController] FINAL ROI for Vision (flipped Y, visible only): \(roi)")
    #endif
    return roi
}
120+
51121
/// Keeps the preview layer and the debug ROI overlay fitted to the view
/// after every layout pass.
override func viewDidLayoutSubviews() {
    super.viewDidLayoutSubviews()
    // Ensure previewLayer matches the visible area.
    previewLayer?.frame = view.bounds
    // Removed: an unconditional print of view.bounds that fired on every
    // layout pass and spammed the console in release builds.
    if let overlay = roiOverlay {
        overlay.isHidden = !showOverlay
        overlay.frame = calculateGreenBoxFrame()
    }
}
55131

56132
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
@@ -62,10 +138,11 @@ class CameraViewController: UIViewController, AVCaptureVideoDataOutputSampleBuff
62138
let context = CIContext()
63139
if let cgImage = context.createCGImage(ciImage, from: ciImage.extent) {
64140
let originalImage = UIImage(cgImage: cgImage, scale: UIScreen.main.scale, orientation: .right)
65-
// Rotate to .up orientation
66141
let uprightImage = originalImage.fixedOrientation()
142+
print("[CameraViewController] cgImage size: \(cgImage.width)x\(cgImage.height), preview size: \(view.bounds.size), orientation: \(uprightImage.imageOrientation.rawValue)")
143+
let roi = roiInImageCoordinates
67144
DispatchQueue.main.async { [weak self] in
68-
self?.frameHandler?(uprightImage)
145+
self?.frameHandler?(uprightImage, roi)
69146
}
70147
}
71148
}

app/ios/LiveMRZScannerView.swift

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -80,38 +80,41 @@ struct LiveMRZScannerView: View {
8080

8181
var body: some View {
8282
ZStack(alignment: .bottom) {
83-
CameraView { image in
84-
// print("[LiveMRZScannerView] CameraView frame received. Size: \(image.size), Orientation: \(image.imageOrientation.rawValue)")
85-
if scanComplete { return }
86-
MRZScanner.scan(image: image) { result, boxes in
87-
recognizedText = result
88-
lastMRZDetection = Date()
89-
let parser = QKMRZParser(ocrCorrection: false)
90-
if let mrzResult = parser.parse(mrzString: result) {
91-
let doc = mrzResult;
92-
if doc.allCheckDigitsValid == true && !scanComplete {
93-
parsedMRZ = mrzResult
94-
scanComplete = true
95-
onScanComplete?(mrzResult)
96-
onScanResultAsDict?(mapVisionResultToDictionary(mrzResult))
97-
} else if doc.isDocumentNumberValid == false && !scanComplete {
98-
if let correctedResult = singleCorrectDocumentNumberInMRZ(result: result, docNumber: doc.documentNumber, parser: parser) {
99-
let correctedDoc = correctedResult
100-
if correctedDoc.allCheckDigitsValid == true {
101-
parsedMRZ = correctedResult
102-
scanComplete = true
103-
onScanComplete?(correctedResult)
104-
onScanResultAsDict?(mapVisionResultToDictionary(correctedResult))
83+
CameraView(
84+
frameHandler: { image, roi in
85+
if scanComplete { return }
86+
MRZScanner.scan(image: image, roi: roi) { result, boxes in
87+
recognizedText = result
88+
lastMRZDetection = Date()
89+
// print("[LiveMRZScannerView] result: \(result)")
90+
let parser = QKMRZParser(ocrCorrection: false)
91+
if let mrzResult = parser.parse(mrzString: result) {
92+
let doc = mrzResult;
93+
if doc.allCheckDigitsValid == true && !scanComplete {
94+
parsedMRZ = mrzResult
95+
scanComplete = true
96+
onScanComplete?(mrzResult)
97+
onScanResultAsDict?(mapVisionResultToDictionary(mrzResult))
98+
} else if doc.isDocumentNumberValid == false && !scanComplete {
99+
if let correctedResult = singleCorrectDocumentNumberInMRZ(result: result, docNumber: doc.documentNumber, parser: parser) {
100+
let correctedDoc = correctedResult
101+
if correctedDoc.allCheckDigitsValid == true {
102+
parsedMRZ = correctedResult
103+
scanComplete = true
104+
onScanComplete?(correctedResult)
105+
onScanResultAsDict?(mapVisionResultToDictionary(correctedResult))
106+
}
105107
}
106108
}
107-
}
108-
} else {
109-
if !scanComplete {
110-
parsedMRZ = nil
109+
} else {
110+
if !scanComplete {
111+
parsedMRZ = nil
112+
}
111113
}
112114
}
113-
}
114-
}
115+
},
116+
showOverlay: false
117+
)
115118

116119
VStack {
117120
if !scanComplete {

app/ios/MRZScanner.swift

Lines changed: 62 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,47 +5,95 @@ import Vision
55
import UIKit
66

77
/// Runs Vision text recognition over a camera frame and extracts MRZ lines.
/// Supports TD1 ID cards (3 lines x 30 chars) and TD3 passports (2 lines x 44 chars).
struct MRZScanner {

    /// Scans `image` for machine-readable-zone text.
    ///
    /// - Parameters:
    ///   - image: Frame to scan; must carry a valid `cgImage`.
    ///   - roi: Optional region of interest in Vision's normalized,
    ///     bottom-left-origin coordinates. When nil, the bottom 20% of the
    ///     image is scanned.
    ///   - completion: Invoked on the main queue with the newline-joined MRZ
    ///     lines ("" when none found, "Image not valid"/"No text found" on
    ///     failure) and the matching normalized bounding boxes.
    static func scan(image: UIImage, roi: CGRect? = nil, completion: @escaping (String, [CGRect]) -> Void) {
        guard let cgImage = image.cgImage else {
            completion("Image not valid", [])
            return
        }

        let request = VNRecognizeTextRequest { request, error in
            if let error = error {
                print("Vision error: \(error)")
            }

            guard let observations = request.results as? [VNRecognizedTextObservation] else {
                print("No text observations found")
                DispatchQueue.main.async { completion("No text found", []) }
                return
            }

            var mrzLines: [String] = []
            var boxes: [CGRect] = []

            // Vision's Y axis points up, so a larger minY is higher on the
            // page: sort observations top-to-bottom.
            let sortedObservations = observations.sorted { $0.boundingBox.minY > $1.boundingBox.minY }

            for obs in sortedObservations {
                guard let candidate = obs.topCandidates(1).first else { continue }
                let text = candidate.string

                // An MRZ line either contains '<' filler or is an unbroken run
                // of [A-Z0-9<]: 30 chars (TD1) or 44 chars (TD3) — covers the
                // case where a line happens to contain no '<' at all.
                if text.contains("<") ||
                    text.matches(pattern: "^[A-Z0-9<]{30}$") ||
                    text.matches(pattern: "^[A-Z0-9<]{44}$") {
                    mrzLines.append(text)
                    boxes.append(obs.boundingBox) // Normalized coordinates

                    // Stop as soon as a complete MRZ has been collected.
                    if (mrzLines.count == 2 && mrzLines.allSatisfy { $0.count == 44 }) || // TD3 - passport
                        (mrzLines.count == 3 && mrzLines.allSatisfy { $0.count == 30 }) { // TD1 - ID card
                        break
                    }
                } else {
                    print("Did not match MRZ pattern: \(text)")
                }
            }

            // Deliver on the main queue: Vision invokes this callback on the
            // background queue used below, and callers mutate SwiftUI state
            // from the completion handler.
            if mrzLines.isEmpty {
                print("No MRZ lines found")
                DispatchQueue.main.async { completion("", []) }
            } else {
                print("Found \(mrzLines.count) MRZ lines")
                DispatchQueue.main.async { completion(mrzLines.joined(separator: "\n"), boxes) }
            }
        }
        request.recognitionLevel = .accurate
        request.usesLanguageCorrection = false
        request.recognitionLanguages = ["en"]

        // Use the caller's ROI when given; otherwise default to the bottom 20%.
        // (The original derived 0.2 via roiHeight / imageHeight, which cancels out.)
        let defaultRoi = CGRect(x: 0, y: 0, width: 1.0, height: 0.2)
        request.regionOfInterest = roi ?? defaultRoi
        print("[MRZScanner] Using ROI: \(request.regionOfInterest) (image size: \(cgImage.width)x\(cgImage.height))")

        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        DispatchQueue.global(qos: .userInitiated).async {
            do {
                try handler.perform([request])
            } catch {
                print("Failed to perform recognition: \(error)")
            }
        }
    }
}
94+
95+
extension String {
    /// Returns true when the regular expression `pattern` matches any part of
    /// the string (patterns anchored with ^...$ therefore match the whole string).
    func matches(pattern: String) -> Bool {
        range(of: pattern, options: .regularExpression) != nil
    }
}

app/ios/PassportOCRViewManager.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@ class PassportOCRView: UIView {
3737
"data": [
3838
"documentNumber": resultDict["documentNumber"] as? String ?? "",
3939
"expiryDate": resultDict["expiryDate"] as? String ?? "",
40-
"birthDate": resultDict["dateOfBirth"] as? String ?? ""
40+
"birthDate": resultDict["dateOfBirth"] as? String ?? "",
41+
"documentType": resultDict["documentType"] as? String ?? "",
42+
"countryCode": resultDict["countryCode"] as? String ?? ""
4143
]])
4244
}
4345
)

app/src/components/native/PassportCamera.tsx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ interface RCTPassportOCRViewManagerProps extends RCTFragmentViewManagerProps {
1818
documentNumber: string;
1919
expiryDate: string;
2020
birthDate: string;
21+
documentType: string;
22+
countryCode: string;
2123
};
2224
}>,
2325
) => void;
@@ -73,6 +75,8 @@ export const PassportCamera: React.FC<PassportCameraProps> = ({
7375
documentNumber: string;
7476
expiryDate: string;
7577
birthDate: string;
78+
documentType: string;
79+
countryCode: string;
7680
};
7781
}>,
7882
) => {
@@ -86,6 +90,8 @@ export const PassportCamera: React.FC<PassportCameraProps> = ({
8690
passportNumber: event.nativeEvent.data.documentNumber,
8791
dateOfBirth: event.nativeEvent.data.birthDate,
8892
dateOfExpiry: event.nativeEvent.data.expiryDate,
93+
documentType: event.nativeEvent.data.documentType,
94+
countryCode: event.nativeEvent.data.countryCode,
8995
});
9096
}
9197
},

0 commit comments

Comments
 (0)