Closed caxerx closed 2 years ago
Sounds great!
import SwiftUI
import PlaygroundSupport
import Vision
// Default demo image shown (and OCR'd) until the user picks a photo.
// NOTE(review): #imageLiteral already produces a UIImage; wrapping it in
// UIImage(...) looks redundant — confirm this initializer exists outside
// Xcode's playground literal expansion.
let defImage = UIImage(#imageLiteral(resourceName: "照片.png"))
/// A cluster of recognized text fragments that were grouped together
/// (same angle, similar height, overlapping boxes).
struct TextGroup: Identifiable {
    /// Builds a group from its member fragments, joining their strings with spaces.
    /// - Parameter items: The fragments belonging to this group (may be empty).
    init(items: [TextPos]) {
        self.items = items
        self.text = items.map(\.text).joined(separator: " ")
    }

    var id: UUID = UUID()
    /// The individual recognized fragments in this group.
    var items: [TextPos]
    /// All fragment strings joined with single spaces, in item order.
    var text: String

    /// Axis-aligned bounding box enclosing every corner of every fragment.
    /// Returns `.zero` for an empty group (the original force-unwrapped
    /// `items.first!` and crashed).
    var rect: CGRect {
        guard let seed = items.first?.topLeft else { return .zero }
        var minX = seed.x, maxX = seed.x
        var minY = seed.y, maxY = seed.y
        for item in items {
            for point in item.polygon {
                minX = min(minX, point.x)
                maxX = max(maxX, point.x)
                minY = min(minY, point.y)
                maxY = max(maxY, point.y)
            }
        }
        return CGRect(x: minX, y: minY, width: maxX - minX, height: maxY - minY)
    }
}
/// One OCR-recognized text fragment together with its quadrilateral corners.
struct TextPos: Identifiable {
    var id: UUID = UUID()
    var text: String
    var topLeft: CGPoint
    var topRight: CGPoint
    var bottomLeft: CGPoint
    var bottomRight: CGPoint

    /// Corner points in clockwise order, starting at the top-left.
    var polygon: [CGPoint] {
        [topLeft, topRight, bottomRight, bottomLeft]
    }

    /// Euclidean distance between the two left-hand corners
    /// (i.e. the fragment's height along its left edge).
    var leftHeight: Double {
        let dx = topLeft.x - bottomLeft.x
        let dy = topLeft.y - bottomLeft.y
        return abs(sqrt(pow(dx, 2) + pow(dy, 2)))
    }

    /// Rotation of the fragment's top edge, in radians,
    /// measured from top-left toward top-right.
    var radian: Double {
        atan2(topRight.y - topLeft.y, topRight.x - topLeft.x)
    }

    /// Rotation of the fragment, converted from `radian` to degrees.
    var angle: Double {
        radian * 180.0 / Double.pi
    }
}
/// Converts a polar offset (length + angle in degrees) into a Cartesian offset.
/// Note the deliberate axis swap relative to the usual convention:
/// x = sin, y = cos, i.e. the angle is measured from the +y axis — this matches
/// how `recognizeTextHandler` pushes box corners perpendicular to the baseline.
/// - Parameters:
///   - long: Offset length (the hypotenuse).
///   - angle: Direction in degrees.
/// - Returns: The (x, y) displacement.
func hypotenuse(long: Double, angle: Double) -> CGPoint {
    let radian = angle * Double.pi / 180.0  // degrees → radians (was a mutable `var` with an obscure 2π/360 factor)
    return CGPoint(x: sin(radian) * long, y: cos(radian) * long)
}
/// Separating Axis Theorem test for two convex polygons.
/// Projects both polygons onto the normal of every edge of both polygons;
/// if any axis yields disjoint projection intervals, the polygons cannot
/// intersect.
/// - Parameters:
///   - a: Corner points of the first convex polygon.
///   - b: Corner points of the second convex polygon.
/// - Returns: `true` when the polygons overlap or touch, `false` otherwise
///   (also `false` when either polygon is empty — the original crashed there).
func polygonsIntersecting(a: [CGPoint], b: [CGPoint]) -> Bool {
    guard !a.isEmpty, !b.isEmpty else { return false }
    for points in [a, b] {
        for i1 in 0..<points.count {
            let i2 = (i1 + 1) % points.count
            let p1 = points[i1]
            let p2 = points[i2]
            // Normal of edge p1→p2: the projection axis for this iteration.
            let normal = CGPoint(x: p2.y - p1.y, y: p1.x - p2.x)

            // Project polygon A onto the axis, tracking its interval.
            var minA: Double?
            var maxA: Double?
            for p in a {
                let projected = normal.x * p.x + normal.y * p.y
                if minA == nil || projected < minA! { minA = projected }
                if maxA == nil || projected > maxA! { maxA = projected }
            }

            // Project polygon B. BUG FIX: the original compared B's
            // projections against minA!/maxA! here, corrupting B's interval
            // and misreporting intersecting polygons as disjoint.
            var minB: Double?
            var maxB: Double?
            for p in b {
                let projected = normal.x * p.x + normal.y * p.y
                if minB == nil || projected < minB! { minB = projected }
                if maxB == nil || projected > maxB! { maxB = projected }
            }

            // Disjoint intervals on this axis ⇒ a separating axis exists.
            if maxA! < minB! || maxB! < minA! {
                return false
            }
        }
    }
    return true
}
/// Main playground view: shows an image, runs Vision text recognition on it,
/// and overlays a tappable, translucent green quad for each recognized
/// fragment; tapping prints the whole group's joined text.
struct ContentView: View {
    @State var showImagePicker: Bool = false
    // Photo picked by the user; `defImage` is used while this is nil.
    @State var image: UIImage? = nil
    // Fixed on-screen size the image is scaled into. Vision's normalized
    // [0,1] coordinates are multiplied by these when drawing the overlays.
    let frameW = 300.0
    let frameH = 450.0
    // Per-fragment OCR results, after the box-expansion step below.
    @State var data: [TextPos] = []
    // Fragments clustered into groups for display and tap-to-print.
    @State var textGroupList: [TextGroup] = []

    /// Kicks off Vision text recognition on the current image (or the default).
    /// Results arrive synchronously via `recognizeTextHandler`.
    func visionText() {
        // Get the CGImage on which to perform requests.
        guard let cgImage = (image ?? defImage).cgImage else { return }
        // Create a new image-request handler.
        let requestHandler = VNImageRequestHandler(cgImage: cgImage)
        // Create a new request to recognize text.
        let request = VNRecognizeTextRequest(completionHandler: recognizeTextHandler)
        do {
            // Perform the text-recognition request.
            try requestHandler.perform([request])
        } catch {
            print("Unable to perform the requests: \(error).")
        }
    }

    /// Vision completion handler: converts observations into `TextPos` values,
    /// expands each box vertically by half its height, then clusters boxes
    /// that overlap (and have similar angle/height) into `TextGroup`s.
    func recognizeTextHandler(request: VNRequest, error: Error?) {
        guard let observations =
                request.results as? [VNRecognizedTextObservation] else {
            return
        }
        // Keep only the top candidate string plus the observation's corners.
        data = observations.compactMap({ observation in
            return TextPos(
                text: observation.topCandidates(1)[0].string,
                topLeft: observation.topLeft,
                topRight: observation.topRight,
                bottomLeft: observation.bottomLeft,
                bottomRight: observation.bottomRight
            );
        })
        // Push the top corners up and the bottom corners down by half the box
        // height (along the box's own orientation) so boxes on adjacent lines
        // overlap and get grouped together.
        // NOTE(review): the same offset is reused for both corners of an edge,
        // which assumes near-rectangular boxes — confirm for skewed text.
        for index in 0..<data.count {
            let item = data[index]
            let angle = 360 - item.angle;
            let tl = item.topLeft;
            let ptl = hypotenuse(long: item.leftHeight/2, angle: angle);
            data[index].topLeft = CGPoint(x:tl.x + ptl.x, y: tl.y + ptl.y);
            let tr = item.topRight;
            let ptr = hypotenuse(long: item.leftHeight/2, angle: angle);
            data[index].topRight = CGPoint(x:tr.x + ptr.x, y: tr.y + ptr.y);
            let bl = item.bottomLeft;
            // angle + 180 flips the offset to the opposite direction.
            let pbl = hypotenuse(long: item.leftHeight/2, angle: angle + 180);
            data[index].bottomLeft = CGPoint(x:bl.x + pbl.x, y: bl.y + pbl.y);
            let br = item.bottomRight;
            let pbr = hypotenuse(long: item.leftHeight/2, angle: angle + 180);
            data[index].bottomRight = CGPoint(x:br.x + pbr.x, y: br.y + pbr.y);
        }
        // Greedy clustering: append each fragment to the first existing group
        // containing a fragment with angle within 5°, comparable height, and
        // an overlapping (expanded) polygon; otherwise start a new group.
        var groupData: [[TextPos]] = []
        data.forEach { newItem in
            let groupIndex = groupData.firstIndex { items in
                return nil != items.first { item in
                    let angleOk = abs(item.angle - newItem.angle) < 5
                    let heightOk = abs(item.leftHeight - newItem.leftHeight) < (min(item.leftHeight, newItem.leftHeight) / 2)
                    if( angleOk && heightOk) {
                        return polygonsIntersecting(a: item.polygon, b: newItem.polygon)
                    }
                    return false
                }
            }
            if(groupIndex != nil) {
                groupData[groupIndex!].append(newItem);
            } else {
                groupData.append([newItem])
            }
        }
        textGroupList = groupData.compactMap({ items in
            return TextGroup(items: items)
        })
    }

    var body: some View {
        VStack {
            ZStack{
                Image(uiImage: image ?? defImage)
                    .resizable()
                    .frame(width: frameW, height: frameH)
                // One filled quad per fragment; the y coordinate is flipped
                // (frameH - y*frameH) because Vision's normalized origin is at
                // the bottom-left while SwiftUI draws from the top-left.
                ForEach(textGroupList){ textGroup in
                    ForEach(textGroup.items) { item in
                        Path { path in
                            path.move(to: CGPoint(x: item.topLeft.x * frameW, y: frameH - item.topLeft.y * frameH))
                            path.addLine(to: CGPoint(x: item.topRight.x * frameW, y: frameH - item.topRight.y * frameH))
                            path.addLine(to: CGPoint(x: item.bottomRight.x * frameW, y: frameH - item.bottomRight.y * frameH))
                            path.addLine(to: CGPoint(x: item.bottomLeft.x * frameW, y: frameH - item.bottomLeft.y * frameH))
                            path.addLine(to: CGPoint(x: item.topLeft.x * frameW, y: frameH - item.topLeft.y * frameH))
                        }.fill(Color.green.opacity(0.5)).onTapGesture {
                            // Print the whole group's joined text on tap.
                            print(textGroup.text)
                        }
                    }
                }
            }
            .frame(width: frameW, height: frameH)
            // "选择图片" = "Choose image" (button label must stay as-is).
            Button("选择图片") {
                showImagePicker = true
            }
        }
        .sheet(isPresented: $showImagePicker) {
            ImagePicker(sourceType: .photoLibrary) { image in
                self.image = image
                showImagePicker = false
                // Re-run OCR on the freshly picked image.
                visionText()
            }
        }
        .onAppear(perform: visionText)
    }
}
/// SwiftUI wrapper around `UIImagePickerController` that delivers the picked
/// image through `onImagePicked` and dismisses itself afterwards.
struct ImagePicker: UIViewControllerRepresentable {
    @Environment(\.presentationMode)
    private var presentationMode
    /// Where the picker sources images from (e.g. `.photoLibrary`).
    let sourceType: UIImagePickerController.SourceType
    /// Called with the original image when the user picks one.
    let onImagePicked: (UIImage) -> Void

    /// Bridges UIKit's delegate callbacks back to the SwiftUI wrapper.
    final class Coordinator: NSObject,
                             UINavigationControllerDelegate,
                             UIImagePickerControllerDelegate {
        @Binding
        private var presentationMode: PresentationMode
        // Stored for parity with the wrapper; not read by the coordinator itself.
        private let sourceType: UIImagePickerController.SourceType
        private let onImagePicked: (UIImage) -> Void

        init(presentationMode: Binding<PresentationMode>,
             sourceType: UIImagePickerController.SourceType,
             onImagePicked: @escaping (UIImage) -> Void) {
            _presentationMode = presentationMode
            self.sourceType = sourceType
            self.onImagePicked = onImagePicked
        }

        func imagePickerController(_ picker: UIImagePickerController,
                                   didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey : Any]) {
            // Safe conditional cast instead of the original `as!`: if no
            // original image is present (unexpected for a photo-library pick),
            // just dismiss rather than crash.
            if let uiImage = info[.originalImage] as? UIImage {
                onImagePicked(uiImage)
            }
            presentationMode.dismiss()
        }

        func imagePickerControllerDidCancel(_ picker: UIImagePickerController) {
            presentationMode.dismiss()
        }
    }

    func makeCoordinator() -> Coordinator {
        return Coordinator(presentationMode: presentationMode,
                           sourceType: sourceType,
                           onImagePicked: onImagePicked)
    }

    func makeUIViewController(context: UIViewControllerRepresentableContext<ImagePicker>) -> UIImagePickerController {
        let picker = UIImagePickerController()
        picker.sourceType = sourceType
        picker.delegate = context.coordinator
        return picker
    }

    func updateUIViewController(_ uiViewController: UIImagePickerController,
                                context: UIViewControllerRepresentableContext<ImagePicker>) {
        // Nothing to update: the picker is configured once at creation.
    }
}
// Render the SwiftUI view as the playground's live view.
PlaygroundPage.current.setLiveView(ContentView())
@xioxin Thanks! You have done most of it! Would you mind to remove import PlaygroundSupport
and PlaygroundPage.current.setLiveView(ContentView())
and submit a pull request? I'll finish the rest of the job.
@caxerx Unfortunately, Live Text does not support Russian, it is incorrectly recognized as similar to English. Other languages are still available, so I will continue this work.
Live Text is currently supported in English, Chinese, French, Italian, German, Portuguese, and Spanish. Use Live Text and Visual Look Up on your iPhone
@tatsuz0u 我再完善一下代码并增加一些注释。
最新的代码 https://gist.github.com/xioxin/5c3d3c77721784fb690be90bc56f07a8 我不知道该把代码放在什么地方,代码量也很少,所以我就不PR了。
最新的代码 https://gist.github.com/xioxin/5c3d3c77721784fb690be90bc56f07a8 我不知道该把代码放在什么地方,代码量也很少,所以我就不PR了。
我只是想加 credits 好鼓勵更多貢獻者出現...如果可以的話隨便丟進專案下面一個地方就可以了。 而且直接拿來用也有 license 的問題。🤔
@xioxin That's bad news :( Hope Apple will add more language support in a future version.
Done. #227
Is your feature request related to a problem? Please describe. I'm always frustrated when some of my favorite doujinshi only have Russian version. However, I can't even recognize any of the Russian characters. It will be nice if I am able to copy the text in the image and translate them.
Describe the solution you'd like Adding an options to enable text recognition for the selected image. The Apple Vision framework might help for the image text recognize implementation.
Describe alternatives you've considered Currently, there is the copy image option, so we can copy the image to another 3rd-party text recognition service. However, this method is super inconvenient when reading continuous content.
Additional context Telegram has a similar feature that you can reference to: