You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
170 lines
4.2 KiB
170 lines
4.2 KiB
package ocr
|
|
|
|
import (
|
|
"image"
|
|
"image/color"
|
|
"math"
|
|
|
|
"github.com/LKKlein/gocv"
|
|
)
|
|
|
|
func resizeByShape(img gocv.Mat, resizeShape []int) (gocv.Mat, int, int) {
|
|
resizeH := resizeShape[0]
|
|
resizeW := resizeShape[1]
|
|
gocv.Resize(img, &img, image.Pt(resizeW, resizeH), 0, 0, gocv.InterpolationLinear)
|
|
return img, resizeH, resizeW
|
|
}
|
|
|
|
func resizeByMaxLen(img gocv.Mat, maxLen int) (gocv.Mat, int, int) {
|
|
oriH := img.Rows()
|
|
oriW := img.Cols()
|
|
var resizeH, resizeW int = oriH, oriW
|
|
|
|
var ratio float64 = 1.0
|
|
if resizeH > maxLen || resizeW > maxLen {
|
|
if resizeH > resizeW {
|
|
ratio = float64(maxLen) / float64(resizeH)
|
|
} else {
|
|
ratio = float64(maxLen) / float64(resizeW)
|
|
}
|
|
}
|
|
|
|
resizeH = int(float64(resizeH) * ratio)
|
|
resizeW = int(float64(resizeW) * ratio)
|
|
|
|
if resizeH%32 == 0 {
|
|
resizeH = resizeH
|
|
} else if resizeH/32 <= 1 {
|
|
resizeH = 32
|
|
} else {
|
|
resizeH = (resizeH/32 - 1) * 32
|
|
}
|
|
|
|
if resizeW%32 == 0 {
|
|
resizeW = resizeW
|
|
} else if resizeW/32 <= 1 {
|
|
resizeW = 32
|
|
} else {
|
|
resizeW = (resizeW/32 - 1) * 32
|
|
}
|
|
|
|
if resizeW <= 0 || resizeH <= 0 {
|
|
return gocv.NewMat(), 0, 0
|
|
}
|
|
|
|
gocv.Resize(img, &img, image.Pt(resizeW, resizeH), 0, 0, gocv.InterpolationLinear)
|
|
return img, resizeH, resizeW
|
|
}
|
|
|
|
func normPermute(img gocv.Mat, mean []float32, std []float32, scaleFactor float32) []float32 {
|
|
img.ConvertTo(&img, gocv.MatTypeCV32F)
|
|
img.DivideFloat(scaleFactor)
|
|
|
|
c := gocv.Split(img)
|
|
data := make([]float32, img.Rows()*img.Cols()*img.Channels())
|
|
for i := 0; i < 3; i++ {
|
|
c[i].SubtractFloat(mean[i])
|
|
c[i].DivideFloat(std[i])
|
|
defer c[i].Close()
|
|
x, _ := c[i].DataPtrFloat32()
|
|
copy(data[i*img.Rows()*img.Cols():], x)
|
|
}
|
|
return data
|
|
}
|
|
|
|
type DetPreProcess interface {
|
|
Run(gocv.Mat) ([]float32, int, int)
|
|
}
|
|
|
|
// DBPreProcess holds the settings used by its Run method to resize and
// normalise an image.
type DBPreProcess struct {
	// resizeType selects the resize strategy in Run: 0 limits the longest
	// side to maxSideLen, any other value resizes to imageShape.
	resizeType int
	// imageShape is the fixed target size, read as [height, width].
	imageShape []int
	// maxSideLen is the side-length limit used when resizeType is 0.
	maxSideLen int
	// mean and std are the per-channel normalisation parameters.
	mean, std []float32
	// scaleFactor divides the pixel values before mean/std are applied.
	scaleFactor float32
}
|
|
|
|
func NewDBProcess(shape []int, sideLen int) *DBPreProcess {
|
|
db := &DBPreProcess{
|
|
resizeType: 0,
|
|
imageShape: shape,
|
|
maxSideLen: sideLen,
|
|
mean: []float32{0.485, 0.456, 0.406},
|
|
std: []float32{0.229, 0.224, 0.225},
|
|
scaleFactor: 255.0,
|
|
}
|
|
if len(shape) > 0 {
|
|
db.resizeType = 1
|
|
}
|
|
if sideLen == 0 {
|
|
db.maxSideLen = 2400
|
|
}
|
|
return db
|
|
}
|
|
|
|
func (d *DBPreProcess) Run(img gocv.Mat) ([]float32, int, int) {
|
|
var resizeH, resizeW int
|
|
if d.resizeType == 0 {
|
|
img, resizeH, resizeW = resizeByMaxLen(img, d.maxSideLen)
|
|
} else {
|
|
img, resizeH, resizeW = resizeByShape(img, d.imageShape)
|
|
}
|
|
|
|
im := normPermute(img, d.mean, d.std, d.scaleFactor)
|
|
return im, resizeH, resizeW
|
|
}
|
|
|
|
func clsResize(img gocv.Mat, resizeShape []int) gocv.Mat {
|
|
imgH, imgW := resizeShape[1], resizeShape[2]
|
|
h, w := img.Rows(), img.Cols()
|
|
ratio := float64(w) / float64(h)
|
|
var resizeW int
|
|
if math.Ceil(float64(imgH)*ratio) > float64(imgW) {
|
|
resizeW = imgW
|
|
} else {
|
|
resizeW = int(math.Ceil(float64(imgH) * ratio))
|
|
}
|
|
gocv.Resize(img, &img, image.Pt(resizeW, imgH), 0, 0, gocv.InterpolationLinear)
|
|
if resizeW < imgW {
|
|
gocv.CopyMakeBorder(img, &img, 0, 0, 0, imgW-resizeW, gocv.BorderConstant, color.RGBA{0, 0, 0, 0})
|
|
}
|
|
return img
|
|
}
|
|
|
|
func crnnPreprocess(img gocv.Mat, resizeShape []int, mean []float32, std []float32,
|
|
scaleFactor float32, whRatio float64, charType string) []float32 {
|
|
imgH := resizeShape[1]
|
|
imgW := resizeShape[2]
|
|
if charType == "ch" {
|
|
imgW = int(32 * whRatio)
|
|
}
|
|
h, w := img.Rows(), img.Cols()
|
|
ratio := float64(w) / float64(h)
|
|
var resizeW int
|
|
if math.Ceil(float64(imgH)*ratio) > float64(imgW) {
|
|
resizeW = imgW
|
|
} else {
|
|
resizeW = int(math.Ceil(float64(imgH) * ratio))
|
|
}
|
|
gocv.Resize(img, &img, image.Pt(resizeW, imgH), 0, 0, gocv.InterpolationLinear)
|
|
|
|
img.ConvertTo(&img, gocv.MatTypeCV32F)
|
|
img.DivideFloat(scaleFactor)
|
|
img.SubtractScalar(gocv.NewScalar(float64(mean[0]), float64(mean[1]), float64(mean[2]), 0))
|
|
img.DivideScalar(gocv.NewScalar(float64(std[0]), float64(std[1]), float64(std[2]), 0))
|
|
|
|
if resizeW < imgW {
|
|
gocv.CopyMakeBorder(img, &img, 0, 0, 0, imgW-resizeW, gocv.BorderConstant, color.RGBA{0, 0, 0, 0})
|
|
}
|
|
|
|
c := gocv.Split(img)
|
|
data := make([]float32, img.Rows()*img.Cols()*img.Channels())
|
|
for i := 0; i < 3; i++ {
|
|
defer c[i].Close()
|
|
x, _ := c[i].DataPtrFloat32()
|
|
copy(data[i*img.Rows()*img.Cols():], x)
|
|
}
|
|
return data
|
|
}
|