From b3b58aab70f67b7fddafa5b2057246bbf0522a0f Mon Sep 17 00:00:00 2001 From: Guilherme Fickel Date: Thu, 7 Jan 2021 17:56:44 -0300 Subject: [PATCH] Adding all files --- .gitignore | 4 +- build_cpp_bindings_unix.sh | 4 + cpp_bindings/CMakeLists.txt | 17 ++ cpp_bindings/cpp_bindings.py | 82 +++++++ cpp_bindings/src/convex_hull.cpp | 43 ++++ cpp_bindings/src/convex_hull.hpp | 27 +++ cpp_bindings/src/dilate.cpp | 86 ++++++++ cpp_bindings/src/dilate.hpp | 16 ++ cpp_bindings/src/find_components.cpp | 214 ++++++++++++++++++ cpp_bindings/src/find_components.hpp | 53 +++++ cpp_bindings/src/rotated_bounding_box.cpp | 254 ++++++++++++++++++++++ cpp_bindings/src/rotated_bounding_box.hpp | 26 +++ craft_utils.py | 151 ++++++++----- 13 files changed, 927 insertions(+), 50 deletions(-) create mode 100644 build_cpp_bindings_unix.sh create mode 100644 cpp_bindings/CMakeLists.txt create mode 100644 cpp_bindings/cpp_bindings.py create mode 100644 cpp_bindings/src/convex_hull.cpp create mode 100644 cpp_bindings/src/convex_hull.hpp create mode 100644 cpp_bindings/src/dilate.cpp create mode 100644 cpp_bindings/src/dilate.hpp create mode 100644 cpp_bindings/src/find_components.cpp create mode 100644 cpp_bindings/src/find_components.hpp create mode 100644 cpp_bindings/src/rotated_bounding_box.cpp create mode 100644 cpp_bindings/src/rotated_bounding_box.hpp diff --git a/.gitignore b/.gitignore index 8bd0b28..4377e68 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ *.pkl *.pth result* -weights* \ No newline at end of file +weights* + +cpp_bindings/build/** diff --git a/build_cpp_bindings_unix.sh b/build_cpp_bindings_unix.sh new file mode 100644 index 0000000..0ae0656 --- /dev/null +++ b/build_cpp_bindings_unix.sh @@ -0,0 +1,4 @@ +mkdir cpp_bindings/build/ +cd cpp_bindings/build/ +cmake .. -DCMAKE_BUILD_TYPE=RELEASE +make diff --git a/cpp_bindings/CMakeLists.txt b/cpp_bindings/CMakeLists.txt new file mode 100644 index 0000000..0b224ce --- /dev/null +++ b/cpp_bindings/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 3.2) + +project(cpp_bindings VERSION 0.0.1) + +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wfatal-errors") + +include_directories( + "src/" +) + +file(GLOB all_cpp_bindings_src + "src/*.cpp" +) + +add_library(fast_boxes SHARED ${all_cpp_bindings_src}) diff --git a/cpp_bindings/cpp_bindings.py b/cpp_bindings/cpp_bindings.py new file mode 100644 index 0000000..6506aa5 --- /dev/null +++ b/cpp_bindings/cpp_bindings.py @@ -0,0 +1,82 @@ +import ctypes +import numpy as np +from numpy.ctypeslib import ndpointer +import cv2 + +lib = ctypes.CDLL('cpp_bindings/build/libfast_boxes.so') + +c_find_char_boxes = lib.findMinMaxBoxes +c_find_char_boxes.argtypes = [ + ndpointer(ctypes.c_int, flags='C_CONTIGUOUS'), + ctypes.c_int, + ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_int, flags='C_CONTIGUOUS'), +] + + +c_find_word_boxes = lib.findWordBoxes +c_find_word_boxes.argtypes = [ + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_int, flags='C_CONTIGUOUS'), + ctypes.c_int, + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ctypes.c_float, + ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_float, flags='C_CONTIGUOUS'), + ndpointer(ctypes.c_int, flags='C_CONTIGUOUS'), +] + +c_get_max_num_components = lib.getMaxNumComponents + + +def find_char_boxes(markers, num_classes): + # Allocate numpy buffer to store the results + out_boxes = np.zeros((num_classes+1)*4, dtype=np.int32) + c_find_char_boxes(markers, markers.shape[1], markers.shape[0], + out_boxes.shape[0], out_boxes) + + char_boxes = [] + for idx in range(2,num_classes+1): + l,t = out_boxes[idx*4+0], out_boxes[idx*4+1] + r,b = out_boxes[idx*4+2], out_boxes[idx*4+3] + w, h = r-l, b-t + box = np.array([[l, t], [l + w, t], [l + w, t + h], [l, t + h]], dtype=np.float32) + char_boxes.append(box) + + return char_boxes + +def find_word_boxes(textmap, labels, nLabels, stats, text_threshold, + fast_mode=False, rotated_box=True): + """ + This function mostly mimics the hot loop from craft_utils.py . However it has two + new parameters: + fast_mode (bool): uses a faster post processing algorithm. The results should + stay mostly the same. + rotated_box (bool): indicates if the return boxes should be rotated boxes (the + original) or a simple bounding box. The latter is much faster, however it + does not works well for text on an angle. + """ + # Allocate numpy buffer to store the results, 4 points and a label + out_boxes = np.zeros(nLabels*8, dtype=np.float32) + out_labels = np.zeros(nLabels, dtype=np.int32) + + stat_left = np.array([stats[k,cv2.CC_STAT_LEFT] for k in range(nLabels)], dtype=np.float32) + stat_top = np.array([stats[k,cv2.CC_STAT_TOP] for k in range(nLabels)], dtype=np.float32) + stat_width = np.array([stats[k,cv2.CC_STAT_WIDTH] for k in range(nLabels)], dtype=np.float32) + stat_height = np.array([stats[k,cv2.CC_STAT_HEIGHT] for k in range(nLabels)], dtype=np.float32) + stat_area = np.array([stats[k,cv2.CC_STAT_AREA] for k in range(nLabels)], dtype=np.float32) + + num_boxes = c_find_word_boxes(textmap, textmap.shape[1], textmap.shape[0], + labels, nLabels, stat_left, stat_top, stat_width, stat_height, + stat_area, text_threshold, int(fast_mode), int(rotated_box), out_boxes, + out_labels) + out_boxes = [np.array(out_boxes[x*8:x*8+8]).reshape(4,2) for x in range(num_boxes)] + return out_boxes, out_labels diff --git a/cpp_bindings/src/convex_hull.cpp b/cpp_bindings/src/convex_hull.cpp new file mode 100644 index 0000000..87fa235 --- /dev/null +++ b/cpp_bindings/src/convex_hull.cpp @@ -0,0 +1,43 @@ +#include "convex_hull.hpp" + + +bool cmp(Point a, Point b) { + return a.x < b.x || (a.x == b.x && a.y < b.y); +} + +bool cw(Point a, Point b, Point c) { + return a.x*(b.y-c.y)+b.x*(c.y-a.y)+c.x*(a.y-b.y) < 0; +} + +bool ccw(Point a, Point b, Point c) { + return a.x*(b.y-c.y)+b.x*(c.y-a.y)+c.x*(a.y-b.y) > 0; +} + +void convexHull(std::vector& a) { + if (a.size() == 1) + return; + + sort(a.begin(), a.end(), &cmp); + Point p1 = a[0], p2 = a.back(); + std::vector up, down; + up.push_back(p1); + down.push_back(p1); + for (int i = 1; i < (int)a.size(); i++) { + if (i == (int)a.size() - 1 || cw(p1, a.at(i), p2)) { + while (up.size() >= 2 && !cw(up.at(up.size()-2), up.at(up.size()-1), a.at(i))) + up.pop_back(); + up.push_back(a.at(i)); + } + if (i == (int)a.size() - 1 || ccw(p1, a.at(i), p2)) { + while(down.size() >= 2 && !ccw(down.at(down.size()-2), down.at(down.size()-1), a.at(i))) + down.pop_back(); + down.push_back(a.at(i)); + } + } + + a.clear(); + for (int i = 0; i < (int)up.size(); i++) + a.push_back(up.at(i)); + for (int i = down.size() - 2; i > 0; i--) + a.push_back(down.at(i)); +} diff --git a/cpp_bindings/src/convex_hull.hpp b/cpp_bindings/src/convex_hull.hpp new file mode 100644 index 0000000..346d2f7 --- /dev/null +++ b/cpp_bindings/src/convex_hull.hpp @@ -0,0 +1,27 @@ +// Original code: https://cp-algorithms.com/geometry/grahams-scan-convex-hull.html +#ifndef CONVEX_HULL_H +#define CONVEX_HULL_H + +#include +#include + + +struct Point { + float x, y; + Point(int x, int y) { + this->x = x; + this->y = y; + }; + Point() { + this->x = 0; + this->y = 0; + }; +}; + +bool cmp(Point a, Point b); +bool cw(Point a, Point b, Point c); +bool ccw(Point a, Point b, Point c); + +void convexHull(std::vector& a); + +#endif diff --git a/cpp_bindings/src/dilate.cpp b/cpp_bindings/src/dilate.cpp new file mode 100644 index 0000000..e7a533f --- /dev/null +++ b/cpp_bindings/src/dilate.cpp @@ -0,0 +1,86 @@ +#include "dilate.hpp" + + +// https://github.com/opencv/opencv/blob/198b5096aaf8f5d855b98337e9de2fc45485c5a7/modules/ts/src/ts_func.cpp#L642 +void dilate(const unsigned char *src, int srcW, int srcH, + unsigned char *dst, int dstW, int dstH, + const unsigned char *kernel, int kerW, int kerH, + int roiLeft, int roiTop, int roiRight, int roiBottom) +{ + if (dstW < srcW || dstH < srcH) + return; + if (src == dst) + return; + + int new_src_w = srcW + kerW/2 + kerW - kerW/2 - 1; + unsigned char *new_src = copyMakeBorder(src, srcW, srcH, kerH/2, kerH - kerH/2 - 1, + kerW/2, kerW - kerW/2 - 1); + + int *ofs = new int[kerW*kerH]; + int ofs_size = 0; + for( int i = 0; i < kerH; i++ ) + for( int j = 0; j < kerW; j++ ) + if( kernel[i*kerW + j] != 0 ) { + ofs[ofs_size++] = (i*new_src_w + j); + } + + if( ofs_size == 0 ) + ofs[ofs_size++] = kerH/2*new_src_w + kerW; + + for( int y = roiTop; y < roiBottom; y++ ) + { + const unsigned char *sptr = &new_src[y*new_src_w]; + unsigned char *dptr = &dst[y*dstW]; + + for( int x = roiLeft; x < roiRight; x++ ) + { + unsigned char result = sptr[x + ofs[0]]; + for( int i = 1; i < ofs_size; i++ ) { + result = std::max(result, sptr[x + ofs[i]]); + } + dptr[x] = result; + } + } + + delete [] ofs; + delete [] new_src; +} + +unsigned char* copyMakeBorder(const unsigned char *src, int srcW, int srcH, int top, int bottom, int left, int right) +{ + unsigned char *dst = new unsigned char[(srcH + top + bottom) * (srcW + left + right)]; + int dstW = (srcW + left + right); + int i, j, esz = sizeof(unsigned char); + int width = srcW*esz; + + left *= esz; + right *= esz; + for( i = 0; i < srcH; i++ ) + { + const unsigned char* sptr = &src[i*srcW]; + unsigned char* dptr = &dst[(i + top)*dstW + left]; + for( j = 0; j < left; j++ ) + dptr[j - left] = 0; + if( dptr != sptr ) + for( j = 0; j < width; j++ ) + dptr[j] = sptr[j]; + for( j = 0; j < right; j++ ) + dptr[j + width] = 0; + } + + for( i = 0; i < top; i++ ) + { + unsigned char* dptr = &dst[i*dstW]; + for( j = 0; j < dstW; j++ ) + dptr[j] = 0; + } + + for( i = 0; i < bottom; i++ ) + { + unsigned char* dptr = &dst[(i + top + srcH)*dstW]; + for( j = 0; j < dstW; j++ ) + dptr[j] = 0; + } + + return dst; +} diff --git a/cpp_bindings/src/dilate.hpp b/cpp_bindings/src/dilate.hpp new file mode 100644 index 0000000..ec8e13d --- /dev/null +++ b/cpp_bindings/src/dilate.hpp @@ -0,0 +1,16 @@ +#ifndef DILATE_H +#define DILATE_H + +#include +#include + +// WARNING (gfickel): src and dst cannot be the same on dilate! +void dilate(const unsigned char *src, int srcW, int srcH, + unsigned char *dst, int dstW, int dstH, + const unsigned char *kernel, int kerW, int kerH, + int roiLeft, int roiTop, int roiRight, int roiBottom); + +unsigned char* copyMakeBorder(const unsigned char *src, int srcW, + int srcH, int top, int bottom, int left, int right); + +#endif diff --git a/cpp_bindings/src/find_components.cpp b/cpp_bindings/src/find_components.cpp new file mode 100644 index 0000000..6cb282e --- /dev/null +++ b/cpp_bindings/src/find_components.cpp @@ -0,0 +1,214 @@ +#include "find_components.hpp" + +int max_coord[MAX_COMPONENTS*2]; +int min_coord[MAX_COMPONENTS*2]; + +float max_textmap[MAX_COMPONENTS]; + +inline int getIndex(int width, int y, int x) { + return y*width+x; +} + +void matmul(float *mat1, int mat1Rows, int mat1Cols, float *mat2, int mat2Rows, int mat2Cols, float *outMat) { + for (int i=0; i segmap_vec(textmapHeight*textmapWidth); + std::vector dilate_dst_vec(textmapHeight*textmapWidth); + unsigned char *segmap = segmap_vec.data(); + unsigned char *dilate_dst = dilate_dst_vec.data(); + + memset(segmap, 0, sizeof(unsigned char)*textmapWidth*textmapHeight); + memset(dilate_dst, 0, sizeof(unsigned char)*textmapWidth*textmapHeight); + + // findMinMaxBoxes will update our max_coord and min_coord variables + // with the min and max position of each label + findMinMaxBoxes(labels, textmapWidth, textmapHeight, 0, NULL); + + std::vector rot_vec(3*2); + std::vector corners_vec(2*4); + std::vector pts_array_vec(3*4); + float *rot = rot_vec.data(); + float *corners = corners_vec.data(); + // Each point is a column. + float *pts_array = pts_array_vec.data(); + pts_array[8] = pts_array[9] = pts_array[10] = pts_array[11] = 1; + std::vector corners_list; + for (int k=1; k pts; + if (fastMode == false) { // original implementation + for (int i=min_coord[k*2+1]; i<=max_coord[k*2+1]; i++) { + for (int j=min_coord[k*2+0]; j<=max_coord[k*2+0]; j++) { + int idx = getIndex(textmapWidth, i, j); + if (labels[idx] == k) { + segmap_vec.at(idx) = 255; + } + } + } + unsigned char *kernel = new unsigned char[(1+niter)*(1+niter)]; + for (int i=0; i<(1+niter)*(1+niter); i++) { + kernel[i] = 255; + } + + dilate(segmap, textmapWidth, textmapHeight, dilate_dst, textmapWidth, textmapHeight, + kernel, (1+niter), (1+niter), sx, sy, ex, ey); + + for (int i=sy; i 0) { + pts.push_back(Point(j,i)); + } + // Erase what was written to segmap and dilate_dst + dilate_dst_vec.at(idx) = segmap_vec.at(idx) = 0; + } + } + rot_box = rotatedBoxFromPoints(pts); + delete [] kernel; + } else { // fastMode: ignore dilate + for (int i=min_coord[k*2+1]; i<=max_coord[k*2+1]; i++) { + for (int j=min_coord[k*2+0]; j<=max_coord[k*2+0]; j++) { + int idx = getIndex(textmapWidth, i, j); + if (labels[idx] == k) { + segmap_vec.at(idx) = 255; + pts.push_back(Point(j,i)); + } + } + } + rot_box = rotatedBoxFromPoints(pts); + } + } else { // Not a rotated box + rot_box.cx = (sx+ex)/2; + rot_box.cy = (sy+ey)/2; + rot_box.width = ex-sx; + rot_box.height = ey-sy; + rot_box.angle = 0; + } + + if (fastMode == true || rotatedBox == false) { + // If we didn't do the dilate, the box should be + // increased to compensate it. + rot_box.width += niter; + rot_box.height += niter; + } + + getRotationMatrix2D(rot_box.cx, rot_box.cy, -rot_box.angle, rot); + float l = rot_box.cx-rot_box.width/2; + float t = rot_box.cy-rot_box.height/2; + float r = rot_box.cx+rot_box.width/2; + float b = rot_box.cy+rot_box.height/2; + pts_array[0] = l; pts_array[4] = t; + pts_array[1] = r; pts_array[5] = t; + pts_array[2] = r; pts_array[6] = b; + pts_array[3] = l; pts_array[7] = b; + + matmul(rot, 2, 3, pts_array, 3, 4, corners); + corners_list.push_back(Corner(corners, k)); + } + for (int i=0; i<(int)corners_list.size(); i++) { + for (int j=0; j<4; j++) { + outBoxes[i*8+j*2+0] = corners_list.at(i).pts[0*4+j]; + outBoxes[i*8+j*2+1] = corners_list.at(i).pts[1*4+j]; + } + outLabels[i] = corners_list.at(i).label; + } + + return corners_list.size(); +} + +void findMinMaxBoxes(const int *markers, int width, int height, int lenOutBoxes, int* outBoxes) { + memset(&max_coord, 0, sizeof(int)*MAX_COMPONENTS*2); + // The following memset is not seting min_coord to 1! + memset(&min_coord, 1, sizeof(int)*MAX_COMPONENTS*2); + + int max_idx = 0; + for (int i=0; i max_idx) { + max_idx = markers[row+j]; + } + if (max_idx > MAX_COMPONENTS) { + // THIS IS BAAAAD!!! Stoping to avoid stack corruption + goto END_FOR; + } + if (j < min_coord[component_id]) { + min_coord[component_id] = j; + } + if (j > max_coord[component_id]) { + max_coord[component_id] = j; + } + if (i < min_coord[component_id+1]) { + min_coord[component_id+1] = i; + } + if (i > max_coord[component_id+1]) { + max_coord[component_id+1] = i; + } + } + } +END_FOR: + + for (int i=0; i<=max_idx; i++) { + if ((i*4+3) > lenOutBoxes) { + break; + } + outBoxes[i*4+0] = min_coord[i*2]; + outBoxes[i*4+1] = min_coord[i*2+1]; + outBoxes[i*4+2] = max_coord[i*2]+1; + outBoxes[i*4+3] = max_coord[i*2+1]+1; + } +} diff --git a/cpp_bindings/src/find_components.hpp b/cpp_bindings/src/find_components.hpp new file mode 100644 index 0000000..c1c7fea --- /dev/null +++ b/cpp_bindings/src/find_components.hpp @@ -0,0 +1,53 @@ +#ifndef FIND_COMPONENTS_H +#define FIND_COMPONENTS_H + +#include +#include +#include +#include +#include +#include "dilate.hpp" +#include "rotated_bounding_box.hpp" + +// 50K will use ~782KB in RAM, good and safe enough +#define MAX_COMPONENTS 50000 +// The following variables keep the current min and max coordinates +// on the following format: max_coord = xyxyxyxy.... +extern int max_coord[MAX_COMPONENTS*2]; +extern int min_coord[MAX_COMPONENTS*2]; + +extern float max_textmap[MAX_COMPONENTS]; + +typedef struct Box { + int l, t, r, b, label; + Box(int l, int t, int r, int b, int label) { + this->l = l; this->t = t; + this->r = r; this->b = b; + this->label = label; + }; +} Box; + +typedef struct Corner { + float pts[8]; + int label; + Corner(float *pts, int label) { + for (int i=0; i<8; i++) { + this->pts[i] = pts[i]; + } + this->label = label; + }; +} Corner; + +extern "C" { + void findMinMaxBoxes(const int *markers, int width, int height, int lenOutBoxes, int* outBoxes); + int findWordBoxes(const float *textmap, int textmapWidth, int textmapHeight, const int* labels, + int nLabels, float *statsLeft, float *statsTop, float *statsWidth, float *statsHeight, + float *statsArea, float textThreshold, int fastMode, int rotatedBox, float *outBoxes, + int *outLabels); + int getMaxNumComponents() { return MAX_COMPONENTS; }; +} + +void getRotationMatrix2D(float cx, float cy, float angle, float *outRot); +void matmul(float *mat1, int mat1Rows, int mat1Cols, float *mat2, int mat2Rows, int mat2Cols, float *outMat); + +#endif diff --git a/cpp_bindings/src/rotated_bounding_box.cpp b/cpp_bindings/src/rotated_bounding_box.cpp new file mode 100644 index 0000000..861fec1 --- /dev/null +++ b/cpp_bindings/src/rotated_bounding_box.cpp @@ -0,0 +1,254 @@ +#include "rotated_bounding_box.hpp" + +static void rotatingCalipers( const std::vector &points, int n, float* out ) +{ + float minarea = 1e16; + char buffer[32] = {}; + int i, k; + std::vector abuf(n*3); + float* inv_vect_length = abuf.data(); + Point* vect = (Point*)(inv_vect_length + n); + int left = 0, bottom = 0, right = 0, top = 0; + int seq[4] = { -1, -1, -1, -1 }; + + /* rotating calipers sides will always have coordinates + (a,b) (-b,a) (-a,-b) (b, -a) + */ + /* this is a first base vector (a,b) initialized by (1,0) */ + float orientation = 0; + float base_a; + float base_b = 0; + + float left_x, right_x, top_y, bottom_y; + Point pt0 = points[0]; + + left_x = right_x = pt0.x; + top_y = bottom_y = pt0.y; + + for( i = 0; i < n; i++ ) + { + double dx, dy; + + if( pt0.x < left_x ) + left_x = pt0.x, left = i; + + if( pt0.x > right_x ) + right_x = pt0.x, right = i; + + if( pt0.y > top_y ) + top_y = pt0.y, top = i; + + if( pt0.y < bottom_y ) + bottom_y = pt0.y, bottom = i; + + Point pt = points[(i+1) & (i+1 < n ? -1 : 0)]; + + dx = pt.x - pt0.x; + dy = pt.y - pt0.y; + + vect[i].x = (float)dx; + vect[i].y = (float)dy; + inv_vect_length[i] = (float)(1./std::sqrt(dx*dx + dy*dy)); + + pt0 = pt; + } + + // find convex hull orientation + { + double ax = vect[n-1].x; + double ay = vect[n-1].y; + + for( i = 0; i < n; i++ ) + { + double bx = vect[i].x; + double by = vect[i].y; + + double convexity = ax * by - ay * bx; + + if( convexity != 0 ) + { + orientation = (convexity > 0) ? 1.f : (-1.f); + break; + } + ax = bx; + ay = by; + } + } + base_a = orientation; + + /*****************************************************************************************/ + /* init calipers position */ + seq[0] = bottom; + seq[1] = right; + seq[2] = top; + seq[3] = left; + /*****************************************************************************************/ + /* Main loop - evaluate angles and rotate calipers */ + + /* all of edges will be checked while rotating calipers by 90 degrees */ + for( k = 0; k < n; k++ ) + { + /* sinus of minimal angle */ + /*float sinus;*/ + + /* compute cosine of angle between calipers side and polygon edge */ + /* dp - dot product */ + float dp[4] = { + +base_a * vect[seq[0]].x + base_b * vect[seq[0]].y, + -base_b * vect[seq[1]].x + base_a * vect[seq[1]].y, + -base_a * vect[seq[2]].x - base_b * vect[seq[2]].y, + +base_b * vect[seq[3]].x - base_a * vect[seq[3]].y, + }; + + float maxcos = dp[0] * inv_vect_length[seq[0]]; + + /* number of calipers edges, that has minimal angle with edge */ + int main_element = 0; + + /* choose minimal angle */ + for ( i = 1; i < 4; ++i ) + { + float cosalpha = dp[i] * inv_vect_length[seq[i]]; + if (cosalpha > maxcos) + { + main_element = i; + maxcos = cosalpha; + } + } + + /*rotate calipers*/ + { + //get next base + int pindex = seq[main_element]; + float lead_x = vect[pindex].x*inv_vect_length[pindex]; + float lead_y = vect[pindex].y*inv_vect_length[pindex]; + switch( main_element ) + { + case 0: + base_a = lead_x; + base_b = lead_y; + break; + case 1: + base_a = lead_y; + base_b = -lead_x; + break; + case 2: + base_a = -lead_x; + base_b = -lead_y; + break; + case 3: + base_a = -lead_y; + base_b = lead_x; + break; + default: + break; + } + } + /* change base point of main edge */ + seq[main_element] += 1; + seq[main_element] = (seq[main_element] == n) ? 0 : seq[main_element]; + + /* find area of rectangle */ + float height; + float area; + + /* find vector left-right */ + float dx = points[seq[1]].x - points[seq[3]].x; + float dy = points[seq[1]].y - points[seq[3]].y; + + /* dotproduct */ + float width = dx * base_a + dy * base_b; + + /* find vector left-right */ + dx = points[seq[2]].x - points[seq[0]].x; + dy = points[seq[2]].y - points[seq[0]].y; + + /* dotproduct */ + height = -dx * base_b + dy * base_a; + + area = width * height; + if( area <= minarea ) + { + float *buf = (float *) buffer; + + minarea = area; + /* leftist point */ + ((int *) buf)[0] = seq[3]; + buf[1] = base_a; + buf[2] = width; + buf[3] = base_b; + buf[4] = height; + /* bottom point */ + ((int *) buf)[5] = seq[0]; + buf[6] = area; + } + } /* for */ + + { + float *buf = (float *) buffer; + + float A1 = buf[1]; + float B1 = buf[3]; + + float A2 = -buf[3]; + float B2 = buf[1]; + + float C1 = A1 * points[((int *) buf)[0]].x + points[((int *) buf)[0]].y * B1; + float C2 = A2 * points[((int *) buf)[5]].x + points[((int *) buf)[5]].y * B2; + + float idet = 1.f / (A1 * B2 - A2 * B1); + + float px = (C1 * B2 - C2 * B1) * idet; + float py = (A1 * C2 - A2 * C1) * idet; + + out[0] = px; + out[1] = py; + + out[2] = A1 * buf[2]; + out[3] = B1 * buf[2]; + + out[4] = A2 * buf[4]; + out[5] = B2 * buf[4]; + } +} + + +RotatedBox rotatedBoxFromPoints(std::vector &pts) { + Point out[3]; + RotatedBox box; + + convexHull(pts); + + int n = pts.size(); + + if( n > 2 ) + { + rotatingCalipers( pts, n, (float*)out ); + box.cx = out[0].x + (out[1].x + out[2].x)*0.5f; + box.cy = out[0].y + (out[1].y + out[2].y)*0.5f; + box.width = (float)std::sqrt((double)out[1].x*out[1].x + (double)out[1].y*out[1].y); + box.height = (float)std::sqrt((double)out[2].x*out[2].x + (double)out[2].y*out[2].y); + box.angle = (float)atan2( (double)out[1].y, (double)out[1].x ); + } + else if( n == 2 ) + { + box.cx = (pts[0].x + pts[1].x)*0.5f; + box.cy = (pts[0].y + pts[1].y)*0.5f; + double dx = pts[1].x - pts[0].x; + double dy = pts[1].y - pts[0].y; + box.width = (float)std::sqrt(dx*dx + dy*dy); + box.height = 0; + box.angle = (float)atan2( dy, dx ); + } + else + { + if( n == 1 ) + { + box.cx = pts[0].x; + box.cy = pts[0].y; + } + } + + box.angle = (float)(box.angle*180/PI); + return box; +} diff --git a/cpp_bindings/src/rotated_bounding_box.hpp b/cpp_bindings/src/rotated_bounding_box.hpp new file mode 100644 index 0000000..e7ebb0c --- /dev/null +++ b/cpp_bindings/src/rotated_bounding_box.hpp @@ -0,0 +1,26 @@ +// Original code: https://github.com/opencv/opencv/blob/dd1494eebf205b0262c362bbdc57cef212464d1d/modules/imgproc/src/rotcalipers.cpp +#ifndef ROTATED_BOUNDING_BOX_H +#define ROTATED_BOUNDING_BOX_H + +#include +#include "convex_hull.hpp" +#define PI 3.14159265359 + +struct RotatedBox { + float cx, cy, width, height, angle; + int label; + RotatedBox(float cx, float cy, float width, float height, float angle) { + this->cx = cx; + this->cy = cy; + this->width = width; + this->height = height; + this->angle = angle; + }; + RotatedBox() { + cx = cy = width = height = angle = 0; + }; +}; + +RotatedBox rotatedBoxFromPoints(std::vector &pts); + +#endif diff --git a/craft_utils.py b/craft_utils.py index 43c1357..b3daf40 100755 --- a/craft_utils.py +++ b/craft_utils.py @@ -7,6 +7,11 @@ import numpy as np import cv2 import math +try: + from cpp_bindings.cpp_bindings import find_char_boxes, find_word_boxes + CPP_BIND_AVAILABLE = True +except BaseException as e: + CPP_BIND_AVAILABLE = False """ auxilary functions """ # unwarp corodinates @@ -15,8 +20,36 @@ def warpCoord(Minv, pt): return np.array([out[0]/out[2], out[1]/out[2]]) """ end of auxilary functions """ +def getCharBoxes(image, textmap, use_cpp_bindings=True): + char_boxes = [] + ret, sure_fg = cv2.threshold(textmap, 0.6, 1, 0) + ret, sure_bg = cv2.threshold(textmap, 0.2, 1, 0) + + sure_fg = np.uint8(sure_fg * 255) + sure_bg = np.uint8(sure_bg * 255) + + unknown = cv2.subtract(sure_bg, sure_fg) + ret, markers = cv2.connectedComponents(sure_fg) + markers = markers + 1 + markers[unknown == 255] = 0 + image = cv2.resize(image, textmap.shape[::-1], cv2.INTER_CUBIC) + cv2.watershed((image * 255).astype(np.uint8), markers) + num_classes = np.max(markers) + + # marker 1 is background + if CPP_BIND_AVAILABLE and use_cpp_bindings: + char_boxes = find_char_boxes(markers, num_classes) + else: + for i in range(2, np.max(markers) + 1): + np_contours = np.roll(np.array(np.where(markers == i)), 1, axis=0).transpose().reshape(-1, 2) + l, t, w, h = cv2.boundingRect(np_contours) + box = np.array([[l, t], [l + w, t], [l + w, t + h], [l, t + h]], dtype=np.float32) + char_boxes.append(box) + + return char_boxes -def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text): +def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text, + use_cpp_bindings=True, fast_mode=True, rotated_box=True): # prepare data linkmap = linkmap.copy() textmap = textmap.copy() @@ -29,52 +62,56 @@ def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text) text_score_comb = np.clip(text_score + link_score, 0, 1) nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8), connectivity=4) - det = [] - mapper = [] - for k in range(1,nLabels): - # size filtering - size = stats[k, cv2.CC_STAT_AREA] - if size < 10: continue - - # thresholding - if np.max(textmap[labels==k]) < text_threshold: continue - - # make segmentation map - segmap = np.zeros(textmap.shape, dtype=np.uint8) - segmap[labels==k] = 255 - segmap[np.logical_and(link_score==1, text_score==0)] = 0 # remove link area - x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP] - w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT] - niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2) - sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1 - # boundary check - if sx < 0 : sx = 0 - if sy < 0 : sy = 0 - if ex >= img_w: ex = img_w - if ey >= img_h: ey = img_h - kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1 + niter, 1 + niter)) - segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel) - - # make box - np_contours = np.roll(np.array(np.where(segmap!=0)),1,axis=0).transpose().reshape(-1,2) - rectangle = cv2.minAreaRect(np_contours) - box = cv2.boxPoints(rectangle) - - # align diamond-shape - w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2]) - box_ratio = max(w, h) / (min(w, h) + 1e-5) - if abs(1 - box_ratio) <= 0.1: - l, r = min(np_contours[:,0]), max(np_contours[:,0]) - t, b = min(np_contours[:,1]), max(np_contours[:,1]) - box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32) - - # make clock-wise order - startidx = box.sum(axis=1).argmin() - box = np.roll(box, 4-startidx, 0) - box = np.array(box) - - det.append(box) - mapper.append(k) + if CPP_BIND_AVAILABLE and use_cpp_bindings: + det, mapper = find_word_boxes(textmap, labels, nLabels, stats, + text_threshold, fast_mode, rotated_box) + else: + det = [] + mapper = [] + for k in range(1,nLabels): + # size filtering + size = stats[k, cv2.CC_STAT_AREA] + if size < 10: continue + + # thresholding + if np.max(textmap[labels==k]) < text_threshold: continue + + # make segmentation map + segmap = np.zeros(textmap.shape, dtype=np.uint8) + segmap[labels==k] = 255 + segmap[np.logical_and(link_score==1, text_score==0)] = 0 # remove link area + x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP] + w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT] + niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2) + sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1 + # boundary check + if sx < 0 : sx = 0 + if sy < 0 : sy = 0 + if ex >= img_w: ex = img_w + if ey >= img_h: ey = img_h + kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(1 + niter, 1 + niter)) + segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel) + + # make box + np_contours = np.roll(np.array(np.where(segmap!=0)),1,axis=0).transpose().reshape(-1,2) + rectangle = cv2.minAreaRect(np_contours) + box = cv2.boxPoints(rectangle) + + # align diamond-shape + w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2]) + box_ratio = max(w, h) / (min(w, h) + 1e-5) + if abs(1 - box_ratio) <= 0.1: + l, r = min(np_contours[:,0]), max(np_contours[:,0]) + t, b = min(np_contours[:,1]), max(np_contours[:,1]) + box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32) + + # make clock-wise order + startidx = box.sum(axis=1).argmin() + box = np.roll(box, 4-startidx, 0) + box = np.array(box) + + det.append(box) + mapper.append(k) return det, labels, mapper @@ -224,8 +261,12 @@ def getPoly_core(boxes, labels, mapper, linkmap): return polys -def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False): - boxes, labels, mapper = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text) +def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, + poly=False, use_cpp_bindings=True, fast_mode=False, rotated_box=True): + + boxes, labels, mapper = getDetBoxes_core( + textmap, linkmap, text_threshold, link_threshold, + low_text, use_cpp_bindings, fast_mode, rotated_box) if poly: polys = getPoly_core(boxes, labels, mapper, linkmap) @@ -234,6 +275,18 @@ def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly return boxes, polys +def getWordAndCharBoxes(image, textmap, linkmap, text_threshold, link_threshold, + low_text, poly=False, use_cpp_bindings=True, fast_mode=False, rotated_box=True): + + boxes, polys = getDetBoxes( + textmap, linkmap, text_threshold, link_threshold, + low_text, poly, use_cpp_bindings, fast_mode, rotated_box) + + char_boxes = getCharBoxes(image, textmap) + + return boxes, polys, char_boxes + + def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net = 2): if len(polys) > 0: polys = np.array(polys)