diff --git a/all_test.go b/all_test.go
index d8ce498..8cbf5bd 100644
--- a/all_test.go
+++ b/all_test.go
@@ -163,7 +163,6 @@ func TestClient_SetImageFromBytes(t *testing.T) {
 }
 
 func TestClient_SetWhitelist(t *testing.T) {
-
 	if os.Getenv("TESS_LSTM_DISABLED") == "1" {
 		t.Skip("Whitelist with LSTM is not working for now. Please check https://github.com/tesseract-ocr/tesseract/issues/751")
 	}
@@ -183,7 +182,6 @@ func TestClient_SetWhitelist(t *testing.T) {
 }
 
 func TestClient_SetBlacklist(t *testing.T) {
-
 	if os.Getenv("TESS_LSTM_DISABLED") == "1" {
 		t.Skip("Blacklist with LSTM is not working for now. Please check https://github.com/tesseract-ocr/tesseract/issues/751")
 	}
@@ -218,7 +216,6 @@ func TestClient_SetLanguage(t *testing.T) {
 }
 
 func TestClient_ConfigFilePath(t *testing.T) {
-
 	if os.Getenv("TESS_LSTM_DISABLED") == "1" {
 		t.Skip("Whitelist with LSTM is not working for now. Please check https://github.com/tesseract-ocr/tesseract/issues/751")
 	}
@@ -243,11 +240,9 @@ func TestClient_ConfigFilePath(t *testing.T) {
 		err := client.SetConfigFile("./test/config/02.config")
 		Expect(t, err).Not().ToBe(nil)
 	})
-
 }
 
 func TestClientBoundingBox(t *testing.T) {
-
 	if os.Getenv("TESS_BOX_DISABLED") == "1" {
 		t.Skip()
 	}
@@ -279,8 +274,36 @@ func TestClientBoundingBox(t *testing.T) {
 	}
 }
 
-func TestClient_HTML(t *testing.T) {
+// TestClient_GetOrientation checks the orientation reported for two fixture
+// images: one expected to be upright and one expected to be rotated left.
+func TestClient_GetOrientation(t *testing.T) {
+	client := NewClient()
+	defer client.Close()
+
+	client.SetPageSegMode(PSM_AUTO_OSD)
+	Expect(t, client.SetImage("./test/data/003-longer-text.png")).ToBe(nil)
+	o, err := client.GetOrientation()
+	Expect(t, err).ToBe(nil)
+	Expect(t, o.Page).ToBe(ORIENTATION_PAGE_UP)
+	Expect(t, o.Writing).ToBe(WRITING_DIRECTION_LEFT_TO_RIGHT)
+	Expect(t, o.Line).ToBe(TEXTLINE_ORDER_TOP_TO_BOTTOM)
+	if !(-0.1 <= o.DeskewAngle && o.DeskewAngle <= 0.1) {
+		t.Fatalf("Expected DeskewAngle to be within [-0.1, 0.1] but is %f", o.DeskewAngle)
+	}
+
+	Expect(t, client.SetImage("./test/data/004-longer-text-rot-left.png")).ToBe(nil)
+	o, err = client.GetOrientation()
+	Expect(t, err).ToBe(nil)
+	Expect(t, o.Page).ToBe(ORIENTATION_PAGE_LEFT)
+	Expect(t, o.Writing).ToBe(WRITING_DIRECTION_LEFT_TO_RIGHT)
+	Expect(t, o.Line).ToBe(TEXTLINE_ORDER_TOP_TO_BOTTOM)
+	if !(-0.1 <= o.DeskewAngle && o.DeskewAngle <= 0.1) {
+		t.Fatalf("Expected DeskewAngle to be within [-0.1, 0.1] but is %f", o.DeskewAngle)
+	}
+}
+
+func TestClient_HTML(t *testing.T) {
 
 	if os.Getenv("TESS_BOX_DISABLED") == "1" {
 		t.Skip()
 	}
diff --git a/benchmark_test.go b/benchmark_test.go
index 5a9cfaa..82c68c4 100644
--- a/benchmark_test.go
+++ b/benchmark_test.go
@@ -37,3 +37,17 @@ func BenchmarkClient_GetBoundingBoxesVerbose(b *testing.B) {
 		client.Close()
 	}
 }
+
+// BenchmarkClient_GetOrientation measures one create/load/detect/close cycle per iteration.
+func BenchmarkClient_GetOrientation(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		client := NewClient()
+		if err := client.SetImage("./test/data/003-longer-text.png"); err != nil {
+			b.Fatal(err)
+		}
+		if _, err := client.GetOrientation(); err != nil {
+			b.Fatal(err)
+		}
+		client.Close()
+	}
+}
diff --git a/client.go b/client.go
index 8e8c27f..1456f93 100644
--- a/client.go
+++ b/client.go
@@ -4,6 +4,7 @@ package gosseract
 // #include
 // #include "tessbridge.h"
 import "C"
+
 import (
 	"fmt"
 	"image"
@@ -14,10 +15,8 @@ import (
 	"unsafe"
 )
 
-var (
-	// ErrClientNotConstructed is returned when a client is not constructed
-	ErrClientNotConstructed = fmt.Errorf("TessBaseAPI is not constructed, please use `gosseract.NewClient`")
-)
+// ErrClientNotConstructed is returned when a client is not constructed
+var ErrClientNotConstructed = fmt.Errorf("TessBaseAPI is not constructed, please use `gosseract.NewClient`")
 
 // Version returns the version of Tesseract-OCR
 func Version() string {
@@ -119,7 +118,6 @@ func (client *Client) Version() string {
 
 // SetImage sets path to image file to be processed OCR.
 func (client *Client) SetImage(imagepath string) error {
-
 	if client.api == nil {
 		return ErrClientNotConstructed
 	}
@@ -150,7 +148,6 @@ func (client *Client) SetImage(imagepath string) error {
 
 // SetImageFromBytes sets the image data to be processed OCR.
 func (client *Client) SetImageFromBytes(data []byte) error {
-
 	if client.api == nil {
 		return ErrClientNotConstructed
 	}
@@ -265,7 +262,6 @@ func (client *Client) SetTessdataPrefix(prefix string) error {
 
 // Initialize tesseract::TessBaseAPI
 func (client *Client) init() error {
-
 	if !client.shouldInit {
 		C.SetPixImage(client.api, client.pixImage)
 		return nil
@@ -454,6 +450,48 @@ func (client *Client) GetBoundingBoxesVerbose() (out []BoundingBox, err error) {
 	return
 }
 
+// Orientation describes the page layout orientation detected by Tesseract
+// for the current image.
+type Orientation struct {
+	Page        PageOrientation  // orientation of the page itself
+	Writing     WritingDirection // direction of the text within a line
+	Line        TextlineOrder    // order in which text lines are read
+	DeskewAngle float32          // deskew angle as reported by Tesseract's PageIterator::Orientation
+}
+
+// GetOrientation runs Tesseract's page layout analysis on the current image
+// and returns the detected orientation. It returns an error when the client
+// is not constructed, when initialization fails, or when layout analysis
+// yields no result.
+func (client *Client) GetOrientation() (Orientation, error) {
+	if client.api == nil {
+		return Orientation{}, ErrClientNotConstructed
+	}
+
+	// Because https://github.com/otiai10/gosseract/issues/167
+	// we have to get and set PSM again.
+	psm := C.GetPageSegMode(client.api)
+
+	if err := client.init(); err != nil {
+		return Orientation{}, err
+	}
+
+	C.SetPageSegMode(client.api, psm)
+
+	o := C.GetOrientation(client.api)
+	// The bridge reports a failed layout analysis with negative enum fields.
+	if o.page < 0 {
+		return Orientation{}, fmt.Errorf("page layout analysis failed, orientation is not available")
+	}
+
+	return Orientation{
+		Page:        PageOrientation(o.page),
+		Writing:     WritingDirection(o.writing),
+		Line:        TextlineOrder(o.line),
+		DeskewAngle: float32(o.deskew_angle),
+	}, nil
+}
+
 // getDataPath is useful hepler to determine where current tesseract
 // installation stores trained models
 func getDataPath() string {
diff --git a/constant.go b/constant.go
index b431d7c..3ac0396 100644
--- a/constant.go
+++ b/constant.go
@@ -57,6 +57,39 @@ const (
 	RIL_SYMBOL
 )
 
+// PageOrientation represents the orientation of a page and maps
+// directly to enum tesseract::Orientation.
+// See https://github.com/tesseract-ocr/tesseract/blob/f96cb8d9cb6e7958ddaa52cbbb33792b5d111913/include/tesseract/publictypes.h#L114
+type PageOrientation int
+
+const (
+	ORIENTATION_PAGE_UP PageOrientation = iota
+	ORIENTATION_PAGE_RIGHT
+	ORIENTATION_PAGE_DOWN
+	ORIENTATION_PAGE_LEFT
+)
+
+// WritingDirection represents the direction in which grapheme clusters
+// within a line of text are laid out logically. Maps directly to enum
+// tesseract::WritingDirection. See https://github.com/tesseract-ocr/tesseract/blob/f96cb8d9cb6e7958ddaa52cbbb33792b5d111913/include/tesseract/publictypes.h#L129
+type WritingDirection int
+
+const (
+	WRITING_DIRECTION_LEFT_TO_RIGHT WritingDirection = iota
+	WRITING_DIRECTION_RIGHT_TO_LEFT
+	WRITING_DIRECTION_TOP_TO_BOTTOM
+)
+
+// TextlineOrder represents the sequence in which lines are read. Maps
+// directly to tesseract::TextlineOrder. See https://github.com/tesseract-ocr/tesseract/blob/f96cb8d9cb6e7958ddaa52cbbb33792b5d111913/include/tesseract/publictypes.h#L146
+type TextlineOrder int
+
+const (
+	TEXTLINE_ORDER_LEFT_TO_RIGHT TextlineOrder = iota
+	TEXTLINE_ORDER_RIGHT_TO_LEFT
+	TEXTLINE_ORDER_TOP_TO_BOTTOM
+)
+
 // SettableVariable represents available strings for TessBaseAPI::SetVariable.
 // See https://groups.google.com/forum/#!topic/tesseract-ocr/eHTBzrBiwvQ
 // and https://github.com/tesseract-ocr/tesseract/blob/master/src/ccmain/tesseractclass.h
diff --git a/tessbridge.cpp b/tessbridge.cpp
index c37378b..0f54805 100644
--- a/tessbridge.cpp
+++ b/tessbridge.cpp
@@ -202,6 +202,38 @@ bounding_boxes* GetBoundingBoxes(TessBaseAPI a, int pageIteratorLevel) {
     return box_array;
 }
 
+// GetOrientation runs page layout analysis and reports the detected page
+// orientation, writing direction, textline order and deskew angle.
+// AnalyseLayout returns nullptr on error or on an empty page; in that case
+// the enum fields of the result are -1 and deskew_angle is 0, so callers
+// can detect the failure instead of the process crashing.
+orientation GetOrientation(TessBaseAPI a) {
+    tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
+    orientation o;
+    o.page = -1;
+    o.writing = -1;
+    o.line = -1;
+    o.deskew_angle = 0;
+
+    tesseract::PageIterator* it = api->AnalyseLayout();
+    if (it == nullptr) {
+        return o;
+    }
+
+    tesseract::Orientation page;
+    tesseract::WritingDirection writing;
+    tesseract::TextlineOrder line;
+    float deskew_angle;
+    it->Orientation(&page, &writing, &line, &deskew_angle);
+    delete it;
+
+    o.page = page;
+    o.writing = writing;
+    o.line = line;
+    o.deskew_angle = deskew_angle;
+    return o;
+}
+
 const char* Version(TessBaseAPI a) {
     tesseract::TessBaseAPI* api = (tesseract::TessBaseAPI*)a;
     const char* v = api->Version();
diff --git a/tessbridge.h b/tessbridge.h
index 161c08a..84edebb 100644
--- a/tessbridge.h
+++ b/tessbridge.h
@@ -17,6 +17,17 @@ struct bounding_boxes {
     struct bounding_box* boxes;
 };
 
+// orientation carries the result of GetOrientation. page, writing and line
+// hold values of tesseract::Orientation, tesseract::WritingDirection and
+// tesseract::TextlineOrder respectively; all three are -1 when layout
+// analysis produced no result.
+struct orientation {
+    int page;
+    int writing;
+    int line;
+    float deskew_angle;
+};
+
 TessBaseAPI Create(void);
 
 void Free(TessBaseAPI);
@@ -25,6 +36,7 @@ void ClearPersistentCache(TessBaseAPI);
 int Init(TessBaseAPI, char*, char*, char*, char*);
 struct bounding_boxes* GetBoundingBoxes(TessBaseAPI, int);
 struct bounding_boxes* GetBoundingBoxesVerbose(TessBaseAPI);
+struct orientation GetOrientation(TessBaseAPI);
 bool SetVariable(TessBaseAPI, char*, char*);
 void SetPixImage(TessBaseAPI a, PixImage pix);
 void SetPageSegMode(TessBaseAPI, int);
diff --git a/test/data/004-longer-text-rot-left.png b/test/data/004-longer-text-rot-left.png
new file mode 100644
index 0000000..5e8bf1b
Binary files /dev/null and b/test/data/004-longer-text-rot-left.png differ