OpenCV 4.5.3(日本語機械翻訳)
ocr.hpp
1 /*M//////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
23 //
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
27 //
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 #ifndef __OPENCV_TEXT_OCR_HPP__
45 #define __OPENCV_TEXT_OCR_HPP__
46
47 #include <opencv2/core.hpp>
48
49 #include <vector>
50 #include <string>
51
52 namespace cv
53{
54 namespace text
55{
56
59
/** @brief Values for the `component_level` argument of the OCR `run` methods. */
enum
{
    OCR_LEVEL_WORD,     //!< word level (value 0)
    OCR_LEVEL_TEXTLINE  //!< text-line level (value 1)
};
65
/** @brief Tesseract.PageSegMode Enumeration.
 *
 * Passed as the `psmode` argument of OCRTesseract::create (default PSM_AUTO).
 * Enumerators are implicitly numbered from 0 in declaration order.
 */
enum page_seg_mode
{
    PSM_OSD_ONLY,
    PSM_AUTO_OSD,
    PSM_AUTO_ONLY,
    PSM_AUTO,
    PSM_SINGLE_COLUMN,
    PSM_SINGLE_BLOCK_VERT_TEXT,
    PSM_SINGLE_BLOCK,
    PSM_SINGLE_LINE,
    PSM_SINGLE_WORD,
    PSM_CIRCLE_WORD,
    PSM_SINGLE_CHAR
};
81
/** @brief Tesseract.OcrEngineMode Enumeration.
 *
 * Passed as the `oem` argument of OCRTesseract::create (default OEM_DEFAULT).
 * Enumerators are implicitly numbered from 0 in declaration order.
 */
enum ocr_engine_mode
{
    OEM_TESSERACT_ONLY,
    OEM_CUBE_ONLY,
    OEM_TESSERACT_CUBE_COMBINED,
    OEM_DEFAULT
};
90
91 //base class BaseOCR declares a common API that would be used in a typical text recognition scenario
92 class CV_EXPORTS_W BaseOCR
93{
94 public:
95 virtual ~BaseOCR() {};
96 virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
97 std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
98 int component_level=0) = 0;
99 virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
100 std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
101 int component_level=0) = 0;
102};
103
116 class CV_EXPORTS_W OCRTesseract : public BaseOCR
117{
118 public:
135 virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
136 std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
137 int component_level=0) CV_OVERRIDE;
138
139 virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
140 std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
141 int component_level=0) CV_OVERRIDE;
142
143 // aliases for scripting
144 CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
145
146 CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
147
148 CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;
149
150
165 CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
166 const char* char_whitelist=NULL, int oem=OEM_DEFAULT, int psmode=PSM_AUTO);
167};
168
169
170 /* OCR HMM Decoder */
171
/** @brief Decoding algorithm selector used by the OCR decoders' `create` factories. */
enum decoder_mode
{
    OCR_DECODER_VITERBI = 0 // Other algorithms may be added
};
176
/** @brief OCR classifier type, used as the `classifier` argument when loading a classifier by file name. */
enum classifier_type
{
    OCR_KNN_CLASSIFIER = 0,
    OCR_CNN_CLASSIFIER = 1
};
183
191 class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
192{
193 public:
194
204 class CV_EXPORTS_W ClassifierCallback
205 {
206 public:
207 virtual ~ClassifierCallback() { }
216 virtual void eval( InputArray image, std::vector<int>& out_class, std::vector<double>& out_confidence);
217 };
218
219 public:
241 virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
242 std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
243 int component_level=0) CV_OVERRIDE;
244
268 virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
269 std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
270 int component_level=0) CV_OVERRIDE;
271
272 // aliases for scripting
273 CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
274
275 CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
276
294 CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
295 const String& vocabulary, // The language vocabulary (chars when ASCII English text)
296 // size() must be equal to the number of classes
297 InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
298 // cols == rows == vocabulary.size()
299 InputArray emission_probabilities_table, // Table with observation emission probabilities
300 // cols == rows == vocabulary.size()
301 int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)
302
307 CV_WRAP static Ptr<OCRHMMDecoder> create(const String& filename,
308
309 const String& vocabulary, // The language vocabulary (chars when ASCII English text)
310 // size() must be equal to the number of classes
311 InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
312 // cols == rows == vocabulary.size()
313 InputArray emission_probabilities_table, // Table with observation emission probabilities
314 // cols == rows == vocabulary.size()
315 int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
316
317 int classifier = OCR_KNN_CLASSIFIER); // The character classifier type
318protected:
319
321 std::string vocabulary;
322 Mat transition_p;
323 Mat emission_p;
324 decoder_mode mode;
325};
326
341 CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);
342
354 CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);
355
363 CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifier(const String& filename, int classifier);
365
379CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);
380
381CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
382
383
384 /* OCR BeamSearch Decoder */
385
393 class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
394{
395 public:
396
406 class CV_EXPORTS_W ClassifierCallback
407 {
408 public:
409 virtual ~ClassifierCallback() { }
418 virtual void eval( InputArray image, std::vector< std::vector<double> >& recognition_probabilities, std::vector<int>& oversegmentation );
419
420 int getWindowSize() {return 0;}
421 int getStepSize() {return 0;}
422 };
423
424 public:
446 virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
447 std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
448 int component_level=0) CV_OVERRIDE;
449
450 virtual void run(Mat& image, Mat& mask, std::string& output_text, std::vector<Rect>* component_rects=NULL,
451 std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
452 int component_level=0) CV_OVERRIDE;
453
454 // aliases for scripting
455 CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);
456
457 CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);
458
477 static CV_WRAP
478 Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
479 const std::string& vocabulary, // The language vocabulary (chars when ASCII English text)
480 // size() must be equal to the number of classes
481 InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
482 // cols == rows == vocabulary.size()
483 InputArray emission_probabilities_table, // Table with observation emission probabilities
484 // cols == rows == vocabulary.size()
485 text::decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
486 int beam_size = 500 // Size of the beam in Beam Search algorithm
487 );
488
494 static //CV_WRAP FIXIT bug in handling of Java overloads
495 Ptr<OCRBeamSearchDecoder> create(const String& filename, // The character classifier file
496 const String& vocabulary, // The language vocabulary (chars when ASCII English text)
497 // size() must be equal to the number of classes
498 InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
499 // cols == rows == vocabulary.size()
500 InputArray emission_probabilities_table, // Table with observation emission probabilities
501 // cols == rows == vocabulary.size()
502 text::decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
503 int beam_size = 500 // Size of the beam in Beam Search algorithm
504 );
505protected:
506
508 std::string vocabulary;
509 Mat transition_p;
510 Mat emission_p;
511 decoder_mode mode;
512 int beam_size;
513};
514
525CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
526
527
536 class CV_EXPORTS OCRHolisticWordRecognizer : public BaseOCR
537{
538 public:
539 virtual void run(Mat& image,
540 std::string& output_text,
541 std::vector<Rect>* component_rects = NULL,
542 std::vector<std::string>* component_texts = NULL,
543 std::vector<float>* component_confidences = NULL,
544 int component_level = OCR_LEVEL_WORD) CV_OVERRIDE = 0;
545
569 virtual void run(Mat& image,
570 Mat& mask,
571 std::string& output_text,
572 std::vector<Rect>* component_rects = NULL,
573 std::vector<std::string>* component_texts = NULL,
574 std::vector<float>* component_confidences = NULL,
575 int component_level = OCR_LEVEL_WORD) CV_OVERRIDE = 0;
576
579 static Ptr<OCRHolisticWordRecognizer> create(const std::string &archFilename,
580 const std::string &weightsFilename,
581 const std::string &wordsFilename);
582};
583
585
586}} // cv::text::
587
588
589 #endif // __OPENCV_TEXT_OCR_HPP__
This type is very similar to InputArray except that it is used for input/output and output function p...
Definition: mat.hpp:295
n-dimensional dense array class
Definition: mat.hpp:802
Template class for 2D rectangles
Definition: core/types.hpp:421
Definition: ocr.hpp:93
Callback with the character classifier is made a class.
Definition: ocr.hpp:407
virtual void eval(InputArray image, std::vector< std::vector< double > > &recognition_probabilities, std::vector< int > &oversegmentation)
The character classifier must return a (ranked list of) class ID(s).
OCRBeamSearchDecoder class provides an interface for OCR using Beam Search algorithm.
Definition: ocr.hpp:394
virtual void run(Mat &image, std::string &output_text, std::vector< Rect > *component_rects=NULL, std::vector< std::string > *component_texts=NULL, std::vector< float > *component_confidences=NULL, int component_level=0) CV_OVERRIDE
Recognize text using Beam Search.
Callback with the character classifier is made a class.
Definition: ocr.hpp:205
virtual void eval(InputArray image, std::vector< int > &out_class, std::vector< double > &out_confidence)
The character classifier must return a (ranked list of) class ID(s).
OCRHMMDecoder class provides an interface for OCR using Hidden Markov Models.
Definition: ocr.hpp:192
virtual void run(Mat &image, std::string &output_text, std::vector< Rect > *component_rects=NULL, std::vector< std::string > *component_texts=NULL, std::vector< float > *component_confidences=NULL, int component_level=0) CV_OVERRIDE
Recognize text using HMM.
OCRHolisticWordRecognizer class provides the functionality of segmented wordspotting....
Definition: ocr.hpp:537
static Ptr< OCRHolisticWordRecognizer > create(const std::string &archFilename, const std::string &weightsFilename, const std::string &wordsFilename)
Creates an instance of the OCRHolisticWordRecognizer class.
virtual void run(Mat &image, Mat &mask, std::string &output_text, std::vector< Rect > *component_rects=NULL, std::vector< std::string > *component_texts=NULL, std::vector< float > *component_confidences=NULL, int component_level=OCR_LEVEL_WORD) CV_OVERRIDE=0
Recognize text using a segmentation based word-spotting/classifier cnn.
OCRTesseract class provides an interface with the tesseract-ocr API (v3.02.02) in C++.
Definition: ocr.hpp:117
virtual void run(Mat &image, std::string &output_text, std::vector< Rect > *component_rects=NULL, std::vector< std::string > *component_texts=NULL, std::vector< float > *component_confidences=NULL, int component_level=0) CV_OVERRIDE
Recognize text using the tesseract-ocr API.
page_seg_mode
Tesseract.PageSegMode Enumeration
Definition: ocr.hpp:68
CV_EXPORTS_W Ptr< OCRHMMDecoder::ClassifierCallback > loadOCRHMMClassifierNM(const String &filename)
Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
ocr_engine_mode
Tesseract.OcrEngineMode Enumeration
Definition: ocr.hpp:84
CV_EXPORTS_W Ptr< OCRHMMDecoder::ClassifierCallback > loadOCRHMMClassifierCNN(const String &filename)
Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
CV_EXPORTS_W Ptr< OCRHMMDecoder::ClassifierCallback > loadOCRHMMClassifier(const String &filename, int classifier)
Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.
cv
"black box" representation of the file storage associated with a file on disk.
Definition: aruco.hpp:75
Definition: cvstd_wrapper.hpp:74