// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
// Laboratory (LRDE)
//
// This file is part of Olena.
//
// Olena is free software: you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free
// Software Foundation, version 2 of the License.
//
// Olena is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Olena.  If not, see <http://www.gnu.org/licenses/>.
//
// As a special exception, you may use this file as part of a free
// software project without restriction.  Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to produce
// an executable, this file does not by itself cause the resulting
// executable to be covered by the GNU General Public License.  This
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.

#ifndef SCRIBO_TEXT_RECOGNITION_HH
# define SCRIBO_TEXT_RECOGNITION_HH

/// \file
///
/// Passes the text bounding boxes to an OCR (Tesseract).
///
/// \todo For each text bbox, we create a new image. We may like to avoid that.
/// \todo Do not store the result in an image?

# include <cstdlib>
# include <fstream>
# include <iostream>
# include <ostream>
# include <string>

# include <mln/core/image/dmorph/image_if.hh>
# include <mln/core/concept/neighborhood.hh>
# include <mln/core/site_set/box.hh>

# include <mln/util/array.hh>
# include <mln/data/fill.hh>
# include <mln/data/paste.hh>
# include <mln/data/paste_without_localization.hh>
# include <mln/pw/all.hh>

# include <mln/core/alias/w_window2d_int.hh>
# include <mln/make/w_window2d_int.hh>

# include <mln/border/resize.hh>

# include <scribo/core/macros.hh>

# include <scribo/text/clean_inplace.hh>

# include <scribo/core/line_set.hh>

# include <tesseract/baseapi.h>

// Select which Tesseract API the code in this file is compiled against:
// unless the build already defines HAVE_TESSERACT_2 or HAVE_TESSERACT_3,
// default to the HAVE_TESSERACT_2 code paths.
# if !defined HAVE_TESSERACT_2 && !defined HAVE_TESSERACT_3
#  define HAVE_TESSERACT_2
# endif


namespace scribo
{

  namespace text
  {

Guillaume Lazzara's avatar
Guillaume Lazzara committed
76
77
    using namespace mln;

Guillaume Lazzara's avatar
Guillaume Lazzara committed
78
    /// Passes the text bboxes to Tesseract (OCR).
79
    ///
80
    /// \param[in] lines       The lines of text.
Guillaume Lazzara's avatar
Guillaume Lazzara committed
81
82
83
84
85
    /// \param[in] language    The language which should be recognized by
    ///		               Tesseract. (fra, en, ...)
    /// \param[in] output_file If set, store the recognized text in
    ///                        this file.
    //
86
    template <typename L>
Guillaume Lazzara's avatar
Guillaume Lazzara committed
87
    void
88
    recognition(line_set<L>& lines, const char *language);
89
90
91
92
93
94
95


    /// Recognize text from an image.
    template <typename I>
    void
    recognition(const Image<I>& line,
		const char *language,
Guillaume Lazzara's avatar
Guillaume Lazzara committed
96
		const std::string& output_file = std::string());
97

98
99
100
101


# ifndef MLN_INCLUDE_ONLY

102
103

    template <typename L>
Guillaume Lazzara's avatar
Guillaume Lazzara committed
104
    void
105
    recognition(line_set<L>& lines, const char *language)
106
107
108
    {
      trace::entering("scribo::text::recognition");

109

Guillaume Lazzara's avatar
Guillaume Lazzara committed
110
      // Initialize Tesseract.
111
#  ifdef HAVE_TESSERACT_2
112
      TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL);
113
114
115
116
117
118
119
120
#  else // HAVE_TESSERACT_3
      tesseract::TessBaseAPI tess;
      if (tess.Init(NULL, language, NULL, 0, false) == -1)
      {
	std::cout << "Error: cannot initialize tesseract!" << std::endl;
	abort();
      }
      tess.SetPageSegMode(tesseract::PSM_SINGLE_LINE);
121

122
#  endif // HAVE_TESSERACT_2
Guillaume Lazzara's avatar
Guillaume Lazzara committed
123

124
125
      typedef mln_ch_value(L,bool) I;

Guillaume Lazzara's avatar
Guillaume Lazzara committed
126

Guillaume Lazzara's avatar
Guillaume Lazzara committed
127
      /// Use text bboxes with Tesseract
128
      for_all_lines(i, lines)
129
      {
130
	if (! lines(i).is_valid() || lines(i).is_hidden() || lines(i).type() != line::Text)
131
132
133
	  continue;

	mln_domain(I) box = lines(i).bbox();
Guillaume Lazzara's avatar
Guillaume Lazzara committed
134

Guillaume Lazzara's avatar
Guillaume Lazzara committed
135
136
137
	// Make sure characters are isolated from the borders.
	// Help Tesseract.
	box.enlarge(2);
Guillaume Lazzara's avatar
Guillaume Lazzara committed
138

Guillaume Lazzara's avatar
Guillaume Lazzara committed
139
	I text_ima(box);
140
	data::fill(text_ima, false);
Guillaume Lazzara's avatar
Guillaume Lazzara committed
141
142

	// Careful : background is set to 'False'
143
	const component_set<L>& comp_set = lines.components();
144
145
	const L& lbl = comp_set.labeled_image();

Guillaume Lazzara's avatar
Guillaume Lazzara committed
146
	// Extract each character components to create the line image.
147
148
149
150
151
	const mln::util::array<component_id_t>& comps = lines(i).components();
	for_all_elements(e, lines(i).components())
	{
	  unsigned comp_id = comps(e);
	  data::fill(((text_ima | comp_set(comp_id).bbox()).rw() | (pw::value(lbl) == pw::cst(comp_id))).rw(),
152
		     true);
153
	}
154
155

	/// Improve text quality.
Guillaume Lazzara's avatar
Guillaume Lazzara committed
156
	text::clean_inplace(lines(i), text_ima);
Guillaume Lazzara's avatar
Guillaume Lazzara committed
157

Guillaume Lazzara's avatar
Guillaume Lazzara committed
158
159
	// Make sure characters are isolated from the borders.
	// Help Tesseract.
Guillaume Lazzara's avatar
Guillaume Lazzara committed
160
161
162
163
164
165
	//
	// FIXME: can be improved! We need a morpher for a constant
	// extension set to false (avoid data::fill), a morpher for
	// translating the domain to (0,0) (avoid the creation of a
	// new image), change the default border::thickness to 0 and a
	// morpher to enlarge the domain to a part of the extension.
Guillaume Lazzara's avatar
Guillaume Lazzara committed
166
167
168
169
170
	mln_domain(I) lbox = text_ima.domain();
	lbox.enlarge(lines(i).char_space() + 2);
	I line_image(lbox, 0); // Make sure there is no border!
	data::fill(line_image, false);
	data::paste_without_localization(text_ima, line_image);
Guillaume Lazzara's avatar
Guillaume Lazzara committed
171

Guillaume Lazzara's avatar
Guillaume Lazzara committed
172
	// Recognize characters.
173
#  ifdef HAVE_TESSERACT_2
Guillaume Lazzara's avatar
Guillaume Lazzara committed
174
	char* s = TessBaseAPI::TesseractRect(
Guillaume Lazzara's avatar
Guillaume Lazzara committed
175
176
177
178
179
180
181
	    (unsigned char*) line_image.buffer(),
	    sizeof (bool),			 // Pixel size.
	    line_image.ncols() * sizeof (bool),  // Row_offset
	    0,					 // Left
	    0,					 // Top
	    line_image.ncols(),		         // n cols
	    line_image.nrows());		 // n rows
182
183
184
185
186
187
188
189
190
191
#  else // HAVE_TESSERACT_3
	char* s = tess.TesseractRect(
	  (unsigned char*) line_image.buffer(),
	  sizeof (bool),			 // Pixel size.
	  line_image.ncols() * sizeof (bool),    // Row_offset
	  0,					 // Left
	  0,					 // Top
	  line_image.ncols(),		         // n cols
	  line_image.nrows());		         // n rows
#  endif // ! HAVE_TESSERACT_2
Guillaume Lazzara's avatar
Guillaume Lazzara committed
192
193

	if (s != 0)
194
	{
195
196
197
	  std::string str(s);
	  str = str.substr(0, str.length() - 2);
	  lines(i).update_text(str);
198
	}
Guillaume Lazzara's avatar
Guillaume Lazzara committed
199

200
	// The string has been allocated by Tesseract. It must be released.
201
	delete [] s;
202
203
204
205
206
207
      }

      trace::exiting("scribo::text::recognition");
    }


208
209
210
211
    template <typename I>
    void
    recognition(const Image<I>& line_,
		const char *language,
Guillaume Lazzara's avatar
Guillaume Lazzara committed
212
		const std::string& output_file = std::string())
213
214
215
216
217
218
219
    {
      trace::entering("scribo::text::recognition");

      const I& line = exact(line_);
      mln_precondition(line.is_valid());

      // Initialize Tesseract.
220
#  ifdef HAVE_TESSERACT_2
221
      TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL);
222
223
224
225
226
227
228
229
#  else // HAVE_TESSERACT_3
      tesseract::TessBaseAPI tess;
      if (tess.Init(NULL, language, NULL, 0, false) == -1)
      {
	std::cout << "Error: cannot initialize tesseract!" << std::endl;
	abort();
      }
#  endif // ! HAVE_TESSERACT_2
230
231

      std::ofstream file;
232
233
      if (!output_file.empty())
	file.open(output_file.c_str());
234
235
236
237
238
239
240
241
242
243
244
245
246
247

      mln_domain(I) box = line.domain();
      // Make sure characters are isolated from the borders.
      // Help Tesseract.
      box.enlarge(2);

      I text_ima(box);
      data::fill(text_ima, false);
      data::paste(line, text_ima);

      // Make sure there is no border.
      border::resize(text_ima, 0);

      // Recognize characters.
248
#  ifdef HAVE_TESSERACT_2
249
250
251
252
253
254
255
256
      char* s = TessBaseAPI::TesseractRect(
	(unsigned char*) text_ima.buffer(),
	sizeof (bool),			  // Pixel size.
	text_ima.ncols() * sizeof (bool), // Row_offset
	0,				  // Left
	0,				  // Top
	text_ima.ncols(),		  // n cols
	text_ima.nrows());		  // n rows
257
258
259
260
261
262
263
264
265
266
#  else // HAVE_TESSERACT_3
      char* s = tess.TesseractRect(
	(unsigned char*) text_ima.buffer(),
	sizeof (bool),			  // Pixel size.
	text_ima.ncols() * sizeof (bool), // Row_offset
	0,				  // Left
	0,				  // Top
	text_ima.ncols(),		  // n cols
	text_ima.nrows());		  // n rows
#  endif // ! HAVE_TESSERACT_2
267
268
269

	if (s != 0)
	{
270
	  if (!output_file.empty())
271
272
273
	  {
	    std::string str(s);
	    str = str.substr(0, str.length() - 1);
274
275
276
277
278
279
280
281
282
	    file << line.domain().bbox().pmin().row()
		 << " "
		 << line.domain().bbox().pmin().col()
		 << " "
		 << line.domain().bbox().pmax().row()
		 << " "
		 << line.domain().bbox().pmax().col()
		 << " "
		 << str;
283
	  }
284
285
286
	}

	// The string has been allocated by Tesseract. We must free it.
287
	delete [] s;
288

289
	if (!output_file.empty())
290
291
292
293
294
295
296
	  file.close();

	trace::exiting("scribo::text::recognition");
    }



297
298
299
300
301
302
303
# endif // ! MLN_INCLUDE_ONLY

  } // end of namespace scribo::text

} // end of namespace scribo

#endif // ! SCRIBO_TEXT_RECOGNITION_HH