Файл src/flipdetect.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "allheaders.h"

Функции

static void pixDebugFlipDetect (const char *filename, PIX *pixs, PIX *pixhm, l_int32 enable)
l_int32 pixOrientDetect (PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 mincount, l_int32 debug)
l_int32 makeOrientDecision (l_float32 upconf, l_float32 leftconf, l_float32 minupconf, l_float32 minratio, l_int32 *porient, l_int32 debug)
l_int32 pixUpDownDetect (PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)
l_int32 pixUpDownDetectGeneral (PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug)
l_int32 pixOrientDetectDwa (PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 mincount, l_int32 debug)
l_int32 pixUpDownDetectDwa (PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)
l_int32 pixUpDownDetectGeneralDwa (PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug)
l_int32 pixMirrorDetect (PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)
l_int32 pixMirrorDetectDwa (PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)

Переменные

static const char * textsel1
static const char * textsel2
static const char * textsel3
static const char * textsel4
static const l_int32 DEFAULT_MIN_UP_DOWN_COUNT = 70
static const l_float32 DEFAULT_MIN_UP_DOWN_CONF = 7.0
static const l_float32 DEFAULT_MIN_UP_DOWN_RATIO = 2.5
static const l_int32 DEFAULT_MIN_MIRROR_FLIP_COUNT = 100
static const l_float32 DEFAULT_MIN_MIRROR_FLIP_CONF = 5.0

Функции

l_int32 makeOrientDecision ( l_float32  upconf,
l_float32  leftconf,
l_float32  minupconf,
l_float32  minratio,
l_int32 *  porient,
l_int32  debug 
)

makeOrientDecision()

Input: upconf (nonzero) leftconf (nonzero) minupconf (minimum value for which a decision can be made) minratio (minimum conf ratio required for a decision) &orient (<return> text orientation enum {0,1,2,3,4}) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) This can be run after pixOrientDetect() (2) Both upconf and leftconf must be nonzero; otherwise the orientation cannot be determined. (3) The abs values of the input confidences are compared to minupconf. (4) The abs value of the largest of (upconf/leftconf) and (leftconf/upconf) is compared with minratio. (5) Input 0.0 for the default values for minupconf and minratio. (6) The return value of orient is interpreted thus: L_TEXT_ORIENT_UNKNOWN: not enough evidence to determine L_TEXT_ORIENT_UP: text rightside-up L_TEXT_ORIENT_LEFT: landscape, text up facing left L_TEXT_ORIENT_DOWN: text upside-down L_TEXT_ORIENT_RIGHT: landscape, text up facing right

static void pixDebugFlipDetect ( const char *  filename,
PIX *  pixs,
PIX *  pixhm,
l_int32  enable 
) [static]

l_int32 pixMirrorDetect ( PIX *  pixs,
l_float32 *  pconf,
l_int32  mincount,
l_int32  debug 
)

pixMirrorDetect()

Input: pixs (1 bpp, deskewed, English text) &conf (<return> confidence that text is not LR mirror reversed) mincount (min number of left + right; use 0 for default) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) For this test, it is necessary that the text is horizontally oriented, with ascenders going up. (2) conf is the normalized difference between the number of right and left facing characters with ascenders. Left-facing are {d}; right-facing are {b, h, k}. At least that was the expectation. In practice, we can really just say that it is the normalized difference in hits using two specific hit-miss filters, textsel1 and textsel2, after the image has been suitably pre-filtered so that these filters are effective. See (4) for what's really happening. (3) A large positive conf value indicates normal text, whereas a large negative conf value means the page is mirror reversed. (4) The implementation is a bit tricky. The general idea is to fill the x-height part of characters, but not the space between them, before doing the HMT. This is done by finding pixels added using two different operations -- a horizontal close and a vertical dilation -- and adding the intersection of these sets to the original. It turns out that the original intuition about the signal was largely in error: much of the signal for right-facing characters comes from the lower part of common x-height characters, like the e and c, that remain open after these operations. So it's important that the operations to close the x-height parts of the characters are purposely weakened sufficiently to allow these characters to remain open. The wonders of morphology!

l_int32 pixMirrorDetectDwa ( PIX *  pixs,
l_float32 *  pconf,
l_int32  mincount,
l_int32  debug 
)

pixMirrorDetectDwa()

Input: pixs (1 bpp, deskewed, English text) &conf (<return> confidence that text is not LR mirror reversed) mincount (min number of left + right; use 0 for default) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) We assume the text is horizontally oriented, with ascenders going up. (2) See notes in pixMirrorDetect().

l_int32 pixOrientDetect ( PIX *  pixs,
l_float32 *  pupconf,
l_float32 *  pleftconf,
l_int32  mincount,
l_int32  debug 
)

pixOrientDetect()

Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi) &upconf (<optional return>; may be null) &leftconf (<optional return>; may be null) mincount (min number of up + down; use 0 for default) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) See "Measuring document image skew and orientation" Dan S. Bloomberg, Gary E. Kopec and Lakshmi Dasari IS&T/SPIE EI'95, Conference 2422: Document Recognition II pp 302-316, Feb 6-7, 1995, San Jose, CA (2) upconf is the normalized difference between up ascenders and down ascenders. The image is analyzed without rotation for being rightside-up or upside-down. Set &upconf to null to skip this operation. (3) leftconf is the normalized difference between up ascenders and down ascenders in the image after it has been rotated 90 degrees clockwise. With that rotation, ascenders projecting to the left in the source image will project up in the rotated image. We compute this by rotating 90 degrees clockwise and testing for up and down ascenders. Set &leftconf to null to skip this operation. (4) Note that upconf and leftconf are not linear measures of confidence, e.g., in a range between 0 and 100. They measure how far you are out on the tail of a (presumably) normal distribution. For example, a confidence of 10 means that it is nearly certain that the difference did not happen at random. However, these values must be interpreted cautiously, taking into consideration the estimated prior for a particular orientation or mirror flip. The up-down signal is very strong if applied to text with ascenders up and down, and relatively weak for text at 90 degrees, but even at 90 degrees, the difference can look significant. For example, suppose the ascenders are oriented horizontally, but the test is done vertically. Then upconf can be < -MIN_CONF_FOR_UP_DOWN, suggesting the text may be upside-down. However, if instead the test were done horizontally, leftconf will be very much larger (in absolute value), giving the correct orientation. 
(5) If you compute both upconf and leftconf, and there is sufficient signal, the following table determines the cw angle necessary to rotate pixs so that the text is rightside-up: 0 deg : upconf >> 1, abs(upconf) >> abs(leftconf) 90 deg : leftconf >> 1, abs(leftconf) >> abs(upconf) 180 deg : upconf << -1, abs(upconf) >> abs(leftconf) 270 deg : leftconf << -1, abs(leftconf) >> abs(upconf) (6) One should probably not interpret the direction unless there are a sufficient number of counts for both orientations, in which case neither upconf nor leftconf will be 0.0. (7) Uses rasterop implementation of HMT.

l_int32 pixOrientDetectDwa ( PIX *  pixs,
l_float32 *  pupconf,
l_float32 *  pleftconf,
l_int32  mincount,
l_int32  debug 
)

pixOrientDetectDwa()

Input: pixs (1 bpp, deskewed, English text) &upconf (<optional return>; may be null) &leftconf (<optional return>; may be null) mincount (min number of up + down; use 0 for default) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) Same interface as for pixOrientDetect(). See notes there for usage. (2) Uses auto-gen'd code for the Sels defined at the top of this file, with some renaming of functions. The auto-gen'd code is in fliphmtgen.c, and can be generated by a simple executable; see prog/flipselgen.c. (3) This runs about 2.5 times faster than the pixOrientDetect().

l_int32 pixUpDownDetect ( PIX *  pixs,
l_float32 *  pconf,
l_int32  mincount,
l_int32  debug 
)

pixUpDownDetect()

Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi) &conf (<return> confidence that text is rightside-up) mincount (min number of up + down; use 0 for default) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) Special (typical, slightly faster) case, where the pixels identified through the HMT (hit-miss transform) are not clipped by a truncated word mask pixm. See pixOrientDetect() and pixUpDownDetectGeneral() for details. (2) The returned confidence is the normalized difference between the number of detected up and down ascenders, assuming that the text is either rightside-up or upside-down and not rotated at a 90 degree angle.

l_int32 pixUpDownDetectDwa ( PIX *  pixs,
l_float32 *  pconf,
l_int32  mincount,
l_int32  debug 
)

pixUpDownDetectDwa()

Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi) &conf (<return> confidence that text is rightside-up) mincount (min number of up + down; use 0 for default) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) Faster (DWA) version of pixUpDownDetect(). (2) This is a special case (but typical and slightly faster) of pixUpDownDetectGeneralDwa(), where the pixels identified through the HMT (hit-miss transform) are not clipped by a truncated word mask pixm. See pixUpDownDetectGeneral() for usage and other details. (3) The returned confidence is the normalized difference between the number of detected up and down ascenders, assuming that the text is either rightside-up or upside-down and not rotated at a 90 degree angle.

l_int32 pixUpDownDetectGeneral ( PIX *  pixs,
l_float32 *  pconf,
l_int32  mincount,
l_int32  npixels,
l_int32  debug 
)

pixUpDownDetectGeneral()

Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi) &conf (<return> confidence that text is rightside-up) mincount (min number of up + down; use 0 for default) npixels (number of pixels removed from each side of word box) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) See pixOrientDetect() for other details. (2) conf is the normalized difference between the number of detected up and down ascenders, assuming that the text is either rightside-up or upside-down and not rotated at a 90 degree angle. (3) The typical mode of operation is npixels == 0. If npixels > 0, this removes HMT matches at the beginning and ending of "words." This is useful for pages that may have mostly digits, because if npixels == 0, leading "1" and "3" digits can register as having ascenders or descenders, and "7" digits can match descenders. Consequently, a page image of only digits may register as being upside-down. (4) We want to count the number of instances found using the HMT. An expensive way to do this would be to count the number of connected components. A cheap way is to do a rank reduction cascade that reduces each component to a single pixel, and results (after two or three 2x reductions) in one pixel for each of the original components. After the reduction, you have a much smaller pix over which to count pixels. We do only 2 reductions, because this function is designed to work for input pix between 150 and 300 ppi, and an 8x reduction on a 150 ppi image is going too far -- components will get merged.

l_int32 pixUpDownDetectGeneralDwa ( PIX *  pixs,
l_float32 *  pconf,
l_int32  mincount,
l_int32  npixels,
l_int32  debug 
)

pixUpDownDetectGeneralDwa()

Input: pixs (1 bpp, deskewed, English text) &conf (<return> confidence that text is rightside-up) mincount (min number of up + down; use 0 for default) npixels (number of pixels removed from each side of word box) debug (1 for debug output; 0 otherwise) Return: 0 if OK, 1 on error

Notes: (1) See the notes in pixUpDownDetectGeneral() for usage.


Переменные

const l_int32 DEFAULT_MIN_MIRROR_FLIP_COUNT = 100 [static]

const l_float32 DEFAULT_MIN_UP_DOWN_CONF = 7.0 [static]

const l_int32 DEFAULT_MIN_UP_DOWN_COUNT = 70 [static]

const l_float32 DEFAULT_MIN_UP_DOWN_RATIO = 2.5 [static]

const char* textsel1 [static]

Инициализатор

 "x  oo "
                              "x oOo "
                              "x  o  "
                              "x     "
                              "xxxxxx"

const char* textsel2 [static]

Инициализатор

 " oo  x"
                              " oOo x"
                              "  o  x"
                              "     x"
                              "xxxxxx"

const char* textsel3 [static]

Инициализатор

 "xxxxxx"
                              "x     "
                              "x  o  "
                              "x oOo "
                              "x  oo "

const char* textsel4 [static]

Инициализатор

 "xxxxxx"
                              "     x"
                              "  o  x"
                              " oOo x"
                              " oo  x"


Документация по Leptonica. Последние изменения: Fri Aug 7 20:31:35 2009. Создано системой  doxygen 1.5.9