📄 wseg.cc
字号:
#include <SLList.h>
#include <stdlib.h>
#include "HWRawDataC.hh"

// wseg: word segmentation filter.
//
// Reads a header followed by a stream of HWRawDataC records from cin, copies
// them to cout, and inserts "graph boundary" records (END_WORDBODY,
// END_WORDFRAG, END_TCROSS, ...) between strokes.  Each stroke is summarised
// by its bounding box, and the boundary type is chosen by comparing that box
// with the bounding box of the word built so far (see Classify).
//
// The END_* constants, HWHeaderC, HWRawDataC and cin/cout/cerr are not
// declared in this file; presumably they come from HWRawDataC.hh.

// Redefine these to change word segmentation guidelines
#define LGE_LENGTH 200         // length of a box
#define MAX_THEIGHT 35         // maximum allowed height of a T cross box
#define MIN_TLENGTH 65         // minimum allowed Tcross box length
#define SMALL_AREA 150         // area of a box
#define MAX_IDOT_POINTS 5      // idots are made quickly
#define DEFAULT_SMALL_HGAP 25  // horizontal gap between 2 boxes
#define SMALL_VGAP 200         // vertical gap between 2 boxes

// Initial capacity of the record buffer; it grows on demand (see Append_Rec).
#define INITIAL_RECS 5000

// Axis-aligned bounding box of a stroke (or of several joined strokes),
// plus the number of data points that contributed to it.
struct Box {
    long XMin, XMax, YMin, YMax;
    int Points;
};

int Area(long XMax, long XMin, long YMax, long YMin);
int Classify(struct Box PWBox, struct Box CWBox);
long DeltaX(long CXMax, long CXMin, long PXMax, long PXMin);
long DeltaY(long CYMax, long CYMin, long PYMax, long PYMin);
int GetIntNum(struct Box PWBox, struct Box CWBox);
int IsIdot(struct Box CBox);
int IsTstroke(struct Box CBox);
struct Box JoinBoxes(struct Box PrevWordBox, struct Box WordBox);
void Write_Recs(HWRawDataC recs[], int index);
static void Append_Rec(HWRawDataC *&recs, int &capacity, int &index, HWRawDataC &rec);
static void ResetBox(struct Box *B);

// Horizontal gap threshold used by Classify; may be overridden by argv[1].
int small_hgap = DEFAULT_SMALL_HGAP;

// Entry point.
//   argv[1] (optional): integer replacing the default horizontal gap.
// Returns 0.
int main(int argc, char *argv[])
{
    HWHeaderC Header;
    cin >> Header;
    cout << Header;

    struct Box WordBox = {32000, 0, 32000, 0, 0};
    struct Box PrevWordBox = {0, 0, 0, 0, 0};
    int PenUp = 1, ClassType, index = 0;
    HWRawDataC ThisRecord;
    HWRawDataC GraphBoundRecord;

    // Records are buffered so that the boundary record at recs[0] can still
    // be retagged once the next stroke has been classified.  The buffer is
    // heap-allocated and growable: the original fixed recs[5000] array was
    // written past its end when more than 5000 records arrived between flushes.
    int capacity = INITIAL_RECS;
    HWRawDataC *recs = new HWRawDataC[capacity];

    // Becomes 1 once recs[0] holds a boundary record written by this program.
    // Until then recs[0] is an input record and must not be retagged.
    int HeadIsBoundary = 0;

    if (argc == 2) {
        // strtol instead of atoi so that a non-numeric argument is detected
        // rather than silently turning the gap into 0.
        char *end;
        long gap = strtol(argv[1], &end, 10);
        if (end != argv[1] && *end == '\0') {
            small_hgap = (int)gap;
        } else {
            cerr << "wseg: ignoring invalid horizontal gap '" << argv[1]
                 << "', using " << DEFAULT_SMALL_HGAP << "\n";
        }
    }

    while (ThisRecord.Type != 5) // stop at EOF record
    {
        cin >> ThisRecord;
        // Check the stream right after the read: the original only tested it
        // at the top of the next iteration, so a failed read caused the stale
        // previous record to be processed and buffered a second time.
        if (!cin) break;

        // Type 2 record: classify the stroke accumulated in WordBox.
        // NOTE(review): the original comment called this a "Pen Down Event",
        // yet the code below sets PenUp = 1 for Type 2 and only accumulates
        // points after a Type 1 record - confirm the record-type semantics
        // against HWRawDataC.hh.
        if (ThisRecord.Type == 2)
        {
            ClassType = Classify(PrevWordBox, WordBox);
            switch (ClassType)
            {
            case END_WORDBODY: // End of a Word body break
                // Possible word break. Won't know for sure until we find the
                // next WORDFRAG or WORDBODY.
                // This is definitely the first part of a new word so
                // start a new box.
                // But we can print out recs & reset the index.
                PrevWordBox = WordBox;
                ResetBox(&WordBox);
                Write_Recs(recs, index);
                index = 0;
                GraphBoundRecord.Type = END_WORDBODY;
                GraphBoundRecord.Time = ThisRecord.Time;
                Append_Rec(recs, capacity, index, GraphBoundRecord);
                HeadIsBoundary = 1;
                break;

            case END_WORDFRAG: // End of an Intraword Break
                // The last WORDBODY was really a WORDFRAG so change it.
                // Only do so when recs[0] really is a boundary record;
                // before the first boundary has been written recs[0] is an
                // input record and retagging it would destroy it.
                if (HeadIsBoundary) {
                    recs[0].Type = END_WORDFRAG;
                }
                // Now we can print it out to file.
                Write_Recs(recs, index);
                index = 0;
                // Then we have to extend the Pbox to include this new piece.
                PrevWordBox = JoinBoxes(PrevWordBox, WordBox);
                // Finally start a new box.
                ResetBox(&WordBox);
                GraphBoundRecord.Time = ThisRecord.Time;
                GraphBoundRecord.Type = END_WORDBODY;
                Append_Rec(recs, capacity, index, GraphBoundRecord);
                HeadIsBoundary = 1;
                break;

            case END_TCROSS:    // End of a T cross
            case END_IDOT:      // End of an I(J) dot
            case END_PUNCT:     // End of a punctuation mark
            case END_STROKEOUT: // End of an overstroke
                // These strokes do not start a new word: tag them with their
                // own type, keep buffering, and start a fresh stroke box.
                GraphBoundRecord.Time = ThisRecord.Time;
                GraphBoundRecord.Type = ClassType;
                Append_Rec(recs, capacity, index, GraphBoundRecord);
                ResetBox(&WordBox);
                break;

            default: // Unknown stroke
                GraphBoundRecord.Time = ThisRecord.Time;
                GraphBoundRecord.Type = END_UNKNOWN;
                Append_Rec(recs, capacity, index, GraphBoundRecord);
                ResetBox(&WordBox);
                break;
            } // end switch
        } // end if

        if (ThisRecord.Type == 1 || ThisRecord.Type == 2)
        {
            // keep track if pen is up or down: Type 1 -> 0, Type 2 -> 1.
            PenUp = ThisRecord.Type - 1;
        }

        // Buffer the record for output
        Append_Rec(recs, capacity, index, ThisRecord);

        // Type 0 records seen while PenUp == 0 extend the current stroke box.
        if (PenUp == 0 && ThisRecord.Type == 0)
        {
            WordBox.Points++;
            if (ThisRecord.DataPoint.XPos < WordBox.XMin) { WordBox.XMin = ThisRecord.DataPoint.XPos; }
            if (ThisRecord.DataPoint.XPos > WordBox.XMax) { WordBox.XMax = ThisRecord.DataPoint.XPos; }
            if (ThisRecord.DataPoint.YPos < WordBox.YMin) { WordBox.YMin = ThisRecord.DataPoint.YPos; }
            if (ThisRecord.DataPoint.YPos > WordBox.YMax) { WordBox.YMax = ThisRecord.DataPoint.YPos; }
        }
    }

    // NOTE(review): the terminating (Type 5) record was already buffered
    // inside the loop, so this emits it a second time.  Kept so that output
    // for well-formed input is unchanged - confirm whether downstream readers
    // rely on the doubled terminator.  It is skipped when the loop ended on a
    // read failure, because ThisRecord is then a stale data record.
    if (ThisRecord.Type == 5) {
        Append_Rec(recs, capacity, index, ThisRecord);
    }
    Write_Recs(recs, index);

    delete [] recs;
    return 0;
}

// Store rec at recs[index] and advance index, doubling the buffer first when
// it is full.  recs and capacity are updated in place when the buffer moves.
static void Append_Rec(HWRawDataC *&recs, int &capacity, int &index, HWRawDataC &rec)
{
    if (index >= capacity) {
        int i;
        int grownCapacity = capacity * 2;
        HWRawDataC *grown = new HWRawDataC[grownCapacity];
        for (i = 0; i < index; i++) {
            grown[i] = recs[i];
        }
        delete [] recs;
        recs = grown;
        capacity = grownCapacity;
    }
    recs[index++] = rec;
}

// Reset a box to the "empty" state used before any point has been added:
// minima at 32000, maxima at 0, no points.
static void ResetBox(struct Box *B)
{
    B->XMin = B->YMin = 32000;
    B->XMax = B->YMax = 0;
    B->Points = 0;
}

// Classify the current stroke box CWBox relative to the previous word box
// PWBox.  Returns one of the END_* codes (or 0 from the default branch, which
// GetIntNum's 0..15 range never reaches).
int Classify(struct Box PWBox, struct Box CWBox)
{
    int IntNum;
    // PWSBox is PreviousWordStretchedBox, CWSBox is CurrentWordStretchedBox
    struct Box PWSBox, CWSBox;
    long PLenStretch, PWidthStretch, CLenStretch, CWidthStretch;
    // Stretch factors; all zero, so the stretched boxes currently equal the
    // input boxes.
    float XPstretch = 0.0, YPstretch = 0.0, XCstretch = 0.0, YCstretch = 0.0;

    // stretch values
    // NOTE(review): the previous-box stretches are scaled by the CURRENT
    // box's size, as in the original - confirm this is intended.
    PLenStretch = (long)(XPstretch * (CWBox.XMax - CWBox.XMin));
    PWidthStretch = (long)(YPstretch * (CWBox.YMax - CWBox.YMin));
    CLenStretch = (long)(XCstretch * (CWBox.XMax - CWBox.XMin));
    CWidthStretch = (long)(YCstretch * (CWBox.YMax - CWBox.YMin));

    // Stretched Boxes
    PWSBox.XMin = PWBox.XMin - PLenStretch;
    PWSBox.XMax = PWBox.XMax + PLenStretch;
    PWSBox.YMin = PWBox.YMin - PWidthStretch;
    PWSBox.YMax = PWBox.YMax + PWidthStretch;
    PWSBox.Points = PWBox.Points;
    // X extents use the length stretch (the original applied CWidthStretch
    // here, leaving CLenStretch unused; harmless only while factors are 0).
    CWSBox.XMin = CWBox.XMin - CLenStretch;
    CWSBox.XMax = CWBox.XMax + CLenStretch;
    CWSBox.YMin = CWBox.YMin - CWidthStretch;
    CWSBox.YMax = CWBox.YMax + CWidthStretch;
    CWSBox.Points = CWBox.Points;

    long CLength = CWSBox.XMax - CWSBox.XMin;
    long CHeight = CWSBox.YMax - CWSBox.YMin;
    int CArea = Area(CWSBox.XMax, CWSBox.XMin, CWSBox.YMax, CWSBox.YMin);

    // Get the intersection number and switch based on it
    IntNum = GetIntNum(PWSBox, CWSBox);
    switch (IntNum)
    {
    case 0:
        if ((CWSBox.XMin < PWSBox.XMin && CWSBox.XMax > PWSBox.XMax) &&
            (CWSBox.YMin < PWSBox.YMin && CWSBox.YMax > PWSBox.YMax))
        {
            // PBox is entirely inside CBox
            return END_STROKEOUT;
        }
        if (DeltaX(CWSBox.XMax, CWSBox.XMin, PWSBox.XMax, PWSBox.XMin) < small_hgap) { return END_WORDFRAG; }
        return END_WORDBODY;

    case 1:
        if (CArea < SMALL_AREA) { return END_PUNCT; }
        if (CWSBox.XMin - PWSBox.XMax < small_hgap) { return END_WORDFRAG; }
        return END_WORDBODY;

    case 2:
        if ((PWSBox.XMin - CWSBox.XMax < small_hgap) && (CArea < SMALL_AREA))
        {
            return END_PUNCT; // probably a quote
        }
        if (DeltaX(CWSBox.XMax, CWSBox.XMin, PWSBox.XMax, PWSBox.XMin) < small_hgap) { return END_WORDFRAG; }
        return END_WORDBODY;

    case 3:
        if (CWSBox.XMin > PWSBox.XMax) // Current box is after Prev box
        {
            if (IsIdot(CWSBox)) { return END_PUNCT; }
            if (CWSBox.XMin - PWSBox.XMax < small_hgap) { return END_WORDFRAG; }
            return END_WORDBODY;
        }
        if (PWSBox.XMin - CWSBox.XMax < small_hgap) // Prev box is after Curr box
        {
            return END_WORDFRAG;
        }
        return END_WORDBODY;

    case 4:
    case 8:
        if (DeltaY(CWSBox.YMax, CWSBox.YMin, PWSBox.YMax, PWSBox.YMin) < SMALL_VGAP) { return END_WORDFRAG; }
        return END_WORDBODY;

    case 5:
        if (IsTstroke(CWSBox)) { return END_TCROSS; }
        if (CLength > LGE_LENGTH) { return END_WORDBODY; }
        return END_WORDFRAG;

    case 6:
        if (CLength > LGE_LENGTH) { return END_WORDBODY; }
        return END_WORDFRAG;

    case 7:
        if (CHeight > MAX_THEIGHT)
        {
            if (CLength < LGE_LENGTH) { return END_WORDFRAG; }
            return END_WORDBODY;
        }
        if (IsTstroke(CWSBox)) { return END_TCROSS; }
        return END_WORDFRAG;

    case 9:
        if (CArea < SMALL_AREA) { return END_PUNCT; }
        if (IsTstroke(CWSBox)) { return END_TCROSS; }
        if (CLength > LGE_LENGTH) { return END_WORDBODY; }
        return END_WORDFRAG;

    case 10:
        if (CArea < SMALL_AREA) { return END_PUNCT; }
        if (CLength > LGE_LENGTH) { return END_WORDBODY; }
        return END_WORDFRAG;

    case 11:
        if (CLength > LGE_LENGTH) { return END_WORDBODY; }
        if (IsTstroke(CWSBox)) { return END_TCROSS; }
        return END_WORDFRAG;

    case 12:
        if (PWSBox.YMax < CWSBox.YMin) // Prev on top of Current
        {
            // Gap between the boxes.  The original computed
            // PWSBox.YMax - CWSBox.YMin, which is always negative inside
            // this branch, so END_WORDBODY could never be returned.
            if (CWSBox.YMin - PWSBox.YMax < SMALL_VGAP) { return END_WORDFRAG; }
            return END_WORDBODY;
        }
        if (IsIdot(CWSBox)) { return END_IDOT; }
        return END_UNKNOWN;

    case 13:
        if (IsTstroke(CWSBox)) { return END_TCROSS; }
        return END_WORDFRAG;

    case 14:
        return END_WORDFRAG;

    case 15:
        if (IsTstroke(CWSBox)) { return END_TCROSS; }
        if (IsIdot(CWSBox)) { return END_IDOT; }
        return END_WORDFRAG;

    default:
        return 0;
    } // end switch
} // end Classify

// Encode, as a 4-bit number, which edges of CWBox lie strictly inside the
// extent of PWBox:
//   8: CWBox.XMin inside PWBox's X range    4: CWBox.XMax inside it
//   2: CWBox.YMin inside PWBox's Y range    1: CWBox.YMax inside it
int GetIntNum(struct Box PWBox, struct Box CWBox)
{
    int x = 0;
    if ((CWBox.XMin > PWBox.XMin) && (CWBox.XMin < PWBox.XMax)) { x += 8; }
    if ((CWBox.XMax > PWBox.XMin) && (CWBox.XMax < PWBox.XMax)) { x += 4; }
    if ((CWBox.YMin > PWBox.YMin) && (CWBox.YMin < PWBox.YMax)) { x += 2; }
    if ((CWBox.YMax > PWBox.YMin) && (CWBox.YMax < PWBox.YMax)) { x += 1; }
    return x;
}

// Area of the box with the given extents, truncated to int.
int Area(long XMax, long XMin, long YMax, long YMin)
{
    return (int)((XMax - XMin) * (YMax - YMin));
}

// Horizontal gap between the current box (C) and the previous box (P).
// The result is negative when the boxes overlap horizontally.
long DeltaX(long CXMax, long CXMin, long PXMax, long PXMin)
{
    if (CXMin > PXMax) // Curr is to right of Prev
    {
        return CXMin - PXMax;
    }
    // Prev is to right of Curr
    return PXMin - CXMax;
}

// Vertical measure between the current box (C) and the previous box (P).
// NOTE(review): unlike DeltaX this subtracts the far edges (CYMax - PYMin,
// PYMax - CYMin), so it is not a gap between nearest edges.  Left as in the
// original - confirm the intended formula and Y-axis direction.
long DeltaY(long CYMax, long CYMin, long PYMax, long PYMin)
{
    if (CYMax > PYMin) // Curr is on top of Prev
    {
        return CYMax - PYMin;
    }
    // Prev is on top of Curr
    return PYMax - CYMin;
}

// Write the first `index` buffered records to cout, in order.
void Write_Recs(HWRawDataC recs[], int index)
{
    int i;
    for (i = 0; i < index; i++)
    {
        cout << recs[i];
    }
}

// Return the smallest box containing both PBox and CBox.  Points is the sum
// of the two point counts.
struct Box JoinBoxes(struct Box PBox, struct Box CBox)
{
    struct Box NewBox;
    NewBox.XMin = (PBox.XMin < CBox.XMin) ? PBox.XMin : CBox.XMin;
    // The original assigned PBox.XMax to NewBox.XMin here, corrupting XMin
    // and leaving XMax uninitialised.
    NewBox.XMax = (PBox.XMax > CBox.XMax) ? PBox.XMax : CBox.XMax;
    NewBox.YMin = (PBox.YMin < CBox.YMin) ? PBox.YMin : CBox.YMin;
    NewBox.YMax = (PBox.YMax > CBox.YMax) ? PBox.YMax : CBox.YMax;
    // Previously left uninitialised.
    NewBox.Points = PBox.Points + CBox.Points;
    return NewBox;
}

// Return 1 when the box is shaped like a T cross: lower than MAX_THEIGHT and
// longer than MIN_TLENGTH; otherwise 0.
int IsTstroke(struct Box TBox)
{
    if ((TBox.YMax - TBox.YMin < MAX_THEIGHT) &&
        (TBox.XMax - TBox.XMin > MIN_TLENGTH))
    {
        return 1;
    }
    return 0;
}

// Return 1 when the box looks like an i-dot: area below SMALL_AREA or at most
// MAX_IDOT_POINTS points; otherwise 0.
int IsIdot(struct Box CBox)
{
    if ((Area(CBox.XMax, CBox.XMin, CBox.YMax, CBox.YMin) < SMALL_AREA) ||
        (CBox.Points <= MAX_IDOT_POINTS))
    {
        return 1;
    }
    return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -