📄 separate_words.cpp

📁 一个很好的贝叶斯分类器
💻 CPP
字号:

#include "stdafx.h"
#include"BeyesClassifier.h"

/**
 * Split every line of text into words made of lowercase ASCII letters.
 *
 * A word is a maximal run of characters from 'a'..'z'; every other
 * character (digits, punctuation, whitespace, uppercase letters, ...) acts
 * as a separator and is discarded.  Words are appended in the order they
 * appear, line by line.
 *
 * NOTE(review): uppercase letters are treated as separators, exactly as in
 * the previous implementation -- presumably the input is lowercased before
 * this call; verify against the caller.
 *
 * @param text_file  Lines to tokenize.  A null pointer is treated as an
 *                   empty input.
 * @return A heap-allocated vector holding all words found.  The caller owns
 *         it and must release it with `delete`.
 */
std::vector<std::string> *separate_words( const std::vector<std::string> *text_file )
{
    // Full lowercase alphabet (the former filter string was missing 'z',
    // which split words such as "size" into "si" and "e").
    const std::string letters("abcdefghijklmnopqrstuvwxyz");

    // Collect into a local vector first so nothing leaks if an allocation
    // inside the loop throws; the heap object is only created at the end.
    std::vector<std::string> collected;

    if ( text_file != 0 )
    {
        // Use the container's own size type: a `short` index would overflow
        // on inputs with more than 32767 lines.
        for ( std::vector<std::string>::size_type line_no = 0;
              line_no < text_file->size(); ++line_no )
        {
            // Bind by reference: the line is only read, never modified.
            const std::string &line = (*text_file)[line_no];

            // Start of the current word (npos when no letter is left).
            std::string::size_type word_begin = line.find_first_of(letters);

            while ( word_begin != std::string::npos )
            {
                // One past the last letter of the current word.
                const std::string::size_type word_end =
                    line.find_first_not_of(letters, word_begin);

                if ( word_end == std::string::npos )
                {
                    // The word runs to the end of the line.
                    collected.push_back( line.substr(word_begin) );
                    break;
                }

                collected.push_back(
                    line.substr(word_begin, word_end - word_begin) );

                // Skip the separators and locate the next word, if any.
                word_begin = line.find_first_of(letters, word_end);
            }
        }
    }

    // Hand the result over without copying; swap() does not throw.
    std::vector<std::string> *words = new std::vector<std::string>;
    words->swap(collected);
    return words;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -