htmlstripper.cpp

来自「Think in C++ 2nd」· C++ 代码 · 共 50 行

CPP
50
字号
//: C17:HTMLStripper.cpp

// From Thinking in C++, 2nd Edition

// Available at http://www.BruceEckel.com

// (c) Bruce Eckel 1999

// Copyright notice in Copyright.txt

// Filter to remove html tags and markers

#include "../require.h"

#include <fstream>

#include <iostream>

#include <string>

using namespace std;



// Returns a copy of s in which every occurrence of f has been
// replaced by r.
//
//   s  text to search (taken by value; the modified copy is returned)
//   f  substring to look for; when empty, s is returned unchanged
//   r  replacement text
//
// Occurrences are replaced left to right in a single pass. After each
// replacement the scan resumes just past the inserted text, so the
// replacement itself is never rescanned. This guarantees termination
// even when r contains f (e.g. "&" -> "&amp;").
std::string replaceAll(std::string s, std::string f, std::string r) {
  // An empty pattern matches at every position; treat it as "nothing
  // to replace" instead of looping forever.
  if(f.empty())
    return s;
  // size_type (not unsigned int) so the comparison with npos is valid
  // on platforms where size_t is wider than unsigned int.
  std::string::size_type found = s.find(f);
  while(found != std::string::npos) {
    s.replace(found, f.length(), r);
    // Skip over what was just inserted.
    found = s.find(f, found + r.length());
  }
  return s;
}



string stripHTMLTags(string s) {

  while(true) {

    unsigned int left = s.find('<');

    unsigned int right = s.find('>');

    if(left==string::npos || right==string::npos)

      break;

    s = s.erase(left, right - left + 1);

  }

  s = replaceAll(s, "&lt;", "<");

  s = replaceAll(s, "&gt;", ">");

  s = replaceAll(s, "&amp;", "&");

  s = replaceAll(s, "&nbsp;", " ");

  // Etc...

  return s;

}



// Reads the file named by the single command-line argument line by
// line, strips HTML tags and decodes entities on each line with
// stripHTMLTags(), and writes the result to standard output.
//
// Each line is processed independently of the others.
int main(int argc, char* argv[]) {
  requireArgs(argc, 1,
    "usage: HTMLStripper InputFile");
  std::ifstream in(argv[1]);
  assure(in, argv[1]);
  // std::getline grows the string as needed. A fixed-size char buffer
  // with istream::getline would set failbit on the first over-long
  // line and silently end the loop, dropping the rest of the file.
  std::string line;
  while(std::getline(in, line))
    std::cout << stripHTMLTags(line) << '\n';
} ///:~

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?