⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 html.h

📁 利用IE接口分析HTML文件
💻 H
字号:
/*
    Implement an HTML parser using IE4's IHTMLDocument2 interface.
*/

#ifndef __HTML_H__
#define __HTML_H__

#include <windows.h>
#include <string>

// if we are using VC6 or better get this from the stock include
// directory, otherwise get it from the Internet SDK
#if _MSC_VER >= 1200
#pragma warning(disable:4099)   // disable spurious namespace warnings
#include <mshtmdid.h>
#else
#include "./inetsdk/include/mshtmdid.h"
#endif

#import "mshtml.dll" named_guids no_namespace


using namespace std;



// Private window message ID, one past WM_USER.
// NOTE(review): presumably posted when the document finishes loading --
// confirm against the implementation file.
// The replacement list is parenthesized so the macro expands as a single
// operand inside larger expressions (e.g. WM_USER_LOAD_COMPLETE * 2).
#define WM_USER_LOAD_COMPLETE   (WM_USER+1)


/*
    HTMLParser

    Reference-counted COM object that implements IPropertyNotifySink,
    IOleClientSite and IDispatch and exposes accessors for the link and
    image URLs of an HTML file.

    Instances are obtained through Create() and disposed of through
    Release(); the constructors, destructor and copy operations are hidden
    so that objects can neither be placed on the stack nor copied.

    IOleClientSite and IDispatch are inherited privately (this was the
    implicit default in the original declaration and is now spelled out).
    NOTE(review): confirm private inheritance is intentional.
*/
class HTMLParser: public IPropertyNotifySink, private IOleClientSite, private IDispatch
{

    public:
        // Factory function; forces dynamic allocation.
        static HTMLParser *Create();

        // IUnknown::Release -- the public counterpart of Create().
        STDMETHOD_(ULONG, Release)();

        // Loads the HTML file named by pcszFile.
        // NOTE(review): presumably returns TRUE on success -- confirm
        // against the implementation.
        BOOL LoadHTMLFile(LPCSTR pcszFile);

        // Number of links, and the URL of the link at lIndex (written to
        // rstrURL).
        long GetLinkCount();
        BOOL GetLinkURL(long lIndex, std::string &rstrURL);

        // Number of images, and the URL of the image at lIndex (written to
        // rstrURL).
        long GetImageCount();
        BOOL GetImageURL(long lIndex, std::string &rstrURL);

        // TRUE when the stored connection HRESULT is a success code.
        BOOL IsConnected() const { return SUCCEEDED(m_hrConnected); }

    protected:

        // Hidden constructors/destructor to force use of Create/Release.
        HTMLParser();
        virtual ~HTMLParser();

        // Declared to suppress the compiler-synthesized copy operations.
        // Copying would duplicate the reference count, the connection
        // cookie and the raw interface pointers held below.
        HTMLParser(const HTMLParser &);
        HTMLParser &operator=(const HTMLParser &);

        // IUnknown methods
        STDMETHOD(QueryInterface)(REFIID riid, LPVOID* ppv);
        STDMETHOD_(ULONG, AddRef)();

        // IPropertyNotifySink methods
        STDMETHOD(OnChanged)(DISPID dispID);
        // Every edit request is reported as NOERROR.
        STDMETHOD(OnRequestEdit)(DISPID dispID) { return NOERROR; }

        // IOleClientSite methods -- all stubbed out with E_NOTIMPL.
        STDMETHOD(SaveObject)(void)
            { return E_NOTIMPL; }
        STDMETHOD(GetMoniker)(DWORD dwAssign,DWORD dwWhichMoniker, IMoniker** ppmk)
            { return E_NOTIMPL; }
        STDMETHOD(GetContainer)(IOleContainer** ppContainer)
            { return E_NOTIMPL; }
        STDMETHOD(ShowObject)(void)
            { return E_NOTIMPL; }
        STDMETHOD(OnShowWindow)(BOOL fShow)
            { return E_NOTIMPL; }
        STDMETHOD(RequestNewObjectLayout)(void)
            { return E_NOTIMPL; }

        // IDispatch methods -- only Invoke has an out-of-line
        // implementation; the type-info and name-lookup entry points
        // return E_NOTIMPL.
        STDMETHOD(GetTypeInfoCount)(UINT* pctinfo)
            { return E_NOTIMPL; }
        STDMETHOD(GetTypeInfo)(UINT iTInfo,
            LCID lcid,
            ITypeInfo** ppTInfo)
            { return E_NOTIMPL; }
        STDMETHOD(GetIDsOfNames)(REFIID riid,
            LPOLESTR* rgszNames,
            UINT cNames,
            LCID lcid,
            DISPID* rgDispId)
            { return E_NOTIMPL; }
        STDMETHOD(Invoke)(DISPID dispIdMember,
            REFIID riid,
            LCID lcid,
            WORD wFlags,
            DISPPARAMS __RPC_FAR *pDispParams,
            VARIANT __RPC_FAR *pVarResult,
            EXCEPINFO __RPC_FAR *pExcepInfo,
            UINT __RPC_FAR *puArgErr);


        // Helper function.
        // NOTE(review): presumably fetches item lIndex from pCollection,
        // queries it for interface rIID and stores its URL in rstrURL;
        // shared by GetLinkURL/GetImageURL -- confirm in the implementation.
        BOOL GetURLFromCollection(IHTMLElementCollection *pCollection,
                                  REFIID rIID, long lIndex, std::string &rstrURL);

        // Member variables
        DWORD   m_dwRef;            // COM reference count

        HRESULT  m_hrConnected;     // tested by IsConnected()
        DWORD    m_dwCookie;        // NOTE(review): presumably the connection-point advise cookie

        IHTMLDocument2* m_pMSHTML;  // the MSHTML document interface
        LPCONNECTIONPOINT m_pCP;    // connection point interface pointer

        IHTMLElementCollection *m_pAnchorLinks;  // NOTE(review): presumably backs GetLinkCount/GetLinkURL
        IHTMLElementCollection *m_pImageLinks;   // NOTE(review): presumably backs GetImageCount/GetImageURL
};

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -