📄 sbineturl.cpp
字号:
/****************License************************************************ * * Copyright 2000-2003. ScanSoft, Inc. * * Use of this software is subject to notices and obligations set forth * in the SpeechWorks Public License - Software Version 1.2 which is * included with this software. * * ScanSoft is a registered trademark of ScanSoft, Inc., and OpenSpeech, * SpeechWorks and the SpeechWorks logo are registered trademarks or * trademarks of SpeechWorks International, Inc. in the United States * and other countries. * ***********************************************************************/ #ifndef _SB_USE_STD_NAMESPACE #define _SB_USE_STD_NAMESPACE #endif #ifndef UNICODE #define UNICODE #endif #ifndef _UNICODE #define _UNICODE #endif #ifdef _WIN32 #undef HTTP_VERSION #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include <windows.h> #include <wininet.h> #include <urlmon.h> #else #include <unistd.h> // for getcwd() #endif /* WIN32 */ #include <stdio.h> #include <string.h> #include "VXIvalue.h" #include "VXIinet.h" #include "VXItrd.h" #include "SBinetURL.h" #include "SBinetChannel.h" #include "HttpUtils.hpp" #include <SBinetString.hpp> #define INET_MAX_PATH 1024 // #if defined(_decunix_) || defined(_solaris_) // static int my_wcscasecmp(const wchar_t *s1, const wchar_t *s2) // { // register unsigned int u1, u2; // for (;;) { // u1 = (unsigned int) *s1++; // u2 = (unsigned int) *s2++; // if (HttpUtils::toUpper(u1) != HttpUtils::toUpper(u2)) { // return HttpUtils::toUpper(u1) - HttpUtils::toUpper(u2); // } // if (u1 == '\0') { // return 0; // } // } // } // #else // #define my_wcscasecmp ::wcscasecmp // #endif static void appendArrayIndexToName(SBinetNString& fieldName, VXIunsigned index) { char tempBuf[8]; sprintf(tempBuf, ".%d", index); fieldName += tempBuf; } VXIinetResult SBinetURL::create(const VXIchar *pszName, const VXIchar *pszUrlBase, SBinetURL *& url) { url = new SBinetURL(); if (url == NULL) { return VXIinet_RESULT_OUT_OF_MEMORY; } VXIinetResult rc = url->parse(pszName, pszUrlBase); if (rc != VXIinet_RESULT_SUCCESS) { delete url; url = NULL; } return rc; } SBinetURL& SBinetURL::operator=(const SBinetURL& rhs) { if (this != &rhs) { _absoluteURL = rhs._absoluteURL; _baseURL = rhs._baseURL; _host = rhs._host; _strPath = rhs._strPath; _protocol = rhs._protocol; _port = rhs._port; N_absoluteURL = rhs.N_absoluteURL; N_baseURL = rhs.N_baseURL; N_host = rhs.N_host; N_strPath = rhs.N_strPath; } return *this; } bool SBinetURL::operator==(const SBinetURL& rhs) { if (this == &rhs) return true; if (_protocol != rhs._protocol) return false; if (::wcscmp(_strPath.c_str(), rhs._strPath.c_str()) != 0) return false; if (_protocol >= HTTP_PROTOCOL) { if ( _port != rhs._port) return false; if (SBinetHttpUtils::casecmp(_host.c_str(), rhs._host.c_str()) != 0) return false; } return true; } #ifdef _WIN32 VXIinetResult SBinetURL::parse(const VXIchar* pszUrl, const VXIchar* pszUrlBase) { VXIinetResult eResult( VXIinet_RESULT_SUCCESS ); if( !pszUrl || !*pszUrl) { //Error(200, L"%s%s", L"Operation", L"parse URL"); return VXIinet_RESULT_INVALID_ARGUMENT; } if (pszUrlBase != NULL) _baseURL = pszUrlBase; else _baseURL = L""; // If the caller is using "file:", just strip it because apparently // Internet*() doesn't do the right thing with file URI's bool relativeFileUri = false; if (!wcsncmp(L"file:", pszUrl, 5)) { // file:... pszUrl += 5; if (!wcsncmp(L"//", pszUrl, 2)) // file://... pszUrl += 2; relativeFileUri = true; } wchar_t *tmpUrl = NULL; const wchar_t *absoluteUrl = pszUrl; // The parsing of URL is complicated by the fact that we want unsafe // characters to be encoded using %-notation for HTTP URLs, but we want // %-notatation to be converted into unsaface characters for file URLs. So, // on entry we convert the URL into unsafe notation, then for HTTP URLs, we // re-encode it back using %-notation. // Combine the base and (possibly relative) URL. Since we stripped the // transport type above, don't combine them if the transport type of the // base URI differs. if( pszUrlBase && pszUrlBase[0] && !(relativeFileUri && (0 == wcsncmp(L"http:", pszUrlBase, 5)))) { VXIulong len = (::wcslen(pszUrlBase) + ::wcslen(pszUrl)) * 3; tmpUrl = new VXIchar [len]; if( !tmpUrl ) { //Error(103, NULL); return VXIinet_RESULT_OUT_OF_MEMORY; } if (InternetCombineUrl(pszUrlBase, pszUrl, tmpUrl, &len, ICU_BROWSER_MODE | ICU_DECODE| ICU_NO_ENCODE) == TRUE) { absoluteUrl = tmpUrl; } else { int errCode = GetLastError(); //Error(225, NULL); delete [] tmpUrl; return VXIinet_RESULT_NON_FATAL_ERROR; } } else { VXIulong len = ::wcslen (pszUrl) * 3; tmpUrl = new VXIchar [len]; if( !tmpUrl ) { //Error(103, NULL); return VXIinet_RESULT_OUT_OF_MEMORY; } if (InternetCanonicalizeUrl(pszUrl, tmpUrl, &len, ICU_BROWSER_MODE | ICU_DECODE | ICU_NO_ENCODE) == TRUE) { absoluteUrl = tmpUrl; } else { int errCode = GetLastError(); //Error(225, NULL); delete [] tmpUrl; return VXIinet_RESULT_NON_FATAL_ERROR; } } // Now parse the absolute URL to decide if it is file or network access int pathLen = 0; int queryLen = 0; wchar_t* queryPtr = ::wcschr(absoluteUrl, L'?'); if (queryPtr) { pathLen = queryPtr - absoluteUrl + 1; queryLen = ::wcslen(queryPtr) + 1; } else { pathLen = ::wcslen(absoluteUrl) + 1; queryLen = 0; } if (pathLen < INET_MAX_PATH) pathLen = INET_MAX_PATH; wchar_t protocol[INET_MAX_PATH]; wchar_t* host = new wchar_t [pathLen]; wchar_t* urlPath = new wchar_t [pathLen]; wchar_t* query = NULL; if (queryLen > 0) query = new wchar_t [queryLen]; URL_COMPONENTS components; memset (&components, 0, sizeof (URL_COMPONENTS)); components.dwStructSize = sizeof (URL_COMPONENTS); components.lpszScheme = protocol; components.dwSchemeLength = INET_MAX_PATH; components.lpszHostName = host; components.dwHostNameLength = pathLen; components.lpszUrlPath = urlPath; components.dwUrlPathLength = pathLen; components.lpszExtraInfo = query; components.dwExtraInfoLength = queryLen; if(InternetCrackUrl(absoluteUrl, ::wcslen (absoluteUrl), 0, &components) == TRUE) { switch (components.nScheme) { case INTERNET_SCHEME_FILE: // File access, return the local file path _protocol = FILE_PROTOCOL; _strPath = urlPath; _absoluteURL = absoluteUrl; break; case INTERNET_SCHEME_HTTPS: case INTERNET_SCHEME_HTTP: { // HTTP access, return the absolute URL _protocol = (components.nScheme == INTERNET_SCHEME_HTTP ? HTTP_PROTOCOL : HTTPS_PROTOCOL); _absoluteURL = absoluteUrl; // remove trailing / in absolute URL to ensure that www.spechworks.com // and www.speechwork.com/ are seen as the same URL. int idx = _absoluteURL.length() - 1; if (_absoluteURL[idx] == L'/') { _absoluteURL.resize(idx); } _host = host; // Retrieve the host name, the port number and the local file. _port = components.nPort; if (components.dwUrlPathLength == 0) { _strPath = L"/"; } else { _strPath = urlPath; SBinetNString utf8path, escapedPath; SBinetHttpUtils::utf8encode(urlPath, utf8path); SBinetHttpUtils::escapeString(utf8path.c_str(), SBinetHttpUtils::URL_PATH, escapedPath); _strPath = escapedPath; } if (components.dwExtraInfoLength > 0) _strPath += query; break; } default: delete [] tmpUrl; delete [] host; delete [] urlPath; delete [] query; return VXIinet_RESULT_INVALID_ARGUMENT; } } else { // Couldn't be parsed. // If the absoluteUrl contains a colon, it is because the parsing of the // URL failed. If the URL contained a colon and were valid, it would have // been parsed sucessfully by InternetCrackURL. If it doesn't contain a // colon, we assume it is a path relative to the current directory. // if (::wcschr(absoluteUrl, L':') != NULL) { delete [] tmpUrl; delete [] host; delete [] urlPath; delete [] query; return VXIinet_RESULT_INVALID_ARGUMENT; } wchar_t *ignored; *urlPath = L'\0'; _protocol = FILE_PROTOCOL; if ((GetFullPathName(absoluteUrl, pathLen, urlPath, &ignored) > 0) && *urlPath) { _strPath = urlPath; _absoluteURL = absoluteUrl; } else { //Error(225, L"%s%s", L"URL", pszUrl); eResult = VXIinet_RESULT_INVALID_ARGUMENT; } } N_absoluteURL = _absoluteURL; N_baseURL = _baseURL; N_host = _host; N_strPath = _strPath; delete [] tmpUrl; delete [] host; delete [] urlPath; delete [] query; return eResult; } #else /* not WIN32 */ struct URLInfo { SBinetString protocol; SBinetString fragment; SBinetString query; SBinetString path; SBinetString host; int port; }; static VXIinetResult parseURL(const VXIchar* const url, URLInfo& urlInfo) { // Initialize the URLInfo structure. urlInfo.port = -1; // Check to see if the URL is invalid. if (!url || (url[0] == 0)) return VXIinet_RESULT_NON_FATAL_ERROR; VXIchar* tmpUrl = new VXIchar [wcslen(url) + 1]; wcscpy(tmpUrl, url); const VXIchar* tmpUrlOriginal = tmpUrl; // Check for the protocol part. bool needToCanonicalize = false; VXIchar* protocolEndPtr = wcschr(tmpUrl, L':'); if (protocolEndPtr && (!wcsncmp(tmpUrl, L"file", 4) || !wcsncmp(tmpUrl, L"http", 4))) { // Found the protocol. Copy it to the URLInfo structure. int protLen = protocolEndPtr - tmpUrl; urlInfo.protocol.append(tmpUrl, protLen); // Advance past the protocol part of the URL. if (0 == wcsncmp(protocolEndPtr + 1, L"//", 2)) { tmpUrl = protocolEndPtr + 3; } else { tmpUrl = protocolEndPtr + 1; needToCanonicalize = 1; } } else { // The default protocol is the file protocol. urlInfo.protocol = L"file"; } if (!wcscmp(urlInfo.protocol.c_str(), L"file")) { // If the URL uses the file protocol, the rest of the URL is the path. // The path needs to be canonicalized. It could still be an absolute // path if it starts with a '/'. If it does not, create the absolute // path by prepending the current path with the current directory. if (needToCanonicalize && tmpUrl[0] != '/') { char buf[1024]; urlInfo.path = getcwd(buf, 1024); urlInfo.path += '/'; } VXIchar* endPtr = wcschr(tmpUrl, L'?'); VXIchar* fragmentPtr = wcschr(tmpUrl, L'#'); if (endPtr) { if (fragmentPtr && (fragmentPtr < endPtr)) endPtr = fragmentPtr; } else if (fragmentPtr) { endPtr = fragmentPtr; } // Strip the query and/or fragment part of the file URI if // there is any. if (endPtr) *endPtr = L'\0'; urlInfo.path += tmpUrl; } else { // Check for the fragment part. VXIchar* fragmentPtr = wcsrchr(tmpUrl, L'#'); if (fragmentPtr) { // Found the fragment. Copy it to the URLInfo structure. *fragmentPtr = L'\0'; urlInfo.fragment = ++fragmentPtr; } // Check for the query part. VXIchar* queryPtr = wcsrchr(tmpUrl, L'?'); if (queryPtr) { // Found the query. Copy it to the URLInfo structure. *queryPtr = L'\0'; urlInfo.query = ++queryPtr; } // Check for the path part. VXIchar* pathPtr = wcschr(tmpUrl, L'/'); if (pathPtr) { // Found the path. Copy it to the URLInfo structure. // First, we need to UTF-8 encode it and then escape it. SBinetNString utfPath, escapedPath; SBinetHttpUtils::utf8encode(pathPtr, utfPath); SBinetHttpUtils::escapeString(utfPath.c_str(), SBinetHttpUtils::URL_PATH, escapedPath); urlInfo.path = escapedPath; // Append the query to the path. if (queryPtr) { // Add the query to the path. urlInfo.path += L'?'; urlInfo.path += urlInfo.query; } *pathPtr = L'\0'; } else { // No path was specified so set the path to root ("/"). urlInfo.path = "/"; } // Check for the username:password part. VXIchar* userpassEndPtr = wcschr(tmpUrl, L'@'); if (userpassEndPtr) { // For now, just ignore username and password. tmpUrl = userpassEndPtr + 1; } // Check for the port part. VXIchar* portPtr = wcsrchr(tmpUrl, L':'); if (portPtr) { *portPtr = L'\0'; ++portPtr; // Check if the port is valid (all digits). VXIchar* end = NULL; long port = wcstol(portPtr, &end, 10); if ((end) && (*end == L'\0')) urlInfo.port = (int) port; } // The rest is the host part. urlInfo.host = tmpUrl; if (urlInfo.host[0] == 0) { delete [] tmpUrlOriginal; return VXIinet_RESULT_NON_FATAL_ERROR; } } delete [] tmpUrlOriginal; return VXIinet_RESULT_SUCCESS; } typedef std::basic_string<VXIchar> vxistring; static VXIinetResult combineURL(const VXIchar* const baseUrl, const VXIchar* const relativeUrl, SBinetString& absoluteUrl) { // Check to see if the relative URL is referencing another host // (i.e. the protocol is specified). const VXIchar* protocolEndPtr = wcschr(relativeUrl, L':'); if (!baseUrl || (protocolEndPtr && (!wcsncmp(relativeUrl, L"file", 4) || !wcsncmp(relativeUrl, L"http", 4)))) { // HACK to allow OSR to load absolute local URLs if (wcsstr(relativeUrl, L"/file://")) absoluteUrl = relativeUrl + 1; else absoluteUrl = relativeUrl;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -