📄 sbineturl.cpp

📁 OSB-PIK-OpenVXI-3.0.0源代码 “中国XML论坛 - 专业的XML技术讨论区--XML在语音技术中的应用”
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
 /****************License************************************************  *  * Copyright 2000-2003.  ScanSoft, Inc.      *  * Use of this software is subject to notices and obligations set forth   * in the SpeechWorks Public License - Software Version 1.2 which is   * included with this software.   *  * ScanSoft is a registered trademark of ScanSoft, Inc., and OpenSpeech,   * SpeechWorks and the SpeechWorks logo are registered trademarks or   * trademarks of SpeechWorks International, Inc. in the United States   * and other countries.  *  ***********************************************************************/ #ifndef _SB_USE_STD_NAMESPACE #define _SB_USE_STD_NAMESPACE #endif  #ifndef UNICODE #define UNICODE #endif #ifndef _UNICODE #define _UNICODE #endif  #ifdef _WIN32 #undef HTTP_VERSION #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include <windows.h> #include <wininet.h> #include <urlmon.h> #else #include <unistd.h> // for getcwd() #endif /* WIN32 */  #include <stdio.h> #include <string.h>  #include "VXIvalue.h" #include "VXIinet.h" #include "VXItrd.h"  #include "SBinetURL.h" #include "SBinetChannel.h" #include "HttpUtils.hpp"  #include <SBinetString.hpp>  #define INET_MAX_PATH 1024  // #if defined(_decunix_) || defined(_solaris_) // static int my_wcscasecmp(const wchar_t *s1, const wchar_t *s2) // { //   register unsigned int u1, u2;  //   for (;;) { //     u1 = (unsigned int) *s1++; //     u2 = (unsigned int) *s2++; //     if (HttpUtils::toUpper(u1) != HttpUtils::toUpper(u2)) { //       return HttpUtils::toUpper(u1) - HttpUtils::toUpper(u2); //     } //     if (u1 == '\0') { //       return 0; //     } //   } // } // #else // #define my_wcscasecmp ::wcscasecmp // #endif  static void appendArrayIndexToName(SBinetNString& fieldName,                        VXIunsigned index) {   char tempBuf[8];    sprintf(tempBuf, ".%d", index);   fieldName += tempBuf; }  VXIinetResult SBinetURL::create(const VXIchar *pszName,                                 const VXIchar *pszUrlBase,                                 SBinetURL *& url) {   url = new SBinetURL();    if (url == NULL)   {     return VXIinet_RESULT_OUT_OF_MEMORY;   }    VXIinetResult rc = url->parse(pszName, pszUrlBase);   if (rc != VXIinet_RESULT_SUCCESS)   {     delete url;     url = NULL;   }   return rc; }  SBinetURL& SBinetURL::operator=(const SBinetURL& rhs) {   if (this != &rhs)   {     _absoluteURL = rhs._absoluteURL;     _baseURL = rhs._baseURL;     _host = rhs._host;     _strPath = rhs._strPath;     _protocol = rhs._protocol;     _port = rhs._port;     N_absoluteURL = rhs.N_absoluteURL;     N_baseURL = rhs.N_baseURL;     N_host = rhs.N_host;     N_strPath = rhs.N_strPath;   }   return *this; }  bool SBinetURL::operator==(const SBinetURL& rhs) {   if (this == &rhs)     return true;    if (_protocol != rhs._protocol)     return false;    if (::wcscmp(_strPath.c_str(), rhs._strPath.c_str()) != 0)     return false;    if (_protocol >= HTTP_PROTOCOL)   {     if ( _port != rhs._port)       return false;      if (SBinetHttpUtils::casecmp(_host.c_str(), rhs._host.c_str()) != 0)       return false;   }    return true; }  #ifdef _WIN32 VXIinetResult SBinetURL::parse(const VXIchar* pszUrl, const VXIchar* pszUrlBase) {   VXIinetResult eResult( VXIinet_RESULT_SUCCESS );    if( !pszUrl || !*pszUrl)   {     //Error(200, L"%s%s", L"Operation", L"parse URL");     return VXIinet_RESULT_INVALID_ARGUMENT;   }    if (pszUrlBase != NULL)     _baseURL = pszUrlBase;   else     _baseURL = L"";    // If the caller is using "file:", just strip it because apparently   // Internet*() doesn't do the right thing with file URI's   bool relativeFileUri = false;   if (!wcsncmp(L"file:", pszUrl, 5)) { // file:...     pszUrl += 5;     if (!wcsncmp(L"//", pszUrl, 2)) // file://...       pszUrl += 2;     relativeFileUri = true;   }    wchar_t *tmpUrl = NULL;   const wchar_t *absoluteUrl = pszUrl;    // The parsing of URL is complicated by the fact that we want unsafe   // characters to be encoded using %-notation for HTTP URLs, but we want   // %-notatation to be converted into unsaface characters for file URLs.  So,   // on entry we convert the URL into unsafe notation, then for HTTP URLs, we   // re-encode it back using %-notation.    // Combine the base and (possibly relative) URL.  Since we stripped the   // transport type above, don't combine them if the transport type of the   // base URI differs.   if( pszUrlBase && pszUrlBase[0] && !(relativeFileUri && (0 == wcsncmp(L"http:", pszUrlBase, 5))))   {     VXIulong len = (::wcslen(pszUrlBase) + ::wcslen(pszUrl)) * 3;     tmpUrl = new VXIchar [len];     if( !tmpUrl )     {       //Error(103, NULL);       return VXIinet_RESULT_OUT_OF_MEMORY;     }      if (InternetCombineUrl(pszUrlBase, pszUrl, tmpUrl, &len, ICU_BROWSER_MODE | ICU_DECODE| ICU_NO_ENCODE) == TRUE)     {       absoluteUrl = tmpUrl;     }     else     {       int errCode = GetLastError();       //Error(225, NULL);       delete [] tmpUrl;       return VXIinet_RESULT_NON_FATAL_ERROR;     }   }   else   {     VXIulong len = ::wcslen (pszUrl) * 3;     tmpUrl = new VXIchar [len];     if( !tmpUrl )     {       //Error(103, NULL);       return VXIinet_RESULT_OUT_OF_MEMORY;     }     if (InternetCanonicalizeUrl(pszUrl, tmpUrl, &len, ICU_BROWSER_MODE | ICU_DECODE | ICU_NO_ENCODE) == TRUE)     {       absoluteUrl = tmpUrl;     }     else     {       int errCode = GetLastError();       //Error(225, NULL);       delete [] tmpUrl;       return VXIinet_RESULT_NON_FATAL_ERROR;     }   }    // Now parse the absolute URL to decide if it is file or network access   int pathLen = 0;   int queryLen = 0;   wchar_t* queryPtr = ::wcschr(absoluteUrl, L'?');   if (queryPtr)   {     pathLen = queryPtr - absoluteUrl + 1;     queryLen = ::wcslen(queryPtr) + 1;   }   else   {     pathLen = ::wcslen(absoluteUrl) + 1;     queryLen = 0;   }    if (pathLen < INET_MAX_PATH)     pathLen = INET_MAX_PATH;    wchar_t protocol[INET_MAX_PATH];   wchar_t* host = new wchar_t [pathLen];   wchar_t* urlPath = new wchar_t [pathLen];   wchar_t* query = NULL;   if (queryLen > 0)     query = new wchar_t [queryLen];    URL_COMPONENTS components;   memset (&components, 0, sizeof (URL_COMPONENTS));   components.dwStructSize = sizeof (URL_COMPONENTS);   components.lpszScheme = protocol;   components.dwSchemeLength = INET_MAX_PATH;   components.lpszHostName = host;   components.dwHostNameLength = pathLen;   components.lpszUrlPath = urlPath;   components.dwUrlPathLength = pathLen;   components.lpszExtraInfo = query;   components.dwExtraInfoLength = queryLen;    if(InternetCrackUrl(absoluteUrl, ::wcslen (absoluteUrl), 0, &components) == TRUE)   {     switch (components.nScheme)     {      case INTERNET_SCHEME_FILE:        // File access, return the local file path        _protocol = FILE_PROTOCOL;        _strPath = urlPath;        _absoluteURL = absoluteUrl;        break;      case INTERNET_SCHEME_HTTPS:      case INTERNET_SCHEME_HTTP:        {          // HTTP access, return the absolute URL          _protocol = (components.nScheme == INTERNET_SCHEME_HTTP ?                     HTTP_PROTOCOL : HTTPS_PROTOCOL);           _absoluteURL = absoluteUrl;           // remove trailing / in absolute URL to ensure that www.spechworks.com          // and www.speechwork.com/ are seen as the same URL.          int idx = _absoluteURL.length() - 1;          if (_absoluteURL[idx] == L'/')          {            _absoluteURL.resize(idx);          }           _host = host;           // Retrieve the host name, the port number and the local file.          _port = components.nPort;           if (components.dwUrlPathLength == 0)          {            _strPath = L"/";          }          else          {            _strPath = urlPath;            SBinetNString utf8path, escapedPath;            SBinetHttpUtils::utf8encode(urlPath, utf8path);            SBinetHttpUtils::escapeString(utf8path.c_str(),                                          SBinetHttpUtils::URL_PATH,                                          escapedPath);            _strPath = escapedPath;          }           if (components.dwExtraInfoLength > 0)            _strPath += query;          break;        }      default:        delete [] tmpUrl;        delete [] host;        delete [] urlPath;        delete [] query;        return VXIinet_RESULT_INVALID_ARGUMENT;     }   }   else   {     // Couldn't be parsed.      // If the absoluteUrl contains a colon, it is because the parsing of the     // URL failed.  If the URL contained a colon and were valid, it would have     // been parsed sucessfully by InternetCrackURL.  If it doesn't contain a     // colon, we assume it is a path relative to the current directory.     //     if (::wcschr(absoluteUrl, L':') != NULL)     {       delete [] tmpUrl;       delete [] host;       delete [] urlPath;       delete [] query;       return VXIinet_RESULT_INVALID_ARGUMENT;     }      wchar_t *ignored;     *urlPath = L'\0';     _protocol = FILE_PROTOCOL;      if ((GetFullPathName(absoluteUrl, pathLen, urlPath, &ignored) > 0) &&        *urlPath)     {       _strPath = urlPath;       _absoluteURL = absoluteUrl;     }     else     {       //Error(225, L"%s%s", L"URL", pszUrl);       eResult = VXIinet_RESULT_INVALID_ARGUMENT;     }   }    N_absoluteURL = _absoluteURL;   N_baseURL = _baseURL;   N_host = _host;   N_strPath = _strPath;    delete [] tmpUrl;   delete [] host;   delete [] urlPath;   delete [] query;    return eResult; }  #else /* not WIN32 */  struct URLInfo {   SBinetString protocol;   SBinetString fragment;   SBinetString query;   SBinetString path;   SBinetString host;   int port; };  static VXIinetResult parseURL(const VXIchar* const url, URLInfo& urlInfo) {   // Initialize the URLInfo structure.   urlInfo.port = -1;    // Check to see if the URL is invalid.   if (!url || (url[0] == 0))     return VXIinet_RESULT_NON_FATAL_ERROR;    VXIchar* tmpUrl = new VXIchar [wcslen(url) + 1];   wcscpy(tmpUrl, url);   const VXIchar* tmpUrlOriginal = tmpUrl;    // Check for the protocol part.   bool needToCanonicalize = false;   VXIchar* protocolEndPtr = wcschr(tmpUrl, L':');   if (protocolEndPtr && (!wcsncmp(tmpUrl, L"file", 4) || !wcsncmp(tmpUrl, L"http", 4)))   {     // Found the protocol.  Copy it to the URLInfo structure.     int protLen = protocolEndPtr - tmpUrl;     urlInfo.protocol.append(tmpUrl, protLen);      // Advance past the protocol part of the URL.     if (0 == wcsncmp(protocolEndPtr + 1, L"//", 2))     {       tmpUrl = protocolEndPtr + 3;     }     else     {       tmpUrl = protocolEndPtr + 1;       needToCanonicalize = 1;     }   }   else   {     // The default protocol is the file protocol.     urlInfo.protocol = L"file";   }    if (!wcscmp(urlInfo.protocol.c_str(), L"file"))   {     // If the URL uses the file protocol, the rest of the URL is the path.      // The path needs to be canonicalized.  It could still be an absolute     // path if it starts with a '/'.  If it does not, create the absolute     // path by prepending the current path with the current directory.     if (needToCanonicalize && tmpUrl[0] != '/')     {       char buf[1024];       urlInfo.path = getcwd(buf, 1024);       urlInfo.path += '/';     }     VXIchar* endPtr = wcschr(tmpUrl, L'?');     VXIchar* fragmentPtr = wcschr(tmpUrl, L'#');     if (endPtr)     {       if (fragmentPtr && (fragmentPtr < endPtr))         endPtr = fragmentPtr;     }     else if (fragmentPtr)     {       endPtr = fragmentPtr;     }      // Strip the query and/or fragment part of the file URI if     // there is any.     if (endPtr)       *endPtr = L'\0';      urlInfo.path += tmpUrl;   }   else   {     // Check for the fragment part.     VXIchar* fragmentPtr = wcsrchr(tmpUrl, L'#');     if (fragmentPtr)     {       // Found the fragment.  Copy it to the URLInfo structure.       *fragmentPtr = L'\0';       urlInfo.fragment = ++fragmentPtr;     }      // Check for the query part.     VXIchar* queryPtr = wcsrchr(tmpUrl, L'?');     if (queryPtr)     {       // Found the query.  Copy it to the URLInfo structure.       *queryPtr = L'\0';       urlInfo.query = ++queryPtr;     }      // Check for the path part.     VXIchar* pathPtr = wcschr(tmpUrl, L'/');     if (pathPtr)     {       // Found the path.  Copy it to the URLInfo structure.       // First, we need to UTF-8 encode it and then escape it.       SBinetNString utfPath, escapedPath;       SBinetHttpUtils::utf8encode(pathPtr, utfPath);       SBinetHttpUtils::escapeString(utfPath.c_str(),                                     SBinetHttpUtils::URL_PATH,                                     escapedPath);        urlInfo.path = escapedPath;        // Append the query to the path.       if (queryPtr)       {         // Add the query to the path.         urlInfo.path += L'?';         urlInfo.path += urlInfo.query;       }        *pathPtr = L'\0';     }     else     {       // No path was specified so set the path to root ("/").       urlInfo.path = "/";     }      // Check for the username:password part.     VXIchar* userpassEndPtr = wcschr(tmpUrl, L'@');     if (userpassEndPtr)     {       // For now, just ignore username and password.       tmpUrl = userpassEndPtr + 1;     }      // Check for the port part.     VXIchar* portPtr = wcsrchr(tmpUrl, L':');     if (portPtr)     {       *portPtr = L'\0';       ++portPtr;        // Check if the port is valid (all digits).       VXIchar* end = NULL;       long port = wcstol(portPtr, &end, 10);        if ((end) && (*end == L'\0'))         urlInfo.port = (int) port;     }      // The rest is the host part.     urlInfo.host = tmpUrl;      if (urlInfo.host[0] == 0)     {       delete [] tmpUrlOriginal;       return VXIinet_RESULT_NON_FATAL_ERROR;     }   }    delete [] tmpUrlOriginal;    return VXIinet_RESULT_SUCCESS; }  typedef std::basic_string<VXIchar> vxistring;  static VXIinetResult combineURL(const VXIchar* const baseUrl,                                 const VXIchar* const relativeUrl,                                 SBinetString& absoluteUrl) {   // Check to see if the relative URL is referencing another host   //    (i.e. the protocol is specified).   const VXIchar* protocolEndPtr = wcschr(relativeUrl, L':');   if (!baseUrl || (protocolEndPtr && (!wcsncmp(relativeUrl, L"file", 4) || !wcsncmp(relativeUrl, L"http", 4))))   {     // HACK to allow OSR to load absolute local URLs     if (wcsstr(relativeUrl, L"/file://"))       absoluteUrl = relativeUrl + 1;     else       absoluteUrl = relativeUrl;
12 下一页
💿 文件大小 2910 K
👤 上传用户 lz0324
📂 所属分类 Internet/网络编程
🏷️ 相关标签

#XML #OSB-PIK-OpenVXI #源代码 #技术讨论
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -