⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cs_url.cpp

📁 c-smile 一个语法类似于JS 又有点像C++的 编译器
💻 CPP
字号:
/*
*
* cs_url.cpp
*
* Copyright (c) 2001, 2002
* Andrew Fedoniouk - andrew@terra-informatica.org
* Portions: Serge Kuznetsov -  kuznetsov@deeptown.org
*
* See the file "COPYING" for information on usage 
* and redistribution of this file
*
*/
#include "cs_url.h"
#include <ctype.h>

#if !defined(_WIN32)
#define strnicmp	strncasecmp
#endif


namespace tool
{
  /* One row of a scheme-name / port-number lookup table. */
  struct protoport
  {
    const char* proto;   /* scheme name, e.g. "http" */
    int port;            /* port number paired with that scheme */
  };

  /*
  * Scheme -> port table consulted by url::parse when the URL itself
  * carries no port (port == 0 after parsing).  Scheme names are matched
  * there with stricmp.  The table is read-only lookup data, hence const.
  */
  static const protoport protoports[] =
  {
    { "ftp",    21   },
    { "gopher", 70   },
    { "http",   80   },
    { "https",  443  },
    { "socks",  1080 }
  };


  /*
  * ParseURL
  *
  * Turns a URL into a URLParts structure
  *
  * The good stuff was written by Rob May <robert.may@rd.eng.bbc.co.uk>
  * and heavily mangled/modified by john to suit his own weird style.
  * Made somewhat smarter (err, completely re-written) by GN 1997May02
  */
  /*
  * url::parse
  *
  * Splits `src` into the members protocol, username, password, hostname,
  * port, filename and anchor.  All whitespace in `src` is dropped first.
  *
  * Shapes handled:
  *   urn:<rest>      -> protocol = "urn", filename = <rest>, nothing else
  *   [url:][scheme:]//[user[:passwd]@]host[:port][/path[#fragment]]
  *   [url:][scheme:]path[#fragment]
  *
  * When "//host" is not followed by a further '/', filename becomes "/"
  * and no '#' lookup is done.
  *
  * If port is 0 once parsing is finished, it is taken from the protoports
  * table when protocol matches one of its entries.
  *
  * Returns false when src is NULL, true in every other case.
  *
  * NOTE(review): members are only assigned for the components that are
  * present, and port is only tested against 0 -- presumably the object is
  * expected to be empty/cleared before the call; confirm against the
  * class definition.
  */
  bool
    url::parse ( const char * src )
  {
    if ( src == NULL )
      return false;

    /* RFC1738 says spaces in URLs are to be ignored -- GN 1997May02 */
    array<char> buffer;
    for ( const char *in = src; *in; in++ )
    {
      /* <ctype.h> functions need an unsigned char value; a negative
      * plain char (any byte >= 0x80) would be undefined behaviour. */
      if ( !isspace ( (unsigned char) *in ) )
        buffer.push ( *in );
    }
    buffer.push ( '\0' );

    /* Everything below works on this private, writable copy, so the
    * temporary '\0' splits never touch the caller's string. */
    char *start = &buffer [ 0 ];

    /* Lousy hack for URNs */
    if ( strnicmp ( start, "urn:", 4 ) == 0 )
    {
      protocol = "urn";
      filename = start + 4;
      return true;
    }

    /* Less lousy hack for URLs which say so */
    char *s = ( strnicmp ( start, "url:", 4 ) == 0 ) ? start + 4 : start;

    /*
    * Check to see if there is a protocol (scheme) name.
    * Matches /^[A-Za-z0-9\+\-\.]+:/ in PERLese.
    */
    char *t = s;
    while ( isalnum ( (unsigned char) *t ) || *t == '-' || *t == '+' || *t == '.' )
      t++;
    if ( *t == ':' )
    {
      protocol = string ( s, t - s );
      s = t + 1;
    }

    /*
    * 'Internet' URL: the next bit begins with "//" and what follows, up
    * to the next slash, is "user:passwd@host.dom.ain:port" with almost
    * every component optional.
    */
    bool has_path = true;       /* false: "//host" with nothing after it */

    if ( s [ 0 ] == '/'  &&  s [ 1 ] == '/' )
    {
      s += 2;

      /* Cut the authority off from the path; the slash is restored below. */
      char *tslash = strchr ( s, '/' );
      if ( tslash != NULL )
        *tslash = '\0';

      char *atsign = strchr ( s, '@' );
      if ( atsign != NULL )     /* a username is present, possibly empty */
      {
        *atsign = '\0';

        char *pwsep = strchr ( s, ':' );
        if ( pwsep != NULL )    /* a passwd is also present */
        {
          *pwsep = '\0';
          /* The password sits between ':' and '@'.  (atsign + 1 would
          * be the start of the host, not the password.) */
          password = pwsep + 1;
        }
        username = s;
        s = atsign + 1;
      }

      char *portsep = strchr ( s, ':' );
      if ( portsep != NULL )    /* a port is specified */
      {
        *portsep = '\0';
        port = atoi ( portsep + 1 );
      }

      hostname = s;

      if ( tslash == NULL )     /* nothing further */
      {
        filename = "/";
        has_path = false;
      }
      else
      {
        *tslash = '/';          /* restore the slash */
        s = tslash;             /* and stay there, don't step beyond */
      }
    }

    /*
    * End of special treatment of Internet URLs.  Now s points at the
    * filename part (if any); a '#' splits off the fragment, which is
    * stored separately in anchor.
    */
    if ( has_path )
    {
      char *fragmark = strchr ( s, '#' );
      if ( fragmark != NULL )
      {
        *fragmark = '\0';
        anchor = fragmark + 1;
      }
      filename = s;             /* everything else goes here */
    }

    /* No explicit port: fall back to the table entry for the scheme. */
    if ( port == 0 )
    {
      const size_t count = sizeof ( protoports ) / sizeof ( protoports [ 0 ] );
      for ( size_t i = 0; i < count; i++ )
      {
        /* NOTE(review): this file maps only strnicmp for non-Windows
        * builds; stricmp presumably comes from cs_url.h -- confirm. */
        if ( stricmp ( protoports [ i ].proto, protocol ) == 0 )
        {
          port = protoports [ i ].port;
          break;
        }
      }
    }
    return true;
  }


  /*
  * escape URL
  *
  * Puts escape codes in URLs.  (More complete than it used to be;
  * GN Jan 1997.  We escape all that isn't alphanumeric, "safe" or "extra"
  * as spec'd in RFCs 1738, 1808 and 2068.)
  */
  /*
  * is_url_char
  *
  * Returns true when c may be copied into a URL without escaping:
  * an ASCII letter or digit, or one of  $ - _ . ! * ' ( ) ,
  * Returns false for everything else, including NUL and every byte
  * >= 128.
  */
  bool
    is_url_char ( unsigned char c )
  {
    /* NUL must be rejected explicitly: strchr(set, 0) finds the string
    * terminator and would report a match.  Bytes >= 128 are rejected
    * before isalnum so the result does not depend on the locale. */
    if ( c == '\0' || c >= 128 )
      return false;
    if ( isalnum ( c ) )
      return true;
    return strchr ( "$-_.!*'(),", c ) != NULL;
  }


  /*
  * url::escape
  *
  * Returns a copy of src with every character that is_url_char rejects
  * replaced by "%XX" (two upper-case hex digits of the byte value).
  *
  *   space_to_plus == true :  ' ' becomes '+', and a literal '+' is
  *                            escaped like any other rejected character
  *   space_to_plus == false:  ' ' is escaped, a literal '+' is copied
  *                            unchanged
  *
  * src must be a NUL-terminated string.
  */
  string
    url::escape ( const char *src, bool space_to_plus )
  {
    /* const array: a string literal must not be bound to a non-const
    * char * (ill-formed since C++11). */
    static const char hex [] = "0123456789ABCDEF";

    array<char> buffer;

    for ( const char *cp = src; *cp; cp++ )
    {
      /* Work on the byte value so bytes >= 0x80 index hex[] correctly. */
      const unsigned char c = (unsigned char) *cp;

      if ( c == ' ' && space_to_plus )
      {
        buffer.push ( '+' );
      }
      else if ( is_url_char ( c ) || ( c == '+' && !space_to_plus ) )
      {
        buffer.push ( *cp );
      }
      else
      {
        buffer.push ( '%' );
        buffer.push ( hex [ c / 16 ] );
        buffer.push ( hex [ c % 16 ] );
      }
    }

    buffer.push ( '\0' );
    return string ( &buffer [ 0 ] );
  }

  /*
  * UnescapeURL
  *
  * Converts the escape codes (%xx) into actual characters.  NOT complete.
  * Could do everything in place I guess.
  */
  /*
  * url::unescape
  *
  * Returns a copy of src with escape codes decoded:
  *   "%%"   -> '%'
  *   "%XX"  -> the byte with hex value XX (both X must be hex digits)
  * A '%' that is followed by anything else -- including the end of the
  * string -- is copied through unchanged, so a truncated escape such as
  * a trailing "%" or "%4" can never make the scan run past the
  * terminating NUL.
  *
  * src must be a NUL-terminated string.  '+' is not translated.
  */
  string url::unescape ( const char *src )
  {
    array<char> buffer;

    for ( const char *cp = src; *cp; cp++ )
    {
      if ( *cp != '%' )
      {
        buffer.push ( *cp );
        continue;
      }

      if ( cp [ 1 ] == '%' )
      {
        buffer.push ( '%' );
        cp++;                   /* consume the second '%' */
        continue;
      }

      /* && short-circuits: cp[2] is only read when cp[1] is a hex digit,
      * i.e. when cp[1] is not the terminator. */
      if ( isxdigit ( (unsigned char) cp [ 1 ] ) && isxdigit ( (unsigned char) cp [ 2 ] ) )
      {
        const char hex [ 3 ] = { cp [ 1 ], cp [ 2 ], '\0' };
        buffer.push ( (char) strtol ( hex, NULL, 16 ) );
        cp += 2;                /* consume both digits */
        continue;
      }

      /* Malformed or truncated escape: keep the '%' literally. */
      buffer.push ( '%' );
    }

    buffer.push ( '\0' );
    return string ( &buffer [ 0 ] );
  }
};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -