URL编解码

205 阅读1分钟

使用场景

想将一个urlA当作另一个urlB中的一个参数,则需要对urlA进行url编码。例如

http://127.0.0.1/service/snode/v1/live_subscribe?originUrl=http%3A%2F%2Fpull-flv-l1.ixigua.com%2Fnormal%2Fstream-106945342468259866.flv%3Fa%3D1%26b%3D2

其中originUrl就是编码之后的结果

#原始url:
http://pull-flv-l1.ixigua.com/normal/stream-106945342468259866.flv?a=1&b=2   
#编码后的url
http%3A%2F%2Fpull-flv-l1.ixigua.com%2Fnormal%2Fstream-106945342468259866.flv%3Fa%3D1%26b%3D2

编码算法

url使用到的特殊字符(: = / ? &等),如果参数中也包含同样的字符则需要编码。
例如 = 字符,其ASCII码的二进制是 0011 1101,十六进制是 0x3D,十进制是 61
编码算法是 百分号(%) + 高四位对应的十六进制字符 + 低四位对应的十六进制字符(高四位 0011 即 3,低四位 1101 即 D)
则就是 %3D

C++代码实现

#include <string>

/// Percent-encoding helpers for embedding arbitrary bytes (for example a whole
/// URL) inside a URL query parameter.
///
/// The form-style variant is used: a space is written as '+', the characters
/// A-Z a-z 0-9 '-' '_' '.' '~' are copied unchanged, and every other byte is
/// written as '%' followed by two uppercase hexadecimal digits.
///
/// Character classification is done with explicit ASCII range checks rather
/// than <cctype>, so the result does not depend on the process locale.
class UrlCodec
{
public:
    /// Converts a nibble to its uppercase hexadecimal digit.
    ///
    /// @param x value expected to be in the range 0..15
    /// @return '0'..'9' for 0..9 and 'A'..'F' for 10..15
    static unsigned char ToHex(unsigned char x)
    {
        return static_cast<unsigned char>(x > 9 ? x - 10 + 'A' : x + '0');
    }

    /// Converts a hexadecimal digit character to its numeric value.
    ///
    /// Both uppercase and lowercase digits are accepted, because the hex
    /// digits of a percent-escape are case-insensitive.
    ///
    /// @param x character to convert
    /// @return 0..15 for a valid hex digit, 0xFF for any other character
    static unsigned char FromHex(unsigned char x)
    {
        if (x >= '0' && x <= '9')
        {
            return static_cast<unsigned char>(x - '0');
        }
        if (x >= 'A' && x <= 'F')
        {
            return static_cast<unsigned char>(x - 'A' + 10);
        }
        if (x >= 'a' && x <= 'f')
        {
            return static_cast<unsigned char>(x - 'a' + 10);
        }
        return 0xFF; // not a hex digit
    }

    /// Encodes a byte string so it can be used as a URL parameter value.
    ///
    /// @param str raw bytes to encode
    /// @return the encoded string; empty when str is empty
    static std::string UrlEncode(const std::string& str)
    {
        std::string strOut;
        const std::string::size_type length = str.length();
        strOut.reserve(length); // the output is never shorter than the input

        for (std::string::size_type i = 0; i < length; ++i)
        {
            const unsigned char byte = static_cast<unsigned char>(str[i]);
            if (IsUnreserved(byte))
            {
                strOut += str[i];
            }
            else if (byte == ' ')
            {
                strOut += '+';
            }
            else
            {
                strOut += '%';
                strOut += static_cast<char>(ToHex(byte >> 4));   // high nibble
                strOut += static_cast<char>(ToHex(byte & 0x0F)); // low nibble
            }
        }
        return strOut;
    }

    /// Decodes a string produced by UrlEncode.
    ///
    /// '+' becomes a space and "%XY" becomes the byte with hex value XY
    /// (X and Y may be upper or lower case). Decoding stops at the first
    /// malformed escape, i.e. a '%' that is not followed by two hex digits,
    /// and the bytes decoded up to that point are returned.
    ///
    /// @param str encoded text
    /// @return the decoded bytes, or the decoded prefix if str is malformed
    static std::string UrlDecode(const std::string& str)
    {
        std::string strOut;
        const std::string::size_type length = str.length();
        strOut.reserve(length); // the output is never longer than the input

        for (std::string::size_type i = 0; i < length; ++i)
        {
            const char ch = str[i];
            if (ch == '+')
            {
                strOut += ' ';
            }
            else if (ch == '%')
            {
                // i < length holds here, so the subtraction cannot wrap.
                if (length - i < 3)
                {
                    // Truncated escape: return what has been decoded so far.
                    return strOut;
                }
                const unsigned char high = FromHex(static_cast<unsigned char>(str[i + 1]));
                const unsigned char low = FromHex(static_cast<unsigned char>(str[i + 2]));
                if (high > 0x0F || low > 0x0F)
                {
                    // Non-hex digit in the escape: handled like a truncated one.
                    return strOut;
                }
                strOut += static_cast<char>((high << 4) | low);
                i += 2; // skip the two hex digits just consumed
            }
            else
            {
                strOut += ch;
            }
        }
        return strOut;
    }

private:
    /// Returns true for the ASCII letters and digits only.
    static bool IsAsciiAlnum(unsigned char c)
    {
        return (c >= '0' && c <= '9') ||
               (c >= 'A' && c <= 'Z') ||
               (c >= 'a' && c <= 'z');
    }

    /// Returns true for the characters UrlEncode copies through unchanged.
    static bool IsUnreserved(unsigned char c)
    {
        return IsAsciiAlnum(c) || c == '-' || c == '_' || c == '.' || c == '~';
    }
};