URL检测

40 阅读1分钟

#include <iostream>
#include <cctype>
#include <string>
#include <unordered_set>

using namespace std;

/// URL blacklist keyed on a normalized form of each URL.
///
/// Normalization (see FormatUrl) strips the scheme, the port and the query
/// string, lowercases everything, collapses repeated slashes and drops a
/// trailing slash, so that different spellings of a URL map to one key.
class IsServer {
public:
    /// Adds the normalized form of `url` to the blacklist.
    void Set(const std::string& url) {
        black_list_.insert(FormatUrl(url));
    }

    /// Returns true when the normalized form of `url` is in the blacklist.
    bool IsBlackListed(const std::string& url) const {
        return black_list_.count(FormatUrl(url)) > 0;
    }

    /// Returns the normalized form of `url`.
    ///
    /// - A leading "scheme://" is removed (see RemoveProtocol).
    /// - The first ':' met after the scheme starts a "port": everything from
    ///   it up to the next '/' is dropped. Later ':' characters are kept.
    /// - Everything from the first '?' on is dropped.
    /// - Runs of '/' collapse to one, and a trailing '/' is removed.
    /// - All other characters are lowercased.
    ///
    /// The function keeps no state between calls and never throws for
    /// malformed input (empty string, port or '?' at the very end, ...).
    std::string FormatUrl(const std::string& url) const {
        std::string res;
        res.reserve(url.size());
        bool port_handled = false;  // only the first ':' is treated as a port

        for (std::string::size_type p = RemoveProtocol(url); p < url.size(); ++p) {
            const char c = url[p];

            if (c == '?') {
                // Stop here, but still fall through to the trailing-slash
                // cleanup so "a/?q" and "a/" normalize to the same key.
                break;
            }

            if (c == ':' && !port_handled) {
                port_handled = true;
                // Skip the port. Stop at '?' as well as '/', otherwise a
                // port followed directly by a query would leak query text.
                p = url.find_first_of("/?", p);
                if (p == std::string::npos || url[p] == '?') {
                    break;  // port (and maybe query) was the end of the URL
                }
                AppendSlash(res);
                continue;
            }

            if (c == '/') {
                AppendSlash(res);
                continue;
            }

            // std::tolower requires a value representable as unsigned char.
            res.push_back(static_cast<char>(
                std::tolower(static_cast<unsigned char>(c))));
        }

        if (!res.empty() && res.back() == '/') {
            res.pop_back();
        }
        return res;
    }

private:
    /// Appends '/' to `res` unless `res` already ends with one.
    static void AppendSlash(std::string& res) {
        if (res.empty() || res.back() != '/') {
            res.push_back('/');
        }
    }

    /// Returns the index of the first character after a leading "scheme://",
    /// or 0 when there is none. A "://" that appears only after an earlier
    /// '/' belongs to the path and is not treated as a scheme separator.
    static std::string::size_type RemoveProtocol(const std::string& url) {
        const std::string::size_type first_slash = url.find('/');
        const std::string::size_type pos = url.find("://");
        if (pos == std::string::npos || pos > first_slash) {
            return 0;
        }
        return pos + 3;
    }

    std::unordered_set<std::string> black_list_;
};

int main () {
    IsServer is;
    cout << is.FormatUrl("AKA.MS/A/b/C") << endl;
    cout << is.FormatUrl("https://aks.ms/a/b/c") << endl;
    cout << is.FormatUrl("http://aks.ms/a://b/c") << endl;
    cout << is.FormatUrl("http://aks.ms/a://b/c") << endl;
    cout << is.FormatUrl("aka.ms:80/a/b/c?p=1&q=2") << endl;
    cout << is.FormatUrl("aks.ma/a///b////c") << endl;
    cout << is.FormatUrl("aks.ma/a://b/c") << endl;
}