Previous: QR Code Home: Next: mailto URLs

Finicky

This example shows how to classify URLs according to a set of rules. It is inspired by Finicky application.

The URLs are classified and redirected to a browser according to their category. See the example config.json file.

/*
    This example shows how to classify URLs
    according to a set of rules. This example is
    inspired by Finicky. The URLs are classified
    and redirected to a browser according to their
    category. See the example config.json file.
    https://github.com/johnste/finicky
*/

#include <boost/url/url.hpp>
#include <boost/url/parse.hpp>
#include <boost/system/result.hpp>
#include <boost/json/stream_parser.hpp>
#include <boost/core/detail/string_view.hpp>
#include <boost/regex.hpp>
#include <iostream>
#include <fstream>

namespace urls = boost::urls;
namespace json = boost::json;
namespace core = boost::core;

json::value
read_json( std::istream& is, json::error_code& ec )
{
    json::parse_options opt;
    opt.allow_comments = true;
    json::stream_parser p(json::storage_ptr(), opt);
    std::string line;
    while( std::getline( is, line ) )
    {
        p.write( line, ec );
        if( ec )
            return nullptr;
    }
    p.finish( ec );
    if( ec )
        return nullptr;
    return p.release();
}

bool
glob_match(
    core::string_view pattern,
    core::string_view str)
{
    // regex
    if (str.starts_with("/") &&
        str.ends_with("/"))
    {
        const boost::regex pr(pattern.begin() + 1, pattern.end() - 1);
        return boost::regex_match(std::string(str), pr);
    }

    // literal
    if (!pattern.contains('*'))
    {
        return pattern == str;
    }

    // glob
    std::string p = pattern;
    std::size_t i = p.find('*');
    while (i != std::string::npos)
    {
        auto e = std::min(p.find_first_not_of('*', i), p.size());
        std::size_t n = e - i;
        if (n == 1)
        {
            p.replace(i, e, "[^/]*");
            i += 5;
        }
        else
        {
            p.replace(i, e, ".*");
            i += 2;
        }
        i = p.find('*', i);
    }
    const boost::regex pr(p);
    return boost::regex_match(std::string(str), pr);
}

bool
url_match(
    json::value& mv,
    urls::url const& u)
{
    if (mv.is_string())
    {
        json::string& p = mv.as_string();
        return glob_match(u.buffer(), p);
    }
    else if (mv.is_array())
    {
        json::array& m = mv.as_array();
        for (auto& mi: m)
        {
            if (!mi.is_string())
                throw std::invalid_argument(
                    "handle match is not a string");
            if (glob_match(mi.as_string(), u.buffer()))
                return true;
        }
    }
    else if (mv.is_object())
    {
        json::object& m = mv.as_object();
        std::pair<core::string_view, core::string_view>
            field_values[] = {
                {"protocol",  u.scheme()},
                {"authority", u.encoded_authority()},
                {"username",  u.encoded_user()},
                {"user",      u.encoded_user()},
                {"password",  u.encoded_password()},
                {"userinfo",  u.encoded_userinfo()},
                {"host",      u.encoded_host()},
                {"port",      u.port()},
                {"path",      u.encoded_path()},
                {"pathname",  u.encoded_path()},
                {"query",     u.encoded_query()},
                {"search",    u.encoded_query()},
                {"fragment",  u.encoded_fragment()},
                {"hash",      u.encoded_fragment()},
            };
        for (auto& p: field_values)
        {
            auto it = m.find(p.first);
            if (it != m.end())
            {
                if (!it->value().is_string())
                    throw std::invalid_argument(
                        "match fields should be a strings");
                if (glob_match(p.second, p.first))
                    return true;
            }
        }
    }
    return false;
}

#define CHECK(c, msg)             \
    if (!(c))                     \
    {                             \
        std::cerr << msg << "\n"; \
        return EXIT_FAILURE;      \
    }

int main(int argc, char** argv)
{
    if (argc < 3) {
        std::cout << argv[0] << "\n";
        std::cout << "Usage: finicky <config> <url>\n"
                     "options:\n"
                     "    <config>: Configuration file\n"
                     "    <url>:    The url to open\n"
                     "examples:\n"
                     "    finicky config.json \"http://www.example.com\"\n";
        return EXIT_FAILURE;
    }

    // Parse url
    boost::system::result<urls::url> ru = urls::parse_uri(argv[2]);
    CHECK(ru, "Invalid URL");
    urls::url u = *ru;

    // Open config file
    std::fstream fin(argv[1]);
    CHECK(fin.good(), "Cannot open configuration file");
    json::error_code ec;
    json::value c = read_json(fin, ec);
    CHECK(!ec.failed(), "Cannot parse configuration file");
    CHECK(c.is_object(), "Configuration file is not an object");
    json::object& o = c.as_object();

    // Set initial browser
    auto bit = o.find("defaultBrowser");
    CHECK(
        bit != o.end(),
        "Configuration file has no defaultBrowser");
    CHECK(
        bit->value().is_string(),
        "defaultBrowser should be a string");
    json::string& browser = bit->value().as_string();

    // Apply rewrites to the input string
    auto rsit = o.find("rewrite");
    if (rsit != o.end())
    {
        CHECK(
            rsit->value().is_array(),
            "rewrite rules should be an array");
        auto& rs = rsit->value().as_array();
        for (auto& rv: rs)
        {
            CHECK(
                rv.is_object(),
                "individual rewrite rule should be an object");
            json::object& r = rv.as_object();

            // Look for match
            auto mit = r.find("match");
            CHECK(
                mit != r.end(),
                "rewrite rule should have a match field");
            CHECK(
                mit->value().is_object() || mit->value().is_string(),
                "rewrite match field is not an object");
            if (!url_match(mit->value(), u))
                continue;

            // Apply replacement rule
            auto uit = r.find("url");
            CHECK(
                uit != r.end(),
                "rewrite rule should have a url field");
            CHECK(
                uit->value().is_object() ||
                uit->value().is_string(),
                "url field must be an object or string");

            if (uit->value().is_string())
            {
                json::string& uo = uit->value().as_string();
                auto ru1 = urls::parse_uri(uo);
                CHECK(ru1, "url " << uo.c_str() << " is invalid");
                u = *ru;
            }
            else
            {
                json::object& uo = uit->value().as_object();
                auto it = uo.find("protocol");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "protocol field should be a string");
                    u.set_scheme(it->value().as_string());
                }

                it = uo.find("authority");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "authority field should be a string");
                    u.set_encoded_authority(
                        it->value().as_string().subview());
                }

                it = uo.find("username");
                if (it == uo.end())
                    it = uo.find("user");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "username field should be a string");
                    u.set_encoded_user(
                        it->value().as_string().subview());
                }

                it = uo.find("password");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "password field should be a string");
                    u.set_encoded_password(
                        it->value().as_string().subview());
                }

                it = uo.find("userinfo");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "userinfo field should be a string");
                    u.set_encoded_userinfo(
                        it->value().as_string().subview());
                }

                it = uo.find("host");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "host field should be a string");
                    u.set_encoded_host(
                        it->value().as_string().subview());
                }

                it = uo.find("port");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "port field should be a string");
                    u.set_port(
                        it->value().as_string().subview());
                }

                it = uo.find("path");
                if (it == uo.end())
                    it = uo.find("pathname");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "path field should be a string");
                    u.set_encoded_path(
                        it->value().as_string().subview());
                }

                it = uo.find("query");
                if (it == uo.end())
                    it = uo.find("search");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "query field should be a string");
                    u.set_encoded_query(
                        it->value().as_string().subview());
                }

                it = uo.find("fragment");
                if (it == uo.end())
                    it = uo.find("hash");
                if (it != uo.end())
                {
                    CHECK(
                        it->value().is_string(),
                        "fragment field should be a string");
                    u.set_encoded_fragment(
                        it->value().as_string().subview());
                }
            }
        }
    }

    // Determine which browser should handle the url
    auto hsit = o.find("handlers");
    if (hsit != o.end())
    {
        CHECK(
            hsit->value().is_array(),
            "handler rules should be an array");
        auto& hs = hsit->value().as_array();
        for (auto& hv: hs)
        {
            CHECK(
                hv.is_object(),
                "individual handlers should be an object");
            json::object& h = hv.as_object();

            auto mit = h.find("match");
            CHECK(
                mit != h.end(),
                "handle rule should have a match field");
            CHECK(
                mit->value().is_string() || mit->value().is_array(),
                "handle match field must be an array or a string");

            auto hbit = h.find("browser");
            CHECK(
                hbit != h.end(),
                "handle rule should have a browser field");
            CHECK(
                hbit->value().is_string(),
                "browser field is not a string");

            // Look for match and change browser
            if (url_match(mit->value(), u))
            {
                browser = hbit->value().as_string().subview();
                break;
            }
        }
    }

    // Print command finicky would run
    std::cout << "\"" << browser.c_str() << "\" " << u << '\n';

    return EXIT_SUCCESS;
}