Skip to content

Commit

Permalink
add initial parser_url_pattern method
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Dec 4, 2024
1 parent 5967d62 commit 18ef1a6
Show file tree
Hide file tree
Showing 4 changed files with 566 additions and 74 deletions.
11 changes: 4 additions & 7 deletions include/ada/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@
#ifndef ADA_PARSER_H
#define ADA_PARSER_H

#include <optional>
#include <string_view>
#include <variant>

#include "ada/encoding_type.h"
#include "ada/expected.h"
#include "ada/state.h"
#include "ada/url_pattern.h"

/**
Expand All @@ -33,7 +30,7 @@ namespace ada::parser {
* parameter that can be used to resolve relative URLs. If the base_url is
* provided, the user_input is resolved against the base_url.
*/
template <typename result_type = ada::url_aggregator>
template <typename result_type = url_aggregator>
result_type parse_url(std::string_view user_input,
const result_type* base_url = nullptr);

Expand All @@ -42,14 +39,14 @@ extern template url_aggregator parse_url<url_aggregator>(
extern template url parse_url<url>(std::string_view user_input,
const url* base_url);

template <typename result_type = ada::url_aggregator, bool store_values = true>
template <typename result_type = url_aggregator, bool store_values = true>
result_type parse_url_impl(std::string_view user_input,
const result_type* base_url = nullptr);

tl::expected<ada::URLPattern, ada::url_pattern::errors> parse_url_pattern(
tl::expected<URLPattern, url_pattern::errors> parse_url_pattern(
std::variant<std::string_view, URLPattern::Init> input,
const std::string_view* base_url = nullptr,
const ada::URLPattern::Options* options = nullptr);
const URLPattern::Options* options = nullptr);

extern template url_aggregator parse_url_impl<url_aggregator>(
std::string_view user_input, const url_aggregator* base_url);
Expand Down
26 changes: 17 additions & 9 deletions include/ada/url_aggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
#include <string>
#include <string_view>

#include "ada/url_pattern.h"
#include "ada/common_defs.h"
#include "ada/url_base.h"
#include "ada/url_components.h"
#include "ada/parser.h"

namespace ada {

Expand Down Expand Up @@ -208,15 +210,21 @@ struct url_aggregator : url_base {
inline void clear_search() override;

private:
friend ada::url_aggregator ada::parser::parse_url<ada::url_aggregator>(
std::string_view, const ada::url_aggregator *);
friend void ada::helpers::strip_trailing_spaces_from_opaque_path<
ada::url_aggregator>(ada::url_aggregator &url) noexcept;
friend ada::url_aggregator ada::parser::parse_url_impl<
ada::url_aggregator, true>(std::string_view, const ada::url_aggregator *);
friend ada::url_aggregator
ada::parser::parse_url_impl<ada::url_aggregator, false>(
std::string_view, const ada::url_aggregator *);
// helper methods
friend void helpers::strip_trailing_spaces_from_opaque_path<url_aggregator>(
url_aggregator &url) noexcept;
// parse_url methods
friend url_aggregator parser::parse_url<url_aggregator>(
std::string_view, const url_aggregator *);

friend url_aggregator parser::parse_url_impl<url_aggregator, true>(
std::string_view, const url_aggregator *);
friend url_aggregator parser::parse_url_impl<url_aggregator, false>(
std::string_view, const url_aggregator *);
// url_pattern methods
// parse_url_pattern is declared in namespace ada::parser (ada/parser.h), so
// the friend declaration has to name it with the parser:: qualifier, exactly
// like the parse_url / parse_url_impl friends above. An unqualified name
// would instead declare (and befriend) a different function,
// ada::parse_url_pattern, leaving the real parser without access.
friend tl::expected<URLPattern, url_pattern::errors> parser::parse_url_pattern(
    std::variant<std::string_view, URLPattern::Init> input,
    const std::string_view *base_url, const URLPattern::Options *options);

std::string buffer{};
url_components components{};
Expand Down
138 changes: 99 additions & 39 deletions include/ada/url_pattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,89 @@
#ifndef ADA_URL_PATTERN_H
#define ADA_URL_PATTERN_H

#include "ada/expected.h"

#include <cstdint>
#include <string>
#include <unordered_map>
#include <variant>

namespace ada {

namespace url_pattern {
// Error codes carried in the tl::expected results of the URLPattern parsing
// and processing routines. The underlying type is a single byte.
// std::uint8_t is spelled with its namespace because <cstdint> only
// guarantees the std:: name; the global alias is optional.
enum class errors : std::uint8_t { type_error };
}  // namespace url_pattern

// URLPattern is a Web Platform standard API for matching URLs against a
// pattern syntax (think of it as a regular expression for URLs). It is
// defined in https://wicg.github.io/urlpattern.
// More information about the URL Pattern syntax can be found at
// https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
class URLPattern {
public:
// A structure providing matching patterns for individual components
// of a URL. When a URLPattern is created, or when a URLPattern is
// used to match or test against a URL, the input can be given as
// either a string or a URLPatternInit struct. If a string is given,
// it will be parsed to create a URLPatternInit. The URLPatternInit
// API is defined as part of the URLPattern specification.
//
// Besides the data members, Init exposes one static helper per "process ...
// for init" algorithm of the specification. Every helper reports its outcome
// through tl::expected: the processed value on success, or a
// url_pattern::errors value on failure.
struct Init {
// Processes a whole Init together with optional per-component overrides and
// returns the resulting Init, or an error.
// NOTE(review): `type` is taken by value as std::string here, while every
// per-component helper below takes it as std::string_view - confirm the copy
// is intended. Presumably `type` is "pattern" or "url" as in the linked
// spec - TODO confirm.
// @see https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit
static tl::expected<Init, url_pattern::errors> process(
Init init, std::string type, std::optional<std::string_view> protocol,
std::optional<std::string_view> username,
std::optional<std::string_view> password,
std::optional<std::string_view> hostname,
std::optional<std::string_view> port,
std::optional<std::string_view> pathname,
std::optional<std::string_view> search,
std::optional<std::string_view> hash);

// Processes a protocol `value` for the given `type`; yields the resulting
// string or an error.
// @see https://urlpattern.spec.whatwg.org/#process-protocol-for-init
static tl::expected<std::string, url_pattern::errors> process_protocol(
std::string_view value, std::string_view type);

// Processes a username `value` for the given `type`; yields the resulting
// string or an error.
// @see https://urlpattern.spec.whatwg.org/#process-username-for-init
static tl::expected<std::string, url_pattern::errors> process_username(
std::string_view value, std::string_view type);

// Processes a password `value` for the given `type`; yields the resulting
// string or an error.
// @see https://urlpattern.spec.whatwg.org/#process-password-for-init
static tl::expected<std::string, url_pattern::errors> process_password(
std::string_view value, std::string_view type);

// Processes a hostname `value` for the given `type`; yields the resulting
// string or an error.
// @see https://urlpattern.spec.whatwg.org/#process-hostname-for-init
static tl::expected<std::string, url_pattern::errors> process_hostname(
std::string_view value, std::string_view type);

// Processes a port for the given `type`; unlike the helpers above it also
// receives the `protocol`. Yields the resulting string or an error.
// NOTE(review): the first parameter is named `port` here but `value` in the
// sibling helpers.
// @see https://urlpattern.spec.whatwg.org/#process-port-for-init
static tl::expected<std::string, url_pattern::errors> process_port(
std::string_view port, std::string_view protocol,
std::string_view type);

// Processes a pathname `value` for the given `type`; also receives the
// `protocol`. Yields the resulting string or an error.
// @see https://urlpattern.spec.whatwg.org/#process-pathname-for-init
static tl::expected<std::string, url_pattern::errors> process_pathname(
std::string_view value, std::string_view protocol,
std::string_view type);

// Processes a search `value` for the given `type`; yields the resulting
// string or an error.
// @see https://urlpattern.spec.whatwg.org/#process-search-for-init
static tl::expected<std::string, url_pattern::errors> process_search(
std::string_view value, std::string_view type);

// Processes a hash `value` for the given `type`; yields the resulting
// string or an error.
// @see https://urlpattern.spec.whatwg.org/#process-hash-for-init
static tl::expected<std::string, url_pattern::errors> process_hash(
std::string_view value, std::string_view type);

// One optional string per URL component. The member order is part of the
// aggregate's interface (aggregate initialization), so keep it stable.
// Presumably an empty optional means "component not provided" - TODO confirm.
std::optional<std::string> protocol;
std::optional<std::string> username;
std::optional<std::string> password;
std::optional<std::string> hostname;
std::optional<std::string> port;
std::optional<std::string> pathname;
std::optional<std::string> search;
std::optional<std::string> hash;

// Optional base URL string stored alongside the component patterns.
std::optional<std::string> base_url;
};

class Component {
public:
explicit Component(std::string_view pattern, std::string_view regex,
Expand All @@ -43,26 +113,7 @@ class URLPattern {
bool has_regexp_groups_ = false;
};

// A structure providing matching patterns for individual components
// of a URL. When a URLPattern is created, or when a URLPattern is
// used to match or test against a URL, the input can be given as
// either a string or a URLPatternInit struct. If a string is given,
// it will be parsed to create a URLPatternInit. The URLPatternInit
// API is defined as part of the URLPattern specification.
struct Init {
std::optional<std::string> protocol;
std::optional<std::string> username;
std::optional<std::string> password;
std::optional<std::string> hostname;
std::optional<std::string> port;
std::optional<std::string> pathname;
std::optional<std::string> search;
std::optional<std::string> hash;

std::optional<std::string> base_url;
};

using Input = std::variant<std::string, Init>;
using Input = std::variant<std::string, URLPattern::Init>;

// A struct providing the URLPattern matching results for a single
// URL component. The URLPatternComponentResult is only ever used
Expand Down Expand Up @@ -140,18 +191,16 @@ class URLPattern {

namespace url_pattern {

enum class errors { type_error };

// @see https://urlpattern.spec.whatwg.org/#tokens
struct Token {
// @see https://urlpattern.spec.whatwg.org/#tokenize-policy
enum Policy {
enum class Policy {
STRICT,
LENIENT,
};

// @see https://urlpattern.spec.whatwg.org/#token
enum Type {
enum class Type {
INVALID_CHAR, // 0
OPEN, // 1
CLOSE, // 2
Expand All @@ -168,7 +217,7 @@ struct Token {
// @see https://urlpattern.spec.whatwg.org/#tokenizer
struct Tokenizer {
explicit Tokenizer(std::string_view input, Token::Policy policy)
: input(input), policy(std::move(policy));
: input(input), policy(policy) {}

// has an associated input, a pattern string, initially the empty string.
std::string input{};
Expand All @@ -191,7 +240,7 @@ struct ConstructorStringParser {

private:
// @see https://urlpattern.spec.whatwg.org/#constructor-string-parser-state
enum State {
enum class State {
INIT,
PROTOCOL,
AUTHORITY,
Expand Down Expand Up @@ -225,48 +274,59 @@ struct ConstructorStringParser {
// has an associated protocol matches a special scheme flag, a boolean,
// initially set to false.
bool protocol_matches_a_special_scheme_flag = false;
// has an associated state, a string, initially set to "init". It must be one
// of the following:
State state = INIT;
// has an associated state, a string, initially set to "init".
State state = State::INIT;
};

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol
std::optional<std::string> canonicalize_protocol(std::string_view input);
tl::expected<std::string, errors> canonicalize_protocol(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-username
std::optional<std::string> canonicalize_username(std::string_view input);
tl::expected<std::string, errors> canonicalize_username(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-password
std::optional<std::string> canonicalize_password(std::string_view input);
tl::expected<std::string, errors> canonicalize_password(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-hostname
std::optional<std::string> canonicalize_hostname(std::string_view input);
tl::expected<std::string, errors> canonicalize_hostname(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-an-ipv6-hostname
std::optional<std::string> canonicalize_ipv6_hostname(std::string_view input);
tl::expected<std::string, errors> canonicalize_ipv6_hostname(
std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-port
std::optional<std::string> canonicalize_port(
tl::expected<std::string, errors> canonicalize_port(
std::string_view input, std::string_view protocol = "fake");

// @see https://wicg.github.io/urlpattern/#canonicalize-a-pathname
std::optional<std::string> canonicalize_pathname(std::string_view input);
tl::expected<std::string, errors> canonicalize_pathname(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-an-opaque-pathname
std::optional<std::string> canonicalize_opaque_pathname(std::string_view input);
tl::expected<std::string, errors> canonicalize_opaque_pathname(
std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-search
std::optional<std::string> canonicalize_search(std::string_view input);
tl::expected<std::string, errors> canonicalize_search(std::string_view input);

// @see https://wicg.github.io/urlpattern/#canonicalize-a-hash
std::optional<std::string> canonicalize_hash(std::string_view input);
tl::expected<std::string, errors> canonicalize_hash(std::string_view input);

// Parses a constructor string `input` and returns the result as a
// URLPattern::Init.
// @see https://urlpattern.spec.whatwg.org/#parse-a-constructor-string
URLPattern::Init parse_constructor_string(std::string_view input);

// Tokenizes `input` under the given tokenize `policy`.
// NOTE(review): the linked spec algorithm produces a token list, whereas this
// declaration returns std::string - confirm the intended return type.
// @see https://urlpattern.spec.whatwg.org/#tokenize
std::string tokenize(std::string_view input, Token::Policy policy);

// Processes a base URL string `input` for the given `type` and returns the
// resulting string. Presumably `type` is "pattern" or "url" as in the linked
// spec - TODO confirm.
// @see https://urlpattern.spec.whatwg.org/#process-a-base-url-string
std::string process_base_url_string(std::string_view input,
std::string_view type);

// Returns an escaped copy of the pattern string `input`.
// @see https://urlpattern.spec.whatwg.org/#escape-a-pattern-string
std::string escape_pattern(std::string_view input);

// Reports whether `input` is an absolute pathname for the given `type`.
// Presumably `type` is "pattern" or "url" as in the linked spec - TODO
// confirm.
// @see https://urlpattern.spec.whatwg.org/#is-an-absolute-pathname
bool is_absolute_pathname(std::string_view input, std::string_view type);

} // namespace url_pattern

} // namespace ada
Expand Down
Loading

0 comments on commit 18ef1a6

Please sign in to comment.