Title: | A Fast 'WHATWG' Compliant URL Parser |
---|---|
Description: | A wrapper for 'ada-url', a 'WHATWG' compliant and fast URL parser written in modern 'C++'. Also contains auxiliary functions such as a public suffix extractor. |
Authors: | David Schoch [aut, cre] , Chung-hong Chan [aut] , Yagiz Nizipli [ctb, cph] (author of ada-url : <https://github.com/ada-url/ada>), Daniel Lemire [ctb, cph] (author of ada-url : <https://github.com/ada-url/ada>) |
Maintainer: | David Schoch <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.3.3 |
Built: | 2024-11-22 05:38:10 UTC |
Source: | https://github.com/gesistsa/adar |
These functions clear a specific component of a URL.
ada_clear_port(url, decode = TRUE) ada_clear_hash(url, decode = TRUE) ada_clear_search(url, decode = TRUE)
ada_clear_port(url, decode = TRUE) ada_clear_hash(url, decode = TRUE) ada_clear_search(url, decode = TRUE)
url |
character. one or more URLs to be parsed |
decode |
logical. Whether to percent-decode the output (see utils::URLdecode()); defaults to TRUE |
character, NA
if not a valid URL
url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" ada_clear_port(url) ada_clear_hash(url) ada_clear_search(url)
url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" ada_clear_port(url) ada_clear_hash(url) ada_clear_search(url)
These functions get a specific component of a URL.
ada_get_href(url, decode = TRUE) ada_get_username(url, decode = TRUE) ada_get_password(url, decode = TRUE) ada_get_port(url, decode = TRUE) ada_get_hash(url, decode = TRUE) ada_get_host(url, decode = TRUE) ada_get_hostname(url, decode = TRUE) ada_get_pathname(url, decode = TRUE) ada_get_search(url, decode = TRUE) ada_get_protocol(url, decode = TRUE) ada_get_domain(url, decode = TRUE) ada_get_basename(url)
ada_get_href(url, decode = TRUE) ada_get_username(url, decode = TRUE) ada_get_password(url, decode = TRUE) ada_get_port(url, decode = TRUE) ada_get_hash(url, decode = TRUE) ada_get_host(url, decode = TRUE) ada_get_hostname(url, decode = TRUE) ada_get_pathname(url, decode = TRUE) ada_get_search(url, decode = TRUE) ada_get_protocol(url, decode = TRUE) ada_get_domain(url, decode = TRUE) ada_get_basename(url)
url |
character. one or more URLs to be parsed |
decode |
logical. Whether to percent-decode the output (see utils::URLdecode()); defaults to TRUE |
character, NA
if not a valid URL
url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" ada_get_href(url) ada_get_username(url) ada_get_password(url) ada_get_port(url) ada_get_hash(url) ada_get_host(url) ada_get_hostname(url) ada_get_pathname(url) ada_get_search(url) ada_get_protocol(url) ada_get_domain(url) ada_get_basename(url) ## these functions are vectorized urls <- c("http://www.google.com", "http://www.google.com:80", "noturl") ada_get_port(urls)
url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" ada_get_href(url) ada_get_username(url) ada_get_password(url) ada_get_port(url) ada_get_hash(url) ada_get_host(url) ada_get_hostname(url) ada_get_pathname(url) ada_get_search(url) ada_get_protocol(url) ada_get_domain(url) ada_get_basename(url) ## these functions are vectorized urls <- c("http://www.google.com", "http://www.google.com:80", "noturl") ada_get_port(urls)
These functions check whether a URL has a certain component.
ada_has_credentials(url) ada_has_empty_hostname(url) ada_has_hostname(url) ada_has_non_empty_username(url) ada_has_non_empty_password(url) ada_has_port(url) ada_has_hash(url) ada_has_search(url)
ada_has_credentials(url) ada_has_empty_hostname(url) ada_has_hostname(url) ada_has_non_empty_username(url) ada_has_non_empty_password(url) ada_has_port(url) ada_has_hash(url) ada_has_search(url)
url |
character. one or more URLs to be parsed |
logical, NA
if not a valid URL.
url <- c("https://user_1:password_1@example.org:8080/dir/../api?q=1#frag") ada_has_credentials(url) ada_has_empty_hostname(url) ada_has_hostname(url) ada_has_non_empty_username(url) ada_has_non_empty_password(url) ada_has_port(url) ada_has_hash(url) ada_has_search(url) ## these functions are vectorized urls <- c("http://www.google.com", "http://www.google.com:80", "noturl") ada_has_port(urls)
url <- c("https://user_1:password_1@example.org:8080/dir/../api?q=1#frag") ada_has_credentials(url) ada_has_empty_hostname(url) ada_has_hostname(url) ada_has_non_empty_username(url) ada_has_non_empty_password(url) ada_has_port(url) ada_has_hash(url) ada_has_search(url) ## these functions are vectorized urls <- c("http://www.google.com", "http://www.google.com:80", "noturl") ada_has_port(urls)
These functions set a specific component of a URL.
ada_set_href(url, input, decode = TRUE) ada_set_username(url, input, decode = TRUE) ada_set_password(url, input, decode = TRUE) ada_set_port(url, input, decode = TRUE) ada_set_host(url, input, decode = TRUE) ada_set_hostname(url, input, decode = TRUE) ada_set_pathname(url, input, decode = TRUE) ada_set_protocol(url, input, decode = TRUE) ada_set_search(url, input, decode = TRUE) ada_set_hash(url, input, decode = TRUE)
ada_set_href(url, input, decode = TRUE) ada_set_username(url, input, decode = TRUE) ada_set_password(url, input, decode = TRUE) ada_set_port(url, input, decode = TRUE) ada_set_host(url, input, decode = TRUE) ada_set_hostname(url, input, decode = TRUE) ada_set_pathname(url, input, decode = TRUE) ada_set_protocol(url, input, decode = TRUE) ada_set_search(url, input, decode = TRUE) ada_set_hash(url, input, decode = TRUE)
url |
character. one or more URLs to be parsed |
input |
character. containing new component for URL. Vector of length 1 or same length as url. |
decode |
logical. Whether to percent-decode the output (see utils::URLdecode()); defaults to TRUE |
character, NA
if not a valid URL
url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" ada_set_href(url, "https://google.de") ada_set_username(url, "user_2") ada_set_password(url, "hunter2") ada_set_port(url, "1234") ada_set_hash(url, "#section1") ada_set_host(url, "example.de") ada_set_hostname(url, "example.de") ada_set_pathname(url, "path/") ada_set_search(url, "q=2") ada_set_protocol(url, "ws:")
url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" ada_set_href(url, "https://google.de") ada_set_username(url, "user_2") ada_set_password(url, "hunter2") ada_set_port(url, "1234") ada_set_hash(url, "#section1") ada_set_host(url, "example.de") ada_set_hostname(url, "example.de") ada_set_pathname(url, "path/") ada_set_search(url, "q=2") ada_set_protocol(url, "ws:")
Use ada-url to parse a URL
ada_url_parse(url, decode = TRUE)
ada_url_parse(url, decode = TRUE)
url |
character. one or more URLs to be parsed |
decode |
logical. Whether to percent-decode the output (see utils::URLdecode()); defaults to TRUE |
For details on the returned components refer to the introductory vignette.
A data frame of the url components: href, protocol, username, password, host, hostname, port, pathname, search, and hash
ada_url_parse("https://user_1:password_1@example.org:8080/dir/../api?q=1#frag")
ada_url_parse("https://user_1:password_1@example.org:8080/dir/../api?q=1#frag")
Extract the public suffix from a vector of domains or hostnames
public_suffix(domains)
public_suffix(domains)
domains |
character. vector of domains or hostnames |
public suffixes of domains as character vector
public_suffix("http://example.com") # doesn't work for general URLs public_suffix("http://example.com/path/to/file") # extracting hostname first does the trick public_suffix(ada_get_hostname("http://example.com/path/to/file"))
public_suffix("http://example.com") # doesn't work for general URLs public_suffix("http://example.com/path/to/file") # extracting hostname first does the trick public_suffix(ada_get_hostname("http://example.com/path/to/file"))
Similar to utils::URLdecode
url_decode2(url)
url_decode2(url)
url |
a character vector |
percent-decoded URLs as character vector
url_decode2("Hello%20World")
url_decode2("Hello%20World")