iri_string/parser/
validate.rs

1//! Validating parsers for non-trusted (possibly invalid) input.
2
3mod authority;
4mod path;
5
6use crate::parser::char;
7use crate::parser::str::{
8    find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded,
9};
10use crate::spec::Spec;
11use crate::validate::{Error, ErrorKind};
12
13pub(crate) use self::authority::{
14    validate_authority, validate_host, validate_reg_name, validate_userinfo,
15};
16pub(crate) use self::path::{validate_path, validate_path_segment};
17use self::path::{
18    validate_path_abempty, validate_path_absolute_authority_absent,
19    validate_path_relative_authority_absent,
20};
21
22/// Returns `Ok(_)` if the string matches `scheme`.
23pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> {
24    let bytes = i.as_bytes();
25    if !i.is_empty()
26        && bytes[0].is_ascii_alphabetic()
27        && bytes[1..]
28            .iter()
29            .all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b))
30    {
31        Ok(())
32    } else {
33        Err(Error::with_kind(ErrorKind::InvalidScheme))
34    }
35}
36
37/// Returns `Ok(_)` if the string matches `query` or `iquery`.
38pub(crate) fn validate_query<S: Spec>(i: &str) -> Result<(), Error> {
39    let is_valid =
40        satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::<S>);
41    if is_valid {
42        Ok(())
43    } else {
44        Err(Error::with_kind(ErrorKind::InvalidQuery))
45    }
46}
47
48/// Returns `Ok(_)` if the string matches `authority path-abempty` rule sequence.
49fn validate_authority_path_abempty<S: Spec>(i: &str) -> Result<(), Error> {
50    let (maybe_authority, maybe_path) = match find_split(i, b'/') {
51        Some(v) => v,
52        None => (i, ""),
53    };
54    validate_authority::<S>(maybe_authority)?;
55    validate_path_abempty::<S>(maybe_path)
56}
57
58/// Returns `Ok(_)` if the string matches `URI`/`IRI` rules.
59#[inline]
60pub(crate) fn validate_uri<S: Spec>(i: &str) -> Result<(), Error> {
61    validate_uri_reference_common::<S>(i, UriReferenceRule::Absolute)
62}
63
64/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules.
65#[inline]
66pub(crate) fn validate_uri_reference<S: Spec>(i: &str) -> Result<(), Error> {
67    validate_uri_reference_common::<S>(i, UriReferenceRule::Any)
68}
69
70/// Returns `Ok(_)` if the string matches `absolute-URI`/`absolute-IRI` rules.
71#[inline]
72pub(crate) fn validate_absolute_uri<S: Spec>(i: &str) -> Result<(), Error> {
73    validate_uri_reference_common::<S>(i, UriReferenceRule::AbsoluteWithoutFragment)
74}
75
/// Selects which URI/IRI reference grammar a validation run must enforce.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum UriReferenceRule {
    /// The `URI` and `IRI` rules.
    ///
    /// A scheme is required; a fragment may be present.
    Absolute,
    /// The `absolute-URI` and `absolute-IRI` rules.
    ///
    /// A scheme is required; a fragment must not be present.
    AbsoluteWithoutFragment,
    /// The `URI-reference` and `IRI-reference` rules.
    ///
    /// The reference may be relative, and a fragment may be present.
    Any,
}

impl UriReferenceRule {
    /// Returns `true` if a relative reference is allowed by this rule.
    #[inline]
    #[must_use]
    fn is_relative_allowed(self) -> bool {
        match self {
            Self::Any => true,
            Self::Absolute | Self::AbsoluteWithoutFragment => false,
        }
    }

    /// Returns `true` if the fragment part is allowed by this rule.
    #[inline]
    #[must_use]
    fn is_fragment_allowed(self) -> bool {
        match self {
            Self::Absolute | Self::Any => true,
            Self::AbsoluteWithoutFragment => false,
        }
    }
}
108
109/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules.
110fn validate_uri_reference_common<S: Spec>(
111    i: &str,
112    ref_rule: UriReferenceRule,
113) -> Result<(), Error> {
114    // Validate `scheme ":"`.
115    let (i, _scheme) = match find_split_hole(i, b':') {
116        None => {
117            if ref_rule.is_relative_allowed() {
118                return validate_relative_ref::<S>(i);
119            } else {
120                return Err(Error::with_kind(ErrorKind::UnexpectedRelative));
121            }
122        }
123        Some((maybe_scheme, rest)) => {
124            if validate_scheme(maybe_scheme).is_err() {
125                // The string before the first colon is not a scheme.
126                // Falling back to `relative-ref` parsing.
127                if ref_rule.is_relative_allowed() {
128                    return validate_relative_ref::<S>(i);
129                } else {
130                    return Err(Error::with_kind(ErrorKind::InvalidScheme));
131                }
132            }
133            (rest, maybe_scheme)
134        }
135    };
136
137    // Validate `hier-part`.
138    let after_path = match i.strip_prefix("//") {
139        Some(i) => {
140            let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') {
141                Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))),
142                None => (i, None),
143            };
144            validate_authority_path_abempty::<S>(maybe_authority_path)?;
145            after_path
146        }
147        None => {
148            let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') {
149                Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))),
150                None => (i, None),
151            };
152            // Authority is absent.
153            validate_path_absolute_authority_absent::<S>(maybe_path)?;
154            after_path
155        }
156    };
157
158    // Validate `[ "?" query ] [ "#" fragment ]`.
159    if let Some((first, rest)) = after_path {
160        validate_after_path::<S>(first, rest, ref_rule.is_fragment_allowed())?;
161    }
162    Ok(())
163}
164
165/// Returns `Ok(_)` if the string matches `relative-ref`/`irelative-ref` rules.
166pub(crate) fn validate_relative_ref<S: Spec>(i: &str) -> Result<(), Error> {
167    // Validate `relative-part`.
168    let after_path = match i.strip_prefix("//") {
169        Some(i) => {
170            let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') {
171                Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))),
172                None => (i, None),
173            };
174            validate_authority_path_abempty::<S>(maybe_authority_path)?;
175            after_path
176        }
177        None => {
178            let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') {
179                Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))),
180                None => (i, None),
181            };
182            // Authority is absent.
183            validate_path_relative_authority_absent::<S>(maybe_path)?;
184            after_path
185        }
186    };
187
188    // Validate `[ "?" query ] [ "#" fragment ]`.
189    if let Some((first, rest)) = after_path {
190        validate_after_path::<S>(first, rest, true)?;
191    }
192    Ok(())
193}
194
195/// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version).
196fn validate_after_path<S: Spec>(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> {
197    let (maybe_query, maybe_fragment) = if first == b'?' {
198        match find_split_hole(rest, b'#') {
199            Some(v) => v,
200            None => (rest, ""),
201        }
202    } else {
203        debug_assert_eq!(first, b'#');
204        ("", rest)
205    };
206    validate_query::<S>(maybe_query)?;
207    if !accept_fragment && !maybe_fragment.is_empty() {
208        return Err(Error::with_kind(ErrorKind::UnexpectedFragment));
209    }
210    validate_fragment::<S>(maybe_fragment)
211}
212
213/// Returns `Ok(_)` if the string matches `fragment`/`ifragment` rules.
214pub(crate) fn validate_fragment<S: Spec>(i: &str) -> Result<(), Error> {
215    let is_valid = satisfy_chars_with_pct_encoded(
216        i,
217        char::is_ascii_frag_query,
218        char::is_nonascii_fragment::<S>,
219    );
220    if is_valid {
221        Ok(())
222    } else {
223        Err(Error::with_kind(ErrorKind::InvalidFragment))
224    }
225}