Skip to content

Commit 893b212

Browse files
committed
make some improvements
1 parent cded9e7 commit 893b212

2 files changed

Lines changed: 55 additions & 32 deletions

File tree

src/lib.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,27 @@ impl Url {
233233
}
234234
}
235235

236+
/// Parse `input` relative to an already-parsed `base` URL.
237+
///
238+
/// This is more efficient than [`Url::parse`] with a base string because the
239+
/// base URL is **not** re-parsed — use this in hot loops where the same base
240+
/// is reused across many inputs (e.g. the WPT URL benchmark pattern).
241+
///
242+
/// Returns `None` when either `base` is invalid or `input` cannot be resolved.
243+
#[must_use]
244+
pub fn parse_with_base<Input>(input: Input, base: &Url) -> Option<Self>
245+
where
246+
Input: AsRef<str>,
247+
{
248+
if !base.is_valid {
249+
return None;
250+
}
251+
match parser::parse_url(input.as_ref(), Some(base)) {
252+
Some(u) if u.is_valid => Some(u),
253+
_ => None,
254+
}
255+
}
256+
236257
/// Returns `true` when `input` can be parsed as a valid URL.
237258
///
238259
/// When `base` is `None` this uses a zero-allocation fast-path validator

src/validator.rs

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -102,49 +102,39 @@ fn validate_absolute_raw(b: &[u8]) -> bool {
102102
return false;
103103
}
104104

105-
#[allow(unused_assignments)]
106-
let mut scheme_end_in_b = 0usize;
105+
// Simultaneously scan scheme chars and build the lower-case classification
106+
// buffer in a single pass — no separate count() or second filter iteration.
107+
// All special schemes (http, https, ftp, ws, wss, file) are ≤5 significant
108+
// chars; anything longer is definitively non-special.
109+
let mut scheme_lower = [0u8; 5];
110+
let mut scheme_len = 0usize; // significant (non-tab/newline) char count
111+
107112
loop {
108113
let p = src.peek_pos();
109114
if p >= b.len() {
110115
return false;
111116
}
112117
let c = b[p];
113118
if c == b':' {
114-
scheme_end_in_b = p;
115119
src.advance_past(p);
116120
break;
117121
}
118122
if !is_alnum_plus(c) {
119123
return false;
120124
}
125+
// Store into the lower-case buffer only for the first 5 chars; longer schemes keep counting but skip the write.
126+
if scheme_len < 5 {
127+
scheme_lower[scheme_len] = c | 0x20;
128+
}
129+
scheme_len += 1;
121130
src.advance_past(p);
122131
}
123132

124-
// Lowercase scheme into a small stack buffer for special-scheme classification.
125-
// All special schemes (http, https, ftp, ws, wss, file) are ≤5 significant
126-
// chars. Schemes longer than that are definitively non-special — skip the copy.
127-
let raw_scheme = &b[..scheme_end_in_b];
128-
let scheme_type = {
129-
let significant_len = raw_scheme
130-
.iter()
131-
.filter(|&&c| !matches!(c, b'\t' | b'\n' | b'\r'))
132-
.count();
133-
if significant_len <= 5 {
134-
let mut scheme_lower = [0u8; 5];
135-
let mut scheme_len = 0usize;
136-
for &c in raw_scheme
137-
.iter()
138-
.filter(|&&c| !matches!(c, b'\t' | b'\n' | b'\r'))
139-
{
140-
scheme_lower[scheme_len] = c | 0x20;
141-
scheme_len += 1;
142-
}
143-
let scheme_str = unsafe { core::str::from_utf8_unchecked(&scheme_lower[..scheme_len]) };
144-
get_scheme_type(scheme_str)
145-
} else {
146-
SchemeType::NotSpecial
147-
}
133+
let scheme_type = if scheme_len <= 5 {
134+
let scheme_str = unsafe { core::str::from_utf8_unchecked(&scheme_lower[..scheme_len]) };
135+
get_scheme_type(scheme_str)
136+
} else {
137+
SchemeType::NotSpecial
148138
};
149139

150140
// Rest = everything after ':'
@@ -246,23 +236,35 @@ fn validate_authority_and_rest_raw(rest: &[u8], is_special: bool) -> bool {
246236
// ============================================================
247237

248238
fn validate_host_and_port_raw(host_port: &[u8], is_special: bool) -> bool {
249-
// Filter out tabs/newlines first (stack — no heap).
250-
// Since host_port is typically very short (≤ 253 bytes), a 256-byte stack
251-
// buffer covers virtually all realistic inputs.
239+
// Fast path (overwhelmingly common): no tabs/newlines present.
240+
// Validate directly on the original slice — zero copies, zero stack writes.
241+
if !host_port
242+
.iter()
243+
.any(|&c| matches!(c, b'\t' | b'\n' | b'\r'))
244+
{
245+
return validate_hp(host_port, is_special);
246+
}
247+
248+
// Rare path: strip tabs/newlines into a small stack buffer, then validate.
249+
// A DNS hostname is at most 253 bytes, so 256 covers realistic host[:port] inputs; longer stripped input is rejected below.
252250
let mut buf = [0u8; 256];
253251
let mut len = 0usize;
254252
for &c in host_port {
255253
if matches!(c, b'\t' | b'\n' | b'\r') {
256254
continue;
257255
}
258256
if len >= 256 {
259-
return false; // pathologically long host — invalid
257+
return false; // pathologically long — invalid
260258
}
261259
buf[len] = c;
262260
len += 1;
263261
}
264-
let hp = &buf[..len];
262+
validate_hp(&buf[..len], is_special)
263+
}
265264

265+
/// Validate an already-clean (no tabs/newlines) host[:port] slice.
266+
#[inline]
267+
fn validate_hp(hp: &[u8], is_special: bool) -> bool {
266268
if hp.is_empty() {
267269
return !is_special;
268270
}

0 commit comments

Comments
 (0)