Add Url::parse_with_base; scan scheme in one pass and skip the host/port copy when clean

anonrig · anonrig · commit 893b212444c3 · 2026-03-29T13:20:36.000-04:00
diff --git a/src/lib.rs b/src/lib.rs
@@ -233,6 +233,27 @@ impl Url {
         }
     }
 
+    /// Parse `input` relative to an already-parsed `base` URL.
+    ///
+    /// This is more efficient than [`Url::parse`] with a base string because the
+    /// base URL is **not** re-parsed — use this in hot loops where the same base
+    /// is reused across many inputs (e.g. the WPT URL benchmark pattern).
+    ///
+    /// Returns `None` when either `base` is invalid or `input` cannot be resolved.
+    #[must_use]
+    pub fn parse_with_base<Input>(input: Input, base: &Url) -> Option<Self>
+    where
+        Input: AsRef<str>,
+    {
+        if !base.is_valid {
+            return None;
+        }
+        match parser::parse_url(input.as_ref(), Some(base)) {
+            Some(u) if u.is_valid => Some(u),
+            _ => None,
+        }
+    }
+
     /// Returns `true` when `input` can be parsed as a valid URL.
     ///
     /// When `base` is `None` this uses a zero-allocation fast-path validator
diff --git a/src/validator.rs b/src/validator.rs
@@ -102,49 +102,39 @@ fn validate_absolute_raw(b: &[u8]) -> bool {
         return false;
     }
 
-    #[allow(unused_assignments)]
-    let mut scheme_end_in_b = 0usize;
+    // Simultaneously scan scheme chars and build the lower-case classification
+    // buffer in a single pass — no separate count() or second filter iteration.
+    // All special schemes (http, https, ftp, ws, wss, file) are ≤5 significant
+    // chars; anything longer is definitively non-special.
+    let mut scheme_lower = [0u8; 5];
+    let mut scheme_len = 0usize; // significant (non-tab/newline) char count
+
     loop {
         let p = src.peek_pos();
         if p >= b.len() {
             return false;
         }
         let c = b[p];
         if c == b':' {
-            scheme_end_in_b = p;
             src.advance_past(p);
             break;
         }
         if !is_alnum_plus(c) {
             return false;
         }
+        // Buffer only the first 5 chars; scheme_len keeps counting, so > 5 means non-special.
+        if scheme_len < 5 {
+            scheme_lower[scheme_len] = c | 0x20;
+        }
+        scheme_len += 1;
         src.advance_past(p);
     }
 
-    // Lowercase scheme into a small stack buffer for special-scheme classification.
-    // All special schemes (http, https, ftp, ws, wss, file) are ≤5 significant
-    // chars. Schemes longer than that are definitively non-special — skip the copy.
-    let raw_scheme = &b[..scheme_end_in_b];
-    let scheme_type = {
-        let significant_len = raw_scheme
-            .iter()
-            .filter(|&&c| !matches!(c, b'\t' | b'\n' | b'\r'))
-            .count();
-        if significant_len <= 5 {
-            let mut scheme_lower = [0u8; 5];
-            let mut scheme_len = 0usize;
-            for &c in raw_scheme
-                .iter()
-                .filter(|&&c| !matches!(c, b'\t' | b'\n' | b'\r'))
-            {
-                scheme_lower[scheme_len] = c | 0x20;
-                scheme_len += 1;
-            }
-            let scheme_str = unsafe { core::str::from_utf8_unchecked(&scheme_lower[..scheme_len]) };
-            get_scheme_type(scheme_str)
-        } else {
-            SchemeType::NotSpecial
-        }
+    let scheme_type = if scheme_len <= 5 {
+        let scheme_str = unsafe { core::str::from_utf8_unchecked(&scheme_lower[..scheme_len]) };
+        get_scheme_type(scheme_str)
+    } else {
+        SchemeType::NotSpecial
     };
 
     // Rest = everything after ':'
@@ -246,23 +236,35 @@ fn validate_authority_and_rest_raw(rest: &[u8], is_special: bool) -> bool {
 // ============================================================
 
 fn validate_host_and_port_raw(host_port: &[u8], is_special: bool) -> bool {
-    // Filter out tabs/newlines first (stack — no heap).
-    // Since host_port is typically very short (≤ 253 bytes), a 256-byte stack
-    // buffer covers virtually all realistic inputs.
+    // Fast path (overwhelmingly common): no tabs/newlines present.
+    // Validate directly on the original slice — zero copies, zero stack writes.
+    if !host_port
+        .iter()
+        .any(|&c| matches!(c, b'\t' | b'\n' | b'\r'))
+    {
+        return validate_hp(host_port, is_special);
+    }
+
+    // Rare path: strip tabs/newlines into a small stack buffer, then validate.
+    // Stripped input over 256 bytes is rejected below; NOTE(review): the fast path has no such cap.
     let mut buf = [0u8; 256];
     let mut len = 0usize;
     for &c in host_port {
         if matches!(c, b'\t' | b'\n' | b'\r') {
             continue;
         }
         if len >= 256 {
-            return false; // pathologically long host — invalid
+            return false; // pathologically long — invalid
         }
         buf[len] = c;
         len += 1;
     }
-    let hp = &buf[..len];
+    validate_hp(&buf[..len], is_special)
+}
 
+/// Validate an already-clean (no tabs/newlines) host[:port] slice.
+#[inline]
+fn validate_hp(hp: &[u8], is_special: bool) -> bool {
     if hp.is_empty() {
         return !is_special;
     }