@@ -102,49 +102,39 @@ fn validate_absolute_raw(b: &[u8]) -> bool {
102102 return false ;
103103 }
104104
105- #[ allow( unused_assignments) ]
106- let mut scheme_end_in_b = 0usize ;
105+ // Simultaneously scan scheme chars and build the lower-case classification
106+ // buffer in a single pass — no separate count() or second filter iteration.
107+ // All special schemes (http, https, ftp, ws, wss, file) are ≤5 significant
108+ // chars; anything longer is definitively non-special.
109+ let mut scheme_lower = [ 0u8 ; 5 ] ;
110+ let mut scheme_len = 0usize ; // significant (non-tab/newline) char count
111+
107112 loop {
108113 let p = src. peek_pos ( ) ;
109114 if p >= b. len ( ) {
110115 return false ;
111116 }
112117 let c = b[ p] ;
113118 if c == b':' {
114- scheme_end_in_b = p;
115119 src. advance_past ( p) ;
116120 break ;
117121 }
118122 if !is_alnum_plus ( c) {
119123 return false ;
120124 }
125+ // Store only the first 5 significant chars; longer schemes are still counted so the `scheme_len <= 5` check below treats them as non-special.
126+ if scheme_len < 5 {
127+ scheme_lower[ scheme_len] = c | 0x20 ;
128+ }
129+ scheme_len += 1 ;
121130 src. advance_past ( p) ;
122131 }
123132
124- // Lowercase scheme into a small stack buffer for special-scheme classification.
125- // All special schemes (http, https, ftp, ws, wss, file) are ≤5 significant
126- // chars. Schemes longer than that are definitively non-special — skip the copy.
127- let raw_scheme = & b[ ..scheme_end_in_b] ;
128- let scheme_type = {
129- let significant_len = raw_scheme
130- . iter ( )
131- . filter ( |& & c| !matches ! ( c, b'\t' | b'\n' | b'\r' ) )
132- . count ( ) ;
133- if significant_len <= 5 {
134- let mut scheme_lower = [ 0u8 ; 5 ] ;
135- let mut scheme_len = 0usize ;
136- for & c in raw_scheme
137- . iter ( )
138- . filter ( |& & c| !matches ! ( c, b'\t' | b'\n' | b'\r' ) )
139- {
140- scheme_lower[ scheme_len] = c | 0x20 ;
141- scheme_len += 1 ;
142- }
143- let scheme_str = unsafe { core:: str:: from_utf8_unchecked ( & scheme_lower[ ..scheme_len] ) } ;
144- get_scheme_type ( scheme_str)
145- } else {
146- SchemeType :: NotSpecial
147- }
133+ let scheme_type = if scheme_len <= 5 {
134+ let scheme_str = unsafe { core:: str:: from_utf8_unchecked ( & scheme_lower[ ..scheme_len] ) } ;
135+ get_scheme_type ( scheme_str)
136+ } else {
137+ SchemeType :: NotSpecial
148138 } ;
149139
150140 // Rest = everything after ':'
@@ -246,23 +236,35 @@ fn validate_authority_and_rest_raw(rest: &[u8], is_special: bool) -> bool {
246236// ============================================================
247237
248238fn validate_host_and_port_raw ( host_port : & [ u8 ] , is_special : bool ) -> bool {
249- // Filter out tabs/newlines first (stack — no heap).
250- // Since host_port is typically very short (≤ 253 bytes), a 256-byte stack
251- // buffer covers virtually all realistic inputs.
239+ // Fast path (overwhelmingly common): no tabs/newlines present.
240+ // Validate directly on the original slice — zero copies, zero stack writes.
241+ if !host_port
242+ . iter ( )
243+ . any ( |& c| matches ! ( c, b'\t' | b'\n' | b'\r' ) )
244+ {
245+ return validate_hp ( host_port, is_special) ;
246+ }
247+
248+ // Rare path: strip tabs/newlines into a small stack buffer, then validate.
249+ // A DNS name is at most 253 bytes, so 256 covers realistic host[:port] input; anything longer is rejected below.
252250 let mut buf = [ 0u8 ; 256 ] ;
253251 let mut len = 0usize ;
254252 for & c in host_port {
255253 if matches ! ( c, b'\t' | b'\n' | b'\r' ) {
256254 continue ;
257255 }
258256 if len >= 256 {
259- return false ; // pathologically long host — invalid
257+ return false ; // pathologically long — invalid
260258 }
261259 buf[ len] = c;
262260 len += 1 ;
263261 }
264- let hp = & buf[ ..len] ;
262+ validate_hp ( & buf[ ..len] , is_special)
263+ }
265264
265+ /// Validate an already-clean (no tabs/newlines) host[:port] slice.
266+ #[ inline]
267+ fn validate_hp ( hp : & [ u8 ] , is_special : bool ) -> bool {
266268 if hp. is_empty ( ) {
267269 return !is_special;
268270 }
0 commit comments