Skip to content

Commit d3d9c56

Browse files
committed
copy over some code from Netty that uses SWAR techniques for checking array segments match
format; fix issue; more fully copy Netty Two-Way string matching impl; scalafmt format; Update SWARUtil.scala; Update ByteString.scala
1 parent 49e74d3 commit d3d9c56

4 files changed

Lines changed: 271 additions & 29 deletions

File tree

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ Copyright EPFL and Lightbend, Inc.
222222
pekko-actor contains code from Netty which was released under an Apache 2.0 license.
223223
Copyright 2014 The Netty Project
224224
- actor/src/main/scala/org/apache/pekko/io/dns/DnsSettings.scala
225+
- actor/src/main/scala/org/apache/pekko/util/ByteString.scala
225226
- actor/src/main/scala/org/apache/pekko/util/SWARUtil.scala
226227

227228
---------------

actor/src/main/scala/org/apache/pekko/util/ByteString.scala

Lines changed: 200 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@
1111
* Copyright (C) 2009-2022 Lightbend Inc. <https://www.lightbend.com>
1212
*/
1313

14+
/*
15+
* Copyright 2012 The Netty Project
16+
*
17+
* The Netty Project licenses this file to you under the Apache License, version 2.0 (the
18+
* "License"); you may not use this file except in compliance with the License. You may obtain a
19+
* copy of the License at:
20+
*
21+
* https://www.apache.org/licenses/LICENSE-2.0
22+
*
23+
* Unless required by applicable law or agreed to in writing, software distributed under the License
24+
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
25+
* or implied. See the License for the specific language governing permissions and limitations under
26+
* the License.
27+
*/
28+
1429
package org.apache.pekko.util
1530

1631
import java.io.{ InputStream, ObjectInputStream, ObjectOutputStream, SequenceInputStream }
@@ -314,6 +329,84 @@ object ByteString {
314329
else -1
315330
}
316331

332+
// Derived from code in Netty
333+
// https://github.com/netty/netty/blob/d28a0fc6598b50fbe8f296831777cf4b653a475f/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java#L242-L325
334+
/**
 * Searches this ByteString for `slice` using the two-way string matching
 * algorithm derived from Netty.
 *
 * @param slice the bytes to look for
 * @param from  the index to start searching at; negative values are treated as 0
 * @return the index of the match found scanning forward from `from`, `0` if `slice`
 *         is empty, or `-1` if there is no match
 */
override def indexOfSlice(slice: Array[Byte], from: Int): Int = {
  val m = slice.length
  if (m == 0) return 0
  // When the needle has only one byte that can be read,
  // the indexOf() can be used
  if (m == 1) return indexOf(slice.head, from)
  // Clamp the start position: a negative `from` would otherwise make the loops below
  // index `bytes` with a negative value and over-estimate the searchable length `n`.
  val start = math.max(0, from)
  val n = length - start
  var i = 0
  var j = 0
  // maxSuf packs two ints into one long: position in the upper 32 bits, period in the lower 32 bits.
  val suffixes = SWARUtil.maxSuf(slice, m, 0, true)
  val prefixes = SWARUtil.maxSuf(slice, m, 0, false)
  val ell = Math.max((suffixes >> 32).toInt, (prefixes >> 32).toInt)
  var per = Math.max(suffixes.toInt, prefixes.toInt)
  val checkLen = Math.min(m - per, ell + 1)
  if (SWARUtil.arrayBytesMatch(slice, 0, slice, per, checkLen)) {
    // The needle repeats with period `per`: `memory` records how much of the
    // needle is already known to match after a shift by `per`.
    var memory = -1
    while (j <= n - m) {
      // scan the right part of the needle, left to right
      i = Math.max(ell, memory) + 1
      while (i < m && slice(i) == bytes(i + j + start)) i += 1
      if (i > n) return -1
      if (i >= m) {
        // right part matched; scan the left part, right to left
        i = ell
        while (i > memory && slice(i) == bytes(i + j + start)) i -= 1
        if (i <= memory) return j + start
        j += per
        memory = m - per - 1
      } else {
        j += i - ell
        memory = -1
      }
    }
  } else {
    // No usable period: shift by a fixed amount after each left-part mismatch.
    per = Math.max(ell + 1, m - ell - 1) + 1
    while (j <= n - m) {
      i = ell + 1
      while (i < m && slice(i) == bytes(i + j + start)) i += 1
      if (i > n) return -1
      if (i >= m) {
        i = ell
        while (i >= 0 && slice(i) == bytes(i + j + start)) i -= 1
        if (i < 0) return j + start
        j += per
      } else j += i - ell
    }
  }
  -1
}
384+
385+
// Derived from code in Netty
386+
// https://github.com/netty/netty/blob/d28a0fc6598b50fbe8f296831777cf4b653a475f/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java#L366-L408
387+
/**
 * Tests whether `checkLength` bytes of this ByteString, starting at `fromIndex`,
 * equal the bytes of `checkBytes` starting at `bytesFromIndex`.
 *
 * Delegates to `SWARUtil.arrayBytesMatch`, which compares eight bytes at a time
 * and then the remaining bytes one by one, instead of duplicating that loop here.
 * The caller must make sure both ranges lie inside their arrays.
 */
override private[util] def bytesMatch(fromIndex: Int, checkBytes: Array[Byte], bytesFromIndex: Int,
    checkLength: Int): Boolean =
  SWARUtil.arrayBytesMatch(bytes, fromIndex, checkBytes, bytesFromIndex, checkLength)
409+
317410
override def slice(from: Int, until: Int): ByteString =
318411
if (from <= 0 && until >= length) this
319412
else if (from >= length || until <= 0 || from >= until) ByteString.empty
@@ -575,6 +668,86 @@ object ByteString {
575668
else -1
576669
}
577670

671+
// Derived from code in Netty
672+
// https://github.com/netty/netty/blob/d28a0fc6598b50fbe8f296831777cf4b653a475f/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java#L242-L325
673+
/**
 * Searches this ByteString for `slice` using the two-way string matching
 * algorithm derived from Netty. Reads go to the backing array `bytes`, offset by
 * `startIndex`; the returned index is relative to this ByteString.
 *
 * @param slice the bytes to look for
 * @param from  the index to start searching at; negative values are treated as 0
 * @return the index of the match found scanning forward from `from`, `0` if `slice`
 *         is empty, or `-1` if there is no match
 */
override def indexOfSlice(slice: Array[Byte], from: Int): Int = {
  val m = slice.length
  if (m == 0) return 0
  // When the needle has only one byte that can be read,
  // the indexOf() can be used
  if (m == 1) return indexOf(slice.head, from)
  // Clamp the start position: a negative `from` would otherwise let the loops below
  // read backing-array positions before `startIndex` (or a negative index) and
  // over-estimate the searchable length `n`.
  val start = math.max(0, from)
  val n = length - start
  // offset of the search window inside the backing array
  val base = start + startIndex
  var i = 0
  var j = 0
  // maxSuf packs two ints into one long: position in the upper 32 bits, period in the lower 32 bits.
  val suffixes = SWARUtil.maxSuf(slice, m, 0, true)
  val prefixes = SWARUtil.maxSuf(slice, m, 0, false)
  val ell = Math.max((suffixes >> 32).toInt, (prefixes >> 32).toInt)
  var per = Math.max(suffixes.toInt, prefixes.toInt)
  val checkLen = Math.min(m - per, ell + 1)
  if (SWARUtil.arrayBytesMatch(slice, 0, slice, per, checkLen)) {
    // The needle repeats with period `per`: `memory` records how much of the
    // needle is already known to match after a shift by `per`.
    var memory = -1
    while (j <= n - m) {
      // scan the right part of the needle, left to right
      i = Math.max(ell, memory) + 1
      while (i < m && slice(i) == bytes(i + j + base)) i += 1
      if (i > n) return -1
      if (i >= m) {
        // right part matched; scan the left part, right to left
        i = ell
        while (i > memory && slice(i) == bytes(i + j + base)) i -= 1
        if (i <= memory) return j + start
        j += per
        memory = m - per - 1
      } else {
        j += i - ell
        memory = -1
      }
    }
  } else {
    // No usable period: shift by a fixed amount after each left-part mismatch.
    per = Math.max(ell + 1, m - ell - 1) + 1
    while (j <= n - m) {
      i = ell + 1
      while (i < m && slice(i) == bytes(i + j + base)) i += 1
      if (i > n) return -1
      if (i >= m) {
        i = ell
        while (i >= 0 && slice(i) == bytes(i + j + base)) i -= 1
        if (i < 0) return j + start
        j += per
      } else j += i - ell
    }
  }
  -1
}
723+
724+
// Derived from code in Netty
725+
// https://github.com/netty/netty/blob/d28a0fc6598b50fbe8f296831777cf4b653a475f/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java#L366-L408
726+
/**
 * Tests whether `checkLength` bytes of this ByteString, starting at `fromIndex`,
 * equal the bytes of `checkBytes` starting at `bytesFromIndex`.
 *
 * `fromIndex` is relative to this ByteString, so it is shifted by `startIndex`
 * before reading the backing array. Delegates to `SWARUtil.arrayBytesMatch`,
 * which compares eight bytes at a time and then the remaining bytes one by one,
 * instead of duplicating that loop here. The caller must make sure both ranges
 * lie inside their respective sequences.
 */
override private[util] def bytesMatch(fromIndex: Int,
    checkBytes: Array[Byte],
    bytesFromIndex: Int,
    checkLength: Int): Boolean =
  SWARUtil.arrayBytesMatch(bytes, fromIndex + startIndex, checkBytes, bytesFromIndex, checkLength)
750+
578751
override def copyToArray[B >: Byte](dest: Array[B], start: Int, len: Int): Int = {
579752
// min of the bytes available to copy, bytes there is room for in dest and the requested number of bytes
580753
val toCopy = math.min(math.min(len, length), dest.length - start)
@@ -912,6 +1085,22 @@ object ByteString {
9121085
}
9131086
}
9141087

1088+
/**
 * Tests whether `checkLength` bytes of this ByteString, starting at `fromIndex`,
 * equal the bytes of `checkBytes` starting at `checkBytesFromIndex`.
 *
 * When the whole compared range lies inside the first fragment, the comparison is
 * handed to that fragment's own `bytesMatch`; otherwise the bytes are compared one
 * at a time through `apply`.
 */
private[util] def bytesMatch(fromIndex: Int,
    checkBytes: Array[Byte],
    checkBytesFromIndex: Int,
    checkLength: Int): Boolean = {
  // The fast path is only safe when every index in [fromIndex, fromIndex + checkLength)
  // belongs to the head fragment, i.e. head.length >= fromIndex + checkLength.
  // Written as a subtraction so the bound cannot overflow, and restricted to a
  // non-negative fromIndex so the head is never asked to read before its own start.
  val headCoversRange =
    checkLength > 1 &&
    fromIndex >= 0 &&
    bytestrings.nonEmpty &&
    bytestrings.head.length - fromIndex >= checkLength
  if (headCoversRange) {
    bytestrings.head.bytesMatch(fromIndex, checkBytes, checkBytesFromIndex, checkLength)
  } else {
    var i = 0
    while (i < checkLength) {
      if (apply(fromIndex + i) != checkBytes(checkBytesFromIndex + i)) return false
      i += 1
    }
    true
  }
}
1103+
9151104
protected def writeReplace(): AnyRef = new SerializationProxy(this)
9161105
}
9171106

@@ -1093,22 +1282,10 @@ sealed abstract class ByteString
10931282
* @since 2.0.0
10941283
*/
10951284
def indexOfSlice(slice: Array[Byte], from: Int): Int = {
1096-
// this is only called if the first byte matches, so we can skip that check
1097-
def check(startPos: Int): Boolean = {
1098-
var i = startPos + 1
1099-
var j = 1
1100-
// let's trust the calling code has ensured that we have enough bytes in this ByteString
1101-
while (j < slice.length) {
1102-
if (apply(i) != slice(j)) return false
1103-
i += 1
1104-
j += 1
1105-
}
1106-
true
1107-
}
11081285
@tailrec def rec(from: Int): Int = {
11091286
val startPos = indexOf(slice.head, from, length - slice.length + 1)
11101287
if (startPos == -1) -1
1111-
else if (check(startPos)) startPos
1288+
else if (bytesMatch(startPos, slice, 0, slice.length)) startPos
11121289
else rec(startPos + 1)
11131290
}
11141291
val sliceLength = slice.length
@@ -1147,18 +1324,7 @@ sealed abstract class ByteString
11471324
*/
11481325
def startsWith(bytes: Array[Byte], offset: Int): Boolean = {
11491326
if (length - offset < bytes.length) false
1150-
else {
1151-
var i = offset
1152-
var j = 0
1153-
while (j < bytes.length) {
1154-
// we know that byteString is at least as long as bytes,
1155-
// so no need to check i < length
1156-
if (apply(i) != bytes(j)) return false
1157-
i += 1
1158-
j += 1
1159-
}
1160-
true
1161-
}
1327+
else bytesMatch(offset, bytes, 0, bytes.length)
11621328
}
11631329

11641330
/**
@@ -1170,6 +1336,15 @@ sealed abstract class ByteString
11701336
*/
11711337
def startsWith(bytes: Array[Byte]): Boolean = startsWith(bytes, 0)
11721338

1339+
/**
 * Internal API: checks whether a segment of this ByteString is byte-for-byte equal
 * to a segment of `checkBytes`.
 *
 * ByteString1 and ByteString1C have optimized versions.
 *
 * @param fromIndex           index in this ByteString of the first byte to compare
 * @param checkBytes          the array to compare against
 * @param checkBytesFromIndex index in `checkBytes` of the first byte to compare
 * @param checkLength         number of bytes to compare
 * @return `true` if all compared bytes are equal
 */
private[util] def bytesMatch(
    fromIndex: Int,
    checkBytes: Array[Byte],
    checkBytesFromIndex: Int,
    checkLength: Int): Boolean
1347+
11731348
override def grouped(size: Int): Iterator[ByteString] = {
11741349
if (size <= 0) {
11751350
throw new IllegalArgumentException(s"size=$size must be positive")

actor/src/main/scala/org/apache/pekko/util/SWARUtil.scala

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ private[util] object SWARUtil {
4141
/**
4242
* Compiles given byte into a long pattern suitable for SWAR operations.
4343
*/
44-
def compilePattern(byteToFind: Byte): Long = (byteToFind & 0xFFL) * 0x101010101010101L
44+
final def compilePattern(byteToFind: Byte): Long =
45+
(byteToFind & 0xFFL) * 0x101010101010101L
4546

4647
/**
4748
* Applies a compiled pattern to given word.
@@ -51,7 +52,7 @@ private[util] object SWARUtil {
5152
* @param pattern the pattern to apply
5253
* @return a word where each byte that matches the pattern has the highest bit set
5354
*/
54-
def applyPattern(word: Long, pattern: Long): Long = {
55+
final def applyPattern(word: Long, pattern: Long): Long = {
5556
val input = word ^ pattern
5657
val tmp = (input & 0x7F7F7F7F7F7F7F7FL) + 0x7F7F7F7F7F7F7F7FL
5758
~(tmp | input | 0x7F7F7F7F7F7F7F7FL)
@@ -65,7 +66,7 @@ private[util] object SWARUtil {
6566
* @return the index of the first occurrence of the specified pattern in the specified word.
6667
* If no pattern is found, returns 8.
6768
*/
68-
def getIndex(word: Long): Int =
69+
final def getIndex(word: Long): Int =
6970
java.lang.Long.numberOfLeadingZeros(word) >>> 3
7071

7172
/**
@@ -76,8 +77,9 @@ private[util] object SWARUtil {
7677
* @param array the byte array to read from
7778
* @param index the index to read from
7879
* @return the long value at the specified index
80+
* @throws IndexOutOfBoundsException if index is out of bounds
7981
*/
80-
def getLong(array: Array[Byte], index: Int): Long = {
82+
final def getLong(array: Array[Byte], index: Int): Long = {
8183
if (longBeArrayViewSupported) {
8284
longBeArrayView.get(array, index)
8385
} else {
@@ -91,4 +93,67 @@ private[util] object SWARUtil {
9193
(array(index + 7).toLong & 0xFF)
9294
}
9395
}
96+
97+
// Derived from code in Netty
98+
// https://github.com/netty/netty/blob/d28a0fc6598b50fbe8f296831777cf4b653a475f/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java#L366-L408
99+
/**
 * Compares `checkLength` bytes of `arrayBytes` starting at `fromIndex` with the
 * bytes of `checkBytes` starting at `bytesFromIndex`.
 *
 * The range is compared in 8-byte words read with `getLong`, then the remaining
 * (fewer than eight) bytes are compared individually.
 *
 * @return `true` if every compared byte is equal
 */
final def arrayBytesMatch(arrayBytes: Array[Byte],
    fromIndex: Int,
    checkBytes: Array[Byte],
    bytesFromIndex: Int,
    checkLength: Int): Boolean = {
  // distance already compared, shared by both arrays
  var offset = 0
  var same = true
  // whole 8-byte words first
  var wordsLeft = checkLength >>> 3
  while (same && wordsLeft > 0) {
    same = getLong(arrayBytes, fromIndex + offset) == getLong(checkBytes, bytesFromIndex + offset)
    offset += 8
    wordsLeft -= 1
  }
  // then the trailing bytes that do not fill a word
  var bytesLeft = checkLength & 7
  while (same && bytesLeft > 0) {
    same = arrayBytes(fromIndex + offset) == checkBytes(bytesFromIndex + offset)
    offset += 1
    bytesLeft -= 1
  }
  same
}
124+
125+
// Derived from code in Netty
126+
// https://github.com/netty/netty/blob/a5343227b10456ec889a3fdc5fa4246f036a216d/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java#L327-L356
127+
/**
 * Suffix computation used by the two-way string matching search in ByteString.
 *
 * Scans `arrayBytes` up to (exclusive) index `m`, ordering bytes with `<` when
 * `isSuffix` is true and with `>` otherwise.
 *
 * @return a long holding the computed position in its upper 32 bits and the
 *         computed period in its lower 32 bits; the position is `-1` when it was
 *         never advanced
 */
final def maxSuf(arrayBytes: Array[Byte], m: Int, start: Int, isSuffix: Boolean): Long = {
  var period = 1
  var position = -1
  var candidate = start
  var offset = 1
  while (candidate + offset < m) {
    val current = arrayBytes(candidate + offset)
    val reference = arrayBytes(position + offset)
    if (current == reference) {
      if (offset == period) {
        // a full period matched: move the candidate forward by one period
        candidate += period
        offset = 1
      } else {
        offset += 1
      }
    } else if ((current < reference) == isSuffix) {
      // the bytes differ, so exactly one of `<` / `>` holds; this is the
      // direction selected by `isSuffix`
      candidate += offset
      offset = 1
      period = candidate - position
    } else {
      // the candidate wins: restart from it
      position = candidate
      candidate = position + 1
      offset = 1
      period = 1
    }
  }
  (position.toLong << 32) + period
}
94159
}

legal/pekko-actor-jar-license.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ Copyright EPFL and Lightbend, Inc.
222222
pekko-actor contains code from Netty which was released under an Apache 2.0 license.
223223
Copyright 2014 The Netty Project
224224
- actor/src/main/scala/org/apache/pekko/io/dns/DnsSettings.scala
225+
- actor/src/main/scala/org/apache/pekko/util/ByteString.scala
225226
- actor/src/main/scala/org/apache/pekko/util/SWARUtil.scala
226227

227228
---------------

0 commit comments

Comments
 (0)