Dark Mode

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 9acf475

Browse files
committed
deps: V8: cherry-pick c135d5e81f82
Original commit message: [api][strings] Optimize ContainsOnlyOneByte with SIMD Add Highway-based SIMD implementation for checking if strings contain only one-byte (Latin-1) characters. Highway provides portable SIMD abstraction across different architectures with automatic target selection at runtime. The SIMD implementation processes 8 uint16_t values (128 bits) at once, checking if any character has the high byte set. This provides significant speedup over the previous implementation which processed 2-4 values per iteration and only checked every 16 iterations. The optimization applies to both String::ContainsOnlyOneByte() in the public API and the internal IsOnly8Bit() helper used during string hashing. This improves performance for string validation, encoding decisions, and UTF-8/UTF-16 conversion operations. Additionally, it resolves a TODO by leszeks. Change-Id: I41f519339fb96f3bf3f4fa30283f84ccbb5115d0 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7159233 Reviewed-by: Erik Corry Commit-Queue: Erik Corry Reviewed-by: Leszek Swirski Cr-Commit-Position: refs/heads/main@{#104120} Refs: v8/v8@c135d5e
1 parent 1d41c8f commit 9acf475

File tree

5 files changed

+48
-64
lines changed
  • common.gypi
  • deps/v8/src
    • api
      • api.cc
    • strings
      • string-hasher-inl.h
      • string-hasher.cc
      • string-hasher.h

5 files changed

+48
-64
lines changed

common.gypi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
# Reset this number to 0 on major V8 upgrades.
4040
# Increment by one for each non-official patch applied to deps/v8.
41-
'v8_embedder_string': '-node.13',
41+
'v8_embedder_string': '-node.14',
4242

4343
##### V8 defaults for Node.js #####
4444

deps/v8/src/api/api.cc

Lines changed: 3 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@
159159
#include "src/objects/intl-objects.h"
160160
#endif // V8_INTL_SUPPORT
161161

162+
#include "src/strings/string-hasher-inl.h"
163+
162164
#if V8_OS_LINUX || V8_OS_DARWIN || V8_OS_FREEBSD
163165
#include <signal.h>
164166
#include <unistd.h>
@@ -5575,28 +5577,6 @@ bool String::IsOneByte() const {
55755577
return Utils::OpenDirectHandle(this)->IsOneByteRepresentation();
55765578
}
55775579

5578-
// Helpers for ContainsOnlyOneByteHelper
5579-
template <size_t size>
5580-
struct OneByteMask;
5581-
template <>
5582-
struct OneByteMask<4> {
5583-
static const uint32_t value = 0xFF00FF00;
5584-
};
5585-
template <>
5586-
struct OneByteMask<8> {
5587-
static const uint64_t value = 0xFF00'FF00'FF00'FF00;
5588-
};
5589-
static const uintptr_t kOneByteMask = OneByteMask<sizeof(uintptr_t)>::value;
5590-
static const uintptr_t kAlignmentMask = sizeof(uintptr_t) - 1;
5591-
static inline bool Unaligned(const uint16_t* chars) {
5592-
return reinterpret_cast<const uintptr_t>(chars) & kAlignmentMask;
5593-
}
5594-
5595-
static inline const uint16_t* Align(const uint16_t* chars) {
5596-
return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(chars) &
5597-
~kAlignmentMask);
5598-
}
5599-
56005580
class ContainsOnlyOneByteHelper {
56015581
public:
56025582
ContainsOnlyOneByteHelper() : is_one_byte_(true) {}
@@ -5613,35 +5593,7 @@ class ContainsOnlyOneByteHelper {
56135593
// Nothing to do.
56145594
}
56155595
void VisitTwoByteString(const uint16_t* chars, int length) {
5616-
// Accumulated bits.
5617-
uintptr_t acc = 0;
5618-
// Align to uintptr_t.
5619-
const uint16_t* end = chars + length;
5620-
while (Unaligned(chars) && chars != end) {
5621-
acc |= *chars++;
5622-
}
5623-
// Read word aligned in blocks,
5624-
// checking the return value at the end of each block.
5625-
const uint16_t* aligned_end = Align(end);
5626-
const int increment = sizeof(uintptr_t) / sizeof(uint16_t);
5627-
const int inner_loops = 16;
5628-
while (chars + inner_loops * increment < aligned_end) {
5629-
for (int i = 0; i < inner_loops; i++) {
5630-
acc |= *reinterpret_cast<const uintptr_t*>(chars);
5631-
chars += increment;
5632-
}
5633-
// Check for early return.
5634-
if ((acc & kOneByteMask) != 0) {
5635-
is_one_byte_ = false;
5636-
return;
5637-
}
5638-
}
5639-
// Read the rest.
5640-
while (chars != end) {
5641-
acc |= *chars++;
5642-
}
5643-
// Check result.
5644-
if ((acc & kOneByteMask) != 0) is_one_byte_ = false;
5596+
is_one_byte_ = internal::detail::IsOnly8Bit(chars, length);
56455597
}
56465598

56475599
private:

deps/v8/src/strings/string-hasher-inl.h

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,6 @@
1111
#include "src/common/globals.h"
1212
#include "src/utils/utils.h"
1313

14-
#ifdef __SSE2__
15-
#include <emmintrin.h>
16-
#elif defined(__ARM_NEON__)
17-
#include <arm_neon.h>
18-
#endif
19-
2014
// Comment inserted to prevent header reordering.
2115
#include <type_traits>
2216

@@ -43,14 +37,18 @@ uint32_t ConvertRawHashToUsableHash(T raw_hash) {
4337
}
4438

4539
V8_INLINE bool IsOnly8Bit(const uint16_t* chars, unsigned len) {
46-
// TODO(leszeks): This could be SIMD for efficiency on large strings, if we
47-
// need it.
48-
for (unsigned i = 0; i < len; ++i) {
49-
if (chars[i] > 255) {
50-
return false;
40+
// For small strings, use a simple scalar loop to avoid SIMD overhead.
41+
// Threshold of 16 is chosen to balance setup cost vs benefit.
42+
if (len <= 16) {
43+
for (unsigned i = 0; i < len; i++) {
44+
if (chars[i] > 0xFF) {
45+
return false;
46+
}
5147
}
48+
return true;
5249
}
53-
return true;
50+
// For larger strings, use the non-inlined SIMD implementation.
51+
return IsOnly8BitSIMD(chars, len);
5452
}
5553

5654
V8_INLINE uint64_t GetRapidHash(const uint8_t* chars, uint32_t length,

deps/v8/src/strings/string-hasher.cc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include "src/strings/string-hasher.h"
77

8+
#include "hwy/highway.h"
89
#include "src/strings/string-hasher-inl.h"
910

1011
namespace v8::internal {
@@ -73,6 +74,33 @@ uint64_t HashConvertingTo8Bit(const uint16_t* chars, uint32_t length,
7374
return rapidhash(
7475
reinterpret_cast<const uint8_t*>(chars), length, seed, secret);
7576
}
77+
78+
bool IsOnly8BitSIMD(const uint16_t* chars, unsigned len) {
79+
namespace hw = hwy::HWY_NAMESPACE;
80+
hw::FixedTag<uint16_t, 8> tag;
81+
const size_t stride = hw::Lanes(tag);
82+
const auto high_byte_mask = hw::Set(tag, static_cast<uint16_t>(0xFF00));
83+
const auto zero = hw::Zero(tag);
84+
85+
const uint16_t* end = chars + len;
86+
while (chars + stride <= end) {
87+
const auto data = hw::LoadU(tag, chars);
88+
const auto high_bytes = hw::And(data, high_byte_mask);
89+
const auto cmp = hw::Eq(high_bytes, zero);
90+
if (!hw::AllTrue(tag, cmp)) {
91+
return false;
92+
}
93+
chars += stride;
94+
}
95+
// Handle remaining characters.
96+
while (chars < end) {
97+
if (*chars > 0xFF) {
98+
return false;
99+
}
100+
chars++;
101+
}
102+
return true;
103+
}
76104
} // namespace detail
77105

78106
} // namespace v8::internal

deps/v8/src/strings/string-hasher.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ class Vector;
1717

1818
namespace internal {
1919

20+
namespace detail {
21+
// Non-inlined SIMD implementation for checking if a uint16_t string contains
22+
// only Latin1 characters. Used by the inline IsOnly8Bit wrapper.
23+
V8_EXPORT_PRIVATE bool IsOnly8BitSIMD(const uint16_t* chars, unsigned len);
24+
} // namespace detail
25+
2026
// A simple incremental string hasher. Slow but allows for special casing each
2127
// individual character.
2228
class RunningStringHasher final {

0 commit comments

Comments
(0)