1
0
Fork 0
mirror of https://github.com/ocornut/imgui.git synced 2026-02-06 04:20:08 +00:00

Merged feature/simd-memchr as a single commit.

This commit is contained in:
Kraionix 2025-02-21 21:34:27 +07:00 committed by ocornut
parent 1ec99f4fd3
commit 707ba8c472
2 changed files with 130 additions and 6 deletions

101
imgui.cpp
View file

@ -1960,6 +1960,107 @@ ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c,
// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions)
//-----------------------------------------------------------------------------
#if defined IMGUI_ENABLE_AVX2_IMMEMCHR
const void* ImMemchr(const void* buf, int val, size_t count)
{
const size_t SIMD_LENGTH = 32;
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
const unsigned char* ptr = (const unsigned char*)buf;
const unsigned char* end = ptr + count;
const unsigned char* align_end = end - SIMD_LENGTH;
const unsigned char ch = (const unsigned char)val;
if (ptr <= align_end)
{
const __m256i target = _mm256_set1_epi8(ch);
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
{
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
if (mask)
return (const void*)(ptr + _tzcnt_u32(mask));
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
}
for (; ptr <= align_end; ptr += SIMD_LENGTH)
{
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
if (mask)
return (const void*)(ptr + _tzcnt_u32(mask));
if (ptr <= end - 1024)
_mm_prefetch((const char*)(ptr + 1024), _MM_HINT_T0);
}
}
for (; ptr < end; ptr++)
{
if (*ptr == ch)
return (const void*)(ptr);
}
return nullptr;
}
#elif defined IMGUI_ENABLE_SSE_IMMEMCHR
const void* ImMemchr(const void* buf, int val, size_t count)
{
const size_t SIMD_LENGTH = 16;
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
const unsigned char* ptr = (const unsigned char*)buf;
const unsigned char* end = ptr + count;
const unsigned char* align_end = end - SIMD_LENGTH;
const unsigned char ch = (const unsigned char)val;
if (ptr <= align_end)
{
const __m128i target = _mm_set1_epi8(ch);
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
{
__m128i chunk = _mm_lddqu_si128((const __m128i*)ptr);
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
if (mask)
return (const void*)(ptr + _tzcnt_u32(mask));
ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~SIMD_LENGTH_MASK);
}
for (; ptr <= align_end; ptr += SIMD_LENGTH)
{
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
if (mask)
return (const void*)(ptr + _tzcnt_u32(mask));
if (ptr <= end - 1024)
_mm_prefetch((const char*)(ptr + 1024), _MM_HINT_T0);
}
}
for (; ptr < end; ptr++)
{
if (*ptr == ch)
return (const void*)(ptr);
}
return nullptr;
}
#else
const void* ImMemchr(const void* buf, int val, size_t count)
{
return memchr(buf, val, count);
}
#endif
// Consider using _stricmp/_strnicmp under Windows or strcasecmp/strncasecmp. We don't actually use either ImStricmp/ImStrnicmp in the codebase any more.
int ImStricmp(const char* str1, const char* str2)
{