mirror of
https://github.com/ocornut/imgui.git
synced 2026-02-02 03:40:06 +00:00
Merge c67c2026fd into 25158fe33b
This commit is contained in:
commit
1dd40baf53
2 changed files with 279 additions and 7 deletions
247
imgui.cpp
247
imgui.cpp
|
|
@ -2080,6 +2080,253 @@ ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c,
|
|||
// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions)
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#if defined IMGUI_ENABLE_AVX2_IMSTRLEN
|
||||
size_t ImStrlen(const char* str)
|
||||
{
|
||||
const size_t SIMD_LENGTH = 32;
|
||||
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
|
||||
|
||||
const unsigned char* begin = (unsigned char*)str;
|
||||
const unsigned char* ptr = begin;
|
||||
|
||||
// first page
|
||||
{
|
||||
const size_t PAGE_LENGTH = 4096;
|
||||
const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;
|
||||
|
||||
const unsigned char* page_end = (const unsigned char*)_andn_u64(PAGE_LENGTH_MASK, (uintptr_t)ptr + PAGE_LENGTH_MASK);
|
||||
const unsigned char* align_page_end = (const unsigned char*)(page_end - SIMD_LENGTH);
|
||||
|
||||
// if ptr is far the end of page
|
||||
if (ptr <= align_page_end)
|
||||
{
|
||||
__m256i target = _mm256_setzero_si256();
|
||||
|
||||
// if ptr not aligned, align ptr to SIMD_LENGTH
|
||||
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
|
||||
{
|
||||
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
|
||||
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
|
||||
|
||||
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
|
||||
}
|
||||
|
||||
// main loop of first page
|
||||
for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
|
||||
{
|
||||
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
|
||||
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
|
||||
|
||||
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
|
||||
// if ptr is near the end of page
|
||||
for (; ptr < page_end; ptr++)
|
||||
{
|
||||
if (!(*ptr))
|
||||
return (uintptr_t)(ptr - begin);
|
||||
}
|
||||
}
|
||||
|
||||
__m256i target = _mm256_setzero_si256();
|
||||
|
||||
// main loop
|
||||
for (; ; ptr += SIMD_LENGTH)
|
||||
{
|
||||
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
|
||||
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
|
||||
|
||||
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
#elif defined IMGUI_ENABLE_SSE_IMSTRLEN
|
||||
size_t ImStrlen(const char* str)
|
||||
{
|
||||
const size_t SIMD_LENGTH = 16;
|
||||
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
|
||||
|
||||
const unsigned char* begin = (unsigned char*)str;
|
||||
const unsigned char* ptr = begin;
|
||||
const unsigned char ch = '\0';
|
||||
|
||||
// first page
|
||||
{
|
||||
const size_t PAGE_LENGTH = 4096;
|
||||
const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;
|
||||
|
||||
const unsigned char* page_end = (const unsigned char*)(((uintptr_t)ptr + PAGE_LENGTH_MASK) & ~PAGE_LENGTH_MASK);
|
||||
const unsigned char* align_page_end = (const unsigned char*)(page_end - SIMD_LENGTH);
|
||||
|
||||
// if ptr is far the end of page
|
||||
if (ptr <= align_page_end)
|
||||
{
|
||||
__m128i target = _mm_set1_epi8(ch);
|
||||
|
||||
// if ptr not aligned, align ptr to SIMD_LENGTH
|
||||
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
|
||||
{
|
||||
__m128i chunk = _mm_lddqu_si128((const __m128i*)ptr);
|
||||
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
|
||||
|
||||
ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~SIMD_LENGTH_MASK);
|
||||
}
|
||||
|
||||
// main loop of first page
|
||||
for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
|
||||
{
|
||||
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
|
||||
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
|
||||
|
||||
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
|
||||
// if ptr is near the end of page
|
||||
for (; ptr < page_end; ptr++)
|
||||
{
|
||||
if (*ptr == ch)
|
||||
return (uintptr_t)(ptr - begin);
|
||||
}
|
||||
}
|
||||
|
||||
__m128i target = _mm_set1_epi8(ch);
|
||||
|
||||
// main loop
|
||||
for (; ; ptr += SIMD_LENGTH)
|
||||
{
|
||||
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
|
||||
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
|
||||
|
||||
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
// Fallback used when no SIMD variant is enabled: forwards to the C runtime strlen().
size_t ImStrlen(const char* str)
{
    const size_t length = strlen(str);
    return length;
}
|
||||
#endif
|
||||
|
||||
#if defined IMGUI_ENABLE_AVX2_IMMEMCHR
|
||||
const void* ImMemchr(const void* buf, int val, size_t count)
|
||||
{
|
||||
const size_t SIMD_LENGTH = 32;
|
||||
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
|
||||
|
||||
const unsigned char* ptr = (const unsigned char*)buf;
|
||||
const unsigned char* end = ptr + count;
|
||||
const unsigned char* align_end = end - SIMD_LENGTH;
|
||||
const unsigned char ch = (const unsigned char)val;
|
||||
|
||||
if (ptr <= align_end)
|
||||
{
|
||||
const __m256i target = _mm256_set1_epi8(ch);
|
||||
|
||||
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
|
||||
{
|
||||
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
|
||||
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (const void*)(ptr + _tzcnt_u32(mask));
|
||||
|
||||
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
|
||||
}
|
||||
|
||||
for (; ptr <= align_end; ptr += SIMD_LENGTH)
|
||||
{
|
||||
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
|
||||
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (const void*)(ptr + _tzcnt_u32(mask));
|
||||
|
||||
if (ptr <= end - 1024)
|
||||
_mm_prefetch((const char*)(ptr + 1024), _MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
|
||||
for (; ptr < end; ptr++)
|
||||
{
|
||||
if (*ptr == ch)
|
||||
return (const void*)(ptr);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
#elif defined IMGUI_ENABLE_SSE_IMMEMCHR
|
||||
const void* ImMemchr(const void* buf, int val, size_t count)
|
||||
{
|
||||
const size_t SIMD_LENGTH = 16;
|
||||
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
|
||||
|
||||
const unsigned char* ptr = (const unsigned char*)buf;
|
||||
const unsigned char* end = ptr + count;
|
||||
const unsigned char* align_end = end - SIMD_LENGTH;
|
||||
const unsigned char ch = (const unsigned char)val;
|
||||
|
||||
if (ptr <= align_end)
|
||||
{
|
||||
const __m128i target = _mm_set1_epi8(ch);
|
||||
|
||||
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
|
||||
{
|
||||
__m128i chunk = _mm_lddqu_si128((const __m128i*)ptr);
|
||||
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (const void*)(ptr + _tzcnt_u32(mask));
|
||||
|
||||
ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~SIMD_LENGTH_MASK);
|
||||
}
|
||||
|
||||
for (; ptr <= align_end; ptr += SIMD_LENGTH)
|
||||
{
|
||||
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
|
||||
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||
|
||||
if (mask)
|
||||
return (const void*)(ptr + _tzcnt_u32(mask));
|
||||
|
||||
if (ptr <= end - 1024)
|
||||
_mm_prefetch((const char*)(ptr + 1024), _MM_HINT_T0);
|
||||
}
|
||||
}
|
||||
|
||||
for (; ptr < end; ptr++)
|
||||
{
|
||||
if (*ptr == ch)
|
||||
return (const void*)(ptr);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
#else
|
||||
// Fallback used when no SIMD variant is enabled: forwards to the C runtime memchr().
const void* ImMemchr(const void* buf, int val, size_t count)
{
    const void* found = memchr(buf, val, count);
    return found;
}
|
||||
#endif
|
||||
|
||||
// Consider using _stricmp/_strnicmp under Windows or strcasecmp/strncasecmp. We don't actually use either ImStricmp/ImStrnicmp in the codebase any more.
|
||||
int ImStricmp(const char* str1, const char* str2)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue