1
0
Fork 0
mirror of https://github.com/ocornut/imgui.git synced 2026-01-11 00:04:24 +00:00
This commit is contained in:
Kraionix 2026-01-04 18:52:26 +01:00 committed by GitHub
commit 1dd40baf53
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 279 additions and 7 deletions

247
imgui.cpp
View file

@ -2080,6 +2080,253 @@ ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c,
// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions) // [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions)
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
#if defined IMGUI_ENABLE_AVX2_IMSTRLEN
size_t ImStrlen(const char* str)
{
const size_t SIMD_LENGTH = 32;
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
const unsigned char* begin = (unsigned char*)str;
const unsigned char* ptr = begin;
// first page
{
const size_t PAGE_LENGTH = 4096;
const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;
const unsigned char* page_end = (const unsigned char*)_andn_u64(PAGE_LENGTH_MASK, (uintptr_t)ptr + PAGE_LENGTH_MASK);
const unsigned char* align_page_end = (const unsigned char*)(page_end - SIMD_LENGTH);
// if ptr is far the end of page
if (ptr <= align_page_end)
{
__m256i target = _mm256_setzero_si256();
// if ptr not aligned, align ptr to SIMD_LENGTH
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
{
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
if (mask)
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
}
// main loop of first page
for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
{
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
if (mask)
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
}
}
// if ptr is near the end of page
for (; ptr < page_end; ptr++)
{
if (!(*ptr))
return (uintptr_t)(ptr - begin);
}
}
__m256i target = _mm256_setzero_si256();
// main loop
for (; ; ptr += SIMD_LENGTH)
{
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
if (mask)
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
}
}
#elif defined IMGUI_ENABLE_SSE_IMSTRLEN
size_t ImStrlen(const char* str)
{
const size_t SIMD_LENGTH = 16;
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
const unsigned char* begin = (unsigned char*)str;
const unsigned char* ptr = begin;
const unsigned char ch = '\0';
// first page
{
const size_t PAGE_LENGTH = 4096;
const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;
const unsigned char* page_end = (const unsigned char*)(((uintptr_t)ptr + PAGE_LENGTH_MASK) & ~PAGE_LENGTH_MASK);
const unsigned char* align_page_end = (const unsigned char*)(page_end - SIMD_LENGTH);
// if ptr is far the end of page
if (ptr <= align_page_end)
{
__m128i target = _mm_set1_epi8(ch);
// if ptr not aligned, align ptr to SIMD_LENGTH
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
{
__m128i chunk = _mm_lddqu_si128((const __m128i*)ptr);
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
if (mask)
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~SIMD_LENGTH_MASK);
}
// main loop of first page
for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
{
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
if (mask)
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
}
}
// if ptr is near the end of page
for (; ptr < page_end; ptr++)
{
if (*ptr == ch)
return (uintptr_t)(ptr - begin);
}
}
__m128i target = _mm_set1_epi8(ch);
// main loop
for (; ; ptr += SIMD_LENGTH)
{
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
if (mask)
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
}
}
#else
size_t ImStrlen(const char* str)
{
return strlen(str);
}
#endif
#if defined IMGUI_ENABLE_AVX2_IMMEMCHR
const void* ImMemchr(const void* buf, int val, size_t count)
{
const size_t SIMD_LENGTH = 32;
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
const unsigned char* ptr = (const unsigned char*)buf;
const unsigned char* end = ptr + count;
const unsigned char* align_end = end - SIMD_LENGTH;
const unsigned char ch = (const unsigned char)val;
if (ptr <= align_end)
{
const __m256i target = _mm256_set1_epi8(ch);
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
{
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
if (mask)
return (const void*)(ptr + _tzcnt_u32(mask));
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
}
for (; ptr <= align_end; ptr += SIMD_LENGTH)
{
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
if (mask)
return (const void*)(ptr + _tzcnt_u32(mask));
if (ptr <= end - 1024)
_mm_prefetch((const char*)(ptr + 1024), _MM_HINT_T0);
}
}
for (; ptr < end; ptr++)
{
if (*ptr == ch)
return (const void*)(ptr);
}
return nullptr;
}
#elif defined IMGUI_ENABLE_SSE_IMMEMCHR
const void* ImMemchr(const void* buf, int val, size_t count)
{
const size_t SIMD_LENGTH = 16;
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
const unsigned char* ptr = (const unsigned char*)buf;
const unsigned char* end = ptr + count;
const unsigned char* align_end = end - SIMD_LENGTH;
const unsigned char ch = (const unsigned char)val;
if (ptr <= align_end)
{
const __m128i target = _mm_set1_epi8(ch);
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
{
__m128i chunk = _mm_lddqu_si128((const __m128i*)ptr);
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
if (mask)
return (const void*)(ptr + _tzcnt_u32(mask));
ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~SIMD_LENGTH_MASK);
}
for (; ptr <= align_end; ptr += SIMD_LENGTH)
{
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
if (mask)
return (const void*)(ptr + _tzcnt_u32(mask));
if (ptr <= end - 1024)
_mm_prefetch((const char*)(ptr + 1024), _MM_HINT_T0);
}
}
for (; ptr < end; ptr++)
{
if (*ptr == ch)
return (const void*)(ptr);
}
return nullptr;
}
#else
const void* ImMemchr(const void* buf, int val, size_t count)
{
return memchr(buf, val, count);
}
#endif
// Consider using _stricmp/_strnicmp under Windows or strcasecmp/strncasecmp. We don't actually use either ImStricmp/ImStrnicmp in the codebase any more. // Consider using _stricmp/_strnicmp under Windows or strcasecmp/strncasecmp. We don't actually use either ImStricmp/ImStrnicmp in the codebase any more.
int ImStricmp(const char* str1, const char* str2) int ImStricmp(const char* str1, const char* str2)
{ {

View file

@ -59,20 +59,45 @@ Index of this file:
#include <math.h> // sqrtf, fabsf, fmodf, powf, floorf, ceilf, cosf, sinf #include <math.h> // sqrtf, fabsf, fmodf, powf, floorf, ceilf, cosf, sinf
#include <limits.h> // INT_MIN, INT_MAX #include <limits.h> // INT_MIN, INT_MAX
// Enable SSE intrinsics if available // Include compiler-specific intrinsics header
#if (defined __SSE__ || defined __x86_64__ || defined _M_X64 || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))) && !defined(IMGUI_DISABLE_SSE) #if !defined(IMGUI_DISABLE_SIMD)
#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(__GNUC__) || defined(__clang__)
#include <x86intrin.h>
#endif
#endif
// Enable SIMD x86-64 intrinsics if available
#if (defined __x86_64__ || defined _M_X64) && !defined(IMGUI_DISABLE_SIMD)
#if (defined __SSE__ || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))) && !defined(IMGUI_DISABLE_SSE)
#define IMGUI_ENABLE_SSE #define IMGUI_ENABLE_SSE
#include <immintrin.h> #endif
#if (defined __AVX__ || defined __SSE4_2__) #if defined (__SSE4_2__) && !defined(IMGUI_DISABLE_SSE4_2)
#define IMGUI_ENABLE_SSE4_2 #define IMGUI_ENABLE_SSE4_2
#include <nmmintrin.h> #endif
#if (defined __AVX__) && !defined(IMGUI_DISABLE_AVX)
#define IMGUI_ENABLE_AVX
#endif
#if (defined __AVX2__) && !defined(IMGUI_DISABLE_AVX2)
#define IMGUI_ENABLE_AVX2
#endif #endif
#endif #endif
// Emscripten has partial SSE 4.2 support where _mm_crc32_u32 is not available. See https://emscripten.org/docs/porting/simd.html#id11 and #8213 // Emscripten has partial SSE 4.2 support where _mm_crc32_u32 is not available. See https://emscripten.org/docs/porting/simd.html#id11 and #8213
#if defined(IMGUI_ENABLE_SSE4_2) && !defined(IMGUI_USE_LEGACY_CRC32_ADLER) && !defined(__EMSCRIPTEN__) #if defined(IMGUI_ENABLE_SSE4_2) && !defined(IMGUI_USE_LEGACY_CRC32_ADLER) && !defined(__EMSCRIPTEN__)
#define IMGUI_ENABLE_SSE4_2_CRC #define IMGUI_ENABLE_SSE4_2_CRC
#endif #endif
// Only AVX2 supports integer and byte instructions for 256-bit registers. Implementation this on AVX1 is not possible.
#if defined(IMGUI_ENABLE_AVX2)
#define IMGUI_ENABLE_AVX2_IMSTRLEN
#define IMGUI_ENABLE_AVX2_IMMEMCHR
#elif defined(IMGUI_ENABLE_AVX) || defined(IMGUI_ENABLE_SSE)
#define IMGUI_ENABLE_SSE_IMSTRLEN
#define IMGUI_ENABLE_SSE_IMMEMCHR
#endif
// Visual Studio warnings // Visual Studio warnings
#ifdef _MSC_VER #ifdef _MSC_VER
#pragma warning (push) #pragma warning (push)
@ -383,8 +408,8 @@ inline int ImUpperPowerOfTwo(int v) { v--; v |= v >> 1;
inline unsigned int ImCountSetBits(unsigned int v) { unsigned int count = 0; while (v > 0) { v = v & (v - 1); count++; } return count; } inline unsigned int ImCountSetBits(unsigned int v) { unsigned int count = 0; while (v > 0) { v = v & (v - 1); count++; } return count; }
// Helpers: String // Helpers: String
#define ImStrlen strlen IMGUI_API size_t ImStrlen(const char* str); // Compute the length of a null-terminated string.
#define ImMemchr memchr IMGUI_API const void* ImMemchr(const void* buf, int val, size_t count); // Find first occurrence of 'val' in buffer given length.
IMGUI_API int ImStricmp(const char* str1, const char* str2); // Case insensitive compare. IMGUI_API int ImStricmp(const char* str1, const char* str2); // Case insensitive compare.
IMGUI_API int ImStrnicmp(const char* str1, const char* str2, size_t count); // Case insensitive compare to a certain count. IMGUI_API int ImStrnicmp(const char* str1, const char* str2, size_t count); // Case insensitive compare to a certain count.
IMGUI_API void ImStrncpy(char* dst, const char* src, size_t count); // Copy to a certain count and always zero terminate (strncpy doesn't). IMGUI_API void ImStrncpy(char* dst, const char* src, size_t count); // Copy to a certain count and always zero terminate (strncpy doesn't).