mirror of
https://github.com/ocornut/imgui.git
synced 2026-01-11 00:04:24 +00:00
Merge c67c2026fd into 25158fe33b
This commit is contained in:
commit
1dd40baf53
2 changed files with 279 additions and 7 deletions
247
imgui.cpp
247
imgui.cpp
|
|
@ -2080,6 +2080,253 @@ ImVec2 ImTriangleClosestPoint(const ImVec2& a, const ImVec2& b, const ImVec2& c,
|
||||||
// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions)
|
// [SECTION] MISC HELPERS/UTILITIES (String, Format, Hash functions)
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined IMGUI_ENABLE_AVX2_IMSTRLEN
|
||||||
|
size_t ImStrlen(const char* str)
|
||||||
|
{
|
||||||
|
const size_t SIMD_LENGTH = 32;
|
||||||
|
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
|
||||||
|
|
||||||
|
const unsigned char* begin = (unsigned char*)str;
|
||||||
|
const unsigned char* ptr = begin;
|
||||||
|
|
||||||
|
// first page
|
||||||
|
{
|
||||||
|
const size_t PAGE_LENGTH = 4096;
|
||||||
|
const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;
|
||||||
|
|
||||||
|
const unsigned char* page_end = (const unsigned char*)_andn_u64(PAGE_LENGTH_MASK, (uintptr_t)ptr + PAGE_LENGTH_MASK);
|
||||||
|
const unsigned char* align_page_end = (const unsigned char*)(page_end - SIMD_LENGTH);
|
||||||
|
|
||||||
|
// if ptr is far the end of page
|
||||||
|
if (ptr <= align_page_end)
|
||||||
|
{
|
||||||
|
__m256i target = _mm256_setzero_si256();
|
||||||
|
|
||||||
|
// if ptr not aligned, align ptr to SIMD_LENGTH
|
||||||
|
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
|
||||||
|
{
|
||||||
|
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
|
||||||
|
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
|
||||||
|
|
||||||
|
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
// main loop of first page
|
||||||
|
for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
|
||||||
|
{
|
||||||
|
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
|
||||||
|
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
|
||||||
|
|
||||||
|
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if ptr is near the end of page
|
||||||
|
for (; ptr < page_end; ptr++)
|
||||||
|
{
|
||||||
|
if (!(*ptr))
|
||||||
|
return (uintptr_t)(ptr - begin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__m256i target = _mm256_setzero_si256();
|
||||||
|
|
||||||
|
// main loop
|
||||||
|
for (; ; ptr += SIMD_LENGTH)
|
||||||
|
{
|
||||||
|
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
|
||||||
|
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (uintptr_t)(ptr - begin + _tzcnt_u32(mask));
|
||||||
|
|
||||||
|
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif defined IMGUI_ENABLE_SSE_IMSTRLEN
|
||||||
|
size_t ImStrlen(const char* str)
|
||||||
|
{
|
||||||
|
const size_t SIMD_LENGTH = 16;
|
||||||
|
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
|
||||||
|
|
||||||
|
const unsigned char* begin = (unsigned char*)str;
|
||||||
|
const unsigned char* ptr = begin;
|
||||||
|
const unsigned char ch = '\0';
|
||||||
|
|
||||||
|
// first page
|
||||||
|
{
|
||||||
|
const size_t PAGE_LENGTH = 4096;
|
||||||
|
const size_t PAGE_LENGTH_MASK = PAGE_LENGTH - 1;
|
||||||
|
|
||||||
|
const unsigned char* page_end = (const unsigned char*)(((uintptr_t)ptr + PAGE_LENGTH_MASK) & ~PAGE_LENGTH_MASK);
|
||||||
|
const unsigned char* align_page_end = (const unsigned char*)(page_end - SIMD_LENGTH);
|
||||||
|
|
||||||
|
// if ptr is far the end of page
|
||||||
|
if (ptr <= align_page_end)
|
||||||
|
{
|
||||||
|
__m128i target = _mm_set1_epi8(ch);
|
||||||
|
|
||||||
|
// if ptr not aligned, align ptr to SIMD_LENGTH
|
||||||
|
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
|
||||||
|
{
|
||||||
|
__m128i chunk = _mm_lddqu_si128((const __m128i*)ptr);
|
||||||
|
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
|
||||||
|
|
||||||
|
ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~SIMD_LENGTH_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
// main loop of first page
|
||||||
|
for (; ptr <= align_page_end; ptr += SIMD_LENGTH)
|
||||||
|
{
|
||||||
|
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
|
||||||
|
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
|
||||||
|
|
||||||
|
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if ptr is near the end of page
|
||||||
|
for (; ptr < page_end; ptr++)
|
||||||
|
{
|
||||||
|
if (*ptr == ch)
|
||||||
|
return (uintptr_t)(ptr - begin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
__m128i target = _mm_set1_epi8(ch);
|
||||||
|
|
||||||
|
// main loop
|
||||||
|
for (; ; ptr += SIMD_LENGTH)
|
||||||
|
{
|
||||||
|
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
|
||||||
|
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (uintptr_t)(ptr + _tzcnt_u32(mask) - begin);
|
||||||
|
|
||||||
|
_mm_prefetch((const char*)ptr + 1024, _MM_HINT_T0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
size_t ImStrlen(const char* str)
|
||||||
|
{
|
||||||
|
return strlen(str);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined IMGUI_ENABLE_AVX2_IMMEMCHR
|
||||||
|
const void* ImMemchr(const void* buf, int val, size_t count)
|
||||||
|
{
|
||||||
|
const size_t SIMD_LENGTH = 32;
|
||||||
|
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
|
||||||
|
|
||||||
|
const unsigned char* ptr = (const unsigned char*)buf;
|
||||||
|
const unsigned char* end = ptr + count;
|
||||||
|
const unsigned char* align_end = end - SIMD_LENGTH;
|
||||||
|
const unsigned char ch = (const unsigned char)val;
|
||||||
|
|
||||||
|
if (ptr <= align_end)
|
||||||
|
{
|
||||||
|
const __m256i target = _mm256_set1_epi8(ch);
|
||||||
|
|
||||||
|
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
|
||||||
|
{
|
||||||
|
__m256i chunk = _mm256_lddqu_si256((const __m256i*)ptr);
|
||||||
|
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (const void*)(ptr + _tzcnt_u32(mask));
|
||||||
|
|
||||||
|
ptr = (const unsigned char*)_andn_u64(SIMD_LENGTH_MASK, (uintptr_t)ptr + SIMD_LENGTH_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; ptr <= align_end; ptr += SIMD_LENGTH)
|
||||||
|
{
|
||||||
|
__m256i chunk = _mm256_load_si256((const __m256i*)ptr);
|
||||||
|
int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (const void*)(ptr + _tzcnt_u32(mask));
|
||||||
|
|
||||||
|
if (ptr <= end - 1024)
|
||||||
|
_mm_prefetch((const char*)(ptr + 1024), _MM_HINT_T0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; ptr < end; ptr++)
|
||||||
|
{
|
||||||
|
if (*ptr == ch)
|
||||||
|
return (const void*)(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
#elif defined IMGUI_ENABLE_SSE_IMMEMCHR
|
||||||
|
const void* ImMemchr(const void* buf, int val, size_t count)
|
||||||
|
{
|
||||||
|
const size_t SIMD_LENGTH = 16;
|
||||||
|
const size_t SIMD_LENGTH_MASK = SIMD_LENGTH - 1;
|
||||||
|
|
||||||
|
const unsigned char* ptr = (const unsigned char*)buf;
|
||||||
|
const unsigned char* end = ptr + count;
|
||||||
|
const unsigned char* align_end = end - SIMD_LENGTH;
|
||||||
|
const unsigned char ch = (const unsigned char)val;
|
||||||
|
|
||||||
|
if (ptr <= align_end)
|
||||||
|
{
|
||||||
|
const __m128i target = _mm_set1_epi8(ch);
|
||||||
|
|
||||||
|
if ((uintptr_t)ptr & SIMD_LENGTH_MASK)
|
||||||
|
{
|
||||||
|
__m128i chunk = _mm_lddqu_si128((const __m128i*)ptr);
|
||||||
|
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (const void*)(ptr + _tzcnt_u32(mask));
|
||||||
|
|
||||||
|
ptr = (const unsigned char*)(((uintptr_t)ptr + SIMD_LENGTH_MASK) & ~SIMD_LENGTH_MASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; ptr <= align_end; ptr += SIMD_LENGTH)
|
||||||
|
{
|
||||||
|
__m128i chunk = _mm_load_si128((const __m128i*)ptr);
|
||||||
|
int mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, target));
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
return (const void*)(ptr + _tzcnt_u32(mask));
|
||||||
|
|
||||||
|
if (ptr <= end - 1024)
|
||||||
|
_mm_prefetch((const char*)(ptr + 1024), _MM_HINT_T0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; ptr < end; ptr++)
|
||||||
|
{
|
||||||
|
if (*ptr == ch)
|
||||||
|
return (const void*)(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
const void* ImMemchr(const void* buf, int val, size_t count)
|
||||||
|
{
|
||||||
|
return memchr(buf, val, count);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Consider using _stricmp/_strnicmp under Windows or strcasecmp/strncasecmp. We don't actually use either ImStricmp/ImStrnicmp in the codebase any more.
|
// Consider using _stricmp/_strnicmp under Windows or strcasecmp/strncasecmp. We don't actually use either ImStricmp/ImStrnicmp in the codebase any more.
|
||||||
int ImStricmp(const char* str1, const char* str2)
|
int ImStricmp(const char* str1, const char* str2)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -59,20 +59,45 @@ Index of this file:
|
||||||
#include <math.h> // sqrtf, fabsf, fmodf, powf, floorf, ceilf, cosf, sinf
|
#include <math.h> // sqrtf, fabsf, fmodf, powf, floorf, ceilf, cosf, sinf
|
||||||
#include <limits.h> // INT_MIN, INT_MAX
|
#include <limits.h> // INT_MIN, INT_MAX
|
||||||
|
|
||||||
// Enable SSE intrinsics if available
|
// Include compiler-specific intrinsics header
|
||||||
#if (defined __SSE__ || defined __x86_64__ || defined _M_X64 || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))) && !defined(IMGUI_DISABLE_SSE)
|
#if !defined(IMGUI_DISABLE_SIMD)
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
#include <intrin.h>
|
||||||
|
#elif defined(__GNUC__) || defined(__clang__)
|
||||||
|
#include <x86intrin.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Enable SIMD x86-64 intrinsics if available
|
||||||
|
#if (defined __x86_64__ || defined _M_X64) && !defined(IMGUI_DISABLE_SIMD)
|
||||||
|
#if (defined __SSE__ || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))) && !defined(IMGUI_DISABLE_SSE)
|
||||||
#define IMGUI_ENABLE_SSE
|
#define IMGUI_ENABLE_SSE
|
||||||
#include <immintrin.h>
|
#endif
|
||||||
#if (defined __AVX__ || defined __SSE4_2__)
|
#if defined (__SSE4_2__) && !defined(IMGUI_DISABLE_SSE4_2)
|
||||||
#define IMGUI_ENABLE_SSE4_2
|
#define IMGUI_ENABLE_SSE4_2
|
||||||
#include <nmmintrin.h>
|
#endif
|
||||||
|
#if (defined __AVX__) && !defined(IMGUI_DISABLE_AVX)
|
||||||
|
#define IMGUI_ENABLE_AVX
|
||||||
|
#endif
|
||||||
|
#if (defined __AVX2__) && !defined(IMGUI_DISABLE_AVX2)
|
||||||
|
#define IMGUI_ENABLE_AVX2
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Emscripten has partial SSE 4.2 support where _mm_crc32_u32 is not available. See https://emscripten.org/docs/porting/simd.html#id11 and #8213
|
// Emscripten has partial SSE 4.2 support where _mm_crc32_u32 is not available. See https://emscripten.org/docs/porting/simd.html#id11 and #8213
|
||||||
#if defined(IMGUI_ENABLE_SSE4_2) && !defined(IMGUI_USE_LEGACY_CRC32_ADLER) && !defined(__EMSCRIPTEN__)
|
#if defined(IMGUI_ENABLE_SSE4_2) && !defined(IMGUI_USE_LEGACY_CRC32_ADLER) && !defined(__EMSCRIPTEN__)
|
||||||
#define IMGUI_ENABLE_SSE4_2_CRC
|
#define IMGUI_ENABLE_SSE4_2_CRC
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Only AVX2 supports integer and byte instructions for 256-bit registers. Implementation this on AVX1 is not possible.
|
||||||
|
#if defined(IMGUI_ENABLE_AVX2)
|
||||||
|
#define IMGUI_ENABLE_AVX2_IMSTRLEN
|
||||||
|
#define IMGUI_ENABLE_AVX2_IMMEMCHR
|
||||||
|
#elif defined(IMGUI_ENABLE_AVX) || defined(IMGUI_ENABLE_SSE)
|
||||||
|
#define IMGUI_ENABLE_SSE_IMSTRLEN
|
||||||
|
#define IMGUI_ENABLE_SSE_IMMEMCHR
|
||||||
|
#endif
|
||||||
|
|
||||||
// Visual Studio warnings
|
// Visual Studio warnings
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#pragma warning (push)
|
#pragma warning (push)
|
||||||
|
|
@ -383,8 +408,8 @@ inline int ImUpperPowerOfTwo(int v) { v--; v |= v >> 1;
|
||||||
inline unsigned int ImCountSetBits(unsigned int v) { unsigned int count = 0; while (v > 0) { v = v & (v - 1); count++; } return count; }
|
inline unsigned int ImCountSetBits(unsigned int v) { unsigned int count = 0; while (v > 0) { v = v & (v - 1); count++; } return count; }
|
||||||
|
|
||||||
// Helpers: String
|
// Helpers: String
|
||||||
#define ImStrlen strlen
|
IMGUI_API size_t ImStrlen(const char* str); // Compute the length of a null-terminated string.
|
||||||
#define ImMemchr memchr
|
IMGUI_API const void* ImMemchr(const void* buf, int val, size_t count); // Find first occurrence of 'val' in buffer given length.
|
||||||
IMGUI_API int ImStricmp(const char* str1, const char* str2); // Case insensitive compare.
|
IMGUI_API int ImStricmp(const char* str1, const char* str2); // Case insensitive compare.
|
||||||
IMGUI_API int ImStrnicmp(const char* str1, const char* str2, size_t count); // Case insensitive compare to a certain count.
|
IMGUI_API int ImStrnicmp(const char* str1, const char* str2, size_t count); // Case insensitive compare to a certain count.
|
||||||
IMGUI_API void ImStrncpy(char* dst, const char* src, size_t count); // Copy to a certain count and always zero terminate (strncpy doesn't).
|
IMGUI_API void ImStrncpy(char* dst, const char* src, size_t count); // Copy to a certain count and always zero terminate (strncpy doesn't).
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue