1 Star 2 Fork 0

Asciphx/itoa

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
i2a.hpp 10.04 KB
一键复制 编辑 原始数据 按行查看 历史
Asciphx 提交于 2023-12-31 08:15 . upgrade
#ifndef I2A_HPP
#define I2A_HPP
/*
* This software is licensed under the AGPL-3.0 License.
*
* Copyright (C) 2023 Asciphx
*
* Permissions of this strongest copyleft license are conditioned on making
* available complete source code of licensed works and modifications, which
* include larger works using a licensed work, under the same license. Copyright
* and license notices must be preserved. Contributors provide an express grant
* of patent rights. When a modified version is used to provide a service over a
* network, the complete source code of the modified version must be made
* available.
*/
// from https://github.com/asciphx/Nod/blob/main/fc/include/hpp/i2a.hpp
#include <immintrin.h>
#include <stdint.h>
// Compiler portability macros used by the rest of this header.
//   _INLINE    - force-inline specifier placed on every function below.
//   __ALIGN(s) - s-byte alignment attribute applied to the constant tables.
//   _likely(x) - branch-prediction hint; a plain pass-through in the MSVC branch.
// _INLINE and __ALIGN are #undef'd again at the end of the header.
#ifdef _MSVC_LANG
#define _INLINE __forceinline
#define __ALIGN(s) __declspec(align(s))
// NOTE(review): the expansion is not parenthesised; fine for the simple
// comparisons it wraps in this header.
#define _likely(x) x
#include <intrin0.h>
#else
// !!(x) normalises the condition to 0/1 before handing it to __builtin_expect.
#define _likely(x) __builtin_expect(!!(x),1)
#define __ALIGN(s) __attribute__((aligned(s)))
#define _INLINE inline __attribute__((always_inline))
#endif
// Constant vectors for the SSE2 conversion. They are 16-byte aligned because
// they are read through dereferenced __m128i pointers.

// ceil(2^45 / 10000): multiply by this and shift right by 45 to divide by 10000.
static __ALIGN(16) const unsigned int K_Div0x2710[4] = {
  0xD1B71759u, 0xD1B71759u, 0xD1B71759u, 0xD1B71759u
};
// 10000 in every 32-bit lane (used to recover the remainder of the division).
static __ALIGN(16) const unsigned int K_2710[4] = { 10000, 10000, 10000, 10000 };
// Per-lane reciprocals: ceil(2^23/1000), ceil(2^19/100), ceil(2^17/10), 2^15.
static __ALIGN(16) const uint16_t K_DivPowers[8] = {
  8389, 5243, 13108, 32768,
  8389, 5243, 13108, 32768
};
// Per-lane powers of two (2^7, 2^11, 2^13, 2^15); a high-half multiply by
// these completes the right shift that pairs with K_DivPowers.
static __ALIGN(16) const uint16_t K_ShiftPowers[8] = {
  128, 2048, 8192, 32768,
  128, 2048, 8192, 32768
};
// 10 in every 16-bit lane.
static __ALIGN(16) const uint16_t K_10[8] = { 10, 10, 10, 10, 10, 10, 10, 10 };
// ASCII code of '0' (48) in every byte; added to digit values to form text.
static __ALIGN(16) const char K_Ascii0[16] = {
  48, 48, 48, 48, 48, 48, 48, 48,
  48, 48, 48, 48, 48, 48, 48, 48
};
// Splits v into eight decimal digits, one per 16-bit lane of the result
// (most significant digit in lane 0). Digits are numeric values 0..9, not
// ASCII. Callers in this header only pass 8-digit chunks (v < 100000000).
_INLINE __m128i U2ASSE(unsigned int v) {
  const __m128i k_div10000 = *reinterpret_cast<const __m128i*>(K_Div0x2710);
  const __m128i k_10000 = *reinterpret_cast<const __m128i*>(K_2710);
  const __m128i k_div_pow = *reinterpret_cast<const __m128i*>(K_DivPowers);
  const __m128i k_shift_pow = *reinterpret_cast<const __m128i*>(K_ShiftPowers);
  const __m128i k_ten = *reinterpret_cast<const __m128i*>(K_10);

  // abcd = v / 10000, efgh = v % 10000
  const __m128i abcdefgh = _mm_cvtsi32_si128(v);
  const __m128i abcd = _mm_srli_epi64(_mm_mul_epu32(abcdefgh, k_div10000), 45);
  const __m128i efgh = _mm_sub_epi32(abcdefgh, _mm_mul_epu32(abcd, k_10000));

  // [abcd, efgh, 0, ...] -> multiplied by 4 -> each half broadcast to 4 lanes
  const __m128i halves = _mm_unpacklo_epi16(abcd, efgh);
  const __m128i halves_x4 = _mm_slli_epi64(halves, 2);
  const __m128i dup16 = _mm_unpacklo_epi16(halves_x4, halves_x4);
  const __m128i spread = _mm_unpacklo_epi32(dup16, dup16);

  // Divide the lanes by 1000, 100, 10, 1: [a, ab, abc, abcd, e, ef, efg, efgh]
  const __m128i scaled = _mm_mulhi_epu16(spread, k_div_pow);
  const __m128i prefixes = _mm_mulhi_epu16(scaled, k_shift_pow);

  // Subtract 10 * (previous lane's prefix) to leave a single digit per lane.
  const __m128i prefixes_x10 = _mm_mullo_epi16(prefixes, k_ten);
  const __m128i shifted = _mm_slli_epi64(prefixes_x10, 16);
  return _mm_sub_epi16(prefixes, shifted);
}
// Shifts the 128-bit value right by d bytes for d in 1..8; any other d
// (including 0) returns the input unchanged. _mm_srli_si128 takes an
// immediate byte count, hence one case per supported count.
_INLINE __m128i ShiftDigitsSSE2(__m128i _, unsigned d) {
  switch (d) {
    case 1: return _mm_srli_si128(_, 1);
    case 2: return _mm_srli_si128(_, 2);
    case 3: return _mm_srli_si128(_, 3);
    case 4: return _mm_srli_si128(_, 4);
    case 5: return _mm_srli_si128(_, 5);
    case 6: return _mm_srli_si128(_, 6);
    case 7: return _mm_srli_si128(_, 7);
    case 8: return _mm_srli_si128(_, 8);
    default: return _;
  }
}
// Decimal text of 0..99 WITHOUT a leading zero, as ASCII codes (48 == '0').
// Entries 0..9 hold the single digit followed by '\0'; entries 10..99 hold
// both digits. The converters use it for the leading digit pair of a number:
// they store both bytes and step the output pointer back by one when the pair
// was a single digit, so the '\0' is overwritten by the next store.
static __ALIGN(2) const char _c1DigitsLut[100][2] = {
{'0','\0'},{49,'\0'},{50,'\0'},{51,0},{52,0},{53,0},{54,0},{55,0},{56,0},{57,0},{49,48},{49,49},{49,50},{49,51},{49,52},{49,53},{49,54},{49,55},{49,56},{49,57}
,{50,48},{50,49},{50,50},{50,51},{50,52},{50,53},{50,54},{50,55},{50,56},{50,57},{51,48},{51,49},{51,50},{51,51},{51,52},{51,53},{51,54},{51,55},{51,56},{51,57}
,{52,48},{52,49},{52,50},{52,51},{52,52},{52,53},{52,54},{52,55},{52,56},{52,57},{53,48},{53,49},{53,50},{53,51},{53,52},{53,53},{53,54},{53,55},{53,56},{53,57}
,{54,48},{54,49},{54,50},{54,51},{54,52},{54,53},{54,54},{54,55},{54,56},{54,57},{55,48},{55,49},{55,50},{55,51},{55,52},{55,53},{55,54},{55,55},{55,56},{55,57}
,{56,48},{56,49},{56,50},{56,51},{56,52},{56,53},{56,54},{56,55},{56,56},{56,57},{57,48},{57,49},{57,50},{57,51},{57,52},{57,53},{57,54},{57,55},{57,56},{57,57}
};
// Zero-padded two-digit decimal text of 0..99 ("00" .. "99"), as ASCII codes
// (48 == '0'). Entry n holds { tens digit, ones digit }; no terminator.
static __ALIGN(2) const char _c2DigitsLut[100][2] = {
{48,48},{48,49},{48,50},{48,51},{48,52},{48,53},{48,54},{48,55},{48,56},{48,57},{49,48},{49,49},{49,50},{49,51},{49,52},{49,53},{49,54},{49,55},{49,56},{49,57}
,{50,48},{50,49},{50,50},{50,51},{50,52},{50,53},{50,54},{50,55},{50,56},{50,57},{51,48},{51,49},{51,50},{51,51},{51,52},{51,53},{51,54},{51,55},{51,56},{51,57}
,{52,48},{52,49},{52,50},{52,51},{52,52},{52,53},{52,54},{52,55},{52,56},{52,57},{53,48},{53,49},{53,50},{53,51},{53,52},{53,53},{53,54},{53,55},{53,56},{53,57}
,{54,48},{54,49},{54,50},{54,51},{54,52},{54,53},{54,54},{54,55},{54,56},{54,57},{55,48},{55,49},{55,50},{55,51},{55,52},{55,53},{55,54},{55,55},{55,56},{55,57}
,{56,48},{56,49},{56,50},{56,51},{56,52},{56,53},{56,54},{56,55},{56,56},{56,57},{57,48},{57,49},{57,50},{57,51},{57,52},{57,53},{57,54},{57,55},{57,56},{57,57}
};
// Unsigned 8-bit integer to decimal text.
// Writes 1 to 3 ASCII digits at c (no terminator is written) and returns the
// pointer just past the last digit.
_INLINE static char* h2a(char* c, unsigned char i) {
  if (i >= 100) {
    // Three digits: the hundreds digit, then the remaining pair from the table.
    const unsigned char hundreds = static_cast<unsigned char>(i / 100);
    const char* const pair = _c2DigitsLut[i - hundreds * 100];
    c[0] = static_cast<char>(hundreds + 0x30);
    c[1] = pair[0];
    c[2] = pair[1];
    return c + 3;
  }
  const char* const pair = _c2DigitsLut[i];
  if (i > 9) *c++ = pair[0];  // the tens digit is emitted only when present
  *c++ = pair[1];
  return c;
}
// char (treated as a signed 8-bit value) to decimal text.
// Writes an optional '-' followed by 1 to 3 digits at c (no terminator) and
// returns the pointer just past the last character written.
// NOTE(review): relies on plain char being signed; where char is unsigned the
// negative branch is never taken.
_INLINE static char* a2a(char* c, char i) {
  if (i >= 0) return h2a(c, static_cast<unsigned char>(i));
  *c = 45;  // ASCII '-'
  // Negate in int so that -128 maps to 128 before narrowing to unsigned char.
  return h2a(c + 1, static_cast<unsigned char>(-static_cast<int>(i)));
}
// Unsigned 16-bit integer to decimal text.
// Writes 1 to 5 ASCII digits at c (no terminator is written) and returns the
// pointer just past the last digit.
_INLINE static char* t2a(char* c, unsigned short i) {
  if (i < 100) {
    const char* const pair = _c2DigitsLut[i];
    if (i > 9) *c++ = pair[0];
    *c++ = pair[1];
    return c;
  }
  if (i < 10000) {
    // Split into two pairs; the first pair may lack its leading digit.
    const unsigned char high = static_cast<unsigned char>(i / 100);
    const char* pair = _c2DigitsLut[high];
    if (i > 999) *c++ = pair[0];
    *c++ = pair[1];
    pair = _c2DigitsLut[i - high * 100];
    *c++ = pair[0];
    *c++ = pair[1];
    return c;
  }
  // Five digits: 0x68DB9 is ceil(2^32 / 10000), so the upper 32 bits of the
  // product hold i / 10000 and the lower 32 bits hold the fraction. Each
  // multiply of the fraction by 100 moves the next two digits into the upper half.
  unsigned long long fixed = 0x68DB9ULL * i;
  *c++ = _c2DigitsLut[fixed >> 32][1];
  for (int step = 0; step < 2; ++step) {
    fixed = (fixed & 0xFFFFFFFFULL) * 100;
    const char* const pair = _c2DigitsLut[fixed >> 32];
    *c++ = pair[0];
    *c++ = pair[1];
  }
  return c;
}
// Signed 16-bit integer to decimal text.
// Writes an optional '-' followed by 1 to 5 digits at c (no terminator) and
// returns the pointer just past the last character written.
_INLINE static char* s2a(char* c, short i) {
  if (i >= 0) return t2a(c, static_cast<unsigned short>(i));
  *c = 45;  // ASCII '-'
  // Negate in int so that -32768 maps to 32768 before narrowing.
  return t2a(c + 1, static_cast<unsigned short>(-static_cast<int>(i)));
}
// Unsigned 32-bit integer to decimal text.
// Writes 1 to 10 ASCII digits at c (no terminator is written) and returns the
// pointer just past the last digit.
//
// For values of three or more digits the number is multiplied by a rounded-up
// fixed-point reciprocal so that the leading digit pair lands above the
// fraction bits; each later multiply of the fraction by 100 exposes the next
// pair. The leading pair comes from _c1DigitsLut (no zero padding): both bytes
// are stored and the pointer steps back by one when that pair was one digit.
_INLINE static char* u2a(char* c, unsigned int i) {
  const char* pair;
  if (i < 100) {
    pair = _c2DigitsLut[i];
    if (i > 9) *c++ = pair[0];
    *c++ = pair[1];
    return c;
  }
  if (i < 10000) {
    // 24 fraction bits; 0x28F5D == ceil(2^24 / 100)
    unsigned int fixed = 0x28F5DU * i;
    pair = _c1DigitsLut[fixed >> 24];
    *c++ = pair[0];
    *c++ = pair[1];
    if (i < 1000) --c;
    fixed = (fixed & 0xFFFFFFU) * 100;
    pair = _c2DigitsLut[fixed >> 24];
    *c++ = pair[0];
    *c++ = pair[1];
    return c;
  }
  if (i < 1000000) {
    // 32 fraction bits; 0x68DB9 == ceil(2^32 / 10000)
    unsigned long long fixed = 0x68DB9ULL * i;
    pair = _c1DigitsLut[fixed >> 32];
    *c++ = pair[0];
    *c++ = pair[1];
    if (i < 100000) --c;
    for (int step = 0; step < 2; ++step) {
      fixed = (fixed & 0xFFFFFFFFULL) * 100;
      pair = _c2DigitsLut[fixed >> 32];
      *c++ = pair[0];
      *c++ = pair[1];
    }
    return c;
  }
  if (i < 100000000) {
    // 32 fraction bits; 0x10C6F7A1 == ceil(2^48 / 10^6), pre-shifted by 16
    unsigned long long fixed = (0x10C6F7A1ULL * i) >> 16;
    pair = _c1DigitsLut[fixed >> 32];
    *c++ = pair[0];
    *c++ = pair[1];
    if (i < 10000000) --c;
    for (int step = 0; step < 3; ++step) {
      fixed = (fixed & 0xFFFFFFFFULL) * 100;
      pair = _c2DigitsLut[fixed >> 32];
      *c++ = pair[0];
      *c++ = pair[1];
    }
    return c;
  }
  // 9 or 10 digits: 57 fraction bits; 1441151881 == ceil(2^57 / 10^8)
  const unsigned long long fraction_mask = (1ULL << 57) - 1;
  unsigned long long fixed = 1441151881ULL * i;
  pair = _c1DigitsLut[fixed >> 57];
  *c++ = pair[0];
  *c++ = pair[1];
  if (i < 1000000000) --c;
  for (int step = 0; step < 4; ++step) {
    fixed = (fixed & fraction_mask) * 100;
    pair = _c2DigitsLut[fixed >> 57];
    *c++ = pair[0];
    *c++ = pair[1];
  }
  return c;
}
// Signed 32-bit integer to decimal text.
// Writes an optional '-' followed by 1 to 10 digits at c (no terminator is
// written) and returns the pointer just past the last character written.
_INLINE static char* i2a(char* c, int i) {
  if (i < 0) {
    *c++ = 45;  // ASCII '-'
    // Take the magnitude in unsigned arithmetic: 0u - (unsigned)i is well
    // defined for every int, whereas decrementing or negating INT_MIN as a
    // signed value overflows (undefined behaviour).
    return u2a(c, 0u - static_cast<unsigned int>(i));
  }
  return u2a(c, static_cast<unsigned int>(i));
}
// Unsigned 64-bit integer to decimal text.
// Writes 1 to 20 ASCII digits at c (no terminator is written) and returns the
// pointer just past the last digit.
// NOTE: for values >= 100000000 a full 16-byte vector is stored at the digit
// position even when fewer than 16 digits are significant, so the buffer must
// have room for that store.
_INLINE static char* u64toa(char* c, unsigned long long i) {
  if (_likely(i < 100000000)) {
    // Up to 8 digits: scalar fixed-point path (same scheme as u2a).
    const char* pair;
    if (i < 100) {
      pair = _c2DigitsLut[i];
      if (i > 9) *c++ = pair[0];
      *c++ = pair[1];
      return c;
    }
    if (i < 10000) {
      unsigned long long fixed = 0x28F5DULL * i;
      pair = _c1DigitsLut[fixed >> 24];
      *c++ = pair[0];
      *c++ = pair[1];
      if (i < 1000) --c;
      fixed = (fixed & 0xFFFFFF) * 100;
      pair = _c2DigitsLut[fixed >> 24];
      *c++ = pair[0];
      *c++ = pair[1];
      return c;
    }
    if (i < 1000000) {
      unsigned long long fixed = 0x68DB9ULL * i;
      pair = _c1DigitsLut[fixed >> 32];
      *c++ = pair[0];
      *c++ = pair[1];
      if (i < 100000) --c;
      for (int step = 0; step < 2; ++step) {
        fixed = (fixed & 0xFFFFFFFFULL) * 100;
        pair = _c2DigitsLut[fixed >> 32];
        *c++ = pair[0];
        *c++ = pair[1];
      }
      return c;
    }
    unsigned long long fixed = (0x10C6F7A1ULL * i) >> 16;
    pair = _c1DigitsLut[fixed >> 32];
    *c++ = pair[0];
    *c++ = pair[1];
    if (i < 10000000) --c;
    for (int step = 0; step < 3; ++step) {
      fixed = (fixed & 0xFFFFFFFFULL) * 100;
      pair = _c2DigitsLut[fixed >> 32];
      *c++ = pair[0];
      *c++ = pair[1];
    }
    return c;
  }

  const __m128i ascii_zero = *reinterpret_cast<const __m128i*>(K_Ascii0);

  if (_likely(i < 10000000000000000)) {
    // 9..16 digits: convert both 8-digit halves with SSE2, then drop the
    // leading '0' characters by shifting the vector before storing it.
    const unsigned int high8 = static_cast<unsigned int>(i / 100000000);
    const unsigned int low8 = static_cast<unsigned int>(i % 100000000);
    const __m128i va = _mm_add_epi8(_mm_packus_epi16(U2ASSE(high8), U2ASSE(low8)), ascii_zero);
    // Bit k of mask is set when byte k is '0'; the first clear bit marks the
    // first significant digit. OR-ing 0x8000 keeps the scan argument non-zero.
    const unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(va, ascii_zero));
#ifdef _MSC_VER
    unsigned long digit;
    _BitScanForward(&digit, ~mask | 0x8000);
#else
    unsigned digit = __builtin_ctz(~mask | 0x8000);
#endif
    _mm_storeu_si128(reinterpret_cast<__m128i*>(c), ShiftDigitsSSE2(va, digit));
    return c + 16 - digit;
  }

  // 17..20 digits: the leading 1..4 digits go through the scalar path, the
  // remaining 16 digits are stored as one full vector.
  const unsigned int z = static_cast<unsigned int>(i / 10000000000000000);
  i %= 10000000000000000;
  if (z < 100) {
    if (z > 9) *c++ = _c2DigitsLut[z][0];
    *c++ = _c2DigitsLut[z][1];
  } else {
    unsigned int fixed = 0x28F5DU * z;
    const char* pair = _c1DigitsLut[fixed >> 24];
    *c++ = pair[0];
    *c++ = pair[1];
    if (z < 1000) --c;
    fixed = (fixed & 0xFFFFFF) * 100;
    pair = _c2DigitsLut[fixed >> 24];
    *c++ = pair[0];
    *c++ = pair[1];
  }
  const unsigned int high8 = static_cast<unsigned int>(i / 100000000);
  const unsigned int low8 = static_cast<unsigned int>(i % 100000000);
  const __m128i tail = _mm_add_epi8(_mm_packus_epi16(U2ASSE(high8), U2ASSE(low8)), ascii_zero);
  _mm_storeu_si128(reinterpret_cast<__m128i*>(c), tail);
  return c + 16;
}
// Signed 64-bit integer to decimal text.
// Writes an optional '-' followed by 1 to 19 digits at c (no terminator is
// written) and returns the pointer just past the last character written.
_INLINE static char* i64toa(char* c, long long i) {
  if (i < 0) {
    *c++ = 45;  // ASCII '-'
    // Take the magnitude in unsigned arithmetic: 0 - (unsigned long long)i is
    // well defined for every value, whereas decrementing or negating
    // LLONG_MIN as a signed value overflows (undefined behaviour).
    return u64toa(c, 0ULL - static_cast<unsigned long long>(i));
  }
  return u64toa(c, static_cast<unsigned long long>(i));
}
#undef __ALIGN
#undef _INLINE
#endif // I2A_HPP
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C
1
https://gitee.com/ASCIPHX/itoa.git
git@gitee.com:ASCIPHX/itoa.git
ASCIPHX
itoa
itoa
main

搜索帮助

D67c1975 1850385 1daf7b77 1850385