Comments (1)
Huh, this is interesting. A standalone benchmark of my two C versions shows the newer one as slightly slower and almost double the instructions.
old (~0.137s and 2,282,158,658 instructions):
#include <stdio.h>
#include <stdint.h>
typedef struct pair { char t, o; } pair;
#define P(T) T, '0', T, '1', T, '2', T, '3', T, '4', T, '5', T, '6', T, '7', T, '8', T, '9'
static const pair s_pairs[] = { P('0'), P('1'), P('2'), P('3'), P('4'), P('5'), P('6'), P('7'), P('8'), P('9') };
#define W(N, I) *(pair*)&b[N] = s_pairs[I]
#define A(N) t = ((uint64_t)(1) << (32 + N / 5 * N * 53 / 16)) / (uint32_t)(1e##N) + 1 + N/6 - N/8, t *= u, t >>= N / 5 * N * 53 / 16, t += N / 6 * 4, W(0, t >> 32)
#define S(N) b[N] = (char)((uint64_t)(10) * (uint32_t)(t) >> 32) + '0'
#define D(N) t = (uint64_t)(100) * (uint32_t)(t), W(N, t >> 32)
#define L0 b[0] = (char)(u) + '0'
#define L1 W(0, u)
#define L2 A(1), S(2)
#define L3 A(2), D(2)
#define L4 A(3), D(2), S(4)
#define L5 A(4), D(2), D(4)
#define L6 A(5), D(2), D(4), S(6)
#define L7 A(6), D(2), D(4), D(6)
#define L8 A(7), D(2), D(4), D(6), S(8)
#define L9 A(8), D(2), D(4), D(6), D(8)
#define LN(N) (L##N, b += N + 1)
#define LZ LN
#define LG(F) (u<100 ? u<10 ? F(0) : F(1) : u<1000000 ? u<10000 ? u<1000 ? F(2) : F(3) : u<100000 ? F(4) : F(5) : u<100000000 ? u<10000000 ? F(6) : F(7) : u<1000000000 ? F(8) : F(9))
static char * u64toa_jeaiii(uint64_t n, char* b) {
uint32_t u;
uint64_t t;
if ((uint32_t)(n >> 32) == 0)
return u = (uint32_t)(n), LG(LZ);
uint64_t a = n / 100000000;
if ((uint32_t)(a >> 32) == 0) {
u = (uint32_t)(a);
LG(LN);
}
else {
u = (uint32_t)(a / 100000000);
LG(LN);
u = a % 100000000;
LN(7);
}
u = n % 100000000;
return LZ(7);
}
void main(void) {
char work[20];
long sum = 0;
for (unsigned int i = 0; i < 100000000; i++) {
u64toa_jeaiii(i, work);
sum += work[0];
}
printf("sum = %ld\n", sum);
}
new (~0.188s and 4,094,107,679 instructions):
#include <stdio.h>
#include <stdint.h>
struct pair
{
char dd[2];
};
static const struct pair dd[100] =
{
{'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
{'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
{'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
{'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
{'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
{'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
{'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
{'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
{'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
{'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'},
};
static const struct pair fd[100] =
{
{'0','\0'}, {'1','\0'}, {'2','\0'}, {'3','\0'}, {'4','\0'}, {'5','\0'}, {'6','\0'}, {'7','\0'}, {'8','\0'}, {'9','\0'},
{'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
{'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
{'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
{'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
{'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
{'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
{'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
{'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
{'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'},
};
static const uint64_t mask24 = ((uint64_t)(1) << 24) - 1;
static const uint64_t mask32 = ((uint64_t)(1) << 32) - 1;
static const uint64_t mask57 = ((uint64_t)(1) << 57) - 1;
static char * u64toa_jeaiii(uint64_t n, char* b) {
if (n < (uint32_t)(1e2))
{
*(struct pair *)(b) = fd[n];
return n < 10 ? b + 1 : b + 2;
}
if (n < (uint32_t)(1e6))
{
if (n < (uint32_t)(1e4))
{
uint32_t f0 = (uint32_t)(10 * (1 << 24) / 1e3 + 1) * n;
*(struct pair *)(b) = fd[f0 >> 24];
b -= n < (uint32_t)(1e3);
uint32_t f2 = (f0 & mask24) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 24];
return b + 4;
}
uint64_t f0 = (uint64_t)(10 * (1ull << 32ull)/ 1e5 + 1) * n;
*(struct pair *)(b) = fd[f0 >> 32];
b -= n < (uint32_t)(1e5);
uint64_t f2 = (f0 & mask32) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 32];
uint64_t f4 = (f2 & mask32) * 100;
*(struct pair *)(b + 4) = dd[f4 >> 32];
return b + 6;
}
if (n < (uint64_t)(1ull << 32ull))
{
if (n < (uint32_t)(1e8))
{
uint64_t f0 = (uint64_t)(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
*(struct pair *)(b) = fd[f0 >> 32];
b -= n < (uint32_t)(1e7);
uint64_t f2 = (f0 & mask32) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 32];
uint64_t f4 = (f2 & mask32) * 100;
*(struct pair *)(b + 4) = dd[f4 >> 32];
uint64_t f6 = (f4 & mask32) * 100;
*(struct pair *)(b + 6) = dd[f6 >> 32];
return b + 8;
}
uint64_t f0 = (uint64_t)(10 * (1ull << 57ull) / 1e9 + 1) * n;
*(struct pair *)(b) = fd[f0 >> 57];
b -= n < (uint32_t)(1e9);
uint64_t f2 = (f0 & mask57) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 57];
uint64_t f4 = (f2 & mask57) * 100;
*(struct pair *)(b + 4) = dd[f4 >> 57];
uint64_t f6 = (f4 & mask57) * 100;
*(struct pair *)(b + 6) = dd[f6 >> 57];
uint64_t f8 = (f6 & mask57) * 100;
*(struct pair *)(b + 8) = dd[f8 >> 57];
return b + 10;
}
// if we get here U must be (uint64_t) but some compilers don't know that, so reassign n to a (uint64_t) to avoid warnings
uint32_t z = n % (uint32_t)(1e8);
uint64_t u = n / (uint32_t)(1e8);
if (u < (uint32_t)(1e2))
{
// u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
*(struct pair *)(b) = dd[u];
b += 2;
}
else if (u < (uint32_t)(1e6))
{
if (u < (uint32_t)(1e4))
{
uint32_t f0 = (uint32_t)(10 * (1 << 24) / 1e3 + 1) * u;
*(struct pair *)(b) = fd[f0 >> 24];
b -= u < (uint32_t)(1e3);
uint32_t f2 = (f0 & mask24) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 24];
b += 4;
}
else
{
uint64_t f0 = (uint64_t)(10 * (1ull << 32ull) / 1e5 + 1) * u;
*(struct pair *)(b) = fd[f0 >> 32];
b -= u < (uint32_t)(1e5);
uint64_t f2 = (f0 & mask32) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 32];
uint64_t f4 = (f2 & mask32) * 100;
*(struct pair *)(b + 4) = dd[f4 >> 32];
b += 6;
}
}
else if (u < (uint32_t)(1e8))
{
uint64_t f0 = (uint64_t)(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
*(struct pair *)(b) = fd[f0 >> 32];
b -= u < (uint32_t)(1e7);
uint64_t f2 = (f0 & mask32) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 32];
uint64_t f4 = (f2 & mask32) * 100;
*(struct pair *)(b + 4) = dd[f4 >> 32];
uint64_t f6 = (f4 & mask32) * 100;
*(struct pair *)(b + 6) = dd[f6 >> 32];
b += 8;
}
else if (u < (uint64_t)(1ull << 32ull))
{
uint64_t f0 = (uint64_t)(10 * (1ull << 57ull) / 1e9 + 1) * u;
*(struct pair *)(b) = fd[f0 >> 57];
b -= u < (uint32_t)(1e9);
uint64_t f2 = (f0 & mask57) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 57];
uint64_t f4 = (f2 & mask57) * 100;
*(struct pair *)(b + 4) = dd[f4 >> 57];
uint64_t f6 = (f4 & mask57) * 100;
*(struct pair *)(b + 6) = dd[f6 >> 57];
uint64_t f8 = (f6 & mask57) * 100;
*(struct pair *)(b + 8) = dd[f8 >> 57];
b += 10;
}
else
{
uint32_t y = u % (uint32_t)(1e8);
u /= (uint32_t)(1e8);
// u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
if (u < (uint32_t)(1e2))
{
*(struct pair *)(b) = dd[u];
b += 2;
}
else
{
uint32_t f0 = (uint32_t)(10 * (1 << 24) / 1e3 + 1) * u;
*(struct pair *)(b) = fd[f0 >> 24];
b -= u < (uint32_t)(1e3);
uint32_t f2 = (f0 & mask24) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 24];
b += 4;
}
// do 8 digits
uint64_t f0 = ((uint64_t)((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
*(struct pair *)(b) = dd[f0 >> 32];
uint64_t f2 = (f0 & mask32) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 32];
uint64_t f4 = (f2 & mask32) * 100;
*(struct pair *)(b + 4) = dd[f4 >> 32];
uint64_t f6 = (f4 & mask32) * 100;
*(struct pair *)(b + 6) = dd[f6 >> 32];
b += 8;
}
// do 8 digits
uint64_t f0 = ((uint64_t)((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
*(struct pair *)(b) = dd[f0 >> 32];
uint64_t f2 = (f0 & mask32) * 100;
*(struct pair *)(b + 2) = dd[f2 >> 32];
uint64_t f4 = (f2 & mask32) * 100;
*(struct pair *)(b + 4) = dd[f4 >> 32];
uint64_t f6 = (f4 & mask32) * 100;
*(struct pair *)(b + 6) = dd[f6 >> 32];
return b + 8;
}
void main(void) {
char work[20];
long sum = 0;
for (unsigned int i = 0; i < 100000000; i++) {
u64toa_jeaiii(i, work);
sum += work[0];
}
printf("sum = %ld\n", sum);
}
from itoa.
Related Issues (12)
- How hard would it be to provide a C version of jeaiii_to_text.h? HOT 2
- warnings emitted by msvc HOT 4
- Issue in integer to string conversion HOT 3
- Features: zero-padding and fixed-point HOT 2
- Versions for uint8_t, int8_t, uint16_t and int16_t HOT 1
- Clarify roles of the two implementations HOT 2
- Clang missing-braces
- FYI, we're using this in MoarVM HOT 1
- GCC warning due to ASCII art HOT 7
- I don't think `n` can ever be < 10 here
- remove usage of template variable `mask` to support c++11
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from itoa.