Giter Site home page Giter Site logo

C conversion/adaptation of jeaiii_to_text.h compared to C conversion/adaptation of itoa_jeaiii.cpp not as much faster as hoped about itoa HOT 1 OPEN

jeaiii avatar jeaiii commented on May 27, 2024
C conversion/adaptation of jeaiii_to_text.h compared to C conversion/adaptation of itoa_jeaiii.cpp not as much faster as hoped

from itoa.

Comments (1)

MasterDuke17 avatar MasterDuke17 commented on May 27, 2024

Huh, this is interesting. A standalone benchmark of my two C versions shows the newer one as slightly slower and almost double the instructions.

old (~0.137s and 2,282,158,658 instructions):

#include <stdio.h>
#include <stdint.h>

typedef struct pair { char t, o; } pair;
#define P(T) T, '0',  T, '1', T, '2', T, '3', T, '4', T, '5', T, '6', T, '7', T, '8', T, '9'
static const pair s_pairs[] = { P('0'), P('1'), P('2'), P('3'), P('4'), P('5'), P('6'), P('7'), P('8'), P('9') };

#define W(N, I) *(pair*)&b[N] = s_pairs[I]
#define A(N) t = ((uint64_t)(1) << (32 + N / 5 * N * 53 / 16)) / (uint32_t)(1e##N) + 1 + N/6 - N/8, t *= u, t >>= N / 5 * N * 53 / 16, t += N / 6 * 4, W(0, t >> 32)
#define S(N) b[N] = (char)((uint64_t)(10) * (uint32_t)(t) >> 32) + '0'
#define D(N) t = (uint64_t)(100) * (uint32_t)(t), W(N, t >> 32)

#define L0 b[0] = (char)(u) + '0'
#define L1 W(0, u)
#define L2 A(1), S(2)
#define L3 A(2), D(2)
#define L4 A(3), D(2), S(4)
#define L5 A(4), D(2), D(4)
#define L6 A(5), D(2), D(4), S(6)
#define L7 A(6), D(2), D(4), D(6)
#define L8 A(7), D(2), D(4), D(6), S(8)
#define L9 A(8), D(2), D(4), D(6), D(8)

#define LN(N) (L##N, b += N + 1)
#define LZ LN

#define LG(F) (u<100 ? u<10 ? F(0) : F(1) : u<1000000 ? u<10000 ? u<1000 ? F(2) : F(3) : u<100000 ? F(4) : F(5) : u<100000000 ? u<10000000 ? F(6) : F(7) : u<1000000000 ? F(8) : F(9))

static char * u64toa_jeaiii(uint64_t n, char* b) {
    uint32_t u;
    uint64_t t;

    if ((uint32_t)(n >> 32) == 0)
        return u = (uint32_t)(n), LG(LZ);

    uint64_t a = n / 100000000;

    if ((uint32_t)(a >> 32) == 0) {
        u = (uint32_t)(a);
        LG(LN);
    }
    else {
        u = (uint32_t)(a / 100000000);
        LG(LN);
        u = a % 100000000;
        LN(7);
    }

    u = n % 100000000;
    return LZ(7);
}

void main(void) {
    char work[20];
    long sum = 0;
    for (unsigned int i = 0; i < 100000000; i++) {
        u64toa_jeaiii(i, work);
        sum += work[0];
    }
    printf("sum = %ld\n", sum);
}

new (~0.188s and 4,094,107,679 instructions):

#include <stdio.h>
#include <stdint.h>

struct pair
{
    char dd[2];
};

static const struct pair dd[100] =
{
    {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
    {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
    {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
    {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
    {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
    {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
    {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
    {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
    {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
    {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'},
};
static const struct pair fd[100] =
{
    {'0','\0'}, {'1','\0'}, {'2','\0'}, {'3','\0'}, {'4','\0'}, {'5','\0'}, {'6','\0'}, {'7','\0'}, {'8','\0'}, {'9','\0'},
    {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
    {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
    {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
    {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
    {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
    {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
    {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
    {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
    {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'},
};

static const uint64_t mask24 = ((uint64_t)(1) << 24) - 1;
static const uint64_t mask32 = ((uint64_t)(1) << 32) - 1;
static const uint64_t mask57 = ((uint64_t)(1) << 57) - 1;

static char * u64toa_jeaiii(uint64_t n, char* b) {
    if (n < (uint32_t)(1e2))
    {
        *(struct pair *)(b) = fd[n];
        return n < 10 ? b + 1 : b + 2;
    }
    if (n < (uint32_t)(1e6))
    {
        if (n < (uint32_t)(1e4))
        {
            uint32_t f0 = (uint32_t)(10 * (1 << 24) / 1e3 + 1) * n;
            *(struct pair *)(b) = fd[f0 >> 24];
            b -= n < (uint32_t)(1e3);
            uint32_t f2 = (f0 & mask24) * 100;
            *(struct pair *)(b + 2) = dd[f2 >> 24];
            return b + 4;
        }
        uint64_t f0 = (uint64_t)(10 * (1ull << 32ull)/ 1e5 + 1) * n;
        *(struct pair *)(b) = fd[f0 >> 32];
        b -= n < (uint32_t)(1e5);
        uint64_t f2 = (f0 & mask32) * 100;
        *(struct pair *)(b + 2) = dd[f2 >> 32];
        uint64_t f4 = (f2 & mask32) * 100;
        *(struct pair *)(b + 4) = dd[f4 >> 32];
        return b + 6;
    }
    if (n < (uint64_t)(1ull << 32ull))
    {
        if (n < (uint32_t)(1e8))
        {
            uint64_t f0 = (uint64_t)(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
            *(struct pair *)(b) = fd[f0 >> 32];
            b -= n < (uint32_t)(1e7);
            uint64_t f2 = (f0 & mask32) * 100;
            *(struct pair *)(b + 2) = dd[f2 >> 32];
            uint64_t f4 = (f2 & mask32) * 100;
            *(struct pair *)(b + 4) = dd[f4 >> 32];
            uint64_t f6 = (f4 & mask32) * 100;
            *(struct pair *)(b + 6) = dd[f6 >> 32];
            return b + 8;
        }
        uint64_t f0 = (uint64_t)(10 * (1ull << 57ull) / 1e9 + 1) * n;
        *(struct pair *)(b) = fd[f0 >> 57];
        b -= n < (uint32_t)(1e9);
        uint64_t f2 = (f0 & mask57) * 100;
        *(struct pair *)(b + 2) = dd[f2 >> 57];
        uint64_t f4 = (f2 & mask57) * 100;
        *(struct pair *)(b + 4) = dd[f4 >> 57];
        uint64_t f6 = (f4 & mask57) * 100;
        *(struct pair *)(b + 6) = dd[f6 >> 57];
        uint64_t f8 = (f6 & mask57) * 100;
        *(struct pair *)(b + 8) = dd[f8 >> 57];
        return b + 10;
    }

    // if we get here U must be (uint64_t) but some compilers don't know that, so reassign n to a (uint64_t) to avoid warnings
    uint32_t z = n % (uint32_t)(1e8);
    uint64_t u = n / (uint32_t)(1e8);

    if (u < (uint32_t)(1e2))
    {
        // u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
        *(struct pair *)(b) = dd[u];
        b += 2;
    }
    else if (u < (uint32_t)(1e6))
    {
        if (u < (uint32_t)(1e4))
        {
            uint32_t f0 = (uint32_t)(10 * (1 << 24) / 1e3 + 1) * u;
            *(struct pair *)(b) = fd[f0 >> 24];
            b -= u < (uint32_t)(1e3);
            uint32_t f2 = (f0 & mask24) * 100;
            *(struct pair *)(b + 2) = dd[f2 >> 24];
            b += 4;
        }
        else
        {
            uint64_t f0 = (uint64_t)(10 * (1ull << 32ull) / 1e5 + 1) * u;
            *(struct pair *)(b) = fd[f0 >> 32];
            b -= u < (uint32_t)(1e5);
            uint64_t f2 = (f0 & mask32) * 100;
            *(struct pair *)(b + 2) = dd[f2 >> 32];
            uint64_t f4 = (f2 & mask32) * 100;
            *(struct pair *)(b + 4) = dd[f4 >> 32];
            b += 6;
        }
    }
    else if (u < (uint32_t)(1e8))
    {
        uint64_t f0 = (uint64_t)(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
        *(struct pair *)(b) = fd[f0 >> 32];
        b -= u < (uint32_t)(1e7);
        uint64_t f2 = (f0 & mask32) * 100;
        *(struct pair *)(b + 2) = dd[f2 >> 32];
        uint64_t f4 = (f2 & mask32) * 100;
        *(struct pair *)(b + 4) = dd[f4 >> 32];
        uint64_t f6 = (f4 & mask32) * 100;
        *(struct pair *)(b + 6) = dd[f6 >> 32];
        b += 8;
    }
    else if (u < (uint64_t)(1ull << 32ull))
    {
        uint64_t f0 = (uint64_t)(10 * (1ull << 57ull) / 1e9 + 1) * u;
        *(struct pair *)(b) = fd[f0 >> 57];
        b -= u < (uint32_t)(1e9);
        uint64_t f2 = (f0 & mask57) * 100;
        *(struct pair *)(b + 2) = dd[f2 >> 57];
        uint64_t f4 = (f2 & mask57) * 100;
        *(struct pair *)(b + 4) = dd[f4 >> 57];
        uint64_t f6 = (f4 & mask57) * 100;
        *(struct pair *)(b + 6) = dd[f6 >> 57];
        uint64_t f8 = (f6 & mask57) * 100;
        *(struct pair *)(b + 8) = dd[f8 >> 57];
        b += 10;
    }
    else
    {
        uint32_t y = u % (uint32_t)(1e8);
        u /= (uint32_t)(1e8);

        // u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
        if (u < (uint32_t)(1e2))
        {
            *(struct pair *)(b) = dd[u];
            b += 2;
        }
        else
        {
            uint32_t f0 = (uint32_t)(10 * (1 << 24) / 1e3 + 1) * u;
            *(struct pair *)(b) = fd[f0 >> 24];
            b -= u < (uint32_t)(1e3);
            uint32_t f2 = (f0 & mask24) * 100;
            *(struct pair *)(b + 2) = dd[f2 >> 24];
            b += 4;
        }
        // do 8 digits
        uint64_t f0 = ((uint64_t)((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
        *(struct pair *)(b) = dd[f0 >> 32];
        uint64_t f2 = (f0 & mask32) * 100;
        *(struct pair *)(b + 2) = dd[f2 >> 32];
        uint64_t f4 = (f2 & mask32) * 100;
        *(struct pair *)(b + 4) = dd[f4 >> 32];
        uint64_t f6 = (f4 & mask32) * 100;
        *(struct pair *)(b + 6) = dd[f6 >> 32];
        b += 8;
    }
    // do 8 digits
    uint64_t f0 = ((uint64_t)((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
    *(struct pair *)(b) = dd[f0 >> 32];
    uint64_t f2 = (f0 & mask32) * 100;
    *(struct pair *)(b + 2) = dd[f2 >> 32];
    uint64_t f4 = (f2 & mask32) * 100;
    *(struct pair *)(b + 4) = dd[f4 >> 32];
    uint64_t f6 = (f4 & mask32) * 100;
    *(struct pair *)(b + 6) = dd[f6 >> 32];
    return b + 8;
}

void main(void) {
    char work[20];
    long sum = 0;
    for (unsigned int i = 0; i < 100000000; i++) {
        u64toa_jeaiii(i, work);
        sum += work[0];
    }
    printf("sum = %ld\n", sum);
}

from itoa.

Related Issues (12)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.