/**
 *  @file   qsort.c
 *  @brief  qsort() routine.
 *  @author tenk*
 *  @date   2010
 */

#define QSORT_STK_BUF_SIZE      1024

#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

#ifdef _WIN32
#define uint8_t             unsigned char
#else
#include <stdint.h>
#endif
#define ALGN_T              double


static void     quick_sort_sub (uint8_t *data, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *) );
static void     insertion_sort (uint8_t* data, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *) );
static void     insertion_sort2(uint8_t* data, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *) );
static size_t   quick_sort_loop(uint8_t *data, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *) );


void QSORT(void *base, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *))
{
    uint8_t*    data = (uint8_t*)base;
    assert(base != 0);
    assert(unitBytes > 0);
    assert(cmp != NULL);
    if (base == 0 || unitBytes == 0 || cmp == NULL) {
        return;
    }
    quick_sort_sub(data, unitCount, unitBytes, cmp);
}


static void quick_sort_sub(uint8_t *data, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *))
{
    size_t   mid;
    if (unitCount <= 32) {
        if (unitCount > 1) {
            if (unitBytes <= QSORT_STK_BUF_SIZE) {
                insertion_sort (data, unitCount, unitBytes, cmp);
            } else {
                insertion_sort2(data, unitCount, unitBytes, cmp);
            }
        }
        return;
    }
    mid = quick_sort_loop(data, unitCount, unitBytes, cmp);
    quick_sort_sub(data                , mid           , unitBytes, cmp);
    quick_sort_sub(data + mid*unitBytes, unitCount-mid , unitBytes, cmp);
}


//#define SWAP_P(type, l, r)    do  { type tMp_ = *(type*)(l); *(type*)(l) = *(type*)(r); *(type*)(r) = tMp_; } while (0)
#define   SWAP_P(type, l, r)        { type tMp_ = *(type*)(l); *(type*)(l) = *(type*)(r); *(type*)(r) = tMp_; }

#define SWAP_MEM(l,r,sz)    do {                        \
        uint8_t*    l_eNd_ = (uint8_t*)((l) + (sz));    \
        do {                                            \
            SWAP_P(uint8_t,(l), (r));                   \
            ++(l);                                      \
            ++(r);                                      \
        } while ((uint8_t*)(l) != l_eNd_);              \
    } while (0)

#define MEM_CPY(dst,src,size)   memcpy(dst,src,size)


static void insertion_sort(uint8_t* data, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *)) {
    //if (unitCount > 1)
    {
        ALGN_T      tmp[ (QSORT_STK_BUF_SIZE+sizeof(ALGN_T)-1) / sizeof(ALGN_T) ];
        uint8_t*    end = data + unitCount * unitBytes;
        uint8_t*    last= end - unitBytes;
        uint8_t*    key = last;
        do {
            uint8_t*    jp;
            key -= unitBytes;
            for (jp = key; jp < last && cmp(key, jp+unitBytes) > 0; jp += unitBytes) {
                ;
            }
            if (jp != key) {
                size_t   sz = jp - key;
                uint8_t* d  = key;
                uint8_t* s  = d + unitBytes;
                MEM_CPY(tmp, key, unitBytes);
                MEM_CPY(d  , s  , sz);
                MEM_CPY(key, tmp, unitBytes);
            }
        } while (key > data);
    }
}


static void insertion_sort2(uint8_t* data, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *)) {
    //if (unitCount > 1)
    {
        uint8_t*    end = data + unitCount * unitBytes;
        uint8_t*    ip  = data + unitBytes;
        do {
            uint8_t*    jp = ip;
            uint8_t*    kp = ip - unitBytes;
            while (jp > data && cmp(jp, kp) < 0) {
                SWAP_MEM(jp, kp, unitBytes);
                jp -= unitBytes;
                kp -= unitBytes;
            }
            ip += unitBytes;
        } while (ip < end);
    }
}


static size_t   quick_sort_loop(uint8_t *data, size_t unitCount, size_t unitBytes, int(*cmp)(const void *, const void *))
{
    uint8_t*    l   = data;
    uint8_t*    m   = data + (unitCount/2) * unitBytes;
    uint8_t*    r   = data + unitCount * unitBytes;
    size_t      ln  = 0;
    for (;;) {
        while (cmp(l, m) < 0)
            l += unitBytes, ++ln;
        do {
            r -= unitBytes;
        } while (cmp(m, r) < 0);
        if (l >= r)
            return ln;
        SWAP_MEM(l, r, unitBytes);
        l += unitBytes;
        ++ln;
    }
}