#include "pthread_adapt.h"
#include "hs_config.h"


#include <stdlib.h>
#include <stdio.h>

#include <pthread.h>
#include <unistd.h>
#include <assert.h>

/* Number of elements processed so far by the current/last adapt_transform
 * run.  Every access goes through count_ward. */
static unsigned long count;
static pthread_mutex_t count_ward = PTHREAD_MUTEX_INITIALIZER;

/*
 * Return a snapshot of the progress counter.
 *
 * The worker threads update `count` while holding count_ward, so the read
 * takes the same mutex instead of racing with them.
 *
 * NOTE: because it locks a mutex this function is not async-signal-safe;
 * call it from a regular thread, not from a signal handler.
 */
unsigned long get_status(void)
{
    unsigned long snapshot;

    pthread_mutex_lock(&count_ward);
    snapshot = count;
    pthread_mutex_unlock(&count_ward);

    return snapshot;
}

/*
 * Per-thread work slot: a contiguous range of elements still to process.
 *
 * The owner takes chunks from the front in extract_seq(); other threads
 * may take part of the range in extract_par().  Both do so while holding
 * `ward`.  extract_par() additionally peeks at `data_sz` without the lock
 * before deciding whether to lock the slot.
 */
typedef struct work
{
    hs_potential_t *volatile data;      /* first element not yet handed out */
    volatile size_t data_sz;            /* number of elements left starting at `data` */
    pthread_mutex_t ward;               /* locked around updates of `data` / `data_sz` */
} work_t;

/*
 * State shared by every thread of one adapt_transform() call.
 *
 * Field order matters: adapt_transform() fills this structure with a
 * positional initializer.
 */
typedef struct launcher
{
    work_t *work;                       /* array of `nbthread` work slots, one per thread */
    size_t nbthread;                    /* number of slots / maximum number of threads */
    volatile size_t currthread;         /* bumped by each thread as it starts; a slave uses
                                           the pre-increment value as its slot index */
    volatile size_t remaining_job;      /* incremented when a range is stolen, decremented when
                                           a thread has drained its slot; extract_par() loops
                                           while this is non-zero */
    const struct make_pot_param *param; /* forwarded unchanged to make_pot() */
    pthread_mutex_t ward;               /* locked around updates of `remaining_job` */
} launcher_t;

/*
 * Return floor(log2(v)) + 1, i.e. the number of significant bits of v.
 * By convention the result is 1 for v == 0 (same as for v == 1).
 *
 * Implemented as a plain shift loop so that it is correct for every width
 * of size_t; a table-driven variant limited to 32-bit masks silently
 * ignores the upper half of a 64-bit value.
 */
static size_t ilog(size_t v)
{
    size_t bits = 1;

    while((v >>= 1) != 0)
    {
        ++bits;
    }
    return bits;
}

/*
 * Granularity knob, overridable at compile time (-DMYTHRESHOLD=n).
 * extract_seq() hands out chunks of MYTHRESHOLD * ilog(remaining) elements,
 * and extract_par() only steals when half of the victim's remaining range
 * is at least MYTHRESHOLD elements.
 */
#if !defined MYTHRESHOLD
#define MYTHRESHOLD 10
#endif

/*
 * Home-grown boolean type used by the functions of this file.
 * NOTE(review): `bool`, `true` and `false` are declared here as ordinary
 * identifiers, so this would clash with <stdbool.h> if that header were
 * ever included in this translation unit.
 */
typedef enum bool_t
{
    true = 1,
    false = 0
} bool;

/*
 * Take the next chunk from the front of `work`.
 *
 * With the slot's mutex held, reserves MYTHRESHOLD * ilog(remaining)
 * elements (clamped to what is left), stores the start of the chunk in
 * *local_data and advances the slot past it.
 *
 * Returns the number of elements reserved, or 0 when the slot is empty,
 * in which case *local_data is left untouched.  Updating the launcher's
 * remaining_job counter is left to the caller.
 */
static size_t extract_seq(work_t * work, hs_potential_t ** local_data)
{
    size_t taken = 0;

    pthread_mutex_lock(&work->ward);

    if(work->data_sz != 0)
    {
        /* chunk size grows logarithmically with the amount of work left */
        taken = MYTHRESHOLD * ilog(work->data_sz);
        if(taken > work->data_sz)
            taken = work->data_sz;

        *local_data = work->data;
        work->data += taken;
        work->data_sz -= taken;
    }

    pthread_mutex_unlock(&work->ward);

    return taken;
}

/*
 * Try to refill `work` by taking part of another thread's range.
 *
 * Keeps sweeping over all slots of `launch` for as long as
 * launch->remaining_job is non-zero (this is a spin loop: there is no
 * sleep or yield between sweeps).  A slot is a candidate when it is not
 * `work` itself and its unlocked data_sz exceeds MYTHRESHOLD; the size is
 * then re-evaluated under the victim's mutex, and the steal only happens
 * if half of the victim's range is at least MYTHRESHOLD elements.
 *
 * On a steal, the front half of the victim's range is moved into `work`,
 * remaining_job is incremented, and true is returned.  Returns false once
 * remaining_job is observed to be zero.
 */
static bool extract_par(work_t * work, launcher_t * launch)
{
    while(launch->remaining_job)
    {
        size_t slot;

        for(slot = 0; slot < launch->nbthread; slot++)
        {
            work_t *victim = launch->work + slot;
            hs_potential_t *stolen;
            size_t stolen_sz;

            /* cheap unlocked filter: skip ourselves and nearly-empty slots */
            if(victim == work || victim->data_sz <= MYTHRESHOLD)
                continue;

            pthread_mutex_lock(&victim->ward);

            stolen_sz = victim->data_sz >> 1;
            if(stolen_sz < MYTHRESHOLD)
            {
                /* the range shrank in the meantime: not worth splitting */
                pthread_mutex_unlock(&victim->ward);
                continue;
            }

            /* account for the new job while the victim's slot is still
             * locked, i.e. before the victim can see its range drained */
            pthread_mutex_lock(&launch->ward);
            ++launch->remaining_job;
            pthread_mutex_unlock(&launch->ward);

            /* take the front half, leave the back half to the victim */
            stolen = victim->data;
            victim->data += stolen_sz;
            victim->data_sz -= stolen_sz;

            pthread_mutex_unlock(&victim->ward);

            pthread_mutex_lock(&work->ward);
            work->data_sz = stolen_sz;
            work->data = stolen;
            pthread_mutex_unlock(&work->ward);

            return true;
        }
    }

    return false;
}
/*
 * Apply make_pot() to each of the `local_sz` elements starting at
 * `local_data`, passing `param` through unchanged.
 *
 * - wrapped in do { } while(0) so that `local_run(...);` is a single
 *   statement and can be used in an unbraced if/else;
 * - every argument is parenthesized so expressions such as `a ? b : c`
 *   or `p + 1` expand as intended;
 * - the index has a macro-private name so it cannot capture a caller
 *   variable named `i`.
 * `local_sz` and `param` are evaluated on every iteration.
 */
#define local_run(local_data, local_sz, param)\
do {\
    size_t local_run_i_;\
    for(local_run_i_ = 0; local_run_i_ < (local_sz); ++local_run_i_)\
    {\
        make_pot((local_data) + local_run_i_, (param));\
    }\
} while(0)

static void *run_slave(launcher_t * launch)
{
    size_t tid = launch->currthread++;
    work_t *local_work;
    bool wait = false;
    pthread_t pid;

    launch->work[tid].data = NULL;
    launch->work[tid].data_sz = 0;
    pthread_mutex_init(&launch->work[tid].ward, NULL);
    local_work = launch->work + tid;

    if(launch->currthread < launch->nbthread)
    {
        wait =
           !pthread_create(&pid, NULL, (void *(*)(void *)) run_slave,
           launch);
    }

    while(extract_par(local_work, launch))
    {
        size_t local_size = 0;
        hs_potential_t *local_data = NULL;
        while((local_size =
              extract_seq( /*launch, */ local_work, &local_data)))
        {
            local_run(local_data, local_size, launch->param);
            pthread_mutex_lock(&count_ward);
            count += local_size;
            pthread_mutex_unlock(&count_ward);
        }
        pthread_mutex_lock(&launch->ward);
        --launch->remaining_job;
        pthread_mutex_unlock(&launch->ward);
    }

    if(wait)
        pthread_join(pid, NULL);

    return NULL;
}

/*
 * Worker loop of the calling thread, which owns slot 0 of launch->work.
 *
 * Slot 0 must already hold the input range (asserted non-NULL).  The
 * master registers itself in launch->currthread, starts the first slave
 * when more than one thread is requested, then alternates between
 * draining its own slot and stealing from the others until extract_par()
 * reports that nothing is left.  Finally it joins the slave it started.
 */
static void run_master(launcher_t * launch)
{
    work_t *own = launch->work;
    pthread_t child;
    bool spawned = false;

    assert(own->data != NULL);

    if(++launch->currthread < launch->nbthread)
    {
        if(pthread_create(&child, NULL, (void *(*)(void *)) run_slave,
              launch) == 0)
            spawned = true;
    }

    for(;;)
    {
        hs_potential_t *chunk = NULL;
        size_t chunk_sz = 0;

        /* drain the current range chunk by chunk */
        while((chunk_sz = extract_seq(own, &chunk)) != 0)
        {
            local_run(chunk, chunk_sz, launch->param);

            pthread_mutex_lock(&count_ward);
            count += chunk_sz;
            pthread_mutex_unlock(&count_ward);
        }

        /* this range is done: one job less */
        pthread_mutex_lock(&launch->ward);
        --launch->remaining_job;
        pthread_mutex_unlock(&launch->ward);

        /* look for more work in the other slots */
        if(!extract_par(own, launch))
            break;
    }

    if(spawned)
        pthread_join(child, NULL);
}

/*
 * Apply make_pot() to the `nbiter` elements starting at `iter`, spreading
 * the work over up to `nb_thread` threads (the calling thread included).
 *
 * iter      : first element to process
 * nbiter    : number of elements
 * param     : forwarded unchanged to make_pot()
 * nb_thread : number of work slots / threads; 0 is treated as 1, i.e. all
 *             the work is done by the calling thread
 *
 * Returns 1 on success, 0 when the bookkeeping could not be set up
 * (allocation or mutex initialization failure); in that case no element
 * has been processed.  The progress counter read by get_status() is reset
 * at the start of a successful call.
 */
int adapt_transform(hs_potential_t * iter, size_t nbiter,
   const struct make_pot_param *param, size_t nb_thread)
{
    launcher_t launch = {
        NULL,
        0,
        0,
        1,
        NULL,
        PTHREAD_MUTEX_INITIALIZER
    };
    work_t *work_ptr;
    size_t i;

    /* slot 0 is always needed for the calling thread */
    if(nb_thread == 0)
        nb_thread = 1;

    /*
     * calloc() rather than malloc(): the slots of threads that have not
     * started yet are scanned by extract_par(), so their data_sz must read
     * as 0 (nothing to steal) until the owning thread sets them up.  It
     * also checks the nb_thread * sizeof multiplication for overflow.
     */
    work_ptr = calloc(nb_thread, sizeof(*work_ptr));
    if(!work_ptr)
    {
        perror("[adapt_transform::malloc]");
        return 0;
    }

    /* slot 0 gets the whole input; its mutex is initialized in place
     * instead of being copied from another mutex object */
    work_ptr[0].data = iter;
    work_ptr[0].data_sz = nbiter;
    if(pthread_mutex_init(&work_ptr[0].ward, NULL) != 0)
    {
        fprintf(stderr, "[adapt_transform::pthread_mutex_init] failed\n");
        free(work_ptr);
        return 0;
    }

    launch.work = work_ptr;
    launch.nbthread = nb_thread;
    launch.param = param;

    pthread_mutex_lock(&count_ward);
    count = 0;
    pthread_mutex_unlock(&count_ward);

    run_master(&launch);

    /*
     * Every thread that started has been joined by now.  Slots
     * [0, currthread) are exactly those whose mutex was initialized:
     * slot 0 above, the others by their owning thread on startup.
     */
    for(i = 0; i < launch.currthread; i++)
        pthread_mutex_destroy(&work_ptr[i].ward);
    pthread_mutex_destroy(&launch.ward);

    free(work_ptr);
    return 1;
}
