// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/loop_invariant_path.cpp,v 1.4 2001/08/14 01:40:50 xhshi Exp $
//


#include "defines.h"
#include "loop_invariant_path.h"
#include "bit_vector.h"

// Closure passed to the flow-graph walk that collects loop headers (see
// get_loop_headers).  It bundles the work set that receives the headers
// with the memory manager used to allocate the work-list nodes.
class Loop_Header_Picker : public Closure
{
public:
    Work_Set *ws;       // work set whose work_list receives the loop headers
    Mem_Manager &mem;   // allocator used for the WL_Node entries

    Loop_Header_Picker(Work_Set *w, Mem_Manager &m)
        : ws(w), mem(m)
    {
    }
};

// Recognizes the two-instruction recurrence
//     t = r xor 1
//     r = t
// Returns the source position (0 or 1) holding the immediate 1 when `inst`
// is a xor with at least two sources, one of the first two sources is the
// immediate 1 and the other is a register, and the instruction following
// `inst` is a register assignment that copies the xor result back into
// that register.  Returns -1 in every other case.
int is_recur_xor_imm_one(Inst *inst)
{
    const bool is_binary_xor =
        inst->is_bitwise() &&
        ((Bitwise_Inst *)inst)->kind == Bitwise_Inst::k_xor &&
        inst->n_srcs >= 2;
    if (!is_binary_xor)
        return -1;

    // Try both operand orders; a later match overrides an earlier one.
    int imm_pos = -1;
    Reg_Operand *toggled_reg = NULL;
    for (int pos = 0; pos <= 1; pos++)
    {
        const int other = 1 - pos;
        if (inst->src(pos)->kind != Operand::Immediate)
            continue;
        if (!inst->src(other)->is_reg())
            continue;
        if (((Imm_Operand *)inst->src(pos))->imm() != 1)
            continue;
        imm_pos = pos;
        toggled_reg = (Reg_Operand *)inst->src(other);
    }
    if (imm_pos < 0)
        return -1;

    // The xor only counts as a recurrence when the very next instruction
    // writes the result back into the register that was xor'ed.
    Inst *follower = inst->next();
    const bool copies_back =
        follower->is_reg_assignment() &&
        follower->src(0) == inst->dst() &&
        follower->dst() == toggled_reg;
    return copies_back ? imm_pos : -1;
}

// If `inst` is a recurrence xor-with-1 (as recognized by
// is_recur_xor_imm_one), replaces its immediate source with `reg` and
// returns true.  Returns false, leaving `inst` untouched, otherwise.
bool replace_xor_imm_with_reg(Inst *inst, Reg_Operand *reg)
{
    const int imm_slot = is_recur_xor_imm_one(inst);
    switch (imm_slot)
    {
    case 0:
        inst->replace_src(0, reg);
        break;
    case 1:
        inst->replace_src(1, reg);
        break;
    default:
        break;
    }
    return imm_slot >= 0;
}

bool Loop_Invariant_Path::optimize(unsigned bv_size)
{
#ifdef TRACE_O3
    cout << "Candidate of a loop invariant path: header " << _loop_header->label <<
        " tail " << _tail_block->label << " postexit " << _post_exit->label << endl;
#endif // TRACE_O3
    // reserve 2 slots for this optimization
    fg->set_traversal_num(fg->traversal_num()+2);
    unsigned traversal_number = fg->traversal_num();

    Mem_Manager mem(bv_size << 3); // should be large enough
    unsigned n_bits = bv_size << 1;
    Bit_Vector live_reg(n_bits, mem, false);
    Bit_Vector invariant_reg(n_bits, mem, false);
    Bit_Vector induced_reg(n_bits, mem, false);
    induced_reg.set(((Reg_Operand *)_induc_inst->dst())->bv_position());
    // induc_inst must be in tail node
    if (!_process_block(_tail_block, live_reg, invariant_reg, induced_reg, _induc_inst)) 
        return false;
    _successful = false;
    _traverse_blocks(_loop_header, live_reg, invariant_reg, induced_reg, traversal_number, false);
    return _successful;
}

// Recursively walks the loop body starting at `node`.
//
// A node that passes _process_block is stamped with `tn`; a node that
// fails is stamped with `tn-1`.  A successor inside the loop is visited
// only once all of its predecessors carry the `tn` stamp (the tail block
// is exempt from that requirement).  When a visit of the tail block
// succeeds, the current node is handed to _transform_block.
//
// `recurring_opt` tells whether some block on the path leading here needs
// the recurrence rewrite; in that case an edge leaving the loop makes the
// walk fail.  Returns true when this node and every successor that was
// examined were accepted.
bool Loop_Invariant_Path::_traverse_blocks(Cfg_Node *node, Bit_Vector &live_reg, Bit_Vector &invariant_reg, 
                                       Bit_Vector &induced_reg, unsigned tn, bool recurring_opt)
{
    // Already accepted during this traversal.
    if (node->latest_traversal == tn)
        return true;

    const bool block_ok = _process_block(node, live_reg, invariant_reg, induced_reg);
    if (!block_ok)
    {
        node->latest_traversal = tn - 1;    // remember the rejection
        return false;
    }
    node->latest_traversal = tn;            // remember the acceptance

    const bool need_recur_opt = _check_recurring_opt(node);
    if (node == _tail_block)
    {
        // tail block need not do recur_opt
#ifdef TRACE_O3
        if (need_recur_opt)
            cout << "      violating recurr opt in tail " << node->label << endl;
#endif // TRACE_O3
        return !need_recur_opt;
    }

    // now, do this in topological order
    const bool recur_on_path = recurring_opt || need_recur_opt;
    bool all_ok = true;
    for (int s = 0; s < node->out_edge_size(); s++)
    {
        Cfg_Node *succ = node->out_edges(s);

        if (succ->loop_header != _loop_header)
        {
            // Edge leaving the loop: harmless unless a recurrence rewrite
            // is pending on the path that led here.
            if (!recurring_opt)
                continue;
#ifdef TRACE_O3
            cout << "      violating early exit in recur path " << node->label << endl;
#endif // TRACE_O3
            return false;
        }

        if (succ->latest_traversal == tn - 1)
        {
            // Successor was rejected earlier in this traversal.
            all_ok = false;
            continue;
        }

        // Every predecessor must already be accepted, except for the tail.
        bool preds_ready = true;
        if (succ != _tail_block)
        {
            for (int p = 0; p < succ->in_edge_size(); p++)
            {
                if (succ->in_edges(p)->latest_traversal != tn)
                {
                    preds_ready = false;
                    break;
                }
            }
        }
        if (!preds_ready)
            continue;

        const bool succ_ok =
            _traverse_blocks(succ, live_reg, invariant_reg, induced_reg, tn, recur_on_path);
        if (!succ_ok)
            all_ok = false;
        if (succ_ok && succ == _tail_block)
            _transform_block(node);
    }

    if (all_ok && need_recur_opt)
        _perform_recurring_opt(node);
    return all_ok;
}

// Scans the instructions of `node` and checks them against the conditions
// this optimization requires, updating the three register bit vectors as a
// side effect (the updates made before a failure are NOT rolled back).
//
//   node          - block whose IR instruction list is scanned.
//   live_reg      - registers defined so far by instructions that are not
//                   exempt (see the destination handling at the bottom).
//   invariant_reg - registers that have been read so far (as a plain
//                   source or as base/index of a memory operand), plus
//                   registers assigned an immediate.
//   induced_reg   - the induction register and registers that receive a
//                   copy of it through an assignment.
//   start_inst    - when non-NULL, scanning starts at the instruction after
//                   start_inst; when NULL, scanning starts at the first
//                   instruction of the block and stops (successfully) upon
//                   reaching _induc_inst.
//
// Returns false when a call instruction is met, when an induced register is
// used in a disallowed position ("first condition"), or when a register
// definition conflicts with live_reg/invariant_reg ("second condition").
// Returns true otherwise.
bool Loop_Invariant_Path::_process_block(Cfg_Node *node, Bit_Vector &live_reg, Bit_Vector &invariant_reg, 
                                     Bit_Vector &induced_reg, Inst *start_inst)
{
#ifdef TRACE_O3
    cout << "    process block " << node->label << endl;
#endif // TRACE_O3
    Inst *curr_inst;
    // Resume right after start_inst when one is given, otherwise begin with
    // the first instruction of the block.
    if (start_inst) curr_inst = start_inst->next();
    else curr_inst = node->IR_instruction_list()->next();
    // The instruction list is circular: the list head marks the end.
    while (curr_inst != node->IR_instruction_list())
    {
        // When scanning from the top of a block, the induction instruction
        // ends the scan with success.
        if (curr_inst==_induc_inst && start_inst == NULL)
        {
#ifdef TRACE_O3
            cout << "    end 2 process block " << node->label << endl;
#endif // TRACE_O3
            return true;
        }
        if (curr_inst==_cmp_inst || curr_inst==_induc_inst)
        {   // skip special instructions
            curr_inst = curr_inst->next();
            continue;
        }
        // Any call instruction disqualifies the block.
        if (curr_inst->is_call())
        {
#ifdef TRACE_O3
            cout << "      call instr encounter: inst " << curr_inst->global_id << endl;
#endif // TRACE_O3
            return false;
        }
        // Recurrence xor-with-1 instructions are treated specially below.
        bool is_recur_instr;
        if (is_recur_xor_imm_one(curr_inst)>=0) 
            is_recur_instr = true;
        else
            is_recur_instr = false;
        bool violate_condition = false;
        int i;
        // ---- sources -------------------------------------------------------
        for (i = 0; i < curr_inst->n_srcs; i++)
        {   // induction variable should not be used across basic block
            if (curr_inst->src(i)->is_array())
            {
                // Array operand: base and index registers count as read;
                // neither may be an induced register.
                Array_Operand *ao = (Array_Operand *)curr_inst->src(i);
                if (ao->base()->is_reg())
                {
                    unsigned base_pos = ((Reg_Operand *)ao->base())->bv_position();
                    invariant_reg.set(base_pos);
                    if (induced_reg.is_set(base_pos)) violate_condition = true;
                }
                if (ao->index()->is_reg())
                {
                    unsigned index_pos = ((Reg_Operand *)ao->index())->bv_position();
                    invariant_reg.set(index_pos);
                    if (induced_reg.is_set(index_pos)) violate_condition = true;
                }
            }
            else if (curr_inst->src(i)->kind == Operand::Field)
            {
                // Field operand: same treatment for its base register.
                Field_Operand *fo = (Field_Operand *)curr_inst->src(i);
                if (fo->base()->is_reg())
                {
                    unsigned base_pos = ((Reg_Operand *)fo->base())->bv_position();
                    invariant_reg.set(base_pos);
                    if (induced_reg.is_set(base_pos)) violate_condition = true;
                }
            }
            else if (curr_inst->src(i)->is_reg())
            {
                // Plain register source: recorded as read, except that a
                // recurrence instruction clears the bit instead.
                if (!is_recur_instr)
                    invariant_reg.set(curr_inst->src(i)->bv_position());
                else
                    invariant_reg.reset(curr_inst->src(i)->bv_position());
                // An assignment of the induction register into a register
                // makes the destination induced as well; any other read of
                // an induced register is a violation.
                if (curr_inst->is_assignment() && curr_inst->src(i)==_induc_inst->dst() &&
                    curr_inst->dst() && curr_inst->dst()->is_reg())
                {
                    induced_reg.set(((Reg_Operand *)curr_inst->dst())->bv_position());
                }
                else if (induced_reg.is_set(curr_inst->src(i)->bv_position()))
                    violate_condition = true;
            }
            if (violate_condition) break;
        }
        // ---- memory destinations -------------------------------------------
        // Note: these checks run (and update invariant_reg) even when a
        // source already triggered a violation above.
        if (curr_inst->dst() && curr_inst->dst()->is_array())
        {
            Array_Operand *ao = (Array_Operand *)curr_inst->dst();
            if (ao->base()->is_reg())
            {
                unsigned base_pos = ((Reg_Operand *)ao->base())->bv_position();
                invariant_reg.set(base_pos);
                if (induced_reg.is_set(base_pos)) violate_condition = true;
            }
            if (ao->index()->is_reg())
            {
                unsigned index_pos = ((Reg_Operand *)ao->index())->bv_position();
                invariant_reg.set(index_pos);
                if (induced_reg.is_set(index_pos)) violate_condition = true;
            }
        }
        if (curr_inst->dst() && curr_inst->dst()->kind == Operand::Field)
        {
            Field_Operand *fo = (Field_Operand *)curr_inst->dst();
            if (fo->base()->is_reg())
            {
                unsigned base_pos = ((Reg_Operand *)fo->base())->bv_position();
                invariant_reg.set(base_pos);
                if (induced_reg.is_set(base_pos)) violate_condition = true;
            }
        }
        if (violate_condition)
        {
#ifdef TRACE_O3
            cout << "      violating first condition: inst " << curr_inst->global_id << endl;
#endif // TRACE_O3
            return false; // violation of first condition
        }
        // ---- register destination ------------------------------------------
        if (curr_inst->dst() && curr_inst->dst()->is_reg())
        {   // registers should not be defined twice
            Reg_Operand *reg = (Reg_Operand *)curr_inst->dst();
            if (//(curr_inst->is_reg_assignment() && !live_reg.is_set(curr_inst->src(0)->bv_position(),_bv_head)) ||
                (curr_inst->is_same_reg_copying()) || // same reg copying is still invariant
                (is_recur_instr)) // register copy of an invariant or is_xor_imm_one
            {
                // Do nothing
            }
            else if (curr_inst->is_imm_assignment()) // assigning immediate value is a loop invariant operation
            {
                invariant_reg.set(reg->bv_position());
                //live_reg.set(reg->bv_position(),_bv_head);
            }
            else if (live_reg.is_set(reg->bv_position()) || // defined twice
                     invariant_reg.is_set(reg->bv_position()))    // used before defined
            {
#ifdef TRACE_O3
                cout << "      violating second condition: inst " << curr_inst->global_id << endl;
#endif // TRACE_O3
                return false; // violation of second condition
            }
            else
                live_reg.set(reg->bv_position()); // first definition seen: remember it
        }
        curr_inst = curr_inst->next();
    }
#ifdef TRACE_O3
    cout << "    end process block " << node->label << endl;
#endif // TRACE_O3
    return true;
}

// Returns true when `node` contains at least one recurrence xor-with-1
// instruction (as recognized by is_recur_xor_imm_one), false otherwise.
bool Loop_Invariant_Path::_check_recurring_opt(Cfg_Node *node)
{
    for (Inst *scan = node->IR_instruction_list()->next();
         scan != node->IR_instruction_list();
         scan = scan->next())
    {
        if (is_recur_xor_imm_one(scan) >= 0)
            return true;
    }
    return false;
}

// Rewrites the recurrence xor-with-1 instructions of `node`.
//
// Two new instructions are created, both positioned relative to the
// instruction that was first in the block on entry (presumably inserted
// before it -- the placement is decided by the Inst constructors):
//     temp   = sub(_loop_bound_reg, <immediate source of _init_inst>)
//     parity = and(temp, 1)
// Afterwards every recurrence xor in the block (see is_recur_xor_imm_one)
// has its immediate operand 1 replaced by the parity register.
//
// Relies on _loop_bound_reg and _init_inst having been set up beforehand.
void Loop_Invariant_Path::_perform_recurring_opt(Cfg_Node *node)
{
#ifdef TRACE_O3
    cout << "    perform recur opt on block " << node->label << endl;
#endif // TRACE_O3
    Inst *first_inst = node->IR_instruction_list()->next();

    // first, the sub instruction
    Imm_Operand *init_imm = (Imm_Operand *)_init_inst->src(0);
    Inst *sub_inst = new (fg->mem_manager) Sub_Inst(Sub_Inst::sub, _loop_bound_reg, init_imm,
        _induc_inst->exp, first_inst);
    Reg_Operand *temp_reg = exprs->create_new_temp_reg(JAVA_TYPE_INT);
    sub_inst->set_dst(temp_reg);

    // second, the and instruction that extracts the low bit
    Imm_Operand *imm_one = new (fg->mem_manager) Imm_Operand(1,JAVA_TYPE_INT);
    Inst *and_inst = new (fg->mem_manager) Bitwise_Inst(Bitwise_Inst::k_and, temp_reg, imm_one, 
        _induc_inst->exp, first_inst);
    Reg_Operand *parity_reg = exprs->create_new_temp_reg(JAVA_TYPE_INT);
    //parity_reg->set_global_reg_cand();
    and_inst->set_dst(parity_reg);

    // Finally patch every recurrence xor of the block.  The result of each
    // replacement is not needed: nothing consumed the old accumulated flag.
    for (Inst *scan = first_inst;
         scan != node->IR_instruction_list();
         scan = scan->next())
    {
        replace_xor_imm_with_reg(scan, parity_reg);
    }
}

// Redirects the edge node -> _tail_block through the compensation block
// _exit_condition.
//
// On the first successful call the compensation block is created by
// splicing a new node between `node` and _tail_block, and an instruction
//     induction_reg = sub(_loop_bound_reg, 1)
// is added to it.  On later calls the existing edge(s) from `node` to
// _tail_block are simply replaced by edges to _exit_condition.
//
// The loop bound is the operand of _cmp_inst that is not the induction
// register.  Unless TURN_OFF_FOR_DEBUG is defined, the bound must be a
// register whichever side of the compare it sits on; if it is not, the
// function sets _successful to false and returns WITHOUT touching the flow
// graph, so _exit_condition stays NULL and a later call cannot report
// success on top of a half-finished transformation.
//
// Sets _successful to true when the block was transformed.
void Loop_Invariant_Path::_transform_block(Cfg_Node *node)
{
#ifdef TRACE_O3
    cout << "    ******** transform block " << node->label << endl;
#endif // TRACE_O3
    // here, we assume one of the node's successors is _tail_block
    if (_exit_condition==NULL)
    {
        assert(_induc_inst->dst()->is_reg());
        Reg_Operand *induc_reg = (Reg_Operand *)_induc_inst->dst();

        // The bound is whichever compare operand is not the induction register.
        const int bound_pos = (_cmp_inst->src(0)==induc_reg) ? 1 : 0;
#ifndef TURN_OFF_FOR_DEBUG
        // Never cast something that is not a Reg_Operand to Reg_Operand.
        // Checked before the graph is modified so a rejected loop is left intact.
        if (!_cmp_inst->src(bound_pos)->is_reg())
        {
            _successful = false;
            return;
        }
#endif
        _loop_bound_reg = (Reg_Operand *)_cmp_inst->src(bound_pos);

        _exit_condition = fg->splice_cfg_nodes(node,_tail_block);
        // now add compensation instructions to _exit_condition
        _loop_bound_reg->set_global_reg_cand();
        Imm_Operand *imm_one = new (fg->mem_manager) Imm_Operand(1,JAVA_TYPE_INT);
        Inst *new_inst = new (fg->mem_manager) Sub_Inst(Sub_Inst::sub, _loop_bound_reg, imm_one, 
            _induc_inst->exp, _exit_condition->IR_instruction_list());
        new_inst->set_dst(induc_reg);
    }
    else
    {   // replace edge
        Cfg_Int edge;
        for (edge=0; edge<node->out_edge_size(); edge++) 
        {
            if (node->out_edges(edge)==_tail_block)
                node->replace_edge(fg->mem_manager, _tail_block, _exit_condition);
        }
    }
    _successful = true;
}

// An instruction is taken to carry the induction variable exactly when it
// is an iinc instruction.
bool contain_induction_variable(Inst *inst)
{
    const bool is_increment = inst->is_iinc();
    return is_increment;
}

// Returns true when `inst` is a compare with at least two sources and one
// of its first two sources is the operand `rhs_reg` (pointer identity).
bool is_compare_with_induction(Inst *inst, Operand *rhs_reg)
{
    return inst->is_compare() &&
           inst->n_srcs >= 2 &&
           (inst->src(0) == rhs_reg || inst->src(1) == rhs_reg);
}

// Checks whether the loop headed by `node` has the shape this optimization
// handles and, if so, runs Loop_Invariant_Path::optimize on it.
//
// Requirements checked here:
//   - exactly one out edge of the header leaves the loop;
//   - exactly one in edge of the header comes from inside the loop (the
//     tail) and at least one comes from outside (the last such predecessor
//     is used as pre-header);
//   - the tail contains an induction instruction (searched backwards);
//   - the header contains a compare involving the induction destination
//     (searched forwards);
//   - the pre-header contains an immediate assignment to the induction
//     destination (searched backwards).
// Returns false when any requirement fails, otherwise the result of
// optimize(bv_size).
static bool optimize_eligible_loops(Cfg_Node *node, Flow_Graph *flow_graph, Expressions *exprs, unsigned bv_size)
{
    assert(node->loop_header == node);
    Cfg_Node *const header = node;

    // Find the single successor of the header that lies outside the loop.
    Cfg_Node *post_exit_node = NULL;
    for (int e = 0; e < header->out_edge_size(); e++)
    {
        Cfg_Node *succ_node = header->out_edges(e);
        if (succ_node->loop_header == header)
            continue;
        if (post_exit_node != NULL)
            return false; // more than one exit edge from the header
        post_exit_node = succ_node;
    }
    if (post_exit_node == NULL)
        return false; // loop has been linearized

    // Classify the predecessors of the header: tail (inside) vs pre-header.
    Cfg_Node *tail_node = NULL;
    Cfg_Node *pre_header = NULL;
    for (int p = 0; p < header->in_edge_size(); p++)
    {
        Cfg_Node *pred_node = header->in_edges(p);
        if (pred_node->loop_header != header)
        {
            pre_header = pred_node;
            continue;
        }
        if (tail_node != NULL)
            return false; // more than one tail node, so don't do this loop
        tail_node = pred_node;
    }
    if (tail_node == NULL || pre_header == NULL)
        return false; // ****** check this again

    // finally, find the induction variable in the tail node (last one wins)
    Inst *induc_inst = tail_node->IR_instruction_list()->prev();
    for (;; induc_inst = induc_inst->prev())
    {
        if (induc_inst == tail_node->IR_instruction_list())
            return false;
        if (contain_induction_variable(induc_inst))
            break;
    }

    // verify the induction variable in the header (first compare wins)
    Inst *cmp_inst = header->IR_instruction_list()->next();
    for (;; cmp_inst = cmp_inst->next())
    {
        if (cmp_inst == header->IR_instruction_list())
            return false;
        if (is_compare_with_induction(cmp_inst, induc_inst->dst()))
            break;
    }

    // find instr that initializes the induction variable
    Inst *init_inst = pre_header->IR_instruction_list()->prev();
    for (; init_inst != pre_header->IR_instruction_list(); init_inst = init_inst->prev())
    {
        if (init_inst->is_imm_assignment() && init_inst->dst() == induc_inst->dst())
            break;
    }
    if (init_inst == pre_header->IR_instruction_list())
        return false; // init not found

    // try to optimize
    Loop_Invariant_Path ele(header, tail_node, post_exit_node, init_inst, induc_inst, 
        cmp_inst, flow_graph, exprs);
    return ele.optimize(bv_size);
}

// Flow-graph visitor: when `node` is its own loop header, allocates a
// work-list node for it from the picker's memory manager and links it in
// before the head of the picker's work list.  `c` must point to a
// Loop_Header_Picker.
static void get_loop_headers(Cfg_Node *node, Closure *c)
{
    if (node->loop_header != node)
        return;

    Loop_Header_Picker *picker = (Loop_Header_Picker *)c;
    WL_Node *entry = new (picker->mem) WL_Node();
    entry->node = node;
    entry->insert_before(&picker->ws->work_list);
}

extern bool O3_bc_opt;
// Entry point of the pass.  Does nothing and returns false when O3_bc_opt
// is off.  Otherwise collects every loop header of `f` into a work set and
// tries optimize_eligible_loops on each of them, passing the current
// temporary-register id as the bit-vector size.  Returns true when at least
// one loop was optimized.
bool transform_loop_invariant_path(Flow_Graph *f, Expressions &exprs)
{
    if (!O3_bc_opt)
        return false;

    unsigned num_vars = exprs.reg_map.curr_tmp_reg_id();
    Mem_Manager mm(20*sizeof(void*)); // average 20 loops per function?
    Work_Set ws;
    Loop_Header_Picker picker(&ws, mm);
    f->apply(get_loop_headers,&picker);

    // Drain the work set, one loop header at a time.
    bool any_loop_optimized = false;
    while (!ws.is_empty())
    {
        WL_Node *w = (WL_Node *)ws.work_list.get_next();
        Cfg_Node *header = (Cfg_Node*)w->node;
        if (optimize_eligible_loops(header, f, &exprs, num_vars))
            any_loop_optimized = true;
        ws.free(w);
    }
    return any_loop_optimized;
}

