// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/base_natives/common/mon_enter_exit.cpp,v 1.18 2001/11/20 20:09:22 rlhudson Exp $
//

#ifndef  OBJECT_LOCK_V2
#include "platform.h"
#include <assert.h>
#include <iostream.h>

#include "orp_types.h"
#include "Class.h"
#include "stack_manipulation.h"
#include "exceptions.h"
#include "environment.h"
#include "object_layout.h"
#include "orp_threads.h"
#include "nogc.h"
#include "root_set_enum.h"
#include "orp_utils.h"
#include "orp_synch.h"
#include "sync_bits.h"

#include "orp_stats.h"
#include "object_manager.h"
#include "mon_enter_exit.h"

#ifdef OBJECT_SPLITTING
#include "gc_interface.h"
#endif // OBJECT_SPLITTING


#ifdef ORP_POSIX
#include "platform2.h"
#ifdef __linux__
#include <asm/bitops.h>
#endif
#endif

// Defined in another translation unit; not referenced in the part of this file
// reviewed here.
// NOTE(review): presumably the addresses of the stubs that set up / pop a
// java-to-native frame -- confirm against the defining module.
extern uint32 address_of_setup_java_to_native_frame;
extern uint32 address_of_pop_java_to_native_frame;


// Pointers to __stdcall routines that take the object to lock / unlock.
// Nothing in the part of this file reviewed here assigns or calls them.
// NOTE(review): presumably they are set to the monitor enter / exit code
// elsewhere -- confirm where they are initialized.
void (__stdcall *p_mon_enter_code)  (Java_java_lang_Object *);
void (__stdcall *p_mon_exit_code)  (Java_java_lang_Object *); 



///////////////////////////////////////////////////////////////////////////////
///////////////
/////////////// WARNING: start_of_mon_enter_exit_busybit_critical_zone() MUST BE
///////////////
/////////////// THE FIRST PROCEDURE IN THIS FILE
///////////////
/////////////// NOTE(review): this banner originally named
/////////////// start_of_object_busybit_critical_zone() and java_lang_Object.cpp;
/////////////// it looks like it was copied from that file -- confirm.
///////////////
///////////////////////////////////////////////////////////////////////////////

// Intentionally empty. Only the position of this function in the file matters:
// it marks the start of this file's busy-bit critical zone.
void start_of_mon_enter_exit_busybit_critical_zone()
{
////////////// THIS MUST BE THE FIRST PROCEDURE IN THIS FILE
////////////// SEE in_busybit_critical_zone() for details
}

#ifdef MONITOR_STO
#ifndef USE_IA64_JIT

// Table of objects locked through the lazy path that orp_monitor_enter and
// orp_monitor_exit take while bMultithreaded is false. It is used as a stack:
// enter stores at the cursor and advances it, exit steps back and clears.
Java_java_lang_Object *lazylist[1000];
// Cursor into lazylist, kept as an address stored in an int. It starts at the
// base of the table and is moved in steps of 4 bytes by enter / exit.
int num_lazylock = (int)lazylist;
// While false, orp_monitor_enter / orp_monitor_exit only update lazylist and
// never touch the object header. Not assigned in the code visible here.
bool bMultithreaded;

#ifdef _DEBUG
// Debug-only sanity check for the lazy lock table. orp_monitor_enter calls it
// right after storing an object at the cursor (num_lazylock) and before
// advancing the cursor.
//
// Asserts that the slot just written lies inside lazylist (both ends) and that
// it holds a non-NULL value, then counts the lazy enter when ORP_STATS is
// defined.
void lazy_monitor_enter_check(){
	assert( num_lazylock >= (int)lazylist );
	// lazylist has a fixed number of slots; catch a push that ran off its end.
	// The offset is compared (rather than two absolute addresses) so the check
	// does not depend on where the table sits in the address space.
	assert( (num_lazylock - (int)lazylist) < (int)sizeof(lazylist) );
	assert( *(int **)num_lazylock !=NULL );
#ifdef ORP_STATS
	orp_stats_total.num_lazy_monitor_enter++;
#endif

}
// Debug-only sanity check for the lazy lock table, called by orp_monitor_exit
// before it steps the cursor (num_lazylock) back by one 4-byte slot.
//
// Asserts that the slot about to be popped is not below the start of lazylist
// and holds a non-NULL value, then counts the lazy exit when ORP_STATS is
// defined.
void lazy_monitor_exit_check()
{
	// Address of the slot the upcoming pop will read.
	const int top_slot = num_lazylock-4;

	assert( top_slot >= (int)lazylist );
	assert( *(int **)top_slot != NULL );
#ifdef ORP_STATS
	++orp_stats_total.num_lazy_monitor_exit;
#endif
}

#endif //_DEBUG
#endif //USE_IA64_JIT
#endif //MONITOR_STO

// Read by orp_monitor_enter to choose how to back off before retrying the fast
// path: no retry when it is 1, a short delay loop when it is at most 3, and
// active_thread_count/4 yields otherwise. It is not written in the code
// visible here.
// NOTE(review): presumably the number of currently active threads -- confirm
// where it is maintained.
volatile int active_thread_count;

// orp_monitor_enter_or_null
// Args
//    p_obj - the object whose lock is requested.
//
// This is a performance improvement meant to eliminate the need to build the last
// java frame on the stack each time we do a monitor enter. The jit runtime support
// should call this first and if successful it can eliminate the ljf overhead.
// If this is not successful then a call to orp_monitor_enter is required.
//
// On IA64 it would be nice if quick_thread_index_shifted_left_with_recursion_set_to_one
// was in a thread specific register, say reg R.
// Now if p_obj is passed in reg reg1, the compiler could generate something similar to 
// the following.
//
// Then a reasonable sequence would be 
//       add    reg2 = -8, reg1 // The header location
//       mov    ar.ccv, 0      // clear the compare value application register (is this how?)
//       cmpxchg8.acq reg3 = [reg2], regR, ar.ccv // Do the compare exchange, 
//                                                // acq for enter, st.rel for exit
//       cmp.eq p1,p2 reg3, 0  // check the results
// if p2 br slow               // do the call to the slower monitor enter routines.
//
// A note about orp_monitor_exit_or_null: this comment used to say it does not exist, but
// it is defined further down in this file. orp_monitor_exit always executes without the
// gc being enabled.
//

// Returns the calling thread's
// quick_thread_index_shifted_left_with_recursion_set_to_one, converted to
// POINTER_SIZE_INT. This is the same value orp_monitor_enter_or_null tries to
// install in an object header.
//
// Args
//    p_obj - not used.
POINTER_SIZE_INT orp_monitor_cmp_value(Java_java_lang_Object *p_obj)
{
    return (POINTER_SIZE_INT)p_TLS_orpthread->quick_thread_index_shifted_left_with_recursion_set_to_one;
} //orp_monitor_cmp_value


    
// Fast-path monitor enter: a single atomic compare-and-exchange on the object
// header, with no busy-bit handshake and no lock blocks.
//
// Args
//    p_obj - the object whose lock is requested; must not be NULL.
//
// Returns TRUE if the header held the uncontested value and was atomically
// replaced by the calling thread's
// quick_thread_index_shifted_left_with_recursion_set_to_one. Returns FALSE in
// every other case, leaving the header as it was; the caller must then fall
// back to orp_monitor_enter.
//
// The debug asserts require gc to be disabled for the calling thread.
Boolean orp_monitor_enter_or_null(Java_java_lang_Object *p_obj)
{
    volatile POINTER_SIZE_INT *p_header = P_OBJ_INFO(p_obj);
    
    POINTER_SIZE_INT quick_thread_index_shifted_left_with_recursion_set_to_one
        = (POINTER_SIZE_INT)p_TLS_orpthread->quick_thread_index_shifted_left_with_recursion_set_to_one;
    
#ifdef _DEBUG
    p_TLS_orpthread->number_of_monitor_enters++;
    // It should be pointer aligned
    assert(  (((POINTER_SIZE_INT)p_header) & POINTER_ALIGNMENT_CHECK_MASK) == 0); 
    assert(p_obj != NULL);
    assert(quick_thread_id[p_TLS_orpthread->quick_thread_index].p_orpthread);
    assert(quick_thread_index_shifted_left_with_recursion_set_to_one != 0);
    assert((quick_thread_index_shifted_left_with_recursion_set_to_one & BUSY_FORWARDING_BIT) == 0);
    assert(!orp_is_gc_enabled(p_TLS_orpthread));
#endif 
    
#ifndef ORP_POSIX
    // We need this 0 check so that if the lock is released before the CMPXCHG we 
    // don't reset it.
    if (InterlockedCompareExchangePointer ((PVOID *)p_header, 
        (PVOID) quick_thread_index_shifted_left_with_recursion_set_to_one,
        (PVOID) UNCONTESTED_HEADER_VALUE) == (PVOID)UNCONTESTED_HEADER_VALUE) {
        // RLH 3-6-00
        // the Spec marks hit this location 90,222,720 times vs only 942,237 that it doesn't
        // I don't know how pBob does but it is hopefully similar.
        // gc_monitor_stress is a perverse example of how to defeat this optimization.
        return TRUE;
    }
    return FALSE;
#else //ORP_POSIX
    // There does not seem to be a interlocked compare exchange interface for Linux 
    // so we do it with inline asm for now.
    //
    // eax is cleared so the cmpxchg only succeeds when the header is 0; after the
    // instruction eax is still 0 on success and holds the observed header otherwise.
    // NOTE(review): the literal 0 stands in for UNCONTESTED_HEADER_VALUE used by the
    // non-POSIX branch -- confirm that constant is 0.
    //
    // The "memory" clobber tells the compiler that the asm writes *p_header and acts
    // as a lock acquire, so it must not cache or move memory accesses across it.
    // "cc" records that the flags are modified.
    int result=0;
    asm volatile(
        "xorl  %%eax, %%eax\n\t"
        "lock  cmpxchg %1, (%2)\n\t"
        "sub   $0, %%eax\n\t"
        "jne   1f\n\t"
        "movl  $1, %0\n\t"
        "jmp   2f\n\t"
        "1:\t"
        "movl  $0, %0\n\t"
        "2:\t"
        :"=r"(result)
        :"r"(quick_thread_index_shifted_left_with_recursion_set_to_one), "r"(p_header)
        :"eax", "memory", "cc"
    );
    return result;
#endif //ORP_POSIX

}



// This takes care of the simple uncontested monitor exit.
//
// Args
//    p_obj - the object whose lock is being released; must not be NULL.
//
// Returns TRUE if the header was exactly the calling thread's
// quick_thread_index_shifted_left_with_recursion_set_to_one and was atomically
// replaced by the uncontested value. Returns FALSE in every other case,
// leaving the header as it was; the caller must then take the full
// orp_monitor_exit path.
//
// The debug asserts require gc to be disabled for the calling thread.
Boolean orp_monitor_exit_or_null(Java_java_lang_Object *p_obj)
{
    volatile POINTER_SIZE_INT *p_header = P_OBJ_INFO(p_obj);
    POINTER_SIZE_INT quick_thread_index_shifted_left_with_recursion_set_to_one
        = (POINTER_SIZE_INT)p_TLS_orpthread->quick_thread_index_shifted_left_with_recursion_set_to_one;

#ifdef _DEBUG
    p_TLS_orpthread->number_of_monitor_exits++;
    // It should be pointer aligned. The cast uses POINTER_SIZE_INT, as in
    // orp_monitor_enter_or_null, so the pointer is not truncated where
    // pointers are wider than int.
    assert(  (((POINTER_SIZE_INT)p_header)  & POINTER_ALIGNMENT_CHECK_MASK) == 0);
    assert(  !orp_is_gc_enabled(p_TLS_orpthread) );
    assert (p_obj != NULL);
    assert(quick_thread_id[p_TLS_orpthread->quick_thread_index].p_orpthread);
#endif

#ifdef ORP_POSIX
// Do it with ugly inline asm until we get a Linux lib with a compare exchange.
//
// eax is loaded with this thread's quick value, so the cmpxchg stores 0 only
// when the header still equals it; afterwards eax differs from %1 on failure.
// NOTE(review): the literal 0 stands in for UNCONTESTED_HEADER_VALUE used by
// the non-POSIX branch -- confirm that constant is 0.
//
// The "memory" clobber tells the compiler that the asm writes *p_header and
// acts as a lock release, so stores made while holding the lock must not be
// moved past it. "cc" records that the flags are modified.
    int result=0;
    asm volatile(
        "mov   %1, %%eax\n\t"
        "lock  cmpxchg %2, (%3)\n\t"
        "sub   %1, %%eax\n\t"
        "jne   1f\n\t"
        "movl  $1, %0\n\t"
        "jmp   2f\n\t"
        "1:\t"
        "movl  $0, %0\n\t"
        "2:\t"
        :"=r"(result)
        :"r"(quick_thread_index_shifted_left_with_recursion_set_to_one), "r"(0), "r"(p_header)
        :"eax", "memory", "cc"
    );
    return result;
#else //!ORP_POSIX
    // If this lock is uncontested, release it, we need to do this atomically since someone
    // might be trying to get the metalock and that would cause a race condition.
    if (InterlockedCompareExchangePointer ((PVOID *)p_header, 
                                    (PVOID) UNCONTESTED_HEADER_VALUE,
                                    (PVOID) quick_thread_index_shifted_left_with_recursion_set_to_one) == (PVOID) quick_thread_index_shifted_left_with_recursion_set_to_one) {
        return TRUE;
    }
    return FALSE;
#endif //!ORP_POSIX
}



// Full monitor enter for p_obj. The routine may block in do_the_wait, and gc
// is enabled while it is blocked, so when this routine is called we should be
// prepared to do a GC.
//
// Args
//    p_obj - the object whose lock is requested; must not be NULL.
//
// Order of attempts:
//   1. (MONITOR_STO without USE_IA64_JIT only) while bMultithreaded is false,
//      push p_obj on the lazy lock table and return without touching the
//      object header.
//   2. Unless active_thread_count is 1, back off briefly and retry the
//      compare-exchange fast path (orp_monitor_enter_or_null).
//   3. Acquire the header busy bit and, while holding it, handle one of:
//        - lock is free                     -> take it in the header;
//        - quick lock already held by us    -> bump the recursion count, or
//                                              go to recursion_count_about_to_overflow;
//        - quick lock held by someone else  -> some_other_thread;
//        - header is in slow-locking mode   -> slow_locking.
//      some_other_thread and slow_locking may queue a lock block and end in
//      do_the_wait.
//
// Every path that acquired the busy bit releases it before returning.
void __stdcall orp_monitor_enter(Java_java_lang_Object *p_obj)
{
#ifdef MONITOR_STO
#ifndef USE_IA64_JIT

	// Lazy path: record the object at the table cursor and advance the cursor
	// by one 4-byte slot. The object header is left alone.
	if(	!bMultithreaded){
		*((struct  Java_java_lang_Object **)num_lazylock) = p_obj;

#ifdef _DEBUG
        lazy_monitor_enter_check();
#endif
		num_lazylock += 4;
		return;
	}
#endif //USE_IA64_JIT
#endif //MONITOR_STO

    // Try the fast route, we might not have contention.
    // With more than 3 active threads: yield active_thread_count/4 times,
    // retrying the fast path after each yield. With 2 or 3: run a short delay
    // loop and retry once. With exactly 1 the retry is skipped entirely.
    int iterations = 0;
    int i = 0;
    // Operands of the delay loop below.
    // NOTE(review): z is written but never read, so an optimizing compiler may
    // remove the delay loop -- confirm this is acceptable.
    double x=15.0, y=35.0, z; 
    if(active_thread_count != 1 ){
        if(active_thread_count > 3 ){
            iterations = active_thread_count/4;
            for(i=0 ; i< iterations; i++){
#ifdef ORP_POSIX
                sched_yield();
#else
                Sleep(0);
#endif

                if (orp_monitor_enter_or_null(p_obj)){
                     return;
                }
            }//for( iterations )
        }else{  //! active_thread_count > 3
            for(i=0;i<1000;i++ ) z = x/y;
            if (orp_monitor_enter_or_null(p_obj)){
                return;
           }
        } // else
    } //active_thread_count!=1

    // Let other threads have a chance to release the lock by doing a
    // context switch. Sree showed that this improves pBob a bunch.
    // NOTE(review): this appears to describe the yield loop above.

#ifdef GC_SAPPHIRE
    // If we need to use the lock in the "to" object because of sapphire
    // then we will always end up here. The fast path will never work on the
    // "from" object because it will have the slow locking bit set. The fast path
    // will work on the "to" objects since no such bits are set. The only task here
    // is to redirect p_obj to point to the "to" object if we need to and then
    // just fall through.
    p_obj = gc_sapphire_get_lock_object(p_obj);
    // Race condition - p_obj points to a "from" object and then sapphire
    // creates a "to" object here then the following orp_monitor_enter_or_null will fail.
    // If we have the "to" object or some "U" object then the orp_monitor_enter_or_null
    // might succeed.
#endif

    assert (p_obj != NULL);
    // Ugh, a field and a var with the same name.
    assert(quick_thread_id[p_TLS_orpthread->quick_thread_index].p_orpthread);
    ORP_thread *p_current_orpthread = p_TLS_orpthread; 

#ifdef _DEBUG
    p_current_orpthread->number_of_monitor_enters++;
#endif

    // Lock block allocated by whichever path below needs one; do_the_wait
    // frees it after the wait.
    Lock_Block *p_lock_block;

    volatile POINTER_SIZE_INT *p_header = P_OBJ_INFO(p_obj);
    assert(  (((POINTER_SIZE_INT)p_header) & POINTER_ALIGNMENT_CHECK_MASK) == 0); // It should be pointer aligned

    POINTER_SIZE_INT quick_thread_index_shifted_left_with_recursion_set_to_one
        = (POINTER_SIZE_INT)p_current_orpthread->quick_thread_index_shifted_left_with_recursion_set_to_one;
    assert (quick_thread_index_shifted_left_with_recursion_set_to_one != 0);
    assert ((quick_thread_index_shifted_left_with_recursion_set_to_one & BUSY_FORWARDING_BIT) == 0);

    assert(  !orp_is_gc_enabled(p_TLS_orpthread) );

    POINTER_SIZE_INT quick_thread_index_shifted_left
        = (POINTER_SIZE_INT)p_current_orpthread->quick_thread_index_shifted_left;

    // From here until the matching RELEASE_BUSY_BIT we hold the header busy
    // bit; xx is the header value read while holding it.
    ACQUIRE_BUSY_BIT(p_header);
    volatile POINTER_SIZE_INT xx = *p_header;

#ifdef GC_SAPPHIRE
    // We have the busy bit of one of three objects. A sapphire "to" space object or a
    // U space object. In either case we need to do nothing special and we can just continue.
    // If however we have a "from" object with a corresponding "to" object then we need to
    // get the "to" object. This could have happened between the time we got the "to" object
    // at the start of the routine and now. Sapphire can't create a "to" object without 
    // acquiring the busy bit so we don't have to worry about that race condition.
    //
    Java_java_lang_Object *p_old_obj = p_obj;
    volatile POINTER_SIZE_INT *p_old_header = p_header;
    p_obj = gc_sapphire_get_lock_object(p_obj);
    if (p_old_obj != p_obj) {
        // recalculate the header value.
        p_header =  P_OBJ_INFO(p_obj);
        xx = *p_header;
        // we need to release the busy bit on the from object and reacquire it
        // on the to object.
        RELEASE_BUSY_BIT(p_old_header); // Release the from busy bit.
        // If some other thread tries to get the busy bit here, they will
        // be able to but that is OK. We will just wait for them to finish.
        ACQUIRE_BUSY_BIT(p_header);
        xx = *p_header;
    }
#endif

    // Thread-index and recursion fields both zero: nobody holds the lock, so
    // install our thread index with a recursion count of one.
    if ( (xx & (QUICK_THREAD_INDEX_MASK | QUICK_RECURSION_MASK)) == 0 ) {
       xx = xx | quick_thread_index_shifted_left_with_recursion_set_to_one;
       *p_header = xx;
       RELEASE_BUSY_BIT(p_header); // Might be faster if we merge it with the above |
       return;
    }

    else {  // not_so_simple:
        // The recursion field doubles as a mode marker: the value SLOW_LOCKING
        // means the header refers to a chain of lock blocks.
        if ( (xx & QUICK_RECURSION_MASK) == SLOW_LOCKING) {
            goto slow_locking;
        }
        else {
            if ( (xx & QUICK_THREAD_INDEX_MASK) == quick_thread_index_shifted_left) {
                
                // We already own the quick lock: recursive enter.
                if ( ((xx & QUICK_RECURSION_MASK) == QUICK_RECURSION_ABOUT_TO_OVERFLOW) ) {
                    goto recursion_count_about_to_overflow;
                }
                else {
                    xx += QUICK_RECURSION_INC_DEC_REMENT;
                    *p_header = xx;
                    RELEASE_BUSY_BIT(p_header);
                    return;
                }
            }
            else {
                goto some_other_thread;
            }
        }
    }

  // We hold the busy bit header lock at this point.
  // The header is in slow-locking mode: it carries a pointer to the head of a
  // chain of lock blocks, and the head's old_object_header holds the
  // thread-index / recursion fields that the header itself would otherwise hold.
slow_locking:

    {        
        assert(p_current_orpthread->p_current_object == 0);
        p_current_orpthread->p_current_object = p_obj;

        assert( (p_current_orpthread->app_status == thread_is_running) ||
                (p_current_orpthread->app_status == thread_is_dying)      );

        Lock_Block *p_lock_chain = (Lock_Block *)(*p_header & LOCK_BLOCK_POINTER_MASK);
        if (  (p_lock_chain->old_object_header & QUICK_THREAD_INDEX_MASK) == 
                        p_current_orpthread->quick_thread_index_shifted_left)          {

            // in this case, its a recursive lock request
            //_ B->B
            if ( (p_lock_chain->old_object_header & QUICK_RECURSION_MASK) ==
                    QUICK_RECURSION_ABOUT_TO_OVERFLOW ) 
            {
                // The saved quick recursion count is at its limit: put a new
                // holding_the_lock block at the head of the chain to carry the
                // count, and move the saved header into it.
                assert(  (p_lock_chain->lock_or_wait_state == waiting_for_the_lock) ||
                         (p_lock_chain->lock_or_wait_state == waiting_for_the_notify)  );
                p_lock_block = get_a_block(p_current_orpthread);
                p_lock_block->p_back_link = 0;
                p_lock_block->p_forward_link = p_lock_chain;
                p_lock_chain->p_back_link = p_lock_block;
                p_lock_block->lock_or_wait_state = holding_the_lock;
                p_lock_block->lock_recursion_count_shifted_left = 
                    QUICK_RECURSION_ABOUT_TO_OVERFLOW + QUICK_RECURSION_INC_DEC_REMENT;
                p_lock_block->old_object_header = p_lock_chain->old_object_header;
                p_lock_block->old_object_header |= SLOW_LOCKING;
                p_lock_block->p_object_header = p_header;

                p_lock_chain->old_object_header = 0;
                p_lock_chain->p_object_header = 0;

                p_current_orpthread->p_current_object = 0;

                POINTER_SIZE_INT new_header = (POINTER_SIZE_INT)p_lock_block; // & LOCK_BLOCK_POINTER_MASK;
                new_header |= SLOW_LOCKING;
                new_header |= BUSY_FORWARDING_BIT;
                *p_header = new_header;/////////////////////////////////////////
                RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////
                return;
            }
            else if ( (p_lock_chain->old_object_header & QUICK_RECURSION_MASK) ==
                                                SLOW_LOCKING) 
            {
                // The head block is our own holding_the_lock block: count the
                // recursion in the block.
                assert(p_lock_chain->lock_or_wait_state == holding_the_lock);
                p_lock_chain->lock_recursion_count_shifted_left += QUICK_RECURSION_INC_DEC_REMENT;
                // BUGBUG if (lock_recursion_count_shifted_left > 1024) assert(0);
                p_current_orpthread->p_current_object = 0;
                RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////
                return;
            }
            else 
            {
                // Recursion still fits in the saved quick count: bump it there.
                assert(  (p_lock_chain->lock_or_wait_state == waiting_for_the_lock) ||
                         (p_lock_chain->lock_or_wait_state == waiting_for_the_notify)  );
                p_lock_chain->old_object_header += QUICK_RECURSION_INC_DEC_REMENT;
                p_current_orpthread->p_current_object = 0;
                RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////
                return;
            }
        }
        if ((p_lock_chain->old_object_header & QUICK_RECURSION_MASK) == 0)
         //   &&  (p_lock_chain->lock_or_wait_state == waiting_for_the_notify)  )
        {
            // nobody has the lock, only wait for notify lock blocks on p_lock_chain
            // therefore, we grab the lock and run
            //_ C->E

            assert(p_lock_chain->lock_or_wait_state == waiting_for_the_notify);
            p_lock_chain->old_object_header &= ~QUICK_THREAD_INDEX_MASK;
            p_lock_chain->old_object_header |=
                quick_thread_index_shifted_left_with_recursion_set_to_one;
            p_current_orpthread->p_current_object = 0;
            RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////////
            return;
        }

        // if we get here, wait at head of blocks waiting for the lock
        //_ B->D

        assert(  (p_lock_chain->lock_or_wait_state == waiting_for_the_lock)   ||
                 (p_lock_chain->lock_or_wait_state == waiting_for_the_notify) ||
                 (p_lock_chain->lock_or_wait_state == holding_the_lock)  );

        p_lock_block = get_a_block(p_current_orpthread);

        if (p_lock_chain->lock_or_wait_state == holding_the_lock)
        {   
            // This branch is flagged as not yet debugged: it asserts, prints a
            // message and calls orp_exit before the linking code below.
            assert(0);  // if this is hit, debug this block of code
            orp_cout << "need to debug holding_the_lock code segment" << endl;
            orp_exit(7449);

            assert(p_lock_chain->p_back_link == 0);
            p_lock_block->p_back_link = p_lock_chain;
            p_lock_block->p_forward_link = p_lock_chain->p_forward_link;
            if (p_lock_chain->p_forward_link)
                p_lock_chain->p_forward_link->p_back_link = p_lock_block;
        
            p_lock_block->lock_or_wait_state = waiting_for_the_lock;
            p_lock_block->lock_recursion_count_shifted_left = QUICK_RECURSION_INC_DEC_REMENT;     
        }
        else {
            // Link our block in front of the current head, move the saved
            // header into it, and point the object header at it.
            p_lock_block->p_back_link = 0;
            p_lock_block->p_forward_link = p_lock_chain;
            p_lock_chain->p_back_link = p_lock_block;
            p_lock_block->old_object_header = p_lock_chain->old_object_header;
            p_lock_block->p_object_header = p_header;
        
            p_lock_block->lock_or_wait_state = waiting_for_the_lock;
            p_lock_block->lock_recursion_count_shifted_left = QUICK_RECURSION_INC_DEC_REMENT;

            p_lock_chain->old_object_header = 0;
            p_lock_chain->p_object_header = 0;

            POINTER_SIZE_INT new_header = (POINTER_SIZE_INT)p_lock_block; // & LOCK_BLOCK_POINTER_MASK;
            new_header |= SLOW_LOCKING;
            new_header |= BUSY_FORWARDING_BIT;  // NOTE: do not turn off busy bit yet

            *p_header = new_header;/////////////////////////////////////////////////
        }
        goto do_the_wait;
    }

  // We hold the busy bit. The header is in quick mode and names another thread
  // as owner: switch it to slow-locking mode with our block as the only chain
  // entry, saving the current header value in the block.
some_other_thread:
    {
        //_ B->D
        assert(p_current_orpthread->p_current_object == 0);
        p_current_orpthread->p_current_object = p_obj;

        p_lock_block = get_a_block(p_current_orpthread);
        p_lock_block->p_back_link = 0;
        p_lock_block->p_forward_link = 0;
        p_lock_block->lock_or_wait_state = waiting_for_the_lock;
        p_lock_block->lock_recursion_count_shifted_left = QUICK_RECURSION_INC_DEC_REMENT;
        p_lock_block->old_object_header = *p_header;
        p_lock_block->p_object_header = p_header;
        POINTER_SIZE_INT new_header = (POINTER_SIZE_INT)p_lock_block; // & LOCK_BLOCK_POINTER_MASK;
        new_header = new_header | SLOW_LOCKING | BUSY_FORWARDING_BIT;
        *p_header = new_header; ////////////////////////////////////////////////

        goto do_the_wait;
    }

  // We hold the busy bit and p_lock_block is queued on the object. Block on
  // this thread's monitor event until it is signalled, then free the block.
do_the_wait:
    {
 
#ifdef _DEBUG
        queue_verifier(p_current_orpthread->p_current_object);
        p_current_orpthread->number_of_waiting_for_the_lock++;
        assert(p_current_orpthread->p_latest_mon_enter_lock_block == 0);
        p_current_orpthread->p_latest_mon_enter_lock_block = p_lock_block;
#endif

        DWORD stat;

        // The event is reset while the busy bit is still held, before the
        // header becomes available to other threads again.
        stat = ResetEvent(p_current_orpthread->event_handle_monitor);
        assert(stat);

        RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////////////

        assert(  !orp_is_gc_enabled(p_TLS_orpthread) );

        // gc is enabled only for the duration of the blocking wait.
        orp_enable_gc(); 
        stat = WaitForSingleObject(
            p_current_orpthread->event_handle_monitor, INFINITE);
        assert(stat != WAIT_FAILED);

#ifdef _DEBUG
        p_current_orpthread->p_latest_mon_enter_lock_block = 0;
#endif

        assert(quick_thread_id[p_TLS_orpthread->quick_thread_index].p_orpthread);

        orp_disable_gc(); 

        // Recompute the header address from the thread's p_current_object
        // rather than reusing the pre-wait value.
        // NOTE(review): presumably because the object may have been moved
        // while gc was enabled -- confirm.
        p_header = P_OBJ_INFO(p_current_orpthread->p_current_object);

        ACQUIRE_BUSY_BIT(p_header);           ////////////////////////////////////////////   

        // Our block is expected to have been marked for release by the time
        // we are woken; this thread frees it.
        if (p_lock_block->lock_or_wait_state != owning_thread_needs_to_call_free_routine)
            assert(0);
        free_this_block(p_current_orpthread, p_lock_block);

#ifdef _DEBUG
#ifndef GC_SAPPHIRE
        // Check that the lock now names this thread as owner, either directly
        // in the header or in the head lock block.
        queue_verifier(p_current_orpthread->p_current_object);
        if (  (*p_header & QUICK_RECURSION_MASK) != SLOW_LOCKING) {
            assert(  (*p_header & QUICK_THREAD_INDEX_MASK) == 
                        (p_current_orpthread->quick_thread_index_shifted_left)       );
        }
        else {
            Lock_Block *p_xx = (Lock_Block *)(*p_header & LOCK_BLOCK_POINTER_MASK);
            assert(  (p_xx->old_object_header & QUICK_THREAD_INDEX_MASK) ==
                        (p_current_orpthread->quick_thread_index_shifted_left)       ); 
        }
        
#endif
#endif

        p_current_orpthread->p_current_object = 0;

        RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////////////
        return;
    }

  // We hold the busy bit and own the quick lock with the recursion field at
  // QUICK_RECURSION_ABOUT_TO_OVERFLOW: switch the header to slow-locking mode
  // with a single holding_the_lock block that carries the recursion count.
recursion_count_about_to_overflow:

    assert( (*p_header & QUICK_RECURSION_MASK) == 
                            QUICK_RECURSION_ABOUT_TO_OVERFLOW );

    p_lock_block = get_a_block(p_current_orpthread);

    p_lock_block->old_object_header = *p_header & QUICK_HASH_MASK;
    p_lock_block->old_object_header |= p_current_orpthread->quick_thread_index_shifted_left;
    p_lock_block->old_object_header |= SLOW_LOCKING;
    p_lock_block->p_object_header = p_header;

    p_lock_block->lock_or_wait_state = holding_the_lock;
    p_lock_block->lock_recursion_count_shifted_left = 
        QUICK_RECURSION_ABOUT_TO_OVERFLOW + QUICK_RECURSION_INC_DEC_REMENT;

    POINTER_SIZE_INT new_header = (POINTER_SIZE_INT)p_lock_block; // & LOCK_BLOCK_POINTER_MASK;
    new_header |= SLOW_LOCKING;
    new_header |= BUSY_FORWARDING_BIT;
    assert ((*p_header & BUSY_FORWARDING_BIT) == BUSY_FORWARDING_BIT); // make sure you have the lock.
    *p_header = new_header; ////////////////////////////////////////////////
    RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////////////

    return;

    // Unreachable: kept as a guard against a path falling off the end.
    assert(0);  // should exit by "return;" statements
}

void __stdcall orp_monitor_exit(Java_java_lang_Object *p_obj)
{

#ifdef MONITOR_STO
#ifndef USE_IA64_JIT

    if(	!bMultithreaded){      
#ifdef _DEBUG
        lazy_monitor_exit_check();
#endif
        num_lazylock -= 4;
        if( *((struct  Java_java_lang_Object **)num_lazylock) != p_obj){
            assert(0); //BUG, only for debug; if hit, then remove it
            throw_java_exception("java/lang/IllegalMonitorStateException");
            assert(0);  // should never return from the throw
        }
        *((struct  Java_java_lang_Object **)num_lazylock) = 0;
        return;
    }
#endif //USE_IA64_JIT
#endif //MONITOR_STO

#ifdef GC_SAPPHIRE
    Java_java_lang_Object *p_orig_obj = p_obj;
    // If we need to use the lock in the "to" object because of sapphire
    // then we will always end up here. The fast path will never work on the
    // "from" object because it will have the slow locking bit set. The fast path
    // will work on the "to" objects since no such bits are set. The only task here
    // is to redirect p_obj to point to the "to" object if we need to and then
    // just fall through.
    p_obj = gc_sapphire_get_lock_object(p_obj);
    // Race condition - p_obj points to a "from" object and then sapphire
    // creates a "to" object here then the following orp_monitor_enter_or_null will fail.
    // If we have the "to" object or some "U" object then the orp_monitor_enter_or_null
    // might succeed.
#endif
    assert(  !orp_is_gc_enabled(p_TLS_orpthread) );
    assert (p_obj != NULL);
    assert(quick_thread_id[p_TLS_orpthread->quick_thread_index].p_orpthread);

    ORP_thread *p_current_orpthread = p_TLS_orpthread;    

#ifdef _DEBUG
    p_current_orpthread->number_of_monitor_exits++;
#endif

    volatile POINTER_SIZE_INT *p_header = P_OBJ_INFO(p_obj);
    assert(  (((int)p_header)  & POINTER_ALIGNMENT_CHECK_MASK) == 0);  // objects must be 4-byte aligned

    POINTER_SIZE_INT quick_thread_index_shifted_left_with_recursion_set_to_one
        = (POINTER_SIZE_INT)p_current_orpthread->quick_thread_index_shifted_left_with_recursion_set_to_one;

#ifdef ORP_POSIX
// Do it the slow way until we get a Linux lib with a compare exchange.
    if( orp_monitor_exit_or_null(p_obj) ) return ;
#else
    // If this lock is uncontested, release it, we need to do this atomically since someone
    // might be trying to get the metalock and that would cause a race condition.
    if (InterlockedCompareExchangePointer ((PVOID *)p_header, 
                                    (PVOID) UNCONTESTED_HEADER_VALUE,
                                    (PVOID) quick_thread_index_shifted_left_with_recursion_set_to_one) == (PVOID) quick_thread_index_shifted_left_with_recursion_set_to_one) {
        return;
    }
#endif // else ORP_POSIX
    
    POINTER_SIZE_INT quick_thread_index_shifted_left
        = (POINTER_SIZE_INT)p_current_orpthread->quick_thread_index_shifted_left;

    ACQUIRE_BUSY_BIT(p_header);
    
    volatile POINTER_SIZE_INT xx = *p_header;
#ifdef GC_SAPPHIRE
    // We have the busy bit of one of three object. A sapphire "to" space object or a
    // U space object. In either case we need to do nothing special and we can just contine.
    // If however we have a "from" object with a corresponding "to" object then we need to
    // get the "to" object. This could have happened between the time we got the "to" object
    // at the start of the routine and now. Sapphire can't create a "to" object without 
    // acquiring the busy bit so we don't have to worry about that race condition.
    //
    Java_java_lang_Object *p_old_obj = p_obj;
    volatile POINTER_SIZE_INT *p_old_header = p_header;
    p_obj = gc_sapphire_get_lock_object(p_obj);
    if (p_old_obj != p_obj) {
        // recalculate the header value.
        p_header = P_OBJ_INFO(p_obj); 
        xx = *p_header;
        // we need to release the busy bit on the from version and reacquire it
        // on the to .
        RELEASE_BUSY_BIT(p_old_header); // Release the from busy bit.
        // If some other thread steps tries to get the busy bit here, they will
        // be able to but that is OK. We will just wait for them to finish.
        ACQUIRE_BUSY_BIT(p_header);
        xx = *p_header;
    }
#endif
    if ( ( xx & (QUICK_THREAD_INDEX_MASK | QUICK_RECURSION_MASK) ) == 
            quick_thread_index_shifted_left_with_recursion_set_to_one)     {

        *p_header = xx & ~(QUICK_THREAD_INDEX_MASK | QUICK_RECURSION_MASK);
        // We could merge the following and get rid of the RELEASE.
        // *p_header = xx & ~(QUICK_THREAD_INDEX_MASK | QUICK_RECURSION_MASK | BUSY_FORWARDING_BIT);
        RELEASE_BUSY_BIT(p_header);
        return;
    }

    else {  // which_thread_is_it:
        if ( (xx & QUICK_RECURSION_MASK) == SLOW_LOCKING) {
            goto slow_locking;
        }
        else {
            if ( (xx & QUICK_THREAD_INDEX_MASK) == quick_thread_index_shifted_left) {

                xx -= QUICK_RECURSION_INC_DEC_REMENT;
                *p_header = xx;
                RELEASE_BUSY_BIT(p_header);
                return;
            }
            else {
                goto illegal_monitor_state;
            }
        }
    }

illegal_monitor_state:
    {
        assert(0);  // BUGBUG remove after debug
        throw_java_exception("java/lang/IllegalMonitorStateException");
        assert(0);  // should never return from the throw
    }

slow_locking:
    {    
        assert(p_current_orpthread->p_current_object == 0);
        p_current_orpthread->p_current_object = p_obj;

        assert( (p_current_orpthread->app_status == thread_is_running) ||
                (p_current_orpthread->app_status == thread_is_dying)      );

        Lock_Block *p_lock_chain = (Lock_Block *)(*p_header & LOCK_BLOCK_POINTER_MASK);
#ifdef GC_SAPPHIRE
        assert(p_lock_chain->discriminator == normal_lock_block);
#endif
        assert(p_lock_chain->p_back_link == 0);

        if ( (p_lock_chain->old_object_header & QUICK_RECURSION_MASK) == SLOW_LOCKING)
        {
            // current thread still holds the lock, thus decrement and return to caller
            //_ B->B
            assert(p_lock_chain->lock_or_wait_state == holding_the_lock);

            // BUGBUG replace the assert w/ throw_java_exception("java/lang/IllegalMonitorStateException");
            assert( (p_lock_chain->old_object_header & QUICK_THREAD_INDEX_MASK) ==
                        p_current_orpthread->quick_thread_index_shifted_left );

            p_lock_chain->lock_recursion_count_shifted_left -= 
                                            QUICK_RECURSION_INC_DEC_REMENT;

            if (p_lock_chain->lock_recursion_count_shifted_left == 
                QUICK_RECURSION_ABOUT_TO_OVERFLOW) {

                Lock_Block *p_toss_it = p_lock_chain;

                p_lock_chain = p_lock_chain->p_forward_link;
                if (p_lock_chain) {
                    p_lock_chain->p_back_link = 0;
                    p_lock_chain->old_object_header = 
                        p_toss_it->old_object_header & QUICK_HASH_MASK;
                    p_lock_chain->old_object_header |= 
                        p_current_orpthread->quick_thread_index_shifted_left;
                    p_lock_chain->old_object_header |= 
                        QUICK_RECURSION_ABOUT_TO_OVERFLOW;

                    free_this_block(p_current_orpthread, p_toss_it);

                    POINTER_SIZE_INT new_header = (POINTER_SIZE_INT)p_lock_chain; // & LOCK_BLOCK_POINTER_MASK;
                    new_header |= SLOW_LOCKING;
                    new_header |= BUSY_FORWARDING_BIT;
                    p_current_orpthread->p_current_object = 0;
                    *p_header = new_header;/////////////////////////////////////
                    RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////
                    return;
                }
                else {
                    POINTER_SIZE_INT new_header = p_toss_it->old_object_header & QUICK_HASH_MASK;
                    new_header |= p_current_orpthread->quick_thread_index_shifted_left;
                    new_header |= QUICK_RECURSION_ABOUT_TO_OVERFLOW;
                    new_header |= BUSY_FORWARDING_BIT;

                    free_this_block(p_current_orpthread, p_toss_it);
                    p_current_orpthread->p_current_object = 0;
                    *p_header = new_header;/////////////////////////////////////
                    RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////
                    return;
                }
            }
            else {
                assert(p_lock_chain->lock_recursion_count_shifted_left >
                                            QUICK_RECURSION_ABOUT_TO_OVERFLOW);
                // BUGBUG replace hardcoded limit below with some reasonable limit and a "throw exception"
                assert(p_lock_chain->lock_recursion_count_shifted_left < (10 * 1024 * 1024) );
                                            
                p_current_orpthread->p_current_object = 0;
                RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////
                return;
            }
        }

        if ( ((p_lock_chain->old_object_header & QUICK_RECURSION_MASK) > 0)  &&
             ((p_lock_chain->old_object_header & QUICK_RECURSION_MASK) <= 
                                            QUICK_RECURSION_ABOUT_TO_OVERFLOW)  )
        {
            p_lock_chain->old_object_header -= QUICK_RECURSION_INC_DEC_REMENT;
            if ( (p_lock_chain->old_object_header & QUICK_RECURSION_MASK) != 0)
            {
                p_current_orpthread->p_current_object = 0;
                RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////
                return;
            }
        }

        assert(  (p_lock_chain->lock_recursion_count_shifted_left & //?????
                                            ~QUICK_RECURSION_MASK) == 0);//?????
        assert(p_lock_chain->lock_recursion_count_shifted_left !=//?????
                                            QUICK_RECURSION_MASK );//?????
        POINTER_SIZE_INT new_header;
        DWORD stat;

        if (p_lock_chain->lock_or_wait_state == waiting_for_the_lock)
        {
            assert( (p_lock_chain->lock_recursion_count_shifted_left & 
                                                QUICK_RECURSION_MASK) != 0);

            assert( (p_lock_chain->lock_recursion_count_shifted_left & 
                                                QUICK_RECURSION_MASK) != SLOW_LOCKING);

            if (p_lock_chain->p_forward_link == 0) {

                //_ D->B, D->D
                new_header = p_lock_chain->old_object_header & QUICK_HASH_MASK;

                new_header |= p_lock_chain->p_orp_thread->
                                            quick_thread_index_shifted_left;
                new_header |= p_lock_chain->lock_recursion_count_shifted_left;
                new_header |= BUSY_FORWARDING_BIT;

                p_lock_chain->lock_or_wait_state = owning_thread_needs_to_call_free_routine;

                p_current_orpthread->p_current_object = 0;
                *p_header = new_header; ////////////////////////////////////////

                HANDLE target_event = 0;

                switch (p_lock_chain->p_orp_thread->app_status) {

                case thread_is_waiting:
                    {   
                        p_lock_chain->p_orp_thread->app_status = thread_is_running;

                        assert(p_lock_chain->p_orp_thread->which_trap == x_java_wait);
                        p_lock_chain->p_orp_thread->which_trap = x_nothing;

                        assert(p_lock_chain->p_orp_thread->gc_status == gc_at_safepoint);
                        p_lock_chain->p_orp_thread->gc_status = zero;

                        target_event = p_lock_chain->p_orp_thread->event_handle_interrupt;
                        break;
                    }
                case thread_is_sleeping:
                    {
                        assert(0);  // do something similar to "thread_is_waiting" aboive
                        target_event = p_lock_chain->p_orp_thread->event_handle_sleep;
                        break;
                    }
                case thread_is_running:
                case thread_is_dying:
                    { 
                        target_event = p_lock_chain->p_orp_thread->event_handle_monitor;
                        break;
                    }
                default:
                    assert(0);
                    break;
                }

                RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////
                stat = SetEvent(target_event);
                assert(stat);
                return;
            }

            // p_lock_chain has a non-zero p_forward_link
            //_ F->D, D->D, D->E
        
            POINTER_SIZE_INT header_for_the_forward_link;
            header_for_the_forward_link = p_lock_chain->old_object_header & QUICK_HASH_MASK;

            header_for_the_forward_link |= p_lock_chain->p_orp_thread->
                                        quick_thread_index_shifted_left;

            header_for_the_forward_link |= p_lock_chain->lock_recursion_count_shifted_left;
            p_lock_chain->p_forward_link->old_object_header = header_for_the_forward_link;

            new_header = (POINTER_SIZE_INT)(p_lock_chain->p_forward_link) & LOCK_BLOCK_POINTER_MASK;
            new_header |= SLOW_LOCKING;
            new_header |= BUSY_FORWARDING_BIT;

            p_lock_chain->p_forward_link->p_back_link = 0;

            p_lock_chain->lock_or_wait_state = owning_thread_needs_to_call_free_routine;

            p_current_orpthread->p_current_object = 0;

            *p_header = new_header; ////////////////////////////////////////////
            
            assert(p_lock_chain->lock_or_wait_state == owning_thread_needs_to_call_free_routine);

            HANDLE target_event2 = 0;

            if (p_lock_chain->p_orp_thread->app_status == thread_is_waiting) 
            {
                p_lock_chain->p_orp_thread->app_status = thread_is_running;

                assert(p_lock_chain->p_orp_thread->which_trap == x_java_wait);
                p_lock_chain->p_orp_thread->which_trap = x_nothing;

                assert(p_lock_chain->p_orp_thread->gc_status == gc_at_safepoint);
                p_lock_chain->p_orp_thread->gc_status = zero;

                target_event2 = p_lock_chain->p_orp_thread->event_handle_interrupt;
            }
            else
            {
                assert(     (p_lock_chain->p_orp_thread->app_status == thread_is_running) ||
                            (p_lock_chain->p_orp_thread->app_status == thread_is_dying )       );

                target_event2 = p_lock_chain->p_orp_thread->event_handle_monitor;
            }

            RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////////
            stat = SetEvent(target_event2);
            assert(stat);
            return;
        }


        if (p_lock_chain->lock_or_wait_state == waiting_for_the_notify)
        {
            //_ E->C
            new_header = p_lock_chain->old_object_header & QUICK_HASH_MASK;

            new_header |= p_lock_chain->p_orp_thread->
                                        quick_thread_index_shifted_left;

            assert(p_lock_chain->lock_recursion_count_shifted_left != 0);

            assert(  (new_header & QUICK_RECURSION_MASK) == 0  );

            p_lock_chain->old_object_header = new_header;

            p_current_orpthread->p_current_object = 0;
            RELEASE_BUSY_BIT(p_header); //////////////////////////////////////////////////
            return;
        }

        assert(0);  // p_lock_chain->lock_or_wait_state is wrong
        return;
    }
}


#if defined(USE_IA64_JIT)

// These are all stubs for IA64 (NT and Linux)

// IA64 build: intentionally empty.  The x86 implementation of this routine
// patches a jmp instruction into a code buffer; this stub only keeps the
// symbol defined when USE_IA64_JIT is set.
void orp_monitor_multithread()
{
}

#endif // USE_IA64_JIT


#if 0 //000000000000000000000
// Naked __stdcall stub for monitor enter.  It takes one 4-byte stack
// argument (the object) and pops it itself with "ret 4".
//
// NOTE: the very first instruction is an unconditional jump to
// do_it_the_slow_way, so the inline fast path that follows it is currently
// unreachable; every call builds a java-to-native frame and calls the routine
// whose address is stored in address_of_orp_monitor_enter.
//
// The bypassed fast path would: test-and-set the busy bit of the header word
// at [object - 4]; go slow if the bit was already set or if the header's
// thread-index/recursion fields are non-zero; otherwise OR the value read
// from fs:0C0h into the header, clear the busy bit and return (A->B).
_declspec(naked) void __stdcall orp_monitor_enter_naked(Java_java_lang_Object *)
{
   // this uses a field in NT 4.0 teb (expects THREAD_INDEX|RECURSION_COUNT in it)
    __asm {
        // Unconditional: everything up to do_it_the_slow_way2 is dead code.
        jmp do_it_the_slow_way

        mov eax, [esp + 4]  // the object that needs locking

        sub eax, 4  // this points to the object's header
        // Test-and-set the busy bit; CF receives the bit's previous value.
        // NOTE(review): orp_lock_prefix is presumably the x86 LOCK prefix -- confirm.
        orp_lock_prefix bts [eax], BUSY_FORWARDING_OFFSET

        // get p_TLS_orpthread->quick_thread_index_shifted_
        // left_with_recursion_count_set_to_one


#if defined(ORP_NT)       
        mov edx, fs:0C0h  // NT 4.0 specific hack 
#else
#error
#endif
		mov ecx, [eax]  // the header is now in ecx

        // The two mov instructions above do not modify flags, so CF still
        // holds the bts result: set means the busy bit was already taken.
        jc do_it_the_slow_way

        // Non-zero thread-index/recursion bits: not the simple unlocked case.
        and ecx, QUICK_THREAD_INDEX_MASK | QUICK_RECURSION_MASK
        cmp ecx, 0
        jne do_it_the_slow_way2

        //_ A->B
        mov ecx, [eax]  // get the object header, set the bits 

        // or in p_TLS_orpthread->quick_thread_index_shifted_
        // left_with_recursion_count_set_to_one
        or ecx, edx

        // Storing the header with the busy bit cleared also releases it.
        and ecx, ~BUSY_FORWARDING_BIT

        mov [eax], ecx

        ret 4

    do_it_the_slow_way2:
        // Reached only after this stub set the busy bit itself (CF was
        // clear), so clear it again before handing off to the slow path.
        mov ecx, [eax]
        and ecx, ~BUSY_FORWARDING_BIT
        mov [eax], ecx

    do_it_the_slow_way:

        // below 2 lines replace SETUP_J2N_FRAME macro
        // since we need an indirect call to make it 
        // easy to relocate the executable image
        // see orp_monitor_init()
        mov eax, address_of_setup_java_to_native_frame
        call eax

        // Push the argument for the call below.
        // NOTE(review): presumably the caller's object argument now sits
        // TYPE J2N_Saved_State bytes above esp after the frame setup -- confirm.
        push    dword ptr [esp + TYPE J2N_Saved_State]
        mov eax, address_of_orp_monitor_enter
        call eax

        // below 2 lines replace POP_J2N_FRAME
        // see comment above
        mov eax, address_of_pop_java_to_native_frame
        call eax

        ret 4
    }
} //orp_monitor_enter_naked


// Header bits used by the *_single_naked stubs: OR-ed into the object header
// on enter and compared against it on exit.  Assigned in
// orp_monitor_singlethread().
POINTER_SIZE_INT single_thread_w_recursion_set_to_one;  // protected by p_thread_lock->lock()

// Naked __stdcall stub for monitor enter, single-thread variant.  It takes
// one 4-byte stack argument (the object) and pops it with "ret 4".
//
// Unlike orp_monitor_enter_naked, this stub never touches the busy bit and
// uses plain (non-locked) loads and stores on the header word at
// [object - 4].  If the header's thread-index/recursion fields are zero it
// ORs in the global single_thread_w_recursion_set_to_one and returns (A->B);
// otherwise it builds a java-to-native frame and calls the routine whose
// address is stored in address_of_orp_monitor_enter.
_declspec(naked) void __stdcall orp_monitor_enter_single_naked(Java_java_lang_Object *)
{
   // Unlike the multi-thread stub, this one does not read the NT 4.0 teb; the
   // THREAD_INDEX|RECURSION_COUNT value comes from
   // single_thread_w_recursion_set_to_one instead.
    __asm {
        mov eax, [esp + 4]  // the object that needs locking

        sub eax, 4  // this points to the object's header

        // get p_TLS_orpthread->quick_thread_index_shifted_
        // left_with_recursion_count_set_to_one

		mov ecx, [eax]  // the header is now in ecx

        // Non-zero thread-index/recursion bits: take the slow path.
        and ecx, QUICK_THREAD_INDEX_MASK | QUICK_RECURSION_MASK
        cmp ecx, 0
        jne do_it_the_slow_way2

        //_ A->B
        mov ecx, [eax]  // get the object header, set the bits 

        // or in p_TLS_orpthread->quick_thread_index_shifted_
        // left_with_recursion_count_set_to_one

        or ecx, [single_thread_w_recursion_set_to_one]

        mov [eax], ecx

        ret 4

do_it_the_slow_way2:

        // Indirect calls through the cached helper addresses (set up in
        // orp_monitor_init()).
        mov eax, address_of_setup_java_to_native_frame
        call eax

        // Push the argument for the call below.
        // NOTE(review): presumably the caller's object argument now sits
        // TYPE J2N_Saved_State bytes above esp after the frame setup -- confirm.
        push    dword ptr [esp + TYPE J2N_Saved_State]
        mov eax, address_of_orp_monitor_enter
        call eax

        mov eax, address_of_pop_java_to_native_frame
        call eax

        ret     4
    }

} //orp_monitor_enter_single_naked


// Naked __stdcall stub for monitor exit.  It takes one 4-byte stack argument
// (the object) and pops it itself with "ret 4".
//
// NOTE: the very first instruction is an unconditional jump to
// do_it_the_slow_way, so the inline fast path that follows it is currently
// unreachable; every call builds a java-to-native frame and calls the routine
// whose address is stored in address_of_orp_monitor_exit.
//
// The bypassed fast path would: test-and-set the busy bit of the header word
// at [object - 4]; go slow if the bit was already set or if the header's
// thread-index/recursion fields differ from the value read from fs:0C0h;
// otherwise keep only the QUICK_HASH_MASK bits of the header and return (B->A).
_declspec(naked) void __stdcall orp_monitor_exit_naked(Java_java_lang_Object *)
{
   // this uses a field in NT 4.0 teb (expects THREAD_INDEX|RECURSION_COUNT in it)
    __asm {
        // Unconditional: everything up to do_it_the_slow_way2 is dead code.
        jmp do_it_the_slow_way
        mov eax, [esp + 4]  // the object whose monitor is being exited
        sub eax, 4  // this points to the object's header
        // Test-and-set the busy bit; CF receives the bit's previous value.
        // NOTE(review): orp_lock_prefix is presumably the x86 LOCK prefix -- confirm.
        orp_lock_prefix bts [eax], BUSY_FORWARDING_OFFSET

        // get p_TLS_orpthread->quick_thread_index_shifted_
        //  left_with_recursion_count_set_to_one


#if defined(ORP_NT)
		mov edx, fs:0C0h
#else
#error
#endif
        mov ecx, [eax]

        // The two mov instructions above do not modify flags, so CF still
        // holds the bts result: set means the busy bit was already taken.
        jc do_it_the_slow_way

        // Fast path only when the thread-index/recursion bits exactly equal
        // the value read from fs:0C0h.
        and ecx, (QUICK_THREAD_INDEX_MASK | QUICK_RECURSION_MASK)
        cmp ecx, edx
        jne do_it_the_slow_way2

        //_ B->A
        // NOTE(review): there is no explicit busy-bit clear on this path;
        // presumably QUICK_HASH_MASK excludes BUSY_FORWARDING_BIT -- confirm.
        mov ecx, [eax]
        and ecx, QUICK_HASH_MASK
        mov [eax], ecx

        ret 4

    do_it_the_slow_way2:
        // Reached only after this stub set the busy bit itself (CF was
        // clear), so clear it again before handing off to the slow path.
        mov ecx, [eax]
        and ecx, ~BUSY_FORWARDING_BIT
        mov [eax], ecx

    do_it_the_slow_way:

        // below 2 lines replace SETUP_J2N_FRAME macro
        // since we need an indirect call to make it 
        // easy to relocate the executable image
        // see orp_monitor_init()
        mov eax, address_of_setup_java_to_native_frame
        call eax

        // Push the argument for the call below.
        // NOTE(review): presumably the caller's object argument now sits
        // TYPE J2N_Saved_State bytes above esp after the frame setup -- confirm.
        push    dword ptr [esp + TYPE J2N_Saved_State]
        mov eax, address_of_orp_monitor_exit
        call eax

        // below 2 lines replace POP_J2N_FRAME
        // see comment above
        mov eax, address_of_pop_java_to_native_frame
        call eax

        ret 4
    }
} //orp_monitor_exit_naked


// Naked __stdcall stub for monitor exit, single-thread variant.  It takes one
// 4-byte stack argument (the object) and pops it with "ret 4".
//
// This stub never touches the busy bit and uses plain (non-locked) loads and
// stores on the header word at [object - 4].  If the header's
// thread-index/recursion fields equal single_thread_w_recursion_set_to_one it
// keeps only the QUICK_HASH_MASK bits of the header and returns (B->A);
// otherwise it builds a java-to-native frame and calls the routine whose
// address is stored in address_of_orp_monitor_exit.
_declspec(naked) void __stdcall orp_monitor_exit_single_naked(Java_java_lang_Object *)
{
   // Unlike the multi-thread stub, this one does not read the NT 4.0 teb; the
   // THREAD_INDEX|RECURSION_COUNT value comes from
   // single_thread_w_recursion_set_to_one instead.
    __asm {
        mov eax, [esp + 4]  // the object whose monitor is being exited
        sub eax, 4  // this points to the object's header

        // get p_TLS_orpthread->quick_thread_index_shifted_
        //  left_with_recursion_count_set_to_one

        mov ecx, [eax]

        and ecx, (QUICK_THREAD_INDEX_MASK | QUICK_RECURSION_MASK)

        // Anything other than an exact match takes the slow path.
        cmp ecx, [single_thread_w_recursion_set_to_one]
        jne do_it_the_slow_way2

        //_ B->A
        mov ecx, [eax]
        and ecx, QUICK_HASH_MASK
        mov [eax], ecx

        ret 4

    do_it_the_slow_way2:

        // Indirect calls through the cached helper addresses (set up in
        // orp_monitor_init()).
        mov eax, address_of_setup_java_to_native_frame
        call eax

        // Push the argument for the call below.
        // NOTE(review): presumably the caller's object argument now sits
        // TYPE J2N_Saved_State bytes above esp after the frame setup -- confirm.
        push    dword ptr [esp + TYPE J2N_Saved_State]
        mov eax, address_of_orp_monitor_exit
        call eax

        mov eax, address_of_pop_java_to_native_frame
        call eax

        ret 4
    }
} //orp_monitor_exit_single_naked


// Reports whether the calling thread is recorded as the owner of p_obj's
// monitor.
//
// The object's header word is sampled while holding its busy bit.  In the
// SLOW_LOCKING representation the owner's thread-index bits are read from the
// old_object_header of the lock block the header points to; otherwise they
// are read from the header itself.  The result is true when those bits equal
// p_TLS_orpthread->quick_thread_index_shifted_left.
bool __stdcall  do_I_own_this_monitor (/*struct*/ Java_java_lang_Object *p_obj)
{
    POINTER_SIZE_INT *p_header = P_OBJ_INFO(p_obj);

    // Spin on the busy bit: bts leaves the bit's previous value in CF, so a
    // clear carry means this thread is the one that set it.
    for (;;) {
        __asm {
            mov eax, p_header
            orp_lock_prefix bts [eax], BUSY_FORWARDING_OFFSET

            jnc header_is_now_locked
        }
        SleepEx(0, FALSE);  // yield the CPU and try to get the lock again
    }

header_is_now_locked:

    POINTER_SIZE_INT owner_index_bits;

    if ( (*p_header & SLOW_LOCKING) != SLOW_LOCKING ) {
        // Quick representation: the owner is encoded directly in the header.
        owner_index_bits = *p_header & QUICK_THREAD_INDEX_MASK;
    }
    else {
        // Slow representation: the header holds a pointer to the first lock
        // block, which carries the saved header bits.
        Lock_Block *p_first_block = (Lock_Block *)(*p_header & LOCK_BLOCK_POINTER_MASK);
        owner_index_bits = p_first_block->old_object_header & QUICK_THREAD_INDEX_MASK;
    }

    RELEASE_BUSY_BIT(p_header);

    return p_TLS_orpthread->quick_thread_index_shifted_left == owner_index_bits;
}


void orp_monitor_singlethread()
{
    assert(0);  // turned off for now

    extern bool orp_initialized;
    if (p_active_threads_list == 0) {
    //assert(orp_initialized == false);
        single_thread_w_recursion_set_to_one = SINGLE_THREAD_W_RECURSION_SET_TO_ONE;
    }
    else {
        ORP_thread *p_scan = p_active_threads_list;

        uint32 active_count = 0;
        ORP_thread *p_candidate = 0;
        // below only optimizes for the case where only ONE java thread is in
        // thread_is_running state and all other java threads are in thread_is_waiting
        // state.  Thus thread_is_sleeping and thread_is_dying do not trigger the single
        // thread optimization -- this makes the logic simpler.
        while(p_scan) {
            assert( (p_scan->app_status == thread_is_sleeping) ||
                    (p_scan->app_status == thread_is_running)  ||
                    (p_scan->app_status == thread_is_waiting)  ||
                    (p_scan->app_status == thread_is_dying)			);

            if (p_scan->app_status != thread_is_waiting) {
                p_candidate = p_scan;
                active_count++;
            }
            p_scan = p_scan->p_active;
        }

        if (active_count != 1) {
            return;  // multiple ...running/sleeping/dying threads, do not go single thread
        }

        if (p_candidate->app_status != thread_is_running) {
            return;  // only one candidate thread and it is sleeping or dying thus skip
        }

        single_thread_w_recursion_set_to_one = p_candidate->
            quick_thread_index_shifted_left_with_recursion_set_to_one;
        ORP_thread *p_thr = p_active_threads_list;
        for(uint32 vv2 = 0; p_thr; vv2++) {
            p_thr = p_thr->p_active;
        }
    }

    //orp_cout << "GOING SINGLE THREAD ------------------------ " << hex << single_thread_w_recursion_set_to_one << endl;

    uint32 offset = (uint32)orp_monitor_enter_single_naked - (uint32)mon_enter_code_pointer - 5;
    uint8 *p_jmp = (uint8 *)mon_enter_code_pointer;
    *p_jmp = 0xe9; // emit an 'e9' jmp op code
    p_jmp++;
    uint32 *p_offset = (uint32 *)p_jmp;
    *p_offset = offset;

    offset = (uint32)orp_monitor_exit_single_naked - (uint32)mon_exit_code_pointer - 5;
    p_jmp = (uint8 *)mon_exit_code_pointer;
    *p_jmp = 0xe9; // emit an 'e9' jmp op code
    p_jmp++;
    p_offset = (uint32 *)p_jmp;
    *p_offset = offset;
}


void orp_monitor_multithread()
{
    //orp_cout << "GOING MULTI THREAD ------------------------" << endl;

#if 0
    uint32 offset = (uint32)orp_monitor_enter_naked - (uint32)mon_enter_code_pointer - 5;
    uint8 *p_jmp = (uint8 *)mon_enter_code_pointer;
    *p_jmp = 0xe9; // emit an 'e9' jmp op code
    p_jmp++;
    uint32 *p_offset = (uint32 *)p_jmp;
    *p_offset = offset;
#endif /// 0

    uint32 offset = (uint32)orp_monitor_exit_naked - (uint32)mon_exit_code_pointer - 5;
    uint8 *p_jmp = (uint8 *)mon_exit_code_pointer;
    *p_jmp = 0xe9; // emit an 'e9' jmp op code
    p_jmp++;
    uint32 *p_offset = (uint32 *)p_jmp;
    *p_offset = offset;
}
#endif // 00000000000000000000


// One-time setup for the monitor enter/exit stubs.
//
// On non-IA64 builds this caches the addresses returned by
// getaddress__setup_java_to_native_frame() and
// getaddress__pop_java_to_native_frame() so the assembly stubs can reach the
// frame helpers through indirect calls.  On IA64 builds it does nothing.
void orp_monitor_init()
{
#ifndef USE_IA64_JIT
    address_of_setup_java_to_native_frame =
        (uint32)getaddress__setup_java_to_native_frame();
    address_of_pop_java_to_native_frame =
        (uint32)getaddress__pop_java_to_native_frame();

#if 0  ////00000 
    // Disabled: allocation of the 4-byte-aligned buffers that hold the
    // patchable jmp instructions for monitor enter and monitor exit.

    address_of_orp_monitor_enter = (uint32)orp_monitor_enter;
    address_of_orp_monitor_exit = (uint32)orp_monitor_exit;

     // turned off for now  --->orp_monitor_multithread();   orp_monitor_singlethread();
    mon_enter_code_pointer = (uint32)malloc(64);
    mon_enter_code_pointer += 3;
    mon_enter_code_pointer &= 0xffFFffFC;
    p_mon_enter_code =
        ( void (__stdcall *)(Java_java_lang_Object *) )mon_enter_code_pointer;

    mon_exit_code_pointer = (uint32)malloc(64);
    mon_exit_code_pointer += 3;
    mon_exit_code_pointer &= 0xffFFffFC;
    p_mon_exit_code = 
        ( void (__stdcall *)(Java_java_lang_Object *) )mon_exit_code_pointer;
#endif /// 0000
#endif // !USE_IA64_JIT
}



///////////////////////////////////////////////////////////////////////////////
///////////////
/////////////// WARNING: end_of_mon_enter_exit_busybit_critical_zone() MUST BE
///////////////
/////////////// THE LAST PROCEDURE IN THIS FILE (mon_enter_exit.cpp)
///////////////
///////////////////////////////////////////////////////////////////////////////

// Intentionally empty.  Paired with
// start_of_mon_enter_exit_busybit_critical_zone() at the top of the file.
// NOTE(review): presumably only the addresses of the two functions are used,
// to bracket the code in this file -- confirm in in_busybit_critical_zone().
void end_of_mon_enter_exit_busybit_critical_zone()
{
////////////// THIS MUST BE THE LAST PROCEDURE IN THIS FILE
////////////// (the original comment named java_lang_Object.cpp)
////////////// SEE in_busybit_critical_zone() for details
}

#endif //#ifndef  OBJECT_LOCK_V2 



