/* Revision-control "$Id$" keyword string for this source file. */
static char dqs_c_dqs_execd_rcsid[] =
     "$Id: dqs_c_dqs_execd.c,v 1.1.1.1 1997/04/10 15:10:31 green Exp $";

/*----------------------------------------------------
 * dqs_c_dqs_execd.c Tom Green Mon Jan 31 10:42:36 1994
 *
 * Copyright 1993
 *
 * SUPER COMPUTER COMPUTATIONS RESEARCH INSTITUTE
 *            FLORIDA STATE UNIVERSITY
 *
 *
 * SCRI representatives make no claims about the
 * suitability of this software for any purpose.
 * It is provided "as is" without express or
 * implied warranty.
 *
 * $Log: dqs_c_dqs_execd.c,v $
 * Revision 1.1.1.1  1997/04/10 15:10:31  green
 * DQS 3.1.3.4.1 Distribution
 *
 * Revision 3.10  1996/11/20 23:03:10  nrl
 * Several fixes submitted by or as a result of investigations by
 * Ron Lee, Bodo Bechenback, Guntram Wolski and Frank Dwyyer.
 *
 * Revision 3.9  1996/06/27  01:55:39  nrl
 * changes to accommodate osf gcc
 *
 * Revision 3.8  1996/03/22  04:19:46  nrl
 * Added error cataloguing number to all routines
 *
 * Revision 3.7  1996/03/12  17:11:51  nrl
 * removed aborts and replaced with an error messaging scheme
 * to send email to the dqs administrator and wait for
 * actions by that administrator
 *
 * Revision 3.6  1995/02/24  23:36:07  nrl
 * Changed all Host lookups to use dqs_locate_host subroutine,
 * and changed that routine to look for the "registered host name"
 *
 * Revision 3.5  1995/02/16  20:34:11  nrl
 * Force cleared job structure to prevent garbage from fouling things up.
 * Added SIGCHLD to iomask to keep "mailer" from messing up
 * socket transfers.
 *
 * Revision 3.4  1995/02/01  23:17:31  nrl
 * Tidied up and hopefully bulletproofed "tid" management. Reversed
 * tid file naming to sort by time.
 *
 * Revision 3.3  1995/01/30  15:21:44  nrl
 * added "tid" verification between execd and qmaster to prevent
 * "ghost" jobs from persisting in visible queue. Changed ERROR messages
 * which were for information only to DEBUG messages.
 *
 * Revision 3.2  1994/06/15  15:28:51  green
 * support for using DQS trusted host list for dshd
 *
 *      passing of Host_head -n dqs_c_dqs_execd.c
 *      time stamp Host_head on deletion in dqs_c_qconf.c
 *      ck trusted host list in dqs_dshd.c
 *      grab Host_head at startup in dqs_execd.c
 *      rebuild Host_head/Host_hash in dqs_execd_rebuild_host_hash.c
 *      dqs_free_hash in dqs_hash.c
 *      grab new Host_head in dqs_load_avg.c
 *      error log/printing in dsh.c
 *
 * Bug in my syslog code(or certain vendors required nullifying use
 * of syslogd until I can track it down...
 *
 * Revision 3.1  1994/06/15  11:36:42  green
 * dsh now utilizes the same authentication routines as the qmaster.
 *
 * Revision 3.0  1994/03/07  04:13:14  green
 * 3.0 freeze
 *
 * Revision 1.1.1.1  1994/02/01  17:57:37  green
 * DQS 3.0 ALPHA
 *
 *--------------------------------------------------*/

 
#include "h.h"
#include "def.h"
#include "dqs.h"
#include "struct.h"
#include "func.h"
#include "globals.h"
#include "dqs_errno.h"

/************************************************************************/
void dqs_c_dqs_execd(sfd,request_head)
int           sfd;
dqs_list_type **request_head;

/*
  dqs_c_dqs_execd -  is used solely by the qmaster and handles requests 
  forwarded from the dqs_execd.

  The most commonly used service is that of the dqs_execd sending in
  load average information.

  Secondly is that of the dqs_execd sending in statistics of reaped
  jobs.

  Other interfaces include the dqs_execd informing the qmaster that
  it has bee restarted.
*/
  
{

     int                update_queue=FALSE;
     dqs_hash_type      *hashel_ptr;
     dqs_list_type      *request_list;
     dqs_list_type      listel;
     dqs_list_type      *lp;
     dqs_list_type      *err_rpt;
     char dqs_mail_subj[1024];
     char dqs_mail_body[1024];
     DENTER((DQS_EVENT,"dqs_c_dqs_execd"));
     
     request_list= *request_head;
     
     DPRINTF((DQS_EVENT,"------------------------------------------------------"));

     switch(request_list->int0) 
     {
          /*------------------------------------------------------*/
        case STARTING_UP:/* ??? NC need to add restarting jobs */
          DPRINTF((DQS_EVENT,"=====>STARTING_UP: dqs_execd on >%s< is starting up",
                   request_list->str0));
          bzero((char *)&listel,sizeof(listel));
          if (Host_head)
          listel.chain=Host_head;
          if (dqs_send_list(NULL,NULL,sfd,&listel)<0)
          {
               ERROR((DQS_EVENT,"DQS_ERROR_0027 error: sending ck in list to %s",request_list->str0));
               bzero((char *)&listel,sizeof(listel));
               *request_head=dqs_free_list(*request_head);
               DEXITE;
               return;
          }
          dqs_close_sfd(sfd);
          bzero((char *)&listel,sizeof(listel));
          *request_head=dqs_free_list(*request_head);
          DEXIT;
          return;
          
          /*------------------------------------------------------*/
        case LOAD_AVG:
          DPRINTF((DQS_EVENT,"=====>LOAD_AVG: dqs_execd on >%s< is reporting load of %d",
                   request_list->str0,request_list->int1));

          bzero((char *)&listel,sizeof(listel));
          if (Host_head)
          {
               DPRINTF((DQS_EVENT,"Host_head time stamps qmaster %d dqs_execd %d",
                      Host_head->int0,request_list->int3));
               if (request_list->int3!=Host_head->int0) /* they need a new host list */
               listel.chain=Host_head;
          }

          if (dqs_send_list(NULL,NULL,sfd,&listel)<0)
          {
               ERROR((DQS_EVENT,"DQS_ERROR_0028 error: sending loadavg ACK to %s",request_list->str0));
               bzero((char *)&listel,sizeof(listel));
               *request_head=dqs_free_list(*request_head);
               DEXITE;
               return;
          }
          dqs_close_sfd(sfd);
          bzero((char *)&listel,sizeof(listel));

          if (request_list->int2) /* syncing state  execd requesting cleanup*/

          dqs_tid_del_x_host(request_list->str0);

          hashel_ptr=dqs_locate_host(request_list->str0);
          if (!hashel_ptr)
          {
               ERROR((DQS_EVENT,"DQS_ERROR_0029 cannot locate host >%s<",request_list->str0));
          }
          else
          {
               if (!hashel_ptr->load_avg_ptr)
               {
                    ERROR((DQS_EVENT,"DQS_ERROR_0030 error: Host_hash is screwed"));
                    err_rpt=(dqs_list_type *)dqs_malloc(sizeof(dqs_list_type));
                    bzero((char *)&err_rpt,sizeof(err_rpt));
                    err_rpt->next= request_list;
                    err_rpt->str1= dqs_string_insert(NULL,
                        "dqs_execd process  host hash is screwed ");
            dqs_report_problem(err_rpt, FALSE);                         
                  
               }
               update_queue=TRUE;
               DTRACE;
               *hashel_ptr->load_avg_ptr=request_list->int1;
               DTRACE;
               *hashel_ptr->lt_heard_from_ptr=dqs_get_gmt();
               DTRACE;
          }
          if (update_queue)
          dqs_update_queue_state();
          *request_head=dqs_free_list(*request_head);
          DEXIT;
          return;
          
          /*------------------------------------------------------*/
        case JOB_EXIT:
          DPRINTF((DQS_EVENT,"===>JOB_EXIT: dqs_execd on \"%s\" is reporting reaped job >%s<",
                   request_list->tid->str0,request_list->str0));
          if (dqs_send_ack(sfd,request_list))
          {
               ERROR((DQS_EVENT,"DQS_ERROR_0031 error: sending ACK to %s",request_list->str0));
               *request_head=dqs_free_list(*request_head);
               DEXITE;
               return;
          }
          dqs_close_sfd(sfd);
          while (request_list)
          {
               if (dqs_locate_tid(request_list->tid))
               {  /* this transaction has already occurred */
                    DPRINTF((DQS_EVENT,"TRANSACTION ALREADY OCCURRED"));
                    request_list=request_list->next;
                    continue;
               }
               else
               {
                    dqs_save_tid(request_list->tid,DQS_EXECD);
                            request_list->tid= NULL;
               }
               dqs_job_exit(request_list);
               request_list=request_list->next;
          }
          *request_head=dqs_free_list(*request_head);
          DEXIT;
          return;
          
           /*------------------------------------------------------*/
      /* this function will be expanded in later releases to provide for  */
      /* more bulletproffing of the daemons. For this release we send an  */
      /* email to the administrator and close the socket..   at some point */
      /* the administrator may invoke teh QADMIN function to direct actions */
      /* to cleanup the failing dqs_execd                                   */
      /* the request here also contains all the job info from the dqs_execd */
      /* at time of failure and should probably be retained for use by     */
      /* more complex actions than SHUTDOWN                                */   
           
        case QMASTER_ACTION_REQUEST:
          DPRINTF((DQS_EVENT,"=====>:QMASTER_ACTION_REQUEST  from dqs_execd on >%s<",
                   request_list->str0));

          switch(request_list->int0)  {
              case ERROR_REPORT:
              case MAIL_TO_ADMINISTRATOR:
                  lp=dqs_resolve_cell(me.default_cell);   /*  get qmaster host */
                  if(lp){
                    sprintf(dqs_mail_subj,"dqs_execd in distress!");
                    strcat(dqs_mail_body,request_list->str1 );
                    dqs_send_mail(conf.administrator, me.unqualified_hostname,dqs_mail_subj, dqs_mail_body);
                   }
                   break;
              default:
                ERROR((DQS_EVENT,"DQS_ERROR_0032 error:illegal action request from deq_execd"));
          } 
          

          dqs_close_sfd(sfd);
          bzero((char *)&listel,sizeof(listel));
          *request_head=dqs_free_list(*request_head);
          DEXIT;
          return;

          /*------------------------------------------------------*/


        
        default:
          INFO((DQS_EVENT,"DQS_ERROR_0033 CASE unknown list type %d",request_list->type));
          (void) dqs_send_nak(sfd,request_list);
          dqs_close_sfd(sfd);
          *request_head=dqs_free_list(*request_head);
          DEXITE;
          return;
     }
     
}
