File: sge_gdi_packet_internal.c

package info (click to toggle)
gridengine 8.1.9%2Bdfsg-10
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 56,880 kB
  • sloc: ansic: 432,689; java: 87,068; cpp: 31,958; sh: 29,429; jsp: 7,757; perl: 6,336; xml: 5,828; makefile: 4,701; csh: 3,928; ruby: 2,221; tcl: 1,676; lisp: 669; yacc: 519; python: 503; lex: 361; javascript: 200
file content (898 lines) | stat: -rw-r--r-- 32,076 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
/*___INFO__MARK_BEGIN__*/
/*************************************************************************
 * 
 *  The Contents of this file are made available subject to the terms of
 *  the Sun Industry Standards Source License Version 1.2
 * 
 *  Sun Microsystems Inc., March, 2001
 * 
 * 
 *  Sun Industry Standards Source License Version 1.2
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.2 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
 * 
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 * 
 *   The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 * 
 *   Copyright: 2001 by Sun Microsystems, Inc.
 * 
 *   All Rights Reserved.
 * 
 ************************************************************************/
/*___INFO__MARK_END__*/

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

#ifdef KERBEROS
#  include "krb_lib.h"
#endif

#include "basis_types.h"

#include "comm/commlib.h"

#include "uti/sge_mtutil.h"
#include "uti/sge_rmon.h"
#include "uti/sge_log.h"
#include "uti/sge_tq.h"
#include "uti/sge_string.h"

#include "gdi/sge_gdi2.h"
#include "gdi/sge_gdi_packet_pb_cull.h"
#include "gdi/sge_security.h"
#include "gdi/sge_gdi_packet.h"
#include "gdi/msg_gdilib.h"

#include "sgeobj/sge_answer.h"
#include "sgeobj/sge_multi_MA_L.h"
#include "sgeobj/sge_jsv.h"

#include "msg_common.h"

/*
 * Number of seconds passed to sge_relative_timespec() for a single timed
 * condition wait in sge_gdi_packet_wait_till_handled(); after each wait the
 * "is_handled" flag of the packet is checked again.
 */
#define CLIENT_WAIT_TIME_S 1

/*
 * Task queue into which sge_gdi_packet_execute_internal() stores packets
 * (tagged SGE_TQ_GDI_PACKET). It is NULL at load time; presumably it is
 * created elsewhere before internal requests are issued -- TODO confirm.
 */
sge_tq_queue_t *Master_Task_Queue = NULL;

/****** gdi/request_internal/sge_gdi_packet_create_multi_answer() ***********
*  NAME
*     sge_gdi_packet_create_multi_answer() -- create multi answer 
*
*  SYNOPSIS
*     static bool 
*     sge_gdi_packet_create_multi_answer(sge_gdi_ctx_class_t* ctx, 
*                                        lList **answer_list, 
*                                        sge_gdi_packet_class_t **packet, 
*                                        lList **malpp) 
*
*  FUNCTION
*     Creates a multi answer element ("malpp") from the given "packet".
*     The lists and answer lists for the (multi) GDI request will be
*     moved from the task structures contained in the packet into the
*     multi answer list. After all information has been moved, the packet
*     and all its subelements will be freed so that *packet will be NULL
*     when the function returns.
*
*     There are no errors expected from this function. So the return
*     value "false" or a filled "answer_list" will never be seen
*     after return.
*
*  INPUTS
*     sge_gdi_ctx_class_t* ctx        - context (not used) 
*     lList **answer_list             - answer_list (not used) 
*     sge_gdi_packet_class_t **packet - packet 
*     lList **malpp                   - multi answer
*
*  RESULT
*     static bool - error state
*        true  - always
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_create_multi_answer() is MT safe 
*
*  SEE ALSO
*     gdi/request_internal/sge_gdi_packet_execute_external() 
*     gdi/request_internal/sge_gdi_packet_execute_internal() 
*     gdi/request_internal/sge_gdi_packet_wait_for_result_external()
*     gdi/request_internal/sge_gdi_packet_wait_for_result_internal()
*     sge_gdi_packet_is_handled()
*******************************************************************************/
static bool
sge_gdi_packet_create_multi_answer(sge_gdi_ctx_class_t* ctx, lList **answer_list,
                                   sge_gdi_packet_class_t **packet, lList **malpp)
{
   sge_gdi_task_class_t *task = NULL;
   bool ret = true;

   /* the trace name has to match the function, it is what shows up in debug output */
   DENTER(TOP_LAYER, "sge_gdi_packet_create_multi_answer");

   /* 
    * make multi answer list and move all data contained in packet 
    * into that structure; one MA_Type element keyed by the task id is
    * added to "malpp" for every task of the packet
    */
   task = (*packet)->first_task;
   while (task != NULL) {
      u_long32 operation = SGE_GDI_GET_OPERATION(task->command);
      u_long32 sub_command = SGE_GDI_GET_SUBCOMMAND(task->command);
      lListElem *map = lAddElemUlong(malpp, MA_id, task->id, MA_Type);

      /*
       * Hand the lists over only if there is an element that can take them.
       * Otherwise they stay attached to the task instead of being dropped
       * by resetting the pointers (presumably they are then released
       * together with the packet below -- TODO confirm).
       */
      if (map != NULL) {
         /* only these requests return objects to the caller */
         if (operation == SGE_GDI_GET || operation == SGE_GDI_PERMCHECK ||
             (operation == SGE_GDI_ADD && sub_command == SGE_GDI_RETURN_NEW_VERSION)) {
            lSetList(map, MA_objects, task->data_list);
            task->data_list = NULL;
         }

         lSetList(map, MA_answers, task->answer_list);
         task->answer_list = NULL;
      }

      task = task->next;
   }

   /*
    * It is time to free the element. It is really not needed anymore.
    */
   sge_gdi_packet_free(packet);

   /* "ctx" and "answer_list" are not used; "ret" is never changed from true */
   DRETURN(ret);
}


/****** gdi/request_internal/sge_gdi_packet_wait_till_handled() *************
*  NAME
*     sge_gdi_packet_wait_till_handled() -- wait til packet is handled 
*
*  SYNOPSIS
*     void 
*     sge_gdi_packet_wait_till_handled(sge_gdi_packet_class_t *packet) 
*
*  FUNCTION
*     This function blocks the calling thread till another one executes
*     sge_gdi_packet_broadcast_that_handled(). Multiple threads can use
*     this call to be notified when the packet is no longer accessed
*     by anyone else.
*
*     This function is used to synchronize packet producers (listener,
*     scheduler, jvm thread ...) with packet consumers (worker threads)
*     which all use a packet queue to synchronize the access to
*     packet elements. 
*
*     Packet producers store packets in the packet queue and then
*     they call this function to wait that they can access the packet
*     structure again. 
*
*  INPUTS
*     sge_gdi_packet_class_t *packet - packet element 
*
*  RESULT
*     void - none
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_wait_till_handled() is MT safe 
*
*  SEE ALSO
*     gdi/request_internal/Master_Packet_Queue
*     gdi/request_internal/sge_gdi_packet_queue_wait_for_new_packet()
*     gdi/request_internal/sge_gdi_packet_queue_store_notify()
*     gdi/request_internal/sge_gdi_packet_broadcast_that_handled()
*     gdi/request_internal/sge_gdi_packet_is_handled()
*******************************************************************************/
void
sge_gdi_packet_wait_till_handled(sge_gdi_packet_class_t *packet)
{
   DENTER(TOP_LAYER, "sge_gdi_packet_wait_till_handled");

   /* without a packet there is nothing to wait for */
   if (packet == NULL) {
      DRETURN_VOID;
   }

   sge_mutex_lock(GDI_PACKET_MUTEX, SGE_FUNC, __LINE__, &(packet->mutex));

   /*
    * The flag is tested with the packet mutex held and tested again after
    * every return from the timed wait, no matter why the wait ended. The
    * timespec for each wait is obtained from sge_relative_timespec() with
    * CLIENT_WAIT_TIME_S.
    */
   for (;;) {
      struct timespec wait_until;

      if (packet->is_handled != false) {
         break;
      }

      DPRINTF(("waiting for packet to be handling by worker\n"));
      sge_relative_timespec(CLIENT_WAIT_TIME_S, &wait_until);
      pthread_cond_timedwait(&(packet->cond), &(packet->mutex), &wait_until);
   }

   sge_mutex_unlock(GDI_PACKET_MUTEX, SGE_FUNC, __LINE__, &(packet->mutex));

   DPRINTF(("got signal that packet is handled\n"));

   DRETURN_VOID;
}

/****** gdi/request_internal/sge_gdi_packet_is_handled() ********************
*  NAME
*     sge_gdi_packet_is_handled() -- returns if packet was handled by worker
*
*  SYNOPSIS
*     bool 
*     sge_gdi_packet_is_handled(sge_gdi_packet_class_t *packet) 
*
*  FUNCTION
*     Returns if the given packet was already handled by a worker thread.
*     "true" means that the packet is completely done so that a call
*     to sge_gdi_packet_wait_till_handled() will return immediately. If 
*     "false" is returned then the packet is not finished, so a call to
*     sge_gdi_packet_wait_till_handled() might block when it is called 
*     afterwards.
*
*  INPUTS
*     sge_gdi_packet_class_t *packet - packet element 
*
*  RESULT
*     bool - true    packet was already handled by a worker
*            false   packet is not done. 
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_is_handled() is MT safe 
*
*  SEE ALSO
*     gdi/request_internal/Master_Packet_Queue
*     gdi/request_internal/sge_gdi_packet_queue_wait_for_new_packet()
*     gdi/request_internal/sge_gdi_packet_queue_store_notify()
*     gdi/request_internal/sge_gdi_packet_broadcast_that_handled()
*******************************************************************************/
bool
sge_gdi_packet_is_handled(sge_gdi_packet_class_t *packet)
{
   /* a NULL packet is reported as handled, there is nothing to wait for */
   bool ret = true;

   /*
    * The name given here is also the SGE_FUNC passed to the mutex calls
    * below, so it has to be the name of this function.
    */
   DENTER(TOP_LAYER, "sge_gdi_packet_is_handled");
   if (packet != NULL) {   
      /* read the flag under the packet mutex, it is set by another thread */
      sge_mutex_lock(GDI_PACKET_MUTEX, SGE_FUNC, __LINE__, &(packet->mutex));
      ret = packet->is_handled;
      sge_mutex_unlock(GDI_PACKET_MUTEX, SGE_FUNC, __LINE__, &(packet->mutex));
   }
   DRETURN(ret);
}

/****** gdi/request_internal/sge_gdi_packet_broadcast_that_handled() ********
*  NAME
*     sge_gdi_packet_broadcast_that_handled() -- broadcast to waiting threads 
*
*  SYNOPSIS
*     void 
*     sge_gdi_packet_broadcast_that_handled(sge_gdi_packet_class_t *packet) 
*
*  FUNCTION
*     This function wakes up all threads waiting in 
*     sge_gdi_packet_wait_till_handled(). 
*
*     This function is used to synchronize packet producers (listener,
*     scheduler, jvm thread ...) with packet consumers (worker threads)
*     which all use a packet queue to synchronize the access to
*     packet elements. 
*
*     Packet producers store packets in the packet queue and then
*     they call sge_gdi_packet_wait_till_handled(). Packet consumers
*     fetch a packet from the queue. After they have finished using
*     the packet structure they call this function to notify
*     the waiting threads that the packet is not accessed anymore.
*
*  INPUTS
*     sge_gdi_packet_class_t *packet - packet element 
*
*  RESULT
*     void - NONE
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_broadcast_that_handled() is MT safe 
*
*  SEE ALSO
*     gdi/request_internal/Master_Packet_Queue
*     gdi/request_internal/sge_gdi_packet_queue_wait_for_new_packet()
*     gdi/request_internal/sge_gdi_packet_queue_store_notify()
*     gdi/request_internal/sge_gdi_packet_wait_till_handled()
*******************************************************************************/
void
sge_gdi_packet_broadcast_that_handled(sge_gdi_packet_class_t *packet)
{
   DENTER(TOP_LAYER, "sge_gdi_packet_broadcast_that_handled");

   /*
    * Tolerate a missing packet in the same way as
    * sge_gdi_packet_wait_till_handled() and sge_gdi_packet_is_handled() do,
    * instead of dereferencing a NULL pointer.
    */
   if (packet != NULL) {
      /*
       * The flag is changed and the condition is signalled while the packet
       * mutex is held, which is the mutex the waiting side holds when it
       * tests the flag.
       */
      sge_mutex_lock(GDI_PACKET_MUTEX, SGE_FUNC, __LINE__, &(packet->mutex));
      packet->is_handled = true; 
      DPRINTF(("broadcast that packet is handled\n"));
      pthread_cond_broadcast(&(packet->cond));
      sge_mutex_unlock(GDI_PACKET_MUTEX, SGE_FUNC, __LINE__, &(packet->mutex));
   }

   DRETURN_VOID;   
}


/****** gdi/request/get_gdi_retries_value() ***************************************
*  NAME
*     get_gdi_retries_value() -- get commlib parameter list value "gdi_retries"
*
*  SYNOPSIS
*     static int get_gdi_retries_value(void) 
*
*  FUNCTION
*     Returns the number of retries for the synchronous receive of a
*     GDI response message.
*
*  INPUTS
*     void - none
*
*  RESULT
*     static int - configured "gdi_retries" value (set by qmaster_params)
*
*  SEE ALSO
*     gdi/request/get_cl_ping_value()
*     gdi/request/get_gdi_retries_value()
*******************************************************************************/
static int get_gdi_retries_value(void) {
   char *gdi_retries = NULL;
   int retries = 0;   /* used when the parameter is not set or not usable */

   cl_com_get_parameter_list_value("gdi_retries", &gdi_retries);
   if (gdi_retries != NULL) {
      char *end = NULL;
      long value;

      /*
       * strtol() instead of atoi(): atoi() cannot report errors and its
       * behavior is undefined if the value does not fit. As with atoi() a
       * leading number followed by other characters is still accepted. A
       * string without digits or a value outside the range of int leaves
       * "retries" at 0. Negative values are passed through; the receive
       * loop of sge_gdi_packet_execute_external() treats -1 as
       * "retry forever".
       */
      errno = 0;
      value = strtol(gdi_retries, &end, 10);
      if (end != gdi_retries && errno != ERANGE &&
          value >= INT_MIN && value <= INT_MAX) {
         retries = (int)value;
      }

      /* the string handed out by the parameter list has to be released here */
      sge_free(&gdi_retries);
   }
   return retries;
}

/****** gdi/request/get_cl_ping_value() *******************************************
*  NAME
*     get_cl_ping_value() -- get commlib parameter list value "cl_ping"
*
*  SYNOPSIS
*     static bool get_cl_ping_value(void) 
*
*  FUNCTION
*     Returns the value of the "cl_ping" commlib parameter. The value is true
*     or false and specifies if gdi should send a SIM message to qmaster which
*     should be used for decision making if qmaster is still working or
*     unreachable.
*
*  INPUTS
*     void - none
*
*  RESULT
*     static bool - true or false (enabled or disabled)
*
*  SEE ALSO
*     gdi/request/get_cl_ping_value()
*     gdi/request/get_gdi_retries_value()
*******************************************************************************/
static bool get_cl_ping_value(void) {
   char *param_value = NULL;
   bool ping_enabled = false;

   cl_com_get_parameter_list_value("cl_ping", &param_value);

   /* parameter not available: ping stays disabled */
   if (param_value == NULL) {
      return false;
   }

   /* only the word "true" (compared without regard to case) enables the ping */
   if (strcasecmp(param_value, "true") == 0) {
      ping_enabled = true;
   }

   /* release the string that was handed out for the parameter */
   sge_free(&param_value);

   return ping_enabled;
}



/****** gdi/request_internal/sge_gdi_packet_execute_external() ****************
*  NAME
*     sge_gdi_packet_execute_external() -- execute a GDI packet 
*
*  SYNOPSIS
*     bool 
*     sge_gdi_packet_execute_external(sge_gdi_ctx_class_t* ctx, 
*                                     lList **answer_list, 
*                                     sge_gdi_packet_class_t *packet) 
*
*  FUNCTION
*     This function sends a GDI "packet" from an external client
*     to the qmaster process. If the packet is handled on master side
*     the response is sent back to the client which then will fill
*     "packet" with the received information.
*
*     To send packets from internal clients (threads) the function 
*     sge_gdi_packet_execute_internal() has to be used.
*
*     Please note that in contrast to sge_gdi_packet_execute_internal()
*     this function assures that the GDI request contained in the
*     "packet" is completely executed (either successfully or with errors)
*     after this function returns.
*
*     sge_gdi_packet_wait_for_result_external() creates a GDI multi answer
*     list structure from the information contained in the packet after
*     this function has been called.
*     
*  INPUTS
*     sge_gdi_ctx_class_t* ctx       - context handle 
*     lList **answer_list            - answer list 
*     sge_gdi_packet_class_t *packet - packet 
*
*  RESULT
*     bool - error state
*        true   - success
*        false  - error (answer_lists will contain details)
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_execute_external() is MT safe 
*
*  SEE ALSO
*     gdi/request_internal/sge_gdi_packet_execute_external() 
*     gdi/request_internal/sge_gdi_packet_execute_internal() 
*     gdi/request_internal/sge_gdi_packet_wait_for_result_external()
*     gdi/request_internal/sge_gdi_packet_wait_for_result_internal()
*******************************************************************************/
bool
sge_gdi_packet_execute_external(sge_gdi_ctx_class_t* ctx, lList **answer_list, 
                                sge_gdi_packet_class_t *packet) 
{
   bool ret = true;
   sge_pack_buffer pb;
   bool pb_initialized = false;   /* true as soon as pb owns resources */
   sge_pack_buffer rpb;
   sge_gdi_packet_class_t *ret_packet = NULL;
   int commlib_error = CL_RETVAL_OK;
   u_long32 message_id = 0;
#ifdef KERBEROS
   /* true if TGT forwarding was requested and has to be withdrawn again */
   bool tgt_forward_requested = false;
#endif

   DENTER(TOP_LAYER, "sge_gdi_packet_execute_external");

   /* here the packet gets a unique request id */
   packet->id = gdi_state_get_next_request_id();

#ifdef KERBEROS
   /*
    * request that the Kerberos library forward the TGT. It is remembered
    * that this was done so that the request can be cleared on every exit
    * path of this function and not only after a successful execution.
    */
   if (packet->first_task->target == SGE_JB_LIST && 
       SGE_GDI_GET_OPERATION(packet->first_task->command) == SGE_GDI_ADD) {
      krb_set_client_flags(krb_get_client_flags() | KRB_FORWARD_TGT);
      krb_set_tgt_id(packet->id);
      tgt_forward_requested = true;
   }
#endif

   /*
    * Now we will execute the JSV script if we got a job submission request.
    * It is necessary to dechain the job which is verified because the
    * job verification process might destroy the job and create a completely
    * new one with adjusted job attributes.
    */
   if (ret) {
      sge_gdi_task_class_t *task = packet->first_task;

      if (task->target == SGE_JB_LIST &&
          ((SGE_GDI_GET_OPERATION(task->command) == SGE_GDI_ADD) ||
           (SGE_GDI_GET_OPERATION(task->command) == SGE_GDI_COPY))) {
         lListElem *job, *next_job;

         /*
          * The walk starts at the last job, so it has to continue with the
          * predecessor; stepping to the successor of the last element would
          * end the loop after a single job. Verified jobs are put back with
          * lInsertElem(list, NULL, job).
          * NOTE(review): that call is expected to insert at the head of the
          * list, which keeps the original job order for a backward walk --
          * confirm.
          */
         next_job = lLast(task->data_list);
         while (ret && ((job = next_job) != NULL)) {
            next_job = lPrev(job);

            lDechainElem(task->data_list, job);
            ret &= jsv_do_verify(ctx, JSV_CONTEXT_CLIENT, &job, answer_list, false);
            lInsertElem(task->data_list, NULL, job);
         }
      }
   }

   /* 
    * pack packet into packbuffer
    */ 
   /* 
    * EB: TODO: dry run necessary to calculate initial buffer size?  
    *    JG told me during review of ST that it might be possible that
    *    the dry run to calculate the buffer size might be slower
    *    than directly packing. RD might have done tests...
    */
   if (ret) {
      size_t size = sge_gdi_packet_get_pb_size(packet);
      /*
       * The packbuffer is initialized even if the calculated size is 0.
       * Otherwise sge_gdi_packet_pack() below would work on an
       * uninitialized buffer. If the initialization does not succeed the
       * request is given up here.
       */
      int pack_ret = init_packbuffer(&pb, size, 0);

      if (pack_ret != PACK_SUCCESS) {
         SGE_ADD_MSG_ID(sprintf(SGE_EVENT, "unable to prepare packbuffer for sending request"));
         /*
          * report the message to the caller; the same status is used as
          * for all other failures reported by this function
          */
         answer_list_add(answer_list, SGE_EVENT, STATUS_NOQMASTER, ANSWER_QUALITY_ERROR);
         ret = false;
      } else {
         pb_initialized = true;
      }
   }
   if (ret) {
      ret = sge_gdi_packet_pack(packet, answer_list, &pb);
   }

   /* 
    * send packbuffer to master. keep care that user does not see
    * commlib related error messages if master is not up and running
    */
   if (ret) {
      const char *commproc = prognames[QMASTER];
      const char *host = ctx->get_master(ctx, false);
      int id = 1;
      int response_id = 0;
      commlib_error = sge_gdi2_send_any_request(ctx, 0, &message_id, host, commproc, id, &pb,
                                                TAG_GDI_REQUEST, response_id, NULL);
      if (commlib_error != CL_RETVAL_OK) {
         ret = false;
         /* find out if the master is reachable at all to select the message */
         commlib_error = ctx->is_alive(ctx);
         if (commlib_error != CL_RETVAL_OK) {
            u_long32 sge_qmaster_port = ctx->get_sge_qmaster_port(ctx);
            const char *mastername = ctx->get_master(ctx, true);

            if (commlib_error == CL_RETVAL_CONNECT_ERROR ||
                commlib_error == CL_RETVAL_CONNECTION_NOT_FOUND ) {
               /* For the default case, just print a simple message */
               SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_UNABLE_TO_CONNECT_SUS,
                                      prognames[QMASTER], sge_u32c(sge_qmaster_port),
                                      mastername?mastername:"<NULL>"));            
            } else { 
               /* For unusual errors, give more detail */
               SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_CANT_SEND_MSG_TO_PORT_ON_HOST_SUSS,
                                      prognames[QMASTER], sge_u32c(sge_qmaster_port),
                                      mastername?mastername:"<NULL>", 
                                      cl_get_error_text(commlib_error))); 
            }
         } else {
            SGE_ADD_MSG_ID(sprintf(SGE_EVENT, SFNMAX, MSG_GDI_SENDINGGDIREQUESTFAILED));
         }
         answer_list_add(answer_list, SGE_EVENT, STATUS_NOQMASTER, ANSWER_QUALITY_ERROR);
      }

   }

   /* after this point we do no longer need pb - free its resources */
   if (pb_initialized == true) {
      clear_packbuffer(&pb);
   }

   /* 
    * wait for response from master; also here keep care that commlib
    * related error messages are hidden if master is not up and running anymore
    */
   if (ret) {
      const char *commproc = prognames[QMASTER];
      const char *host = ctx->get_master(ctx, false);
      char rcv_host[CL_MAXHOSTLEN];
      char rcv_commproc[CL_MAXHOSTLEN];
      int tag = TAG_GDI_REQUEST;
      u_short id = 1;
      int gdi_error = CL_RETVAL_OK;
      int runs = 0;
      int retries = 0;
      bool do_ping = false;

      sge_strlcpy(rcv_host, host, CL_MAXHOSTLEN);
      sge_strlcpy(rcv_commproc, commproc, CL_MAXHOSTLEN);

      /*running this loop as long as configured in gdi_retries, doing a break after getting a gdi_request*/
      do {
         gdi_error = sge_gdi2_get_any_request(ctx, rcv_host, rcv_commproc, &id, &rpb, &tag, 
                                              true, message_id, NULL);

         /* both parameters are read again in every run of the loop */
         do_ping = get_cl_ping_value();
         retries = get_gdi_retries_value();

         if (gdi_error == CL_RETVAL_OK) {
            /*no error happened, leaving while*/
            ret = true;
            break;
         } else {
            ret = false;
            /*this error appears, if qmaster or any qmaster thread is not responding, or overloaded*/
            if (gdi_error == CL_RETVAL_SYNC_RECEIVE_TIMEOUT) {
               cl_com_SIRM_t* cl_endpoint_status = NULL;
               cl_com_handle_t* handle = NULL;
               DPRINTF(("TEST_2372_OUTPUT: CL_RETVAL_SYNC_RECEIVE_TIMEOUT: RUNS="sge_U32CFormat"\n", sge_u32c(runs)));

               handle = ctx->get_com_handle(ctx);
               if (handle != NULL) {
                  DPRINTF(("TEST_2372_OUTPUT: GDI_TIMEOUT="sge_U32CFormat"\n", sge_u32c(handle->synchron_receive_timeout)));
               }
               if (do_ping == true) {
                  DPRINTF(("TEST_2372_OUTPUT: CL_PING=TRUE\n"));
                  cl_commlib_get_endpoint_status(handle, rcv_host, rcv_commproc, id, &cl_endpoint_status);
                  if (cl_endpoint_status != NULL) {
                     if (cl_endpoint_status->application_status != 0) {
                        DPRINTF(("TEST_2372_OUTPUT: QPING: error\n"));
                     } else {
                        DPRINTF(("TEST_2372_OUTPUT: QPING: ok\n"));
                     }
                     cl_com_free_sirm_message(&cl_endpoint_status);
                  } else {
                     /* no status received: do not retry any longer */
                     DPRINTF(("TEST_2372_OUTPUT: QPING: failed\n"));
                     break;
                  }
               } else {
                  DPRINTF(("TEST_2372_OUTPUT: CL_PING=FALSE\n"));
               }
            } else {
               /* only receive timeouts are retried */
               break;
            }
         }
         /* only increment runs if retries != -1 (-1 means retry forever) */
      } while (retries == -1 || runs++ < retries);
      
      if (ret == false) {
         commlib_error = ctx->is_alive(ctx);
         if (commlib_error != CL_RETVAL_OK) {
            u_long32 sge_qmaster_port = ctx->get_sge_qmaster_port(ctx);
            const char *mastername = ctx->get_master(ctx, true);

            if (commlib_error == CL_RETVAL_CONNECT_ERROR ||
                commlib_error == CL_RETVAL_CONNECTION_NOT_FOUND ) {
               /* For the default case, just print a simple message */
               SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_UNABLE_TO_CONNECT_SUS,
                                      prognames[QMASTER], sge_u32c(sge_qmaster_port),
                                      mastername?mastername:"<NULL>"));            
            } else { 
               /* For unusual errors, give more detail */
               SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_CANT_SEND_MSG_TO_PORT_ON_HOST_SUSS,
                                      prognames[QMASTER], sge_u32c(sge_qmaster_port),
                                      mastername?mastername:"<NULL>", 
                                      cl_get_error_text(commlib_error))); 
            }
         } else {
            SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_GDI_RECEIVEGDIREQUESTFAILED_US,
                    sge_u32c(message_id),
                    cl_get_error_text(gdi_error)));
         }
         answer_list_add(answer_list, SGE_EVENT, STATUS_NOQMASTER, ANSWER_QUALITY_ERROR);
      }
   }

   /* 
    * unpack result. the returned packet contains data and/or answer lists 
    * (rpb is only filled, and therefore only cleared, after a successful
    * receive)
    */
   if (ret) {
      ret = sge_gdi_packet_unpack(&ret_packet, answer_list, &rpb);
      clear_packbuffer(&rpb); 
   }

   /* 
    * consistency check of received data:
    *    - is the packet id the same
    *    - does it contain the same number of tasks
    *    - is the task sequence and the task id of each received task the same
    */
   if (ret) {
      sge_gdi_task_class_t *send;
      sge_gdi_task_class_t *recv;
      bool gdi_mismatch = false;

      if (packet->id != ret_packet->id) {
         gdi_mismatch = true;
      }

      send = packet->first_task;
      recv = ret_packet->first_task;
      while (send != NULL && recv != NULL) {
         if (send->id == recv->id) {
            /*
             * move the received lists into the task that was sent; both
             * lists of the sent task are released first so that nothing
             * gets lost when the pointers are replaced
             */
            lFreeList(&send->data_list);
            lFreeList(&send->answer_list);
            send->data_list = recv->data_list;
            send->answer_list = recv->answer_list;
            recv->data_list = NULL;
            recv->answer_list = NULL;
         } else {
            gdi_mismatch = true;
            break;
         }
         send = send->next;
         recv = recv->next;
      }
      /* one of the packets contains more tasks than the other one */
      if (send != NULL || recv != NULL) {
         gdi_mismatch = true;
      }
      if (gdi_mismatch) {
         /* For unusual errors, give more detail */
         SGE_ADD_MSG_ID(sprintf(SGE_EVENT, SFNMAX, MSG_GDI_MISMATCH_SEND_RECEIVE));
         answer_list_add(answer_list, SGE_EVENT, STATUS_NOQMASTER, ANSWER_QUALITY_ERROR);
         ret = false;
      }
   }

#ifdef KERBEROS
   /* clear the forward TGT request, also if the request failed */
   if (tgt_forward_requested) {
      krb_set_client_flags(krb_get_client_flags() & ~KRB_FORWARD_TGT);
      krb_set_tgt_id(0);
   }
#endif

   /* ret_packet is still NULL if no response was unpacked */
   sge_gdi_packet_free(&ret_packet);
   DRETURN(ret);
}

/****** gdi/request_internal/sge_gdi_packet_execute_internal() ****************
*  NAME
*     sge_gdi_packet_execute_internal() -- execute a GDI packet 
*
*  SYNOPSIS
*     bool 
*     sge_gdi_packet_execute_internal(sge_gdi_ctx_class_t* ctx, 
*                                     lList **answer_list, 
*                                     sge_gdi_packet_class_t *packet) 
*
*  FUNCTION
*     This function stores a GDI "packet" in the "Master_Task_Queue"
*     so that it will be executed in future. This function can only
*     be used in the context of an internal GDI client (thread in 
*     qmaster). 
*
*     To send packets from external clients 
*     the function sge_gdi_packet_execute_external() has to be used.
*
*     Please note that in contrast to sge_gdi_packet_execute_external()
*     this function does not assure that the GDI request contained in the
*     "packet" is already executed after this function returns.
*
*     sge_gdi_packet_wait_for_result_internal() has to be called to 
*     assure this. That function will also create a GDI multi answer 
*     list structure from the information contained in the handled
*     packet after this function has been called.
*
*     
*  INPUTS
*     sge_gdi_ctx_class_t* ctx       - context handle 
*     lList **answer_list            - answer list 
*     sge_gdi_packet_class_t *packet - packet 
*
*  RESULT
*     bool - error state
*        true   - success
*        false  - error (answer_lists will contain details)
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_execute_internal() is MT safe 
*
*  SEE ALSO
*     gdi/request_internal/sge_gdi_packet_execute_external() 
*     gdi/request_internal/sge_gdi_packet_execute_internal() 
*     gdi/request_internal/sge_gdi_packet_wait_for_result_external()
*     gdi/request_internal/sge_gdi_packet_wait_for_result_internal()
*******************************************************************************/
bool 
sge_gdi_packet_execute_internal(sge_gdi_ctx_class_t* ctx, lList **answer_list, 
                                sge_gdi_packet_class_t *packet) 
{
   /*
    * Hands "packet" over to the Master_Task_Queue and returns the result
    * of the auth info parsing step.
    *
    * ctx         - context handle, used here to look up the master host
    * answer_list - not touched in this function; parse messages are
    *               collected in packet->first_task->answer_list instead
    * packet      - packet to be tagged and queued
    *
    * Note that the packet is stored in the queue regardless of whether
    * parsing the auth info succeeded; the return value only reflects
    * the parsing result.
    */
   bool auth_ok;
   const char *master_host;

   DENTER(TOP_LAYER, "sge_gdi_packet_execute_internal");

   /* tag the packet: fresh request id, sender is the qmaster itself */
   packet->id = gdi_state_get_next_request_id();
   packet->commproc = strdup(prognames[QMASTER]);
   master_host = ctx->get_master(ctx, false);
   packet->host = strdup(master_host);
   packet->is_intern_request = true;

   /* derive the auth information, reporting into the first task's answer list */
   auth_ok = sge_gdi_packet_parse_auth_info(packet, &(packet->first_task->answer_list));

   /* make the packet available to the worker threads */
   sge_tq_store_notify(Master_Task_Queue, SGE_TQ_GDI_PACKET, packet);

   DRETURN(auth_ok);
}

/****** gdi/request_internal/sge_gdi_packet_wait_for_result_external() ******
*  NAME
*     sge_gdi_packet_wait_for_result_external() -- wait for packet result 
*
*  SYNOPSIS
*     bool 
*     sge_gdi_packet_wait_for_result_external(sge_gdi_ctx_class_t* ctx, 
*                                             lList **answer_list, 
*                                             sge_gdi_packet_class_t **packet, 
*                                             lList **malpp) 
*
*  FUNCTION
*     Despite its name, this function does not wait. This is not necessary
*     because the GDI request handled in the execution process previously
*     is already done. A call to this function simply creates a GDI multi 
*     answer list.
*
*  INPUTS
*     sge_gdi_ctx_class_t* ctx        - context handle 
*     lList **answer_list             - answer list 
*     sge_gdi_packet_class_t **packet - GDI packet 
*     lList **malpp                   - multi answer list 
*
*  RESULT
*     bool - error state
*        true  - success
*        false - error
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_wait_for_result_external() is MT safe 
*
*  SEE ALSO
*     gdi/request_internal/sge_gdi_packet_execute_external() 
*     gdi/request_internal/sge_gdi_packet_execute_internal() 
*     gdi/request_internal/sge_gdi_packet_wait_for_result_external()
*     gdi/request_internal/sge_gdi_packet_wait_for_result_internal()
*******************************************************************************/
bool 
sge_gdi_packet_wait_for_result_external(sge_gdi_ctx_class_t* ctx, lList **answer_list,
                                        sge_gdi_packet_class_t **packet, lList **malpp)
{
   /*
    * Builds the multi answer list for an externally executed packet.
    *
    * ctx, answer_list, packet and malpp are passed through unchanged to
    * sge_gdi_packet_create_multi_answer(); its result is returned.
    */
   bool ret = true;

   /* trace label spells out the full function name so trace output matches */
   DENTER(TOP_LAYER, "sge_gdi_packet_wait_for_result_external");

   /* 
    * The packet itself has already been executed in sge_gdi_packet_execute_external() 
    * so it is only necessary to create the multi answer and do cleanup
    */
   ret = sge_gdi_packet_create_multi_answer(ctx, answer_list, packet, malpp);

   DRETURN(ret);
}

/****** gdi/request_internal/sge_gdi_packet_wait_for_result_internal() ******
*  NAME
*     sge_gdi_packet_wait_for_result_internal() -- wait for handled packet 
*
*  SYNOPSIS
*     bool 
*     sge_gdi_packet_wait_for_result_internal(sge_gdi_ctx_class_t* ctx, 
*                                             lList **answer_list, 
*                                             sge_gdi_packet_class_t **packet, 
*                                             lList **malpp) 
*
*  FUNCTION
*     This function can only be called in a qmaster thread. It
*     blocks until the GDI packet, which has to be
*     given to qmaster via sge_gdi_packet_execute_internal(), has been
*     executed completely (either successfully or with errors). 
*
*     After that it creates a multi answer list.
*
*  INPUTS
*     sge_gdi_ctx_class_t* ctx        - context handle 
*     lList **answer_list             - answer list 
*     sge_gdi_packet_class_t **packet - GDI packet 
*     lList **malpp                   - multi answer list 
*
*  RESULT
*     bool - error state
*        true  - success
*        false - error
*
*  NOTES
*     MT-NOTE: sge_gdi_packet_wait_for_result_internal() is MT safe 
*
*  SEE ALSO
*     gdi/request_internal/sge_gdi_packet_execute_external() 
*     gdi/request_internal/sge_gdi_packet_execute_internal() 
*     gdi/request_internal/sge_gdi_packet_wait_for_result_external()
*     gdi/request_internal/sge_gdi_packet_wait_for_result_internal()
*******************************************************************************/
bool 
sge_gdi_packet_wait_for_result_internal(sge_gdi_ctx_class_t* ctx, lList **answer_list,
                                        sge_gdi_packet_class_t **packet, lList **malpp)
{
   /*
    * Two steps: first wait on the packet referenced by "packet", then
    * hand all arguments to sge_gdi_packet_create_multi_answer() and
    * return whatever that call reports.
    */
   bool answer_created;
   sge_gdi_packet_class_t *pending_packet;

   DENTER(TOP_LAYER, "sge_gdi_packet_wait_for_result_internal");

   /* step 1: block until a worker thread signals that the packet is handled */
   pending_packet = *packet;
   sge_gdi_packet_wait_till_handled(pending_packet);

   /* step 2: build the multi answer; the packet is destroyed along the way */
   answer_created = sge_gdi_packet_create_multi_answer(ctx, answer_list, packet, malpp);

   DRETURN(answer_created);
}