[Mauiusers] GRES not functional

Tom Rudwick tomr at intrinsity.com
Mon Aug 25 14:14:51 MDT 2008


I forgot: you may need the attached patches. I've submitted them to the
list before, but they haven't been applied to the maui sources.

Tom

Brian O'Connor wrote:
> Hi
> 
> I'm using maui 3.2.6p19/torque-2.3.0 to try to configure license
> management on a single machine, as instructed in the Admin guide:
> 
> http://www.clusterresources.com/products/maui/docs/MauiSchedulerAdminManual.pdf
> 
> add
> 
> NODECFG[GLOBAL]  GRES=matlab:2
> 
> restart maui.
> 
> If I use
> 
> qsub -W x=GRES:matlab script.sh
> 
> multiple (3) times, they all run.
> 
> The matlab generic resource is not "consumed"
> 
> I am going crazy with this. Using the machine name instead of "GLOBAL"
> does not help, and I have tried all manner of syntax to try and get it
> right. 
> 
> Anybody got a recipe that I can use?
> 
> Is there a way to check that the GRES configuration has been seen by
> maui?
> 
> showconfig -v does not mention it at all.
> 
> or is this all in moab?
> 
> Brian O'Connor
> -----------------------------------------------------------------------
> SGI Consulting
> Email: briano at sgi.com, Mobile +61 417 746 452
> Phone: +61 3 9963 1900, Fax:  +61 3 9963 1902
> 357 Camberwell Road, Camberwell, Victoria, 3124
> AUSTRALIA
> http://www.sgi.com/support/services
> -----------------------------------------------------------------------
> _______________________________________________
> mauiusers mailing list
> mauiusers at supercluster.org
> http://www.supercluster.org/mailman/listinfo/mauiusers
> 

-------------- next part --------------
--- src/moab/MPBSI.c
+++ src/moab/MPBSI.c
@@ -4249,7 +4249,7 @@
 
     /* adjust 'per task limits */
 
-    if (MaxJobMem > 0)
+    if ((MaxJobMem > 0) && (RQ->DRes.Procs))
       {
       /* set job wide dedicated resources */
 
@@ -4261,7 +4261,7 @@
       RQ->URes.Mem /= RQ->TaskCount;
       }
 
-    if (MaxJobSwap > 0)
+    if ((MaxJobSwap > 0) && (RQ->DRes.Procs))
       {
       /* set both dedicated resources AND node requirements */
 
@@ -6034,10 +6034,11 @@
 
     if (TA != NULL)
       {
-      if (TA->JobMemLimit > 0)
+	if ((TA->JobMemLimit > 0) && (RQ->DRes.Procs)) {
         RQ->DRes.Mem = MAX(RQ->DRes.Mem,TA->JobMemLimit / RQ->TaskCount);
+	}
 
-      if (TA->JobSwapLimit > 0)
+	if ((TA->JobSwapLimit > 0) && (RQ->DRes.Procs))
         RQ->DRes.Swap = MAX(RQ->DRes.Swap,TA->JobSwapLimit / RQ->TaskCount);
       }  /* END if (TA != NULL) */
  
@@ -6054,9 +6055,9 @@
 
         RQ->RequiredMemory = J->Req[0]->RequiredMemory;
         RQ->MemCmp         = J->Req[0]->MemCmp;
-        RQ->RequiredSwap   = J->Req[0]->RequiredMemory;
+        RQ->RequiredSwap   = J->Req[0]->RequiredSwap;
         RQ->SwapCmp        = J->Req[0]->SwapCmp;
-        RQ->RequiredDisk   = J->Req[0]->RequiredMemory;
+        RQ->RequiredDisk   = J->Req[0]->RequiredDisk;
         RQ->DiskCmp        = J->Req[0]->DiskCmp;
         }
  
-------------- next part --------------
--- src/moab/MPBSI.c.no_async	2007-06-26 11:35:14.000000000 -0500
+++ src/moab/MPBSI.c	2007-10-10 14:39:51.000000000 -0500
@@ -1904,7 +1904,7 @@
 
       return(FAILURE);
       }
-
+    /*
     if (MPBSJobModify(
           J,
           R,
@@ -1939,6 +1939,7 @@
         J->Name,
         HostList);
       }
+    */
     }
   else
     {
@@ -2017,7 +2018,7 @@
 
   MJobGetName(J,NULL,R,tmpJobName,sizeof(tmpJobName),mjnRMName);       
 
-  rc = pbs_runjob(R->U.PBS.ServerSD,tmpJobName,MasterHost,NULL);
+  rc = pbs_asyrunjob(R->U.PBS.ServerSD,tmpJobName,HostList,NULL);
 
   if (rc != 0)
     {
@@ -2041,6 +2042,7 @@
     JobStartFailed = TRUE;
     }
 
+  /*
   if (J->NeedNodes != NULL)
     {
     if (MPBSJobModify(
@@ -2062,7 +2064,7 @@
         J->NeedNodes);
       }
     }
-
+  */
   if (JobStartFailed == TRUE)
     {
     /* job could not be started */
-------------- next part --------------
--- src/moab/MPBSI.c~	2007-10-22 15:43:16.000000000 -0500
+++ src/moab/MPBSI.c	2007-11-09 15:01:48.000000000 -0600
@@ -4112,40 +4112,58 @@
         }
       else if (!strcmp(AP->resource,"software"))
         {
-        /* NOTE:  old hack (map software to node feature */
+      int rqindex;
 
-        /* MReqSetAttr(J,RQ,mrqaReqNodeFeature,(void **)AP->value,mdfString,mAdd); */
+      int RIndex;
 
-        /* NOTE:  software handled at job load time, no support for dynamic software spec */
+      mreq_t *tmpRQ;
+
+      if ((RIndex = MUMAGetIndex(eGRes,AP->value,mAdd)) == 0)
+        {
+        /* cannot add support for generic res */
 
-        /* Food for further ruminations:
+        DBG(1,fPBS) DPrint("ALERT:    cannot add support for GRes software '%s'\n",
+          AP->value);
+ 
+        continue;
+        }
+
+      /* verify software req does not already exist */
+
+      for (rqindex = 0;J->Req[rqindex] != NULL;rqindex++)
+        {
+        if (J->Req[rqindex]->DRes.GRes[RIndex].count > 0)
+          break;
+        }  /* END for (rqindex) */
 
-            * software licenses can be either floating or node-locked
+      if (J->Req[rqindex] != NULL)
+        {
+        /* software req already added */
 
-            * the above works in the situation of a node-locked license
-               for unlimited users; limiting # of concurrent uses could
-              be accomplished by forcing users to submit to a specific
-               queue/class and limit the number of concurrent jobs in
-              that class
+	  continue;
+        }
 
-            * one can imagine future support looking something like this (from the POV
-              of the config file):
+      /* add software req */
 
-              # Node-locked on a single host, unlimited concurrent usage
-               SOFTWARECFG[pkg1] HOSTLIST=node01
+      if (MReqCreate(J,NULL,&tmpRQ,FALSE) == FAILURE)
+        {
+        DBG(1,fPBS) DPrint("ALERT:    cannot add req to job %s for GRes software '%s'\n",
+          J->Name,
+          AP->value);
 
-              # Node-locked on a single host, limited to one concurrent use
-               SOFTWARECFG[pkg2] HOSTMAXCOUNT=1 HOSTLIST=node02
+        continue;
+        }
 
-               # Floating across several hosts, global maximum on concurrent usage
-               SOFTWARECFG[pkg3] MAXCOUNT=5 HOSTLIST=node[1-4][0-9]
+      /* NOTE:  PBS currently supports only one license request per job */
 
-              # Floating across several hosts, global and per-host maxima on concurrent usage
-              SOFTWARECFG[pkg4] MAXCOUNT=10 HOSTMAXCOUNT=2 HOSTLIST=node[5-8][0-9]
+      tmpRQ->DRes.GRes[RIndex].count = 1;
+      tmpRQ->DRes.GRes[0].count      = 1;
+      tmpRQ->TaskCount               = 1;
+      tmpRQ->NodeCount               = 1;
+ 
+      /* NOTE:  prior workaround (map software to node feature */
 
-            * this would probably also require support in diagnose ("diagnose -S",
-              maybe?)
-        */
+      /* MReqSetAttr(J,RQ,mrqaReqNodeFeature,(void **)AP->value,mdfString,mAdd); */
         }
       else
         {
-------------- next part --------------
--- maui-3.2.6p19.orig/src/server/UserI.c	2007-03-28 15:05:40.000000000 -0500
+++ maui-3.2.6p19/src/server/UserI.c	2007-07-02 15:32:04.000000000 -0500
@@ -4097,12 +4097,11 @@
   for (findex = 0;findex < MAX_MFRAME;findex++)
     {
     F = &MFrame[findex];
+    if ( (!F) || (F->Name[0] == '\0') || (F->NodeCount <= 0)) continue;
 
     DBG(5,fUI) DPrint("INFO:     collecting status for frame %s\n",
       F->Name); 
 
-    if (F->NodeCount <= 0)
-      continue;
 
     switch(DisplayMode)
       {
@@ -4147,6 +4146,9 @@
     if (N->Name[0] == '\1')
       continue;
 
+    if (!strcmp(N->Name,"GLOBAL")) continue;
+
+    DBG(5,fUI) DPrint("INFO: checking node %s\n", N->Name);
     /* display failure information */
 
     if ((N->CRes.Disk > 0) && (N->ARes.Disk <= 0))


More information about the mauiusers mailing list