[torquedev] saving inodes with lots of TM tasks

Garrick Staples garrick at clusterresources.com
Fri Feb 16 20:33:28 MST 2007


I have an immediate requirement to support hundreds of thousands of TM
tasks within a single job.  The limiting factor at this point is that
pbs_mom saves each task's TM struct in its own file.  After a few hundred thousand
tasks, the filesystem runs out of inodes!

The only purpose of the disk files is to be read back in on a pbs_mom
restart.  In normal use, they are never read.

So the solution seems simple: use 1 file.  I've attached a patch that
implements this idea by opening 1 file and seeking (sizeof(struct) *
task_id) bytes into the file before writing.  I didn't bother trying to
be backwards compatible at this point because I think this will only be
for 2.2.x.

Thoughts?

-------------- next part --------------
Index: src/resmom/mom_comm.c
===================================================================
--- src/resmom/mom_comm.c	(revision 1249)
+++ src/resmom/mom_comm.c	(working copy)
@@ -214,14 +214,11 @@
   int	fds;
   int	i;
   char	namebuf[MAXPATHLEN];
-  char	filnam[MAXPATHLEN];
   int	openflags;
 
   strcpy(namebuf,path_jobs);      /* job directory path */
   strcat(namebuf,pjob->ji_qs.ji_fileprefix);
   strcat(namebuf,JOB_TASKDIR_SUFFIX);
-  sprintf(filnam,task_fmt,ptask->ti_qs.ti_task);
-  strcat(namebuf,filnam);
 
   openflags = O_WRONLY|O_CREAT|O_Sync;
 
@@ -246,12 +243,17 @@
     return(-1);
     }
 
-  /* NOTE:  to avoid partial write failures in fs full situations,             */
-  /*        attempt write of empty buffer, if success, then write actual task? */
-  /*        (NYI) */
-
   /* just write the "critical" base structure to the file */
 
+ if (lseek(fds,(off_t)sizeof(ptask->ti_qs) * ptask->ti_qs.ti_task,SEEK_SET) < 0) 
+    {
+    log_err(errno,id,"lseek");
+
+    close(fds);
+
+    return(-1);
+    }
+
   while ((i = write(
       fds, 
       (char *)&ptask->ti_qs, 
@@ -261,7 +263,7 @@
       {	
       /* retry the write */
 
-      if (lseek(fds,(off_t)0,SEEK_SET) < 0) 
+      if (lseek(fds,(off_t)sizeof(ptask->ti_qs) * ptask->ti_qs.ti_task,SEEK_SET) < 0) 
         {
         log_err(errno,id,"lseek");
 
@@ -496,72 +498,40 @@
   static	char	id[] = "task_recov";
   int		fds;
   task		*pt;
-  char		dirname[MAXPATHLEN];
   char		namebuf[MAXPATHLEN];
-  DIR		*dir;
-  struct	dirent	*pdirent;
   struct	taskfix	task_save;
 
-  strcpy(dirname,path_jobs);      /* job directory path */
-  strcat(dirname,pjob->ji_qs.ji_fileprefix);
-  strcat(dirname,JOB_TASKDIR_SUFFIX);
+  strcpy(namebuf,path_jobs);      /* job directory path */
+  strcat(namebuf,pjob->ji_qs.ji_fileprefix);
+  strcat(namebuf,JOB_TASKDIR_SUFFIX);
 
-  if ((dir = opendir(dirname)) == NULL)
+  if ((fds = open(namebuf,O_RDONLY,0)) < 0)
     {
+    log_err(errno,id,"open of task file");
+
     return(-1);
     }
 
-  strcat(dirname,"/");
-
-  while ((pdirent = readdir(dir)) != NULL) 
+    /* read in task quick save sub-structure */
+  while (read(fds,(char *)&task_save,sizeof(task_save)) == sizeof(task_save)) 
     {
-    if (pdirent->d_name[0] == '.')
-      continue;
 
-    strcpy(namebuf,dirname);
-    strcat(namebuf,pdirent->d_name);
-
-    fds = open(namebuf,O_RDONLY,0);
-
-    if (fds < 0) 
-      {
-      log_err(errno,id,"open of task file");
-
-      unlink(namebuf);
-
+    if (task_save.ti_task == 0)
       continue;
-      }
 
-    /* read in task quick save sub-structure */
-
-    if (read(fds,(char *)&task_save,sizeof(task_save)) != sizeof(task_save)) 
-      {
-      log_err(errno,id,"read");
-
-      unlink(namebuf);
-
-      close(fds);
-
-      continue;
-      }
-
     if ((pt = pbs_task_create(pjob,TM_NULL_TASK)) == NULL)  
       {
       log_err(errno,id,"cannot create task");
 
-      unlink(namebuf);
-
       close(fds);
 
       continue;
       }
 
     pt->ti_qs = task_save;
-
-    close(fds);
     }  /* END while ((pdirent = readdir(dir)) != NULL) */
 
-  closedir(dir);
+  close(fds);
 
   /* SUCCESS */
 
@@ -2185,23 +2155,6 @@
 
       job_save(pjob,SAVEJOB_FULL);
 
-      strcpy(namebuf,path_jobs);      /* job directory path */
-      strcat(namebuf,pjob->ji_qs.ji_fileprefix);
-      strcat(namebuf,JOB_TASKDIR_SUFFIX);
-  
-      if (mkdir(namebuf,0700) == -1) 
-        {
-        log_err(-1,id,"cannot create temporary directory");
-
-        job_purge(pjob);
-
-        /* cannot create temporary job directory */
-
-        SEND_ERR(PBSE_SYSTEM)
-  
-        goto done;
-        }
-  
       sprintf(log_buffer,"JOIN JOB as node %d", 
         nodeid);
   
Index: src/server/job_func.c
===================================================================
--- src/server/job_func.c	(revision 1249)
+++ src/server/job_func.c	(working copy)
@@ -1120,7 +1120,11 @@
   strcpy(namebuf,path_jobs);      /* job directory path */
   strcat(namebuf,pjob->ji_qs.ji_fileprefix);
   strcat(namebuf,JOB_TASKDIR_SUFFIX);
-  remtree(namebuf);
+  if (unlink(namebuf) < 0)
+    {
+    if (errno != ENOENT)
+      log_err(errno,id,msg_err_purgejob);
+    }
 
 #if MOM_CHECKPOINT == 1
   {
Index: src/server/req_quejob.c
===================================================================
--- src/server/req_quejob.c	(revision 1249)
+++ src/server/req_quejob.c	(working copy)
@@ -524,28 +524,6 @@
 
       return;
       }
-
-    strcpy(namebuf,path_jobs);      /* job directory path */
-    strcat(namebuf,basename);
-    strcat(namebuf,JOB_TASKDIR_SUFFIX);
-
-    if ((mkdir(namebuf,0700) == -1) && (errno != EEXIST))
-      {
-      /* FAILURE */
-
-      char tmpLine[1024];
-
-      sprintf(tmpLine,"cannot create directory '%s'",
-        namebuf);
-
-      log_err(errno,tmpLine,msg_init_abt);
-
-      job_purge(pj);
-
-      req_reject(PBSE_SYSTEM,0,preq,NULL,tmpLine);
-
-      return;
-      }
     }    /* END else (pj != NULL) */
 
 #endif  /* PBS_MOM */


More information about the torquedev mailing list