Changeset 5991


Ignore:
Timestamp:
Aug 20, 2020, 9:58:52 AM (16 months ago)
Author:
Nicklas Nordborg
Message:

References #1259: Add support for Slurm

Implemented a simple solution for translating options between Open Grid and Slurm. So far, the only implemented translation is between 'pe' (Open Grid) and 'cpus-per-task' (Slurm), since that is the only option we use in Reggie.

Location:
extensions/net.sf.basedb.opengrid/trunk/src/net/sf/basedb/opengrid
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • extensions/net.sf.basedb.opengrid/trunk/src/net/sf/basedb/opengrid/config/JobConfig.java

    r5990 r5991  
    55import java.util.Map;
    66import java.util.Set;
     7import java.util.regex.Matcher;
     8import java.util.regex.Pattern;
    79
    810import net.sf.basedb.opengrid.JobDefinition;
     
    1012/**
    1113  Configuration settings related to a job definition that is
    12   about to be run on an Open Grid cluster. The information becomes
    13   readonly when a {@link JobDefinition} instance has been created.
     14  about to be run on an Open Grid or Slurm cluster. The information
     15  becomes readonly when a {@link JobDefinition} instance has been created.
    1416  A single configuration instance can be used with multiple jobs.
     17  <p>
     18  Note that options for the Open Grid Engine and Slurm can be very
     19  different and it is best if the client uses either {@link #setQsubOption(String, String)}
     20  or {@link #setSbatchOption(String, String)} depending on the type of the
     21  cluster.
     22  <p>
     23  Some options can be automatically converted between the two systems. This will
     24  only happen if options for one system have been set, but no options for the other.
     25  It is also possible to force conversion by directly calling
     26  {@link #convertQsubToSbatchOptions()} or {@link #convertSbatchToQsubOptions()}.
    1527 
    1628  @author nicklas
     
    93105  private Map<String, String> qsubOptions;
    94106  private Map<String, String> sbatchOptions;
     107  private boolean autoConvertOptions;
    95108 
    96109  /**
     
    98111  */
    99112  public JobConfig()
     113  {
     114    this(true);
     115  }
     116 
     117  /**
     118    Create a new job configuration instance.
     119    @param autoConvertOptions If TRUE, options that have been set for only one of
     120      qsub or sbatch are automatically converted to options for the other
     121    @since 1.4
     122  */
     123  public JobConfig(boolean autoConvertOptions)
    100124  {
    101125    this.failImmediately = true;
     
    105129    this.qsubOptions = new LinkedHashMap<>();
    106130    this.sbatchOptions = new LinkedHashMap<>();
     131    this.autoConvertOptions = autoConvertOptions;
    107132  }
    108133
     
    342367      throw new IllegalArgumentException("Priority must be in the range -1023..+1024: " + priority);
    343368    }
    344     if (nice != null && (nice < 2147483645 || nice > 2147483645))
     369    if (nice != null && (nice < -2147483645 || nice > 2147483645))
    345370    {
    346371      throw new IllegalArgumentException("Nice must be in the range +/- 2147483645: " + nice);
    347372    }
    348   }
    349 
     373    if (forLock && autoConvertOptions) autoConvertOptions();
     374  }
     375
     376  private void autoConvertOptions()
     377  {
     378    if (sbatchOptions.size() == 0 && qsubOptions.size() > 0)
     379    {
     380      convertQsubToSbatchOptions();
     381    }
     382    else if (sbatchOptions.size() > 0 && qsubOptions.size() == 0)
     383    {
     384      convertSbatchToQsubOptions();
     385    }
     386  }
     387 
     388  /**
     389    Convert options set for 'qsub' (Open Grid) to options for 'sbatch' (Slurm).
     390    Supported options are:
     391   
     392    * -pe smp X-Y: --nodes=1, --ntasks=1, --cpus-per-task=max(X, Y)
     393
     394    @since 1.4
     395  */
     396  public void convertQsubToSbatchOptions()
     397  {
     398    checkLocked("convertQsubToSbatchOptions()");
     399    for (Map.Entry<String, String> entry : qsubOptions.entrySet())
     400    {
     401      String option = entry.getKey();
     402      if ("pe".equals(option))
     403      {
     404        sbatchOptions.put("nodes", "1");
     405        sbatchOptions.put("ntasks", "1");
     406        sbatchOptions.put("cpus-per-task", getMaxNumberInString(entry.getValue()));
     407      }
     408    }
     409  }
     410 
     411  /**
     412    Convert options set for 'sbatch' (Slurm) to options for 'qsub' (Open Grid).
     413    Supported options are:
     414   
     415    * --cpus-per-task=N: -pe smp N
     416 
     417    @since 1.4
     418  */
     419  public void convertSbatchToQsubOptions()
     420  {
     421    checkLocked("convertSbatchToQsubOptions()");
     422    for (Map.Entry<String, String> entry : sbatchOptions.entrySet())
     423    {
     424      String option = entry.getKey();
     425      if ("cpus-per-task".equals(option) || "c".equals(option))
     426      {
     427        // '--cpus-per-task=N' is converted to '-pe smp N'
     428        qsubOptions.put("pe", "smp " + entry.getValue());
     429      }
     430    }
     431  }
     432 
     433  /**
     434    Get the highest number that can be found in the given string.
     435    Used to extract the number of requested slots in, for example,
     436    -pe smp 8-16.
     437  */
     438  private String getMaxNumberInString(String s)
     439  {
     440    Pattern p = Pattern.compile("\\d+");
     441    Matcher m = p.matcher(s);
     442    int max = 1;
     443    while (m.find())
     444    {
     445      int val = Integer.valueOf(m.group());
     446      if (val > max) max = val;
     447    }
     448    return Integer.toString(max);
     449  }
    350450 
    351451  /**
  • extensions/net.sf.basedb.opengrid/trunk/src/net/sf/basedb/opengrid/engine/SlurmEngine.java

    r5990 r5991  
    8383    script.append("#SBATCH --output=stdout\n"); // Stdout is saved to this file
    8484    script.append("#SBATCH --error=stderr\n"); // Stderr is saved to this file
    85 //    script.append("#SBATCH --nodes=1\n"); // 1 task on 1 node
    86 //    script.append("#SBATCH --ntasks=1\n");
    87 //    script.append("#SBATCH --cpus-per-task=2\n");
    8885   
    8986    if (config.getSlurmNice() != null)
     
    119116    if (config.getFailImmediately()) script.append("set -e\n");
    120117    if (config.getCreatePrivateFiles()) script.append("umask -S u=rwx,g=,o=\n");
    121    
    122     /*
    123     // TODO -- remove before release
    124     ScriptBuilder debug = new ScriptBuilder();
    125     debug.progress(10, "Starting up...");
    126     debug.cmd("date");
    127     debug.cmd("sleep 60s");
    128     debug.progress(20, "Analyzing...");
    129     debug.cmd("nproc");
    130     debug.cmd("sleep 60s");
    131     debug.progress(80, "Copying result files...");
    132     debug.cmd("echo fakefile > ${WD}/files.out");
    133     debug.cmd("sleep 60s");
    134     debug.progress(90, "Saving to database...");
    135     debug.cmd("sleep 60s");
    136     debug.newLine();
    137    
    138     script.append("# -- for debugging\n");
    139     script.append(debug.toString());
    140     script.append("exit\n");
    141     script.append("# -- end debugging\n");
    142     // --
    143     */
    144    
     118
    145119    script.append("# --- user script ---\n");
    146120    script.append(job.getCmd()+"\n");
Note: See TracChangeset for help on using the changeset viewer.