diff --git a/LSF-Script/collection/job b/LSF-Script/collection/job index af9e88d3d14c364dfc83ab9390f06a673cf301ac..c25151ddbdcddc5a7c66ced66a470776545f4a58 100644 --- a/LSF-Script/collection/job +++ b/LSF-Script/collection/job @@ -73,10 +73,8 @@ def strToJson(jobInfoStr): jobInfos = os.popen(JOB_INFOS_CMD).read() jobInfosArr = jobInfos.strip().split("\n") -# 数组作业状态表 {'jobId':'PEND,DONE,RUN,EXIT,SSUSP,USUSP,PSUSP'} -arrayJobStatDict = {} # 构造非数组作业及数组作业子作业信息 及数组作业状态表 -nonArrayJobList = [] +outputJobList = [] for index in range(len(jobInfosArr)): if index == 0: continue @@ -97,66 +95,15 @@ for index in range(len(jobInfosArr)): 'jobRunTime': jobInfo['RUN_TIME'], 'originState': jobInfo['STAT'], 'exitMsg': jobInfo['EXIT_REASON']} if outputJob['execHome'] == '': outputJob['execHome'] = jobInfo['EXEC_HOME'] - nonArrayJobList.append(outputJob) + jobId = outputJob['jobId'] + arrayJobInfo = os.popen(ARRAY_JOB_INFO_CMD + jobId).read().splitlines() + # 判断是否为数组作业 + if len(arrayJobInfo) == 2: + arrayJobInfoValue = arrayJobInfo[1].split() + if len(arrayJobInfoValue) > 2: + outputJob['jobId'] = "{}.{}".format(jobId, outputJob['jobIndex']) + outputJob['jobIndex'] = 0 + outputJobList.append(outputJob) - # 存储子作业状态到状态表中 - subJobStat = jobInfo['STAT'] - subJobId = jobInfo['JOBID'] - statValArr = [0] * 7 - if not arrayJobStatDict.has_key(subJobId): - arrayJobStatDict[subJobId] = statValArr - else: - statValArr = arrayJobStatDict[subJobId] - subJobStat = jobInfo['STAT'] - if subJobStat == 'PEND': - statValArr[0] = statValArr[0] + 1 - if subJobStat == 'DONE': - statValArr[1] = statValArr[1] + 1 - if subJobStat == 'RUN': - statValArr[2] = statValArr[2] + 1 - if subJobStat == 'EXIT': - statValArr[3] = statValArr[3] + 1 - if (subJobStat == 'PSUSP') | (subJobStat == 'USUSP'): - statValArr[4] = statValArr[4] + 1 - if subJobStat == 'SSUSP': - statValArr[5] = statValArr[5] + 1 - if subJobStat == 'ZOMBI': - statValArr[6] = statValArr[6] + 1 - arrayJobStatDict[subJobId] = statValArr - -jobIdSet = set() -outputJobList = nonArrayJobList -# 构造数组作业父作业信息 -for jobInfo in nonArrayJobList: - if int(jobInfo['jobIndex']) > 0: - jobId = jobInfo['jobId'] - # 构造数组作业父作业信息 - if jobId not in jobIdSet: - jobIdSet.add(jobId) - arrayJobInfo = os.popen(ARRAY_JOB_INFO_CMD + jobId).read().splitlines() - if len(arrayJobInfo) == 2: - arrayJobInfoValue = arrayJobInfo[1].split() - parentJobInfo = copy.deepcopy(jobInfo) - if len(arrayJobInfoValue) > 2: - parentJobName = arrayJobInfoValue[1] - parentJobInfo['jobName'] = parentJobName - arrayJobStatArr = arrayJobStatDict[jobId] - aggJobState(parentJobInfo, arrayJobStatArr) - parentJobInfo['originState'] = parentJobInfo['stat'] - parentJobInfo['stat'] = str(statMap[parentJobInfo['stat']]) - parentJobInfo['jobIndex'] = 0 - # 调度器父作业runtime等信息默认不聚合,用户根据需要聚合 - parentJobInfo['jobRunTime'] = None - parentJobInfo['startTime'] = None - parentJobInfo['finishTime'] = None - parentJobInfo['queue'] = None - parentJobInfo['execHost'] = None - parentJobInfo['outputFile'] = None - parentJobInfo['errorFile'] = None - parentJobInfo['execHome'] = None - parentJobInfo['jobPriority'] = None - parentJobInfo['exitCode'] = None - parentJobInfo['pendReason'] = None - outputJobList.append(parentJobInfo) outputStr = json.dumps(outputJobList) print(outputStr) diff --git a/LSF-Script/collection/jobSample b/LSF-Script/collection/jobSample index 39b8811797dcc15bec610505cbf6a04370595f8a..2da9bae0e1d3f145a6f45790eab19e5533326eca 100644 --- a/LSF-Script/collection/jobSample +++ b/LSF-Script/collection/jobSample @@ -93,9 +93,8 @@ def strToJson(jobInfoStr): jobInfos = os.popen(getOrder()).read() jobInfosArr = jobInfos.strip().split("\n") -arrayJobStatDict = {} # 构造非数组作业及数组作业子作业信息 及数组作业状态表 -nonArrayJobList = [] +outputJobList = [] timeresult = time.localtime(time.time()) timelist = list(timeresult) @@ -146,66 +145,15 @@ for index in range(len(jobInfosArr)): if outputJob['execHome'] == '': outputJob['execHome'] = jobInfo['EXEC_HOME'] - nonArrayJobList.append(outputJob) + jobId = outputJob['jobId'] + arrayJobInfo = os.popen(ARRAY_JOB_INFO_CMD + jobId).read().splitlines() + # 判断是否为数组作业 + if len(arrayJobInfo) == 2: + arrayJobInfoValue = arrayJobInfo[1].split() + if len(arrayJobInfoValue) > 2: + outputJob['jobId'] = "{}.{}".format(jobId, outputJob['jobIndex']) + outputJob['jobIndex'] = 0 + outputJobList.append(outputJob) - # 存储子作业状态到状态表中 - subJobStat = jobInfo['STAT'] - subJobId = jobInfo['JOBID'] - statValArr = [0] * 7 - if not arrayJobStatDict.has_key(subJobId): - arrayJobStatDict[subJobId] = statValArr - else: - statValArr = arrayJobStatDict[subJobId] - subJobStat = jobInfo['STAT'] - if subJobStat == 'PEND': - statValArr[0] = statValArr[0] + 1 - if subJobStat == 'DONE': - statValArr[1] = statValArr[1] + 1 - if subJobStat == 'RUN': - statValArr[2] = statValArr[2] + 1 - if subJobStat == 'EXIT': - statValArr[3] = statValArr[3] + 1 - if (subJobStat == 'PSUSP') | (subJobStat == 'USUSP'): - statValArr[4] = statValArr[4] + 1 - if subJobStat == 'SSUSP': - statValArr[5] = statValArr[5] + 1 - if subJobStat == 'ZOMBI': - statValArr[6] = statValArr[6] + 1 - arrayJobStatDict[subJobId] = statValArr - -jobIdSet = set() -outputJobList = nonArrayJobList -# 构造数组作业父作业信息 -for jobInfo in nonArrayJobList: - if int(jobInfo['jobIndex']) > 0: - jobId = jobInfo['jobId'] - # 构造数组作业父作业信息 - if jobId not in jobIdSet: - jobIdSet.add(jobId) - arrayJobInfo = os.popen(ARRAY_JOB_INFO_CMD + jobId).read().splitlines() - if len(arrayJobInfo) == 2: - arrayJobInfoValue = arrayJobInfo[1].split() - parentJobInfo = copy.deepcopy(jobInfo) - if len(arrayJobInfoValue) > 2: - parentJobName = arrayJobInfoValue[1] - parentJobInfo['jobName'] = parentJobName - arrayJobStatArr = arrayJobStatDict[jobId] - aggJobState(parentJobInfo, arrayJobStatArr) - parentJobInfo['originState'] = parentJobInfo['stat'] - parentJobInfo['stat'] = str(statMap[parentJobInfo['stat']]) - parentJobInfo['jobIndex'] = 0 - # 调度器父作业runtime等信息默认不聚合,用户根据需要聚合 - parentJobInfo['jobRunTime'] = None - parentJobInfo['startTime'] = None - parentJobInfo['finishTime'] = None - parentJobInfo['queue'] = None - parentJobInfo['execHost'] = None - parentJobInfo['outputFile'] = None - parentJobInfo['errorFile'] = None - parentJobInfo['execHome'] = None - parentJobInfo['jobPriority'] = None - parentJobInfo['exitCode'] = None - parentJobInfo['pendReason'] = None - outputJobList.append(parentJobInfo) outputStr = json.dumps(outputJobList) print(outputStr) diff --git a/LSF-Script/job/submit b/LSF-Script/job/submit index 0b62dee9b1f07f0ccef58bc87b17504638557bca..beebaf84c040fc05301e37bce52d193d59f441f8 100644 --- a/LSF-Script/job/submit +++ b/LSF-Script/job/submit @@ -56,7 +56,8 @@ submit_job() { fi if [ "$ret_value" -eq 0 ]; then - echo "The job has been submitted successfully. Job ID is: [$job_result]." + first_job_index=$(bjobs -a -o "jobindex" ${job_result} | tail -n +2 | sort | head -n 1) + echo "The job has been submitted successfully. Job ID is: [$job_result]. First job index is: [$first_job_index]." elif [ "$ret_value" -eq 124 ]; then echo "$ret_value" 1>&2 exit "$ret_value" diff --git a/README.en.md b/README.en.md index c7761e83d106cbd5f7eacc69be236fb8c81f68d2..eafbadba9127a57b5a96c45b79bd4ead6d831433 100644 --- a/README.en.md +++ b/README.en.md @@ -37,19 +37,19 @@ Python2/Python3
[root@host34 scheduler]# tree LSF/
LSF/
├── collection
-   │   ├── job.sample
-   │   └── jobSample.sample
+   │   ├── job
+   │   └── jobSample
    ├── job
-   │   ├── rerun.sample
-   │   ├── resume.sample
-   │   ├── stop.sample
-   │   ├── submit.sample
-   │   └── suspend.sample
+   │   ├── rerun
+   │   ├── resume
+   │   ├── stop
+   │   ├── submit
+   │   └── suspend
    ├── node
-   │   ├── node.sample
-   │   └── nodeSample.sample
+   │   ├── node
+   │   └── nodeSample
    └── queue
-       └── query-active.sample 
+ └── query-active 3. Change the owner of the script to the client installation user with permission 644. @@ -106,7 +106,7 @@ Python2/Python3 #### Precautions -1. The current script adaptation is for versions after HPC_22.0.0; +1. The current script adaptation is for versions after HPC_23.0.0; 2. Strictly follow the operation steps, otherwise the script may fail to execute. #### Contribution diff --git a/README.md b/README.md index ef1cfbf89e0c5927262a1b650297c612e5966955..6232881cb4f6c8fb9a562816dd01606c9cb02d49 100644 --- a/README.md +++ b/README.md @@ -37,19 +37,19 @@ Python2/Python3
[root@host34 scheduler]# tree LSF/
LSF/
├── collection
-   │   ├── job.sample
-   │   └── jobSample.sample
+   │   ├── job
+   │   └── jobSample
    ├── job
-   │   ├── rerun.sample
-   │   ├── resume.sample
-   │   ├── stop.sample
-   │   ├── submit.sample
-   │   └── suspend.sample
+   │   ├── rerun
+   │   ├── resume
+   │   ├── stop
+   │   ├── submit
+   │   └── suspend
    ├── node
-   │   ├── node.sample
-   │   └── nodeSample.sample
+   │   ├── node
+   │   └── nodeSample
    └── queue
-       └── query-active.sample 
+ └── query-active 3. 更改脚本的属主为client安装用户,权限为644 @@ -106,7 +106,7 @@ Python2/Python3 #### 注意事项 -1. 当前脚本适配是针对HPC_22.0.0之后的版本; +1. 当前脚本适配是针对HPC_23.0.0之后的版本; 2. 严格按照操作步骤执行,否则可能会导致脚本执行失败 #### 参与贡献