From 778e076799bebb09b5749bb0c3f5cd9698758c05 Mon Sep 17 00:00:00 2001 From: wangjian Date: Tue, 17 Oct 2023 06:56:01 +0000 Subject: [PATCH] =?UTF-8?q?=E9=80=82=E9=85=8DHPC=2023.0.0=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=8E=BBtask=E9=9C=80=E6=B1=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: wangjian --- LSF-Script/collection/job | 73 +++++---------------------------- LSF-Script/collection/jobSample | 72 +++++--------------------------- LSF-Script/job/submit | 3 +- README.en.md | 22 +++++----- README.md | 22 +++++----- 5 files changed, 44 insertions(+), 148 deletions(-) diff --git a/LSF-Script/collection/job b/LSF-Script/collection/job index af9e88d..c25151d 100644 --- a/LSF-Script/collection/job +++ b/LSF-Script/collection/job @@ -73,10 +73,8 @@ def strToJson(jobInfoStr): jobInfos = os.popen(JOB_INFOS_CMD).read() jobInfosArr = jobInfos.strip().split("\n") -# 数组作业状态表 {'jobId':'PEND,DONE,RUN,EXIT,SSUSP,USUSP,PSUSP'} -arrayJobStatDict = {} # 构造非数组作业及数组作业子作业信息 及数组作业状态表 -nonArrayJobList = [] +outputJobList = [] for index in range(len(jobInfosArr)): if index == 0: continue @@ -97,66 +95,15 @@ for index in range(len(jobInfosArr)): 'jobRunTime': jobInfo['RUN_TIME'], 'originState': jobInfo['STAT'], 'exitMsg': jobInfo['EXIT_REASON']} if outputJob['execHome'] == '': outputJob['execHome'] = jobInfo['EXEC_HOME'] - nonArrayJobList.append(outputJob) + jobId = outputJob['jobId'] + arrayJobInfo = os.popen(ARRAY_JOB_INFO_CMD + jobId).read().splitlines() + # 判断是否为数组作业 + if len(arrayJobInfo) == 2: + arrayJobInfoValue = arrayJobInfo[1].split() + if len(arrayJobInfoValue) > 2: + outputJob['jobId'] = "{}.{}".format(jobId, outputJob['jobIndex']) + outputJob['jobIndex'] = 0 + outputJobList.append(outputJob) - # 存储子作业状态到状态表中 - subJobStat = jobInfo['STAT'] - subJobId = jobInfo['JOBID'] - statValArr = [0] * 7 - if not arrayJobStatDict.has_key(subJobId): - arrayJobStatDict[subJobId] = statValArr - else: - statValArr = 
arrayJobStatDict[subJobId] - subJobStat = jobInfo['STAT'] - if subJobStat == 'PEND': - statValArr[0] = statValArr[0] + 1 - if subJobStat == 'DONE': - statValArr[1] = statValArr[1] + 1 - if subJobStat == 'RUN': - statValArr[2] = statValArr[2] + 1 - if subJobStat == 'EXIT': - statValArr[3] = statValArr[3] + 1 - if (subJobStat == 'PSUSP') | (subJobStat == 'USUSP'): - statValArr[4] = statValArr[4] + 1 - if subJobStat == 'SSUSP': - statValArr[5] = statValArr[5] + 1 - if subJobStat == 'ZOMBI': - statValArr[6] = statValArr[6] + 1 - arrayJobStatDict[subJobId] = statValArr - -jobIdSet = set() -outputJobList = nonArrayJobList -# 构造数组作业父作业信息 -for jobInfo in nonArrayJobList: - if int(jobInfo['jobIndex']) > 0: - jobId = jobInfo['jobId'] - # 构造数组作业父作业信息 - if jobId not in jobIdSet: - jobIdSet.add(jobId) - arrayJobInfo = os.popen(ARRAY_JOB_INFO_CMD + jobId).read().splitlines() - if len(arrayJobInfo) == 2: - arrayJobInfoValue = arrayJobInfo[1].split() - parentJobInfo = copy.deepcopy(jobInfo) - if len(arrayJobInfoValue) > 2: - parentJobName = arrayJobInfoValue[1] - parentJobInfo['jobName'] = parentJobName - arrayJobStatArr = arrayJobStatDict[jobId] - aggJobState(parentJobInfo, arrayJobStatArr) - parentJobInfo['originState'] = parentJobInfo['stat'] - parentJobInfo['stat'] = str(statMap[parentJobInfo['stat']]) - parentJobInfo['jobIndex'] = 0 - # 调度器父作业runtime等信息默认不聚合,用户根据需要聚合 - parentJobInfo['jobRunTime'] = None - parentJobInfo['startTime'] = None - parentJobInfo['finishTime'] = None - parentJobInfo['queue'] = None - parentJobInfo['execHost'] = None - parentJobInfo['outputFile'] = None - parentJobInfo['errorFile'] = None - parentJobInfo['execHome'] = None - parentJobInfo['jobPriority'] = None - parentJobInfo['exitCode'] = None - parentJobInfo['pendReason'] = None - outputJobList.append(parentJobInfo) outputStr = json.dumps(outputJobList) print(outputStr) diff --git a/LSF-Script/collection/jobSample b/LSF-Script/collection/jobSample index 39b8811..2da9bae 100644 --- 
a/LSF-Script/collection/jobSample +++ b/LSF-Script/collection/jobSample @@ -93,9 +93,8 @@ def strToJson(jobInfoStr): jobInfos = os.popen(getOrder()).read() jobInfosArr = jobInfos.strip().split("\n") -arrayJobStatDict = {} # 构造非数组作业及数组作业子作业信息 及数组作业状态表 -nonArrayJobList = [] +outputJobList = [] timeresult = time.localtime(time.time()) timelist = list(timeresult) @@ -146,66 +145,15 @@ for index in range(len(jobInfosArr)): if outputJob['execHome'] == '': outputJob['execHome'] = jobInfo['EXEC_HOME'] - nonArrayJobList.append(outputJob) + jobId = outputJob['jobId'] + arrayJobInfo = os.popen(ARRAY_JOB_INFO_CMD + jobId).read().splitlines() + # 判断是否为数组作业 + if len(arrayJobInfo) == 2: + arrayJobInfoValue = arrayJobInfo[1].split() + if len(arrayJobInfoValue) > 2: + outputJob['jobId'] = "{}.{}".format(jobId, outputJob['jobIndex']) + outputJob['jobIndex'] = 0 + outputJobList.append(outputJob) - # 存储子作业状态到状态表中 - subJobStat = jobInfo['STAT'] - subJobId = jobInfo['JOBID'] - statValArr = [0] * 7 - if not arrayJobStatDict.has_key(subJobId): - arrayJobStatDict[subJobId] = statValArr - else: - statValArr = arrayJobStatDict[subJobId] - subJobStat = jobInfo['STAT'] - if subJobStat == 'PEND': - statValArr[0] = statValArr[0] + 1 - if subJobStat == 'DONE': - statValArr[1] = statValArr[1] + 1 - if subJobStat == 'RUN': - statValArr[2] = statValArr[2] + 1 - if subJobStat == 'EXIT': - statValArr[3] = statValArr[3] + 1 - if (subJobStat == 'PSUSP') | (subJobStat == 'USUSP'): - statValArr[4] = statValArr[4] + 1 - if subJobStat == 'SSUSP': - statValArr[5] = statValArr[5] + 1 - if subJobStat == 'ZOMBI': - statValArr[6] = statValArr[6] + 1 - arrayJobStatDict[subJobId] = statValArr - -jobIdSet = set() -outputJobList = nonArrayJobList -# 构造数组作业父作业信息 -for jobInfo in nonArrayJobList: - if int(jobInfo['jobIndex']) > 0: - jobId = jobInfo['jobId'] - # 构造数组作业父作业信息 - if jobId not in jobIdSet: - jobIdSet.add(jobId) - arrayJobInfo = os.popen(ARRAY_JOB_INFO_CMD + jobId).read().splitlines() - if len(arrayJobInfo) 
== 2: - arrayJobInfoValue = arrayJobInfo[1].split() - parentJobInfo = copy.deepcopy(jobInfo) - if len(arrayJobInfoValue) > 2: - parentJobName = arrayJobInfoValue[1] - parentJobInfo['jobName'] = parentJobName - arrayJobStatArr = arrayJobStatDict[jobId] - aggJobState(parentJobInfo, arrayJobStatArr) - parentJobInfo['originState'] = parentJobInfo['stat'] - parentJobInfo['stat'] = str(statMap[parentJobInfo['stat']]) - parentJobInfo['jobIndex'] = 0 - # 调度器父作业runtime等信息默认不聚合,用户根据需要聚合 - parentJobInfo['jobRunTime'] = None - parentJobInfo['startTime'] = None - parentJobInfo['finishTime'] = None - parentJobInfo['queue'] = None - parentJobInfo['execHost'] = None - parentJobInfo['outputFile'] = None - parentJobInfo['errorFile'] = None - parentJobInfo['execHome'] = None - parentJobInfo['jobPriority'] = None - parentJobInfo['exitCode'] = None - parentJobInfo['pendReason'] = None - outputJobList.append(parentJobInfo) outputStr = json.dumps(outputJobList) print(outputStr) diff --git a/LSF-Script/job/submit b/LSF-Script/job/submit index 0b62dee..beebaf8 100644 --- a/LSF-Script/job/submit +++ b/LSF-Script/job/submit @@ -56,7 +56,8 @@ submit_job() { fi if [ "$ret_value" -eq 0 ]; then - echo "The job has been submitted successfully. Job ID is: [$job_result]." + first_job_index=$(bjobs -a -o "jobindex" ${job_result} | tail -n +2 | sort | head -n 1) + echo "The job has been submitted successfully. Job ID is: [$job_result]. First job index is: [$first_job_index]." elif [ "$ret_value" -eq 124 ]; then echo "$ret_value" 1>&2 exit "$ret_value" diff --git a/README.en.md b/README.en.md index c7761e8..eafbadb 100644 --- a/README.en.md +++ b/README.en.md @@ -37,19 +37,19 @@ Python2/Python3
[root@host34 scheduler]# tree LSF/
LSF/
├── collection
-   │   ├── job.sample
-   │   └── jobSample.sample
+   │   ├── job
+   │   └── jobSample
    ├── job
-   │   ├── rerun.sample
-   │   ├── resume.sample
-   │   ├── stop.sample
-   │   ├── submit.sample
-   │   └── suspend.sample
+   │   ├── rerun
+   │   ├── resume
+   │   ├── stop
+   │   ├── submit
+   │   └── suspend
    ├── node
-   │   ├── node.sample
-   │   └── nodeSample.sample
+   │   ├── node
+   │   └── nodeSample
    └── queue
-       └── query-active.sample 
+ └── query-active 3. Change the owner of the script to the client installation user with permission 644. @@ -106,7 +106,7 @@ Python2/Python3 #### Precautions -1. The current script adaptation is for versions after HPC_22.0.0; +1. The current scripts are adapted for HPC_23.0.0 and later versions; 2. Strictly follow the operation steps, otherwise the script may fail to execute. #### Contribution diff --git a/README.md b/README.md index ef1cfbf..6232881 100644 --- a/README.md +++ b/README.md @@ -37,19 +37,19 @@ Python2/Python3
[root@host34 scheduler]# tree LSF/
LSF/
├── collection
-   │   ├── job.sample
-   │   └── jobSample.sample
+   │   ├── job
+   │   └── jobSample
    ├── job
-   │   ├── rerun.sample
-   │   ├── resume.sample
-   │   ├── stop.sample
-   │   ├── submit.sample
-   │   └── suspend.sample
+   │   ├── rerun
+   │   ├── resume
+   │   ├── stop
+   │   ├── submit
+   │   └── suspend
    ├── node
-   │   ├── node.sample
-   │   └── nodeSample.sample
+   │   ├── node
+   │   └── nodeSample
    └── queue
-       └── query-active.sample 
+ └── query-active 3. 更改脚本的属主为client安装用户,权限为644 @@ -106,7 +106,7 @@ Python2/Python3 #### 注意事项 -1. 当前脚本适配是针对HPC_22.0.0之后的版本; +1. 当前脚本适配是针对HPC_23.0.0之后的版本; 2. 严格按照操作步骤执行,否则可能会导致脚本执行失败 #### 参与贡献 -- Gitee