diff --git a/ci/tools/anti_attack_system/README.en.md b/ci/tools/anti_attack_system/README.en.md new file mode 100644 index 0000000000000000000000000000000000000000..236621c148c28ad3810d8962665d728ac1519702 --- /dev/null +++ b/ci/tools/anti_attack_system/README.en.md @@ -0,0 +1,36 @@ +# anti_attack_system + +#### Description +{**When you're done, you can delete the content in this README and update the file with details for others getting started with your repository**} + +#### Software Architecture +Software architecture description + +#### Installation + +1. xxxx +2. xxxx +3. xxxx + +#### Instructions + +1. xxxx +2. xxxx +3. xxxx + +#### Contribution + +1. Fork the repository +2. Create Feat_xxx branch +3. Commit your code +4. Create Pull Request + + +#### Gitee Feature + +1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md +2. Gitee blog [blog.gitee.com](https://blog.gitee.com) +3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) +4. The most valuable open source project [GVP](https://gitee.com/gvp) +5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) +6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/ci/tools/anti_attack_system/README.md b/ci/tools/anti_attack_system/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b1310a43e2484061cb17e0e07b1aa44aa7855b9d --- /dev/null +++ b/ci/tools/anti_attack_system/README.md @@ -0,0 +1,43 @@ +# anti_attack_system + +#### 介绍 +1、项目名称:anti_attack_system(防攻击系统) +2、批量克隆处理openeuler系统对应版本的所有仓库数据 +3、对所有仓库数据的tar包通过rpmbuild工具进行解压 +#### 软件架构 +软件架构说明 + +#### 安装工具 +1、yum -y install rpm +2、yum -y install rpm-build +3、yum -y install autoconf +4、创建目录:mkdir -p /root/rpmbuild/{BUILD,RPMS,SOURCES,SPECS,SRPMS},使用rpmbuild工具解压tar包需要,目录位置与目录名称固定 + +#### 使用说明 +1. 运行命令:python3 main.py +2. constant.py 常量 +3. common.py 公共类 +4. custom_log.py 自定义日志 +5. main.py 程序入口 +6. 
repo_src_data_decomp.py 业务处理 +7. requirements.txt 安装依赖 +#### 运行环境 +1、openeuler系统 +2、docker镜像:docker pull openeuler/openeuler:latest +3、脚本运行环境:python3.6以上 +#### 参与贡献 + +1. Fork 本仓库 +2. 新建 Feat_xxx 分支 +3. 提交代码 +4. 新建 Pull Request + + +#### 特技 + +1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md +2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com) +3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目 +4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目 +5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) +6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/ci/tools/anti_attack_system/handle_logic/common.py b/ci/tools/anti_attack_system/handle_logic/common.py new file mode 100644 index 0000000000000000000000000000000000000000..876c1f165ac6968881727e40352f86801d98ef38 --- /dev/null +++ b/ci/tools/anti_attack_system/handle_logic/common.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/1/20 9:52 +# @Author : zWX1133887 +# @File : common.py +# @Software: PyCharm +# Description: Public class and method +import argparse +import os +import subprocess + +from anti_attack_system.handle_logic.constant import UNZIPPED_DIRECTORY, TMP_FAILED_SPEC_FILE_DIR, \ + EXECUTE_BP_CMD_FAILED_REPOS +from anti_attack_system.handle_logic.custom_log import my_logging + + +class CommonClass(object): + def __init__(self): + self.unzipped_dir = UNZIPPED_DIRECTORY + self.tmp_failed_spec_file_dir = TMP_FAILED_SPEC_FILE_DIR + self.bp_cmd_exec_failure_dir = EXECUTE_BP_CMD_FAILED_REPOS + self.logger = my_logging() + + def subprocess_exe_run_cmd(self, exe_cmd, flag=False): + """ + execute command + :param exe_cmd: + :return: + """ + # Check: if this parameter is set to true and the process exit status code is not 0, + # a calledprocesserror exception will pop up. 
+ ret_code = subprocess.run(exe_cmd, shell=True, check=flag) + if ret_code.returncode == 0: # clone success + return True + else: + self.logger.info(f"exe_cmd={exe_cmd}") + return False + + def unzipped_directories(self): + """ + Directory to be unzipped + :return: + """ + repos_src_dir = os.path.join(os.getcwd(), self.unzipped_dir) + if not os.path.exists(repos_src_dir): + os.mkdir(repos_src_dir) + return repos_src_dir + + def tmp_failed_spec_file_dir_fun(self): + """ + :return: + """ + tmp_failed_spec_dir = os.path.join(os.getcwd(), self.tmp_failed_spec_file_dir) + if not os.path.exists(tmp_failed_spec_dir): + os.mkdir(tmp_failed_spec_dir) + return tmp_failed_spec_dir + + def bp_cmd_exec_failure(self): + """ + :return: + """ + bp_cmd_exec_failure_dir = os.path.join(os.getcwd(), self.bp_cmd_exec_failure_dir) + if not os.path.exists(bp_cmd_exec_failure_dir): + os.mkdir(bp_cmd_exec_failure_dir) + return bp_cmd_exec_failure_dir + + @staticmethod + def get_cmd_params(): + """ + Get command line parameters + notes: required=True Required + :return: args object + """ + parser = argparse.ArgumentParser(f"cmd param") + parser.add_argument('-v', '--version_number', type=int, required=False, default=0) # version number + return parser.parse_args() + + @staticmethod + def os_system_cmd(exe_cmd): + """ + os.system module + :param exe_cmd: + :return: + """ + return os.system(exe_cmd) diff --git a/ci/tools/anti_attack_system/handle_logic/constant.py b/ci/tools/anti_attack_system/handle_logic/constant.py new file mode 100644 index 0000000000000000000000000000000000000000..3b9e32a959a9b29631eef9cb5a63aef355119484 --- /dev/null +++ b/ci/tools/anti_attack_system/handle_logic/constant.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/1/19 16:56 +# @Author : zWX1133887 +# @File : constant.py +# @Software: PyCharm +# Description: constant +WAIT_TIME = 3 + +ATTEMPT_TIMES = 7 + +RPM_SOURCES_PATH = f"/root/rpmbuild/SOURCES" +RPM_BUILD_PATH = 
f"/root/rpmbuild/BUILD" + +RPM_SPECS_PATH = f"/root/rpmbuild/SPECS" + +TMP_FAILED_SPEC_FILE_DIR = f"tmp_failed_spec_file" + +EXECUTE_BP_CMD_FAILED_REPOS = f"bp_cmd_failed_repos" +BUILD_TMP_DIR = f"build_tmp" + +UNZIPPED_DIRECTORY = f"unzipped_repos_data" + +CLONE_URL = f"https://gitee.com/openeuler/release-management.git" + +HTTPS_URL = f"https://gitee.com/src-openeuler/" + +YAML_FILE_NAME = f"pckg-mgmt.yaml" + +CLONE_REPOS_FAILURE_URL_FILE = f"repos-failure-clone-https-url.txt" + +RETRY_COUNT = 7 + +VERSION_DIR_NAME_PREFIX = f"openEuler-" # Version directory name prefix + +# Private repository list +PRIVATE_REPOS = ['jarjar', 'java-base64', 'sbinary', 'geronimo-validation', 'http-builder', 'ehcache-parent', + 'perl-Tie-IxHash', + 'bytelist', 'args4j', 'liboggz', 'libkate', 'liboil', 'libofa', 'jsch-agent-proxy', + 'geronimo-interceptor', + 'protoparser', 'appstream-data', 'jatl', 'ezmorph', 'rubygem-hpricot'] + +# repository that need to be triggered manually +NEED_LICENSE = ["bazel"] + +# Manual confirmation is required +MANUAL_CONFIRM = [ + # 可以正常拉取,但使用rpmbuild工具需要手动出发 + "plexus-i18n", "plexus-interactivity", "ukui-themes", "bazel", + # 需要手动输入密码和用户名拉取 + "libkate", "liboil", "rubygem-hpricot", + "appstream-data", "liboggz", "liboflsa", "libofa" +] + +MANUAL_CONFIRM.extend(NEED_LICENSE) +MANUAL_CONFIRM.extend(PRIVATE_REPOS) +WORKER_MAX = 20 + +# total_failure_repo_list +TOTAL_FAILURE_REPO_LIST = ['nodejs-tap-parser', 'nodejs-es6-symbol', 'nodejs-boom', 'eclipse-jgit', 'mozjs78', + 'nodejs-window-size', 'jboss-jstl-1.2-api', 'wildfly-security-manager', 'xalan-j2', + 'rubygem-rouge', 'rubygem-faraday-net_http', 'mybatis-parent', 'felix-shell', + 'rubygem-http_parser', 'rubygem-xpath', 'hibernate4', 'nodejs-proxyquire', 'nodejs-raw-body', + 'maven-plugin-bundle', 'rubygem-webmock', 'weld-core', 'rubygem-ruby-shadow', + 'python-XStatic-Angular-lrdragndrop', 'gnu-free-fonts', 'felix-framework', 'rubygem-loofah', + 'nodejs-burrito', 'felix-osgi-foundation', 
'rubygem-crass', 'jetty-assembly-descriptors', + 'rubygem-execjs', 'nodejs-http-signature', 'java-oauth', 'jboss-jacc-1.4-api', 'novnc', + 'apache-commons-math', 'apache-commons-digester', 'glassfish-jax-rs-api', 'rubygem-ZenTest', + 'rubygem-faraday-net_http_persistent', 'rubygem-prawn', 'rubygem-sqlite3', 'gd', + 'kohsuke-pom', 'nodejs-difflet', 'rubygem-faraday-patron', 'openstack-ironic-inspector', + 'jboss-jacc-1.5-api', 'nodejs-read-package-json', 'nodejs-mkdirp', 'lightdm-gtk', + 'rubygem-ronn', 'rubygem-haml', 'multiverse', 'annotation-indexer', 'rubygem-pdf-core', + 'apache-commons-javaflow', 'rubygem-coderay', 'libisal', 'libwd', 'rubygem-ffi', + 'openstack-tempest', 'nodejs-load-grunt-tasks', 'rubygem-multi_test', 'rubygem-actioncable', + 'hunspell-bg', 'gmetrics', 'nodejs-difflib', 'apache-parent', 'uglify-js', + 'nodejs-grunt-contrib-watch', 'rubygem-hashdiff', 'glassfish-jsp', 'arpack-ng', + 'mybatis-generator', 'rubygem-sass-rails', 'rubygem-erubi', 'rubygem-strptime', + 'rubygem-msgpack', 'nodejs-grunt', 'nodejs-har-validator', 'jboss-jsf-2.1-api', + 'rubygem-shoulda-matchers', 'rubygem-faraday-excon', 'nodejs-sinon', 'rubygem-globalid', + 'glassfish-annotation-api', 'jcodings', 'rubygem-pry-nav', 'jboss-jsp-2.2-api', 'regexp', + 'takari-incrementalbuild', 'rubygem-simplecov', 'rubygem-expression_parser', 'unique', + 'replacer', 'nodejs-pkg-up', 'nodejs-simple-assert', 'rubygem-kramdown-parser-gfm', + 'ocaml-ounit', 'aqute-bnd', 'xfce4-mailwatch-plugin', 'apache-commons-jxpath', 'sgml-common', + 'rubygem-safe_yaml', 'nodejs-escodegen', 'arm-trusted-firmware', 'rubygem-sigdump', + 'rubygem-backports', 'rubygem-tzinfo', 'rubygem-mustermann', 'rubygem-liquid', + 'istack-commons', 'rubygem-Ascii85', 'jetty-toolchain', 'rubygem-bootsnap', + 'rubygem-sinatra', 'rubygem-simplecov-html', 'openEuler-indexhtml', 'nodejs-sntp', + 'rubygem-rspec-mocks', 'nodejs-readable-stream', 'xorg-x11-drv-dummy', 'codemodel', + 'rubygem-mail', 'rubygem-mini_mime', 
'python-qrcode', 'nodejs-globule', + 'apache-commons-jexl', 'geronimo-jcdi-1.0-api', 'signpost-core', 'octave', + 'nodejs-core-util-is', 'itrustee_sdk', 'rubygem-listen', 'nodejs-glob', + 'metadata-extractor2', 'nodejs-less', 'rubygem-activeresource', 'weld-parent', + 'rubygem-rspec2', 'rubygem-RedCloth', 'nodejs-is-my-json-valid', 'rubygem-power_assert', + 'rubygem-sass', 'rubygem-gem2rpm', 'felix-gogo-shell', 'portaudio', 'net-tools', + 'apache-commons-beanutils', 'tomcat-taglibs-standard', 'felix-scr', + 'nodejs-grunt-legacy-util', 'initscripts', 'geronimo-annotation', 'xerces-j2', + 'custom_build_tool', 'rubygem-bundler', 'rubygem-afm', 'rubygem-prawn-table', + 'nodejs-constantinople', 'rubygem-puma', 'json-path', 'nodejs-rimraf', 'rubygem-webrick', + 'pigpio', 'openstack-ironic', 'nodejs-grunt-legacy-log-utils', 'codenarc', + 'hibernate-jpa-2.1-api', 'rubygem-capybara', 'jetty-schemas', 'grub2', + 'rubygem-cucumber-wire', 'maven-doxia', 'cassandra-java-driver', 'felix-osgi-obr', + 'rubygem-websocket-extensions', 'rubygem-rspec2-mocks', 'cbi-plugins', 'nodejs-hawk', + 'rubygem-cucumber-tag_expressions', 'rubygem-fattr', 'rubygem-mimemagic', 'python-clufter', + 'nodejs-supports-color', 'raspberrypi-kernel', 'rubygem-cucumber-expressions', + 'apache-commons-parent', 'rubygem-rspec-expectations', 'rubygem-rspec-support', + 'nodejs-es6-iterator', 'nodejs-maxmin', 'geronimo-jaxrpc', 'ws-xmlschema', + 'rubygem-faraday-rack', 'polkit-gnome', 'google-gson', 'rubygem-httpclient', + 'rubygem-sprockets', 'apache-commons-pool', 'jdeparser1', 'xml-commons-apis', + 'apache-commons-exec', 'apache-commons-collections4', 'glassfish-ejb-api', 'runc', 'libusbx', + 'nodejs-noptify', 'nodejs-jade', 'aries-blueprint-api', 'apache-commons-validator', + 'aries-blueprint-parser', 'rubygem-fluent-plugin-elasticsearch', 'apache-commons-lang3', + 'nodejs-fileset', 'nodejs-tiny-lr-fork', 'nodejs-ebnf-parser', 'stax-ex', 'spymemcached', + 'rubygem-faraday-em_http', 'nodejs-through2', 
'json_simple', 'rubygem-rspec2-core', + 'rubygem-aruba', 'deepin-graphics-driver-manager', 'rubygem-fakefs', 'mina-ftpserver', + 'rubygem-shindo', 'nodejs-chai', 'rubygem-sdoc', 'rubygem-tzinfo-data', 'unique3', + 'felix-osgi-obr-resolver', 'nodejs-grunt-contrib-uglify', 'nodejs-clean-css', 'xml-security', + 'nodejs-jju', 'plexus-build-api', 'nodejs-extend', 'apache-commons-daemon', 'rubygem-spring', + 'apache-poi', 'rubygem-idn', 'nodejs-tape', 'maven-plugin-build-helper', + 'jaxb2-common-basics', 'gmavenplus-plugin', 'apache-commons-collections', 'bsh', + 'perl-PerlIO-via-QuotedPrint', 'jboss-el-3.0-api', 'felix-utils', + 'jboss-annotations-1.2-api', 'resteasy', 'leveldb-java', 'nodejs-end-of-stream', 'antlr32', + 'geronimo-ejb', 'rubygem-wikicloth', 'springframework', 'tycho-extras', + 'glassfish-websocket-api', 'openjfx8', 'rubygem-rspec2-expectations', 'nodejs-tough-cookie', + 'rubygem-sprockets-rails', 'nodejs-jison-lex', 'groovy18', 'rubygem-minitest-reporters', + 'jboss-logging-tools1', 'compress-lzf', 'rubygem-public_suffix', 'nodejs-string_decoder', + 'kf5-frameworkintegration', 'rubygem-concurrent-ruby', 'python-pycparser', + 'nodejs-concat-stream', 'nodejs-string', 'rubygem-creole', 'nodejs-promises-aplus-tests', + 'felix-scr-annotations', 'relaxngDatatype', 'rubygem-activerecord', 'rubygem-shoulda', + 'netty3', 'python-sphinx-feature-classification', 'jackson-datatypes-collections', + 'apache-commons-codec', 'weld-api', 'tycho', 'isula-build', 'nodejs-util', 'cdi-api', + 'jgroups', 'glassfish-jsp-api', 'openEuler-logos', 'libkae', 'geronimo-jms', 'rubygem-i18n', + 'lightcouch', 'jdepend', 'apache-commons-dbcp', 'nodejs-promise', 'rubygem-docile', + 'nodejs-source-map-support', 'bean-validation-api', 'nodejs-defence', 'rubygem-erubis', + 'nodejs-inflight', 'jsr-311', 'pam', 'glassfish-fastinfoset', 'raspi-config', + 'nodejs-event-emitter', 'prefetch_tuning', 'nodejs-iconv', 'rubygem-pry', + 'nodejs-are-we-there-yet', 'nodejs-shelljs', 'mimepull', 
'containerd', 'eclipse-ecf', + 'python-pytest-fixture-config', 'maven-processor-plugin', 'maven-enforcer', + 'rubygem-eventmachine', 'linux-firmware', 'rubygem-addressable', 'lucene4', + 'nodejs-bluebird', 'nodejs-nomnom', 'nodejs-gaze', 'apache-commons-compress', 'jspc', + 'felix-gogo-runtime', 'rubygem-yard', 'jboss-jsf-2.2-api', 'apache-mime4j', + 'rubygem-activemodel-serializers-xml', 'bullet', 'python-os-resource-classes', + 'rubygem-rack-test', 'nodejs-debug', 'rubygem-activestorage', 'openEuler-release', + 'apache-commons-logging', 'openstack-trove', 'maven-plugins-pom', 'rubygem-ttfunk', + 'nodejs-chalk', 'glassfish-servlet-api', 'jboss-jaxrs-2.0-api', 'rubygem-excon', + 'nodejs-argparse', 'rubygem-ansi', 'nodejs-js-yaml', 'hibernate3', 'rubygem-rb-inotify', + 'nodejs-transformers', 'eclipselink-persistence-api', 'nodejs-form-data', 'mustache-java', + 'rubygem-websocket-driver', 'jboss-jms-1.1-api', 'rubygem-dalli', 'nodejs-mime-types', + 'glassfish-jaxrpc-api', 'libburn1', 'openstack-glance', 'rubygem-thor', 'rubygem-contracts', + 'kae_driver', 'jvnet-parent', 'rubygem-ruby-rc4', 'apache-commons-lang', 'nodejs-tap', + 'rubygem-faraday-em_synchrony', 'shrinkwrap-resolver', 'jboss-rmi-1.0-api', + 'rubygem-rdiscount', 'hsqldb1', 'apache-mina', 'kernel', 'woodstox-core', + 'rubygem-websocket', 'nodejs-grunt-cli', 'nodejs-monocle', 'eclipse-m2e-workspace', + 'mongo-java-driver2', 'dain-snappy', 'nodejs-readdirp', 'rubygem-thread_safe', + 'portlet-2.0-api', 'kata_integration', 'enchant2', 'jboss-jaspi-1.1-api', + 'rubygem-serverengine', 'rubygem-rails-html-sanitizer', 'geronimo-jaspic-spec', 'jctools', + 'openstack-neutron', 'rubygem-faraday', 'python-utils', 'nodejs-jison', 'jetty-alpn-api', + 'openstack-nova', 'python-castellan', 'nodejs-memoizee', 'rubygem-cool.io', + 'jboss-interceptors-1.2-api', 'jboss-ejb-3.2-api', 'rubygem-mini_magick', 'xmlstreambuffer', + 'aries-quiesce-api', 'rubygem-minitest4', 'rubygem-ruby-progressbar', 'apache-commons-net', + 
'nodejs-closure-compiler', 'nodejs-with', 'rubygem-kramdown', 'rubygem-bindex', + 'rubygem-activesupport', 'docker-client-java', 'rubygem-ruby2_keywords', 'classmate', + 'openstack-swift', 'log4j12', 'geronimo-jcache', 'raspberrypi-firmware', 'proguard', + 'rubygem-rspec-core', 'jakarta-commons-httpclient', 'rubygem-pdf-reader', 'nodejs-istanbul', + 'rubygem-activejob', 'ehcache-sizeof-agent', 'uima-parent-pom', 'felix-osgi-compendium', + 'rubygem-rails-dom-testing', 'rubygem-curb', 'python-neutron-lib', 'rubygem-thread_order', + 'nodejs-require-uncached', 'rubygem-ronn-ng', 'apache-commons-configuration', + 'glassfish-transaction-api', 'apache-commons-vfs', 'felix-bundlerepository', + 'nodejs-handlebars', 'XmlSchema', 'rubygem-faraday-httpclient', 'apache-commons-io', + 'rubygem-rspec-its', 'nodejs-request', 'nodejs-yargs', 'jcifs', 'rubygem-yajl-ruby', + 'raspberrypi-bluetooth', 'maven-wagon', 'felix-osgi-core', 'felix-gogo-command', 'artemis', + 'ant-contrib', 'apache-commons-jci', 'nodejs-json-diff', 'openstack-ironic-python-agent', + 'rxjava', 'hadoop-3.1', 'maven-doxia-sitetools', 'nodejs-cli-color', + 'nodejs-grunt-legacy-log', 'rubygem-nio4r', 'jboss-jaspi-1.0-api', + 'rubygem-turbolinks-source', 'jetty-test-helper', 'fasterxml-oss-parent', + 'nodejs-es-to-primitive', 'nodejs-path-array', 'jboss-jaxws-2.2-api', 'rubik', + 'nodejs-optimist', 'serp', 'openjfx11', 'geronimo-saaj', 'apache-commons-chain', + 'openstack-keystone', 'qt5-qtwebkit', 'nodejs-isstream', 'nodejs-es6-weak-map', + 'nodejs-es-abstract', 'jaf', 'felix-scr-generator', 'rubygem-cucumber-core', + 'nodejs-resolve-pkg', 'rubygem-fluentd', 'jdom2', 'apache-commons-fileupload', + 'snowball-java', 'jetty-test-policy', 'rubygem-dig_rb', 'apache-commons-discovery', + 'guava20', 'nodejs-npmlog', 'felix-main', 'rubygem-ejs', 'rubygem-gherkin', + 'rubygem-cucumber', 'tuscany-sdo-java', 'jibx', 'rhq-plugin-annotations', + 'jboss-connector-1.6-api', 'apache-commons-cli', 'shrinkwrap-descriptors', + 
'openstack-cinder', 'python-crypto', 'nodejs-deep-eql', 'rubygem-byebug', 'GConf2', + 'kata-containers', 'python-straight-plugin', 'python-sphinx', 'rubygem-mime-types-data', + 'jboss-servlet-2.5-api', 'rubygem-uglifier', 'ongres-scram', 'libtiff', + 'aries-blueprint-annotation-api', 'google-guice', 'nodejs-css-stringify', 'qpid-proton-java', + 'typesafe-config', 'uadk_engine', 'sonatype-oss-parent', 'rubygem-shoulda-context', + 'nodejs-vows', 'jboss-jms-2.0-api', 'apache-commons-csv', 'python-gnocchiclient', 'maven2', + 'jboss-websocket-1.1-api', 'rubygem-connection_pool', 'rubygem-rails-controller-testing', + 'openoffice.org-dict-cs_CZ', 'apache-commons-pool2', 'nodejs-findup-sync', 'rubygem-marcel', + 'python-py', 'nodejs-hooker', 'python-os-xenapi', 'nodejs-type-detect', 'snakeyaml', + 'glassfish-master-pom', 'libraqm'] diff --git a/ci/tools/anti_attack_system/handle_logic/custom_log.py b/ci/tools/anti_attack_system/handle_logic/custom_log.py new file mode 100644 index 0000000000000000000000000000000000000000..e62a8c930d59f8a182bfffa4789282189b1e8c57 --- /dev/null +++ b/ci/tools/anti_attack_system/handle_logic/custom_log.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/1/20 14:37 +# @Author : zWX1133887 +# @File : custom_log.py +# @Software: PyCharm +# Description: Custom log + +import time +from pathlib import Path + +from loguru import logger + +log = logger +log_path = Path(Path.cwd(), "log") + + +def my_logging(): + """ + Custom log + :return: + """ + logger.add(f"{log_path}/runtime-log-{time.strftime('%Y-%m-%d')}.log", + rotation="500MB", + encoding="utf-8", + enqueue=True, + retention="7 days") + return logger + + +if __name__ == '__main__': + my_logging().info("this is test") diff --git a/ci/tools/anti_attack_system/handle_logic/repo_src_data_decomp.py b/ci/tools/anti_attack_system/handle_logic/repo_src_data_decomp.py new file mode 100644 index 
0000000000000000000000000000000000000000..6d51582ba0acc07540dda32731ff706f6fe93081 --- /dev/null +++ b/ci/tools/anti_attack_system/handle_logic/repo_src_data_decomp.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/1/17 19:04 +# @Author : zWX1133887 +# @File : repo_src_data_decomp.py +# @Software: PyCharm +# Description: Repository source data decompression +import os +import re +import shutil +import time +from multiprocessing.pool import ThreadPool +from pathlib import Path + +import tenacity +import yaml +from tenacity import retry, wait_fixed, stop_after_attempt, retry_if_result + +from anti_attack_system.handle_logic.common import CommonClass +from anti_attack_system.handle_logic.constant import ATTEMPT_TIMES, WAIT_TIME, RPM_SPECS_PATH, \ + RPM_SOURCES_PATH, CLONE_REPOS_FAILURE_URL_FILE, RETRY_COUNT, VERSION_DIR_NAME_PREFIX, PRIVATE_REPOS, NEED_LICENSE, \ + RPM_BUILD_PATH, BUILD_TMP_DIR, WORKER_MAX + + +def is_false(failure_repos_list): + print(f"failure_repos_list={failure_repos_list}") + return True if failure_repos_list else False # True retry, otherwise do not retry + + +class RepoSrcDataDeco(object): + def __init__(self, release_management_yaml_url, repository_name, version_number, yaml_file_name, https_url): + self.common_class = CommonClass() + self.clone_yaml_url = release_management_yaml_url + self.current_local_dir = self.common_class.unzipped_directories() # current local directory + self.tmp_spec_file_dir = self.common_class.tmp_failed_spec_file_dir_fun() # execute failed spec file directory + self.bp_cmd_exec_failure_dir = self.common_class.bp_cmd_exec_failure() # bp command execute failed repository + self.repo_local_path = os.path.join(self.current_local_dir, repository_name) + self.version_number = version_number + self.yaml_file_name = yaml_file_name + self.https_url = https_url + # self.failure_repo_file_path = os.path.join(self.current_local_dir, CLONE_REPOS_FAILURE_URL_FILE) + self.failure_repo_file_path = 
os.path.join(Path(Path.cwd().parent), CLONE_REPOS_FAILURE_URL_FILE) + + def get_release_management_yaml_data(self): + """ + Clone repository yaml file + :return: + """ + exe_clone_cmd = f"cd {self.current_local_dir} && git clone {self.clone_yaml_url}" + for i in range(RETRY_COUNT): + self.del_local_repo_dir() + ret_boolean = self.common_class.subprocess_exe_run_cmd(exe_clone_cmd) + self.common_class.logger.info(f"Clone repository yaml file---{i}---times") + if not ret_boolean: # cloning failure + if i == RETRY_COUNT - 1: + self.common_class.logger.info(f"Please restart. It has been tried {i} times and failed") + time.sleep(2) + continue + else: + break + + def del_failure_clone_file(self): + """ + delete failure clone file + :return: + """ + if os.path.exists(self.failure_repo_file_path): + os.remove(self.failure_repo_file_path) + + def del_local_repo_dir(self): + """ + delete local repository directory + :return: + """ + if os.path.exists(self.repo_local_path): + shutil.rmtree(self.repo_local_path) + + def get_repository_name_from_yaml_file(self): + """ + Get repository name from yaml file + :return: repo_name_generator: generator object + """ + all_version_file_list = [] + version_directories = [directory for directory in os.listdir(self.repo_local_path) if + directory.startswith(VERSION_DIR_NAME_PREFIX)] + + version_directories.sort(reverse=True) + for version_directory in version_directories: + pckg_mgmt_yaml_file_path = os.path.join(self.repo_local_path, version_directory, self.yaml_file_name) + if os.path.exists(pckg_mgmt_yaml_file_path): + all_version_file_list.append({version_directory: self.yaml_file_name}) + + if len(all_version_file_list) < self.version_number: + self.common_class.logger.info(f"The version does not exist: {self.version_number}") + return + latest_version = all_version_file_list[self.version_number] # 0: version number + for version, yaml_file in latest_version.items(): + with open(os.path.join(self.repo_local_path, version, yaml_file), 
"r+", encoding="utf-8") as fr: + for y in yaml.load_all(fr, Loader=yaml.FullLoader): + # todo:The file structure of each release may be different, so it needs to be changed + packages = y.get("packages").get("natural") + if not packages: + packages = y.get("packages").get("everything").get("baseos") # baseos 基础iso所包含的rpm包设计到的所有软件包 + other_repos = y.get("packages").get("everything").get("other") # other 全量iso除了iso外的软件包 + epol_repos = y.get("packages").get("epol") # epol epol的repo源里rpm包涉及的所有软件包 + recycle_repos = y.get("packages").get("recycle") # recycle 判定衰退的软件包 + delete_repos = y.get("packages").get("delete") # delete 被删除的软件包 + + other_generator = (other.get("name") for other in other_repos) if other_repos else [] + epol_generator = (epol.get("name") for epol in epol_repos) if epol_repos else [] + recycle_generator = (recycle.get("name") for recycle in + recycle_repos) if recycle_repos else [] + delete_generator = (delete.get("name") for delete in delete_repos) if delete_repos else [] + + baseos_generator = (package.get("name") for package in packages) if packages else [] + + return baseos_generator, other_generator, epol_generator, recycle_generator, delete_generator + + def clone_repo(self, tmp_baseos_list, tmp_other_list, tmp_epol_list, tmp_recycle_list, tmp_delete_list): + """ + clone repository + :param tmp_baseos_list: list + :param tmp_other_list: list + :param tmp_epol_list: list + :param tmp_recycle_list: list + :param tmp_delete_list: list + :return: + """ + # fixme + # =============================================优化之前============================================================ + # processed_repo_data = os.listdir(os.path.join(self.current_local_dir)) + # self.common_class.logger.info(f"processed_repos={len(processed_repo_data)}") + # + # processed_repo_data.extend(PRIVATE_REPOS) + # processed_repo_data.extend(NEED_LICENSE) + # + # self.common_class.logger.info(f"all_processed_repos={len(processed_repo_data)}") + + # all_repo_list = 
self.statistics_all_repos(tmp_baseos_list, tmp_other_list, tmp_epol_list, tmp_recycle_list, + # tmp_delete_list) + + # for repo_name in all_repo_list: + # # if repo_name != "xxx": # For testing + # # continue + # if repo_name in processed_repo_data: # Ignore repository + # continue + # + # self.common_class.logger.info(f"repo_name={repo_name}") + # + # self.rec_del_local_repo_dir(repo_name) + # self.del_tmp_dir(repo_name) + # full_https_url = self.https_url + repo_name + f".git" + # exe_clone_cmd = f'cd {self.current_local_dir} && git clone {full_https_url}' + # ret_boolean = self.common_class.subprocess_exe_run_cmd(exe_clone_cmd) + # + # if not ret_boolean: # cloning failure + # with open(self.failure_repo_file_path, "a+", encoding="utf-8") as clone_failure_url: + # clone_failure_url.write(full_https_url) + # clone_failure_url.write("\n") + # else: + # # Every time a repository is executed, it is deleted + # self.unzip_src_data(repo_name) + # # break # for testing + # time.sleep(0.2) + + # if os.path.exists(self.failure_repo_file_path): + # self.common_class.logger.info(f"---start retry clone---") + # try: + # self.retry_clone_failure_repos() + # except tenacity.RetryError as e: + # pass + # self.common_class.logger.info(f"---retry clone over---") + # + # self.common_class.logger.info(f"statistics success and failed repositories") + # self.statistics_success_failure_repos() + # ========================================================================================================= + + # --------------------------------------------优化之后------------------------------------------------------------- + all_repo_list = self.statistics_all_repos(tmp_baseos_list, tmp_other_list, tmp_epol_list, tmp_recycle_list, + tmp_delete_list) + self.common_class.logger.info(f"-----Start clone-----") + thread_pool = ThreadPool(WORKER_MAX) # Create a thread pool + thread_pool.map(self.thread_pool_execute, all_repo_list) # Fill the thread pool with threads + thread_pool.close() # Close 
the thread pool and no longer accept threads + thread_pool.join() # Wait for all threads in the thread pool to finish executing + self.common_class.logger.info(f"-----Clone complete-----") + + if os.path.exists(self.failure_repo_file_path): + self.common_class.logger.info(f"---start retry clone---") + try: + self.retry_clone_failure_repos() + except tenacity.RetryError as e: + pass + self.common_class.logger.info(f"---retry clone over---") + + self.common_class.logger.info(f"---Start processing---") + for repo_name in os.listdir(self.current_local_dir): + self.unzip_src_data(repo_name) + self.common_class.logger.info(f"---Processing complete---") + + self.common_class.logger.info(f"statistics success and failed repositories") + self.statistics_success_failure_repos() + # --------------------------------------------------------------------------------------------------------- + + def thread_pool_execute(self, repo_name): + """ + :return: + """ + processed_repo_data = os.listdir(os.path.join(self.current_local_dir)) + self.common_class.logger.info(f"processed_repos={len(processed_repo_data)}") + + processed_repo_data.extend(PRIVATE_REPOS) + processed_repo_data.extend(NEED_LICENSE) + + self.common_class.logger.info(f"all_processed_repos={len(processed_repo_data)}") + if repo_name not in processed_repo_data: # Ignore repository + self.common_class.logger.info(f"repo_name={repo_name}") + + self.rec_del_local_repo_dir(repo_name) + self.del_tmp_dir(repo_name) + full_https_url = self.https_url + repo_name + f".git" + exe_clone_cmd = f'cd {self.current_local_dir} && git clone {full_https_url}' + ret_boolean = self.common_class.subprocess_exe_run_cmd(exe_clone_cmd) + + if not ret_boolean: # cloning failure + with open(self.failure_repo_file_path, "a+", encoding="utf-8") as clone_failure_url: + clone_failure_url.write(full_https_url) + clone_failure_url.write("\n") + # else: + # # Every time a repository is executed, it is deleted + # self.unzip_src_data(repo_name) + # # 
break # for testing + time.sleep(0.2) + + def statistics_all_repos(self, tmp_baseos_list, tmp_other_list, tmp_epol_list, tmp_recycle_list, tmp_delete_list): + """ + return all repositories + :param tmp_baseos_list: + :param tmp_other_list: + :param tmp_epol_list: + :param tmp_recycle_list: + :param tmp_delete_list: + :return: + """ + all_repo_list = [] + all_repo_list.extend(tmp_baseos_list) + all_repo_list.extend(tmp_other_list) + all_repo_list.extend(tmp_epol_list) + all_repo_list.extend(tmp_recycle_list) + all_repo_list.extend(tmp_delete_list) + + self.common_class.logger.info(f"baseos = {len(tmp_baseos_list)}") + self.common_class.logger.info(f"other = {len(tmp_other_list)}") + self.common_class.logger.info(f"epol = {len(tmp_epol_list)}") + self.common_class.logger.info(f"recycl = {len(tmp_recycle_list)}") + self.common_class.logger.info(f"delete = {len(tmp_delete_list)}") + self.common_class.logger.info(f"all_repo_list = {len(all_repo_list)}") + + return all_repo_list + + def unzip_src_data(self, repo_name): + """ + unzip repository source data + :param repo_name: repository name + :return: + """ + # Every time a repository is executed, it is deleted + # Notes: It must be executed synchronously in sequence + self.cp_local_repo_data_to_rpm_sources(repo_name) + self.cp_local_repo_spec_file_to_rpm_specs(repo_name) + self.use_re_parse_spec_file() + self.exe_spec_file_cmd(repo_name) + self.del_rpm_spec_file() + self.del_rpm_source_all_files() + + @retry(wait=wait_fixed(WAIT_TIME), + stop=stop_after_attempt(ATTEMPT_TIMES), + # When retry_ clone_ failure_ The repos function returns the value false and will not retry + retry=retry_if_result(is_false)) + def retry_clone_failure_repos(self): + """ + Retry batch cloning failed repositories + :return: + """ + tmp_list = [] + with open(self.failure_repo_file_path, "r+", encoding="utf-8") as fr_retry_clone_url: + content_list = fr_retry_clone_url.readlines() + for retry_clone_url in content_list: + repo_name_dir = 
retry_clone_url.split("/")[-1].split(".git")[0] + self.rec_del_local_repo_dir(repo_name_dir) + # execute command + exe_retry_clone_cmd = f'cd {self.current_local_dir} && git clone {retry_clone_url}' + ret_boolean = self.common_class.subprocess_exe_run_cmd(exe_retry_clone_cmd) + if not ret_boolean: # clone failure + tmp_list.append(retry_clone_url) + else: + # self.rec_del_local_repo_dir(repo_name_dir) + self.del_tmp_dir(repo_name_dir) + self.unzip_src_data(repo_name_dir) + return tmp_list + + def rec_del_local_repo_dir(self, repo_name): + """ + Recursion delete local repository directory + :param repo_name: repository name + :return: + """ + repo_local_path = os.path.join(self.current_local_dir, repo_name) # repository local directory + if os.path.exists(repo_local_path): + shutil.rmtree(repo_local_path) + + def cp_local_repo_data_to_rpm_sources(self, repo_name): + """ + copy local repository data to rpmbuild sources path(e.g: /root/rpmbuild/SOURCES) + :param repo_name: local repository name + :return: + """ + # cmd: "cd /home/worker/test/xxx/xxx && cp * /root/rpmbuild/SOURCES" + local_repo_path = os.path.join(self.current_local_dir, repo_name) + exe_cp_cmd = f"cd {local_repo_path} && cp * {RPM_SOURCES_PATH}" + ret_boolean = self.common_class.subprocess_exe_run_cmd(exe_cp_cmd) + return ret_boolean + + def cp_local_repo_spec_file_to_rpm_specs(self, repo_name): + """ + Copy local repository .spec file to rpmbuild specs path(e.g: /root/rpmbuild/SPECS) + :param repo_name: local repository name + :return: Regular matching + """ + # cmd: cd /home/worker/test/xxx/xxx && cp *.spec /root/rpmbuild/SPECS + local_repo_path = os.path.join(self.current_local_dir, repo_name) + exe_cp_cmd = f"cd {local_repo_path} && cp *.spec {RPM_SPECS_PATH}" + ret_boolean = self.common_class.subprocess_exe_run_cmd(exe_cp_cmd) + return ret_boolean + + def exe_spec_file_cmd(self, repo_name): + """ + Execute .spec file command + :param repo_name: repository name + :return: + """ + + if 
def cp_failed_repo(self, repo_name):
    """
    Copy a repository whose "rpmbuild -bp" run failed into the
    bp_cmd_failed_repos directory for later inspection.

    :param repo_name: repository name
    :return: None
    """
    current_local_dir = os.path.join(self.current_local_dir, repo_name)

    bp_cmd_failure_dir = os.path.join(self.bp_cmd_exec_failure_dir, repo_name)

    # Bug fix: the original only created the target directory when it did
    # NOT exist, so after rmtree() the copy below targeted a missing path.
    # Always recreate it so the copy lands in a fresh, empty directory.
    if os.path.exists(bp_cmd_failure_dir):
        shutil.rmtree(bp_cmd_failure_dir)
    os.mkdir(bp_cmd_failure_dir)

    exe_cp_cmd = f"cd {current_local_dir} && cp * {bp_cmd_failure_dir}"
    ret_boolean = self.common_class.subprocess_exe_run_cmd(exe_cp_cmd)
    if ret_boolean:
        self.common_class.logger.info(f"{exe_cp_cmd}--is ok")

def cp_failed_spec_file(self, spec_file):
    """
    Copy a .spec file whose "rpmbuild -bp" run failed into the temporary
    spec-file directory, replacing any previous copy.

    :param spec_file: .spec file name
    :return: None
    """
    rpm_tmp_file_path = os.path.join(self.tmp_spec_file_dir, spec_file)
    if os.path.exists(rpm_tmp_file_path):
        os.remove(rpm_tmp_file_path)
    exe_cp_cmd = f"cd {RPM_SPECS_PATH} && cp {spec_file} {self.tmp_spec_file_dir}"
    ret_boolean = self.common_class.subprocess_exe_run_cmd(exe_cp_cmd)
    if ret_boolean:
        self.common_class.logger.info(f"{exe_cp_cmd}--is ok")

def use_re_parse_spec_file(self):
    """
    Pre-process the first .spec file under RPM_SPECS_PATH so that
    "rpmbuild -bp" can run without build dependencies: comments out
    BuildRequires lines, pins the javadoc Obsoletes version, and rewrites
    "%files help" sections to "%files".

    Notes: openeuler system unwanted (not needed on openEuler).
    :return: None
    """
    if not os.listdir(RPM_SPECS_PATH):
        return
    spec_file = os.listdir(RPM_SPECS_PATH)[0]
    file_name = os.path.join(RPM_SPECS_PATH, spec_file)
    # Read everything first (the with-block closes the handle, unlike the
    # original bare open()), then rewrite the file in place.
    with open(file_name, "r+", encoding="utf-8") as fr_spec_file:
        fr_lines = fr_spec_file.readlines()
    with open(file_name, "w+", encoding="utf-8") as fw_spec_file:
        for row_content in fr_lines:
            # Bug fix: the original used independent "if" statements plus a
            # trailing "else", so any matched line fell through to the
            # "else" as well and was written a SECOND time. One elif chain
            # writes every input line exactly once.
            if row_content.startswith("BuildRequires:"):
                fw_spec_file.write(row_content.replace("BuildRequires:", "#BuildRequires:"))
            # Bug fix: the original tested '"javadoc" not in row_content',
            # which made the replacement below provably dead code; the
            # replace target itself contains "javadoc", so the intended
            # condition is '"javadoc" in row_content'.
            elif row_content.startswith("Obsoletes:") and "javadoc" in row_content:
                fw_spec_file.write(row_content.replace(
                    "Obsoletes: %{name}-javadoc",
                    "Obsoletes: %{name}-javadoc < %{version}-%{release}"))
            # %files help
            elif row_content.startswith("%files help"):
                fw_spec_file.write(row_content.replace("%files help", "%files"))
            else:
                fw_spec_file.write(row_content)
    self.common_class.logger.info(f"execute file name=:{spec_file}")
and failures + :return: + """ + # fixme + success_rpm_build_path = os.path.join(RPM_BUILD_PATH) + # success_rpm_build_list = os.listdir(os.path.join(RPM_BUILD_PATH)) + success_rpm_build_list = self.calculate_directories_number(success_rpm_build_path) + self.common_class.logger.info(f"success_rpm_build_list count = {len(success_rpm_build_list)}") + self.common_class.logger.info(f"success_rpm_build_list data = {success_rpm_build_list}") + + bp_cmd_exec_failure_list = os.listdir(os.path.join(self.bp_cmd_exec_failure_dir)) + self.common_class.logger.info(f"bp_cmd_exec_failure_list count = {len(bp_cmd_exec_failure_list)}") + self.common_class.logger.info(f"bp_cmd_exec_failure_list data = {bp_cmd_exec_failure_list}") + + tmp_data_list = list( + set([i for i in bp_cmd_exec_failure_list for j in success_rpm_build_list if + j.startswith(str(i) + "-") and str(i) in str(j)])) + self.common_class.logger.info(f"tmp_data_list count = {len(tmp_data_list)}") + self.common_class.logger.info(f"tmp_data_list data = {tmp_data_list}") + + total_failure_repo_list = [i for i in bp_cmd_exec_failure_list if i not in tmp_data_list] + + self.common_class.logger.info(f"total_failure_repo_list count = {len(total_failure_repo_list)}") + self.common_class.logger.info(f"total_failure_repo_list data = {total_failure_repo_list}") + + @staticmethod + def calculate_directories_number(dir_name): + """ + Calculate the number of directories + :param dir_name: the number of directories + :return: + """ + tmp_list = [] + build_tmp_list = os.listdir(dir_name) + for d in build_tmp_list: + file_path = os.path.join(dir_name, d) + if os.path.isdir(file_path): + tmp_list.append(d) + return tmp_list + + def cp_build_dir_to_tmp_dir(self, src, dst): + """ + Data synchronization to local + :param src: + :param dst: + :return: + """ + pass diff --git a/ci/tools/anti_attack_system/main.py b/ci/tools/anti_attack_system/main.py new file mode 100644 index 
def main():
    """
    Program entry point: clone the release-management yaml repository,
    read the per-category repository lists for the requested version,
    and batch-clone all of them.

    :return: None
    """
    args = CommonClass.get_cmd_params()
    version_number = args.version_number

    # e.g. "https://gitee.com/xxx/yyy.git" -> "yyy"
    repository_name = CLONE_URL.split("/")[-1].split(".git")[0]

    deco = RepoSrcDataDeco(CLONE_URL, repository_name, version_number, YAML_FILE_NAME, HTTPS_URL)

    deco.get_release_management_yaml_data()
    deco.del_failure_clone_file()

    baseos_generator, other_generator, epol_generator, recycle_generator, delete_generator = \
        deco.get_repository_name_from_yaml_file()

    deco.clone_repo(list(baseos_generator), list(other_generator), list(epol_generator),
                    list(recycle_generator), list(delete_generator))
def get_cmd_params():
    """
    Parse the command-line parameters.

    notes: required=True makes both options mandatory.
    :return: argparse.Namespace with access_token and org_name
    """
    arg_parser = argparse.ArgumentParser(f"cmd param")
    arg_parser.add_argument('-a', '--access_token', type=str, required=True)  # access token
    arg_parser.add_argument('-o', '--org_name', type=str, required=True)  # organization name
    return arg_parser.parse_args()
def create_dir_and_del_file(org_name):
    """
    Ensure the organization's local clone directory exists and remove any
    stale failure-URL file left over from a previous run.

    :param org_name: organization name
    :return: (organization directory path, failure-URL file path)
    """
    work_dir = os.getcwd()  # current directory
    org_name_dir = os.path.join(work_dir, org_name)  # organization name local directory
    if not os.path.exists(org_name_dir):
        os.mkdir(org_name_dir)

    failure_file_path = os.path.join(work_dir, org_name + failure_file_name)
    if os.path.exists(failure_file_path):
        os.remove(failure_file_path)
    return org_name_dir, failure_file_path
def is_false(failure_repos_list):
    """
    Retry predicate for tenacity: retry while the previous attempt still
    returned failed repositories.

    :param failure_repos_list: list returned by the retried function
    :return: True to retry (non-empty list), False to stop
    """
    # bool() replaces the redundant "True if x else False" conditional.
    return bool(failure_repos_list)
def main():
    """
    Entry point: fetch the repository page count for the organization,
    batch-clone every repository, then retry the failed ones.

    :return: None
    """
    args = get_cmd_params()
    access_token = args.access_token  # access token
    org_name = args.org_name  # organization name

    # Get the total number of repositories pages under the organization
    ret_data = get_repos_total_page(access_token, org_name)
    if not ret_data:
        print(f"Please check whether the access token or organization name is correct: {org_name}")
        return

    org_name_dir, failure_file_path = create_dir_and_del_file(org_name)

    print(f"---batch clone start---")
    batch_clone_repos_data(ret_data, org_name_dir, failure_file_path, org_name, access_token)
    print(f"---batch clone over---")

    # Retry batch cloning failed repositories; guard clause instead of nesting.
    if not os.path.exists(failure_file_path):
        return
    print(f"---start retry clone---")
    try:
        retry_clone_failure_repos(failure_file_path, org_name_dir)
    except tenacity.RetryError:
        # All retry attempts exhausted; remaining failures stay recorded in the file.
        pass
    print(f"---retry clone over---")
cmd: python3 main.py -r [repository name] -e [end time:默认为当前日期] -i [interval time:默认为一个月] -f [flag]
+组织维度统计:批量克隆组织代码并统计组织代码的行数 + +参数说明: + +-a: 访问令牌 + +-o: 组织名称 + +-f: 标识 + +cmd: python3 main.py -a [access_token] -o [organization name] -f [flag] + +e.g: python3 main.py -a 430ce73b1636a64b -o openeuler -f 2 + + +#### 参与贡献 + +1. Fork 本仓库 +2. 新建 Feat_xxx 分支 +3. 提交代码 +4. 新建 Pull Request + + +#### 特技 + +1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md +2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com) +3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目 +4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目 +5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) +6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/ci/tools/code_statistics_tool/business_processing/business_method_call.py b/ci/tools/code_statistics_tool/business_processing/business_method_call.py new file mode 100644 index 0000000000000000000000000000000000000000..9942d5f5a681096732a551f825d3b4381df611fa --- /dev/null +++ b/ci/tools/code_statistics_tool/business_processing/business_method_call.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/2/17 11:14 +# @Author : zWX1133887 +# @File : business_method_call.py +# @Software: PyCharm +# Description: business methods call +import os + +import tenacity + +from code_statistics_tool.business_processing.common import CommonOrg + + +def business_methods_call(access_token, org_name, repo_name, start_time, end_time, repository_name, version, flag): + com_org = CommonOrg(access_token, org_name, start_time, end_time, repository_name, version) + + if (version and version >= 1) and flag == 1: + com_org.get_release_management_yaml_data() + + baseos_generator, other_generator, epol_generator, recycle_generator, delete_generator = \ + com_org.get_repository_name_from_yaml_file() + + com_org.clone_repo(list(baseos_generator), list(other_generator), list(epol_generator), + list(recycle_generator), 
def batch_exec(com_org, org_name):
    """
    Run the organization-wide batch clone: resolve the page count, clone
    every repository, then retry the ones that failed.

    :param com_org: CommonOrg helper instance
    :param org_name: organization name
    :return: organization clone directory, or None when the page count
             could not be fetched
    """
    page_data = com_org.get_org_total_page()
    if not page_data:
        com_org.my_logger.info(f"Please check whether the access token or organization name is correct: {org_name}")
        return
    org_name_dir, failure_file_path = com_org.create_dir_and_del_file()

    com_org.my_logger.info(f"---batch clone start---")
    com_org.batch_clone_repos_data(page_data, org_name_dir, failure_file_path)
    com_org.my_logger.info(f"---batch clone over---")

    # Retry batch cloning failed repositories
    if os.path.exists(failure_file_path):
        com_org.my_logger.info(f"---start retry clone---")
        try:
            com_org.retry_clone_failure_repos(failure_file_path, org_name_dir)
        except tenacity.RetryError:
            pass
        com_org.my_logger.info(f"---retry clone over---")
    return org_name_dir
def is_false(failure_repos_list):
    """
    Retry predicate for tenacity: retry while the previous attempt still
    returned failed repositories.

    :param failure_repos_list: list returned by the retried function
    :return: True to retry (non-empty list), False to stop
    """
    # bool() replaces the redundant "True if x else False" conditional.
    return bool(failure_repos_list)
def del_local_repo_dir2(self):
    """
    Delete the local release-management repository directory, if present.

    :return: None
    """
    if os.path.exists(self.repo_local_path):
        shutil.rmtree(self.repo_local_path)

def get_repository_name_from_yaml_file(self):
    """
    Read the versioned release yaml file and return the repository names
    per category.

    :return: (baseos, other, epol, recycle, delete) generators/lists,
             or None when the requested version does not exist
    """
    all_version_file_list = []
    version_directories = [directory for directory in os.listdir(self.repo_local_path) if
                           directory.startswith(VERSION_DIR_NAME_PREFIX)]

    version_directories.sort(reverse=True)  # newest version directory first
    for version_directory in version_directories:
        pckg_mgmt_yaml_file_path = os.path.join(self.repo_local_path, version_directory, self.yaml_file_name)
        if os.path.exists(pckg_mgmt_yaml_file_path):
            all_version_file_list.append({version_directory: self.yaml_file_name})

    # Bug fix: self.version is a 0-based index, so indexing is invalid
    # whenever len(...) <= self.version; the original "<" comparison let
    # the len(...) == self.version case through and raised IndexError below.
    if len(all_version_file_list) <= self.version:
        self.my_logger.info(f"The version does not exist: {self.version}")
        return
    latest_version = all_version_file_list[self.version]  # 0: version number
    for version, yaml_file in latest_version.items():
        with open(os.path.join(self.repo_local_path, version, yaml_file), "r+", encoding="utf-8") as fr:
            for y in yaml.load_all(fr, Loader=yaml.FullLoader):
                # todo: the file structure of each release may be different, so it needs to be changed
                packages = y.get("packages").get("natural")
                if not packages:
                    packages = y.get("packages").get("everything").get("baseos")  # baseos: rpm packages of the base iso
                # NOTE(review): the nesting of the reads below relative to the
                # "if not packages" branch was reconstructed — confirm that
                # releases with a "natural" section also carry "everything".
                other_repos = y.get("packages").get("everything").get("other")  # other: full iso minus base iso
                epol_repos = y.get("packages").get("epol")  # epol repo packages
                recycle_repos = y.get("packages").get("recycle")  # packages judged to be declining
                delete_repos = y.get("packages").get("delete")  # deleted packages

                other_generator = (other.get("name") for other in other_repos) if other_repos else []
                epol_generator = (epol.get("name") for epol in epol_repos) if epol_repos else []
                recycle_generator = (recycle.get("name") for recycle in
                                     recycle_repos) if recycle_repos else []
                delete_generator = (delete.get("name") for delete in delete_repos) if delete_repos else []

                baseos_generator = (package.get("name") for package in packages) if packages else []

                return baseos_generator, other_generator, epol_generator, recycle_generator, delete_generator
recycle_generator = (recycle.get("name") for recycle in + recycle_repos) if recycle_repos else [] + delete_generator = (delete.get("name") for delete in delete_repos) if delete_repos else [] + + baseos_generator = (package.get("name") for package in packages) if packages else [] + + return baseos_generator, other_generator, epol_generator, recycle_generator, delete_generator + + def clone_repo(self, tmp_baseos_list, tmp_other_list, tmp_epol_list, tmp_recycle_list, tmp_delete_list): + """ + clone repository + :param tmp_baseos_list: list + :param tmp_other_list: list + :param tmp_epol_list: list + :param tmp_recycle_list: list + :param tmp_delete_list: list + :return: + """ + # fixme + all_repos_path = self.all_repos_store_dir() + + clone_failed_repo_path = os.path.join(self.current_path, CLONE_FAILED_REPOS) + if os.path.exists(clone_failed_repo_path): + os.remove(clone_failed_repo_path) + + all_repo_list = self.statistics_all_repos(tmp_baseos_list, tmp_other_list, tmp_epol_list, tmp_recycle_list, + tmp_delete_list) + + self.my_logger.info(f"-----Start clone-----") + thread_pool = ThreadPool(WORKER_MAX) # Create a thread pool + thread_pool.map(self.thread_pool_execute, all_repo_list) # Fill the thread pool with threads + thread_pool.close() # Close the thread pool and no longer accept threads + thread_pool.join() # Wait for all threads in the thread pool to finish executing + + self.my_logger.info(f"-----Enable retry mechanism-----") + + if os.path.exists(clone_failed_repo_path): + self.my_logger.info(f"-----Start retry-----") + self.retry_clone_failure_repos(clone_failed_repo_path, all_repos_path) + self.my_logger.info(f"-----Retry complete-----") + + self.my_logger.info(f"-----Start statistics-----") + self.use_cloc_tool_sta_src_all_repos(all_repos_path) + self.my_logger.info(f"-----Statistics complete-----") + + def use_cloc_tool_sta_src_all_repos(self, all_repo_path): + """ + Use the cloc tool to make statistics + :param all_repo_path: local directory + :return: 
+ """ + current_path = Path(all_repo_path).parent + + self.my_logger.info(f"current_path={current_path}") + cloc_exec_cmd = f"cd {current_path} && cloc {ALL_REPOS} --timeout=0 --csv --report-file=" \ + f"{ALL_REPOS}-{CLOC_TOOL_REPORT_NAME} --quiet" + self.my_logger.info(f"cloc_exec_cmd={cloc_exec_cmd}") + self.my_logger.info(f"Start using the cloc tool to count code") + + ret_boolean = self.subprocess_exe_run_cmd(cloc_exec_cmd) + if ret_boolean: + self.my_logger.info(f"The cloc tool counts the {ALL_REPOS} code successfully:{ALL_REPOS}") + + def thread_pool_execute(self, repo_name): + """ + + :return: + """ + all_repos_path = os.path.join(self.current_path, ALL_REPOS) + if (repo_name not in MANUAL_CONFIRM) and (repo_name not in os.listdir(all_repos_path)): + clone_failed_repo_path = os.path.join(self.current_path, CLONE_FAILED_REPOS) + + self.rec_del_local_repo_dir(repo_name) + full_https_url = self.https_url + repo_name + f".git" + + exe_clone_cmd = f'cd {all_repos_path} && git clone {full_https_url}' + self.my_logger.info(f"exe_clone_cmd={exe_clone_cmd}") + self.my_logger.info(f"repo_name={repo_name}") + ret_boolean = self.subprocess_exe_run_cmd(exe_clone_cmd) + if not ret_boolean: # cloning failure + with open(clone_failed_repo_path, "a+", encoding="utf-8") as clone_failed_repo_url: + clone_failed_repo_url.write(full_https_url) + clone_failed_repo_url.write("\n") + + time.sleep(0.2) + + def rec_del_local_repo_dir(self, repo_name): + """ + Recursion delete local repository directory + :param repo_name: repository name + :return: + """ + repo_local_path = os.path.join(self.current_path, ALL_REPOS, repo_name) # repository local directory + if os.path.exists(repo_local_path): + shutil.rmtree(repo_local_path) + + def statistics_all_repos(self, tmp_baseos_list, tmp_other_list, tmp_epol_list, tmp_recycle_list, tmp_delete_list): + """ + return all repositories + :param tmp_baseos_list: + :param tmp_other_list: + :param tmp_epol_list: + :param tmp_recycle_list: + :param 
def get_org_total_page(self):
    """
    Query gitee for the total number of repository pages of the organization.

    :return: [total_page] on HTTP 200, otherwise an empty list
    """
    get_repos_total_page_url = f"https://gitee.com/api/v5/orgs/{self.org_name}/repos?access_token" \
                               f"={self.access_token}&type={REPO_TYPE}"

    # Robustness fix: a timeout keeps a stalled gitee API call from
    # hanging the whole run indefinitely.
    response = requests.get(get_repos_total_page_url, timeout=60)

    if response.status_code == 200:  # status_code is already an int; no int() cast needed
        # gitee reports pagination in the response headers
        total_page = response.headers.get("total_page")  # total page
        return [total_page]
    return []
def gitee_clone_executor(self, clone_url, org_name_dir, failure_file_path):
    """
    Clone a single repository, appending its URL to the failure file when
    the clone does not succeed.

    :param clone_url: repository address
    :param org_name_dir: organization name local directory
    :param failure_file_path: repository storage path where cloning failed
    :return: None
    """
    # Drop any stale working copy so git clone starts from a clean slate.
    self.del_local_repo_dir(clone_url, org_name_dir)
    clone_cmd = f'cd {org_name_dir} && git clone {clone_url}'
    cloned_ok = self.subprocess_exe_run_cmd(clone_cmd)
    if not cloned_ok:  # cloning failure
        with open(failure_file_path, "a+", encoding="utf-8") as failure_file:
            failure_file.write(clone_url)
            failure_file.write("\n")
    time.sleep(0.2)
ret_val.returncode + + if ret_code != 0: + return False + else: + if file_path: # write file + self.my_logger.info(f"ret_data={ret_data}") + with open(file_path, "a+", encoding="utf-8", newline="") as git_log_sta_code_file: + git_log_sta_code_file.write(ret_data) + git_log_sta_code_file.write("\n") + + return True + except Exception as e: + self.my_logger.error(f"subprocess_exe_run_cmd except: {e}") + return False + + def del_local_repo_dir(self, clone_url, org_name_dir): + """ + Delete local repository directory + :param clone_url: + :param org_name_dir: + :return: + """ + # clone_url = "https://gitee.com/xxx/xxx.git" + repo_name_dir = clone_url.split("/")[-1].split(".git")[0] # repository name directory + repo_local_path = os.path.join(org_name_dir, repo_name_dir) # repository local directory + if os.path.exists(repo_local_path): + shutil.rmtree(repo_local_path) + + @retry(wait=wait_fixed(WAIT_TIME), + stop=stop_after_attempt(ATTEMPT_TIMES), + # When retry_ clone_ failure_ The repos function returns the value false and will not retry + retry=retry_if_result(is_false)) + def retry_clone_failure_repos(self, failure_file_path, org_name_dir): + """ + Retry batch cloning failed repositories + :param failure_file_path: repository file path where cloning failed + :param org_name_dir: organization name local directory + :return: + """ + tmp_list = [] + with open(failure_file_path, "r+", encoding="utf-8") as fr_retry_clone_url: + content_list = fr_retry_clone_url.readlines() + for retry_clone_url in content_list: + self.del_local_repo_dir(retry_clone_url, org_name_dir) + # execute command + exe_retry_clone_cmd = f'cd {org_name_dir} && git clone {retry_clone_url}' + ret_boolean = self.subprocess_exe_run_cmd(exe_retry_clone_cmd) + if not ret_boolean: # clone failure + tmp_list.append(retry_clone_url) + return tmp_list + + def clone_repo_and_statistics(self, https_url, repo_name): + """ + clone repository and use cloc tool statistics + :param https_url: https url + :param 
repo_name: repository name + :return: + """ + tmp_list = repo_name.split("/") + if len(tmp_list) < 2: + self.my_logger.info(f"Please check whether the repository is correct " + f"or the repository does not exist: {repo_name}") + return + + org = tmp_list[0] # organization + repo = tmp_list[1] + if repo: + repo_local_path = os.path.join(CUR_PATH, repo) + if os.path.exists(repo_local_path): + shutil.rmtree(repo_local_path) + + exe_clone_cmd = f"cd {CUR_PATH} && git clone {https_url}" + self.my_logger.info(f"exe_clone_cmd={exe_clone_cmd}") + ret_boolean = self.subprocess_exe_run_cmd(exe_clone_cmd) + + if ret_boolean: + self.use_cloc_tool_sta_repo(repo) + self.use_git_tool_sta_repo(repo_local_path, repo) + + def use_git_tool_sta_repo(self, repo_local_path, repo): + """ + use git tool statistics repository + :param repo_local_path: repository local path + :param repo: repository name + :return: + """ + tmp_dst_file_name = os.path.join(repo_local_path, GIT_SHELL_CMD_FILE) + if os.path.exists(tmp_dst_file_name): + os.remove(tmp_dst_file_name) + + result_file_name = f"{repo}-{GIT_STATISTICS_RESULT_FILE}" + git_sta_code_file_path = os.path.join(CUR_PATH, result_file_name) + + csv_file_name = f"{repo}-{GIT_STATISTICS_RESULT_CSV_FILE}" + csv_file_path = os.path.join(CUR_PATH, csv_file_name) + + tmp_src_file_name = os.path.join(CUR_PATH, GIT_SHELL_CMD_FILE) + exec_cp_cmd = f"cp {tmp_src_file_name} {repo_local_path}" + self.my_logger.info(f"exec_cp_cmd={exec_cp_cmd}") + ret_boolean = self.subprocess_exe_run_cmd(exec_cp_cmd) + if ret_boolean: + self.my_logger.info(f"Start using git command statistics code") + # cd /home/worker/code_statistics_tool/zhu_it/ && sh /home/worker/code_statistics_tool/zhu_it/my_sh.sh + exec_git_sta_cmd = f"cd {repo_local_path} && sh {tmp_dst_file_name} {self.start_time} {self.end_time}" + self.my_logger.info(f"repository exec_git_sta_cmd={exec_git_sta_cmd}") + ret_bool = self.subprocess_exe_run_cmd(exec_git_sta_cmd, file_path=git_sta_code_file_path) + 
if ret_bool: + self.my_logger.info(f"The git command statistics code successfully and write file:{repo}") + self.read_txt_file_and_write_csv_file(git_sta_code_file_path, csv_file_path) + self.my_logger.info(f"write csv file:{csv_file_name}") + + else: + self.my_logger.info(f"The git command statistics code failure:{repo}") + + def read_txt_file_and_write_csv_file(self, git_sta_code_file_path, csv_file_path): + """ + from txt file read and write csv file + :param git_sta_code_file_path: + :param csv_file_path: + :return: + """ + csv_file_obj = open(csv_file_path, "w+", encoding='utf-8', newline="") + writer = csv.writer(csv_file_obj) + writer.writerow(["name", "added lines", "removed lines", "total lines"]) + txt_file_obj = open(git_sta_code_file_path, encoding="utf-8") + + for row in txt_file_obj: + tmp_data = [x for x in row.strip().split(",")] + ret_data = [y.split(":")[1] for y in tmp_data if y] + writer.writerow(ret_data) + + txt_file_obj.close() + csv_file_obj.close() + + if os.path.exists(git_sta_code_file_path): + os.remove(git_sta_code_file_path) + + def use_cloc_tool_sta_repo(self, repo): + """ + use cloc tool statistics repository + :param repo: repository name + :return: + """ + cloc_exec_cmd = f"cd {CUR_PATH} && cloc {repo} --timeout=0 --csv --report-file=" \ + f"{repo}-{CLOC_TOOL_REPORT_NAME} --quiet" + self.my_logger.info(f"cloc_exec_cmd={cloc_exec_cmd}") + ret_boolean = self.subprocess_exe_run_cmd(cloc_exec_cmd) + if ret_boolean: + self.my_logger.info(f"Cloc tool statistics succeeded:{repo}") + else: + self.my_logger.error(f"Cloc tool statistics failed:{repo}") + + def use_cloc_tool_sta_org(self, org_dir): + """ + Use the cloc tool to make statistics + :param org_dir: organization name local directory + :return: + """ + # org_dir_repo_list = os.listdir(org_dir) + # cloc repository name --report-file=code_statistics_report.csv + # for repo_name in org_dir_repo_list: + # cloc_exec_cmd = f"cd {org_dir} && cloc {repo_name} --csv --report-file={repo_name} 
+ # -{CLOC_TOOL_REPORT_NAME}" + # self.my_logger.info(f"cloc_exec_cmd={cloc_exec_cmd}") + # self.my_logger.info(f"Start using the cloc tool to count code") + # ret_boolean = self.subprocess_exe_run_cmd(cloc_exec_cmd) + # if ret_boolean: + # self.my_logger.info(f"The cloc tool counts the repository code successfully:{repo_name}") + # else: + # continue + + current_path = Path(org_dir).parent + + self.my_logger.info(f"current_path={current_path}") + cloc_exec_cmd = f"cd {current_path} && cloc {self.org_name} --timeout=0 --csv --report-file=" \ + f"{self.org_name}-{CLOC_TOOL_REPORT_NAME} --quiet" + self.my_logger.info(f"cloc_exec_cmd={cloc_exec_cmd}") + self.my_logger.info(f"Start using the cloc tool to count code") + + ret_boolean = self.subprocess_exe_run_cmd(cloc_exec_cmd) + if ret_boolean: + self.my_logger.info(f"The cloc tool counts the organization code successfully:{self.org_name}") + + def use_git_tool_statistics_org(self, local_org_path): + """ + gil command statistics + :param local_org_path: local organization path + :return: + """ + org_dir_repo_list = os.listdir(local_org_path) + tmp_src_file_name = os.path.join(CUR_PATH, GIT_SHELL_CMD_FILE) + for repo_name in org_dir_repo_list: + if repo_name.endswith(CLOC_TOOL_REPORT_NAME): + continue + tmp_dst_file_path = os.path.join(local_org_path, repo_name) + tmp_dst_file_name = os.path.join(tmp_dst_file_path, GIT_SHELL_CMD_FILE) + + if os.path.exists(tmp_dst_file_name): + os.remove(tmp_dst_file_name) + + # cp /home/worker/code_statistics_tool/my_sh.sh /home/worker/code_statistics_tool/zhu_it/ + exec_cp_cmd = f"cp {tmp_src_file_name} {tmp_dst_file_path}" + self.my_logger.info(f"exec_cp_cmd={exec_cp_cmd}") + ret_boolean = self.subprocess_exe_run_cmd(exec_cp_cmd) + + if ret_boolean: + self.my_logger.info(f"Start using git command statistics code") + # cd /home/worker/code_statistics_tool/zhu_it/ && sh /home/worker/code_statistics_tool/zhu_it/my_sh.sh + exec_git_sta_cmd = f"cd {tmp_dst_file_path} && sh 
{tmp_dst_file_name}" + ret_bool = self.subprocess_exe_run_cmd(exec_git_sta_cmd) + if ret_bool: + self.my_logger.info(f"The git command statistics code successfully:{repo_name}") + else: + self.my_logger.error(f"The git command statistics code failure:{repo_name}") + else: + continue diff --git a/ci/tools/code_statistics_tool/business_processing/get_command_line_params.py b/ci/tools/code_statistics_tool/business_processing/get_command_line_params.py new file mode 100644 index 0000000000000000000000000000000000000000..7417f70b5509bf53e7ac373121cc98c715c326c5 --- /dev/null +++ b/ci/tools/code_statistics_tool/business_processing/get_command_line_params.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/2/17 10:24 +# @Author : zWX1133887 +# @File : get_command_line_params.py +# @Software: PyCharm +# Description: get command line parameters +import argparse +import datetime + + +def get_org_cmd_line_params(): + """ + Get command line parameters + notes: required=True Required + :return: args object + """ + parser = argparse.ArgumentParser(f"cmd param") + parser.add_argument('-a', '--access_token', type=str, default=None) # access token + parser.add_argument('-o', '--org_name', type=str, default=None) # organization name + + parser.add_argument('-r', '--repo_name', type=str, default=None) # repository name + + end_time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d') + parser.add_argument('-e', '--end_time', type=str, default=end_time) # end time 2022-02-23 + parser.add_argument('-i', '--interval_time', type=int, default=30) # interval time 7 + + # version number 1: stands for the latest version + parser.add_argument('-v', '--version', type=int, required=False, default=1) + + # flag=1: version model + # flag=2: organization model + # flag=3: repository model + parser.add_argument('-f', '--flag', type=int, required=False, default=1) + + return parser.parse_args() diff --git a/ci/tools/code_statistics_tool/cloc-1.92.tar.gz 
b/ci/tools/code_statistics_tool/cloc-1.92.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..adcee90358840e168335382a8d8dee3e63da0eeb Binary files /dev/null and b/ci/tools/code_statistics_tool/cloc-1.92.tar.gz differ diff --git a/ci/tools/code_statistics_tool/constant.py b/ci/tools/code_statistics_tool/constant.py new file mode 100644 index 0000000000000000000000000000000000000000..aa31096b52378364ae499b94495a882a54e9405e --- /dev/null +++ b/ci/tools/code_statistics_tool/constant.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/2/17 10:51 +# @Author : zWX1133887 +# @File : constant.py +# @Software: PyCharm +# Description: +import os + +REPO_TYPE = "all" # Filter the type of repository, which can be all, public or private. Default: all + +FAILED_REPOS_NAME = f"repos-failure-url.txt" + +WORKER_MAX = 20 + +WAIT_TIME = 3 # waiting time + +ATTEMPT_TIMES = 7 # retry count + +CLOC_TOOL_REPORT_NAME = f"cloc-statistics-result.csv" + +CUR_PATH = os.getcwd() + +GIT_SHELL_CMD_FILE = f"git_statistics_code_cmd.sh" + +GIT_STATISTICS_RESULT_FILE = f"tmp-txt-result.txt" + +GIT_STATISTICS_RESULT_CSV_FILE = f"git-statistics-result.csv" + +CLONE_URL = f"https://gitee.com/openeuler/release-management.git" + +HTTPS_URL = f"https://gitee.com/src-openeuler/" + +YAML_FILE_NAME = f"pckg-mgmt.yaml" + +RETRY_COUNT = 7 + +VERSION_DIR_NAME_PREFIX = f"openEuler-" # Version directory name prefix + +ALL_REPOS = f"src-openeuler" + +CLONE_FAILED_REPOS = f"clone-failed-repos.txt" + +# repository that need to be triggered manually +NEED_LICENSE = ["bazel"] + +# Manual confirmation is required +MANUAL_CONFIRM = [ + "libkate", "liboil", "rubygem-hpricot", + "appstream-data", "liboggz", "liboflsa", "libofa" +] diff --git a/ci/tools/code_statistics_tool/custom_log.py b/ci/tools/code_statistics_tool/custom_log.py new file mode 100644 index 0000000000000000000000000000000000000000..e62a8c930d59f8a182bfffa4789282189b1e8c57 --- /dev/null +++ 
b/ci/tools/code_statistics_tool/custom_log.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/1/20 14:37 +# @Author : zWX1133887 +# @File : custom_log.py +# @Software: PyCharm +# Description: Custom log + +import time +from pathlib import Path + +from loguru import logger + +log = logger +log_path = Path(Path.cwd(), "log") + + +def my_logging(): + """ + Custom log + :return: + """ + logger.add(f"{log_path}/runtime-log-{time.strftime('%Y-%m-%d')}.log", + rotation="500MB", + encoding="utf-8", + enqueue=True, + retention="7 days") + return logger + + +if __name__ == '__main__': + my_logging().info("this is test") diff --git a/ci/tools/code_statistics_tool/git_statistics_code_cmd.sh b/ci/tools/code_statistics_tool/git_statistics_code_cmd.sh new file mode 100644 index 0000000000000000000000000000000000000000..5ef6af6817a9e5c0b7d141fd727022b097c23fff --- /dev/null +++ b/ci/tools/code_statistics_tool/git_statistics_code_cmd.sh @@ -0,0 +1,6 @@ +#!/bin/sh +git log --format='%aN' --since=${1} --until=${2} | sort -u | while read name +do + echo -n "$name:$name," +git log --author="$name" --since=${1} --until=${2} --pretty=tformat: --numstat | awk '{ add += $1; subs += $2; loc += $1 - $2 } END {printf "\t\t added lines: %s,\t\t removed lines: %s,\t\t total lines: %s\n", add, subs, loc }' +done diff --git a/ci/tools/code_statistics_tool/main.py b/ci/tools/code_statistics_tool/main.py new file mode 100644 index 0000000000000000000000000000000000000000..44bdacd59686eb259f93a13918a1ef946a078deb --- /dev/null +++ b/ci/tools/code_statistics_tool/main.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2022/2/17 10:14 +# @Author : zWX1133887 +# @File : main.py +# @Software: PyCharm +# Description: +import datetime +import os +import sys + +cur_path = os.path.abspath(os.path.dirname(__file__)) +root_path = os.path.split(cur_path)[0] +sys.path.append(root_path) + +from 
code_statistics_tool.business_processing.business_method_call import business_methods_call +from code_statistics_tool.business_processing.get_command_line_params import get_org_cmd_line_params +from code_statistics_tool.constant import CLONE_URL + + +def main(): + # run model 1: flag=1: version model + # Clone the repository code and count the number of lines of the repository code + # cmd: python3 main.py -v [version:default=1] + # e.g: python3 main.py + + # run model 2: flag=2: organization model + # Batch clone the organization code and count the number of lines of the organization code + # cmd: python3 main.py -a [access_token] -o [organization name] -f [flag] + # e.g: python3 main.py -a 430ce73b1636a64b -o openeuler -f 2 + + # run model 3: flag=3: repository model + # Clone the repository code and count the number of lines of the repository code + # cmd: python3 main.py -r [repository name] -e [end time] -i [interval time] -f [flag] + # e.g: python3 main.py -r openEuler/infrastructure -e 2022-02-23 -i 7 -f 3 + + args = get_org_cmd_line_params() + access_token = args.access_token # access token + org_name = args.org_name # organization name + repo_name = args.repo_name # repository name + + end_time = args.end_time # end_time + interval_time = args.interval_time # interval_time + + try: + end_val = datetime.datetime.strptime(end_time, '%Y-%m-%d') + except: + print(f"Day is out of range for month, please enter the correct time format!") + return + + start_time = (end_val - datetime.timedelta(days=int(interval_time))).strftime("%Y-%m-%d") + + version = args.version # version number + flag = args.flag # version number + + repository_name = CLONE_URL.split("/")[-1].split(".git")[0] + + business_methods_call(access_token, org_name, repo_name, start_time, end_time, repository_name, version, flag) + + +if __name__ == '__main__': + main() diff --git a/ci/tools/code_statistics_tool/requirements.txt b/ci/tools/code_statistics_tool/requirements.txt new file mode 100644 
index 0000000000000000000000000000000000000000..9a2a4e2202ae1f325d1f13ab38cbdf4cc291aa92 Binary files /dev/null and b/ci/tools/code_statistics_tool/requirements.txt differ