diff --git a/docs/zh/basic_tutorial/environment_variables.md b/docs/zh/basic_tutorial/environment_variables.md index 5a8545be16b021379c3aea53b509b11581906289..d0cbba72698158852a65fcdf8a9d3b5c2e835224 100644 --- a/docs/zh/basic_tutorial/environment_variables.md +++ b/docs/zh/basic_tutorial/environment_variables.md @@ -14,13 +14,14 @@ 用户可以通过配置以下环境变量来控制openMind Hub Client的某些行为。 -| 环境变量名称 | 环境变量描述 | 默认值 | -|------------------------------|-------------------------------------------------------------------|------| -| HUB_WHITE_LIST_PATHS | 敏感路径白名单。若未设置则取值为~/。 | None | -| OM_HUB_DISABLE_PROGRESS_BARS | 是否禁用进度条,默认不禁用。设置为“1”禁用进度条。 | None | -| XDG_CACHE_HOME | openMind的数据保存在本地的路径。若未设置则取值为~/.cache。 | None | -| DEFAULT_REQUEST_TIMEOUT | 未接收到服务端发送数据的超时时间,单位秒。若未设置则取值为10。 | None | -| HUB_GIT_PUSH_TIMEOUT | 使用Repository类执行`git_push`时的超时时间,单位秒。若未设置则取值为3600。 | None | -| OPENMIND_HUB_ENDPOINT | 指定访问的域名或IP。若未设置则取值为生产环境域名。 | None | -| GIT_LFS_PROGRESS | 使用Repository类时所创建的临时文件的路径,进程结束后会被置空。 | None | -| HUB_HTTPS_PROXY | 下载时配置代理,加快下载速度。 | None | +| 环境变量名称 | 环境变量描述 | 默认值 | +|------------------------------|---------------------------------------------------|------| +| HUB_WHITE_LIST_PATHS | 敏感路径白名单。若未设置则取值为~/。 | None | +| OM_HUB_DISABLE_PROGRESS_BARS | 是否禁用进度条,默认不禁用。设置为“1”禁用进度条。 | None | +| XDG_CACHE_HOME | openMind的数据保存在本地的路径。若未设置则取值为~/.cache。 | None | +| DEFAULT_REQUEST_TIMEOUT | 未接收到服务端发送数据的超时时间,单位秒。若未设置则取值为10。 | None | +| HUB_GIT_PUSH_TIMEOUT | 使用Repository类执行`git_push`时的超时时间,单位秒。若未设置则取值为3600。 | None | +| OPENMIND_HUB_ENDPOINT | 指定访问的域名或IP。若未设置则取值为生产环境域名。 | None | +| GIT_LFS_PROGRESS | 使用Repository类时所创建的临时文件的路径,进程结束后会被置空。 | None | +| HUB_HTTPS_PROXY | 下载时配置代理,加快下载速度。 | None | +| OPENMIND_PLATFORM | 指定交互的社区[详见](third-party_platform.md)。 | None | diff --git a/docs/zh/basic_tutorial/third-party_platform.md b/docs/zh/basic_tutorial/third-party_platform.md index 265e38b39294ceac418f15981d7e717abd35d470..9d248f6d3831dffae05b1a36f897debf3ec1cba2 100644 --- 
a/docs/zh/basic_tutorial/third-party_platform.md +++ b/docs/zh/basic_tutorial/third-party_platform.md @@ -172,15 +172,6 @@ def try_to_load_from_cache( ### 指定访问的社区 -+ 使用`set_platform()`方法: - - ```python - from openmind_hub import * - - set_platform("openi") - om_hub_download(repo_id="FoundationModel/ChatGLM2-6B", filename="config.json", local_dir=".") - ``` - + 使用`platform`参数: ```python @@ -194,7 +185,7 @@ def try_to_load_from_cache( ```python import os # 环境变量的设置需要在导入openmind_hub之前 - os.environ["platform"] = "openi" + os.environ["OPENMIND_PLATFORM"] = "openi" from openmind_hub import * om_hub_download(repo_id="FoundationModel/ChatGLM2-6B", filename="config.json", local_dir=".") @@ -203,7 +194,8 @@ def try_to_load_from_cache( 示例: ```python -from openmind_hub import set_platform, create_repo, upload_folder, snapshot_download +from openmind_hub import create_repo, upload_folder, snapshot_download +import os token = "token_in_openi" @@ -212,8 +204,8 @@ snapshot_download(repo_id="PyTorch-NPU/t5_small", local_dir="./t5_small") # 下载指定社区的FoundationModel/ChatGLM2-6B模型到./ChatGLM2-6B目录下 snapshot_download(repo_id="FoundationModel/ChatGLM2-6B", local_dir="./ChatGLM2-6B", platform="openi") -# 设置默认社区 -set_platform("openi") +# 设置环境变量 +os.environ["OPENMIND_PLATFORM"] = "openi" # 在指定社区创建owner/cool-model项目及cool-model模型(注意替换实际用户名及仓库名) create_repo(repo_id="owner/cool-model", token=token) @@ -238,7 +230,7 @@ gitcode社区适配以下方法,其参数和行为与原方法基本一致: ```python import os -os.environ["OPENMIND_HUB_ENDPOINT"] = "https://api.gitcode.com" +os.environ["OPENMIND_PLATFORM"] = "gitcode" from openmind_hub import snapshot_download token = "xxx" @@ -258,10 +250,9 @@ gitee AI社区适配下载相关接口。"revision"参数必填,仓库的默 ```python import os -os.environ["HF_ENDPOINT"] = "https://hf-api.gitee.com" +os.environ["OPENMIND_PLATFORM"] = "gitee" -from openmind_hub import snapshot_download, set_platform -set_platform("gitee") +from openmind_hub import snapshot_download token = "xxx" snapshot_download("owner/repo", token=token, 
revision="master") ``` @@ -280,8 +271,10 @@ snapshot_download("owner/repo", token=token, revision="master") + create_repo:不支持"fullname"、"desc"、"license"参数 ```python -from openmind_hub import snapshot_download, set_platform -set_platform("huggingface") +import os +os.environ["OPENMIND_PLATFORM"] = "huggingface" + +from openmind_hub import snapshot_download token = "xxx" snapshot_download("owner/repo", token=token) ``` diff --git a/docs/zh/developer_tutorial/trouble_shooting.md b/docs/zh/developer_tutorial/trouble_shooting.md index 016c44fd0c54069795afe3d70d68a246eff691d9..ff78798539c2a27285dcc18ce7fb09ac9c16f18f 100644 --- a/docs/zh/developer_tutorial/trouble_shooting.md +++ b/docs/zh/developer_tutorial/trouble_shooting.md @@ -8,13 +8,13 @@ 第一种:Python根CA证书与requests库版本不匹配,请按以下两种方案排查。 - ```commandline + ```python SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1108)') ``` + 修改requests库的版本: - ```commandline + ```bash # 查看requests库的版本 pip show requests @@ -42,7 +42,7 @@ 第二种:需手动配置魔乐社区的证书。 - ```commandline + ```python SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate (_ssl.c:1131)') ``` @@ -93,7 +93,7 @@ error: failed to push some refs to 'https://modelers.cn/xxx/xx.git' ## 上传报错:NameResolutionError -```commandline +```python requests.exceptions.ConnectionError: HTTPSConnectionPool(host='xxx',port=443):Max retries exceeded url: /api/v1/file/xxx/xxx/pre_upload...(Caused by NameResolutionError(":Failed to resolve 'modelers.cn'([Error -3] Temporary failure in name resolution)")) ``` @@ -101,7 +101,7 @@ requests.exceptions.ConnectionError: HTTPSConnectionPool(host='xxx',port=443):Ma ## 下载报错:ChunkedEncodingError -```commandline +```python requests.exceptions.ChunkedEncodingError: ('Connection broken: IncompleteRead(1247345764 bytes read, 13764675765 more expected)' ``` diff --git a/docs/zh/release_note.md 
b/docs/zh/release_note.md index 3dd2e2f72c1379f07d2972d7f89c390da0401ddf..2167d4e0b4b81ccce2a84a99760bc648a35b884b 100644 --- a/docs/zh/release_note.md +++ b/docs/zh/release_note.md @@ -6,10 +6,16 @@ #### 新增功能 +- 使用环境变量OPENMIND_PLATFORM与不同三方社区交互。 + #### 文档更新 +- [支持三方社区](./basic_tutorial/third-party_platform.md) + ### 已修复问题 +- 使用代理在多线程情况下,出现ProxyError。 + ### 已知问题 无 diff --git a/public_address_statement.md b/public_address_statement.md index e74b6c2f5787426190c0d84cbdc5f1962dd27ebd..6932292807831c0ef2e86d52b11d32091fc47ceb 100644 --- a/public_address_statement.md +++ b/public_address_statement.md @@ -27,6 +27,9 @@ | 开发引入 | - | tests/unit/public_address_lib.json | https://user:password@modelers.com/user/repo.git | 测试用例所用url | | 开发引入 | - | tests/unit/public_address_lib.json | https://example.com | 测试用例所用url | | 开发引入 | - | tests/unit/public_address_lib.json | https://modelers.cn/api/v1/file/user/repo/main/media/a/b | 测试用例所用url | -| 开发引入 | - | tests/unit/public_address_lib.json | https://modelers.cn | 测试用例所用url | +| 开发引入 | - | tests/unit/public_address_lib.json | https://modelers.cn | 测试用例所用url | | 开发引入 | - | pyproject.toml | https://gitee.com/openmind-ai/openmind-hub | openmind-hub项目仓库地址 | | 开发引入 | - | pyproject.toml | contact@openmind.cn | openmind邮箱联系地址 | +| 开发引入 | - | src\openmind_hub\plugins\openmind\constants.py | https://api.gitcode.com | gitcode社区交互endpoint | +| 开发引入 | - | src\openmind_hub\plugins\utils\platform_adapter.py | https://hf-api.gitee.com | GiteeAI社区交互endpoint | + diff --git a/src/openmind_hub/__init__.py b/src/openmind_hub/__init__.py index f83f8f6a3565d784f85b4c6ffe43693254e68078..a07e96aabe0d3b4f23d3b7b1521d2dc7e371c2b2 100644 --- a/src/openmind_hub/__init__.py +++ b/src/openmind_hub/__init__.py @@ -71,7 +71,6 @@ __all__ = [ "RepositoryNotFoundError", "om_raise_for_status", "build_om_headers", - "set_platform", "get_logger", "set_verbosity", "set_verbosity_info", @@ -89,7 +88,6 @@ from openmind_hub.plugins.utils.platform_adapter import ( 
default_cache_path, _CACHED_NO_EXIST, ENDPOINT, - set_platform, om_hub_download, get_om_file_metadata, om_hub_url, diff --git a/src/openmind_hub/interface/base_api.py b/src/openmind_hub/interface/base_api.py index bf05511f2c1b1e9876623948cc396059c833cf6c..96ca201a3f4a62e85a8c7c317de1deff5053d9b1 100644 --- a/src/openmind_hub/interface/base_api.py +++ b/src/openmind_hub/interface/base_api.py @@ -10,6 +10,7 @@ # EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, # MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. # See the Mulan PSL v2 for more details. +import os from abc import abstractmethod from dataclasses import dataclass from datetime import datetime @@ -31,8 +32,9 @@ from .dependent_methods import ( LastCommitInfo, ) -OM_HOME = None -OM_HUB_CACHE = None +default_home = str(Path.home() / ".cache") +OM_HOME = os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "openmind") +OM_HUB_CACHE = os.path.join(OM_HOME, "hub") REGEX_COMMIT_HASH = None default_cache_path = None _CACHED_NO_EXIST = None diff --git a/src/openmind_hub/plugins/openmind/constants.py b/src/openmind_hub/plugins/openmind/constants.py index bde5f741fc4577e50d12b14bdfb095866833fcae..593772e6cc2fbfd40677cc781267c32f1f908543 100644 --- a/src/openmind_hub/plugins/openmind/constants.py +++ b/src/openmind_hub/plugins/openmind/constants.py @@ -57,6 +57,8 @@ UPLOAD_OBS_HOSTNAME_WHITE_LIST = [ ] DOWNLOAD_CDN_HOSTNAME_WHITE_LIST = ["openmind.cn", "osinfra.cn", "modelers.cn", "gitcode.com", "gitcode.net"] ENDPOINT = os.getenv("OPENMIND_HUB_ENDPOINT", OPENMIND_URL) +if os.getenv("OPENMIND_PLATFORM") == "gitcode": + ENDPOINT = "https://api.gitcode.com" if urlparse(ENDPOINT).scheme != "https": raise ValueError("Insecure scheme detected, exiting.") diff --git a/src/openmind_hub/plugins/openmind/om_api.py b/src/openmind_hub/plugins/openmind/om_api.py index 522190f6fce15e0019ae3c176bcce7375063093d..e9b5651e18b5fb088848efe060bf282ee8a4746a 100644 --- 
def validate_gitcode_repo_id(repo_id: str) -> str:
    """Validate a three-segment gitcode repo id and percent-encode its first slash.

    A gitcode repo id has the shape ``"<a>/<b>/<repo_name>"``.  The first
    ``/`` is replaced by ``%2F`` so that the result contains a single ``/``
    and can be handled like an ordinary ``owner/repo_name`` id, e.g.
    ``"a/b/repo"`` becomes ``"a%2Fb/repo"``.

    Args:
        repo_id: The repo id to validate; must contain exactly three
            non-empty ``/``-separated segments.

    Returns:
        The repo id with its first ``/`` replaced by ``%2F``.

    Raises:
        OMValidationError: If ``repo_id`` is not a string, if the
            ``OPENMIND_PLATFORM`` environment variable is not ``"gitcode"``,
            if ``repo_id`` does not have exactly three non-empty segments,
            or if the repo name ends with ``.git`` or does not match
            ``REPO_NAME_REGEXP``.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import os`` being present in this file.
    import os

    if not isinstance(repo_id, str):
        raise OMValidationError("repo_id type must be string")
    # Three-segment ids are only meaningful on gitcode; on every other
    # platform report the usual two-segment format error.
    if os.getenv("OPENMIND_PLATFORM") != "gitcode":
        raise OMValidationError("Repo id must be in the form 'repo_name' or 'owner/repo_name'")

    segments = repo_id.split("/")
    # Reject malformed ids up front: the previous slicing based on
    # ``str.find`` produced a corrupted id when no slash was present and an
    # unrelated ValueError (tuple unpacking) when more than two were present.
    if len(segments) != 3 or not all(segments):
        raise OMValidationError("gitcode repo id must contain exactly three non-empty parts separated by '/'")

    *owner_segments, repo = segments

    if repo.endswith(".git"):
        raise OMValidationError("repo name cannot end by '.git'")
    if not REPO_NAME_REGEXP.match(repo):
        raise OMValidationError("invalid repo name")

    # NOTE(review): the owner part ("<a>%2F<b>") is not validated here, as in
    # the original; presumably a downstream check covers it - confirm.
    return "%2F".join(owner_segments) + "/" + repo
b5c8f3ded4cde2f891acc05ffa9464ebbc782d05..9e729cc0e3573e8f3e70b7ae3a6fb9167a65415b 100644 --- a/tests/unit/test_download.py +++ b/tests/unit/test_download.py @@ -44,7 +44,7 @@ from openmind_hub.plugins.openmind.constants import OM_HUB_CACHE, BIG_FILE_SIZE from openmind_hub.plugins.openmind._snapshot_download import snapshot_download from openmind_hub.plugins.openmind.om_api import LastCommitInfo from openmind_hub.plugins.openmind.utils import EntryNotFoundError, RevisionNotFoundError, OMValidationError -from openmind_hub.utils import LocalEntryNotFoundError +from openmind_hub import LocalEntryNotFoundError from tests.unit.testing_constants import ( ENDPOINT_SH, diff --git a/tests/unit/test_platform.py b/tests/unit/test_platform.py index f9ed6a9667b3c737801f897c16144f0d3c757ba3..8668b38d766c7de062fe8781c60a107f4e30cf67 100644 --- a/tests/unit/test_platform.py +++ b/tests/unit/test_platform.py @@ -18,7 +18,7 @@ from unittest import mock import testtools -from openmind_hub import upload_file, CommitOperationAdd, set_platform +from openmind_hub import upload_file, CommitOperationAdd from openmind_hub.plugins.utils.import_utils import is_openi_available from openmind_hub.plugins.utils.platform_adapter import get_plugin, HubApi @@ -37,11 +37,10 @@ class PlatformTest(unittest.TestCase): upload_file(path_or_fileobj="file", path_in_repo="file", repo_id="repo_id", platform="openi"), "openi", ) - # set_platform()方法指定 - set_platform("openi") + os.environ["OPENMIND_PLATFORM"] = "openi" self.assertEqual(upload_file(path_or_fileobj="file", path_in_repo="file", repo_id="repo_id"), "openi") # 环境变量指定 - os.environ["platform"] = "openmind" + os.environ["OPENMIND_PLATFORM"] = "openmind" self.assertEqual(upload_file(path_or_fileobj="file", path_in_repo="file", repo_id="repo_id"), "openmind") @mock.patch("openmind_hub.plugins.openi.CommitOperationAdd") @@ -56,11 +55,10 @@ class PlatformTest(unittest.TestCase): CommitOperationAdd(path_or_fileobj="file", path_in_repo="file", 
platform="openi"), "openi", ) - # set_platform()方法指定 - set_platform("openi") + os.environ["OPENMIND_PLATFORM"] = "openi" self.assertEqual(CommitOperationAdd(path_or_fileobj="file", path_in_repo="file"), "openi") # 环境变量指定 - os.environ["platform"] = "openmind" + os.environ["OPENMIND_PLATFORM"] = "openmind" self.assertEqual(CommitOperationAdd(path_or_fileobj="file", path_in_repo="file"), "openmind") @@ -73,27 +71,6 @@ class TestPlatformTestSupport(unittest.TestCase): with self.assertRaises(ValueError): get_plugin(platform="openi2") - def test_set_openmind_platform(self): - set_platform("openmind") - self.assertEqual(os.environ["platform"], "openmind") - - def test_set_openi_platform(self): - set_platform("openi") - self.assertEqual(os.environ["platform"], "openi") - - def test_remove_platform(self): - set_platform(None) - self.assertNotIn("platform", os.environ) - - def test_set_unknown_platform(self): - with self.assertRaises(ValueError) as context: - set_platform("unknown") - self.assertEqual(str(context.exception), "unknown platform") - - def test_set_platform_none_when_empty(self): - set_platform(None) - self.assertNotIn("platform", os.environ) - class TestHubApi(unittest.TestCase): def setUp(self):