diff --git a/docs/api_cpp/_custom/sphinx_builder_html b/docs/api_cpp/_custom/sphinx_builder_html new file mode 100644 index 0000000000000000000000000000000000000000..b73b00bd30f321f62f6e57990a8a32316c33ad89 --- /dev/null +++ b/docs/api_cpp/_custom/sphinx_builder_html @@ -0,0 +1,1269 @@ +""" + sphinx.builders.html + ~~~~~~~~~~~~~~~~~~~~ + + Several HTML builders. + + :copyright: Copyright 2007-2020 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import html +import posixpath +import re +import sys +import warnings +from hashlib import md5 +from os import path +from typing import Any, Dict, IO, Iterable, Iterator, List, Set, Tuple + +from docutils import nodes +from docutils.core import publish_parts +from docutils.frontend import OptionParser +from docutils.io import DocTreeInput, StringOutput +from docutils.nodes import Node +from docutils.utils import relative_path + +from sphinx import package_dir, __display_version__ +from sphinx.application import Sphinx +from sphinx.builders import Builder +from sphinx.config import Config +from sphinx.deprecation import RemovedInSphinx30Warning, RemovedInSphinx40Warning +from sphinx.domains import Domain, Index, IndexEntry +from sphinx.environment.adapters.asset import ImageAdapter +from sphinx.environment.adapters.indexentries import IndexEntries +from sphinx.environment.adapters.toctree import TocTree +from sphinx.errors import ConfigError, ThemeError +from sphinx.highlighting import PygmentsBridge +from sphinx.locale import _, __ +from sphinx.search import js_index +from sphinx.theming import HTMLThemeFactory +from sphinx.util import logging, progress_message, status_iterator +from sphinx.util.docutils import is_html5_writer_available, new_document +from sphinx.util.fileutil import copy_asset +from sphinx.util.i18n import format_date +from sphinx.util.inventory import InventoryFile +from sphinx.util.matching import patmatch, Matcher, DOTFILES +from sphinx.util.osutil import os_path, relative_uri, ensuredir, movefile, copyfile +from sphinx.util.tags import Tags +from sphinx.writers.html import HTMLWriter, HTMLTranslator + +if False: + # For type annotation + from typing import Type # for python3.5.1 + + +# HTML5 Writer is available or not +if is_html5_writer_available(): + from sphinx.writers.html5 import HTML5Translator + html5_ready = True +else: + html5_ready = False + +#: the filename for the inventory of objects +INVENTORY_FILENAME = 'objects.inv' + +logger = logging.getLogger(__name__) +return_codes_re = re.compile('[\r\n]+') + + +def get_stable_hash(obj: Any) -> str: + """ + Return a stable hash for a Python data structure. We can't just use + the md5 of str(obj) since for example dictionary items are enumerated + in unpredictable order due to hash randomization in newer Pythons. + """ + if isinstance(obj, dict): + return get_stable_hash(list(obj.items())) + elif isinstance(obj, (list, tuple)): + obj = sorted(get_stable_hash(o) for o in obj) + return md5(str(obj).encode()).hexdigest() + + +class Stylesheet(str): + """A metadata of stylesheet. + + To keep compatibility with old themes, an instance of stylesheet behaves as + its filename (str). 
+ """ + + attributes = None # type: Dict[str, str] + filename = None # type: str + + def __new__(cls, filename: str, *args: str, **attributes: str) -> "Stylesheet": + self = str.__new__(cls, filename) # type: ignore + self.filename = filename + self.attributes = attributes + self.attributes.setdefault('rel', 'stylesheet') + self.attributes.setdefault('type', 'text/css') + if args: # old style arguments (rel, title) + self.attributes['rel'] = args[0] + self.attributes['title'] = args[1] + + return self + + +class JSContainer(list): + """The container for JavaScript scripts.""" + def insert(self, index: int, obj: str) -> None: + warnings.warn('To modify script_files in the theme is deprecated. ' + 'Please insert a ' % (' '.join(attrs), body) + + context['js_tag'] = js_tag + + +def validate_math_renderer(app: Sphinx) -> None: + if app.builder.format != 'html': + return + + name = app.builder.math_renderer_name # type: ignore + if name is None: + raise ConfigError(__('Many math_renderers are registered. ' + 'But no math_renderer is selected.')) + elif name not in app.registry.html_inline_math_renderers: + raise ConfigError(__('Unknown math_renderer %r is given.') % name) + + +def validate_html_extra_path(app: Sphinx, config: Config) -> None: + """Check html_extra_paths setting.""" + for entry in config.html_extra_path[:]: + extra_path = path.normpath(path.join(app.confdir, entry)) + if not path.exists(extra_path): + logger.warning(__('html_extra_path entry %r does not exist'), entry) + config.html_extra_path.remove(entry) + elif (path.splitdrive(app.outdir)[0] == path.splitdrive(extra_path)[0] and + path.commonpath([app.outdir, extra_path]) == app.outdir): + logger.warning(__('html_extra_path entry %r is placed inside outdir'), entry) + config.html_extra_path.remove(entry) + + +def validate_html_static_path(app: Sphinx, config: Config) -> None: + """Check html_static_paths setting.""" + for entry in config.html_static_path[:]: + static_path = path.normpath(path.join(app.confdir, entry)) + if not path.exists(static_path): + logger.warning(__('html_static_path entry %r does not exist'), entry) + config.html_static_path.remove(entry) + elif (path.splitdrive(app.outdir)[0] == path.splitdrive(static_path)[0] and + path.commonpath([app.outdir, static_path]) == app.outdir): + logger.warning(__('html_static_path entry %r is placed inside outdir'), entry) + config.html_static_path.remove(entry) + + +def validate_html_logo(app: Sphinx, config: Config) -> None: + """Check html_logo setting.""" + if config.html_logo and not path.isfile(path.join(app.confdir, config.html_logo)): + logger.warning(__('logo file %r does not exist'), config.html_logo) + config.html_logo = None # type: ignore + + +def validate_html_favicon(app: Sphinx, config: Config) -> None: + """Check html_favicon setting.""" + if config.html_favicon and not path.isfile(path.join(app.confdir, config.html_favicon)): + logger.warning(__('favicon file %r does not exist'), config.html_favicon) + config.html_favicon = None # type: ignore + + +# for compatibility +import sphinx.builders.dirhtml # NOQA +import sphinx.builders.singlehtml # NOQA +import sphinxcontrib.serializinghtml # NOQA + + +def setup(app: Sphinx) -> Dict[str, Any]: + # builders + app.add_builder(StandaloneHTMLBuilder) + + # config values + app.add_config_value('html_theme', 'alabaster', 'html') + app.add_config_value('html_theme_path', [], 'html') + app.add_config_value('html_theme_options', {}, 'html') + app.add_config_value('html_title', + lambda self: _('%s %s documentation') 
% (self.project, self.release), + 'html', [str]) + app.add_config_value('html_short_title', lambda self: self.html_title, 'html') + app.add_config_value('html_style', None, 'html', [str]) + app.add_config_value('html_logo', None, 'html', [str]) + app.add_config_value('html_favicon', None, 'html', [str]) + app.add_config_value('html_css_files', [], 'html') + app.add_config_value('html_js_files', [], 'html') + app.add_config_value('html_static_path', [], 'html') + app.add_config_value('html_extra_path', [], 'html') + app.add_config_value('html_last_updated_fmt', None, 'html', [str]) + app.add_config_value('html_sidebars', {}, 'html') + app.add_config_value('html_additional_pages', {}, 'html') + app.add_config_value('html_domain_indices', True, 'html', [list]) + app.add_config_value('html_add_permalinks', '¶', 'html') + app.add_config_value('html_use_index', True, 'html') + app.add_config_value('html_split_index', False, 'html') + app.add_config_value('html_copy_source', True, 'html') + app.add_config_value('html_show_sourcelink', True, 'html') + app.add_config_value('html_sourcelink_suffix', '.txt', 'html') + app.add_config_value('html_use_opensearch', '', 'html') + app.add_config_value('html_file_suffix', None, 'html', [str]) + app.add_config_value('html_link_suffix', None, 'html', [str]) + app.add_config_value('html_show_copyright', True, 'html') + app.add_config_value('html_show_sphinx', True, 'html') + app.add_config_value('html_context', {}, 'html') + app.add_config_value('html_output_encoding', 'utf-8', 'html') + app.add_config_value('html_compact_lists', True, 'html') + app.add_config_value('html_secnumber_suffix', '. ', 'html') + app.add_config_value('html_search_language', None, 'html', [str]) + app.add_config_value('html_search_options', {}, 'html') + app.add_config_value('html_search_scorer', '', None) + app.add_config_value('html_scaled_image_link', True, 'html') + app.add_config_value('html_baseurl', '', 'html') + app.add_config_value('html_math_renderer', None, 'env') + app.add_config_value('html4_writer', False, 'html') + + # event handlers + app.connect('config-inited', convert_html_css_files) + app.connect('config-inited', convert_html_js_files) + app.connect('config-inited', validate_html_extra_path) + app.connect('config-inited', validate_html_static_path) + app.connect('config-inited', validate_html_logo) + app.connect('config-inited', validate_html_favicon) + app.connect('builder-inited', validate_math_renderer) + app.connect('html-page-context', setup_js_tag_helper) + + # load default math renderer + app.setup_extension('sphinx.ext.mathjax') + + return { + 'version': 'builtin', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/docs/api_cpp/_custom/sphinx_writer_html5 b/docs/api_cpp/_custom/sphinx_writer_html5 new file mode 100644 index 0000000000000000000000000000000000000000..7e049217550723a33bab54693eab74c6ead203fe --- /dev/null +++ b/docs/api_cpp/_custom/sphinx_writer_html5 @@ -0,0 +1,817 @@ +""" + sphinx.writers.html5 + ~~~~~~~~~~~~~~~~~~~~ + + Experimental docutils writers for HTML5 handling Sphinx' custom nodes. + + :copyright: Copyright 2007-2020 by the Sphinx team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +import os +import posixpath +import sys +import warnings +from typing import Any, Iterable, Tuple +from typing import cast + +from docutils import nodes +from docutils.nodes import Element, Node, Text +from docutils.writers.html5_polyglot import HTMLTranslator as BaseTranslator + +from sphinx import addnodes +from sphinx.builders import Builder +from sphinx.deprecation import RemovedInSphinx30Warning, RemovedInSphinx40Warning +from sphinx.locale import admonitionlabels, _, __ +from sphinx.util import logging +from sphinx.util.docutils import SphinxTranslator +from sphinx.util.images import get_image_size + +if False: + # For type annotation + from sphinx.builders.html import StandaloneHTMLBuilder + + +logger = logging.getLogger(__name__) + +# A good overview of the purpose behind these classes can be found here: +# http://www.arnebrodowski.de/blog/write-your-own-restructuredtext-writer.html + + +class HTML5Translator(SphinxTranslator, BaseTranslator): + """ + Our custom HTML translator. + """ + + builder = None # type: StandaloneHTMLBuilder + + def __init__(self, *args: Any) -> None: + if isinstance(args[0], nodes.document) and isinstance(args[1], Builder): + document, builder = args + else: + warnings.warn('The order of arguments for HTML5Translator has been changed. ' + 'Please give "document" as 1st and "builder" as 2nd.', + RemovedInSphinx40Warning, stacklevel=2) + builder, document = args + super().__init__(document, builder) + + self.highlighter = self.builder.highlighter + self.docnames = [self.builder.current_docname] # for singlehtml builder + self.manpages_url = self.config.manpages_url + self.protect_literal_text = 0 + self.permalink_text = self.config.html_add_permalinks + # support backwards-compatible setting to a bool + if not isinstance(self.permalink_text, str): + self.permalink_text = '¶' if self.permalink_text else '' + self.permalink_text = self.encode(self.permalink_text) + self.secnumber_suffix = self.config.html_secnumber_suffix + self.param_separator = '' + self.optional_param_level = 0 + self._table_row_index = 0 + self._fieldlist_row_index = 0 + self.required_params_left = 0 + + def visit_start_of_file(self, node: Element) -> None: + # only occurs in the single-file builder + self.docnames.append(node['docname']) + self.body.append('' % node['docname']) + + def depart_start_of_file(self, node: Element) -> None: + self.docnames.pop() + + def visit_desc(self, node: Element) -> None: + self.body.append(self.starttag(node, 'dl', CLASS=node['objtype'])) + + def depart_desc(self, node: Element) -> None: + self.body.append('\n\n') + + def visit_desc_signature(self, node: Element) -> None: + # the id is set automatically + self.body.append(self.starttag(node, 'dt')) + # anchor for per-desc interactive data + if node.parent['objtype'] != 'describe' \ + and node['ids'] and node['first']: + self.body.append('' % node['ids'][0]) + + def depart_desc_signature(self, node: Element) -> None: + if not node.get('is_multiline'): + self.add_permalink_ref(node, _('Permalink to this definition')) + self.body.append('\n') + + def visit_desc_signature_line(self, node: Element) -> None: + pass + + def depart_desc_signature_line(self, node: Element) -> None: + if node.get('add_permalink'): + # the permalink info is on the parent desc_signature node + self.add_permalink_ref(node.parent, _('Permalink to this definition')) + self.body.append('
<br />')
+
+    def visit_desc_addname(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'code', '', CLASS='sig-prename descclassname'))
+
+    def depart_desc_addname(self, node: Element) -> None:
+        self.body.append('</code>')
+
+    def visit_desc_type(self, node: Element) -> None:
+        pass
+
+    def depart_desc_type(self, node: Element) -> None:
+        pass
+
+    def visit_desc_returns(self, node: Element) -> None:
+        self.body.append(' &#x2192; ')
+
+    def depart_desc_returns(self, node: Element) -> None:
+        pass
+
+    def visit_desc_name(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'code', '', CLASS='sig-name descname'))
+
+    def depart_desc_name(self, node: Element) -> None:
+        self.body.append('</code>')
+
+    def visit_desc_parameterlist(self, node: Element) -> None:
+        self.body.append('<span class="sig-paren">(</span>')
+        self.first_param = 1
+        self.optional_param_level = 0
+        # How many required parameters are left.
+        self.required_params_left = sum([isinstance(c, addnodes.desc_parameter)
+                                         for c in node.children])
+        self.param_separator = node.child_text_separator
+
+        # fix error of extra space for C++ API.
+        if "'<em>', ' ', '</em>'" in str(self.body)[1:-1]:
+            for num, i in enumerate(self.body[:-2]):
+                if ['<em>', ' '] == self.body[num:num+2]:
+                    self.body[num:num + 2] = [' ', '<em>', ]
+
+    def depart_desc_parameterlist(self, node: Element) -> None:
+        self.body.append('<span class="sig-paren">)</span>')
+
+    # If required parameters are still to come, then put the comma after
+    # the parameter. Otherwise, put the comma before. This ensures that
+    # signatures like the following render correctly (see issue #1001):
+    #
+    #     foo([a, ]b, c[, d])
+    #
+    def visit_desc_parameter(self, node: Element) -> None:
+        if self.first_param:
+            self.first_param = 0
+        elif not self.required_params_left:
+            self.body.append(self.param_separator)
+        if self.optional_param_level == 0:
+            self.required_params_left -= 1
+        if not node.hasattr('noemph'):
+            self.body.append('<em>')
+
+        # fix error of extra space for C++ API.
+        if "'<em>', ' ', '</em>'" in str(self.body)[1:-1]:
+            for num, i in enumerate(self.body[:-2]):
+                if ['<em>', ' '] == self.body[num:num+2]:
+                    self.body[num:num + 2] = [' ', '<em>', ]
+
+    def depart_desc_parameter(self, node: Element) -> None:
+        if not node.hasattr('noemph'):
+            self.body.append('</em>')
+        if self.required_params_left:
+            self.body.append(self.param_separator)
+
+    def visit_desc_optional(self, node: Element) -> None:
+        self.optional_param_level += 1
+        self.body.append('<span class="optional">[</span>')
+
+    def depart_desc_optional(self, node: Element) -> None:
+        self.optional_param_level -= 1
+        self.body.append('<span class="optional">]</span>')
+
+    def visit_desc_annotation(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'em', '', CLASS='property'))
+
+    def depart_desc_annotation(self, node: Element) -> None:
+        self.body.append('</em>')
+
+    def visit_desc_content(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'dd', ''))
+
+    def depart_desc_content(self, node: Element) -> None:
+        self.body.append('</dd>')
+
+    def visit_versionmodified(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'div', CLASS=node['type']))
+
+    def depart_versionmodified(self, node: Element) -> None:
+        self.body.append('</div>\n')
+
+    # overwritten
+    def visit_reference(self, node: Element) -> None:
+        atts = {'class': 'reference'}
+        if node.get('internal') or 'refuri' not in node:
+            atts['class'] += ' internal'
+        else:
+            atts['class'] += ' external'
+        if 'refuri' in node:
+            atts['href'] = node['refuri'] or '#'
+            if self.settings.cloak_email_addresses and atts['href'].startswith('mailto:'):
+                atts['href'] = self.cloak_mailto(atts['href'])
+                self.in_mailto = True
+        else:
+            assert 'refid' in node, \
+                   'References must have "refuri" or "refid" attribute.'
+            atts['href'] = '#' + node['refid']
+        if not isinstance(node.parent, nodes.TextElement):
+            assert len(node) == 1 and isinstance(node[0], nodes.image)
+            atts['class'] += ' image-reference'
+        if 'reftitle' in node:
+            atts['title'] = node['reftitle']
+        if 'target' in node:
+            atts['target'] = node['target']
+        self.body.append(self.starttag(node, 'a', '', **atts))
+
+        if node.get('secnumber'):
+            self.body.append(('%s' + self.secnumber_suffix) %
+                             '.'.join(map(str, node['secnumber'])))
+
+    def visit_number_reference(self, node: Element) -> None:
+        self.visit_reference(node)
+
+    def depart_number_reference(self, node: Element) -> None:
+        self.depart_reference(node)
+
+    # overwritten -- we don't want source comments to show up in the HTML
+    def visit_comment(self, node: Element) -> None:  # type: ignore
+        raise nodes.SkipNode
+
+    # overwritten
+    def visit_admonition(self, node: Element, name: str = '') -> None:
+        self.body.append(self.starttag(
+            node, 'div', CLASS=('admonition ' + name)))
+        if name:
+            node.insert(0, nodes.title(name, admonitionlabels[name]))
+
+    def visit_seealso(self, node: Element) -> None:
+        self.visit_admonition(node, 'seealso')
+
+    def depart_seealso(self, node: Element) -> None:
+        self.depart_admonition(node)
+
+    def get_secnumber(self, node: Element) -> Tuple[int, ...]:
+        if node.get('secnumber'):
+            return node['secnumber']
+
+        if isinstance(node.parent, nodes.section):
+            if self.builder.name == 'singlehtml':
+                docname = self.docnames[-1]
+                anchorname = "%s/#%s" % (docname, node.parent['ids'][0])
+                if anchorname not in self.builder.secnumbers:
+                    anchorname = "%s/" % docname  # try first heading which has no anchor
+            else:
+                anchorname = '#' + node.parent['ids'][0]
+                if anchorname not in self.builder.secnumbers:
+                    anchorname = ''  # try first heading which has no anchor
+
+            if self.builder.secnumbers.get(anchorname):
+                return self.builder.secnumbers[anchorname]
+
+        return None
+
+    def add_secnumber(self, node: Element) -> None:
+        secnumber = self.get_secnumber(node)
+        if secnumber:
+            self.body.append('<span class="section-number">%s</span>' %
+                             ('.'.join(map(str, secnumber)) + self.secnumber_suffix))
+
+    def add_fignumber(self, node: Element) -> None:
+        def append_fignumber(figtype: str, figure_id: str) -> None:
+            if self.builder.name == 'singlehtml':
+                key = "%s/%s" % (self.docnames[-1], figtype)
+            else:
+                key = figtype
+
+            if figure_id in self.builder.fignumbers.get(key, {}):
+                self.body.append('<span class="caption-number">')
+                prefix = self.builder.config.numfig_format.get(figtype)
+                if prefix is None:
+                    msg = __('numfig_format is not defined for %s') % figtype
+                    logger.warning(msg)
+                else:
+                    numbers = self.builder.fignumbers[key][figure_id]
+                    self.body.append(prefix % '.'.join(map(str, numbers)) + ' ')
+                    self.body.append('</span>')
+
+        figtype = self.builder.env.domains['std'].get_enumerable_node_type(node)
+        if figtype:
+            if len(node['ids']) == 0:
+                msg = __('Any IDs not assigned for %s node') % node.tagname
+                logger.warning(msg, location=node)
+            else:
+                append_fignumber(figtype, node['ids'][0])
+
+    def add_permalink_ref(self, node: Element, title: str) -> None:
+        if node['ids'] and self.permalink_text and self.builder.add_permalinks:
+            format = '<a class="headerlink" href="#%s" title="%s">%s</a>'
+            self.body.append(format % (node['ids'][0], title, self.permalink_text))
+
+    # overwritten
+    def visit_bullet_list(self, node: Element) -> None:
+        if len(node) == 1 and isinstance(node[0], addnodes.toctree):
+            # avoid emitting empty <ul></ul>
+            raise nodes.SkipNode
+        super().visit_bullet_list(node)
+
+    # overwritten
+    def visit_definition(self, node: Element) -> None:
+        # don't insert <dd> here.
+        self.body.append(self.starttag(node, 'dd', ''))
+
+    # overwritten
+    def depart_definition(self, node: Element) -> None:
+        self.body.append('</dd>\n')
+
+    # overwritten
+    def visit_classifier(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'span', '', CLASS='classifier'))
+
+    # overwritten
+    def depart_classifier(self, node: Element) -> None:
+        self.body.append('</span>')
+
+        next_node = node.next_node(descend=False, siblings=True)  # type: Node
+        if not isinstance(next_node, nodes.classifier):
+            # close `<dt>` tag at the tail of classifiers
+            self.body.append('</dt>')
+
+    # overwritten
+    def visit_term(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'dt', ''))
+
+    # overwritten
+    def depart_term(self, node: Element) -> None:
+        next_node = node.next_node(descend=False, siblings=True)  # type: Node
+        if isinstance(next_node, nodes.classifier):
+            # Leave the end tag to `self.depart_classifier()`, in case
+            # there's a classifier.
+            pass
+        else:
+            self.body.append('</dt>')
+
+    # overwritten
+    def visit_title(self, node: Element) -> None:
+        super().visit_title(node)
+        self.add_secnumber(node)
+        self.add_fignumber(node.parent)
+        if isinstance(node.parent, nodes.table):
+            self.body.append('<span class="caption-text">')
+
+    def depart_title(self, node: Element) -> None:
+        close_tag = self.context[-1]
+        if (self.permalink_text and self.builder.add_permalinks and
+           node.parent.hasattr('ids') and node.parent['ids']):
+            # add permalink anchor
+            if close_tag.startswith('</h'):
+                self.add_permalink_ref(node.parent, _('Permalink to this headline'))
+            elif close_tag.startswith('</a></h'):
+                self.body.append('</a><a class="headerlink" href="#%s" '
+                                 'title="%s">%s' % (
+                                     node.parent['ids'][0],
+                                     _('Permalink to this headline'),
+                                     self.permalink_text))
+            elif isinstance(node.parent, nodes.table):
+                self.body.append('</span>')
+                self.add_permalink_ref(node.parent, _('Permalink to this table'))
+        elif isinstance(node.parent, nodes.table):
+            self.body.append('</span>')
+
+        super().depart_title(node)
+
+    # overwritten
+    def visit_literal_block(self, node: Element) -> None:
+        if node.rawsource != node.astext():
+            # most probably a parsed-literal block -- don't highlight
+            return super().visit_literal_block(node)
+
+        lang = node.get('language', 'default')
+        linenos = node.get('linenos', False)
+        highlight_args = node.get('highlight_args', {})
+        highlight_args['force'] = node.get('force', False)
+        if lang is self.builder.config.highlight_language:
+            # only pass highlighter options for original language
+            opts = self.builder.config.highlight_options
+        else:
+            opts = {}
+
+        highlighted = self.highlighter.highlight_block(
+            node.rawsource, lang, opts=opts, linenos=linenos,
+            location=(self.builder.current_docname, node.line), **highlight_args
+        )
+        starttag = self.starttag(node, 'div', suffix='',
+                                 CLASS='highlight-%s notranslate' % lang)
+        self.body.append(starttag + highlighted + '</div>\n')
+        raise nodes.SkipNode
+
+    def visit_caption(self, node: Element) -> None:
+        if isinstance(node.parent, nodes.container) and node.parent.get('literal_block'):
+            self.body.append('<div class="code-block-caption">')
+        else:
+            super().visit_caption(node)
+        self.add_fignumber(node.parent)
+        self.body.append(self.starttag(node, 'span', '', CLASS='caption-text'))
+
+    def depart_caption(self, node: Element) -> None:
+        self.body.append('</span>')
+
+        # append permalink if available
+        if isinstance(node.parent, nodes.container) and node.parent.get('literal_block'):
+            self.add_permalink_ref(node.parent, _('Permalink to this code'))
+        elif isinstance(node.parent, nodes.figure):
+            self.add_permalink_ref(node.parent, _('Permalink to this image'))
+        elif node.parent.get('toctree'):
+            self.add_permalink_ref(node.parent.parent, _('Permalink to this toctree'))
+
+        if isinstance(node.parent, nodes.container) and node.parent.get('literal_block'):
+            self.body.append('</div>
\n') + else: + super().depart_caption(node) + + def visit_doctest_block(self, node: Element) -> None: + self.visit_literal_block(node) + + # overwritten to add the
<div> (for XHTML compliance)
+    def visit_block_quote(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'blockquote') + '<div>')
+
+    def depart_block_quote(self, node: Element) -> None:
+        self.body.append('</div></blockquote>\n')
+
+    # overwritten
+    def visit_literal(self, node: Element) -> None:
+        if 'kbd' in node['classes']:
+            self.body.append(self.starttag(node, 'kbd', '',
+                                           CLASS='docutils literal notranslate'))
+        else:
+            self.body.append(self.starttag(node, 'code', '',
+                                           CLASS='docutils literal notranslate'))
+            self.protect_literal_text += 1
+
+    def depart_literal(self, node: Element) -> None:
+        if 'kbd' in node['classes']:
+            self.body.append('</kbd>')
+        else:
+            self.protect_literal_text -= 1
+            self.body.append('</code>')
+
+    def visit_productionlist(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'pre'))
+        names = []
+        productionlist = cast(Iterable[addnodes.production], node)
+        for production in productionlist:
+            names.append(production['tokenname'])
+        maxlen = max(len(name) for name in names)
+        lastname = None
+        for production in productionlist:
+            if production['tokenname']:
+                lastname = production['tokenname'].ljust(maxlen)
+                self.body.append(self.starttag(production, 'strong', ''))
+                self.body.append(lastname + '</strong> ::= ')
+            elif lastname is not None:
+                self.body.append('%s     ' % (' ' * len(lastname)))
+            production.walkabout(self)
+            self.body.append('\n')
+        self.body.append('</pre>\n')
+        raise nodes.SkipNode
+
+    def depart_productionlist(self, node: Element) -> None:
+        pass
+
+    def visit_production(self, node: Element) -> None:
+        pass
+
+    def depart_production(self, node: Element) -> None:
+        pass
+
+    def visit_centered(self, node: Element) -> None:
+        self.body.append(self.starttag(node, 'p', CLASS="centered") +
+                         '<strong>')
+
+    def depart_centered(self, node: Element) -> None:
+        self.body.append('</strong></p>')
+
+    def visit_compact_paragraph(self, node: Element) -> None:
+        pass
+
+    def depart_compact_paragraph(self, node: Element) -> None:
+        pass
+
+    def visit_download_reference(self, node: Element) -> None:
+        atts = {'class': 'reference download',
+                'download': ''}
+
+        if not self.builder.download_support:
+            self.context.append('')
+        elif 'refuri' in node:
+            atts['class'] += ' external'
+            atts['href'] = node['refuri']
+            self.body.append(self.starttag(node, 'a', '', **atts))
+            self.context.append('</a>
') + elif 'filename' in node: + atts['class'] += ' internal' + atts['href'] = posixpath.join(self.builder.dlpath, node['filename']) + self.body.append(self.starttag(node, 'a', '', **atts)) + self.context.append('') + else: + self.context.append('') + + def depart_download_reference(self, node: Element) -> None: + self.body.append(self.context.pop()) + + # overwritten + def visit_image(self, node: Element) -> None: + olduri = node['uri'] + # rewrite the URI if the environment knows about it + if olduri in self.builder.images: + node['uri'] = posixpath.join(self.builder.imgpath, + self.builder.images[olduri]) + + if 'scale' in node: + # Try to figure out image height and width. Docutils does that too, + # but it tries the final file name, which does not necessarily exist + # yet at the time the HTML file is written. + if not ('width' in node and 'height' in node): + size = get_image_size(os.path.join(self.builder.srcdir, olduri)) + if size is None: + logger.warning(__('Could not obtain image size. :scale: option is ignored.'), # NOQA + location=node) + else: + if 'width' not in node: + node['width'] = str(size[0]) + if 'height' not in node: + node['height'] = str(size[1]) + + uri = node['uri'] + if uri.lower().endswith(('svg', 'svgz')): + atts = {'src': uri} + if 'width' in node: + atts['width'] = node['width'] + if 'height' in node: + atts['height'] = node['height'] + if 'scale' in node: + scale = node['scale'] / 100.0 + if 'width' in atts: + atts['width'] = int(atts['width']) * scale + if 'height' in atts: + atts['height'] = int(atts['height']) * scale + atts['alt'] = node.get('alt', uri) + if 'align' in node: + atts['class'] = 'align-%s' % node['align'] + self.body.append(self.emptytag(node, 'img', '', **atts)) + return + + super().visit_image(node) + + # overwritten + def depart_image(self, node: Element) -> None: + if node['uri'].lower().endswith(('svg', 'svgz')): + pass + else: + super().depart_image(node) + + def visit_toctree(self, node: Element) -> None: + # this only happens when formatting a toc from env.tocs -- in this + # case we don't want to include the subtree + raise nodes.SkipNode + + def visit_index(self, node: Element) -> None: + raise nodes.SkipNode + + def visit_tabular_col_spec(self, node: Element) -> None: + raise nodes.SkipNode + + def visit_glossary(self, node: Element) -> None: + pass + + def depart_glossary(self, node: Element) -> None: + pass + + def visit_acks(self, node: Element) -> None: + pass + + def depart_acks(self, node: Element) -> None: + pass + + def visit_hlist(self, node: Element) -> None: + self.body.append('') + + def depart_hlist(self, node: Element) -> None: + self.body.append('
\n') + + def visit_hlistcol(self, node: Element) -> None: + self.body.append('') + + def depart_hlistcol(self, node: Element) -> None: + self.body.append('') + + # overwritten + def visit_Text(self, node: Text) -> None: + text = node.astext() + encoded = self.encode(text) + if self.protect_literal_text: + # moved here from base class's visit_literal to support + # more formatting in literal nodes + for token in self.words_and_spaces.findall(encoded): + if token.strip(): + # protect literal text from line wrapping + self.body.append('%s' % token) + elif token in ' \n': + # allow breaks at whitespace + self.body.append(token) + else: + # protect runs of multiple spaces; the last one can wrap + self.body.append(' ' * (len(token) - 1) + ' ') + else: + if self.in_mailto and self.settings.cloak_email_addresses: + encoded = self.cloak_email(encoded) + self.body.append(encoded) + + def visit_note(self, node: Element) -> None: + self.visit_admonition(node, 'note') + + def depart_note(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_warning(self, node: Element) -> None: + self.visit_admonition(node, 'warning') + + def depart_warning(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_attention(self, node: Element) -> None: + self.visit_admonition(node, 'attention') + + def depart_attention(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_caution(self, node: Element) -> None: + self.visit_admonition(node, 'caution') + + def depart_caution(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_danger(self, node: Element) -> None: + self.visit_admonition(node, 'danger') + + def depart_danger(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_error(self, node: Element) -> None: + self.visit_admonition(node, 'error') + + def depart_error(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_hint(self, node: Element) -> None: + self.visit_admonition(node, 'hint') + + def depart_hint(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_important(self, node: Element) -> None: + self.visit_admonition(node, 'important') + + def depart_important(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_tip(self, node: Element) -> None: + self.visit_admonition(node, 'tip') + + def depart_tip(self, node: Element) -> None: + self.depart_admonition(node) + + def visit_literal_emphasis(self, node: Element) -> None: + return self.visit_emphasis(node) + + def depart_literal_emphasis(self, node: Element) -> None: + return self.depart_emphasis(node) + + def visit_literal_strong(self, node: Element) -> None: + return self.visit_strong(node) + + def depart_literal_strong(self, node: Element) -> None: + return self.depart_strong(node) + + def visit_abbreviation(self, node: Element) -> None: + attrs = {} + if node.hasattr('explanation'): + attrs['title'] = node['explanation'] + self.body.append(self.starttag(node, 'abbr', '', **attrs)) + + def depart_abbreviation(self, node: Element) -> None: + self.body.append('') + + def visit_manpage(self, node: Element) -> None: + self.visit_literal_emphasis(node) + if self.manpages_url: + node['refuri'] = self.manpages_url.format(**node.attributes) + self.visit_reference(node) + + def depart_manpage(self, node: Element) -> None: + if self.manpages_url: + self.depart_reference(node) + self.depart_literal_emphasis(node) + + # overwritten to add even/odd classes + + def generate_targets_for_table(self, node: 
Element) -> None: + """Generate hyperlink targets for tables. + + Original visit_table() generates hyperlink targets inside table tags + () if multiple IDs are assigned to listings. + That is invalid DOM structure. (This is a bug of docutils <= 0.13.1) + + This exports hyperlink targets before tables to make valid DOM structure. + """ + for id in node['ids'][1:]: + self.body.append('' % id) + node['ids'].remove(id) + + def visit_table(self, node: Element) -> None: + self.generate_targets_for_table(node) + + self._table_row_index = 0 + + classes = [cls.strip(' \t\n') for cls in self.settings.table_style.split(',')] + classes.insert(0, "docutils") # compat + if 'align' in node: + classes.append('align-%s' % node['align']) + tag = self.starttag(node, 'table', CLASS=' '.join(classes)) + self.body.append(tag) + + def visit_row(self, node: Element) -> None: + self._table_row_index += 1 + if self._table_row_index % 2 == 0: + node['classes'].append('row-even') + else: + node['classes'].append('row-odd') + self.body.append(self.starttag(node, 'tr', '')) + node.column = 0 # type: ignore + + def visit_field_list(self, node: Element) -> None: + self._fieldlist_row_index = 0 + return super().visit_field_list(node) + + def visit_field(self, node: Element) -> None: + self._fieldlist_row_index += 1 + if self._fieldlist_row_index % 2 == 0: + node['classes'].append('field-even') + else: + node['classes'].append('field-odd') + + def visit_math(self, node: Element, math_env: str = '') -> None: + name = self.builder.math_renderer_name + visit, _ = self.builder.app.registry.html_inline_math_renderers[name] + visit(self, node) + + def depart_math(self, node: Element, math_env: str = '') -> None: + name = self.builder.math_renderer_name + _, depart = self.builder.app.registry.html_inline_math_renderers[name] + if depart: + depart(self, node) + + def visit_math_block(self, node: Element, math_env: str = '') -> None: + name = self.builder.math_renderer_name + visit, _ = self.builder.app.registry.html_block_math_renderers[name] + visit(self, node) + + def depart_math_block(self, node: Element, math_env: str = '') -> None: + name = self.builder.math_renderer_name + _, depart = self.builder.app.registry.html_block_math_renderers[name] + if depart: + depart(self, node) + + def unknown_visit(self, node: Node) -> None: + raise NotImplementedError('Unknown node: ' + node.__class__.__name__) + + # --------- METHODS FOR COMPATIBILITY -------------------------------------- + + @property + def highlightlang(self) -> str: + warnings.warn('HTMLTranslator.highlightlang is deprecated.', + RemovedInSphinx30Warning, stacklevel=2) + return self.builder.config.highlight_language + + @property + def highlightlang_base(self) -> str: + warnings.warn('HTMLTranslator.highlightlang_base is deprecated.', + RemovedInSphinx30Warning, stacklevel=2) + return self.builder.config.highlight_language + + @property + def highlightopts(self) -> str: + warnings.warn('HTMLTranslator.highlightopts is deprecated.', + RemovedInSphinx30Warning, stacklevel=2) + return self.builder.config.highlight_options + + @property + def highlightlinenothreshold(self) -> int: + warnings.warn('HTMLTranslator.highlightlinenothreshold is deprecated.', + RemovedInSphinx30Warning, stacklevel=2) + return sys.maxsize diff --git a/docs/api_cpp/requirements.txt b/docs/api_cpp/requirements.txt index 862d1f4b12dea3d07ebd6e49558a09d9270fafc4..beb60acbe3ca97d814509fff2f2aef74f7e54b1b 100644 --- a/docs/api_cpp/requirements.txt +++ b/docs/api_cpp/requirements.txt @@ -1,8 +1,8 @@ 
-sphinx >= 2.2.1, <= 2.4.4
-recommonmark
+sphinx == 2.4.4
+myst_parser == 0.14.0
 sphinx-markdown-tables
 sphinx_rtd_theme
 exhale == 0.2.3
-breathe
+breathe == 4.13.0
 numpy
 jieba
diff --git a/docs/api_cpp/source_en/conf.py b/docs/api_cpp/source_en/conf.py
index d4d9a317a1072a1975703a430f1f2bf87499c3a6..780d4cc1ab248b9361c1ccdd264266195aa2b90b 100644
--- a/docs/api_cpp/source_en/conf.py
+++ b/docs/api_cpp/source_en/conf.py
@@ -35,7 +35,7 @@ release = 'master'
 # ones.
 extensions = [
     'sphinx_markdown_tables',
-    'recommonmark',
+    'myst_parser',
     'breathe',
     'exhale',
     'sphinx.ext.mathjax',
@@ -90,7 +90,7 @@ def specificationsForKind(kind):
     if kind == "class":
         return [
             ":members:",
-            ":protected-members:",
+            # ":protected-members:",
             ":private-members:"
         ]
     else:
@@ -161,6 +161,22 @@ with open("../_custom/graph", "r", encoding="utf8") as f:
 
 exec(source_code, exh_graph.__dict__)
 
+# fix error of extra space for C++ API.
+from sphinx.writers import html5 as sphinx_writer_html5
+
+with open("../_custom/sphinx_writer_html5", "r", encoding="utf8") as f:
+    source_code = f.read()
+
+exec(source_code, sphinx_writer_html5.__dict__)
+
+# fix position of "Return" for C++ API.
+from sphinx.builders import html as sphinx_builder_html
+
+with open("../_custom/sphinx_builder_html", "r", encoding="utf8") as f:
+    source_code = f.read()
+
+exec(source_code, sphinx_builder_html.__dict__)
+
 # Copy sourcefiles from mindspore repository.
 ms_path = os.getenv("MS_PATH")
 if os.path.exists("../include"):
diff --git a/docs/api_cpp/source_en/lite.md b/docs/api_cpp/source_en/lite.md
index e617ed943d6f984240fb0b025ea83b934565b4b8..ba90124f2033bcb8e762efb73873531d5c21a7cd 100644
--- a/docs/api_cpp/source_en/lite.md
+++ b/docs/api_cpp/source_en/lite.md
@@ -252,13 +252,37 @@ An **enum** type. Defaults to **DT_CPU**. DeviceType is defined for holding
 device_info_
 ```
 
-An **union** value, contains [**CpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#cpudeviceinfo) , [**GpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#gpudeviceinfo) and [**NpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#npudeviceinfo) .
+A **struct** value that contains [**CpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#cpudeviceinfo), [**GpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#gpudeviceinfo) and [**NpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#npudeviceinfo).
+
+#### provider_
+
+```cpp
+std::string provider_
+```
+
+The provider's name, which needs to be assigned when using a registry kernel.
+
+#### provider_device_
+
+```cpp
+std::string provider_device_
+```
+
+The provider's device, which needs to be assigned when using a registry kernel.
+
+#### allocator_
+
+```cpp
+AllocatorPtr allocator_
+```
+
+Optional. An [Allocator](https://mindspore.cn/doc/api_cpp/en/master/lite.html#allocator) is needed when the registry kernel does not run on the CPU.
 
 ## DeviceInfo
 
 \#include <[context.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/context.h)>
 
-An **union** value. DeviceInfo is defined for backend's configuration information.
+A **struct** value. DeviceInfo is defined to hold the backend's configuration information.
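+
+For reference, here is a minimal sketch of how a DeviceContext holding a DeviceInfo might be configured when a registry kernel is used. The provider and device names are illustrative placeholders, and the field layout is assumed from the master-branch context.h linked above:
+
+```cpp
+#include "include/context.h"
+
+mindspore::lite::Context context;
+// The default Context already holds one CPU DeviceContext in device_list_.
+auto &device_ctx = context.device_list_[0];
+device_ctx.provider_ = "MyVendor";              // hypothetical provider name
+device_ctx.provider_device_ = "MyAccelerator";  // hypothetical provider device
+// allocator_ is left unset here; it is only needed when the registry
+// kernel does not run on the CPU.
+device_ctx.device_info_.cpu_device_info_ = {true, mindspore::lite::MID_CPU};
+```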
 
 ### Public Attributes
diff --git a/docs/api_cpp/source_en/namespace/mindspore_dataset.rst b/docs/api_cpp/source_en/namespace/mindspore_dataset.rst
index abecd5efffbf27b4fb00d241f31e5bf9d3f0953d..f5185a93bfc120126b74d21528817b58898715be 100644
--- a/docs/api_cpp/source_en/namespace/mindspore_dataset.rst
+++ b/docs/api_cpp/source_en/namespace/mindspore_dataset.rst
@@ -237,6 +237,8 @@ Constants
 Others
 ------
 
+This section contains some predefined classes related to Dataset operations, tool functions, and some Typedefs.
+
 Classes
 ^^^^^^^
diff --git a/docs/api_cpp/source_en/session.md b/docs/api_cpp/source_en/session.md
index 4b602201032c1049214985511503068a72bbb183..5de269739cf1ca124cf1ef157953e31e70938f96 100644
--- a/docs/api_cpp/source_en/session.md
+++ b/docs/api_cpp/source_en/session.md
@@ -180,6 +180,156 @@ Resize inputs shape.
 
 STATUS as an error code of resize inputs, STATUS is defined in [errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h).
 
+#### Train
+
+```cpp
+virtual int Train() = 0;
+```
+
+Set model to train mode.
+
+- Returns
+
+  STATUS as an error code of compiling graph, STATUS is defined in [errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h).
+
+#### IsTrain
+
+```cpp
+bool IsTrain() { return train_mode_ == true; }
+```
+
+Check whether the current model is in train mode.
+
+- Returns
+
+  Boolean indication of whether the model is in train mode.
+
+#### Eval
+
+```cpp
+virtual int Eval() = 0;
+```
+
+Set model to eval mode.
+
+- Returns
+
+  STATUS as an error code of compiling graph, STATUS is defined in [errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h).
+
+#### IsEval
+
+```cpp
+bool IsEval() { return train_mode_ == false; }
+```
+
+Check whether the current model is in eval mode.
+
+- Returns
+
+  Boolean indication of whether the model is in eval mode.
+
+#### SetLearningRate
+
+```cpp
+virtual int SetLearningRate(float learning_rate) = 0;
+```
+
+Set the learning rate for the current model.
+
+- Returns
+
+  0 represents success or -1 in case of error.
+
+#### GetLearningRate
+
+```cpp
+virtual float GetLearningRate() = 0;
+```
+
+Get the learning rate of the current model.
+
+- Returns
+
+  The learning rate of the current model, default is 0.0.
+
+#### SetupVirtualBatch
+
+```cpp
+virtual int SetupVirtualBatch(int virtual_batch_multiplier, float lr = -1.0f, float momentum = -1.0f) = 0;
+```
+
+Customize the virtual batch size to reduce memory consumption.
+
+- Parameters
+
+    - `virtual_batch_multiplier`: virtual batch multiplier.
+    - `lr`: learning rate.
+    - `momentum`: momentum.
+
+- Returns
+
+  0 represents success or -1 in case of error.
+
+#### GetPredictions
+
+```cpp
+virtual std::vector<tensor::MSTensor *> GetPredictions() const = 0;
+```
+
+Get the prediction results of the trained model.

+- Returns
+
+  The pointer vector of prediction results.
+
+#### Export
+
+```cpp
+virtual int Export(const std::string &file_name, lite::ModelType model_type = lite::MT_TRAIN,
+                   lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType format = lite::FT_FLATBUFFERS) const = 0;
+```
+
+Save the trained model into a flatbuffer file.
+
+- Parameters
+
+    - `file_name`: Filename of the file to save the buffer to.
+    - `model_type`: Model save type: train or inference.
+    - `quant_type`: Quant type of the model.
+    - `format`: Model save format.
+
+- Returns
+
+  0 represents success or -1 in case of error.
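+
+A minimal usage sketch of the training interface described above. The model file names, learning rate value, include paths, and error handling are illustrative assumptions, not part of the API; CreateTrainSession is the static method documented further below:
+
+```cpp
+#include "include/lite_session.h"
+#include "include/context.h"
+
+int TrainAndExport() {
+  mindspore::lite::Context context;  // default CPU context
+  auto *session = mindspore::session::LiteSession::CreateTrainSession(
+      "lenet_train.ms", &context, /*train_mode=*/true);
+  if (session == nullptr) {
+    return -1;
+  }
+  session->SetLearningRate(0.01f);
+  session->Train();  // switch to train mode
+  // ... fill the input tensors and call RunGraph() once per batch here ...
+  session->Eval();   // switch to eval mode for validation
+  int ret = session->Export("lenet_trained.ms", mindspore::lite::MT_TRAIN);
+  delete session;
+  return ret;
+}
+```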
+
+#### GetFeatureMaps
+
+```cpp
+virtual std::vector<tensor::MSTensor *> GetFeatureMaps() const = 0;
+```
+
+Get the model feature maps.
+
+- Returns
+
+  The feature map list.
+
+#### UpdateFeatureMaps
+
+```cpp
+virtual int UpdateFeatureMaps(const std::vector<tensor::MSTensor *> &features) = 0;
+```
+
+Update the model feature maps.
+
+- Parameters
+
+    - `features`: new feature maps.
+
+- Returns
+
+  STATUS as an error code of compiling graph, STATUS is defined in [errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h).
+
 ### Static Public Member Functions
 
 #### CreateSession
 
@@ -216,6 +366,45 @@ Static method to create a LiteSession pointer. The returned LiteSession pointer
 
 Pointer that points to MindSpore Lite LiteSession.
 
+#### CreateTransferSession
+
+```cpp
+static TrainSession *CreateTransferSession(const std::string &filename_backbone, const std::string &filename_head, const lite::Context *context, bool train_mode = false, const lite::TrainCfg *cfg = nullptr);
+```
+
+Static method that creates the object pointer that points to the transfer learning training session.
+
+- Parameters
+
+    - `filename_backbone`: File name of the backbone network.
+    - `filename_head`: File name of the head network.
+    - `context`: Pointer that points to the session context.
+    - `train_mode`: Training mode to initialize the Session.
+    - `cfg`: Config of the train session.
+
+- Returns
+
+  Pointer that points to MindSpore Lite TrainSession.
+
+#### CreateTrainSession
+
+```cpp
+static LiteSession *CreateTrainSession(const std::string &filename, const lite::Context *context, bool train_mode = false, const lite::TrainCfg *cfg = nullptr);
+```
+
+Static method to create a TrainSession object.
+
+- Parameters
+
+    - `filename`: Train model file name.
+    - `context`: Pointer that points to the session context.
+    - `train_mode`: Training mode to initialize the Session.
+    - `cfg`: Config of the train session.
+
+- Returns
+
+  Pointer that points to MindSpore Lite TrainSession.
+
 ## TrainLoop
 
 \#include <[train_loop.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/train/train_loop.h)>
diff --git a/docs/api_cpp/source_en/tensor.md b/docs/api_cpp/source_en/tensor.md
index 8ae519f68b82e874f448b8d97a47a04aaef2306b..c354ed9a48a0b7ef5bbbfb58a5a5a7c80a0c52d5 100644
--- a/docs/api_cpp/source_en/tensor.md
+++ b/docs/api_cpp/source_en/tensor.md
@@ -91,6 +91,8 @@ virtual void *MutableData() const
 Get the pointer of data in MSTensor.
 
 > The data pointer can be used to both write and read data in MSTensor.
+>
+> If the current MSTensor is on the OpenCL GPU, calling the MutableData() interface maps the data from the device to the host.
 
 - Returns
diff --git a/docs/api_cpp/source_zh_cn/conf.py b/docs/api_cpp/source_zh_cn/conf.py
index 625e5acd3bde751f170596e75261be4bb2bde60f..6688ee7fa6112d57a391a9c4d4362f5077cdf51f 100644
--- a/docs/api_cpp/source_zh_cn/conf.py
+++ b/docs/api_cpp/source_zh_cn/conf.py
@@ -33,7 +33,7 @@ release = 'master'
 
 # ones.
 extensions = [
     'sphinx_markdown_tables',
-    'recommonmark',
+    'myst_parser',
 ]
 
 source_suffix = {
diff --git a/docs/api_cpp/source_zh_cn/index.rst b/docs/api_cpp/source_zh_cn/index.rst
index 779317bee1f0397ac1c5a78905b31b236f33d4f8..7f18dd58161d732b8ceb37326df28b8f050a2027 100644
--- a/docs/api_cpp/source_zh_cn/index.rst
+++ b/docs/api_cpp/source_zh_cn/index.rst
@@ -18,4 +18,4 @@ MindSpore C++ API
    session
    tensor
    errorcode_and_metatype
-   lite_cpp_example
\ No newline at end of file
+   lite_cpp_example
diff --git a/docs/api_cpp/source_zh_cn/lite.md b/docs/api_cpp/source_zh_cn/lite.md
index 65c969ac9f759ea1054567e1163d5cec4d805363..e4db8307b2d65baa2a81c748eb9c155bed64decd 100644
--- a/docs/api_cpp/source_zh_cn/lite.md
+++ b/docs/api_cpp/source_zh_cn/lite.md
@@ -252,13 +252,37 @@ device_type
 device_info_
 ```
 
-**union**类型,包含 [**CpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#cpudeviceinfo) 、 [**GpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#gpudeviceinfo) 和 [**NpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#npudeviceinfo) 。
+**struct**类型,包含[**CpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#cpudeviceinfo)、[**GpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#gpudeviceinfo)和[**NpuDeviceInfo**](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#npudeviceinfo)。
+
+#### provider_
+
+```cpp
+std::string provider_
+```
+
+厂商名,使用南向自定义算子时需要指定。
+
+#### provider_device_
+
+```cpp
+std::string provider_device_
+```
+
+南向自定义算子要运行的硬件设备。
+
+#### allocator_
+
+```cpp
+AllocatorPtr allocator_
+```
+
+可选,如果南向自定义算子有自有的内存分配方式,则需要自定义[Allocator](https://mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#allocator)。
 
 ## DeviceInfo
 
 \#include <[context.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/context.h)>
 
-**union**类型,设置不同硬件的环境变量。
+**struct**类型,设置不同硬件的环境变量。
 
 ### 公有属性
diff --git a/docs/api_cpp/source_zh_cn/session.md b/docs/api_cpp/source_zh_cn/session.md
index 230f7a273f327cfd134698eff8bd5d6d11e3ee40..65ac03cd7de3a8efced56e4ba326875450b2a254 100644
--- a/docs/api_cpp/source_zh_cn/session.md
+++ b/docs/api_cpp/source_zh_cn/session.md
@@ -178,6 +178,156 @@ virtual int Resize(const std::vector<tensor::MSTensor *> &inputs, const std::vector<std::vector<int>> &dims) = 0;
 
 STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。
 
+#### Train
+
+```cpp
+virtual int Train() = 0;
+```
+
+设置为训练模式。
+
+- 返回值
+
+  STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。
+
+#### IsTrain
+
+```cpp
+bool IsTrain() { return train_mode_ == true; }
+```
+
+检查当前模型是否为训练模式。
+
+- 返回值
+
+  true 或 false,即当前模型是否为训练模式。
+
+#### Eval
+
+```cpp
+virtual int Eval() = 0;
+```
+
+设置为验证模式。
+
+- 返回值
+
+  STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。
+
+#### IsEval
+
+```cpp
+bool IsEval() { return train_mode_ == false; }
+```
+
+检查当前模型是否为验证模式。
+
+- 返回值
+
+  true 或 false,即当前模型是否为验证模式。
+
+#### SetLearningRate
+
+```cpp
+virtual int SetLearningRate(float learning_rate) = 0;
+```
+
+为当前模型设置学习率。
+
+- 返回值
+
+  STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。
+
+#### GetLearningRate
+
+```cpp
+virtual float GetLearningRate() = 0;
+```
+
+获取当前模型的学习率。
+
+- 返回值
+
+  当前模型的学习率,如果未设置优化器则返回0.0。
+
+#### SetupVirtualBatch
+
+```cpp
+virtual int SetupVirtualBatch(int virtual_batch_multiplier, float lr = -1.0f, float momentum = -1.0f) = 0;
+```
+
+用户自定义虚拟批次数,用于减少内存消耗。
+
+- 参数
+
+    - `virtual_batch_multiplier`: 自定义虚拟批次数。
+    - `lr`: 自定义学习率。
+    - `momentum`: 自定义动量。
+
+- 返回值
+
+  STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。
+
+#### GetPredictions
+
+```cpp
+virtual std::vector<tensor::MSTensor *> GetPredictions() const = 0;
+```
+
+获取训练模型的预测结果。
+
+- 返回值
+
+  预测结果张量指针数组。
+
+#### Export
+
+```cpp
+virtual int Export(const std::string &file_name, lite::ModelType model_type = lite::MT_TRAIN,
+                   lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType format = lite::FT_FLATBUFFERS) const = 0;
+```
+
+保存已训练模型。
+
+- 参数
+
+    - `file_name`: 保存模型的文件名。
+    - `model_type`: 训练或推理。
+    - `quant_type`: 量化类型。
+    - `format`: 保存模型格式。
+
+- 返回值
+
+  STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。
+
+#### GetFeatureMaps
+
+```cpp
+virtual std::vector<tensor::MSTensor *> GetFeatureMaps() const = 0;
+```
+
+获取训练模型权重。
+
+- 返回值
+
+  权重列表。
+
+#### UpdateFeatureMaps
+
+```cpp
+virtual int UpdateFeatureMaps(const std::vector<tensor::MSTensor *> &features) = 0;
+```
+
+更新训练模型权重。
+
+- 参数
+
+    - `features`: 新的权重列表。
+
+- 返回值
+
+  STATUS,即编译图的错误码。STATUS在[errorcode.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/errorcode.h)中定义。
+
 ### 静态公有成员函数
 
 #### CreateSession
 
@@ -214,6 +364,45 @@ static LiteSession *CreateSession(const char *model_buf, size_t size, const lite
 
 指向MindSpore Lite LiteSession的指针。
 
+#### CreateTransferSession
+
+```cpp
+static TrainSession *CreateTransferSession(const std::string &filename_backbone, const std::string &filename_head, const lite::Context *context, bool train_mode = false, const lite::TrainCfg *cfg = nullptr);
+```
+
+创建迁移学习训练会话指针的静态方法。
+
+- 参数
+
+    - `filename_backbone`: 主干网络的文件名。
+    - `filename_head`: 顶层网络的文件名。
+    - `context`: 指向目标会话的指针。
+    - `train_mode`: 是否开启训练模式。
+    - `cfg`: 训练相关配置。
+
+- 返回值
+
+  指向训练会话的指针。
+
+#### CreateTrainSession
+
+```cpp
+static LiteSession *CreateTrainSession(const std::string &filename, const lite::Context *context, bool train_mode = false, const lite::TrainCfg *cfg = nullptr);
+```
+
+创建训练会话指针的静态方法。
+
+- 参数
+
+    - `filename`: 训练模型的文件名。
+    - `context`: 指向目标会话的指针。
+    - `train_mode`: 是否开启训练模式。
+    - `cfg`: 训练相关配置。
+
+- 返回值
+
+  指向训练会话的指针。
+
 ## TrainLoop
 
 \#include <[train_loop.h](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/include/train/train_loop.h)>
 
@@ -344,7 +533,7 @@ virtual int Train(int epochs, mindspore::dataset::Dataset *dataset, std::vector<
 virtual int Eval(mindspore::dataset::Dataset *dataset, std::vector<session::TrainLoopCallBack *> cbs, LoadDataFunc load_func = nullptr, int max_steps = INT_MAX) = 0;
 ```
 
-执行迭代训练。
+执行推理。
 
 - 参数
diff --git a/docs/api_cpp/source_zh_cn/tensor.md b/docs/api_cpp/source_zh_cn/tensor.md
index 9fe8d8b7bfce145d155b404dee9487357feb5c8c..8d596f0a953fc0e0cc214deb6bcd6720b4e79d90 100644
--- a/docs/api_cpp/source_zh_cn/tensor.md
+++ b/docs/api_cpp/source_zh_cn/tensor.md
@@ -91,6 +91,8 @@ virtual void *MutableData() const
 
 获取MSTensor中的数据的指针。
 
 > 该数据指针可用于对MSTensor中的数据进行读取和写入。
+>
+> 如果是OpenCL的MSTensor,使用MutableData()接口,数据将会从Device映射到Host。
 
 - 返回值
diff --git a/docs/api_java/requirements.txt b/docs/api_java/requirements.txt
index 162b50040286bb9a0177801c580a31013082a360..9d90e565b2e637e24cbc98932197ec454fb28d61 100644
--- a/docs/api_java/requirements.txt
+++ b/docs/api_java/requirements.txt
@@ -1,5 +1,5 @@
 sphinx >= 2.2.1, <= 2.4.4
-recommonmark
+myst_parser == 0.14.0
 sphinx-markdown-tables
 sphinx_rtd_theme
 numpy
diff --git a/docs/api_java/source_en/conf.py b/docs/api_java/source_en/conf.py
index 4020d50f7b5f7a90b26785749cb1d41046b4723c..33a536935b0a43c41875400f9fb676d82f88e950 100644
--- a/docs/api_java/source_en/conf.py
+++ b/docs/api_java/source_en/conf.py
@@ -33,7 +33,7 @@ release = 'master'
 # ones.
 extensions = [
     'sphinx_markdown_tables',
-    'recommonmark',
+    'myst_parser',
 ]
 
 source_suffix = {
diff --git a/docs/api_java/source_en/lite_session.md b/docs/api_java/source_en/lite_session.md
index da87e6d4f7291f42da4883d6fab769ce35e2ffa2..2144b2dc69c5eca230cf63f4cb588bbfc33b5b76 100644
--- a/docs/api_java/source_en/lite_session.md
+++ b/docs/api_java/source_en/lite_session.md
@@ -22,8 +22,17 @@ LiteSession defines session in MindSpore Lite for compiling Model and forwarding
 | [Map<String, MSTensor> getOutputMapByTensor()](#getoutputmapbytensor) |
 | [List<String> getOutputTensorNames()](#getoutputtensornames) |
 | [MSTensor getOutputByTensorName(String tensorName)](#getoutputbytensorname) |
-| [boolean resize(List<MSTensor> inputs, int[][] dims](#resize) |
+| [boolean resize(List<MSTensor> inputs, int[][] dims)](#resize) |
 | [void free()](#free) |
+| [boolean export(String modelFilename, int model_type, int quantization_type)](#export) |
+| [boolean train()](#train) |
+| [boolean eval()](#eval) |
+| [boolean isTrain()](#isTrain) |
+| [boolean isEval()](#isEval) |
+| [boolean setLearningRate(float learning_rate)](#setLearningRate) |
+| [boolean setupVirtualBatch(int virtualBatchMultiplier, float learningRate, float momentum)](#setupVirtualBatch) |
+| [List<MSTensor> getFeaturesMap()](#getFeaturesMap) |
+| [boolean updateFeatures(List<MSTensor> features)](#updateFeatures) |
 
 ## init
 
@@ -189,3 +198,115 @@ public void free()
 ```
 
 Free LiteSession.
+
+## export
+
+```java
+public boolean export(String modelFilename, int model_type, int quantization_type)
+```
+
+Export the model.
+
+- Parameters
+
+    - `modelFilename`: Model file name.
+    - `model_type`: Train or inference type.
+    - `quantization_type`: The quant type.
+
+- Returns
+
+  Whether the export is successful.
+
+## train
+
+```java
+public boolean train()
+```
+
+Switch to the train mode.
+
+- Returns
+
+  Whether the switch to train mode is successful.
+
+## eval
+
+```java
+public boolean eval()
+```
+
+Switch to the eval mode.
+
+- Returns
+
+  Whether the switch to eval mode is successful.
+
+## isTrain
+
+```java
+public boolean isTrain()
+```
+
+Check whether the session is in train mode.
+
+- Returns
+
+  Whether the session is in train mode.
+
+## isEval
+
+```java
+public boolean isEval()
+```
+
+Check whether the session is in eval mode.
+
+- Returns
+
+  Whether the session is in eval mode.
+
+## setLearningRate
+
+```java
+public boolean setLearningRate(float learning_rate)
+```
+
+Set the learning rate.
+
+- Parameters
+
+    - `learning_rate`: learning rate.
+
+- Returns
+
+  Whether the learning rate is successfully set.
+
+## setupVirtualBatch
+
+```java
+public boolean setupVirtualBatch(int virtualBatchMultiplier, float learningRate, float momentum)
+```
+
+Set the virtual batch.
+
+- Parameters
+
+    - `virtualBatchMultiplier`: virtual batch multiplier.
+    - `learningRate`: learning rate.
+    - `momentum`: momentum.
+
+- Returns
+
+  Whether the virtual batch is successfully set.
+
+## getFeaturesMap
+
+```java
+public List<MSTensor> getFeaturesMap()
+```
+
+Get the feature maps.
+
+- Returns
+
+  The feature map tensor list.
+
+## updateFeatures
+
+```java
+public boolean updateFeatures(List<MSTensor> features)
+```
+
+Update the model features.
+
+- Parameters
+
+    - `features`: new feature map tensor list.
+
+- Returns
+
+  Whether the model features are successfully updated.
diff --git a/docs/api_java/source_zh_cn/conf.py b/docs/api_java/source_zh_cn/conf.py
index e3dfb2a0a9fc6653113e7b2bb878a5497ceb4a2b..609d5fba93b1b33812df1bf22ec4ab9924ea744d 100644
--- a/docs/api_java/source_zh_cn/conf.py
+++ b/docs/api_java/source_zh_cn/conf.py
@@ -32,7 +32,7 @@ release = 'master'
 # ones.
 extensions = [
     'sphinx_markdown_tables',
-    'recommonmark',
+    'myst_parser',
 ]
 
 source_suffix = {
diff --git a/docs/api_java/source_zh_cn/lite_session.md b/docs/api_java/source_zh_cn/lite_session.md
index 6df2bf110160847495a3279ce9c1b4dd0adc9fea..6b99baf95965680cc8fd5113dde4b750708efa18 100644
--- a/docs/api_java/source_zh_cn/lite_session.md
+++ b/docs/api_java/source_zh_cn/lite_session.md
@@ -22,8 +22,17 @@ LiteSession定义了MindSpore Lite中的会话,用于进行Model的编译和
 | [Map<String, MSTensor> getOutputMapByTensor()](#getoutputmapbytensor) |
 | [List<String> getOutputTensorNames()](#getoutputtensornames) |
 | [MSTensor getOutputByTensorName(String tensorName)](#getoutputbytensorname) |
-| [boolean resize(List<MSTensor> inputs, int[][] dims](#resize) |
+| [boolean resize(List<MSTensor> inputs, int[][] dims)](#resize) |
 | [void free()](#free) |
+| [boolean export(String modelFilename, int model_type, int quantization_type)](#export) |
+| [boolean train()](#train) |
+| [boolean eval()](#eval) |
+| [boolean isTrain()](#isTrain) |
+| [boolean isEval()](#isEval) |
+| [boolean setLearningRate(float learning_rate)](#setLearningRate) |
+| [boolean setupVirtualBatch(int virtualBatchMultiplier, float learningRate, float momentum)](#setupVirtualBatch) |
+| [List<MSTensor> getFeaturesMap()](#getFeaturesMap) |
+| [boolean updateFeatures(List<MSTensor> features)](#updateFeatures) |
 
 ## init
 
@@ -189,3 +198,115 @@ public void free()
 ```
 
 释放LiteSession。
+
+## export
+
+```java
+public boolean export(String modelFilename, int model_type, int quantization_type)
+```
+
+导出模型。
+
+- 参数
+
+    - `modelFilename`: 模型文件名称。
+    - `model_type`: 训练或者推理类型。
+    - `quantization_type`: 量化类型。
+
+- 返回值
+
+  导出模型是否成功。
+
+## train
+
+```java
+public boolean train()
+```
+
+切换训练模式。
+
+- 返回值
+
+  切换训练模式是否成功。
+
+## eval
+
+```java
+public boolean eval()
+```
+
+切换推理模式。
+
+- 返回值
+
+  切换推理模式是否成功。
+
+## isTrain
+
+```java
+public boolean isTrain()
+```
+
+检查当前是否为训练模式。
+
+- 返回值
+
+  是否为训练模式。
+
+## isEval
+
+```java
+public boolean isEval()
+```
+
+检查当前是否为推理模式。
+
+- 返回值
+
+  是否为推理模式。
+
+## setLearningRate
+
+```java
+public boolean setLearningRate(float learning_rate)
+```
+
+设置学习率。
+
+- 参数
+
+    - `learning_rate`: 学习率。
+
+- 返回值
+
+  学习率设置是否成功。
+
+## setupVirtualBatch
+
+```java
+public boolean setupVirtualBatch(int virtualBatchMultiplier, float learningRate, float momentum)
+```
+
+设置虚批次系数。
+
+- 参数
+
+    - `virtualBatchMultiplier`: 虚批次系数。
+    - `learningRate`: 学习率。
+    - `momentum`: 动量系数。
+
+- 返回值
+
+  虚批次系数设置是否成功。
+
+## getFeaturesMap
+
+```java
+public List<MSTensor> getFeaturesMap()
+```
+
+获取权重参数。
+
+- 返回值
+
+  权重参数列表。
+
+## updateFeatures
+
+```java
+public boolean updateFeatures(List<MSTensor> features)
+```
+
+更新权重参数。
+
+- 参数
+
+    - `features`: 新的权重参数列表。
+
+- 返回值
+
+  权重是否更新成功。
diff --git a/docs/api_python/requirements.txt b/docs/api_python/requirements.txt
index 162b50040286bb9a0177801c580a31013082a360..9d90e565b2e637e24cbc98932197ec454fb28d61 100644
--- a/docs/api_python/requirements.txt
+++ b/docs/api_python/requirements.txt
@@ -1,5 +1,5 @@
 sphinx >= 2.2.1, <= 2.4.4
-recommonmark
+myst_parser == 0.14.0
 sphinx-markdown-tables
 sphinx_rtd_theme
 numpy
diff --git a/docs/api_python/source_en/_ext/my_signature.py b/docs/api_python/source_en/_ext/my_signature.py
index 7eb6cec6f4607af6b66dfe3d460b7e57d39e20d5..4f6a22dc9d8b7ef72aeb429f9124c2dabb1577d6 100644
--- a/docs/api_python/source_en/_ext/my_signature.py
+++ b/docs/api_python/source_en/_ext/my_signature.py
@@ -84,6 
+84,10 @@ def _my_signature_from_function(cls, func):
         kwdefaults[arg_name] = defaults[num]
     else:
         kwdefaults = func.__kwdefaults__
+    if kwdefaults is not None:
+        for key, value in kwdefaults.items():
+            if isinstance(value, str):
+                kwdefaults[key] = '"' + value + '"'
     pos_defaults = func.__defaults__
     if pos_defaults:
diff --git a/docs/api_python/source_en/_templates/classtemplate_inherited.rst b/docs/api_python/source_en/_templates/classtemplate_inherited.rst
index 8f4a423dca6e678c191df73d142e4e52a862a3db..bdebdbac946c80f978932f0ec249efbbc663924c 100644
--- a/docs/api_python/source_en/_templates/classtemplate_inherited.rst
+++ b/docs/api_python/source_en/_templates/classtemplate_inherited.rst
@@ -3,7 +3,17 @@
 .. currentmodule:: {{ module }}

-{% if objname[0].istitle() %}
+{% if objname in "GraphData CelebADataset Cifar100Dataset Cifar10Dataset CocoDataset ImageFolderDataset MnistDataset VOCDataset" %}
+
+{{ fullname | underline }}
+
+.. autoclass:: {{ name }}
+   :inherited-members:
+   :exclude-members: bucket_batch_by_length, build_sentencepiece_vocab, build_vocab
+   :members:
+
+{% elif objname[0].istitle() %}
+
 {{ fullname | underline }}

 .. autoclass:: {{ name }}
diff --git a/docs/api_python/source_en/conf.py b/docs/api_python/source_en/conf.py
index 816132a1181e626a462a640dd3032bf4ed4cab70..afec9ad64e87c997983938658f926ed8fc1595d6 100644
--- a/docs/api_python/source_en/conf.py
+++ b/docs/api_python/source_en/conf.py
@@ -55,7 +55,7 @@ extensions = [
     'sphinx.ext.napoleon',
     'sphinx.ext.viewcode',
     'sphinx_markdown_tables',
-    'recommonmark',
+    'myst_parser',
 ]
 source_suffix = {
diff --git a/docs/api_python/source_en/index.rst b/docs/api_python/source_en/index.rst
index 6fb98ca0b72b13fcee92bcca675b67acd3a4d5fe..5afaa280b71db4867c003f825d354f2114bafab6 100644
--- a/docs/api_python/source_en/index.rst
+++ b/docs/api_python/source_en/index.rst
@@ -65,4 +65,5 @@ MindSpore Python API
    mindquantum/mindquantum.ops
    mindquantum/mindquantum.hiqfermion
    mindquantum/mindquantum.nn
+   mindquantum/mindquantum.ansatz
    mindquantum/mindquantum.utils
\ No newline at end of file
diff --git a/docs/api_python/source_en/mindquantum/mindquantum.ansatz.rst b/docs/api_python/source_en/mindquantum/mindquantum.ansatz.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c4df6100896c877ef28017052bda10d058f995b1
--- /dev/null
+++ b/docs/api_python/source_en/mindquantum/mindquantum.ansatz.rst
@@ -0,0 +1,5 @@
+mindquantum.ansatz
+-------------------
+
+.. automodule:: mindquantum.ansatz
+   :members:
diff --git a/docs/api_python/source_en/mindquantum/mindquantum.nn.rst b/docs/api_python/source_en/mindquantum/mindquantum.nn.rst
index d3909a4cecef7be9b0091200963a6e37d80b5b9e..74ac7db20443891ee1890401008cdf3f16f9a78a 100644
--- a/docs/api_python/source_en/mindquantum/mindquantum.nn.rst
+++ b/docs/api_python/source_en/mindquantum/mindquantum.nn.rst
@@ -17,3 +17,4 @@ Operators
     mindquantum.nn.MindQuantumLayer
     mindquantum.nn.MindQuantumAnsatzOnlyLayer
     mindquantum.nn.MindQuantumAnsatzOnlyOperator
+    mindquantum.nn.PQC
diff --git a/docs/api_python/source_en/mindquantum/mindquantum.rst b/docs/api_python/source_en/mindquantum/mindquantum.rst
index 819825eb913b528508cd47af699ed083332542e4..890545ee87d37ba8cad2fecb5da399057f5e0b41 100644
--- a/docs/api_python/source_en/mindquantum/mindquantum.rst
+++ b/docs/api_python/source_en/mindquantum/mindquantum.rst
@@ -41,8 +41,30 @@ mindquantum.circuit
-------------------

..
automodule:: mindquantum.circuit
+   :exclude-members: C, D, A, AP, CPN
    :members:
+
+functional
+----------
+
+The functional operators are shortcuts for some pre-instantiated quantum circuit operators.
+
+.. list-table::
+   :widths: 50 50
+   :header-rows: 1
+
+   * - functional
+     - high level operators
+   * - mindquantum.circuit.C
+     - :class:`mindquantum.circuit.controlled`
+   * - mindquantum.circuit.D
+     - :class:`mindquantum.circuit.dagger`
+   * - mindquantum.circuit.A
+     - :class:`mindquantum.circuit.apply`
+   * - mindquantum.circuit.AP
+     - :class:`mindquantum.circuit.add_prefix`
+   * - mindquantum.circuit.CPN
+     - :class:`mindquantum.circuit.change_param_name`

 mindquantum.engine
 ------------------
diff --git a/docs/api_python/source_en/mindspore/mindspore.dataset.rst b/docs/api_python/source_en/mindspore/mindspore.dataset.rst
index d5fb44e566727e8303276c880d03652ea0940eff..a07aed4b343344c38973ebc5e929eafb52dd1b8a 100644
--- a/docs/api_python/source_en/mindspore/mindspore.dataset.rst
+++ b/docs/api_python/source_en/mindspore/mindspore.dataset.rst
@@ -90,5 +90,11 @@ Others
     :template: classtemplate_inherited.rst

     mindspore.dataset.DatasetCache
+    mindspore.dataset.DSCallback
     mindspore.dataset.Schema
+    mindspore.dataset.WaitedDSCallback
+    mindspore.dataset.compare
+    mindspore.dataset.deserialize
+    mindspore.dataset.serialize
+    mindspore.dataset.show
     mindspore.dataset.zip
diff --git a/docs/api_python/source_en/mindspore/mindspore.dataset.vision.rst b/docs/api_python/source_en/mindspore/mindspore.dataset.vision.rst
index 6534637abb4d182b6ad93bede9408c81a73cda12..67d920ded7b2d941b7cd1f548f9552be4b9b7d95 100644
--- a/docs/api_python/source_en/mindspore/mindspore.dataset.vision.rst
+++ b/docs/api_python/source_en/mindspore/mindspore.dataset.vision.rst
@@ -36,9 +36,9 @@ mindspore.dataset.vision.c_transforms
     mindspore.dataset.vision.c_transforms.RandomHorizontalFlip
     mindspore.dataset.vision.c_transforms.RandomHorizontalFlipWithBBox
     mindspore.dataset.vision.c_transforms.RandomPosterize
+    mindspore.dataset.vision.c_transforms.RandomResize
     mindspore.dataset.vision.c_transforms.RandomResizedCrop
     mindspore.dataset.vision.c_transforms.RandomResizedCropWithBBox
-    mindspore.dataset.vision.c_transforms.RandomResize
     mindspore.dataset.vision.c_transforms.RandomResizeWithBBox
     mindspore.dataset.vision.c_transforms.RandomRotation
     mindspore.dataset.vision.c_transforms.RandomSelectSubpolicy
@@ -53,7 +53,7 @@ mindspore.dataset.vision.c_transforms
     mindspore.dataset.vision.c_transforms.SoftDvppDecodeRandomCropResizeJpeg
     mindspore.dataset.vision.c_transforms.SoftDvppDecodeResizeJpeg
     mindspore.dataset.vision.c_transforms.UniformAugment
-    mindspore.dataset.vision.c_transforms.HorizontalFlip
+    mindspore.dataset.vision.c_transforms.VerticalFlip

 mindspore.dataset.vision.py_transforms
 -------------------------------------------------
@@ -76,6 +76,7 @@ mindspore.dataset.vision.py_transforms
     mindspore.dataset.vision.py_transforms.LinearTransformation
     mindspore.dataset.vision.py_transforms.MixUp
     mindspore.dataset.vision.py_transforms.Normalize
+    mindspore.dataset.vision.py_transforms.NormalizePad
     mindspore.dataset.vision.py_transforms.Pad
     mindspore.dataset.vision.py_transforms.RandomAffine
     mindspore.dataset.vision.py_transforms.RandomColor
diff --git a/docs/api_python/source_en/mindspore/mindspore.nn.rst b/docs/api_python/source_en/mindspore/mindspore.nn.rst
index c079bf306aa15c5db10358e7255809fe2164d021..952429a2ff0eb0ad21c764f1600fc44381b397c2 100644
--- a/docs/api_python/source_en/mindspore/mindspore.nn.rst
+++
b/docs/api_python/source_en/mindspore/mindspore.nn.rst @@ -160,6 +160,18 @@ Pooling layers mindspore.nn.MaxPool1d mindspore.nn.MaxPool2d +Thor Layers +-------------- + +.. msplatformautosummary:: + :toctree: nn + :nosignatures: + :template: classtemplate.rst + + mindspore.nn.Conv2dThor + mindspore.nn.DenseThor + mindspore.nn.EmbeddingThor + Quantized Functions ------------------- @@ -194,7 +206,7 @@ Loss Functions mindspore.nn.DiceLoss mindspore.nn.FocalLoss mindspore.nn.L1Loss - mindspore.nn.Loss + mindspore.nn.LossBase mindspore.nn.MAELoss mindspore.nn.MSELoss mindspore.nn.MultiClassDiceLoss @@ -224,6 +236,7 @@ Optimizer Functions mindspore.nn.ProximalAdagrad mindspore.nn.RMSProp mindspore.nn.SGD + mindspore.nn.thor Wrapper Functions ----------------- diff --git a/docs/api_python/source_en/mindspore/mindspore.ops.rst b/docs/api_python/source_en/mindspore/mindspore.ops.rst index ed3374e90d74fc7105f560b6ad59d406e6d81b84..b2e4ae3ba9d2a1b15d28af62bee8ed0c9a0a0d66 100644 --- a/docs/api_python/source_en/mindspore/mindspore.ops.rst +++ b/docs/api_python/source_en/mindspore/mindspore.ops.rst @@ -47,195 +47,193 @@ The functional operators are the pre-instantiated Primitive operators, which can :header-rows: 1 * - functional - - operations + - Description * - mindspore.ops.add - - :class:`mindspore.ops.Add` + - Refer to :class:`mindspore.ops.Add`. * - mindspore.ops.addn - - :class:`mindspore.ops.AddN` + - Refer to :class:`mindspore.ops.AddN`. * - mindspore.ops.array_reduce - - :class:`mindspore.ops.Primitive` ('array_reduce') + - Reduce the dimension of the array. * - mindspore.ops.array_to_scalar - - :class:`mindspore.ops.Primitive` ('array_to_scalar') + - Convert the array to a scalar. * - mindspore.ops.assign - - :class:`mindspore.ops.Assign` + - Refer to :class:`mindspore.ops.Assign`. * - mindspore.ops.assign_add - - :class:`mindspore.ops.AssignAdd` + - Refer to :class:`mindspore.ops.AssignAdd`. * - mindspore.ops.assign_sub - - :class:`mindspore.ops.AssignSub` + - Refer to :class:`mindspore.ops.AssignSub`. * - mindspore.ops.bool_and - - :class:`mindspore.ops.Primitive` ('bool_and') + - Calculate the result of logical AND operation. * - mindspore.ops.bool_eq - - :class:`mindspore.ops.Primitive` ('bool_eq') + - Determine whether the Boolean values are equal. * - mindspore.ops.bool_not - - :class:`mindspore.ops.Primitive` ('bool_not') + - Calculate the result of logical NOT operation. * - mindspore.ops.bool_or - - :class:`mindspore.ops.Primitive` ('bool_or') + - Calculate the result of logical OR operation. * - mindspore.ops.cast - - :class:`mindspore.ops.Cast` - * - mindspore.ops.distribute - - :class:`mindspore.ops.Primitive` ('distribute') + - Refer to :class:`mindspore.ops.Cast`. * - mindspore.ops.dtype - - :class:`mindspore.ops.DType` + - Refer to :class:`mindspore.ops.DType`. * - mindspore.ops.equal - - :class:`mindspore.ops.Equal` + - Refer to :class:`mindspore.ops.Equal`. * - mindspore.ops.expand_dims - - :class:`mindspore.ops.ExpandDims` + - Refer to :class:`mindspore.ops.ExpandDims`. * - mindspore.ops.fill - - :class:`mindspore.ops.Fill` + - Refer to :class:`mindspore.ops.Fill`. * - mindspore.ops.gather - - :class:`mindspore.ops.Gather` + - Refer to :class:`mindspore.ops.Gather`. * - mindspore.ops.gather_nd - - :class:`mindspore.ops.GatherNd` + - Refer to :class:`mindspore.ops.GatherNd`. * - mindspore.ops.hastype - - :class:`mindspore.ops.Primitive` ('hastype') + - Determine whether the object has the specified type. 
* - mindspore.ops.in_dict - - :class:`mindspore.ops.Primitive` ('in_dict') + - Determine whether the object is in the dict.
* - mindspore.ops.is_not - - :class:`mindspore.ops.Primitive` ('is_not') + - Determine whether the input is not the same as the other one.
* - mindspore.ops.is\_ - - :class:`mindspore.ops.Primitive` ('is\_') + - Determine whether the input is the same as the other one.
* - mindspore.ops.isconstant - - :class:`mindspore.ops.Primitive` ('is_constant') + - Determine whether the object is constant.
* - mindspore.ops.isinstance\_ - - :class:`mindspore.ops.IsInstance` + - Refer to :class:`mindspore.ops.IsInstance`.
* - mindspore.ops.issubclass\_ - - :class:`mindspore.ops.IsSubClass` + - Refer to :class:`mindspore.ops.IsSubClass`.
* - mindspore.ops.logical_and - - :class:`mindspore.ops.LogicalAnd` + - Refer to :class:`mindspore.ops.LogicalAnd`.
* - mindspore.ops.logical_not - - :class:`mindspore.ops.LogicalNot` + - Refer to :class:`mindspore.ops.LogicalNot`.
* - mindspore.ops.logical_or - - :class:`mindspore.ops.LogicalOr` + - Refer to :class:`mindspore.ops.LogicalOr`.
* - mindspore.ops.make_row_tensor - - :class:`mindspore.ops.Primitive` ('MakeRowTensor') + - Generate row tensor.
* - mindspore.ops.make_sparse_tensor - - :class:`mindspore.ops.Primitive` ('MakeSparseTensor') + - Generate sparse tensor.
* - mindspore.ops.mixed_precision_cast - - :class:`mindspore.ops.Primitive` ('mixed_precision_cast') + - A temporary operator for mixed precision that is converted to Cast after compilation.
* - mindspore.ops.neg_tensor - - :class:`mindspore.ops.Neg` + - Refer to :class:`mindspore.ops.Neg`.
* - mindspore.ops.not_equal - - :class:`mindspore.ops.NotEqual` + - Refer to :class:`mindspore.ops.NotEqual`.
* - mindspore.ops.not_in_dict - - :class:`mindspore.ops.Primitive` ('not_in_dict') + - Determine whether the object is not in the dict.
* - mindspore.ops.ones_like - - :class:`mindspore.ops.OnesLike` + - Refer to :class:`mindspore.ops.OnesLike`.
* - mindspore.ops.print\_ - - :class:`mindspore.ops.Print` + - Refer to :class:`mindspore.ops.Print`.
* - mindspore.ops.rank - - :class:`mindspore.ops.Rank` + - Refer to :class:`mindspore.ops.Rank`.
* - mindspore.ops.reduced_shape - - :class:`mindspore.ops.Primitive` ('reduced_shape') + - Calculate the shape of the reduction operator.
* - mindspore.ops.reshape - - :class:`mindspore.ops.Reshape` + - Refer to :class:`mindspore.ops.Reshape`.
* - mindspore.ops.row_tensor_get_dense_shape - - :class:`mindspore.ops.Primitive` ('RowTensorGetDenseShape') + - Get corresponding dense shape of row tensor.
* - mindspore.ops.row_tensor_get_indices - - :class:`mindspore.ops.Primitive` ('RowTensorGetIndices') + - Get indices of row tensor.
* - mindspore.ops.row_tensor_get_values - - :class:`mindspore.ops.Primitive` ('RowTensorGetValues') + - Get values of row tensor.
* - mindspore.ops.same_type_shape - - :class:`mindspore.ops.SameTypeShape` + - Refer to :class:`mindspore.ops.SameTypeShape`.
* - mindspore.ops.scalar_add - - :class:`mindspore.ops.Primitive` ('scalar_add') + - Get the sum of two numbers.
* - mindspore.ops.scalar_cast - - :class:`mindspore.ops.ScalarCast` + - Refer to :class:`mindspore.ops.ScalarCast`.
* - mindspore.ops.scalar_div - - :class:`mindspore.ops.Primitive` ('scalar_div') + - Get the quotient of dividing the first input number by the second input number.
* - mindspore.ops.scalar_eq - - :class:`mindspore.ops.Primitive` ('scalar_eq') + - Determine whether two numbers are equal.
* - mindspore.ops.scalar_floordiv - - :class:`mindspore.ops.Primitive` ('scalar_floordiv') + - Divide the first input number by the second input number and round down to the closest integer.
* - mindspore.ops.scalar_ge - - :class:`mindspore.ops.Primitive` ('scalar_ge') + - Determine whether the number is greater than or equal to another number.
* - mindspore.ops.scalar_gt - - :class:`mindspore.ops.Primitive` ('scalar_gt') + - Determine whether the number is greater than another number.
* - mindspore.ops.scalar_le - - :class:`mindspore.ops.Primitive` ('scalar_le') + - Determine whether the number is less than or equal to another number.
* - mindspore.ops.scalar_log - - :class:`mindspore.ops.Primitive` ('scalar_log') + - Get the natural logarithm of the input number.
* - mindspore.ops.scalar_lt - - :class:`mindspore.ops.Primitive` ('scalar_lt') + - Determine whether the number is less than another number.
* - mindspore.ops.scalar_mod - - :class:`mindspore.ops.Primitive` ('scalar_mod') + - Get the remainder of dividing the first input number by the second input number.
* - mindspore.ops.scalar_mul - - :class:`mindspore.ops.Primitive` ('scalar_mul') + - Get the product of the two input numbers.
* - mindspore.ops.scalar_ne - - :class:`mindspore.ops.Primitive` ('scalar_ne') + - Determine whether two numbers are not equal.
* - mindspore.ops.scalar_pow - - :class:`mindspore.ops.Primitive` ('scalar_pow') + - Raise the first input number to the power of the second input number.
* - mindspore.ops.scalar_sub - - :class:`mindspore.ops.Primitive` ('scalar_sub') + - Subtract the second input number from the first input number.
* - mindspore.ops.scalar_to_array - - :class:`mindspore.ops.ScalarToArray` + - Refer to :class:`mindspore.ops.ScalarToArray`.
* - mindspore.ops.scalar_to_tensor - - :class:`mindspore.ops.ScalarToTensor` + - Refer to :class:`mindspore.ops.ScalarToTensor`.
* - mindspore.ops.scalar_uadd - - :class:`mindspore.ops.Primitive` ('scalar_uadd') + - Get the positive value of the input number.
* - mindspore.ops.scalar_usub - - :class:`mindspore.ops.Primitive` ('scalar_usub') + - Get the negative value of the input number.
* - mindspore.ops.scatter_nd - - :class:`mindspore.ops.ScatterNd` + - Refer to :class:`mindspore.ops.ScatterNd`.
* - mindspore.ops.scatter_nd_update - - :class:`mindspore.ops.ScatterNdUpdate` + - Refer to :class:`mindspore.ops.ScatterNdUpdate`.
* - mindspore.ops.scatter_update - - :class:`mindspore.ops.ScatterUpdate` + - Refer to :class:`mindspore.ops.ScatterUpdate`.
* - mindspore.ops.select - - :class:`mindspore.ops.Select` + - Refer to :class:`mindspore.ops.Select`.
* - mindspore.ops.shape - - :class:`mindspore.ops.Shape` + - Refer to :class:`mindspore.ops.Shape`.
* - mindspore.ops.shape_mul - - :class:`mindspore.ops.Primitive` ('shape_mul') + - Multiply the elements of the input tuple (a shape) and return the product.
* - mindspore.ops.size - - :class:`mindspore.ops.Size` + - Refer to :class:`mindspore.ops.Size`.
* - mindspore.ops.sparse_tensor_get_dense_shape - - :class:`mindspore.ops.Primitive` ('SparseTensorGetDenseShape') + - Get corresponding dense shape of sparse tensor.
* - mindspore.ops.sparse_tensor_get_indices - - :class:`mindspore.ops.Primitive` ('SparseTensorGetIndices') + - Get indices of sparse tensor.
* - mindspore.ops.sparse_tensor_get_values - - :class:`mindspore.ops.Primitive` ('SparseTensorGetValues') + - Get values of sparse tensor.
* - mindspore.ops.sqrt - - :class:`mindspore.ops.Sqrt` + - Refer to :class:`mindspore.ops.Sqrt`.
* - mindspore.ops.square - - :class:`mindspore.ops.Square` + - Refer to :class:`mindspore.ops.Square`. * - mindspore.ops.stack - - :class:`mindspore.ops.Stack` + - Refer to :class:`mindspore.ops.Stack`. * - mindspore.ops.stop_gradient - - :class:`mindspore.ops.Primitive` ('stop_gradient') + - Disable update during back propagation. (`stop_gradient `_) * - mindspore.ops.strided_slice - - :class:`mindspore.ops.StridedSlice` + - Refer to :class:`mindspore.ops.StridedSlice`. * - mindspore.ops.string_concat - - :class:`mindspore.ops.Primitive` ('string_concat') + - Concatenate two strings. * - mindspore.ops.string_eq - - :class:`mindspore.ops.Primitive` ('string_equal') + - Determine if two strings are equal. * - mindspore.ops.tensor_div - - :class:`mindspore.ops.RealDiv` + - Refer to :class:`mindspore.ops.RealDiv`. * - mindspore.ops.tensor_floordiv - - :class:`mindspore.ops.FloorDiv` + - Refer to :class:`mindspore.ops.FloorDiv`. * - mindspore.ops.tensor_ge - - :class:`mindspore.ops.GreaterEqual` + - Refer to :class:`mindspore.ops.GreaterEqual`. * - mindspore.ops.tensor_gt - - :class:`mindspore.ops.Greater` + - Refer to :class:`mindspore.ops.Greater`. * - mindspore.ops.tensor_le - - :class:`mindspore.ops.LessEqual` + - Refer to :class:`mindspore.ops.LessEqual`. * - mindspore.ops.tensor_lt - - :class:`mindspore.ops.Less` + - Refer to :class:`mindspore.ops.Less`. * - mindspore.ops.tensor_mod - - :class:`mindspore.ops.FloorMod` + - Refer to :class:`mindspore.ops.FloorMod`. * - mindspore.ops.tensor_mul - - :class:`mindspore.ops.Mul` + - Refer to :class:`mindspore.ops.Mul`. * - mindspore.ops.tensor_pow - - :class:`mindspore.ops.Pow` + - Refer to :class:`mindspore.ops.Pow`. * - mindspore.ops.tensor_sub - - :class:`mindspore.ops.Sub` + - Refer to :class:`mindspore.ops.Sub`. * - mindspore.ops.tile - - :class:`mindspore.ops.Tile` + - Refer to :class:`mindspore.ops.Tile`. * - mindspore.ops.tuple_to_array - - :class:`mindspore.ops.TupleToArray` + - Refer to :class:`mindspore.ops.TupleToArray`. * - mindspore.ops.typeof - - :class:`mindspore.ops.Primitive` ('typeof') + - Get type of object. * - mindspore.ops.zeros_like - - :class:`mindspore.ops.ZerosLike` + - Refer to :class:`mindspore.ops.ZerosLike`. primitive --------- @@ -269,7 +267,6 @@ op_info_register :nosignatures: :template: classtemplate.rst - mindspore.ops.AiCPURegOp mindspore.ops.DataType mindspore.ops.op_info_register mindspore.ops.TBERegOp diff --git a/docs/api_python/source_en/mindspore/mindspore.train.rst b/docs/api_python/source_en/mindspore/mindspore.train.rst index ff368bf47b07ac19f0c76c8133ddd0e9face92e3..b1bbafb131a0b608e1b92e804fe1444324c1aa84 100644 --- a/docs/api_python/source_en/mindspore/mindspore.train.rst +++ b/docs/api_python/source_en/mindspore/mindspore.train.rst @@ -12,3 +12,9 @@ mindspore.train.callback .. automodule:: mindspore.train.callback :members: + +mindspore.train.train_thor +-------------------------- + +.. 
automodule:: mindspore.train.train_thor
+   :members:
diff --git a/docs/api_python/source_en/mindspore/operations.rst b/docs/api_python/source_en/mindspore/operations.rst
index fe42da4e0035fa383194c446e30e65d2f846b4c0..a1127881cdba41188f5c61019aa88345aad7e173 100644
--- a/docs/api_python/source_en/mindspore/operations.rst
+++ b/docs/api_python/source_en/mindspore/operations.rst
@@ -241,6 +241,7 @@ Array Operators
     mindspore.ops.IsFinite
     mindspore.ops.IsInstance
     mindspore.ops.IsSubClass
+    mindspore.ops.MaskedSelect
     mindspore.ops.Meshgrid
     mindspore.ops.Ones
     mindspore.ops.OnesLike
@@ -378,6 +379,7 @@ Sponge Operators
     mindspore.ops.LJForce
     mindspore.ops.LJForceWithPMEDirectForce
     mindspore.ops.MDIterationLeapFrog
+    mindspore.ops.MDIterationLeapFrogLiujian
     mindspore.ops.NeighborListUpdate
     mindspore.ops.PMEEnergy
     mindspore.ops.PMEExcludedForce
diff --git a/docs/api_python/source_zh_cn/_ext/my_signature.py b/docs/api_python/source_zh_cn/_ext/my_signature.py
index 7eb6cec6f4607af6b66dfe3d460b7e57d39e20d5..4f6a22dc9d8b7ef72aeb429f9124c2dabb1577d6 100644
--- a/docs/api_python/source_zh_cn/_ext/my_signature.py
+++ b/docs/api_python/source_zh_cn/_ext/my_signature.py
@@ -84,6 +84,10 @@ def _my_signature_from_function(cls, func):
         kwdefaults[arg_name] = defaults[num]
     else:
         kwdefaults = func.__kwdefaults__
+    if kwdefaults is not None:
+        for key, value in kwdefaults.items():
+            if isinstance(value, str):
+                kwdefaults[key] = '"' + value + '"'
     pos_defaults = func.__defaults__
     if pos_defaults:
diff --git a/docs/api_python/source_zh_cn/_templates/classtemplate_inherited.rst b/docs/api_python/source_zh_cn/_templates/classtemplate_inherited.rst
index 8f4a423dca6e678c191df73d142e4e52a862a3db..bdebdbac946c80f978932f0ec249efbbc663924c 100644
--- a/docs/api_python/source_zh_cn/_templates/classtemplate_inherited.rst
+++ b/docs/api_python/source_zh_cn/_templates/classtemplate_inherited.rst
@@ -3,7 +3,17 @@
 .. currentmodule:: {{ module }}

-{% if objname[0].istitle() %}
+{% if objname in "GraphData CelebADataset Cifar100Dataset Cifar10Dataset CocoDataset ImageFolderDataset MnistDataset VOCDataset" %}
+
+{{ fullname | underline }}
+
+.. autoclass:: {{ name }}
+   :inherited-members:
+   :exclude-members: bucket_batch_by_length, build_sentencepiece_vocab, build_vocab
+   :members:
+
+{% elif objname[0].istitle() %}
+
 {{ fullname | underline }}

 ..
autoclass:: {{ name }}
diff --git a/docs/api_python/source_zh_cn/conf.py b/docs/api_python/source_zh_cn/conf.py
index 073012d085634213d93992be12141866a57d0db8..8807c332bd0b4e10404b8f309a65ece708af472a 100644
--- a/docs/api_python/source_zh_cn/conf.py
+++ b/docs/api_python/source_zh_cn/conf.py
@@ -55,7 +55,7 @@ extensions = [
     'sphinx.ext.napoleon',
     'sphinx.ext.viewcode',
     'sphinx_markdown_tables',
-    'recommonmark',
+    'myst_parser',
 ]
 source_suffix = {
diff --git a/docs/api_python/source_zh_cn/index.rst b/docs/api_python/source_zh_cn/index.rst
index 13dcbc77859fc012f4ac9a38f7a05c9eafd9a568..cc49aa605585ef5aafeaa93560d9344f812ca9f6 100644
--- a/docs/api_python/source_zh_cn/index.rst
+++ b/docs/api_python/source_zh_cn/index.rst
@@ -65,4 +65,5 @@ MindSpore Python API
    mindquantum/mindquantum.ops
    mindquantum/mindquantum.hiqfermion
    mindquantum/mindquantum.nn
+   mindquantum/mindquantum.ansatz
    mindquantum/mindquantum.utils
diff --git a/docs/api_python/source_zh_cn/mindquantum/mindquantum.ansatz.rst b/docs/api_python/source_zh_cn/mindquantum/mindquantum.ansatz.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c4df6100896c877ef28017052bda10d058f995b1
--- /dev/null
+++ b/docs/api_python/source_zh_cn/mindquantum/mindquantum.ansatz.rst
@@ -0,0 +1,5 @@
+mindquantum.ansatz
+-------------------
+
+.. automodule:: mindquantum.ansatz
+   :members:
diff --git a/docs/api_python/source_zh_cn/mindquantum/mindquantum.nn.rst b/docs/api_python/source_zh_cn/mindquantum/mindquantum.nn.rst
index d3909a4cecef7be9b0091200963a6e37d80b5b9e..74ac7db20443891ee1890401008cdf3f16f9a78a 100644
--- a/docs/api_python/source_zh_cn/mindquantum/mindquantum.nn.rst
+++ b/docs/api_python/source_zh_cn/mindquantum/mindquantum.nn.rst
@@ -17,3 +17,4 @@ Operators
     mindquantum.nn.MindQuantumLayer
     mindquantum.nn.MindQuantumAnsatzOnlyLayer
     mindquantum.nn.MindQuantumAnsatzOnlyOperator
+    mindquantum.nn.PQC
diff --git a/docs/api_python/source_zh_cn/mindquantum/mindquantum.rst b/docs/api_python/source_zh_cn/mindquantum/mindquantum.rst
index 819825eb913b528508cd47af699ed083332542e4..890545ee87d37ba8cad2fecb5da399057f5e0b41 100644
--- a/docs/api_python/source_zh_cn/mindquantum/mindquantum.rst
+++ b/docs/api_python/source_zh_cn/mindquantum/mindquantum.rst
@@ -41,8 +41,30 @@ mindquantum.circuit
-------------------

 .. automodule:: mindquantum.circuit
+   :exclude-members: C, D, A, AP, CPN
    :members:
+
+functional
+----------
+
+The functional operators are shortcuts for some pre-instantiated quantum circuit operators.
+
+..
list-table:: + :widths: 50 50 + :header-rows: 1 + + * - functional + - high level operators + * - mindquantum.circuit.C + - :class:`mindquantum.circuit.controlled` + * - mindquantum.circuit.D + - :class:`mindquantum.circuit.dagger` + * - mindquantum.circuit.A + - :class:`mindquantum.circuit.apply` + * - mindquantum.circuit.AP + - :class:`mindquantum.circuit.add_prefix` + * - mindquantum.circuit.CPN + - :class:`mindquantum.circuit.change_param_name` mindquantum.engine ------------------ diff --git a/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.rst b/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.rst index d5fb44e566727e8303276c880d03652ea0940eff..a07aed4b343344c38973ebc5e929eafb52dd1b8a 100644 --- a/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.rst +++ b/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.rst @@ -90,5 +90,11 @@ Others :template: classtemplate_inherited.rst mindspore.dataset.DatasetCache + mindspore.dataset.DSCallback mindspore.dataset.Schema + mindspore.dataset.WaitedDSCallback + mindspore.dataset.compare + mindspore.dataset.deserialize + mindspore.dataset.serialize + mindspore.dataset.show mindspore.dataset.zip diff --git a/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.vision.rst b/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.vision.rst index 6534637abb4d182b6ad93bede9408c81a73cda12..67d920ded7b2d941b7cd1f548f9552be4b9b7d95 100644 --- a/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.vision.rst +++ b/docs/api_python/source_zh_cn/mindspore/mindspore.dataset.vision.rst @@ -36,9 +36,9 @@ mindspore.dataset.vision.c_transforms mindspore.dataset.vision.c_transforms.RandomHorizontalFlip mindspore.dataset.vision.c_transforms.RandomHorizontalFlipWithBBox mindspore.dataset.vision.c_transforms.RandomPosterize + mindspore.dataset.vision.c_transforms.RandomResize mindspore.dataset.vision.c_transforms.RandomResizedCrop mindspore.dataset.vision.c_transforms.RandomResizedCropWithBBox - mindspore.dataset.vision.c_transforms.RandomResize mindspore.dataset.vision.c_transforms.RandomResizeWithBBox mindspore.dataset.vision.c_transforms.RandomRotation mindspore.dataset.vision.c_transforms.RandomSelectSubpolicy @@ -53,7 +53,7 @@ mindspore.dataset.vision.c_transforms mindspore.dataset.vision.c_transforms.SoftDvppDecodeRandomCropResizeJpeg mindspore.dataset.vision.c_transforms.SoftDvppDecodeResizeJpeg mindspore.dataset.vision.c_transforms.UniformAugment - mindspore.dataset.vision.c_transforms.HorizontalFlip + mindspore.dataset.vision.c_transforms.VerticalFlip mindspore.dataset.vision.py_transforms ------------------------------------------------- @@ -76,6 +76,7 @@ mindspore.dataset.vision.py_transforms mindspore.dataset.vision.py_transforms.LinearTransformation mindspore.dataset.vision.py_transforms.MixUp mindspore.dataset.vision.py_transforms.Normalize + mindspore.dataset.vision.py_transforms.NormalizePad mindspore.dataset.vision.py_transforms.Pad mindspore.dataset.vision.py_transforms.RandomAffine mindspore.dataset.vision.py_transforms.RandomColor diff --git a/docs/api_python/source_zh_cn/mindspore/mindspore.nn.rst b/docs/api_python/source_zh_cn/mindspore/mindspore.nn.rst index c079bf306aa15c5db10358e7255809fe2164d021..952429a2ff0eb0ad21c764f1600fc44381b397c2 100644 --- a/docs/api_python/source_zh_cn/mindspore/mindspore.nn.rst +++ b/docs/api_python/source_zh_cn/mindspore/mindspore.nn.rst @@ -160,6 +160,18 @@ Pooling layers mindspore.nn.MaxPool1d mindspore.nn.MaxPool2d +Thor Layers +-------------- + +.. 
msplatformautosummary:: + :toctree: nn + :nosignatures: + :template: classtemplate.rst + + mindspore.nn.Conv2dThor + mindspore.nn.DenseThor + mindspore.nn.EmbeddingThor + Quantized Functions ------------------- @@ -194,7 +206,7 @@ Loss Functions mindspore.nn.DiceLoss mindspore.nn.FocalLoss mindspore.nn.L1Loss - mindspore.nn.Loss + mindspore.nn.LossBase mindspore.nn.MAELoss mindspore.nn.MSELoss mindspore.nn.MultiClassDiceLoss @@ -224,6 +236,7 @@ Optimizer Functions mindspore.nn.ProximalAdagrad mindspore.nn.RMSProp mindspore.nn.SGD + mindspore.nn.thor Wrapper Functions ----------------- diff --git a/docs/api_python/source_zh_cn/mindspore/mindspore.ops.rst b/docs/api_python/source_zh_cn/mindspore/mindspore.ops.rst index 93dee4d2a2ead3cc7782678bc0c460b3b321bb03..d9ac23c0ea6d0732e00ac61afbb805cafd2d7886 100644 --- a/docs/api_python/source_zh_cn/mindspore/mindspore.ops.rst +++ b/docs/api_python/source_zh_cn/mindspore/mindspore.ops.rst @@ -47,195 +47,193 @@ The functional operators are the pre-instantiated Primitive operators, which can :header-rows: 1 * - functional - - operations + - Description * - mindspore.ops.add - - :class:`mindspore.ops.Add` + - Refer to :class:`mindspore.ops.Add`. * - mindspore.ops.addn - - :class:`mindspore.ops.AddN` + - Refer to :class:`mindspore.ops.AddN`. * - mindspore.ops.array_reduce - - :class:`mindspore.ops.Primitive` ('array_reduce') + - Reduce the dimension of the array. * - mindspore.ops.array_to_scalar - - :class:`mindspore.ops.Primitive` ('array_to_scalar') + - Convert the array to a scalar. * - mindspore.ops.assign - - :class:`mindspore.ops.Assign` + - Refer to :class:`mindspore.ops.Assign`. * - mindspore.ops.assign_add - - :class:`mindspore.ops.AssignAdd` + - Refer to :class:`mindspore.ops.AssignAdd`. * - mindspore.ops.assign_sub - - :class:`mindspore.ops.AssignSub` + - Refer to :class:`mindspore.ops.AssignSub`. * - mindspore.ops.bool_and - - :class:`mindspore.ops.Primitive` ('bool_and') + - Calculate the result of logical AND operation. * - mindspore.ops.bool_eq - - :class:`mindspore.ops.Primitive` ('bool_eq') + - Determine whether the Boolean values are equal. * - mindspore.ops.bool_not - - :class:`mindspore.ops.Primitive` ('bool_not') + - Calculate the result of logical NOT operation. * - mindspore.ops.bool_or - - :class:`mindspore.ops.Primitive` ('bool_or') + - Calculate the result of logical OR operation. * - mindspore.ops.cast - - :class:`mindspore.ops.Cast` - * - mindspore.ops.distribute - - :class:`mindspore.ops.Primitive` ('distribute') + - Refer to :class:`mindspore.ops.Cast`. * - mindspore.ops.dtype - - :class:`mindspore.ops.DType` + - Refer to :class:`mindspore.ops.DType`. * - mindspore.ops.equal - - :class:`mindspore.ops.Equal` + - Refer to :class:`mindspore.ops.Equal`. * - mindspore.ops.expand_dims - - :class:`mindspore.ops.ExpandDims` + - Refer to :class:`mindspore.ops.ExpandDims`. * - mindspore.ops.fill - - :class:`mindspore.ops.Fill` + - Refer to :class:`mindspore.ops.Fill`. * - mindspore.ops.gather - - :class:`mindspore.ops.Gather` + - Refer to :class:`mindspore.ops.Gather`. * - mindspore.ops.gather_nd - - :class:`mindspore.ops.GatherNd` + - Refer to :class:`mindspore.ops.GatherNd`. * - mindspore.ops.hastype - - :class:`mindspore.ops.Primitive` ('hastype') + - Determine whether the object has the specified type. * - mindspore.ops.in_dict - - :class:`mindspore.ops.Primitive` ('in_dict') + - Determine whether the object is in the dict. 
* - mindspore.ops.is_not - - :class:`mindspore.ops.Primitive` ('is_not') + - Determine whether the input is not the same as the other one.
* - mindspore.ops.is\_ - - :class:`mindspore.ops.Primitive` ('is\_') + - Determine whether the input is the same as the other one.
* - mindspore.ops.isconstant - - :class:`mindspore.ops.Primitive` ('is_constant') + - Determine whether the object is constant.
* - mindspore.ops.isinstance\_ - - :class:`mindspore.ops.IsInstance` + - Refer to :class:`mindspore.ops.IsInstance`.
* - mindspore.ops.issubclass\_ - - :class:`mindspore.ops.IsSubClass` + - Refer to :class:`mindspore.ops.IsSubClass`.
* - mindspore.ops.logical_and - - :class:`mindspore.ops.LogicalAnd` + - Refer to :class:`mindspore.ops.LogicalAnd`.
* - mindspore.ops.logical_not - - :class:`mindspore.ops.LogicalNot` + - Refer to :class:`mindspore.ops.LogicalNot`.
* - mindspore.ops.logical_or - - :class:`mindspore.ops.LogicalOr` + - Refer to :class:`mindspore.ops.LogicalOr`.
* - mindspore.ops.make_row_tensor - - :class:`mindspore.ops.Primitive` ('MakeRowTensor') + - Generate row tensor.
* - mindspore.ops.make_sparse_tensor - - :class:`mindspore.ops.Primitive` ('MakeSparseTensor') + - Generate sparse tensor.
* - mindspore.ops.mixed_precision_cast - - :class:`mindspore.ops.Primitive` ('mixed_precision_cast') + - A temporary operator for mixed precision that is converted to Cast after compilation.
* - mindspore.ops.neg_tensor - - :class:`mindspore.ops.Neg` + - Refer to :class:`mindspore.ops.Neg`.
* - mindspore.ops.not_equal - - :class:`mindspore.ops.NotEqual` + - Refer to :class:`mindspore.ops.NotEqual`.
* - mindspore.ops.not_in_dict - - :class:`mindspore.ops.Primitive` ('not_in_dict') + - Determine whether the object is not in the dict.
* - mindspore.ops.ones_like - - :class:`mindspore.ops.OnesLike` + - Refer to :class:`mindspore.ops.OnesLike`.
* - mindspore.ops.print\_ - - :class:`mindspore.ops.Print` + - Refer to :class:`mindspore.ops.Print`.
* - mindspore.ops.rank - - :class:`mindspore.ops.Rank` + - Refer to :class:`mindspore.ops.Rank`.
* - mindspore.ops.reduced_shape - - :class:`mindspore.ops.Primitive` ('reduced_shape') + - Calculate the shape of the reduction operator.
* - mindspore.ops.reshape - - :class:`mindspore.ops.Reshape` + - Refer to :class:`mindspore.ops.Reshape`.
* - mindspore.ops.row_tensor_get_dense_shape - - :class:`mindspore.ops.Primitive` ('RowTensorGetDenseShape') + - Get corresponding dense shape of row tensor.
* - mindspore.ops.row_tensor_get_indices - - :class:`mindspore.ops.Primitive` ('RowTensorGetIndices') + - Get indices of row tensor.
* - mindspore.ops.row_tensor_get_values - - :class:`mindspore.ops.Primitive` ('RowTensorGetValues') + - Get values of row tensor.
* - mindspore.ops.same_type_shape - - :class:`mindspore.ops.SameTypeShape` + - Refer to :class:`mindspore.ops.SameTypeShape`.
* - mindspore.ops.scalar_add - - :class:`mindspore.ops.Primitive` ('scalar_add') + - Get the sum of two numbers.
* - mindspore.ops.scalar_cast - - :class:`mindspore.ops.ScalarCast` + - Refer to :class:`mindspore.ops.ScalarCast`.
* - mindspore.ops.scalar_div - - :class:`mindspore.ops.Primitive` ('scalar_div') + - Get the quotient of dividing the first input number by the second input number.
* - mindspore.ops.scalar_eq - - :class:`mindspore.ops.Primitive` ('scalar_eq') + - Determine whether two numbers are equal.
* - mindspore.ops.scalar_floordiv - - :class:`mindspore.ops.Primitive` ('scalar_floordiv') + - Divide the first input number by the second input number and round down to the closest integer.
* - mindspore.ops.scalar_ge - - :class:`mindspore.ops.Primitive` ('scalar_ge') + - Determine whether the number is greater than or equal to another number.
* - mindspore.ops.scalar_gt - - :class:`mindspore.ops.Primitive` ('scalar_gt') + - Determine whether the number is greater than another number.
* - mindspore.ops.scalar_le - - :class:`mindspore.ops.Primitive` ('scalar_le') + - Determine whether the number is less than or equal to another number.
* - mindspore.ops.scalar_log - - :class:`mindspore.ops.Primitive` ('scalar_log') + - Get the natural logarithm of the input number.
* - mindspore.ops.scalar_lt - - :class:`mindspore.ops.Primitive` ('scalar_lt') + - Determine whether the number is less than another number.
* - mindspore.ops.scalar_mod - - :class:`mindspore.ops.Primitive` ('scalar_mod') + - Get the remainder of dividing the first input number by the second input number.
* - mindspore.ops.scalar_mul - - :class:`mindspore.ops.Primitive` ('scalar_mul') + - Get the product of the two input numbers.
* - mindspore.ops.scalar_ne - - :class:`mindspore.ops.Primitive` ('scalar_ne') + - Determine whether two numbers are not equal.
* - mindspore.ops.scalar_pow - - :class:`mindspore.ops.Primitive` ('scalar_pow') + - Raise the first input number to the power of the second input number.
* - mindspore.ops.scalar_sub - - :class:`mindspore.ops.Primitive` ('scalar_sub') + - Subtract the second input number from the first input number.
* - mindspore.ops.scalar_to_array - - :class:`mindspore.ops.ScalarToArray` + - Refer to :class:`mindspore.ops.ScalarToArray`.
* - mindspore.ops.scalar_to_tensor - - :class:`mindspore.ops.ScalarToTensor` + - Refer to :class:`mindspore.ops.ScalarToTensor`.
* - mindspore.ops.scalar_uadd - - :class:`mindspore.ops.Primitive` ('scalar_uadd') + - Get the positive value of the input number.
* - mindspore.ops.scalar_usub - - :class:`mindspore.ops.Primitive` ('scalar_usub') + - Get the negative value of the input number.
* - mindspore.ops.scatter_nd - - :class:`mindspore.ops.ScatterNd` + - Refer to :class:`mindspore.ops.ScatterNd`.
* - mindspore.ops.scatter_nd_update - - :class:`mindspore.ops.ScatterNdUpdate` + - Refer to :class:`mindspore.ops.ScatterNdUpdate`.
* - mindspore.ops.scatter_update - - :class:`mindspore.ops.ScatterUpdate` + - Refer to :class:`mindspore.ops.ScatterUpdate`.
* - mindspore.ops.select - - :class:`mindspore.ops.Select` + - Refer to :class:`mindspore.ops.Select`.
* - mindspore.ops.shape - - :class:`mindspore.ops.Shape` + - Refer to :class:`mindspore.ops.Shape`.
* - mindspore.ops.shape_mul - - :class:`mindspore.ops.Primitive` ('shape_mul') + - Multiply the elements of the input tuple (a shape) and return the product.
* - mindspore.ops.size - - :class:`mindspore.ops.Size` + - Refer to :class:`mindspore.ops.Size`.
* - mindspore.ops.sparse_tensor_get_dense_shape - - :class:`mindspore.ops.Primitive` ('SparseTensorGetDenseShape') + - Get corresponding dense shape of sparse tensor.
* - mindspore.ops.sparse_tensor_get_indices - - :class:`mindspore.ops.Primitive` ('SparseTensorGetIndices') + - Get indices of sparse tensor.
* - mindspore.ops.sparse_tensor_get_values - - :class:`mindspore.ops.Primitive` ('SparseTensorGetValues') + - Get values of sparse tensor.
* - mindspore.ops.sqrt - - :class:`mindspore.ops.Sqrt` + - Refer to :class:`mindspore.ops.Sqrt`.
* - mindspore.ops.square - - :class:`mindspore.ops.Square` + - Refer to :class:`mindspore.ops.Square`. * - mindspore.ops.stack - - :class:`mindspore.ops.Stack` + - Refer to :class:`mindspore.ops.Stack`. * - mindspore.ops.stop_gradient - - :class:`mindspore.ops.Primitive` ('stop_gradient') + - Disable update during back propagation. (`stop_gradient `_) * - mindspore.ops.strided_slice - - :class:`mindspore.ops.StridedSlice` + - Refer to :class:`mindspore.ops.StridedSlice`. * - mindspore.ops.string_concat - - :class:`mindspore.ops.Primitive` ('string_concat') + - Concatenate two strings. * - mindspore.ops.string_eq - - :class:`mindspore.ops.Primitive` ('string_equal') + - Determine if two strings are equal. * - mindspore.ops.tensor_div - - :class:`mindspore.ops.RealDiv` + - Refer to :class:`mindspore.ops.RealDiv`. * - mindspore.ops.tensor_floordiv - - :class:`mindspore.ops.FloorDiv` + - Refer to :class:`mindspore.ops.FloorDiv`. * - mindspore.ops.tensor_ge - - :class:`mindspore.ops.GreaterEqual` + - Refer to :class:`mindspore.ops.GreaterEqual`. * - mindspore.ops.tensor_gt - - :class:`mindspore.ops.Greater` + - Refer to :class:`mindspore.ops.Greater`. * - mindspore.ops.tensor_le - - :class:`mindspore.ops.LessEqual` + - Refer to :class:`mindspore.ops.LessEqual`. * - mindspore.ops.tensor_lt - - :class:`mindspore.ops.Less` + - Refer to :class:`mindspore.ops.Less`. * - mindspore.ops.tensor_mod - - :class:`mindspore.ops.FloorMod` + - Refer to :class:`mindspore.ops.FloorMod`. * - mindspore.ops.tensor_mul - - :class:`mindspore.ops.Mul` + - Refer to :class:`mindspore.ops.Mul`. * - mindspore.ops.tensor_pow - - :class:`mindspore.ops.Pow` + - Refer to :class:`mindspore.ops.Pow`. * - mindspore.ops.tensor_sub - - :class:`mindspore.ops.Sub` + - Refer to :class:`mindspore.ops.Sub`. * - mindspore.ops.tile - - :class:`mindspore.ops.Tile` + - Refer to :class:`mindspore.ops.Tile`. * - mindspore.ops.tuple_to_array - - :class:`mindspore.ops.TupleToArray` + - Refer to :class:`mindspore.ops.TupleToArray`. * - mindspore.ops.typeof - - :class:`mindspore.ops.Primitive` ('typeof') + - Get type of object. * - mindspore.ops.zeros_like - - :class:`mindspore.ops.ZerosLike` + - Refer to :class:`mindspore.ops.ZerosLike`. primitive --------- @@ -269,7 +267,6 @@ op_info_register :nosignatures: :template: classtemplate.rst - mindspore.ops.AiCPURegOp mindspore.ops.DataType mindspore.ops.op_info_register mindspore.ops.TBERegOp diff --git a/docs/api_python/source_zh_cn/mindspore/mindspore.train.rst b/docs/api_python/source_zh_cn/mindspore/mindspore.train.rst index ff368bf47b07ac19f0c76c8133ddd0e9face92e3..b1bbafb131a0b608e1b92e804fe1444324c1aa84 100644 --- a/docs/api_python/source_zh_cn/mindspore/mindspore.train.rst +++ b/docs/api_python/source_zh_cn/mindspore/mindspore.train.rst @@ -12,3 +12,9 @@ mindspore.train.callback .. automodule:: mindspore.train.callback :members: + +mindspore.train.train_thor +-------------------------- + +.. 
automodule:: mindspore.train.train_thor + :members: diff --git a/docs/api_python/source_zh_cn/mindspore/operations.rst b/docs/api_python/source_zh_cn/mindspore/operations.rst index fe42da4e0035fa383194c446e30e65d2f846b4c0..a1127881cdba41188f5c61019aa88345aad7e173 100644 --- a/docs/api_python/source_zh_cn/mindspore/operations.rst +++ b/docs/api_python/source_zh_cn/mindspore/operations.rst @@ -241,6 +241,7 @@ Array Operators mindspore.ops.IsFinite mindspore.ops.IsInstance mindspore.ops.IsSubClass + mindspore.ops.MaskedSelect mindspore.ops.Meshgrid mindspore.ops.Ones mindspore.ops.OnesLike @@ -378,6 +379,7 @@ Sponge Operators mindspore.ops.LJForce mindspore.ops.LJForceWithPMEDirectForce mindspore.ops.MDIterationLeapFrog + mindspore.ops.MDIterationLeapFrogLiujian mindspore.ops.NeighborListUpdate mindspore.ops.PMEEnergy mindspore.ops.PMEExcludedForce diff --git a/docs/api_zh/.keep b/docs/api_zh/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/docs/faq/requirements.txt b/docs/faq/requirements.txt index 6e8a6bc4c2d8fce7c05a891fab76504cad65a5c7..6d8cd70439820e16bc32c4abc93e948ba81dc01b 100644 --- a/docs/faq/requirements.txt +++ b/docs/faq/requirements.txt @@ -1,5 +1,5 @@ sphinx >= 2.2.1, <= 2.4.4 -recommonmark +myst_parser == 0.14.0 sphinx-markdown-tables sphinx_rtd_theme numpy diff --git a/docs/faq/source_en/backend_compile.md b/docs/faq/source_en/backend_compile.md deleted file mode 100644 index 2d26b3d959851369db014712da7a633b4c1bb0bd..0000000000000000000000000000000000000000 --- a/docs/faq/source_en/backend_compile.md +++ /dev/null @@ -1,112 +0,0 @@ -# Banckend Compile - -`Linux` `Windows` `Ascend` `GPU` `CPU` `Environment Preparation` `Basic` `Intermediate` - - - -**Q: A piece of data contains multiple images which have different widths and heights. I need to perform the `map` operation on the data in mindrecord format for data processing. However, the data I read from `record` is in `np.ndarray` format. My `operations` are for the image format. How can I preprocess the generated data in mindrecord format?** - -A: You are advised to perform the following operations: - -```python -#1 The defined schema is as follows: Among them, data1, data2, data3, ... These fields store your image, and only the binary of the image is stored here. - -cv_schema_json = {"label": {"type": "int32"}, "data1": {"type": "bytes"}, "data2": {"type": "bytes"}, "data3": {"type": "bytes"}} - -#2 The organized data can be as follows, and then this data_list can be written by FileWriter.write_raw_data(...). - -data_list = [] -data = {} -data['label'] = 1 - -f = open("1.jpg", "rb") -image_bytes = f.read() -f.close - -data['data1'] = image_bytes - -f2 = open("2.jpg", "rb") -image_bytes2 = f2.read() -f2.close - -data['data2'] = image_bytes2 - -f3 = open("3.jpg", "rb") -image_bytes3 = f3.read() -f3.close - -data['data3'] = image_bytes3 - -data_list.append(data) - -#3 Use MindDataset to load, then use the decode operator we provide to decode, and then perform subsequent processing. 
- -data_set = ds.MindDataset("mindrecord_file_name") -data_set = data_set.map(input_columns=["data1"], operations=vision.Decode(), num_parallel_workers=2) -data_set = data_set.map(input_columns=["data2"], operations=vision.Decode(), num_parallel_workers=2) -data_set = data_set.map(input_columns=["data3"], operations=vision.Decode(), num_parallel_workers=2) -resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR) -data_set = data_set.map(operations=resize_op, input_columns=["data1"], num_parallel_workers=2) -for item in data_set.create_dict_iterator(output_numpy=True): - print(item) -``` - -
- -**Q: When a custom image dataset is converted to the mindrecord format, the data is in the `numpy.ndarray` format and `shape` is [4,100,132,3], indicating four three-channel frames, and each value ranges from 0 to 255. However, when I view the data that is converted into the mindrecord format, I find that the `shape` is `[19800]` but that of the original data is `[158400]`. Why?** - -A: The value of `dtype` in `ndarray` might be set to `int8`. The difference between `[158400]` and `[19800]` is eight times. You are advised to set `dtype` of `ndarray` to `float64`. - -
- -**Q: I want to save the generated image, but the image cannot be found in the corresponding directory after the code is executed. Similarly, a dataset is generated in JupyterLab for training. During training, data can be read in the corresponding path, but the image or dataset cannot be found in the path. Why?** - -A: The images or datasets generated by JumperLab are stored in Docker. The data downloaded by `moxing` can be viewed only in Docker during the training process. After the training is complete, the data is released with Docker. You can try to transfer the data that needs to be downloaded to `obs` through `moxing` in the training task, and then download the data to the local host through `obs`. - -
- -**Q: What framework models and formats can be directly read by MindSpore? Can the PTH Model Obtained Through Training in PyTorch Be Loaded to the MindSpore Framework for Use?** - -A: MindSpore uses protocol buffers (protobuf) to store training parameters and cannot directly read framework models. A model file stores parameters and their values. You can use APIs of other frameworks to read parameters, obtain the key-value pairs of parameters, and load the key-value pairs to MindSpore. If you want to use the .ckpt file trained by a framework, read the parameters and then call the `save_checkpoint` API of MindSpore to save the file as a .ckpt file that can be read by MindSpore. - -
- -**Q:What should I do if a Protobuf memory limit error is reported during the process of using ckpt or exporting a model?** - -A:When a single Protobuf data is too large, because Protobuf itself limits the size of the data stream, a memory limit error will be reported. At this time, the restriction can be lifted by setting the environment variable `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`. - -
- -**Q: What is the difference between the PyNative and Graph modes?** - -A: In terms of efficiency, operators used in the two modes are the same. Therefore, when the same network and operators are executed in the two modes, the accuracy is the same. The network execution performance varies according to the execution mechanism. Theoretically, operators provided by MindSpore support both the PyNative and Graph modes. - -In terms of application scenarios, Graph mode requires the network structure to be built at the beginning, and then the framework performs entire graph optimization and execution. This mode is suitable to scenarios where the network is fixed and high performance is required. - -The two modes are supported on different hardware (such as `Ascend`, `GPU`, and `CPU`). - -In terms of code debugging, since operators are executed line by line in PyNative mode, you can directly debug the Python code and view the `/api` output or execution result of the corresponding operator at any breakpoint in the code. In Graph mode, the network is built but not executed in the constructor function. Therefore, you cannot obtain the output of the corresponding operator at breakpoints in the `construct` function. The output can be viewed only after the network execution is complete. - -
- -**Q: What is the difference between `c_transforms` and `py_transforms`? Which one is recommended?** - -A: `c_transforms` is recommended. Its performance is better because it is executed only at the C layer. - -Principle: The underlying layer of `c_transform` uses `opencv/jpeg-turbo` of the C version for data processing, and `py_transform` uses `Pillow` of the Python version for data processing. - -
- -**Q: What is the difference between `bash -p` and `bash -e` when an error is reported during application build?** - -A: MindSpore Serving build and running depend on MindSpore. Serving provides two build modes: 1. Use `bash -p {python site-packages}/mindspore/lib` to specify an installed MindSpore path to avoid building MindSpore when building Serving. 2. Build Serving and the corresponding MindSpore. Serving passes the `-e`, `-V`, and `-j` options to MindSpore. -For example, use `bash -e ascend -V 910 -j32` in the Serving directory as follows: - -- Build MindSpore in the `third_party/mindspore` directory using `bash -e ascend -V 910 -j32`. -- Use the MindSpore build result as the Serving build dependency. - -
- -**Q: What can I do if an error `libmindspore.so: cannot open shared object file: No such file or directory` is reported during application running?** - -A: Check whether MindSpore that MindSpore Serving depends on is installed. In Serving 1.1, `LD_LIBRARY_PATH` needs to be configured to explicitly specify the path of `libmindspore.so`. `libmindspore.so` is in the `lib` directory of the MindSpore Python installation path. In Serving 1.2 or later, the path of `libmindspore.so` does not need to be specified. Serving searches for and adds `LD_LIBRARY_PATH` based on the MindSpore installation path, which does not need to be perceived by users. diff --git a/docs/faq/source_en/conf.py b/docs/faq/source_en/conf.py index a1fd767271ac159540440ed65bd0d676163366a9..c8424b34743810af30158493d44ff96bb88da62b 100644 --- a/docs/faq/source_en/conf.py +++ b/docs/faq/source_en/conf.py @@ -30,7 +30,7 @@ release = 'master' # ones. extensions = [ 'sphinx_markdown_tables', - 'recommonmark', + 'myst_parser' ] source_suffix = { diff --git a/docs/faq/source_en/data_processing.md b/docs/faq/source_en/data_processing.md index 830a9b04216e2bc85a8be224c664cc135fbaa58d..b6b0185949f3824b846992a0bcf3215b370b27e3 100644 --- a/docs/faq/source_en/data_processing.md +++ b/docs/faq/source_en/data_processing.md @@ -4,6 +4,75 @@ +**Q: What is the difference between `c_transforms` and `py_transforms`? Which one is recommended?** + +A: `c_transforms` is recommended. Its performance is better because it is executed only at the C layer. + +Principle: The underlying layer of `c_transform` uses `opencv/jpeg-turbo` of the C version for data processing, and `py_transform` uses `Pillow` of the Python version for data processing. + +
+
+**Q: A piece of data contains multiple images which have different widths and heights. I need to perform the `map` operation on the data in mindrecord format for data processing. However, the data I read from `record` is in `np.ndarray` format. My `operations` are for the image format. How can I preprocess the generated data in mindrecord format?**
+
+A: You are advised to perform the following operations:
+
+```python
+#1 Define the schema. The fields data1, data2, data3, ... store your images; only the binary content of each image is stored here.
+
+cv_schema_json = {"label": {"type": "int32"}, "data1": {"type": "bytes"}, "data2": {"type": "bytes"}, "data3": {"type": "bytes"}}
+
+#2 Organize the data as follows; this data_list can then be written by FileWriter.write_raw_data(...).
+
+data_list = []
+data = {}
+data['label'] = 1
+
+f = open("1.jpg", "rb")
+image_bytes = f.read()
+f.close()
+
+data['data1'] = image_bytes
+
+f2 = open("2.jpg", "rb")
+image_bytes2 = f2.read()
+f2.close()
+
+data['data2'] = image_bytes2
+
+f3 = open("3.jpg", "rb")
+image_bytes3 = f3.read()
+f3.close()
+
+data['data3'] = image_bytes3
+
+data_list.append(data)
+
+#3 Use MindDataset to load the file, decode with the provided decode operator, and then perform subsequent processing.
+
+data_set = ds.MindDataset("mindrecord_file_name")
+data_set = data_set.map(input_columns=["data1"], operations=vision.Decode(), num_parallel_workers=2)
+data_set = data_set.map(input_columns=["data2"], operations=vision.Decode(), num_parallel_workers=2)
+data_set = data_set.map(input_columns=["data3"], operations=vision.Decode(), num_parallel_workers=2)
+resize_op = vision.Resize((32, 32), interpolation=Inter.LINEAR)
+data_set = data_set.map(operations=resize_op, input_columns=["data1"], num_parallel_workers=2)
+for item in data_set.create_dict_iterator(output_numpy=True):
+    print(item)
+```
+
+<br/>
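The answer above references `FileWriter.write_raw_data(...)` without showing the write side. Here is a minimal sketch under the same schema; the output file name and shard count are assumptions for illustration:

```python
from mindspore.mindrecord import FileWriter

cv_schema_json = {"label": {"type": "int32"}, "data1": {"type": "bytes"},
                  "data2": {"type": "bytes"}, "data3": {"type": "bytes"}}

writer = FileWriter(file_name="mindrecord_file_name", shard_num=1)
writer.add_schema(cv_schema_json, "multi-image schema")

# Read each JPEG as raw bytes and store it in its own field.
data = {"label": 1}
for field, jpg in (("data1", "1.jpg"), ("data2", "2.jpg"), ("data3", "3.jpg")):
    with open(jpg, "rb") as f:
        data[field] = f.read()

writer.write_raw_data([data])
writer.commit()
```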
+
+**Q: When a custom image dataset is converted to the mindrecord format, the data is in the `numpy.ndarray` format and `shape` is [4,100,132,3], indicating four three-channel frames, and each value ranges from 0 to 255. However, when I view the data that is converted into the mindrecord format, I find that the `shape` is `[19800]` but that of the original data is `[158400]`. Why?**
+
+A: The value of `dtype` in `ndarray` might be set to `int8`. The difference between `[158400]` and `[19800]` is a factor of eight. You are advised to set `dtype` of `ndarray` to `float64`.
+
+<br/>
+
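The factor of eight in this answer is just the ratio of element sizes; a short illustrative check, using the numbers from the question itself:

```python
import numpy as np

n_values = 4 * 100 * 132 * 3   # 158400 values in a [4, 100, 132, 3] sample
ratio = np.dtype(np.float64).itemsize // np.dtype(np.int8).itemsize
print(n_values)            # 158400
print(ratio)               # 8: float64 is 8 bytes per value, int8 is 1
print(n_values // ratio)   # 19800, matching the unexpected shape
```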
+
+**Q: I want to save the generated image, but the image cannot be found in the corresponding directory after the code is executed. Similarly, a dataset is generated in JupyterLab for training. During training, data can be read in the corresponding path, but the image or dataset cannot be found in the path. Why?**
+
+A: The images or datasets generated by JupyterLab are stored in Docker. The data downloaded by `moxing` can be viewed only in Docker during the training process. After the training is complete, the data is released with Docker. You can try to transfer the data that needs to be downloaded to `obs` through `moxing` in the training task, and then download the data to the local host through `obs`.
+
+<br/>
+
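A minimal sketch of the `moxing` transfer described above; the local cache path and the OBS bucket name are placeholder assumptions, and `moxing` is only available inside ModelArts training environments:

```python
import moxing as mox

# Copy generated files out of the container to OBS before the job ends,
# so they survive after the Docker environment is released.
mox.file.copy_parallel(src_url="/cache/output", dst_url="obs://your-bucket/output")
```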
+ **Q: How do I understand the `dataset_sink_mode` parameter in `model.train` of MindSpore?** A: When `dataset_sink_mode` is set to `True`, data processing and network computing are performed in pipeline mode. That is, when data processing is performed step by step, after a `batch` of data is processed, the data is placed in a queue which is used to cache the processed data. Then, network computing obtains data from the queue for training. In this case, data processing and network computing are performed in pipeline mode. The entire training duration is the longest data processing/network computing duration. @@ -20,7 +89,7 @@ A: You can refer to the usage of YOLOv3 which contains the resizing of different **Q: Must data be converted into MindRecords when MindSpore is used for segmentation training?** -A: [build_seg_data.py](https://github.com/mindspore-ai/mindspore/blob/master/model_zoo/official/cv/deeplabv3/src/data/build_seg_data.py) is used to generate MindRecords based on a dataset. You can directly use or adapt it to your dataset. Alternatively, you can use `GeneratorDataset` if you want to read the dataset by yourself. +A: [build_seg_data.py](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/deeplabv3/src/data/build_seg_data.py) is used to generate MindRecords based on a dataset. You can directly use or adapt it to your dataset. Alternatively, you can use `GeneratorDataset` if you want to read the dataset by yourself. [GenratorDataset example](https://www.mindspore.cn/doc/programming_guide/en/master/dataset_loading.html#loading-user-defined-dataset) @@ -34,7 +103,7 @@ A: You can use the customized data loading method `GeneratorDataset`. For detail
-**Q: When MindSpore performs multi-device training on the NPU hardware platform, how does the user-defined dataset transfer data to different NPUs?**
+**Q: When MindSpore performs multi-device training on the Ascend hardware platform, how does the user-defined dataset transfer data to different chips?**

A: When `GeneratorDataset` is used, the `num_shards=num_shards` and `shard_id=device_id` parameters can be used to control which shard of data is read by different devices. `__getitem__` and `__len__` are processed as full datasets.

@@ -71,39 +140,21 @@ A: The MNIST gray scale image dataset is used for MindSpore training. Therefore,

**Q: Can you introduce the dedicated data processing framework?**

-A: MindData provides the heterogeneous hardware acceleration function for data processing. The high-concurrency data processing `pipeline` supports `NPU`, `GPU` and `CPU`. The `CPU` usage is reduced by 30%. For details, see [Optimizing Data Processing](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/optimize_data_processing.html).
+A: MindData provides the heterogeneous hardware acceleration function for data processing. The high-concurrency data processing `pipeline` supports `Ascend`, `GPU` and `CPU`. The `CPU` usage is reduced by 30%. For details, see [Optimizing Data Processing](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/optimize_data_processing.html).
-**Q:When error raised during network training, indicating that sending data failed like "TDT Push data into device Failed", how to locate the problem?**
+**Q: When an error is raised during network training indicating that sending data failed, like "TDT Push data into device Failed", how do I locate the problem?**

-A:Firstly, above error refers failed sending data to the device through the training data transfer channel (TDT). Here are several possible reasons for this error. Therefore, the corresponding checking suggestions are given in the log. In detail:
+A: First, the above error means that sending data to the device through the training data transfer channel (TDT) failed. There are several possible reasons for this error, so the corresponding checking suggestions are given in the log. In detail:

-  1. Commonly, we will find the first error (the first ERROR level error) or error traceBack thrown in the log, and try to find information that helps locate the cause of the error.
+  1. Commonly, find the first error (the first ERROR-level message) or the error traceback thrown in the log, and try to find information that helps locate the cause of the error.

  2. **When the error is raised in the graph compiling stage, before training has started** (for example, the loss has not been printed in the log), please check the error log for errors reported by network-related operators or errors caused by the environment configuration (such as an incorrect hccl.json, resulting in abnormal initialization of multi-device communication)

-  3. **When error raised during training process**, usually this is caused by the mismatch between the amount of data (batch number) sent by the host and the amount of data (step number) required for network training. You can print and check the number of batches of an epoch with `get_dataset_size` interface. And check the amount of data sent by the host and the amount of data received on the device (checking method is as follows):
-
-    ```bash
-    # Note: The amount of data refers to the number of columns. For example, a batch contains two columns (like image and label), then 5 batches contain 10 columns of data.
-    # Note: If the environment variable "export ASCEND_SLOG_PRINT_TO_STDOUT=1" is enabled, the log in the following plog file will be printed directly on the screen or in the redirected log file.
-
-    # Obtain data amount of host data process queue sending with tdt, pid is the training process id
-    # Log file name like: plog-64944-20210531165504682.log, we can find data amount with `|wc -l` to calculate the num of `has got` log.
-
-    grep -rn "has got" ~/ascend/log/plog/plog-pid_timestamp0.log
+  3. **When the error is raised during the training process**, it is usually caused by a mismatch between the amount of data (number of batches) that has been sent and the amount of data (number of steps) required for network training. You can print and check the number of batches in an epoch with the `get_dataset_size` interface. Several possible reasons are as follows:

-    # Calculate the data amount that host tdt sending to device tdt, find it with following command, value after the key words like"index is" refer to the data amount.
-    grep -rn "has sent" ~/ascend/log/plog/plog-pid_timestamp0.log
-
-    # Calculate the data amount received in device, pid refers to training process id, which is same with above host process id.
-    # Find with following command, the value after key words "index=" refers to the data amount received in device. 
-    grep -rn "enqueue data" ~/ascend/log/device-id/device-pid_timestamp1.log
-    ```
-
-    - If the amount of data sent by the host side is equal to the amount of data received by the device side, and that value is less than the amount of data that the network training needed in normal case, here sending data failed mainly due to abnormal data processing on the host side, resulting failure to feed network training. There are three possible analysing ideas:
-      - when data amount is just an integer multiple of the batches number in an epoch, there may be a problem in the data processing part involving Epoch processing, such as the following case:
+      - Check how many times the loss has been printed to figure out the number of trained steps when the error was raised. When this amount (trained steps) is exactly an integer multiple of the number of batches in an epoch, there may be a problem in the part of data processing that involves epoch handling, such as the following case:

       ```python
       ...
@@ -111,15 +162,10 @@ A:Firstly, above error refers failed sending data to the device through the tr
           return dataset
       ```

-      - The data processing performance is slow, and cannot keep up with the speed of network training. For this case, you can use the profiler tool and MindInsight to see if there is an obvious iteration gap, or manually iterating the dataset, and print the average single batch time , if longer than the combined forward and backward time of the network, there is a high probability that the performance of the data processing part needs to be optimized.
-      - Abnormal data occurred resulted exception raised during the training process, causing sending data failed. In this case, there will be other `ERROR` logs that shows which part of the data processing is abnormal and checking advice. If it is not obvious, you can also try to find the abnormal data by iterating each data batch in the dataset (such as turning off shuffle, and using dichotomy).
-      - if the data amount mismatch in host and device(commonly, host send much more data), in this case, there might be some problem in tdt module(like back pressure), here might need module developer helps to analyse the problem.
+      - The data processing performance is slow and cannot keep up with the speed of network training. In this case, you can use the profiler tool and MindInsight to see whether there is an obvious iteration gap, or manually iterate the dataset and print the average time per batch; if it is longer than the combined forward and backward time of the network, the performance of the data processing part most likely needs to be optimized.

-  4. **when error raised after training**(this is probably caused by forced release of resources), this error can be ignored.
+      - During the training process, abnormal data may raise an exception, causing data sending to fail. In this case, there will be other `ERROR` logs that show which part of the data processing is abnormal, along with checking advice. If it is not obvious, you can also try to find the abnormal data by iterating over each data batch in the dataset (for example, turning off shuffle and using bisection).

-  5. If still cannot locate the specific cause, please set log level into info level level of mindspore and CANN, and check the log to see the context near error location for helpful information. The CANN host log file path is: ~/ascend/log/plog/plog-pid-timestamp.log.
+  4. **When the error is raised after training** (probably caused by the forced release of resources), this error can be ignored.
-    ```bash
-    export GLOG_v=1 # set mindspore log level into info level
-    export GLOBAL_ASCEND_LOG_LEVEL=1 # set CANN log level into info level
-    ```
+  5. If the specific cause cannot be located, please create an issue or ask a question in the Huawei Cloud forum for help.

diff --git a/docs/faq/source_en/distributed_configure.md b/docs/faq/source_en/distributed_configure.md
index d9ea5d5598a1839ca0232dd86d7a5ff346f4a52f..6ac538db3b17c64f63b31bc27733ccde879d028a 100644
--- a/docs/faq/source_en/distributed_configure.md
+++ b/docs/faq/source_en/distributed_configure.md
@@ -20,13 +20,19 @@ A: This message means that MindSpore failed to load library `libgpu_collective.s
-**Q:The communication profile file needs to be configured on the Ascend environment, how should it be configured?**
+**Q: The communication profile file needs to be configured in the Ascend environment. How should it be configured?**

-A:Please refer to the [Configuring Distributed Environment Variables](https://mindspore.cn/tutorial/training/en/master/advanced_use/distributed_training_ascend.html#configuring-distributed-environment-variables) section of Ascend-based distributed training in the MindSpore tutorial.
+A: Please refer to the [Configuring Distributed Environment Variables](https://mindspore.cn/tutorial/training/en/master/advanced_use/distributed_training_ascend.html#configuring-distributed-environment-variables) section of Ascend-based distributed training in the MindSpore tutorial.
-**Q:How to perform distributed multi-machine multi-card training?** +**Q: How to perform distributed multi-machine multi-card training?** -A:For Ascend environment, please refer to the [Multi-machine Training](https://mindspore.cn/tutorial/training/en/master/advanced_use/distributed_training_ascend.html#multi-machine-training) section of the MindSpore tutorial "distributed_training_ascend". +A: For Ascend environment, please refer to the [Multi-machine Training](https://mindspore.cn/tutorial/training/en/master/advanced_use/distributed_training_ascend.html#multi-machine-training) section of the MindSpore tutorial "distributed_training_ascend". For GPU-based environments, please refer to the [Run Multi-Host Script](https://mindspore.cn/tutorial/training/en/master/advanced_use/distributed_training_gpu.html#running-the-multi-host-script) section of the MindSpore tutorial "distributed_training_gpu". + +
+

**Q: What is `rank_table_file`?**

A: `rank_table_file` is the name of the configuration file that describes the processor resources used for distributed training. It contains information such as the device IPs and the server IP. You usually need to set the environment variable `RANK_TABLE_FILE` to point to this file. For more information, please refer to [Configuring Processor Resources](https://support.huaweicloud.com/intl/en-us/ug-tf-training-tensorflow/atlasmprtg_13_0042.html). You can use [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) to generate the `rank_table_file` for the current server.

diff --git a/docs/faq/source_en/feature_advice.md b/docs/faq/source_en/feature_advice.md
index ad8fcbaf18c1dab87c85fc5c9ffb1230896b182b..c3131e7df9a8f0f9d2dc0e19ee6a627d3ec9b0ec 100644
--- a/docs/faq/source_en/feature_advice.md
+++ b/docs/faq/source_en/feature_advice.md
@@ -4,6 +4,44 @@

+**Q: Is the `input=np.random.uniform(...)` format fixed when the MindIR format is exported?**
+
+A: The format is not fixed. This step is to create an input for constructing the network structure. You only need to input the correct `shape` in `export`. You can use `np.ones` and `np.zeros` to create an input.
+
+<br/>
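For example, a minimal export sketch (assuming `net` is an already-constructed network; the input shape is illustrative):

```python
import numpy as np
from mindspore import Tensor
from mindspore.train.serialization import export

# Only the shape and dtype of the input matter for building the graph.
input_tensor = Tensor(np.ones([1, 3, 224, 224]).astype(np.float32))
export(net, input_tensor, file_name="net", file_format="MINDIR")
```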
+

**Q: What framework models and formats can be directly read by MindSpore? Can a PTH model obtained through training in PyTorch be loaded to the MindSpore framework for use?**

A: MindSpore uses protocol buffers (Protobuf) to store training parameters and cannot directly read models from other frameworks. A model file stores parameters and their values. You can use the APIs of other frameworks to read the parameters, obtain the key-value pairs, and load them to MindSpore. If you want to use the .ckpt file trained by another framework, read the parameters and then call the `save_checkpoint` API of MindSpore to save the file as a .ckpt file that can be read by MindSpore.

<br/>
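A hedged sketch of that conversion for a PyTorch `.pth` file (it assumes the parameter names already match the MindSpore network definition; rename them in the loop otherwise):

```python
import torch
from mindspore import Tensor
from mindspore.train.serialization import save_checkpoint

# Read the PyTorch key-value pairs and re-save them as a MindSpore checkpoint.
state_dict = torch.load("model.pth", map_location="cpu")
params_list = []
for name, value in state_dict.items():
    params_list.append({"name": name, "data": Tensor(value.numpy())})
save_checkpoint(params_list, "model.ckpt")
```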
+

**Q: What should I do if a Protobuf memory limit error is reported during the process of using ckpt or exporting a model?**

A: When a single piece of Protobuf data is too large, a memory limit error is reported because Protobuf itself limits the size of the data stream. In this case, the restriction can be lifted by setting the environment variable `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`.

<br/>
+

**Q: What is the difference between the PyNative and Graph modes?**

A: The two modes can be compared from the following four aspects:

- In terms of network execution: the operators used in the two modes are the same. Therefore, when the same network and operators are executed in the two modes, the accuracy is the same. As Graph mode uses technologies such as graph optimization and computational graph sinking, it has higher performance and efficiency in executing the network.

- In terms of application scenarios: Graph mode requires the network structure to be built at the beginning, after which the framework performs whole-graph optimization and execution. This mode is suitable for scenarios where the network is fixed and high performance is required.

- Both modes are supported on different hardware (such as `Ascend`, `GPU`, and `CPU`).

- In terms of code debugging: since operators are executed line by line in PyNative mode, you can directly debug the Python code and view the output or execution result of the corresponding operator at any breakpoint in the code. In Graph mode, the network is built but not executed in the `construct` function. Therefore, you cannot obtain the output of the corresponding operator at breakpoints in the `construct` function; you can only specify operators to print their output results, and view them after the network execution is completed.

<br/>
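For reference, the execution mode is selected through the context (a minimal sketch):

```python
from mindspore import context

# PyNative mode: operators run line by line, convenient for debugging.
context.set_context(mode=context.PYNATIVE_MODE)

# Graph mode: the whole network is compiled and optimized before execution.
context.set_context(mode=context.GRAPH_MODE)
```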
+ +**Q: Does MindSpore run only on Huawei `Ascend`?** + +A: MindSpore supports Huawei `Ascend`, `GPUs`, and `CPUs`, and supports heterogeneous computing. + +
+

 **Q: If MindSpore and PyTorch are installed in an environment, can the syntax of the two frameworks be used together in a Python file?**

A: You can use the two frameworks in the same Python file. Pay attention to the differences between types. For example, the tensor types created by the two frameworks are different, but the basic types of Python are general.

@@ -28,15 +66,15 @@ A: An AIR model cannot be exported from the Ascend 310. You need to load a train
-**Q: Does MindSpore have any limitation on the input size of a single Tensor for exporting and loading models?**
+**Q: Does MindSpore need a GPU computing unit? What hardware support is needed?**

-A: Due to hardware limitations of ProtoBuf, when exporting AIR and ONNX models, the size of a single Tensor cannot exceed 2G. When loading the MindIR model, a single Tensor cannot exceed 2G.
+A: MindSpore currently supports CPU, GPU, and Ascend. You can try out MindSpore through Docker images on laptops or in environments with GPUs. Some models in MindSpore Model Zoo support GPU-based training and inference, and other models are being improved. For distributed parallel training, MindSpore supports multi-GPU training. You can obtain the latest information from [Road Map](https://www.mindspore.cn/doc/note/en/master/roadmap.html) and [project release notes](https://gitee.com/mindspore/mindspore/blob/master/RELEASE.md#).
-**Q: Does MindSpore require computing units such as GPUs and NPUs? What hardware support is required?**
+**Q: Does MindSpore have any limitation on the input size of a single Tensor for exporting and loading models?**

-A: MindSpore currently supports CPU, GPU, Ascend, and NPU. Currently, you can try out MindSpore through Docker images on laptops or in environments with GPUs. Some models in MindSpore Model Zoo support GPU-based training and inference, and other models are being improved. For distributed parallel training, MindSpore supports multi-GPU training. You can obtain the latest information from [Road Map](https://www.mindspore.cn/doc/note/en/master/roadmap.html) and [project release notes](https://gitee.com/mindspore/mindspore/blob/master/RELEASE.md#).
+A: Due to the size limit of Protobuf, when exporting to the AIR or ONNX format, the total size of the model parameters cannot exceed 2G; when exporting to the MINDIR format, the size of a single Tensor cannot exceed 2G. For loading, MindSpore only supports the MINDIR format, and the size of a single Tensor likewise cannot exceed 2G.
@@ -60,7 +98,7 @@ A: Yes. For details, see [Definition and Usage of Truncated Gradient](https://gi

**Q: What is the MindSpore IR design concept?**

-A: Function expression: All expressions are functions, and differentiation and automatic parallel analysis are easy to implement without side effect. `JIT` compilation capability: The graph-based IR, control flow dependency, and data flow are combined to balance the universality and usability. Turing-complete IR: More flexible syntaxes are provided for converting `Python`, such as recursion.
+A: Function expression: All expressions are functions, and differentiation and automatic parallel analysis are easy to implement without side effect. `JIT` compilation capability: The graph-based IR, control flow dependency, and data flow are combined to balance the universality and usability. Graphically complete IR: more flexible `Python` syntax can be converted, including recursion.
@@ -94,24 +132,14 @@ A: The TensorFlow's object detection pipeline API belongs to the TensorFlow's Mo
-**Q: Does MindSpore Serving support hot loading to avoid inference service interruption?** - -A: MindSpore does not support hot loading. It is recommended that you run multiple Serving services and restart some of them when switching the version. - -
- -**Q: Does MindSpore Serving allow multiple workers to be started for one model to support multi-device and single-model concurrency?** +**Q: How do I perform transfer learning in PyNative mode?** -A: MindSpore Serving does not support distribution and this function is being developed. That is, multiple workers cannot be started for one model. It is recommended that multiple Serving services be deployed to implement distribution and load balancing. In addition, to avoid message forwarding between `master` and `worker`, you can use the `start_servable_in_master` API to enable `master` and `worker` to be executed in the same process, implementing lightweight deployment of the Serving services. +A: PyNative mode is compatible with transfer learning. For more tutorial information, see [Code for Loading a Pre-Trained Model](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/cv_mobilenetv2_fine_tune.html#code-for-loading-a-pre-trained-model).
-**Q: How does the MindSpore Serving version match the MindSpore version?**
+**Q: What is the difference between [MindSpore ModelZoo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo) and [Ascend ModelZoo](https://www.hiascend.com/software/modelzoo)?**

-A: MindSpore Serving matches MindSpore in the same version. For example, Serving `1.1.1` matches MindSpore `1.1.1`.
+A: `MindSpore ModelZoo` contains only models implemented in MindSpore, but these models support different devices, including Ascend, GPU, CPU and mobile. `Ascend ModelZoo` contains only models that run on Ascend, implemented on different ML platforms including MindSpore, PyTorch, TensorFlow and Caffe. You can refer to the corresponding [gitee repository](https://gitee.com/ascend/modelzoo).

-<br/>
-
-**Q: How do I perform transfer learning in PyNative mode?**
-
-A: PyNative mode is compatible with transfer learning. For more tutorial information, see [Code for Loading a Pre-Trained Model](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/cv_mobilenetv2_fine_tune.html#code-for-loading-a-pre-trained-model).
+Models implemented in MindSpore that run on Ascend are maintained in `MindSpore ModelZoo` and released to `Ascend ModelZoo` regularly.

diff --git a/docs/faq/source_en/frontend_compile.md b/docs/faq/source_en/frontend_compile.md
deleted file mode 100644
index ea009040677dbb33c547f9d21d9720b8c8089d26..0000000000000000000000000000000000000000
--- a/docs/faq/source_en/frontend_compile.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Frontend Compile
-
-`Linux` `Windows` `Ascend` `GPU` `CPU` `Environment Preparation` `Basic` `Intermediate`
-
-
-
-**Q:What can I do if an error "Create python object \`\` failed, only support create Cell or Primitive object." is reported?**
-
-A: Currently, tensors cannot be constructed on the network. That is, the syntax `x = Tensor(args...)` is not supported.
-
-If it is a constant tensor, please define it in the function `__init__`. If not, you can use the `@constexpr` decorator to modify the function and generate the `Tensor` in the function.
-
-Please see the usage of `@constexpr` in .
-
-The constant `Tensor` used on the network can be used as a network attribute and defined in `init`, that is, `self.x = Tensor(args...)`. Then the constant can be used in `construct`.
-
-In the following example, `Tensor` of `shape = (3, 4), dtype = int64` is generated by `@constexpr`.
-
-```python
-@constexpr
-def generate_tensor():
-    return Tensor(np.ones((3, 4)))
-```
-
-<br/>
- -**Q: How do I perform transfer learning in PyNative mode?** - -A: PyNative mode is compatible with transfer learning. For more tutorial information, see [Code for Loading a Pre-Trained Model](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/cv_mobilenetv2_fine_tune.html#code-for-loading-a-pre-trained-model). +As for the models implemented by MindSpore running on Ascend, these are maintained in `MindSpore ModelZoo`, and will be released to `Ascend ModelZoo` regularly. diff --git a/docs/faq/source_en/frontend_compile.md b/docs/faq/source_en/frontend_compile.md deleted file mode 100644 index ea009040677dbb33c547f9d21d9720b8c8089d26..0000000000000000000000000000000000000000 --- a/docs/faq/source_en/frontend_compile.md +++ /dev/null @@ -1,50 +0,0 @@ -# Frontend Compile - -`Linux` `Windows` `Ascend` `GPU` `CPU` `Environment Preparation` `Basic` `Intermediate` - - - -**Q:What can I do if an error "Create python object \`\` failed, only support create Cell or Primitive object." is reported?** - -A: Currently, tensors cannot be constructed on the network. That is, the syntax `x = Tensor(args...)` is not supported. - -If it is a constant tensor, please define it in the function `__init__`. If not, you can use the `@constexpr` decorator to modify the function and generate the `Tensor` in the function. - -Please see the usage of `@constexpr` in . - -The constant `Tensor` used on the network can be used as a network attribute and defined in `init`, that is, `self.x = Tensor(args...)`. Then the constant can be used in `construct`. - -In the following example, `Tensor` of `shape = (3, 4), dtype = int64` is generated by `@constexpr`. - -```python -@constexpr -def generate_tensor(): - return Tensor(np.ones((3, 4))) -``` - -
- -**Q:What can I do if an error "'self.xx' should be defined in the class '__init__' function." is reported?** - -A: If you want to assign for a class member such as `self.xx` in the function `construct`, `self.xx` must have been defined to a [`Parameter`]() type firstly while the other types are not supported. But the local variable `xx` is not under the regulation. - -
- -**Q:What can I do if an error "This comparator 'AnyValue' is not supported. For statement 'is', only support compare with 'None', 'False' or 'True'" is reported?** - -A: For the syntax `is` or `is not`, currently `MindSpore` only supports comparisons with `True`, `False` and `None`. Other types, such as strings, are not supported. - -
- -**Q:What can I do if an error "MindSpore does not support comparison with operators more than one now, ops size =2" is reported?** - -A: For comparison statements, `MindSpore` supports at most one operator. Please modify your code. For example, you can use `1 < x and x < 3` to take the place of `1 < x < 3`. - -
- -**Q:What can I do if an error "TypeError: The function construct need 1 positional argument and 0 default argument, but provided 2" is reported?** - -A: When you call the instance of a network, the function `construct` will be executed. And the program will check the number of parameters required by the function `construct` and the number of parameters actually given. If they are not equal, the above exception will be thrown. -Please check your code to make sure they are equal. - -
\ No newline at end of file diff --git a/docs/faq/source_en/frontend_syntax.md b/docs/faq/source_en/frontend_syntax.md deleted file mode 100644 index 1e76a4ada803455acd8c74d05096ff67eef862f2..0000000000000000000000000000000000000000 --- a/docs/faq/source_en/frontend_syntax.md +++ /dev/null @@ -1,121 +0,0 @@ -# Frontend Syntax - -`Linux` `Windows` `Ascend` `GPU` `CPU` `Environment Preparation` `Basic` `Intermediate` - - - -**Q: Is the `input=np.random.uniform(...)` format fixed when the MindIR format is exported?** - -A: The format is not fixed. This step is to create an input for constructing the network structure. You only need to input the correct `shape` in `export`. You can use `np.ones` and `np.zeros` to create an input. - -
- -**Q: How do I modify parameters (such as the dropout value) on MindSpore?** - -A: When building a network, use `if self.training: x = dropput(x)`. During verification, set `network.set_train(mode_false)` before execution to disable the dropout function. During training, set `network.set_train(mode_false)` to True to enable the dropout function. - -
- -**Q: How do I view the number of model parameters?** - -A: You can load the checkpoint to count the parameter number. Variables in the momentum and optimizer may be counted, so you need to filter them out. -You can refer to the following APIs to collect the number of network parameters: - -```python -def count_params(net): - """Count number of parameters in the network - Args: - net (mindspore.nn.Cell): Mindspore network instance - Returns: - total_params (int): Total number of trainable params - """ - total_params = 0 - for param in net.trainable_params(): - total_params += np.prod(param.shape) - return total_params -``` - -[Script Link](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/research/cv/tinynet/src/utils.py). - -
- -**Q: How do I monitor the loss during training and save the training parameters when the `loss` is the lowest?** - -A: You can customize a `callback`.For details, see the writing method of `ModelCheckpoint`. In addition, the logic for determining loss is added. - -```python -class EarlyStop(Callback): -def __init__(self): - self.loss = None -def step_end(self, run_context): - loss = ****(get current loss) - if (self.loss == None or loss < self.loss): - self.loss = loss - # do save ckpt -``` - -
- -**Q: How do I obtain the expected `feature map` when `nn.Conv2d` is used?** - -A: For details about how to derive the `Conv2d shape`, click [here](https://www.mindspore.cn/doc/api_python/en/master/mindspore/nn/mindspore.nn.Conv2d.html#mindspore.nn.Conv2d) Change `pad_mode` of `Conv2d` to `same`. Alternatively, you can calculate the `pad` based on the Conv2d shape derivation formula to keep the `shape` unchanged. Generally, the pad is `(kernel_size-1)//2`. - -
- -**Q: Can MindSpore be used to customize a loss function that can return multiple values?** - -A: After customizing the `loss function`, you need to customize `TrainOneStepCell`. The number of `sens` for implementing gradient calculation is the same as the number of `network` outputs. For details, see the following: - -```python -net = Net() - -loss_fn = MyLoss() - -loss_with_net = MyWithLossCell(net, loss_fn) - -train_net = MyTrainOneStepCell(loss_with_net, optim) - -model = Model(net=train_net, loss_fn=None, optimizer=None) -``` - -
- -**Q: How does MindSpore implement the early stopping function?** - -A: You can customize the `callback` method to implement the early stopping function. -Example: When the loss value decreases to a certain value, the training stops. - -```python -class EarlyStop(Callback): - def __init__(self, control_loss=1): - super(EarlyStep, self).__init__() - self._control_loss = control_loss - - def step_end(self, run_context): - cb_params = run_context.original_args() - loss = cb_params.net_outputs - if loss.asnumpy() < self._control_loss: - # Stop training. - run_context._stop_requested = True - -stop_cb = EarlyStop(control_loss=1) -model.train(epoch_size, ds_train, callbacks=[stop_cb]) -``` - -
- -**Q: After a model is trained, how do I save the model output in text or `npy` format?** - -A: The network output is `Tensor`. You need to use the `asnumpy()` method to convert the `Tensor` to `NumPy` and then save the data. For details, see the following: - -```python -out = net(x) - -np.save("output.npy", out.asnumpy()) -``` - -
-
-**Q: Does MindSpore run only on Huawei `NPUs`?**
-
-A: MindSpore supports Huawei Ascend `NPUs`, `GPUs`, and `CPUs`, and supports heterogeneous computing.
diff --git a/docs/faq/source_en/implement_problem.md b/docs/faq/source_en/implement_problem.md
new file mode 100644
index 0000000000000000000000000000000000000000..51fd11e2eba15eae03fd747f0d43c7f5ca6a5599
--- /dev/null
+++ b/docs/faq/source_en/implement_problem.md
@@ -0,0 +1,463 @@
+# Implement Problem
+
+`Linux` `Windows` `Ascend` `GPU` `CPU` `Environment Preparation` `Basic` `Intermediate`
+
+
+
+**Q: How do I modify parameters (such as the dropout value) on MindSpore?**
+
+A: When building a network, use `if self.training: x = dropout(x)`. For inference, call `network.set_train(False)` before execution to disable the dropout function; during training, call `network.set_train(True)` to enable it.
+
+<br/>
+

**Q: How do I view the number of model parameters?**

A: You can load the checkpoint to count the parameter number. Variables in the momentum and optimizer may be counted, so you need to filter them out.
You can refer to the following APIs to collect the number of network parameters:

```python
import numpy as np

def count_params(net):
    """Count number of parameters in the network
    Args:
        net (mindspore.nn.Cell): MindSpore network instance
    Returns:
        total_params (int): Total number of trainable params
    """
    total_params = 0
    for param in net.trainable_params():
        total_params += np.prod(param.shape)
    return total_params
```

[Script Link](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/research/cv/tinynet/src/utils.py).

<br/>
+

**Q: How do I monitor the loss during training and save the training parameters when the `loss` is the lowest?**

A: You can customize a `callback`. For details, see how `ModelCheckpoint` is written, and add the logic for checking the loss on top of it.

```python
class EarlyStop(Callback):
    def __init__(self):
        super(EarlyStop, self).__init__()
        self.loss = None
    def step_end(self, run_context):
        # Obtain the current loss from the callback parameters.
        loss = run_context.original_args().net_outputs.asnumpy()
        if self.loss is None or loss < self.loss:
            self.loss = loss
            # Save the checkpoint here.
```

<br/>
+

**Q: How do I obtain the expected `feature map` when `nn.Conv2d` is used?**

A: For details about how to derive the `Conv2d` output shape, click [here](https://www.mindspore.cn/doc/api_python/en/master/mindspore/nn/mindspore.nn.Conv2d.html#mindspore.nn.Conv2d). Change `pad_mode` of `Conv2d` to `same`. Alternatively, you can calculate the `pad` based on the `Conv2d` shape derivation formula to keep the `shape` unchanged; generally, the pad is `(kernel_size-1)//2`.

<br/>
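A minimal sketch of both options (channel counts are illustrative):

```python
import mindspore.nn as nn

# Option 1: let the framework keep the spatial shape automatically.
conv_same = nn.Conv2d(16, 32, kernel_size=3, pad_mode="same")

# Option 2: explicit padding; for stride 1, pad = (kernel_size - 1) // 2.
conv_pad = nn.Conv2d(16, 32, kernel_size=3, pad_mode="pad", padding=(3 - 1) // 2)
```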
+

**Q: Can MindSpore be used to customize a loss function that can return multiple values?**

A: After customizing the `loss function`, you need to customize `TrainOneStepCell`. The number of `sens` for implementing gradient calculation is the same as the number of `network` outputs. For details, see the following:

```python
# Net, MyLoss, MyWithLossCell and MyTrainOneStepCell are user-defined classes;
# optim is an optimizer instance created beforehand.
net = Net()
loss_fn = MyLoss()
loss_with_net = MyWithLossCell(net, loss_fn)
train_net = MyTrainOneStepCell(loss_with_net, optim)
model = Model(net=train_net, loss_fn=None, optimizer=None)
```

<br/>
+

**Q: How does MindSpore implement the early stopping function?**

A: You can customize the `callback` method to implement the early stopping function.
Example: When the loss value decreases to a certain value, the training stops.

```python
class EarlyStop(Callback):
    def __init__(self, control_loss=1):
        super(EarlyStop, self).__init__()
        self._control_loss = control_loss

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        loss = cb_params.net_outputs
        if loss.asnumpy() < self._control_loss:
            # Stop training.
            run_context.request_stop()

stop_cb = EarlyStop(control_loss=1)
model.train(epoch_size, ds_train, callbacks=[stop_cb])
```

<br/>
+

**Q: After a model is trained, how do I save the model output in text or `npy` format?**

A: The network output is a `Tensor`. You need to use the `asnumpy()` method to convert the `Tensor` to a `NumPy` array and then save the data. For details, see the following:

```python
import numpy as np

out = net(x)
np.save("output.npy", out.asnumpy())
```

<br/>
+

**Q: What can I do if an error "Create python object \`\` failed, only support create Cell or Primitive object." is reported?**

A: Currently in graph mode, the `construct` function (or the function decorated by the `@ms_function` decorator) only supports the construction of `Cell` and `Primitive object`. The construction of `Tensor` is not supported, that is, the syntax `x = Tensor(args...)` is not supported.

If it is a constant tensor, please define it in the function `__init__`. If not, you can use the `@constexpr` decorator to modify the function and generate the `Tensor` in the function.

Please refer to the API documentation for the usage of `@constexpr`.

The constant `Tensor` used on the network can be used as a network attribute and defined in `__init__`, that is, `self.x = Tensor(args...)`. Then the constant can be used in the `construct` function (or the function decorated by the `@ms_function` decorator).

In the following example, a `Tensor` of `shape = (3, 4), dtype = int64` is generated by `@constexpr`.

```python
@constexpr
def generate_tensor():
    return Tensor(np.ones((3, 4)).astype(np.int64))
```

<br/>
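A short sketch combining both patterns described above (the network, names, and shapes are illustrative assumptions):

```python
import numpy as np
from mindspore import nn, Tensor
from mindspore.ops import constexpr

@constexpr
def generate_tensor():
    return Tensor(np.ones((3, 4)).astype(np.int64))

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        # Pattern 1: a constant Tensor defined as a network attribute in __init__.
        self.x = Tensor(np.ones((3, 4)).astype(np.float32))

    def construct(self, y):
        # Pattern 2: a Tensor created at graph-compile time via @constexpr.
        z = generate_tensor()
        return self.x + y, z
```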
+

**Q: What can I do if an error "'self.xx' should be defined in the class '__init__' function." is reported?**

A: If you want to assign to a class member such as `self.xx` in the `construct` function, `self.xx` must first have been defined as a [`Parameter`]() type in `__init__`; other types are not supported. A local variable `xx` is not subject to this restriction.

<br/>
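A minimal sketch of the allowed pattern (the names and shapes are illustrative assumptions):

```python
import numpy as np
from mindspore import nn, Tensor, Parameter

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        # self.xx must be declared as a Parameter to be assignable in construct.
        self.xx = Parameter(Tensor(np.zeros(4).astype(np.float32)), name="xx")

    def construct(self, y):
        self.xx = y   # allowed: self.xx is a Parameter
        xx = y * 2    # local variables are not restricted
        return self.xx + xx
```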
+ +**Q: What can I do if an error "This comparator 'AnyValue' is not supported. For statement 'is', only support compare with 'None', 'False' or 'True'" is reported?** + +A: For the syntax `is` or `is not`, currently `MindSpore` only supports comparisons with `True`, `False` and `None`. Other types, such as strings, are not supported. + +
+ +**Q: What can I do if an error "MindSpore does not support comparison with operators more than one now, ops size =2" is reported?** + +A: For comparison statements, `MindSpore` supports at most one operator. Please modify your code. For example, you can use `1 < x and x < 3` to take the place of `1 < x < 3`. + +
+ +**Q: What can I do if an error "TypeError: The function construct need 1 positional argument and 0 default argument, but provided 2" is reported?** + +A: When you call the instance of a network, the function `construct` will be executed. And the program will check the number of parameters required by the function `construct` and the number of parameters actually given. If they are not equal, the above exception will be thrown. +Please check your code to make sure they are equal. + +
+ +**Q: What can I do if an error "Type Join Failed" or "Shape Join Failed" is reported?** + +A: In the inference stage of front-end compilation, the abstract types of nodes, including `type` and `shape`, will be inferred. Common abstract types include `AbstractScalar`, `AbstractTensor`, `AbstractFunction`, `AbstractTuple`, `AbstractList`, etc. In some scenarios, such as multi-branch scenarios, the abstract types of the return values of different branches will be joined to infer the abstract type of the returned result. If these abstract types do not match, or `type`/`shape` are inconsistent, the above exception will be thrown. + +When an error similar to "Type Join Failed: dtype1 = Float32, dtype2 = Float16" appears, it means that the data types are inconsistent, resulting in an exception when joining abstract. According to the provided data types and code line, the error can be quickly located. In addition, the specific abstract information and node information are provided in the error message. You can view the MindIR information through the `analyze_fail.dat` file to locate and solve the problem. For specific introduction of MindIR, please refer to [MindSpore IR (MindIR)](https://www.mindspore.cn/doc/note/en/master/design/mindspore/mindir.html). The code sample is as follows: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import nn, Tensor, context + +context.set_context(mode=context.GRAPH_MODE) +class Net(nn.Cell): + def __init__(self): + super().__init__() + self.relu = ops.ReLU() + self.cast = ops.Cast() + + def construct(self, x, a, b): + if a > b: + return self.relu(x) + else: + return self.cast(self.relu(x), ms.float16) + +input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32)) +input_a = Tensor(2, ms.float32) +input_b = Tensor(6, ms.float32) +net = Net() +out_me = net(input_x, input_a, input_b) +``` + +The result is as follows: + +```text +TypeError: The return values of different branches do not match. Type Join Failed: dtype1 = Float32, dtype2 = Float16. The abstract type of the return value of the current branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float16, Value: AnyValue, Shape: NoShape), value_ptr: 0x32ed00e0, value: AnyValue), and that of the previous branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x32ed00e0, value: AnyValue). Please check the node construct.4:[CNode]5{[0]: [CNode]6}, true branch: ✓construct.2, false branch: ✗construct.3. trace: +In file test_join.py(14)/ if a > b:/ + +The function call stack (See file 'analyze_fail.dat' for more details): +# 0 In file test_join.py(14) + if a > b: +``` + +When an error similar to "Shape Join Failed: shape1 = (2, 3, 4, 5), shape2 = ()" appears, it means that the shapes are inconsistent, resulting in an exception when joining abstract. 
The code sample is as follows: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import nn, Tensor, context + +context.set_context(mode=context.GRAPH_MODE) +class Net(nn.Cell): + def __init__(self): + super().__init__() + self.relu = ops.ReLU() + self.reducesum = ops.ReduceSum() + + def construct(self, x, a, b): + if a > b: + return self.relu(x) + else: + return self.reducesum(x) + +input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32)) +input_a = Tensor(2, ms.float32) +input_b = Tensor(6, ms.float32) +net = Net() +out = net(input_x, input_a, input_b) +``` + +The result is as follows: + +```text +ValueError: The return values of different branches do not match. Shape Join Failed: shape1 = (2, 3, 4, 5), shape2 = (). The abstract type of the return value of the current branch is AbstractTensor(shape: (), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x239b5120, value: AnyValue), and that of the previous branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x239b5120, value: AnyValue). Please check the node construct.4:[CNode]5{[0]: [CNode]6}, true branch: ✓construct.2, false branch: ✗construct.3. trace: +In file test_join1.py(14)/ if a > b:/ + +The function call stack (See file 'analyze_fail.dat' for more details): +# 0 In file test_join1.py(14) + if a > b: +``` + +When an error similar to "Type Join Failed: abstract type AbstractTensor can not join with AbstractTuple" appears, it means that the two abstract types are mismatched. You need to review the code and modify it based on the provided code line and other error information. + +
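For the first (dtype) example above, a possible fix sketch (reusing the imports from that example) is to make every branch yield the same dtype and shape, for instance by applying the cast once after the branch:

```python
class FixedNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.relu = ops.ReLU()
        self.cast = ops.Cast()

    def construct(self, x, a, b):
        if a > b:
            out = self.relu(x)
        else:
            out = self.relu(x) * 2
        # Both branches produce float32 tensors of the same shape, and the
        # single cast keeps the return type consistent across branches.
        return self.cast(out, ms.float16)
```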
+ +**Q: Can the `vgg16` model be loaded and transferred on a GPU using the Hub?** + +A: Yes, but you need to manually modify the following two arguments: + +```python +# Add the **kwargs argument as follows: +def vgg16(num_classes=1000, args=None, phase="train", **kwargs): +``` + +```python +# Add the **kwargs argument as follows: +net = Vgg(cfg['16'], num_classes=num_classes, args=args, batch_norm=args.batch_norm, phase=phase, **kwargs) +``` + +
+

**Q: How to obtain middle-layer features of a VGG model?**

A: Obtaining the middle-layer features of a network is not closely related to the specific framework. For the `vgg` model defined in `torchvision`, the `features` field can be used to obtain the middle-layer features. The `vgg` source code of `torchvision` is as follows:

```python
class VGG(nn.Module):

    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
```

For the `vgg16` defined in the ModelZoo of MindSpore, the middle-layer features can be obtained through the `layers` field as follows:

```python
network = vgg16()
print(network.layers)
```

<br/>
+ +**Q: When MindSpore is used for model training, there are four input parameters for `CTCLoss`: `inputs`, `labels_indices`, `labels_values`, and `sequence_length`. How do I use `CTCLoss` for model training?** + +A: The `dataset` received by the defined `model.train` API can consist of multiple pieces of data, for example, (`data1`, `data2`, `data3`, ...). Therefore, the `dataset` can contain `inputs`, `labels_indices`, `labels_values`, and `sequence_length` information. You only need to define the dataset in the corresponding format and transfer it to `model.train`. For details, see [Data Processing API](https://www.mindspore.cn/doc/programming_guide/en/master/dataset_loading.html). + +
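A hedged sketch of such a dataset (the column shapes and dummy values are assumptions for illustration, not the exact `CTCLoss` requirements):

```python
import numpy as np
import mindspore.dataset as ds

def ctc_data_generator():
    # Dummy samples for illustration only; replace with your real data.
    for _ in range(8):
        inputs = np.random.randn(100, 1, 64).astype(np.float32)
        labels_indices = np.array([[0, 0], [0, 1]], dtype=np.int64)
        labels_values = np.array([1, 2], dtype=np.int32)
        sequence_length = np.array([100], dtype=np.int32)
        yield inputs, labels_indices, labels_values, sequence_length

data_set = ds.GeneratorDataset(ctc_data_generator,
                               column_names=["inputs", "labels_indices", "labels_values", "sequence_length"])
# data_set can then be passed to model.train, where `model` is assumed to wrap the network and CTCLoss.
```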
+

**Q: How do I load the PyTorch weight to MindSpore during model transfer?**

A: First, read the `PTH` file of PyTorch. Take `ResNet-18` as an example: if the network structure of MindSpore is the same as that of PyTorch, the converted parameters can be loaded directly to the network. Only `BN` and `Conv2D` layers are involved during loading; if the names of other layers differ between MindSpore and PyTorch, change them to the same names.

<br/>
+ +**Q: What are the available recommendation or text generation networks or models provided by MindSpore?** + +A: Currently, recommendation models such as Wide & Deep, DeepFM, and NCF are under development. In the natural language processing (NLP) field, Bert\_NEZHA is available and models such as MASS are under development. You can rebuild the network into a text generation network based on the scenario requirements. Please stay tuned for updates on the [MindSpore Model Zoo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). + +
+ +**Q: How do I use MindSpore to fit functions such as $f(x)=a \times sin(x)+b$?** + +A: The following is based on the official MindSpore linear fitting case. + +```python +# The fitting function is: f(x)=2*sin(x)+3. +import numpy as np +from mindspore import dataset as ds +from mindspore.common.initializer import Normal +from mindspore import nn, Model, context +from mindspore.train.callback import LossMonitor + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + +def get_data(num, w=2.0, b=3.0): + # f(x)=w * sin(x) + b + # f(x)=2 * sin(x) +3 + for i in range(num): + x = np.random.uniform(-np.pi, np.pi) + noise = np.random.normal(0, 1) + y = w * np.sin(x) + b + noise + yield np.array([np.sin(x)]).astype(np.float32), np.array([y]).astype(np.float32) + +def create_dataset(num_data, batch_size=16, repeat_size=1): + input_data = ds.GeneratorDataset(list(get_data(num_data)), column_names=['data','label']) + input_data = input_data.batch(batch_size) + input_data = input_data.repeat(repeat_size) + return input_data + +class LinearNet(nn.Cell): + def __init__(self): + super(LinearNet, self).__init__() + self.fc = nn.Dense(1, 1, Normal(0.02), Normal(0.02)) + + def construct(self, x): + x = self.fc(x) + return x + +if __name__ == "__main__": + + num_data = 1600 + batch_size = 16 + repeat_size = 1 + lr = 0.005 + momentum = 0.9 + + net = LinearNet() + net_loss = nn.loss.MSELoss() + opt = nn.Momentum(net.trainable_params(), lr, momentum) + model = Model(net, net_loss, opt) + + ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) + model.train(1, ds_train, callbacks=LossMonitor(), dataset_sink_mode=False) + + print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1]) +``` + +
+ +**Q: How do I use MindSpore to fit quadratic functions such as $f(x)=ax^2+bx+c$?** + +A: The following code is referenced from the official [MindSpore tutorial code](https://gitee.com/mindspore/docs/blob/master/tutorials/tutorial_code/linear_regression.py). + +Modify the following items to fit $f(x) = ax^2 + bx + c$: + +1. Dataset generation. +2. Network fitting. +3. Optimizer. + +The following explains detailed information about the modification: + +```python +# The selected optimizer does not support CPUs. Therefore, the GPU computing platform is used for training. You need to install MindSpore of the GPU version. +context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + +# Assume that the function to be fitted is f(x)=2x^2+3x+4. Modify the data generation function as follows: +def get_data(num, a=2.0, b=3.0 ,c = 4): + for i in range(num): + x = np.random.uniform(-10.0, 10.0) + noise = np.random.normal(0, 1) + # For details about how to generate the value of y, see the to-be-fitted objective function ax^2+bx+c. + y = x * x * a + x * b + c + noise + # When fitting a*x^2 + b*x +c, a and b are weight parameters, and c is the offset parameter bias. The training data corresponding to the two weights is x^2 and x, respectively. Therefore, the dataset generation mode is changed as follows: + yield np.array([x*x, x]).astype(np.float32), np.array([y]).astype(np.float32) + +def create_dataset(num_data, batch_size=16, repeat_size=1): + input_data = ds.GeneratorDataset(list(get_data(num_data)), column_names=['data','label']) + input_data = input_data.batch(batch_size) + input_data = input_data.repeat(repeat_size) + return input_data + +class LinearNet(nn.Cell): + def __init__(self): + super(LinearNet, self).__init__() + # Two training parameters are input for the full connection function. Therefore, the input value is changed to 2. The first Normal(0.02) automatically allocates random weights to the two input parameters, and the second Normal is the random bias. + self.fc = nn.Dense(2, 1, Normal(0.02), Normal(0.02)) + + def construct(self, x): + x = self.fc(x) + return x + +if __name__ == "__main__": + num_data = 1600 + batch_size = 16 + repeat_size = 1 + lr = 0.005 + momentum = 0.9 + + net = LinearNet() + net_loss = nn.loss.MSELoss() + # RMSProp optimizer with better effect is selected for quadratic function fitting. Currently, Ascend and GPU computing platforms are supported. + opt = nn.RMSProp(net.trainable_params(), learning_rate=0.1) + model = Model(net, net_loss, opt) + + ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) + model.train(1, ds_train, callbacks=LossMonitor(), dataset_sink_mode=False) + + print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1]) +``` + +
+ +**Q: How do I execute a single `ut` case in `mindspore/tests`?** + +A: `ut` cases are usually based on the MindSpore package of the debug version, which is not provided on the official website. You can run `sh build.sh` to compile the source code and then run the `pytest` command. The compilation in debug mode does not depend on the backend. Run the `sh build.sh -t on` command. For details about how to execute cases, see the `tests/runtest.sh` script. + +
+

**Q: For Ascend users, how do I get more detailed logs when the `run task error` is reported?**

A: Use the msnpureport tool to set the on-device log level. The tool is stored in `/usr/local/Ascend/driver/tools/msnpureport`.

```bash
# Global log level:
/usr/local/Ascend/driver/tools/msnpureport -g info

# Module-level log level:
/usr/local/Ascend/driver/tools/msnpureport -m SLOG:error

# Event-level logging:
/usr/local/Ascend/driver/tools/msnpureport -e disable/enable

# Log level for a single device ID:
/usr/local/Ascend/driver/tools/msnpureport -d 1 -g warning
```

Assume that the value range of deviceID is [0, 7], and `devices 0–3` and `devices 4–7` are on the same OS. `Devices 0–3` share the same log configuration file and `devices 4–7` share the same configuration file. In this way, changing the log level of any device (for example device 0) will change that of other devices (for example `devices 1–3`). This rule also applies to `devices 4–7`.

After the driver package is installed (assuming that the installation path is /usr/local/HiAI and the execution file `msnpureport.exe` is in the C:\ProgramFiles\Huawei\Ascend\Driver\tools\ directory on Windows), run the command in the /home/shihangbo/ directory to export logs on the device to the current directory and store logs in a folder named after the timestamp.

<br/>
+ +**Q: How do I change hyperparameters for calculating loss values during neural network training?** + +A: Sorry, this function is not available yet. You can find the optimal hyperparameters by training, redefining an optimizer, and then training. + +
+

**Q: What should I do when error `error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory` prompts during application running?**

A: When installing the Ascend 310 AI Processor software packages, the full-featured `toolkit` version of the `CANN` package should be installed instead of the `nnrt` version.

<br/>
+

**Q: Why does `context.set_ps_context(enable_ps=True)` in `model_zoo/official/cv/resnet/train.py` in the MindSpore code have to be set before `init`?**

A: In MindSpore Ascend mode, if `init` is called first, all processes are allocated device cards. However, in parameter server training mode, the server does not need to be allocated a card; the worker and server would then use the same card, resulting in the error: Hccl dependent tsd is not open.

diff --git a/docs/faq/source_en/index.rst b/docs/faq/source_en/index.rst
index 0f121bd484918440bd536f99ba0cb0e78d4f3928..b59032531f5cd8bd93062f7e43a615229f204927 100644
--- a/docs/faq/source_en/index.rst
+++ b/docs/faq/source_en/index.rst
@@ -11,13 +11,10 @@ MindSpore FAQ
    :maxdepth: 1

    installation
-   frontend_syntax
-   frontend_compile
-   backend_compile
-   operators_compile
    data_processing
-   script_implement
+   implement_problem
    training_visualization
+   operators_compile
    usage_migrate_3rd
    performance_tuning
    precision_tuning
diff --git a/docs/faq/source_en/inference.md b/docs/faq/source_en/inference.md
index e81c11d3eceb0079581e8940890e53b3d55d52d8..df823498d7340e329e0fad6c0bc27c5414645fea 100644
--- a/docs/faq/source_en/inference.md
+++ b/docs/faq/source_en/inference.md
@@ -14,21 +14,21 @@

 ## MindSpore C++ Library Use

-**Q:What should I do when error `/usr/bin/ld: warning: libxxx.so, needed by libmindspore.so, not found` prompts during application compiling?**
+**Q: What should I do when error `/usr/bin/ld: warning: libxxx.so, needed by libmindspore.so, not found` prompts during application compiling?**

-A:Find the directory where the missing dynamic library file is located, add the path to the environment variable `LD_LIBRARY_PATH`, and refer to [Inference Using the MindIR Model on Ascend 310 AI Processors#Building Inference Code](https://www.mindspore.cn/tutorial/inference/en/master/multi_platform_inference_ascend_310_mindir.html#building-inference-code) for environment variable settings.
+A: Find the directory where the missing dynamic library file is located, add the path to the environment variable `LD_LIBRARY_PATH`, and refer to [Inference Using the MindIR Model on Ascend 310 AI Processors#Building Inference Code](https://www.mindspore.cn/tutorial/inference/en/master/multi_platform_inference_ascend_310_mindir.html#building-inference-code) for environment variable settings.
-**Q:What should I do when error `ModuleNotFoundError: No module named 'te'` prompts during application running?** +**Q: What should I do when error `ModuleNotFoundError: No module named 'te'` prompts during application running?** -A:First confirm whether the system environment is installed correctly and whether the whl packages such as `te` and `topi` are installed correctly. If there are multiple Python versions in the user environment, such as Conda virtual environment, you need to execute `ldd name_of_your_executable_app` to confirm whether the application link `libpython3.7m.so.1.0` is consistent with the current Python directory, if not, you need to adjust the order of the environment variable `LD_LIBRARY_PATH` . +A: First confirm whether the system environment is installed correctly and whether the whl packages such as `te` and `topi` are installed correctly. If there are multiple Python versions in the user environment, such as Conda virtual environment, you need to execute `ldd name_of_your_executable_app` to confirm whether the application link `libpython3.7m.so.1.0` is consistent with the current Python directory, if not, you need to adjust the order of the environment variable `LD_LIBRARY_PATH` .
-**Q:What should I do when error `error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory` prompts during application running?** +**Q: What should I do when error `error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory` prompts during application running?** -A:While installing Ascend 310 AI Processor software packages,the `CANN` package should install the full-featured `toolkit` version instead of the `nnrt` version. +A: While installing Ascend 310 AI Processor software packages,the `CANN` package should install the full-featured `toolkit` version instead of the `nnrt` version. ## MindSpore Serving @@ -40,7 +40,7 @@ A: MindSpore Serving does not support hot update. You need to restart MindSpore **Q: Does MindSpore Serving allow multiple workers to be started for one model to support multi-device and single-model concurrency?** -A: MindSpore Serving does not support distribution and this function is being developed. That is, multiple workers cannot be started for one model. It is recommended that multiple Serving services be deployed to implement distribution and load balancing. In addition, to avoid message forwarding between `master` and `worker`, you can use the `start_servable_in_master` API to enable `master` and `worker` to be executed in the same process, implementing lightweight deployment of the Serving services. +A: After MindSpore Serving version 1.3, it supports the deployment of multiple copies of a model in multiple cards to achieve concurrent execution of multiple cards and single models. For details, please refer to [Add Sample](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_server.py).
@@ -50,7 +50,7 @@ A: MindSpore Serving matches MindSpore in the same version. For example, Serving
-**Q: What is the difference between `bash -p` and `bash -e` when an error is reported during application build?** +**Q: What is the difference between using `bash -p` method and `bash -e` method when compiling?** A: MindSpore Serving build and running depend on MindSpore. Serving provides two build modes: 1. Use `bash -p {python site-packages}/mindspore/lib` to specify an installed MindSpore path to avoid building MindSpore when building Serving. 2. Build Serving and the corresponding MindSpore. Serving passes the `-e`, `-V`, and `-j` options to MindSpore. For example, use `bash -e ascend -V 910 -j32` in the Serving directory as follows: @@ -64,12 +64,18 @@ For example, use `bash -e ascend -V 910 -j32` in the Serving directory as follow A: Check whether MindSpore that MindSpore Serving depends on is installed. In Serving 1.1, `LD_LIBRARY_PATH` needs to be configured to explicitly specify the path of `libmindspore.so`. `libmindspore.so` is in the `lib` directory of the MindSpore Python installation path. In Serving 1.2 or later, the path of `libmindspore.so` does not need to be specified. Serving searches for and adds `LD_LIBRARY_PATH` based on the MindSpore installation path, which does not need to be perceived by users. -**Q:How to control the output of Serving log?** +**Q: How to control the output of Serving log?** -A:MindSpore Serving uses glog to output logs, for more details, please refer to [Log-related Environment Variables and Configurations](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/custom_debugging_info.html?highlight=GLOG#log-related-environment-variables-and-configurations). On this basis, additional supplementary contents are as follows: +A: MindSpore Serving uses glog to output logs, for more details, please refer to [Log-related Environment Variables and Configurations](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/custom_debugging_info.html?highlight=GLOG#log-related-environment-variables-and-configurations). On this basis, additional supplementary contents are as follows: - MS_SUBMODULE_LOG_v This environment variable can also be used to control the log level of MindSpore Serving in addition to specifying the log level of each sub module of MindSpore C++. We can use GLOG_v=2 MS_SUBMODULE_LOG_v="{SERVING:1}" to set the log level of the Serving module to INFO, and the log level of other modules to WARNING. + +
+
+**Q: What can I do if an error `libmindspore.so: cannot open shared object file: No such file or directory` is reported during application running?**
+
+A: Check whether the MindSpore that MindSpore Serving depends on is installed. In Serving 1.1, `LD_LIBRARY_PATH` needs to be configured to explicitly specify the path of `libmindspore.so`, which is in the `lib` directory of the MindSpore Python installation path. In Serving 1.2 or later, the path of `libmindspore.so` does not need to be specified: Serving searches for it based on the MindSpore installation path and appends it to `LD_LIBRARY_PATH`, so users do not need to be aware of it.
diff --git a/docs/faq/source_en/installation.md b/docs/faq/source_en/installation.md
index 9eed71f22c23233eefeb2c89bbc69cb4cf75127b..4ffd0ee76b2dc40e7427e24a7b04219cd8726653 100644
--- a/docs/faq/source_en/installation.md
+++ b/docs/faq/source_en/installation.md
@@ -17,7 +17,7 @@

 ## Installing Using pip

-**Q: What can I do if an error message `cannot open shared object file:file such file or directory` is displayed when I install MindSpore of the GPU, CUDA 10.1, 0.5.0-beta, or Ubuntu-x86 version?** +**Q: When installing the GPU, CUDA 10.1, 0.5.0-beta version of MindSpore, the error `cannot open shared object file: No such file or directory` is reported. What should I do?**

 A: The error message indicates that the cuBLAS library is not found. Generally, the cause is that the cuBLAS library is not installed or is not added to the environment variable. Generally, cuBLAS is installed together with CUDA and the driver. After the installation, add the directory where cuBLAS is located to the `LD_LIBRARY_PATH` environment variable.

@@ -47,9 +47,9 @@ A: MindSpore utilizes many of the new features in Python3.7+,therefore we reco
-**Q: Any specific requirements for protobuf version when use MindSpore?** +**Q: Any specific requirements for the Protobuf version when using MindSpore?**

-A: MindSpore installs version 3.8.0 of protobuf by default. If you have installed 3.12.0 or later version of protobuf locally, there will be many warnings in the log when using pytest to test the code. It is recommended that you use the command 'pip install protobuf==3.8.0' to reinstall version 3.8.0. +A: MindSpore installs Protobuf 3.13.0 by default. If a different version is installed locally, there will be many warnings in the log when using pytest to test the code. It is recommended that you run `pip install protobuf==3.13.0` to reinstall version 3.13.0.
@@ -74,13 +74,13 @@ A: Please execute `pip config list` to check the package index `index-url`. Some

 **Q: What should I do if I cannot find whl package for MindInsight or MindArmour on the installation page of MindSpore website?**

-A: You can download whl package from the official [MindSpore Website download page](https://www.mindspore.cn/versions) and manually install it via `pip install`. +A: You can download the whl package from the official [MindSpore Website download page](https://www.mindspore.cn/versions/en) and manually install it via `pip install`.
 **Q: For Ascend users, what should I do when `RuntimeError: json.exception.parse_error.101 parse error at line 1, column 1: syntax error while parsing value - invalid literal; last read: 'T'` appears in personal Conda environment?**

-A: When you encounter the error, you should update the `te/topi/hccl` python toolkits, unload them firstly and then using command `pip install /usr/local/Ascend/fwkacllib/lib64/{te/topi/hccl}*any.whl` to reinstall. +A: When you encounter the error, you should update the `te/topi/hccl` Python toolkits: uninstall them first and then reinstall them using the command `pip install /usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/{te/topi/hccl}-{version}-py3-none-any.whl`.
@@ -92,7 +92,7 @@ A: Set environment variables and specify the Android NDK path using `export ANDR
-**Q: A sample fails to be executed after I installed MindSpore 0.6.0 beta on Ascend 910 using Ubuntu_aarch64 and Python 3.7.5 and manually downloaded the .whl package of the corresponding version, compiled and installed GMP6.1.2, and installed other Python library dependencies. An error message is displayed, indicating that the .so file cannot be found. What can I do?** +**Q: MindSpore installation: version 0.6.0-beta + Ascend 910 + Ubuntu_aarch64 + Python 3.7.5. I manually downloaded the whl package of the corresponding version, compiled and installed GMP 6.1.2, and installed the other Python library dependencies, but executing the sample fails with an error that a .so file cannot be found. What should I do?**

 A: The `libdatatransfer.so` dynamic library is in the `fwkacllib/lib64` directory. Find the path of the library in the `/usr/local` directory, and then add the path to the `LD_LIBRARY_PATH` environment variable. After the settings take effect, execute the sample again.

@@ -100,7 +100,7 @@ A: The `libdatatransfer.so` dynamic library is in the `fwkacllib/lib64` director

 **Q: What should I do if the compilation time of MindSpore source code takes too long or the process is constantly interrupted by errors?**

-A: MindSpore imports third party dependencies through submodule mechanism, among which `protobuf` v3.8.0 might not have the optimal or steady download speed, it is recommended that you perform package cache in advance. +A: MindSpore imports third-party dependencies through the submodule mechanism, among which `Protobuf` v3.8.0 might not have an optimal or steady download speed; it is recommended that you cache the packages in advance.
@@ -138,25 +138,25 @@ A: While compiling MindSpore, if:
-**Q: what should I do when an error message `No module named 'mindpore.version'` is displayed when I execute the case? +**Q: What should I do when an error message `No module named 'mindspore.version'` is displayed when I execute the case?**

 A: Maybe you executed the case in a path with the same name as the MindSpore installation package. Rename the directory or exit one or more levels of directory to solve the problem.
-**Q: what should I do when an error message `MD5 does not match` is displayed when I execute the case? +**Q: What should I do when an error message `MD5 does not match` is displayed when I execute the case?**

 A: This kind of error may be caused by network problems while downloading some third-party libraries. The MD5 check then fails against an incomplete file when you recompile the project. Remove the related third-party files from the .mslib cache path and recompile the project to solve the problem.
-**Q:What should I do if it prompts that `pthread not found` in CMakeError.txt after the compilation fails?** +**Q: What should I do if `pthread not found` is reported in CMakeError.txt after the compilation fails?**

-A:The real reason for the failure will be showed in the stdout log. CMakeError.txt has no reference value. Please look for the first error in the stdout log. +A: The real reason for the failure is shown in the stdout log; CMakeError.txt has no reference value. Please look for the first error in the stdout log.
-**Q:After the compilation is successful, an error `undefined reference to XXXX` or `undefined symbol to XXXX` occurs during runtime, what should I do?** +**Q: After the compilation is successful, an error `undefined reference to XXXX` or `undefined symbol XXXX` occurs during runtime, what should I do?** A: The possible reasons are: @@ -168,11 +168,21 @@ A: The possible reasons are:
+**Q: What is the difference between `bash -p` and `bash -e` when an error is reported during application build?**
+
+A: Building and running MindSpore Serving depend on MindSpore. Serving provides two build modes: 1. use `bash -p {python site-packages}/mindspore/lib` to specify an installed MindSpore path, which avoids rebuilding MindSpore when building Serving; 2. build Serving together with the matching MindSpore, in which case Serving passes the `-e`, `-V`, and `-j` options through to MindSpore.
+For example, use `bash -e ascend -V 910 -j32` in the Serving directory as follows:
+
+- Build MindSpore in the `third_party/mindspore` directory using `bash -e ascend -V 910 -j32`.
+- Use the MindSpore build result as the Serving build dependency.
+
+
+
+
 ## Uninstall

 **Q: How to uninstall MindSpore?**

-A: Using `pip uninstall mindspore` to uninstall MindSpore. +A: First, confirm the full package name of the installed MindSpore. For example, for the GPU version, run `pip uninstall mindspore-gpu` to uninstall it.
@@ -184,23 +194,23 @@ A: You can write the frequently-used environment settings to `~/.bash_profile` o
-**Q:How to set environment variable `DEVICE_ID` when using GPU version of MindSpore** +**Q: How do I set the environment variable `DEVICE_ID` when using the GPU version of MindSpore?**

-A:Normally, GPU version of MindSpore doesn't need to set `DEVICE_ID`. MindSpore automatically chooses visible GPU devices according to the cuda environment variable `CUDA_VISIBLE_DEVICES`. After setting `CUDA_VISIBLE_DEVICES`, `DEVICE_ID` refers to the ordinal of the GPU device: +A: Normally, the GPU version of MindSpore does not need `DEVICE_ID` to be set. MindSpore automatically chooses visible GPU devices according to the CUDA environment variable `CUDA_VISIBLE_DEVICES`. After `CUDA_VISIBLE_DEVICES` is set, `DEVICE_ID` refers to the ordinal of the visible GPU devices:

- After `export CUDA_VISIBLE_DEVICES=1,3,5`, `DEVICE_ID` should be exported as `0`, `1` or `2`. If `3` is exported, MindSpore will fail to execute because of the invalid device ordinal.
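+
+A hedged Python sketch of consuming `DEVICE_ID` in a script (the exported values are illustrative assumptions):
+
+```python
+import os
+from mindspore import context
+
+# With `export CUDA_VISIBLE_DEVICES=1,3,5`, valid DEVICE_ID values are 0, 1 and 2.
+device_id = int(os.getenv("DEVICE_ID", "0"))
+context.set_context(mode=context.GRAPH_MODE, device_target="GPU", device_id=device_id)
+```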
-**Q:What should I do when error `/usr/bin/ld: warning: libxxx.so, needed by libmindspore.so, not found` prompts during application compiling?** +**Q: What should I do when the error `/usr/bin/ld: warning: libxxx.so, needed by libmindspore.so, not found` is reported during application compilation?**

-A:Find the directory where the missing dynamic library file is located, add the path to the environment variable `LD_LIBRARY_PATH`, and refer to [Inference Using the MindIR Model on Ascend 310 AI Processors#Building Inference Code](https://www.mindspore.cn/tutorial/inference/en/master/multi_platform_inference_ascend_310_mindir.html#building-inference-code) for environment variable settings. +A: Find the directory where the missing dynamic library file is located, add the path to the environment variable `LD_LIBRARY_PATH`, and refer to [Inference Using the MindIR Model on Ascend 310 AI Processors#Building Inference Code](https://www.mindspore.cn/tutorial/inference/en/master/multi_platform_inference_ascend_310_mindir.html#building-inference-code) for environment variable settings.
-**Q:What should I do when error `ModuleNotFoundError: No module named 'te'` prompts during application running?** +**Q: What should I do when the error `ModuleNotFoundError: No module named 'te'` is reported during application running?**

-A:First confirm whether the system environment is installed correctly and whether the whl packages such as `te` and `topi` are installed correctly. If there are multiple Python versions in the user environment, such as Conda virtual environment, you need to execute `ldd name_of_your_executable_app` to confirm whether the application link `libpython3.7m.so.1.0` is consistent with the current Python directory, if not, you need to adjust the order of the environment variable `LD_LIBRARY_PATH` . +A: First confirm whether the system environment is installed correctly and whether whl packages such as `te` and `topi` are installed correctly. If there are multiple Python versions in the user environment, such as a Conda virtual environment, execute `ldd name_of_your_executable_app` to confirm whether the `libpython3.7m.so.1.0` linked by the application is consistent with the current Python directory; if not, adjust the order of paths in the environment variable `LD_LIBRARY_PATH`.
@@ -238,14 +248,46 @@ A: After MindSpore is installed on a CPU hardware platform, run the `python -c'i

 A: Use the following command to check whether the current Python environment meets the requirements of MindSpore.

-- Text `python` in terminal window, check whether the version of Python interactive environment is `3.7.x` +- Enter `python` in the terminal window and check the version information printed on entering the Python interactive environment. If an error is reported directly, the Python soft link does not exist; if a non-Python 3.7 environment opens, the current Python environment is not the one MindSpore requires (see the sketch after this list).

- If not, execute the `sudo ln -sf /usr/bin/python3.7.x /usr/bin/python` command to create Python's soft link.
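+
+A minimal check of the interpreter that `python` resolves to (illustrative only):
+
+```python
+import sys
+
+# MindSpore here expects a 3.7.x interpreter.
+print(sys.version)
+assert sys.version_info[:2] == (3, 7), "this Python is not the one MindSpore requires"
+```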
-**Q: Here in script when we import other python lib before `import mindspore`, error raised like follows (`/your_path/libgomp.so.1: cannot allocate memory in static TLS block`), how can we solve it?** +**Q: When we import another Python library before `import mindspore` in a script, an error like `/{your_path}/libgomp.so.1: cannot allocate memory in static TLS block` is raised. How can we solve it?**

 A: This problem is relatively common, and there are two feasible solutions; you can choose either of them:

 - Exchange the order of import, first `import mindspore` and then import other third party libraries.
-- Before executing the program, we can add environment variables first (`export LD_PRELOAD=/your_path/libgomp.so.1`), where `your_path` is the path mentioned in above error. +- Before executing the program, we can add an environment variable first (`export LD_PRELOAD=/{your_path}/libgomp.so.1`), where `{your_path}` is the path mentioned in the above error.
+
+
+**Q: When the third-party component gensim is used to train the NLP network, the error "ValueError" may be reported. What can I do?**
+
+A: The following error information is displayed:
+
+```python
+>>> import gensim
+Traceback (most recent call last):
+  File "<stdin>", line 1, in <module>
+  File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/__init__.py", line 11, in <module>
+    from gensim import parsing, corpora, matutils, interfaces, models, similarities, utils  # noqa:F401
+  File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/corpora/__init__.py", line 6, in <module>
+    from .indexedcorpus import IndexedCorpus  # noqa:F401 must appear before the other classes
+  File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/corpora/indexedcorpus.py", line 14, in <module>
+    from gensim import interfaces, utils
+  File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/interfaces.py", line 19, in <module>
+    from gensim import utils, matutils
+  File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/matutils.py", line 1024, in <module>
+    from gensim._matutils import logsumexp, mean_absolute_difference, dirichlet_expectation
+  File "gensim/_matutils.pyx", line 1, in init gensim._matutils
+ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject
+```
+
+For details about the error cause, see the [gensim](https://github.com/RaRe-Technologies/gensim/issues/3095) or [numpy](https://github.com/numpy/numpy/issues/18709) official website.
+
+Solutions:
+
+Method 1: Reinstall NumPy and gensim by running the following command: `pip uninstall gensim numpy -y && pip install numpy==1.18.5 gensim`
+
+Method 2: If the problem persists, delete the cache files of the wheel installation packages and then perform method 1. (The cache directory of the wheel installation packages is `~/.cache/pip/wheels`)
diff --git a/docs/faq/source_en/operators_compile.md b/docs/faq/source_en/operators_compile.md
index 7898b5d5e66474ebfe760fdb19655b38050a8b7a..9f21ad148c21424feaa89f7df705e31a8514246b 100644
--- a/docs/faq/source_en/operators_compile.md
+++ b/docs/faq/source_en/operators_compile.md
@@ -6,7 +6,7 @@

 **Q: What is the function of the `TransData` operator? Can the performance be optimized?**
+ +**Q: When the third-party component gensim is used to train the NLP network, the error "ValueError" may be reported. What can I do?** + +A: The following error information is displayed: + +```bash +>>> import gensim +Traceback (most recent call last): + File "", line 1, in + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/__init__.py", line 11, in + from gensim import parsing, corpora, matutils, interfaces, models, similarities, utils # noqa:F401 + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/corpora/__init__.py", line 6, in + from .indexedcorpus import IndexedCorpus # noqa:F401 must appear before the other classes + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/corpora/indexedcorpus.py", line 14, in + from gensim import interfaces, utils + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/interfaces.py", line 19, in + from gensim import utils, matutils + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/matutils.py", line 1024, in + from gensim._matutils import logsumexp, mean_absolute_difference, dirichlet_expectation + File "gensim/_matutils.pyx", line 1, in init gensim._matutils +ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject +``` + +For details about the error cause, see the [gensim](https://github.com/RaRe-Technologies/gensim/issues/3095) or [numpy](https://github.com/numpy/numpy/issues/18709) official website. + +Solutions: + +Method 1: Reinstall the Numpy and Gensim and run the following commands: `pip uninstall gensim numpy -y && pip install numpy==1.18.5 gensim` + +Method 2: If the problem persists, delete the cache file of the wheel installation package and then perform method 1. (The cache directory of the wheel installation package is `~/.cache/pip/wheels`) diff --git a/docs/faq/source_en/operators_compile.md b/docs/faq/source_en/operators_compile.md index 7898b5d5e66474ebfe760fdb19655b38050a8b7a..9f21ad148c21424feaa89f7df705e31a8514246b 100644 --- a/docs/faq/source_en/operators_compile.md +++ b/docs/faq/source_en/operators_compile.md @@ -6,7 +6,7 @@ **Q: What is the function of the `TransData` operator? Can the performance be optimized?** -A: The `TransData` operator is used in the scenario where the data formats (such as NC1HWC0) used by interconnected operators on the network are inconsistent. In this case, the framework automatically inserts the `TransData` operator to convert the data formats into the same format and then performs computation. You can consider using the `amp` for mixed-precision training. In this way, some `FP32` operations and the invocation of some `TransData` operators can be reduced. +A: The `TransData` operator is used in the scenario where the data formats (such as NC1HWC0) used by interconnected operators on the network are inconsistent. In this case, the framework automatically inserts the `TransData` operator to convert the data formats into the same format and then performs computation. Huawei Ascend supports 5D format operations, and uses the `transdata` operator to convert data from 4D to 5D to improve performance.
@@ -19,22 +19,6 @@ A: The number of tensors to be concatenated at a time cannot exceed 192 accordin **Q: When `Conv2D` is used to define convolution, the `group` parameter is used. Is it necessary to ensure that the value of `group` can be exactly divided by the input and output dimensions? How is the group parameter transferred?** A: The `Conv2d` operator has the following constraint: When the value of `group` is greater than 1, the value must be the same as the number of input and output channels. Do not use `ops.Conv2D`. Currently, this operator does not support a value of `group` that is greater than 1. Currently, only the `nn.Conv2d` API of MindSpore supports `group` convolution. However, the number of groups must be the same as the number of input and output channels. -The `Conv2D` operator function is as follows: - -```python -def __init__(self, - out_channel, - kernel_size, - mode=1, - pad_mode="valid", - pad=0, - stride=1, - dilation=1, - group=1, - data_format="NCHW"): -``` - -If the function contains a `group` parameter, the parameter will be transferred to the C++ layer by default.
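+
+A minimal sketch of the `group` constraint with `nn.Conv2d` (the shapes are illustrative assumptions, with `group` equal to both the input and output channel counts):
+
+```python
+import numpy as np
+import mindspore
+import mindspore.nn as nn
+from mindspore import Tensor
+
+# group == in_channels == out_channels, as the constraint above requires.
+net = nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3, group=8)
+x = Tensor(np.ones([1, 8, 32, 32]), mindspore.float32)
+print(net(x).shape)  # (1, 8, 32, 32) with the default pad_mode="same"
+```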
@@ -50,25 +34,6 @@ A: Currently, MindSpore does not have APIs or operators similar to variance whic
-**Q: Why is data loading abnormal when MindSpore1.0.1 is used in graph data offload mode?** - -A: An operator with the `axis` attribute, for example, `ops.Concat(axis=1)((x1, x2))`, is directly used in `construct`. You are advised to initialize the operator in `__init__` as follows: - -```python -from mindspore import nn -import mindspore.ops as ops - -class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.concat = ops.Concat(axis=1) - def construct(self, x, y): - out = self.concat((x, y)) - return out -``` - -
- **Q: Compared with PyTorch, the `nn.Embedding` layer lacks the padding operation. Can other operators implement this operation?** A: In PyTorch, `padding_idx` is used to set the word vector in the `padding_idx` position in the embedding matrix to 0, and the word vector in the `padding_idx` position is not updated during backward propagation. @@ -82,15 +47,9 @@ A: The `multiples input` of the `Tile` operator must be a constant. (The value c
-**Q: What can I do if the LSTM example on the official website cannot run on Ascend?** - -A: Currently, the LSTM runs only on a GPU or CPU and does not support the hardware environment. You can click [MindSpore Operator List](https://www.mindspore.cn/doc/note/en/master/operator_list_ms.html) to view the supported operators. - -
 **Q: When conv2d is set to (3,10), Tensor[2,2,10,10] and it runs on Ascend on ModelArts, the error message `FM_W+pad_left+pad_right-KW>=strideW` is displayed. However, no error message is displayed when it runs on a CPU. What should I do?**

-A: This is a TBE operator restriction that the width of x must be greater than that of the kernel. The CPU does not have this operator restriction. Therefore, no error is reported. +A: TBE (Tensor Boost Engine) is Huawei's self-developed Ascend operator development tool, which extends the TVM framework for developing custom operators. The above problem is a restriction of this TBE operator: the width of x must be greater than the width of the kernel (see the sketch below). The CPU operator does not have this restriction, so no error is reported.
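+
+A hedged sketch of a shape that satisfies the restriction (shapes are illustrative assumptions; a kernel width of 10 on a width-10 input would trigger the error above on Ascend):
+
+```python
+import numpy as np
+import mindspore
+from mindspore import Tensor, ops
+
+# Kernel width 9 < feature-map width 10, so the TBE constraint is satisfied.
+conv2d = ops.Conv2D(out_channel=3, kernel_size=(3, 9))
+x = Tensor(np.ones([2, 2, 10, 10]), mindspore.float32)
+weight = Tensor(np.ones([3, 2, 3, 9]), mindspore.float32)
+print(conv2d(x, weight).shape)  # (2, 3, 8, 2) with the default pad_mode="valid"
+```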
@@ -103,9 +62,7 @@ A: Currently, MindSpore does not provide anti-pooling APIs but you can customize

 **Q: What can I do if the error message `Pynative run op ExpandDims failed` is displayed when the ExpandDims operator is used? The code is as follows:**

 ```python
-context.set_context(
-mode=cintext.GRAPH_MODE,
-device_target='ascend')
+context.set_context(mode=context.GRAPH_MODE, device_target='Ascend')
 input_tensor=Tensor(np.array([[2,2],[2,2]]),mindspore.float32)
 expand_dims=ops.ExpandDims()
 output=expand_dims(input_tensor,0)
@@ -114,6 +71,7 @@ output=expand_dims(input_tensor,0)

 A: The problem is that the Graph mode is selected but the PyNative mode is used. As a result, an error is reported. MindSpore supports the following running modes which are optimized in terms of debugging or running:

 - PyNative mode: dynamic graph mode. In this mode, operators in the neural network are delivered and executed one by one, facilitating the compilation and debugging of the neural network model.
+
 - Graph mode: static graph mode. In this mode, the neural network model is compiled into an entire graph and then delivered for execution. This mode uses technologies such as graph optimization to improve the running performance and facilitates large-scale deployment and cross-platform running.

 You can select a proper mode and writing method to complete the training by referring to the official website [tutorial](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/debug_in_pynative_mode.html).
diff --git a/docs/faq/source_en/precision_tuning.md b/docs/faq/source_en/precision_tuning.md
index 4232f189e601c8481618683a58a05a2cf2179c96..1f82e68dd18ad5f03485de4199bc440b8fc79a68 100644
--- a/docs/faq/source_en/precision_tuning.md
+++ b/docs/faq/source_en/precision_tuning.md
@@ -8,8 +8,8 @@

 A: There are many causes for the non-convergence of the loss value or the accuracy problem. You are advised to check the causes one by one by referring to the following links.
-- [MindSpore Model Accuracy Tuning Practice (1): Common Accuracy Problems, Causes, and Tuning Approach](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=102750) +[MindSpore Model Accuracy Tuning Practice (1): Common Accuracy Problems, Causes, and Tuning Approach](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=102750) -- [MindSpore Model Accuracy Tuning Practice (2): Accuracy Debugging and Tuning Approach](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=106624) +[MindSpore Model Accuracy Tuning Practice (2): Accuracy Debugging and Tuning Approach](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=106624) -- [MindSpore Model Accuracy Tuning Practice (3): Common Accuracy Problems](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=119271) +[MindSpore Model Accuracy Tuning Practice (3): Common Accuracy Problems](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=119271) diff --git a/docs/faq/source_en/script_implement.md b/docs/faq/source_en/script_implement.md deleted file mode 100644 index e84fb079f6b065e165074d44eb71bc16c704e1af..0000000000000000000000000000000000000000 --- a/docs/faq/source_en/script_implement.md +++ /dev/null @@ -1,231 +0,0 @@ -# Script Implement - -`Linux` `Windows` `Ascend` `GPU` `CPU` `Environment Preparation` `Basic` `Intermediate` - - - -**Q: Can the `vgg16` model be loaded and transferred on a GPU using the Hub?** - -A: Yes, but you need to manually modify the following two arguments: - -```python -# Add the **kwargs argument as follows: -def vgg16(num_classes=1000, args=None, phase="train", **kwargs): -``` - -```python -# Add the **kwargs argument as follows: -net = Vgg(cfg['16'], num_classes=num_classes, args=args, batch_norm=args.batch_norm, phase=phase, **kwargs) -``` - -
- -**Q: How to obtain middle-layer features of a VGG model?** - -A: Obtaining the middle-layer features of a network is not closely related to the specific framework. For the `vgg` model defined in `torchvison`, the `features` field can be used to obtain the middle-layer features. The `vgg` source code of `torchvison` is as follows: - -```python -class VGG(nn.Module): - - def __init__(self, features, num_classes=1000, init_weights=True): - super(VGG, self).__init__() - self.features = features - self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) -``` - -The `vgg16` defined in ModelZoo of MindSpore can be obtained through the `layers` field as follows: - -```python -network = vgg16() -print(network.layers) -``` - -
- -**Q: When MindSpore is used for model training, there are four input parameters for `CTCLoss`: `inputs`, `labels_indices`, `labels_values`, and `sequence_length`. How do I use `CTCLoss` for model training?** - -A: The `dataset` received by the defined `model.train` API can consist of multiple pieces of data, for example, (`data1`, `data2`, `data3`, ...). Therefore, the `dataset` can contain `inputs`, `labels_indices`, `labels_values`, and `sequence_length` information. You only need to define the dataset in the corresponding format and transfer it to `model.train`. For details, see [Data Processing API](https://www.mindspore.cn/doc/programming_guide/en/master/dataset_loading.html). - -
- -**Q: How do I load the PyTorch weight to MindSpore during model transfer?** - -A: First, enter the `PTH` file of PyTorch. Take `ResNet-18` as an example. The network structure of MindSpore is the same as that of PyTorch. After transferring, the file can be directly loaded to the network. Only `BN` and `Conv2D` are used during loading. If the network names of MindSpore and PyTorch at other layers are different, change the names to the same. - -
- -**Q: What are the available recommendation or text generation networks or models provided by MindSpore?** - -A: Currently, recommendation models such as Wide & Deep, DeepFM, and NCF are under development. In the natural language processing (NLP) field, Bert\_NEZHA is available and models such as MASS are under development. You can rebuild the network into a text generation network based on the scenario requirements. Please stay tuned for updates on the [MindSpore Model Zoo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). - -
- -**Q: How do I use MindSpore to fit functions such as $f(x)=a \times sin(x)+b$?** - -A: The following is based on the official MindSpore linear fitting case. - -```python -# The fitting function is:f(x)=2*sin(x)+3. -import numpy as np -from mindspore import dataset as ds -from mindspore.common.initializer import Normal -from mindspore import nn, Model, context -from mindspore.train.callback import LossMonitor - -context.set_context(mode=context.GRAPH_MODE, device_target="CPU") - -def get_data(num, w=2.0, b=3.0): - # f(x)=w * sin(x) + b - # f(x)=2 * sin(x) +3 - for i in range(num): - x = np.random.uniform(-np.pi, np.pi) - noise = np.random.normal(0, 1) - y = w * np.sin(x) + b + noise - yield np.array([np.sin(x)]).astype(np.float32), np.array([y]).astype(np.float32) - -def create_dataset(num_data, batch_size=16, repeat_size=1): - input_data = ds.GeneratorDataset(list(get_data(num_data)), column_names=['data','label']) - input_data = input_data.batch(batch_size) - input_data = input_data.repeat(repeat_size) - return input_data - -class LinearNet(nn.Cell): - def __init__(self): - super(LinearNet, self).__init__() - self.fc = nn.Dense(1, 1, Normal(0.02), Normal(0.02)) - - def construct(self, x): - x = self.fc(x) - return x - -if __name__ == "__main__": - - num_data = 1600 - batch_size = 16 - repeat_size = 1 - lr = 0.005 - momentum = 0.9 - - net = LinearNet() - net_loss = nn.loss.MSELoss() - opt = nn.Momentum(net.trainable_params(), lr, momentum) - model = Model(net, net_loss, opt) - - ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) - model.train(1, ds_train, callbacks=LossMonitor(), dataset_sink_mode=False) - - print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1]) -``` - -
- -**Q: How do I use MindSpore to fit quadratic functions such as $f(x)=ax^2+bx+c$?** - -A: The following code is referenced from the official [MindSpore tutorial code](https://gitee.com/mindspore/docs/blob/master/tutorials/tutorial_code/linear_regression.py). - -Modify the following items to fit $f(x) = ax^2 + bx + c$: - -1. Dataset generation. -2. Network fitting. -3. Optimizer. - -The following explains detailed information about the modification: - -```python -# The selected optimizer does not support CPUs. Therefore, the GPU computing platform is used for training. You need to install MindSpore of the GPU version. -context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - -# Assume that the function to be fitted is f(x)=2x^2+3x+4. Modify the data generation function as follows: -def get_data(num, a=2.0, b=3.0 ,c = 4): - for i in range(num): - x = np.random.uniform(-10.0, 10.0) - noise = np.random.normal(0, 1) - # For details about how to generate the value of y, see the to-be-fitted objective function ax^2+bx+c. - y = x * x * a + x * b + c + noise - # When fitting a*x^2 + b*x +c, a and b are weight parameters, and c is the offset parameter bias. The training data corresponding to the two weights is x^2 and x, respectively. Therefore, the dataset generation mode is changed as follows: - yield np.array([x*x, x]).astype(np.float32), np.array([y]).astype(np.float32) - -def create_dataset(num_data, batch_size=16, repeat_size=1): - input_data = ds.GeneratorDataset(list(get_data(num_data)), column_names=['data','label']) - input_data = input_data.batch(batch_size) - input_data = input_data.repeat(repeat_size) - return input_data - -class LinearNet(nn.Cell): - def __init__(self): - super(LinearNet, self).__init__() - # Two training parameters are input for the full connection function. Therefore, the input value is changed to 2. The first Normal(0.02) automatically allocates random weights to the two input parameters, and the second Normal is the random bias. - self.fc = nn.Dense(2, 1, Normal(0.02), Normal(0.02)) - - def construct(self, x): - x = self.fc(x) - return x - -if __name__ == "__main__": - num_data = 1600 - batch_size = 16 - repeat_size = 1 - lr = 0.005 - momentum = 0.9 - - net = LinearNet() - net_loss = nn.loss.MSELoss() - # RMSProp optimizer with better effect is selected for quadratic function fitting. Currently, Ascend and GPU computing platforms are supported. - opt = nn.RMSProp(net.trainable_params(), learning_rate=0.1) - model = Model(net, net_loss, opt) - - ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) - model.train(1, ds_train, callbacks=LossMonitor(), dataset_sink_mode=False) - - print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1]) -``` - -
- -**Q: How do I execute a single `ut` case in `mindspore/tests`?** - -A: `ut` cases are usually based on the MindSpore package of the debug version, which is not provided on the official website. You can run `sh build.sh` to compile the source code and then run the `pytest` command. The compilation in debug mode does not depend on the backend. Run the `sh build.sh -t on` command. For details about how to execute cases, see the `tests/runtest.sh` script. - -
- -**Q: For Ascend users, how to get more detailed logs when the `run task error` is reported?** - -A: Use the msnpureport tool to set the on-device log level. The tool is stored in `/usr/local/Ascend/driver/tools/msnpureport`. - -```bash -- Global: /usr/local/Ascend/driver/tools/msnpureport -g info -``` - -```bash -- Module-level: /usr/local/Ascend/driver/tools/msnpureport -m SLOG:error -``` - -```bash -- Event-level: /usr/local/Ascend/driver/tools/msnpureport -e disable/enable -``` - -```bash -- Multi-device ID-level: /usr/local/Ascend/driver/tools/msnpureport -d 1 -g warning -``` - -Assume that the value range of deviceID is [0, 7], and `devices 0–3` and `devices 4–7` are on the same OS. `Devices 0–3` share the same log configuration file and `devices 4–7` share the same configuration file. In this way, changing the log level of any device (for example device 0) will change that of other devices (for example `devices 1–3`). This rule also applies to `devices 4–7`. - -After the driver package is installed (assuming that the installation path is /usr/local/HiAI and the execution file `msnpureport.exe` is in the C:\ProgramFiles\Huawei\Ascend\Driver\tools\ directory on Windows), run the command in the /home/shihangbo/ directory to export logs on the device to the current directory and store logs in a folder named after the timestamp. - -
- -**Q: What can I do if the Google's Chrome browser prompts the error message `ERR_UNSAFE_PORT after` MindInsight is successfully started?** - -A: Chrome browser's kernel prohibits certain ports from being used as HTTP services. You can add `--explicitly-allowed-ports=port` in Chrome browser's configuration. Otherwise you can change the port or browser like IE browser. - -
- -**Q: How do I change hyperparameters for calculating loss values during neural network training?** - -A: Sorry, this function is not available yet. You can find the optimal hyperparameters by training, redefining an optimizer, and then training. - -
- -**Q:What should I do when error `error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory` prompts during application running?** - -A:While installing Ascend 310 AI Processor software packages,the `CANN` package should install the full-featured `toolkit` version instead of the `nnrt` version. diff --git a/docs/faq/source_en/training_visualization.md b/docs/faq/source_en/training_visualization.md index 10734fdded54ce114488b10dea51b09b98ee1808..b76df675b151333bc5b52f53d00ac0ce1bfdd918 100644 --- a/docs/faq/source_en/training_visualization.md +++ b/docs/faq/source_en/training_visualization.md @@ -8,7 +8,7 @@ A: You can use "export LD_LIBRARY_PATH=dir:$LD_LIBRARY_PATH" command to export LD_LIBRARY_PATH variable in Linux environment. -
+
**Q: What can I do if the error message `bash: mindinsight: command not found` is displayed in the MindInsight running logs after MindInsight failed to start?** @@ -16,7 +16,7 @@ A: This problem occurs when using Python source codes to compile and install in (Please change `$YourPythonPath$` to your installation path). Note: this command is only valid at the current terminal. If you want to make it permanent, please add it to the file `~/.bashrc`. -
+
**Q: What can I do if the error message `No module named 'mindinsight'` is displayed in the MindInsight running logs after MindInsight is uninstalled?** @@ -27,14 +27,32 @@ In this case, you can perform either of the following operations: - Reinstall MindInsight and run the `mindinsight stop --port ` command to stop the started MindInsight background service. - Run the `kill -9 ` command to kill the processes designed by MindInsight. -
+
-**Q: What can I do if the Google's Chrome browser prompts the error message `ERR_UNSAFE_PORT after` MindInsight is successfully started?** +**Q: What can I do if the Google's Chrome browser prompts the error message `ERR_UNSAFE_PORT` after MindInsight is successfully started?** A: Chrome browser's kernel prohibits certain ports from being used as HTTP services. You can add `--explicitly-allowed-ports=port` in Chrome browser's configuration. Otherwise you can change the port or browser like IE browser. -
+
**Q: What can I do if the error `Exeption calling application: Field number 0 is illegal` appears on Ascend after MindInsight is successfully started with debugger turning on, and the training script is trying to connecting to debugger?** -A: It means the wrong version of protobuf is installed, please install the right version, see [Installing protobuf Python](https://support.huaweicloud.com/intl/en-us/instg-cli-cann/atlascli_03_0046.html). +A: It means the wrong version of Protobuf is installed, please install the right version, see [Installing Protobuf Python](https://support.huaweicloud.com/intl/en-us/instg-cli-cann/atlascli_03_0046.html). + +
+
+**Q: What can I do if the error `The debugger offline server module is not found` appears after MindInsight is successfully started when trying to turn on the offline debugger?**
+
+A: The debugger offline service needs to import MindSpore. Please install the correct version of MindSpore. For the installation method, please refer to [Install MindSpore](https://www.mindspore.cn/install/en).
+
+
+
+**Q: What can I do if Google Chrome prompts the error message `ERR_CONNECTION_REFUSED` after MindInsight is successfully started?**
+
+A: Check the firewall policy configuration between the backend server and network devices to ensure that the communication between the browser and MindInsight is not restricted by the configuration rules of the relevant devices.
+
+
diff --git a/docs/faq/source_en/usage_migrate_3rd.md b/docs/faq/source_en/usage_migrate_3rd.md
index 3ac27d509b54bc66bf9aa55c3a64368e61e1e4de..ef6aa7d8c8f2052f2892127e6f0d3808f163d5a5 100644
--- a/docs/faq/source_en/usage_migrate_3rd.md
+++ b/docs/faq/source_en/usage_migrate_3rd.md
@@ -4,16 +4,16 @@

-**Q:How do I load a pre-trained PyTorch model for fine-tuning on MindSpore?** +**Q: How do I load a pre-trained PyTorch model for fine-tuning on MindSpore?**

-A:Map parameters of PyTorch and MindSpore one by one. No unified conversion script is provided due to flexible network definitions. +A: Map parameters of PyTorch and MindSpore one by one. No unified conversion script is provided due to flexible network definitions.
 Customize scripts based on scenarios. For details, see [Advanced Usage of Checkpoint](https://www.mindspore.cn/doc/programming_guide/zh-CN/master/advanced_usage_of_checkpoint.html).
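+
+A hedged sketch of such a one-by-one mapping (the checkpoint path and the `name_map` entries are illustrative assumptions; real networks need a complete, hand-checked mapping):
+
+```python
+import torch
+from mindspore import Parameter, Tensor, save_checkpoint
+
+# Hypothetical PyTorch-name -> MindSpore-name pairs; e.g. BatchNorm weight/bias
+# usually map to gamma/beta in MindSpore.
+name_map = {"conv1.weight": "conv1.weight",
+            "bn1.weight": "bn1.gamma",
+            "bn1.bias": "bn1.beta"}
+
+state_dict = torch.load("resnet18.pth", map_location="cpu")
+ms_params = [{"name": ms_name, "data": Parameter(Tensor(state_dict[pt_name].numpy()))}
+             for pt_name, ms_name in name_map.items()]
+save_checkpoint(ms_params, "resnet18_ms.ckpt")
+```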
-**Q:How do I convert a PyTorch `dataset` to a MindSpore `dataset`?** +**Q: How do I convert a PyTorch `dataset` to a MindSpore `dataset`?** -A:The custom dataset logic of MindSpore is similar to that of PyTorch. You need to define a `dataset` class containing `__init__`, `__getitem__`, and `__len__` to read your dataset, instantiate the class into an object (for example, `dataset/dataset_generator`), and transfer the instantiated object to `GeneratorDataset` (on MindSpore) or `DataLoader` (on PyTorch). Then, you are ready to load the custom dataset. MindSpore provides further `map`->`batch` operations based on `GeneratorDataset`. Users can easily add other custom operations to `map` and start `batch`. +A: The custom dataset logic of MindSpore is similar to that of PyTorch. You need to define a `dataset` class containing `__init__`, `__getitem__`, and `__len__` to read your dataset, instantiate the class into an object (for example, `dataset/dataset_generator`), and transfer the instantiated object to `GeneratorDataset` (on MindSpore) or `DataLoader` (on PyTorch). Then, you are ready to load the custom dataset. MindSpore provides further `map`->`batch` operations based on `GeneratorDataset`. Users can easily add other custom operations to `map` and start `batch`. The custom dataset of MindSpore is loaded as follows: ```python @@ -41,3 +41,37 @@ dataset = dataset.batch(batch_size, drop_remainder=True) A: For details about script or model migration, please visit the [MindSpore official website](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/migrate_3rd_scripts.html). +
+
+**Q: What should I do when MindConverter reports the error `terminate called after throwing an instance of 'std::system_error', what(): Resource temporarily unavailable, Aborted (core dumped)` while converting a TensorFlow script?**
+
+A: This problem is caused by TensorFlow. During script conversion, the TensorFlow model file needs to be loaded through the TensorFlow library, and TensorFlow applies for the resources it needs for initialization. If the resource application fails (perhaps because the number of system processes exceeds the Linux maximum), the TensorFlow C/C++ layer produces a core dump. For more information, please refer to the official TensorFlow issues; the following issues are for reference only: [TF ISSUE 14885](https://github.com/tensorflow/tensorflow/issues/14885), [TF ISSUE 37449](https://github.com/tensorflow/tensorflow/issues/37449)
+
+
+
+**Q: Can MindConverter run on the ARM platform?**
+
+A: MindConverter supports both the x86 and ARM platforms. Please ensure all required dependencies and environments have been installed on the ARM platform.
+
+
+
+**Q: Why does the conversion process take a lot of time (more than 10 minutes) even though the model is not so large?**
+
+A: When converting, MindConverter needs to use Protobuf to deserialize the model file. Please make sure that the Protobuf installed in the Python environment is implemented with the C++ backend. The validation method is as follows. If the output is "python", you need to install the Python Protobuf implemented with C++ (download the Protobuf source code, enter the "python" subdirectory in the source code, and use `python setup.py install --cpp_implementation` to install). If the output is "cpp" and the conversion process still takes a long time, please add the environment variable `export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp` before conversion.
+
+```python
+from google.protobuf.internal import api_implementation
+print(api_implementation.Type())
+```
+
+
+
+**Q: While converting a .pb file to a MindSpore script, what may cause error code 1000001 even when `model_file`, `shape`, `input_nodes` and `output_nodes` are set correctly and the third-party requirements are installed correctly?**
+
+A: Make sure that the TensorFlow version used to generate the .pb file is no higher than the version used to convert it, avoiding the conflict caused by using a lower-version TensorFlow to parse a .pb file generated by a higher version.
+
+
+
+**Q: What should I do to deal with the exception `[ERROR] MINDCONVERTER: [BaseConverterError] code: 0000000, msg: {python_home}/lib/libgomp.so.1: cannot allocate memory in static TLS block`?**
+
+A: In most cases, the problem is caused by an incorrectly exported environment variable. Please set `export LD_PRELOAD={python_home}/lib/libgomp.so.1.0.0`, then try to run MindConverter again.
diff --git a/docs/faq/source_zh_cn/backend_compile.md b/docs/faq/source_zh_cn/backend_compile.md
deleted file mode 100644
index 9fc2a3157a93461a07014bc23286e14ff7540ec0..0000000000000000000000000000000000000000
--- a/docs/faq/source_zh_cn/backend_compile.md
+++ /dev/null
@@ -1,96 +0,0 @@
-# 后端编译
-
-`Linux` `Windows` `Ascend` `GPU` `CPU` `环境准备` `初级` `中级`
-
-
-
-**Q:MindSpore现支持直接读取哪些其他框架的模型和哪些格式呢?比如PyTorch下训练得到的pth模型可以加载到MindSpore框架下使用吗?**
-
-A: MindSpore采用protbuf存储训练参数,无法直接读取其他框架的模型。对于模型文件本质保存的就是参数和对应的值,可以用其他框架的API将参数读取出来之后,拿到参数的键值对,然后再加载到MindSpore中使用。比如想用其他框架训练好的ckpt文件,可以先把参数读取出来,再调用MindSpore的`save_checkpoint`接口,就可以保存成MindSpore可以读取的ckpt文件格式了。
- -**Q:在使用ckpt或导出模型的过程中,报Protobuf内存限制错误,如何处理?** - -A:当单条Protobuf数据过大时,因为Protobuf自身对数据流大小的限制,会报出内存限制的错误。这时可通过设置环境变量`PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`解除限制。 - -
- -**Q:PyNative模式和Graph模式的区别?** - -A: 在使用效率上,两个模式使用的算子是一致的,因此相同的网络和算子,分别在两个模式下执行时,精度效果是一致的。由于执行机理的差异,网络的执行性能是会不同的,并且在理论上,MindSpore提供的算子同时支持PyNative模式和Graph模式; - -在场景使用方面,Graph模式需要一开始就构建好网络结构,然后框架做整图优化和执行,对于网络固定没有变化,且需要高性能的场景比较适合; - -在不同硬件(`Ascend`、`GPU`和`CPU`)资源上都支持这两种模式; - -代码调试方面,由于PyNative模式是逐行执行算子,用户可以直接调试Python代码,在代码中任意位置打断点查看对应算子`/api`的输出或执行结果。而Graph模式由于在构造函数里只是完成网络构造,实际没有执行,因此在`construct`函数里打断点是无法获取对应算子的输出,而只能等整网执行中指定对应算子的输出打印,在网络执行完成后进行查看。 - -
- -**Q:请问`c_transforms`和`py_transforms`有什么区别,比较推荐使用哪个?** - -A:推荐使用`c_transforms`,因为纯C层执行,所以性能会更好。 - -原理:`c_transform`底层使用的是C版本`opencv/jpeg-turbo`进行的数据处理,`py_transform`使用的是Python版本的`Pillow`进行数据处理。 - -
- -**Q:缓存服务器异常关闭如何处理?** - -A:缓存服务器使用过程中,会进行IPC共享内存和socket文件等系统资源的分配。若允许溢出,在磁盘空间还会存在溢出的数据文件。一般情况下,如果通过`cache_admin --stop`命令正常关闭服务器,这些资源将会被自动清理。 - -但如果缓存服务器被异常关闭,例如缓存服务进程被杀等,用户需要首先尝试重新启动服务器,若启动失败,则应该依照以下步骤手动清理系统资源: - -- 删除IPC资源。 - - 1. 检查是否有IPC共享内存残留。 - - 一般情况下,系统会为缓存服务分配4GB的共享内存。通过以下命令可以查看系统中的共享内存块使用情况。 - - ```text - $ ipcs -m - ------ Shared Memory Segments -------- - key shmid owner perms bytes nattch status - 0x61020024 15532037 root 666 4294967296 1 - ``` - - 其中,`shmid`为共享内存块id,`bytes`为共享内存块的大小,`nattch`为链接到该共享内存块的进程数量。`nattch`不为0表示仍有进程使用该共享内存块。在删除共享内存前,需要停止使用该内存块的所有进程。 - - 2. 删除IPC共享内存。 - - 找到对应的共享内存id,并通过以下命令删除。 - - ```text - ipcrm -m {shmid} - ``` - -- 删除socket文件。 - - 一般情况下,socket文件位于`/tmp/mindspore/cache`。进入文件夹,执行以下命令删除socket文件。 - - ```text - rm cache_server_p{port_number} - ``` - - 其中`port_number`为用户创建缓存服务器时指定的端口号,默认为50052。 - -- 删除溢出到磁盘空间的数据文件。 - - 进入启用缓存服务器时指定的溢出数据路径。通常,默认溢出路径为`/tmp/mindspore/cache`。找到路径下对应的数据文件夹并逐一删除。 - -
- -**Q:编译应用时报错`bash -p`方式和 `bash -e`方式的区别?** - -A:MindSpore Serving的编译和运行依赖MindSpore,Serving提供两种编译方式:一种指定已安装的MindSpore路径,即`bash -p {python site-packages}/mindspore/lib`,避免编译Serving时再编译MindSpore;另一种,编译Serving时,编译配套的MindSpore,Serving会将`-e`、`-V`和`-j`选项透传给MindSpore。 -比如,在Serving目录下,`bash -e ascend -V 910 -j32`: - -- 首先将会以`bash -e ascend -V 910 -j32`方式编译`third_party/mindspore`目录下的MindSpore; -- 其次,编译脚本将MindSpore编译结果作为Serving的编译依赖。 - -
- -**Q:运行应用时报错`libmindspore.so: cannot open shared object file: No such file or directory`怎么办?** - -A:首先,需要确认是否安装MindSpore Serving所依赖的MindSpore;其次,Serving 1.1需要配置`LD_LIBRARY_PATH`,显式指定`libmindspore.so`所在路径,`libmindspore.so`当前在MindSpore Python安装路径的`lib`目录下;Serving 1.2后不再需要显示指定`libmindspore.so`所在路径,Serving会基于MindSpore安装路径查找并追加配置`LD_LIBRARY_PATH`,用户不再需要感知。 diff --git a/docs/faq/source_zh_cn/conf.py b/docs/faq/source_zh_cn/conf.py index 95d7701759707ab95a3c199cd8a22e2e2cc1194d..b61d12b35eef3aa9e3f7bdd6f0d64392659575da 100644 --- a/docs/faq/source_zh_cn/conf.py +++ b/docs/faq/source_zh_cn/conf.py @@ -30,7 +30,7 @@ release = 'master' # ones. extensions = [ 'sphinx_markdown_tables', - 'recommonmark', + 'myst_parser', ] source_suffix = { diff --git a/docs/faq/source_zh_cn/data_processing.md b/docs/faq/source_zh_cn/data_processing.md index b1aaa4f4399c0e2415136196950c2609703bc2e1..18ea51499a519c852dc8a554e7bcd43878b78f5e 100644 --- a/docs/faq/source_zh_cn/data_processing.md +++ b/docs/faq/source_zh_cn/data_processing.md @@ -4,9 +4,17 @@ -**Q:由于我一条数据包含多个图像,并且每个图像的宽高都不一致,我需要对转成mindrecord的格式进行`map`操作来进行数据处理。可是我从`record`读取的数据是`np.ndarray`格式的数据,我的数据处理的`operations`是针对图像格式的。我应该怎么样才能对所生成的mindrecord的格式的数据进行预处理呢?** +**Q: 请问`c_transforms`和`py_transforms`有什么区别,比较推荐使用哪个?** -A:建议你按照如下操作进行: +A: 推荐使用`c_transforms`,因为纯C层执行,所以性能会更好。 + +原理:`c_transform`底层使用的是C版本`opencv/jpeg-turbo`进行的数据处理,`py_transform`使用的是Python版本的`Pillow`进行数据处理。 + +
+ +**Q: 由于我一条数据包含多个图像,并且每个图像的宽高都不一致,我需要对转成mindrecord的格式进行`map`操作来进行数据处理。可是我从`record`读取的数据是`np.ndarray`格式的数据,我的数据处理的`operations`是针对图像格式的。我应该怎么样才能对所生成的mindrecord的格式的数据进行预处理呢?** + +A: 建议你按照如下操作进行: ```python #1 The defined schema is as follows: Among them, data1, data2, data3, ... These fields store your image, and only the binary of the image is stored here. @@ -53,35 +61,35 @@ for item in data_set.create_dict_iterator(output_numpy=True):
-**Q:我的自定义图像数据集转为mindrecord格式时,我的数据是`numpy.ndarray`格式的,且`shape`为[4,100,132,3],这个`shape`的含义是四幅三通道的帧,且每个值都在0~255。可是当我查看转化成mindrecord的格式的数据时,发现是`[19800]`的`shape`,我原数据的维度全部展开有`[158400]`,请问这是为什么?** +**Q: 我的自定义图像数据集转为mindrecord格式时,我的数据是`numpy.ndarray`格式的,且`shape`为[4,100,132,3],这个`shape`的含义是四幅三通道的帧,且每个值都在0~255。可是当我查看转化成mindrecord的格式的数据时,发现是`[19800]`的`shape`,我原数据的维度全部展开有`[158400]`,请问这是为什么?** -A:应该是你数据中`ndarray`的`dtype`是`int8`,因为`[158400]`和`[19800]`刚好相差了8倍,建议将数据中`ndarray`的`dtype`指定为`float64`。 +A: 可能是你数据中`ndarray`的`dtype`是`int8`,因为`[158400]`和`[19800]`刚好相差了8倍,建议将数据中`ndarray`的`dtype`指定为`float64`。
-**Q:想要保存生成的图片,代码运行完毕以后在相应目录找不到图片。相似的,在JupyterLab中生成数据集用于训练,训练时可以在相应路径读取到数据,但是自己却无法在路径中找到图片或数据集?** +**Q: 想要保存生成的图片,代码运行完毕以后在相应目录找不到图片。相似的,在JupyterLab中生成数据集用于训练,训练时可以在相应路径读取到数据,但是自己却无法在路径中找到图片或数据集?**

-A:应该是JumperLab生成的图片或者数据集都是在Docker内吧,`moxing`下载的数据只能训练进程的Docker内看见,训练完成后这些数据就随着Docker释放了。 可以试试在训练任务中将需要`download`的数据再通过`moxing`传回`obs`,然后再在`obs`里面下载到你本地。 +A: 可能是JupyterLab生成的图片或者数据集都是在Docker内,`moxing`下载的数据只能在训练进程的Docker内看见,训练完成后这些数据就随着Docker释放了。可以试试在训练任务中将需要`download`的数据再通过`moxing`传回`obs`,然后再在`obs`里面下载到你本地。
-**Q:MindSpore中`model.train`的`dataset_sink_mode`参数该如何理解?** +**Q: MindSpore中`model.train`的`dataset_sink_mode`参数该如何理解?** -A:当`dataset_sink_mode=True`时,数据处理会和网络计算构成Pipeline方式,即:数据处理在逐步处理数据时,处理完一个`batch`的数据,会把数据放到一个队列里,这个队列用于缓存已经处理好的数据,然后网络计算从这个队列里面取数据用于训练,那么此时数据处理与网络计算就`Pipeline`起来了,整个训练耗时就是数据处理/网络计算耗时最长的那个。 +A: 当`dataset_sink_mode=True`时,数据处理会和网络计算构成Pipeline方式,即: 数据处理在逐步处理数据时,处理完一个`batch`的数据,会把数据放到一个队列里,这个队列用于缓存已经处理好的数据,然后网络计算从这个队列里面取数据用于训练,那么此时数据处理与网络计算就`Pipeline`起来了,整个训练耗时就是数据处理/网络计算耗时最长的那个。 -当`dataset_sink_mode=False`时,数据处理会和网络计算构成串行的过程,即:数据处理在处理完一个`batch`后,把这个`batch`的数据传递给网络用于计算,在计算完成后,数据处理再处理下一个`batch`,然后把这个新的`batch`数据传递给网络用于计算,如此的循环往复,直到训练完。该方法的总耗时是数据处理的耗时+网络计算的耗时=训练总耗时。 +当`dataset_sink_mode=False`时,数据处理会和网络计算构成串行的过程,即: 数据处理在处理完一个`batch`后,把这个`batch`的数据传递给网络用于计算,在计算完成后,数据处理再处理下一个`batch`,然后把这个新的`batch`数据传递给网络用于计算,如此的循环往复,直到训练完。该方法的总耗时是数据处理的耗时+网络计算的耗时=训练总耗时。
-**Q:MindSpore能否支持按批次对不同尺寸的图片数据进行训练?** +**Q: MindSpore能否支持按批次对不同尺寸的图片数据进行训练?** -A:你可以参考yolov3对于此场景的使用,里面有对于图像的不同缩放,脚本见[yolo_dataset](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/yolov3_darknet53/src/yolo_dataset.py)。 +A: 你可以参考yolov3对于此场景的使用,里面有对于图像的不同缩放,脚本见[yolo_dataset](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/yolov3_darknet53/src/yolo_dataset.py)。
-**Q:使用MindSpore做分割训练,必须将数据转为MindRecords吗?** +**Q: 使用MindSpore做分割训练,必须将数据转为MindRecord吗?**

-A:[build_seg_data.py](https://github.com/mindspore-ai/mindspore/blob/master/model_zoo/official/cv/deeplabv3/src/data/build_seg_data.py)是将数据集生成MindRecord的脚本,可以直接使用/适配下你的数据集。或者如果你想尝试自己实现数据集的读取,可以使用`GeneratorDataset`自定义数据集加载。 +A: [build_seg_data.py](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/deeplabv3/src/data/build_seg_data.py)是将数据集生成MindRecord的脚本,可以直接使用/适配下你的数据集。或者如果你想尝试自己实现数据集的读取,可以使用`GeneratorDataset`自定义数据集加载。

[GeneratorDataset 示例](https://www.mindspore.cn/doc/programming_guide/zh-CN/master/dataset_loading.html#id5)

@@ -89,81 +97,64 @@

-**Q:如何不将数据处理为MindRecord格式,直接进行训练呢?** +**Q: 如何不将数据处理为MindRecord格式,直接进行训练呢?** -A:可以使用自定义的数据加载方式 `GeneratorDataset`,具体可以参考[数据集加载](https://www.mindspore.cn/doc/programming_guide/zh-CN/master/dataset_loading.html)文档中的自定义数据集加载。 +A: 可以使用自定义的数据加载方式 `GeneratorDataset`,具体可以参考[数据集加载](https://www.mindspore.cn/doc/programming_guide/zh-CN/master/dataset_loading.html)文档中的自定义数据集加载。
-**Q:MindSpore在NPU硬件平台进行多卡训练,自定义数据集如何给不同NPU传递不同数据?** +**Q: MindSpore在Ascend硬件平台进行多卡训练,自定义数据集如何给不同卡传递不同数据?** -A:使用`GeneratorDataset`的时候,可以使用`num_shards=num_shards`,`shard_id=device_id`参数来控制不同卡读取哪个分片的数据,`__getitem__`和`__len__`按全量数据集处理即可。 +A: 使用`GeneratorDataset`的时候,可以使用`num_shards=num_shards`,`shard_id=device_id`参数来控制不同卡读取哪个分片的数据,`__getitem__`和`__len__`按全量数据集处理即可。 -举例: +举例: ```python -# 卡0: +# 卡0: ds.GeneratorDataset(..., num_shards=8, shard_id=0, ...) -# 卡1: +# 卡1: ds.GeneratorDataset(..., num_shards=8, shard_id=1, ...) -# 卡2: +# 卡2: ds.GeneratorDataset(..., num_shards=8, shard_id=2, ...) ... -# 卡7: +# 卡7: ds.GeneratorDataset(..., num_shards=8, shard_id=7, ...) ```
-**Q:如何构建图像的多标签MindRecord格式数据集?** +**Q: 如何构建图像的多标签MindRecord格式数据集?**

-A:数据Schema可以按如下方式定义:`cv_schema_json = {"label": {"type": "int32", "shape": [-1]}, "data": {"type": "bytes"}}` +A: 数据Schema可以按如下方式定义: `cv_schema_json = {"label": {"type": "int32", "shape": [-1]}, "data": {"type": "bytes"}}`

-说明:label是一个数组,numpy类型,这里面可以存你说的 1, 1,0,1, 0, 1 这么多label值,这些label值对应同一个data,即:同一个图像的二进制值。 +说明: label是一个数组,numpy类型,这里面可以存 1, 1, 0, 1, 0, 1 这么多label值,这些label值对应同一个data,即: 同一个图像的二进制值。

可以参考[将数据集转换为MindRecord](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/convert_dataset.html#将数据集转换为MindRecord)教程。
-**Q:请问自己制作的黑底白字`28*28`的数字图片,使用MindSpore训练出来的模型做预测,报错提示`wrong shape of image`是怎么回事?** +**Q: 请问自己制作的黑底白字`28*28`的数字图片,使用MindSpore训练出来的模型做预测,报错提示`wrong shape of image`是怎么回事?** -A:首先MindSpore训练使用的灰度图MNIST数据集。所以模型使用时对数据是有要求的,需要设置为`28*28`的灰度图,就是单通道才可以。 +A: 首先MindSpore训练使用的灰度图MNIST数据集。所以模型使用时对数据是有要求的,需要设置为`28*28`的灰度图,就是单通道才可以。
-**Q:第一次看到有专门的数据处理框架,能介绍下么?** +**Q: 第一次看到有专门的数据处理框架,能介绍下么?** -A:MindData提供数据处理异构硬件加速功能,高并发数据处理`pipeline`同时支持`NPU/GPU/CPU`,`CPU`占用降低30%,点击查询[优化数据处理](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/optimize_data_processing.html)。 +A: MindData提供数据处理异构硬件加速功能,高并发数据处理`pipeline`同时支持`Ascend/GPU/CPU`,`CPU`占用降低30%,点击查询[优化数据处理](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/optimize_data_processing.html)。
-**Q:网络训练时出现报错提示数据下发失败“TDT Push data into device Failed”,如何定位原因?** +**Q: 网络训练时出现报错提示数据下发失败“TDT Push data into device Failed”,如何定位原因?** -A:首先上述报错指的是通过训练数据下发通道(TDT,train data transfer)发送数据到卡(device)上失败,导致这一报错的原因可能有多种,因此日志中给出了相应的检查建议,具体而言: +A: 首先上述报错指的是通过训练数据下发通道(TDT,train data transfer)发送数据到卡(device)上失败,导致这一报错的原因可能有多种,因此日志中给出了相应的检查建议,具体而言: 1. 通常我们会找到日志中最先抛出的错误(第一个ERROR级别的错误)或报错堆栈(TraceBack),并尝试从中找到有助于定位错误原因的信息。 - 2. **在图编译阶段,训练还没开始报错时**(例如日志中还没打印loss),请先检查下报错(ERROR)日志中是否有网络中涉及的相关算子报错或涉及环境没配置好导致的报错(如hccl.json不对导致多卡通信初始化异常) - - 3. **在中间训练过程中报错时**,通常为host侧下发的数据量(batch数)与网络训练需要的数据量(step数)不匹配导致的,可以通过`get_dataset_size`接口打印一个epoch中包含的batch数,并检查host下发的数据量和device侧收到的数据量(检查方式如下): + 2. **在图编译阶段,训练还没开始报错时**(例如日志中还没打印loss),请先检查下报错(ERROR)日志中是否有网络中涉及的相关算子报错或涉及环境没配置好导致的报错(如hccl.json不对导致多卡通信初始化异常)。 - ```bash - # 注意:数据量都指的列数,如一个batch包含image, label两列,5个batch则包含10列数据 - # 注意:如果开启了环境变量“export ASCEND_SLOG_PRINT_TO_STDOUT=1"则下面plog中的日志将在屏幕上直接打印出来或在重定向的日志文件中。 + 3. **在中间训练过程中报错时**,通常为下发的数据量(batch数)与网络训练需要的数据量(step数)不匹配导致的,可以通过`get_dataset_size`接口打印一个epoch中包含的batch数,导致异常的部分可能原因如下: - # 数据处理队列发给host tdt的数据量,pid为训练任务的进程id - # 文件名如:plog-64944-20210531165504682.log,统计数据量可以加`|wc -l`统计has got日志的条数 - grep -rn "has got" ~/ascend/log/plog/plog-pid_timestamp0.log - - # host tdt发到device tdt的数据量,进行如下搜索,日志中关键字眼如“index is"后面的值即为host发下去的数据量 - grep -rn "has sent" ~/ascend/log/plog/plog-pid_timestamp0.log - - # 查看device侧队列的数据量,pid为训练任务的进程id,与上述host侧的进程id一致 - # 进行如下搜索,日志中关键字眼如“index=",等号后面的值即为device侧收到的数据量 - grep -rn "enqueue data" ~/ascend/log/device-id/device-pid_timestamp1.log - ``` - - - 如果host侧下发的与device侧收到的数据量相等,且该值小于网络正常训练完成的数据量,则数据下发失败主要为host侧数据处理异常导致供应不上网络训练,有三种可能的定位思路: - - 如果数据量刚好为一个epoch中batch数的整数倍,则可能是数据处理部分涉及epoch的处理存在问题,如下面这场景: + - 通过查看打印loss次数的等方式判断如果数据量(step数)刚好为一个epoch中batch数的整数倍,则可能是数据处理部分涉及epoch的处理存在问题,如下面这场景: ```python ... @@ -171,15 +162,10 @@ A:首先上述报错指的是通过训练数据下发通道(TDT,train data return dataset ``` - - 数据处理性能较慢,跟不上网络训练的速度,针对这一场景,可借助profiler工具和MindInsight看一下是否存在明显的迭代间隙,或手动遍历一下dataset,并打印计算下平均单batch的耗时,是否比网络正反向加起来的时间更长,如果是则大概率需要对数据处理部分进行性能优化。 - - 训练过程中出现异常数据抛出异常导致下发数据失败,同常这种情况会有其他报错(ERROR)日志会提示数据处理哪个环节出现了异常及检查建议。如果不明显,也可以通过遍历dataset每条数据的方式尝试找出异常的数据(如关闭shuffle, 然后进行二分法)。 - - 如果host侧与device侧的数据量不相等(通常为host发的数据量更多), 则可能为tdt模块存在一点问题(如反压等)需找模块开发人员协助定位。 + - 考虑是否是数据处理性能较慢,跟不上网络训练的速度,针对这一场景,可借助profiler工具和MindInsight看一下是否存在明显的迭代间隙,或手动遍历一下dataset,并打印计算下平均单batch的耗时,是否比网络正反向加起来的时间更长,如果是则大概率需要对数据处理部分进行性能优化。 - 4. 如果**在训练结束后**打印这条日志(大抵是强制释放资源导致),可忽略这个报错。 + - 训练过程中出现异常数据抛出异常导致下发数据失败,通常这种情况会有其他报错(ERROR)日志会提示数据处理哪个环节出现了异常及检查建议。如果不明显,也可以通过遍历dataset每条数据的方式尝试找出异常的数据(如关闭shuffle, 然后进行二分法)。 - 5. 如果仍不能定位具体原因,请开启mindspore和CANN的info级别日志,并检查日志看报错位置上下文寻找有帮助的信息,CANN host日志文件路径为:~/ascend/log/plog/plog-pid-timestamp.log + 4. 如果**在训练结束后**打印这条日志(大抵是强制释放资源导致),可忽略这个报错。 - ```bash - export GLOG_v=1 # set mindspore log level into info level - export GLOBAL_ASCEND_LOG_LEVEL=1 # set CANN log level into info level - ``` + 5. 如果仍不能定位具体原因,请通过提issue或论坛提问等方式找模块开发人员协助定位。 diff --git a/docs/faq/source_zh_cn/distributed_configure.md b/docs/faq/source_zh_cn/distributed_configure.md index 728f74da140ad52241637a01ba80cb4c8acf8b76..6ffbb2a3b266b68d9eaeb4fabb6fd5dc8c28b225 100644 --- a/docs/faq/source_zh_cn/distributed_configure.md +++ b/docs/faq/source_zh_cn/distributed_configure.md @@ -4,7 +4,7 @@ -**Q:MindSpore执行GPU分布式训练报错如下,如何解决:** +**Q: MindSpore执行GPU分布式训练报错如下,如何解决:** ```text Loading libgpu_collective.so failed. Many reasons could cause this: @@ -13,20 +13,26 @@ Loading libgpu_collective.so failed. 
Many reasons could cause this: 3.mpi is not installed or found ``` -A:此问题为MindSpore动态加载集合通信库失败,可能原因如下: +A: 此问题为MindSpore动态加载集合通信库失败,可能原因如下: - 执行环境未安装分布式训练依赖的OpenMPI以及NCCL。 -- NCCL版本未更新至`v2.7.6`:MindSpore `v1.1.0`新增GPU P2P通信算子,该特性依赖于NCCL `v2.7.6`,若环境使用的NCCL未升级为此版本,则会引起加载失败错误。 +- NCCL版本未更新至`v2.7.6`: MindSpore `v1.1.0`新增GPU P2P通信算子,该特性依赖于NCCL `v2.7.6`,若环境使用的NCCL未升级为此版本,则会引起加载失败错误。
-**Q:基于Ascend环境需要配置通信配置文件,应该如何配置?** +**Q: 基于Ascend环境需要配置通信配置文件,应该如何配置?** -A:请参考MindSpore教程的基于Ascend分布式训练的[配置分布式环境变量](https://mindspore.cn/tutorial/training/zh-CN/master/advanced_use/distributed_training_ascend.html#id4)部分。 +A: 请参考MindSpore教程的基于Ascend分布式训练的[配置分布式环境变量](https://mindspore.cn/tutorial/training/zh-CN/master/advanced_use/distributed_training_ascend.html#id4)部分。
-**Q:如何进行分布式多机多卡训练?** +**Q: 如何进行分布式多机多卡训练?** -A:基于Ascend环境的,请参考MindSpore教程的基于Ascend分布式训练的[多机多卡训练](https://mindspore.cn/tutorial/training/zh-CN/master/advanced_use/distributed_training_ascend.html#id20) 部分。 +A: 基于Ascend环境的,请参考MindSpore教程的基于Ascend分布式训练的[多机多卡训练](https://mindspore.cn/tutorial/training/zh-CN/master/advanced_use/distributed_training_ascend.html#id20) 部分。 基于GPU环境的,请参考MindSpore教程的基于GPU分布式训练的[运行多机脚本](https://mindspore.cn/tutorial/training/zh-CN/master/advanced_use/distributed_training_gpu.html#id8) 部分。 + +
+ +**Q: `rank_table_file`是什么文件?** + +A: `rank_table_file`是我们对于Ascend环境上运行分布式训练时的芯片资源配置文件的叫法,其中包括了参与训练的Ascend加速卡的设备ip和服务器ip信息等。运行分布式训练时通常需要使用`RANK_TABLE_FILE`环境变量指定该文件。详细介绍可以参考[昇腾芯片资源信息配置文件](https://support.huaweicloud.com/tensorflow-cann502alpha3training/atlasmprtg_13_0020.html)。你可以使用[hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)自动生成当前服务器的配置文件。 diff --git a/docs/faq/source_zh_cn/feature_advice.md b/docs/faq/source_zh_cn/feature_advice.md index b4b737e19b5f80ac748271ff18f7187bb0ef5209..2a0aaa1d57b7de127d16c76825252f4c3a70927d 100644 --- a/docs/faq/source_zh_cn/feature_advice.md +++ b/docs/faq/source_zh_cn/feature_advice.md @@ -4,126 +4,154 @@ -**Q:一个环境中如果既安装了MindSpore,又安装了PyTorch,是否在一个python文件中可以混用两个框架的语法呢?** +**Q: 导出MindIR格式的时候,`input=np.random.uniform(...)`是不是固定格式?** -A:可以在一个python文件中混用两个两个框架的。要注意类型间的区别。例如两个框架创建的Tensor类型是不同的,但对于python的基础类型都是通用的。 +A: 不是固定格式的,这一步操作是为了创建一个输入,以便于构建网络结构。`export`里只要传入正确的`shape`即可,使用`np.ones`和`np.zeros`创建都是可以的。
-**Q:MindSpore可以读取TensorFlow的ckpt文件吗?** +**Q: MindSpore现支持直接读取哪些其他框架的模型和哪些格式呢?比如PyTorch下训练得到的pth模型可以加载到MindSpore框架下使用吗?** -A:MindSpore的`ckpt`和TensorFlow的`ckpt`格式是不通用的,虽然都是使用`protobuf`协议,但是`proto`的定义是不同的。当前MindSpore不支持读取TensorFlow或PyTorch的`ckpt`文件。 +A: MindSpore采用Protobuf存储训练参数,无法直接读取其他框架的模型。对于模型文件本质保存的就是参数和对应的值,可以用其他框架的API将参数读取出来之后,拿到参数的键值对,然后再加载到MindSpore中使用。比如想用其他框架训练好的ckpt文件,可以先把参数读取出来,再调用MindSpore的`save_checkpoint`接口,就可以保存成MindSpore可以读取的ckpt文件格式了。
-**Q:用MindSpore训练出的模型如何在Ascend 310上使用?可以转换成适用于HiLens Kit用的吗?** +**Q: 在使用ckpt或导出模型的过程中,报Protobuf内存限制错误,如何处理?** -A:Ascend 310需要运行专用的OM模型,先使用MindSpore导出ONNX或AIR模型,再转化为Ascend 310支持的OM模型。具体可参考[多平台推理](https://www.mindspore.cn/tutorial/inference/zh-CN/master/multi_platform_inference_ascend_310.html)。可以,HiLens Kit是以Ascend 310为推理核心,所以前后两个问题本质上是一样的,需要转换为OM模型. +A: 当单条Protobuf数据过大时,因为Protobuf自身对数据流大小的限制,会报出内存限制的错误。这时可通过设置环境变量`PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`解除限制。
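+
+上一问中提到的环境变量,可以在执行训练或导出脚本前按如下方式设置(其中脚本名`export_model.py`仅为示意):
+
+```bash
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+python export_model.py
+```
+
+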
-**Q:MindSpore只能在华为自己的`NPU`上跑么?** +**Q: PyNative模式和Graph模式的区别?** -A: MindSpore同时支持华为自己的`Ascend NPU`、`GPU`与`CPU`,是支持异构算力的。 +A: 通过下面四个方面进行对比: + +- 网络执行:两个模式使用的算子是一致的,因此相同的网络和算子,分别在两个模式下执行时,精度效果是一致的。由于Graph模式运用了图优化、计算图整图下沉等技术,Graph模式执行网络的性能和效率更高; + +- 场景使用:Graph模式需要一开始就构建好网络结构,然后框架做整图优化和执行,比较适合网络固定没有变化,且需要高性能的场景; + +- 不同硬件(`Ascend`、`GPU`和`CPU`)资源:都支持这两种模式; + +- 代码调试:由于PyNative模式是逐行执行算子,用户可以直接调试Python代码,在代码中任意位置打断点查看对应算子的输出或执行结果。而Graph模式由于在构造函数里只是完成网络构造,实际没有执行,因此在`construct`函数里打断点无法获取对应算子的输出,只能先指定算子进行打印,然后在网络执行完成后查看输出结果。
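+
+上面两种模式可以通过`context`接口进行切换,简单示意如下:
+
+```python
+from mindspore import context
+
+# Graph模式(静态图): 整图编译执行,性能更高
+context.set_context(mode=context.GRAPH_MODE)
+
+# PyNative模式(动态图): 算子逐一下发,便于逐行调试
+context.set_context(mode=context.PYNATIVE_MODE)
+```
+
+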
-**Q:MindSpore在Ascend 310上是否可以转AIR模型?**
+**Q: 使用MindSpore在GPU上训练的网络脚本可以不做修改直接在Ascend上进行训练么?**

-A:Ascend 310不能导出AIR,需要在Ascend 910加载训练好的checkpoint后,导出AIR,然后在Ascend 310转成OM模型进行推理。Ascend 910的安装方法可以参考官网MindSpore[安装指南](https://www.mindspore.cn/install)。
+A: 可以的,MindSpore面向Ascend/GPU/CPU提供统一的API,在算子支持的前提下,网络脚本可以不做修改直接跨平台运行。


-**Q:MindSpore对导出、导入模型的单个Tensor输入大小有什么限制?** +**Q: 一个环境中如果既安装了MindSpore,又安装了PyTorch,是否在一个python文件中可以混用两个框架的语法呢?** -A:由于ProtoBuf的硬件限制,导出AIR、ONNX模型时,单个Tensor大小不能超过2G。导入的MindIR模型中,单个Tensor不能超过2G。 +A: 可以在一个python文件中混用两个框架的。要注意类型间的区别。例如两个框架创建的Tensor类型是不同的,但对于python的基础类型都是通用的。
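+
+例如,两个框架的`Tensor`可以借助`numpy`互相转换,下面是一个简单示意:
+
+```python
+import torch
+from mindspore import Tensor
+
+torch_tensor = torch.ones(2, 2)
+ms_tensor = Tensor(torch_tensor.numpy())             # PyTorch Tensor -> MindSpore Tensor
+torch_back = torch.from_numpy(ms_tensor.asnumpy())   # MindSpore Tensor -> PyTorch Tensor
+```
+
+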
-**Q:安装运行MindSpore时,是否要求平台有GPU、NPU等计算单元?需要什么硬件支持?** +**Q: MindSpore可以读取TensorFlow的ckpt文件吗?** -A:MindSpore当前支持CPU/GPU/Ascend /NPU。目前笔记本电脑或者有GPU的环境,都可以通过Docker镜像来试用。当前MindSpore Model Zoo中有部分模型已经支持GPU的训练和推理,其他模型也在不断地进行完善。在分布式并行训练方面,MindSpore当前支持GPU多卡训练。你可以通过[RoadMap](https://www.mindspore.cn/doc/note/zh-CN/master/roadmap.html)和项目[Release note](https://gitee.com/mindspore/mindspore/blob/master/RELEASE.md#)获取最新信息。 +A: MindSpore的`ckpt`和TensorFlow的`ckpt`格式是不通用的,虽然都是使用`Protobuf`协议,但是`proto`的定义是不同的。当前MindSpore不支持读取TensorFlow或PyTorch的`ckpt`文件。
-**Q:针对异构计算单元的支持,MindSpore有什么计划?**
+**Q: 用MindSpore训练出的模型如何在Ascend 310上使用?可以转换成适用于HiLens Kit用的吗?**

-A:MindSpore提供了可插拔式的设备管理接口,其他计算单元(比如FPGA)可快速灵活地实现与MindSpore的对接,欢迎您参与社区进行异构计算后端的开发工作。
+A: Ascend 310需要运行专用的OM模型,先使用MindSpore导出ONNX或AIR模型,再转化为Ascend 310支持的OM模型,具体可参考[多平台推理](https://www.mindspore.cn/tutorial/inference/zh-CN/master/multi_platform_inference_ascend_310.html)。可以转换:HiLens Kit是以Ascend 310为推理核心,所以前后两个问题本质上是一样的,都需要转换为OM模型。


-**Q:MindSpore与ModelArts是什么关系,在ModelArts中能使用MindSpore吗?** +**Q: MindSpore只能在华为自己的`Ascend`上跑么?** -A:ModelArts是华为公有云线上训练及推理平台,MindSpore是华为深度学习框架,可以查阅[MindSpore官网教程](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/use_on_the_cloud.html),教程中详细展示了用户如何使用ModelArts来做MindSpore的模型训练。 +A: MindSpore同时支持华为自己的`Ascend`、`GPU`与`CPU`,是支持异构算力的。
-**Q:最近出来的taichi编程语言有Python扩展,类似`import taichi as ti`就能直接用了,MindSpore是否也支持?** +**Q: MindSpore在Ascend 310上是否可以转AIR模型?** -A:MindSpore支持Python原生表达,`import mindspore`相关包即可使用。 +A: Ascend 310不能导出AIR,需要在Ascend 910加载训练好的checkpoint后,导出AIR,然后在Ascend 310转成OM模型进行推理。Ascend 910的安装方法可以参考官网MindSpore[安装指南](https://www.mindspore.cn/install)。
-**Q:请问MindSpore支持梯度截断吗?**
+**Q: MindSpore对导出、导入模型的单个Tensor输入大小有什么限制?**

-A:支持,可以参考[梯度截断的定义和使用](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/nlp/transformer/src/transformer_for_train.py#L35)。
+A: 由于Protobuf的硬件限制,导出AIR、ONNX格式时,模型参数大小不能超过2G;导出MindIR格式时,单个Tensor大小不能超过2G。MindSpore不支持导入AIR、ONNX格式,只支持MindIR,导入大小限制与导出一致。


-**Q:MindSpore的IR设计理念是什么?** +**Q: 安装运行MindSpore时,是否要求平台有GPU计算单元?需要什么硬件支持?** -A:函数式:一切皆函数,易于微分实现;无副作用,易于实现自动并行化分析;`JIT`编译能力:图形IR,控制流依赖和数据流合一,平衡通用性/易用性;图灵完备的IR:更多的转换`Python`灵活语法,包括递归等。 +A: MindSpore当前支持CPU/GPU/Ascend。目前笔记本电脑或者有GPU的环境,都可以通过Docker镜像来使用。当前MindSpore Model Zoo中有部分模型已经支持GPU的训练和推理,其他模型也在不断地进行完善。在分布式并行训练方面,MindSpore当前支持GPU多卡训练。你可以通过[RoadMap](https://www.mindspore.cn/doc/note/zh-CN/master/roadmap.html)和项目[Release note](https://gitee.com/mindspore/mindspore/blob/master/RELEASE.md#)获取最新信息。
-**Q:MindSpore并行模型训练的优势和特色有哪些?** +**Q: 针对异构计算单元的支持,MindSpore有什么计划?** -A:MindSpore分布式训练除了支持数据并行,还支持算子级模型并行,可以对算子输入tensor进行切分并行。在此基础上支持自动并行,用户只需要写单卡脚本,就能自动切分到多个节点并行执行。 +A: MindSpore提供了可插拔式的设备管理接口,其他计算单元(比如FPGA)可快速灵活地实现与MindSpore的对接,欢迎您参与社区进行异构计算后端的开发工作。
-**Q:MindSpore在语义协同和处理上是如何实现的?是否利用当前学术界流行的FCA理论?** +**Q: MindSpore与ModelArts是什么关系,在ModelArts中能使用MindSpore吗?** -A:MindSpore框架本身并不需要支持FCA。对于语义类模型,用户可以调用第三方的工具在数据预处理阶段做FCA数学分析。MindSpore本身支持Python语言,`import FCA`相关包即可使用。 +A: ModelArts是华为公有云线上训练及推理平台,MindSpore是华为深度学习框架,可以查阅[MindSpore官网教程](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/use_on_the_cloud.html),教程中详细展示了用户如何使用ModelArts来做MindSpore的模型训练。
-**Q:当前在云上MindSpore的训练和推理功能是比较完备的,至于边端场景(尤其是终端设备)MindSpore有什么计划?** +**Q: 最近出来的taichi编程语言有Python扩展,类似`import taichi as ti`就能直接用了,MindSpore是否也支持?** -A:MindSpore是端边云统一的训练和推理框架,支持将云侧训练的模型导出到Ascend AI处理器和终端设备进行推理。当前推理阶段支持的优化包括量化、算子融合、内存复用等。 +A: MindSpore支持Python原生表达,`import mindspore`相关包即可使用。
-**Q:MindSpore自动并行支持情况如何?** +**Q: 请问MindSpore支持梯度截断吗?** -A:自动并行特性对CPU GPU的支持还在完善中。推荐用户在Ascend 910 AI处理器上使用自动并行,可以关注开源社区,申请MindSpore开发者体验环境进行试用。 +A: 支持,可以参考[梯度截断的定义和使用](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/nlp/transformer/src/transformer_for_train.py#L35)。
-**Q:MindSpore有没有类似基于TensorFlow实现的对象检测算法的模块?**
+**Q: MindSpore的IR设计理念是什么?**

-A:TensorFlow的对象检测Pipeline接口属于TensorFlow Model模块。待MindSpore检测类模型完备后,会提供类似的Pipeline接口。
+A: 函数式: 一切皆函数,易于微分实现;无副作用,易于实现自动并行化分析。`JIT`编译能力: 图形IR,控制流依赖和数据流合一,平衡通用性/易用性。图灵完备的IR: 能够转换更多的`Python`灵活语法,包括递归等。


-**Q:MindSpore Serving是否支持热更新,避免推理服务中断?** +**Q: MindSpore并行模型训练的优势和特色有哪些?** -A:MindSpore Serving当前不支持热更新,需要用户重启;当前建议跑多个Serving服务,升级模型版本时,重启部分服务以避免服务中断。 +A: MindSpore分布式训练除了支持数据并行,还支持算子级模型并行,可以对算子输入tensor进行切分并行。在此基础上支持自动并行,用户只需要写单卡脚本,就能自动切分到多个节点并行执行。
-**Q:MindSpore Serving是否支持一个模型启动多个Worker,以支持多卡单模型并发?** +**Q: MindSpore在语义协同和处理上是如何实现的?是否利用当前学术界流行的FCA理论?** -A:MindSpore Serving暂未支持分流,即不支持一个模型启动多个Worker,这个功能正在开发中;当前建议跑多个Serving服务,通过对接多个Serving服务的服务器进行分流和负载均衡。另外,为了避免`master`和`worker`之间的消息转发,可以使用接口`start_servable_in_master`使`master`和`worker`执行在同一进程,实现Serving服务轻量级部署。 +A: MindSpore框架本身并不需要支持FCA。对于语义类模型,用户可以调用第三方的工具在数据预处理阶段做FCA数学分析。MindSpore本身支持Python语言,`import FCA`相关包即可使用。
-**Q:MindSpore Serving的版本和MindSpore的版本如何配套?** +**Q: 当前在云上MindSpore的训练和推理功能是比较完备的,至于边端场景(尤其是终端设备)MindSpore有什么计划?** -A:MindSpore Serving配套相同版本号的MindSpore的版本,比如Serving `1.1.1`版本配套 MindSpore `1.1.1`版本。 +A: MindSpore是端边云统一的训练和推理框架,支持将云侧训练的模型导出到Ascend AI处理器和终端设备进行推理。当前推理阶段支持的优化包括量化、算子融合、内存复用等。
-**Q:使用PyNative模式能够进行迁移学习?**
+**Q: MindSpore自动并行支持情况如何?**
+
+A: 自动并行特性对CPU、GPU的支持还在完善中。推荐用户在Ascend 910 AI处理器上使用自动并行,可以关注开源社区,申请MindSpore开发者体验环境进行试用。
+
+
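+
+在Ascend上试用自动并行时,可参考如下设置方式(仅为示意,实际还需结合分布式启动方式与`rank_table_file`等配置):
+
+```python
+from mindspore import context
+from mindspore.context import ParallelMode
+from mindspore.communication.management import init
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+init()
+context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL)
+```
+
+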
+ +**Q: MindSpore有没有类似基于TensorFlow实现的对象检测算法的模块?** + +A: TensorFlow的对象检测Pipeline接口属于TensorFlow Model模块。待MindSpore检测类模型完备后,会提供类似的Pipeline接口。 + +
+
+**Q: 使用PyNative模式能否进行迁移学习?**

A: PyNative模式是兼容迁移学习的,更多的教程信息,可以参考[预训练模型加载代码详解](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/cv_mobilenetv2_fine_tune.html#id7)。
+
+
+ +**Q: MindSpore仓库中的[ModelZoo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)和昇腾官网的[ModelZoo](https://www.hiascend.com/software/modelzoo)有什么关系?** + +A: MindSpore的ModelZoo主要提供MindSpore框架实现的模型,同时包括了Ascend/GPU/CPU/Mobile多种设备的支持。昇腾的ModelZoo主要提供运行于Ascend加速芯片上的模型,包括了MindSpore/PyTorch/TensorFlow/Caffe等多种框架的支持。可以参考对应的[Gitee仓库](https://gitee.com/ascend/modelzoo) + +其中MindSpore+Ascend的组合是有重合的,这部分模型会以MindSpore的ModelZoo为主要版本,定期向昇腾ModelZoo发布。 diff --git a/docs/faq/source_zh_cn/frontend_compile.md b/docs/faq/source_zh_cn/frontend_compile.md deleted file mode 100644 index 277531b517ed8198c124c4f5e0f2751f71bef1e2..0000000000000000000000000000000000000000 --- a/docs/faq/source_zh_cn/frontend_compile.md +++ /dev/null @@ -1,50 +0,0 @@ -# 前端编译 - -`Linux` `Windows` `Ascend` `GPU` `CPU` `环境准备` `初级` `中级` - - - -**Q:运行时报错“Create python object \`\` failed, only support create Cell or Primitive object.”怎么办?** - -A:当前图模式不支持在网络里构造`Tensor`,即不支持语法`x = Tensor(args...)`。 - -如果是常量`Tensor`,请在`__init__`函数中定义。如果不是常量`Tensor`,可以通过`@constexpr`装饰器修饰函数,在函数里生成`Tensor`。 - -关于`@constexpr`的用法可参考:。 - -对于网络中需要用到的常量`Tensor`,可以作为网络的属性,在`init`的时候定义,即`self.x = Tensor(args...)`,然后在`construct`里使用。 - -如下示例,通过`@constexpr`生成一个`shape = (3, 4), dtype = int64`的`Tensor`。 - -```python -@constexpr -def generate_tensor(): - return Tensor(np.ones((3, 4))) -``` - -
- -**Q:运行时报错“'self.xx' should be defined in the class '__init__' function.”怎么办?** - -A:如果在`construct`函数里,想对类成员`self.xx`赋值,那么`self.xx`必须已经在`__init__`函数中被定义为[`Parameter`]()类型,其他类型则不支持。局部变量`xx`不受这个限制。 - -
- -**Q:运行时报错“This comparator 'AnyValue' is not supported. For statement 'is', only support compare with 'None', 'False' or 'True'”怎么办?** - -A:对于语法`is` 或 `is not`而言,当前`MindSpore`仅支持与`True`、`False`和`None`的比较。暂不支持其他类型,如字符串等。 - -
- -**Q:运行时报错“MindSpore does not support comparison with operators more than one now, ops size =2”怎么办?** - -A:对于比较语句,`MindSpore`最多支持一个操作数。例如不支持语句`1 < x < 3`,请使用`1 < x and x < 3`的方式代替。 - -
- -**Q:运行时报错“TypeError: The function construct need 1 positional argument and 0 default argument, but provided 2”怎么办?** - -A:网络的实例被调用时,会执行`construct`方法,然后会检查`construct`方法需要的参数个数和实际传入的参数个数,如果不一致则会抛出以上异常。 -请检查脚本中调用网络实例时传入的参数个数,和定义的网络中`construct`函数需要的参数个数是否一致。 - -
\ No newline at end of file diff --git a/docs/faq/source_zh_cn/frontend_syntax.md b/docs/faq/source_zh_cn/frontend_syntax.md deleted file mode 100644 index e6600a5f635f14a7e05646af6c53efc4f69f923e..0000000000000000000000000000000000000000 --- a/docs/faq/source_zh_cn/frontend_syntax.md +++ /dev/null @@ -1,121 +0,0 @@ -# 前端语法 - -`Linux` `Windows` `Ascend` `GPU` `CPU` `环境准备` `初级` `中级` - - - -**Q:导出MindIR格式的时候,`input=np.random.uniform(...)`是不是固定格式?** - -A:不是固定格式的,这一步操作是为了创建一个输入,以便于构建网络结构。`export`里只要传入正确的`shape`即可,使用`np.ones`和`np.zeros`创建都是可以的。 - -
- -**Q:MindSpore如何进行参数(如dropout值)修改?** - -A:在构造网络的时候可以通过 `if self.training: x = dropput(x)`,验证的时候,执行前设置`network.set_train(mode_false)`,就可以不适用dropout,训练时设置为True就可以使用dropout。 - -
- -**Q:如何查看模型参数量?** - -A:可以直接加载CheckPoint统计,可能额外统计了动量和optimizer中的变量,需要过滤下相关变量。 -您可以参考如下接口统计网络参数量: - -```python -def count_params(net): - """Count number of parameters in the network - Args: - net (mindspore.nn.Cell): Mindspore network instance - Returns: - total_params (int): Total number of trainable params - """ - total_params = 0 - for param in net.trainable_params(): - total_params += np.prod(param.shape) - return total_params -``` - -具体[脚本链接](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/research/cv/tinynet/src/utils.py)。 - -
- -**Q:如何在训练过程中监控`loss`在最低的时候并保存训练参数?** - -A:可以自定义一个`Callback`。参考`ModelCheckpoint`的写法,此外再增加判断`loss`的逻辑: - -```python -class EarlyStop(Callback): -def __init__(self): - self.loss = None -def step_end(self, run_context): - loss = ****(get current loss) - if (self.loss == None or loss < self.loss): - self.loss = loss - # do save ckpt -``` - -
- -**Q:使用`nn.Conv2d`时,怎样获取期望大小的`feature map`?** - -A:`Conv2d shape`推导方法可以[参考这里](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/nn/mindspore.nn.Conv2d.html#mindspore.nn.Conv2d),`Conv2d`的`pad_mode`改成`same`,或者可以根据`Conv2d shape`推导公式自行计算`pad`,想要使得`shape`不变,一般pad为`(kernel_size-1)//2`。 - -
- -**Q:使用MindSpore可以自定义一个可以返回多个值的loss函数?** - -A:自定义`loss function`后还需自定义`TrainOneStepCell`,实现梯度计算时`sens`的个数和`network`的输出个数相同。具体可参考: - -```python -net = Net() - -loss_fn = MyLoss() - -loss_with_net = MyWithLossCell(net, loss_fn) - -train_net = MyTrainOneStepCell(loss_with_net, optim) - -model = Model(net=train_net, loss_fn=None, optimizer=None) -``` - -
- -**Q:MindSpore如何实现早停功能?** - -A:可以自定义`callback`方法实现早停功能。 -例子:当loss降到一定数值后,停止训练。 - -```python -class EarlyStop(Callback): - def __init__(self, control_loss=1): - super(EarlyStep, self).__init__() - self._control_loss = control_loss - - def step_end(self, run_context): - cb_params = run_context.original_args() - loss = cb_params.net_outputs - if loss.asnumpy() < self._control_loss: - # Stop training - run_context._stop_requested = True - -stop_cb = EarlyStop(control_loss=1) -model.train(epoch_size, ds_train, callbacks=[stop_cb]) -``` - -
- -**Q:模型已经训练好,如何将模型的输出结果保存为文本或者`npy`的格式?** - -A:您好,我们网络的输出为`Tensor`,需要使用`asnumpy()`方法将`Tensor`转换为`numpy`,再进行下一步保存。具体可参考: - -```python -out = net(x) - -np.save("output.npy", out.asnumpy()) -``` - -
-
-**Q:我用MindSpore在GPU上训练的网络脚本可以不做修改直接在NPU上进行训练么?**
-
-A:可以的,MindSpore面向NPU/GPU/CPU提供统一的API,在算子支持的前提下,网络脚本可以不做修改直接跨平台运行。
diff --git a/docs/faq/source_zh_cn/implement_problem.md b/docs/faq/source_zh_cn/implement_problem.md
new file mode 100644
index 0000000000000000000000000000000000000000..b9fc94facf8ceddf96e93fa44a5ee38b573bd21c
--- /dev/null
+++ b/docs/faq/source_zh_cn/implement_problem.md
@@ -0,0 +1,526 @@
+# 执行问题
+
+`Linux` `Windows` `Ascend` `GPU` `CPU` `环境准备` `初级` `中级`
+
+
+
+**Q: MindSpore如何进行参数(如dropout值)修改?**
+
+A: 在构造网络的时候可以通过 `if self.training: x = dropout(x)`,推理时,执行前设置`network.set_train(False)`,就可以不使用dropout,训练时设置为`True`就可以使用dropout,用法可参考下方示意代码。
+
+
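+
+上一问中`set_train`的用法可参考如下示意代码:
+
+```python
+import numpy as np
+from mindspore import Tensor, nn
+
+net = nn.Dropout(keep_prob=0.8)  # 这里以单独的Dropout层为例
+net.set_train(False)             # 推理阶段,dropout不生效,输出与输入一致
+x = Tensor(np.ones([2, 2]).astype(np.float32))
+print(net(x))
+net.set_train(True)              # 训练阶段,dropout生效
+```
+
+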
+
+**Q: 如何查看模型参数量?**
+
+A: 可以直接加载CheckPoint统计,可能额外统计了动量和optimizer中的变量,需要过滤下相关变量。
+您可以参考如下接口统计网络参数量:
+
+```python
+import numpy as np
+
+def count_params(net):
+    """Count number of parameters in the network
+    Args:
+        net (mindspore.nn.Cell): Mindspore network instance
+    Returns:
+        total_params (int): Total number of trainable params
+    """
+    total_params = 0
+    for param in net.trainable_params():
+        total_params += np.prod(param.shape)
+    return total_params
+```
+
+具体[脚本链接](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/research/cv/tinynet/src/utils.py)。
+
+
+
+**Q: 如何在训练过程中监控`loss`在最低的时候并保存训练参数?**
+
+A: 可以自定义一个`Callback`。参考`ModelCheckpoint`的写法,此外再增加判断`loss`的逻辑:
+
+```python
+from mindspore.train.callback import Callback
+
+class SaveMinLossCallback(Callback):
+    def __init__(self):
+        super(SaveMinLossCallback, self).__init__()
+        self.loss = None
+    def step_end(self, run_context):
+        cb_params = run_context.original_args()
+        loss = cb_params.net_outputs  # get current loss
+        if self.loss is None or loss.asnumpy() < self.loss:
+            self.loss = loss.asnumpy()
+            # do save ckpt
+```
+
+
+ +**Q: 使用`nn.Conv2d`时,怎样获取期望大小的`feature map`?** + +A: `Conv2d shape`推导方法可以[参考这里](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/nn/mindspore.nn.Conv2d.html#mindspore.nn.Conv2d),`Conv2d`的`pad_mode`改成`same`,或者可以根据`Conv2d shape`推导公式自行计算`pad`,想要使得`shape`不变,一般pad为`(kernel_size-1)//2`。 + +
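+
+例如下面的示意中,`pad_mode`设为`same`且`stride`为1时,输出的`feature map`与输入具有相同的高和宽:
+
+```python
+import numpy as np
+from mindspore import Tensor, nn
+
+net = nn.Conv2d(3, 16, kernel_size=3, stride=1, pad_mode='same')
+x = Tensor(np.ones([1, 3, 32, 32]).astype(np.float32))
+print(net(x).shape)  # (1, 16, 32, 32)
+```
+
+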
+ +**Q: 使用MindSpore可以自定义一个可以返回多个值的loss函数?** + +A: 自定义`loss function`后还需自定义`TrainOneStepCell`,实现梯度计算时`sens`的个数和`network`的输出个数相同。具体可参考: + +```python +net = Net() +loss_fn = MyLoss() +loss_with_net = MyWithLossCell(net, loss_fn) +train_net = MyTrainOneStepCell(loss_with_net, optim) +model = Model(net=train_net, loss_fn=None, optimizer=None) +``` + +
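+
+其中`MyWithLossCell`的一种简化写法如下(仅为示意,`MyTrainOneStepCell`可在`nn.TrainOneStepCell`的基础上参照修改`sens`相关逻辑):
+
+```python
+import mindspore.nn as nn
+
+class MyWithLossCell(nn.Cell):
+    def __init__(self, backbone, loss_fn):
+        super(MyWithLossCell, self).__init__(auto_prefix=False)
+        self._backbone = backbone
+        self._loss_fn = loss_fn
+
+    def construct(self, data, label):
+        out = self._backbone(data)
+        return self._loss_fn(out, label)
+```
+
+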
+
+**Q: MindSpore如何实现早停功能?**
+
+A: 可以自定义`callback`方法实现早停功能。
+例子: 当loss降到一定数值后,停止训练。
+
+```python
+from mindspore.train.callback import Callback
+
+class EarlyStop(Callback):
+    def __init__(self, control_loss=1):
+        super(EarlyStop, self).__init__()
+        self._control_loss = control_loss
+
+    def step_end(self, run_context):
+        cb_params = run_context.original_args()
+        loss = cb_params.net_outputs
+        if loss.asnumpy() < self._control_loss:
+            # Stop training
+            run_context.request_stop()
+
+stop_cb = EarlyStop(control_loss=1)
+model.train(epoch_size, ds_train, callbacks=[stop_cb])
+```
+
+
+ +**Q: 模型已经训练好,如何将模型的输出结果保存为文本或者`npy`的格式?** + +A: 我们网络的输出为`Tensor`,需要使用`asnumpy()`方法将`Tensor`转换为`numpy`,再进行下一步保存。具体可参考: + +```python +out = net(x) +np.save("output.npy", out.asnumpy()) +``` + +
+
+**Q: 运行时报错“Create python object \`\` failed, only support create Cell or Primitive object.”怎么办?**
+
+A: 当前在图模式下,`construct`函数(或`@ms_function`装饰器修饰的函数)仅支持构造`Cell`和`Primitive object`,不支持构造`Tensor`,即不支持语法`x = Tensor(args...)`。
+
+如果是常量`Tensor`,请在`__init__`函数中定义。如果不是常量`Tensor`,可以通过`@constexpr`装饰器修饰函数,在函数里生成`Tensor`。
+
+关于`@constexpr`的用法可参考: 。
+
+对于网络中需要用到的常量`Tensor`,可以作为网络的属性,在`init`的时候定义,即`self.x = Tensor(args...)`,然后在`construct`函数(或`@ms_function`装饰器修饰的函数)里使用。
+
+如下示例,通过`@constexpr`生成一个`shape = (3, 4), dtype = int64`的`Tensor`。
+
+```python
+@constexpr
+def generate_tensor():
+    return Tensor(np.ones((3, 4)).astype(np.int64))
+```
+
+
+ +**Q: 运行时报错“'self.xx' should be defined in the class '__init__' function.”怎么办?** + +A: 如果在`construct`函数里,想对类成员`self.xx`赋值,那么`self.xx`必须已经在`__init__`函数中被定义为[`Parameter`]()类型,其他类型则不支持。局部变量`xx`不受这个限制。 + +
+ +**Q: 运行时报错“This comparator 'AnyValue' is not supported. For statement 'is', only support compare with 'None', 'False' or 'True'”怎么办?** + +A: 对于语法`is` 或 `is not`而言,当前`MindSpore`仅支持与`True`、`False`和`None`的比较。暂不支持其他类型,如字符串等。 + +
+ +**Q: 运行时报错“MindSpore does not support comparison with operators more than one now, ops size =2”怎么办?** + +A: 对于比较语句,`MindSpore`最多支持一个操作数。例如不支持语句`1 < x < 3`,请使用`1 < x and x < 3`的方式代替。 + +
+ +**Q: 运行时报错“TypeError: The function construct need 1 positional argument and 0 default argument, but provided 2”怎么办?** + +A: 网络的实例被调用时,会执行`construct`方法,然后会检查`construct`方法需要的参数个数和实际传入的参数个数,如果不一致则会抛出以上异常。 +请检查脚本中调用网络实例时传入的参数个数,和定义的网络中`construct`函数需要的参数个数是否一致。 + +
+ +**Q: 运行时报错“Type Join Failed”或“Shape Join Failed”怎么办?** + +A: 在前端编译的推理阶段,会对节点的抽象类型(包含`type`、`shape`等)进行推导,常见抽象类型包括`AbstractScalar`、`AbstractTensor`、`AbstractFunction`、`AbstractTuple`、`AbstractList`等。在一些场景比如多分支场景,会对不同分支返回值的抽象类型进行`join`合并,推导出返回结果的抽象类型。如果抽象类型不匹配,或者`type`/`shape`不一致,则会抛出以上异常。 + +当出现类似“Type Join Failed: dtype1 = Float32, dtype2 = Float16”的报错时,说明数据类型不一致,导致抽象类型合并失败。根据提供的数据类型和代码行信息,可以快速定位出错范围。此外,报错信息中提供了具体的抽象类型信息、节点信息,可以通过`analyze_fail.dat`文件查看MindIR信息,定位解决问题。关于MindIR的具体介绍,可以参考[MindSpore IR(MindIR)](https://www.mindspore.cn/doc/note/zh-CN/master/design/mindspore/mindir.html)。代码样例如下: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import nn, Tensor, context + +context.set_context(mode=context.GRAPH_MODE) +class Net(nn.Cell): + def __init__(self): + super().__init__() + self.relu = ops.ReLU() + self.cast = ops.Cast() + + def construct(self, x, a, b): + if a > b: + return self.relu(x) + else: + return self.cast(self.relu(x), ms.float16) + +input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32)) +input_a = Tensor(2, ms.float32) +input_b = Tensor(6, ms.float32) +net = Net() +out_me = net(input_x, input_a, input_b) +``` + +执行结果如下: + +```text +TypeError: The return values of different branches do not match. Type Join Failed: dtype1 = Float32, dtype2 = Float16. The abstract type of the return value of the current branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float16, Value: AnyValue, Shape: NoShape), value_ptr: 0x32ed00e0, value: AnyValue), and that of the previous branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x32ed00e0, value: AnyValue). Please check the node construct.4:[CNode]5{[0]: [CNode]6}, true branch: ✓construct.2, false branch: ✗construct.3. trace: +In file test_type_join_failed.py(14)/ if a > b:/ + +The function call stack (See file 'analyze_fail.dat' for more details): +# 0 In file test_type_join_failed.py(14) + if a > b: +``` + +当出现类似“Shape Join Failed: shape1 = (2, 3, 4, 5), shape2 = ()”的报错时,说明`shape`不一致,导致抽象类型合并失败。代码样例如下: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import nn, Tensor, context + +context.set_context(mode=context.GRAPH_MODE) +class Net(nn.Cell): + def __init__(self): + super().__init__() + self.relu = ops.ReLU() + self.reducesum = ops.ReduceSum() + + def construct(self, x, a, b): + if a > b: + return self.relu(x) + else: + return self.reducesum(x) + +input_x = Tensor(np.random.rand(2, 3, 4, 5).astype(np.float32)) +input_a = Tensor(2, ms.float32) +input_b = Tensor(6, ms.float32) +net = Net() +out = net(input_x, input_a, input_b) +``` + +执行结果如下: + +```text +ValueError: The return values of different branches do not match. Shape Join Failed: shape1 = (2, 3, 4, 5), shape2 = (). The abstract type of the return value of the current branch is AbstractTensor(shape: (), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x239b5120, value: AnyValue), and that of the previous branch is AbstractTensor(shape: (2, 3, 4, 5), element: AbstractScalar(Type: Float32, Value: AnyValue, Shape: NoShape), value_ptr: 0x239b5120, value: AnyValue). Please check the node construct.4:[CNode]5{[0]: [CNode]6}, true branch: ✓construct.2, false branch: ✗construct.3. 
trace: +In file test_shape_join_failed.py(14)/ if a > b:/ + +The function call stack (See file 'analyze_fail.dat' for more details): +# 0 In file test_shape_join_failed.py(14) + if a > b: +``` + +当出现如“Type Join Failed: abstract type AbstractTensor can not join with AbstractTuple”的报错时,说明这两种抽象类型无法匹配,需要根据提供的代码行等报错信息,重新检视代码并修改。 + +
+ +**Q: 缓存服务器异常关闭如何处理?** + +A: 缓存服务器使用过程中,会进行IPC共享内存和socket文件等系统资源的分配。若允许溢出,在磁盘空间还会存在溢出的数据文件。一般情况下,如果通过`cache_admin --stop`命令正常关闭服务器,这些资源将会被自动清理。 + +但如果缓存服务器被异常关闭,例如缓存服务进程被杀等,用户需要首先尝试重新启动服务器,若启动失败,则应该依照以下步骤手动清理系统资源: + +- 删除IPC资源。 + + 1. 检查是否有IPC共享内存残留。 + + 一般情况下,系统会为缓存服务分配4GB的共享内存。通过以下命令可以查看系统中的共享内存块使用情况。 + + ```text + $ ipcs -m + ------ Shared Memory Segments -------- + key shmid owner perms bytes nattch status + 0x61020024 15532037 root 666 4294967296 1 + ``` + + 其中,`shmid`为共享内存块id,`bytes`为共享内存块的大小,`nattch`为链接到该共享内存块的进程数量。`nattch`不为0表示仍有进程使用该共享内存块。在删除共享内存前,需要停止使用该内存块的所有进程。 + + 2. 删除IPC共享内存。 + + 找到对应的共享内存id,并通过以下命令删除。 + + ```text + ipcrm -m {shmid} + ``` + +- 删除socket文件。 + + 一般情况下,socket文件位于`/tmp/mindspore/cache`。进入文件夹,执行以下命令删除socket文件。 + + ```text + rm cache_server_p{port_number} + ``` + + 其中`port_number`为用户创建缓存服务器时指定的端口号,默认为50052。 + +- 删除溢出到磁盘空间的数据文件。 + + 进入启用缓存服务器时指定的溢出数据路径。通常,默认溢出路径为`/tmp/mindspore/cache`。找到路径下对应的数据文件夹并逐一删除。 + +
+ +**Q: 通过Hub可以使用GPU加载`vgg16`模型以及是否可以做迁移模型吗?** + +A: 请手动修改如下两处参数即可: + +```python +# 增加**kwargs参数: 如下 +def vgg16(num_classes=1000, args=None, phase="train", **kwargs): +``` + +```python +# 增加**kwargs参数: 如下 +net = Vgg(cfg['16'], num_classes=num_classes, args=args, batch_norm=args.batch_norm, phase=phase, **kwargs) +``` + +
+ +**Q: 如何得到VGG模型中间层特征?** + +A: 你好,获取网络中间层的特征,其实跟具体框架没有太大关系了。`torchvison`里定义的`vgg`模型,可以通过`features`字段获取"中间层特征",`torchvison`的`vgg`源码如下: + +```python +class VGG(nn.Module): + + def __init__(self, features, num_classes=1000, init_weights=True): + super(VGG, self).__init__() + self.features = features + self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) +``` + +在MindSpore的ModelZoo里定义的`vgg16`,可以通过`layers`字段获取,如下: + +```python +network = vgg16() +print(network.layers) +``` + +
+
+**Q: 使用MindSpore进行模型训练时,`CTCLoss`的输入参数有四个: `inputs`, `labels_indices`, `labels_values`, `sequence_length`,如何使用`CTCLoss`进行训练?**
+
+A: 定义的`model.train`接口里接收的`dataset`可以是多个数据组成,形如(`data1`, `data2`, `data3`, ...),所以`dataset`是可以包含`inputs`,`labels_indices`,`labels_values`,`sequence_length`的信息的。只需要定义好相应形式的`dataset`,传入`model.train`里就可以,构造方式可参考下方示意。具体的可以了解下相应的[数据处理接口](https://www.mindspore.cn/doc/programming_guide/zh-CN/master/dataset_loading.html)。
+
+
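+
+下面给出一个构造这类多列`dataset`的简单示意(其中生成函数与各列的shape、dtype均为示例假设,需按实际网络调整):
+
+```python
+import numpy as np
+import mindspore.dataset as ds
+
+def ctc_data_generator(num=8):
+    for _ in range(num):
+        inputs = np.random.randn(100, 1, 16).astype(np.float32)
+        labels_indices = np.array([[0, 0], [0, 1]]).astype(np.int64)
+        labels_values = np.array([1, 2]).astype(np.int32)
+        sequence_length = np.array([100]).astype(np.int32)
+        yield inputs, labels_indices, labels_values, sequence_length
+
+dataset = ds.GeneratorDataset(list(ctc_data_generator()),
+                              column_names=["inputs", "labels_indices", "labels_values", "sequence_length"])
+```
+
+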
+
+**Q: 模型转移时如何把PyTorch的权重加载到MindSpore中?**
+
+A: 首先读取PyTorch的`pth`文件,以`ResNet-18`为例,若MindSpore的网络结构和PyTorch保持一致,把参数转换后即可直接加载进网络(转换示意见下方)。本例中只用到`BN`和`Conv2D`层的参数,若还有其他层在MindSpore和PyTorch中名称不一致,需要按同样的方式修改参数名称。
+
+
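+
+一个简化的转换示意如下(假设`pth`文件中保存的是`state_dict`,且参数名已与MindSpore网络对应,文件名仅为示意):
+
+```python
+import torch
+from mindspore import Tensor
+from mindspore.train.serialization import save_checkpoint
+
+par_dict = torch.load("resnet18.pth", map_location="cpu")
+params_list = []
+for name, value in par_dict.items():
+    # 如名称不一致,需要在此处完成PyTorch到MindSpore的参数名映射
+    params_list.append({"name": name, "data": Tensor(value.numpy())})
+save_checkpoint(params_list, "resnet18.ckpt")
+```
+
+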
+ +**Q: MindSpore有哪些现成的推荐类或生成类网络或模型可用?** + +A: 目前正在开发Wide & Deep、DeepFM、NCF等推荐类模型,NLP领域已经支持Bert_NEZHA,正在开发MASS等模型,用户可根据场景需要改造为生成类网络,可以关注[MindSpore Model Zoo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。 + +
+
+**Q: 如何使用MindSpore拟合$f(x)=a \times sin(x)+b$这类函数?**
+
+A: 以下拟合案例是基于MindSpore线性拟合官方案例改编而成。
+
+```python
+# The fitting function is: f(x)=2*sin(x)+3.
+import numpy as np
+from mindspore import dataset as ds
+from mindspore.common.initializer import Normal
+from mindspore import nn, Model, context
+from mindspore.train.callback import LossMonitor
+
+context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+def get_data(num, w=2.0, b=3.0):
+    # f(x)=w * sin(x) + b
+    # f(x)=2 * sin(x) +3
+    for i in range(num):
+        x = np.random.uniform(-np.pi, np.pi)
+        noise = np.random.normal(0, 1)
+        y = w * np.sin(x) + b + noise
+        yield np.array([np.sin(x)]).astype(np.float32), np.array([y]).astype(np.float32)
+
+def create_dataset(num_data, batch_size=16, repeat_size=1):
+    input_data = ds.GeneratorDataset(list(get_data(num_data)), column_names=['data','label'])
+    input_data = input_data.batch(batch_size)
+    input_data = input_data.repeat(repeat_size)
+    return input_data
+
+class LinearNet(nn.Cell):
+    def __init__(self):
+        super(LinearNet, self).__init__()
+        self.fc = nn.Dense(1, 1, Normal(0.02), Normal(0.02))
+
+    def construct(self, x):
+        x = self.fc(x)
+        return x
+
+if __name__ == "__main__":
+    num_data = 1600
+    batch_size = 16
+    repeat_size = 1
+    lr = 0.005
+    momentum = 0.9
+
+    net = LinearNet()
+    net_loss = nn.loss.MSELoss()
+    opt = nn.Momentum(net.trainable_params(), lr, momentum)
+    model = Model(net, net_loss, opt)
+
+    ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size)
+
+    model.train(1, ds_train, callbacks=LossMonitor(), dataset_sink_mode=False)
+
+    print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1])
+```
+
+
+ +**Q: 如何使用MindSpore拟合$f(x)=ax^2+bx+c$这类的二次函数?** + +A: 以下代码引用自MindSpore的官方教程的[代码仓](https://gitee.com/mindspore/docs/blob/master/tutorials/tutorial_code/linear_regression.py) + +在以下几处修改即可很好的拟合$f(x)=ax^2+bx+c$: + +1. 数据集生成。 +2. 拟合网络。 +3. 优化器。 + +修改的详细信息如下,附带解释。 + +```python +# Since the selected optimizer does not support CPU, so the training computing platform is changed to GPU, which requires readers to install the corresponding GPU version of MindSpore. +context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + +# Assuming that the function to be fitted this time is f(x)=2x^2+3x+4, the data generation function is modified as follows: +def get_data(num, a=2.0, b=3.0 ,c = 4): + for i in range(num): + x = np.random.uniform(-10.0, 10.0) + noise = np.random.normal(0, 1) + # The y value is generated by the fitting target function ax^2+bx+c. + y = x * x * a + x * b + c + noise + # When a*x^2+b*x+c is fitted, a and b are weight parameters and c is offset parameter bias. The training data corresponding to the two weights are x^2 and x respectively, so the data set generation mode is changed as follows: + yield np.array([x*x, x]).astype(np.float32), np.array([y]).astype(np.float32) + +def create_dataset(num_data, batch_size=16, repeat_size=1): + input_data = ds.GeneratorDataset(list(get_data(num_data)), column_names=['data','label']) + input_data = input_data.batch(batch_size) + input_data = input_data.repeat(repeat_size) + return input_data + +class LinearNet(nn.Cell): + def __init__(self): + super(LinearNet, self).__init__() + # Because the full join function inputs two training parameters, the input value is changed to 2, the first Nomral(0.02) will automatically assign random weights to the input two parameters, and the second Normal is the random bias. + self.fc = nn.Dense(2, 1, Normal(0.02), Normal(0.02)) + + def construct(self, x): + x = self.fc(x) + return x + +if __name__ == "__main__": + num_data = 1600 + batch_size = 16 + repeat_size = 1 + lr = 0.005 + momentum = 0.9 + + net = LinearNet() + net_loss = nn.loss.MSELoss() + # RMSProp optimalizer with better effect is selected for quadratic function fitting, Currently, Ascend and GPU computing platforms are supported. + opt = nn.RMSProp(net.trainable_params(), learning_rate=0.1) + model = Model(net, net_loss, opt) + + ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) + model.train(1, ds_train, callbacks=LossMonitor(), dataset_sink_mode=False) + + print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1]) +``` + +
+ +**Q: `mindspore/tests`下怎样执行单个`ut`用例?** + +A: `ut`用例通常需要基于debug版本的MindSpore包,官网并没有提供。可以基于源码使用`sh build.sh`编译,然后通过`pytest`指令执行,debug模式编包不依赖后端。编译选项`sh build.sh -t on`,用例执行可以参考`tests/runtest.sh`脚本。 + +
+
+**Q: 在Ascend平台上,执行用例有时候会报错`run task error`,如何获取更详细的日志帮助问题定位?**
+
+A: 使用msnpureport工具设置device侧日志级别,工具位置在: `/usr/local/Ascend/driver/tools/msnpureport`。
+
+- 全局级别:
+
+```bash
+/usr/local/Ascend/driver/tools/msnpureport -g info
+```
+
+- 模块级别:
+
+```bash
+/usr/local/Ascend/driver/tools/msnpureport -m SLOG:error
+```
+
+- Event级别:
+
+```bash
+/usr/local/Ascend/driver/tools/msnpureport -e disable/enable
+```
+
+- 多device id级别:
+
+```bash
+/usr/local/Ascend/driver/tools/msnpureport -d 1 -g warning
+```
+
+假设deviceID的取值范围是[0-7],`device0`-`device3`和`device4`-`device7`分别在一个os上。其中`device0`-`device3`共用一个日志配置文件;`device4`-`device7`共用一个配置文件。如果修改了`device0`-`device3`中的任意一个日志级别,其他`device`的日志级别也会被修改。如果修改了`device4`-`device7`中的任意一个日志级别,其他device的日志级别也会被修改。
+
+`Driver`包安装以后(假设安装路径为/usr/local/HiAI,在Windows环境下,`msnpureport.exe`执行文件在C:\ProgramFiles\Huawei\Ascend\Driver\tools\目录下),假设用户在/home/shihangbo/目录下直接执行命令行,则Device侧日志被导出到当前目录下,并以时间戳命名文件夹进行存放。
+
+
+ +**Q: 使用Ascend平台执行训练过程,出现报错: `Out of Memory!!! total[3212254720] (dynamic[0] memory poll[524288000]) malloc[32611480064] failed!` 如何解决?** + +A: 此问题属于内存占用过多导致的内存不够问题,可能原因有两种: + +- `batch_size`的值设置过大。解决办法: 将`batch_size`的值设置减小。 +- 引入了异常大的`Parameter`,例如单个数据shape为[640,1024,80,81],数据类型为float32,单个数据大小超过15G,这样差不多大小的两个数据相加时,占用内存超过3*15G,容易造成`Out of Memory`。解决办法: 检查参数的`shape`,如果异常过大,减少shape。 +- 如果以上操作还是未能解决,可以上[官方论坛](https://bbs.huaweicloud.com/forum/forum-1076-1.html)发帖提出问题,将会有专门的技术人员帮助解决。 + +
+ +**Q: 如何在训练神经网络过程中对计算损失的超参数进行改变?** + +A: 您好,很抱歉暂时还未有这样的功能。目前只能通过训练-->重新定义优化器-->训练,这样的过程寻找较优的超参数。 + +
+ +**Q: 运行应用时报错`error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory`怎么办?** + +A: 安装MindSpore所依赖的Ascend 310 AI处理器配套软件包时,`CANN`包不能安装`nnrt`版本,而是需要安装功能完整的`toolkit`版本。 + +
+ +**Q: MindSpore代码里面的model_zoo/official/cv/resnet/train.py中context.set_ps_context(enable_ps=True)为什么一定要在init之前设置** + +A: MindSpore Ascend模式下,如果先调用init,那么会为所有的进程都分配卡,但是parameter server训练模式下server是不需要分配卡的,那么worker和server就会去使用同一块卡,导致会报错: Hccl dependent tsd is not open。 diff --git a/docs/faq/source_zh_cn/index.rst b/docs/faq/source_zh_cn/index.rst index 0f121bd484918440bd536f99ba0cb0e78d4f3928..b59032531f5cd8bd93062f7e43a615229f204927 100644 --- a/docs/faq/source_zh_cn/index.rst +++ b/docs/faq/source_zh_cn/index.rst @@ -11,13 +11,10 @@ MindSpore FAQ :maxdepth: 1 installation - frontend_syntax - frontend_compile - backend_compile - operators_compile data_processing - script_implement + implement_problem training_visualization + operators_compile usage_migrate_3rd performance_tuning precision_tuning diff --git a/docs/faq/source_zh_cn/inference.md b/docs/faq/source_zh_cn/inference.md index 9033e7c95109202c8e42909b30b38d653b9b39b8..ccd5a6cb263d4f95ba46e99cdfd56855042a8abe 100644 --- a/docs/faq/source_zh_cn/inference.md +++ b/docs/faq/source_zh_cn/inference.md @@ -14,62 +14,70 @@ ## C++接口使用类 -**Q:编译应用时报错`/usr/bin/ld: warning: libxxx.so, needed by libmindspore.so, not found`怎么办?** +**Q: 编译应用时报错`/usr/bin/ld: warning: libxxx.so, needed by libmindspore.so, not found`怎么办?** -A:寻找缺少的动态库文件所在目录,添加该路径到环境变量`LD_LIBRARY_PATH`中,环境变量设置参考[Ascend 310 AI处理器上使用MindIR模型进行推理#编译推理代码](https://www.mindspore.cn/tutorial/inference/zh-CN/master/multi_platform_inference_ascend_310_mindir.html#id6)。 +A: 寻找缺少的动态库文件所在目录,添加该路径到环境变量`LD_LIBRARY_PATH`中,环境变量设置参考[Ascend 310 AI处理器上使用MindIR模型进行推理#编译推理代码](https://www.mindspore.cn/tutorial/inference/zh-CN/master/multi_platform_inference_ascend_310_mindir.html#id6)。
-**Q:运行应用时出现`ModuleNotFoundError: No module named 'te'`怎么办?** +**Q: 运行应用时出现`ModuleNotFoundError: No module named 'te'`怎么办?** -A:首先确认环境安装是否正确,`te`、`topi`等whl包是否正确安装。如果用户环境中有多个Python版本,如Conda虚拟环境中,需`ldd name_of_your_executable_app`确认应用所链接的`libpython3.7m.so.1.0`是否与当前Python路径一致,如果不一致需要调整环境变量`LD_LIBRARY_PATH`顺序。 +A: 首先确认环境安装是否正确,`te`、`topi`等whl包是否正确安装。如果用户环境中有多个Python版本,如Conda虚拟环境中,需`ldd name_of_your_executable_app`确认应用所链接的`libpython3.7m.so.1.0`是否与当前Python路径一致,如果不一致需要调整环境变量`LD_LIBRARY_PATH`顺序。
-**Q:运行应用时报错`error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory`怎么办?** +**Q: 运行应用时报错`error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory`怎么办?** -A:安装MindSpore所依赖的Ascend 310 AI处理器配套软件包时,`CANN`包不能安装`nnrt`版本,而是需要安装功能完整的`toolkit`版本。 +A: 安装MindSpore所依赖的Ascend 310 AI处理器配套软件包时,`CANN`包不能安装`nnrt`版本,而是需要安装功能完整的`toolkit`版本。 ## MindSpore Serving类 -**Q:MindSpore Serving是否支持热更新,避免推理服务中断?** +**Q: MindSpore Serving是否支持热更新,避免推理服务中断?** -A:MindSpore Serving当前不支持热更新,需要用户重启;当前建议跑多个Serving服务,升级模型版本时,重启部分服务以避免服务中断。 +A: MindSpore Serving当前不支持热更新,需要用户重启;当前建议跑多个Serving服务,升级模型版本时,重启部分服务以避免服务中断。
-**Q:MindSpore Serving是否支持一个模型启动多个Worker,以支持多卡单模型并发?** +**Q: MindSpore Serving是否支持一个模型启动多个Worker,以支持多卡单模型并发?** -A:MindSpore Serving暂未支持分流,即不支持一个模型启动多个Worker,这个功能正在开发中;当前建议跑多个Serving服务,通过对接多个Serving服务的服务器进行分流和负载均衡。另外,为了避免`master`和`worker`之间的消息转发,可以使用接口`start_servable_in_master`使`master`和`worker`执行在同一进程,实现Serving服务轻量级部署。 +A: MindSpore Serving1.3版本后支持一个模型在多卡部署多个副本,实现多卡单模型并发执行。详细可以参考[Add样例](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_server.py)。
-**Q:MindSpore Serving的版本和MindSpore的版本如何配套?** +**Q: MindSpore Serving的版本和MindSpore的版本如何配套?** -A:MindSpore Serving配套相同版本号的MindSpore的版本,比如Serving `1.1.1`版本配套 MindSpore `1.1.1`版本。 +A: MindSpore Serving配套相同版本号的MindSpore的版本,比如Serving `1.1.1`版本配套 MindSpore `1.1.1`版本。
-**Q:编译应用时报错`bash -p`方式和 `bash -e`方式的区别?** +**Q: 编译时使用`bash -p`方式和 `bash -e`方式有什么区别?** -A:MindSpore Serving的编译和运行依赖MindSpore,Serving提供两种编译方式:一种指定已安装的MindSpore路径,即`bash -p {python site-packages}/mindspore/lib`,避免编译Serving时再编译MindSpore;另一种,编译Serving时,编译配套的MindSpore,Serving会将`-e`、`-V`和`-j`选项透传给MindSpore。 -比如,在Serving目录下,`bash -e ascend -V 910 -j32`: +A: MindSpore Serving的编译和运行依赖MindSpore,Serving提供两种编译方式: 一种指定已安装的MindSpore路径,即`bash -p {python site-packages}/mindspore/lib`,避免编译Serving时再编译MindSpore;另一种,编译Serving时,编译配套的MindSpore,Serving会将`-e`、`-V`和`-j`选项透传给MindSpore。 +比如,在Serving目录下,`bash -e ascend -V 910 -j32`: - 首先将会以`bash -e ascend -V 910 -j32`方式编译`third_party/mindspore`目录下的MindSpore; - 其次,编译脚本将MindSpore编译结果作为Serving的编译依赖。
-**Q:运行应用时报错`libmindspore.so: cannot open shared object file: No such file or directory`怎么办?** +**Q: 运行应用时报错`libmindspore.so: cannot open shared object file: No such file or directory`怎么办?** -A:首先,需要确认是否安装MindSpore Serving所依赖的MindSpore;其次,Serving 1.1需要配置`LD_LIBRARY_PATH`,显式指定`libmindspore.so`所在路径,`libmindspore.so`当前在MindSpore Python安装路径的`lib`目录下;Serving 1.2后不再需要显示指定`libmindspore.so`所在路径,Serving会基于MindSpore安装路径查找并追加配置`LD_LIBRARY_PATH`,用户不再需要感知。 +A: 首先,需要确认是否安装MindSpore Serving所依赖的MindSpore;其次,Serving 1.1需要配置`LD_LIBRARY_PATH`,显式指定`libmindspore.so`所在路径,`libmindspore.so`当前在MindSpore Python安装路径的`lib`目录下;Serving 1.2后不再需要显示指定`libmindspore.so`所在路径,Serving会基于MindSpore安装路径查找并追加配置`LD_LIBRARY_PATH`,用户不再需要感知。 -**Q:如何控制Serving日志输出?** +
+
+**Q: 如何控制Serving日志输出?**

-A:MindSpore Serving采用glog来输出日志,详细可参考[日志相关的环境变量和配置](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/custom_debugging_info.html?highlight=GLOG#id11),在此基础上,额外补充的内容:
+A: MindSpore Serving采用glog来输出日志,详细可参考[日志相关的环境变量和配置](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/custom_debugging_info.html?highlight=GLOG#id11),在此基础上,额外补充的内容:

- MS_SUBMODULE_LOG_v

  该环境变量除了指定MindSpore C++各子模块日志级别,也可用于控制MindSpore Serving的日志级别。

  可以通过GLOG_v=2 MS_SUBMODULE_LOG_v="{SERVING:1}"把Serving模块的日志级别设为INFO,其他模块的日志级别设为WARNING。
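+
+例如,可以在启动Serving服务前按如下方式设置(其中启动脚本`serving_server.py`仅为示意):
+
+```bash
+export GLOG_v=2
+export MS_SUBMODULE_LOG_v="{SERVING:1}"
+python serving_server.py
+```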
diff --git a/docs/faq/source_zh_cn/installation.md b/docs/faq/source_zh_cn/installation.md
index e40a8b7ac8649aaddb656cbfde7e966de4a31599..eb125f04f9042028404cb9e2d1cef4298d66d2e2 100644
--- a/docs/faq/source_zh_cn/installation.md
+++ b/docs/faq/source_zh_cn/installation.md
@@ -17,15 +17,15 @@

## pip安装

-**Q:安装MindSpore版本:GPU、CUDA 10.1、0.5.0-beta、Ubuntu-x86,出现问题:`cannot open shared object file:file such file or directory`。**
+**Q: 安装MindSpore版本: GPU、CUDA 10.1、0.5.0-beta,出现问题: `cannot open shared object file:No such file or directory`。**

-A:从报错情况来看,是cublas库没有找到。一般的情况下是cublas库没有安装,或者是因为没有加入到环境变量中去。通常cublas是随着cuda以及驱动一起安装的,确认安装后把cublas所在的目录加入`LD_LIBRARY_PATH`环境变量中即可。
+A: 从报错情况来看,是cuBLAS库没有找到。一般的情况下是cuBLAS库没有安装,或者是因为没有加入到环境变量中去。通常cuBLAS是随着CUDA以及驱动一起安装的,确认安装后把cuBLAS所在的目录加入`LD_LIBRARY_PATH`环境变量中即可。


-**Q:使用pip安装时报错:`ERROR: mindspore_{VERSION}.whl is not a supported wheel on this platform`应该怎么办?** +**Q: 使用pip安装时报错: `ERROR: mindspore_{VERSION}.whl is not a supported wheel on this platform`应该怎么办?** -A:pip会通过wheel安装包的文件名来判断该安装包是否与当前Python环境兼容,例如安装mindspore_ascend-1.2.0-cp37-cp37m-linux_aarch64.whl时,pip会检查: +A: pip会通过wheel安装包的文件名来判断该安装包是否与当前Python环境兼容,例如安装mindspore_ascend-1.2.0-cp37-cp37m-linux_aarch64.whl时,pip会检查: 1. 当前python环境为3.7.x版本 2. 当前操作系统为Linux @@ -35,58 +35,58 @@ A:pip会通过wheel安装包的文件名来判断该安装包是否与当前Py
-**Q:使用pip安装时报错:`SSL:CERTIFICATE_VERIFY_FATLED`应该怎么办?**
+**Q: 使用pip安装时报错: `SSL:CERTIFICATE_VERIFY_FAILED`应该怎么办?**

-A:在pip安装命令后添加参数 `--trusted-host=ms-release.obs.cn-north-4.myhuaweicloud.com`重试即可。
+A: 在pip安装命令后添加参数 `--trusted-host=ms-release.obs.cn-north-4.myhuaweicloud.com`重试即可。


-**Q:pip安装MindSpore对Python版本是否有特别要求?** +**Q: pip安装MindSpore对Python版本是否有特别要求?** -A:MindSpore开发过程中用到了Python3.7+的新特性,因此建议您通过`conda`工具添加Python3.7.5的开发环境。 +A: MindSpore开发过程中用到了Python3.7+的新特性,因此建议您通过`conda`工具添加Python3.7.5的开发环境。
-**Q:MindSpore对protobuf版本是否有特别要求?** +**Q: MindSpore对Protobuf版本是否有特别要求?** -A:MindSpore默认安装protobuf的3.8.0版本,如果您本地已安装protobuf的3.12.0或更高版本,在使用pytest测试代码时日志中会产生很多告警,建议您使用命令`pip install protobuf==3.8.0`重新安装3.8.0版本。 +A: MindSpore默认安装Protobuf的3.13.0版本,如果不是该版本,在使用pytest测试代码时日志中会产生很多告警,建议您使用命令`pip install protobuf==3.13.0`重新安装3.13.0版本。
-**Q:使用pip安装时报错`ProxyError(Cannot connect to proxy)`,应该怎么办?** +**Q: 使用pip安装时报错`ProxyError(Cannot connect to proxy)`,应该怎么办?** -A:此问题一般是代理配置问题,Ubuntu环境下可通过`export http_proxy={your_proxy}`设置代理;Windows环境可以在cmd中通过`set http_proxy={your_proxy}`进行代理设置。 +A: 此问题一般是代理配置问题,Ubuntu环境下可通过`export http_proxy={your_proxy}`设置代理;Windows环境可以在cmd中通过`set http_proxy={your_proxy}`进行代理设置。
-**Q:使用pip安装时提示错误,应该怎么办?** +**Q: 使用pip安装时提示错误,应该怎么办?** -A:请执行`pip -V`查看是否绑定了Python3.7+。如果绑定的版本不对,建议使用`python3.7 -m pip install`代替`pip install`命令。 +A: 请执行`pip -V`查看是否绑定了Python3.7+。如果绑定的版本不对,建议使用`python3.7 -m pip install`代替`pip install`命令。
-**Q:使用pip安装依赖库时提示`No matching distribution found for XXX`错误,应该怎么办?** +**Q: 使用pip安装依赖库时提示`No matching distribution found for XXX`错误,应该怎么办?** -A:请执行`pip config list`,查看当前软件库索引路径`index-url`。某些情况下,软件库索引会出现更新滞后,可尝试设置其它软件库索引路径。 +A: 请执行`pip config list`,查看当前软件库索引路径`index-url`。某些情况下,软件库索引会出现更新滞后,可尝试设置其它软件库索引路径。
-**Q:MindSpore网站安装页面找不到MindInsight和MindArmour的whl包,无法安装怎么办?** +**Q: MindSpore网站安装页面找不到MindInsight和MindArmour的whl包,无法安装怎么办?** -A:您可以从[MindSpore网站下载地址](https://www.mindspore.cn/versions)下载whl包,通过`pip install`命令进行安装。 +A: 您可以从[MindSpore网站下载地址](https://www.mindspore.cn/versions)下载whl包,通过`pip install`命令进行安装。
-**Q:MindSpore是否支持Nvidia GPU独立显卡+Windows操作系统的个人电脑?** +**Q: MindSpore是否支持Nvidia GPU独立显卡+Windows操作系统的个人电脑?** -A:目前MindSpore支持的情况是GPU+Linux与CPU+Windows的组合配置,Windows+GPU的支持还在开发中。 -如果希望在GPU+Windows的环境上运行,可以尝试使用WSL+docker的方式,操作思路: +A: 目前MindSpore支持的情况是GPU+Linux与CPU+Windows的组合配置,Windows+GPU的支持还在开发中。 +如果希望在GPU+Windows的环境上运行,可以尝试使用WSL+docker的方式,操作思路: 1. 以WSL方式安装起Ubuntu18.04,参考。 2. 安装支持WSL的Nvidia驱动以及在WSL运行容器的环境部署,参考。 > 由于CUDA on WSL还是预览特性,注意参考链接里对Windows版本要求的说明,版本不够的需要做升级。 -3. 参考,取MindSpore-GPU镜像。如取MindSpore1.0.0版本容器,在WSL Ubuntu18.04中执行`docker pull mindspore/mindspore-gpu:1.0.0`运行容器: +3. 参考,取MindSpore-GPU镜像。如取MindSpore1.0.0版本容器,在WSL Ubuntu18.04中执行`docker pull mindspore/mindspore-gpu:1.0.0`运行容器: ```docker docker run -it --runtime=nvidia mindspore/mindspore-gpu:1.0.0 /bin/bash @@ -97,71 +97,71 @@ A:目前MindSpore支持的情况是GPU+Linux与CPU+Windows的组合配置,Wi
-**Q:Ascend硬件平台,在个人的Conda环境中,有时候出现报错RuntimeError: json.exception.parse_error.101 parse error at line 1, column 1: syntax error while parsing value - invalid literal; last read: 'T',该怎么处理?** +**Q: Ascend硬件平台,在个人的Conda环境中,有时候出现报错RuntimeError: json.exception.parse_error.101 parse error at line 1, column 1: syntax error while parsing value - invalid literal; last read: 'T',该怎么处理?** -A:出现这种类型的报错,大概率是run包更新后个人的Conda环境中没有更新te或topi或hccl工具包,可以将当前Conda环境中的上述几个工具包卸载,然后使用如下命令再重新安装:`pip install /usr/local/Ascend/fwkacllib/lib64/{te/topi/hccl}*any.whl`。 +A: 出现这种类型的报错,大概率是run包更新后个人的Conda环境中没有更新te或topi或hccl工具包,可以将当前Conda环境中的上述几个工具包卸载,然后使用如下命令再重新安装: `pip install /usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/{te/topi/hccl}-{version}-py3-none-any.whl`。
## 源码编译安装 -**Q:在Linux中已经安装了交叉编译工具,但是编译命令要怎么写呢?** +**Q: 在Linux中已经安装了交叉编译工具,但是编译命令要怎么写呢?** -A:arm64版本编译:`bash build.sh -I arm64`;arm32版本编译:`bash build.sh -I arm32`;注意要先设置环境变量,指定Android NDK路径:`export ANDROID_NDK=/path/to/android-ndk`,编译成功后,在output目录可以找到编译出的包。 +A: arm64版本编译: `bash build.sh -I arm64`;arm32版本编译: `bash build.sh -I arm32`;注意要先设置环境变量,指定Android NDK路径: `export ANDROID_NDK=/path/to/android-ndk`,编译成功后,在output目录可以找到编译出的包。
-**Q:MindSpore安装:版本0.6.0-beta + Ascend 910 + Ubuntu_aarch64 + Python3.7.5,手动下载对应版本的whl包,编译并安装gmp6.1.2。其他Python库依赖已经安装完成,执行样例失败,报错显示找不到so文件。** +**Q: MindSpore安装: 版本0.6.0-beta + Ascend 910 + Ubuntu_aarch64 + Python3.7.5,手动下载对应版本的whl包,编译并安装gmp6.1.2。其他Python库依赖已经安装完成,执行样例失败,报错显示找不到so文件。** -A:`libdatatransfer.so`动态库是`fwkacllib/lib64`目录下的,请先在`/usr/local`目录find到这个库所在的路径,然后把这个路径加到`LD_LIBRARY_PATH`环境变量中,确认设置生效后,再执行。 +A: `libdatatransfer.so`动态库是`fwkacllib/lib64`目录下的,请先在`/usr/local`目录查找到这个库所在的路径,然后把这个路径加到`LD_LIBRARY_PATH`环境变量中,确认设置生效后,再执行。
-**Q:源码编译MindSpore过程时间过长,或时常中断该怎么办?** +**Q: 源码编译MindSpore过程时间过长,或时常中断该怎么办?** -A:MindSpore通过submodule机制引入第三方依赖包,其中`protobuf`依赖包(v3.8.0)下载速度不稳定,建议您提前进行包缓存。 +A: MindSpore通过submodule机制引入第三方依赖包,其中`Protobuf`依赖包(v3.8.0)下载速度不稳定,建议您提前进行包缓存。
-**Q:如何改变第三方依赖库安装路径?** +**Q: 如何改变第三方依赖库安装路径?** -A:第三方依赖库的包默认安装在build/mindspore/.mslib目录下,可以设置环境变量MSLIBS_CACHE_PATH来改变安装目录,比如 `export MSLIBS_CACHE_PATH = ~/.mslib`。 +A: 第三方依赖库的包默认安装在build/mindspore/.mslib目录下,可以设置环境变量MSLIBS_CACHE_PATH来改变安装目录,比如 `export MSLIBS_CACHE_PATH = ~/.mslib`。
-**Q:MindSpore要求的配套软件版本与Ubuntu默认版本不一致怎么办?** +**Q: MindSpore要求的配套软件版本与Ubuntu默认版本不一致怎么办?** -A:当前MindSpore只提供版本配套关系,需要您手动进行配套软件的安装升级。(**注明**:MindSpore要求Python3.7.5和gcc7.3,Ubuntu 16.04默认为Python3.5和gcc5,Ubuntu 18.04默认自带Python3.7.3和gcc7.4)。 +A: 当前MindSpore只提供版本配套关系,需要您手动进行配套软件的安装升级。(**注明**: MindSpore要求Python3.7.5和gcc7.3,Ubuntu 16.04默认为Python3.5和gcc5,Ubuntu 18.04默认自带Python3.7.3和gcc7.4)。
-**Q:当源码编译MindSpore,提示`tclsh not found`时,应该怎么办?** +**Q: 当源码编译MindSpore,提示`tclsh not found`时,应该怎么办?** -A:当有此提示时说明要用户安装`tclsh`;如果仍提示缺少其他软件,同样需要安装其他软件。 +A: 当有此提示时说明要用户安装`tclsh`;如果仍提示缺少其他软件,同样需要安装其他软件。
-**Q:执行用例报错`No module named 'mindpore.version'`,应该怎么办?** +**Q: 执行用例报错`No module named 'mindpore.version'`,应该怎么办?** A: 当有这种报错时,有可能是在创建了和MindSpore安装包相同名字的路径中执行用例,导致Python导入包的时候优先找到了当前目录下,而当前目录没有version.py这个文件。解决方法就是目录重命名或者向上退出一级或者多级目录。
-**Q:源码编译时,报错`MD5 does not match`,应该怎么办?** +**Q: 源码编译时,报错`MD5 does not match`,应该怎么办?** -A: 这种报错可能是在编译的时候由于网络问题导致一些第三方库下载中断,之后重新编译的时候,该文件已经存在但是不完整,在校验MD5的时候失败。解决方法是:删除.mslib缓存路径中的相关第三方库,然后重新编译。 +A: 这种报错可能是在编译的时候由于网络问题导致一些第三方库下载中断,之后重新编译的时候,该文件已经存在但是不完整,在校验MD5的时候失败。解决方法是: 删除.mslib缓存路径中的相关第三方库,然后重新编译。
-**Q:环境上安装了Python3.7.5,环境变量设置正确,编译MindSpore时仍然报错`Python3 not found`,应该怎么办?** +**Q: 环境上安装了Python3.7.5,环境变量设置正确,编译MindSpore时仍然报错`Python3 not found`,应该怎么办?** -A:可能是因为当前环境上的Python未包含动态库。编译MindSpore需要动态链接Python库,因此需要使用开启动态库编译选项的Python3.7.5,即在源码编译Python时使用`./configure --enable-shared`命令。 +A: 可能是因为当前环境上的Python未包含动态库。编译MindSpore需要动态链接Python库,因此需要使用开启动态库编译选项的Python3.7.5,即在源码编译Python时使用`./configure --enable-shared`命令。
-**Q:编译失败后,应该清理哪些路径以确保上次失败的编译结果不会影响到下一次编译?** +**Q: 编译失败后,应该清理哪些路径以确保上次失败的编译结果不会影响到下一次编译?** -A:在编译MindSpore时,如果: +A: 在编译MindSpore时,如果: 1. 第三方组件下载或编译失败,例如icu4c的patch动作失败返回错误信息`Cmake Error at cmake/utils.cmake:301 (message): Failed patch:`,则进入编译目录下的`build/mindspore/.mslib`目录,或由`MSLIBS_CACHE_PATH`环境变量指定的第三方软件安装目录,并删除其中的对应软件。 @@ -169,15 +169,15 @@ A:在编译MindSpore时,如果:
-**Q:编译时报错,打开CMakeError.txt提示pthread找不到怎么办?** +**Q: 编译时报错,打开CMakeError.txt提示pthread找不到怎么办?** -A:真正的失败原因会体现在打屏的日志里,CMakeError.txt无参考价值,请寻找打屏日志中的第一个报错。 +A: 真正的失败原因会体现在打屏的日志里,CMakeError.txt无参考价值,请寻找打屏日志中的第一个报错。
-**Q:编译成功后,运行时报错`undefined reference to XXXX`或`undefined symbol to XXXX`怎么办?** +**Q: 编译成功后,运行时报错`undefined reference to XXXX`或`undefined symbol XXXX`怎么办?** -A:可能的原因有: +A: 可能的原因有: 1. 如果问题是`git pull`更新代码后出现,请删除掉`build`文件夹,排除前次构建的影响。 @@ -187,45 +187,55 @@ A:可能的原因有:
+**Q: 编译时使用`bash -p`方式和 `bash -e`方式有什么区别?**
+
+A: MindSpore Serving的编译和运行依赖MindSpore,Serving提供两种编译方式: 一种指定已安装的MindSpore路径,即`bash -p {python site-packages}/mindspore/lib`,避免编译Serving时再编译MindSpore;另一种,编译Serving时,编译配套的MindSpore,Serving会将`-e`、`-V`和`-j`选项透传给MindSpore。
+比如,在Serving目录下,`bash -e ascend -V 910 -j32`:
+
+- 首先将会以`bash -e ascend -V 910 -j32`方式编译`third_party/mindspore`目录下的MindSpore;
+- 其次,编译脚本将MindSpore编译结果作为Serving的编译依赖。
+
+
+ ## 卸载 -**Q:如何卸载MindSpore?** +**Q: 如何卸载MindSpore?** -A:执行命令`pip uninstall mindspore`可卸载MindSpore。 +A: 首先请确定MindSpore的全称,例如gpu版本的MindSpore,可以执行命令`pip uninstall mindspore-gpu`进行卸载。
## 环境变量 -**Q:一些常用的环境变量设置,在新启动的终端窗口中需要重新设置,容易忘记应该怎么办?** +**Q: 一些常用的环境变量设置,在新启动的终端窗口中需要重新设置,容易忘记应该怎么办?** -A:常用的环境变量设置写入到`~/.bash_profile` 或 `~/.bashrc`中,可让环境变量设置在新启动的终端窗口中立即生效。 +A: 常用的环境变量设置写入到`~/.bash_profile` 或 `~/.bashrc`中,可让环境变量设置在新启动的终端窗口中立即生效。
-**Q:使用GPU版本MindSpore时,如何设置`DEVICE_ID`环境变量**
+**Q: 使用GPU版本MindSpore时,如何设置`DEVICE_ID`环境变量?**

-A:MindSpore GPU模式一般无需设置`DEVICE_ID`环境变量,MindSpore会根据cuda环境变量`CUDA_VISIBLE_DEVICES`,自动选择可见的GPU设备。设置`CUDA_VISIBLE_DEVICES`环境变量后,则`DEVICE_ID`环境变量代表可见GPU设备的下标:
+A: MindSpore GPU模式一般无需设置`DEVICE_ID`环境变量,MindSpore会根据cuda环境变量`CUDA_VISIBLE_DEVICES`,自动选择可见的GPU设备。设置`CUDA_VISIBLE_DEVICES`环境变量后,则`DEVICE_ID`环境变量代表可见GPU设备的下标:

- 执行`export CUDA_VISIBLE_DEVICES=1,3,5`后,`DEVICE_ID`应当被设置为`0`,`1`或`2`,若设置为`3`及以上,MindSpore会由于设备ID不合法而运行失败。


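+
+例如下面的设置中,实际使用的是物理编号为3的GPU(训练脚本名仅为示意):
+
+```bash
+export CUDA_VISIBLE_DEVICES=1,3,5
+export DEVICE_ID=1
+python train.py
+```
+
+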
-**Q:编译应用时报错`/usr/bin/ld: warning: libxxx.so, needed by libmindspore.so, not found`怎么办?** +**Q: 编译应用时报错`/usr/bin/ld: warning: libxxx.so, needed by libmindspore.so, not found`怎么办?** -A:寻找缺少的动态库文件所在目录,添加该路径到环境变量`LD_LIBRARY_PATH`中,环境变量设置参考[Ascend 310 AI处理器上使用MindIR模型进行推理#编译推理代码](https://www.mindspore.cn/tutorial/inference/zh-CN/master/multi_platform_inference_ascend_310_mindir.html#id6)。 +A: 寻找缺少的动态库文件所在目录,添加该路径到环境变量`LD_LIBRARY_PATH`中,环境变量设置参考[Ascend 310 AI处理器上使用MindIR模型进行推理#编译推理代码](https://www.mindspore.cn/tutorial/inference/zh-CN/master/multi_platform_inference_ascend_310_mindir.html#id6)。
-**Q:运行应用时出现`ModuleNotFoundError: No module named 'te'`怎么办?** +**Q: 运行应用时出现`ModuleNotFoundError: No module named 'te'`怎么办?** -A:首先确认环境安装是否正确,`te`、`topi`等whl包是否正确安装。如果用户环境中有多个Python版本,如Conda虚拟环境中,需`ldd name_of_your_executable_app`确认应用所链接的`libpython3.7m.so.1.0`是否与当前Python路径一致,如果不一致需要调整环境变量`LD_LIBRARY_PATH`顺序。 +A: 首先确认环境安装是否正确,`te`、`topi`等whl包是否正确安装。如果用户环境中有多个Python版本,如Conda虚拟环境中,需`ldd name_of_your_executable_app`确认应用所链接的`libpython3.7m.so.1.0`是否与当前Python路径一致,如果不一致需要调整环境变量`LD_LIBRARY_PATH`顺序。
-**Q:Ascend AI处理器配套软件包与其他依赖软件已安装,但是执行MindSpore时提示`Cannot open shared objectfile: No such file or directory`该怎么办?** +**Q: Ascend AI处理器配套软件包与其他依赖软件已安装,但是执行MindSpore时提示`Cannot open shared objectfile: No such file or directory`该怎么办?** -A:常见原因有两种:Ascend AI处理器配套软件包或固件/驱动包版本不正确,或没有安装在默认位置且未配置相应的环境变量。 +A: 常见原因有两种: Ascend AI处理器配套软件包或固件/驱动包版本不正确,或没有安装在默认位置且未配置相应的环境变量。 1. 打开Ascend AI处理器配套软件包安装目录,默认`/usr/local/Ascend`下,各个子目录中的`version.info`文件,观察其版本号是否与当前使用的MindSpore版本一直,参照[安装页面](https://www.mindspore.cn/install/)中关于Ascend AI处理器配套软件包版本的描述。如果版本不配套,请更换软件包或MindSpore版本。 @@ -235,7 +245,7 @@ A:常见原因有两种:Ascend AI处理器配套软件包或固件/驱动包 ## 安装验证 -**Q:个人电脑CPU环境安装MindSpore后验证代码时报错:`the pointer[session] is null`,具体代码如下,该如何验证是否安装成功呢?** +**Q: 个人电脑CPU环境安装MindSpore后验证代码时报错: `the pointer[session] is null`,具体代码如下,该如何验证是否安装成功呢?** ```python import numpy as np @@ -249,22 +259,54 @@ y = Tensor(np.ones([1,3,3,4]).astype(np.float32)) print(ops.add(x,y)) ``` -A:CPU硬件平台安装MindSpore后测试是否安装成功,只需要执行命令:`python -c 'import mindspore'`,如果没有显示`No module named 'mindspore'`等错误即安装成功。问题中的验证代码仅用于验证Ascend平台安装是否成功。 +A: CPU硬件平台安装MindSpore后测试是否安装成功,只需要执行命令: `python -c 'import mindspore'`,如果没有显示`No module named 'mindspore'`等错误即安装成功。问题中的验证代码仅用于验证Ascend平台安装是否成功。
-**Q:`Linux`平台下执行用例的时候会报错`sh:1:python:not found`或者由于链接到了Python2.7的版本中而报错`No module named mindspore._extends.remote`,该怎么处理?** +**Q: `Linux`平台下执行用例的时候会报错`sh:1:python:not found`或者由于链接到了Python2.7的版本中而报错`No module named mindspore._extends.remote`,该怎么处理?** -A:遇到类似的问题,大多是由于Python的环境问题,可以通过如下方式检查Python环境是否是MindSpore运行时所需要的环境。 +A: 遇到类似的问题,大多是由于Python的环境问题,可以通过如下方式检查Python环境是否是MindSpore运行时所需要的环境。 - 在终端窗口中输入`python`,检查以下进入Python交互环境中的版本信息,如果直接报错则是没有Python的软连接;如果进入的是非Python3.7版本的环境,则当前Python环境不是MindSpore运行所需要的。 - 执行`sudo ln -sf /usr/bin/python3.7.x /usr/bin/python`创建Python的软连接,然后再检查执行。
-**Q: 在脚本中`import mindspore`之前import了其他三方库,提示如下错误(`/your_path/libgomp.so.1: cannot allocate memory in static TLS block`)该怎么解决?** +**Q: 在脚本中`import mindspore`之前import了其他三方库,提示如下错误(`/{your_path}/libgomp.so.1: cannot allocate memory in static TLS block`)该怎么解决?** -A: 上述问题较为常见,当前有两种可行的解决方法,可任选其一: +A: 上述问题较为常见,当前有两种可行的解决方法,可任选其一: - 交换import的顺序,先`import mindspore`再import其他三方库。 -- 执行程序之前先添加环境变量(`export LD_PRELOAD=/your_path/libgomp.so.1`),其中`your_path`是上述报错提示的路径。 +- 执行程序之前先添加环境变量(`export LD_PRELOAD=/{your_path}/libgomp.so.1`),其中`{your_path}`是上述报错提示的路径。 + +
+ +**Q: 训练nlp类网络,当使用第三方组件gensim时,可能会报错: ValueError,如何解决?** + +A: 以下为报错信息: + +```bash +>>> import gensim +Traceback (most recent call last): + File "", line 1, in + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/__init__.py", line 11, in + from gensim import parsing, corpora, matutils, interfaces, models, similarities, utils # noqa:F401 + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/corpora/__init__.py", line 6, in + from .indexedcorpus import IndexedCorpus # noqa:F401 must appear before the other classes + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/corpora/indexedcorpus.py", line 14, in + from gensim import interfaces, utils + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/interfaces.py", line 19, in + from gensim import utils, matutils + File "/home/miniconda3/envs/ci39_cj/lib/python3.9/site-packages/gensim/matutils.py", line 1024, in + from gensim._matutils import logsumexp, mean_absolute_difference, dirichlet_expectation + File "gensim/_matutils.pyx", line 1, in init gensim._matutils +ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject +``` + +报错原因请参考[gensim](https://github.com/RaRe-Technologies/gensim/issues/3095)官网,或者[numpy](https://github.com/numpy/numpy/issues/18709)官网: + +解决方案: + +方法一: 重新安装numpy及gensim, 执行命令: `pip uninstall gensim numpy -y && pip install numpy gensim` ; + +方法二: 如果还是有问题,请删除wheel安装包的缓存文件,然后执行方法一(wheel安装包缓存目录为: `~/.cache/pip/wheels`)。 diff --git a/docs/faq/source_zh_cn/operators_compile.md b/docs/faq/source_zh_cn/operators_compile.md index 4c288657875a3e3762bb7bb9892c03bf57b5139e..315da3024f818875a32e894bfe7598cd5f57bc3d 100644 --- a/docs/faq/source_zh_cn/operators_compile.md +++ b/docs/faq/source_zh_cn/operators_compile.md @@ -4,117 +4,74 @@ -**Q:`TransData`算子的功能是什么,能否优化性能?** +**Q: `TransData`算子的功能是什么,能否优化性能?** -A:`TransData`算子出现的场景是:如果网络中相互连接的算子使用的数据格式不一致(如NC1HWC0),框架就会自动插入`transdata`算子使其转换成一致的数据格式,然后再进行计算。 可以考虑训练的时候用我们的`amp`做混合精度,这样能减少一些`fp32`的运算,应该能减少一些`transdata`算子的调用。 +A: `TransData`算子出现的场景是: 如果网络中相互连接的算子使用的数据格式不一致(如NC1HWC0),框架就会自动插入`transdata`算子使其转换成一致的数据格式,然后再进行计算。华为Ascend支持5D格式运算,通过`transdata`算子将数据由4D转为5D以提升性能。
-**Q:算子`Concat`拼接包含多个Tensor的元组出错,似乎传入的`tensor list`元素个数>=192就会报错。如果要`Concat`包含多个Tensor的元组,有什么较好的解决方案?** +**Q: 算子`Concat`拼接包含多个Tensor的元组出错,似乎传入的`tensor list`元素个数>=192就会报错。如果要`Concat`包含多个Tensor的元组,有什么较好的解决方案?** -A:这个昇腾算子底层规格限制一次拼接的Tensor个数不能超过192个,可以尝试分开两次进行拼接。 +A: 这个昇腾算子底层规格限制一次拼接的Tensor个数不能超过192个,可以尝试分开两次进行拼接。
-**Q:在使用`Conv2D`进行卷积定义的时候使用到了`group`的参数,`group`的值不是只需要保证可以被输入输出的维度整除即可了吗?`group`参数的传递方式是怎样的呢?** +**Q: 在使用`Conv2D`进行卷积定义的时候使用到了`group`的参数,`group`的值不是只需要保证可以被输入输出的维度整除即可了吗?`group`参数的传递方式是怎样的呢?** -A:`Conv2D`算子是有这个约束条件的:当`group`大于1 时,其值必须要与输入输出的通道数相等。不要使用`ops.Conv2D`,这个算子目前不支持`group`>1。目前MindSpore只有`nn.Conv2D`接口支持组卷积,但是有`group`要与输入输出的通道数相等的约束。 -`Conv2D`算子的 - -```python -def __init__(self, - out_channel, - kernel_size, - mode=1, - pad_mode="valid", - pad=0, - stride=1, - dilation=1, - group=1, - data_format="NCHW"): -``` - -函数中带有`group`参数,这个参数默认就会被传到C++层。 +A: `Conv2D`算子是有这个约束条件的: 当`group`大于1 时,其值必须要与输入输出的通道数相等。不要使用`ops.Conv2D`,这个算子目前不支持`group`>1。目前MindSpore只有`nn.Conv2D`接口支持组卷积,但是有`group`要与输入输出的通道数相等的约束。
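+
+满足该约束的一种示意写法如下(`group`与输入输出通道数相等,即depthwise卷积):
+
+```python
+import numpy as np
+from mindspore import Tensor, nn
+
+net = nn.Conv2d(8, 8, kernel_size=3, group=8)
+x = Tensor(np.ones([1, 8, 32, 32]).astype(np.float32))
+print(net(x).shape)  # (1, 8, 32, 32)
+```
+
+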
-**Q:MindSpore支持矩阵转置吗?** +**Q: MindSpore支持矩阵转置吗?** -A:支持,请参考`mindspore.ops.Transpose`的[算子教程](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/ops/mindspore.ops.Transpose.html#mindspore.ops.Transpose)。 +A: 支持,请参考`mindspore.ops.Transpose`的[算子教程](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/ops/mindspore.ops.Transpose.html#mindspore.ops.Transpose)。
-**Q:请问MindSpore能算给定任意一个`tensor`的方差吗?** +**Q: 请问MindSpore能算给定任意一个`tensor`的方差吗?** -A:MindSpore目前暂无可以直接求出`tensor`方差的算子或接口。不过MindSpore有足够多的小算子可以支持用户实现这样的操作,你可以参考[class Moments(Cell)](https://www.mindspore.cn/doc/api_python/zh-CN/master/_modules/mindspore/nn/layer/math.html#Moments)来实现。 +A: MindSpore目前暂无可以直接求出`tensor`方差的算子或接口。不过MindSpore有足够多的小算子可以支持用户实现这样的操作,你可以参考[class Moments(Cell)](https://www.mindspore.cn/doc/api_python/zh-CN/master/_modules/mindspore/nn/layer/math.html#Moments)来实现。
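+
+在直接提供方差接口之前,也可以用小算子按定义自行组合计算,示意如下:
+
+```python
+import numpy as np
+import mindspore.ops as ops
+from mindspore import Tensor
+
+x = Tensor(np.array([[1., 2.], [3., 4.]]).astype(np.float32))
+mean_op = ops.ReduceMean(keep_dims=True)
+mean = mean_op(x)
+var = mean_op(ops.Square()(ops.Sub()(x, mean)))  # 方差 = E[(x - E[x])^2]
+print(var)
+```
+
+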
-**Q:使用MindSpore-1.0.1版本在图数据下沉模式加载数据异常是什么原因?** +**Q: `nn.Embedding`层与PyTorch相比缺少了`Padding`操作,有其余的算子可以实现吗?** -A:应该是`construct`中直接使用了带有`axis`属性的算子,比如`ops.Concat(axis=1)((x1, x2))`这种,建议把算子在`__init__`中初始化 像这样 - -```python -from mindspore import nn -import mindspore.ops as ops - -class Net(nn.Cell): - def __init__(self): - super(Net, self).__init__() - self.concat = ops.Concat(axis=1) - def construct(self, x, y): - out = self.concat((x, y)) - return out -``` +A: 在PyTorch中`padding_idx`的作用是将embedding矩阵中`padding_idx`位置的词向量置为0,并且反向传播时不会更新`padding_idx`位置的词向量。在MindSpore中,可以手动将embedding的`padding_idx`位置对应的权重初始化为0,并且在训练时通过`mask`的操作,过滤掉`padding_idx`位置对应的`Loss`。
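+
+上述思路中“将`padding_idx`位置的权重初始化为0”的做法可参考如下示意(词表大小等参数仅为示例假设):
+
+```python
+import numpy as np
+from mindspore import Tensor, nn
+
+vocab_size, embedding_size, padding_idx = 100, 16, 0
+init_table = np.random.randn(vocab_size, embedding_size).astype(np.float32)
+init_table[padding_idx] = 0  # 将padding_idx位置的词向量置为0
+embedding = nn.Embedding(vocab_size, embedding_size, embedding_table=Tensor(init_table))
+```
+
+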
-**Q:`nn.Embedding`层与PyTorch相比缺少了`Padding`操作,有其余的算子可以实现吗?** - -A:在PyTorch中`padding_idx`的作用是将embedding矩阵中`padding_idx`位置的词向量置为0,并且反向传播时不会更新`padding_idx`位置的词向量。在MindSpore中,可以手动将embedding的`padding_idx`位置对应的权重初始化为0,并且在训练时通过`mask`的操作,过滤掉`padding_idx`位置对应的`Loss`。 +**Q: Operations中`Tile`算子执行到`__infer__`时`value`值为`None`,丢失了数值是怎么回事?** -
- -**Q:Operations中`Tile`算子执行到`__infer__`时`value`值为`None`,丢失了数值是怎么回事?** - -A:`Tile`算子的`multiples input`必须是一个常量(该值不能直接或间接来自于图的输入)。否则构图的时候会拿到一个`None`的数据,因为图的输入是在图执行的时候才传下去的,构图的时候拿不到图的输入数据。 +A: `Tile`算子的`multiples input`必须是一个常量(该值不能直接或间接来自于图的输入)。否则构图的时候会拿到一个`None`的数据,因为图的输入是在图执行的时候才传下去的,构图的时候拿不到图的输入数据。 相关的资料可以看[静态图语法支持](https://www.mindspore.cn/doc/note/zh-CN/master/static_graph_syntax_support.html)。
-**Q:官网的LSTM示例在Ascend上跑不通。** - -A:目前LSTM只支持在GPU和CPU上运行,暂不支持硬件环境,您可以通过[MindSpore算子支持列表](https://www.mindspore.cn/doc/note/zh-CN/master/operator_list_ms.html)查看算子支持情况。 - -
- -**Q:conv2d设置为(3,10),Tensor[2,2,10,10],在ModelArts上利用Ascend跑,报错:`FM_W+pad_left+pad_right-KW>=strideW`,CPU下不报错。** +**Q: 使用conv2d算子将卷积核设置为(3,10),Tensor设置为[2,2,10,10],在ModelArts上利用Ascend跑,报错: `FM_W+pad_left+pad_right-KW>=strideW`,CPU下不报错。** -A:这是TBE这个算子的限制,x的width必须大于kernel的width。CPU的这个算子没有这个限制,所以不报错。 +A: TBE(Tensor Boost Engine)算子是华为自研的Ascend算子开发工具,在TVM框架基础上扩展,进行自定义算子开发。上述问题是这个TBE算子的限制,x的width必须大于kernel的width。CPU的这个算子没有这个限制,所以不报错。
-**Q:请问MindSpore实现了反池化操作了吗?类似于`nn.MaxUnpool2d` 这个反池化操作?** +**Q: 请问MindSpore实现了反池化操作了吗?类似于`nn.MaxUnpool2d` 这个反池化操作?** -A:目前 MindSpore 还没有反池化相关的接口。如果用户想自己实现的话,可以通过自定义算子的方式自行开发算子,详情请见[自定义算子](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/custom_operator.html)。 +A: 目前 MindSpore 还没有反池化相关的接口。用户可以通过自定义算子的方式自行开发算子,详情请见[自定义算子](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/custom_operator.html)。
-**Q:使用ExpandDims算子报错:`Pynative run op ExpandDims failed`。具体代码:** +**Q: 使用ExpandDims算子报错: `Pynative run op ExpandDims failed`。具体代码:** ```python -context.set_context( -mode=cintext.GRAPH_MODE, -device_target='ascend') +context.set_context(mode=context.GRAPH_MODE,device_target='Ascend') input_tensor=Tensor(np.array([[2,2],[2,2]]),mindspore.float32) expand_dims=ops.ExpandDims() output=expand_dims(input_tensor,0) ``` -A:这边的问题是选择了Graph模式却使用了PyNative的写法,所以导致报错,MindSpore支持两种运行模式,在调试或者运行方面做了不同的优化: +A: 这边的问题是选择了Graph模式却使用了PyNative的写法,所以导致报错,MindSpore支持两种运行模式,在调试或者运行方面做了不同的优化: -- PyNative模式:也称动态图模式,将神经网络中的各个算子逐一下发执行,方便用户编写和调试神经网络模型。 +- PyNative模式: 也称动态图模式,将神经网络中的各个算子逐一下发执行,方便用户编写和调试神经网络模型。 -- Graph模式:也称静态图模式或者图模式,将神经网络模型编译成一整张图,然后下发执行。该模式利用图优化等技术提高运行性能,同时有助于规模部署和跨平台运行。 +- Graph模式: 也称静态图模式或者图模式,将神经网络模型编译成一整张图,然后下发执行。该模式利用图优化等技术提高运行性能,同时有助于规模部署和跨平台运行。 用户可以参考[官网教程](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/debug_in_pynative_mode.html)选择合适、统一的模式和写法来完成训练。 diff --git a/docs/faq/source_zh_cn/performance_tuning.md b/docs/faq/source_zh_cn/performance_tuning.md index 20615269d18ed7db945d62acab9ee3044f5bda93..63f033ee6f430532469b3c0ecba7138ae0cc2ea8 100644 --- a/docs/faq/source_zh_cn/performance_tuning.md +++ b/docs/faq/source_zh_cn/performance_tuning.md @@ -4,8 +4,8 @@ -**Q:MindSpore安装完成,执行训练时发现网络性能异常,权重初始化耗时过长,怎么办?** +**Q: MindSpore安装完成,执行训练时发现网络性能异常,权重初始化耗时过长,怎么办?** -A:可能与环境中使用了`scipy 1.4`系列版本有关,通过`pip list | grep scipy`命令可查看scipy版本,建议改成MindSpore要求的`scipy`版本。版本第三方库依赖可以在`requirement.txt`中查看。 +A: 可能与环境中使用了`scipy 1.4`系列版本有关,通过`pip list | grep scipy`命令可查看scipy版本,建议改成MindSpore要求的`scipy`版本。版本第三方库依赖可以在`requirement.txt`中查看。 > 其中version替换为MindSpore具体的版本分支。 diff --git a/docs/faq/source_zh_cn/precision_tuning.md b/docs/faq/source_zh_cn/precision_tuning.md index c3185de37ee43044ed505d9ab2f302d4d26c0a52..05055c79377bd2fe00c043bccdf449e696c3e63e 100644 --- a/docs/faq/source_zh_cn/precision_tuning.md +++ b/docs/faq/source_zh_cn/precision_tuning.md @@ -4,12 +4,12 @@ -**Q:导致Loss值不收敛或者精度不达标的原因有哪些呢,应该怎样定位调优?** +**Q: 导致Loss值不收敛或者精度不达标的原因有哪些呢,应该怎样定位调优?** -A:可能导致Loss值不收敛或者精度问题的原因很多,推荐参考下面总结,逐一排查问题。 +A: 可能导致Loss值不收敛或者精度问题的原因很多,推荐参考下面总结,逐一排查问题。 -- [MindSpore模型精度调优实战(一)精度问题的常见现象、原因和简要调优思路](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=102750) +[MindSpore模型精度调优实战(一)精度问题的常见现象、原因和简要调优思路](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=102750) -- [MindSpore模型精度调优实战(二)精度调试调优思路](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=106624) +[MindSpore模型精度调优实战(二)精度调试调优思路](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=106624) -- [MindSpore模型精度调优实战(三)常见精度问题简介](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=119271) +[MindSpore模型精度调优实战(三)常见精度问题简介](https://bbs.huaweicloud.com/forum/forum.php?mod=viewthread&tid=119271) diff --git a/docs/faq/source_zh_cn/script_implement.md b/docs/faq/source_zh_cn/script_implement.md deleted file mode 100644 index 7461939e09ec9e2c2d950942ed2608050ba58768..0000000000000000000000000000000000000000 --- a/docs/faq/source_zh_cn/script_implement.md +++ /dev/null @@ -1,249 +0,0 @@ -# 执行问题 - -`Linux` `Windows` `Ascend` `GPU` `CPU` `环境准备` `初级` `中级` - - - -**Q:通过Hub可以使用GPU加载`vgg16`模型以及是否可以做迁移模型吗?** - -A:请手动修改规避,修改如下两点即可: - -```python -# 增加**kwargs参数:如下 -def vgg16(num_classes=1000, args=None, phase="train", **kwargs): -``` - -```python -# 增加**kwargs参数:如下 -net = Vgg(cfg['16'], num_classes=num_classes, args=args, batch_norm=args.batch_norm, phase=phase, **kwargs) -``` - -
- -**Q:如何得到VGG模型中间层特征?** - -A:你好,获取网络中间层的特征,其实跟具体框架没有太大关系了。`torchvison`里定义的`vgg`模型,可以通过`features`字段获取"中间层特征",`torchvison`的`vgg`源码如下: - -```python -class VGG(nn.Module): - - def __init__(self, features, num_classes=1000, init_weights=True): - super(VGG, self).__init__() - self.features = features - self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) -``` - -在MindSpore的ModelZoo里定义的`vgg16`,可以通过`layers`字段获取,如下: - -```python -network = vgg16() -print(network.layers) -``` - -
- -**Q:使用MindSpore进行模型训练时,`CTCLoss`的输入参数有四个:`inputs`, `labels_indices`, `labels_values`, `sequence_length`,如何使用`CTCLoss`进行训练?** - -A:定义的`model.train`接口里接收的`dataset`可以是多个数据组成,形如(`data1`, `data2`, `data3`, ...),所以`dataset`是可以包含`inputs`,`labels_indices`,`labels_values`,`sequence_length`的信息的。只需要定义好相应形式的`dataset`,传入`model.train`里就可以。具体的可以了解下相应的[数据处理接口](https://www.mindspore.cn/doc/programming_guide/zh-CN/master/dataset_loading.html) - -
- -**Q:模型转移时如何把PyTorch的权重加载到MindSpore中?** - -A:首先输入PyTorch的`pth`文件,以`ResNet-18`为例,MindSpore的网络结构和PyTorch保持一致,转完之后可直接加载进网络,这边参数只用到`BN`和`Conv2D`,若有其他层`ms`和PyTorch名称不一致,需要同样的修改名称。 - -
- -**Q:MindSpore有哪些现成的推荐类或生成类网络或模型可用?** - -A:目前正在开发Wide & Deep、DeepFM、NCF等推荐类模型,NLP领域已经支持Bert_NEZHA,正在开发MASS等模型,用户可根据场景需要改造为生成类网络,可以关注[MindSpore Model Zoo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。 - -
- -**Q:如何使用MindSpore拟合$f(x)=a \times sin(x)+b$这类函数?** - -A:以下拟合案例是基于MindSpore线性拟合官方案例改编而成。 - -```python -# The fitting function is:f(x)=2*sin(x)+3. -import numpy as np -from mindspore import dataset as ds -from mindspore.common.initializer import Normal -from mindspore import nn, Model, context -from mindspore.train.callback import LossMonitor - -context.set_context(mode=context.GRAPH_MODE, device_target="CPU") - - def get_data(num, w=2.0, b=3.0): - # f(x)=w * sin(x) + b - # f(x)=2 * sin(x) +3 - for i in range(num): - x = np.random.uniform(-np.pi, np.pi) - noise = np.random.normal(0, 1) - y = w * np.sin(x) + b + noise - yield np.array([np.sin(x)]).astype(np.float32), np.array([y]).astype(np.float32) - -def create_dataset(num_data, batch_size=16, repeat_size=1): - input_data = ds.GeneratorDataset(list(get_data(num_data)), column_names=['data','label']) - input_data = input_data.batch(batch_size) - input_data = input_data.repeat(repeat_size) - return input_data - -class LinearNet(nn.Cell): - def __init__(self): - super(LinearNet, self).__init__() - self.fc = nn.Dense(1, 1, Normal(0.02), Normal(0.02)) - - def construct(self, x): - x = self.fc(x) - return x - -if __name__ == "__main__": - num_data = 1600 - batch_size = 16 - repeat_size = 1 - lr = 0.005 - momentum = 0.9 - - net = LinearNet() - net_loss = nn.loss.MSELoss() - opt = nn.Momentum(net.trainable_params(), lr, momentum) - model = Model(net, net_loss, opt) - - ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) - - model.train(1, ds_train, callbacks=LossMonitor(), dataset_sink_mode=False) - - print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1]) -``` - -
- -**Q:如何使用MindSpore拟合$f(x)=ax^2+bx+c$这类的二次函数?** - -A:以下代码引用自MindSpore的官方教程的[代码仓](https://gitee.com/mindspore/docs/blob/master/tutorials/tutorial_code/linear_regression.py) - -在以下几处修改即可很好的拟合$f(x)=ax^2+bx+c$: - -1. 数据集生成。 -2. 拟合网络。 -3. 优化器。 - -修改的详细信息如下,附带解释。 - -```python -# Since the selected optimizer does not support CPU, so the training computing platform is changed to GPU, which requires readers to install the corresponding GPU version of MindSpore. -context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - -# Assuming that the function to be fitted this time is f(x)=2x^2+3x+4, the data generation function is modified as follows: -def get_data(num, a=2.0, b=3.0 ,c = 4): - for i in range(num): - x = np.random.uniform(-10.0, 10.0) - noise = np.random.normal(0, 1) - # The y value is generated by the fitting target function ax^2+bx+c. - y = x * x * a + x * b + c + noise - # When a*x^2+b*x+c is fitted, a and b are weight parameters and c is offset parameter bias. The training data corresponding to the two weights are x^2 and x respectively, so the data set generation mode is changed as follows: - yield np.array([x*x, x]).astype(np.float32), np.array([y]).astype(np.float32) - -def create_dataset(num_data, batch_size=16, repeat_size=1): - input_data = ds.GeneratorDataset(list(get_data(num_data)), column_names=['data','label']) - input_data = input_data.batch(batch_size) - input_data = input_data.repeat(repeat_size) - return input_data - -class LinearNet(nn.Cell): - def __init__(self): - super(LinearNet, self).__init__() - # Because the full join function inputs two training parameters, the input value is changed to 2, the first Nomral(0.02) will automatically assign random weights to the input two parameters, and the second Normal is the random bias. - self.fc = nn.Dense(2, 1, Normal(0.02), Normal(0.02)) - - def construct(self, x): - x = self.fc(x) - return x - -if __name__ == "__main__": - num_data = 1600 - batch_size = 16 - repeat_size = 1 - lr = 0.005 - momentum = 0.9 - - net = LinearNet() - net_loss = nn.loss.MSELoss() - # RMSProp optimalizer with better effect is selected for quadratic function fitting, Currently, Ascend and GPU computing platforms are supported. - opt = nn.RMSProp(net.trainable_params(), learning_rate=0.1) - model = Model(net, net_loss, opt) - - ds_train = create_dataset(num_data, batch_size=batch_size, repeat_size=repeat_size) - model.train(1, ds_train, callbacks=LossMonitor(), dataset_sink_mode=False) - - print(net.trainable_params()[0], "\n%s" % net.trainable_params()[1]) -``` - -
- -**Q:`mindspore/tests`下怎样执行单个`ut`用例?** - -A:`ut`用例通常需要基于debug版本的MindSpore包,官网并没有提供。可以基于源码使用`sh build.sh`编译,然后通过`pytest`指令执行,debug模式编包不依赖后端。编译选项`sh build.sh -t on`,用例执行可以参考`tests/runtest.sh`脚本。 - -
- -**Q:在Ascend平台上,执行用例有时候会报错`run task error`,如何获取更详细的日志帮助问题定位?** - -A:使用msnpureport工具设置device侧日志级别,工具位置在:`/usr/local/Ascend/driver/tools/msnpureport`。 - -- 全局级别: - -```bash -/usr/local/Ascend/driver/tools/msnpureport -g info -``` - -- 模块级别: - -```bash -/usr/local/Ascend/driver/tools/msnpureport -m SLOG:error -```` - -- Event级别: - -```bash -/usr/local/Ascend/driver/tools/msnpureport -e disable/enable -``` - -- 多device id级别: - -```bash -/usr/local/Ascend/driver/tools/msnpureport -d 1 -g warning -``` - -假设deviceID的取值范围是[0-7],`device0`-`device3`和`device4`-`device7`分别在一个os上。其中`device0`-`device3`共用一个日志配置文件;`device4`-`device7`共用一个配置文件。如果修改了`device0`-`device3`中的任意一个日志级别,其他`device`的日志级别也会被修改。如果修改了`device4`-`device7`中的任意一个日志级别,其他device的日志级别也会被修改。 - -`Driver`包安装以后(假设安装路径为/usr/local/HiAI,在Windows环境下,`msnpureport.exe`执行文件在C:\ProgramFiles\Huawei\Ascend\Driver\tools\目录下),假设用户在/home/shihangbo/目录下直接执行命令行,则Device侧日志被导出到当前目录下,并以时间戳命名文件夹进行存放。 - -
- -**Q:使用Ascend平台执行训练过程,出现报错:`Out of Memory!!! total[3212254720] (dynamic[0] memory poll[524288000]) malloc[32611480064] failed!` 如何解决?** - -A:此问题属于内存占用过多导致的内存不够问题,可能原因有两种: - -- `batch_size`的值设置过大。解决办法:将`batch_size`的值设置减小。 -- 引入了异常大的`Parameter`,例如单个数据shape为[640,1024,80,81],数据类型为float32,单个数据大小超过15G,这样差不多大小的两个数据相加时,占用内存超过3*15G,容易造成`Out of Memory`。解决办法:检查参数的`shape`,如果异常过大,减少shape。 -- 如果以上操作还是未能解决,可以上[官方论坛](https://bbs.huaweicloud.com/forum/forum-1076-1.html)发帖提出问题,将会有专门的技术人员帮助解决。 - -
- -**Q:MindInsight成功启动后,在谷歌浏览器中访问时,提示:`ERR_UNSAFE_PORT` 如何处理?** - -A:谷歌浏览器内核禁止将某些端口作为`HTTP`服务,你需要在谷歌浏览器的属性中新增配置`--explicitly-allowed-ports=port`。或者,你可以更换端口或者更换为IE浏览器。 - -
- -**Q:如何在训练神经网络过程中对计算损失的超参数进行改变?** - -A:您好,很抱歉暂时还未有这样的功能。目前只能通过训练-->重新定义优化器-->训练,这样的过程寻找较优的超参数。 - -
- -**Q:运行应用时报错`error while loading shared libraries: libge_compiler.so: cannot open shared object file: No such file or directory`怎么办?** - -A:安装MindSpore所依赖的Ascend 310 AI处理器配套软件包时,`CANN`包不能安装`nnrt`版本,而是需要安装功能完整的`toolkit`版本。 diff --git a/docs/faq/source_zh_cn/training_visualization.md b/docs/faq/source_zh_cn/training_visualization.md index 77e81e66b50c1451ce0d4e4d9671014e6f042da5..20e7cdecd79030cfcffb9b848f33cc612d105520 100644 --- a/docs/faq/source_zh_cn/training_visualization.md +++ b/docs/faq/source_zh_cn/training_visualization.md @@ -4,37 +4,55 @@ -**Q:MindInsight启动失败并且提示:`ImportError: libcrypto.so.1.0.0: cannot open shared object file: No such file or directory` 如何处理?** +**Q: MindInsight启动失败并且提示:`ImportError: libcrypto.so.1.0.0: cannot open shared object file: No such file or directory` 如何处理?** -A:需要在命令行中使用”export LD_LIBRARY_PATH=dir:$LD_LIBRARY_PATH”来导入LD_LIBRARY_PATH变量。 +A: 需要在命令行中使用”export LD_LIBRARY_PATH=dir:$LD_LIBRARY_PATH”来导入LD_LIBRARY_PATH变量。 -
+
-**Q:MindInsight启动失败并且提示:`bash: mindinsight: command not found` 如何处理?** +**Q: MindInsight启动失败并且提示:`bash: mindinsight: command not found` 如何处理?** -A:当使用Python源码编译安装在自定义路径下会出现该问题,pip安装MindInsight时可执行文件会安装在该路径下,若使用`echo $PATH`查询到的bash环境变量中没有该安装目录会导致系统找不到安装的可执行文件。需要在命令行中使用`export PATH=$PATH:$YourPythonPath$/bin`来导入PATH变量。 -(`$YourPythonPath$`请更换为你的安装路径)。注:该命令只在当前终端有效,若想永久有效请在`~/.bashrc`文件中加入该命令。 +A: 当使用Python源码编译安装在自定义路径下会出现该问题,pip安装MindInsight时可执行文件会安装在该路径下,若使用`echo $PATH`查询到的bash环境变量中没有该安装目录会导致系统找不到安装的可执行文件。需要在命令行中使用`export PATH=$PATH:$YourPythonPath$/bin`来导入PATH变量。 +(`$YourPythonPath$`请更换为你的安装路径)。注: 该命令只在当前终端有效,若想永久有效请在`~/.bashrc`文件中加入该命令。 -
+
-**Q:卸载MindInsight后,在MindInsight的运行日志中出现:`No module named 'mindinsight'` 如何处理?** +**Q: 卸载MindInsight后,在MindInsight的运行日志中出现: `No module named 'mindinsight'` 如何处理?** -A:MindInsight启动后,会变成一个后台服务。卸载MindInsight后,已启动的MindInsight后台服务不会自行停止。 +A: MindInsight启动后,会变成一个后台服务。卸载MindInsight后,已启动的MindInsight后台服务不会自行停止。 当MindInsight后台服务启动新的进程加载新数据或者做其他操作时,则会触发`No module named 'mindinsight'`的异常信息,并记录到日志中。 -此时可以通过下面两种方式进行处理: +此时可以通过下面两种方式进行处理: - 重新安装MindInsight,并使用`mindinsight stop --port `命令停止已启动的MindInsight后台服务。 - 通过`kill -9 `命令,将MindInsight涉及的相关进程杀死。 -
+
-**Q:MindInsight成功启动后,在谷歌浏览器中访问时,提示:`ERR_UNSAFE_PORT` 如何处理?** +**Q: MindInsight成功启动后,在谷歌浏览器中访问时,提示: `ERR_UNSAFE_PORT` 如何处理?** -A:谷歌浏览器内核禁止将某些端口作为`HTTP`服务,你需要在谷歌浏览器的属性中新增配置`--explicitly-allowed-ports=port`。或者,你可以更换端口或者更换为IE浏览器。 +A: 谷歌浏览器内核禁止将某些端口作为`HTTP`服务,你需要在谷歌浏览器的属性中新增配置`--explicitly-allowed-ports=port`。或者,你可以更换端口或者更换为IE浏览器。 -
+
-**Q:在Ascend机器上启动Mindinsight并开启调试器后,训练脚本连接调试器时,提示:`Exeption calling application: Field number 0 is illegal` 如何处理?** +**Q: 在Ascend机器上启动Mindinsight并开启调试器后,训练脚本连接调试器时,提示: `Exeption calling application: Field number 0 is illegal` 如何处理?** -A:说明安装的protobuf版本错误,需要安装正确版本的protobuf,安装方法请参照[安装python版本的proto](https://support.huaweicloud.com/instg-cli-cann/atlascli_03_0046.html)。 +A: 说明安装的Protobuf版本错误,需要安装正确版本的Protobuf,安装方法请参照[安装python版本的proto](https://support.huaweicloud.com/instg-cli-cann/atlascli_03_0046.html)。 + +
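+
+可以先在Python环境中确认当前安装的Protobuf版本(示意):
+
+```python
+import google.protobuf
+
+print(google.protobuf.__version__)
+```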
+ +**Q: MindInsight成功启动后,开启离线调试器,提示: `未找到调试器离线服务器模块` 如何处理?** + +A: 调试器离线服务需要启用MindSpore,请安装正确版本的MindSpore,安装方法请参照[安装MindSpore](https://www.mindspore.cn/install)。 + +
+ +**Q: MindInsight成功启动后,在谷歌浏览器中访问失败,提示: `ERR_CONNECTION_REFUSED` 如何处理?** + +A: 检查后台服务器与网络设备的防火墙策略配置,确保浏览器与MindInsight服务间的通讯连接,不受相关设备的配置规则限制。 + +
diff --git a/docs/faq/source_zh_cn/usage_migrate_3rd.md b/docs/faq/source_zh_cn/usage_migrate_3rd.md
index e185f983456baee1555c3c37492a5f9ea6b151be..9b28a1abac9f09839a27bdaeb8c8b70ed2b539b2 100644
--- a/docs/faq/source_zh_cn/usage_migrate_3rd.md
+++ b/docs/faq/source_zh_cn/usage_migrate_3rd.md
@@ -4,17 +4,17 @@

-**Q:请问想加载PyTorch预训练好的模型用于MindSpore模型finetune有什么方法?**
+**Q: 请问想加载PyTorch预训练好的模型用于MindSpore模型finetune有什么方法?**

-A:需要把PyTorch和MindSpore的参数进行一一对应,因为网络定义的灵活性,所以没办法提供统一的转化脚本。
+A: 需要把PyTorch和MindSpore的参数进行一一对应,因为网络定义的灵活性,所以没办法提供统一的转化脚本。
 需要根据场景书写定制化脚本,可参考[checkpoint高级用法](https://www.mindspore.cn/doc/programming_guide/zh-CN/master/advanced_usage_of_checkpoint.html)
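+
+一个定制化转换脚本的最小示意如下(其中`name_map`与文件名均为假设,需按实际网络的参数名整理;BatchNorm等层两边参数名不同,如PyTorch的`bn.weight`/`bn.bias`对应MindSpore的`bn.gamma`/`bn.beta`):
+
+```python
+import torch
+from mindspore import Tensor
+from mindspore.train.serialization import save_checkpoint
+
+# 参数名映射,仅为示例,需按实际网络补全
+name_map = {
+    "conv1.weight": "conv1.weight",
+    "bn1.weight": "bn1.gamma",
+    "bn1.bias": "bn1.beta",
+}
+
+pt_params = torch.load("resnet18.pth", map_location="cpu")
+ms_params = []
+for pt_name, ms_name in name_map.items():
+    # 逐个取出 PyTorch 权重,转成 MindSpore Tensor 并挂到对应参数名下
+    ms_params.append({"name": ms_name, "data": Tensor(pt_params[pt_name].numpy())})
+save_checkpoint(ms_params, "resnet18_from_pt.ckpt")
+```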
-**Q:怎么将PyTorch的`dataset`转换成MindSpore的`dataset`?** +**Q: 怎么将PyTorch的`dataset`转换成MindSpore的`dataset`?** -A:MindSpore和PyTorch的自定义数据集逻辑是比较类似的,需要用户先定义一个自己的`dataset`类,该类负责定义`__init__`,`__getitem__`,`__len__`来读取自己的数据集,然后将该类实例化为一个对象(如:`dataset/dataset_generator`),最后将这个实例化对象传入`GeneratorDataset`(mindspore用法)/`DataLoader`(pytorch用法),至此即可以完成自定义数据集加载了。而mindspore在`GeneratorDataset`的基础上提供了进一步的`map`->`batch`操作,可以很方便的让用户在`map`内添加一些其他的自定义操作,并将其`batch`起来。 -对应的MindSpore的自定义数据集加载如下: +A: MindSpore和PyTorch的自定义数据集逻辑是比较类似的,需要用户先定义一个自己的`dataset`类,该类负责定义`__init__`,`__getitem__`,`__len__`来读取自己的数据集,然后将该类实例化为一个对象(如: `dataset/dataset_generator`),最后将这个实例化对象传入`GeneratorDataset`(mindspore用法)/`DataLoader`(pytorch用法),至此即可以完成自定义数据集加载了。而mindspore在`GeneratorDataset`的基础上提供了进一步的`map`->`batch`操作,可以很方便的让用户在`map`内添加一些其他的自定义操作,并将其`batch`起来。 +对应的MindSpore的自定义数据集加载如下: ```python #1 Data enhancement,shuffle,sampler. @@ -37,6 +37,42 @@ dataset = dataset.batch(batch_size, drop_remainder=True)
-**Q:其他框架的脚本或者模型怎么迁移到MindSpore?** +**Q: 其他框架的脚本或者模型怎么迁移到MindSpore?** -A:关于脚本或者模型迁移,可以查询MindSpore官网中关于[网络迁移](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/migrate_3rd_scripts.html)的介绍。 +A: 关于脚本或者模型迁移,可以查询MindSpore官网中关于[网络迁移](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/migrate_3rd_scripts.html)的介绍。 + +
+ +**Q: MindConverter转换TensorFlow脚本报错提示`terminate called after throwing an instance of 'std::system_error', what(): Resource temporarily unavailable, Aborted (core dumped)`** + +A: 该问题由TensorFlow导致。脚本转换时,需要通过TensorFlow库加载TensorFlow的模型文件,此时TensorFlow会申请相关资源进行初始化,若申请资源失败(可能由于系统进程数超过Linux最大进程数限制),TensorFlow C/C++层会出现Core Dumped问题。详细信息请参考TensorFlow官方ISSUE,如下ISSUE仅供参考: [TF ISSUE 14885](https://github.com/tensorflow/tensorflow/issues/14885), [TF ISSUE 37449](https://github.com/tensorflow/tensorflow/issues/37449) + +
+ +**Q: MindConverter是否可以在ARM平台运行?** + +A: MindConverter同时支持X86、ARM平台,若在ARM平台运行需要用户自行安装模型所需的依赖包和运行环境。 + +
+ +**Q: 为什么使用MindConverter进行模型转换需要很长时间(超过十分钟),而模型并不大?** + +A: MindConverter进行转换时,需要使用Protobuf对模型文件进行反序列化,请确保Python环境中安装的Protobuf采用C++后端实现,检查方法如下,若输出为Python,则需要安装采用C++实现的Python Protobuf(下载Protobuf源码并进入源码中的python子目录,使用python setup.py install --cpp_implementation进行安装);若输出为cpp,转换过程仍耗时较长,请在转换前使用添加环境变量`export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp`。 + +```python +from google.protobuf.internal import api_implementation + +print(api_implementation.Type()) +``` + +
+ +**Q: 使用.pb文件进行转换时,已确定`model_file`,`shape`,`input_nodes`,`output_nodes`均无误,并且环境中的依赖库已经正常安装,但是仍然报异常代码1000001,可能是什么原因?** + +A: 请检查生成该.pb文件所使用的TensorFlow版本不高于用于转换时安装的TensorFlow版本,避免由于旧版本TensorFlow无法解析新版本生成的.pb文件,而导致的模型文件解析失败。 + +
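+
+可以先确认当前环境的TensorFlow版本,并尝试解析该.pb文件(文件名仅为示例,仅供参考):
+
+```python
+import tensorflow as tf
+
+print(tf.__version__)  # 需不低于生成 .pb 文件时所用的版本
+
+# 若当前版本可以解析该模型文件,下面的反序列化不会报错
+graph_def = tf.compat.v1.GraphDef()
+with open("frozen_model.pb", "rb") as f:
+    graph_def.ParseFromString(f.read())
+print(len(graph_def.node))
+```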
+ +**Q: 出现报错信息`[ERROR] MINDCONVERTER: [BaseConverterError] code: 0000000, msg: {python_home}/lib/libgomp.so.1: cannot allocate memory in static TLS block`时,应该怎么处理?** + +A: 该问题通常是由于环境变量导入不正确导致的。建议用户设置`export LD_PRELOAD={python_home}/lib/libgomp.so.1.0.0`这一环境变量,然后重新尝试进行转换。 diff --git a/docs/federated/api/Makefile b/docs/federated/api/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1eff8952707bdfa503c8d60c1e9a903053170ba2 --- /dev/null +++ b/docs/federated/api/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source_zh_cn +BUILDDIR = build_zh_cn + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/federated/api/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc b/docs/federated/api/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4d29e9adbd432f769cae6d07608ca48367941a05 Binary files /dev/null and b/docs/federated/api/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc differ diff --git a/docs/federated/api/source_zh_cn/_ext/my_signature.py b/docs/federated/api/source_zh_cn/_ext/my_signature.py new file mode 100644 index 0000000000000000000000000000000000000000..4f6a22dc9d8b7ef72aeb429f9124c2dabb1577d6 --- /dev/null +++ b/docs/federated/api/source_zh_cn/_ext/my_signature.py @@ -0,0 +1,358 @@ +""" +Rewrote the Signature module that fix default signature error for autodoc module. +""" + +import inspect +import re +import types +import functools + + +def _sort_param(param_list, target_str): + """Sort param_list as default order.""" + ls = [] + for param_name in param_list: + ls.append((param_name, target_str.find(param_name))) + ls.sort(key=lambda x: x[1], reverse=False) + ls = [i[0] for i in ls] + return ls + + +def get_default_params(func): + """ Get the default signatures from function. """ + source_code = inspect.getsource(func) + func_code = func.__code__ + pos_count = func_code.co_argcount + arg_names = func_code.co_varnames + karg_pos = func_code.co_kwonlyargcount + kwargs_num = arg_names.count("args") + arg_names.count("kwargs") + all_param_names = list(arg_names[:pos_count+karg_pos+kwargs_num]) + all_params = re.findall(r"def [\w_\d\-]+\(([\S\s]*?)\):", source_code)[0].replace("\n", "").replace("'", "\"") + + # sub null spaces from matched all param str. + re_space_sub = re.compile(r",\s+") + all_params = re_space_sub.sub(",", all_params) + + all_param_names = _sort_param(all_param_names, all_params) + + # sub the extra "=" from param. 
+ re_equate_sub = re.compile("=") + + re_defaults_param = re.compile(r"(.*?)".join(all_param_names) + r"(.*)") + defaults_params = re_defaults_param.findall(all_params) + if defaults_params: + if isinstance(defaults_params[0], tuple): + defaults_params = list([i[:-2] if i[-2:] == "**" else i for i in defaults_params[0]]) + defaults_params_list = [] + for i in defaults_params: + if "=" in i and i: + i = re_equate_sub.sub("", i, count=1).strip(",") + if i[:6] == "lambda": + i = "<" + i + ">" + defaults_params_list.append(i) + defaults_params_tuple = tuple(defaults_params_list) + return defaults_params_tuple + return func.__defaults__ + + +def _my_signature_from_function(cls, func): + """Private helper: constructs Signature for the given python function.""" + + is_duck_function = False + if not inspect.isfunction(func): + if inspect._signature_is_functionlike(func): # pylint: disable=protected-access + is_duck_function = True + else: + # If it's not a pure Python function, and not a duck type + # of pure function: + raise TypeError('{!r} is not a Python function'.format(func)) + + Parameter = cls._parameter_cls # pylint: disable=protected-access + + # Parameter information._partialmethod + func_code = func.__code__ + pos_count = func_code.co_argcount + arg_names = func_code.co_varnames + positional = tuple(arg_names[:pos_count]) + keyword_only_count = func_code.co_kwonlyargcount + keyword_only = arg_names[pos_count:(pos_count + keyword_only_count)] + annotations = func.__annotations__ + defaults = get_default_params(func) + if keyword_only_count == len(defaults): + kwdefaults = dict() + for num, arg_name in enumerate(keyword_only): + kwdefaults[arg_name] = defaults[num] + else: + kwdefaults = func.__kwdefaults__ + if not isinstance(kwdefaults, type(None)): + for key, value in kwdefaults.items(): + if isinstance(value, str): + kwdefaults[key] = '"' + value + '"' + pos_defaults = func.__defaults__ + + if pos_defaults: + pos_default_count = len(pos_defaults) + else: + pos_default_count = 0 + + parameters = [] + + # Non-keyword-only parameters w/o defaults. + non_default_count = pos_count - pos_default_count + for name in positional[:non_default_count]: + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._POSITIONAL_OR_KEYWORD)) # pylint: disable=protected-access + + # ... w/ defaults. + for offset, name in enumerate(positional[non_default_count:]): + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._POSITIONAL_OR_KEYWORD, # pylint: disable=protected-access + default=defaults[offset])) + + # *args + if func_code.co_flags & inspect.CO_VARARGS: + name = arg_names[pos_count + keyword_only_count] + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._VAR_POSITIONAL)) # pylint: disable=protected-access + + # Keyword-only parameters. 
+ for name in keyword_only: + default = inspect._empty # pylint: disable=protected-access + if kwdefaults is not None: + default = kwdefaults.get(name, inspect._empty) # pylint: disable=protected-access + + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._KEYWORD_ONLY, # pylint: disable=protected-access + default=default)) + # **kwargs + if func_code.co_flags & inspect.CO_VARKEYWORDS: + index = pos_count + keyword_only_count + if func_code.co_flags & inspect.CO_VARARGS: + index += 1 + + name = arg_names[index] + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._VAR_KEYWORD)) # pylint: disable=protected-access + + # Is 'func' is a pure Python function - don't validate the + # parameters list (for correct order and defaults), it should be OK. + return cls(parameters, + return_annotation=annotations.get('return', inspect._empty), # pylint: disable=protected-access + __validate_parameters__=is_duck_function) + + +def _my_signature_from_callable(obj, *, + follow_wrapper_chains=True, + skip_bound_arg=True, + sigcls): + """Private helper function to get signature for arbitrary + callable objects. + """ + + if not callable(obj): + raise TypeError('{!r} is not a callable object'.format(obj)) + + if isinstance(obj, types.MethodType): + # In this case we skip the first parameter of the underlying + # function (usually `self` or `cls`). + sig = _my_signature_from_callable( + obj.__func__, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + if skip_bound_arg: + return inspect._signature_bound_method(sig) # pylint: disable=protected-access + return sig + + # Was this function wrapped by a decorator? + if follow_wrapper_chains: + obj = inspect.unwrap(obj, stop=(lambda f: hasattr(f, "__signature__"))) + if isinstance(obj, types.MethodType): + # If the unwrapped object is a *method*, we might want to + # skip its first parameter (self). + # See test_signature_wrapped_bound_method for details. 
+ return _my_signature_from_callable( + obj, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + try: + sig = obj.__signature__ + except AttributeError: + pass + else: + if sig is not None: + if not isinstance(sig, MySignature): + raise TypeError( + 'unexpected object {!r} in __signature__ ' + 'attribute'.format(sig)) + return sig + + try: + partialmethod = obj._partialmethod # pylint: disable=protected-access + except AttributeError: + pass + else: + if isinstance(partialmethod, functools.partialmethod): + # Unbound partialmethod (see functools.partialmethod) + # This means, that we need to calculate the signature + # as if it's a regular partial object, but taking into + # account that the first positional argument + # (usually `self`, or `cls`) will not be passed + # automatically (as for boundmethods) + + wrapped_sig = _my_signature_from_callable( + partialmethod.func, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + sig = inspect._signature_get_partial(wrapped_sig, partialmethod, (None,)) # pylint: disable=protected-access + first_wrapped_param = tuple(wrapped_sig.parameters.values())[0] + if first_wrapped_param.kind is Parameter.VAR_POSITIONAL: # pylint: disable=no-else-return + # First argument of the wrapped callable is `*args`, as in + # `partialmethod(lambda *args)`. + return sig + else: + sig_params = tuple(sig.parameters.values()) + assert (not sig_params or + first_wrapped_param is not sig_params[0]) + new_params = (first_wrapped_param,) + sig_params + return sig.replace(parameters=new_params) + + if inspect.isfunction(obj) or inspect._signature_is_functionlike(obj): # pylint: disable=protected-access + # If it's a pure Python function, or an object that is duck type + # of a Python function (Cython functions, for instance), then: + return _my_signature_from_function(sigcls, obj) + + if inspect._signature_is_builtin(obj): # pylint: disable=protected-access + return inspect._signature_from_builtin(sigcls, obj, # pylint: disable=protected-access + skip_bound_arg=skip_bound_arg) + + if isinstance(obj, functools.partial): + wrapped_sig = _my_signature_from_callable( + obj.func, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + return inspect._signature_get_partial(wrapped_sig, obj) # pylint: disable=protected-access + + sig = None + if isinstance(obj, type): + # obj is a class or a metaclass + + # First, let's see if it has an overloaded __call__ defined + # in its metaclass + call = inspect._signature_get_user_defined_method(type(obj), '__call__') # pylint: disable=protected-access + if call is not None: + sig = _my_signature_from_callable( + call, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + else: + # Now we check if the 'obj' class has a '__new__' method + new = inspect._signature_get_user_defined_method(obj, '__new__') # pylint: disable=protected-access + if new is not None: + sig = _my_signature_from_callable( + new, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + else: + # Finally, we should have at least __init__ implemented + init = inspect._signature_get_user_defined_method(obj, '__init__') # pylint: disable=protected-access + if init is not None: + sig = _my_signature_from_callable( + init, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + if sig is None: + # At this 
point we know, that `obj` is a class, with no user- + # defined '__init__', '__new__', or class-level '__call__' + + for base in obj.__mro__[:-1]: + # Since '__text_signature__' is implemented as a + # descriptor that extracts text signature from the + # class docstring, if 'obj' is derived from a builtin + # class, its own '__text_signature__' may be 'None'. + # Therefore, we go through the MRO (except the last + # class in there, which is 'object') to find the first + # class with non-empty text signature. + try: + text_sig = base.__text_signature__ + except AttributeError: + pass + else: + if text_sig: + # If 'obj' class has a __text_signature__ attribute: + # return a signature based on it + return inspect._signature_fromstr(sigcls, obj, text_sig) # pylint: disable=protected-access + + # No '__text_signature__' was found for the 'obj' class. + # Last option is to check if its '__init__' is + # object.__init__ or type.__init__. + if type not in obj.__mro__: + # We have a class (not metaclass), but no user-defined + # __init__ or __new__ for it + if (obj.__init__ is object.__init__ and # pylint: disable=no-else-return + obj.__new__ is object.__new__): + # Return a signature of 'object' builtin. + return sigcls.from_callable(object) + else: + raise ValueError( + 'no signature found for builtin type {!r}'.format(obj)) + + elif not isinstance(obj, inspect._NonUserDefinedCallables): # pylint: disable=protected-access + # An object with __call__ + # We also check that the 'obj' is not an instance of + # _WrapperDescriptor or _MethodWrapper to avoid + # infinite recursion (and even potential segfault) + call = inspect._signature_get_user_defined_method(type(obj), '__call__') # pylint: disable=protected-access + if call is not None: + try: + sig = _my_signature_from_callable( + call, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + except ValueError as ex: + msg = 'no signature found for {!r}'.format(obj) + raise ValueError(msg) from ex + + if sig is not None: + # For classes and objects we skip the first parameter of their + # __call__, __new__, or __init__ methods + if skip_bound_arg: + return inspect._signature_bound_method(sig) # pylint: disable=protected-access + return sig + + if isinstance(obj, types.BuiltinFunctionType): + # Raise a nicer error message for builtins + msg = 'no signature found for builtin function {!r}'.format(obj) + raise ValueError(msg) + + raise ValueError('callable {!r} is not supported by signature'.format(obj)) + + +class MySignature(inspect.Signature): + + @classmethod + def from_callable(cls, obj, *, follow_wrapped=True): + """Constructs Signature for the given callable object.""" + return _my_signature_from_callable(obj, sigcls=cls, + follow_wrapper_chains=follow_wrapped) + + +def signature(obj, *, follow_wrapped=True): + """Get a signature object for the passed callable.""" + return MySignature.from_callable(obj, follow_wrapped=follow_wrapped) diff --git a/docs/federated/api/source_zh_cn/_templates/classtemplate.rst b/docs/federated/api/source_zh_cn/_templates/classtemplate.rst new file mode 100644 index 0000000000000000000000000000000000000000..fd88815f7b49e1cd25195fc8eceba498eafe780c --- /dev/null +++ b/docs/federated/api/source_zh_cn/_templates/classtemplate.rst @@ -0,0 +1,24 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{% if objname in [] %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% elif objname[0].istitle() %} +{{ fullname | underline }} + +.. 
autoclass:: {{ name }} + :members: + +{% else %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% endif %} + +.. + autogenerated from _templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/federated/api/source_zh_cn/_templates/classtemplate_inherited.rst b/docs/federated/api/source_zh_cn/_templates/classtemplate_inherited.rst new file mode 100644 index 0000000000000000000000000000000000000000..8f4a423dca6e678c191df73d142e4e52a862a3db --- /dev/null +++ b/docs/federated/api/source_zh_cn/_templates/classtemplate_inherited.rst @@ -0,0 +1,26 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{% if objname[0].istitle() %} +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :inherited-members: + :members: + +{% elif fullname=="mindspore.numpy.ix_" %} + +mindspore.numpy.ix\_ +==================== + +.. autofunction:: mindspore.numpy.ix_ + +{% else %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% endif %} + +.. autogenerated from _templates/classtemplate_inherited.rst \ No newline at end of file diff --git a/docs/federated/api/source_zh_cn/_templates/classtemplate_probability.rst b/docs/federated/api/source_zh_cn/_templates/classtemplate_probability.rst new file mode 100644 index 0000000000000000000000000000000000000000..6329880e1fc540de910b25d1724a2cfba8d501f2 --- /dev/null +++ b/docs/federated/api/source_zh_cn/_templates/classtemplate_probability.rst @@ -0,0 +1,13 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :members: + +.. + autogenerated from _templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/federated/api/source_zh_cn/conf.py b/docs/federated/api/source_zh_cn/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..b19de0ff1c0edf05f6da3da426218f029582b3fb --- /dev/null +++ b/docs/federated/api/source_zh_cn/conf.py @@ -0,0 +1,398 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# + +import os +import re +import sys +sys.path.append(os.path.abspath('./_ext')) +import sphinx.ext.autosummary.generate as g +from sphinx.ext import autodoc as sphinx_autodoc +from sphinx.util import inspect as sphinx_inspect +from sphinx.domains import python as sphinx_domain_python +from textwrap import dedent +# sys.path.insert(0, os.path.abspath('.')) + +import mindspore +# If you don't want to generate MindArmour APIs, comment this line. +# import mindarmour +# If you don't want to generate MindSpore_Hub APIs, comment this line. +# import mindspore_hub +# If you don't want to generate MindSpore_Serving APIs, comment this line. 
+# import mindspore_serving + +# -- Project information ----------------------------------------------------- + +project = 'MindSpore' +copyright = '2020, MindSpore' +author = 'MindSpore' + +# The full version, including alpha/beta/rc tags +release = 'master' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'sphinx_markdown_tables', + 'myst_parser', +] + +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +pygments_style = 'sphinx' + +autodoc_inherit_docstrings = False + +autosummary_generate = True + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +html_search_language = 'zh' + +html_search_options = {'dict': '../../resource/jieba.txt'} + +html_static_path = ['_static'] + +# -- Options for Texinfo output ------------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + 'python': ('https://docs.python.org/', '../python_objects.inv'), + 'numpy': ('https://docs.scipy.org/doc/numpy/', '../numpy_objects.inv'), +} + +from typing import List, Tuple +from docutils.nodes import Node + +from sphinx.locale import __ +from sphinx.ext.autosummary import Autosummary, posixpath, addnodes, logger, Matcher, autosummary_toc, get_import_prefixes_from_env +from sphinx.ext.autosummary import mock, StringList, ModuleType, get_documenter, ModuleAnalyzer, PycodeError, mangle_signature +from sphinx.ext.autosummary import import_by_name, extract_summary, autosummary_table, nodes, switch_source_input, rst +from sphinx.ext.autodoc.directive import DocumenterBridge, Options + +class MsAutosummary(Autosummary): + """ + Inherited from sphinx's autosummary, add titles and a column for the generated table. 
+ """ + + def init(self): + """ + init method + """ + self.find_doc_name = "" + self.third_title = "" + self.default_doc = "" + + def extract_env_summary(self, doc: List[str]) -> str: + """Extract env summary from docstring.""" + env_sum = self.default_doc + for i, piece in enumerate(doc): + if piece.startswith(self.find_doc_name): + env_sum = doc[i+1][4:] + return env_sum + + def run(self): + """ + run method + """ + self.init() + self.bridge = DocumenterBridge(self.env, self.state.document.reporter, + Options(), self.lineno, self.state) + + names = [x.strip().split()[0] for x in self.content + if x.strip() and re.search(r'^[~a-zA-Z_]', x.strip()[0])] + items = self.get_items(names) + teble_nodes = self.get_table(items) + + if 'toctree' in self.options: + dirname = posixpath.dirname(self.env.docname) + + tree_prefix = self.options['toctree'].strip() + docnames = [] + excluded = Matcher(self.config.exclude_patterns) + for item in items: + docname = posixpath.join(tree_prefix, item[3]) + docname = posixpath.normpath(posixpath.join(dirname, docname)) + if docname not in self.env.found_docs: + location = self.state_machine.get_source_and_line(self.lineno) + if excluded(self.env.doc2path(docname, None)): + msg = __('autosummary references excluded document %r. Ignored.') + else: + msg = __('autosummary: stub file not found %r. ' + 'Check your autosummary_generate setting.') + logger.warning(msg, item[3], location=location) + continue + docnames.append(docname) + + if docnames: + tocnode = addnodes.toctree() + tocnode['includefiles'] = docnames + tocnode['entries'] = [(None, docn) for docn in docnames] + tocnode['maxdepth'] = -1 + tocnode['glob'] = None + teble_nodes.append(autosummary_toc('', '', tocnode)) + return teble_nodes + + def get_items(self, names: List[str]) -> List[Tuple[str, str, str, str, str]]: + """Try to import the given names, and return a list of + ``[(name, signature, summary_string, real_name, env_summary), ...]``. + """ + prefixes = get_import_prefixes_from_env(self.env) + items = [] # type: List[Tuple[str, str, str, str, str]] + max_item_chars = 50 + + for name in names: + display_name = name + if name.startswith('~'): + name = name[1:] + display_name = name.split('.')[-1] + try: + with mock(self.config.autosummary_mock_imports): + real_name, obj, parent, modname = import_by_name(name, prefixes=prefixes) + except ImportError: + logger.warning(__('failed to import %s'), name) + items.append((name, '', '', name, '')) + continue + + self.bridge.result = StringList() # initialize for each documenter + full_name = real_name + if not isinstance(obj, ModuleType): + # give explicitly separated module name, so that members + # of inner classes can be documented + full_name = modname + '::' + full_name[len(modname) + 1:] + # NB. 
using full_name here is important, since Documenters + # handle module prefixes slightly differently + doccls = get_documenter(self.env.app, obj, parent) + documenter = doccls(self.bridge, full_name) + + if not documenter.parse_name(): + logger.warning(__('failed to parse name %s'), real_name) + items.append((display_name, '', '', real_name, '')) + continue + if not documenter.import_object(): + logger.warning(__('failed to import object %s'), real_name) + items.append((display_name, '', '', real_name, '')) + continue + if documenter.options.members and not documenter.check_module(): + continue + + # try to also get a source code analyzer for attribute docs + try: + documenter.analyzer = ModuleAnalyzer.for_module( + documenter.get_real_modname()) + # parse right now, to get PycodeErrors on parsing (results will + # be cached anyway) + documenter.analyzer.find_attr_docs() + except PycodeError as err: + logger.debug('[autodoc] module analyzer failed: %s', err) + # no source file -- e.g. for builtin and C modules + documenter.analyzer = None + + # -- Grab the signature + + try: + sig = documenter.format_signature(show_annotation=False) + except TypeError: + # the documenter does not support ``show_annotation`` option + sig = documenter.format_signature() + + if not sig: + sig = '' + else: + max_chars = max(10, max_item_chars - len(display_name)) + sig = mangle_signature(sig, max_chars=max_chars) + + # -- Grab the summary + + documenter.add_content(None) + summary = extract_summary(self.bridge.result.data[:], self.state.document) + env_sum = self.extract_env_summary(self.bridge.result.data[:]) + items.append((display_name, sig, summary, real_name, env_sum)) + + return items + + def get_table(self, items: List[Tuple[str, str, str, str, str]]) -> List[Node]: + """Generate a proper list of table nodes for autosummary:: directive. + + *items* is a list produced by :meth:`get_items`. + """ + table_spec = addnodes.tabular_col_spec() + table_spec['spec'] = r'\X{1}{2}\X{1}{2}' + + table = autosummary_table('') + real_table = nodes.table('', classes=['longtable']) + table.append(real_table) + group = nodes.tgroup('', cols=3) + real_table.append(group) + group.append(nodes.colspec('', colwidth=10)) + group.append(nodes.colspec('', colwidth=70)) + group.append(nodes.colspec('', colwidth=30)) + body = nodes.tbody('') + group.append(body) + + def append_row(*column_texts: str) -> None: + row = nodes.row('', color="red") + source, line = self.state_machine.get_source_and_line() + for text in column_texts: + node = nodes.paragraph('') + vl = StringList() + vl.append(text, '%s:%d:' % (source, line)) + with switch_source_input(self.state, vl): + self.state.nested_parse(vl, 0, node) + try: + if isinstance(node[0], nodes.paragraph): + node = node[0] + except IndexError: + pass + row.append(nodes.entry('', node)) + body.append(row) + + # add table's title + append_row("**API Name**", "**Description**", self.third_title) + for name, sig, summary, real_name, env_sum in items: + qualifier = 'obj' + if 'nosignatures' not in self.options: + col1 = ':%s:`%s <%s>`\\ %s' % (qualifier, name, real_name, rst.escape(sig)) + else: + col1 = ':%s:`%s <%s>`' % (qualifier, name, real_name) + col2 = summary + col3 = env_sum + append_row(col1, col2, col3) + + return [table_spec, table] + + +class MsNoteAutoSummary(MsAutosummary): + """ + Inherited from MsAutosummary. Add a third column about `Note` to the table. + """ + + def init(self): + """ + init method + """ + self.find_doc_name = ".. 
note::" + self.third_title = "**Note**" + self.default_doc = "None" + + def extract_env_summary(self, doc: List[str]) -> str: + """Extract env summary from docstring.""" + env_sum = self.default_doc + for piece in doc: + if piece.startswith(self.find_doc_name): + env_sum = piece[10:] + return env_sum + + +class MsPlatformAutoSummary(MsAutosummary): + """ + Inherited from MsAutosummary. Add a third column about `Supported Platforms` to the table. + """ + def init(self): + """ + init method + """ + self.find_doc_name = "Supported Platforms:" + self.third_title = "**{}**".format(self.find_doc_name[:-1]) + self.default_doc = "To Be Developed" + + +def setup(app): + app.add_directive('msplatformautosummary', MsPlatformAutoSummary) + app.add_directive('msnoteautosummary', MsNoteAutoSummary) + +# Modify regex for sphinx.ext.autosummary.generate.find_autosummary_in_lines. +gfile_abs_path = os.path.abspath(g.__file__) +autosummary_re_line_old = r"autosummary_re = re.compile(r'^(\s*)\.\.\s+autosummary::\s*')" +autosummary_re_line_new = r"autosummary_re = re.compile(r'^(\s*)\.\.\s+(ms[a-z]*)?autosummary::\s*')" +with open(gfile_abs_path, "r+", encoding="utf8") as f: + data = f.read() + data = data.replace(autosummary_re_line_old, autosummary_re_line_new) + f.seek(0) + f.write(data) + +# Modify default signatures for autodoc. +autodoc_source_path = os.path.abspath(sphinx_autodoc.__file__) +inspect_source_path = os.path.abspath(sphinx_inspect.__file__) +autodoc_source_re = re.compile(r"(\s+)args = self\.format_args\(\*\*kwargs\)") +inspect_source_code_str = """signature = inspect.signature(subject)""" +inspect_target_code_str = """signature = my_signature.signature(subject)""" +autodoc_source_code_str = """args = self.format_args(**kwargs)""" +is_autodoc_code_str = """args = args.replace("'", "")""" +with open(autodoc_source_path, "r+", encoding="utf8") as f: + code_str = f.read() + if is_autodoc_code_str not in code_str: + code_str_lines = code_str.split("\n") + autodoc_target_code_str = None + for line in code_str_lines: + re_matched_str = autodoc_source_re.search(line) + if re_matched_str: + space_num = re_matched_str.group(1) + autodoc_target_code_str = dedent("""\ + {0} + {1}if type(args) != type(None): + {1} {2}""".format(autodoc_source_code_str, space_num, is_autodoc_code_str)) + break + if autodoc_target_code_str: + code_str = code_str.replace(autodoc_source_code_str, autodoc_target_code_str) + f.seek(0) + f.truncate() + f.write(code_str) +with open(inspect_source_path, "r+", encoding="utf8") as g: + code_str = g.read() + if inspect_target_code_str not in code_str: + code_str = code_str.replace(inspect_source_code_str, inspect_target_code_str) + if "import my_signature" not in code_str: + code_str = code_str.replace("import sys", "import sys\nimport my_signature") + g.seek(0) + g.truncate() + g.write(code_str) + +# remove extra space for default params for autodoc. 
+sphinx_domain_python_source_path = os.path.abspath(sphinx_domain_python.__file__) +python_code_source = """for argument in arglist.split(','):""" +python_code_target = """for argument in [" " + i if num > 1 else i for num,i in enumerate(arglist.split(", "))]:""" +with open(sphinx_domain_python_source_path, "r+", encoding="utf8") as f: + code_str = f.read() + if python_code_target not in code_str: + code_str = code_str.replace(python_code_source, python_code_target) + f.seek(0) + f.truncate() + f.write(code_str) diff --git a/docs/federated/api/source_zh_cn/federated_client.rst b/docs/federated/api/source_zh_cn/federated_client.rst new file mode 100644 index 0000000000000000000000000000000000000000..5667ba1c3f45a1baf104b7b7d65cdd3aa78b7804 --- /dev/null +++ b/docs/federated/api/source_zh_cn/federated_client.rst @@ -0,0 +1,8 @@ +.. toctree:: + :maxdepth: 1 + + java_api_flparameter + java_api_syncfljob + interface_description_federated_client + + diff --git a/docs/federated/api/source_zh_cn/federated_server.rst b/docs/federated/api/source_zh_cn/federated_server.rst new file mode 100644 index 0000000000000000000000000000000000000000..67fe1fc061e60e9aaf5b29beee3a4e028ea9d5ab --- /dev/null +++ b/docs/federated/api/source_zh_cn/federated_server.rst @@ -0,0 +1,9 @@ +mindspore.context.set_fl_context +-------------------------------- + +.. autofunction:: mindspore.context.set_fl_context + +mindspore.context.get_fl_context +-------------------------------- + +.. autofunction:: mindspore.context.get_fl_context diff --git a/docs/federated/api/source_zh_cn/index.rst b/docs/federated/api/source_zh_cn/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..62633cea1072a400d3f5203f8de88c3c92581a02 --- /dev/null +++ b/docs/federated/api/source_zh_cn/index.rst @@ -0,0 +1,13 @@ +.. MindSpore documentation master file, created by + sphinx-quickstart on Thu Mar 24 11:00:00 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +接口参考 +======== + +.. toctree:: + :maxdepth: 1 + + federated_client + federated_server diff --git a/docs/federated/api/source_zh_cn/interface_description_federated_client.md b/docs/federated/api/source_zh_cn/interface_description_federated_client.md new file mode 100644 index 0000000000000000000000000000000000000000..3aabda4e3587f0cd6416665fdc3e71d13ef1418b --- /dev/null +++ b/docs/federated/api/source_zh_cn/interface_description_federated_client.md @@ -0,0 +1,251 @@ +# 使用示例 + + + +- [使用示例](#使用示例) + - [联邦学习启动接口flJobRun()](#联邦学习启动接口fljobrun) + - [多条数据输入推理接口modelInference()](#多条数据输入推理接口modelinference) + - [输入参数列表](#输入参数列表) + - [获取云侧最新模型接口getModel ()](#获取云侧最新模型接口getmodel-) + + + + + +## 联邦学习启动接口flJobRun() + +调用flJobRun()接口前,需先实例化参数类FLParameter,进行相关参数设置, 相关参数如下: + +| 参数名称 | 参数类型 | 是否必须 | 描述信息 | 备注 | +| -------------- | -------- | -------- | ----------------------------------------------------------- | ------------------------------------------------------------ | +| trainDataset | String | Y | 训练数据集路径 | 情感分类任务是训练数据txt文件格式;图片分类任务是训练data.bin文件与label.bin文件用逗号拼接 | +| vocabFile | String | Y | 数据预处理的词典文件路径 | 情感分类任务必须设置;图片分类任务不需要设置该参数,默认为null | +| idsFile | String | Y | 词典的映射id文件路径 | 情感分类任务必须设置;图片分类任务不需要设置该参数,默认为null | +| testDataset | String | N | 测试数据集路径 | 1. 图片分类任务不需要设置该参数,默认为null;情感分类任务不设置该参数代表训练过程中不进行验证
2.情感分类任务是测试数据txt文件格式;图片分类任务是测试data.bin文件与label.bin文件用逗号拼接 |
+| flName | String | Y | 联邦学习使用的模型名称 | 情感分类任务需设置为”adbert“; lenet场景需设置为”lenet“ |
+| trainModelPath | String | Y | 联邦学习使用的训练模型路径,为.ms文件的绝对路径 | |
+| inferModelPath | String | Y | 联邦学习使用的推理模型路径,为.ms文件的绝对路径 | 情感分类任务必须设置;图片分类任务可设置为与trainModelPath相同 |
+| flID | String | Y | 用于唯一标识客户端的ID | |
+| ip | String | Y | Server端所启动服务的ip地址,形如“10.113.216.106” | 后期ip+port会改为域名 |
+| port | int | Y | Server端所启动服务的端口号 | 后期ip+port会改为域名 |
+| useHttps | boolean | N | 端云通信是否进行https通信 | 设置为false, 进行http通信;设置为true,进行https通信;默认为false |
+| useSSL | boolean | N | 端云通信是否进行ssl证书认证,ssl证书认证只在https通信中使用 | 设置为false, 不进行ssl证书认证;设置为true,进行ssl证书认证;默认为false |
+
+注意useSSL设置为true时只支持https通信,以上参数中`useHttps`必须设置为`true`,且还需对以下参数进行设置:
+
+```java
+FLParameter flParameter = FLParameter.getInstance();
+String hostName = "10.113.216.106";
+String certPath = "client.crt"; // 给出证书绝对路径
+flParameter.setHostName(hostName);
+flParameter.setCertPath(certPath);
+```
+
+创建SyncFLJob对象,并通过SyncFLJob类的flJobRun()方法启动同步联邦学习任务。
+
+示例代码如下:
+
+1. 情感分类任务示例代码
+
+    ```java
+    // set parameters
+    String trainDataset = "SyncFLClient0604/data/adbert/client/0.txt"; //绝对路径
+    String vocabFile = "SyncFLClient0604/data/adbert/vocab.txt"; //绝对路径
+    String idsFile = "SyncFLClient0604/data/adbert/vocab_map_ids.txt"; //绝对路径
+    String testDataset = "SyncFLClient0604/data/adbert/eval/eval.txt"; //绝对路径, 若不包含单独的测试集, 可使用训练数据作为测试集, 或不进行测试(不设置该参数)
+    String flName = "adbert";
+    String trainModelPath = "SyncFLClient0604/ms/adbert/albert_ad_train.mindir.ms"; //绝对路径
+    String inferModelPath = "SyncFLClient0604/ms/adbert/albert_ad_infer.mindir.ms"; //绝对路径
+    String flID = UUID.randomUUID().toString();
+    String ip = "10.113.216.106";
+    int port = 6668;
+    boolean useHttps = false;
+    boolean useSSL = false;
+
+    FLParameter flParameter = FLParameter.getInstance();
+    flParameter.setTrainDataset(trainDataset);
+    flParameter.setVocabFile(vocabFile);
+    flParameter.setIdsFile(idsFile);
+    flParameter.setTestDataset(testDataset);
+    flParameter.setFlName(flName);
+    flParameter.setTrainModelPath(trainModelPath);
+    flParameter.setInferModelPath(inferModelPath);
+    flParameter.setFlID(flID);
+    flParameter.setIp(ip);
+    flParameter.setPort(port);
+    flParameter.setUseHttps(useHttps);
+    flParameter.setUseSSL(useSSL);
+
+    // start FLJob
+    SyncFLJob syncFLJob = new SyncFLJob();
+    syncFLJob.flJobRun();
+    ```
+
+2. Lenet图片分类任务示例代码
+
+    ```java
+    // set parameters
+    String trainDataset = "SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_9_train_data.bin,SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_9_train_label.bin"; //绝对路径
+    String testDataset = "SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_1_test_data.bin,SyncFLClient0604/data/3500_clients_bin/f0178_39/f0178_39_bn_1_test_label.bin"; //绝对路径, 若不包含单独的测试集, 可使用训练数据作为测试集, 或不进行测试(不设置该参数)
+    String flName = "lenet";
+    String trainModelPath = "SyncFLClient0604/lenet_train.mindir0.ms"; //绝对路径
+    String inferModelPath = "SyncFLClient0604/lenet_train.mindir0.ms"; //绝对路径
+    String flID = UUID.randomUUID().toString();
+    String ip = "10.113.216.106";
+    int port = 6668;
+    boolean useHttps = false;
+    boolean useSSL = false;
+
+    FLParameter flParameter = FLParameter.getInstance();
+    flParameter.setTrainDataset(trainDataset);
+    flParameter.setTestDataset(testDataset);
+    flParameter.setFlName(flName);
+    flParameter.setTrainModelPath(trainModelPath);
+    flParameter.setInferModelPath(inferModelPath);
+    flParameter.setFlID(flID);
+    flParameter.setIp(ip);
+    flParameter.setPort(port);
+    flParameter.setUseHttps(useHttps);
+    flParameter.setUseSSL(useSSL);
+
+    // start FLJob
+    SyncFLJob syncFLJob = new SyncFLJob();
+    syncFLJob.flJobRun();
+    ```
+
+## 多条数据输入推理接口modelInference()
+
+### 输入参数列表
+
+| 参数名称 | 参数类型 | 是否必须 | 描述信息 | 备注 |
+| --------- | -------- | -------- | ----------------------------------------- | ----------------------------------------------------------- |
+| flName | String | Y | 联邦学习使用的模型名称 | 情感分类任务需设置为”adbert“; 图片分类任务需设置为”lenet“ |
+| dataPath | String | Y | 数据集路径 | 情感分类任务为txt文档格式; 图片分类任务为bin文件格式 |
+| vocabFile | String | Y | 数据预处理的词典文件路径 | 情感分类任务必须设置;图片分类任务设置为null |
+| idsFile | String | Y | 词典的映射id文件路径 | 情感分类任务必须设置;图片分类任务设置为null |
+| modelPath | String | Y | 联邦学习推理模型路径,为.ms文件的绝对路径 | |
+
+创建SyncFLJob对象,并通过SyncFLJob类的modelInference()方法启动端侧推理任务,返回推理的标签数组。
+
+示例代码如下:
+
+1. 情感分类任务示例代码
+
+    ```java
+    // set parameters
+    String flName = "adbert";
+    String dataPath = "SyncFLClient0604/data/adbert/eval/eval.txt"; //绝对路径
+    String vocal_file = "SyncFLClient0604/data/adbert/vocab.txt"; //绝对路径
+    String idsFile = "SyncFLClient0604/data/adbert/vocab_map_ids.txt"; //绝对路径
+    String modelPath = "SyncFLClient0604/ms/adbert/albert_ad_infer.mindir.ms"; //绝对路径
+
+    // inference
+    SyncFLJob syncFLJob = new SyncFLJob();
+    int[] labels = syncFLJob.modelInference(flName, dataPath, vocal_file, idsFile, modelPath);
+    ```
+
+2. 
## 获取云侧最新模型接口getModel()
+
+调用getModel()接口前,需先实例化参数类FLParameter,进行相关参数设置,相关参数如下:
+
+| 参数名称 | 参数类型 | 是否必须 | 描述信息 | 备注 |
+| -------------- | -------- | -------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| flName | String | Y | 联邦学习使用的模型名称 | 情感分类任务需设置为"adbert";lenet场景需设置为"lenet" |
+| trainModelPath | String | Y | 联邦学习使用的训练模型路径,为.ms文件的绝对路径 | |
+| inferModelPath | String | Y | 联邦学习使用的推理模型路径,为.ms文件的绝对路径 | 情感分类任务必须设置;图片分类任务可设置为与trainModelPath相同 |
+| ip | String | Y | Server端所启动服务的ip地址,形如"10.113.216.106" | 后期ip+port会改为域名 |
+| port | int | Y | Server端所启动服务的端口号 | 后期ip+port会改为域名 |
+| useHttps | boolean | N | 端云通信是否进行https通信 | 设置为false,进行http通信;设置为true,进行https通信;默认为false |
+| useSSL | boolean | N | 端云通信是否进行ssl证书认证,ssl证书认证只在https通信中使用 | 设置为false,不进行ssl证书认证;设置为true,进行ssl证书认证;默认为false |
+| useElb | boolean | Y | 用于设置是否模拟弹性负载均衡,true代表客户端会将请求随机发给一定范围内的server地址,false代表客户端的请求会发给固定的server地址。 | |
+| serverNum | int | Y | 用于设置模拟弹性负载均衡时可发送请求的server数量,需与云侧启动server数量一致。 | |
+
+注意:useSSL设置为true时只支持https通信,即以上参数中`useHttps`必须设置为`true`,且还需对以下参数进行设置:
+
+```java
+FLParameter flParameter = FLParameter.getInstance();
+String hostName = "10.113.216.106";
+String certPath = "client.crt";   // 给出证书绝对路径
+
+flParameter.setHostName(hostName);
+flParameter.setCertPath(certPath);
+```
+
+创建SyncFLJob对象,并通过SyncFLJob类的getModel()方法启动获取云侧最新模型的任务,返回getModel请求状态码。
+
+示例代码如下:
+
+1. 情感分类任务版本
+
+   ```java
+   // set parameters
+   String flName = "adbert";   // 情感分类任务场景需设置为"adbert",lenet图片分类任务场景需设置为"lenet"
+   String trainModelPath = "SyncFLClient0604/ms/adbert/albert_ad_train.mindir.ms";   // 绝对路径
+   String inferModelPath = "SyncFLClient0604/ms/adbert/albert_ad_infer.mindir.ms";   // 绝对路径
+   String ip = "10.113.216.106";
+   int port = 6668;
+   boolean useHttps = false;
+   boolean useSSL = false;
+   boolean useElb = false;
+   int serverNum = 1;
+
+   FLParameter flParameter = FLParameter.getInstance();
+   flParameter.setFlName(flName);
+   flParameter.setTrainModelPath(trainModelPath);
+   flParameter.setInferModelPath(inferModelPath);
+   flParameter.setIp(ip);
+   flParameter.setPort(port);
+   flParameter.setUseHttps(useHttps);
+   flParameter.setUseSSL(useSSL);
+   flParameter.setUseElb(useElb);
+   flParameter.setServerNum(serverNum);
+
+   // getModel
+   SyncFLJob syncFLJob = new SyncFLJob();
+   syncFLJob.getModel();
+   ```
+
+2. 
Lenet图片分类任务版本
+
+   ```java
+   // set parameters
+   String flName = "lenet";   // 情感分类任务场景需设置为"adbert",lenet场景需设置为"lenet"
+   String trainModelPath = "SyncFLClient0604/lenet_train.mindir0.ms";   // 绝对路径
+   String inferModelPath = "SyncFLClient0604/lenet_train.mindir0.ms";   // 绝对路径
+   String ip = "10.113.216.106";
+   int port = 6668;
+   boolean useHttps = false;
+   boolean useSSL = false;
+   boolean useElb = false;
+   int serverNum = 1;
+
+   FLParameter flParameter = FLParameter.getInstance();
+   flParameter.setFlName(flName);
+   flParameter.setTrainModelPath(trainModelPath);
+   flParameter.setInferModelPath(inferModelPath);
+   flParameter.setIp(ip);
+   flParameter.setPort(port);
+   flParameter.setUseHttps(useHttps);
+   flParameter.setUseSSL(useSSL);
+   flParameter.setUseElb(useElb);
+   flParameter.setServerNum(serverNum);
+
+   // getModel
+   SyncFLJob syncFLJob = new SyncFLJob();
+   syncFLJob.getModel();
+   ```
diff --git a/docs/federated/api/source_zh_cn/java_api_flparameter.md b/docs/federated/api/source_zh_cn/java_api_flparameter.md
new file mode 100644
index 0000000000000000000000000000000000000000..e9f08832aa4f04c92cea278df11be99ff6dfcf8e
--- /dev/null
+++ b/docs/federated/api/source_zh_cn/java_api_flparameter.md
@@ -0,0 +1,551 @@
+# FLParameter
+
+
+
+- [FLParameter](#flparameter)
+    - [公有成员函数](#公有成员函数)
+    - [getInstance](#getinstance)
+    - [getHostName](#gethostname)
+    - [setHostName](#sethostname)
+    - [getCertPath](#getcertpath)
+    - [setCertPath](#setcertpath)
+    - [isUseHttps](#isusehttps)
+    - [setUseHttps](#setusehttps)
+    - [getTrainDataset](#gettraindataset)
+    - [setTrainDataset](#settraindataset)
+    - [getVocabFile](#getvocabfile)
+    - [setVocabFile](#setvocabfile)
+    - [getIdsFile](#getidsfile)
+    - [setIdsFile](#setidsfile)
+    - [getTestDataset](#gettestdataset)
+    - [setTestDataset](#settestdataset)
+    - [getFlName](#getflname)
+    - [setFlName](#setflname)
+    - [getTrainModelPath](#gettrainmodelpath)
+    - [setTrainModelPath](#settrainmodelpath)
+    - [getInferModelPath](#getinfermodelpath)
+    - [setInferModelPath](#setinfermodelpath)
+    - [getIp](#getip)
+    - [setIp](#setip)
+    - [isUseSSL](#isusessl)
+    - [setUseSSL](#setusessl)
+    - [getPort](#getport)
+    - [setPort](#setport)
+    - [getTimeOut](#gettimeout)
+    - [setTimeOut](#settimeout)
+    - [getSleepTime](#getsleeptime)
+    - [setSleepTime](#setsleeptime)
+    - [isUseElb](#isuseelb)
+    - [setUseElb](#setuseelb)
+    - [getServerNum](#getservernum)
+    - [setServerNum](#setservernum)
+    - [isPkiVerify](#ispkiverify)
+    - [setPkiVerify](#setpkiverify)
+    - [getClientID](#getclientid)
+    - [setClientID](#setclientid)
+
+
+
+```java
+import com.huawei.flclient.FLParameter;
+```
+
+FLParameter定义联邦学习相关参数,供用户进行设置。
+
+## 公有成员函数
+
+| **function** |
+| ---------------------------------------------------- |
+| public static synchronized FLParameter getInstance() |
+| public String getHostName() |
+| public void setHostName(String hostName) |
+| public String getCertPath() |
+| public void setCertPath(String certPath) |
+| public boolean isUseHttps() |
+| public void setUseHttps(boolean useHttps) |
+| public String getTrainDataset() |
+| public void setTrainDataset(String trainDataset) |
+| public String getVocabFile() |
+| public void setVocabFile(String vocabFile) |
+| public String getIdsFile() |
+| public void setIdsFile(String idsFile) |
+| public String getTestDataset() |
+| public void setTestDataset(String testDataset) |
+| public String getFlName() |
+| public void setFlName(String flName) |
+| public String getTrainModelPath() |
+| public void setTrainModelPath(String trainModelPath) |
+| public String getInferModelPath() |
+| public void setInferModelPath(String inferModelPath) |
+| public String getIp() |
+| public void setIp(String ip) |
+| public boolean isUseSSL() |
+| public void setUseSSL(boolean useSSL) |
+| public int getPort() |
+| public void setPort(int port) |
+| public int getTimeOut() |
+| public void setTimeOut(int timeOut) |
+| public int getSleepTime() |
+| public void setSleepTime(int sleepTime) |
+| public boolean isUseElb() |
+| public void setUseElb(boolean useElb) |
+| public int getServerNum() |
+| public void setServerNum(int serverNum) |
+| public boolean isPkiVerify() |
+| public void setPkiVerify(boolean pkiVerify) |
+| public String getClientID() |
+| public void setClientID(String clientID) |
+
+
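下面给出一个FLParameter的基本使用示意(其中ip、端口等均为示例取值,需根据实际部署环境替换):
+
+```java
+// 获取FLParameter单例,并设置基本的端云通信参数
+FLParameter flParameter = FLParameter.getInstance();
+flParameter.setFlName("lenet");          // 联邦学习使用的模型名称
+flParameter.setIp("10.113.216.106");     // Server端所启动服务的ip地址
+flParameter.setPort(6668);               // Server端所启动服务的端口号
+flParameter.setUseHttps(false);          // 采用http通信
+flParameter.setUseSSL(false);            // 不进行ssl证书认证
+```
+
+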
## getInstance
+
+```java
+public static synchronized FLParameter getInstance()
+```
+
+获取FLParameter单例。
+
+- 返回值
+
+  FLParameter类型的单例对象。
+
+## getHostName
+
+```java
+public String getHostName()
+```
+
+获取用户设置的域名hostName。
+
+- 返回值
+
+  String类型的域名。
+
+## setHostName
+
+```java
+public void setHostName(String hostName)
+```
+
+用于设置域名hostName。
+
+- 参数
+    - `hostName`: 域名。
+
+## getCertPath
+
+```java
+public String getCertPath()
+```
+
+获取用户设置的证书路径certPath。
+
+- 返回值
+
+  String类型的证书路径certPath。
+
+## setCertPath
+
+```java
+public void setCertPath(String certPath)
+```
+
+用于设置证书路径certPath。
+
+- 参数
+    - `certPath`: 证书路径。
+
+## isUseHttps
+
+```java
+public boolean isUseHttps()
+```
+
+端云通信是否采用https通信方式。
+
+- 返回值
+
+  boolean类型,true代表进行https通信,false代表进行http通信,默认值为false,目前云侧暂不支持https通信。
+
+## setUseHttps
+
+```java
+public void setUseHttps(boolean useHttps)
+```
+
+用于设置端云通信是否采用https通信方式。
+
+- 参数
+    - `useHttps`: 是否采用https通信方式。
+
+## getTrainDataset
+
+```java
+public String getTrainDataset()
+```
+
+获取用户设置的训练数据集路径trainDataset。
+
+- 返回值
+
+  String类型的训练数据集路径trainDataset。
+
+## setTrainDataset
+
+```java
+public void setTrainDataset(String trainDataset)
+```
+
+用于设置训练数据集路径trainDataset。
+
+- 参数
+    - `trainDataset`: 训练数据集路径。
+
+## getVocabFile
+
+```java
+public String getVocabFile()
+```
+
+用于获取用户设置的数据预处理的词典文件路径vocabFile。
+
+- 返回值
+
+  String类型的数据预处理的词典文件路径vocabFile。
+
+## setVocabFile
+
+```java
+public void setVocabFile(String vocabFile)
+```
+
+设置数据预处理的词典文件路径vocabFile。
+
+- 参数
+    - `vocabFile`: 数据预处理的词典文件路径。
+
+## getIdsFile
+
+```java
+public String getIdsFile()
+```
+
+用于获取用户设置的词典的映射id文件路径idsFile。
+
+- 返回值
+
+  String类型的词典的映射id文件路径idsFile。
+
+## setIdsFile
+
+```java
+public void setIdsFile(String idsFile)
+```
+
+设置词典的映射id文件路径idsFile。
+
+- 参数
+    - `idsFile`: 词典的映射id文件路径。
+
+## getTestDataset
+
+```java
+public String getTestDataset()
+```
+
+用于获取用户设置的测试数据集路径testDataset。
+
+- 返回值
+
+  String类型的测试数据集路径testDataset。
+
+## setTestDataset
+
+```java
+public void setTestDataset(String testDataset)
+```
+
+设置测试数据集路径testDataset。
+
+- 参数
+    - `testDataset`: 测试数据集路径。
+
+## getFlName
+
+```java
+public String getFlName()
+```
+
+用于获取用户设置的模型名称flName。
+
+- 返回值
+
+  String类型的模型名称flName。
+
+## setFlName
+
+```java
+public void setFlName(String flName)
+```
+
+设置模型名称flName。
+
+- 参数
+    - `flName`: 模型名称。
+
+## getTrainModelPath
+
+```java
+public String getTrainModelPath()
+```
+
+用于获取用户设置的训练模型路径trainModelPath。
+
+- 返回值
+
+  String类型的训练模型路径trainModelPath。
+
+## setTrainModelPath
+
+```java
+public void setTrainModelPath(String trainModelPath)
+```
+
+设置训练模型路径trainModelPath。
+
+- 参数
+    - `trainModelPath`: 训练模型路径。
+
+## getInferModelPath
+
+```java
+public String getInferModelPath()
+```
+
+用于获取用户设置的推理模型路径inferModelPath。
+
+- 返回值
+
+  String类型的推理模型路径inferModelPath。
+
+## setInferModelPath
+
+
```java
+public void setInferModelPath(String inferModelPath)
+```
+
+设置推理模型路径inferModelPath。
+
+- 参数
+    - `inferModelPath`: 推理模型路径。
+
+## getIp
+
+```java
+public String getIp()
+```
+
+用于获取用户设置的端云通信的ip地址。
+
+- 返回值
+
+  String类型的ip地址。
+
+## setIp
+
+```java
+public void setIp(String ip)
+```
+
+设置端云通信的ip地址。
+
+- 参数
+    - `ip`: 端云通信的ip地址。
+
+## isUseSSL
+
+```java
+public boolean isUseSSL()
+```
+
+端云通信是否进行ssl证书认证。
+
+- 返回值
+
+  boolean类型,true代表进行ssl证书认证,false代表不进行ssl证书认证。
+
+## setUseSSL
+
+```java
+public void setUseSSL(boolean useSSL)
+```
+
+用于设置端云通信是否进行ssl证书认证,ssl证书认证只用于https通信场景。
+
+- 参数
+    - `useSSL`: 端云通信是否进行ssl证书认证。
+
+## getPort
+
+```java
+public int getPort()
+```
+
+用于获取用户设置的端云通信的端口号port。
+
+- 返回值
+
+  int类型的端云通信的端口号port。
+
+## setPort
+
+```java
+public void setPort(int port)
+```
+
+用于设置端云通信的端口号port。
+
+- 参数
+    - `port`: 端云通信的端口号。
+
+## getTimeOut
+
+```java
+public int getTimeOut()
+```
+
+用于获取用户设置的端侧通信的超时时间timeOut。
+
+- 返回值
+
+  int类型的端侧通信的超时时间timeOut。
+
+## setTimeOut
+
+```java
+public void setTimeOut(int timeOut)
+```
+
+用于设置端侧通信的超时时间timeOut。
+
+- 参数
+    - `timeOut`: 端侧通信的超时时间。
+
+## getSleepTime
+
+```java
+public int getSleepTime()
+```
+
+用于获取用户设置的重复请求的等待时间sleepTime。
+
+- 返回值
+
+  int类型的重复请求的等待时间sleepTime。
+
+## setSleepTime
+
+```java
+public void setSleepTime(int sleepTime)
+```
+
+用于设置重复请求的等待时间sleepTime。
+
+- 参数
+    - `sleepTime`: 重复请求的等待时间。
+
+## isUseElb
+
+```java
+public boolean isUseElb()
+```
+
+是否模拟弹性负载均衡,即客户端将请求随机发给一定范围内的server地址。
+
+- 返回值
+
+  boolean类型,true代表客户端会将请求随机发给一定范围内的server地址,false代表客户端的请求会发给固定的server地址。
+
+## setUseElb
+
+```java
+public void setUseElb(boolean useElb)
+```
+
+用于设置是否模拟弹性负载均衡,即客户端将请求随机发给一定范围内的server地址。
+
+- 参数
+    - `useElb`: 是否模拟弹性负载均衡,默认为false。
+
+## getServerNum
+
+```java
+public int getServerNum()
+```
+
+用于获取用户设置的模拟弹性负载均衡时可发送请求的server数量。
+
+- 返回值
+
+  int类型的模拟弹性负载均衡时可发送请求的server数量。
+
+## setServerNum
+
+```java
+public void setServerNum(int serverNum)
+```
+
+用于设置模拟弹性负载均衡时可发送请求的server数量。
+
+- 参数
+    - `serverNum`: 模拟弹性负载均衡时可发送请求的server数量,默认为1。
+
+## isPkiVerify
+
+```java
+public boolean isPkiVerify()
+```
+
+是否进行端云认证。
+
+- 返回值
+
+  boolean类型,true代表进行端云认证,false代表不进行端云认证。
+
+## setPkiVerify
+
+```java
+public void setPkiVerify(boolean pkiVerify)
+```
+
+用于设置是否进行端云认证。
+
+- 参数
+    - `pkiVerify`: 是否进行端云认证。
+
+## getClientID
+
+```java
+public String getClientID()
+```
+
+用于获取用户设置的唯一标识客户端的ID。
+
+- 返回值
+
+  String类型的唯一标识客户端的ID。
+
+## setClientID
+
+```java
+public void setClientID(String clientID)
+```
+
+用于设置唯一标识客户端的ID。
+
+- 参数
+    - `clientID`: 唯一标识客户端的ID。
diff --git a/docs/federated/api/source_zh_cn/java_api_syncfljob.md b/docs/federated/api/source_zh_cn/java_api_syncfljob.md
new file mode 100644
index 0000000000000000000000000000000000000000..ae3c222b04cd8c9450e38284951e04ab9516e26d
--- /dev/null
+++ b/docs/federated/api/source_zh_cn/java_api_syncfljob.md
@@ -0,0 +1,67 @@
+# SyncFLJob
+
+
+
+- [SyncFLJob](#syncfljob)
+    - [公有成员函数](#公有成员函数)
+    - [flJobRun](#fljobrun)
+    - [modelInference](#modelinference)
+    - [getModel](#getmodel)
+
+
+
+```java
+import com.huawei.flclient.SyncFLJob;
+```
+
+SyncFLJob定义了端侧联邦学习启动接口flJobRun()、端侧推理接口modelInference()、获取云侧最新模型的接口getModel()。
+
+## 公有成员函数
+
+| **function** |
+| ------------------------------------------------------------ |
+| public void flJobRun() |
+| public int[] modelInference(String flName, String dataPath, String vocabFile, String idsFile, String modelPath) |
+| public FLClientStatus getModel() |
+
+
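以下为一个最简的调用示意(假设FLParameter中的相关参数已按前文要求设置完成):
+
+```java
+// 启动端侧联邦学习任务
+SyncFLJob syncFLJob = new SyncFLJob();
+syncFLJob.flJobRun();
+
+// 从云侧获取最新模型,返回请求状态码
+FLClientStatus status = syncFLJob.getModel();
+```
+
+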
## flJobRun
+
+```java
+public void flJobRun()
+```
+
+启动端侧联邦学习任务,具体使用方法可参考[接口介绍文档](https://gitee.com/mindspore/docs/blob/master/docs/federated/api/source_zh_cn/interface_description_federated_client.md)。
+
+## modelInference
+
+```java
+public int[] modelInference(String flName, String dataPath, String vocabFile, String idsFile, String modelPath)
+```
+
+启动端侧推理任务。
+
+- 参数
+
+    - `flName`: 联邦学习使用的模型名称,情感分类任务需设置为"adbert";图片分类任务需设置为"lenet"。
+    - `dataPath`: 数据集路径,情感分类任务为txt文档格式;图片分类任务为bin文件格式。
+    - `vocabFile`: 数据预处理的词典文件路径,情感分类任务必须设置;图片分类任务设置为null。
+    - `idsFile`: 词典的映射id文件路径,情感分类任务必须设置;图片分类任务设置为null。
+    - `modelPath`: 联邦学习推理模型路径,为.ms文件的绝对路径。
+
+- 返回值
+
+  根据输入推理出的标签组成的int[]。
+
+## getModel
+
+```java
+public FLClientStatus getModel()
+```
+
+获取云侧最新模型,具体使用方法可参考[接口介绍文档](https://gitee.com/mindspore/docs/blob/master/docs/federated/api/source_zh_cn/interface_description_federated_client.md)。
+
+- 返回值
+
+  返回getModel请求状态码。
diff --git a/docs/federated/docs/Makefile b/docs/federated/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..1eff8952707bdfa503c8d60c1e9a903053170ba2
--- /dev/null
+++ b/docs/federated/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source_zh_cn
+BUILDDIR      = build_zh_cn
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/federated/docs/requirements.txt b/docs/federated/docs/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..96cdfc3e0c7ee0ae6a01e59c1081111fdc792bb6
--- /dev/null
+++ b/docs/federated/docs/requirements.txt
@@ -0,0 +1,5 @@
+sphinx >= 2.2.1, <= 2.4.4
+myst_parser == 0.14.0
+sphinx-markdown-tables
+sphinx_rtd_theme
+jieba
\ No newline at end of file
diff --git a/docs/federated/docs/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc b/docs/federated/docs/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..516af30c9a197f89a41eb556f5f38d22e72d6668
Binary files /dev/null and b/docs/federated/docs/source_zh_cn/_ext/__pycache__/my_signature.cpython-37.pyc differ
diff --git a/docs/federated/docs/source_zh_cn/_ext/my_signature.py b/docs/federated/docs/source_zh_cn/_ext/my_signature.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f6a22dc9d8b7ef72aeb429f9124c2dabb1577d6
--- /dev/null
+++ b/docs/federated/docs/source_zh_cn/_ext/my_signature.py
@@ -0,0 +1,358 @@
+"""
+Rewrote the Signature module to fix default signature errors in the autodoc module.
+"""
+
+import inspect
+import re
+import types
+import functools
+
+
+def _sort_param(param_list, target_str):
+    """Sort param_list as default order."""
+    ls = []
+    for param_name in param_list:
+        ls.append((param_name, target_str.find(param_name)))
+    ls.sort(key=lambda x: x[1], reverse=False)
+    ls = [i[0] for i in ls]
+    return ls
+
+
+def get_default_params(func):
+    """ Get the default parameter values from a function. 
""" + source_code = inspect.getsource(func) + func_code = func.__code__ + pos_count = func_code.co_argcount + arg_names = func_code.co_varnames + karg_pos = func_code.co_kwonlyargcount + kwargs_num = arg_names.count("args") + arg_names.count("kwargs") + all_param_names = list(arg_names[:pos_count+karg_pos+kwargs_num]) + all_params = re.findall(r"def [\w_\d\-]+\(([\S\s]*?)\):", source_code)[0].replace("\n", "").replace("'", "\"") + + # sub null spaces from matched all param str. + re_space_sub = re.compile(r",\s+") + all_params = re_space_sub.sub(",", all_params) + + all_param_names = _sort_param(all_param_names, all_params) + + # sub the extra "=" from param. + re_equate_sub = re.compile("=") + + re_defaults_param = re.compile(r"(.*?)".join(all_param_names) + r"(.*)") + defaults_params = re_defaults_param.findall(all_params) + if defaults_params: + if isinstance(defaults_params[0], tuple): + defaults_params = list([i[:-2] if i[-2:] == "**" else i for i in defaults_params[0]]) + defaults_params_list = [] + for i in defaults_params: + if "=" in i and i: + i = re_equate_sub.sub("", i, count=1).strip(",") + if i[:6] == "lambda": + i = "<" + i + ">" + defaults_params_list.append(i) + defaults_params_tuple = tuple(defaults_params_list) + return defaults_params_tuple + return func.__defaults__ + + +def _my_signature_from_function(cls, func): + """Private helper: constructs Signature for the given python function.""" + + is_duck_function = False + if not inspect.isfunction(func): + if inspect._signature_is_functionlike(func): # pylint: disable=protected-access + is_duck_function = True + else: + # If it's not a pure Python function, and not a duck type + # of pure function: + raise TypeError('{!r} is not a Python function'.format(func)) + + Parameter = cls._parameter_cls # pylint: disable=protected-access + + # Parameter information._partialmethod + func_code = func.__code__ + pos_count = func_code.co_argcount + arg_names = func_code.co_varnames + positional = tuple(arg_names[:pos_count]) + keyword_only_count = func_code.co_kwonlyargcount + keyword_only = arg_names[pos_count:(pos_count + keyword_only_count)] + annotations = func.__annotations__ + defaults = get_default_params(func) + if keyword_only_count == len(defaults): + kwdefaults = dict() + for num, arg_name in enumerate(keyword_only): + kwdefaults[arg_name] = defaults[num] + else: + kwdefaults = func.__kwdefaults__ + if not isinstance(kwdefaults, type(None)): + for key, value in kwdefaults.items(): + if isinstance(value, str): + kwdefaults[key] = '"' + value + '"' + pos_defaults = func.__defaults__ + + if pos_defaults: + pos_default_count = len(pos_defaults) + else: + pos_default_count = 0 + + parameters = [] + + # Non-keyword-only parameters w/o defaults. + non_default_count = pos_count - pos_default_count + for name in positional[:non_default_count]: + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._POSITIONAL_OR_KEYWORD)) # pylint: disable=protected-access + + # ... w/ defaults. 
+ for offset, name in enumerate(positional[non_default_count:]): + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._POSITIONAL_OR_KEYWORD, # pylint: disable=protected-access + default=defaults[offset])) + + # *args + if func_code.co_flags & inspect.CO_VARARGS: + name = arg_names[pos_count + keyword_only_count] + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._VAR_POSITIONAL)) # pylint: disable=protected-access + + # Keyword-only parameters. + for name in keyword_only: + default = inspect._empty # pylint: disable=protected-access + if kwdefaults is not None: + default = kwdefaults.get(name, inspect._empty) # pylint: disable=protected-access + + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._KEYWORD_ONLY, # pylint: disable=protected-access + default=default)) + # **kwargs + if func_code.co_flags & inspect.CO_VARKEYWORDS: + index = pos_count + keyword_only_count + if func_code.co_flags & inspect.CO_VARARGS: + index += 1 + + name = arg_names[index] + annotation = annotations.get(name, inspect._empty) # pylint: disable=protected-access + parameters.append(Parameter(name, annotation=annotation, + kind=inspect._VAR_KEYWORD)) # pylint: disable=protected-access + + # Is 'func' is a pure Python function - don't validate the + # parameters list (for correct order and defaults), it should be OK. + return cls(parameters, + return_annotation=annotations.get('return', inspect._empty), # pylint: disable=protected-access + __validate_parameters__=is_duck_function) + + +def _my_signature_from_callable(obj, *, + follow_wrapper_chains=True, + skip_bound_arg=True, + sigcls): + """Private helper function to get signature for arbitrary + callable objects. + """ + + if not callable(obj): + raise TypeError('{!r} is not a callable object'.format(obj)) + + if isinstance(obj, types.MethodType): + # In this case we skip the first parameter of the underlying + # function (usually `self` or `cls`). + sig = _my_signature_from_callable( + obj.__func__, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + + if skip_bound_arg: + return inspect._signature_bound_method(sig) # pylint: disable=protected-access + return sig + + # Was this function wrapped by a decorator? + if follow_wrapper_chains: + obj = inspect.unwrap(obj, stop=(lambda f: hasattr(f, "__signature__"))) + if isinstance(obj, types.MethodType): + # If the unwrapped object is a *method*, we might want to + # skip its first parameter (self). + # See test_signature_wrapped_bound_method for details. 
return _my_signature_from_callable(
+                obj,
+                follow_wrapper_chains=follow_wrapper_chains,
+                skip_bound_arg=skip_bound_arg,
+                sigcls=sigcls)
+
+    try:
+        sig = obj.__signature__
+    except AttributeError:
+        pass
+    else:
+        if sig is not None:
+            if not isinstance(sig, MySignature):
+                raise TypeError(
+                    'unexpected object {!r} in __signature__ '
+                    'attribute'.format(sig))
+            return sig
+
+    try:
+        partialmethod = obj._partialmethod  # pylint: disable=protected-access
+    except AttributeError:
+        pass
+    else:
+        if isinstance(partialmethod, functools.partialmethod):
+            # Unbound partialmethod (see functools.partialmethod)
+            # This means, that we need to calculate the signature
+            # as if it's a regular partial object, but taking into
+            # account that the first positional argument
+            # (usually `self`, or `cls`) will not be passed
+            # automatically (as for boundmethods)
+
+            wrapped_sig = _my_signature_from_callable(
+                partialmethod.func,
+                follow_wrapper_chains=follow_wrapper_chains,
+                skip_bound_arg=skip_bound_arg,
+                sigcls=sigcls)
+
+            sig = inspect._signature_get_partial(wrapped_sig, partialmethod, (None,))  # pylint: disable=protected-access
+            first_wrapped_param = tuple(wrapped_sig.parameters.values())[0]
+            # note: Parameter is not imported at module level here, so it is
+            # referenced through inspect.Parameter
+            if first_wrapped_param.kind is inspect.Parameter.VAR_POSITIONAL:  # pylint: disable=no-else-return
+                # First argument of the wrapped callable is `*args`, as in
+                # `partialmethod(lambda *args)`.
+                return sig
+            else:
+                sig_params = tuple(sig.parameters.values())
+                assert (not sig_params or
+                        first_wrapped_param is not sig_params[0])
+                new_params = (first_wrapped_param,) + sig_params
+                return sig.replace(parameters=new_params)
+
+    if inspect.isfunction(obj) or inspect._signature_is_functionlike(obj):  # pylint: disable=protected-access
+        # If it's a pure Python function, or an object that is duck type
+        # of a Python function (Cython functions, for instance), then:
+        return _my_signature_from_function(sigcls, obj)
+
+    if inspect._signature_is_builtin(obj):  # pylint: disable=protected-access
+        return inspect._signature_from_builtin(sigcls, obj,  # pylint: disable=protected-access
+                                               skip_bound_arg=skip_bound_arg)
+
+    if isinstance(obj, functools.partial):
+        wrapped_sig = _my_signature_from_callable(
+            obj.func,
+            follow_wrapper_chains=follow_wrapper_chains,
+            skip_bound_arg=skip_bound_arg,
+            sigcls=sigcls)
+        return inspect._signature_get_partial(wrapped_sig, obj)  # pylint: disable=protected-access
+
+    sig = None
+    if isinstance(obj, type):
+        # obj is a class or a metaclass
+
+        # First, let's see if it has an overloaded __call__ defined
+        # in its metaclass
+        call = inspect._signature_get_user_defined_method(type(obj), '__call__')  # pylint: disable=protected-access
+        if call is not None:
+            sig = _my_signature_from_callable(
+                call,
+                follow_wrapper_chains=follow_wrapper_chains,
+                skip_bound_arg=skip_bound_arg,
+                sigcls=sigcls)
+        else:
+            # Now we check if the 'obj' class has a '__new__' method
+            new = inspect._signature_get_user_defined_method(obj, '__new__')  # pylint: disable=protected-access
+            if new is not None:
+                sig = _my_signature_from_callable(
+                    new,
+                    follow_wrapper_chains=follow_wrapper_chains,
+                    skip_bound_arg=skip_bound_arg,
+                    sigcls=sigcls)
+            else:
+                # Finally, we should have at least __init__ implemented
+                init = inspect._signature_get_user_defined_method(obj, '__init__')  # pylint: disable=protected-access
+                if init is not None:
+                    sig = _my_signature_from_callable(
+                        init,
+                        follow_wrapper_chains=follow_wrapper_chains,
+                        skip_bound_arg=skip_bound_arg,
+                        sigcls=sigcls)
+
+        if sig is None:
+            # At this 
point we know, that `obj` is a class, with no user- + # defined '__init__', '__new__', or class-level '__call__' + + for base in obj.__mro__[:-1]: + # Since '__text_signature__' is implemented as a + # descriptor that extracts text signature from the + # class docstring, if 'obj' is derived from a builtin + # class, its own '__text_signature__' may be 'None'. + # Therefore, we go through the MRO (except the last + # class in there, which is 'object') to find the first + # class with non-empty text signature. + try: + text_sig = base.__text_signature__ + except AttributeError: + pass + else: + if text_sig: + # If 'obj' class has a __text_signature__ attribute: + # return a signature based on it + return inspect._signature_fromstr(sigcls, obj, text_sig) # pylint: disable=protected-access + + # No '__text_signature__' was found for the 'obj' class. + # Last option is to check if its '__init__' is + # object.__init__ or type.__init__. + if type not in obj.__mro__: + # We have a class (not metaclass), but no user-defined + # __init__ or __new__ for it + if (obj.__init__ is object.__init__ and # pylint: disable=no-else-return + obj.__new__ is object.__new__): + # Return a signature of 'object' builtin. + return sigcls.from_callable(object) + else: + raise ValueError( + 'no signature found for builtin type {!r}'.format(obj)) + + elif not isinstance(obj, inspect._NonUserDefinedCallables): # pylint: disable=protected-access + # An object with __call__ + # We also check that the 'obj' is not an instance of + # _WrapperDescriptor or _MethodWrapper to avoid + # infinite recursion (and even potential segfault) + call = inspect._signature_get_user_defined_method(type(obj), '__call__') # pylint: disable=protected-access + if call is not None: + try: + sig = _my_signature_from_callable( + call, + follow_wrapper_chains=follow_wrapper_chains, + skip_bound_arg=skip_bound_arg, + sigcls=sigcls) + except ValueError as ex: + msg = 'no signature found for {!r}'.format(obj) + raise ValueError(msg) from ex + + if sig is not None: + # For classes and objects we skip the first parameter of their + # __call__, __new__, or __init__ methods + if skip_bound_arg: + return inspect._signature_bound_method(sig) # pylint: disable=protected-access + return sig + + if isinstance(obj, types.BuiltinFunctionType): + # Raise a nicer error message for builtins + msg = 'no signature found for builtin function {!r}'.format(obj) + raise ValueError(msg) + + raise ValueError('callable {!r} is not supported by signature'.format(obj)) + + +class MySignature(inspect.Signature): + + @classmethod + def from_callable(cls, obj, *, follow_wrapped=True): + """Constructs Signature for the given callable object.""" + return _my_signature_from_callable(obj, sigcls=cls, + follow_wrapper_chains=follow_wrapped) + + +def signature(obj, *, follow_wrapped=True): + """Get a signature object for the passed callable.""" + return MySignature.from_callable(obj, follow_wrapped=follow_wrapped) diff --git a/docs/federated/docs/source_zh_cn/_templates/classtemplate.rst b/docs/federated/docs/source_zh_cn/_templates/classtemplate.rst new file mode 100644 index 0000000000000000000000000000000000000000..fd88815f7b49e1cd25195fc8eceba498eafe780c --- /dev/null +++ b/docs/federated/docs/source_zh_cn/_templates/classtemplate.rst @@ -0,0 +1,24 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{% if objname in [] %} +{{ fullname | underline }} + +.. 
autofunction:: {{ fullname }} +{% elif objname[0].istitle() %} +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :members: + +{% else %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% endif %} + +.. + autogenerated from _templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/federated/docs/source_zh_cn/_templates/classtemplate_inherited.rst b/docs/federated/docs/source_zh_cn/_templates/classtemplate_inherited.rst new file mode 100644 index 0000000000000000000000000000000000000000..8f4a423dca6e678c191df73d142e4e52a862a3db --- /dev/null +++ b/docs/federated/docs/source_zh_cn/_templates/classtemplate_inherited.rst @@ -0,0 +1,26 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{% if objname[0].istitle() %} +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :inherited-members: + :members: + +{% elif fullname=="mindspore.numpy.ix_" %} + +mindspore.numpy.ix\_ +==================== + +.. autofunction:: mindspore.numpy.ix_ + +{% else %} +{{ fullname | underline }} + +.. autofunction:: {{ fullname }} +{% endif %} + +.. autogenerated from _templates/classtemplate_inherited.rst \ No newline at end of file diff --git a/docs/federated/docs/source_zh_cn/_templates/classtemplate_probability.rst b/docs/federated/docs/source_zh_cn/_templates/classtemplate_probability.rst new file mode 100644 index 0000000000000000000000000000000000000000..6329880e1fc540de910b25d1724a2cfba8d501f2 --- /dev/null +++ b/docs/federated/docs/source_zh_cn/_templates/classtemplate_probability.rst @@ -0,0 +1,13 @@ +.. role:: hidden + :class: hidden-section + +.. currentmodule:: {{ module }} + +{{ fullname | underline }} + +.. autoclass:: {{ name }} + :members: + +.. + autogenerated from _templates/classtemplate.rst + note it does not have :inherited-members: diff --git a/docs/federated/docs/source_zh_cn/conf.py b/docs/federated/docs/source_zh_cn/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..b19de0ff1c0edf05f6da3da426218f029582b3fb --- /dev/null +++ b/docs/federated/docs/source_zh_cn/conf.py @@ -0,0 +1,398 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# + +import os +import re +import sys +sys.path.append(os.path.abspath('./_ext')) +import sphinx.ext.autosummary.generate as g +from sphinx.ext import autodoc as sphinx_autodoc +from sphinx.util import inspect as sphinx_inspect +from sphinx.domains import python as sphinx_domain_python +from textwrap import dedent +# sys.path.insert(0, os.path.abspath('.')) + +import mindspore +# If you don't want to generate MindArmour APIs, comment this line. +# import mindarmour +# If you don't want to generate MindSpore_Hub APIs, comment this line. +# import mindspore_hub +# If you don't want to generate MindSpore_Serving APIs, comment this line. 
+# import mindspore_serving + +# -- Project information ----------------------------------------------------- + +project = 'MindSpore' +copyright = '2020, MindSpore' +author = 'MindSpore' + +# The full version, including alpha/beta/rc tags +release = 'master' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'sphinx_markdown_tables', + 'myst_parser', +] + +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +pygments_style = 'sphinx' + +autodoc_inherit_docstrings = False + +autosummary_generate = True + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +html_search_language = 'zh' + +html_search_options = {'dict': '../../resource/jieba.txt'} + +html_static_path = ['_static'] + +# -- Options for Texinfo output ------------------------------------------- + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = { + 'python': ('https://docs.python.org/', '../python_objects.inv'), + 'numpy': ('https://docs.scipy.org/doc/numpy/', '../numpy_objects.inv'), +} + +from typing import List, Tuple +from docutils.nodes import Node + +from sphinx.locale import __ +from sphinx.ext.autosummary import Autosummary, posixpath, addnodes, logger, Matcher, autosummary_toc, get_import_prefixes_from_env +from sphinx.ext.autosummary import mock, StringList, ModuleType, get_documenter, ModuleAnalyzer, PycodeError, mangle_signature +from sphinx.ext.autosummary import import_by_name, extract_summary, autosummary_table, nodes, switch_source_input, rst +from sphinx.ext.autodoc.directive import DocumenterBridge, Options + +class MsAutosummary(Autosummary): + """ + Inherited from sphinx's autosummary, add titles and a column for the generated table. 
+ """ + + def init(self): + """ + init method + """ + self.find_doc_name = "" + self.third_title = "" + self.default_doc = "" + + def extract_env_summary(self, doc: List[str]) -> str: + """Extract env summary from docstring.""" + env_sum = self.default_doc + for i, piece in enumerate(doc): + if piece.startswith(self.find_doc_name): + env_sum = doc[i+1][4:] + return env_sum + + def run(self): + """ + run method + """ + self.init() + self.bridge = DocumenterBridge(self.env, self.state.document.reporter, + Options(), self.lineno, self.state) + + names = [x.strip().split()[0] for x in self.content + if x.strip() and re.search(r'^[~a-zA-Z_]', x.strip()[0])] + items = self.get_items(names) + teble_nodes = self.get_table(items) + + if 'toctree' in self.options: + dirname = posixpath.dirname(self.env.docname) + + tree_prefix = self.options['toctree'].strip() + docnames = [] + excluded = Matcher(self.config.exclude_patterns) + for item in items: + docname = posixpath.join(tree_prefix, item[3]) + docname = posixpath.normpath(posixpath.join(dirname, docname)) + if docname not in self.env.found_docs: + location = self.state_machine.get_source_and_line(self.lineno) + if excluded(self.env.doc2path(docname, None)): + msg = __('autosummary references excluded document %r. Ignored.') + else: + msg = __('autosummary: stub file not found %r. ' + 'Check your autosummary_generate setting.') + logger.warning(msg, item[3], location=location) + continue + docnames.append(docname) + + if docnames: + tocnode = addnodes.toctree() + tocnode['includefiles'] = docnames + tocnode['entries'] = [(None, docn) for docn in docnames] + tocnode['maxdepth'] = -1 + tocnode['glob'] = None + teble_nodes.append(autosummary_toc('', '', tocnode)) + return teble_nodes + + def get_items(self, names: List[str]) -> List[Tuple[str, str, str, str, str]]: + """Try to import the given names, and return a list of + ``[(name, signature, summary_string, real_name, env_summary), ...]``. + """ + prefixes = get_import_prefixes_from_env(self.env) + items = [] # type: List[Tuple[str, str, str, str, str]] + max_item_chars = 50 + + for name in names: + display_name = name + if name.startswith('~'): + name = name[1:] + display_name = name.split('.')[-1] + try: + with mock(self.config.autosummary_mock_imports): + real_name, obj, parent, modname = import_by_name(name, prefixes=prefixes) + except ImportError: + logger.warning(__('failed to import %s'), name) + items.append((name, '', '', name, '')) + continue + + self.bridge.result = StringList() # initialize for each documenter + full_name = real_name + if not isinstance(obj, ModuleType): + # give explicitly separated module name, so that members + # of inner classes can be documented + full_name = modname + '::' + full_name[len(modname) + 1:] + # NB. 
using full_name here is important, since Documenters + # handle module prefixes slightly differently + doccls = get_documenter(self.env.app, obj, parent) + documenter = doccls(self.bridge, full_name) + + if not documenter.parse_name(): + logger.warning(__('failed to parse name %s'), real_name) + items.append((display_name, '', '', real_name, '')) + continue + if not documenter.import_object(): + logger.warning(__('failed to import object %s'), real_name) + items.append((display_name, '', '', real_name, '')) + continue + if documenter.options.members and not documenter.check_module(): + continue + + # try to also get a source code analyzer for attribute docs + try: + documenter.analyzer = ModuleAnalyzer.for_module( + documenter.get_real_modname()) + # parse right now, to get PycodeErrors on parsing (results will + # be cached anyway) + documenter.analyzer.find_attr_docs() + except PycodeError as err: + logger.debug('[autodoc] module analyzer failed: %s', err) + # no source file -- e.g. for builtin and C modules + documenter.analyzer = None + + # -- Grab the signature + + try: + sig = documenter.format_signature(show_annotation=False) + except TypeError: + # the documenter does not support ``show_annotation`` option + sig = documenter.format_signature() + + if not sig: + sig = '' + else: + max_chars = max(10, max_item_chars - len(display_name)) + sig = mangle_signature(sig, max_chars=max_chars) + + # -- Grab the summary + + documenter.add_content(None) + summary = extract_summary(self.bridge.result.data[:], self.state.document) + env_sum = self.extract_env_summary(self.bridge.result.data[:]) + items.append((display_name, sig, summary, real_name, env_sum)) + + return items + + def get_table(self, items: List[Tuple[str, str, str, str, str]]) -> List[Node]: + """Generate a proper list of table nodes for autosummary:: directive. + + *items* is a list produced by :meth:`get_items`. + """ + table_spec = addnodes.tabular_col_spec() + table_spec['spec'] = r'\X{1}{2}\X{1}{2}' + + table = autosummary_table('') + real_table = nodes.table('', classes=['longtable']) + table.append(real_table) + group = nodes.tgroup('', cols=3) + real_table.append(group) + group.append(nodes.colspec('', colwidth=10)) + group.append(nodes.colspec('', colwidth=70)) + group.append(nodes.colspec('', colwidth=30)) + body = nodes.tbody('') + group.append(body) + + def append_row(*column_texts: str) -> None: + row = nodes.row('', color="red") + source, line = self.state_machine.get_source_and_line() + for text in column_texts: + node = nodes.paragraph('') + vl = StringList() + vl.append(text, '%s:%d:' % (source, line)) + with switch_source_input(self.state, vl): + self.state.nested_parse(vl, 0, node) + try: + if isinstance(node[0], nodes.paragraph): + node = node[0] + except IndexError: + pass + row.append(nodes.entry('', node)) + body.append(row) + + # add table's title + append_row("**API Name**", "**Description**", self.third_title) + for name, sig, summary, real_name, env_sum in items: + qualifier = 'obj' + if 'nosignatures' not in self.options: + col1 = ':%s:`%s <%s>`\\ %s' % (qualifier, name, real_name, rst.escape(sig)) + else: + col1 = ':%s:`%s <%s>`' % (qualifier, name, real_name) + col2 = summary + col3 = env_sum + append_row(col1, col2, col3) + + return [table_spec, table] + + +class MsNoteAutoSummary(MsAutosummary): + """ + Inherited from MsAutosummary. Add a third column about `Note` to the table. + """ + + def init(self): + """ + init method + """ + self.find_doc_name = ".. 
note::" + self.third_title = "**Note**" + self.default_doc = "None" + + def extract_env_summary(self, doc: List[str]) -> str: + """Extract env summary from docstring.""" + env_sum = self.default_doc + for piece in doc: + if piece.startswith(self.find_doc_name): + env_sum = piece[10:] + return env_sum + + +class MsPlatformAutoSummary(MsAutosummary): + """ + Inherited from MsAutosummary. Add a third column about `Supported Platforms` to the table. + """ + def init(self): + """ + init method + """ + self.find_doc_name = "Supported Platforms:" + self.third_title = "**{}**".format(self.find_doc_name[:-1]) + self.default_doc = "To Be Developed" + + +def setup(app): + app.add_directive('msplatformautosummary', MsPlatformAutoSummary) + app.add_directive('msnoteautosummary', MsNoteAutoSummary) + +# Modify regex for sphinx.ext.autosummary.generate.find_autosummary_in_lines. +gfile_abs_path = os.path.abspath(g.__file__) +autosummary_re_line_old = r"autosummary_re = re.compile(r'^(\s*)\.\.\s+autosummary::\s*')" +autosummary_re_line_new = r"autosummary_re = re.compile(r'^(\s*)\.\.\s+(ms[a-z]*)?autosummary::\s*')" +with open(gfile_abs_path, "r+", encoding="utf8") as f: + data = f.read() + data = data.replace(autosummary_re_line_old, autosummary_re_line_new) + f.seek(0) + f.write(data) + +# Modify default signatures for autodoc. +autodoc_source_path = os.path.abspath(sphinx_autodoc.__file__) +inspect_source_path = os.path.abspath(sphinx_inspect.__file__) +autodoc_source_re = re.compile(r"(\s+)args = self\.format_args\(\*\*kwargs\)") +inspect_source_code_str = """signature = inspect.signature(subject)""" +inspect_target_code_str = """signature = my_signature.signature(subject)""" +autodoc_source_code_str = """args = self.format_args(**kwargs)""" +is_autodoc_code_str = """args = args.replace("'", "")""" +with open(autodoc_source_path, "r+", encoding="utf8") as f: + code_str = f.read() + if is_autodoc_code_str not in code_str: + code_str_lines = code_str.split("\n") + autodoc_target_code_str = None + for line in code_str_lines: + re_matched_str = autodoc_source_re.search(line) + if re_matched_str: + space_num = re_matched_str.group(1) + autodoc_target_code_str = dedent("""\ + {0} + {1}if type(args) != type(None): + {1} {2}""".format(autodoc_source_code_str, space_num, is_autodoc_code_str)) + break + if autodoc_target_code_str: + code_str = code_str.replace(autodoc_source_code_str, autodoc_target_code_str) + f.seek(0) + f.truncate() + f.write(code_str) +with open(inspect_source_path, "r+", encoding="utf8") as g: + code_str = g.read() + if inspect_target_code_str not in code_str: + code_str = code_str.replace(inspect_source_code_str, inspect_target_code_str) + if "import my_signature" not in code_str: + code_str = code_str.replace("import sys", "import sys\nimport my_signature") + g.seek(0) + g.truncate() + g.write(code_str) + +# remove extra space for default params for autodoc. 
+sphinx_domain_python_source_path = os.path.abspath(sphinx_domain_python.__file__) +python_code_source = """for argument in arglist.split(','):""" +python_code_target = """for argument in [" " + i if num > 1 else i for num,i in enumerate(arglist.split(", "))]:""" +with open(sphinx_domain_python_source_path, "r+", encoding="utf8") as f: + code_str = f.read() + if python_code_target not in code_str: + code_str = code_str.replace(python_code_source, python_code_target) + f.seek(0) + f.truncate() + f.write(code_str) diff --git a/docs/federated/docs/source_zh_cn/deploy_federated_client.md b/docs/federated/docs/source_zh_cn/deploy_federated_client.md new file mode 100644 index 0000000000000000000000000000000000000000..aa250068a7e555c83b85dba9fa573b5464936703 --- /dev/null +++ b/docs/federated/docs/source_zh_cn/deploy_federated_client.md @@ -0,0 +1,167 @@ +# 端侧部署 + + + +- [端侧部署](#端侧部署) + - [Android环境](#android环境) + - [编译出包](#编译出包) + - [运行依赖](#运行依赖) + - [构建依赖环境](#构建依赖环境) + - [x86环境](#x86环境) + - [编译出包](#编译出包-1) + - [运行依赖](#运行依赖-1) + - [构建依赖环境](#构建依赖环境-1) + + + + + +下面分别介绍如何在Android环境和x86环境部署Federated-Client。 + +## Android环境 + +### 编译出包 + +1. 配置编译环境。 + + 目前只支持Linux环境编译,Linux编译环境配置可参考[这里](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux)。 + +2. 在mindspore根目录进行编译,编译包含aarch64和aarch32的AAR包。 + + ```sh + bash build.sh -A on -j32 + ``` + +3. 获取生成的Android AAR包。 + + ```text + mindspore-lite-maven-{version}.zip + ``` + +### 运行依赖 + +- [Android Studio](https://developer.android.google.cn/studio) >= 3.2 (推荐4.0以上版本) +- [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools) >= 26 (Android Studio默认安装) +- [OpenJDK](https://openjdk.java.net/install/) >= 1.8 (Android Studio默认安装) + +### 构建依赖环境 + +将文件`mindspore-lite-maven-{version}.zip`解压后,所得到的目录结构如下所示: + +```text +mindspore-lite-maven-{version} +└── mindspore + └── mindspore-lite + └── {version} + └── mindspore-lite-{version}.aar # MindSpore Lite训练框架AAR包 +``` + +由此可知联邦学习相关的AAR包路径是: + +```text +mindspore/output/mindspore/mindspore-lite/{version}/mindspore-lite-{version}.aar +``` + +其中AAR包中与联邦学习相关的目录结构如下: + +```text +mindspore-lite-{version} +├── jni +│ ├── arm64-v8a +│ │ ├── libjpeg.so # 图像处理动态库文件 +│ │ ├── libminddata-lite.so # 图像处理动态库文件 +│ │ ├── libmindspore-lite.so # MindSpore Lite训练框架依赖的动态库 +│ │ ├── libmindspore-lite-jni.so # MindSpore Lite训练框架依赖的动态库 +│ │ ├── libmindspore-lite-train.so # MindSpore Lite训练框架依赖的动态库 +│ │ └── libturbojpeg.so # 图像处理动态库文件 +│ └── armeabi-v7a +│ ├── libjpeg.so # 图像处理动态库文件 +│ ├── libminddata-lite.so # 图像处理动态库文件 +│ ├── libmindspore-lite.so # MindSpore Lite训练框架依赖的动态库 +│ ├── libmindspore-lite-jni.so # MindSpore Lite训练框架依赖的动态库 +│ ├── libmindspore-lite-train.so # MindSpore Lite训练框架依赖的动态库 +│ └── libturbojpeg.so # 图像处理动态库文件 +├── libs +│ ├── mindspore-lite-java-common.jar # MindSpore Lite训练框架jar包 +│ └── mindspore-lite-java-flclient.jar # 联邦学习框架jar包 +└── classes.jar # MindSpore Lite训练框架jar包 +``` + +在Android工程中只需依赖此 AAR包即可调用联邦学习提供的相关接口,接口的具体调用和运行方式可参考联邦学习接口介绍部分。 + +## x86环境 + +### 编译出包 + +1. 配置编译环境。 + + 目前只支持Linux环境编译,Linux编译环境配置可参考[这里](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux)。 + +2. 在mindspore根目录进行编译,编译x86架构相关包。 + + ```sh + bash build.sh -I x86_64 -j32 + ``` + +3. 
获取生成的x86架构相关包。
+
+   ```text
+   mindspore/output/mindspore-lite-{version}-linux-x64.tar.gz
+   ```
+
+### 运行依赖
+
+- [Python](https://www.python.org/downloads/) >= 3.7.5
+- [OpenJDK](https://openjdk.java.net/install/) >= 1.8
+
+### 构建依赖环境
+
+将文件`mindspore/output/mindspore-lite-{version}-linux-x64.tar.gz`解压后,所得到的目录结构如下所示:
+
+```sh
+mindspore-lite-{version}-linux-x64
+├── tools
+│   ├── benchmark_train # 训练模型性能与精度调测工具
+│   ├── converter       # 模型转换工具
+│   └── cropper         # 库裁剪工具
+│       ├── cropper                 # 库裁剪工具可执行文件
+│       └── cropper_mapping_cpu.cfg # 裁剪cpu库所需的配置文件
+└── runtime
+    ├── include      # 训练框架头文件
+    │   └── registry # 自定义算子注册头文件
+    ├── lib          # 训练框架库
+    │   ├── libminddata-lite.a         # 图像处理静态库文件
+    │   ├── libminddata-lite.so        # 图像处理动态库文件
+    │   ├── libmindspore-lite-jni.so   # MindSpore Lite训练框架的jni动态库
+    │   ├── libmindspore-lite-train.a  # MindSpore Lite训练框架的静态库
+    │   ├── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库
+    │   ├── libmindspore-lite.a        # MindSpore Lite训练框架依赖的静态库
+    │   ├── libmindspore-lite.so       # MindSpore Lite训练框架依赖的动态库
+    │   ├── mindspore-lite-java.jar    # MindSpore Lite训练框架jar包
+    │   └── mindspore-lite-java-flclient.jar # 联邦学习框架jar包
+    └── third_party
+        └── libjpeg-turbo
+            └── lib
+                ├── libjpeg.so.62     # 图像处理动态库文件
+                └── libturbojpeg.so.0 # 图像处理动态库文件
+```
+
+其中联邦学习所需的相关x86包名如下:
+
+```sh
+libjpeg.so.62                    # 图像处理动态库文件
+libminddata-lite.so              # 图像处理动态库文件
+libmindspore-lite.so             # MindSpore Lite训练框架依赖的动态库
+libmindspore-lite-jni.so         # MindSpore Lite训练框架依赖的jni动态库
+libmindspore-lite-train.so       # MindSpore Lite训练框架依赖的动态库
+libturbojpeg.so.0                # 图像处理动态库文件
+mindspore-lite-java-flclient.jar # 联邦学习框架jar包
+```
+
+可将路径`mindspore/output/mindspore-lite-{version}-linux-x64/runtime/lib/`以及`mindspore/output/mindspore-lite-{version}-linux-x64/runtime/third_party/libjpeg-turbo/lib`中联邦学习所依赖的so文件(共6个)放入一个文件夹,比如`/resource/x86libs/`。然后在x86中设置环境变量(下面需给绝对路径):
+
+```sh
+export LD_LIBRARY_PATH=/resource/x86libs/:$LD_LIBRARY_PATH
+```
+
+设置好依赖环境之后,可参考[这里](https://gitee.com/mindspore/docs/blob/master/docs/federated/docs/source_zh_cn/image_classification_application.md)教程在x86环境中模拟启动多个客户端进行联邦学习。
diff --git a/docs/federated/docs/source_zh_cn/deploy_federated_server_cluster.md b/docs/federated/docs/source_zh_cn/deploy_federated_server_cluster.md
new file mode 100644
index 0000000000000000000000000000000000000000..13f92d71c305fbc95dd2d89a95d528ad9718c81c
--- /dev/null
+++ b/docs/federated/docs/source_zh_cn/deploy_federated_server_cluster.md
@@ -0,0 +1,317 @@
+# 云侧部署
+
+`Linux` `模型训练` `中级` `高级`
+
+
+
+- [云侧部署](#云侧部署)
+    - [准备环节](#准备环节)
+        - [安装MindSpore](#安装mindspore)
+    - [定义模型](#定义模型)
+    - [参数配置](#参数配置)
+    - [启动集群](#启动集群)
+    - [弹性伸缩](#弹性伸缩)
+    - [容灾](#容灾)
+
+
+
+本文档以LeNet网络为例,讲解如何使用MindSpore来部署联邦学习集群。
+
+> 可以在[这里](https://gitee.com/mindspore/mindspore/tree/master/tests/st/fl/mobile)下载本文档中的完整Demo。
+
+MindSpore Federated Learning Server集群物理架构如图所示:
+
+
+
+如上图所示,在联邦学习云侧集群中,有两种角色的MindSpore进程:`Federated Learning Scheduler`和`Federated Learning Server`:
+
+- Federated Learning Scheduler
+
+    `Scheduler`的作用主要有两点:
+
+    1. 协助集群组网:在集群初始化阶段,由`Scheduler`负责收集`Server`信息,并达成集群一致性。
+    2. 
开放管理面:支持用户通过`RESTful`接口对集群进行管理。
+
+    在一个联邦学习任务中,只有一个`Scheduler`,与`Server`通过TCP私有协议通信。
+
+- Federated Learning Server
+
+    `Server`为执行联邦学习任务的主体,用于接收和解析来自端侧设备的数据,具有执行安全聚合、限时通信、模型存储等能力。在一个联邦学习任务中,`Server`可以有多个(用户可配置),`Server`间通过TCP私有协议通信,对外开放HTTP端口用于端侧设备连接。
+
+    在MindSpore联邦学习框架中,`Server`还支持弹性伸缩以及容灾,能够在训练任务不中断的情况下,动态调配硬件资源。
+
+`Scheduler`和`Server`需部署在单网卡的服务器或者容器中,且处于相同网段。MindSpore自动获取首个可用IP地址作为`Server`地址。
+
+## 准备环节
+
+### 安装MindSpore
+
+MindSpore联邦学习云侧集群对硬件设备无依赖,因此安装`CPU`版本的MindSpore即可。执行[官网提供的命令](https://www.mindspore.cn/install)安装MindSpore最新`CPU`版本。
+
+## 定义模型
+
+为了便于部署,MindSpore联邦学习的`Scheduler`和`Server`进程可以复用训练脚本,仅通过[参数配置](#id5)选择不同的启动方式。
+
+本教程选择LeNet网络作为示例,具体网络结构、损失函数和优化器定义请参考[LeNet网络样例脚本](https://gitee.com/mindspore/docs/blob/master/tutorials/tutorial_code/lenet/lenet.py)。
+
+## 参数配置
+
+MindSpore联邦学习任务进程复用了训练脚本,用户只需要使用相同的脚本,通过Python接口`set_fl_context`传递不同的参数,启动不同角色的MindSpore进程。参数配置说明请参考[API文档](https://mindspore.cn/doc/api_python/zh-CN/master/mindspore/mindspore.context.html#mindspore.context.set_fl_context)。
+
+在确定参数配置后,用户需要在执行训练前调用`set_fl_context`接口,调用方式如下:
+
+```python
+import mindspore.context as context
+...
+
+enable_fl = True
+server_mode = "FEDERATED_LEARNING"
+ms_role = "MS_SERVER"
+server_num = 4
+scheduler_ip = "192.168.216.124"
+scheduler_port = 6667
+fl_server_port = 6668
+fl_name = "LeNet"
+scheduler_manage_port = 11202
+config_file_path = ""
+
+fl_ctx = {
+    "enable_fl": enable_fl,
+    "server_mode": server_mode,
+    "ms_role": ms_role,
+    "server_num": server_num,
+    "scheduler_ip": scheduler_ip,
+    "scheduler_port": scheduler_port,
+    "fl_server_port": fl_server_port,
+    "fl_name": fl_name,
+    "scheduler_manage_port": scheduler_manage_port,
+    "config_file_path": config_file_path
+}
+context.set_fl_context(**fl_ctx)
+...
+
+Model.train()
+```
+
+本示例设置了训练任务的模式为`联邦学习`,训练进程角色为`Server`,需要启动`4`个`Server`才能完成集群组网,集群`Scheduler`的IP地址为`192.168.216.124`,集群`Scheduler`端口为`6667`,联邦学习`HTTP服务端口`为`6668`(由端侧设备连接),任务名为`LeNet`,集群`Scheduler`管理端口为`11202`。
+
+> 部分参数只在Scheduler用到,如scheduler_manage_port;部分参数只在Server用到,如fl_server_port。为了方便部署,可将这些参数配置统一传入,MindSpore会根据进程角色,读取不同的参数配置。
+
+建议将参数配置通过Python `argparse`模块传入:
+
+```python
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--server_mode", type=str, default="FEDERATED_LEARNING")
+parser.add_argument("--ms_role", type=str, default="MS_SERVER")
+parser.add_argument("--server_num", type=int, default=4)
+parser.add_argument("--scheduler_ip", type=str, default="192.168.216.124")
+parser.add_argument("--scheduler_port", type=int, default=6667)
+parser.add_argument("--fl_server_port", type=int, default=6668)
+parser.add_argument("--fl_name", type=str, default="LeNet")
+parser.add_argument("--scheduler_manage_port", type=int, default=11202)
+parser.add_argument("--config_file_path", type=str, default="")
+
+args, t = parser.parse_known_args()
+server_mode = args.server_mode
+ms_role = args.ms_role
+server_num = args.server_num
+scheduler_ip = args.scheduler_ip
+scheduler_port = args.scheduler_port
+fl_server_port = args.fl_server_port
+fl_name = args.fl_name
+scheduler_manage_port = args.scheduler_manage_port
+config_file_path = args.config_file_path
+```
+
+> 每个Python脚本对应一个进程,若要在不同主机部署多个`Server`角色,则需要分别拉起多个进程,可以通过shell指令配合Python的方式快速启动多`Server`。可参考[示例](https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile)。
+
+## 启动集群
+
+参考[示例](https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile),启动集群。参考示例目录结构如下:
+
+```text
+mobile/
+├── finish_mobile.py
+├── run_mobile_sched.py
+├── run_mobile_server.py
+├── src
+│   └── model.py
+└── 
test_mobile_lenet.py
+```
+
+1. 启动Scheduler
+
+    `run_mobile_sched.py`是为用户启动`Scheduler`而提供的Python脚本,并支持通过`argparse`传参修改配置。执行如下指令,代表启动本次联邦学习任务的`Scheduler`,其TCP端口为`6667`,联邦学习HTTP服务端口为`6668`,`Server`数量为`4`个,集群`Scheduler`管理端口为`11202`:
+
+    ```sh
+    python run_mobile_sched.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6668 --server_num=4 --scheduler_manage_port=11202
+    ```
+
+2. 启动Server
+
+    `run_mobile_server.py`是为用户启动若干`Server`而提供的Python脚本,并支持通过`argparse`传参修改配置。执行如下指令,代表启动本次联邦学习任务的`Server`,其TCP端口为`6667`,联邦学习HTTP服务起始端口为`6668`,`Server`数量为`4`个,联邦学习任务正常进行需要的端侧设备数量为`8`个:
+
+    ```sh
+    python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6668 --server_num=4 --start_fl_job_threshold=8
+    ```
+
+    以上指令等价于启动了4个`Server`进程,每个`Server`的联邦学习服务端口分别为`6668`、`6669`、`6670`和`6671`,具体实现详见[脚本run_mobile_server.py](https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile/run_mobile_server.py)。
+
+    > 若只想在单机部署`Scheduler`以及`Server`,只需将`scheduler_ip`配置项修改为`127.0.0.1`即可。
+
+    若想让`Server`分布式部署在不同物理节点,可以使用`local_server_num`参数,代表在**本节点**需要执行的`Server`进程数量:
+
+    ```sh
+    # 在节点1启动3个Server进程
+    python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6668 --server_num=4 --start_fl_job_threshold=8 --local_server_num=3
+    ```
+
+    ```sh
+    # 在节点2启动1个Server进程
+    python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6668 --server_num=4 --start_fl_job_threshold=8 --local_server_num=1
+    ```
+
+    看到日志打印
+
+    ```sh
+    Server started successfully.
+    ```
+
+    则说明启动成功。
+
+    > 以上分布式部署的指令中,`server_num`都为4,这是因为此参数代表集群全局的`Server`数量,不应随着物理节点的数量而改变。对于不同节点上的`Server`来说,它们无需感知各自的IP地址,集群的一致性和节点发现都由`Scheduler`进行调度。
+
+3. 停止联邦学习
+
+    可以使用`finish_mobile.py`停止联邦学习集群,执行如下指令,其中`scheduler_port`传参和启动服务器时的传参保持一致。
+
+    ```sh
+    python finish_mobile.py --scheduler_port=6667
+    ```
+
+    可看到结果:
+
+    ```sh
+    killed $PID1
+    killed $PID2
+    killed $PID3
+    killed $PID4
+    killed $PID5
+    killed $PID6
+    killed $PID7
+    killed $PID8
+    ```
+
+    说明停止服务成功。
+
+## 弹性伸缩
+
+MindSpore联邦学习框架支持`Server`的弹性伸缩,对外通过`Scheduler`管理端口提供`RESTful`服务,使得用户在不中断训练任务的情况下,对硬件资源进行动态调度。目前MindSpore的弹性伸缩仅支持水平伸缩(Scale Out/In),暂不支持垂直伸缩(Scale Up/Down)。在弹性伸缩场景下,必然会有`Server`进程的增加/减少。
+
+以下详细描述用户如何通过RESTful原生接口,对集群扩容/缩容进行控制。
+
+1. 扩容
+
+    在集群启动后,向`Scheduler`发起扩容请求,这里使用`curl`指令构造`RESTful`扩容请求,代表集群需要扩容2个`Server`节点:
+
+    ```sh
+    curl -i -X POST \
+    -H "Content-Type:application/json" \
+    -d \
+    '{
+      "worker_num":0,
+      "server_num":2
+    }' \
+    'http://192.168.216.124:11202/scaleout'
+    ```
+
+    需要拉起`2`个新的`Server`进程,并将`server_num`参数累加扩容的个数,从而保证全局组网信息的正确性。扩容后,`server_num`的数量应为`6`,执行如下指令:
+
+    ```sh
+    python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6672 --server_num=6 --start_fl_job_threshold=8 --local_server_num=2
+    ```
+
+    此指令代表启动两个`Server`节点,联邦学习服务端口分别为`6672`和`6673`,总`Server`数量为`6`。
+
+2. 
+
+2. 缩容
+
+    在集群启动后,向`Scheduler`发起缩容请求。由于缩容需要对具体节点进行操作,因此需要先获取节点信息:
+
+    ```sh
+    curl -i -X GET \
+    'http://192.168.216.124:11202/nodes'
+    ```
+
+    返回`json`格式的结果:
+
+    ```json
+    {
+        "message": "Get nodes info successful.",
+        "node_ids": [
+            {
+                "node_id": "40d56ffe-f8d1-4960-85fa-fdf88820402a",
+                "rank_id": "3",
+                "role": "SERVER"
+            },
+            {
+                "node_id": "1ba06348-f2e2-4ad2-be83-0d41fcb53228",
+                "rank_id": "2",
+                "role": "SERVER"
+            },
+            {
+                "node_id": "997967bb-c1ab-4916-8697-dcfaaf0354e5",
+                "rank_id": "1",
+                "role": "SERVER"
+            },
+            {
+                "node_id": "4b8d5bdf-eafd-4f5c-8cae-79008f19298a",
+                "rank_id": "0",
+                "role": "SERVER"
+            }
+        ]
+    }
+    ```
+
+    选择`Rank3`和`Rank2`进行缩容:
+
+    ```sh
+    curl -i -X POST \
+    -H "Content-Type:application/json" \
+    -d \
+    '{
+      "node_ids": ["40d56ffe-f8d1-4960-85fa-fdf88820402a", "1ba06348-f2e2-4ad2-be83-0d41fcb53228"]
+    }' \
+    'http://192.168.216.124:11202/scalein'
+    ```
+
+> - 在集群扩容/缩容成功后,训练任务会自动恢复,不需要用户进行额外干预。
+>
+> - 可以通过集群管理工具(如Kubernetes)创建或者释放`Server`资源。
+
+## 容灾
+
+在MindSpore联邦学习集群中某节点下线后,可以保持集群在线而不退出训练任务;该节点重新被启动后,可以恢复训练任务。目前MindSpore仅支持`Server`节点的容灾(`Server 0`除外),且节点下线超过30秒后才会被检测到。
+
+容灾需要一个配置文件`config.json`,通过`config_file_path`参数指定其路径,文件格式如下:
+
+```json
+{
+    "recovery": {
+        "storage_type": 1,
+        "storage_file_path": "config.json"
+    }
+}
+```
+
+节点重新启动的指令与扩容指令类似。在节点被手动下线之后,执行如下指令:
+
+```sh
+python run_mobile_server.py --scheduler_ip=192.168.216.124 --scheduler_port=6667 --fl_server_port=6673 --server_num=6 --start_fl_job_threshold=8 --local_server_num=1 --config_file_path=/home/config.json
+```
+
+此指令代表重新启动了`Server`,其联邦学习服务端口为`6673`。
+
+> 在弹性伸缩命令下发成功后、扩缩容业务执行完毕前,不支持容灾。
\ No newline at end of file
diff --git a/docs/federated/docs/source_zh_cn/federated_install.md b/docs/federated/docs/source_zh_cn/federated_install.md
new file mode 100644
index 0000000000000000000000000000000000000000..bfdf40dbd4faf31f557177f7164a44769e990e10
--- /dev/null
+++ b/docs/federated/docs/source_zh_cn/federated_install.md
@@ -0,0 +1,28 @@
+# 获取MindSpore Federated
+
+`安装`
+
+
+
+- [获取MindSpore Federated](#获取mindspore-federated)
+    - [获取MindSpore whl包](#获取mindspore-whl包)
+    - [获取MindSpore Lite java包](#获取mindspore-lite-java包)
+    - [Linux环境编译要求](#linux环境编译要求)
+
+
+
+
+
+MindSpore Federated框架代码分别集成在云侧MindSpore和端侧Lite框架中,因此需要分别获取MindSpore whl包和MindSpore Lite java安装包。其中,MindSpore whl包负责云侧集群聚合训练以及与Lite的通信。MindSpore Lite java包中包括两部分:一部分是MindSpore Lite训练安装包,负责模型的底层训练;另一部分是Federated-Client安装包,负责模型的下发、加密以及与云侧MindSpore服务的交互。
+
+## 获取MindSpore whl包
+
+包括源码编译和下载发布版两种方式,支持CPU、GPU等硬件平台,根据硬件平台选择安装即可。MindSpore从1.3.0版本开始支持联邦学习。安装步骤可参考[MindSpore安装指南](https://www.mindspore.cn/install)。
+
+## 获取MindSpore Lite java包
+
+包括源码编译和下载发布版两种方式。目前只支持x86和Android平台,只支持CPU硬件架构。安装流程可参考MindSpore Lite教程的[下载章节](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html)和[编译章节](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html),详见下文部署Federated-Client章节。
+
+## Linux环境编译要求
+
+目前源码编译只支持Linux环境,环境要求可参考[MindSpore源码编译](https://www.mindspore.cn/install)和[MindSpore Lite源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)。
\ No newline at end of file
diff --git a/docs/federated/docs/source_zh_cn/image_classification_application.md b/docs/federated/docs/source_zh_cn/image_classification_application.md
new file mode 100644
index 0000000000000000000000000000000000000000..7b2e4e61dd00e1059f333f5fa5af5b07e25ef38c
--- /dev/null
+++ b/docs/federated/docs/source_zh_cn/image_classification_application.md
@@ -0,0 +1,936 @@
+# 实现一个图像分类应用(x86)
+
+
+
+- [实现一个图像分类应用(x86)](#实现一个图像分类应用x86)
+    - [下载数据集](#下载数据集)
+    - 
[定义网络](#定义网络) + - [定义训练过程](#定义训练过程) + - [生成端侧模型文件](#生成端侧模型文件) + - [模拟启动多客户端参与联邦学习](#模拟启动多客户端参与联邦学习) + + + + + +在动手进行实践之前,确保,你已经正确安装了MindSpore。如果没有,可以参考[MindSpore安装页面](https://www.mindspore.cn/install)完成安装。 + +## 下载数据集 + +参考[leaf数据集官方指导](https://github.com/TalwalkarLab/leaf)下载数据集。 + +本示例采用`leaf`数据集中的联邦学习数据集`FEMNIST`, 该数据集包含62个不同类别的手写数字和字母(数字0~9、26个小写字母、26个大写字母),图像大小为`28 x 28`像素,数据集包含3500个用户的手写数字和字母(最多可模拟3500个客户端参与联邦学习),总数据量为805263,平均每个用户包含数据量为226.83,所有用户数据量的方差为88.94。 + +1. 下载数据集前的环境要求。 + + ```sh + numpy==1.16.4 + scipy # conda install scipy + tensorflow==1.13.1 # pip install tensorflow + Pillow # pip install Pillow + matplotlib # pip install matplotlib + jupyter # conda install jupyter notebook==5.7.8 tornado==4.5.3 + pandas # pip install pandas + ``` + +2. 使用git下载官方数据集生成脚本。 + + ```sh + git clone https://github.com/TalwalkarLab/leaf.git + ``` + + 下载项目后,目录结构如下: + + ```sh + leaf/data/femnist + ├── data # 用来存放指令生成的数据集 + ├── preprocess # 存放数据预处理的相关代码 + ├── preprocess.sh # femnist数据集生成shell脚本 + └── README.md # 官方数据集下载指导文档 + ``` + +3. 以`femnist`数据集为例,运行以下指令进入指定路径。 + + ```sh + cd leaf/data/femnist + ``` + +4. 参考`README.md`文件中的说明,在终端输入指令即可下载对应数据集。 + + 运行`./preprocess.sh`具有以下标签的选择: + + - `-s`:'iid'以iid方式采样,或'niid'以非iid方式采样。 + - `--iu`:用户数(如果进行iid采样);表示为用户总数的分数;默认值为0.01。 + - `--sf`:要采样的数据部分,用十进制表示;默认值为0.1。 + - `-k`:每个用户的最小样本数。 + - `-t`:'user'将用户划分为训练测试组,或'sample'将每个用户的样本划分为训练测试组。 + - `--tf`:训练集中的数据部分,用小数表示;默认值为0.9。 + - `--smplseed`:随机抽样数据之前要使用的种子。 + - `--spltseed`:随机分割数据之前要使用的种子。 + + 例如: + + - `./preprocess.sh -s niid --sf 1.0 -k 0 -t sample` (下载完整数据集)。 + - `./preprocess.sh -s niid --sf 0.05 -k 0 -t sample` (下载小型数据集)。 + + 在重新运行`preprocess.sh`之前,请确保删除数据目录中的`rem_user_data`、`sampled_data`、`test`和`train`子文件夹。 + +5. 用指令`./preprocess.sh -s niid --sf 1.0 -k 0 -t sample`生成的数据集包含3500个用户,且按照9:1对每个用户的数据划分训练和测试集。 + + 运行之后目录结构如下: + + ```text + leaf/data/femnist/35_client_sf1_data/ + ├── all_data # 所有数据集混合在一起,不区分训练测试集,共包含35个json文件,每个json文件包含100个用户的数据 + ├── test # 按照9:1对每个用户的数据划分训练和测试集后的测试集,共包含35个json文件,每个json文件包含100个用户的数据 + ├── train # 按照9:1对每个用户的数据划分训练和测试集后的训练集,共包含35个json文件,每个json文件包含100个用户的数据 + └── ... # 其他文件,暂不需要用到,不作介绍 + ``` + + 其中每个json文件包含以下三个部分: + + - `users`: 用户列表。 + + - `num_samples`: 每个用户的样本数量列表。 + + - `user_data`: 一个以用户名为key,以它们各自的数据为value的字典对象;对于每个用户,数据表示为图像列表,每张图像表示为大小为784的整数列表(将`28 x 28`图像数组展平所得)。 + +6. 将35个json文件划分为3500个json文件(每个json文件代表一个用户)。 + + 参考代码如下: + + ```python + import os + import json + + def mkdir(path): + if not os.path.exists(path): + os.mkdir(path) + + def partition_json(root_path, new_root_path): + """ + partition 35 json files to 3500 json file + + Each raw .json file is an object with 3 keys: + 1. 'users', a list of users + 2. 'num_samples', a list of the number of samples for each user + 3. 'user_data', an object with user names as keys and their respective data as values; for each user, data is represented as a list of images, with each image represented as a size-784 integer list (flattened from 28 by 28) + + Each new .json file is an object with 3 keys: + 1. 'user_name', the name of user + 2. 'num_samples', the number of samples for the user + 3. 
'user_data', an dict object with 'x' as keys and their respective data as values; with 'y' as keys and their respective label as values; + + Args: + root_path (str): raw root path of 35 json files + new_root_path (str): new root path of 3500 json files + """ + paths = os.listdir(root_path) + count = 0 + file_num = 0 + for i in paths: + file_num += 1 + file_path = os.path.join(root_path, i) + print('======== process ' + str(file_num) + ' file: ' + str(file_path) + '======================') + with open(file_path, 'r') as load_f: + load_dict = json.load(load_f) + users = load_dict['users'] + num_users = len(users) + num_samples = load_dict['num_samples'] + for j in range(num_users): + count += 1 + print('---processing user: ' + str(count) + '---') + cur_out = {'user_name': None, 'num_samples': None, 'user_data': {}} + cur_user_id = users[j] + cur_data_num = num_samples[j] + cur_user_path = os.path.join(new_root_path, cur_user_id + '.json') + cur_out['user_name'] = cur_user_id + cur_out['num_samples'] = cur_data_num + cur_out['user_data'].update(load_dict['user_data'][cur_user_id]) + with open(cur_user_path, 'w') as f: + json.dump(cur_out, f) + f = os.listdir(new_root_path) + print(len(f), ' users have been processed!') + # partition train json files + partition_json("leaf/data/femnist/35_client_sf1_data/train", "leaf/data/femnist/3500_client_json/train") + # partition test json files + partition_json("leaf/data/femnist/35_client_sf1_data/test", "leaf/data/femnist/3500_client_json/test") + ``` + + 其中`root_path`为`leaf/data/femnist/35_client_sf1_data/{train,test}`,`new_root_path`自行设置,用于存放生成的3500个用户json文件,需分别对训练和测试文件夹进行处理。 + + 新生成的3500个用户json文件,每个文件均包含以下三个部分: + + - `user_name`: 用户名。 + - `num_samples`: 用户的样本数。 + - `user_data`: 一个以'x'为key,以用户数据为value的字典对象; 以'y'为key,以用户数据对应的标签为value。 + + 运行该脚本打印如下,代表运行成功: + + ```sh + ======== process 1 file: /leaf/data/femnist/35_client_sf1_data/train/all_data_16_niid_0_keep_0_train_9.json====================== + ---processing user: 1--- + ---processing user: 2--- + ---processing user: 3--- + ---processing user: 4--- + ---processing user: 5--- + ---processing user: 6--- + ---processing user: 7--- + ---processing user: 8--- + ---processing user: 9--- + ---processing user: 10--- + ---processing user: 11--- + ---processing user: 12--- + ---processing user: 13--- + ---processing user: 14--- + ...... + ``` + +7. 
将json文件转换为图片文件。 + + 可参考如下代码: + + ```python + import os + import json + import numpy as np + from PIL import Image + + name_list = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', + 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', + 'v', 'w', 'x', 'y', 'z' + ] + + def mkdir(path): + if not os.path.exists(path): + os.mkdir(path) + + def json_2_numpy(img_size, file_path): + """ + read json file to numpy + Args: + img_size (list): contain three elements: the height, width, channel of image + file_path (str): root path of 3500 json files + return: + image_numpy (numpy) + label_numpy (numpy) + """ + # open json file + with open(file_path, 'r') as load_f_train: + load_dict = json.load(load_f_train) + num_samples = load_dict['num_samples'] + x = load_dict['user_data']['x'] + y = load_dict['user_data']['y'] + size = (num_samples, img_size[0], img_size[1], img_size[2]) + image_numpy = np.array(x, dtype=np.float32).reshape(size) # mindspore doesn't support float64 and int64 + label_numpy = np.array(y, dtype=np.int32) + return image_numpy, label_numpy + + def json_2_img(json_path, save_path): + """ + transform single json file to images + + Args: + json_path (str): the path json file + save_path (str): the root path to save images + + """ + data, label = json_2_numpy([28, 28, 1], json_path) + for i in range(data.shape[0]): + img = data[i] * 255 # PIL don't support the 0/1 image ,need convert to 0~255 image + im = Image.fromarray(np.squeeze(img)) + im = im.convert('L') + img_name = str(label[i]) + '_' + name_list[label[i]] + '_' + str(i) + '.png' + path1 = os.path.join(save_path, str(label[i])) + mkdir(path1) + img_path = os.path.join(path1, img_name) + im.save(img_path) + print('-----', i, '-----') + + def all_json_2_img(root_path, save_root_path): + """ + transform json files to images + Args: + json_path (str): the root path of 3500 json files + save_path (str): the root path to save images + """ + usage = ['train', 'test'] + for i in range(2): + x = usage[i] + files_path = os.path.join(root_path, x) + files = os.listdir(files_path) + + for name in files: + user_name = name.split('.')[0] + json_path = os.path.join(files_path, name) + save_path1 = os.path.join(save_root_path, user_name) + mkdir(save_path1) + save_path = os.path.join(save_path1, x) + mkdir(save_path) + print('=============================' + name + '=======================') + json_2_img(json_path, save_path) + + all_json_2_img("leaf/data/femnist/3500_client_json/", "leaf/data/femnist/3500_client_img/") + ``` + + 运行该脚本打印如下,代表运行成功: + + ```sh + =============================f0644_19.json======================= + ----- 0 ----- + ----- 1 ----- + ----- 2 ----- + ----- 3 ----- + ----- 4 ----- + ----- 5 ----- + ----- 6 ----- + ----- 7 ----- + ----- 8 ----- + ----- 9 ----- + ----- 10 ----- + ...... + ``` + +8. 
将图片数据集转换为联邦学习框架可用的bin文件格式。 + + 可参考下面代码: + + ```python + import numpy as np + import os + import mindspore.dataset as ds + import mindspore.dataset.transforms.c_transforms as tC + import mindspore.dataset.vision.py_transforms as PV + import mindspore.dataset.transforms.py_transforms as PT + import mindspore + + def mkdir(path): + if not os.path.exists(path): + os.mkdir(path) + + def count_id(path): + files = os.listdir(path) + ids = {} + for i in files: + ids[i] = int(i) + return ids + + def create_dataset_from_folder(data_path, img_size, batch_size=32, repeat_size=1, num_parallel_workers=1, shuffle=False): + """ create dataset for train or test + Args: + data_path: Data path + batch_size: The number of data records in each group + repeat_size: The number of replicated data records + num_parallel_workers: The number of parallel workers + """ + # define dataset + ids = count_id(data_path) + mnist_ds = ds.ImageFolderDataset(dataset_dir=data_path, decode=False, class_indexing=ids) + # define operation parameters + resize_height, resize_width = img_size[0], img_size[1] # 32 + + transform = [ + PV.Decode(), + PV.Grayscale(1), + PV.Resize(size=(resize_height, resize_width)), + PV.Grayscale(3), + PV.ToTensor(), + ] + compose = PT.Compose(transform) + + # apply map operations on images + mnist_ds = mnist_ds.map(input_columns="label", operations=tC.TypeCast(mindspore.int32)) + mnist_ds = mnist_ds.map(input_columns="image", operations=compose) + + # apply DatasetOps + buffer_size = 10000 + if shuffle: + mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size) # 10000 as in LeNet train script + mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) + mnist_ds = mnist_ds.repeat(repeat_size) + return mnist_ds + + def img2bin(root_path, root_save): + """ + transform images to bin files + + Args: + root_path: the root path of 3500 images files + root_save: the root path to save bin files + + """ + + use_list = [] + train_batch_num = [] + test_batch_num = [] + mkdir(root_save) + users = os.listdir(root_path) + for user in users: + use_list.append(user) + user_path = os.path.join(root_path, user) + train_test = os.listdir(user_path) + for tag in train_test: + data_path = os.path.join(user_path, tag) + dataset = create_dataset_from_folder(data_path, (32, 32, 1), 32) + batch_num = 0 + img_list = [] + label_list = [] + for data in dataset.create_dict_iterator(): + batch_x_tensor = data['image'] + batch_y_tensor = data['label'] + trans_img = np.transpose(batch_x_tensor.asnumpy(), [0, 2, 3, 1]) + img_list.append(trans_img) + label_list.append(batch_y_tensor.asnumpy()) + batch_num += 1 + + if tag == "train": + train_batch_num.append(batch_num) + elif tag == "test": + test_batch_num.append(batch_num) + + imgs = np.array(img_list) # (batch_num, 32,3,32,32) + labels = np.array(label_list) + path1 = os.path.join(root_save, user) + mkdir(path1) + image_path = os.path.join(path1, user + "_" + "bn_" + str(batch_num) + "_" + tag + "_data.bin") + label_path = os.path.join(path1, user + "_" + "bn_" + str(batch_num) + "_" + tag + "_label.bin") + + imgs.tofile(image_path) + labels.tofile(label_path) + print("user: " + user + " " + tag + "_batch_num: " + str(batch_num)) + print("total " + str(len(use_list)) + " users finished!") + + root_path = "leaf/data/femnist/3500_client_img/" + root_save = "leaf/data/femnist/3500_clients_bin" + img2bin(root_path, root_save) + ``` + + 运行该脚本打印如下,代表运行成功: + + ```sh + user: f0141_43 test_batch_num: 1 + user: f0141_43 train_batch_num: 10 + user: f0137_14 test_batch_num: 1 + user: f0137_14 
train_batch_num: 11 + user: f0049_32 test_batch_num: 1 + user: f0049_32 train_batch_num: 11 + user: f0178_39 test_batch_num: 1 + user: f0178_39 train_batch_num: 9 + user: f0222_06 test_batch_num: 1 + user: f0222_06 train_batch_num: 9 + ...... + total 3500 users finished! + ``` + +9. 生成`3500_clients_bin`文件夹内共包含3500个用户文件夹,其目录结构如下: + + ```sh + leaf/data/femnist/3500_clients_bin + ├── f0000_14 # 用户编号 + │ ├── f0000_14_bn_10_train_data.bin # 用户f0000_14的训练数据 (bn_后面的数字10代表batch number) + │ ├── f0000_14_bn_10_train_label.bin # 用户f0000_14的训练标签 + │ ├── f0000_14_bn_1_test_data.bin # 用户f0000_14的测试数据 (bn_后面的数字1代表batch number) + │ └── f0000_14_bn_1_test_label.bin # 用户f0000_14的测试标签 + ├── f0001_41 # 用户编号 + │ ├── f0001_41_bn_11_train_data.bin # 用户f0001_41的训练数据 (bn_后面的数字11代表batch number) + │ ├── f0001_41_bn_11_train_label.bin # 用户f0001_41的训练标签 + │ ├── f0001_41_bn_1_test_data.bin # 用户f0001_41的测试数据 (bn_后面的数字1代表batch number) + │ └── f0001_41_bn_1_test_label.bin # 用户f0001_41的测试标签 + │ ... + └── f4099_10 # 用户编号 + ├── f4099_10_bn_4_train_data.bin # 用户f4099_10的训练数据 (bn_后面的数字4代表batch number) + ├── f4099_10_bn_4_train_label.bin # 用户f4099_10的训练标签 + ├── f4099_10_bn_1_test_data.bin # 用户f4099_10的测试数据 (bn_后面的数字1代表batch number) + └── f4099_10_bn_1_test_label.bin # 用户f4099_10的测试标签 + ``` + +## 定义网络 + +我们选择相对简单的LeNet网络。LeNet网络不包括输入层的情况下,共有7层:2个卷积层、2个下采样层(池化层)、3个全连接层。每层都包含不同数量的训练参数,如下图所示: + +![LeNet5](images/LeNet_5.jpg) + +> 更多的LeNet网络的介绍不在此赘述,希望详细了解LeNet网络,可以查询。 + +网络定义流程可参考[model.py文件](https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile/src/model.py)。 + +具体网络定义流程可参考[MindSpore官方图片分类任务文档]( https://www.mindspore.cn/tutorial/training/zh-CN/master/quick_start/quick_start.html#%E5%AE%9A%E4%B9%89%E7%BD%91%E7%BB%9C )。 + +## 定义训练过程 + +可参考如下代码: + +```python +import argparse +import numpy as np + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.nn import TrainOneStepCell, WithLossCell +from src.model import LeNet5 +from src.adam import AdamWeightDecayOp + +parser = argparse.ArgumentParser(description="test_fl_lenet") +parser.add_argument("--device_target", type=str, default="CPU") +parser.add_argument("--server_mode", type=str, default="FEDERATED_LEARNING") +parser.add_argument("--ms_role", type=str, default="MS_WORKER") +parser.add_argument("--worker_num", type=int, default=0) +parser.add_argument("--server_num", type=int, default=1) +parser.add_argument("--scheduler_ip", type=str, default="127.0.0.1") +parser.add_argument("--scheduler_port", type=int, default=8113) +parser.add_argument("--fl_server_port", type=int, default=6666) +parser.add_argument("--start_fl_job_threshold", type=int, default=1) +parser.add_argument("--start_fl_job_time_window", type=int, default=3000) +parser.add_argument("--update_model_ratio", type=float, default=1.0) +parser.add_argument("--update_model_time_window", type=int, default=3000) +parser.add_argument("--fl_name", type=str, default="Lenet") +parser.add_argument("--fl_iteration_num", type=int, default=25) +parser.add_argument("--client_epoch_num", type=int, default=20) +parser.add_argument("--client_batch_size", type=int, default=32) +parser.add_argument("--client_learning_rate", type=float, default=0.1) +parser.add_argument("--scheduler_manage_port", type=int, default=11202) + +args, _ = parser.parse_known_args() +device_target = args.device_target +server_mode = args.server_mode +ms_role = args.ms_role +worker_num = args.worker_num +server_num = args.server_num +scheduler_ip = args.scheduler_ip +scheduler_port = 
args.scheduler_port +fl_server_port = args.fl_server_port +start_fl_job_threshold = args.start_fl_job_threshold +start_fl_job_time_window = args.start_fl_job_time_window +update_model_ratio = args.update_model_ratio +update_model_time_window = args.update_model_time_window +fl_name = args.fl_name +fl_iteration_num = args.fl_iteration_num +client_epoch_num = args.client_epoch_num +client_batch_size = args.client_batch_size +client_learning_rate = args.client_learning_rate +scheduler_manage_port = args.scheduler_manage_port + +ctx = { + "enable_fl": True, + "server_mode": server_mode, + "ms_role": ms_role, + "worker_num": worker_num, + "server_num": server_num, + "scheduler_ip": scheduler_ip, + "scheduler_port": scheduler_port, + "fl_server_port": fl_server_port, + "start_fl_job_threshold": start_fl_job_threshold, + "start_fl_job_time_window": start_fl_job_time_window, + "update_model_ratio": update_model_ratio, + "update_model_time_window": update_model_time_window, + "fl_name": fl_name, + "fl_iteration_num": fl_iteration_num, + "client_epoch_num": client_epoch_num, + "client_batch_size": client_batch_size, + "client_learning_rate": client_learning_rate, + "scheduler_manage_port": scheduler_manage_port +} + +context.set_context(mode=context.GRAPH_MODE, device_target=device_target, save_graphs=False) +context.set_fl_context(**ctx) # 设置联邦学习训练流程相关参数 + +if __name__ == "__main__": + epoch = 5 + np.random.seed(0) + network = LeNet5(62) + criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") + net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9) + net_adam_opt = AdamWeightDecayOp(network.trainable_params(), weight_decay=0.1) + net_with_criterion = WithLossCell(network, criterion) + train_network = TrainOneStepCell(net_with_criterion, net_opt) + train_network.set_train() + losses = [] + + for _ in range(epoch): + data = Tensor(np.random.rand(32, 3, 32, 32).astype(np.float32)) + label = Tensor(np.random.randint(0, 61, (32)).astype(np.int32)) + loss = train_network(data, label).asnumpy() + losses.append(loss) + print(losses) +``` + +其中字典`ctx`中参数`enable_fl`用于设置是否启动联邦学习训练流程,为`true`代表启动联邦学习流程,为`false`代表启动普通训练流程,其他参数可以根据实际情况进行设置。由于只需要生成可用的模型文件即可,上面脚本中`data`和`label`均采用了模拟数据。 + +其中`src.model`为模型定义文件[可参考model.py文件]( https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile/src/model.py),`src.adam`为优化器定义文件[可参考adam.py文件](https://gitee.com/mindspore/mindspore/blob/master/tests/st/fl/mobile/src/adam.py)。 + +具体优化器损失函数定义可参考[MindSpore官方文档](https://www.mindspore.cn/tutorial/training/zh-CN/master/quick_start/quick_start.html#%E5%AE%9A%E4%B9%89%E6%8D%9F%E5%A4%B1%E5%87%BD%E6%95%B0%E5%8F%8A%E4%BC%98%E5%8C%96%E5%99%A8)。 + +## 生成端侧模型文件 + +1. 将模型导出为MindIR格式文件。 + + 可在训练流程代码中添加`export`语句获取MindIR格式模型文件, 示例代码如下: + + ```python + from mindspore import export + ... + + for _ in range(epoch): + data = Tensor(np.random.rand(32, 3, 32, 32).astype(np.float32)) + label = Tensor(np.random.randint(0, 61, (32)).astype(np.int32)) + loss = train_network(data, label).asnumpy() + losses.append(loss) + mindir_name = "lenet_train.mindir" + export(train_network, data, label, file_name= mindir_name, file_format='MINDIR') # 添加export语句获取MindIR格式模型文件 + print(losses) + ``` + + 具体可参考[这里](https://www.mindspore.cn/tutorial/training/zh-CN/master/use/save_model.html?highlight=mindir#mindir )。 + +2. 
将MindIR文件转化为联邦学习端侧框架可用的ms文件。 + + 具体模型转换教程可参考[训练模型转换教程](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_train.html )。 + + 模型转换示例如下: + + 假设待转换的模型文件为`lenet_train.mindir`,执行如下转换命令: + + ```sh + ./converter_lite --fmk=MINDIR --trainModel=true --modelFile=lenet_train.mindir --outputFile=lenet_train + ``` + + 转换成功输出如下: + + ```sh + CONVERTER RESULT SUCCESS:0 + ``` + + 这表明MindSpore模型成功转换为MindSpore端侧模型,并生成了新文件`lenet_train.ms`。如果转换失败输出如下: + + ```sh + CONVERT RESULT FAILED: + ``` + + 将生成的`.ms`格式的模型文件放在某个路径上,在调用联邦学习接口时可设置`FLParameter.trainModelPath`为该模型文件的路径。 + +## 模拟启动多客户端参与联邦学习 + +之后可编写一个Python脚本,调用联邦学习框架jar包 (x86环境联邦学习jar包获取可参考[Federated-Client部署教程中编译出包流程](https://gitee.com/mindspore/docs/blob/master/docs/federated/docs/source_zh_cn/deploy_federated_client.md)) 来模拟启动多客户端联邦学习任务。 + +1. 以Lenet网络为例,参考脚本`run.py`如下。 + + ```python + import os + import argparse + import subprocess + import random + + parser = argparse.ArgumentParser(description="Run TestClient.java case") + parser.add_argument("--jarPath", type=str, default="mindspore-lite-java-flclient.jar") # must be absolute path + parser.add_argument("--train_dataset", type=str, default="leaf/data/femnist/3500_clients_bin/") # must be absolute path + parser.add_argument("--test_dataset", type=str, default="null") # must be absolute path + parser.add_argument("--vocal_file", type=str, default="null") # must be absolute path + parser.add_argument("--ids_file", type=str, default="null") # must be absolute path + parser.add_argument("--flName", type=str, default="lenet") + parser.add_argument("--train_model_path", type=str, default="ms/lenet/") # must be absolute path of .ms files + parser.add_argument("--infer_model_path", type=str, default="ms/lenet/") # must be absolute path of .ms files + parser.add_argument("--ip", type=str, default="10.113.216.106") + parser.add_argument("--ssl", type=str, default="false") + parser.add_argument("--port", type=int, default=6668) + parser.add_argument("--server_num", type=int, default=0) + parser.add_argument("--worker_num", type=int, default=0) + parser.add_argument("--time_window", type=int, default=6000) + parser.add_argument("--use_elb", type=str, default="false") + parser.add_argument("--use_https", type=str, default="false") + parser.add_argument("--cert_path", type=str, default="null") + parser.add_argument("--task", type=str, default="train") + + args, _ = parser.parse_known_args() + jarPath = args.jarPath + train_dataset = args.train_dataset + test_dataset = args.test_dataset + vocal_file = args.vocal_file + ids_file = args.ids_file + flName = args.flName + train_model_path = args.train_model_path + infer_model_path = args.infer_model_path + ip = args.ip + ssl = args.ssl + port = args.port + server_num = args.server_num + worker_num = args.worker_num + time_window = str(args.time_window) + use_elb = args.use_elb + use_https = args.use_https + cert_path = args.cert_path + task = args.task + + users = os.listdir(train_dataset) + + def get_client_data_path(data_root_path, user): + use_path = os.path.join(data_root_path, user) + bin_file_paths = os.listdir(use_path) + + train_data_path = "" + train_label_path = "" + train_batch_num = "" + + test_data_path = "" + test_label_path = "" + test_batch_num = "" + + for file in bin_file_paths: + info = file.split(".")[0].split("_") + if info[4] == "train" and info[5] == "data": + train_data_path = os.path.join(use_path, file) + train_batch_num = info[3] + elif info[4] == "train" and info[5] == "label": + train_label_path = os.path.join(use_path, file) + 
elif info[4] == "test" and info[5] == "data": + test_data_path = os.path.join(use_path, file) + test_batch_num = info[3] + elif info[4] == "test" and info[5] == "label": + test_label_path = os.path.join(use_path, file) + train_path = train_data_path + "," + train_label_path + test_path = test_data_path + "," + test_label_path + return train_path, test_path, train_batch_num, test_batch_num + + for i in range(worker_num): + flId = "f"+str(i) + user = users[i] + train_path, test_path = "", "" + train_path, test_path, _, _= get_client_data_path(train_dataset, user) + print("===========================") + print("fl id: ", flId) + print("train path: ", train_path) + print("test path: ", test_path) + + cmd_client = "execute_path=$(pwd) && self_path=$(dirname \"${script_self}\") && " + cmd_client += "rm -rf ${execute_path}/client_" + str(i) + "/ &&" + cmd_client += "mkdir ${execute_path}/client_" + str(i) + "/ &&" + cmd_client += "cd ${execute_path}/client_" + str(i) + "/ || exit &&" + + cmd_client += "java -jar " + cmd_client += jarPath + " " + cmd_client += train_path + " " + cmd_client += vocal_file + " " + cmd_client += ids_file + " " + cmd_client += test_path + " " + cmd_client += flName + " " + cmd_client += train_model_path + "lenet_train" + str(i) + ".ms" + " " + print("model path: ", train_model_path + "lenet_train" + str(i) + ".ms" + " ") + cmd_client += infer_model_path + "lenet_train" + str(i) + ".ms" + " " + print("model path: ", infer_model_path + "lenet_train" + str(i) + ".ms" + " ") + cmd_client += flId + " " + cmd_client += ip + " " + cmd_client += ssl + " " + cmd_client += str(port) + " " + cmd_client += time_window + " " + cmd_client += use_elb + " " + cmd_client += str(server_num) + " " + cmd_client += use_https + " " + cmd_client += cert_path + " " + cmd_client += task + " " + cmd_client += " > client" + ".log 2>&1 &" + subprocess.call(['bash', '-c', cmd_client]) + ``` + + `run.py`脚本中入参含义如下,可根据实际情况进行设置。以下涉及路径的必须给出绝对路径。 + + - `--jarPath` + + 设置联邦学习jar包路径,x86环境联邦学习jar包获取可参考[Federated-Client部署教程中编译出包流程](https://gitee.com/mindspore/docs/blob/master/docs/federated/docs/source_zh_cn/deploy_federated_client.md)。 + + - `--train_dataset` + + 训练数据集root路径,LeNet图片分类任务在该root路径中存放的是每个客户端的训练data.bin文件与label.bin文件,例如`leaf/data/femnist/3500_clients_bin/`。 + + - `--test_dataset` + + 测试数据集路径,LeNet图片分类任务不需要设置该参数,默认为null。 + + - `--vocal_file` + + 设置数据预处理的词典文件路径,LeNet网络设置为null。 + + - `--ids_file` + + 设置词典的映射id文件路径,LeNet网络设置为null。 + + - `--flName` + + 设置联邦学习模型名称,目前只支持`lenet`(采用LeNet网络进行图片分类任务)和`adbert`(采用ALBERT网络进行情感分类任务)。 + + - `--train_model_path` + + 设置联邦学习使用的训练模型路径,为上面教程中拷贝的多份.ms文件所存放的目录,比如`ms/lenet`,必须为绝对路径。 + + - `--infer_model_path` + + 联邦学习使用的推理模型路径,为.ms格式的模型文件的绝对路径,LeNet图片分类任务可设置为与`train_model_path`相同。 + + - `--ip` + + 设置ip地址,即启动server端的服务器地址,格式为:10.113.216.106,目前云侧只支持http通信方式,默认采用http通信方式。 + + - `--ssl` + + 设置端云通信是否进行ssl证书认证,ssl证书认证只在https通信中使用,设置为false,不进行ssl证书认证;设置为true时,进行ssl证书认证且只支持https通信,`useHttps`必须设置为true,`cert_path`必须给出具体证书路径;默认为false。 + + - `--port` + + 设置端口号,与启动server端时的`fl_server_port`参数保持一致,格式为: 6668。 + + - `--time_window` + + 设置端侧重复请求的总时间窗口,与启动server端时的`start_fl_job_time_windows`和`update_model_time_windows`之和保持一致。 + + - `--server_num` + + 设置server数量,与启动server端时的`server_num`参数保持一致,用于模拟客户端随机选择不同的server发送信息,真实场景不需要此参数。 + + - `--worker_num` + + 设置client数量, 与启动server端时的`start_fl_job_cnt`保持一致,真实场景不需要此参数。 + + - `--use_elb` + + 用于多server场景,为true代表客户端每个round的请求都采用指定范围内的随机端口,false则采用固定端口。默认为false,当启动server端的`server_num`大于1时,该参数需设置成true。用于模拟客户端随机选择不同的server发送信息,真实场景不需要此参数。 + + - 
`--use_https`
+
+      设置端云通信是否进行HTTPS通信:设置为false,进行HTTP通信;设置为true,进行HTTPS通信;默认为false。
+
+    - `--cert_path`
+
+      当`--ssl`设置为true时,需对该参数进行设置,设置证书的绝对路径,默认为`null`。
+
+    - `--task`
+
+      用于设置本次启动的任务类型:为`train`代表启动训练任务;为`inference`代表启动多条数据推理任务;为`getModel`代表启动获取云侧模型的任务;设置其他字符串代表启动单条数据推理任务。默认为`train`。由于初始的模型文件(.ms文件)是未训练过的,建议先启动训练任务,待训练完成之后再启动推理任务(注意两次启动的`worker_num`保持一致,以保证`inference`使用的模型文件与`train`保持一致)。
+
+2. 为客户端准备好模型文件。
+
+    由于真实场景中一个客户端只包含一个.ms格式的模型文件,在模拟场景中,需要拷贝多份.ms文件,并按照`lenet_train{i}.ms`格式进行命名。其中i代表客户端编号,由于`run.py`脚本中的设置,需要设置为`0, 1, 2, 3, 4, 5 ...`等数字,每个客户端各使用一份.ms文件。
+
+    可参考下面脚本,对原始.ms文件进行拷贝和命名:
+
+    ```python
+    import shutil
+    import os
+
+    def copy_file(raw_path, new_path, copy_num):
+        # Copy the specified number of files from the raw path to the new path
+        for i in range(copy_num):
+            file_name = "lenet_train" + str(i) + ".ms"
+            new_file_path = os.path.join(new_path, file_name)
+            shutil.copy(raw_path, new_file_path)
+            print('====== copying ', i, ' file ======')
+        print("the number of copy .ms files: ", len(os.listdir(new_path)))
+
+    if __name__ == "__main__":
+        raw_path = "lenet_train.ms"
+        new_path = "ms/lenet"
+        num = 5
+        if not os.path.exists(new_path):
+            os.makedirs(new_path)  # 确保目标目录存在
+        copy_file(raw_path, new_path, num)
+    ```
+
+    其中`raw_path`设置原始.ms文件路径,`new_path`设置拷贝的.ms文件需要放置的路径,`num`设置拷贝的份数,一般设置为需要模拟启动的客户端数量。
+
+    比如以上脚本中的设置,在路径`ms/lenet`中生成了供5个客户端使用的.ms文件,其目录结构如下:
+
+    ```sh
+    ms/lenet
+    ├── lenet_train0.ms  # 客户端0使用的.ms文件
+    ├── lenet_train1.ms  # 客户端1使用的.ms文件
+    ├── lenet_train2.ms  # 客户端2使用的.ms文件
+    ├── lenet_train3.ms  # 客户端3使用的.ms文件
+    └── lenet_train4.ms  # 客户端4使用的.ms文件
+    ```
+
+3. 启动客户端。
+
+    运行`run.py`,指令如下:
+
+    ```sh
+    python run.py --ip=10.113.216.106 --port=6668 --server_num=8 --worker_num=5 --task=train
+    ```
+
+    该指令代表启动5个客户端参与联邦学习。若启动成功,会在当前文件夹生成5个客户端对应的日志文件,查看日志文件内容可了解每个客户端的运行情况:
+
+    ```text
+    ./
+    ├── client_0
+    │   └── client.log  # 客户端0的日志文件
+    ├── client_1
+    │   └── client.log  # 客户端1的日志文件
+    ├── client_2
+    │   └── client.log  # 客户端2的日志文件
+    ├── client_3
+    │   └── client.log  # 客户端3的日志文件
+    └── client_4
+        └── client.log  # 客户端4的日志文件
+    ```
+
+4. 关闭客户端进程。
+
+    可参考`finish.py`脚本,具体如下:
+
+    ```python
+    import os
+    import argparse
+    import subprocess
+
+    parser = argparse.ArgumentParser(description="Finish test_mobile_lenet.py case")
+    parser.add_argument("--kill_tag", type=str, default="mindspore-lite-java-flclient")
+
+    args, _ = parser.parse_known_args()
+    kill_tag = args.kill_tag
+
+    cmd = "pid=`ps -ef|grep " + kill_tag
+    cmd += " |grep -v \"grep\" | grep -v \"finish\" |awk '{print $2}'` && "
+    cmd += "for id in $pid; do kill -9 $id && echo \"killed $id\"; done"
+
+    subprocess.call(['bash', '-c', cmd])
+    ```
+
+    关闭客户端指令如下:
+
+    ```sh
+    python finish.py --kill_tag=mindspore-lite-java-flclient
+    ```
+
+    其中参数`--kill_tag`用于按关键字搜索并kill对应的客户端进程,只需设置为`--jarPath`中的特殊关键字即可。默认为`mindspore-lite-java-flclient`,即联邦学习jar包名。
+
+    假设启动了5个客户端,每个客户端包含一个Python进程和一个java进程,关闭成功会有以下打印:
+
+    ```sh
+    killed 56427
+    killed 56432
+    killed 56435
+    killed 56444
+    killed 56449
+    killed 56451
+    killed 56452
+    killed 56461
+    killed 56465
+    killed 56474
+    ```
+
+    即有10个进程成功被kill。
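+
+    此外,在启动或关闭客户端后,可以用如下示意片段确认当前仍在运行的客户端进程数量(仅为示例,进程关键字与`finish.py`中的`kill_tag`保持一致):
+
+    ```python
+    # 示意代码:统计包含指定关键字的客户端进程数量,用于确认启动/关闭是否生效
+    import subprocess
+
+    def count_client_process(tag="mindspore-lite-java-flclient"):
+        cmd = "ps -ef | grep " + tag + " | grep -v grep | grep -v finish | wc -l"
+        out = subprocess.check_output(['bash', '-c', cmd])
+        return int(out.decode().strip())
+
+    print("running client process num:", count_client_process())
+    ```
+
+5. 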
实验结果。 + + 目前`3500_clients_bin`文件夹中包含3500个客户端的数据,本脚本最多可模拟3500个客户端参与联邦学习。 + + 下图给出了50个客户端(设置`server_num`为16)进行联邦学习的测试集精度: + + ![lenet_50_clients_acc](images/lenet_50_clients_acc.png) + + 其中联邦学习总迭代数为100,客户端本地训练epoch数为20,batchSize设置为32。 + + 图中测试精度指对于每个联邦学习迭代,各客户端测试集在云侧聚合后的模型上的精度。 + + AVG:对于每个联邦学习迭代,50个客户端测试集精度的平均值。 + + TOP5:对于每个联邦学习迭代,测试集精度最高的5个客户端的精度平均值。 + + LOW5:对于每个联邦学习迭代,测试集精度最低的5个客户端的精度平均值。 \ No newline at end of file diff --git a/docs/federated/docs/source_zh_cn/images/LeNet_5.jpg b/docs/federated/docs/source_zh_cn/images/LeNet_5.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7894b0e181d965c5e9cbba91fe240c1890d37bda Binary files /dev/null and b/docs/federated/docs/source_zh_cn/images/LeNet_5.jpg differ diff --git a/docs/federated/docs/source_zh_cn/images/create_android_project.png b/docs/federated/docs/source_zh_cn/images/create_android_project.png new file mode 100644 index 0000000000000000000000000000000000000000..3c0e1ef0c00d27ca0abf1de363e60202c5fdc872 Binary files /dev/null and b/docs/federated/docs/source_zh_cn/images/create_android_project.png differ diff --git a/docs/federated/docs/source_zh_cn/images/lenet_50_clients_acc.png b/docs/federated/docs/source_zh_cn/images/lenet_50_clients_acc.png new file mode 100644 index 0000000000000000000000000000000000000000..c1282811f7161d77ec2ea563d96983ef293dbf43 Binary files /dev/null and b/docs/federated/docs/source_zh_cn/images/lenet_50_clients_acc.png differ diff --git a/docs/federated/docs/source_zh_cn/images/mindspore_federated_architecture.png b/docs/federated/docs/source_zh_cn/images/mindspore_federated_architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..c734913b92cc521152ace8d53f67b3e7d4283980 Binary files /dev/null and b/docs/federated/docs/source_zh_cn/images/mindspore_federated_architecture.png differ diff --git a/docs/federated/docs/source_zh_cn/images/mindspore_federated_networking.png b/docs/federated/docs/source_zh_cn/images/mindspore_federated_networking.png new file mode 100644 index 0000000000000000000000000000000000000000..cb43d016e916600287e545adb3f8355167831533 Binary files /dev/null and b/docs/federated/docs/source_zh_cn/images/mindspore_federated_networking.png differ diff --git a/docs/federated/docs/source_zh_cn/images/start_android_project.png b/docs/federated/docs/source_zh_cn/images/start_android_project.png new file mode 100644 index 0000000000000000000000000000000000000000..4c12def630d28ececea9d63fb9fa0042cc26bea9 Binary files /dev/null and b/docs/federated/docs/source_zh_cn/images/start_android_project.png differ diff --git a/docs/federated/docs/source_zh_cn/index.rst b/docs/federated/docs/source_zh_cn/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..485de6078e52d8f0e61948b8eb36f191741ed61f --- /dev/null +++ b/docs/federated/docs/source_zh_cn/index.rst @@ -0,0 +1,28 @@ +.. MindSpore documentation master file, created by + sphinx-quickstart on Thu Mar 24 11:00:00 2020. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +MindSpore Federated Document +============================== + +.. toctree:: + :maxdepth: 1 + :caption: 安装部署 + + federated_install + deploy_federated_server_cluster + deploy_federated_client + +.. toctree:: + :maxdepth: 1 + :caption: 应用实践 + + image_classification_application + sentiment_classification_application + +.. 
toctree::
+   :maxdepth: 1
+   :caption: 模型安全和隐私
+
+   security_and_privacy_protection
diff --git a/docs/federated/docs/source_zh_cn/security_and_privacy_protection.md b/docs/federated/docs/source_zh_cn/security_and_privacy_protection.md
new file mode 100644
index 0000000000000000000000000000000000000000..e2c8992a856fb373b4c88e978f3307958a8f951c
--- /dev/null
+++ b/docs/federated/docs/source_zh_cn/security_and_privacy_protection.md
@@ -0,0 +1,107 @@
+# 模型安全与隐私
+
+
+
+- [模型安全与隐私](#模型安全与隐私)
+    - [基于LDP的安全聚合](#基于ldp的安全聚合)
+        - [原理概述](#原理概述)
+        - [使用方式](#使用方式)
+    - [基于MPC的安全聚合](#基于mpc的安全聚合)
+        - [原理概述](#原理概述-1)
+        - [使用方式](#使用方式-1)
+    - [参考文献](#参考文献)
+
+
+
+
+
+联邦学习过程中,用户数据仅用于本地设备训练,不需要上传至中心服务器,可以避免用户个人数据的直接泄露。然而传统联邦学习框架中,模型以明文形式上云,仍然存在间接泄露用户隐私的风险。敌手获取到用户上传的明文模型后,可以通过重构、模型逆向等攻击恢复用户的个人训练数据,导致用户隐私泄露。
+
+MindSpore Federated联邦学习框架提供了基于本地差分隐私(LDP)和基于多方安全计算(MPC)的安全聚合算法,在本地模型上云前对其进行加噪或加扰,在保证模型可用性的前提下,解决横向联邦学习中的隐私泄露问题。
+
+## 基于LDP的安全聚合
+
+### 原理概述
+
+差分隐私(differential privacy)是一种保护用户数据隐私的机制。差分隐私定义为:
+
+$$
+Pr[\mathcal{K}(D)\in S] \le e^{\epsilon} Pr[\mathcal{K}(D') \in S]+\delta
+$$
+
+对于两个仅相差一条记录的数据集$D, D'$,随机算法$\mathcal{K}$的输出落在任意子集$S$中的概率满足上面的公式。$\epsilon$为差分隐私预算,$\delta$为扰动项,$\epsilon$和$\delta$越小,说明$\mathcal{K}$在$D$和$D'$上输出的数据分布越接近。
+
+在横向联邦学习中,假设客户端本地训练之后的模型权重矩阵是$W$,由于模型在训练过程中会“记住”训练集的特征,所以敌手可以借助$W$还原出用户的训练数据集[1]。
+
+MindSpore Federated提供基于本地差分隐私的安全聚合算法,防止本地模型上云时泄露隐私数据。
+
+MindSpore Federated客户端会生成一个与本地模型$W$相同维度的差分噪声矩阵$G$,然后将二者相加,得到一个满足差分隐私定义的权重$W_p$:
+
+$$
+W_p=W+G
+$$
+
+MindSpore Federated客户端将加噪后的模型$W_p$上传至云侧服务器进行联邦聚合。噪声矩阵$G$相当于给原模型加上了一层掩码,在降低模型泄露敏感数据风险的同时,也会影响模型训练的收敛性。如何在模型隐私性和可用性之间取得更好的平衡,仍然是一个值得研究的问题。实验表明,当参与方的数量$n$足够大时(一般指1000以上),大部分噪声能够相互抵消,本地差分机制对聚合模型的精度和收敛性没有明显影响。
+
+### 使用方式
+
+开启差分隐私训练的方式很简单,只需要在启动云侧服务时,使用`context.set_fl_context()`设置`encrypt_type='DP_ENCRYPT'`即可。
+
+此外,为了控制隐私保护的效果,我们还提供了3个参数:`dp_eps`、`dp_delta`以及`dp_norm_clip`,它们也通过`context.set_fl_context()`设置。
+
+`dp_eps`和`dp_norm_clip`的合法取值范围是大于0,`dp_delta`的合法取值范围是0<`dp_delta`<1。一般来说,`dp_eps`和`dp_delta`越小,隐私保护效果越好,但对模型收敛性的影响越大。建议`dp_delta`取成客户端数量的倒数,`dp_eps`大于50。
+
+`dp_norm_clip`是差分隐私机制对模型权重加噪前的权重裁剪系数,会影响模型的收敛性,一般建议取0.5~2。
+
+## 基于MPC的安全聚合
+
+### 原理概述
+
+尽管差分隐私技术可以适当保护用户数据隐私,但是当参与客户端数量比较少或者高斯噪声幅值较大时,模型精度会受较大影响。为了同时满足模型保护和模型收敛这两个要求,我们提供了基于MPC的安全聚合方案。
+
+在这种训练模式下,假设参与的客户端集合为$U$,对于任意Federated-Client $u$和$v$,它们会两两协商出一对随机扰动$p_{uv}$、$p_{vu}$,满足
+
+$$
+p_{uv}=\begin{cases} -p_{vu}, &u{\neq}v\\\\ 0, &u=v \end{cases}
+$$
+
+于是每个Federated-Client $u$在上传模型至Server前,会在原模型权重$x_u$上加上它与其它用户协商的扰动:
+
+$$
+x_{encrypt}=x_u+\sum\limits_{v{\in}U}p_{uv}
+$$
+
+从而Federated-Server的聚合结果$\overline{x}$为:
+
+$$
+\begin{align}
+\overline{x}&=\sum\limits_{u{\in}U}(x_{u}+\sum\limits_{v{\in}U}p_{uv})\\\\
+&=\sum\limits_{u{\in}U}x_{u}+\sum\limits_{u{\in}U}\sum\limits_{v{\in}U}p_{uv}\\\\
+&=\sum\limits_{u{\in}U}x_{u}
+\end{align}
+$$
+
+上面的过程只是介绍了聚合算法的主要思想。基于MPC的聚合方案是精度无损的,代价是通讯轮次的增加。如果您对算法的具体步骤感兴趣,可以参考原论文[2]。
+
+### 使用方式
+
+与开启差分隐私训练相似,只需要在`context.set_fl_context()`中设置`encrypt_type='PW_ENCRYPT'`即可。
+
+此外,与安全聚合训练相关的云侧环境参数还有`share_secrets_ratio`、`reconstruct_secrets_threshold`和`cipher_time_window`。
+
+`share_secrets_ratio`指代参与密钥碎片分享的客户端数量与参与联邦学习的客户端数量的比值,取值需要小于等于1。
+
+`reconstruct_secrets_threshold`指代参与密钥碎片恢复的客户端数量,取值需要小于参与密钥碎片分享的客户端数量。
+
+通常为了保证系统安全,在不考虑Server和Client合谋的情况下,`reconstruct_secrets_threshold`需要大于联邦学习客户端数量的一半;在考虑Server和Client合谋的情况下,`reconstruct_secrets_threshold`需要大于联邦学习客户端数量的2/3。
+
+`cipher_time_window`指代安全聚合各通讯轮次的时长限制,主要用来保证在某些客户端掉线的情况下,Server可以开始新一轮迭代。
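+
+下面给出一个开启安全聚合的示意片段(仅为示例:参数取值按照上文建议、并假设客户端数量为8,具体字段与取值请以`set_fl_context`的API文档为准):
+
+```python
+# 示意代码:在云侧启动脚本中开启安全聚合(两种方式二选一)
+import mindspore.context as context
+
+# 方式一:基于本地差分隐私(LDP)的安全聚合
+context.set_fl_context(encrypt_type='DP_ENCRYPT',
+                       dp_eps=100.0,       # 差分隐私预算,建议大于50
+                       dp_delta=0.125,     # 建议取客户端数量的倒数,此处假设8个客户端
+                       dp_norm_clip=1.0)   # 加噪前的权重裁剪系数,建议取0.5~2
+
+# 方式二:基于多方安全计算(MPC)的安全聚合
+# context.set_fl_context(encrypt_type='PW_ENCRYPT',
+#                        share_secrets_ratio=1.0,          # 参与密钥碎片分享的客户端比例
+#                        reconstruct_secrets_threshold=6,  # 大于客户端数量的2/3,此处假设8个客户端
+#                        cipher_time_window=3000)          # 各通讯轮次的时长限制
+```
+
+### 参考文献
+
+[1] Ligeng Zhu, Zhijian Liu, and Song Han. 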
[Deep Leakage from Gradients](http://arxiv.org/pdf/1906.08935.pdf). NeurIPS, 2019. + +[2] Keith Bonawitz, Vladimir Ivanov, Ben Kreuter, et al. [Practical Secure Aggregationfor Privacy-Preserving Machine Learning](https://dl.acm.org/doi/pdf/10.1145/3133956.3133982). NeurIPS, 2016. diff --git a/docs/federated/docs/source_zh_cn/sentiment_classification_application.md b/docs/federated/docs/source_zh_cn/sentiment_classification_application.md new file mode 100644 index 0000000000000000000000000000000000000000..a03e102f4c5b370d205f3814375cecf938336cc5 --- /dev/null +++ b/docs/federated/docs/source_zh_cn/sentiment_classification_application.md @@ -0,0 +1,548 @@ +# 实现一个情感分类应用(Android) + + + +- [实现一个情感分类应用(Android)](#实现一个情感分类应用android) + - [准备环节](#准备环节) + - [环境](#环境) + - [数据](#数据) + - [模型相关文件](#模型相关文件) + - [定义网络](#定义网络) + - [生成端侧模型文件](#生成端侧模型文件) + - [将模型导出为MindIR格式文件](#将模型导出为mindir格式文件) + - [将MindIR文件转化为联邦学习端侧框架可用的ms文件](#将mindir文件转化为联邦学习端侧框架可用的ms文件) + - [启动联邦学习流程](#启动联邦学习流程) + - [Android新建工程](#android新建工程) + - [编译MindSpore Lite AAR包](#编译mindspore-lite-aar包) + - [Android实例程序结构说明](#android实例程序结构说明) + - [编写代码](#编写代码) + - [Android工程配置依赖项](#android工程配置依赖项) + - [Android构建与运行](#android构建与运行) + - [实验结果](#实验结果) + - [参考文献](#参考文献) + + + + + +通过端云协同的联邦学习建模方式,可以充分发挥端侧数据的优势,避免用户敏感数据直接上报云侧。由于用户在使用输入法时对自己的文字隐私十分看重,并且输入法上的智慧功能也是用户非常需要的。因此,联邦学习天然适用在输入法场景中。 + +MindSpore Federated将联邦语言模型应用到了输入法的表情图片预测功能中。联邦语言模型会根据聊天文本数据推荐出适合当前语境的表情图片。在使用联邦学习建模时,每一张表情图片会被定义为一个情感标签类别,而每个聊天短语会对应一个表情图片。MindSpore Federated将表情图片预测任务定义为联邦情感分类任务。 + +## 准备环节 + +### 环境 + +参考:[服务端环境配置](./deploy_federated_server_cluster.md)和[客户端环境配置](./deploy_federated_client.md)。 + +### 数据 + +[用于训练的数据](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/supervise/client.tar.gz)包含20个用户聊天文件,其目录结构如下: + +```text +datasets/supervise/client/ + ├── 0.txt # 用户0的训练数据 + ├── 1.txt # 用户1的训练数据 + │ + │ ...... 
+ │ + └── 19.txt # 用户19的训练数据 +``` + +[用于验证的数据](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/supervise/eval.tar.gz)包含1个聊天文件,其目录结构如下: + +```text +datasets/supervise/eval/ + ├── eval.txt # 验证数据 +``` + +[标签对应的表情图片数据](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/memo.tar.gz)包含4类表情,每类表情包括若干张图片,其目录结构如下: + +```text +datasets/memo/ + ├── good # good类表情 + │ ├── 2018new_geili_org.png + │ ├── 2018new_good_org.png + │ ├── 2018new_xianhua_org.png + │ ├── 2018new_zan_org.png + │ └── 2018new_zhongguozan_org.png + ├── leimu # leimu类表情 + │ ├── 2018new_beishang_org.png + │ ├── 2018new_kelian_org.png + │ ├── 2018new_leimu_org.png + │ ├── 2018new_weiqu_org.png + │ ├── 2021_alongdog_org.png + │ ├── 2021_LZcry_org.png + │ └── 2021_LZpoor_org.png + ├── xiaoku # xiaoku类表情 + │ ├── 2018new_doge02_org.png + │ ├── 2018new_guzhang_org.png + │ ├── 2018new_huaixiao_org.png + │ ├── 2018new_xiaoerbuyu_org.png + │ ├── 2018new_xiaoku_thumb.png + │ └── 2018new_yinxian_org.png + └── xin # xin类表情 + ├── 2018new_aini_org.png + ├── 2018new_huaxin_org.png + ├── 2018new_tianping_org.png + ├── 2018new_xin_org.png + └── qixi2018_xiaoxinxin_org.png +``` + +### 模型相关文件 + +生成模型需要的起始[CheckPoint文件](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/models/albert_init.ckpt)、[词典](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/vocab.txt)和[词典ID映射文件](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/vocab_map_ids.txt)的目录结构如下: + +```text +models/ + ├── albert_init.ckpt # 起始的checkpoint + ├── vocab.txt # 词典 + └── vocab_map_ids.txt # 词典ID映射文件 +``` + +## 定义网络 + +联邦学习中的语言模型使用ALBERT模型[1]。客户端上的ALBERT模型包括:embedding层、encoder层和classifier层。 + +具体网络定义请参考[源码](https://gitee.com/mindspore/mindspore/tree/master/tests/st/fl/albert/src/model.py)。 + +### 生成端侧模型文件 + +#### 将模型导出为MindIR格式文件 + +示例代码如下: + +```python +import argparse +import os +import random +from time import time +import numpy as np +from mindspore import context, set_seed, load_checkpoint, Tensor, export +from mindspore.nn import AdamWeightDecay +from src.config import train_cfg, client_net_cfg +from src.utils import restore_params +from src.model import AlbertModelCLS +from src.cell_wrapper import NetworkWithCLSLoss, NetworkTrainCell + + +def parse_args(): + """ + parse args + """ + parser = argparse.ArgumentParser(description='export task') + parser.add_argument('--device_target', type=str, default='GPU', choices=['Ascend', 'GPU']) + parser.add_argument('--device_id', type=str, default='0') + parser.add_argument('--init_model_path', type=str, default='none') + parser.add_argument('--output_dir', type=str, default='./models/mindir/') + parser.add_argument('--seed', type=int, default=0) + return parser.parse_args() + + +def supervise_export(args_opt): + set_seed(args_opt.seed), random.seed(args_opt.seed) + start = time() + # 参数配置 + os.environ['CUDA_VISIBLE_DEVICES'] = args_opt.device_id + init_model_path = args_opt.init_model_path + output_dir = args_opt.output_dir + if not os.path.exists(output_dir): + os.makedirs(output_dir) + print('Parameters setting is done! Time cost: {}'.format(time() - start)) + start = time() + + # MindSpore配置 + context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) + print('Context setting is done! 
Time cost: {}'.format(time() - start)) + start = time() + + # 建立模型 + albert_model_cls = AlbertModelCLS(client_net_cfg) + network_with_cls_loss = NetworkWithCLSLoss(albert_model_cls) + network_with_cls_loss.set_train(True) + print('Model construction is done! Time cost: {}'.format(time() - start)) + start = time() + + # 建立优化器 + client_params = [_ for _ in network_with_cls_loss.trainable_params()] + client_decay_params = list( + filter(train_cfg.optimizer_cfg.AdamWeightDecay.decay_filter, client_params) + ) + client_other_params = list( + filter(lambda x: not train_cfg.optimizer_cfg.AdamWeightDecay.decay_filter(x), client_params) + ) + client_group_params = [ + {'params': client_decay_params, 'weight_decay': train_cfg.optimizer_cfg.AdamWeightDecay.weight_decay}, + {'params': client_other_params, 'weight_decay': 0.0}, + {'order_params': client_params} + ] + client_optimizer = AdamWeightDecay(client_group_params, + learning_rate=train_cfg.client_cfg.learning_rate, + eps=train_cfg.optimizer_cfg.AdamWeightDecay.eps) + client_network_train_cell = NetworkTrainCell(network_with_cls_loss, optimizer=client_optimizer) + print('Optimizer construction is done! Time cost: {}'.format(time() - start)) + start = time() + + # 构造数据 + input_ids = Tensor(np.zeros((train_cfg.batch_size, client_net_cfg.seq_length), np.int32)) + attention_mask = Tensor(np.zeros((train_cfg.batch_size, client_net_cfg.seq_length), np.int32)) + token_type_ids = Tensor(np.zeros((train_cfg.batch_size, client_net_cfg.seq_length), np.int32)) + label_ids = Tensor(np.zeros((train_cfg.batch_size,), np.int32)) + print('Client data loading is done! Time cost: {}'.format(time() - start)) + start = time() + + # 读取checkpoint + if init_model_path != 'none': + init_param_dict = load_checkpoint(init_model_path) + restore_params(client_network_train_cell, init_param_dict) + print('Checkpoint loading is done! Time cost: {}'.format(time() - start)) + start = time() + + # 导出 + export(client_network_train_cell, input_ids, attention_mask, token_type_ids, label_ids, + file_name=os.path.join(output_dir, 'albert_supervise'), file_format='MINDIR') + print('Supervise model export process is done! Time cost: {}'.format(time() - start)) + + +if __name__ == '__main__': + total_time_start = time() + args = parse_args() + supervise_export(args) + print('All is done! Time cost: {}'.format(time() - total_time_start)) + +``` + +#### 将MindIR文件转化为联邦学习端侧框架可用的ms文件 + +参考[图像分类应用](./image_classification_application.md)中生成端侧模型文件部分。 + +## 启动联邦学习流程 + +首先在服务端启动脚本,参考[云端部署方式](./deploy_federated_server_cluster.md)。 + +以ALBERT模型的训练与推理任务为基础,整体流程为: + +1. Android新建工程; + +2. 编译MindSpore Lite AAR包; + +3. Android实例程序结构说明; + +4. 编写代码; + +5. Android工程配置依赖项; + +6. Android构建与运行。 + +### Android新建工程 + +在Android Studio中新建项目工程,并安装相应的SDK(指定SDK版本后,由Android Studio自动安装)。 + +![新建工程](./images/create_android_project.png) + +### 编译MindSpore Lite AAR包 + +1. 参考[端侧部署](./deploy_federated_client.md)完成部署。 + +2. 获取生成的Android AAR包。 + + ```text + mindspore-lite-.aar + ``` + +3. 把AAR包放置安卓工程的app/libs/目录下。 + +### Android实例程序结构说明 + +```text +app +│ ├── libs # Android库项目的二进制归档文件 +| | └── mindspore-lite-version.aar # MindSpore Lite针对Android版本的归档文件 +├── src/main +│ ├── assets # 资源目录 +| | └── model # 模型目录 +| | └── albert_supervise.mindir.ms # 存放的预训练模型文件 +│ | └── albert_inference.mindir.ms # 存放的推理模型文件 +│ | └── data # 数据目录 +| | └── 0.txt # 模型数据文件 +| | └── vocab.txt # 词典文件 +| | └── vocab_map_ids.txt # 词典ID映射文件 +| | └── eval.txt # 训练结果评估文件 +| | └── eval_no_label.txt # 推理数据文件 +│ | +│ ├── java # java层应用代码 +│ │ └── ... 
存放Android代码文件,相关目录可以自定义 +│ │ +│ ├── res # 存放Android相关的资源文件 +│ └── AndroidManifest.xml # Android配置文件 +│ +│ +├── build.gradle # Android工程构建配置文件 +├── download.gradle # 工程依赖文件下载 +└── ... +``` + +### 编写代码 + +1. AssetCopyer.java:该代码文件作用是把Android工程的app/src/main/assets目录下的资源文件存放到Android系统的磁盘中,以便在模型训练与推理时联邦学习框架的接口能够根据绝对路径读取到资源文件。 + + ```java + import android.content.Context; + import java.io.File; + import java.io.FileOutputStream; + import java.io.InputStream; + import java.util.logging.Logger; + public class AssetCopyer { + private static final Logger LOGGER = Logger.getLogger(AssetCopyer.class.toString()); + public static void copyAllAssets(Context context,String destination) { + LOGGER.info("destination: " + destination); + copyAssetsToDst(context,"",destination); + } + // copy assets目录下面的资源文件到Android系统的磁盘中,具体的路径可打印destination查看 + private static void copyAssetsToDst(Context context,String srcPath, String dstPath) { + try { + // 递归获取assets目录的所有的文件名 + String[] fileNames =context.getAssets().list(srcPath); + if (fileNames.length > 0) { + // 构建目标file对象 + File file = new File(dstPath); + //创建目标目录 + file.mkdirs(); + for (String fileName : fileNames) { + // copy文件到指定的磁盘 + if(!srcPath.equals("")) { + copyAssetsToDst(context,srcPath + "/" + fileName,dstPath+"/"+fileName); + }else{ + copyAssetsToDst(context, fileName,dstPath+"/"+fileName); + } + } + } else { + // 构建源文件的输入流 + InputStream is = context.getAssets().open(srcPath); + // 构建目标文件的输出流 + FileOutputStream fos = new FileOutputStream(new File(dstPath)); + // 定义1024大小的缓冲数组 + byte[] buffer = new byte[1024]; + int byteCount=0; + // 源文件写到目标文件 + while((byteCount=is.read(buffer))!=-1) { + fos.write(buffer, 0, byteCount); + } + // 刷新输出流 + fos.flush(); + // 关闭输入流 + is.close(); + // 关闭输出流 + fos.close(); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + } + ``` + +2. 
FlJob.java:该代码文件作用是定义训练与推理任务的内容,具体的联邦学习接口含义请参考[联邦学习接口介绍](./interface_description_federated_client.md)。 + + ```java + import android.annotation.SuppressLint; + import android.os.Build; + import androidx.annotation.RequiresApi; + import com.huawei.flAndroid.utils.AssetCopyer; + import com.huawei.flclient.FLParameter; + import com.huawei.flclient.SyncFLJob; + import java.util.Arrays; + import java.util.UUID; + import java.util.logging.Logger; + public class FlJob { + private static final Logger LOGGER = Logger.getLogger(AssetCopyer.class.toString()); + private final String parentPath; + public FlJob(String parentPath) { + this.parentPath = parentPath; + } + // Android的联邦学习训练任务 + @SuppressLint("NewApi") + @RequiresApi(api = Build.VERSION_CODES.M) + public void syncJobTrain() { + String trainDataset = parentPath + "/data/0.txt"; + String vocal_file = parentPath + "/data/vocab.txt"; + String idsFile = parentPath + "/data/vocab_map_ids.txt"; + String testDataset = parentPath + "/data/eval.txt"; + String trainModelPath = parentPath + "/model/albert_supervise.mindir.ms"; + String inferModelPath = parentPath + "/model/albert_inference.mindir.ms"; + String flName = "albert"; + // server ip address,请保证Android能够访问到server,否则会出现connection failed + String ip = "http://127.0.0.1:"; + int port = 6668; + String clientID = UUID.randomUUID().toString(); + boolean useSSL = false; + FLParameter flParameter = FLParameter.getInstance(); + flParameter.setTrainDataset(trainDataset); + flParameter.setVocabFile(vocal_file); + flParameter.setIdsFile(idsFile); + flParameter.setTestDataset(testDataset); + flParameter.setFlName(flName); + flParameter.setTrainModelPath(trainModelPath); + flParameter.setInferModelPath(inferModelPath); + flParameter.setClientID(clientID); + flParameter.setIp(ip); + flParameter.setPort(port); + flParameter.setUseSSL(useSSL); + SyncFLJob syncFLJob = new SyncFLJob(); + syncFLJob.flJobRun(); + } + // Android的联邦学习推理任务 + public void syncJobPredict() { + String flName = "albert"; + String dataPath = parentPath + "/data/eval_no_label.txt"; + String vocal_file = parentPath + "/data/vocab.txt"; + String idsFile = parentPath + "/data/vocab_map_ids.txt"; + String modelPath = parentPath + "/model/albert_inference.mindir.ms"; + SyncFLJob syncFLJob = new SyncFLJob(); + int[] labels = syncFLJob.modelInference(flName, dataPath, vocal_file, idsFile, modelPath); + LOGGER.info("labels = " + Arrays.toString(labels)); + } + } + ``` + +3. MainActivity.java:该代码文件作用是启动联邦学习训练与推理任务。 + + ```java + import android.os.Build; + import android.os.Bundle; + import androidx.annotation.RequiresApi; + import androidx.appcompat.app.AppCompatActivity; + import com.huawei.flAndroid.job.FlJob; + import com.huawei.flAndroid.utils.AssetCopyer; + @RequiresApi(api = Build.VERSION_CODES.P) + public class MainActivity extends AppCompatActivity { + private String parentPath; + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + // 获取该应用程序在Android系统中的磁盘路径 + this.parentPath = this.getExternalFilesDir(null).getAbsolutePath(); + // copy assets目录下面的资源文件到Android系统的磁盘中 + AssetCopyer.copyAllAssets(this.getApplicationContext(), parentPath); + // 新建一个线程,启动联邦学习训练与推理任务 + new Thread(() -> { + FlJob flJob = new FlJob(parentPath); + flJob.syncJobTrain(); + flJob.syncJobPredict(); + }).start(); + } + } + ``` + +### Android工程配置依赖项 + +1. AndroidManifest.xml + + ```xml + + + + + + + + + + + + + + + ``` + +2. 
app/build.gradle + + ```text + plugins { + id 'com.android.application' + } + android { + // Android SDK的编译版本,建议大于27 + compileSdkVersion 30 + buildToolsVersion "30.0.3" + defaultConfig { + applicationId "com.huawei.flAndroid" + minSdkVersion 27 + targetSdkVersion 30 + versionCode 1 + versionName "1.0" + multiDexEnabled true + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + ndk { + // 不同的手机型号,对应ndk不相同,本人使用的mate20手机是'armeabi-v7a' + abiFilters 'armeabi-v7a' + } + } + //指定ndk版本 + ndkVersion '21.3.6528147' + sourceSets{ + main { + // 指定jni目录 + jniLibs.srcDirs = ['libs'] + jni.srcDirs = [] + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + } + dependencies { + //指定扫描libs目录下的AAR包 + implementation fileTree(dir:'libs',include:['*.aar']) + implementation 'androidx.appcompat:appcompat:1.1.0' + implementation 'com.google.android.material:material:1.1.0' + implementation 'androidx.constraintlayout:constraintlayout:1.1.3' + androidTestImplementation 'androidx.test.ext:junit:1.1.1' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0' + implementation 'com.android.support:multidex:1.0.3' + } + ``` + +### Android构建与运行 + +1. 连接Android设备,运行联邦学习训练与推理应用程序。通过USB连接Android设备调试,点击`Run 'app'`即可在你的设备上运行联邦学习任务。 + + ![run_app](./images/start_android_project.png) + +2. Android Studio连接设备调试操作,可参考。手机需开启“USB调试模式”,Android Studio才能识别到手机。 华为手机一般在`设置->系统和更新->开发人员选项->USB调试`中打开“USB调试模式”。 + +3. 在Android设备上,点击“继续安装”,安装完即可在APP启动之后执行ALBERT模型的联邦学习的训练与推理任务。 + +4. 程序运行结果如下: + + ```text + I/SyncFLJob: [model inference] inference finish + I/SyncFLJob: labels = [2, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 4, 4] + ``` + +## 实验结果 + +联邦学习总迭代数为10,客户端本地训练epoch数为1,batchSize设置为16。 + +```text + total acc:0.44488978 + total acc:0.583166333 + total acc:0.609218437 + total acc:0.645290581 + total acc:0.667334669 + total acc:0.685370741 + total acc:0.70741483 + total acc:0.711422846 + total acc:0.719438878 + total acc:0.733466934 +``` + +## 参考文献 + +[1] Lan Z , Chen M , Goodman S , et al. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations[J]. 2019. 
diff --git a/docs/federated/requirements.txt b/docs/federated/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..96cdfc3e0c7ee0ae6a01e59c1081111fdc792bb6 --- /dev/null +++ b/docs/federated/requirements.txt @@ -0,0 +1,5 @@ +sphinx >= 2.2.1, <= 2.4.4 +myst_parser == 0.14.0 +sphinx-markdown-tables +sphinx_rtd_theme +jieba \ No newline at end of file diff --git a/docs/federated/summarize_federated.md b/docs/federated/summarize_federated.md new file mode 100644 index 0000000000000000000000000000000000000000..17bfccab7c8969d5909a956d3c3cde2e70d37d14 --- /dev/null +++ b/docs/federated/summarize_federated.md @@ -0,0 +1,99 @@ +# 概述 + +`Linux` `Windows` `联邦学习` `分布式应用` `中级` `高级` `贡献者` + + + +- [概述](#概述) + - [MindSpore Federated 框架优势](#mindspore-federated-框架优势) + - [MindSpore Federated 组网架构](#mindspore-federated-组网架构) + - [MindSpore Federated 总体架构](#mindspore-federated-总体架构) + - [使用MindSpore Federated的工作流程](#使用mindspore-federated的工作流程) + - [场景体验](#场景体验) + + + + + +MindSpore Federated是一款开源联邦学习框架,支持千万级无状态终端设备商用化部署,在用户数据留存在本地的情况下,使能全场景智能应用。 + +联邦学习是一种加密的分布式机器学习技术,它是指参与联邦学习的各用户在不共享本地数据的前提下共建AI模型。MindSpore Federated优先专注于大规模参与方的横向联邦的应用场景。 + +## MindSpore Federated 框架优势 + +- 隐私安全 + + MindSpore Federated框架数据留存本地进行训练,不交换数据本身,而是用加密方式交换更新的模型参数。 + + 支持基于多方安全计算(MPC)的精度无损的安全聚合方案防止模型窃取。 + + 支持基于本地差分隐私的性能无损的加密方案防止模型泄漏隐私数据。 + +- 分布式联邦聚合 + + 云侧松耦合集群化处理方式,支持千万级大规模异构终端部署场景,实现高性能、高可用的分布式联邦聚合计算,可应对网络不稳定,负载突变等。 + +- 联邦效率提升 + + 支持同步和异步的联邦模式,支持多种模型压缩算法,提高联邦学习效率,节省带宽资源。 + + 支持多种联邦聚合策略,提高联邦收敛的平滑度,兼顾全局和局部的精度最优化。 + +- 灵活易用 + + 仅一行代码即可切换单机训练与联邦学习模式 + + 网络模型可编程,聚合算法可编程,安全算法可编程,安全等级可定制。 + +## MindSpore Federated 组网架构 + +MindSpore Federated采用松耦合组网模式,应对大规模、无状态、不可靠的异构设备的联邦学习任务。 + +![image0](./docs/source_zh_cn/images/mindspore_federated_networking.png) + +Federated-Scheduler:联邦学习调度器,与Federated-Server保持TCP长链接,通过心跳完成Federated-Server node的组网结构,并负责管理面任务的下发。 + +Federated-Server:联邦学习服务器,Federated-Server集群对外暴露唯一地址,内部根据负载均衡策略将Federated-Client请求路由到各Federated-Server node,实现联邦学习服务化,解决大规模不稳定Federated-Client的接入。集群内部保证集群事务弱一致性,并完成联邦聚合算法的分布式计算。使得Federated-Client在任何时刻访问任意Federated-Server,都能获得训练所需的全量数据。 + +Federated-Client:联邦学习客户端,负责本地数据训练以及作为https的客户端与Federated-Server交互。 + +## MindSpore Federated 总体架构 + +MindSpore Federated分为客户端模块和服务器模块两个部分,其框架的总体架构如下所示: + +![architecture](./docs/source_zh_cn/images/mindspore_federated_architecture.png) + +- Federated-Server模块: + + - **Federated Job Pipeline:** 联邦学习任务配置执行、弹性扩缩、容错容灾的主控流程。 + - **Worker Manager:** 设备管理相关逻辑。 + - **Aggregator/Optimizer:** 联邦学习在中心侧的聚合和优化逻辑单元,包括多server node间的分布式聚合处理。 + - **Metrics Manager:** 训练训练效果评估模块,用于判断训练效果和模型收敛。 + - **Meta Data/Model Storage:**负责数据存储,主要包括每轮迭代中元数据的跨节点分布式存储,以及训练模型。 + - **Time Limited Communication Module**:限时通信模块,保证在Cross-Device场景下,不会出现由于端侧设备连接不稳定导致训练任务无法继续执行。 + - **Armour:** 安全处理模块,包括多方安全计算等模型加解密策略。 + - **Protocol:** 协议解析器,上层模块只需专注于联邦计算,屏蔽底层通信协议类型。 + - **Communication:** 通信组件,支持多种通讯协议,用于接收来自Federated-Client,Federated-Scheduler和其他Federated-Server的消息以及帮助Federated-Server组网。 + - **Compute Resources:** 用于联邦学习中心侧的硬件计算资源。 + +- Federated-Client模块: + + - **Federated Job Pipeline:** 端侧联邦学习任务执行的主控逻辑,包括学习策略、同步/异步端云交互。 + + - **Training & Inference:** 轻量化的端侧训练和推理的能力,包括runtime和高性能算子库。 + + - **Armour:** 用于端侧的安全处理模块,包括多方安全计算、本地差分隐私等模型加解密策略。 + + - **Communication:** 端侧用于联邦学习任务的通信组件。 + +## 使用MindSpore Federated的工作流程 + +- 场景识别、积累数据:识别出使用联邦学习的场景,在客户端为联邦任务积累本地数据。 +- 模型选择、客户端部署:进行模型原型的选择或开发,并使用工具生成方便部署的端侧模型。 +- 应用部署:将Federated-Client部署到端侧应用中,并在云侧设置Federated-Plan和部署脚本。 + +## 场景体验 + +- 图像分类 + +- 文本分类 diff --git a/docs/migration_guide/requirements.txt 
index 6e8a6bc4c2d8fce7c05a891fab76504cad65a5c7..6d8cd70439820e16bc32c4abc93e948ba81dc01b 100644
--- a/docs/migration_guide/requirements.txt
+++ b/docs/migration_guide/requirements.txt
@@ -1,5 +1,5 @@
sphinx >= 2.2.1, <= 2.4.4
-recommonmark
+myst_parser == 0.14.0
sphinx-markdown-tables
sphinx_rtd_theme
numpy
diff --git a/docs/migration_guide/source_en/accuracy_optimization.md b/docs/migration_guide/source_en/accuracy_optimization.md
new file mode 100644
index 0000000000000000000000000000000000000000..e93fa6f0667945801c1a220e285effb894d2f250
--- /dev/null
+++ b/docs/migration_guide/source_en/accuracy_optimization.md
@@ -0,0 +1,5 @@
+# Accuracy Optimization
+
+No English version is available right now; contributions are welcome.
+
+
\ No newline at end of file
diff --git a/docs/migration_guide/source_en/conf.py b/docs/migration_guide/source_en/conf.py
index 540978e71d4fe921daf3a31b1a40ff597fdbc0ad..dd1c6f81d8db1f4f93709a6736a19cccb55edb19 100644
--- a/docs/migration_guide/source_en/conf.py
+++ b/docs/migration_guide/source_en/conf.py
@@ -32,7 +32,7 @@ release = 'master'
# ones.
extensions = [
    'sphinx_markdown_tables',
-    'recommonmark',
+    'myst_parser',
    'nbsphinx',
    'sphinx.ext.mathjax',
    'IPython.sphinxext.ipython_console_highlighting'
diff --git a/docs/migration_guide/source_en/overview.md b/docs/migration_guide/source_en/overview.md
new file mode 100644
index 0000000000000000000000000000000000000000..d78115abcc313b146adc8b09fd3206a76acce286
--- /dev/null
+++ b/docs/migration_guide/source_en/overview.md
@@ -0,0 +1,5 @@
+# Overview
+
+No English version is available right now; contributions are welcome.
+
+
\ No newline at end of file
diff --git a/docs/migration_guide/source_en/sample_code.md b/docs/migration_guide/source_en/sample_code.md
index ed8974879d8fa5d6a3075d6c2a973b412d15051c..05c7edc777c7ddaf0dbb0e998670fb30ed164640 100644
--- a/docs/migration_guide/source_en/sample_code.md
+++ b/docs/migration_guide/source_en/sample_code.md
@@ -591,11 +591,11 @@ Define the loss function and implement label smoothing.
import mindspore.nn as nn from mindspore import Tensor from mindspore.common import dtype as mstype -from mindspore.nn import Loss +from mindspore.nn import LossBase import mindspore.ops as ops # define cross entropy loss -class CrossEntropySmooth(Loss): +class CrossEntropySmooth(LossBase): """CrossEntropy""" def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000): super(CrossEntropySmooth, self).__init__() diff --git a/docs/migration_guide/source_zh_cn/accuracy_optimization.md b/docs/migration_guide/source_zh_cn/accuracy_optimization.md index 622935ecc626bfc77701ecf4c904b6547978345b..03052348e90856c9b0dfe4c059a9240ef9e431a7 100644 --- a/docs/migration_guide/source_zh_cn/accuracy_optimization.md +++ b/docs/migration_guide/source_zh_cn/accuracy_optimization.md @@ -287,7 +287,7 @@ MindInsight可以辅助用户对超参做检查,大多数情况下,`SummaryC > MindInsight支持查看`SummaryCollector`记录的计算图和MindSpore context的`save_graphs`参数导出的pb文件计算图。请参考我们教程中的[计算图可视化](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/dashboard.html)部分了解更多信息。 > -> 脚本迁移工具可以将PyTorch、TensorFlow框架下编写的模型转换为MindSpore脚本,请访问教程[使用工具迁移第三方框架脚本](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/migrate_3rd_scripts_mindconverter.html)以了解更多信息。 +> 脚本迁移工具可以将PyTorch、TensorFlow框架下编写的模型转换为MindSpore脚本,请访问教程[使用工具迁移模型定义脚本](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/migrate_3rd_scripts_mindconverter.html)以了解更多信息。 #### 检查输入数据 diff --git a/docs/migration_guide/source_zh_cn/conf.py b/docs/migration_guide/source_zh_cn/conf.py index 4bdc7a1bdc6acd64d40a0a705c0875d1bbf373bf..e9c67e86549f954b37b9e75cc980d5b9b09a5dc5 100644 --- a/docs/migration_guide/source_zh_cn/conf.py +++ b/docs/migration_guide/source_zh_cn/conf.py @@ -32,7 +32,7 @@ release = 'master' # ones. 
extensions = [ 'sphinx_markdown_tables', - 'recommonmark', + 'myst_parser', 'nbsphinx', 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting' diff --git a/docs/migration_guide/source_zh_cn/migration_case_of_mindconverter.ipynb b/docs/migration_guide/source_zh_cn/migration_case_of_mindconverter.ipynb index 22e80a4121241ea58d31e8bbac4459a17d41c7a2..aa0c62a8ad5e21bc267d2e0b3a81402bb975e211 100644 --- a/docs/migration_guide/source_zh_cn/migration_case_of_mindconverter.ipynb +++ b/docs/migration_guide/source_zh_cn/migration_case_of_mindconverter.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "military-possible", "metadata": {}, "source": [ "# 使用MindConverter迁移脚本\n", @@ -12,7 +11,6 @@ }, { "cell_type": "markdown", - "id": "modular-arbitration", "metadata": {}, "source": [ "## 概述" @@ -20,7 +18,6 @@ }, { "cell_type": "markdown", - "id": "stupid-british", "metadata": {}, "source": [ "PyTorch模型转换为MindSpore脚本和权重,首先需要将PyTorch模型导出为ONNX模型,然后使用MindConverter CLI工具进行脚本和权重迁移。\n", @@ -29,7 +26,6 @@ }, { "cell_type": "markdown", - "id": "impossible-nebraska", "metadata": {}, "source": [ "## 环境准备\n", @@ -42,8 +38,13 @@ "pip install mindspore==1.2.0\n", "pip install mindinsight==1.2.0\n", "pip install onnx\n", - "```\n", - "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "> 以上安装命令可选用国内的清华源途径进行安装,可加快文件下载速度,即在上述命令后面添加`-i https://pypi.tuna.tsinghua.edu.cn/simple`。\n", ">\n", "> 安装`ONNX`第三方库时,需要提前安装`protobuf-compiler`,`libprotoc-dev`,如果没有以上两个库,可以使用命令`apt-get install protobuf-compiler libprotoc-dev`进行安装。" @@ -51,7 +52,6 @@ }, { "cell_type": "markdown", - "id": "revolutionary-bench", "metadata": {}, "source": [ "## ONNX模型导出\n", @@ -66,7 +66,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "heated-millennium", "metadata": {}, "outputs": [], "source": [ @@ -78,7 +77,6 @@ }, { "cell_type": "markdown", - "id": "bacterial-picking", "metadata": {}, "source": [ "我们使用该模型进行推理,生成若干组测试用例,以验证模型迁移的正确性。\n", @@ -91,7 +89,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "hawaiian-borough", "metadata": {}, "outputs": [ { @@ -140,7 +137,6 @@ }, { "cell_type": "markdown", - "id": "atomic-rebel", "metadata": {}, "source": [ "HuggingFace提供了导出ONNX模型的工具,可使用如下方法将HuggingFace的预训练模型导出为ONNX模型:" @@ -149,7 +145,6 @@ { "cell_type": "code", "execution_count": 3, - "id": "corresponding-vampire", "metadata": {}, "outputs": [ { @@ -179,7 +174,6 @@ }, { "cell_type": "markdown", - "id": "adverse-outline", "metadata": {}, "source": [ "根据打印的信息,我们可以看到导出的ONNX模型输入节点有3个:`input_ids`,`token_type_ids`,`attention_mask`,以及相应的输入轴,\n", @@ -190,7 +184,6 @@ }, { "cell_type": "markdown", - "id": "paperback-playback", "metadata": {}, "source": [ "## ONNX模型验证\n" @@ -198,7 +191,6 @@ }, { "cell_type": "markdown", - "id": "mysterious-courage", "metadata": {}, "source": [ "我们仍然使用PyTorch模型推理时的句子`china is a poworful country, its capital is [MASK].`作为输入,观测ONNX模型表现是否符合预期。" @@ -207,7 +199,6 @@ { "cell_type": "code", "execution_count": 4, - "id": "suitable-channels", "metadata": {}, "outputs": [ { @@ -241,7 +232,6 @@ }, { "cell_type": "markdown", - "id": "essential-pharmacology", "metadata": {}, "source": [ "可以看到,导出的ONNX模型功能与原PyTorch模型完全一致,接下来可以使用MindConverter进行脚本和权重迁移了!" 
@@ -249,7 +239,6 @@ }, { "cell_type": "markdown", - "id": "realistic-singapore", "metadata": {}, "source": [ "## MindConverter进行模型脚本和权重迁移" @@ -257,7 +246,6 @@ }, { "cell_type": "markdown", - "id": "invisible-tracker", "metadata": {}, "source": [ "MindConverter进行模型转换时,需要给定模型路径(`--model_file`)、输入节点(`--input_nodes`)、输入节点尺寸(`--shape`)、输出节点(`--output_nodes`)。\n", @@ -268,7 +256,6 @@ { "cell_type": "code", "execution_count": 5, - "id": "processed-spanish", "metadata": {}, "outputs": [ { @@ -291,7 +278,6 @@ }, { "cell_type": "markdown", - "id": "working-funeral", "metadata": {}, "source": [ "**看到“MindConverter: conversion is completed.”即代表模型已成功转换!**" @@ -299,7 +285,6 @@ }, { "cell_type": "markdown", - "id": "classical-seminar", "metadata": {}, "source": [ "转换完成后,该目录下生成如下文件:\n", @@ -314,7 +299,6 @@ { "cell_type": "code", "execution_count": 6, - "id": "equipped-bottom", "metadata": {}, "outputs": [ { @@ -332,7 +316,6 @@ }, { "cell_type": "markdown", - "id": "fuzzy-thinking", "metadata": {}, "source": [ "可以看到所有文件已生成。\n", @@ -342,7 +325,6 @@ }, { "cell_type": "markdown", - "id": "leading-punch", "metadata": {}, "source": [ "## MindSpore模型验证\n", @@ -351,7 +333,6 @@ }, { "cell_type": "markdown", - "id": "competent-dispute", "metadata": {}, "source": [ "由于工具在转换时,需要将模型尺寸冻结,因此在使用MindSpore进行推理验证时,需要将句子补齐(Pad)到固定长度,可通过如下函数实现句子补齐。\n", @@ -362,7 +343,6 @@ { "cell_type": "code", "execution_count": 7, - "id": "essential-football", "metadata": {}, "outputs": [], "source": [ @@ -378,7 +358,6 @@ { "cell_type": "code", "execution_count": 8, - "id": "greatest-louis", "metadata": {}, "outputs": [ { @@ -417,7 +396,6 @@ }, { "cell_type": "markdown", - "id": "hybrid-intranet", "metadata": {}, "source": [ "至此,使用MindConverter进行脚本和权重迁移完成。\n", @@ -427,7 +405,6 @@ }, { "cell_type": "markdown", - "id": "minute-sector", "metadata": {}, "source": [ "## 常见问题" @@ -435,7 +412,6 @@ }, { "cell_type": "markdown", - "id": "favorite-worse", "metadata": {}, "source": [ "**Q:如何修改迁移后脚本的批次大小(Batch size)、句子长度(Sequence length)等尺寸(shape)规格,以实现模型可支持任意尺寸的数据推理、训练?**\n", @@ -445,7 +421,6 @@ }, { "cell_type": "markdown", - "id": "failing-smoke", "metadata": {}, "source": [ "**Q:生成后的脚本中类名的定义不符合开发者的习惯,如`class Module0(nn.Cell)`,人工修改是否会影响转换后的权重加载?**\n", @@ -470,7 +445,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.7.5" } }, "nbformat": 4, diff --git a/docs/migration_guide/source_zh_cn/sample_code.md b/docs/migration_guide/source_zh_cn/sample_code.md index 09c79a0fbfdd17fe32c4c096006a07bf8d43e2e9..a91e3807e0c528dc44a1c99bc0492c3352216067 100644 --- a/docs/migration_guide/source_zh_cn/sample_code.md +++ b/docs/migration_guide/source_zh_cn/sample_code.md @@ -587,12 +587,12 @@ opt = Momentum(group_params, lr, momentum) import mindspore.nn as nn from mindspore import Tensor from mindspore.common import dtype as mstype -from mindspore.nn import Loss +from mindspore.nn import LossBase import mindspore.ops as ops # define cross entropy loss -class CrossEntropySmooth(Loss): +class CrossEntropySmooth(LossBase): """CrossEntropy""" def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000): super(CrossEntropySmooth, self).__init__() diff --git a/docs/note/requirements.txt b/docs/note/requirements.txt index 6e8a6bc4c2d8fce7c05a891fab76504cad65a5c7..6d8cd70439820e16bc32c4abc93e948ba81dc01b 100644 --- a/docs/note/requirements.txt +++ b/docs/note/requirements.txt @@ -1,5 +1,5 @@ sphinx >= 2.2.1, <= 2.4.4 -recommonmark +myst_parser == 0.14.0 sphinx-markdown-tables 
sphinx_rtd_theme numpy diff --git a/docs/note/source_en/conf.py b/docs/note/source_en/conf.py index a1fd767271ac159540440ed65bd0d676163366a9..8c87c739fad893021c2db3d979955adada87e82e 100644 --- a/docs/note/source_en/conf.py +++ b/docs/note/source_en/conf.py @@ -30,7 +30,7 @@ release = 'master' # ones. extensions = [ 'sphinx_markdown_tables', - 'recommonmark', + 'myst_parser', ] source_suffix = { diff --git a/docs/note/source_en/design/technical_white_paper.md b/docs/note/source_en/design/technical_white_paper.md index 0b533897990008bdae72a29008aeac2168deeca7..e1e85efefce146663957fd62107a11565c8ab23b 100644 --- a/docs/note/source_en/design/technical_white_paper.md +++ b/docs/note/source_en/design/technical_white_paper.md @@ -1,5 +1,25 @@ # Technical White Paper -Please stay tuned... +`Linux` `Ascend` `GPU` `CPU` `Whole Process` `Framework Development` `Intermediate` `Advanced` `Contributor` - + + +- [Technical White Paper](#technical-white-paper) + - [Introduction](#introduction) + - [Overview](#overview) + + + + + +## Introduction + +Deep learning research and application have experienced explosive development in recent decades, triggering the third wave of artificial intelligence and achieving great success in image recognition, speech recognition and synthesis, unmanned driving, and machine vision. This also poses higher requirements on the algorithm application and dependent frameworks. With the continuous development of deep learning frameworks, a large quantity of computing resources can be conveniently used when neural network models are trained on large datasets. + +Deep learning is a kind of machine learning algorithm that uses a multi-layer structure to automatically learn and extract high-level features from raw data. Generally, it is very difficult to extract high-level abstract features from raw data. There are two mainstream deep learning frameworks. One is to build a static graph before execution to define all operations and network structures, for example, TensorFlow. This method improves the training performance at the cost of usability. The other is dynamic graph computing that is executed immediately, for example, PyTorch. Different from static graphs, dynamic graphs are more flexible and easier to debug, but the performance is sacrificed. Therefore, the existing deep learning framework cannot meet the requirements of easy development and efficient execution at the same time. + +## Overview + +MindSpore is a next-generation deep learning framework that incorporates the best practices of the industry. It best manifests the computing power of the Ascend AI Processor and supports flexible all-scenario deployment across device-edge-cloud. MindSpore creates a brand-new AI programming paradigm and lowers the threshold for AI development. MindSpore aims to achieve easy development, efficient execution, and all-scenario coverage. To facilitate easy development, MindSpore adopts an automatic differentiation (AD) mechanism based on source code transformation (SCT), which can represent complex combinations through control flows. A function is converted into an intermediate representation (IR) which constructs a computational graph that can be parsed and executed on devices. Before execution, multiple software and hardware collaborative optimization technologies are used in the graph to improve performance and efficiency in various scenarios across the device, edge, and cloud. MindSpore supports dynamic graphs for checking the running mode. 
Thanks to the AD mechanism, the mode switching between dynamic and static graphs becomes very simple. To effectively train large models on large datasets, MindSpore supports data parallel, model parallel, and hybrid parallel training through advanced manual configuration policies, which is highly flexible. In addition, MindSpore supports automatic parallelism, which efficiently searches for a fast parallel strategy in a large strategy space. For details about the advantages of the MindSpore framework, see the [Technical White Paper](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com:443/white_paper/MindSpore_white_paper_enV1.1.pdf).
diff --git a/docs/note/source_en/operator_list_parallel.md b/docs/note/source_en/operator_list_parallel.md
index 5e185e632080b01fffe5ac86b12da912536c28d6..4a9d5166d49327e8b7ffc11973c387ec99f23201 100644
--- a/docs/note/source_en/operator_list_parallel.md
+++ b/docs/note/source_en/operator_list_parallel.md
@@ -42,7 +42,7 @@
| [mindspore.ops.Cosh](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Cosh.html) | None |
| [mindspore.ops.Div](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Div.html) | None |
| [mindspore.ops.DivNoNan](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.DivNoNan.html) | None |
-| [mindspore.ops.Dropout](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Dropout.html) | None | 
+| [mindspore.ops.Dropout](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Dropout.html) | None |
| [mindspore.ops.DropoutDoMask](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.DropoutDoMask.html) | Need to be used in conjunction with `DropoutGenMask` |
| [mindspore.ops.DropoutGenMask](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.DropoutGenMask.html) | Need to be used in conjunction with `DropoutDoMask`, configuring shard strategy is not supported. |
| [mindspore.ops.Elu](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Elu.html) | None |
@@ -57,6 +57,7 @@
| [mindspore.ops.FloorDiv](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.FloorDiv.html) | None |
| [mindspore.ops.FloorMod](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.FloorMod.html) | None |
| [mindspore.ops.Gather](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Gather.html) | Only support 1-dim and 2-dim parameters and the last dimension of the input_params should be 32-byte aligned; Scalar input_indices is not supported; Repeated calculation is not supported when the parameters are split in the dimension of the axis; Split input_indices and input_params at the same time is not supported. |
+| [mindspore.ops.GatherNd](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.GatherNd.html) | The first input can't be split, and the last dimension of the second input can't be split; In auto_parallel mode, the strategy's searching algorithm can not use "recursive_programming".
| | [mindspore.ops.GeLU](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.GeLU.html) | None | | [mindspore.ops.Greater](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Greater.html) | None | | [mindspore.ops.GreaterEqual](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.GreaterEqual.html) | None | @@ -93,6 +94,8 @@ | [mindspore.ops.Reshape](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Reshape.html) | Configuring shard strategy is not supported. In auto parallel mode, if multiple operators are followed by the reshape operator, different shard strategys are not allowed to be configured for these operators. | | [mindspore.ops.Round](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Round.html) | None | | [mindspore.ops.Rsqrt](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Rsqrt.html) | None | +| [mindspore.ops.ScatterUpdate](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.ScatterUpdate.html) | The first dimension of first input can not be split, the second input can not be split, and the first n dimensions (n is the dimension size of the second input) of the third input can not be split; In auto_parallel mode, the strategy's searching algorithm can not use "recursive_programming". | +| [mindspore.ops.Select](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Select.html) | In auto_parallel mode, the strategy's searching algorithm can not use "recursive_programming". | | [mindspore.ops.Sigmoid](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Sigmoid.html) | None | | [mindspore.ops.SigmoidCrossEntropyWithLogits](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.SigmoidCrossEntropyWithLogits.html) | None | | [mindspore.ops.Sign](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.Sign.html) | None | @@ -123,4 +126,3 @@ | [mindspore.ops.ZerosLike](https://www.mindspore.cn/doc/api_python/en/master/mindspore/ops/mindspore.ops.ZerosLike.html) | None | > Repeated calculation means that the device is not fully used. For example, the cluster has 8 devices to run distributed training, the splitting strategy only cuts the input into 4 copies. In this case, double counting will occur. -> diff --git a/docs/note/source_en/static_graph_syntax_support.md b/docs/note/source_en/static_graph_syntax_support.md index 19d7e373c0699ea164d8a590e5c2e63be2664f4e..bca0f4997c0ab7cd1c8bda5e66e65a79e48985f6 100644 --- a/docs/note/source_en/static_graph_syntax_support.md +++ b/docs/note/source_en/static_graph_syntax_support.md @@ -76,7 +76,9 @@ Due to syntax parsing restrictions, the supported data types, syntax, and relate The following describes the data types, syntax, and related operations supported during static graph building. These rules apply only to graph mode. -> All the following examples run on the network in graph mode. The network definition is not described. +> All the following examples run on the network in graph mode. For brevity, the network definition is not described. +> +> The `Tensor` cannot be directly constructed in static graphs. It can be transferred to the network through parameters or constructed in the `__init__` method as a network attribute and then used in the `construct` method of the network. 
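To make the new note concrete, here is a minimal runnable sketch of the two supported patterns (an editorial illustration, not part of the patch; it assumes a standard MindSpore installation, and `Net`/`bias` are illustrative names):

```python
import numpy as np
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor, context
from mindspore.common import dtype as mstype

context.set_context(mode=context.GRAPH_MODE)

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        # Pattern 1: construct the Tensor once in __init__ as a network attribute.
        self.bias = Tensor(np.ones([2, 2]), mstype.float32)

    def construct(self, x):
        # Pattern 2: `x` is a Tensor passed in through the network inputs.
        # Calling Tensor(...) directly inside construct would not compile in graph mode.
        return ops.add(x, self.bias)

net = Net()
print(net(Tensor(np.ones([2, 2]), mstype.float32)))
```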
## Data Types
@@ -355,7 +357,7 @@ For details about the rules, click
以下所有示例都运行在Graph模式下的网络中,为了简洁,并未将网络的定义都写出来。
+>
+>`Tensor`不支持在静态图里直接构造,可通过参数传入网络`construct`方法,或者作为网络属性在`__init__`方法构造,然后在`construct`方法使用。
## 数据类型
@@ -356,7 +358,7 @@ def generate_tensor():
| 算术运算符 | 支持类型 |
| :--------- | :------------------------------------------------------------------------------------------------------ |
-| `+` | `Number` + `Number`、`Tensor` + `Tensor`、`Tensor` + `Number`、`Tuple` + `Tuple`、`String` + `String`。 |
+| `+` | `Number` + `Number`、`Tensor` + `Tensor`、`Tensor` + `Number`、`Tuple` + `Tuple`、`String` + `String`、`List` + `List`。 |
| `-` | `Number` - `Number`、`Tensor` - `Tensor`、`Tensor` - `Number`。 |
| `*` | `Number` \* `Number`、`Tensor` \* `Tensor`、`Tensor` \* `Number`。 |
| `/` | `Number` / `Number`、`Tensor` / `Tensor`、`Tensor` / `Number`。 |
diff --git a/docs/programming_guide/requirements.txt b/docs/programming_guide/requirements.txt
index 6e8a6bc4c2d8fce7c05a891fab76504cad65a5c7..6d8cd70439820e16bc32c4abc93e948ba81dc01b 100644
--- a/docs/programming_guide/requirements.txt
+++ b/docs/programming_guide/requirements.txt
@@ -1,5 +1,5 @@
sphinx >= 2.2.1, <= 2.4.4
-recommonmark
+myst_parser == 0.14.0
sphinx-markdown-tables
sphinx_rtd_theme
numpy
diff --git a/docs/programming_guide/source_en/auto_parallel.md b/docs/programming_guide/source_en/auto_parallel.md
index b7590dbcb64dbba4295d25c993d178054cdc66a6..13882a44e693cb2d1bcd500e6c05a560775a384f 100644
--- a/docs/programming_guide/source_en/auto_parallel.md
+++ b/docs/programming_guide/source_en/auto_parallel.md
@@ -98,9 +98,9 @@ context.get_auto_parallel_context("gradients_mean")

- `stand_alone`: standalone mode.
- `data_parallel`: data parallel mode.
-- `hybrid_parallel`: hybrid parallel mode.
+- `hybrid_parallel`: hybrid parallel mode, which is based on the communication primitives.
- `semi_auto_parallel`: semi-automatic parallel mode. In this mode, you can use the `shard` method to configure a segmentation policy for an operator. If no policy is configured, the data parallel policy is used by default.
-- `auto_parallel`: automatic parallel mode. In this mode, the framework automatically creates a cost model and selects the optimal segmentation policy for users.
+- `auto_parallel`: automatic parallel mode. In this mode, the framework automatically creates a cost model and selects the optimal segmentation policy for users. This mode is under development and has only been validated on some specific networks.

The complete examples of `auto_parallel` and `data_parallel` are provided in [Distributed Training](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/distributed_training_tutorials.html).

@@ -115,6 +115,29 @@ mul = ops.Mul().shard(((2, 1), (2, 1)))
context.get_auto_parallel_context("parallel_mode")
```

+> In semi_auto_parallel mode, if a parameter is used by multiple operators, please ensure that the parameter layout is consistent across these operators; otherwise, an error will be reported during compilation. In the following example, mul1 and mul2 share the weight, but mul1 splits the weight into 8 slices by row, while mul2 splits it into 8 slices by column. Because the layout of the weight is inconsistent between the two operators, compilation will fail.
+ +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import Tensor, Parameter +from mindspore.nn import Cell + +class Net(Cell): + """Net definition""" + def __init__(self): + super(Net, self).__init__() + self.mul1 = ops.Mul().shard(((8, 1), (8, 1))) + self.mul2 = ops.Mul().shard(((1, 8), (1, 8))) + self.weight = Parameter(Tensor(np.ones([16, 32]), dtype=ms.float32), "weight1") + + def construct(self, x): + out = self.mul1(x, self.weight) + out = self.mul2(out, self.weight) + return out +``` + #### all_reduce_fusion_config `all_reduce_fusion_config` allows users to customize the AllReduce segmentation policy by gradient aggregation. To reduce resource consumption and operator execution gaps, the framework fusions all the reverse gradient aggregation AllReduce operators into one by default. However, when the model is large, the iteration smearing time increases. You can set this parameter based on the actual network to manually tune and find the optimal segmentation policy by gradient aggregation. @@ -309,8 +332,24 @@ rank_id = get_rank() ## Distributed Attribute Configuration +### shard + +Applied to `Primitive`. + +In `AUTO_PARALLEL` and `SEMI_AUTO_PARALLEL` mode, `shard` can be used to configure the sharding strategies of operators. The definition of shard strategies can be referred to [design document](https://www.mindspore.cn/doc/note/en/master/design/mindspore/distributed_training_design.html). + +The following is a code example: + +```python +import mindspore.ops as ops + +matmul = ops.MatMul().shard(strategy=((1, 4), (4, 2))) +``` + ### cross_batch +Applied to `Primitive`. + In specific scenarios, the calculation logic of `data_parallel` is different from that of `stand_alone`. The calculation logic of `auto_parallel` is the same as that of `stand_alone` in any scenario. The convergence effect of `data_parallel` may be better. Therefore, MindSpore provides the `cross_batch` parameter to ensure that the calculation logic of `auto_parallel` is consistent with that of `data_parallel`. You can use the `add_prim_attr` method to configure the logic. The default value is False. The following is a code example: @@ -323,6 +362,8 @@ mul = ops.Mul().add_prim_attr("cross_batch", True) ### fusion +Applied to `Primitive`, `Parameter` and `Cell`. + To ensure performance, MindSpore provides the fusion function for the `AllGather` and `AllReduce` operators. Operators of the same type (of the same operator type and in the same communication domain) with the same `fusion` value will be fused together. The value of `fusion` must be greater than or equal to 0. When the value of `fusion` is 0, operators will not be fused together. Only `Ascend` backend is supported. There are two ways for configuration. If the communication operators are called explicitly, `add_prim_attr` could be used to configure. The following is a code example: @@ -369,12 +410,14 @@ Here the `comm_fusion` of parameter `Net.p1` is 2, which means the attribute `fu ### layerwise_parallel +Applied to `Parameter`. + In `HYBRID_PARALLEL` mode, you need to manually split the model. You need to manually add the `layerwise_parallel` flag to the parallel parameters of the model. The framework filters out the gradient aggregation operation for the parallel parameters of the model based on the flag. 
The following is a code example: ```python -imoprt numpy as np +import numpy as np from mindspore import Parameter, Tensor x = Parameter(Tensor(np.ones([2, 2])), layerwise_parallel=True) diff --git a/docs/programming_guide/source_en/cache.md b/docs/programming_guide/source_en/cache.md index addfbcddc8d77659d19fdab35efdf6a7804e2fa6..fc3a898fb3eb145b6e3cdce1b10e4c66cd5ab36a 100644 --- a/docs/programming_guide/source_en/cache.md +++ b/docs/programming_guide/source_en/cache.md @@ -106,7 +106,7 @@ Currently, the cache service supports only single-node cache. That is, the clien Where, the table of Cache Server Configuration lists five detailed configuration information. Active sessions shows the list of active session ID in current server if any. - Cache server generates log files with filename "cache_server.\.\.log.\.\.\". + Cache server generates log files with filename "cache_server.\.\.log.\.\.\". Note that there might be masses of DEBUG logs printed to the screen when `GLOG_v=0` is set. > - To enable data spilling, you need to use `-s` to set spilling path when starting cache server. Otherwise, this feature is default to be disabled and it will bring up a memory-only cache server. diff --git a/docs/programming_guide/source_en/conf.py b/docs/programming_guide/source_en/conf.py index 981c5b76ce56dd91c26e0e701079f88b9c6d9339..a75700ed9ae484c9c4772908b37e397d0c5150a6 100644 --- a/docs/programming_guide/source_en/conf.py +++ b/docs/programming_guide/source_en/conf.py @@ -33,7 +33,7 @@ release = 'master' # ones. extensions = [ 'sphinx_markdown_tables', - 'recommonmark', + 'myst_parser', 'nbsphinx', 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting' diff --git a/docs/programming_guide/source_en/context.md b/docs/programming_guide/source_en/context.md index c78865ad3b423f62a1087c092f77e5f042910ac0..5fa28dd40f2e813ba3ae0d5f99bfc42506cc13fb 100644 --- a/docs/programming_guide/source_en/context.md +++ b/docs/programming_guide/source_en/context.md @@ -26,7 +26,7 @@ Before initializing the network, configure the context parameter to control the ## Execution Mode Management -MindSpore supports two running modes: Graph and PyNative. +MindSpore supports two running modes: Graph and PyNative. By default, MindSpore is in Graph mode. Graph mode is the main mode of MindSpore, while PyNative mode is mainly used for debugging. - `GRAPH_MODE`: static graph mode or graph mode. In this mode, the neural network model is compiled into an entire graph, and then the graph is delivered for execution. This mode uses graph optimization to improve the running performance and facilitates large-scale deployment and cross-platform running. @@ -34,13 +34,13 @@ MindSpore supports two running modes: Graph and PyNative. ### Mode Selection -You can set and control the running mode of the program. By default, MindSpore is in Graph mode. The main differences between Graph mode and PyNative mode are: +You can set and control the running mode of the program. The main differences between Graph mode and PyNative mode are: - Application scenarios: Graph mode requires the network structure to be built at the beginning, and then the framework performs entire graph optimization and execution. This mode is suitable for scenarios where the network is fixed and high performance is required. PyNative mode executes operators line by line, supporting the execution of single operators, common functions, network inference, and separated gradient calculation. 
-- Efficiency: Theoretically, operators provided by MindSpore support both the PyNative and Graph modes. Therefore, when the same network and operators are executed in the two modes, the accuracy is the same. The network execution performance varies according to the execution mechanism.
+- Network execution: When Graph mode and PyNative mode execute the same network and operator, the accuracy is the same. As Graph mode uses graph optimization, computational graph sinking, and other technologies, it executes the network with higher performance and efficiency.

-- Code debugging: In script development and network debugging, it is recommended to use PyNative mode for debugging. In PyNative mode, you can easily set breakpoints to obtain intermediate results of network execution, and you can also debug the network through pdb. In Graph mode, the constructor only completes the construction of the network, and does not execute it. Therefore, the output of the operator cannot be obtained by setting breakpoints in the `construct` function. You can only print the output of this operator during network execution, and view it after the network execution is complete.
+- Code debugging: In script development and network debugging, it is recommended to use PyNative mode for debugging. In PyNative mode, you can easily set breakpoints to obtain intermediate results of network execution, and you can also debug the network through pdb. Graph mode, by contrast, does not support setting breakpoints; you can only specify operators whose outputs should be printed and then view the results after the network execution is completed.

Both Graph mode and PyNative mode use a function-style IR based on graph representation, namely MindIR, which uses the semantics close to that of the ANF function. When using Graph mode, set the running mode in the context to `GRAPH_MODE`. Then call the `nn.Cell` class and write your code in the `construct` function, or call the `@ms_function` decorator.

@@ -145,6 +145,8 @@ from mindspore import context
context.set_context(enable_profiling=True, profiling_options= '{"result_path":"/home/data/output","training_trace":"on"}')
```

+> This method of collecting profiling data is more suitable for advanced developers analyzing complex problems. If you need to collect profiling data for performance analysis, refer to [performance_profiling_ascend](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/performance_profiling_ascend.html).
+
### Saving MindIR

Saving the intermediate code of each compilation stage through context.set_context(save_graphs=True).
diff --git a/docs/programming_guide/source_en/numpy.ipynb b/docs/programming_guide/source_en/numpy.ipynb
index 2d9597f66e03ef6858a140bd3f8de2d673d637ad..6429bec297c2f62267ce62b33446d59dbad2eb32 100644
--- a/docs/programming_guide/source_en/numpy.ipynb
+++ b/docs/programming_guide/source_en/numpy.ipynb
@@ -603,7 +603,7 @@
    "id": "66b52f69",
    "metadata": {},
    "source": [
-     "> Currently, static graph cannot run in command line mode and not all python types can be passed into functions decorated with `ms_function`. For details about how to use `ms_function`, see [API: ms_function](https://www.mindspore.cn/doc/api_python/en/master/mindspore/mindspore.html#mindspore.ms_function)."
+     "> Currently, static graph cannot run in Python interactive mode and not all Python types can be passed into functions decorated with `ms_function`.
For details about how to use `ms_function`, see [API: ms_function](https://www.mindspore.cn/doc/api_python/en/master/mindspore/mindspore.html#mindspore.ms_function)." ] }, { diff --git a/docs/programming_guide/source_en/run.md b/docs/programming_guide/source_en/run.md index 840feb45fa7f4f528dc3d16a137da022460312ef..73d77ed3a712d2e81e507ac8f728f3dbb8f4fb74 100644 --- a/docs/programming_guide/source_en/run.md +++ b/docs/programming_guide/source_en/run.md @@ -105,6 +105,8 @@ The output is as follows: The [Model API](https://www.mindspore.cn/doc/api_python/en/master/mindspore/mindspore.html#mindspore.Model) of MindSpore is an advanced API used for training and validation. Layers with the training or inference function can be combined into an object. The training, inference, and prediction functions can be implemented by calling the train, eval, and predict APIs, respectively. +> MindSpore does not support the use of multiple threads for training, inference, and prediction functions. + You can transfer the initialized Model APIs such as the network, loss function, and optimizer as required. You can also configure amp_level to implement mixed precision and configure metrics to implement model evaluation. > Executing the network model will generate a `kernel_meta` directory under the execution directory, and save the operator cache files generated by network compilation to this directory during execution, including `.o`, `.info` and `.json` files. If the user executes the same network model again, or only some changes are made, MindSpore will automatically call the reusable operator cache file in the `kernel_meta` directory, which significantly reduces network compilation time and improves execution performance. For details, please refer to [Incremental Operator Build](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/incremental_operator_build.html) diff --git a/docs/programming_guide/source_zh_cn/auto_parallel.md b/docs/programming_guide/source_zh_cn/auto_parallel.md index 9f5d4d0dd93c80eda06f9ffa43220f773e837d01..edd54f0d0718a0018e2693b3431c27497d0bf228 100644 --- a/docs/programming_guide/source_zh_cn/auto_parallel.md +++ b/docs/programming_guide/source_zh_cn/auto_parallel.md @@ -98,9 +98,9 @@ context.get_auto_parallel_context("gradients_mean") - `stand_alone`:单机模式。 - `data_parallel`:数据并行模式。 -- `hybrid_parallel`:混合并行模式。 +- `hybrid_parallel`:混合并行模式,基于通信原语构造并行网络。 - `semi_auto_parallel`:半自动并行模式,即用户可通过`shard`方法给算子配置切分策略,若不配置策略,则默认是数据并行策略。 -- `auto_parallel`:自动并行模式,即框架会自动建立代价模型,为用户选择最优的切分策略。 +- `auto_parallel`:自动并行模式,即框架会自动建立代价模型,为用户选择最优的切分策略。该模式为实验特性,当前只在部分网络验证。 其中`auto_parallel`和`data_parallel`在MindSpore教程中有完整样例: @@ -117,6 +117,29 @@ mul = ops.Mul().shard(((2, 1), (2, 1))) context.get_auto_parallel_context("parallel_mode") ``` +> 在semi_auto_parallel模式下,如果一个Parameter被多个算子共享,则需要保证该Parameter在每个算子中的排布都一致,否则构图将会失败。比如下面这个例子中,mul1和mul2共享权重weight,但mul1对weight按行切8份,而mul2对weight按列切8份,weight在两个算子中的排布不一致,构图将会失败: + +```python +import numpy as np +import mindspore as ms +import mindspore.ops as ops +from mindspore import Tensor, Parameter +from mindspore.nn import Cell + +class Net(Cell): + """Net definition""" + def __init__(self): + super(Net, self).__init__() + self.mul1 = ops.Mul().shard(((8, 1), (8, 1))) + self.mul2 = ops.Mul().shard(((1, 8), (1, 8))) + self.weight = Parameter(Tensor(np.ones([16, 32]), dtype=ms.float32), "weight1") + + def construct(self, x): + out = self.mul1(x, self.weight) + out = self.mul2(out, self.weight) + return out +``` + #### all_reduce_fusion_config 
`all_reduce_fusion_config`可以让用户自定义梯度AllReduce融合切分策略。出于减少资源消耗及算子执行间隙的目的,框架默认将所有反向梯度聚合的AllReduce融合成一个算子运算,但当模型较大时,这会造成迭代拖尾耗时增加。用户可结合具体网络,通过设置该参数,手动调优找到性能最好的融合切分策略。 @@ -310,8 +333,24 @@ rank_id = get_rank() ## 分布式属性配置 +### shard + +适用于`Primitive`。 + +在`semi_auto_parallel`及`auto_parallel`模式下,可以通过`shard`方法对算子配置切分策略。关于算子并行策略的定义可以参考这篇[设计文档](https://www.mindspore.cn/doc/note/zh-CN/master/design/mindspore/distributed_training_design.html#id10)。 + +代码样例如下: + +```python +import mindspore.ops as ops + +matmul = ops.MatMul().shard(strategy=((1, 4),(4, 2))) +``` + ### cross_batch +适用于`Primitive`。 + 在特定场景下,`data_parallel`的计算逻辑和`stand_alone`是不一样的,`auto_parallel`在任何场景下都是和`stand_alone`的计算逻辑保持一致。而`data_parallel`的收敛效果可能更好,因此MindSpore提供了`cross_batch`这个参数,可以使`auto_parallel`的计算逻辑和`data_parallel`保持一致,用户可通过`add_prim_attr`方法进行配置,默认值是False。 代码样例如下: @@ -324,6 +363,8 @@ mul = ops.Mul().add_prim_attr("cross_batch", True) ### fusion +适用于`Primitive`, `Parameter`和`Cell`。 + 出于性能考虑,MindSpore提供了`AllGather`和`AllReduce`算子的融合功能,`fusion`值相同的同类算子(算子类型以及通信域相同)会融合在一起,`fusion`的值必须大于等于0,且当`fusion`值为0时,表示不融合。目前只支持`Ascend`后端。 `fusion`属性的配置有两种方式,如果是显式调用通信算子可以通过`add_prim_attr`方法直接为通信算子配置属性。代码样例如下: @@ -370,12 +411,14 @@ net = Net().set_comm_fusion(2) ### layerwise_parallel +适用于`Parameter`。 + 在`HYBRID_PARALLEL`模式下用户需要手动切分模型,其中对于模型并行的参数用户需要手动打上标记`layerwise_parallel`,框架会根据此标记为模型并行参数过滤掉梯度聚合操作。 代码样例如下: ```python -imoprt numpy as np +import numpy as np from mindspore import Parameter, Tensor x = Parameter(Tensor(np.ones([2, 2])), layerwise_parallel=True) diff --git a/docs/programming_guide/source_zh_cn/cache.ipynb b/docs/programming_guide/source_zh_cn/cache.ipynb index 480632c0a86e2a5c97aecc6e777a21b2fd00257e..42b1cab62142e6b24446168f484443c59298c858 100644 --- a/docs/programming_guide/source_zh_cn/cache.ipynb +++ b/docs/programming_guide/source_zh_cn/cache.ipynb @@ -157,6 +157,7 @@ "metadata": {}, "source": [ "`cache_admin`支持以下命令和参数: \n", + "\n", "- `--start`:启动缓存服务器,支持通过以下参数进行配置:\n", " - `--workers`或`-w`:设置缓存服务器的工作线程数量,默认情况下工作线程数量为机器CPU个数的一半。该参数需要根据NUMA架构来设置,若设置值不是机器中NUMA结点数的整数倍,则缓存服务器会对其进行自动调整。\n", " - `--spilldir`或`-s`:设置若缓存数据的大小超过内存空间,则溢出至磁盘的数据文件路径,默认为空(表示不启用数据溢出功能)。\n", @@ -213,7 +214,7 @@ "source": [ "其中,Cache Server Configuration表格分别列出了当前服务器的IP地址、端口号、工作线程数、日志等级、溢出路径等详细配置信息。Active sessions模块展示了当前服务器中已启用的session ID列表。\n", "\n", - "缓存服务器日志文件的命名格式为 \"cache_server.\\<主机名\\>.\\<用户名\\>.log.\\<日志等级\\>.\\<日期-时间\\>.\\<进程号\\>\"。\n", + "缓存服务器日志文件的命名格式为 \"cache_server.\\<主机名\\>.\\<用户名\\>.log.\\<日志等级\\>.\\<日期-时间\\>.\\<进程号\\>\"。当`GLOG_v=0`时,可能会屏显有大量DEBUG日志。\n", "\n", "> - 若要启用数据溢出功能,则用户在启动缓存服务器时必须使用`-s`参数对溢出路径进行设置,否则该功能默认关闭。" ] @@ -289,6 +290,7 @@ "metadata": {}, "source": [ "输出参数说明:\n", + "\n", "- `Session`: 缓存会话id。\n", "- `Cache Id`: 当前缓存会话中的cache实例id,`n/a`表示当前尚未创建缓存实例。\n", "- `Mem cached`: 缓存在内存中的数据量。\n", @@ -325,13 +327,14 @@ "metadata": {}, "source": [ "`DatasetCache`支持以下参数:\n", + "\n", "- `session_id`:缓存会话的id,通过`cache_admin -g`命令来创建并获取。\n", "- `size`:缓存最大内存空间占用,该参数以MB为单位,例如512GB的缓存空间应设置`size=524288`,默认为0。\n", "- `spilling`:当内存空间超出所设置的最大内存空间占用时,是否允许将剩余的数据溢出至磁盘,默认为False。\n", "- `hostname`:连接至缓存服务器的ip地址,默认为127.0.0.1。\n", "- `port`:连接至缓存服务器的端口号,默认为50052。\n", "- `num_connections`:建立的TCP/IP连接数,默认为12。\n", - "- `prefetch_size`:每次访问获取的行数,默认为20。\n", + "- `prefetch_size`:每次预取的数据行数,默认为20。\n", "\n", "> - 在实际使用中,通常应当首先使用`cache_admin -g`命令从缓存服务器处获得一个缓存会话id并作为`session_id`的参数,防止发生缓存会话不存在而报错的情况。\n", "> - 设置`size=0`代表不限制缓存所使用的内存空间,缓存服务器会根据系统的内存资源状况,自动控制缓存服务器的内存空间占用,使其不超过系统总内存的80%。\n", diff --git a/docs/programming_guide/source_zh_cn/conf.py 
b/docs/programming_guide/source_zh_cn/conf.py index adbf28ca537437b7cb6165286313131ba60879e0..e35a289bb143d764a56d229834b41f80091ef787 100644 --- a/docs/programming_guide/source_zh_cn/conf.py +++ b/docs/programming_guide/source_zh_cn/conf.py @@ -33,7 +33,7 @@ release = 'master' # ones. extensions = [ 'sphinx_markdown_tables', - 'recommonmark', + 'myst_parser', 'nbsphinx', 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting' diff --git a/docs/programming_guide/source_zh_cn/context.ipynb b/docs/programming_guide/source_zh_cn/context.ipynb index 1f57f82f271cb5183227e56ba5e5ff8ad0dcb653..881c7a0476ecaa3ca7708cb3b7108236397f12c0 100644 --- a/docs/programming_guide/source_zh_cn/context.ipynb +++ b/docs/programming_guide/source_zh_cn/context.ipynb @@ -32,7 +32,7 @@ "source": [ "## 执行模式管理\n", "\n", - "MindSpore支持Graph和PyNative这两种运行模式:\n", + "MindSpore支持Graph和PyNative两种运行模式。默认情况下,MindSpore处于Graph模式。Graph模式是MindSpore的主模式,而PyNative模式用于调试等用途。\n", "\n", "- `GRAPH_MODE`:静态图模式或者图模式,将神经网络模型编译成一整张图,然后下发执行。该模式利用图优化等技术提高运行性能,同时有助于规模部署和跨平台运行。\n", "\n", @@ -40,13 +40,13 @@ "\n", "### 模式选择\n", "\n", - "通过配置context参数可以控制程序运行的模式。默认情况下,MindSpore处于Graph模式。Graph和PyNative两种模式的区别主要有:\n", + "通过配置context参数可以控制程序运行的模式。Graph和PyNative两种模式的区别主要有:\n", "\n", "- 使用场景:Graph模式需要一开始就构建好网络结构,然后框架做整图优化和执行,比较适合网络固定没有变化,且需要高性能的场景。而PyNative模式逐行执行算子,支持执行单算子、普通函数和网络,以及单独求梯度的操作。\n", "\n", - "- 使用效率:理论上,MindSpore提供的算子同时支持Graph模式和PyNative模式,因此相同的网络和算子,分别在两个模式下执行时,精度效果是一致的。由于执行机理的差异,网络的执行性能会有不同。\n", + "- 网络执行:Graph模式和PyNative模式在执行相同的网络和算子时,精度效果是一致的。由于Graph模式运用了图优化、计算图整图下沉等技术,Graph模式执行网络的性能和效率更高。\n", "\n", - "- 代码调试:在脚本开发和网络流程调试中,推荐使用PyNative模式进行调试。在PyNative模式下,可以方便地设置断点,获取网络执行的中间结果,也可以通过pdb的方式对网络进行调试。而Graph模式由于在构造函数里只是完成网络构造,实际没有执行,因此在`construct`函数里打断点无法获取对应算子的输出,只能等整网执行中指定对应算子的输出打印,在网络执行完成后进行查看。\n", + "- 代码调试:在脚本开发和网络流程调试中,推荐使用PyNative模式进行调试。在PyNative模式下,可以方便地设置断点,获取网络执行的中间结果,也可以通过pdb的方式对网络进行调试。而Graph模式无法设置断点,只能先指定算子进行打印,然后在网络执行完成后查看输出结果。\n", "\n", "Graph模式和PyNative模式使用的是一种基于图表示的函数式IR,即MindIR,采用了接近于ANF函数式的语义。使用Graph模式时,将context中的运行模式设置为`GRAPH_MODE`,需要使用`nn.Cell`类,并且在`construct`函数中编写执行代码, 或者调用`@ms_function`装饰器。\n", "\n", @@ -177,6 +177,13 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> 本篇采集profiling数据的方法,更适合高阶开发者进行复杂问题分析。若需采集profiling数据,进行性能分析,可以参考[性能调优](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/performance_profiling_ascend.html)。" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/docs/programming_guide/source_zh_cn/numpy.ipynb b/docs/programming_guide/source_zh_cn/numpy.ipynb index 8e6a4574a6dcb9de1263d7e67a7b08fa5f771104..6c50ed0ecdee19e26eab37fbb4907a552372ffc7 100644 --- a/docs/programming_guide/source_zh_cn/numpy.ipynb +++ b/docs/programming_guide/source_zh_cn/numpy.ipynb @@ -606,7 +606,7 @@ "id": "f4d34964", "metadata": {}, "source": [ - "> 目前静态图不支持在命令行模式中运行,并且有部分语法限制。`ms_function`的更多信息可参考[API: ms_function](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/mindspore.html#mindspore.ms_function)。" + "> 目前静态图不支持在Python交互式模式下运行,并且有部分语法限制。`ms_function`的更多信息可参考[API: ms_function](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/mindspore.html#mindspore.ms_function)。" ] }, { diff --git a/docs/programming_guide/source_zh_cn/run.ipynb b/docs/programming_guide/source_zh_cn/run.ipynb index e7e656006723d7840c536b9c22d709074f8aa3bc..970b0c5df7b63348b0f8b4c9a480891f630ea940 100644 --- a/docs/programming_guide/source_zh_cn/run.ipynb +++ b/docs/programming_guide/source_zh_cn/run.ipynb @@ -135,6 +135,8 @@ "\n", 
"MindSpore的[Model接口](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/mindspore.html#mindspore.Model)是用于训练和验证的高级接口。可以将有训练或推理功能的layers组合成一个对象,通过调用`train`、`eval`、`predict`接口可以分别实现训练、推理和预测功能。\n", "\n", + "> MindSpore不支持使用多线程来进行训练、推理和预测功能。\n", + "\n", "用户可以根据实际需要传入网络、损失函数和优化器等初始化Model接口,还可以通过配置`amp_level`实现混合精度,配置`metrics`实现模型评估。\n", "\n", "> 执行网络模型会在执行目录下生成`kernel_meta`目录,并在执行过程中保存网络编译生成的算子缓存文件到此目录,包括`.o`,`.info`和`.json`文件。若用户再次执行相同的网络模型,或者仅有部分变化,MindSpore会自动调用`kernel_meta`目录下可复用的算子缓存文件,显著减少网络编译时间,提升执行性能。详细内容请参考[算子增量编译](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/incremental_operator_build.html)。\n", diff --git a/install/mindspore_ascend310_install_binary.md b/install/mindspore_ascend310_install_binary.md index dedffa608d783dc213997f4340930bc6bd8c319c..e984d64a85ecad853607974ec218f61485e0cafa 100644 --- a/install/mindspore_ascend310_install_binary.md +++ b/install/mindspore_ascend310_install_binary.md @@ -47,7 +47,7 @@ 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 ```bash -wget https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/mindspore_ascend-{version}-linux_{arch}.tar.gz --no-check-certificate +wget https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-linux_{arch}.tar.gz --no-check-certificate tar -zxf mindspore_ascend-{version}-linux_{arch}.tar.gz ``` diff --git a/install/mindspore_ascend310_install_binary_en.md b/install/mindspore_ascend310_install_binary_en.md index 8acdad404773b4118f587f1c03e7a93c50752759..742ca56d09b27076e23aa6f3daeaa175d4027d7a 100644 --- a/install/mindspore_ascend310_install_binary_en.md +++ b/install/mindspore_ascend310_install_binary_en.md @@ -40,7 +40,7 @@ The following describes how to quickly install MindSpore by pip on Linux in the It is recommended to refer to [Version List](https://www.mindspore.cn/versions/en) to perform SHA-256 integrity verification, and then execute the following command to install MindSpore after the verification is consistent. 
```bash -wget https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/mindspore_ascend-{version}-linux_{arch}.tar.gz --no-check-certificate +wget https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-linux_{arch}.tar.gz --no-check-certificate tar -zxf mindspore_ascend-{version}-linux_{arch}.tar.gz ``` diff --git a/install/mindspore_ascend310_install_pip.md b/install/mindspore_ascend310_install_pip.md index 8e3316f03241db05a219b5ba20c56258f2605a41..369379c41216d030e4cd94ef553f0249ac3be8ec 100644 --- a/install/mindspore_ascend310_install_pip.md +++ b/install/mindspore_ascend310_install_pip.md @@ -40,14 +40,14 @@ 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/ascend310/{arch}/mindspore_ascend-{version}-cp37-cp37m-{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: - 在联网状态下,安装whl包时会自动下载MindSpore安装包的依赖项(依赖项详情参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)),其余情况需自行安装。 - `{version}`表示MindSpore版本号,例如安装1.1.0版本MindSpore时,`{version}`应写为1.1.0。 -- `{arch}`表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为`linux_x86_64`。如果系统是ARM架构64位,则写为`linux_aarch64`。 +- `{arch}`表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为`x86_64`。如果系统是ARM架构64位,则写为`aarch64`。 ## 配置环境变量 diff --git a/install/mindspore_ascend310_install_pip_en.md b/install/mindspore_ascend310_install_pip_en.md index 5795580c9bdb541343a7b1c96475a8772c67b261..afe31a8340310e4c4c69230311002717e63934c2 100644 --- a/install/mindspore_ascend310_install_pip_en.md +++ b/install/mindspore_ascend310_install_pip_en.md @@ -40,14 +40,14 @@ The following describes how to quickly install MindSpore by pip on Linux in the It is recommended to refer to [Version List](https://www.mindspore.cn/versions/en) to perform SHA-256 integrity verification, and then execute the following command to install MindSpore after the verification is consistent. ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/ascend310/{arch}/mindspore_ascend-{version}-cp37-cp37m-{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` In the preceding information: - When the network is connected, dependencies of the MindSpore installation package are automatically downloaded during the .whl package installation. For details about dependencies, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt). In other cases, install the dependencies by yourself. - `{version}` specifies the MindSpore version number. For example, when installing MindSpore 1.1.0, set `{version}` to 1.1.0. -- `{arch}` specifies the system architecture. For example, if a Linux OS architecture is x86_64, set `{arch}` to `linux_x86_64`. 
If the system architecture is ARM64, set `{arch}` to `linux_aarch64`. +- `{arch}` specifies the system architecture. For example, if a Linux OS architecture is x86_64, set `{arch}` to `x86_64`. If the system architecture is ARM64, set `{arch}` to `aarch64`. ## Configuring Environment Variables diff --git a/install/mindspore_ascend_install_conda.md b/install/mindspore_ascend_install_conda.md index f48504ea0ec57b5ab1dc4d8abe389d569b9860b1..f8b986d4607982d83cfecab05d00cb86f9897f74 100644 --- a/install/mindspore_ascend_install_conda.md +++ b/install/mindspore_ascend_install_conda.md @@ -74,7 +74,7 @@ conda activate mindspore 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{system}/mindspore_ascend-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: @@ -82,7 +82,6 @@ pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSp - 在联网状态下,安装whl包时会自动下载MindSpore安装包的依赖项(依赖项详情参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)),其余情况需自行安装。 - `{version}`表示MindSpore版本号,例如安装1.1.0版本MindSpore时,`{version}`应写为1.1.0。 - `{arch}`表示系统架构,例如使用的系统是x86架构64位时,`{arch}`应写为`x86_64`。如果系统是ARM架构64位,则写为`aarch64`。 -- `{system}`表示系统,例如使用的欧拉系统ARM架构,`{system}`应写为`euleros_aarch64`,目前可支持以下系统`euleros_aarch64`/`centos_aarch64`/`centos_x86`/`ubuntu_aarch64`/`ubuntu_x86`/`kylin_aarch64`。 ## 配置环境变量 diff --git a/install/mindspore_ascend_install_docker.md b/install/mindspore_ascend_install_docker.md index c1d9e058d20bb7cd93c5f89fbac1e939b30bbe6d..9325bd96f6206ab57a0c7a6577b21796a121e762 100644 --- a/install/mindspore_ascend_install_docker.md +++ b/install/mindspore_ascend_install_docker.md @@ -31,9 +31,9 @@ MindSpore的Ascend 910镜像托管在[Ascend Hub](https://ascend.huawei.com/asce - 确认安装Ubuntu 18.04/CentOS 7.6是64位操作系统。 - 确认安装[Docker 18.03或更高版本](https://docs.docker.com/get-docker/)。 -- 确认安装Ascend 910 AI处理器配套软件包([Ascend Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/252504563?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910))。 - - 软件包安装方式请参考[产品文档](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910)。 - - 配套软件包包括[驱动和固件A800-9000 1.0.10](https://support.huawei.com/enterprise/zh/ascend-computing/a800-9000-pid-250702818/software/252727249?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702818)和[CANN 5.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/252504455?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373)。 +- 确认安装Ascend 910 AI处理器配套软件包[Ascend Data Center Solution]。 + - 软件包安装方式请参考[产品文档]。 + - 配套软件包包括[驱动和固件A800-9000]和[CANN]。 - 确认当前用户有权限访问Ascend 910 AI处理器配套软件包的安装路径`/usr/local/Ascend`,若无权限,需要root用户将当前用户添加到`/usr/local/Ascend`所在的用户组。 - 在完成安装基础驱动与配套软件包的基础上,确认安装CANN软件包中的toolbox实用工具包,即Ascend-cann-toolbox-{version}.run,该工具包提供了Ascend NPU容器化支持的Ascend Docker runtime工具。 @@ -50,7 +50,7 @@ MindSpore的Ascend 910镜像托管在[Ascend Hub](https://ascend.huawei.com/asce 其中: - `{username}` `{password}` 
`{url}` 代表用户的登录信息与镜像服务器信息,均为注册并激活账号后自动生成,在对应MindSpore镜像页面复制登录命令即可获取。 - - `{arch}` 表示系统架构,例如使用的Linux系统是x86架构64位时,{arch}应写为x86。如果系统是ARM架构64位,则写为arm。 + - `{arch}` 表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为x86。如果系统是ARM架构64位,则写为arm。 - `{tag}` 对应Atlas Data Center Solution版本号,同样可以在MindSpore镜像下载页面复制下载命令获取。 ## 运行MindSpore镜像 @@ -78,7 +78,7 @@ docker run -it -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ 其中: -- `{arch}` 表示系统架构,例如使用的Linux系统是x86架构64位时,{arch}应写为x86。如果系统是ARM架构64位,则写为arm。 +- `{arch}` 表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为x86。如果系统是ARM架构64位,则写为arm。 - `{tag}`对应Atlas Data Center Solution版本号,在MindSpore镜像下载页面自动获取。 如需使用MindInsight,需设置--network参数为”host”模式, 例如: @@ -156,9 +156,7 @@ print(ops.add(x, y)) 验证MindInsight安装: -1. 执行命令:```export PATH=/usr/local/python-3.7.5/bin:$PATH```。 - -2. 输入```mindinsight start --port 8080```, 如提示启动status为success,则安装成功。 +输入```mindinsight start --port 8080```, 如提示启动status为success,则安装成功。 ## 升级MindSpore版本 diff --git a/install/mindspore_ascend_install_docker_en.md b/install/mindspore_ascend_install_docker_en.md index d67434eda59f9f5ce86afba418a8ecb46ae9e1e1..6c70fcaf620a47edcdc3166962f1fdf74f0873d3 100644 --- a/install/mindspore_ascend_install_docker_en.md +++ b/install/mindspore_ascend_install_docker_en.md @@ -31,9 +31,9 @@ The current support for containerized build options is as follows: - Confirm that Ubuntu 18.04/CentOS 7.6 is installed with the 64-bit operating system. - Confirm that [Docker 18.03 or later](https://docs.docker.com/get-docker/) is installed. -- Confirm that the Ascend 910 AI processor software package ([Ascend Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/252504563?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910)) are installed. - - For the installation of software package, please refer to the [Product Document](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910). - - The software packages include [Driver and Firmware A800-9000 1.0.10](https://support.huawei.com/enterprise/zh/ascend-computing/a800-9000-pid-250702818/software/252727249?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702818) and [CANN 5.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/252504455?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373). +- Confirm that the Ascend 910 AI processor software package [Ascend Data Center Solution] are installed. + - For the installation of software package, please refer to the [Product Document]. + - The software packages include [Driver and Firmware A800-9000] and [CANN]. - Confirm that the current user has the right to access the installation path `/usr/local/Ascend`of Ascend 910 AI processor software package. If not, the root user needs to add the current user to the user group where `/usr/local/Ascend` is located. For the specific configuration, please refer to the software package instruction document. - After installing basic driver and corresponding software packages, confirm that the toolbox utility package in the CANN software package is installed, namely Ascend-cann-toolbox-{version}.run. The toolbox provides Ascend Docker runtime tools supported by Ascend NPU containerization. 
@@ -50,7 +50,7 @@ The current support for containerized build options is as follows: of which, - `{username}` `{password}` `{url}` represents the user's login information and image server information, which are automatically generated after registering and activating the account, and can be obtained by copying the login command on the corresponding MindSpore image page. - - `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, {arch} should be x86. If the system is ARM architecture 64-bit, then it should be arm. + - `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, `{arch}` should be x86. If the system is ARM architecture 64-bit, then it should be arm. - `{tag}` corresponds to the version number of Atlas Data Center Solution, which can also be obtained by copying the download command on the MindSpore image download page. ## Running MindSpore Image @@ -78,7 +78,7 @@ docker run -it -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ of which, -- `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, {arch} should be x86. If the system is ARM architecture 64-bit, then it should be arm. +- `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, `{arch}` should be x86. If the system is ARM architecture 64-bit, then it should be arm. - `{tag}` corresponds to the version number of Atlas Data Center Solution, which can be automatically obtained on the MindSpore image download page. If you want to use MindInsight, you need to set the --network parameter to "host" mode, for example: @@ -156,9 +156,7 @@ It means MindSpore has been installed by docker successfully. If you need to verify the MindInsight installation: -1. Execute the command: ```export PATH=/usr/local/python-3.7.5/bin:$PATH```. - -2. Enter ```mindinsight start --port 8080```, if it prompts that the startup status is successful, it means MindInsight has been installed successfully. +Enter ```mindinsight start --port 8080```, if it prompts that the startup status is successful, it means MindInsight has been installed successfully. 
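As a quick illustration of the simplified verification flow above, a minimal sketch (assuming the image already places the `mindinsight` CLI on PATH, and that this MindInsight version supports `mindinsight stop --port`):

```bash
# Start MindInsight and probe the web service on the chosen port
mindinsight start --port 8080
curl -s -o /dev/null -w "%{http_code}\n" http://127.0.0.1:8080   # expect 200 once startup succeeds
# Stop the service after the check
mindinsight stop --port 8080
```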
## Version Update diff --git a/install/mindspore_ascend_install_pip.md b/install/mindspore_ascend_install_pip.md index 38799b06a2ed7dc6a7a6d97d3f4bae2ae4492b27..38deb4c8a2c26cf901e090d060d48c61bf3c0d21 100644 --- a/install/mindspore_ascend_install_pip.md +++ b/install/mindspore_ascend_install_pip.md @@ -49,14 +49,14 @@ 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-cp37-cp37m-{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: - 在联网状态下,安装whl包时会自动下载MindSpore安装包的依赖项(依赖项详情参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)),其余情况需自行安装。 - `{version}`表示MindSpore版本号,例如安装1.1.0版本MindSpore时,`{version}`应写为1.1.0。 -- `{arch}`表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为`linux_x86_64`。如果系统是ARM架构64位,则写为`linux_aarch64`。 +- `{arch}`表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为`x86_64`。如果系统是ARM架构64位,则写为`aarch64`。 ## 配置环境变量 diff --git a/install/mindspore_ascend_install_pip_en.md b/install/mindspore_ascend_install_pip_en.md index 9878e8a4a9d5bdad0f824d4d5a1c18e1e83c154e..3b54f3dbf6b58407bc8037d8bc7b4ec302bc5e29 100644 --- a/install/mindspore_ascend_install_pip_en.md +++ b/install/mindspore_ascend_install_pip_en.md @@ -49,36 +49,36 @@ This document describes how to quickly install MindSpore in a Linux system with It is recommended to refer to [Version List](https://www.mindspore.cn/versions/en) to perform SHA-256 integrity verification, and then execute the following command to install MindSpore after the verification is consistent. ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-cp37-cp37m-{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/ascend/{arch}/mindspore_ascend-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` Of which, - When the network is connected, dependency items are automatically downloaded during .whl package installation. (For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)). In other cases, you need to manually install dependency items. - `{version}` denotes the version of MindSpore. For example, when you are installing MindSpore 1.1.0, `{version}` should be 1.1.0. -- `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, `{arch}` should be `linux_x86_64`. If the system is ARM architecture 64-bit, then it should be `linux_aarch64`. +- `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, `{arch}` should be `x86_64`. If the system is ARM architecture 64-bit, then it should be `aarch64`. 
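To make the placeholder substitution above concrete, here is a minimal shell sketch; the version number 1.1.0 is only an example, and on the Linux systems covered here `uname -m` already prints `x86_64` or `aarch64`:

```bash
# Resolve {version} and {arch}, then install the matching Ascend wheel
version=1.1.0        # example MindSpore version
arch=$(uname -m)     # x86_64 on Intel/AMD, aarch64 on ARM64
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/${version}/MindSpore/ascend/${arch}/mindspore_ascend-${version}-cp37-cp37m-linux_${arch}.whl \
    --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com \
    -i https://pypi.tuna.tsinghua.edu.cn/simple
```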
## Configuring Environment Variables -- **If Ascend 910 AI processor software is installed in a non-default path**, after MindSpore is installed, export runtime-related environment variables. `/usr/local/Ascend` in the following command `LOCAL_ASCEND=/usr/local/Ascend` denotes the installation path of the software package, please replace it as your actual installation path. +**If Ascend 910 AI processor software is installed in a non-default path**, after MindSpore is installed, export runtime-related environment variables. `/usr/local/Ascend` in the following command `LOCAL_ASCEND=/usr/local/Ascend` denotes the installation path of the software package, please replace it as your actual installation path. - ```bash - # control log level. 0-DEBUG, 1-INFO, 2-WARNING, 3-ERROR, default level is WARNING. - export GLOG_v=2 +```bash +# control log level. 0-DEBUG, 1-INFO, 2-WARNING, 3-ERROR, default level is WARNING. +export GLOG_v=2 - # Conda environmental options - LOCAL_ASCEND=/usr/local/Ascend # the root directory of run package +# Conda environmental options +LOCAL_ASCEND=/usr/local/Ascend # the root directory of run package - # lib libraries that the run package depends on - export LD_LIBRARY_PATH=${LOCAL_ASCEND}/ascend-toolkit/latest/fwkacllib/lib64:${LOCAL_ASCEND}/driver/lib64:${LOCAL_ASCEND}/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} +# lib libraries that the run package depends on +export LD_LIBRARY_PATH=${LOCAL_ASCEND}/ascend-toolkit/latest/fwkacllib/lib64:${LOCAL_ASCEND}/driver/lib64:${LOCAL_ASCEND}/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} - # Environment variables that must be configured - export TBE_IMPL_PATH=${LOCAL_ASCEND}/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe # TBE operator implementation tool path - export ASCEND_OPP_PATH=${LOCAL_ASCEND}/ascend-toolkit/latest/opp # OPP path - export PATH=${LOCAL_ASCEND}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${PATH} # TBE operator compilation tool path - export PYTHONPATH=${TBE_IMPL_PATH}:${PYTHONPATH} - # Python library that TBE implementation depends on - ``` +# Environment variables that must be configured +export TBE_IMPL_PATH=${LOCAL_ASCEND}/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe # TBE operator implementation tool path +export ASCEND_OPP_PATH=${LOCAL_ASCEND}/ascend-toolkit/latest/opp # OPP path +export PATH=${LOCAL_ASCEND}/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin/:${PATH} # TBE operator compilation tool path +export PYTHONPATH=${TBE_IMPL_PATH}:${PYTHONPATH} +# Python library that TBE implementation depends on +``` ## Installation Verification diff --git a/install/mindspore_cpu_install_conda.md b/install/mindspore_cpu_install_conda.md index b0882cb5f9ef1f368bafc25b15b38ffb35350197..cf071eff4eeccefa5103cf874dd00dd6c2586e42 100644 --- a/install/mindspore_cpu_install_conda.md +++ b/install/mindspore_cpu_install_conda.md @@ -55,7 +55,7 @@ conda activate mindspore 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/{system}/mindspore-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/{arch}/mindspore-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i 
https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: @@ -63,7 +63,6 @@ pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSp - 在联网状态下,安装whl包时会自动下载MindSpore安装包的依赖项(依赖项详情参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)),其余情况需自行安装。 - `{version}`表示MindSpore版本号,例如安装1.1.0版本MindSpore时,`{version}`应写为1.1.0。 - `{arch}`表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为`x86_64`。如果系统是ARM架构64位,则写为`aarch64`。 -- `{system}`表示系统,例如使用的Ubuntu系统X86架构,`{system}`应写为`ubuntu_x86`,目前CPU版本可支持以下系统`ubuntu_aarch64`/`ubuntu_x86`。 ## 验证安装是否成功 diff --git a/install/mindspore_cpu_install_pip.md b/install/mindspore_cpu_install_pip.md index e2acbc63ce1415ee5d7a21ebdd7c438212e5daed..be6be0dd5171960d97790292b6304fef8a528123 100644 --- a/install/mindspore_cpu_install_pip.md +++ b/install/mindspore_cpu_install_pip.md @@ -23,6 +23,7 @@ - 确认安装64位操作系统,[glibc](https://www.gnu.org/software/libc/)>=2.17,其中Ubuntu 18.04是经过验证的。 - 确认安装[GCC 7.3.0版本](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz)。 +- 确认安装[gmp 6.1.2版本](https://gmplib.org/download/gmp/gmp-6.1.2.tar.xz)。 - 确认安装Python 3.7.5版本。 - 如果未安装或者已安装其他版本的Python,可从[官网](https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz)或者[华为云](https://mirrors.huaweicloud.com/python/3.7.5/Python-3.7.5.tgz)下载Python 3.7.5版本,进行安装。 @@ -31,14 +32,14 @@ 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/{arch}/mindspore-{version}-cp37-cp37m-{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/{arch}/mindspore-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: - 在联网状态下,安装whl包时会自动下载MindSpore安装包的依赖项(依赖项详情参见[requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)),其余情况需自行安装。 - `{version}`表示MindSpore版本号,例如安装1.1.0版本MindSpore时,`{version}`应写为1.1.0。 -- `{arch}`表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为`linux_x86_64`。如果系统是ARM架构64位,则写为`linux_aarch64`。 +- `{arch}`表示系统架构,例如使用的Linux系统是x86架构64位时,`{arch}`应写为`x86_64`。如果系统是ARM架构64位,则写为`aarch64`。 ## 查询安装是否成功 diff --git a/install/mindspore_cpu_install_pip_en.md b/install/mindspore_cpu_install_pip_en.md index c5883a717e7313b75286c5c4ece9927cc341fea8..505643ee0b06e9c27ca8b0253751e68605de5926 100644 --- a/install/mindspore_cpu_install_pip_en.md +++ b/install/mindspore_cpu_install_pip_en.md @@ -21,6 +21,7 @@ This document describes how to quickly install MindSpore by pip in a Linux syste - Confirm that the 64-bit operating system is installed and the [glibc](https://www.gnu.org/software/libc/)>=2.17, where Ubuntu 18.04 are verified. - Confirm that [GCC 7.3.0](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz) is installed. +- Confirm that [gmp 6.1.2](https://gmplib.org/download/gmp/gmp-6.1.2.tar.xz) is installed. - Confirm that Python 3.7.5 is installed. - If you didn't install Python or you have installed other versions, please download the Python 3.7.5 64-bit from [Python](https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz) or [Huaweicloud](https://mirrors.huaweicloud.com/python/3.7.5/Python-3.7.5.tgz) to install. 
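The prerequisites above can be sanity-checked with a few standard commands. This is only a sketch; gmp ships as a library without a version CLI, so it is usually verified through the system package manager instead.

```bash
gcc --version | head -n1    # expect gcc 7.3.0
python3.7 --version         # expect Python 3.7.5
ldd --version | head -n1    # glibc version, expect >= 2.17
```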
@@ -29,14 +30,14 @@ This document describes how to quickly install MindSpore by pip in a Linux syste It is recommended to refer to [Version List](https://www.mindspore.cn/versions/en) to perform SHA-256 integrity verification, and then execute the following command to install MindSpore after the verification is consistent. ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/{arch}/mindspore-{version}-cp37-cp37m-{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/{arch}/mindspore-{version}-cp37-cp37m-linux_{arch}.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` Of which, - When the network is connected, dependency items are automatically downloaded during .whl package installation. (For details about other dependency items, see [requirements.txt](https://gitee.com/mindspore/mindspore/blob/master/requirements.txt)). In other cases, you need to manually install dependency items. - `{version}` denotes the version of MindSpore. For example, when you are installing MindSpore 1.1.0, `{version}` should be 1.1.0. -- `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, `{arch}` should be `linux_x86_64`. If the system is ARM architecture 64-bit, then it should be `linux_aarch64`. +- `{arch}` denotes the system architecture. For example, the Linux system you are using is x86 architecture 64-bit, `{arch}` should be `x86_64`. If the system is ARM architecture 64-bit, then it should be `aarch64`. ## Installation Verification diff --git a/install/mindspore_cpu_install_source.md b/install/mindspore_cpu_install_source.md index bb607b988900c330c63f69656d6f2dd618c8e027..dc93fd3637d817b3e37aa0507d085992495e8390 100644 --- a/install/mindspore_cpu_install_source.md +++ b/install/mindspore_cpu_install_source.md @@ -25,6 +25,7 @@ - 确认安装64位操作系统,其中Ubuntu 18.04是经过验证的。 - 确认安装[GCC 7.3.0版本](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz)。 +- 确认安装[gmp 6.1.2版本](https://gmplib.org/download/gmp/gmp-6.1.2.tar.xz)。 - 确认安装[Python 3.7.5版本](https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz)。 - 确认安装[CMake 3.18.3及以上版本](https://cmake.org/download/)。 - 安装完成后需将CMake所在路径添加到系统环境变量。 diff --git a/install/mindspore_cpu_install_source_en.md b/install/mindspore_cpu_install_source_en.md index 64122da9591f63a10dc96f5fdf03c18580125409..b15ab63a0a280d116fc2da4c0d9debace4d4dbaa 100644 --- a/install/mindspore_cpu_install_source_en.md +++ b/install/mindspore_cpu_install_source_en.md @@ -23,6 +23,7 @@ This document describes how to quickly install MindSpore by source code in a Lin - Confirm that the 64-bit operating system is installed, where Ubuntu 18.04 are verified. - Confirm that [GCC 7.3.0](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz) is installed. +- Confirm that [gmp 6.1.2](https://gmplib.org/download/gmp/gmp-6.1.2.tar.xz) is installed. - Confirm that [Python 3.7.5](https://www.python.org/ftp/python/3.7.5/Python-3.7.5.tgz) is installed. - Confirm that [CMake 3.18.3 or later](https://cmake.org/download/) is installed. - Add the path where the executable file `cmake` stores to the environment variable PATH. 
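As an illustration of the PATH requirement above, a minimal sketch; the install prefix `/usr/local/cmake-3.18.3` is an assumption, so adjust it to wherever CMake is actually installed:

```bash
# Expose a locally installed CMake on PATH
export PATH=/usr/local/cmake-3.18.3/bin:$PATH
cmake --version   # should report 3.18.3 or later
```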
diff --git a/install/mindspore_cpu_win_install_conda.md b/install/mindspore_cpu_win_install_conda.md index a0621b5ed3fb5c6ea21ace331cc2bec7f6331455..2c67ab6807a05bc771c261fc6e3d3528813c0379 100644 --- a/install/mindspore_cpu_win_install_conda.md +++ b/install/mindspore_cpu_win_install_conda.md @@ -1,4 +1,4 @@ -# Conda方式安装MindSpore CPU版本(Windows) +# Conda方式安装MindSpore CPU版本(Windows) @@ -55,7 +55,7 @@ conda activate mindspore 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/windows_x64/mindspore-{version}-cp37-cp37m-win_amd64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/x86_64/mindspore-{version}-cp37-cp37m-win_amd64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: diff --git a/install/mindspore_cpu_win_install_pip.md b/install/mindspore_cpu_win_install_pip.md index a4cc1e06bcf84497875e4eda1bc0fe7ffeb4ba26..d513c5bd00b007b6529888093cd26aeebbe253cb 100644 --- a/install/mindspore_cpu_win_install_pip.md +++ b/install/mindspore_cpu_win_install_pip.md @@ -1,4 +1,4 @@ -# pip方式安装MindSpore CPU版本(Windows) +# pip方式安装MindSpore CPU版本(Windows) @@ -30,7 +30,7 @@ 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/windows_x64/mindspore-{version}-cp37-cp37m-win_amd64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/x86_64/mindspore-{version}-cp37-cp37m-win_amd64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: diff --git a/install/mindspore_cpu_win_install_pip_en.md b/install/mindspore_cpu_win_install_pip_en.md index 214c15667ae7ef80a1c9a5b965f8c8a310325fa6..804ee548560c1107851776c54a67747523e56898 100644 --- a/install/mindspore_cpu_win_install_pip_en.md +++ b/install/mindspore_cpu_win_install_pip_en.md @@ -1,4 +1,4 @@ -# Installing MindSpore in CPU by pip (Windows) +# Installing MindSpore in CPU by pip (Windows) @@ -28,7 +28,7 @@ This document describes how to quickly install MindSpore by pip in a Windows sys It is recommended to refer to [Version List](https://www.mindspore.cn/versions/en) to perform SHA-256 integrity verification, and then execute the following command to install MindSpore after the verification is consistent. 
```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/windows_x64/mindspore-{version}-cp37-cp37m-win_amd64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/cpu/x86_64/mindspore-{version}-cp37-cp37m-win_amd64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` Of which, diff --git a/install/mindspore_gpu_install_conda.md b/install/mindspore_gpu_install_conda.md index e36e4bcccd43b19acef5b35754b7488cca22aa72..aa08bb5a7b3db1d5515738625b1ff532507147ca 100644 --- a/install/mindspore_gpu_install_conda.md +++ b/install/mindspore_gpu_install_conda.md @@ -24,9 +24,8 @@ - 确认安装Ubuntu 18.04是64位操作系统。 - 确认安装[GCC 7.3.0版本](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz)。 -- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)。 +- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)配套[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive) 或者 [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive)配套[cuDNN 8.0.X版本](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111)。 - CUDA安装后,若CUDA没有安装在默认位置,需要设置环境变量PATH(如:`export PATH=/usr/local/cuda-${version}/bin:$PATH`)和`LD_LIBRARY_PATH`(如:`export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`),详细安装后的设置可参考[CUDA安装手册](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)。 -- 确认安装[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive)。 - 确认安装[OpenMPI 4.0.3版本](https://www.open-mpi.org/faq/?category=building#easy-build)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[NCCL 2.7.6-1版本](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[TensorRT-7.2.2.3](https://developer.nvidia.com/nvidia-tensorrt-download)(可选,Serving推理需要)。 @@ -60,8 +59,16 @@ conda activate mindspore 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 +CUDA 10.1 版本: + +```bash +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/x86_64/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + +CUDA 11.1 版本: + ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/x86_64/cuda-11.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: diff --git a/install/mindspore_gpu_install_docker.md b/install/mindspore_gpu_install_docker.md index a4baad62121f1d0642caff07122ab6f02abe6ca0..49b3844f624c8c2537bbbdca373aaf8c0cdd844e 100644 --- a/install/mindspore_gpu_install_docker.md +++ b/install/mindspore_gpu_install_docker.md @@ -159,9 +159,7 @@ print(ops.add(x, y)) - 验证MindInsight安装: - 1. 执行命令:```export PATH=/usr/local/python-3.7.5/bin:$PATH```。 - - 2. 
输入```mindinsight start --port 8080```, 如提示启动status为success,则安装成功。 + 输入```mindinsight start --port 8080```, 如提示启动status为success,则安装成功。 - 如果你安装的是`runtime`标签的容器,需要自行安装MindSpore。 diff --git a/install/mindspore_gpu_install_docker_en.md b/install/mindspore_gpu_install_docker_en.md index 6155251132b52d8e96fb15c9a0b4273e91423788..89cbcb31ab8fa825ab69770abec246245eaed9e3 100644 --- a/install/mindspore_gpu_install_docker_en.md +++ b/install/mindspore_gpu_install_docker_en.md @@ -159,9 +159,7 @@ It means MindSpore has been installed by docker successfully. - If you need to verify the MindInsight installation: - 1. Execute the command: ```export PATH=/usr/local/python-3.7.5/bin:$PATH```. - - 2. Enter ```mindinsight start --port 8080```, if it prompts that the startup status is successful, it means MindInsight has been installed successfully. + Enter ```mindinsight start --port 8080```, if it prompts that the startup status is successful, it means MindInsight has been installed successfully. - If you install a container with the label of `runtime`, you need to install MindSpore yourself. diff --git a/install/mindspore_gpu_install_pip.md b/install/mindspore_gpu_install_pip.md index 9597f998af04d758e46b04674e8ffa8f922eb196..86770d89b9c2422f95787b00a7c5c93c144bfa4b 100644 --- a/install/mindspore_gpu_install_pip.md +++ b/install/mindspore_gpu_install_pip.md @@ -23,9 +23,8 @@ - 确认安装64位操作系统,[glibc](https://www.gnu.org/software/libc/)>=2.17,其中Ubuntu 18.04是经过验证的。 - 确认安装[GCC 7.3.0版本](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz)。 -- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)。 +- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)配套[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive) 或者 [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive)配套[cuDNN 8.0.X版本](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111)。 - CUDA安装后,若CUDA没有安装在默认位置,需要设置环境变量PATH(如:`export PATH=/usr/local/cuda-${version}/bin:$PATH`)和`LD_LIBRARY_PATH`(如:`export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`),详细安装后的设置可参考[CUDA安装手册](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)。 -- 确认安装[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive)。 - 确认安装[OpenMPI 4.0.3版本](https://www.open-mpi.org/faq/?category=building#easy-build)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[NCCL 2.7.6-1版本](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[TensorRT-7.2.2.3](https://developer.nvidia.com/nvidia-tensorrt-download)(可选,Serving推理需要)。 @@ -37,8 +36,16 @@ 参考[版本列表](https://www.mindspore.cn/versions)先进行SHA-256完整性校验,校验一致后再执行如下命令安装MindSpore。 +CUDA 10.1 版本: + +```bash +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/x86_64/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + +CUDA 11.1 版本: + ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/linux_x86/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/x86_64/cuda-11.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i 
https://pypi.tuna.tsinghua.edu.cn/simple ``` 其中: diff --git a/install/mindspore_gpu_install_pip_en.md b/install/mindspore_gpu_install_pip_en.md index 7046c605c71f92eb66aff29a05097e4f7a77bacd..818ced4f518ce8dd236ee180335f1f1ea65f6a3b 100644 --- a/install/mindspore_gpu_install_pip_en.md +++ b/install/mindspore_gpu_install_pip_en.md @@ -21,9 +21,8 @@ This document describes how to quickly install MindSpore by pip in a Linux syste - Confirm that the 64-bit operating system is installed and the [glibc](https://www.gnu.org/software/libc/)>=2.17, where Ubuntu 18.04 is verified. - Confirm that [GCC 7.3.0](http://ftp.gnu.org/gnu/gcc/gcc-7.3.0/gcc-7.3.0.tar.gz) is installed. -- Confirm that [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) is installed. +- Confirm that [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) with [cuDNN 7.6.X](https://developer.nvidia.com/rdp/cudnn-archive) or [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive) with [cuDNN 8.0.X](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111) is installed. - If CUDA is installed in a non-default path, after installing CUDA, environment variable `PATH`(e.g. `export PATH=/usr/local/cuda-${version}/bin:$PATH`) and `LD_LIBRARY_PATH`(e.g. `export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`) need to be set. Please refer to [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions) for detailed post installation actions. -- Confirm that [cuDNN 7.6.X](https://developer.nvidia.com/rdp/cudnn-archive) is installed. - Confirm that [OpenMPI 4.0.3](https://www.open-mpi.org/faq/?category=building#easy-build) is installed. (optional, required for single-node/multi-GPU and multi-node/multi-GPU training) - Confirm that [NCCL 2.7.6-1](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian) is installed. (optional, required for single-node/multi-GPU and multi-node/multi-GPU training) - Confirm that [TensorRT-7.2.2.3](https://developer.nvidia.com/nvidia-tensorrt-download) is installed. (optional,required for Serving inference). @@ -35,8 +34,16 @@ This document describes how to quickly install MindSpore by pip in a Linux syste It is recommended to refer to [Version List](https://www.mindspore.cn/versions/en) to perform SHA-256 integrity verification, and then execute the following command to install MindSpore after the verification is consistent. 
+For CUDA 10.1: + +```bash +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/x86_64/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + +For CUDA 11.1: + ```bash -pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/linux_x86/cuda-10.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple +pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/{version}/MindSpore/gpu/x86_64/cuda-11.1/mindspore_gpu-{version}-cp37-cp37m-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple ``` Of which, diff --git a/install/mindspore_gpu_install_source.md b/install/mindspore_gpu_install_source.md index 03f18021a32958a43c527d03bcc663f304932718..bb2c158998bb7f9af996163c8c7db8d6e1844531 100644 --- a/install/mindspore_gpu_install_source.md +++ b/install/mindspore_gpu_install_source.md @@ -34,10 +34,9 @@ - 确认安装[Autoconf 2.69及以上版本](https://www.gnu.org/software/autoconf)(可使用系统自带版本)。 - 确认安装[Libtool 2.4.6-29.fc30及以上版本](https://www.gnu.org/software/libtool)(可使用系统自带版本)。 - 确认安装[Automake 1.15.1及以上版本](https://www.gnu.org/software/automake)(可使用系统自带版本)。 -- 确认安装[cuDNN 7.6及以上版本](https://developer.nvidia.com/rdp/cudnn-archive)。 - 确认安装[Flex 2.5.35及以上版本](https://github.com/westes/flex/)。 - 确认安装[wheel 0.32.0及以上版本](https://pypi.org/project/wheel/)。 -- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)按默认配置安装。 +- 确认安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base)配套[cuDNN 7.6.X版本](https://developer.nvidia.com/rdp/cudnn-archive) 或者 [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive)配套[cuDNN 8.0.X版本](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111)。 - CUDA安装后,若CUDA没有安装在默认位置,需要设置环境变量PATH(如:`export PATH=/usr/local/cuda-${version}/bin:$PATH`)和`LD_LIBRARY_PATH`(如:`export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`),详细安装后的设置可参考[CUDA安装手册](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)。 - 确认安装[OpenMPI 4.0.3版本](https://www.open-mpi.org/faq/?category=building#easy-build)(可选,单机多卡/多机多卡训练需要)。 - 确认安装[NCCL 2.7.6-1版本](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian)(可选,单机多卡/多机多卡训练需要)。 diff --git a/install/mindspore_gpu_install_source_en.md b/install/mindspore_gpu_install_source_en.md index ce006e44c177fae412a6bc5b75a3f9dc6e88f274..a650483800c29a9c1ea1d19eb7050f78ea9dd1c9 100644 --- a/install/mindspore_gpu_install_source_en.md +++ b/install/mindspore_gpu_install_source_en.md @@ -31,10 +31,9 @@ This document describes how to quickly install MindSpore by source code in a Lin - Confirm that [Autoconf 2.69 or later](https://www.gnu.org/software/autoconf) is installed. (Default versions of these tools built in their systems are supported.) - Confirm that [Libtool 2.4.6-29.fc30 or later](https://www.gnu.org/software/libtool) is installed. (Default versions of these tools built in their systems are supported.) - Confirm that [Automake 1.15.1 or later](https://www.gnu.org/software/automake) is installed.(Default versions of these tools built in their systems are supported.) -- Confirm that [cuDNN 7.6 or later](https://developer.nvidia.com/rdp/cudnn-archive) is installed. 
- Confirm that [Flex 2.5.35 or later](https://github.com/westes/flex/) is installed.
- Confirm that [wheel 0.32.0 or later](https://pypi.org/project/wheel/) is installed.
-- Confirm that [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) is installed as default configuration.
+- Confirm that [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) with [cuDNN 7.6.X](https://developer.nvidia.com/rdp/cudnn-archive) or [CUDA 11.1](https://developer.nvidia.com/cuda-11.1.0-download-archive) with [cuDNN 8.0.X](https://developer.nvidia.com/rdp/cudnn-archive#a-collapse805-111) is installed as default configuration.
    - If CUDA is installed in a non-default path, after installing CUDA, environment variable `PATH`(e.g. `export PATH=/usr/local/cuda-${version}/bin:$PATH`) and `LD_LIBRARY_PATH`(e.g. `export LD_LIBRARY_PATH=/usr/local/cuda-${version}/lib64:$LD_LIBRARY_PATH`) need to be set. Please refer to [CUDA installation guide](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions) for detailed post installation actions.
- Confirm that [OpenMPI 4.0.3](https://www.open-mpi.org/faq/?category=building#easy-build) is installed. (optional, required for single-node/multi-GPU and multi-node/multi-GPU training)
- Confirm that [NCCL 2.7.6-1](https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html#debian) is installed. (optional, required for single-node/multi-GPU and multi-node/multi-GPU training)
diff --git a/resource/api_mapping/ApplyAdagrad.md b/resource/api_mapping/Adagrad.md
similarity index 50%
rename from resource/api_mapping/ApplyAdagrad.md
rename to resource/api_mapping/Adagrad.md
index 85ccdb091596a4539304aeec1cec0e8bdedcfcd4..948957c8013b068305b242b00aefeab06e6984b1 100644
--- a/resource/api_mapping/ApplyAdagrad.md
+++ b/resource/api_mapping/Adagrad.md
@@ -13,7 +13,7 @@ class torch.optim.Adagrad(
 )
 ```

-## mindspore.nn.ApplyAdagrad
+## mindspore.nn.Adagrad

 ```python
 class mindspore.nn.Adagrad(
@@ -30,7 +30,7 @@ class mindspore.nn.Adagrad(

PyTorch: 需要将期望更新的参数放入1个迭代类型参数`params`后传入,且设置了`step`方法执行单步优化返回损失值。

-MindSpore:需要分别传入期望更新的参数`grads`,`params`。
+MindSpore:支持所有的参数使用相同的学习率,也支持不同的参数组使用不同的值。

## 代码示例

@@ -40,29 +40,28 @@ import numpy as np
import torch
import mindspore.nn as nn
from mindspore import Tensor, Parameter
-import mindspore.ops as ops
+from mindspore.train import Model
from mindspore import dtype as mstype

-class Net(nn.Cell):
-    def __init__(self):
-        super(Net, self).__init__()
-        self.apply_adagrad = ops.ApplyAdagrad()
-        self.var = Parameter(Tensor(np.random.rand(1, 1).astype(np.float32)), name="var")
-        self.accum = Parameter(Tensor(np.random.rand(1, 1).astype(np.float32)), name="accum")
+# Net is a user-defined network inheriting from nn.Cell.
+net = Net()
+#1) All parameters use the same learning rate and weight decay
+optim = nn.Adagrad(params=net.trainable_params())

-    def construct(self, lr, grad):
-        return self.apply_adagrad(self.var, self.accum, lr, grad)
+#2) Use parameter groups and set different values
+conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
+no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
+group_params = [{'params': conv_params, 'weight_decay': 0.01, 'grad_centralization': True},
+                {'params': no_conv_params, 'lr': 0.01},
+                {'order_params': net.trainable_params()}]
+optim = nn.Adagrad(group_params, learning_rate=0.1, weight_decay=0.0)
+# The conv_params group will use the default learning rate of 0.1, a weight decay of 0.01 and grad
+# centralization of True.
+# The no_conv_params group will use a learning rate of 0.01, the default weight decay of 0.0 and grad
+# centralization of False.
+# The optimizer processes parameters in the order given by 'order_params'.

-np.random.seed(0)
-net = Net()
-lr = Tensor(0.001, mstype.float32)
-grad = Tensor(np.random.rand(1, 1).astype(np.float32))
-var, accum = net(lr, grad)
-print(var)
-print(accum)
-# Out:
-# [[0.5482]]
-# [[1.0785]]
+loss = nn.SoftmaxCrossEntropyWithLogits()
+model = Model(net, loss_fn=loss, optimizer=optim)

 # The following implements Adagrad with torch.
 input_x = torch.tensor(np.random.rand(1, 20).astype(np.float32))
diff --git a/resource/api_mapping/ApplyAdagrad_en.md b/resource/api_mapping/Adagrad_en.md
similarity index 50%
rename from resource/api_mapping/ApplyAdagrad_en.md
rename to resource/api_mapping/Adagrad_en.md
index 2f3d349f5f8f58c0ba8a6fd6492059426d6c1716..d65e57174ba5fd8eb4506807168254505943978e 100644
--- a/resource/api_mapping/ApplyAdagrad_en.md
+++ b/resource/api_mapping/Adagrad_en.md
@@ -13,7 +13,7 @@ class torch.optim.Adagrad(
 )
 ```

-## mindspore.nn.ApplyAdagrad
+## mindspore.nn.Adagrad

 ```python
 class mindspore.nn.Adagrad(
@@ -30,7 +30,7 @@ class mindspore.nn.Adagrad(

PyTorch: Parameters to be optimized should be put into an iterable parameter then passed as a whole. The `step` method is also implemented to perform one single step optimization and return loss.

-MindSpore: Parameters to be updated: `grads`, `params` should be passed respectively.
+MindSpore: Supports using the same learning rate for all parameters, as well as setting different values for different parameter groups.

## Code Example

@@ -40,29 +40,28 @@ import numpy as np
import torch
import mindspore.nn as nn
from mindspore import Tensor, Parameter
-import mindspore.ops as ops
+from mindspore.train import Model
from mindspore import dtype as mstype

-class Net(nn.Cell):
-    def __init__(self):
-        super(Net, self).__init__()
-        self.apply_adagrad = ops.ApplyAdagrad()
-        self.var = Parameter(Tensor(np.random.rand(1, 1).astype(np.float32)), name="var")
-        self.accum = Parameter(Tensor(np.random.rand(1, 1).astype(np.float32)), name="accum")
+# Net is a user-defined network inheriting from nn.Cell.
+net = Net()
+#1) All parameters use the same learning rate and weight decay
+optim = nn.Adagrad(params=net.trainable_params())

-    def construct(self, lr, grad):
-        return self.apply_adagrad(self.var, self.accum, lr, grad)
+#2) Use parameter groups and set different values
+conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
+no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
+group_params = [{'params': conv_params, 'weight_decay': 0.01, 'grad_centralization': True},
+                {'params': no_conv_params, 'lr': 0.01},
+                {'order_params': net.trainable_params()}]
+optim = nn.Adagrad(group_params, learning_rate=0.1, weight_decay=0.0)
+# The conv_params group will use the default learning rate of 0.1, a weight decay of 0.01 and grad
+# centralization of True.
+# The no_conv_params group will use a learning rate of 0.01, the default weight decay of 0.0 and grad
+# centralization of False.
+# The optimizer processes parameters in the order given by 'order_params'.
-np.random.seed(0) -net = Net() -lr = Tensor(0.001, mstype.float32) -grad = Tensor(np.random.rand(1, 1).astype(np.float32)) -var, accum = net(lr, grad) -print(var) -print(accum) -# Out: -# [[0.5482]] -# [[1.0785]] +loss = nn.SoftmaxCrossEntropyWithLogits() +model = Model(net, loss_fn=loss, optimizer=optim) # The following implements Adagrad with torch. input_x = torch.tensor(np.random.rand(1, 20).astype(np.float32)) diff --git a/resource/api_mapping/AvgPool2d.md b/resource/api_mapping/AvgPool2d.md deleted file mode 100644 index 16b31556df4e0aede223f9fd4e826bce17d4b285..0000000000000000000000000000000000000000 --- a/resource/api_mapping/AvgPool2d.md +++ /dev/null @@ -1,53 +0,0 @@ -# 比较与torch.nn.functional.adaptive_avg_pool2d的功能差异 - -## torch.nn.functional.adaptive_avg_pool2d - -```python -torch.nn.functional.adaptive_avg_pool2d( - input, - output_size -) -``` - -## mindspore.nn.AvgPool2d - -```python -class mindspore.nn.AvgPool2d( - kernel_size=1, - stride=1, - pad_mode='valid', - data_format='NCHW' -)(input) -``` - -## 使用方式 - -PyTorch: 对输入数据的H与W维执行平均池化。使用上,仅需指定池化后数据H和W维的期望形状即可。无需用户手工计算并指定`kernel_size`、`stride`等。 - -MindSpore:需用户手工计算并指定`kernel_size`、`stride`等。 - -## 代码示例 - -```python -import mindspore -from mindspore import Tensor, nn -import torch -import numpy as np - -x = np.random.randint(0, 10, [1, 2, 4, 4]) - -# In MindSpore, parameters kernel_size and stride should be calculated in advance and set for pooling. -pool = nn.AvgPool2d(kernel_size=3, stride=1) -input_x = Tensor(x, mindspore.float32) -output = pool(input_x) -print(output.shape) -# Out: -# (1, 2, 2, 2) - -# In torch, the shape of output can be set directly for pooling. -input_x = torch.tensor(x.astype(np.float32)) -output = torch.nn.functional.adaptive_avg_pool2d(input_x, (2, 2)) -print(output.shape) -# Out: -# torch.Size([1, 2, 2, 2]) -``` diff --git a/resource/api_mapping/AvgPool2d_en.md b/resource/api_mapping/AvgPool2d_en.md deleted file mode 100644 index fbbb4e723c870f0d041d539adaa7fe5fa9aa05c4..0000000000000000000000000000000000000000 --- a/resource/api_mapping/AvgPool2d_en.md +++ /dev/null @@ -1,53 +0,0 @@ -# Function Differences with torch.nn.functional.adaptive_avg_pool2d - -## torch.nn.functional.adaptive_avg_pool2d - -```python -torch.nn.functional.adaptive_avg_pool2d( - input, - output_size -) -``` - -## mindspore.nn.AvgPool2d - -```python -class mindspore.nn.AvgPool2d( - kernel_size=1, - stride=1, - pad_mode='valid', - data_format='NCHW' -)(input) -``` - -## Differences - -PyTorch: Performs average pooling for H and W dimensions of the input data. You only need to specify the desired shape of the H and W dimensions of data after pooling. It is unnecessary to manually calculate and specify the `kernel_size`, `stride`, etc. - -MindSpore:The user needs to manually calculate and specify the `kernel_size`, `stride`, etc. - -## Code Example - -```python -import mindspore -from mindspore import Tensor, nn -import torch -import numpy as np - -x = np.random.randint(0, 10, [1, 2, 4, 4]) - -# In MindSpore, parameters kernel_size and stride should be calculated in advance and set for pooling. -pool = nn.AvgPool2d(kernel_size=3, stride=1) -input_x = Tensor(x, mindspore.float32) -output = pool(input_x) -print(output.shape) -# Out: -# (1, 2, 2, 2) - -# In torch, the shape of output can be set directly for pooling. 
-input_x = torch.tensor(x.astype(np.float32))
-output = torch.nn.functional.adaptive_avg_pool2d(input_x, (2, 2))
-print(output.shape)
-# Out:
-# torch.Size([1, 2, 2, 2])
-```
\ No newline at end of file
diff --git a/resource/api_mapping/BroadcastTo.md b/resource/api_mapping/BroadcastTo.md
index e6ddc836f565f17e31f3ae24c4f04eba16877562..5aa5e2523e5a7ee2f021954c10b18af01507d759 100644
--- a/resource/api_mapping/BroadcastTo.md
+++ b/resource/api_mapping/BroadcastTo.md
@@ -42,7 +42,7 @@ print(output.shape)
 # And the final output of the tensor's shape will be determined by these inputs' shapes according to rules mentioned above.
 x = torch.Tensor(np.array([1, 2, 3]).astype(np.float32)).view(1, 3)
 y = torch.Tensor(np.array([4, 5]).astype(np.float32)).view(2, 1)
-m, n = torch.broadcast_tensor(x, y)
+m, n = torch.broadcast_tensors(x, y)
 print(m.shape)
 # Out:
 # torch.Size([2, 3])
diff --git a/resource/api_mapping/BroadcastTo_en.md b/resource/api_mapping/BroadcastTo_en.md
index e71e9a7d80c1d3a0466c9661450e43c208976cb1..370c9e1dc0d27a854f3f865809958cfc0be1173b 100644
--- a/resource/api_mapping/BroadcastTo_en.md
+++ b/resource/api_mapping/BroadcastTo_en.md
@@ -42,7 +42,7 @@ print(output.shape)
 # And the final output of the tensor's shape will be determined by these inputs' shapes according to rules mentioned above.
 x = torch.Tensor(np.array([1, 2, 3]).astype(np.float32)).view(1, 3)
 y = torch.Tensor(np.array([4, 5]).astype(np.float32)).view(2, 1)
-m, n = torch.broadcast_tensor(x, y)
+m, n = torch.broadcast_tensors(x, y)
 print(m.shape)
 # Out:
 # torch.Size([2, 3])
diff --git a/resource/api_mapping/Constant.md b/resource/api_mapping/Constant.md
index b81998b3b1b49d7ffa3d2f79700a83dce9cc7c61..0aedc06bd9dfa6eef60154c83846d23712a5ab26 100644
--- a/resource/api_mapping/Constant.md
+++ b/resource/api_mapping/Constant.md
@@ -19,7 +19,7 @@ class mindspore.common.initializer.Constant(value)(arr)

PyTorch: 以常量`val`填充输入的tensor。

-MindSpore:以`value`(整型或numpy数组)填充输入的numpy数组。
+MindSpore:以`value`(整型或numpy数组)填充输入的numpy数组,且会原地更新输入值。

## 代码示例

@@ -32,7 +32,7 @@ import numpy as np

input_constant = np.array([1, 2, 3])
constant_init = mindspore.common.initializer.Constant(value=1)
out_constant = constant_init(input_constant)
-print(out_constant)
+print(input_constant)
# Out:
# [1 1 1]

diff --git a/resource/api_mapping/Constant_en.md b/resource/api_mapping/Constant_en.md
index dda3801912d91434456aeeb244eff99f2c2e97c3..583c18549e85d405e57830e1f8f82eda3de0ddd0 100644
--- a/resource/api_mapping/Constant_en.md
+++ b/resource/api_mapping/Constant_en.md
@@ -19,7 +19,7 @@ class mindspore.common.initializer.Constant(value)(arr)

PyTorch: Fills in the input tensor with constant `val`.

-MindSpore:Fills in a constant array with `value`(int or numpy array).
+MindSpore:Fills in a constant array with `value`(int or numpy array) and updates the input in place.

## Code Example

@@ -32,7 +32,7 @@ import numpy as np

input_constant = np.array([1, 2, 3])
constant_init = mindspore.common.initializer.Constant(value=1)
out_constant = constant_init(input_constant)
-print(out_constant)
+print(input_constant)
# Out:
# [1 1 1]

diff --git a/resource/api_mapping/Norm.md b/resource/api_mapping/Norm.md
index 95921370f0a3f4b335e24931d000cf4a9add49ba..2d1872b1a220f78e8b355b276d733779302ce2da 100644
--- a/resource/api_mapping/Norm.md
+++ b/resource/api_mapping/Norm.md
@@ -45,13 +45,13 @@ print(output)
 # [4.4721 4.1231 9.4868 6.0828]

 # In torch, you can set parameter p to implement the desired norm.
-input_x = torch.tensor(np.array([[4, 4, 9, 1], [2, 1, 3, 6]])) +input_x = torch.tensor(np.array([[4, 4, 9, 1], [2, 1, 3, 6]]), dtype=torch.float) output1 = torch.norm(input_x, dim=0, p=2) print(output1) # Out: # tensor([4.4721, 4.1231, 9.4868, 6.0828]) -input_x = torch.tensor(np.array([[4, 4, 9, 1], [2, 1, 3, 6]])) +input_x = torch.tensor(np.array([[4, 4, 9, 1], [2, 1, 3, 6]]), dtype=torch.float) output2 = torch.norm(input_x, dim=0, p=1) print(output2) # Out: diff --git a/resource/api_mapping/Norm_en.md b/resource/api_mapping/Norm_en.md index 6ee468195cfd1ea5c78f871a07da2c532b93a6e8..3cd6a95b32839bb035d066561e6a62163d707297 100644 --- a/resource/api_mapping/Norm_en.md +++ b/resource/api_mapping/Norm_en.md @@ -45,13 +45,13 @@ print(output) # [4.4721 4.1231 9.4868 6.0828] # In torch, you can set parameter p to implement the desired norm. -input_x = torch.tensor(np.array([[4, 4, 9, 1], [2, 1, 3, 6]])) +input_x = torch.tensor(np.array([[4, 4, 9, 1], [2, 1, 3, 6]]), dtype=torch.float) output1 = torch.norm(input_x, dim=0, p=2) print(output1) # Out: # tensor([4.4721, 4.1231, 9.4868, 6.0828]) -input_x = torch.tensor(np.array([[4, 4, 9, 1], [2, 1, 3, 6]])) +input_x = torch.tensor(np.array([[4, 4, 9, 1], [2, 1, 3, 6]]), dtype=torch.float) output2 = torch.norm(input_x, dim=0, p=1) print(output2) # Out: diff --git a/resource/api_mapping/ReduceMean&AdaptiveAvgPool2d.md b/resource/api_mapping/ReduceMean&AdaptiveAvgPool2d.md deleted file mode 100644 index 627cc22afd7d5fd5f39940f4945f89a1e13d5dee..0000000000000000000000000000000000000000 --- a/resource/api_mapping/ReduceMean&AdaptiveAvgPool2d.md +++ /dev/null @@ -1,47 +0,0 @@ -# 比较与torch.nn.AdaptiveAvgPool2d的功能差异 - -## torch.nn.AdaptiveAvgPool2d - -```python -torch.nn.AdaptiveAvgPool2d(output_size)(input) -``` - -## mindspore.ops.ReduceMean - -```python -class mindspore.ops.ReduceMean(keep_dims=False)( - input_x, - axis=() -) -``` - -## 使用方式 - -PyTorch: 对输入做自适应的平均池化,算法内部根据指定的输出大小计算出对应大小的结果。仅在输出为1*1时和MindSpore的`ReduceMean`一致。 - -MindSpore:计算指定维度数据的平均值。 - -## 代码示例 - -```python -from mindspore import Tensor -import mindspore.ops as ops -import torch -import numpy as np - -# In MindSpore, mean of given axis will be computed. -input_x = Tensor(np.random.randn(1, 64, 8, 9).astype(np.float32)) -op = ops.ReduceMean(keep_dims=True) -output = op(x=input_x, axis=1) -print(output.shape) -# Out: -# (1, 1, 8, 9) - -# In torch, the corresponding results will be returned based on the input shape. -input_x = torch.randn(1, 64, 8, 9) -op = torch.nn.AdaptiveAvgPool2d((5, 7)) -output = op(input_x) -print(output.shape) -# Out: -# torch.Size([1, 64, 5, 7]) -``` diff --git a/resource/api_mapping/ReduceMean&AdaptiveAvgPool2d_en.md b/resource/api_mapping/ReduceMean&AdaptiveAvgPool2d_en.md deleted file mode 100644 index 9d2cb2846b2cd690c9cc431dd74c2a6c8e18ec3e..0000000000000000000000000000000000000000 --- a/resource/api_mapping/ReduceMean&AdaptiveAvgPool2d_en.md +++ /dev/null @@ -1,48 +0,0 @@ -# Function Differences with torch.nn.AdaptiveAvgPool2d - -## torch.nn.AdaptiveAvgPool2d - -```python -torch.nn.AdaptiveAvgPool2d(output_size)(input) -``` - -## mindspore.ops.ReduceMean - -```python -class mindspore.ops.ReduceMean(keep_dims=False)( - input_x, - axis=() -) -``` - -## Differences - -PyTorch: Applies an adaptive average pooling over the inputs, and the corresponding results are calculated based on the specified output size. It is consistent with the `ReduceMean` of MindSpore only if the output is 1*1. - -MindSpore:Computes mean of the given axis. 
-
-## Code Example
-
-```python
-from mindspore import Tensor
-import mindspore.ops as ops
-import torch
-import numpy as np
-
-# In MindSpore, mean of given axis will be computed.
-input_x = Tensor(np.random.randn(1, 64, 8, 9).astype(np.float32))
-op = ops.ReduceMean(keep_dims=True)
-output = op(x=input_x, axis=1)
-print(output.shape)
-# Out:
-# (1, 1, 8, 9)
-
-# In torch, the corresponding results will be returned based on the input shape.
-input_x = torch.randn(1, 64, 8, 9)
-op = torch.nn.AdaptiveAvgPool2d((5, 7))
-output = op(input_x)
-print(output.shape)
-# Out:
-# torch.Size([1, 64, 5, 7])
-```
-
diff --git a/resource/api_mapping/TrainOneStepCell.md b/resource/api_mapping/TrainOneStepCell.md
index ec44aef3ed17d8e51287d2b55c283f005e746c2a..2d0b5a28e2bc1a8c598c610e19ee4139be1c505d 100644
--- a/resource/api_mapping/TrainOneStepCell.md
+++ b/resource/api_mapping/TrainOneStepCell.md
@@ -3,7 +3,7 @@

## torch.optim.Optimizer.step

```python
-torch.optim.Optimizer.step()
+torch.optim.Optimizer.step(closure)
```

## mindspore.nn.TrainOneStepCell

diff --git a/resource/api_mapping/TrainOneStepCell_en.md b/resource/api_mapping/TrainOneStepCell_en.md
index c48db395eaa44cd596e8f50c824262668782f02e..d0c6bebe4d3d3954af9fe29942d4d63acf6bf808 100644
--- a/resource/api_mapping/TrainOneStepCell_en.md
+++ b/resource/api_mapping/TrainOneStepCell_en.md
@@ -3,7 +3,7 @@

## torch.optim.Optimizer.step

```python
-torch.optim.Optimizer.step()
+torch.optim.Optimizer.step(closure)
```

## mindspore.nn.TrainOneStepCell

diff --git a/resource/api_mapping/Uniform.md b/resource/api_mapping/Uniform.md
index 29b8ea92fa3b5de43feac907066dd0f828be7027..1fe347fa5917e14d6ed73b13f243ab21e81214e5 100644
--- a/resource/api_mapping/Uniform.md
+++ b/resource/api_mapping/Uniform.md
@@ -20,7 +20,7 @@ class mindspore.common.initializer.Uniform(scale=0.07)(arr)

PyTorch: 通过入参`a`和`b`分别指定均匀分布的上下界,即U(-a, b)。

-MindSpore:仅通过一个入参`scale`指定均匀分布的范围,即U(-scale, scale)。
+MindSpore:仅通过一个入参`scale`指定均匀分布的范围,即U(-scale, scale),且会原地更新输入值。

## 代码示例

@@ -33,7 +33,7 @@ import numpy as np

input_x = np.array([1, 1, 1]).astype(np.float32)
uniform = mindspore.common.initializer.Uniform(scale=1)
output = uniform(input_x)
-print(output)
+print(input_x)
# Out:
# [-0.2333 0.6208 -0.1627]

diff --git a/resource/api_mapping/Uniform_en.md b/resource/api_mapping/Uniform_en.md
index c0150b3e39f2389d620bc5f978c437da39824214..d2125af1522c7614ecc594451fa554d0666a94be 100644
--- a/resource/api_mapping/Uniform_en.md
+++ b/resource/api_mapping/Uniform_en.md
@@ -20,7 +20,7 @@ class mindspore.common.initializer.Uniform(scale=0.07)(arr)

PyTorch: The upper and lower bounds of uniform distribution are specified by parameters `a` and `b`, i.e. U(-a, b).

-MindSpore:It only uses one parameter to specify a uniformly distributed range, i.e. U(-scale, scale).
+MindSpore:It only uses one parameter to specify a uniformly distributed range, i.e. U(-scale, scale), and updates the input in place.
## Code Example @@ -33,7 +33,7 @@ import numpy as np input_x = np.array([1, 1, 1]).astype(np.float32) uniform = mindspore.common.initializer.Uniform(scale=1) output = uniform(input_x) -print(output) +print(input_x) # Out: # [-0.2333 0.6208 -0.1627] diff --git a/resource/api_mapping/api_mapping.md b/resource/api_mapping/api_mapping.md index d4b74b41f5fc38b51c82440889b078c5b9296680..4459ee239b43c6264cd18a2e2ada451066175753 100644 --- a/resource/api_mapping/api_mapping.md +++ b/resource/api_mapping/api_mapping.md @@ -64,7 +64,7 @@ | torch.minimum | mindspore.ops.Minimum | 功能一致 | | torch.mm | mindspore.ops.MatMul | 功能一致 | | torch.mul | mindspore.ops.Mul | 功能一致 | -| torch.nn.AdaptiveAvgPool2d | mindspore.ops.ReduceMean |[差异对比](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/ReduceMean&AdaptiveAvgPool2d.md)| +| torch.nn.AdaptiveAvgPool2d | mindspore.ops.AdaptiveAvgPool2d | 功能一致 | | torch.nn.AvgPool1d | mindspore.nn.AvgPool1d | 功能一致 | | torch.nn.AvgPool2d | mindspore.nn.AvgPool2d | 功能一致 | | torch.nn.BatchNorm1d | mindspore.nn.BatchNorm1d | 功能一致 | @@ -76,7 +76,7 @@ | torch.nn.Dropout | mindspore.nn.Dropout | 功能一致 | | torch.nn.Embedding | mindspore.nn.Embedding | 功能一致 | | torch.nn.Flatten | mindspore.nn.Flatten |[差异对比](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/nn_Flatten.md)| -| torch.nn.functional.adaptive_avg_pool2d | mindspore.nn.AvgPool2d |[差异对比](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/AvgPool2d.md)| +| torch.nn.functional.adaptive_avg_pool2d | mindspore.ops.AdaptiveAvgPool2d | 功能一致 | | torch.nn.functional.avg_pool2d | mindspore.ops.AvgPool | 功能一致 | | torch.nn.functional.binary_cross_entropy | mindspore.ops.BinaryCrossEntropy | 功能一致 | | torch.nn.functional.conv2d | mindspore.ops.Conv2D | 功能一致 | @@ -126,7 +126,7 @@ | torch.ones | mindspore.ops.Ones | 功能一致 | | torch.ones_like | mindspore.ops.OnesLike | 功能一致 | | torch.optim.Adadelta | mindspore.ops.ApplyAdadelta |[差异对比](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/ApplyAdadelta.md)| -| torch.optim.Adagrad | mindspore.nn.ApplyAdagrad |[差异对比](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/ApplyAdagrad.md)| +| torch.optim.Adagrad | mindspore.nn.Adagrad |[差异对比](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/Adagrad.md)| | torch.optim.Adam | mindspore.nn.Adam | 功能一致 | | torch.optim.Adamax | mindspore.ops.ApplyAdaMax | 功能一致 | | torch.optim.AdamW | mindspore.nn.AdamWeightDecay | 功能一致 | @@ -186,18 +186,55 @@ | torch.unbind | mindspore.ops.Unstack | 功能一致 | | torch.unique | mindspore.ops.Unique |[差异对比](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/Unique.md)| | torch.unsqueeze | mindspore.ops.ExpandDims | 功能一致 | -| torch.utils.data.DataLoader | mindspore.DatasetHelper | 功能一致 | -| torch.utils.data.Dataset | mindspore.dataset.MindDataset | 功能一致 | | torch.utils.data.distributed.DistributedSampler | mindspore.dataset.DistributedSampler | 功能一致 | +| torch.utils.data.RandomSampler | mindspore.dataset.RandomSampler | 功能一致 | +| torch.utils.data.SequentialSampler | mindspore.dataset.SequentialSampler | 功能一致 | +| torch.utils.data.SubsetRandomSampler | mindspore.dataset.SubsetRandomSampler | 功能一致 | +| torch.utils.data.WeightedRandomSampler | mindspore.dataset.WeightedRandomSampler | 功能一致 | | torch.zeros | mindspore.ops.Zeros | 功能一致 | | torch.zeros_like | mindspore.ops.ZerosLike | 功能一致 | +| torchtext.data.functional.custom_replace | mindspore.dataset.text.transforms.RegexReplace | 功能一致 | +| 
torchtext.data.functional.load_sp_model | mindspore.dataset.text.transforms.SentencePieceTokenizer | 差异对比 | +| torchtext.data.functional.numericalize_tokens_from_iterator | mindspore.dataset.text.transforms.Lookup | 差异对比 | +| torchtext.data.functional.sentencepiece_numericalizer | mindspore.dataset.text.transforms.SentencePieceTokenizer | 差异对比 | +| torchtext.data.functional.sentencepiece_tokenizer | mindspore.dataset.text.transforms.SentencePieceTokenizer | 差异对比 | +| torchtext.data.functional.simple_space_split | mindspore.dataset.text.transforms.WhitespaceTokenizer | 功能一致 | +| torchtext.data.utils.ngrams_iterator | mindspore.dataset.text.transforms.Ngram | 功能一致 | +| torchvision.datasets.CelebA | mindspore.dataset.CelebADataset | 功能一致 | +| torchvision.datasets.CIFAR10 | mindspore.dataset.Cifar10Dataset | 功能一致 | +| torchvision.datasets.CIFAR100 | mindspore.dataset.Cifar100Dataset | 功能一致 | +| torchvision.datasets.CocoDetection | mindspore.dataset.CocoDataset | 差异对比 | | torchvision.datasets.ImageFolder | mindspore.dataset.ImageFolderDataset | 功能一致 | +| torchvision.datasets.MNIST | mindspore.dataset.MnistDataset | 功能一致 | +| torchvision.datasets.VOCDetection | mindspore.dataset.VOCDataset | 差异对比 | +| torchvision.datasets.VOCSegmentation | mindspore.dataset.VOCDataset | 差异对比 | | torchvision.ops.nms | mindspore.ops.NMSWithMask | 功能一致 | | torchvision.ops.roi_align | mindspore.ops.ROIAlign | 功能一致 | -| torchvision.transforms.CenterCrop | mindspore.dataset.vision.py_transforms.CenterCrop | 功能一致 | -| torchvision.transforms.ColorJitter | mindspore.dataset.vision.py_transforms.RandomColorAdjust | 功能一致 | -| torchvision.transforms.Compose | mindspore.dataset.transforms.py_transforms.Compose | 功能一致 | -| torchvision.transforms.Normalize | mindspore.dataset.vision.py_transforms.Normalize | 功能一致 | -| torchvision.transforms.RandomHorizontalFlip | mindspore.dataset.vision.py_transforms.RandomHorizontalFlip | 功能一致 | -| torchvision.transforms.Resize | mindspore.dataset.vision.py_transforms.Resize | 功能一致 | -| torchvision.transforms.ToTensor | mindspore.dataset.vision.py_transforms.ToTensor | 功能一致 | +| torchvision.transforms.CenterCrop | mindspore.dataset.vision.c_transforms.CenterCrop | 功能一致 | +| torchvision.transforms.ColorJitter | mindspore.dataset.vision.c_transforms.RandomColorAdjust | 功能一致 | +| torchvision.transforms.Compose | mindspore.dataset.transforms.c_transforms.Compose | 功能一致 | +| torchvision.transforms.ConvertImageDtype | mindspore.dataset.transforms.py_transforms.ToType | 差异对比 | +| torchvision.transforms.FiveCrop | mindspore.dataset.vision.py_transforms.FiveCrop | 功能一致 | +| torchvision.transforms.GaussianBlur | mindspore.dataset.vision.c_transforms.GaussianBlur | 功能一致 | +| torchvision.transforms.Grayscale | mindspore.dataset.vision.py_transforms.Grayscale | 功能一致 | +| torchvision.transforms.LinearTransformation | mindspore.dataset.vision.py_transforms.LinearTransformation | 功能一致 | +| torchvision.transforms.Normalize | mindspore.dataset.vision.c_transforms.Normalize | 功能一致 | +| torchvision.transforms.Pad | mindspore.dataset.vision.c_transforms.Pad | 功能一致 | +| torchvision.transforms.RandomAffine | mindspore.dataset.vision.c_transforms.RandomAffine | 功能一致 | +| torchvision.transforms.RandomApply | mindspore.dataset.transforms.c_transforms.RandomApply | 功能一致 | +| torchvision.transforms.RandomChoice | mindspore.dataset.transforms.c_transforms.RandomChoice | 功能一致 | +| torchvision.transforms.RandomCrop | mindspore.dataset.vision.c_transforms.RandomCrop | 功能一致 | +| torchvision.transforms.RandomErasing | 
mindspore.dataset.vision.py_transforms.RandomErasing | 功能一致 | +| torchvision.transforms.RandomGrayscale | mindspore.dataset.vision.py_transforms.RandomGrayscale | 功能一致 | +| torchvision.transforms.RandomHorizontalFlip | mindspore.dataset.vision.c_transforms.RandomHorizontalFlip | 功能一致 | +| torchvision.transforms.RandomOrder | mindspore.dataset.transforms.py_transforms.RandomOrder | 功能一致 | +| torchvision.transforms.RandomPerspective | mindspore.dataset.vision.py_transforms.RandomPerspective | 功能一致 | +| torchvision.transforms.RandomPosterize | mindspore.dataset.vision.c_transforms.RandomPosterize | 功能一致 | +| torchvision.transforms.RandomResizedCrop | mindspore.dataset.vision.c_transforms.RandomResizedCrop | 功能一致 | +| torchvision.transforms.RandomRotation | mindspore.dataset.vision.c_transforms.RandomRotation | 功能一致 | +| torchvision.transforms.RandomSolarize | mindspore.dataset.vision.c_transforms.RandomSolarize | 差异对比 | +| torchvision.transforms.RandomVerticalFlip | mindspore.dataset.vision.c_transforms.RandomVerticalFlip | 功能一致 | +| torchvision.transforms.Resize | mindspore.dataset.vision.c_transforms.Resize | 功能一致 | +| torchvision.transforms.TenCrop | mindspore.dataset.vision.py_transforms.TenCrop | 功能一致 | +| torchvision.transforms.ToPILImage | mindspore.dataset.vision.py_transforms.ToPIL | 差异对比 | +| torchvision.transforms.ToTensor | mindspore.dataset.vision.py_transforms.ToTensor | 差异对比 | diff --git a/resource/api_mapping/api_mapping_en.md b/resource/api_mapping/api_mapping_en.md index a24cdbacea42a4be34cbe119357ade1d05d54232..c6c6b8be7cdff857a70e03413b5701f739b94c2e 100644 --- a/resource/api_mapping/api_mapping_en.md +++ b/resource/api_mapping/api_mapping_en.md @@ -64,7 +64,7 @@ Mapping between PyTorch APIs and MindSpore APIs, which is provided by the commun | torch.minimum | mindspore.ops.Minimum | same | | torch.mm | mindspore.ops.MatMul | same | | torch.mul | mindspore.ops.Mul | same | -| torch.nn.AdaptiveAvgPool2d | mindspore.ops.ReduceMean |[diff](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/ReduceMean&AdaptiveAvgPool2d_en.md)| +| torch.nn.AdaptiveAvgPool2d | mindspore.ops.AdaptiveAvgPool2d | same | | torch.nn.AvgPool1d | mindspore.nn.AvgPool1d | same | | torch.nn.AvgPool2d | mindspore.nn.AvgPool2d | same | | torch.nn.BatchNorm1d | mindspore.nn.BatchNorm1d | same | @@ -76,7 +76,7 @@ Mapping between PyTorch APIs and MindSpore APIs, which is provided by the commun | torch.nn.Dropout | mindspore.nn.Dropout | same | | torch.nn.Embedding | mindspore.nn.Embedding | same | | torch.nn.Flatten | mindspore.nn.Flatten |[diff](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/nn_Flatten_en.md)| -| torch.nn.functional.adaptive_avg_pool2d | mindspore.nn.AvgPool2d |[diff](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/AvgPool2d_en.md)| +| torch.nn.functional.adaptive_avg_pool2d | mindspore.ops.AdaptiveAvgPool2d | same | | torch.nn.functional.avg_pool2d | mindspore.ops.AvgPool | same | | torch.nn.functional.binary_cross_entropy | mindspore.ops.BinaryCrossEntropy | same | | torch.nn.functional.conv2d | mindspore.ops.Conv2D | same | @@ -126,7 +126,7 @@ Mapping between PyTorch APIs and MindSpore APIs, which is provided by the commun | torch.ones | mindspore.ops.Ones | same | | torch.ones_like | mindspore.ops.OnesLike | same | | torch.optim.Adadelta | mindspore.ops.ApplyAdadelta |[diff](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/ApplyAdadelta_en.md)| -| torch.optim.Adagrad | mindspore.nn.ApplyAdagrad 
|[diff](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/ApplyAdagrad_en.md)| +| torch.optim.Adagrad | mindspore.nn.Adagrad |[diff](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/Adagrad_en.md)| | torch.optim.Adam | mindspore.nn.Adam | same | | torch.optim.Adamax | mindspore.ops.ApplyAdaMax | same | | torch.optim.AdamW | mindspore.nn.AdamWeightDecay | same | @@ -186,18 +186,55 @@ Mapping between PyTorch APIs and MindSpore APIs, which is provided by the commun | torch.unbind | mindspore.ops.Unstack | same | | torch.unique | mindspore.ops.Unique |[diff](https://gitee.com/mindspore/docs/blob/master/resource/api_mapping/Unique_en.md)| | torch.unsqueeze | mindspore.ops.ExpandDims | same | -| torch.utils.data.DataLoader | mindspore.DatasetHelper | same | -| torch.utils.data.Dataset | mindspore.dataset.MindDataset | same | | torch.utils.data.distributed.DistributedSampler | mindspore.dataset.DistributedSampler | same | +| torch.utils.data.RandomSampler | mindspore.dataset.RandomSampler | same | +| torch.utils.data.SequentialSampler | mindspore.dataset.SequentialSampler | same | +| torch.utils.data.SubsetRandomSampler | mindspore.dataset.SubsetRandomSampler | same | +| torch.utils.data.WeightedRandomSampler | mindspore.dataset.WeightedRandomSampler | same | | torch.zeros | mindspore.ops.Zeros | same | | torch.zeros_like | mindspore.ops.ZerosLike | same | +| torchtext.data.functional.custom_replace | mindspore.dataset.text.transforms.RegexReplace | same | +| torchtext.data.functional.load_sp_model | mindspore.dataset.text.transforms.SentencePieceTokenizer | diff | +| torchtext.data.functional.numericalize_tokens_from_iterator | mindspore.dataset.text.transforms.Lookup | diff | +| torchtext.data.functional.sentencepiece_numericalizer | mindspore.dataset.text.transforms.SentencePieceTokenizer | diff | +| torchtext.data.functional.sentencepiece_tokenizer | mindspore.dataset.text.transforms.SentencePieceTokenizer | diff | +| torchtext.data.functional.simple_space_split | mindspore.dataset.text.transforms.WhitespaceTokenizer | same | +| torchtext.data.utils.ngrams_iterator | mindspore.dataset.text.transforms.Ngram | same | +| torchvision.datasets.CelebA | mindspore.dataset.CelebADataset | same | +| torchvision.datasets.CIFAR10 | mindspore.dataset.Cifar10Dataset | same | +| torchvision.datasets.CIFAR100 | mindspore.dataset.Cifar100Dataset | same | +| torchvision.datasets.CocoDetection | mindspore.dataset.CocoDataset | diff | | torchvision.datasets.ImageFolder | mindspore.dataset.ImageFolderDataset | same | +| torchvision.datasets.MNIST | mindspore.dataset.MnistDataset | same | +| torchvision.datasets.VOCDetection | mindspore.dataset.VOCDataset | diff | +| torchvision.datasets.VOCSegmentation | mindspore.dataset.VOCDataset | diff | | torchvision.ops.nms | mindspore.ops.NMSWithMask | same | | torchvision.ops.roi_align | mindspore.ops.ROIAlign | same | -| torchvision.transforms.CenterCrop | mindspore.dataset.vision.py_transforms.CenterCrop | same | -| torchvision.transforms.ColorJitter | mindspore.dataset.vision.py_transforms.RandomColorAdjust | same | -| torchvision.transforms.Compose | mindspore.dataset.transforms.py_transforms.Compose | same | -| torchvision.transforms.Normalize | mindspore.dataset.vision.py_transforms.Normalize | same | -| torchvision.transforms.RandomHorizontalFlip | mindspore.dataset.vision.py_transforms.RandomHorizontalFlip | same | -| torchvision.transforms.Resize | mindspore.dataset.vision.py_transforms.Resize | same | -| 
torchvision.transforms.ToTensor | mindspore.dataset.vision.py_transforms.ToTensor | same | +| torchvision.transforms.CenterCrop | mindspore.dataset.vision.c_transforms.CenterCrop | same | +| torchvision.transforms.ColorJitter | mindspore.dataset.vision.c_transforms.RandomColorAdjust | same | +| torchvision.transforms.Compose | mindspore.dataset.transforms.c_transforms.Compose | same | +| torchvision.transforms.ConvertImageDtype | mindspore.dataset.transforms.py_transforms.ToType | diff | +| torchvision.transforms.FiveCrop | mindspore.dataset.vision.py_transforms.FiveCrop | same | +| torchvision.transforms.GaussianBlur | mindspore.dataset.vision.c_transforms.GaussianBlur | same | +| torchvision.transforms.Grayscale | mindspore.dataset.vision.py_transforms.Grayscale | same | +| torchvision.transforms.LinearTransformation | mindspore.dataset.vision.py_transforms.LinearTransformation | same | +| torchvision.transforms.Normalize | mindspore.dataset.vision.c_transforms.Normalize | same | +| torchvision.transforms.Pad | mindspore.dataset.vision.c_transforms.Pad | same | +| torchvision.transforms.RandomAffine | mindspore.dataset.vision.c_transforms.RandomAffine | same | +| torchvision.transforms.RandomApply | mindspore.dataset.transforms.c_transforms.RandomApply | same | +| torchvision.transforms.RandomChoice | mindspore.dataset.transforms.c_transforms.RandomChoice | same | +| torchvision.transforms.RandomCrop | mindspore.dataset.vision.c_transforms.RandomCrop | same | +| torchvision.transforms.RandomErasing | mindspore.dataset.vision.py_transforms.RandomErasing | same | +| torchvision.transforms.RandomGrayscale | mindspore.dataset.vision.py_transforms.RandomGrayscale | same | +| torchvision.transforms.RandomHorizontalFlip | mindspore.dataset.vision.c_transforms.RandomHorizontalFlip | same | +| torchvision.transforms.RandomOrder | mindspore.dataset.transforms.py_transforms.RandomOrder | same | +| torchvision.transforms.RandomPerspective | mindspore.dataset.vision.py_transforms.RandomPerspective | same | +| torchvision.transforms.RandomPosterize | mindspore.dataset.vision.c_transforms.RandomPosterize | same | +| torchvision.transforms.RandomResizedCrop | mindspore.dataset.vision.c_transforms.RandomResizedCrop | same | +| torchvision.transforms.RandomRotation | mindspore.dataset.vision.c_transforms.RandomRotation | same | +| torchvision.transforms.RandomSolarize | mindspore.dataset.vision.c_transforms.RandomSolarize | diff | +| torchvision.transforms.RandomVerticalFlip | mindspore.dataset.vision.c_transforms.RandomVerticalFlip | same | +| torchvision.transforms.Resize | mindspore.dataset.vision.c_transforms.Resize | same | +| torchvision.transforms.TenCrop | mindspore.dataset.vision.py_transforms.TenCrop | same | +| torchvision.transforms.ToPILImage | mindspore.dataset.vision.py_transforms.ToPIL | diff | +| torchvision.transforms.ToTensor | mindspore.dataset.vision.py_transforms.ToTensor | diff | diff --git a/resource/api_mapping/create_group.md b/resource/api_mapping/create_group.md index 6b7746352884c73fb9cc784bcce3404f3b0e5401..dfd4392fd131d5407536b4713b18c9da8b315ae5 100644 --- a/resource/api_mapping/create_group.md +++ b/resource/api_mapping/create_group.md @@ -6,7 +6,8 @@ torch.distributed.new_group( ranks=None, timeout=datetime.timedelta(0, 1800), - backend=None + backend=None, + pg_options=None ) ``` diff --git a/resource/api_mapping/create_group_en.md b/resource/api_mapping/create_group_en.md index 57a7720562c0340085e77b8c9c38c7aba5cd1b88..b0592fd817bedcd04d1ec0f288d1377bd0257cb8 100644 --- 
a/resource/api_mapping/create_group_en.md +++ b/resource/api_mapping/create_group_en.md @@ -6,7 +6,8 @@ torch.distributed.new_group( ranks=None, timeout=datetime.timedelta(0, 1800), - backend=None + backend=None, + pg_options=None ) ``` diff --git a/resource/api_mapping/init.md b/resource/api_mapping/init.md index 9c5337861c136fdb8ae6cf68e69254793b86ca47..6534007a07aa56f85c2063cb6008fc2eb39c47b2 100644 --- a/resource/api_mapping/init.md +++ b/resource/api_mapping/init.md @@ -10,7 +10,8 @@ torch.distributed.init_process_group( world_size=-1, rank=-1, store=None, - group_name='' + group_name='', + pg_options=None ) ``` diff --git a/resource/api_mapping/init_en.md b/resource/api_mapping/init_en.md index 9ad43a0dc67d41db978d7434b50b199beda3e28d..25dfc95e55c6a7c8f1be2106351aca13d9b5d1fd 100644 --- a/resource/api_mapping/init_en.md +++ b/resource/api_mapping/init_en.md @@ -10,7 +10,8 @@ torch.distributed.init_process_group( world_size=-1, rank=-1, store=None, - group_name='' + group_name='', + pg_options=None ) ``` diff --git a/resource/api_updates/nn_api_updates.md b/resource/api_updates/nn_api_updates.md index 2c9ea8e89de30dde3f777790c830f0b0726a7f03..efb0ce7b08f6917a6de3b918163581b0f451c062 100644 --- a/resource/api_updates/nn_api_updates.md +++ b/resource/api_updates/nn_api_updates.md @@ -6,6 +6,7 @@ Compared with the previous version, the added, deleted and supported platforms c |:----|:----|:----|:---- |[mindspore.nn.ForwardValueAndGrad](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.ForwardValueAndGrad.html#mindspore.nn.ForwardValueAndGrad)|New|r1.2: Ascend/GPU/CPU|Wrapper Functions |[mindspore.nn.TimeDistributed](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.TimeDistributed.html#mindspore.nn.TimeDistributed)|New|r1.2: Ascend/GPU/CPU|Wrapper Functions +|[mindspore.nn.SparseToDense](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.SparseToDense.html#mindspore.nn.SparseToDense)|New|r1.2: CPU|Utilities |[mindspore.nn.BatchNorm3d](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.BatchNorm3d.html#mindspore.nn.BatchNorm3d)|New|r1.2: Ascend/GPU/CPU|Normalization Layers |[mindspore.nn.InstanceNorm2d](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.InstanceNorm2d.html#mindspore.nn.InstanceNorm2d)|New|r1.2: GPU|Normalization Layers |[mindspore.nn.SyncBatchNorm](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.SyncBatchNorm.html#mindspore.nn.SyncBatchNorm)|New|r1.2: Ascend|Normalization Layers @@ -33,9 +34,11 @@ Compared with the previous version, the added, deleted and supported platforms c |[mindspore.nn.RMSProp](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.RMSProp.html#mindspore.nn.RMSProp)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Optimizer Functions |[mindspore.nn.GroupNorm](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.GroupNorm.html#mindspore.nn.GroupNorm)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Normalization Layers |[mindspore.nn.BatchNorm1d](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.BatchNorm1d.html#mindspore.nn.BatchNorm1d)|Changed|r1.1: Ascend/GPU => r1.2: Ascend|Normalization Layers +|[mindspore.nn.LayerNorm](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.LayerNorm.html#mindspore.nn.LayerNorm)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Normalization Layers 
|[mindspore.nn.HSigmoid](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.HSigmoid.html#mindspore.nn.HSigmoid)|Changed|r1.1: GPU => r1.2: GPU/CPU|Non-linear Activations |[mindspore.nn.HSwish](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.HSwish.html#mindspore.nn.HSwish)|Changed|r1.1: GPU => r1.2: GPU/CPU|Non-linear Activations |[mindspore.nn.LeakyReLU](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.LeakyReLU.html#mindspore.nn.LeakyReLU)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Non-linear Activations +|[mindspore.nn.GELU](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.GELU.html#mindspore.nn.GELU)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Non-linear Activations |[mindspore.nn.ELU](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.ELU.html#mindspore.nn.ELU)|Changed|r1.1: Ascend/GPU => r1.2: Ascend/GPU/CPU|Non-linear Activations |[mindspore.nn.get_activation](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.get_activation.html#mindspore.nn.get_activation)|Changed|r1.1: To Be Developed => r1.2: Ascend/GPU/CPU|Non-linear Activations |[mindspore.nn.Moments](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/nn/mindspore.nn.Moments.html#mindspore.nn.Moments)|Changed|r1.1: Ascend => r1.2: Ascend/GPU|Math Functions diff --git a/resource/api_updates/ops_api_updates.md b/resource/api_updates/ops_api_updates.md index 7186ce20bab922ca979a5f63155e5582787578dc..3b2364f47d32312fa4af8701b060ab828870fcb2 100644 --- a/resource/api_updates/ops_api_updates.md +++ b/resource/api_updates/ops_api_updates.md @@ -33,6 +33,7 @@ Compared with the previous version, the added, deleted and supported platforms c |[mindspore.ops.Dihedral14LJCFForceWithAtomEnergy](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.Dihedral14LJCFForceWithAtomEnergy.html#mindspore.ops.Dihedral14LJCFForceWithAtomEnergy)|New|r1.2: GPU|operations--Sponge Operators |[mindspore.ops.AngleAtomEnergy](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.AngleAtomEnergy.html#mindspore.ops.AngleAtomEnergy)|New|r1.2: GPU|operations--Sponge Operators |[mindspore.ops.NoRepeatNGram](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.NoRepeatNGram.html#mindspore.ops.NoRepeatNGram)|New|r1.2: Ascend|operations--Other Operators +|[mindspore.ops.Dropout](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.Dropout.html#mindspore.ops.Dropout)|New|r1.2: Ascend/GPU/CPU|operations--Neural Network Operators |[mindspore.ops.Conv3DTranspose](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.Conv3DTranspose.html#mindspore.ops.Conv3DTranspose)|New|r1.2: Ascend|operations--Neural Network Operators |[mindspore.ops.SeLU](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.SeLU.html#mindspore.ops.SeLU)|New|r1.2: Ascend|operations--Neural Network Operators |[mindspore.ops.Dropout2D](https://www.mindspore.cn/doc/api_python/zh-CN/r1.2/mindspore/ops/mindspore.ops.Dropout2D.html#mindspore.ops.Dropout2D)|New|r1.2: Ascend|operations--Neural Network Operators diff --git a/resource/release/release_list_en.md b/resource/release/release_list_en.md index 3f4a97b641add3a6a7d8c04fa6e2eb1543c19640..611fedd61503b5da4fe45a46fcb71345955e5d42 100644 --- a/resource/release/release_list_en.md +++ b/resource/release/release_list_en.md @@ 
-4,6 +4,7 @@ - [Release List](#release-list) - [Related Documents](#related-documents) + - [Ascend Supporting Software Package](#Ascend-Supporting-Software-Package) - [Downloads](#downloads) - [1.2.1](#121) - [1.2.0](#120) @@ -40,6 +41,15 @@ | | Design&Specification | [1.2.1](https://www.mindspore.cn/doc/note/en/r1.2/index.html)    [1.1.1](https://www.mindspore.cn/doc/note/en/r1.1/index.html)    [1.0.1](https://www.mindspore.cn/doc/note/en/r1.0/index.html)    [0.7.0-beta](https://www.mindspore.cn/docs/en/r0.7/index.html)    [0.7.0-beta (Lite)   ](https://www.mindspore.cn/lite/docs/en/r0.7/index.html)   
[0.6.0-beta](https://www.mindspore.cn/docs/en/r0.6/index.html)    [0.5.2-beta](https://www.mindspore.cn/docs/en/r0.5/index.html)    [0.3.0-alpha](https://www.mindspore.cn/docs/en/0.3.0-alpha/index.html)    [0.2.0-alpha](https://www.mindspore.cn/docs/en/0.2.0-alpha/index.html)    [0.1.0-alpha](https://www.mindspore.cn/docs/en/0.1.0-alpha/index.html)    [master](https://www.mindspore.cn/doc/note/en/master/index.html) | | | FAQ | [1.2.1](https://www.mindspore.cn/doc/faq/en/r1.2/index.html)    [1.1.1](https://www.mindspore.cn/doc/faq/en/r1.1/index.html)    [1.0.1](https://www.mindspore.cn/doc/faq/en/r1.0/index.html)    [master](https://www.mindspore.cn/doc/faq/en/master/index.html) | +## Ascend Supporting Software Package + +| MindSpore Version | Hardware Platform | Supporting Software Package | Driver and Firmware | CANN | Software Package Documentation | +| --- | --- | --- | --- | --- | --- | +| 1.2.1 | Ascend 910 | [Ascend Data Center Solution 21.0.1.SPC001](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/253384063?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910) | [A800-9000 1.0.10 ARM platform](https://support.huawei.com/enterprise/zh/ascend-computing/a800-9000-pid-250702818/software/252727249?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702818) or [A800-9010 1.0.10 x86 platform](https://support.huawei.com/enterprise/zh/ascend-computing/a800-9010-pid-250702809/software/252727265?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702809) | [CANN 5.0.1.SPC102](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/253348209?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373) | [Ascend Data Center Solution 21.0.1.SPC001](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910) | +|| Ascend 310 | [Ascend Data Center Solution 21.0.1.SPC001](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/253384063?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910) | [A300-3000 1.0.10.SPC012 ARM platform](https://support.huawei.com/enterprise/zh/ascend-computing/a300-3000-pid-250702915/software/253286767?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702915) or [A300-3010 1.0.10.SPC012 x86 platform](https://support.huawei.com/enterprise/zh/ascend-computing/a300-3010-pid-251560253/software/253286771?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251560253) | [CANN 5.0.1.SPC102](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/253348209?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373) | [Ascend Data Center Solution 21.0.1.SPC001](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910) | +| 1.2.0 | Ascend 910 | [Ascend Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/252504563?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910) | [A800-9000 1.0.10](https://support.huawei.com/enterprise/zh/ascend-computing/a800-9000-pid-250702818/software/252727249?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702818) | [5.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/252504455?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373) | [Ascend 
Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910) | +|| Ascend 310 | [Ascend Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/252504563?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910) | [A300-3000 1.0.10](https://support.huawei.com/enterprise/zh/ascend-computing/a300-3000-pid-250702915/software/252496291?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702915) | [5.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/252504455?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373) | [Ascend Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910) | + ## Downloads ### 1.2.1 diff --git a/resource/release/release_list_zh_cn.md b/resource/release/release_list_zh_cn.md index 227f6c47f322dd9577dd334708fd5d43689cf64a..020aaa7f266f6d131a26f496878b4347e93ea5b0 100644 --- a/resource/release/release_list_zh_cn.md +++ b/resource/release/release_list_zh_cn.md @@ -4,6 +4,7 @@ - [发布版本列表](#发布版本列表) - [配套资料](#配套资料) + - [Ascend配套软件包](#ascend配套软件包) - [下载地址](#下载地址) - [1.2.1](#121) - [1.2.0](#120) @@ -41,6 +42,15 @@ | | 设计和规格 | [1.2.1](https://www.mindspore.cn/doc/note/zh-CN/r1.2/index.html)    [1.1.1](https://www.mindspore.cn/doc/note/zh-CN/r1.1/index.html)    [1.0.1](https://www.mindspore.cn/doc/note/zh-CN/r1.0/index.html)    [0.7.0-beta](https://www.mindspore.cn/docs/zh-CN/r0.7/index.html)    [0.7.0-beta (Lite)   ](https://www.mindspore.cn/lite/docs/zh-CN/r0.7/index.html)   
[0.6.0-beta](https://www.mindspore.cn/docs/zh-CN/r0.6/index.html)    [0.5.2-beta](https://www.mindspore.cn/docs/zh-CN/r0.5/index.html)    [0.3.0-alpha](https://www.mindspore.cn/docs/zh-CN/0.3.0-alpha/index.html)    [0.2.0-alpha](https://www.mindspore.cn/docs/zh-CN/0.2.0-alpha/index.html)    [0.1.0-alpha](https://www.mindspore.cn/docs/zh-CN/0.1.0-alpha/index.html)    [master](https://www.mindspore.cn/doc/note/zh-CN/master/index.html) | | | FAQ | [1.2.1](https://www.mindspore.cn/doc/faq/zh-CN/r1.2/index.html)    [1.1.1](https://www.mindspore.cn/doc/faq/zh-CN/r1.1/index.html)    [1.0.1](https://www.mindspore.cn/doc/faq/zh-CN/r1.0/index.html)    [master](https://www.mindspore.cn/doc/faq/zh-CN/master/index.html) | +## Ascend配套软件包 + +| MindSpore版本 | 硬件平台 | 配套软件包 | 驱动和固件 | CANN | 软件包文档 | +| --- | --- | --- | --- | --- | --- | +| 1.2.1 | Ascend 910 | [Ascend Data Center Solution 21.0.1.SPC001](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/253384063?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910) | [A800-9000 1.0.10 ARM平台](https://support.huawei.com/enterprise/zh/ascend-computing/a800-9000-pid-250702818/software/252727249?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702818) 或 [A800-9010 1.0.10 x86平台](https://support.huawei.com/enterprise/zh/ascend-computing/a800-9010-pid-250702809/software/252727265?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702809) | [CANN 5.0.1.SPC102](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/253348209?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373) | [Ascend Data Center Solution 21.0.1.SPC001](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910) | +|| Ascend 310 | [Ascend Data Center Solution 21.0.1.SPC001](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/253384063?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910) | [A300-3000 1.0.10.SPC012 ARM平台](https://support.huawei.com/enterprise/zh/ascend-computing/a300-3000-pid-250702915/software/253286767?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702915) 或 [A300-3010 1.0.10.SPC012 x86平台](https://support.huawei.com/enterprise/zh/ascend-computing/a300-3010-pid-251560253/software/253286771?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251560253) | [CANN 5.0.1.SPC102](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/253348209?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373) | [Ascend Data Center Solution 21.0.1.SPC001](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910) | +| 1.2.0 | Ascend 910 | [Ascend Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/252504563?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910) | [A800-9000 1.0.10](https://support.huawei.com/enterprise/zh/ascend-computing/a800-9000-pid-250702818/software/252727249?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702818) | [5.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/252504455?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373) | [Ascend Data Center Solution 
21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910) | +|| Ascend 310 | [Ascend Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910/software/252504563?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C252309113%7C251167910) | [A300-3000 1.0.10](https://support.huawei.com/enterprise/zh/ascend-computing/a300-3000-pid-250702915/software/252496291?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C250702915) | [5.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/cann-pid-251168373/software/252504455?idAbsPath=fixnode01%7C23710424%7C251366513%7C22892968%7C251168373) | [Ascend Data Center Solution 21.0.1](https://support.huawei.com/enterprise/zh/ascend-computing/ascend-data-center-solution-pid-251167910) | + ## 下载地址 ### 1.2.1 diff --git a/tutorials/inference/requirements.txt b/tutorials/inference/requirements.txt index 13ad45b4e3ca3cd3f58eb5f0cfa361911b473415..7d912c7550668468a02c1779e5fa59925e403d60 100644 --- a/tutorials/inference/requirements.txt +++ b/tutorials/inference/requirements.txt @@ -1,5 +1,5 @@ sphinx >= 2.2.1, <= 2.4.4 -recommonmark +myst_parser == 0.14.0 sphinx-markdown-tables sphinx_rtd_theme ipykernel diff --git a/tutorials/inference/source_en/conf.py b/tutorials/inference/source_en/conf.py index 0a00ad8da18607c9f0ac88017972211d04c763c0..d227b3aeb63f870bb10e54bb2ec83c54a9737aba 100644 --- a/tutorials/inference/source_en/conf.py +++ b/tutorials/inference/source_en/conf.py @@ -30,7 +30,7 @@ release = 'master' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'recommonmark', + 'myst_parser', 'sphinx_markdown_tables', ] diff --git a/tutorials/inference/source_en/images/resnet_example.png b/tutorials/inference/source_en/images/resnet_example.png index f747ae6c48c406586a418ef890f81517ee5849b2..0f6ce44b617a1754a25fac57763b53dee1df4314 100644 Binary files a/tutorials/inference/source_en/images/resnet_example.png and b/tutorials/inference/source_en/images/resnet_example.png differ diff --git a/tutorials/inference/source_en/multi_platform_inference_ascend_910.md b/tutorials/inference/source_en/multi_platform_inference_ascend_910.md index f9c0cdf494daea0a5f1a0ba19e6e259263ccb8f7..d2e6a394da9203e9dc990e5e7211cc851fa62b1a 100644 --- a/tutorials/inference/source_en/multi_platform_inference_ascend_910.md +++ b/tutorials/inference/source_en/multi_platform_inference_ascend_910.md @@ -12,7 +12,7 @@ - [Inference Code](#inference-code) - [Introduce to Building Script](#introduce-to-building-script) - [Building Inference Code](#building-inference-code) - - [Performing Inference and Viewing the Result](#performing-inference-and-viewing-the-result) + - [Performing Inference and Viewing the Result](#performing-inference-and-viewing-the-result) @@ -310,7 +310,7 @@ make After building, the executable `main` file is generated in `ascend910_resnet50_preprocess_sample`. -## Performing Inference and Viewing the Result +### Performing Inference and Viewing the Result Log in to the Ascend 910 server, and create the `model` directory for storing the MindIR file `resnet50_imagenet.mindir`, for example, `/home/HwHiAiUser/mindspore_sample/ascend910_resnet50_preprocess_sample/model`. Create the `test_data` directory to store images, for example, `/home/HwHiAiUser/mindspore_sample/ascend910_resnet50_preprocess_sample/test_data`. 
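The `requirements.txt` and `conf.py` changes above swap `recommonmark` for `myst_parser` as the Markdown extension. As a minimal sketch, the resulting Sphinx configuration would look roughly like this (assuming the pinned `myst_parser == 0.14.0`; the `project` value is a placeholder, while `release = 'master'` matches the context shown in the `conf.py` hunks):

```python
# Minimal conf.py sketch after the recommonmark -> myst_parser migration.
# Only the extension-related settings are shown; everything else is a placeholder.
project = 'MindSpore Tutorials'
release = 'master'

extensions = [
    'myst_parser',             # Markdown support, replacing 'recommonmark'
    'sphinx_markdown_tables',  # keeps table rendering for Markdown sources
]

# Map source suffixes to parser types; myst_parser handles 'markdown'.
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}
```

Unlike `recommonmark`, `myst_parser` needs no `source_parsers` hook or `AutoStructify` transform; adding it to `extensions` is enough.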
diff --git a/tutorials/inference/source_en/serving_distributed_example.md b/tutorials/inference/source_en/serving_distributed_example.md index e4b6e30b2455687a519b6e630d2c379d1a91effd..d7261be6146be55cccbee90e30735334fee8e6c4 100644 --- a/tutorials/inference/source_en/serving_distributed_example.md +++ b/tutorials/inference/source_en/serving_distributed_example.md @@ -182,7 +182,8 @@ def start(): distributed.start_servable(servable_dir, "matmul", rank_table_json_file="rank_table_8pcs.json", version_number=1, - distributed_address="127.0.0.1:6200") + distributed_address="127.0.0.1:6200", + wait_agents_time_in_seconds=0) server.start_grpc_server("127.0.0.1:5500") server.start_restful_server("127.0.0.1:1500") @@ -215,7 +216,8 @@ def start_agents(): group_configs.append(f"model/device{i}/group_config.pb") distributed.startup_agents(distributed_address="127.0.0.1:6200", model_files=model_files, - group_config_files=group_configs) + group_config_files=group_configs, agent_start_port=7000, + agent_ip=None, rank_start=None) if __name__ == '__main__': diff --git a/tutorials/inference/source_en/serving_example.md b/tutorials/inference/source_en/serving_example.md index c406644ed55a6318e93b77f679fdbdf7192463fc..b45657b5a3a114c6c5df85e1bebbdacdbabb0094 100644 --- a/tutorials/inference/source_en/serving_example.md +++ b/tutorials/inference/source_en/serving_example.md @@ -26,15 +26,15 @@ The following uses a simple `Add` network as an example to describe how to use M ### Preparing the Environment -Before running the sample network, ensure that MindSpore Serving has been properly installed. To install MindSpore Serving on your PC, go to the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README.md#installing-serving) and configure environment variables on the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/serving/blob/master/README.md#configuring-environment-variables). +Before running the sample network, ensure that MindSpore Serving has been properly installed. To install MindSpore Serving on your PC, go to the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README.md#installing-serving) and configure environment variables on the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/docs/blob/master/install/mindspore_ascend_install_source_en.md#configuring-environment-variables). ### Downloading the Example -Please download the [add example](https://gitee.com/mindspore/serving/blob/master/example/add/) first. +Please download the [add example](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/) first. ### Exporting the Model -In the directory `export_model`, use [add_model.py](https://gitee.com/mindspore/serving/blob/master/example/add/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model. +In the directory `export_model`, use [add_model.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model. ```python import os @@ -99,7 +99,7 @@ Execute the `add_model.py` script to generate the `tensor_add.mindir` file. The Start Serving with the following files: ```text -test_dir +tensor_add ├── add/ │ └── servable_config.py │ └── 1/ @@ -110,7 +110,7 @@ test_dir - `serving_server.py`: Script file for starting the service. 
- `add`: Model folder, which is named after the model name.
- `tensor_add.mindir`: Model file generated by the network in the previous step, which is stored in folder 1 (the number indicates the version number). Different versions are stored in different folders. The version number must be a string of digits. By default, the latest model file is started.
-[servable_config.py](https://gitee.com/mindspore/serving/blob/master/example/add/add/servable_config.py): [Model configuration file](https://www.mindspore.cn/tutorial/inference/en/master/serving_model.html), which defines the model processing functions, including the `add_common` and `add_cast` methods. `add_common` defines an addition operation whose input is two pieces of float32 data, and `add_cast` defines an addition operation whose input is data with its type converted to float32.
+[servable_config.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/add/servable_config.py): [Model configuration file](https://www.mindspore.cn/tutorial/inference/en/master/serving_model.html), which defines the model processing functions, including the `add_common` and `add_cast` methods. `add_common` defines an addition operation whose input is two pieces of float32 data, and `add_cast` defines an addition operation whose input is data with its type converted to float32.

Content of the configuration file:

@@ -150,7 +150,7 @@ def add_cast(x1, x2):

#### Starting the Service

The server calls a Python API to start the inference process shared by both master and worker nodes. The client directly connects to the inference service and delivers an inference task.

-Run the [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/add/serving_server.py) script to deploy lightweight service:
+Run the [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_server.py) script to deploy the lightweight service:

```python
import os
@@ -175,12 +175,12 @@ if __name__ == "__main__":

The above startup script will load and run two inference copies of `add` on devices 0 and 1, and the inference requests from the client will be split to the two inference copies.

-If the server prints the `Serving gRPC start success, listening on 0.0.0.0:5500` log, the Serving has loaded the inference model.
+If the server prints the `Serving RESTful server start success, listening on 127.0.0.1:1500` log, the Serving RESTful service has started successfully and the inference model has been loaded successfully.

### Inference Execution

The client can access the inference service through either [gRPC](https://www.mindspore.cn/tutorial/inference/en/master/serving_grpc.html) or [RESTful](https://www.mindspore.cn/tutorial/inference/en/master/serving_restful.html). The following uses gRPC as an example.

-Execute [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/add/serving_client.py) to start the Python client.
+Execute [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_client.py) to start the Python client.
```python
import numpy as np
diff --git a/tutorials/inference/source_en/serving_grpc.md b/tutorials/inference/source_en/serving_grpc.md
index a15c632ce47372144e0dc85908e93a7762505e5a..29199b042b4d45020b294a2c9aacbf058f6d3dcc 100644
--- a/tutorials/inference/source_en/serving_grpc.md
+++ b/tutorials/inference/source_en/serving_grpc.md
@@ -9,6 +9,7 @@
 - [add](#add)
 - [ResNet-50](#resnet-50)
 - [Accessing Serving Server through Unix Domain Socket](#accessing-serving-server-through-unix-domain-socket)
+ - [Accessing SSL/TLS enabled Serving service](#accessing-ssltls-enabled-serving-service)
@@ -20,7 +21,7 @@ The gRPC API is provided to access the MindSpore Serving. In the Python environm

## add

-This example comes from [add example](https://gitee.com/mindspore/serving/blob/master/example/add/serving_client.py). The `add` Servable provides the `add_common` method to add up two 2x2 tensors. The code of the gRPC Python client is as follows. One gRPC request includes three pairs of independent 2x2 tensors.
+This example comes from [add example](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_client.py). The `add` Servable provides the `add_common` method to add up two 2x2 tensors. The code of the gRPC Python client is as follows. One gRPC request includes three pairs of independent 2x2 tensors.

```python
from mindspore_serving.client import Client
@@ -175,11 +176,11 @@ from mindspore_serving import server

def start():
    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
-    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="resnet50",
                                                 device_ids=(0, 1))
    server.start_servables(servable_configs=servable_config)

-    server.start_grpc_server(address="unix:add_test_temp_file")
+    server.start_grpc_server(address="unix:/tmp/resnet50_test_temp_file")

if __name__ == "__main__":
@@ -194,7 +195,7 @@ from mindspore_serving.client import Client

def run_classify_top1():
-    client = Client("unix:add_test_temp_file", "resnet50", "classify_top1")
+    client = Client("unix:/tmp/resnet50_test_temp_file", "resnet50", "classify_top1")
    instances = []
    for path, _, file_list in os.walk("./test_image/"):
        for file_name in file_list:
@@ -209,3 +210,180 @@ def run_classify_top1():

if __name__ == '__main__':
    run_classify_top1()
```
+
+## Accessing SSL/TLS enabled Serving Service
+
+MindSpore Serving supports `SSL/TLS`-based communication between the server and the client.
+
+`SSL/TLS` is a secure communication protocol that can be used to verify the identity of a client or server, encrypt all data, and secure the communication.
+Digital certificates are used to identify the server or client, and private keys are used to decrypt data and sign information digests.
+We can use openssl to generate the private keys and certificates related to the server and the client.
+
+Here's an example of how to generate the certificates and perform one-way and mutual authentication:
+
+### One-way authentication
+
+Only the client verifies the identity of the server, so we need the server's certificate and private key.
+You can execute the following openssl commands to generate the relevant certificates.
+
+```shell
+# Generate the root certificate used to issue the certificates of the server or the client
+openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout ca.key -out ca.crt -subj "/C=CN/ST=xx/L=xx/OU=gRPC/CN=Root"
+
+# Generate the server's private key
+openssl genrsa -out server.key 2048
+# Generate the server's certificate signing request
+# You can customize the server name on the certificate by setting CN (Common Name). In this case we can set CN to localhost.
+# When the gRPC client accesses the server with this certificate, the address needs to be localhost.
+openssl req -new -key server.key -out server.csr -subj "/C=XX/ST=MyST/L=XX/O=HW/OU=gRPC/CN=localhost"
+# Use the root certificate to issue a server certificate
+openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256
+```
+
+We get the `server.key`, `server.crt` and `ca.crt` files. Pass them to the corresponding `SSLConfig`.
+
+- Server:
+
+  ```python
+  import os
+  import sys
+  from mindspore_serving import server
+
+
+  def start():
+      servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+      servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+                                                   device_ids=(0, 1))
+      server.start_servables(servable_configs=servable_config)
+
+      ssl_config = server.SSLConfig(certificate="server.crt", private_key="server.key", custom_ca=None, verify_client=False)
+
+      server.start_grpc_server(address="127.0.0.1:5500", ssl_config=ssl_config)
+
+
+  if __name__ == "__main__":
+      start()
+  ```
+
+  - `ssl_config` represents the server's `SSL` configuration. This parameter defaults to `None`, which means `SSL/TLS` is not enabled.
+    Enabling `SSL/TLS` requires passing a `mindspore_serving.server.SSLConfig` object to this parameter.
+  - `certificate` is the path to the server's certificate file.
+  - `private_key` is the path to the server's private key file.
+  - `custom_ca` is the path to the root certificate file used for verifying the client certificate. When `verify_client` is `True`,
+    the client's certificate needs to be verified, so this parameter can't be `None`; the corresponding path must be passed in.
+  - `verify_client` indicates whether to verify the identity of the client.
+
+  Setting `verify_client` to `False` selects one-way authentication. We pass in the certificate `server.crt` and the private key `server.key`, respectively.
+  Since the server does not need to verify the client, `custom_ca` is ignored.
+
+- Client:
+
+  ```python
+  from mindspore_serving.client import Client
+  from mindspore_serving.client import SSLConfig
+  import numpy as np
+
+
+  def run_add_common():
+      """invoke Servable add method add_common"""
+      ssl_config = SSLConfig(custom_ca="ca.crt")
+      client = Client("localhost:5500", "add", "add_common", ssl_config=ssl_config)
+      instances = []
+
+      # instance 1
+      x1 = np.asarray([[1, 1], [1, 1]]).astype(np.float32)
+      x2 = np.asarray([[1, 1], [1, 1]]).astype(np.float32)
+      instances.append({"x1": x1, "x2": x2})
+
+      result = client.infer(instances)
+      print(result)
+
+
+  if __name__ == '__main__':
+      run_add_common()
+  ```
+
+  - `ssl_config` represents the client's `SSL` configuration. This parameter defaults to `None`, which means `SSL/TLS` is not enabled.
+    Enabling `SSL/TLS` requires passing a `mindspore_serving.client.SSLConfig` object to `ssl_config`.
+  - `certificate` is the path to the client's certificate file.
+  - `private_key` is the path to the client's private key file.
+  - `custom_ca` is the path to the client's root certificate file, which is used to verify the identity of the server.
+    This parameter can be `None`, at which point gRPC finds the corresponding root certificate through the `grpc/_cython/_credentials/roots.pem` file under the gRPC installation path or
+    the `GRPC_DEFAULT_SSL_ROOTS_FILE_PATH` environment variable.
+
+  Because only the client verifies the server certificate, you only need to set `custom_ca` to `ca.crt`, the root certificate that issued the server's certificate.
+
+### Mutual authentication
+
+Both the client and the server need to verify each other's identity, so in addition to the server's certificate,
+we need to execute the following commands to generate the client's certificate.
+
+```shell
+# Generate the client's private key
+openssl genrsa -out client.key 2048
+# Generate the client's certificate signing request
+openssl req -new -key client.key -out client.csr -subj "/C=XX/ST=MyST/L=XX/O=HW/OU=gRPC/CN=client"
+# Use the root certificate to issue the client's certificate
+openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256
+```
+
+We get the `client.key` and `client.crt` files.
+
+- Server:
+
+  ```python
+  import os
+  import sys
+  from mindspore_serving import server
+
+
+  def start():
+      servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+      servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+                                                   device_ids=(0, 1))
+      server.start_servables(servable_configs=servable_config)
+
+      ssl_config = server.SSLConfig(certificate="server.crt", private_key="server.key", custom_ca="ca.crt", verify_client=True)
+
+      server.start_grpc_server(address="127.0.0.1:5500", ssl_config=ssl_config)
+
+
+  if __name__ == "__main__":
+      start()
+  ```
+
+  Setting `verify_client` to `True` enables mutual (two-way) authentication. Also set `custom_ca` to `ca.crt` so that the server can verify the client certificate.
+
+- Client:
+
+  ```python
+  from mindspore_serving.client import Client
+  from mindspore_serving.client import SSLConfig
+  import numpy as np
+
+
+  def run_add_common():
+      """invoke Servable add method add_common"""
+      ssl_config = SSLConfig(certificate="client.crt", private_key="client.key", custom_ca="ca.crt")
+      client = Client("localhost:5500", "add", "add_common", ssl_config=ssl_config)
+      instances = []
+
+      # instance 1
+      x1 = np.asarray([[1, 1], [1, 1]]).astype(np.float32)
+      x2 = np.asarray([[1, 1], [1, 1]]).astype(np.float32)
+      instances.append({"x1": x1, "x2": x2})
+
+      result = client.infer(instances)
+      print(result)
+
+
+  if __name__ == '__main__':
+      run_add_common()
+  ```
+
+  The client needs to provide its own certificate to the server for authentication, so we pass in the client's certificate `client.crt` and the private key `client.key`.
+
+If only one of the gRPC server and client enables `SSL/TLS`, the other side will report an `ssl3_get_record:wrong version number` error;
+in that case, confirm that both the server and the client have enabled `SSL/TLS`.
diff --git a/tutorials/inference/source_en/serving_model.md b/tutorials/inference/source_en/serving_model.md
index 5c8956af76695a96d767bd607f0fee801a29941d..34d60d964a2d05e8f9622b1905b5a09dcd2903fe 100644
--- a/tutorials/inference/source_en/serving_model.md
+++ b/tutorials/inference/source_en/serving_model.md
@@ -70,9 +70,9 @@ Take the ResNet-50 model as an example. The model configuration file directory i
```text
resnet50
├── 1
-│   └── resnet_classify.mindir
+│   └── resnet50_1b_cifar10.mindir
├── 2
-│   └── resnet_classify.mindir
+│   └── resnet50_1b_cifar10.mindir
└── servable_config.py
```

@@ -82,7 +82,7 @@ resnet50

- `1` and `2`: directories, which indicate models of the `1` and `2` versions. The model version is a positive integer starting from `1`. A larger number indicates a later version.

-`resnet_classify.mindir`: a model file. When the Servable is started, the model file of the corresponding version is loaded.
+`resnet50_1b_cifar10.mindir`: a model file. When the Servable is started, the model file of the corresponding version is loaded.

### Preprocessing and Post-processing Definition

@@ -93,15 +93,20 @@ import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as TC
import mindspore.dataset.vision.c_transforms as VC

+# cifar 10
+idx_2_label = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
+
+
def preprocess_eager(image):
    """
    Define preprocess, input is image numpy, return preprocess result.
    Return type can be numpy, str, bytes, int, float, or bool.
-    Use MindData Eager, this image processing can also use other image processing library, likes numpy, PIL or cv2 etc.
+    Use MindData Eager, this image processing can also use other image processing library,
+    like numpy, PIL or cv2, etc.
    """
    image_size = 224
-    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
-    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
+    mean = [0.4914 * 255, 0.4822 * 255, 0.4465 * 255]
+    std = [0.2023 * 255, 0.1994 * 255, 0.2010 * 255]

    decode = VC.Decode()
    resize = VC.Resize([image_size, image_size])
@@ -126,7 +131,8 @@ def postprocess_top1(score):

def postprocess_top5(score):
    """
    Define postprocess. This example has one input and two outputs.
-    The input is the numpy tensor of the score. The first output is the str joined by labels of top five, and the second output is the score tensor of the top five.
+    The input is the numpy tensor of the score. The first output is the string
+    joined by the labels of the top five, and the second output is the score tensor of the top five.
    """
    idx = np.argsort(score)[::-1][:5]  # top 5
    ret_label = [idx_2_label[i] for i in idx]
@@ -144,7 +150,7 @@ The sample code for declaring the `resnet50` Servable model is as follows:

```python
from mindspore_serving.server import register

-register.declare_servable(servable_file="resnet50_1b_imagenet.mindir", model_format="MindIR", with_batch_dim=True)
+register.declare_servable(servable_file="resnet50_1b_cifar10.mindir", model_format="MindIR", with_batch_dim=True)
```

The input parameter `servable_file` of `declare_servable` indicates the model file name. `model_format` indicates the model type. Currently, the Ascend 310 environment supports both `OM` and `MindIR` model types. The Ascend 910 environment supports only the `MindIR` model type.

@@ -222,24 +228,38 @@ The method definition cannot contain branch structures such as if, for, and while

When a user uses a service provided by a Servable method on the client, the user needs to specify the input value based on the input parameter name and identify the output value based on the output parameter name.
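In other words, each request instance is a dict keyed by the method's declared input names, and each item in the reply is a dict keyed by the declared output names. A minimal sketch of that contract (the server address and image path are placeholders; the `image`, `label` and `score` names follow the `classify_top5` declaration):

```python
from mindspore_serving.client import Client

# Placeholder address; the servable and method names follow the declaration above.
client = Client("localhost:5500", "resnet50", "classify_top5")

# Each instance is keyed by the input parameter name "image".
with open("test_image/example.jpg", "rb") as fp:  # placeholder image path
    instances = [{"image": fp.read()}]

result = client.infer(instances)
# Each result item is keyed by the output parameter names "label" and "score".
print(result[0]["label"])
print(result[0]["score"])
```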
For example, the method `classify_top5` accessed by the client is as follows: ```python +import os from mindspore_serving.client import Client def read_images(): - # read image file and return + """Read images for directory test_image""" + image_files = [] + images_buffer = [] + for path, _, file_list in os.walk("./test_image/"): + for file_name in file_list: + image_file = os.path.join(path, file_name) + image_files.append(image_file) + for image_file in image_files: + with open(image_file, "rb") as fp: + images_buffer.append(fp.read()) + return image_files, images_buffer def run_classify_top5(): """Client for servable resnet50 and method classify_top5""" client = Client("localhost:5500", "resnet50", "classify_top5") instances = [] - for image in read_images(): # read multi image + image_files, images_buffer = read_images() + for image in images_buffer: instances.append({"image": image}) # input `image` + result = client.infer(instances) - print(result) - for result_item in result: # result for every image + + for file, result_item in zip(image_files, result): # result for every image label = result_item["label"] # result `label` score = result_item["score"] # result `score` - print("label result", label) - print("score result", score) + print("file:", file) + print("label result:", label) + print("score result:", score) if __name__ == '__main__': run_classify_top5() diff --git a/tutorials/inference/source_en/serving_restful.md b/tutorials/inference/source_en/serving_restful.md index 6ba098ced3a726d43edcf25ac354d79d34c45958..362c76f42cb53773c4f5879470a2e3a1f3b6e119 100644 --- a/tutorials/inference/source_en/serving_restful.md +++ b/tutorials/inference/source_en/serving_restful.md @@ -10,6 +10,7 @@ - [Request Format](#request-format) - [Base64 Data Encoding](#base64-data-encoding) - [Response Format](#response-format) + - [Accessing SSL/TLS enabled Serving RESTful service](#accessing-ssltls-enabled-serving-restful-service) @@ -23,9 +24,7 @@ MindSpore Serving supports both `gPRC` and `RESTful` request modes. The followin For details about how to deploy `Serving`, see [MindSpore Serving-based Inference Service Deployment](https://www.mindspore.cn/tutorial/inference/en/master/serving_example.html). -Use the `master.start_restful_server` API to start the `RESTful` service. Alternatively, you can use `master.start_grpc_server` to start the `gRPC` service. - -> `RESTful` clients do not depend on specific hardware platforms. Currently, the Serving server supports `Ascend 310`, `Ascend 910` and `Nvidia GPU` hardware environments. +We can use the `mindspore_serving.server.start_restful_server` API to start the `RESTful` service. ## Request Method @@ -240,10 +239,59 @@ The response format is the same as the request format. 
The information in the `J

**The response data is represented as follows:**

- | Serving Output Type | RESTful json Data Type | Description | Example |
- | ---- | ---- | ---- | ---- |
- | `int8`, `int16`, `int32`, `int64`, `uint8`, `uint16`, `uint32`, `uint64` | json integer | All types of integer data are represented as JSON integer | 1,[1,2,3,4] |
- | `float16`, `float32`, `float64` | json float | All types of float data are represented as JSON float | 1.0,[[1.2, 2.3], [3.0, 4.5]] |
- | `bool` | json bool | Bool data is represented as json bool | true,false,[[true],[false]] |
- | `string` | json str | String data is represented as json string | "news_car" |
- | `bytes` | base64 object | Bytes data is represented as a base64 object | {"b64":"AQACAAIAAwADAAQA"} |
+| Serving Output Type | RESTful json Data Type | Description | Example |
+| ---- | ---- | ---- | ---- |
+| `int8`, `int16`, `int32`, `int64`, `uint8`, `uint16`, `uint32`, `uint64` | json integer | All types of integer data are represented as JSON integer | 1,[1,2,3,4] |
+| `float16`, `float32`, `float64` | json float | All types of float data are represented as JSON float | 1.0,[[1.2, 2.3], [3.0, 4.5]] |
+| `bool` | json bool | Bool data is represented as json bool | true,false,[[true],[false]] |
+| `string` | json str | String data is represented as json string | "news_car" |
+| `bytes` | base64 object | Bytes data is represented as a base64 object | {"b64":"AQACAAIAAwADAAQA"} |
+
+## Accessing SSL/TLS enabled Serving RESTful service
+
+MindSpore Serving supports `SSL/TLS`-enabled `RESTful` services. Here's an example of starting and accessing a `RESTful` service with one-way authentication.
+
+Setting `verify_client` to `False` indicates one-way authentication. To enable `SSL/TLS`, pass a `mindspore_serving.server.SSLConfig` object to `ssl_config`. You can refer to [Accessing SSL/TLS enabled Serving service](https://www.mindspore.cn/tutorial/inference/en/master/serving_grpc.html#accessing-ssl-tls-enabled-serving-service) for other details.
+
+```python
+import os
+import sys
+from mindspore_serving import server
+
+
+def start():
+    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+                                                 device_ids=(0, 1))
+    server.start_servables(servable_configs=servable_config)
+
+    ssl_config = server.SSLConfig(certificate="server.crt", private_key="server.key", custom_ca=None, verify_client=False)
+
+    server.start_restful_server(address="127.0.0.1:5500", ssl_config=ssl_config)
+
+
+if __name__ == "__main__":
+    start()
+```
+
+We can use the `curl` command line tool or the `requests` library to access the `SSL/TLS`-enabled `RESTful` service. If you use `curl`, you can try the following command:
+
+```text
+curl -X POST -d '${REQ_JSON_MESSAGE}' --cacert '${PATH_TO_CA_CERT_FILE}' https://${HOST}:${PORT}/model/${MODEL_NAME}/version/${VERSION}:${METHOD_NAME}
+```
+
+An example of accessing the `add_common` method of the `add` model is as follows:
+
+```text
+curl -X POST -d '{"instances":[{"x1":[[1.0, 2.0], [3.0, 4.0]], "x2":[[1.0, 2.0], [3.0, 4.0]]}]}' --cacert ca.crt https://localhost:5500/model/add/version/1:add_common
+```
+
+The protocol needs to be set to `https`, and the `--cacert` option needs to be set to the path of `ca.crt`.
+
+In addition, since this example uses a self-signed server certificate, we can pass the `--insecure` option to skip verifying the server's certificate.
+And here's an example: + +```text +curl -X POST -d '{"instances":[{"x1":[[1.0, 2.0], [3.0, 4.0]], "x2":[[1.0, 2.0], [3.0, 4.0]]}]}' --insecure https://localhost:5500/model/add/version/1:add_common +``` \ No newline at end of file diff --git a/tutorials/inference/source_zh_cn/conf.py b/tutorials/inference/source_zh_cn/conf.py index 0c819a8b0622e1914ff199e5bd29a591595470b3..f050e627013604355ce2787ffd1dbf007abfc48b 100644 --- a/tutorials/inference/source_zh_cn/conf.py +++ b/tutorials/inference/source_zh_cn/conf.py @@ -30,7 +30,7 @@ release = 'master' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'recommonmark', + 'myst_parser', 'sphinx_markdown_tables', ] diff --git a/tutorials/inference/source_zh_cn/images/resnet_example.png b/tutorials/inference/source_zh_cn/images/resnet_example.png index f747ae6c48c406586a418ef890f81517ee5849b2..0f6ce44b617a1754a25fac57763b53dee1df4314 100644 Binary files a/tutorials/inference/source_zh_cn/images/resnet_example.png and b/tutorials/inference/source_zh_cn/images/resnet_example.png differ diff --git a/tutorials/inference/source_zh_cn/index.rst b/tutorials/inference/source_zh_cn/index.rst index bf777e0a3fded324420ef9c13a27af207a7ad8a4..f573a3fd39e462a6ab651a861844ac8df7eba552 100644 --- a/tutorials/inference/source_zh_cn/index.rst +++ b/tutorials/inference/source_zh_cn/index.rst @@ -28,6 +28,7 @@ serving_grpc serving_restful serving_model + serving_pipeline .. toctree:: :glob: diff --git a/tutorials/inference/source_zh_cn/multi_platform_inference_ascend_910.md b/tutorials/inference/source_zh_cn/multi_platform_inference_ascend_910.md index db0a4f785499eb86799c7bf9768585ef1d48605f..ebe6da393874f3d50f3b5fc0e80ebc4f9371a116 100644 --- a/tutorials/inference/source_zh_cn/multi_platform_inference_ascend_910.md +++ b/tutorials/inference/source_zh_cn/multi_platform_inference_ascend_910.md @@ -271,7 +271,7 @@ add_executable(resnet50_sample main.cc) target_link_libraries(resnet50_sample ${MS_LIB} ${MD_LIB}) ``` -## 编译推理代码 +### 编译推理代码 进入工程目录`ascend910_resnet50_preprocess_sample`,设置如下环境变量: @@ -309,7 +309,7 @@ make 编译完成后,在`ascend910_resnet50_preprocess_sample`下会生成可执行`main`文件。 -## 执行推理并查看结果 +### 执行推理并查看结果 登录Ascend 910环境,创建`model`目录放置MindIR文件`resnet50_imagenet.mindir`,例如`/home/HwHiAiUser/mindspore_sample/ascend910_resnet50_preprocess_sample/model`。 创建`test_data`目录放置图片,例如`/home/HwHiAiUser/mindspore_sample/ascend910_resnet50_preprocess_sample/test_data`。 diff --git a/tutorials/inference/source_zh_cn/serving_distributed_example.md b/tutorials/inference/source_zh_cn/serving_distributed_example.md index 1295625761e0cc8c217745978d7afcf4556cdc54..b0ceb392a43a4d32b534363d65c1a58072b4925f 100644 --- a/tutorials/inference/source_zh_cn/serving_distributed_example.md +++ b/tutorials/inference/source_zh_cn/serving_distributed_example.md @@ -180,7 +180,8 @@ def start(): distributed.start_servable(servable_dir, "matmul", rank_table_json_file="rank_table_8pcs.json", version_number=1, - distributed_address="127.0.0.1:6200") + distributed_address="127.0.0.1:6200", + wait_agents_time_in_seconds=0) server.start_grpc_server("127.0.0.1:5500") server.start_restful_server("127.0.0.1:1500") @@ -213,7 +214,8 @@ def start_agents(): group_configs.append(f"model/device{i}/group_config.pb") distributed.startup_agents(distributed_address="127.0.0.1:6200", model_files=model_files, - group_config_files=group_configs) + group_config_files=group_configs, agent_start_port=7000, + agent_ip=None, rank_start=None) if __name__ == '__main__': diff --git 
diff --git a/tutorials/inference/source_zh_cn/conf.py b/tutorials/inference/source_zh_cn/conf.py
index 0c819a8b0622e1914ff199e5bd29a591595470b3..f050e627013604355ce2787ffd1dbf007abfc48b 100644
--- a/tutorials/inference/source_zh_cn/conf.py
+++ b/tutorials/inference/source_zh_cn/conf.py
@@ -30,7 +30,7 @@ release = 'master'
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'recommonmark',
+    'myst_parser',
     'sphinx_markdown_tables',
 ]
diff --git a/tutorials/inference/source_zh_cn/images/resnet_example.png b/tutorials/inference/source_zh_cn/images/resnet_example.png
index f747ae6c48c406586a418ef890f81517ee5849b2..0f6ce44b617a1754a25fac57763b53dee1df4314 100644
Binary files a/tutorials/inference/source_zh_cn/images/resnet_example.png and b/tutorials/inference/source_zh_cn/images/resnet_example.png differ
diff --git a/tutorials/inference/source_zh_cn/index.rst b/tutorials/inference/source_zh_cn/index.rst
index bf777e0a3fded324420ef9c13a27af207a7ad8a4..f573a3fd39e462a6ab651a861844ac8df7eba552 100644
--- a/tutorials/inference/source_zh_cn/index.rst
+++ b/tutorials/inference/source_zh_cn/index.rst
@@ -28,6 +28,7 @@
    serving_grpc
    serving_restful
    serving_model
+   serving_pipeline

 .. toctree::
    :glob:
diff --git a/tutorials/inference/source_zh_cn/multi_platform_inference_ascend_910.md b/tutorials/inference/source_zh_cn/multi_platform_inference_ascend_910.md
index db0a4f785499eb86799c7bf9768585ef1d48605f..ebe6da393874f3d50f3b5fc0e80ebc4f9371a116 100644
--- a/tutorials/inference/source_zh_cn/multi_platform_inference_ascend_910.md
+++ b/tutorials/inference/source_zh_cn/multi_platform_inference_ascend_910.md
@@ -271,7 +271,7 @@ add_executable(resnet50_sample main.cc)
 target_link_libraries(resnet50_sample ${MS_LIB} ${MD_LIB})
 ```

-## Building the Inference Code
+### Building the Inference Code

 Go to the project directory `ascend910_resnet50_preprocess_sample` and set the following environment variables:

@@ -309,7 +309,7 @@
 make
 ```

 After compilation, the executable `main` file is generated under `ascend910_resnet50_preprocess_sample`.

-## Running Inference and Viewing the Result
+### Running Inference and Viewing the Result

 Log in to the Ascend 910 environment and create the `model` directory to hold the MindIR file `resnet50_imagenet.mindir`, for example `/home/HwHiAiUser/mindspore_sample/ascend910_resnet50_preprocess_sample/model`.
 Create the `test_data` directory to hold images, for example `/home/HwHiAiUser/mindspore_sample/ascend910_resnet50_preprocess_sample/test_data`.
diff --git a/tutorials/inference/source_zh_cn/serving_distributed_example.md b/tutorials/inference/source_zh_cn/serving_distributed_example.md
index 1295625761e0cc8c217745978d7afcf4556cdc54..b0ceb392a43a4d32b534363d65c1a58072b4925f 100644
--- a/tutorials/inference/source_zh_cn/serving_distributed_example.md
+++ b/tutorials/inference/source_zh_cn/serving_distributed_example.md
@@ -180,7 +180,8 @@ def start():
     distributed.start_servable(servable_dir, "matmul",
                                rank_table_json_file="rank_table_8pcs.json",
                                version_number=1,
-                               distributed_address="127.0.0.1:6200")
+                               distributed_address="127.0.0.1:6200",
+                               wait_agents_time_in_seconds=0)

     server.start_grpc_server("127.0.0.1:5500")
     server.start_restful_server("127.0.0.1:1500")
@@ -213,7 +214,8 @@ def start_agents():
         group_configs.append(f"model/device{i}/group_config.pb")

     distributed.startup_agents(distributed_address="127.0.0.1:6200", model_files=model_files,
-                               group_config_files=group_configs)
+                               group_config_files=group_configs, agent_start_port=7000,
+                               agent_ip=None, rank_start=None)


 if __name__ == '__main__':
diff --git a/tutorials/inference/source_zh_cn/serving_example.md b/tutorials/inference/source_zh_cn/serving_example.md
index 4c4df19e74793812518b2f7fa222d89961472d83..bf4b7ae47b5a2abb9afe4709ef816070128d1bf3 100644
--- a/tutorials/inference/source_zh_cn/serving_example.md
+++ b/tutorials/inference/source_zh_cn/serving_example.md
@@ -26,15 +26,15 @@ MindSpore Serving是一个轻量级、高性能的服务模块,旨在帮助Min
 ### Environment Preparation

-Before running the example, make sure MindSpore Serving has been installed correctly. If not, install it via the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E5%AE%89%E8%A3%85), and configure the environment variables via the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E9%85%8D%E7%BD%AE%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F).
+Before running the example, make sure MindSpore Serving has been installed correctly. If not, install it via the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E5%AE%89%E8%A3%85), and configure the environment variables via the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/docs/blob/master/install/mindspore_ascend_install_pip.md#%E9%85%8D%E7%BD%AE%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F).

 ### Downloading the Sample

-Please [download the sample](https://gitee.com/mindspore/serving/blob/master/example/add/) first.
+Please [download the sample](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/) first.

 ### Exporting the Model

-In the `export_model` directory, use [add_model.py](https://gitee.com/mindspore/serving/blob/master/example/add/export_model/add_model.py) to build a network containing only an Add operator and export the MindSpore inference deployment model.
+In the `export_model` directory, use [add_model.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/export_model/add_model.py) to build a network containing only an Add operator and export the MindSpore inference deployment model.

 ```python
 import os
@@ -99,18 +99,18 @@ if __name__ == "__main__":
 To start the Serving service for the Add example, the following file list is required:

 ```text
-test_dir
+tensor_add
 ├── add/
 │   └── servable_config.py
 │   └── 1/
 │       └── tensor_add.mindir
-└── master_with_worker.py
+└── serving_server.py
 ```

-- `master_with_worker.py` is the script that starts the service.
+- `serving_server.py` is the script that starts the service.
 - `add` is the model directory; the directory name is the model name.
 - `tensor_add.mindir` is the model file generated by the network in the previous step. It is placed under directory `1`, where `1` is the version number. Different versions are placed in different directories, version numbers must be pure digit strings, and under the default configuration the model file with the largest version number is started.
-- [servable_config.py](https://gitee.com/mindspore/serving/blob/master/example/add/add/servable_config.py) is the [model configuration file](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_model.html). It defines the model's processing functions, including the two methods `add_common` and `add_cast`: `add_common` defines an addition over two plain float32 inputs, while `add_cast` defines an addition whose inputs of other types are first cast to float32.
+- [servable_config.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/add/servable_config.py) is the [model configuration file](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_model.html). It defines the model's processing functions, including the two methods `add_common` and `add_cast`: `add_common` defines an addition over two plain float32 inputs, while `add_cast` defines an addition whose inputs of other types are first cast to float32.

 The content of the model configuration file is as follows:

@@ -149,7 +149,7 @@ def add_cast(x1, x2):
 #### Starting the Service

-Run [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/add/serving_server.py) to start the service:
+Run [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_server.py) to start the service:

 ```python
 import os
@@ -174,12 +174,12 @@
 The startup script above loads and runs two `add` inference copies on devices 0 and 1; inference requests from clients are split and routed to the two copies.

-When the server prints the log `Serving gRPC start success, listening on 127.0.0.1:5500`, the Serving service has finished loading the inference model.
+When the server prints the log `Serving RESTful server start success, listening on 127.0.0.1:1500`, the Serving RESTful service has started successfully and the inference model has been loaded.

 ### Running Inference

 The client can access the inference service in two ways: via [gRPC](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_grpc.html) or via [RESTful](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_restful.html). This document uses the gRPC method as the example.
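Although this tutorial proceeds with gRPC, the RESTful route against the `127.0.0.1:1500` server started above would look roughly as follows — a hypothetical sketch using the third-party `requests` library:

```python
import json

import requests

# Hypothetical RESTful counterpart of the gRPC client used below,
# targeting the RESTful server started on 127.0.0.1:1500 in this example.
instances = [{"x1": [[1.0, 2.0], [3.0, 4.0]], "x2": [[1.0, 2.0], [3.0, 4.0]]}]
reply = requests.post("http://127.0.0.1:1500/model/add/version/1:add_common",
                      data=json.dumps({"instances": instances}))
print(reply.json())
```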
-Use [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/add/serving_client.py) to start the Python client.
+Use [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_client.py) to start the Python client.

 ```python
 import numpy as np
diff --git a/tutorials/inference/source_zh_cn/serving_grpc.md b/tutorials/inference/source_zh_cn/serving_grpc.md
index d0ca08b4fa60284e21148783b5d99a4c5e6c80ca..eada1675486d8a8030789f5f511cbd3163645ab3 100644
--- a/tutorials/inference/source_zh_cn/serving_grpc.md
+++ b/tutorials/inference/source_zh_cn/serving_grpc.md
@@ -9,6 +9,7 @@
     - [add sample](#add样例)
     - [ResNet-50 sample](#resnet-50样例)
     - [Accessing the Serving server through Unix domain socket](#通过unix-domain-socket访问serving服务器)
+    - [Accessing the SSL/TLS-enabled Serving service](#访问开启ssltls的serving服务)

@@ -20,7 +21,7 @@ MindSpore Serving提供gRPC接口访问Serving服务。在Python环境下,我

 ## add sample

-The sample comes from [add example](https://gitee.com/mindspore/serving/blob/master/example/add/serving_client.py). The `add_common` method provided by the `add` Servable adds two 2x2 tensors. The gRPC Python client code is shown below; a single gRPC request carries three independent pairs of 2x2 tensors:
+The sample comes from [add example](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_client.py). The `add_common` method provided by the `add` Servable adds two 2x2 tensors. The gRPC Python client code is shown below; a single gRPC request carries three independent pairs of 2x2 tensors:

 ```python
 from mindspore_serving.client import Client

@@ -175,11 +176,11 @@ from mindspore_serving import server

 def start():
     servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))

-    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="resnet50",
                                                  device_ids=(0, 1))
     server.start_servables(servable_configs=servable_config)

-    server.start_grpc_server(address="unix:/tmp/serving_add_test_temp_file")
+    server.start_grpc_server(address="unix:/tmp/serving_resnet50_test_temp_file")


 if __name__ == "__main__":
@@ -194,7 +195,7 @@ from mindspore_serving.client import Client

 def run_classify_top1():
-    client = Client("unix:/tmp/serving_add_test_temp_file", "resnet50", "classify_top1")
+    client = Client("unix:/tmp/serving_resnet50_test_temp_file", "resnet50", "classify_top1")
     instances = []
     for path, _, file_list in os.walk("./test_image/"):
         for file_name in file_list:
@@ -209,3 +210,168 @@ def run_classify_top1():
 if __name__ == '__main__':
     run_classify_top1()
 ```
+
+## Accessing the SSL/TLS-enabled Serving service
+
+The MindSpore Serving server and client can communicate over the `SSL/TLS` protocol.
+
+`SSL/TLS` is a secure communication protocol. It can verify the identity of the client or the server and encrypt all data to keep the communication secure. A digital certificate identifies the server or the client, and the private key is used to decrypt data and sign message digests. We can use openssl to generate the private keys and certificates for the server and the client.
+
+The following example shows how to generate the certificates and perform one-way and mutual authentication:
+
+### One-way authentication
+
+Only the client verifies the server's identity, so we need the server's certificate and private key. The following openssl commands generate the required certificates.
+
+```shell
+# Generate the root certificate, used to sign server or client certificates
+openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout ca.key -out ca.crt -subj "/C=CN/ST=xx/L=xx/OU=gRPC/CN=Root"
+
+# Generate the server's private key
+openssl genrsa -out server.key 2048
+# Generate the server's certificate signing request
+# The CN parameter customizes the server name on the certificate. Here we set it to localhost,
+# so the gRPC client must use localhost as the access address.
+openssl req -new -key server.key -out server.csr -subj "/C=XX/ST=MyST/L=XX/O=HW/OU=gRPC/CN=localhost"
+# Sign the server certificate with the root certificate
+openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256
+```
+
+We now have three files: `server.key`, `server.crt` and `ca.crt`. Pass them to the corresponding `SSLConfig`.
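Before wiring these files into `SSLConfig`, the generated CA can be sanity-checked with the Python standard library — a minimal sketch assuming the file names produced by the openssl commands above:

```python
import ssl

# Hypothetical sanity check that the generated CA certificate is well-formed
# and loadable before passing it to SSLConfig.
context = ssl.create_default_context(cafile="ca.crt")
print(context.cert_store_stats())  # expect one loaded CA, e.g. {'x509_ca': 1, ...}
```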
+- Server:
+
+    ```python
+    import os
+    import sys
+    from mindspore_serving import server
+
+
+    def start():
+        servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+        servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+                                                     device_ids=(0, 1))
+        server.start_servables(servable_configs=servable_config)
+
+        ssl_config = server.SSLConfig(certificate="server.crt", private_key="server.key", custom_ca=None,
+                                      verify_client=False)
+
+        server.start_grpc_server(address="127.0.0.1:5500", ssl_config=ssl_config)
+
+
+    if __name__ == "__main__":
+        start()
+    ```
+
+    - `ssl_config` is the server's `SSL` configuration. It defaults to `None`, which disables `SSL/TLS`. To enable `SSL/TLS`, pass a `mindspore_serving.server.SSLConfig` object.
+    - `certificate` is the path of the server's certificate file.
+    - `private_key` is the path of the server's private key file.
+    - `custom_ca` is the path of the server's root certificate file, used to verify the client's identity. When `verify_client` is `True`, the client's certificate must be verified, so this parameter cannot be `None` and a valid path must be passed.
+    - `verify_client` indicates whether to verify the client's identity.
+
+    Setting `verify_client` to `False` selects one-way authentication. We pass the server's certificate `server.crt` and private key `server.key`; since the server does not verify the client's certificate, the server's `custom_ca` parameter is ignored here.
+
+- Client:
+
+    ```python
+    from mindspore_serving.client import Client
+    from mindspore_serving.client import SSLConfig
+    import numpy as np
+
+
+    def run_add_common():
+        """invoke Servable add method add_common"""
+        ssl_config = SSLConfig(custom_ca="ca.crt")
+        client = Client("localhost:5500", "add", "add_common", ssl_config=ssl_config)
+        instances = []
+
+        # instance 1
+        x1 = np.asarray([[1, 1], [1, 1]]).astype(np.float32)
+        x2 = np.asarray([[1, 1], [1, 1]]).astype(np.float32)
+        instances.append({"x1": x1, "x2": x2})
+
+        result = client.infer(instances)
+        print(result)
+
+
+    if __name__ == '__main__':
+        run_add_common()
+    ```
+
+    - `ssl_config` is the client's `SSL` configuration. It defaults to `None`, which disables `SSL/TLS`. To enable `SSL/TLS`, pass a `mindspore_serving.client.SSLConfig` object.
+    - `certificate` is the path of the client's certificate file.
+    - `private_key` is the path of the client's private key file.
+    - `custom_ca` is the path of the client's root certificate file, used to verify the server's identity. It may be `None`, in which case gRPC locates the root certificate through the `grpc/_cython/_credentials/roots.pem` file under the gRPC installation path or through the `GRPC_DEFAULT_SSL_ROOTS_FILE_PATH` environment variable.
+
+    Since only the client verifies the server's certificate, we only need to set `custom_ca` to the `ca.crt` that signed the server's certificate.
+
+### Mutual authentication
+
+Both the client and the server verify each other's identity, so besides the server's certificate we also run the following commands to generate a client certificate.
+
+```shell
+# Generate the client's private key
+openssl genrsa -out client.key 2048
+# Generate the client's certificate signing request
+openssl req -new -key client.key -out client.csr -subj "/C=XX/ST=MyST/L=XX/O=HW/OU=gRPC/CN=client"
+# Sign the client certificate with the root certificate
+openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256
+```
+
+We now have `client.key` and `client.crt`.
+
+- Server:
+
+    ```python
+    import os
+    import sys
+    from mindspore_serving import server
+
+
+    def start():
+        servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+        servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+                                                     device_ids=(0, 1))
+        server.start_servables(servable_configs=servable_config)
+
+        ssl_config = server.SSLConfig(certificate="server.crt", private_key="server.key", custom_ca="ca.crt",
+                                      verify_client=True)
+
+        server.start_grpc_server(address="127.0.0.1:5500", ssl_config=ssl_config)
+
+
+    if __name__ == "__main__":
+        start()
+    ```
+
+    Setting `verify_client` to `True` selects mutual authentication, and `custom_ca` is set to `ca.crt` so that the client's certificate can be verified.
+
+- Client:
+
+    ```python
+    from mindspore_serving.client import Client
+    from mindspore_serving.client import SSLConfig
+    import numpy as np
+
+
+    def run_add_common():
+        """invoke Servable add method add_common"""
+        ssl_config = SSLConfig(certificate="client.crt", private_key="client.key", custom_ca="ca.crt")
+        client = Client("localhost:5500", "add", "add_common", ssl_config=ssl_config)
+        instances = []
+
+        # instance 1
+        x1 = np.asarray([[1, 1], [1, 1]]).astype(np.float32)
+        x2 = np.asarray([[1, 1], [1, 1]]).astype(np.float32)
+        instances.append({"x1": x1, "x2": x2})
+
+        result = client.infer(instances)
+        print(result)
+
+
+    if __name__ == '__main__':
+        run_add_common()
+    ```
+
+    The client needs to present its own certificate to the server for verification; we pass the client's certificate `client.crt` and private key `client.key`.
+
+When the gRPC server and client disagree on whether `SSL/TLS` is enabled, one side reports an `ssl3_get_record:wrong version number` error. In that case, confirm that both the server and the client have `SSL/TLS` enabled.
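To see the mismatch error described above in practice, one can run a client without `ssl_config` against the TLS-enabled server from this example. This is a hypothetical sketch; depending on the client version the failure may surface as an exception or an error result, and the exact error text varies with the gRPC version:

```python
import numpy as np
from mindspore_serving.client import Client

# Hypothetical repro: the server from the mutual-authentication example has
# SSL/TLS enabled, but this client does not pass ssl_config, so the handshake fails.
client = Client("localhost:5500", "add", "add_common")
x = np.ones((2, 2), np.float32)
try:
    print(client.infer([{"x1": x, "x2": x}]))
except Exception as err:
    print("request failed:", err)
```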
diff --git a/tutorials/inference/source_zh_cn/serving_model.md b/tutorials/inference/source_zh_cn/serving_model.md
index b043f9f886b8a2f67d797be9158250538f79c172..2ffebacd33325941fd227d9fc016aa3c8bd074f7 100644
--- a/tutorials/inference/source_zh_cn/serving_model.md
+++ b/tutorials/inference/source_zh_cn/serving_model.md
@@ -70,9 +70,9 @@ Servable包含如下内容:
 ```text
 resnet50
 ├── 1
-│   └── resnet_classify.mindir
+│   └── resnet50_1b_cifar10.mindir
 ├── 2
-│   └── resnet_classify.mindir
+│   └── resnet50_1b_cifar10.mindir
 └── servable_config.py
 ```

@@ -82,7 +82,7 @@ resnet50
 - Directories `1` and `2` hold the models of version `1` and version `2`. Model versions are positive integers starting from `1`; a larger number means a newer version.

-- `resnet_classify.mindir` is the model file; on startup, Servable loads the model file of the corresponding version.
+- `resnet50_1b_cifar10.mindir` is the model file; on startup, Servable loads the model file of the corresponding version.

 ### Defining Preprocessing and Postprocessing

@@ -93,15 +93,20 @@
 import mindspore.dataset as ds
 import mindspore.dataset.transforms.c_transforms as TC
 import mindspore.dataset.vision.c_transforms as VC

+# cifar 10
+idx_2_label = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
+
+
 def preprocess_eager(image):
     """
     Define preprocess, input is image numpy, return preprocess result.
     Return type can be numpy, str, bytes, int, float, or bool.
-    Use MindData Eager, this image processing can also use other image processing library, likes numpy, PIL or cv2 etc.
+    Use MindData Eager, this image processing can also use other image processing library,
+    likes numpy, PIL or cv2 etc.
     """
     image_size = 224
-    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
-    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
+    mean = [0.4914 * 255, 0.4822 * 255, 0.4465 * 255]
+    std = [0.2023 * 255, 0.1994 * 255, 0.2010 * 255]

     decode = VC.Decode()
     resize = VC.Resize([image_size, image_size])
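Since the docstring above notes that the preprocessing can equally be done with numpy, PIL or cv2, here is a rough PIL/numpy equivalent — a sketch only, as the resize interpolation and rounding differ slightly from the MindData `VC` operators:

```python
import io

import numpy as np
from PIL import Image


def preprocess_pil(image_bytes, image_size=224):
    """Hypothetical PIL/numpy version of preprocess_eager above."""
    mean = np.array([0.4914, 0.4822, 0.4465], dtype=np.float32) * 255
    std = np.array([0.2023, 0.1994, 0.2010], dtype=np.float32) * 255
    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    image = image.resize((image_size, image_size))
    array = (np.asarray(image, dtype=np.float32) - mean) / std  # per-channel normalize
    return array.transpose(2, 0, 1)  # HWC -> CHW
```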
""" idx = np.argsort(score)[::-1][:5] # top 5 ret_label = [idx_2_label[i] for i in idx] @@ -144,7 +151,7 @@ def postprocess_top5(score): ```python from mindspore_serving.server import register -register.declare_servable(servable_file="resnet50_1b_imagenet.mindir", model_format="MindIR", with_batch_dim=True) +register.declare_servable(servable_file="resnet50_1b_cifar10.mindir", model_format="MindIR", with_batch_dim=True) ``` 其中`declare_servable`入参`servable_file`指示模型的文件名称;`model_format`指示模型的模型类别,当前Ascend310环境支持`OM`和`MindIR`两种模型类型,Ascend910环境仅支持`MindIR`模型类型。 @@ -222,24 +229,38 @@ def classify_top5(image): 用户在客户端使用Servable某个方法提供的服务时,需要通过入参名称指定对应输入的值,通过出参名称识别各个输出的值。比如客户端访问方法`classify_top5`: ```python +import os from mindspore_serving.client import Client def read_images(): - # read image file and return + """Read images for directory test_image""" + image_files = [] + images_buffer = [] + for path, _, file_list in os.walk("./test_image/"): + for file_name in file_list: + image_file = os.path.join(path, file_name) + image_files.append(image_file) + for image_file in image_files: + with open(image_file, "rb") as fp: + images_buffer.append(fp.read()) + return image_files, images_buffer def run_classify_top5(): """Client for servable resnet50 and method classify_top5""" client = Client("localhost:5500", "resnet50", "classify_top5") instances = [] - for image in read_images(): # read multi image + image_files, images_buffer = read_images() + for image in images_buffer: instances.append({"image": image}) # input `image` + result = client.infer(instances) - print(result) - for result_item in result: # result for every image + + for file, result_item in zip(image_files, result): # result for every image label = result_item["label"] # result `label` score = result_item["score"] # result `score` - print("label result", label) - print("score result", score) + print("file:", file) + print("label result:", label) + print("score result:", score) if __name__ == '__main__': run_classify_top5() diff --git a/tutorials/inference/source_zh_cn/serving_pipeline.md b/tutorials/inference/source_zh_cn/serving_pipeline.md new file mode 100644 index 0000000000000000000000000000000000000000..7f58f48675dc72248932f8581927c21788f5502c --- /dev/null +++ b/tutorials/inference/source_zh_cn/serving_pipeline.md @@ -0,0 +1,289 @@ +# 基于Pipeline实现多图调度 + +`Linux` `Ascend` `Serving` `中级` `高级` + + + +- [基于Pipeline实现多图调度](#基于pipeline实现多图调度) + - [概述](#概述) + - [环境准备](#环境准备) + - [导出多图模型](#导出多图模型) + - [部署分布式推理服务](#部署分布式推理服务) + - [启动Serving服务器](#启动serving服务器) + - [启动Agent](#启动agent) + - [执行推理](#执行推理) + + + + + +## 概述 + +MindSpore支持一个模型可以生成多张子图,通过调度多张子图实现性能的提升。例如,在GPT3场景,会将模型拆成两个阶段的图,第一阶段为初始化图,只需执行一次,第二阶段的推理图,根据输入句子长度N决定需要执行N次。这样,相对于优化之前两张图合并在一起都需要执行N次,实现了推理服务性能的5-6倍提升。为此,MindSpore Serving提供了Pineline功能,实现多张图之间的调度,提升特定场景的推理服务性能。 + +当前对Pipeline使用有以下限制: + +- 当前仅支持batchsize为1场景。 +- 有Pipeline存在场景,服务仅是以Pineline的形式对外体现,即客户端调用的模型方法需为注册的Pipeline方法。 + +下面以一个简单的分布式场景为例,演示Pipeline部署流程。 + +### 环境准备 + +运行示例前,需确保已经正确安装了MindSpore Serving。如果没有,可以参考[MindSpore Serving安装页面](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E5%AE%89%E8%A3%85),将MindSpore Serving正确地安装到你的电脑当中,同时参考[MindSpore Serving环境配置页面](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E9%85%8D%E7%BD%AE%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F)完成环境变量配置。 + +### 导出多图模型 + +导出分布式模型需要的文件可以参考[export_model目录](https://gitee.com/mindspore/serving/tree/master/example/pipeline_distributed/export_model),需要如下文件列表: + +```text +export_model +├── distributed_inference.py +├── export_model.sh 
+### Environment Preparation
+
+Before running the example, make sure MindSpore Serving has been installed correctly. If not, install it by referring to the [MindSpore Serving installation page](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E5%AE%89%E8%A3%85), and configure the environment variables by referring to the [MindSpore Serving environment configuration page](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E9%85%8D%E7%BD%AE%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F).
+
+### Exporting the Multi-graph Model
+
+The files needed to export the distributed model can be found in the [export_model directory](https://gitee.com/mindspore/serving/tree/master/example/pipeline_distributed/export_model). The following file list is required:
+
+```text
+export_model
+├── distributed_inference.py
+├── export_model.sh
+├── net.py
+└── rank_table_8pcs.json
+```
+
+- `net.py` defines the MatMul network.
+- `distributed_inference.py` configures the distribution-related parameters.
+- `export_model.sh` creates the `device` directories on the current machine and exports the model files for each `device`.
+- `rank_table_8pcs.json` is the json file describing the networking of the current multi-card environment; see [rank_table](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/distributed_training_ascend.html#id4).
+
+Use [net.py](https://gitee.com/mindspore/serving/blob/master/example/matmul_distributed/export_model/net.py) to build a network containing MatMul and Neg operators.
+
+```python
+import numpy as np
+from mindspore import Tensor, Parameter, ops
+from mindspore.nn import Cell
+
+
+class Net(Cell):
+    def __init__(self, matmul_size, init_val, transpose_a=False, transpose_b=False, strategy=None):
+        super().__init__()
+        matmul_np = np.full(matmul_size, init_val, dtype=np.float32)
+        self.matmul_weight = Parameter(Tensor(matmul_np))
+        self.matmul = ops.MatMul(transpose_a=transpose_a, transpose_b=transpose_b)
+        self.neg = ops.Neg()
+        if strategy is not None:
+            self.matmul.shard(strategy)
+
+    def construct(self, inputs):
+        x = self.matmul(inputs, self.matmul_weight)
+        x = self.neg(x)
+        return x
+```
+
+Use [distributed_inference.py](https://gitee.com/mindspore/serving/blob/master/example/pipeline_distributed/export_model/distributed_inference.py) to generate the multi-graph model; see [Distributed Inference](https://www.mindspore.cn/tutorial/inference/zh-CN/master/multi_platform_inference_ascend_910.html#id1).
+
+```python
+import numpy as np
+from net import Net
+from mindspore import context, Model, Tensor, export
+from mindspore.communication import init
+
+
+def test_inference():
+    """distributed inference after distributed training"""
+    context.set_context(mode=context.GRAPH_MODE)
+    init(backend_name="hccl")
+    context.set_auto_parallel_context(full_batch=True, parallel_mode="semi_auto_parallel",
+                                      device_num=8, group_ckpt_save_file="./group_config.pb")
+
+    predict_data = create_predict_data()
+    network = Net(matmul_size=(96, 16), init_val=0.5)
+    model = Model(network)
+    model.infer_predict_layout(Tensor(predict_data))
+    export(model._predict_network, Tensor(predict_data), file_name="matmul_0", file_format="MINDIR")
+
+    network_1 = Net(matmul_size=(96, 16), init_val=1.5)
+    model_1 = Model(network_1)
+    model_1.infer_predict_layout(Tensor(predict_data))
+    export(model_1._predict_network, Tensor(predict_data), file_name="matmul_1", file_format="MINDIR")
+
+
+def create_predict_data():
+    """user-defined predict data"""
+    inputs_np = np.random.randn(128, 96).astype(np.float32)
+    return Tensor(inputs_np)
+```
+
+Use [export_model.sh](https://gitee.com/mindspore/serving/blob/master/example/matmul_distributed/export_model/export_model.sh) to export the multi-graph model. On success it creates the `model` directory one level up, with the following structure:
+
+```text
+model
+├── device0
+│   ├── group_config.pb
+│   ├── matmul_0.mindir
+│   └── matmul_1.mindir
+├── device1
+├── device2
+├── device3
+├── device4
+├── device5
+├── device6
+└── device7
+```
+
+Each `device` directory contains three files, `group_config.pb`, `matmul_0.mindir` and `matmul_1.mindir`, which are the model group configuration file and the model files of the two graphs.
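A small sanity check of the exported layout can help before deployment — a hypothetical helper, assuming the `model` directory produced by `export_model.sh` above:

```python
import os

# Hypothetical check that export_model.sh produced the expected multi-graph
# layout: each device directory holds the group config and both graph files.
EXPECTED = ("group_config.pb", "matmul_0.mindir", "matmul_1.mindir")

for rank in range(8):
    device_dir = os.path.join("model", f"device{rank}")
    missing = [name for name in EXPECTED
               if not os.path.isfile(os.path.join(device_dir, name))]
    print(device_dir, "ok" if not missing else f"missing: {missing}")
```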
+### Deploying the Distributed Inference Service
+
+To start the distributed inference service, refer to [matmul_distributed](https://gitee.com/mindspore/serving/tree/master/example/pipeline_distributed). The following file list is required:
+
+```text
+matmul_distributed
+├── serving_agent.py
+├── serving_server.py
+├── matmul
+│   └── servable_config.py
+├── model
+└── rank_table_8pcs.json
+```
+
+- `model` is the directory holding the model files.
+- `serving_server.py` is the script that starts the service, including the `Main` and `Distributed Worker` processes.
+- `serving_agent.py` is the script that starts the `Agent`s.
+- `servable_config.py` is the [model configuration file](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_model.html). It declares a distributed model with rank_size 8 and stage_size 1 via `distributed.declare_servable`, and defines a Pipeline method `predict`.
+
+The content of the model configuration file is as follows:
+
+```python
+import numpy as np
+from mindspore_serving.server import distributed
+from mindspore_serving.server import register
+from mindspore_serving.server.register import PipelineServable
+
+distributed.declare_servable(rank_size=8, stage_size=1, with_batch_dim=False)
+
+
+def add_preprocess(x):
+    """define preprocess, this example has one input and one output"""
+    x = np.add(x, x)
+    return x
+
+
+@register.register_method(output_names=["y"])
+def fun1(x):
+    x = register.call_preprocess(add_preprocess, x)
+    y = register.call_servable(x, subgraph=0)
+    return y
+
+
+@register.register_method(output_names=["y"])
+def fun2(x):
+    y = register.call_servable(x, subgraph=1)
+    return y
+
+
+servable1 = PipelineServable(servable_name="matmul", method="fun1", version_number=0)
+servable2 = PipelineServable(servable_name="matmul", method="fun2", version_number=0)
+
+
+@register.register_pipeline(output_names=["x", "z"])
+def predict(x, y):
+    x = servable1.run(x)
+    z = servable2.run(y)
+    return x, z
+```
+
+The `subgraph` parameter of the `call_servable` method selects a graph by index, starting from 0. The index is the order in which the graphs are loaded: in the standalone scenario it matches the order of the `servable_file` parameter list of the `declare_servable` interface, and in the distributed scenario it matches the order of the `model_files` parameter list of the `startup_agents` interface.
+The `PipelineServable` class declares the service function of a model: `servable_name` specifies the model name, `method` specifies the method, and `version_number` specifies the version number. `register_pipeline` registers the Pipeline function, and its `output_names` argument specifies the list of outputs.
+
+#### Starting the Serving Server
+
+Use [serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/pipeline_distributed/serving_server.py) to deploy the distributed Serving server by calling the `distributed.start_servable` method.
+
+```python
+import os
+import sys
+from mindspore_serving import server
+from mindspore_serving.server import distributed
+
+
+def start():
+    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+    distributed.start_servable(servable_dir, "matmul",
+                               rank_table_json_file="rank_table_8pcs.json",
+                               version_number=1,
+                               distributed_address="127.0.0.1:6200")
+
+    server.start_grpc_server("127.0.0.1:5500")
+    server.start_restful_server("127.0.0.1:1500")
+
+
+if __name__ == "__main__":
+    start()
+```
+
+- `servable_dir` is the directory where the servable is stored.
+- `servable_name` is the name of the servable, corresponding to a directory holding the model configuration file.
+- `rank_table_json_file` is the json file describing the networking of the current multi-card environment.
+- `distributed_address` is the address of the `Distributed Worker`.
+- `wait_agents_time_in_seconds` sets the time limit for waiting until all `Agent`s have registered; the default of 0 means waiting indefinitely.
+
+#### Starting the Agents
+
+Use [serving_agent.py](https://gitee.com/mindspore/serving/blob/master/example/pipeline_distributed/serving_agent.py) to start the 8 `Agent` processes on the current machine by calling the `startup_agents` method. The `Agent`s fetch the rank_table from the `Distributed Worker` so that they can communicate with each other via HCCL.
+
+```python
+from mindspore_serving.server import distributed
+
+
+def start_agents():
+    """Start all the agents in current machine"""
+    model_files = []
+    group_configs = []
+    for i in range(8):
+        model_files.append([f"model/device{i}/matmul_0.mindir", f"model/device{i}/matmul_1.mindir"])
+        group_configs.append([f"model/device{i}/group_config.pb"])
+
+    distributed.startup_agents(distributed_address="127.0.0.1:6200", model_files=model_files,
+                               group_config_files=group_configs)
+
+
+if __name__ == '__main__':
+    start_agents()
+```
+
+- `distributed_address` is the address of the `Distributed Worker`.
+- `model_files` is the list of model file paths. Passing several model files per device enables multiple graphs, and the order in which the files are passed determines the graph index used by the `subgraph` parameter of `call_servable`.
+- `group_config_files` is the list of model group configuration file paths.
+- `agent_start_port` is the starting port occupied by the `Agent`s; the default is 7000.
+- `agent_ip` is the IP address of the `Agent`s; the default is None. The IP used for communication between the `Agent`s and the `Distributed Worker` is obtained from the rank_table by default; if that IP address is unavailable, both `agent_ip` and `rank_start` need to be set.
+- `rank_start` is the starting rank_id of the current machine; the default is None.
+
+### Running Inference
+
+To access the inference service via gRPC, the client needs to specify the IP address and port of the gRPC server. Run [serving_client.py](https://gitee.com/mindspore/serving/blob/master/example/pipeline_distributed/serving_client.py) to call the `predict` method of the matmul distributed model; the method corresponds to the registered Pipeline method and runs the inference.
+
+```python
+import numpy as np
+from mindspore_serving.client import Client
+
+
+def run_matmul():
+    """Run client of distributed matmul"""
+    client = Client("localhost:5500", "matmul", "predict")
+    instance = {"x": np.ones((128, 96), np.float32), "y": np.ones((128, 96), np.float32)}
+    result = client.infer(instance)
+    print("result:\n", result)
+
+
+if __name__ == '__main__':
+    run_matmul()
+```
+
+The following return value shows that the Serving distributed inference service has correctly run the multi-graph Pipeline inference:
+
+```text
+result:
+[{'x': array([[-96., -96., -96., ..., -96., -96., -96.],
+       [-96., -96., -96., ..., -96., -96., -96.],
+       [-96., -96., -96., ..., -96., -96., -96.],
+       ...,
+       [-96., -96., -96., ..., -96., -96., -96.],
+       [-96., -96., -96., ..., -96., -96., -96.],
+       [-96., -96., -96., ..., -96., -96., -96.]], dtype=float32), 'z': array([[-144., -144., -144., ..., -144., -144., -144.],
+       [-144., -144., -144., ..., -144., -144., -144.],
+       [-144., -144., -144., ..., -144., -144., -144.],
+       ...,
+       [-144., -144., -144., ..., -144., -144., -144.],
+       [-144., -144., -144., ..., -144., -144., -144.],
+       [-144., -144., -144., ..., -144., -144., -144.]], dtype=float32)}]
+```
diff --git a/tutorials/inference/source_zh_cn/serving_restful.md b/tutorials/inference/source_zh_cn/serving_restful.md
index bb93d410eb0b64800c0b182c3c1439675c8be903..fd3e6089e2738fa881f2b4717669d1bb90d88820 100644
--- a/tutorials/inference/source_zh_cn/serving_restful.md
+++ b/tutorials/inference/source_zh_cn/serving_restful.md
@@ -10,6 +10,7 @@
     - [Request input format](#请求输入格式)
     - [base64 data encoding](#base64数据编码)
    - [Request/response format](#请求应答格式)
+    - [Accessing the SSL/TLS-enabled RESTful service](#访问开启ssltls的restful服务)

@@ -23,9 +24,7 @@ MindSpore Serving支持`gPRC`和`RESTful`两种请求方式。本章节介绍`RE
 Deploy `Serving` by referring to the [Quick Start](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_example.html) chapter.

-The `RESTful` service is started via the `master.start_restful_server` interface; in addition, a `gRPC` service can be started via `master.start_grpc_server`.
-
-> The `RESTful` client does not depend on a specific hardware platform. The Serving server currently supports the `Ascend310`, `Ascend910` and `Nvidia GPU` hardware environments.
+We can start the `RESTful` service via the `mindspore_serving.server.start_restful_server` interface.

@@ -240,10 +239,58 @@ RESTful支持`Json`请求格式,`key`固定为`instances`,`value`表示多
 **The response data is represented as follows:**

- | Serving output type | RESTful json data type | Description | Example |
- | ---- | ---- | ---- | ---- |
- | `int8`, `int16`, `int32`, `int64`, `uint8`, `uint16`, `uint32`, `uint64` | json integer | Integer data is represented as a json integer | 1,[1,2,3,4] |
- | `float16`, `float32`, `float64` | json float | Float data is represented as a json float | 1.0,[[1.2, 2.3], [3.0, 4.5]] |
- | `bool` | json bool | Bool data is represented as a json bool | true,false,[[true],[false]] |
- | `string` | json str | String output is represented as a json str | "news_car" |
- | `bytes` | base64 object | Binary output is converted to a base64 object | {"b64":"AQACAAIAAwADAAQA"} |
+| Serving output type | RESTful json data type | Description | Example |
+| ---- | ---- | ---- | ---- |
+| `int8`, `int16`, `int32`, `int64`, `uint8`, `uint16`, `uint32`, `uint64` | json integer | Integer data is represented as a json integer | 1,[1,2,3,4] |
+| `float16`, `float32`, `float64` | json float | Float data is represented as a json float | 1.0,[[1.2, 2.3], [3.0, 4.5]] |
+| `bool` | json bool | Bool data is represented as a json bool | true,false,[[true],[false]] |
+| `string` | json str | String output is represented as a json str | "news_car" |
+| `bytes` | base64 object | Binary output is converted to a base64 object | {"b64":"AQACAAIAAwADAAQA"} |
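The `bytes` row above wraps binary data in a `{"b64": ...}` object. A standard-library sketch of producing and consuming that representation (the `data` key is illustrative):

```python
import base64
import json

# Pack raw bytes into the {"b64": ...} form used by the RESTful interface.
payload = {"instances": [{"data": {"b64": base64.b64encode(b"\x01\x00\x02\x00").decode()}}]}
print(json.dumps(payload))

# Unpack a bytes output returned by the service.
reply_value = {"b64": "AQACAAIAAwADAAQA"}
print(base64.b64decode(reply_value["b64"]))  # b'\x01\x00\x02\x00\x02\x00\x03\x00\x03\x00\x04\x00'
```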
+
+## Accessing the SSL/TLS-enabled RESTful service
+
+MindSpore Serving supports the `SSL/TLS`-enabled `RESTful` service. The following takes one-way authentication as an example to show how to start and access an `SSL/TLS`-enabled `RESTful` service.
+
+Setting `verify_client` to `False` selects one-way authentication. To enable `SSL/TLS`, pass a `mindspore_serving.server.SSLConfig` object as the `ssl_config` parameter of `start_restful_server`. For other details, refer to [Accessing the SSL/TLS-enabled Serving service](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_grpc.html#ssl-tlsserving).
+
+```python
+import os
+import sys
+from mindspore_serving import server
+
+
+def start():
+    servable_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+    servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add",
+                                                 device_ids=(0, 1))
+    server.start_servables(servable_configs=servable_config)
+
+    ssl_config = server.SSLConfig(certificate="server.crt", private_key="server.key", custom_ca=None,
+                                  verify_client=False)
+
+    server.start_restful_server(address="127.0.0.1:5500", ssl_config=ssl_config)
+
+
+if __name__ == "__main__":
+    start()
+```
+
+We can use the `curl` tool or Python's `requests` library to access the `SSL/TLS`-enabled `RESTful` service of `Serving`. To access it with `curl`, try the following request:
+
+```text
+curl -X POST -d '${REQ_JSON_MESSAGE}' --cacert '${PATH_TO_CA_CERT_FILE}' https://${HOST}:${PORT}/model/${MODEL_NAME}/version/${VERSION}:${METHOD_NAME}
+```
+
+Example: request the `add_common` method of the `add` model as follows:
+
+```text
+curl -X POST -d '{"instances":[{"x1":[[1.0, 2.0], [3.0, 4.0]], "x2":[[1.0, 2.0], [3.0, 4.0]]}]}' --cacert ca.crt https://localhost:5500/model/add/version/1:add_common
+```
+
+Here the protocol needs to be set to `https`, and the `--cacert` option needs to be set to the path of the CA certificate file `ca.crt`.
+
+In addition, since this example uses a self-signed certificate, the `--insecure` option can be set instead to skip verification of the server's certificate, as follows:
+
+```text
+curl -X POST -d '{"instances":[{"x1":[[1.0, 2.0], [3.0, 4.0]], "x2":[[1.0, 2.0], [3.0, 4.0]]}]}' --insecure https://localhost:5500/model/add/version/1:add_common
+```
\ No newline at end of file
diff --git a/tutorials/lite/requirements.txt b/tutorials/lite/requirements.txt
index ea17a9e73613ddd99cc31690ddcf283d9a721450..96cdfc3e0c7ee0ae6a01e59c1081111fdc792bb6 100644
--- a/tutorials/lite/requirements.txt
+++ b/tutorials/lite/requirements.txt
@@ -1,5 +1,5 @@
 sphinx >= 2.2.1, <= 2.4.4
-recommonmark
+myst_parser == 0.14.0
 sphinx-markdown-tables
 sphinx_rtd_theme
 jieba
\ No newline at end of file
diff --git a/tutorials/lite/source_en/conf.py b/tutorials/lite/source_en/conf.py
index b472aa71f0899d61ef358f7388dcadfe8a2c7706..319a337a8035f0f296458c6dd2bda075cc670e82 100644
--- a/tutorials/lite/source_en/conf.py
+++ b/tutorials/lite/source_en/conf.py
@@ -30,7 +30,7 @@ release = 'master'
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'recommonmark',
+    'myst_parser',
     'sphinx_markdown_tables',
 ]
diff --git a/tutorials/lite/source_en/faq.md b/tutorials/lite/source_en/faq.md
index a521fa183ed789fe507b3b747f46191135cb40f4..6dcbc2dbda5f71108afa4d73547d13ae9e46db8e 100644
--- a/tutorials/lite/source_en/faq.md
+++ b/tutorials/lite/source_en/faq.md
@@ -17,7 +17,7 @@ A: Currently NPU only supports system ROM version EMUI>=11. Chip support inclu

 **Q: Why does the static library after cutting with the cropper tool fail to compile during integration?**

-A: Currently the cropper tool only supports CPU libraries, that is, `-e CPU` is specified in the compilation command. For details, please refer to the [Use clipping tool to reduce library file size](https://www.mindspore.cn/tutorial/lite/en/master/use/cropper_tool.html) document.
+A: Currently the cropper tool only supports CPU and GPU libraries. For details, please refer to the [Use clipping tool to reduce library file size](https://www.mindspore.cn/tutorial/lite/en/master/use/cropper_tool.html) document.
diff --git a/tutorials/lite/source_en/index.rst b/tutorials/lite/source_en/index.rst
index 34411b87d922505fcfe54f85826dca83865dde10..2e66430dbaa48f44d7ae1c3893e5a82c7eb31153 100644
--- a/tutorials/lite/source_en/index.rst
+++ b/tutorials/lite/source_en/index.rst
@@ -58,6 +58,8 @@ Using MindSpore on Mobile and IoT
    use/benchmark
    use/cropper_tool
+   use/visual_tool
+   use/obfuscator_tool

 .. toctree::
    :glob:
diff --git a/tutorials/lite/source_en/operator_list_lite.md b/tutorials/lite/source_en/operator_list_lite.md
index 8494dee1ded0204324e60504826c38e4743681ed..954d6e82a4d84159d2f3e2e6afb4b7e9c2f616ba 100644
--- a/tutorials/lite/source_en/operator_list_lite.md
+++ b/tutorials/lite/source_en/operator_list_lite.md
@@ -6,171 +6,172 @@

 This article lists the operators supported by MindSpore Lite.

-| Operation<br> | CPU<br>FP16 | CPU<br>FP32 | CPU<br>Int8 | CPU<br>UInt8 | GPU<br>FP16 | GPU<br>FP32 | NPU<br> | TensorFlow Lite<br>operators supported | Caffe Lite<br>operators supported | Onnx Lite<br>operators supported | TensorFlow<br>operators supported |
-|-----------------------|:----------:|:----------:|:----------:|:-----------:|:----------:|:-------------------:|:----------:|----------|---------|---------|---------|
-| Abs | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Abs | | Abs | Abs |
-| Add | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Add | | Add, Int8Add | Add, AddV2 |
-| AddGrad | | ✅ | | | | | | | | | |
-| AddN | | ✅ | | | | | | AddN | | | |
-| Assert | | ✅ | | | | | | | | | Assert |
-| Argmax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Argmax | ArgMax | ArgMax | Argmax |
-| Argmin | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Argmin | | | ArgMin |
-| AvgPool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | MeanPooling | Pooling | AveragePool,<br>GlobalAveragePool,<br>Int8AveragePool | AvgPool |
-| AvgPoolGrad | | ✅ | | | | | | | | | |
-| BatchNorm | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | BatchNorm | BatchNormalization | |
-| BatchNormGrad | | ✅ | | | | | | | | | |
-| BatchToSpace | | ✅ | ✅ | ✅ | ✅ | ✅ | | BatchToSpace,<br>BatchToSpaceND | | | BatchToSpace,<br>BatchToSpaceND |
-| BiasAdd | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | BiasAdd | BiasAdd |
-| BiasAddGrad | | ✅ | | | | | | | | | |
-| Broadcast | | ✅ | | | | | | BroadcastTo | | Expand | |
-| Cast | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Cast,<br>QUANTIZE,<br>DEQUANTIZE | | Cast | Cast |
-| Ceil | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Ceil | | Ceil | Ceil |
-| Concat | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Concat | Concat | Concat | ConcatV2 |
-| ConstantOfShape | | ✅ | | | | | | | | ConstantOfShape | |
-| Conv2d | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Conv2D | Convolution | Conv, Int8Conv,<br>ConvRelu,<br>Int8ConvRelu | Conv2D |
-| Conv2dGrad | | ✅ | | | | | | | | | |
-| Conv2dTranspose | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | DeConv2D | Deconvolution | ConvTranspose | Conv2DBackpropInput |
-| Conv2dTransposeGrad | | ✅ | | | | | | | | | |
-| Cos | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Cos | | Cos | Cos |
-| Crop | ✅ | ✅ | ✅ | ✅ | | | | | Crop | | |
-| CropAndResize | | ✅ | | | | | ✅ | | | | CropAndResize |
-| CumSum | | ✅ | | | | | | | | | Cumsum |
-| CustomExtractFeatures | | ✅ | | | | | | ExtractFeatures | | | |
-| CustomNormalize | | ✅ | | | | | | Normalize | | | |
-| CustomPredict | | ✅ | | | | | | Predict | | | |
-| DeDepthwiseConv2D | | ✅ | ✅ | ✅ | | | | | Deconvolution | | |
-| DepthToSpace | | ✅ | ✅ | ✅ | ✅ | ✅ | | DepthToSpace | | DepthToSpace | |
-| DepthwiseConv2dNative | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | DepthwiseConv2D | Convolution | | DepthwiseConv2dNative |
-| DetectionPostProcess | | ✅ | ✅ | ✅ | | | | Custom | | | |
-| Div | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Div, RealDiv | | Div | Div, RealDiv |
-| DivGrad | | ✅ | | | | | | | | | |
-| Eltwise | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Eltwise | Sum, Max[3] | |
-| Elu | | ✅ | | | | | | | Elu | Elu,<br>NonMaxSuppression | NonMaxSuppressionV3 |
-| EluGrad | | ✅ | | | | | | | | | |
-| Equal | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Equal | | Equal | Equal |
-| Exp | | ✅ | | | ✅ | ✅ | | Exp | Exp | Exp | Exp |
-| ExpandDims | ✅ | ✅ | ✅ | ✅ | | | ✅ | ExpandDims | | | ExpandDims |
-| Fill | | ✅ | | | | | | Fill | | | Fill |
-| Flatten | ✅ | ✅ | | | | | | | Flatten | | |
-| Floor | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | flOOR | | Floor | Floor |
-| FloorDiv | ✅ | ✅ | | | ✅ | ✅ | ✅ | FloorDiv | | | FloorDiv |
-| FloorMod | ✅ | ✅ | | | ✅ | ✅ | ✅ | FloorMod | | | FloorMod |
-| FullConnection | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | FullyConnected | InnerProduct | | |
-| FusedBatchNorm | ✅ | ✅ | ✅ | ✅ | | | ✅ | FusedBatchNorm | | | FusedBatchNorm,<br>FusedBatchNormV3 |
-| GatherNd | | ✅ | ✅ | ✅ | ✅ | ✅ | | GatherND | | | GatherNd |
-| Gather | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Gather | | Gather | GatherV2 |
-| Greater | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Greater | | Greater | Greater |
-| GreaterEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | GreaterEqual | | | GreaterEqual |
-| GRU | ✅ | ✅ | | | | | | | | | |
-| HardTanh | ✅ | ✅ | | | | | | | | | |
-| HashtableLookup | | ✅ | | | | | | HashtableLookup | | | |
-| HSigmoid | | ✅ | | ✅ | | | | | | | |
-| Hswish | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | HardSwish | | | |
-| HswishGrad | | ✅ | | | | | | | | | |
-| InstanceNorm | ✅ | ✅ | | | | | | InstanceNorm | | | |
-| InvertPermutation | | ✅ | | | | | | | | | InvertPermutation |
-| L2Norm | | ✅ | ✅ | | | | | L2_NORMALIZATION | | | |
-| LayerNorm | | ✅ | ✅ | | | | | | | | |
-| LeakyReLU | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | LeakyRelu | | LeakyRelu | LeakyRelu |
-| LeakyReLUGrad | | ✅ | | | | | | | | | |
-| Less | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Less | | Less | Less |
-| LessEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | LessEqual | | | LessEqual |
-| LRN | | ✅ | | | | | | LocalResponseNorm | | Lrn, LRN | |
-| Log | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Log | | Log | Log |
-| LogGrad | ✅ | ✅ | | | | | | | | | |
-| LogicalAnd | ✅ | ✅ | | | ✅ | ✅ | ✅ | LogicalAnd | | And | LogicalAnd |
-| LogicalNot | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | LogicalNot | | Not | LogicalNot |
-| LogicalOr | ✅ | ✅ | | | ✅ | ✅ | ✅ | LogicalOr | | Or | LogicalOr |
-| LshProjection | | ✅ | | | | | | LshProjection | | | |
-| LSTM | ✅ | ✅ | | | | | | | | LSTM | |
-| MatMul | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | MatMul | MatMul,<br>BatchMatMul |
-| MatMulGrad | | ✅ | | | | | | | | | |
-| Maximum | ✅ | ✅ | | | ✅ | ✅ | ✅ | Maximum | | | Maximum |
-| MaximumGrad | | ✅ | | | | | | | | | |
-| MaxPool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | MaxPooling | Pooling | MaxPool,<br>GlobalMaxPool | MaxPool |
-| MaxPoolGrad | | ✅ | | | | | | | | | |
-| Merge | ✅ | ✅ | | | | | | | | | Merge |
-| Minimum | ✅ | ✅ | | | ✅ | ✅ | ✅ | Minimum | | Min | Minimum |
-| MinimumGrad | | ✅ | | | | | | | | | |
-| Mul | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Mul | | Mul | Mul |
-| MulGrad | | ✅ | | | | | | | | | |
-| Neg | ✅ | ✅ | | | ✅ | ✅ | ✅ | Neg | | Neg | |
-| NegGrad | | ✅ | | | | | | | | | |
-| NotEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | NotEqual | | | NotEqual |
-| OneHot | | ✅ | | | ✅ | ✅ | | OneHot | | OneHot | OneHot |
-| Pad | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Pad, MirrorPad | | Pad | MirrorPad,<br>Pad |
-| Pow | | ✅ | ✅ | ✅ | ✅ | ✅ | | Pow | Power | Pow[2] | Pow |
-| PowGrad | | ✅ | | | | | | | | | |
-| PReLU | | ✅ | | | ✅ | ✅ | | PRELU | PReLU | PRelu | |
-| RandomStandardNormal | | ✅ | | | | | | | | | RandomStandardNormal |
-| RandomUniform | | ✅ | | | | | | | | | RandomUniform |
-| Range | | ✅ | | | | | | Range | | | Range,<br>RaggedRange |
-| Rank | | ✅ | | | | | | Rank | | | Rank |
-| Reciprocal | ✅ | ✅ | ✅ | | | | ✅ | | | | |
-| ReduceAll | | ✅ | | | | | | | | | All |
-| ReduceASum | | ✅ | | | ✅ | ✅ | | | Reduction | | |
-| ReduceMax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | ReduceMax | | ReduceMax | Max |
-| ReduceMean | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Mean | Reduction | ReduceMean | Mean |
-| ReduceMin | | ✅ | ✅ | ✅ | ✅ | ✅ | | ReduceMin | | ReduceMin | Min |
-| ReduceProd | | ✅ | ✅ | ✅ | ✅ | ✅ | | ReduceProd | | ReduceProd | Prod |
-| ReduceSum | | ✅ | ✅ | ✅ | ✅ | ✅ | | Sum | Reduction | ReduceSum | Sum |
-| ReduceSumSquare | | ✅ | ✅ | ✅ | | | | | Reduction | ReduceSumSquare | |
-| ReLU | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Relu | ReLU | Relu | Relu |
-| ReLUGrad | ✅ | ✅ | | | | | | | | | |
-| ReLU6 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Relu6 | ReLU6 | Clip[1] | Relu6 |
-| ReLU6Grad | ✅ | ✅ | | | | | | | | | |
-| Reshape | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Reshape | Reshape | Reshape,<br>Flatten | Reshape |
-| Resize | | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ResizeBilinear,<br>NearestNeighbor | Interp | | ResizeBilinear,<br>ResizeBicubic,<br>ResizeNearestNeighbor |
-| ResizeGrad | | ✅ | | | | | | | | | |
-| Reverse | | ✅ | | | | | | reverse | | | ReverseV2 |
-| ReverseSequence | | ✅ | | | | | | ReverseSequence | | | ReverseSequence |
-| Round | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Round | | Round | Round |
-| Rsqrt | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Rsqrt | | | Rsqrt |
-| Select | | ✅ | | | | | | | | | Select |
-| Selu | | | | | | | | | | | Selu |
-| Scale | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Scale | | |
-| ScatterNd | | ✅ | | | | | | ScatterNd | | | |
-| Shape | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Shape | | Shape | Shape |
-| Sigmoid | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Logistic | Sigmoid | Sigmoid | Sigmoid |
-| SigmoidGrad | ✅ | ✅ | | | | | | | | | |
-| Sin | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sin | | Sin | Sin |
-| Size | | ✅ | | | | | | | | | Size |
-| Slice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Slice | Slice | Slice | Slice |
-| SkipGram | | ✅ | | | | | | SKipGram | | | |
-| Softmax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Softmax | Softmax | Softmax | Softmax |
-| SoftmaxGrad | | ✅ | | | | | | | | | |
-| Softplus | | ✅ | | | | | | | | | Softplus |
-| SpaceToBatch | | ✅ | ✅ | ✅ | ✅ | ✅ | | SpaceToBatch | | | |
-| SpaceToBatchND | | ✅ | ✅ | ✅ | ✅ | ✅ | | SpaceToBatchND | | | SpaceToBatchND |
-| SpaceToDepth | | ✅ | | | ✅ | ✅ | | SpaceToDepth | | SpaceToDepth | |
-| SparseToDense | | ✅ | | | ✅ | ✅ | | SpareToDense | | | |
-| Split | ✅ | ✅ | ✅ | ✅ | | | ✅ | Split, SplitV | | Split | Split, SplitV |
-| Sqrt | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sqrt | | Sqrt | Sqrt |
-| Square | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Square | | | Square |
-| SquaredDifference | ✅ | ✅ | | | ✅ | ✅ | ✅ | SquaredDifference | | | SquaredDifference |
-| Squeeze | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Squeeze | | Squeeze | Squeeze |
-| StridedSlice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | StridedSlice | | | StridedSlice |
-| Stack | ✅ | ✅ | | | ✅ | ✅ | | Stack | | | Pack |
-| Sub | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sub | | Sub | Sub |
-| SubGrad | | ✅ | | | | | | | | | |
-| Swish | ✅ | ✅ | | | | | | | | | |
-| Switch | ✅ | ✅ | | | | | | | | | Switch |
-| Tanh | ✅ | ✅ | | | ✅ | ✅ | ✅ | Tanh | TanH | Tanh, Sign | Tanh |
-| TanhGrad | | ✅ | | | | | | | | | |
-| TensorListFromTensor | ✅ | ✅ | | | | | | | | | TensorListFromTensor |
-| TensorListGetItem | ✅ | ✅ | | | | | | | | | TensorListGetItem |
-| TensorListReserve | ✅ | ✅ | | | | | | | | | TensorListReserve |
-| TensorListSetItem | ✅ | ✅ | | | | | | | | | TensorListSetItem |
-| TensorListStack | ✅ | ✅ | | | | | | | | | TensorListStack |
-| Tile | ✅ | ✅ | | | | | ✅ | Tile | Tile | Tile | Tile |
-| TopK | | ✅ | ✅ | ✅ | | | | TopKV2 | | TopK | TopKV2 |
-| Transpose | ✅ | ✅ | ✅ | | ✅ | ✅ | ✅ | Transpose | Permute | Transpose | Transpose |
-| UniformReal | | ✅ | | | | | | | | | |
-| Unique | | ✅ | | | | | | Unique | | | |
-| Unsqueeze | ✅ | ✅ | ✅ | ✅ | | | ✅ | | | Unsqueeze | |
-| Unstack | | ✅ | | | | | | Unstack | | | |
-| Where | | ✅ | | | | | | Where | | | Where |
-| ZerosLike | | ✅ | | | | | | ZerosLike | | | ZerosLike |
+| Operation<br> | CPU<br>FP16 | CPU<br>FP32 | CPU<br>Int8 | CPU<br>UInt8 | GPU<br>FP16 | GPU<br>FP32 | NPU<br> | TensorRT<br> | TensorFlow Lite<br>operators supported | Caffe Lite<br>operators supported | Onnx Lite<br>operators supported | TensorFlow<br>operators supported |
+|-----------------------|:----------:|:----------:|:----------:|:-----------:|:----------:|:-------------------:|:----------:|:----------:|----------|---------|---------|---------|
+| Abs | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Abs | | Abs | Abs |
+| Add | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Add | | Add, Int8Add | Add, AddV2 |
+| AddGrad | | ✅ | | | | | | | | | | |
+| AddN | | ✅ | | | | | | | AddN | | | |
+| Assert | | ✅ | | | | | | | | | | Assert |
+| Argmax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Argmax | ArgMax | ArgMax | Argmax |
+| Argmin | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | Argmin | | | ArgMin |
+| AvgPool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | MeanPooling | Pooling | AveragePool,<br>GlobalAveragePool,<br>Int8AveragePool | AvgPool |
+| AvgPoolGrad | | ✅ | | | | | | | | | | |
+| BatchNorm | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | BatchNorm | BatchNormalization | |
+| BatchNormGrad | | ✅ | | | | | | | | | | |
+| BatchToSpace | | ✅ | ✅ | ✅ | ✅ | ✅ | | | BatchToSpace,<br>BatchToSpaceND | | | BatchToSpace,<br>BatchToSpaceND |
+| BiasAdd | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | BiasAdd | BiasAdd |
+| BiasAddGrad | | ✅ | | | | | | | | | | |
+| Broadcast | | ✅ | | | | | | | BroadcastTo | | Expand | |
+| Cast | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Cast,<br>QUANTIZE,<br>DEQUANTIZE | | Cast | Cast |
+| Ceil | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Ceil | | Ceil | Ceil |
+| Concat | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Concat | Concat | Concat | ConcatV2 |
+| ConstantOfShape | | ✅ | | | | | | | | | ConstantOfShape | |
+| Conv2d | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Conv2D | Convolution | Conv, Int8Conv,<br>ConvRelu,<br>Int8ConvRelu | Conv2D |
+| Conv2dGrad | | ✅ | | | | | | | | | | |
+| Conv2dTranspose | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | DeConv2D | Deconvolution | ConvTranspose | Conv2DBackpropInput |
+| Conv2dTransposeGrad | | ✅ | | | | | | | | | | |
+| Cos | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Cos | | Cos | Cos |
+| Crop | ✅ | ✅ | ✅ | ✅ | | | | | | Crop | | |
+| CropAndResize | | ✅ | | | | | ✅ | | | | | CropAndResize |
+| CumSum | | ✅ | | | | | | | | | | Cumsum |
+| CustomExtractFeatures | | ✅ | | | | | | | ExtractFeatures | | | |
+| CustomNormalize | | ✅ | | | | | | | Normalize | | | |
+| CustomPredict | | ✅ | | | | | | | Predict | | | |
+| DeDepthwiseConv2D | | ✅ | ✅ | ✅ | | | | | | Deconvolution | | |
+| DepthToSpace | | ✅ | ✅ | ✅ | ✅ | ✅ | | | DepthToSpace | | DepthToSpace | |
+| DepthwiseConv2dNative | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | DepthwiseConv2D | Convolution | | DepthwiseConv2dNative |
+| DetectionPostProcess | | ✅ | ✅ | ✅ | | | | | Custom | | | |
+| Div | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Div, RealDiv | | Div | Div, RealDiv |
+| DivGrad | | ✅ | | | | | | | | | | |
+| Eltwise | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Eltwise | Sum, Max[3] | |
+| Elu | | ✅ | | | | | | | | Elu | Elu,<br>NonMaxSuppression | NonMaxSuppressionV3 |
+| EluGrad | | ✅ | | | | | | | | | | |
+| Equal | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Equal | | Equal | Equal |
+| Exp | | ✅ | | | ✅ | ✅ | | | Exp | Exp | Exp | Exp |
+| ExpandDims | ✅ | ✅ | ✅ | ✅ | | | ✅ | | ExpandDims | | | ExpandDims |
+| Fill | | ✅ | | | | | | | Fill | | | Fill |
+| Flatten | ✅ | ✅ | | | | | | | | Flatten | | |
+| Floor | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | flOOR | | Floor | Floor |
+| FloorDiv | ✅ | ✅ | | | ✅ | ✅ | ✅ | | FloorDiv | | | FloorDiv |
+| FloorMod | ✅ | ✅ | | | ✅ | ✅ | ✅ | | FloorMod | | | FloorMod |
+| FullConnection | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | FullyConnected | InnerProduct | | |
+| FusedBatchNorm | ✅ | ✅ | ✅ | ✅ | | | ✅ | | FusedBatchNorm | | | FusedBatchNorm,<br>FusedBatchNormV3 |
+| GatherNd | | ✅ | ✅ | ✅ | ✅ | ✅ | | | GatherND | | | GatherNd |
+| Gather | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Gather | | Gather | GatherV2 |
+| Greater | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Greater | | Greater | Greater |
+| GreaterEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | GreaterEqual | | | GreaterEqual |
+| GRU | ✅ | ✅ | | | | | | | | | | |
+| HardTanh | ✅ | ✅ | | | | | | | | | | |
+| HashtableLookup | | ✅ | | | | | | | HashtableLookup | | | |
+| HSigmoid | | ✅ | | ✅ | | | | | | | | |
+| Hswish | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | HardSwish | | | |
+| HswishGrad | | ✅ | | | | | | | | | | |
+| InstanceNorm | ✅ | ✅ | | | | | | | InstanceNorm | | | |
+| InvertPermutation | | ✅ | | | | | | | | | | InvertPermutation |
+| L2Norm | | ✅ | ✅ | | | | | | L2_NORMALIZATION | | | |
+| LayerNorm | | ✅ | ✅ | | | | | | | | | |
+| LeakyReLU | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | LeakyRelu | | LeakyRelu | LeakyRelu |
+| LeakyReLUGrad | | ✅ | | | | | | | | | | |
+| Less | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Less | | Less | Less |
+| LessEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | LessEqual | | | LessEqual |
+| LRN | | ✅ | | | | | | | LocalResponseNorm | | Lrn, LRN | |
+| Log | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Log | | Log | Log |
+| LogGrad | ✅ | ✅ | | | | | | | | | | |
+| LogicalAnd | ✅ | ✅ | | | ✅ | ✅ | ✅ | | LogicalAnd | | And | LogicalAnd |
+| LogicalNot | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | LogicalNot | | Not | LogicalNot |
+| LogicalOr | ✅ | ✅ | | | ✅ | ✅ | ✅ | | LogicalOr | | Or | LogicalOr |
+| LogSoftmax | | ✅ | | | ✅ | ✅ | ✅ | | LogSoftmax | | LogSoftmax | |
+| LshProjection | | ✅ | | | | | | | LshProjection | | | |
+| LSTM | ✅ | ✅ | | | | | | | | | LSTM | |
+| MatMul | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | BatchMatMul | | MatMul,<br>Gemm | MatMul,<br>BatchMatMul |
+| MatMulGrad | | ✅ | | | | | | | | | | |
+| Maximum | ✅ | ✅ | | | ✅ | ✅ | ✅ | | Maximum | | | Maximum |
+| MaximumGrad | | ✅ | | | | | | | | | | |
+| MaxPool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | MaxPooling | Pooling | MaxPool,<br>GlobalMaxPool | MaxPool |
+| MaxPoolGrad | | ✅ | | | | | | | | | | |
+| Merge | ✅ | ✅ | | | | | | | | | | Merge |
+| Minimum | ✅ | ✅ | | | ✅ | ✅ | ✅ | | Minimum | | Min | Minimum |
+| MinimumGrad | | ✅ | | | | | | | | | | |
+| Mul | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Mul | | Mul | Mul |
+| MulGrad | | ✅ | | | | | | | | | | |
+| Neg | ✅ | ✅ | | | ✅ | ✅ | ✅ | | Neg | | Neg | |
+| NegGrad | | ✅ | | | | | | | | | | |
+| NotEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | NotEqual | | | NotEqual |
+| OneHot | | ✅ | | | ✅ | ✅ | | | OneHot | | OneHot | OneHot |
+| Pad | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Pad, MirrorPad, PadV2 | | Pad | MirrorPad, Pad, PadV2 |
+| Pow | | ✅ | ✅ | ✅ | ✅ | ✅ | | ✅ | Pow | Power | Pow[2] | Pow |
+| PowGrad | | ✅ | | | | | | | | | | |
+| PReLU | | ✅ | | | ✅ | ✅ | | | PRELU | PReLU | PRelu | |
+| RandomStandardNormal | | ✅ | | | | | | | | | | RandomStandardNormal |
+| RandomUniform | | ✅ | | | | | | | | | | RandomUniform |
+| Range | | ✅ | | | | | | | Range | | | Range,<br>RaggedRange |
+| Rank | | ✅ | | | | | | | Rank | | | Rank |
+| Reciprocal | ✅ | ✅ | ✅ | | | | ✅ | | | | | |
+| ReduceAll | | ✅ | | | | | | | | | | All |
+| ReduceASum | | ✅ | | | ✅ | ✅ | | | | Reduction | | |
+| ReduceMax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | ReduceMax | | ReduceMax | Max |
+| ReduceMean | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | ✅ | Mean | Reduction | ReduceMean | Mean |
+| ReduceMin | | ✅ | ✅ | ✅ | ✅ | ✅ | | | ReduceMin | | ReduceMin | Min |
+| ReduceProd | | ✅ | ✅ | ✅ | ✅ | ✅ | | | ReduceProd | | ReduceProd | Prod |
+| ReduceSum | | ✅ | ✅ | ✅ | ✅ | ✅ | | | Sum | Reduction | ReduceSum | Sum |
+| ReduceSumSquare | | ✅ | ✅ | ✅ | | | | | | Reduction | ReduceSumSquare | |
+| ReLU | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Relu | ReLU | Relu | Relu |
+| ReLUGrad | ✅ | ✅ | | | | | | | | | | |
+| ReLU6 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Relu6 | ReLU6 | Clip[1] | Relu6 |
+| ReLU6Grad | ✅ | ✅ | | | | | | | | | | |
+| Reshape | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Reshape | Reshape | Reshape,<br>Flatten | Reshape |
+| Resize | | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | ResizeBilinear,<br>NearestNeighbor | Interp | | ResizeBilinear,<br>ResizeBicubic,<br>ResizeNearestNeighbor |
+| ResizeGrad | | ✅ | | | | | | | | | | |
+| Reverse | | ✅ | | | | | | | reverse | | | ReverseV2 |
+| ReverseSequence | | ✅ | | | | | | | ReverseSequence | | | ReverseSequence |
+| Round | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Round | | Round | Round |
+| Rsqrt | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Rsqrt | | | Rsqrt |
+| Select | | ✅ | | | | | | | | | | Select |
+| Selu | | | | | | | | | | | | Selu |
+| Scale | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Scale | | |
+| ScatterNd | | ✅ | | | | | | | ScatterNd | | | |
+| Shape | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Shape | | Shape | Shape |
+| Sigmoid | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Logistic | Sigmoid | Sigmoid | Sigmoid |
+| SigmoidGrad | ✅ | ✅ | | | | | | | | | | |
+| Sin | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Sin | | Sin | Sin |
+| Size | | ✅ | | | | | | | | | | Size |
+| Slice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Slice | Slice | Slice | Slice |
+| SkipGram | | ✅ | | | | | | | SKipGram | | | |
+| Softmax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Softmax | Softmax | Softmax | Softmax |
+| SoftmaxGrad | | ✅ | | | | | | | | | | |
+| Softplus | | ✅ | | | | | | | | | | Softplus |
+| SpaceToBatch | | ✅ | ✅ | ✅ | ✅ | ✅ | | | SpaceToBatch | | | |
+| SpaceToBatchND | | ✅ | ✅ | ✅ | ✅ | ✅ | | | SpaceToBatchND | | | SpaceToBatchND |
+| SpaceToDepth | | ✅ | | | ✅ | ✅ | | | SpaceToDepth | | SpaceToDepth | |
+| SparseToDense | | ✅ | | | ✅ | ✅ | | | SpareToDense | | | |
+| Split | ✅ | ✅ | ✅ | ✅ | | | ✅ | | Split, SplitV | | Split | Split, SplitV |
+| Sqrt | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sqrt | | Sqrt | Sqrt |
+| Square | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Square | | | Square |
+| SquaredDifference | ✅ | ✅ | | | ✅ | ✅ | ✅ | | SquaredDifference | | | SquaredDifference |
+| Squeeze | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | ✅ | Squeeze | | Squeeze | Squeeze |
+| StridedSlice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | StridedSlice | | Slice,<br>DynamicSlice | StridedSlice |
+| Stack | ✅ | ✅ | | | ✅ | ✅ | | | Stack | | | Pack |
+| Sub | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sub | | Sub | Sub |
+| SubGrad | | ✅ | | | | | | | | | | |
+| Swish | ✅ | ✅ | | | | | | | | | | |
+| Switch | ✅ | ✅ | | | | | | | | | | Switch |
+| Tanh | ✅ | ✅ | | | ✅ | ✅ | ✅ | ✅ | Tanh | TanH | Tanh, Sign | Tanh |
+| TanhGrad | | ✅ | | | | | | | | | | |
+| TensorListFromTensor | ✅ | ✅ | | | | | | | | | | TensorListFromTensor |
+| TensorListGetItem | ✅ | ✅ | | | | | | | | | | TensorListGetItem |
+| TensorListReserve | ✅ | ✅ | | | | | | | | | | TensorListReserve |
+| TensorListSetItem | ✅ | ✅ | | | | | | | | | | TensorListSetItem |
+| TensorListStack | ✅ | ✅ | | | | | | | | | | TensorListStack |
+| Tile | ✅ | ✅ | | | | | ✅ | | Tile | Tile | Tile | Tile |
+| TopK | | ✅ | ✅ | ✅ | | | | | TopKV2 | | TopK | TopKV2 |
+| Transpose | ✅ | ✅ | ✅ | | ✅ | ✅ | ✅ | ✅ | Transpose | Permute | Transpose | Transpose |
+| UniformReal | | ✅ | | | | | | | | | | |
+| Unique | | ✅ | | | | | | | Unique | | | |
+| Unsqueeze | ✅ | ✅ | ✅ | ✅ | | | ✅ | ✅ | | | Unsqueeze | |
+| Unstack | | ✅ | | | | | | | Unstack | | | |
+| Where | | ✅ | | | | | | | Where | | Where | Where |
+| ZerosLike | | ✅ | | | | | | | ZerosLike | | | ZerosLike |
+| Other operators supported by the converter.[4] | | | | | | | | | | | Loop, Dropout, If | Dropout, Enter,<br>Exit, If,<br>IsFinite,<br>LinSpace,<br>LoopCond,<br>NextIteration,<br>StatelessIf,<br>StatelessWhile,<br>While |

 [1] Clip: Only support converting clip(0, 6) to Relu6.
diff --git a/tutorials/lite/source_en/quick_start/image_segmentation.md b/tutorials/lite/source_en/quick_start/image_segmentation.md
index 1b16ba26f00f979394aaed88e02848d5aea972ab..101677be8f37a13d6e03d2ef5c7f749774568102 100644
--- a/tutorials/lite/source_en/quick_start/image_segmentation.md
+++ b/tutorials/lite/source_en/quick_start/image_segmentation.md
@@ -119,13 +119,13 @@ app

 ### Configuring MindSpore Lite Dependencies

-Related library files are required for Android to call MindSpore Android AAR. You can use MindSpore Lite [source code](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html) to generate the `mindspore-lite-{version}-inference-android.tar.gz` library file package (including the `mindspore-lite-{version}.aar` library file) and decompress it.
+Related library files are required for Android to call MindSpore Android AAR. You can use MindSpore Lite [source code](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html) to generate the `mindspore-lite-{version}-android.tar.gz` library file package (including the `mindspore-lite-{version}.aar` library file) and decompress it.

 > version: version number in the output file, which is the same as the version number of the built branch code.

 In this example, the MindSpore Lite version file is automatically downloaded using the `app/download.gradle` file during the build process and stored in the `app/libs` directory.

-Note: If the automatic download fails, manually download the related library file [mindspore-lite-{version}-inference-android.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html), decompress it, and save it to the corresponding directory.
+Note: If the automatic download fails, manually download the related library file [mindspore-lite-{version}-android.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html), decompress it, and save it to the corresponding directory.

 ### Downloading and Deploying the Model File
diff --git a/tutorials/lite/source_en/quick_start/quick_start.md b/tutorials/lite/source_en/quick_start/quick_start.md
index 3b69b70e631b9becee52ab6fce4ede4cb762d971..ec414a8e63d6f8692f2755aa9e74dec4b33276cb 100644
--- a/tutorials/lite/source_en/quick_start/quick_start.md
+++ b/tutorials/lite/source_en/quick_start/quick_start.md
@@ -167,17 +167,17 @@ Create a link to the `.so` library file in the `app/CMakeLists.txt` file:

 ```text
 # ============== Set MindSpore Dependencies. =============
 include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp)
-include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference)
-include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/include)
-include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/minddata/include)
+include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime)
+include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/include)
+include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/dataset/include)

 add_library(mindspore-lite SHARED IMPORTED)
 add_library(minddata-lite SHARED IMPORTED)

 set_target_properties(mindspore-lite PROPERTIES IMPORTED_LOCATION
-    ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/lib/libmindspore-lite.so)
+    ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/lib/libmindspore-lite.so)
 set_target_properties(minddata-lite PROPERTIES IMPORTED_LOCATION
-    ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/minddata/lib/libminddata-lite.so)
+    ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/dataset/lib/libminddata-lite.so)
 # --------------- MindSpore Lite set End. --------------------

 # Link target library.
diff --git a/tutorials/lite/source_en/quick_start/quick_start_cpp.md b/tutorials/lite/source_en/quick_start/quick_start_cpp.md
index e8c97de686d22fbbaab063f88211831910e66d95..a18476b35723a0c45f2931d46c27031bf881ea9b 100644
--- a/tutorials/lite/source_en/quick_start/quick_start_cpp.md
+++ b/tutorials/lite/source_en/quick_start/quick_start_cpp.md
@@ -57,7 +57,7 @@ The MindSpore Lite inference steps are as follows:
   bash build.sh
   ```

-  > If the MindSpore Lite inference framework fails to be downloaded by using this build script, manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Ubuntu-x64, and copy the `libmindspore-lite.a` file in the decompressed lib directory to the `mindspore/lite/examples/quick_start_cpp/lib` directory. Also copy the files from `inference/include` to the `mindspore/lite/examples/quick_start_cpp/include` directory.
+  > If the MindSpore Lite inference framework fails to be downloaded by using this build script, manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Ubuntu-x64, and copy the `libmindspore-lite.a` file in the decompressed lib directory to the `mindspore/lite/examples/quick_start_cpp/lib` directory. Also copy the files from `runtime/include` to the `mindspore/lite/examples/quick_start_cpp/include` directory.
  >
  > If the MobileNetV2 model fails to be downloaded, manually download the model file [mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms) and copy it to the `mindspore/lite/examples/quick_start_cpp/model` directory.
  >
>
@@ -89,7 +89,7 @@ The MindSpore Lite inference steps are as follows:
 
 - Build
 
-    - Download the library: Manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-win-x64.zip](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Windows-x64. Copy the `libmindspore-lite.a` file in the decompressed `inference/lib` directory to the `mindspore/lite/examples/quick_start_cpp/lib` project directory, and change the include directory to the `mindspore/lite/examples/quick_start_cpp/include` project directory. (Note: The `lib` and `include` directories under the project need to be created manually)
+    - Download the library: Manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-win-x64.zip](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Windows-x64. Copy the `libmindspore-lite.a` file in the decompressed `runtime/lib` directory to the `mindspore/lite/examples/quick_start_cpp/lib` project directory, and change the include directory to the `mindspore/lite/examples/quick_start_cpp/include` project directory. (Note: The `lib` and `include` directories under the project need to be created manually)
 
    - Download the model: Manually download the model file [mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms) and copy it to the `mindspore/lite/examples/quick_start_cpp/model` directory.
 
diff --git a/tutorials/lite/source_en/quick_start/quick_start_java.md b/tutorials/lite/source_en/quick_start/quick_start_java.md
index f84e5283c34511e69dc9a7e53451e9f737e017f9..590201ee0c41cf825e95cc2dd5acc69a0a6ec6e0 100644
--- a/tutorials/lite/source_en/quick_start/quick_start_java.md
+++ b/tutorials/lite/source_en/quick_start/quick_start_java.md
@@ -52,7 +52,7 @@ The MindSpore Lite inference steps are as follows:
     bash build.sh
     ```
 
-    > If the MindSpore Lite inference framework fails to be downloaded, manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Ubuntu-x64. Decompress the package and obtain the `libmindspore-lite.so` file in the `lib\jar` directory. Copy `libmindspore-lite-jni.so` and `libmindspore-lite-java.jar` to the `mindspore/lite/examples/quick_start_java/lib` directory.
+    > If the MindSpore Lite inference framework fails to be downloaded, manually download the MindSpore Lite model inference framework [mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/en/master/use/downloads.html) whose hardware platform is CPU and operating system is Ubuntu-x64. Decompress the package and obtain `libmindspore-lite-jni.so` and `libmindspore-lite-java.jar` in the `runtime/lib` directory, then copy them to the `mindspore/lite/examples/quick_start_java/lib` directory.
    >
    > If the MobileNetV2 model fails to be downloaded, manually download the model file [mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms) and copy it to the `mindspore/lite/examples/quick_start_java/model/` directory.
>
diff --git a/tutorials/lite/source_en/quick_start/train_lenet.md b/tutorials/lite/source_en/quick_start/train_lenet.md
index 32ca197db690d0859fa6f19e528edff24691d6a8..3bf213eeba109e50b9ac2c96a78383484971e395 100644
--- a/tutorials/lite/source_en/quick_start/train_lenet.md
+++ b/tutorials/lite/source_en/quick_start/train_lenet.md
@@ -77,17 +77,17 @@ cd ./mindspore
 
 The `mindspore/lite/examples/train_lenet` directory relative to the MindSpore Lite source code contains this demo's source code.
 
-Go to the [MindSpore Lite Download Page](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html) to download the mindspore-lite-{version}-linux-x64.tar.gz and mindspore-lite-{version}-android-aarch64.tar.gz. The mindspore-lite-{version}-linux-x64.tar.gz is the MindSpore Lite install package for x86 platform, it contains the converter tool `converter_lite`, this demo uses it to converte `MIDIR` model to `.ms` which is supported by MindSpore Lite; The mindspore-lite-{version}-android-aarch64.tar.gz is the MindSpore Lite install package for Android, it contains training runtime library `libmindspore-lite.so`, this demo uses it to train model. After download these two files, you need rename the mindspore-lite-{version}-linux-x64.tar.gz to mindspore-lite-{version}-train-linux-x64.tar.gz and rename the mindspore-lite-{version}-android-aarch64.tar.gz to mindspore-lite-{version}-train-android-aarch64.tar.gz. Then put the renamed files to the `output` directory relative to MindSpore Lite source code(if there is no `output` directory,you should create it).
+Go to the [MindSpore Lite Download Page](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html) to download mindspore-lite-{version}-linux-x64.tar.gz and mindspore-lite-{version}-android-aarch64.tar.gz. The mindspore-lite-{version}-linux-x64.tar.gz package is the MindSpore Lite install package for the x86 platform; it contains the converter tool `converter_lite`, which this demo uses to convert the `MINDIR` model to the `.ms` format supported by MindSpore Lite. The mindspore-lite-{version}-android-aarch64.tar.gz package is the MindSpore Lite install package for Android; it contains the training runtime library `libmindspore-lite.so`, which this demo uses to train the model. After downloading these two files, put them into the `output` directory relative to the MindSpore Lite source code (if there is no `output` directory, you should create it).
 
 Suppose these packages are downloaded in the `/Downloads` directory. The `Linux` commands for the operations above are as follows:
 
 ```bash
 mkdir output
-cp /Downloads/mindspore-lite-{version}-linux-x64.tar.gz output/mindspore-lite-{version}-train-linux-x64.tar.gz
-cp /Downloads/mindspore-lite-{version}0-android-aarch64.tar.gz output/mindspore-lite-{version}-train-android-aarch64.tar.gz
+cp /Downloads/mindspore-lite-{version}-linux-x64.tar.gz output/
+cp /Downloads/mindspore-lite-{version}-android-aarch64.tar.gz output/
 ```
 
-You can also [compile from source](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html) to generate the training package for x86 platform mindspore-lite-{version}-train-linux-x64.tar.gz and for Andorid platform mindspore-lite-{version}-train-android-aarch64.tar.gz.
These packages will directly generated in `output` directory and you should make sure that in the `output` directory both the two packages exist.
+You can also [compile from source](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html) to generate the training package for the x86 platform, mindspore-lite-{version}-linux-x64.tar.gz, and for the Android platform, mindspore-lite-{version}-android-aarch64.tar.gz. These packages will be generated directly in the `output` directory; make sure that both packages exist there.
 
 ### Connect Android Device
 
diff --git a/tutorials/lite/source_en/use/asic.rst b/tutorials/lite/source_en/use/asic.rst
index 007f4257e71cf6e599fa5316651602637602f3a2..b9f2136c9c072c5b3fa4c9fadc129125c913f01c 100644
--- a/tutorials/lite/source_en/use/asic.rst
+++ b/tutorials/lite/source_en/use/asic.rst
@@ -4,4 +4,5 @@ Application Specific Integrated Circuit Integration Instructions
 .. toctree::
    :maxdepth: 1
 
-   npu_info
\ No newline at end of file
+   npu_info
+   tensorrt_info
diff --git a/tutorials/lite/source_en/use/benchmark_tool.md b/tutorials/lite/source_en/use/benchmark_tool.md
index bc3f630c0ab014cc30bb5a736f7caa36f811e838..472897b86bc68092cfe5e55eda6ff50f3c39ffe8 100644
--- a/tutorials/lite/source_en/use/benchmark_tool.md
+++ b/tutorials/lite/source_en/use/benchmark_tool.md
@@ -41,7 +41,7 @@ To use the Benchmark tool, you need to prepare the environment as follows:
 - Add the path of the dynamic library required by the inference code to the environment variable LD_LIBRARY_PATH.
 
     ```bash
-    export LD_LIBRARY_PATH=${PACKAGE_ROOT_PATH}/inference/lib:${LD_LIBRARY_PATH}
+    export LD_LIBRARY_PATH=${PACKAGE_ROOT_PATH}/runtime/lib:${LD_LIBRARY_PATH}
    ```
 
    ${PACKAGE_ROOT_PATH} is the compiled inference package path after decompressing.
 
diff --git a/tutorials/lite/source_en/use/benchmark_train_tool.md b/tutorials/lite/source_en/use/benchmark_train_tool.md
index ce96e98a378ad8caaac1621519bb93d5f4138d8a..5a358814c851ce733239b5fed5931421cb36884d 100644
--- a/tutorials/lite/source_en/use/benchmark_train_tool.md
+++ b/tutorials/lite/source_en/use/benchmark_train_tool.md
@@ -29,15 +29,15 @@ To use the `benchmark_train` tool, you need to prepare the environment as follows:
 
 - Compilation: Install build dependencies and build the MindSpore Lite training framework. The code of the `benchmark_train` tool is stored in the `mindspore/lite/tools/benchmark_train` directory of the MindSpore source code. For details about the build operations, see the [Environment Requirements](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#environment-requirements) and [Compilation Example](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#compilation-example) in the build document.
 
-- Configure environment variables: For details, see [Output Description](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#training-output-description) in the build document. Suppose the absolute path of MindSpore Lite training package you build is `/path/mindspore-lite-{version}-train-{os}-{arch}.tar.gz`, the commands to extract the package and configure the LD_LIBRARY_PATH variable are as follows:
+- Configure environment variables: For details, see [Output Description](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#training-output-description) in the build document.
Suppose the absolute path of the MindSpore Lite training package you built is `/path/mindspore-lite-{version}-{os}-{arch}.tar.gz`; the commands to extract the package and configure the LD_LIBRARY_PATH variable are as follows:
 
     ```bash
     cd /path
-    tar xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz
-    export LD_LIBRARY_PATH=/path/mindspore-lite-{version}-train-{os}-{arch}/train/lib:/path/mindspore-lite-{version}-train-{os}-{arch}/train/third_party/libjpeg-turbo/lib:${LD_LIBRARY_PATH}
+    tar xvf mindspore-lite-{version}-{os}-{arch}.tar.gz
+    export LD_LIBRARY_PATH=/path/mindspore-lite-{version}-{os}-{arch}/runtime/lib:/path/mindspore-lite-{version}-{os}-{arch}/runtime/third_party/libjpeg-turbo/lib:${LD_LIBRARY_PATH}
     ```
 
-The absolute path of the benchmark_train tool is `/path/mindspore-lite-{version}-train-{os}-{arch}/tools/benchmark_train/benchmark_train`.
+The absolute path of the benchmark_train tool is `/path/mindspore-lite-{version}-{os}-{arch}/tools/benchmark_train/benchmark_train`.
 
 ### Parameter Description
 
diff --git a/tutorials/lite/source_en/use/build.md b/tutorials/lite/source_en/use/build.md
index d350b084616bcc072cbb3bb04e2d9085363e461e..25e9fbb944dc315875675b695528ab1fed4972f4 100644
--- a/tutorials/lite/source_en/use/build.md
+++ b/tutorials/lite/source_en/use/build.md
@@ -1,6 +1,6 @@
 # Building MindSpore Lite
 
-`Windows` `Linux` `Android` `Environment Preparation` `Intermediate` `Expert`
+`Windows` `macOS` `Linux` `iOS` `Android` `Environment Preparation` `Intermediate` `Expert`
 
@@ -9,26 +9,22 @@
     - [Environment Requirements](#environment-requirements)
     - [Compilation Options](#compilation-options)
     - [Compilation Example](#compilation-example)
-    - [Inference Output Description](#inference-output-description)
-        - [Description of Converter's Directory Structure](#description-of-converters-directory-structure)
-        - [Description of Obfuscator's Directory Structure](#description-of-obfuscators-directory-structure)
-        - [Description of Runtime and Other tools' Directory Structure](#description-of-runtime-and-other-tools-directory-structure)
-    - [Training Output Description](#training-output-description)
-        - [Description of Training Runtime and Related Tools' Directory Structure](#description-of-training-runtime-and-related-tools-directory-structure)
+    - [Directory Structure](#directory-structure)
    - [Windows Environment Compilation](#windows-environment-compilation)
        - [Environment Requirements](#environment-requirements-1)
        - [Compilation Options](#compilation-options-1)
        - [Compilation Example](#compilation-example-1)
-       - [Output Description](#output-description)
-           - [Description of Runtime and Related Tools' Directory Structure](#description-of-runtime-and-related-tools-directory-structure)
-    - [Docker Environment Compilation](#docker-environment-compilation)
-        - [Environmental Preparation](#environmental-preparation)
-            - [Download the docker image](#download-the-docker-image)
-            - [Create a container](#create-a-container)
-            - [Enter the container](#enter-the-container)
+       - [Directory Structure](#directory-structure-1)
+    - [macOS Environment Compilation](#macos-environment-compilation)
+       - [Environment Requirements](#environment-requirements-2)
        - [Compilation Options](#compilation-options-2)
        - [Compilation Example](#compilation-example-2)
-       - [Output Description](#output-description-1)
+       - [Directory Structure](#directory-structure-2)
+    - [Docker Environment Compilation](#docker-environment-compilation)
+        - [Environmental Preparation](#environmental-preparation)
+        - [Compilation
Options](#compilation-options-3)
+        - [Compilation Example](#compilation-example-3)
+        - [Directory Structure](#directory-structure-3)
 
@@ -36,70 +32,47 @@
 
 This chapter introduces how to quickly compile MindSpore Lite, which includes the following modules:
 
-Modules in inference version:
+Modules in MindSpore Lite:
 
 | Module | Support Platform | Description |
 | --- | ---- | ---- |
-| converter | Linux, Windows | Model Conversion Tool |
-| runtime(cpp, java) | Linux, Windows, Android | Model Inference Framework(Windows platform does not support java version runtime) |
-| benchmark | Linux, Windows, Android | Benchmarking Tool |
-| cropper | Linux | Static library crop tool for libmindspore-lite.a |
-| minddata | Linux, Android | Image Processing Library |
-
-Modules in training version:
-
-| Module | Support Platform | Description |
-| --------------- | ---------------- | ------------------------------------------------ |
-| converter | Linux | Model Conversion Tool |
-| runtime(cpp) | Linux, Android | Model Train Framework(java is not support) |
-| cropper | Linux | Static library crop tool for libmindspore-lite.a |
-| minddata | Linux, Android | Image Processing Library |
-| benchmark_train | Linux, Android | Performance and Accuracy Validation |
-| obfuscator | Linux | Model Obfuscation Tool |
+| converter | Linux, Windows | Model Conversion Tool |
+| runtime(cpp, java) | Linux, Windows, Android, iOS | Model Inference Framework (the Windows platform does not support the Java runtime) |
+| benchmark | Linux, Windows, Android | Benchmarking Tool |
+| benchmark_train | Linux, Android | Performance and Accuracy Validation |
+| cropper | Linux | Static library crop tool for libmindspore-lite.a |
+| minddata | Linux, Android | Image Processing Library |
+| codegen | Linux | Model inference code generation tool |
+| obfuscator | Linux | Model Obfuscation Tool |
 
 ## Linux Environment Compilation
 
 ### Environment Requirements
 
 - The compilation environment supports Linux x86_64 only. Ubuntu 18.04.02 LTS is recommended.
-
-- Compilation dependencies of runtime(cpp):
-    - [CMake](https://cmake.org/download/) >= 3.18.3
+- Compilation dependencies of cpp:
    - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0
-    - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20
-    - [Git](https://git-scm.com/downloads) >= 2.28.0
-- Compilation dependencies of converter:
    - [CMake](https://cmake.org/download/) >= 3.18.3
-    - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0
-    - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20
    - [Git](https://git-scm.com/downloads) >= 2.28.0
-    - [Autoconf](http://ftp.gnu.org/gnu/autoconf/) >= 2.69
-    - [Libtool](https://www.gnu.org/software/libtool/) >= 2.4.6
-    - [LibreSSL](http://www.libressl.org/) >= 3.1.3
-    - [Automake](https://www.gnu.org/software/automake/) >= 1.11.6
-    - [Libevent](https://libevent.org) >= 2.0
-    - [OpenSSL](https://www.openssl.org/) >= 1.1.1
-
-- Compilation dependencies of runtime(java)
-    - [CMake](https://cmake.org/download/) >= 3.18.3
-    - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0
    - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20
-    - [Git](https://git-scm.com/downloads) >= 2.28.0
-    - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools)
+        - Configure environment variables: `export ANDROID_NDK=NDK path`.
+    - [DDK](https://developer.huawei.com/consumer/cn/doc/development/hiai-Library/ddk-download-0000001053590180) = V500.010
+        - Configure environment variables: `export HWHIAI_DDK=DDK path`.
+- Additional compilation dependencies of Java:
    - [Gradle](https://gradle.org/releases/) >= 6.6.1
+        - Configure environment variables: `export GRADLE_HOME=GRADLE path`.
+        - Add the bin directory to the PATH: `export PATH=${GRADLE_HOME}/bin:$PATH`.
    - [OpenJDK](https://openjdk.java.net/install/) >= 1.8
-
-> - To install and use `Android_NDK`, you need to configure environment variables. The command example is `export ANDROID_NDK=${NDK_PATH}/android-ndk-r20b`.
-> - After Gradle is installed, you need to add its installation path to the PATH: `export PATH=${GRADLE_PATH}/bin:$PATH`.
-> - To install the Android SDK via `Android command line tools`, you need to create a new directory first and configure its path to the environment variable in `${ANDROID_SDK_ROOT}`, then create SDK via `sdkmanager`: `./sdkmanager --sdk_root=$ {ANDROID_SDK_ROOT} "cmdline-tools;latest"`, and finally accept the license through `sdkmanager` under the `${ANDROID_SDK_ROOT}` directory: `yes | ./sdkmanager --licenses`.
-> - Compiling AAR relies on Android SDK Build-Tools, Android SDK Platform-Tools and other Android SDK related components. If the Android SDK in the environment does not have related components, the required dependencies will be automatically downloaded during compilation.
-> - When compiling the NPU operator, you need to download [DDK V500.010](https://developer.huawei.com/consumer/cn/doc/development/hiai-Library/ddk-download-0000001053590180), the directory where the compressed package is decompressed Set to the environment variable `${HWHIAI_DDK}`.
+        - Configure environment variables: `export JAVA_HOME=JDK path`.
+        - Add the bin directory to the PATH: `export PATH=${JAVA_HOME}/bin:$PATH`.
+    - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools)
+        - Create a new directory and configure the environment variable: `export ANDROID_SDK_ROOT=new directory`.
+        - Download `SDK Tools` and create the SDK via `sdkmanager`: `./sdkmanager --sdk_root=${ANDROID_SDK_ROOT} "cmdline-tools;latest"`.
+        - Accept the licenses through `sdkmanager` under the `${ANDROID_SDK_ROOT}` directory: `yes | ./sdkmanager --licenses`.
 
 ### Compilation Options
 
-MindSpore Lite provides a compilation script `build.sh` for one-click compilation, located in the root directory of MindSpore. This script can be used to compile the code of training and inference.
-
-The following describes the compilation parameter of `build.sh` and the options of `mindspore/lite/CMakeLists.txt`.
+The script `build.sh` in the root directory of MindSpore can be used to compile MindSpore Lite.
 
 #### The compilation parameter of `build.sh`
 
@@ -110,7 +83,7 @@ The following describes the compilation parameter of `build.sh` and the options
 | -d | If this parameter is set, the debug version is compiled. Otherwise, the release version is compiled. | None | None |
 | -i | If this parameter is set, incremental compilation is performed. Otherwise, full compilation is performed. | None | None |
 | -j[n] | Sets the number of threads used during compilation. Otherwise, the number of threads is set to 8 by default.
| Integer | 8 |
-| -a | Whether to enable AddressSanitizer | on、off | off |
+| -a | Whether to enable AddressSanitizer | on, off | off |
 
 > - When compiling the x86_64 version, if the JAVA_HOME environment variable is configured and Gradle is installed, the JAR package will be compiled at the same time.
 > - When the `-I` parameter changes, such as `-I x86_64` is converted to `-I arm64`, adding `-i` for parameter compilation does not take effect.
 
@@ -120,17 +93,16 @@ The following describes the compilation parameter of `build.sh` and the options
 
 | Option | Parameter Description | Value Range | Defaults |
 | -------- | ----- | ---- | ---- |
-| MSLITE_GPU_BACKEND | Set the GPU backend, only valid when `-I arm64` | opencl, vulkan, cuda, off | opencl |
-| MSLITE_ENABLE_NPU | Whether to compile NPU operator, only valid when `-I arm64` or `-I arm32` | on、off | on |
-| MSLITE_ENABLE_TRAIN | Whether to compile the training version | on、off | off |
-| MSLITE_ENABLE_SSE | Whether to enable SSE instruction set, only valid when `-I x86_64` | on、off | off |
-| MSLITE_ENABLE_AVX | Whether to enable AVX instruction set, only valid when `-I x86_64` | on、off | off |
-| MSLITE_ENABLE_CONVERTER | Whether to compile the model conversion tool, only valid when `-I x86_64` | on、off | on |
-| MSLITE_ENABLE_TOOLS | Whether to compile supporting tools | on、off | on |
-| MSLITE_ENABLE_TESTCASES | Whether to compile test cases | on、off | off |
+| MSLITE_GPU_BACKEND | Set the GPU backend, only opencl is valid when `-I arm64`, and only tensorrt is valid when `-I x86_64` | opencl, tensorrt, off | opencl when `-I arm64`, off when `-I x86_64` |
+| MSLITE_ENABLE_NPU | Whether to compile NPU operator, only valid when `-I arm64` or `-I arm32` | on, off | on |
+| MSLITE_ENABLE_TRAIN | Whether to compile the training version | on, off | on |
+| MSLITE_ENABLE_SSE | Whether to enable SSE instruction set, only valid when `-I x86_64` | on, off | off |
+| MSLITE_ENABLE_AVX | Whether to enable AVX instruction set, only valid when `-I x86_64` | on, off | off |
+| MSLITE_ENABLE_CONVERTER | Whether to compile the model conversion tool, only valid when `-I x86_64` | on, off | on |
+| MSLITE_ENABLE_TOOLS | Whether to compile supporting tools | on, off | on |
+| MSLITE_ENABLE_TESTCASES | Whether to compile test cases | on, off | off |
 
 > - The above options can be modified by setting the environment variable with the same name or the file `mindspore/lite/CMakeLists.txt`.
-> - Enabling MSLITE_ENABLE_TRAIN only generates the training version.
 > - After modifying the Option, adding the `-i` parameter for incremental compilation will not take effect.
 
 ### Compilation Example
 
@@ -168,142 +140,66 @@ Then, run the following commands in the root directory of the source code to compile MindSpore Lite.
 bash build.sh -A on -j32
 ```
 
-### Inference Output Description
+Finally, the following files will be generated in the `output/` directory:
 
-After the compilation is complete, go to the `mindspore/output` directory of the source code to view the file generated after compilation. The file is divided into the following parts.
+- `mindspore-lite-{version}-{os}-{arch}.tar.gz`: Contains the runtime and related tools.
 
-- `mindspore-lite-{version}-inference-{os}-{arch}.tar.gz`: Contains model inference framework runtime (cpp), and related tools.
-- `mindspore-lite-maven-{version}.zip`: Contains model reasoning framework AAR package.
+- `mindspore-lite-maven-{version}.zip`: The AAR package, which contains the runtime (Java).
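For reference, a decompression sketch (the same commands the removed output description used, updated to the new package names):

```bash
cd output
tar -xvf mindspore-lite-{version}-{os}-{arch}.tar.gz
unzip mindspore-lite-maven-{version}.zip
```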
> - version: Version of the output, consistent with that of the MindSpore. > - os: Operating system on which the output will be deployed. > - arch: System architecture on which the output will be deployed. -Execute the decompression command to obtain the compiled output: - -```bash -tar -xvf mindspore-lite-{version}-inference-{os}-{arch}.tar.gz -unzip mindspore-lite-maven-{version}.zip -``` - -#### Description of Converter's Directory Structure - -The conversion tool is only available under the `-I x86_64` compilation option, and the content includes the following parts: - -```text -mindspore-lite-{version}-inference-linux-x64 -└── tools - └── converter - ├── include - │ └── registry # Header files of customized op, parser and pass registration - ├── converter # Model conversion tool - │ └── converter_lite # Executable program - └── lib # The dynamic link library that converter depends - ├── libglog.so.0 # Dynamic library of Glog - └── libmslite_converter_plugin.so # Dynamic library of plugin registry -``` - -#### Description of CodeGen's Directory Structure - -The codegen executable program is only available under the `-I x86_64` compilation option, and only the operator library required by the inference code generated by codegen is generated under the `-I arm64` and `-I arm32` compilation options. - -- When the compilation option is `-I x86_64`: - - ```text - mindspore-lite-{version}-inference-linux-x64 - └── tools - └── codegen # Code generation tool - ├── codegen # Executable program - ├── include # Header files of inference framework - │ ├── nnacl # nnacl operator header file - │ └── wrapper - ├── lib - │ └── libwrapper.a # MindSpore Lite CodeGen generates code dependent operator static library - └── third_party - ├── include - │ └── CMSIS # ARM CMSIS NN operator header files - └── lib - └── libcmsis_nn.a # ARM CMSIS NN operator static library - ``` - -- When the compilation option is `-I arm64` or `-I arm32`: - - ```text - mindspore-lite-{version}-inference-android-{arch} - └── tools - └── codegen # Code generation tool - └── operator_library # Operator library - ├── include # Header files of inference framework - │ ├── nnacl # nnacl operator header file - │ └── wrapper - └── lib # Inference framework library - └── libwrapper.a # MindSpore Lite CodeGen generates code dependent static library - ``` - -#### Description of Obfuscator's Directory Structure - -The obfuscation tool is only available under the `-I x86_64` compilation option and the `ENABLE_MODEL_OBF` compilation option in `mindspore/mindspore/lite/CMakeLists.txt` is turned on, the content includes the following parts: - -```text -mindspore-lite-{version}-inference-linux-x64 -└── tools - └── obfuscator # Model obfuscation tool - └── msobfuscator # Executable program -``` - -#### Description of Runtime and Other tools' Directory Structure - -The inference framework can be obtained under `-I x86_64`, `-I arm64` and `-I arm32` compilation options, and the content includes the following parts: +### Directory Structure - When the compilation option is `-I x86_64`: ```text - mindspore-lite-{version}-inference-linux-x64 - ├── inference - │ ├── include # Header files of inference framework - │ │ └── registry # Header files of customized op registration - │ └── lib # Inference framework library - │ ├── libminddata-lite.so # The files of image processing dynamic library - │ ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite - │ ├── libmindspore-lite-jni.so # Dynamic library of inference framework jni in 
MindSpore Lite
-    │   ├── libmindspore-lite.so # Dynamic library of inference framework in MindSpore Lite
-    │   ├── libmsdeobfuscator-lite.so # The files of obfuscated model loading dynamic library, need to open the `ENABLE_MODEL_OBF` option.
-    │   └── mindspore-lite-java.jar # Jar of inference framework in MindSpore Lite
+    mindspore-lite-{version}-linux-x64
+    ├── runtime
+    │   ├── include
+    │   ├── lib
+    │   │   ├── libminddata-lite.a # Static library of image processing
+    │   │   ├── libminddata-lite.so # Dynamic library of image processing
+    │   │   ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite
+    │   │   ├── libmindspore-lite-jni.so # Dynamic library of inference framework jni in MindSpore Lite
+    │   │   ├── libmindspore-lite.so # Dynamic library of inference framework in MindSpore Lite
+    │   │   ├── libmindspore-lite-train.a # Static library of training framework in MindSpore Lite
+    │   │   ├── libmindspore-lite-train.so # Dynamic library of training framework in MindSpore Lite
+    │   │   ├── libmsdeobfuscator-lite.so # Dynamic library for loading obfuscated models; requires the `ENABLE_MODEL_OBF` option.
+    │   │   └── mindspore-lite-java.jar # Jar of inference framework in MindSpore Lite
+    │   └── third_party
+    │       └── libjpeg-turbo
     └── tools
-        ├── benchmark # Benchmarking tool
-        │   └── benchmark # Executable program
-        ├── codegen # Code generation tool
-        │   ├── codegen # Executable program
-        │   ├── include # operator header file
-        │   ├── lib # operator static library
-        │   └── third_party # ARM CMSIS NN static library
-        ├── converter # Model conversion tool
-        ├── obfuscator # Model obfuscation tool
-        └── cropper # Static library crop tool
-            ├── cropper # Executable file of static library crop tool
-            └── cropper_mapping_cpu.cfg # Crop cpu library related configuration files
+        ├── benchmark # Benchmarking tool
+        ├── benchmark_train # Training model benchmark tool
+        ├── codegen # Code generation tool
+        ├── converter # Model conversion tool
+        ├── obfuscator # Model obfuscation tool
+        └── cropper # Static library crop tool
    ```
 
- When the compilation option is `-I arm64` or `-I arm32`:
 
    ```text
-    mindspore-lite-{version}-inference-android-{arch}
-    ├── inference
-    │   ├── include # Header files of inference framework
-    │   │   └── registry # Header files of customized op registration
-    │   ├── lib # Inference framework library
-    │   │   ├── libminddata-lite.so # The files of image processing dynamic library
-    │   │   ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite
-    │   │   ├── libmindspore-lite.so # Dynamic library of inference framework in MindSpore Lite
-    │   │   └── libmsdeobfuscator-lite.so # The files of obfuscated model loading dynamic library, need to open the `ENABLE_MODEL_OBF` option.
+    mindspore-lite-{version}-android-{arch}
+    ├── runtime
+    │   ├── include
+    │   ├── lib
+    │   │   ├── libminddata-lite.a # Static library of image processing
+    │   │   ├── libminddata-lite.so # Dynamic library of image processing
+    │   │   ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite
+    │   │   ├── libmindspore-lite.so # Dynamic library of inference framework in MindSpore Lite
+    │   │   ├── libmindspore-lite-train.a # Static library of training framework in MindSpore Lite
+    │   │   ├── libmindspore-lite-train.so # Dynamic library of training framework in MindSpore Lite
+    │   │   └── libmsdeobfuscator-lite.so # Dynamic library for loading obfuscated models; requires the `ENABLE_MODEL_OBF` option.
│ └── third_party - │ └── hiai_ddk # NPU library, only exists in arm64 package + │ ├── hiai_ddk + │ └── libjpeg-turbo └── tools - ├── benchmark # Benchmarking tool - │ └── benchmark - └── codegen # Code generation tool - ├── include # operator header file - └── lib # operator static library + ├── benchmark # Benchmarking tool + ├── benchmark_train # Training model benchmark tool + └── codegen # Code generation tool ``` - When the compilation option is `-A on`: @@ -316,68 +212,6 @@ The inference framework can be obtained under `-I x86_64`, `-I arm64` and `-I ar └── mindspore-lite-{version}.aar # MindSpore Lite runtime aar ``` -### Training Output Description - -If the MSLITE_ENABLE_TRAIN option is turned on, the training Runtime and related tools will be generated, as follows: - -- `mindspore-lite-{version}-train-{os}-{arch}.tar.gz`: Contains model training framework and related tool. - -> - version: Version of the output, consistent with that of the MindSpore. -> - os: Operating system on which the output will be deployed. -> - arch: System architecture on which the output will be deployed. - -Execute the decompression command to obtain the compiled output: - -```bash -tar -xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz -``` - -#### Description of Training Runtime and Related Tools' Directory Structure - -The MindSpore Lite training framework can be obtained under `-I x86_64`, `-I arm64` and `-I arm32` compilation options, and the content includes the following parts: - -- When the compilation option is `-I x86_64`: - - ```text - mindspore-lite-{version}-train-linux-x64 - ├── tools - │ ├── benchmark_train # Training model benchmark tool - │ ├── converter # Model conversion tool - │ └── cropper # Static library crop tool - │ ├── cropper # Executable file of static library crop tool - │ └── cropper_mapping_cpu.cfg # Crop cpu library related configuration files - └── train - ├── include # Header files of training framework - │ └── registry # Header files of customized op registration - ├── lib # Inference framework library - │ ├── libminddata-lite.so # The files of image processing dynamic library - │ ├── libmindspore-lite-jni.so # Dynamic library of training framework jni in MindSpore Lite - │ ├── libmindspore-lite-train.a # Static library of training framework in MindSpore Lite - │ ├── libmindspore-lite-train.so # Dynamic library of training framework in MindSpore Lite - │ └── mindspore-lite-java.jar # Jar of inference framework in MindSpore Lite - └── third_party - └── libjpeg-turbo - ``` - -- When the compilation option is `-I arm64` or `-I arm32`: - - ```text - mindspore-lite-{version}-train-android-{arch} - ├── tools - │ ├── benchmark # Benchmarking tool - │ ├── benchmark_train # Training model benchmark tool - └── train - ├── include # Header files of training framework - │ └── registry # Header files of customized op registration - ├── lib # Training framework library - │ ├── libminddata-lite.so # The files of image processing dynamic library - │ ├── libmindspore-lite-train.a # Static library of training framework in MindSpore Lite - │ └── libmindspore-lite-train.so # Dynamic library of training framework in MindSpore Lite - └── third_party - ├── hiai_ddk # NPU library, only exists in arm64 package - └── libjpeg-turbo - ``` - ## Windows Environment Compilation ### Environment Requirements @@ -393,13 +227,27 @@ The MindSpore Lite training framework can be obtained under `-I x86_64`, `-I arm ### Compilation Options -The compilation options of MindSpore Lite are as follows: +The 
script `build.bat` in the root directory of MindSpore can be used to compile MindSpore Lite.
+
+#### The compilation parameter of `build.bat`
 
 | Parameter  | Parameter Description  | Mandatory or Not |
 | -------- | ----- | ---- |
 | lite | Set this parameter to compile the MindSpore Lite project. | Yes |
 | [n] | Set the number of threads used during compilation, otherwise the default is set to 6 threads. | No |
 
+#### The options of `mindspore/lite/CMakeLists.txt`
+
+| Option  | Parameter Description  | Value Range | Defaults |
+| -------- | ----- | ---- | ---- |
+| MSLITE_ENABLE_SSE | Whether to enable SSE instruction set | on, off | off |
+| MSLITE_ENABLE_AVX | Whether to enable AVX instruction set | on, off | off |
+| MSLITE_ENABLE_CONVERTER | Whether to compile the model conversion tool | on, off | on |
+| MSLITE_ENABLE_TOOLS | Whether to compile supporting tools | on, off | on |
+| MSLITE_ENABLE_TESTCASES | Whether to compile test cases | on, off | off |
+
+> - The above options can be modified by setting the environment variable with the same name or the file `mindspore/lite/CMakeLists.txt`.
+
 ### Compilation Example
 
 First, use the git tool to download the source code from the MindSpore code repository.
 
@@ -422,29 +270,18 @@ call build.bat lite
 call build.bat lite 8
 ```
 
-### Output Description
-
-After the compilation is complete, go to the `mindspore/output` directory of the source code to view the file generated after compilation. The file is divided into the following parts.
+Finally, the following files will be generated in the `output/` directory:
 
-- `mindspore-lite-{version}-inference-win-x64.zip`: Contains model inference framework and related tool.
+- `mindspore-lite-{version}-win-x64.zip`: Contains the model inference framework and related tools.
 
 > version: Version of the output, consistent with that of the MindSpore.
 
-Execute the decompression command to obtain the compiled output:
-
-```bat
-unzip mindspore-lite-{version}-inference-win-x64.zip
-```
-
-#### Description of Runtime and Related Tools' Directory Structure
-
-The content includes the following parts:
+### Directory Structure
 
 ```text
-mindspore-lite-{version}-inference-win-x64
-├── inference
-│   ├── include # Header files of inference framework
-│   │ └── registry # Header files of customized op registration
+mindspore-lite-{version}-win-x64
+├── runtime
+│   ├── include
 │   └── lib
 │       ├── libgcc_s_seh-1.dll # Dynamic library of MinGW
 │       ├── libmindspore-lite.a # Static library of inference framework in MindSpore Lite
@@ -455,50 +292,103 @@ mindspore-lite-{version}-win-x64
 │       └── libwinpthread-1.dll # Dynamic library of MinGW
 └── tools
     ├── benchmark # Benchmarking tool
-    │   └── benchmark.exe # Executable program
    └── converter # Model conversion tool
-        ├── include
-        │   └── registry # Header files of customized op, parser and pass registration
-        ├── converter
-        │   └── converter_lite.exe # Executable program
-        └── lib
-            ├── libgcc_s_seh-1.dll # Dynamic library of MinGW
-            ├── libglog.dll # Dynamic library of Glog
-            ├── libmslite_converter_plugin.dll # Dynamic library of plugin registry
-            ├── libmslite_converter_plugin.dll.a # Link file of Dynamic library of plugin registry
-            ├── libssp-0.dll # Dynamic library of MinGW
-            ├── libstdc++-6.dll # Dynamic library of MinGW
-            └── libwinpthread-1.dll # Dynamic library of MinGW
 ```
 
> Currently, MindSpore Lite Train is not supported on Windows.
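A corresponding decompression sketch for the Windows package (adapted from the removed output description; assumes an `unzip` tool is available, e.g. from Git Bash or MSYS2):

```bash
unzip mindspore-lite-{version}-win-x64.zip
```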
-## Docker Environment Compilation
+## macOS Environment Compilation
 
-### Environmental Preparation
+### Environment Requirements
 
-#### Download the docker image
+- System environment: macOS 10.15.4 or later; 64-bit.
 
-```bash
-docker pull swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530
-```
+- Compilation dependencies are:
+    - [CMake](https://cmake.org/download/) >= 3.18.3
+    - [Xcode](https://developer.apple.com/xcode/download/cn) == 11.4.1
+    - [Git](https://git-scm.com/downloads) >= 2.28.0
+
+> - The compilation script will execute `git clone` to obtain the code of the third-party dependent libraries.
+
+### Compilation Options
+
+The script `build.sh` in the root directory of MindSpore can be used to compile MindSpore Lite.
 
-> - Before downloading the image, please make sure docker has been installed.
-> - Docker image does not currently support Windows version compilation.
-> - Third-party libraries that compile dependencies have been installed in the image and environment variables have been configured.
+#### The compilation parameter of `build.sh`
 
-#### Create a container
+| Parameter | Parameter Description | Value Range | Defaults |
+| -------- | ----- | ---- | ---- |
+| -I | Selects an applicable architecture. | arm64, arm32 | None |
+| -j[n] | Sets the number of threads used during compilation. Otherwise, the number of threads is set to 8 by default. | Integer | 8 |
+
+### Compilation Example
+
+First, use the git tool to download the source code from the MindSpore code repository.
 
 ```bash
-docker run -tid --net=host --name=docker01 swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530
+git clone https://gitee.com/mindspore/mindspore.git
 ```
 
-#### Enter the container
+Then, run the following commands in the root directory of the source code to compile MindSpore Lite.
 
-```bash
-docker exec -ti -u 0 docker01 bash
+- Compile the ARM64 architecture version
+
+    ```bash
+    bash build.sh -I arm64 -j8
+    ```
+
+- Compile the ARM32 architecture version
+
+    ```bash
+    bash build.sh -I arm32 -j8
+    ```
+
+Finally, the following files will be generated in the `output/` directory:
+
+- `mindspore-lite-{version}-{os}-{arch}.tar.gz`: Contains the model inference framework.
+
+> - version: Version of the output, consistent with that of the MindSpore.
+> - os: Operating system on which the output will be deployed.
+> - arch: System architecture on which the output will be deployed.
+
+### Directory Structure
+
+```text
+mindspore-lite.framework
+└── runtime
+    ├── Headers        # Header files of the inference framework
+    ├── Info.plist     # Configuration file
+    └── mindspore-lite # Static library
 ```
 
+> Currently, MindSpore Lite Train and converter are not supported on macOS.
+
+## Docker Environment Compilation
+
+### Environmental Preparation
+
+- Download the docker image
+
+    ```bash
+    docker pull swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530
+    ```
+
+    > - Before downloading the image, please make sure docker has been installed.
+    > - Docker image does not currently support Windows version compilation.
+    > - Third-party libraries that compile dependencies have been installed in the image and environment variables have been configured.
+
+- Create a container
+
+    ```bash
+    docker run -tid --net=host --name=docker01 swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530
+    ```
+
+- Enter the container
+
+    ```bash
+    docker exec -ti -u 0 docker01 bash
+    ```
+
 ### Compilation Options
 
 Refer to [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#linux-environment-compilation)
@@ -507,6 +397,6 @@ Refer to [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/
 
 Refer to [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#linux-environment-compilation)
 
-### Output Description
+### Directory Structure
 
 Refer to [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#linux-environment-compilation)
diff --git a/tutorials/lite/source_en/use/converter_tool.md b/tutorials/lite/source_en/use/converter_tool.md
index 1acc5e2a2698e6b39997cc94f39845efd03f936b..959b08e9a9c90005c5c247f482347d3719ceb7cc 100644
--- a/tutorials/lite/source_en/use/converter_tool.md
+++ b/tutorials/lite/source_en/use/converter_tool.md
@@ -8,10 +8,12 @@
     - [Overview](#overview)
     - [Linux Environment Instructions](#linux-environment-instructions)
         - [Environment Preparation](#environment-preparation)
+        - [Directory Structure](#directory-structure)
         - [Parameter Description](#parameter-description)
        - [Example](#example)
     - [Windows Environment Instructions](#windows-environment-instructions)
        - [Environment Preparation](#environment-preparation-1)
+        - [Directory Structure](#directory-structure-1)
        - [Parameter Description](#parameter-description-1)
        - [Example](#example-1)
 
@@ -43,6 +45,19 @@ To use the MindSpore Lite model conversion tool, you need to prepare the environ
 
    ${PACKAGE_ROOT_PATH} is the decompressed package path obtained by compiling or downloading.
 
+### Directory Structure
+
+```text
+mindspore-lite-{version}-linux-x64
+└── tools
+    └── converter
+        ├── include
+        ├── converter # Model conversion tool
+        │   └── converter_lite # Executable program
+        └── lib # The dynamic link libraries that the converter depends on
+            └── libglog.so.0 # Dynamic library of Glog
+```
+
 ### Parameter Description
 
 MindSpore Lite model conversion tool provides multiple parameters.
@@ -61,12 +76,29 @@ The following describes the parameters in detail.
 | `--bitNum=` | No | Sets the quantization bitNum when quantType is set as WeightQuant, now supports 1 bit to 16 bit quantization. | \[1, 16] | 8 |
 | `--quantWeightSize=` | No | Sets a size threshold of convolution filter when quantType is set as WeightQuant. If the size is bigger than this value, it will trigger weight quantization. | \[0, +∞) | 0 |
 | `--quantWeightChannel=` | No | Sets a channel number threshold of convolution filter when quantType is set as WeightQuant. If the number is bigger than this, it will trigger weight quantization. | \[0, +∞) | 16 |
-| `--configFile=` | No | 1) Profile path of calibration dataset when quantType is set as PostTraining. 2) Profile path of converter. It contains the parameters `plugin_path` and `disable_fusion`, both of which are optional. The former one is the third-party library path. If there are more than one, please use `;` to separate. The default value of the latter one is `off`. Fusion optimization will be turned off when the value is set to `on`. | - | - |
+| `--configFile=` | No | 1) Profile path of calibration dataset when quantType is set as PostTraining; 2) Profile path of converter.
| - | - |
+| `--fp16=` | No | Serialize const tensors in the Float16 data type; only effective for const tensors in the Float32 data type. | on or off | off |
+| `--inputShape=` | No | Set the dimension of the model input; the default is the same as the input of the original model. The model can be further optimized in some scenarios, such as models with a shape operator, but the output model will lose the feature of dynamic shape, e.g. inTensorName: 1,32,32,4 | - | - |
 
 > - The parameter name and parameter value are separated by an equal sign (=) and no space is allowed between them.
 > - The Caffe model is divided into two files: model structure `*.prototxt`, corresponding to the `--modelFile` parameter; model weight `*.caffemodel`, corresponding to the `--weightFile` parameter.
 > - In order to ensure the accuracy of weight quantization, the "--bitNum" parameter should better be set to a range from 8bit to 16bit.
 > - PostTraining method currently only supports activation quantization and weight quantization in 8 bit.
+> - The priority of the `--fp16` option is very low. For example, if quantization is enabled, `--fp16` will no longer take effect on const tensors that have been quantized. All in all, this option only takes effect on Float32 const tensors when serializing the model.
+
+The calibration dataset configuration file uses the `key=value` mode to define related parameters. The `key` to be configured is as follows:
+
+| Parameter Name | Attribute | Function Description | Parameter Type | Default Value | Value Range |
+| -------- | ------- | ----- | ----- | ----- | ----- |
+| image_path | Mandatory for full quantization | Directory for storing a calibration dataset. If a model has multiple inputs, enter directories where the corresponding data is stored in sequence. Use commas (,) to separate them. | String | - | The directory stores the input data that can be directly used for inference. Since the current framework does not support data preprocessing, all data must be converted in advance to meet the input requirements of inference. |
+| batch_count | Optional | Number of used inputs | Integer | 100 | (0, +∞) |
+| method_x | Optional | Input and output data quantization algorithms at the network layer | String | KL | KL, MAX_MIN, or RemovalOutlier.<br>KL: quantizes and calibrates the data range based on [KL divergence](http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf).<br>MAX_MIN: data quantization parameter computed based on the maximum and minimum values.<br>RemovalOutlier: removes the maximum and minimum values of data based on a certain proportion and then calculates the quantization parameters.<br>If the calibration dataset is consistent with the input data during actual inference, MAX_MIN is recommended. If the noise of the calibration dataset is large, KL or RemovalOutlier is recommended. |
+| thread_num | Optional | Number of threads used when the calibration dataset is used to execute the inference process | Integer | 1 | (0, +∞) |
+| bias_correction | Optional | Indicate whether to correct the quantization error. | Boolean | false | True or false. After this parameter is enabled, the accuracy of the converted model can be improved. You are advised to set this parameter to true. |
+| plugin_path | Optional | Third-party library path | String | - | If there are more than one, please use `;` to separate. |
+| disable_fusion | Optional | Indicate whether to disable fusion optimization | String | off | off or on. |
+
+### Example
 
 The following describes how to use the conversion command by using several common examples.
@@ -138,6 +170,23 @@ To use the MindSpore Lite model conversion tool, the following environment prepa
 
 %PACKAGE_ROOT_PATH% is the decompressed package path obtained by compiling or downloading.
 
+### Directory Structure
+
+```text
+mindspore-lite-{version}-win-x64
+└── tools
+    └── converter # Model conversion tool
+        ├── include
+        ├── converter
+        │   └── converter_lite.exe # Executable program
+        └── lib
+            ├── libgcc_s_seh-1.dll # Dynamic library of MinGW
+            ├── libglog.dll # Dynamic library of Glog
+            ├── libssp-0.dll # Dynamic library of MinGW
+            ├── libstdc++-6.dll # Dynamic library of MinGW
+            └── libwinpthread-1.dll # Dynamic library of MinGW
+```
+
 ### Parameter Description
 
 Refer to the Linux environment model conversion tool [parameter description](https://www.mindspore.cn/tutorial/lite/en/master/use/converter_tool.html#parameter-description).
diff --git a/tutorials/lite/source_en/use/converter_train.md b/tutorials/lite/source_en/use/converter_train.md
index 1ffe6d36a050f01946a1e5d3b18c82159f4c26dc..416f7857e7a189dc19f1d6ecde788ec42da0761b 100644
--- a/tutorials/lite/source_en/use/converter_train.md
+++ b/tutorials/lite/source_en/use/converter_train.md
@@ -49,6 +49,10 @@ The table below shows the parameters used in the MindSpore Lite model training t
 | `--modelFile=` | yes | Path of the input model. | - | - |
 | `--outputFile=` | yes | Path of the output model. The suffix `.ms` can be automatically generated. | - | - |
 | `--trainModel=true` | yes | Training on Device or not | true, false | false |
+| `--quantType=` | no | Sets the quantization type of the model. | WeightQuant: this quantType is only supported while using Lite training | - |
+| `--bitNum=` | no | Sets the quantization bitNum when quantType is set as WeightQuant, now supports 1 bit to 16 bit quantization. | \[1, 16] | 8 |
+| `--quantWeightSize=` | no | Sets a size threshold of convolution filter when quantType is set as WeightQuant. If the size is bigger than this value, it will trigger weight quantization. | \[0, +∞) | 0 |
+| `--quantWeightChannel=` | no | Sets a channel number threshold of convolution filter when quantType is set as WeightQuant. If the number is bigger than the channel number, it will trigger weight quantization. | \[0, +∞) | 16 |
 
 > The parameter name and parameter value are separated by an equal sign (=) and no space is allowed between them.
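To make the new training-conversion parameters concrete, a hypothetical invocation combining them (a sketch; the `--fmk` value and file names are illustrative assumptions, not taken from this page):

```bash
./converter_lite --fmk=MINDIR --modelFile=lenet_train.mindir --outputFile=lenet_train \
    --trainModel=true --quantType=WeightQuant --bitNum=8
```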
diff --git a/tutorials/lite/source_en/use/cropper_tool.md b/tutorials/lite/source_en/use/cropper_tool.md index 42e894e8b47fd6d0a687f664fcc3d693384f14a8..749dce795b1be2c4c40aadd497fb5610ae1c8ae0 100644 --- a/tutorials/lite/source_en/use/cropper_tool.md +++ b/tutorials/lite/source_en/use/cropper_tool.md @@ -58,7 +58,7 @@ The Cropper tool obtains the operator list by parsing the `ms` model, and crop t - Pass in the `ms` model through the folder, and pass the folder path where the model file is located to the `modelFolderPath` parameter to crop the `libmindspore-lite.a` static library of arm64-cpu. ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` This example will read all the `ms` models contained in the `/model` folder, crop the `libmindspore-lite.a` static library of arm64-cpu, and the cropped `libmindspore-lite.a` static library will be saved to `/mindspore-lite/lib/` directory. @@ -66,7 +66,7 @@ This example will read all the `ms` models contained in the `/model` folder, cro - Pass in the `ms` model by file, pass the path where the model file is located to the `modelFile` parameter, and crop the `libmindspore-lite.a` static library of arm64-cpu. ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` In this example, the `libmindspore-lite.a` static library of arm64-cpu will be cropped according to the `ms` model passed by `modelFile`, and the cropped `libmindspore-lite.a` static library will be saved to `/mindspore-lite/lib/` directory. @@ -74,7 +74,7 @@ In this example, the `libmindspore-lite.a` static library of arm64-cpu will be c - Pass in the `ms` model through the folder, and pass the folder path where the model file is located to the `modelFolderPath` parameter to crop the `libmindspore-lite.a` static library of arm64-gpu. ```bash -./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a +./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a ``` This example will read all the `ms` models contained in the `/model` folder, crop the `libmindspore-lite.a` static library of arm64-gpu, and the cropped `libmindspore-lite.a` static library will be saved to `/mindspore-lite/lib/` directory. 
@@ -82,7 +82,7 @@ This example will read all the `ms` models contained in the `/model` folder, cro
 
 - Pass in the `ms` model by file, pass the path where the model file is located to the `modelFile` parameter, and crop the `libmindspore-lite.a` static library of arm64-gpu.
 
 ```bash
-./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a
+./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a
 ```
 
 In this example, the `libmindspore-lite.a` static library of arm64-gpu will be cropped according to the `ms` model passed by `modelFile`, and the cropped `libmindspore-lite.a` static library will be saved to `/mindspore-lite/lib/` directory.
\ No newline at end of file
diff --git a/tutorials/lite/source_en/use/micro.md b/tutorials/lite/source_en/use/micro.md
index 455da3529ea5b0d331edccb778f6a0a535f6d691..bd5cd932d6f7181018df535b515c6aba851a814d 100644
--- a/tutorials/lite/source_en/use/micro.md
+++ b/tutorials/lite/source_en/use/micro.md
@@ -7,9 +7,11 @@
 - [Perform Inference on the Microcontroller](#perform-inference-on-the-microcontroller)
     - [Overview](#overview)
     - [Obtaining CodeGen](#obtaining-codegen)
+    - [Directory Structure](#directory-structure)
     - [Parameter Description](#parameter-description)
     - [Instructions](#instructions)
-    - [Using CodeGen to Perform inference on STM Boards](#perform-inference-on-the-stm-microcontroller)
+    - [Using CodeGen to Perform inference on STM Boards](#using-codegen-to-perform-inference-on-stm-boards)
+    - [Using CodeGen to Perform inference on Open HarmonyOS](#using-codegen-to-perform-inference-on-open-harmonyos)
     - [More Details](#more-details)
 
@@ -37,6 +39,25 @@ You can obtain CodeGen by any of the following ways:
 
 > Currently the code generator is only available on Linux x86_64.
 
+## Directory Structure
+
+```text
+mindspore-lite-{version}-linux-x64
+└── tools
+    └── codegen # Code generation tool
+        ├── codegen # Executable program
+        ├── include # Header files of inference framework
+        │   ├── nnacl # nnacl operator header file
+        │   └── wrapper
+        ├── lib
+        │   └── libwrapper.a # Static library of the operators that code generated by CodeGen depends on
+        └── third_party
+            ├── include
+            │   └── CMSIS # ARM CMSIS NN operator header files
+            └── lib
+                └── libcmsis_nn.a # ARM CMSIS NN operator static library
+```
+
 ## Parameter Description
 
 Here is the detailed description of parameters:
@@ -280,6 +301,97 @@ load # load executable to board
 c # perform model inference
 ```
 
+## Using CodeGen to Perform inference on Open HarmonyOS
+
+1. For the environment preparation, please refer to the [HarmonyOS quick start](https://device.harmonyos.com/en/docs/start/introduce/oem_minitinier_environment_lin-0000001105407498), including gn/ninja/llvm.
+
+2. For hardware environment preparation, please refer to [How to Develop](https://device.harmonyos.com/en/docs/start/introduce/oem_development_eq_3516-0000001105829366) in the HarmonyOS quick start, taking the Hi3516 board as an example.
+
+3. Organize the project files in the structure shown below:
+
+    ```text
+    ├── benchmark
+    ├── CMakeLists.txt
+    ├── BUILD.gn # project management file
+    └── src
+    ```
+
+4. 
Use CodeGen with the `*.ms` file of the [mobilenetv3 model](https://download.mindspore.cn/model_zoo/official/lite/mnist_lite/mnist.ms) as input to automatically generate the inference code for HarmonyOS. + + ```bash + ./codegen --modelPath=./mobilenetv3.ms --codePath=./ --target=ARM32A + ``` + +5. Edit the gn file: + + ```text + import("//build/lite/config/component/lite_component.gni") + import("//build/lite/ndk/ndk.gni") + + lite_component("mobilenetV3_benchmark") { + target_type = "executable" + sources = [ + "benchmark/benchmark.cc", + "benchmark/load_input.c", + "benchmark/calib_output.cc", + "src/net.c", + "src/weight.c", + "src/session.cc", + "src/tensor.cc", + ] + + features = [] + + include_dirs = [ + "//foundation/ai/engine/test/mindspore_benchmark", + "//foundation/ai/engine/test/mindspore_benchmark/include", + "//foundation/ai/engine/test/mindspore_benchmark/mobilenetV3/benchmark", + "//foundation/ai/engine/test/mindspore_benchmark/mobilenetV3/src", + ] + + ldflags = [ + "-fno-strict-aliasing", + "-Wall", + "-pedantic", + "-std=gnu99", + ] + + libs = [ + "../lib/libmindspore-lite.a", + "../lib/libwrapper.a", + ] + + defines = [ "NOT_USE_STL" ] + defines += [ "ENABLE_NEON" ] + defines += [ "ENABLE_ARM" ] + defines += [ "ENABLE_ARM32" ] + + cflags = [ + "-fno-strict-aliasing", + "-Wall", + "-pedantic", + "-std=gnu99", + ] + } + ``` + +6. Compile and execute the benchmark; the result is as follows: + + ```text + ReadWeightData time: 0.00000ms + input 0: mobilenetV3_input.bin + ReadInputData time: 0.00000ms + + loop count:3 + total time: 756.13397ms, per time: 252.04466ms + + outputs: + name: Reshape-110, DataType: 43, Elements: 1001, Shape: [1 1001 ], Data: + -0.583575, -0.359817, 0.536744, -1.843612, -0.849360, 0.147853, 0.402617, -1.016975, 0.737295, 1.312937 + ===========run success======== + total end to end time: 2124.91895ms + ``` + ## More Details ### [Linux_x86_64 platform compile and deploy](https://gitee.com/mindspore/mindspore/tree/master/mindspore/lite/micro/example/mnist_x86) diff --git a/tutorials/lite/source_en/use/nnie.md b/tutorials/lite/source_en/use/nnie.md new file mode 100644 index 0000000000000000000000000000000000000000..b4d1ededb6e5e9305b67dd6dc682959f9ec14e71 --- /dev/null +++ b/tutorials/lite/source_en/use/nnie.md @@ -0,0 +1,5 @@ +# NNIE Usage Instructions + +No English version is available right now; contributions are welcome. + + \ No newline at end of file diff --git a/tutorials/lite/source_en/use/npu_info.md b/tutorials/lite/source_en/use/npu_info.md index e77c095625306e8e65ceb726225e087d7f04cfd2..7532beda5c902026d9ee75a283a1f8175009b846 100644 --- a/tutorials/lite/source_en/use/npu_info.md +++ b/tutorials/lite/source_en/use/npu_info.md @@ -33,7 +33,8 @@ It will build MindSpore Lite's package under the output directory under the Mind which contains the NPU's dynamic library, the libmindspore-lite dynamic library, and the test tool Benchmark. ```bash -bash build.sh -I arm64 -e npu +export MSLITE_ENABLE_NPU=ON +bash build.sh -I arm64 -j8 ``` For more information about compilation, see [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#linux-environment-compilation). 
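Putting the prerequisites together, a complete environment for an NPU-enabled build might look like the sketch below; the NDK and DDK install paths are placeholders, and the variable names follow the Linux environment compilation document:

```bash
# Hypothetical environment for an NPU-enabled arm64 build
# (both install paths are placeholders for your local setup).
export ANDROID_NDK=/path/to/android-ndk-r20b
export HWHIAI_DDK=/path/to/hwhiai-ddk
export MSLITE_ENABLE_NPU=ON
bash build.sh -I arm64 -j8
```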
@@ -51,7 +52,7 @@ For more information about compilation, see [Linux Environment Compilation](http For example, ```bash - export LD_LIBRARY_PATH=mindspore-lite-{version}-inference-android-{arch}/inference/third_party/hiai_ddk/lib/:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=mindspore-lite-{version}-android-{arch}/runtime/third_party/hiai_ddk/lib/:$LD_LIBRARY_PATH ``` For more information about compilation, please refer to [Compilation Output](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#description-of-runtime-and-other-tools-directory-structure) @@ -59,10 +60,7 @@ For more information about compilation, see [Linux Environment Compilation](http - Using Benchmark testing NPU inference - Users can also test NPU inference using MindSpore Lite's Benchmark tool. -For the Benchmark tool location, see [Compilation Output](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#description-of-runtime-and-other-tools-directory-structure). -Pass the build package to the `/data/local/tmp/` directory of an Android phone equipped with NPU chips -(For supported NPU chips, see [Chipset Platforms and Supported HUAWEI HiAI Versions](https://developer.huawei.com/consumer/en/doc/development/hiai-Guides/mapping-relationship-0000001052830507#ZH-CN_TOPIC_0000001052830507__section94427279718).)and test NPU inference using the Benchmark tool + Users can also test NPU inference using MindSpore Lite's Benchmark tool. Pass the build package to the `/data/local/tmp/` directory of an Android phone equipped with NPU chips and run the Benchmark tool on the phone, as shown in the example below: - Test performance @@ -80,10 +78,8 @@ on the phone, as shown in the example below: For more information about the use of Benchmark, see [Benchmark Use](https://www.mindspore.cn/tutorial/lite/en/master/use/benchmark_tool.html). For environment variable settings, you need to set the directory where the libmindspore-lite.so -(under the directory `mindspore-lite-{version}-inference-android-{arch}/inference/lib`) and NPU libraries -(under the directory `mindspore-lite-{version}-inference-android-{arch}/inference/third_party/hiai_ddk/lib/`) are located, to `${LD_LIBRARY_PATH}`. -The directory is specified in [Compilation Output](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#description-of-runtime-and-other-tools-directory-structure) -with compilation option `-I arm64` or `-I arm32`. +(under the directory `mindspore-lite-{version}-android-{arch}/runtime/lib`) and NPU libraries +(under the directory `mindspore-lite-{version}-android-{arch}/runtime/third_party/hiai_ddk/lib/`) are located, to `${LD_LIBRARY_PATH}`. ## Supported Chips diff --git a/tutorials/lite/source_en/use/obfuscator_tool.md b/tutorials/lite/source_en/use/obfuscator_tool.md new file mode 100644 index 0000000000000000000000000000000000000000..d7f5e0060e6174be6fccee922ae6d9f8f2665184 --- /dev/null +++ b/tutorials/lite/source_en/use/obfuscator_tool.md @@ -0,0 +1,5 @@ +# Model Obfuscator Tool + +No English version is available right now; contributions are welcome. 
+ + diff --git a/tutorials/lite/source_en/use/post_training_quantization.md b/tutorials/lite/source_en/use/post_training_quantization.md index cbe3442a76f114a22bf3326f2401e205d7eb9881..e9d868f88136d612a86cbcafaff2c21fd7ff386a 100644 --- a/tutorials/lite/source_en/use/post_training_quantization.md +++ b/tutorials/lite/source_en/use/post_training_quantization.md @@ -95,16 +95,7 @@ Parameters of this command are described as follows: | `--configFile=` | Mandatory | Path of a calibration dataset configuration file | String | - | - | | `--bitNum=` | Optional | Number of bits for full quantization. Currently, 1 to 8 bits are supported. | Integer | 8 | \[1, 8] | -To compute a quantization parameter of an activation value, you need to provide a calibration dataset. It is recommended that the calibration dataset be obtained from the actual inference scenario and can represent the actual input of a model. The number of data records is about 100. -The calibration dataset configuration file uses the `key=value` mode to define related parameters. The `key` to be configured is as follows: - -| Parameter Name | Attribute | Function Description | Parameter Type | Default Value | Value Range | -| -------- | ------- | ----- | ----- | ----- | ----- | -| image_path | Mandatory | Directory for storing a calibration dataset. If a model has multiple inputs, enter directories where the corresponding data is stored in sequence. Use commas (,) to separate them. | String | - | The directory stores the input data that can be directly used for inference. Since the current framework does not support data preprocessing, all data must be converted in advance to meet the input requirements of inference. | -| batch_count | Optional | Number of used inputs | Integer | 100 | (0, +∞) | -| method_x | Optional | Input and output data quantization algorithms at the network layer | String | KL | KL, MAX_MIN, or RemovalOutlier.
KL: quantizes and calibrates the data range based on [KL divergence](http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf).
MAX_MIN: data quantization parameter computed based on the maximum and minimum values.
RemovalOutlier: removes the maximum and minimum values of data based on a certain proportion and then calculates the quantization parameters.
If the calibration dataset is consistent with the input data during actual inference, MAX_MIN is recommended. If the noise of the calibration dataset is large, KL or RemovalOutlier is recommended. -| thread_num | Optional | Number of threads used when the calibration dataset is used to execute the inference process | Integer | 1 | (0, +∞) | -| bias_correction | Optional | Indicate whether to correct the quantization error. | Boolean | false | True or false. After this parameter is enabled, the accuracy of the converted model can be improved. You are advised to set this parameter to true. | +To compute a quantization parameter of an activation value, you need to provide a calibration dataset. It is recommended that the calibration dataset be obtained from the actual inference scenario so that it represents the actual input of the model; about 100 data records are recommended. For the `configFile` configuration, please refer to [Parameter Description](https://www.mindspore.cn/tutorial/lite/en/master/use/converter_tool.html#parameter-description). > For a multi-input model, different input data must be stored in different directories. In addition, names of all files in each directory must be sorted in ascending lexicographic order to ensure one-to-one mapping. For example, a model has two inputs input0 and input1, and there are two calibration datasets (batch_count=2). The data of input0 is stored in the /dir/input0/ directory. The input data files are data_1.bin and data_2.bin. The data of input1 is stored in the /dir/input1/ directory. The input data files are data_a.bin and data_b.bin. The (data_1.bin, data_a.bin) is regarded as a group of inputs and the (data_2.bin, data_b.bin) is regarded as another group of inputs. diff --git a/tutorials/lite/source_en/use/runtime_cpp.md b/tutorials/lite/source_en/use/runtime_cpp.md index edf787311e76c4614e083574dd223694a28a8268..b39145aadd1dd4e911f7380685da6a660cae6ff3 100644 --- a/tutorials/lite/source_en/use/runtime_cpp.md +++ b/tutorials/lite/source_en/use/runtime_cpp.md @@ -1,6 +1,6 @@ # Using C++ Interface to Perform Inference -`Windows` `Linux` `Android` `C++` `Inference Application` `Model Loading` `Data Preparation` `Intermediate` `Expert` +`Windows` `macOS` `Linux` `iOS` `Android` `C++` `Inference Application` `Model Loading` `Data Preparation` `Intermediate` `Expert` @@ -12,6 +12,7 @@ - [Configuring the CPU Backend](#configuring-the-cpu-backend) - [Configuring the GPU Backend](#configuring-the-gpu-backend) - [Configuring the NPU Backend](#configuring-the-npu-backend) + - [Configuring the TensorRT Backend](#configuring-the-tensorrt-backend) - [Creating a Session](#creating-a-session) - [Building a Graph](#building-a-graph) - [Inputting Data](#inputting-data) @@ -77,7 +78,9 @@ if (model == nullptr) { ## Creating a Configuration Context -The context saves some basic configuration parameters required by the session to guide graph build and execution. If you use `new` to create a [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2) and do not need it any more, use `delete` to release it. Generally, the [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2) is released after the [LiteSession](https://www.mindspore.cn/doc/api_cpp/en/master/session.html#litesession) is created. The parameters contained in [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2) are defined as follows: +The context saves some basic configuration parameters required by the session to guide graph build and execution. 
If you use `new` to create a [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2) and do not need it any more, use `delete` to release it. Generally, the [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2) is released after the [LiteSession](https://www.mindspore.cn/doc/api_cpp/en/master/session.html#litesession) is created. + +The basic parameters contained in [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2) are defined as follows: - [thread_num_](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#thread-num): MindSpore Lite has a built-in thread pool shared by processes. During inference, `thread_num_` is used to specify the maximum number of threads in the thread pool. The default value is 2. - [allocator](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#allocator): MindSpore Lite supports dynamic memory allocation and release. If `allocator` is not specified, a default `allocator` is generated during inference. You can also use the [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#context) method to share the memory allocator in multiple [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2). For details about the calling method, see the usage of [Sharing a Memory Pool](#sharing-a-memory-pool). @@ -85,6 +88,10 @@ The context saves some basic configuration parameters required by the session to - [device_list_](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#device-list): MindSpore Lite supports heterogeneous inference. The backend configuration information for inference is specified by `device_list_` in [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2). By default, the [DeviceContext](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#devicecontext) of the CPU is stored. During graph build, operator selection and scheduling are performed based on the backend configuration information in `device_list_`. Currently, only CPU and GPU heterogeneity or CPU and NPU heterogeneity is supported. When the GPU's [DeviceContext](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#devicecontext) is configured, GPU-based inference is preferentially used. When the NPU's [DeviceContext](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#devicecontext) is configured, NPU-based inference is preferentially used. > `device_list_[0]` must be `DeviceContext` of the CPU, and `device_list_[1]` must be `DeviceContext` of the GPU or `DeviceContext` of the NPU. Currently, the CPU, GPU, and NPU cannot be set at a time. +> +> For the iOS platform, `device_list_[0]` must be `DeviceContext` of the CPU. + +The advanced parameters contained in [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2) are defined as follows: ### Configuring the Number of Threads @@ -119,6 +126,8 @@ cpu_device_info.enable_float16_ = true; ``` > Float16 takes effect only when the CPU is of the ARM v8.2 architecture. Other models and x86 platforms that are not supported are automatically rolled back to Float32. +> +> For the iOS platform, only the CPU is supported, and Float16 is not supported yet. 
### Configuring the GPU Backend @@ -166,6 +175,28 @@ npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3; context->device_list_.push_back(npu_device_ctx); ``` +### Configuring the TensorRT Backend + +When the backend to be executed is heterogeneous inference based on the CPU and TensorRT, you need to set the CPU's and TensorRT's [DeviceContext](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#devicecontext). After the configuration, TensorRT inference is preferentially used. + +The following sample code from [main.cc](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/examples/runtime_cpp/main.cc#L120) shows how to create the CPU and TensorRT heterogeneous inference backend. + +```cpp +auto context = std::make_shared<mindspore::lite::Context>(); +if (context == nullptr) { + std::cerr << "New context failed while running. " << std::endl; + return nullptr; +} + +// The TensorRT backend is configured through the GPU device context. If it is set, the preferred backend is +// TensorRT, which means, if there is an operator supported by TensorRT, it will run on TensorRT first, +// otherwise it will run on the CPU. +mindspore::lite::DeviceContext gpu_device_ctx{mindspore::lite::DT_GPU, {false}}; +// Use float16 operators as priority. +gpu_device_ctx.device_info_.gpu_device_info_.enable_float16_ = true; +// The GPU device context needs to be push_back into device_list_ to take effect. +context->device_list_.push_back(gpu_device_ctx); +``` + ## Creating a Session When MindSpore Lite is used for inference, [LiteSession](https://www.mindspore.cn/doc/api_cpp/en/master/session.html#litesession) is the main entry for inference. You can use [LiteSession](https://www.mindspore.cn/doc/api_cpp/en/master/session.html#litesession) to build and execute graphs. Use the [Context](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#id2) created in the previous step to call the [CreateSession](https://www.mindspore.cn/doc/api_cpp/en/master/session.html#createsession) method of the [LiteSession](https://www.mindspore.cn/doc/api_cpp/en/master/session.html#litesession) to create the LiteSession. @@ -580,6 +611,8 @@ If an exception occurs during inference, you can view logs to locate the fault. logcat -s "MS_LITE" ``` +> For the iOS platform, viewing logs is not supported yet. + ### Obtaining the Version Number MindSpore Lite provides the [Version](https://www.mindspore.cn/doc/api_cpp/en/master/lite.html#version) method to obtain the version number, which is included in the `include/version.h` header file. You can call this method to obtain the version number of MindSpore Lite. diff --git a/tutorials/lite/source_en/use/runtime_train_cpp.md b/tutorials/lite/source_en/use/runtime_train_cpp.md index d5e014259600240c07ce05215a34edba23e6aceb..3e6275018ba3e2ed2060751ab3d7713fb69eb3b0 100644 --- a/tutorials/lite/source_en/use/runtime_train_cpp.md +++ b/tutorials/lite/source_en/use/runtime_train_cpp.md @@ -541,15 +541,21 @@ if (ret != RET_OK) { ### Saving Model -The function `CkptSaver` calls the function `SaveToFile` actually. The user can also call `SaveToFile` directly to save the trained model. +The function `CkptSaver` actually calls the function `Export`. The user can also call `Export` directly to save the trained model. 
```cpp /// \brief Save the trained model into a flatbuffer file /// - /// \param[in] filename Filename to save flatbuffer to + /// \param[in] file_name Filename to save flatbuffer to + /// + /// \param[in] model_type ModelType to save (train or inference) + /// + /// \param[in] quant_type QuantizationType to save + /// + /// \param[in] format FormatType to save /// /// \return 0 on success or -1 in case of error - virtual int SaveToFile(const std::string &filename) const = 0; + virtual int Export(const std::string &file_name, lite::ModelType model_type = lite::MT_TRAIN, lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType format = lite::FT_FLATBUFFERS) const = 0; ``` You can load the saved model to do re-training or inference. diff --git a/tutorials/lite/source_en/use/tensorrt_info.md b/tutorials/lite/source_en/use/tensorrt_info.md new file mode 100644 index 0000000000000000000000000000000000000000..7216089c54270b235867e34dedb59e82cd30f42b --- /dev/null +++ b/tutorials/lite/source_en/use/tensorrt_info.md @@ -0,0 +1,78 @@ +# TensorRT Integration Information + +`TensorRT` `NVIDIA` `Linux` `Environment Preparation` `Operators Supported` `Intermediate` `Expert` + + + +- [TensorRT Integration Information](#tensorrt-integration-information) + - [Steps](#steps) + - [Environment Preparation](#environment-preparation) + - [Build](#build) + - [Integration](#integration) + - [Supported Operators](#supported-operators) + + + + + +## Steps + +### Environment Preparation + +Besides the basic [Environment Preparation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html), CUDA and TensorRT are required as well. The current version only supports CUDA 10.1 and TensorRT 6.0.1.5. + +Install [CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) and set the install directory as the environment variable `${CUDA_HOME}`. Our build script uses this environment variable to locate CUDA. + +Install [TensorRT 6.0.1.5](https://developer.nvidia.com/nvidia-tensorrt-6x-download) and set the install directory as the environment variable `${TENSORRT_PATH}`. Our build script uses this environment variable to locate TensorRT. + +### Build + +In the Linux environment, use the build.sh script in the root directory of MindSpore [Source Code](https://gitee.com/mindspore/mindspore) to build the MindSpore Lite package integrated with TensorRT. First configure the environment variable `MSLITE_GPU_BACKEND=tensorrt`, and then execute the compilation command as follows. + +```bash +bash build.sh -I x86_64 +``` + +For more information about compilation, see [Linux Environment Compilation](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#linux). + +### Integration + +- Integration instructions + + When integrating the TensorRT feature, note the following: + - [Configure the TensorRT backend](https://www.mindspore.cn/tutorial/lite/en/master/use/runtime_cpp.html#tensorrt). + For more information about using Runtime to perform inference, see [Using Runtime to Perform Inference (C++)](https://www.mindspore.cn/tutorial/lite/en/master/use/runtime_cpp.html). + + - Compile and execute the binary. If you use dynamic linking, please refer to [Compilation Output](https://www.mindspore.cn/tutorial/lite/en/master/use/build.html#directory-structure) with compilation option `-I x86_64`. + Please set environment variables to dynamically link the related libraries. 
+ + ```bash + export LD_LIBRARY_PATH=mindspore-lite-{version}-{os}-{arch}/runtime/lib/:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=user-installed-tensorrt-path/lib/:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=user-installed-cuda-path/lib/:$LD_LIBRARY_PATH + ``` + +- Using Benchmark to test TensorRT inference + + Pass the build package to a device with a TensorRT environment (TensorRT 6.0.1.5) and use the Benchmark tool to test TensorRT inference. Examples are as follows: + + - Test performance + + ```bash + ./benchmark --device=GPU --modelFile=./models/test_benchmark.ms --timeProfiling=true + ``` + + - Test precision + + ```bash + ./benchmark --device=GPU --modelFile=./models/test_benchmark.ms --inDataFile=./input/test_benchmark.bin --inputShapes=1,32,32,1 --accuracyThreshold=3 --benchmarkDataFile=./output/test_benchmark.out + ``` + + For more information about the use of Benchmark, see [Benchmark Use](https://www.mindspore.cn/tutorial/lite/en/master/use/benchmark_tool.html). + + For environment variable settings, you need to add the directories where the `libmindspore-lite.so` + (under the directory `mindspore-lite-{version}-{os}-{arch}/runtime/lib`) and the TensorRT and CUDA `so` libraries are located to `${LD_LIBRARY_PATH}`. + +## Supported Operators + +For supported TensorRT operators, see [Lite Operator List](https://www.mindspore.cn/tutorial/lite/en/master/operator_list_lite.html). diff --git a/tutorials/lite/source_en/use/visual_tool.md b/tutorials/lite/source_en/use/visual_tool.md new file mode 100644 index 0000000000000000000000000000000000000000..c388bb5e31abafd3e1f955bb8e637edf455ebd47 --- /dev/null +++ b/tutorials/lite/source_en/use/visual_tool.md @@ -0,0 +1,5 @@ +# Visual Tool + +No English version is available right now; contributions are welcome. + + \ No newline at end of file diff --git a/tutorials/lite/source_zh_cn/conf.py b/tutorials/lite/source_zh_cn/conf.py index 88925d3d6f26b5216c2c406566a4abbb50a1fe18..cf84f82f286519d801fd178783cd5a980c540d95 100644 --- a/tutorials/lite/source_zh_cn/conf.py +++ b/tutorials/lite/source_zh_cn/conf.py @@ -30,7 +30,7 @@ release = 'master' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'recommonmark', + 'myst_parser', 'sphinx_markdown_tables', ] diff --git a/tutorials/lite/source_zh_cn/faq.md b/tutorials/lite/source_zh_cn/faq.md index ec32e712ede9bad7e30518fb8c5aeb8a4284c3fd..d9f77fc0c8203d66aa216f35741168d101de72e4 100644 --- a/tutorials/lite/source_zh_cn/faq.md +++ b/tutorials/lite/source_zh_cn/faq.md @@ -18,7 +18,7 @@ A:目前NPU仅支持在系统ROM版本EMUI>=11、芯片支持包括Kirin 9000 **Q:为什么使用裁剪工具裁剪后的静态库在集成时存在编译失败情况?** -A:目前裁剪工具仅支持CPU的库,即编译命令中指定了`-e CPU`,具体使用请查看[使用裁剪工具降低库文件大小](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/cropper_tool.html)文档。 +A:目前裁剪工具仅支持CPU和GPU的库,具体使用请查看[使用裁剪工具降低库文件大小](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/cropper_tool.html)文档。
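例如,使用GPU的配置文件裁剪arm64-gpu静态库的命令形如下面的示意(路径均为占位符,详细用法见上述文档):

```bash
# 示意:使用GPU配置文件裁剪arm64-gpu的静态库(路径均为占位符)
./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a \
  --configFile=./cropper_mapping_gpu.cfg \
  --modelFolderPath=/model \
  --outputFile=/mindspore-lite/lib/libmindspore-lite.a
```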
diff --git a/tutorials/lite/source_zh_cn/index.rst b/tutorials/lite/source_zh_cn/index.rst index f0355fb341f6c4598cdf0d72582fadc880576c5f..bc00fa6cb3057dba6db5a96925863eb374ded21c 100644 --- a/tutorials/lite/source_zh_cn/index.rst +++ b/tutorials/lite/source_zh_cn/index.rst
@@ -97,9 +97,10 @@
@@ -134,10 +135,12 @@
@@ -149,6 +152,7 @@
@@ -176,6 +180,32 @@
[首页栅格布局的原始HTML标记在提取时丢失;这些代码块新增一张文档卡片,可恢复的内容如下]
+ 模型混淆工具
+ MindSpore Lite提供一个轻量级的离线模型混淆工具,可用于保护IOT或端侧设备上部署的模型文件的机密性。
diff --git a/tutorials/lite/source_zh_cn/operator_list_lite.md b/tutorials/lite/source_zh_cn/operator_list_lite.md index 02ffa9fc770d98dbb19d82306d033a62fa86a09e..b848766c3fc90003511017548ab0920cfa8e2417 100644 --- a/tutorials/lite/source_zh_cn/operator_list_lite.md +++ b/tutorials/lite/source_zh_cn/operator_list_lite.md @@ -6,171 +6,172 @@ 本文列举MindSpore Lite支持的算子。 -| 操作名
  | CPU
FP16 | CPU
FP32 | CPU
Int8 | CPU
UInt8 | GPU
FP16 | GPU
FP32 | NPU
  | 支持的TensorFlow Lite算子 | 支持的Caffe Lite算子 | 支持的Onnx Lite算子 |支持的TensorFlow算子 | -| --------------------- | :------------: | :------------: | :------------: | :-------------: | :------------: | :------------: | :---------: | ------------------------------- | ------------------------ | ----------------------------------------------- | ----------------------------------------------- | -| Abs | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Abs | | Abs | Abs | -| Add | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Add | | Add, Int8Add | Add, AddV2 | -| AddGrad | | ✅ | | | | | | | | | | -| AddN | | ✅ | | | | | | AddN | | | | -| Assert | | ✅ | | | | | | | | | Assert | -| Argmax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Argmax | ArgMax | ArgMax | Argmax | -| Argmin | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Argmin | | | ArgMin | -| AvgPool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | MeanPooling | Pooling | AveragePool,
GlobalAveragePool,
Int8AveragePool | AvgPool | -| AvgPoolGrad | | ✅ | | | | | | | | | | -| BatchNorm | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | BatchNorm | BatchNormalization | | -| BatchNormGrad | | ✅ | | | | | | | | | | -| BatchToSpace | | ✅ | ✅ | ✅ | ✅ | ✅ | | BatchToSpace,
BatchToSpaceND | | | BatchToSpace,
BatchToSpaceND | -| BiasAdd | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | BiasAdd | BiasAdd | -| BiasAddGrad | | ✅ | | | | | | | | | | -| Broadcast | | ✅ | | | | | | BroadcastTo | | Expand | | -| Cast | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Cast, QUANTIZE,
DEQUANTIZE | | Cast | Cast | -| Ceil | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Ceil | | Ceil | Ceil | -| Concat | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Concat | Concat | Concat | ConcatV2 | -| ConstantOfShape | | ✅ | | | | | | | | ConstantOfShape | | -| Conv2d | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Conv2D | Convolution | Conv, Int8Conv,
ConvRelu,
Int8ConvRelu | Conv2D | -| Conv2dGrad | | ✅ | | | | | | | | | | -| Conv2dTranspose | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | DeConv2D | Deconvolution | ConvTranspose | Conv2DBackpropInput | -| Conv2dTransposeGrad | | ✅ | | | | | | | | | | -| Cos | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Cos | | Cos | Cos | -| Crop | ✅ | ✅ | ✅ | ✅ | | | | | Crop | | | -| CropAndResize | | ✅ | | | | | ✅ | | | | CropAndResize | -| CumSum | | ✅ | | | | | | | | | Cumsum | -| CustomExtractFeatures | | ✅ | | | | | | ExtractFeatures | | | | -| CustomNormalize | | ✅ | | | | | | Normalize | | | | -| CustomPredict | | ✅ | | | | | | Predict | | | | -| DeDepthwiseConv2D | | ✅ | ✅ | ✅ | | | | | Deconvolution | | | -| DepthToSpace | | ✅ | ✅ | ✅ | ✅ | ✅ | | DepthToSpace | | DepthToSpace | | -| DepthwiseConv2dNative | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | DepthwiseConv2D | Convolution | | DepthwiseConv2dNative | -| DetectionPostProcess | | ✅ | ✅ | ✅ | | | | Custom | | | | -| Div | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Div, RealDiv | | Div | Div, RealDiv | -| DivGrad | | ✅ | | | | | | | | | | -| Eltwise | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Eltwise | Sum, Max[3] | | -| Elu | | ✅ | | | | | | | Elu | Elu,
NonMaxSuppression | NonMaxSuppressionV3 | -| EluGrad | | ✅ | | | | | | | | | | -| Equal | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Equal | | Equal | Equal | -| Exp | | ✅ | | | ✅ | ✅ | | Exp | Exp | Exp | Exp | -| ExpandDims | ✅ | ✅ | ✅ | ✅ | | | ✅ | ExpandDims | | | ExpandDims | -| Fill | | ✅ | | | | | | Fill | | | Fill | -| Flatten | ✅ | ✅ | | | | | | | Flatten | | | -| Floor | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | flOOR | | Floor | Floor | -| FloorDiv | ✅ | ✅ | | | ✅ | ✅ | ✅ | FloorDiv | | | FloorDiv | -| FloorMod | ✅ | ✅ | | | ✅ | ✅ | ✅ | FloorMod | | | FloorMod | -| FullConnection | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | FullyConnected | InnerProduct | | | -| FusedBatchNorm | ✅ | ✅ | ✅ | ✅ | | | ✅ | FusedBatchNorm | | | FusedBatchNorm,
FusedBatchNormV3 | -| GatherNd | | ✅ | ✅ | ✅ | ✅ | ✅ | | GatherND | | | GatherNd | -| Gather | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Gather | | Gather | GatherV2 | -| Greater | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Greater | | Greater | Greater | -| GreaterEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | GreaterEqual | | | GreaterEqual | -| GRU | ✅ | ✅ | | | | | | | | | | -| HardTanh | ✅ | ✅ | | | | | | | | | | -| HashtableLookup | | ✅ | | | | | | HashtableLookup | | | | -| HSigmoid | | ✅ | | ✅ | | | | | | | | -| Hswish | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | HardSwish | | | | -| HswishGrad | | ✅ | | | | | | | | | | -| InstanceNorm | ✅ | ✅ | | | | | | InstanceNorm | | | | -| InvertPermutation | | ✅ | | | | | | | | | InvertPermutation | -| L2Norm | | ✅ | ✅ | | | | | L2_NORMALIZATION | | | | -| LayerNorm | | ✅ | ✅ | | | | | | | | | -| LeakyReLU | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | LeakyRelu | | LeakyRelu | LeakyRelu | -| LeakyReLUGrad | | ✅ | | | | | | | | | | -| Less | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Less | | Less | Less | -| LessEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | LessEqual | | | LessEqual | -| LRN | | ✅ | | | | | | LocalResponseNorm | | Lrn, LRN | | -| Log | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Log | | Log | Log | -| LogGrad | ✅ | ✅ | | | | | | | | | | -| LogicalAnd | ✅ | ✅ | | | ✅ | ✅ | ✅ | LogicalAnd | | And | LogicalAnd | -| LogicalNot | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | LogicalNot | | Not | LogicalNot | -| LogicalOr | ✅ | ✅ | | | ✅ | ✅ | ✅ | LogicalOr | | Or | LogicalOr | -| LshProjection | | ✅ | | | | | | LshProjection | | | | -| LSTM | ✅ | ✅ | | | | | | | | LSTM | | -| MatMul | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | MatMul | MatMul,
BatchMatMul | -| MatMulGrad | | ✅ | | | | | | | | | | -| Maximum | ✅ | ✅ | | | ✅ | ✅ | ✅ | Maximum | | | Maximum | -| MaximumGrad | | ✅ | | | | | | | | | | -| MaxPool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | MaxPooling | Pooling | MaxPool, GlobalMaxPool | MaxPool | -| MaxPoolGrad | | ✅ | | | | | | | | | | -| Merge | ✅ | ✅ | | | | | | | | | Merge | -| Minimum | ✅ | ✅ | | | ✅ | ✅ | ✅ | Minimum | | Min | Minimum | -| MinimumGrad | | ✅ | | | | | | | | | | -| Mul | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Mul | | Mul | Mul | -| MulGrad | | ✅ | | | | | | | | | | -| Neg | ✅ | ✅ | | | ✅ | ✅ | ✅ | Neg | | Neg | | -| NegGrad | | ✅ | | | | | | | | | | -| NotEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | NotEqual | | | NotEqual | -| OneHot | | ✅ | | | ✅ | ✅ | | OneHot | | OneHot | OneHot | -| Pad | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Pad, MirrorPad | | Pad | MirrorPad, Pad | -| Pow | | ✅ | ✅ | ✅ | ✅ | ✅ | | Pow | Power | Pow[2] | Pow | -| PowGrad | | ✅ | | | | | | | | | | -| PReLU | | ✅ | | | ✅ | ✅ | | PRELU | PReLU | PRelu | | -| RandomStandardNormal | | ✅ | | | | | | | | | RandomStandardNormal | -| RandomUniform | | ✅ | | | | | | | | | RandomUniform | -| Range | | ✅ | | | | | | Range | | | Range,
RaggedRange | -| Rank | | ✅ | | | | | | Rank | | | Rank | -| Reciprocal | ✅ | ✅ | ✅ | | | | ✅ | | | | | -| ReduceAll | | ✅ | | | | | | | | | All | -| ReduceASum | | ✅ | | | ✅ | ✅ | | | Reduction | | | -| ReduceMax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | ReduceMax | | ReduceMax | Max | -| ReduceMean | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Mean | Reduction | ReduceMean | Mean | -| ReduceMin | | ✅ | ✅ | ✅ | ✅ | ✅ | | ReduceMin | | ReduceMin | Min | -| ReduceProd | | ✅ | ✅ | ✅ | ✅ | ✅ | | ReduceProd | | ReduceProd | Prod | -| ReduceSum | | ✅ | ✅ | ✅ | ✅ | ✅ | | Sum | Reduction | ReduceSum | Sum | -| ReduceSumSquare | | ✅ | ✅ | ✅ | | | | | Reduction | ReduceSumSquare | | -| ReLU | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Relu | ReLU | Relu | Relu | -| ReLUGrad | ✅ | ✅ | | | | | | | | | | -| ReLU6 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Relu6 | ReLU6 | Clip[1] | Relu6 | -| ReLU6Grad | ✅ | ✅ | | | | | | | | | | -| Reshape | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Reshape | Reshape | Reshape,Flatten | Reshape | -| Resize | | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ResizeBilinear,
NearestNeighbor | Interp | | ResizeBilinear,
ResizeBicubic,
ResizeNearestNeighbor | -| ResizeGrad | | ✅ | | | | | | | | | | -| Reverse | | ✅ | | | | | | reverse | | | ReverseV2 | -| ReverseSequence | | ✅ | | | | | | ReverseSequence | | | ReverseSequence | -| Round | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Round | | Round | Round | -| Rsqrt | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Rsqrt | | | Rsqrt | -| Select | | ✅ | | | | | | | | | Select | -| Selu | | | | | | | | | | | Selu | -| Scale | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Scale | | | -| ScatterNd | | ✅ | | | | | | ScatterNd | | | | -| Shape | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Shape | | Shape | Shape | -| Sigmoid | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Logistic | Sigmoid | Sigmoid | Sigmoid | -| SigmoidGrad | ✅ | ✅ | | | | | | | | | | -| Sin | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sin | | Sin | Sin | -| Size | | ✅ | | | | | | | | | Size | -| Slice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Slice | Slice | Slice | Slice | -| SkipGram | | ✅ | | | | | | SKipGram | | | | -| Softmax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Softmax | Softmax | Softmax | Softmax | -| SoftmaxGrad | | ✅ | | | | | | | | | | -| Softplus | | ✅ | | | | | | | | | Softplus | -| SpaceToBatch | | ✅ | ✅ | ✅ | ✅ | ✅ | | SpaceToBatch | | | | -| SpaceToBatchND | | ✅ | ✅ | ✅ | ✅ | ✅ | | SpaceToBatchND | | | SpaceToBatchND | -| SpaceToDepth | | ✅ | | | ✅ | ✅ | | SpaceToDepth | | SpaceToDepth | | -| SparseToDense | | ✅ | | | ✅ | ✅ | | SpareToDense | | | | -| Split | ✅ | ✅ | ✅ | ✅ | | | ✅ | Split, SplitV | | Split | Split, SplitV | -| Sqrt | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sqrt | | Sqrt | Sqrt | -| Square | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Square | | | Square | -| SquaredDifference | ✅ | ✅ | | | ✅ | ✅ | ✅ | SquaredDifference | | | SquaredDifference | -| Squeeze | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Squeeze | | Squeeze | Squeeze | -| StridedSlice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | StridedSlice | | | StridedSlice | -| Stack | ✅ | ✅ | | | ✅ | ✅ | | Stack | | | Pack | -| Sub | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sub | | Sub | Sub | -| SubGrad | | ✅ | | | | | | | | | | -| Swish | ✅ | ✅ | | | | | | | | | | -| Switch | ✅ | ✅ | | | | | | | | | Switch | -| Tanh | ✅ | ✅ | | | ✅ | ✅ | ✅ | Tanh | TanH | Tanh, Sign | Tanh | -| TanhGrad | | ✅ | | | | | | | | | | -| TensorListFromTensor | ✅ | ✅ | | | | | | | | | TensorListFromTensor | -| TensorListGetItem | ✅ | ✅ | | | | | | | | | TensorListGetItem | -| TensorListReserve | ✅ | ✅ | | | | | | | | | TensorListReserve | -| TensorListSetItem | ✅ | ✅ | | | | | | | | | TensorListSetItem | -| TensorListStack | ✅ | ✅ | | | | | | | | | TensorListStack | -| Tile | ✅ | ✅ | | | | | ✅ | Tile | Tile | Tile | Tile | -| TopK | | ✅ | ✅ | ✅ | | | | TopKV2 | | TopK | TopKV2 | -| Transpose | ✅ | ✅ | ✅ | | ✅ | ✅ | ✅ | Transpose | Permute | Transpose | Transpose | -| UniformReal | | ✅ | | | | | | | | | | -| Unique | | ✅ | | | | | | Unique | | | | -| Unsqueeze | ✅ | ✅ | ✅ | ✅ | | | ✅ | | | Unsqueeze | | -| Unstack | | ✅ | | | | | | Unstack | | | | -| Where | | ✅ | | | | | | Where | | | Where | -| ZerosLike | | ✅ | | | | | | ZerosLike | | | ZerosLike | +| 操作名
  | CPU
FP16 | CPU
FP32 | CPU
Int8 | CPU
UInt8 | GPU
FP16 | GPU
FP32 | NPU
  | TensorRT
  |支持的TensorFlow Lite算子 | 支持的Caffe Lite算子 | 支持的Onnx Lite算子 |支持的TensorFlow算子 | +| --------------------- | :------------: | :------------: | :------------: | :-------------: | :------------: | :------------: | :---------: | :---------: | ------------------------------- | ------------------------ | ----------------------------------------------- | ----------------------------------------------- | +| Abs | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Abs | | Abs | Abs | +| Add | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Add | | Add, Int8Add | Add, AddV2 | +| AddGrad | | ✅ | | | | | | | | | | | +| AddN | | ✅ | | | | | | | AddN | | | | +| Assert | | ✅ | | | | | | | | | | Assert | +| Argmax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Argmax | ArgMax | ArgMax | Argmax | +| Argmin | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | Argmin | | | ArgMin | +| AvgPool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | MeanPooling | Pooling | AveragePool,
GlobalAveragePool,
Int8AveragePool | AvgPool | +| AvgPoolGrad | | ✅ | | | | | | | | | | | +| BatchNorm | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | BatchNorm | BatchNormalization | | +| BatchNormGrad | | ✅ | | | | | | | | | | | +| BatchToSpace | | ✅ | ✅ | ✅ | ✅ | ✅ | | | BatchToSpace,
BatchToSpaceND | | | BatchToSpace,
BatchToSpaceND | +| BiasAdd | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | BiasAdd | BiasAdd | +| BiasAddGrad | | ✅ | | | | | | | | | | | +| Broadcast | | ✅ | | | | | | | BroadcastTo | | Expand | | +| Cast | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Cast,
QUANTIZE,
DEQUANTIZE | | Cast | Cast | +| Ceil | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Ceil | | Ceil | Ceil | +| Concat | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Concat | Concat | Concat | ConcatV2 | +| ConstantOfShape | | ✅ | | | | | | | | | ConstantOfShape | | +| Conv2d | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Conv2D | Convolution | Conv, Int8Conv,
ConvRelu,
Int8ConvRelu | Conv2D | +| Conv2dGrad | | ✅ | | | | | | | | | | | +| Conv2dTranspose | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | DeConv2D | Deconvolution | ConvTranspose | Conv2DBackpropInput | +| Conv2dTransposeGrad | | ✅ | | | | | | | | | | | +| Cos | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | |Cos | | Cos | Cos | +| Crop | ✅ | ✅ | ✅ | ✅ | | | | | | Crop | | | +| CropAndResize | | ✅ | | | | | ✅ | | | | | CropAndResize | +| CumSum | | ✅ | | | | | | | | | | Cumsum | +| CustomExtractFeatures | | ✅ | | | | | | | ExtractFeatures | | | | +| CustomNormalize | | ✅ | | | | | | | Normalize | | | | +| CustomPredict | | ✅ | | | | | | | Predict | | | | +| DeDepthwiseConv2D | | ✅ | ✅ | ✅ | | | | | | Deconvolution | | | +| DepthToSpace | | ✅ | ✅ | ✅ | ✅ | ✅ | | | DepthToSpace | | DepthToSpace | | +| DepthwiseConv2dNative | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | DepthwiseConv2D | Convolution | | DepthwiseConv2dNative | +| DetectionPostProcess | | ✅ | ✅ | ✅ | | | | | Custom | | | | +| Div | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Div, RealDiv | | Div | Div, RealDiv | +| DivGrad | | ✅ | | | | | | | | | | | +| Eltwise | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Eltwise | Sum, Max[3] | | +| Elu | | ✅ | | | | | | | | Elu | Elu,
NonMaxSuppression | NonMaxSuppressionV3 | +| EluGrad | | ✅ | | | | | | | | | | | +| Equal | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Equal | | Equal | Equal | +| Exp | | ✅ | | | ✅ | ✅ | | | Exp | Exp | Exp | Exp | +| ExpandDims | ✅ | ✅ | ✅ | ✅ | | | ✅ | | ExpandDims | | | ExpandDims | +| Fill | | ✅ | | | | | | | Fill | | | Fill | +| Flatten | ✅ | ✅ | | | | | | | | Flatten | | | +| Floor | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Floor | | Floor | Floor | +| FloorDiv | ✅ | ✅ | | | ✅ | ✅ | ✅ | | FloorDiv | | | FloorDiv | +| FloorMod | ✅ | ✅ | | | ✅ | ✅ | ✅ | | FloorMod | | | FloorMod | +| FullConnection | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | FullyConnected | InnerProduct | | | +| FusedBatchNorm | ✅ | ✅ | ✅ | ✅ | | | ✅ | | FusedBatchNorm | | | FusedBatchNorm,
FusedBatchNormV3 | +| GatherNd | | ✅ | ✅ | ✅ | ✅ | ✅ | | | GatherND | | | GatherNd | +| Gather | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Gather | | Gather | GatherV2 | +| Greater | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Greater | | Greater | Greater | +| GreaterEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | GreaterEqual | | | GreaterEqual | +| GRU | ✅ | ✅ | | | | | | | | | | | +| HardTanh | ✅ | ✅ | | | | | | | | | | | +| HashtableLookup | | ✅ | | | | | | | HashtableLookup | | | | +| HSigmoid | | ✅ | | ✅ | | | | | | | | | +| Hswish | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | HardSwish | | | | +| HswishGrad | | ✅ | | | | | | | | | | | +| InstanceNorm | ✅ | ✅ | | | | | | | InstanceNorm | | | | +| InvertPermutation | | ✅ | | | | | | | | | | InvertPermutation | +| L2Norm | | ✅ | ✅ | | | | | | L2_NORMALIZATION | | | | +| LayerNorm | | ✅ | ✅ | | | | | | | | | | +| LeakyReLU | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | LeakyRelu | | LeakyRelu | LeakyRelu | +| LeakyReLUGrad | | ✅ | | | | | | | | | | | +| Less | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Less | | Less | Less | +| LessEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | LessEqual | | | LessEqual | +| LRN | | ✅ | | | | | | | LocalResponseNorm | | Lrn, LRN | | +| Log | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Log | | Log | Log | +| LogGrad | ✅ | ✅ | | | | | | | | | | | +| LogicalAnd | ✅ | ✅ | | | ✅ | ✅ | ✅ | | LogicalAnd | | And | LogicalAnd | +| LogicalNot | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | LogicalNot | | Not | LogicalNot | +| LogicalOr | ✅ | ✅ | | | ✅ | ✅ | ✅ | | LogicalOr | | Or | LogicalOr | +| LogSoftmax | | ✅ | | | ✅ | ✅ | ✅ | | LogSoftmax | | LogSoftmax | | +| LshProjection | | ✅ | | | | | | | LshProjection | | | | +| LSTM | ✅ | ✅ | | | | | | | | | LSTM | | +| MatMul | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | BatchMatMul | | MatMul,
Gemm | MatMul,
BatchMatMul | +| MatMulGrad | | ✅ | | | | | | | | | | | +| Maximum | ✅ | ✅ | | | ✅ | ✅ | ✅ | | Maximum | | | Maximum | +| MaximumGrad | | ✅ | | | | | | | | | | | +| MaxPool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | MaxPooling | Pooling | MaxPool,
GlobalMaxPool | MaxPool | +| MaxPoolGrad | | ✅ | | | | | | | | | | | +| Merge | ✅ | ✅ | | | | | | | | | | Merge | +| Minimum | ✅ | ✅ | | | ✅ | ✅ | ✅ | | Minimum | | Min | Minimum | +| MinimumGrad | | ✅ | | | | | | | | | | | +| Mul | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Mul | | Mul | Mul | +| MulGrad | | ✅ | | | | | | | | | | | +| Neg | ✅ | ✅ | | | ✅ | ✅ | ✅ | | Neg | | Neg | | +| NegGrad | | ✅ | | | | | | | | | | | +| NotEqual | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | NotEqual | | | NotEqual | +| OneHot | | ✅ | | | ✅ | ✅ | | | OneHot | | OneHot | OneHot | +| Pad | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Pad, MirrorPad, PadV2 | | Pad | MirrorPad, Pad, PadV2 | +| Pow | | ✅ | ✅ | ✅ | ✅ | ✅ | | ✅ | Pow | Power | Pow[2] | Pow | +| PowGrad | | ✅ | | | | | | | | | | | +| PReLU | | ✅ | | | ✅ | ✅ | | | PRELU | PReLU | PRelu | | +| RandomStandardNormal | | ✅ | | | | | | | | | | RandomStandardNormal | +| RandomUniform | | ✅ | | | | | | | | | | RandomUniform | +| Range | | ✅ | | | | | | | Range | | | Range,
RaggedRange | +| Rank | | ✅ | | | | | | | Rank | | | Rank | +| Reciprocal | ✅ | ✅ | ✅ | | | | ✅ | | | | | | +| ReduceAll | | ✅ | | | | | | | | | | All | +| ReduceASum | | ✅ | | | ✅ | ✅ | | | | Reduction | | | +| ReduceMax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | | ReduceMax | | ReduceMax | Max | +| ReduceMean | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | ✅ | Mean | Reduction | ReduceMean | Mean | +| ReduceMin | | ✅ | ✅ | ✅ | ✅ | ✅ | | | ReduceMin | | ReduceMin | Min | +| ReduceProd | | ✅ | ✅ | ✅ | ✅ | ✅ | | | ReduceProd | | ReduceProd | Prod | +| ReduceSum | | ✅ | ✅ | ✅ | ✅ | ✅ | | | Sum | Reduction | ReduceSum | Sum | +| ReduceSumSquare | | ✅ | ✅ | ✅ | | | | | | Reduction | ReduceSumSquare | | +| ReLU | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Relu | ReLU | Relu | Relu | +| ReLUGrad | ✅ | ✅ | | | | | | | | | | | +| ReLU6 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Relu6 | ReLU6 | Clip[1] | Relu6 | +| ReLU6Grad | ✅ | ✅ | | | | | | | | | | | +| Reshape | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Reshape | Reshape | Reshape,
Flatten | Reshape | +| Resize | | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | ResizeBilinear,
NearestNeighbor | Interp | | ResizeBilinear,
ResizeBicubic,
ResizeNearestNeighbor | +| ResizeGrad | | ✅ | | | | | | | | | | | +| Reverse | | ✅ | | | | | | | reverse | | | ReverseV2 | +| ReverseSequence | | ✅ | | | | | | | ReverseSequence | | | ReverseSequence | +| Round | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Round | | Round | Round | +| Rsqrt | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Rsqrt | | | Rsqrt | +| Select | | ✅ | | | | | | | | | | Select | +| Selu | | | | | | | | | | | | Selu | +| Scale | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Scale | | | +| ScatterNd | | ✅ | | | | | | | ScatterNd | | | | +| Shape | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Shape | | Shape | Shape | +| Sigmoid | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Logistic | Sigmoid | Sigmoid | Sigmoid | +| SigmoidGrad | ✅ | ✅ | | | | | | | | | | | +| Sin | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Sin | | Sin | Sin | +| Size | | ✅ | | | | | | | | | | Size | +| Slice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Slice | Slice | Slice | Slice | +| SkipGram | | ✅ | | | | | | | SkipGram | | | | +| Softmax | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Softmax | Softmax | Softmax | Softmax | +| SoftmaxGrad | | ✅ | | | | | | | | | | | +| Softplus | | ✅ | | | | | | | | | | Softplus | +| SpaceToBatch | | ✅ | ✅ | ✅ | ✅ | ✅ | | | SpaceToBatch | | | | +| SpaceToBatchND | | ✅ | ✅ | ✅ | ✅ | ✅ | | | SpaceToBatchND | | | SpaceToBatchND | +| SpaceToDepth | | ✅ | | | ✅ | ✅ | | | SpaceToDepth | | SpaceToDepth | | +| SparseToDense | | ✅ | | | ✅ | ✅ | | | SparseToDense | | | | +| Split | ✅ | ✅ | ✅ | ✅ | | | ✅ | | Split, SplitV | | Split | Split, SplitV | +| Sqrt | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sqrt | | Sqrt | Sqrt | +| Square | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | Square | | | Square | +| SquaredDifference | ✅ | ✅ | | | ✅ | ✅ | ✅ | | SquaredDifference | | | SquaredDifference | +| Squeeze | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | ✅ | Squeeze | | Squeeze | Squeeze | +| StridedSlice | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | StridedSlice | | Slice,
DynamicSlice | StridedSlice | +| Stack | ✅ | ✅ | | | ✅ | ✅ | | | Stack | | | Pack | +| Sub | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | Sub | | Sub | Sub | +| SubGrad | | ✅ | | | | | | | | | | | +| Swish | ✅ | ✅ | | | | | | | | | | | +| Switch | ✅ | ✅ | | | | | | | | | | Switch | +| Tanh | ✅ | ✅ | | | ✅ | ✅ | ✅ | ✅ | Tanh | TanH | Tanh, Sign | Tanh | +| TanhGrad | | ✅ | | | | | | | | | | | +| TensorListFromTensor | ✅ | ✅ | | | | | | | | | | TensorListFromTensor | +| TensorListGetItem | ✅ | ✅ | | | | | | | | | | TensorListGetItem | +| TensorListReserve | ✅ | ✅ | | | | | | | | | | TensorListReserve | +| TensorListSetItem | ✅ | ✅ | | | | | | | | | | TensorListSetItem | +| TensorListStack | ✅ | ✅ | | | | | | | | | | TensorListStack | +| Tile | ✅ | ✅ | | | | | ✅ | | Tile | Tile | Tile | Tile | +| TopK | | ✅ | ✅ | ✅ | | | | | TopKV2 | | TopK | TopKV2 | +| Transpose | ✅ | ✅ | ✅ | | ✅ | ✅ | ✅ | ✅ | Transpose | Permute | Transpose | Transpose | +| UniformReal | | ✅ | | | | | | | | | | | +| Unique | | ✅ | | | | | | | Unique | | | | +| Unsqueeze | ✅ | ✅ | ✅ | ✅ | | | ✅ | ✅ | | | Unsqueeze | | +| Unstack | | ✅ | | | | | | | Unstack | | | | +| Where | | ✅ | | | | | | | Where | | Where | Where | +| ZerosLike | | ✅ | | | | | | | ZerosLike | | | ZerosLike | | 转换工具支持的其他算子[4] | | | | | | | | | | Loop, Dropout, If | Dropout, Enter,
Exit, If,
IsFinite,
LinSpace,
LoopCond,
NextIteration,
StatelessIf,
StatelessWhile,
While | [1] Clip:仅支持将clip(0, 6)转换为Relu6。 diff --git a/tutorials/lite/source_zh_cn/quick_start/image_segmentation.md b/tutorials/lite/source_zh_cn/quick_start/image_segmentation.md index b23c1e0cc4ef9719cfa6ff0618987e6675a96120..89d53905917aa8bb5fe38065c7ec9b3b4ae0e379 100644 --- a/tutorials/lite/source_zh_cn/quick_start/image_segmentation.md +++ b/tutorials/lite/source_zh_cn/quick_start/image_segmentation.md @@ -119,7 +119,7 @@ app ### 配置MindSpore Lite依赖项 -Android调用MindSpore Android AAR时,需要相关库文件支持。可通过MindSpore Lite[源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)生成`mindspore-lite-{version}-inference-android.tar.gz`库文件包并解压缩(包含`mindspore-lite-{version}.aar`库文件)。 +Android调用MindSpore Android AAR时,需要相关库文件支持。可通过MindSpore Lite[源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)生成`mindspore-lite-{version}-android.tar.gz`库文件包并解压缩(包含`mindspore-lite-{version}.aar`库文件)。 > version:输出件版本号,与所编译的分支代码对应的版本一致。 diff --git a/tutorials/lite/source_zh_cn/quick_start/quick_start.md b/tutorials/lite/source_zh_cn/quick_start/quick_start.md index ee936fbf521fb27eaeabb6ee9583372e2ee0402e..330b1bebec14112d58d02ee30e1eb1c6d5e0b804 100644 --- a/tutorials/lite/source_zh_cn/quick_start/quick_start.md +++ b/tutorials/lite/source_zh_cn/quick_start/quick_start.md @@ -171,17 +171,17 @@ android{ ```text # ============== Set MindSpore Dependencies. ============= include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/include) -include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/minddata/include) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/include) +include_directories(${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/minddata/dataset) add_library(mindspore-lite SHARED IMPORTED) add_library(minddata-lite SHARED IMPORTED) set_target_properties(mindspore-lite PROPERTIES IMPORTED_LOCATION - ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/lib/libmindspore-lite.so) + ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/lib/libmindspore-lite.so) set_target_properties(minddata-lite PROPERTIES IMPORTED_LOCATION - ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/inference/minddata/lib/libminddata-lite.so) + ${CMAKE_SOURCE_DIR}/src/main/cpp/${MINDSPORELITE_VERSION}/runtime/minddata/lib/libminddata-lite.so) # --------------- MindSpore Lite set End. -------------------- # Link target library. 
diff --git a/tutorials/lite/source_zh_cn/quick_start/quick_start_cpp.md b/tutorials/lite/source_zh_cn/quick_start/quick_start_cpp.md index 1c824e0a3e9ff1cf4b460b2b2521abf805c79d07..6a746cfbe61de23b537d9e62ad60ad458e57f389 100644 --- a/tutorials/lite/source_zh_cn/quick_start/quick_start_cpp.md +++ b/tutorials/lite/source_zh_cn/quick_start/quick_start_cpp.md @@ -54,7 +54,7 @@ bash build.sh ``` - > 若使用该build脚本下载MindSpore Lite推理框架失败,请手动下载硬件平台为CPU、操作系统为Ubuntu-x64的MindSpore Lite 模型推理框架[mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),将解压后`inference/lib`目录下的`libmindspore-lite.a`文件拷贝到`mindspore/lite/examples/quick_start_cpp/lib`目录、`inference/include`目录里的文件拷贝到`mindspore/lite/examples/quick_start_cpp/include`目录下。 + > 若使用该build脚本下载MindSpore Lite推理框架失败,请手动下载硬件平台为CPU、操作系统为Ubuntu-x64的MindSpore Lite 模型推理框架[mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),将解压后`runtime/lib`目录下的`libmindspore-lite.a`文件拷贝到`mindspore/lite/examples/quick_start_cpp/lib`目录、`runtime/include`目录里的文件拷贝到`mindspore/lite/examples/quick_start_cpp/include`目录下。 > > 若MobileNetV2模型下载失败,请手动下载相关模型文件[mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms),并将其拷贝到`mindspore/lite/examples/quick_start_cpp/model`目录。 > @@ -86,7 +86,7 @@ - 编译构建 - - 库下载:请手动下载硬件平台为CPU、操作系统为Windows-x64的MindSpore Lite模型推理框架[mindspore-lite-{version}-win-x64.zip](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),将解压后`inference/lib`目录下的`libmindspore-lite.a`拷贝到`mindspore/lite/examples/quick_start_cpp/lib`工程目录、`inference/include`目录里的文件拷贝到`mindspore/lite/examples/quick_start_cpp/include`工程目录下。(注意:工程项目下的`lib`、`include`目录需手工创建) + - 库下载:请手动下载硬件平台为CPU、操作系统为Windows-x64的MindSpore Lite模型推理框架[mindspore-lite-{version}-win-x64.zip](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),将解压后`runtime/lib`目录下的`libmindspore-lite.a`拷贝到`mindspore/lite/examples/quick_start_cpp/lib`工程目录、`runtime/include`目录里的文件拷贝到`mindspore/lite/examples/quick_start_cpp/include`工程目录下。(注意:工程项目下的`lib`、`include`目录需手工创建) - 模型下载:请手动下载相关模型文件[mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms),并将其拷贝到`mindspore/lite/examples/quick_start_cpp/model`目录。 - 编译:在`mindspore/lite/examples/quick_start_cpp`目录下执行[build脚本](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/examples/quick_start_cpp/build.bat),将能够自动下载相关文件并编译Demo。 diff --git a/tutorials/lite/source_zh_cn/quick_start/quick_start_java.md b/tutorials/lite/source_zh_cn/quick_start/quick_start_java.md index 9c52e48ff10e086cc9eaf60697cc8b30bf793cc3..d259511c6b4978270b6bb95796f1920ca746ee54 100644 --- a/tutorials/lite/source_zh_cn/quick_start/quick_start_java.md +++ b/tutorials/lite/source_zh_cn/quick_start/quick_start_java.md @@ -52,7 +52,7 @@ bash build.sh ``` - > 若MindSpore Lite推理框架下载失败,请手动下载硬件平台为CPU、操作系统为Ubuntu-x64的MindSpore Lite 框架[mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),解压后将`inference/lib/jar`目录下的`libmindspore-lite.so`、`libmindspore-lite-jni.so`以及`libmindspore-lite-java.jar`拷贝到`mindspore/lite/examples/quick_start_java/lib`目录。 + > 若MindSpore Lite推理框架下载失败,请手动下载硬件平台为CPU、操作系统为Ubuntu-x64的MindSpore Lite 
框架[mindspore-lite-{version}-linux-x64.tar.gz](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html),解压后将`runtime/lib`目录下的`libmindspore-lite.so`、`libmindspore-lite-jni.so`以及`libmindspore-lite-java.jar`拷贝到`mindspore/lite/examples/quick_start_java/lib`目录。 > > 若MobileNetV2模型下载失败,请手动下载相关模型文件[mobilenetv2.ms](https://download.mindspore.cn/model_zoo/official/lite/quick_start/mobilenetv2.ms),并将其拷贝到`mindspore/lite/examples/quick_start_java/model/`目录。 > diff --git a/tutorials/lite/source_zh_cn/quick_start/train_lenet.md b/tutorials/lite/source_zh_cn/quick_start/train_lenet.md index aecdf3e0565aa75ec7f1f96eba65d26ed8ef0cce..5482473158baf7940f7b49175d8180840b226d64 100644 --- a/tutorials/lite/source_zh_cn/quick_start/train_lenet.md +++ b/tutorials/lite/source_zh_cn/quick_start/train_lenet.md @@ -73,17 +73,17 @@ cd ./mindspore 源码路径下的`mindspore/lite/examples/train_lenet`目录包含了本示例程序的源码。 -请到[MindSpore Lite下载页面](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html)下载mindspore-lite-{version}-linux-x64.tar.gz以及mindspore-lite-{version}-android-aarch64.tar.gz。其中,mindspore-lite-{version}-linux-x64.tar.gz是MindSpore Lite在x86平台的安装包,里面包含模型转换工具converter_lite,本示例用它来将MINDIR模型转换成MindSpore Lite支持的`.ms`格式;mindspore-lite-{version}-android-aarch64.tar.gz是MindSpore Lite在Android平台的安装包,里面包含训练运行时库libmindspore-lite.so,本示例用它所提供的接口在Android上训练模型。下载完成后,需要将mindspore-lite-{version}-linux-x64.tar.gz重命名为mindspore-lite-{version}-train-linux-x64.tar.gz,将mindspore-lite-{version}-android-aarch64.tar.gz重命名为mindspore-lite-{version}-train-android-aarch64.tar.gz,最后将重命名后的文件放到MindSpore源码下的`output`目录(如果没有`output`目录,请创建它)。 +请到[MindSpore Lite下载页面](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html)下载mindspore-lite-{version}-linux-x64.tar.gz以及mindspore-lite-{version}-android-aarch64.tar.gz。其中,mindspore-lite-{version}-linux-x64.tar.gz是MindSpore Lite在x86平台的安装包,里面包含模型转换工具converter_lite,本示例用它来将MINDIR模型转换成MindSpore Lite支持的`.ms`格式;mindspore-lite-{version}-android-aarch64.tar.gz是MindSpore Lite在Android平台的安装包,里面包含训练运行时库libmindspore-lite.so,本示例用它所提供的接口在Android上训练模型。下载完成后,将这两个安装包放到MindSpore源码下的`output`目录(如果没有`output`目录,请创建它)。 假设下载的安装包存放在`/Downloads`目录,上述操作对应的`Linux`指令如下: ```bash mkdir output -cp /Downloads/mindspore-lite-{version}-linux-x64.tar.gz output/mindspore-lite-{version}-train-linux-x64.tar.gz -cp /Downloads/mindspore-lite-{version}-android-aarch64.tar.gz output/mindspore-lite-{version}-train-android-aarch64.tar.gz +cp /Downloads/mindspore-lite-{version}-linux-x64.tar.gz output/ +cp /Downloads/mindspore-lite-{version}-android-aarch64.tar.gz output/ ``` -您也可以通过[源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)直接生成端侧训练框架对应的x86平台安装包mindspore-lite-{version}-train-linux-x64.tar.gz以及Android平台安装包mindspore-lite-{version}-train-android-aarch64.tar.gz,源码编译的安装包会自动生成在`output`目录下,请确保`output`目录下同时存在这两个安装包。 +您也可以通过[源码编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)直接生成端侧训练框架对应的x86平台安装包mindspore-lite-{version}-linux-x64.tar.gz以及Android平台安装包mindspore-lite-{version}-android-aarch64.tar.gz,源码编译的安装包会自动生成在`output`目录下,请确保`output`目录下同时存在这两个安装包。 ### 连接安卓设备 diff --git a/tutorials/lite/source_zh_cn/quick_start/train_lenet_java.md b/tutorials/lite/source_zh_cn/quick_start/train_lenet_java.md index 
f142f35f07469d7cb9d0e6e874eb1d55b7776865..ac74ccba1d6d66e7a3d988590137a430d8bdfca9 100644 --- a/tutorials/lite/source_zh_cn/quick_start/train_lenet_java.md +++ b/tutorials/lite/source_zh_cn/quick_start/train_lenet_java.md @@ -80,7 +80,7 @@ MNIST_Data/ ```bash cd /codes/mindspore/output -tar xzf mindspore-lite-${version}-train-linux-x64-jar.tar.gz +tar xzf mindspore-lite-${version}-linux-x64-jar.tar.gz mkdir ../mindspore/lite/examples/train_lenet_java/lib -cp mindspore-lite-${version}-train-linux-x64-jar/jar/* ../mindspore/lite/examples/train_lenet_java/lib/ +cp mindspore-lite-${version}-linux-x64-jar/jar/* ../mindspore/lite/examples/train_lenet_java/lib/ ``` diff --git a/tutorials/lite/source_zh_cn/use/asic.rst b/tutorials/lite/source_zh_cn/use/asic.rst index 3559e77523a7fd30e8fb98204db0af716f734454..caf3accb604bdda95009c59fc5ccb361561a4d6a 100644 --- a/tutorials/lite/source_zh_cn/use/asic.rst +++ b/tutorials/lite/source_zh_cn/use/asic.rst @@ -6,3 +6,4 @@ npu_info nnie + tensorrt_info diff --git a/tutorials/lite/source_zh_cn/use/benchmark_tool.md b/tutorials/lite/source_zh_cn/use/benchmark_tool.md index fdebd8c10b4892465f0696cdd5b62a2ec6903c5d..14ce48285a6fefb6e16a0547cec08ae873556580 100644 --- a/tutorials/lite/source_zh_cn/use/benchmark_tool.md +++ b/tutorials/lite/source_zh_cn/use/benchmark_tool.md @@ -41,7 +41,7 @@ - 将推理需要的动态链接库加入环境变量LD_LIBRARY_PATH。 ```bash - export LD_LIBRARY_PATH=${PACKAGE_ROOT_PATH}/inference/lib:${LD_LIBRARY_PATH} + export LD_LIBRARY_PATH=${PACKAGE_ROOT_PATH}/runtime/lib:${LD_LIBRARY_PATH} ``` 其中${PACKAGE_ROOT_PATH}是编译得到的包解压后的根目录。 @@ -224,7 +224,7 @@ Model = model.ms, NumThreads = 1, MinRunTime = 0.104000 ms, MaxRunTime = 0.17900 - 将推理需要的动态链接库加入环境变量PATH。 ```bash - set PATH=%PACKAGE_ROOT_PATH%\inference\lib;%PATH% + set PATH=%PACKAGE_ROOT_PATH%\runtime\lib;%PATH% ``` 其中%PACKAGE_ROOT_PATH%是编译得到的包解压后的根目录。 diff --git a/tutorials/lite/source_zh_cn/use/benchmark_train_tool.md b/tutorials/lite/source_zh_cn/use/benchmark_train_tool.md index 38a820392cd068e958b0d63302ae7bce728138e1..d57392ccdf5679a1fa7049a2567096c4d2558da6 100644 --- a/tutorials/lite/source_zh_cn/use/benchmark_train_tool.md +++ b/tutorials/lite/source_zh_cn/use/benchmark_train_tool.md @@ -29,15 +29,15 @@ - 编译:`benchmark_train`工具代码在MindSpore源码的`mindspore/lite/tools/benchmark_train`目录中,参考构建文档中的[环境要求](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id1)和[编译示例](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id3)编译端侧训练框架。 -- 配置环境变量:参考构建文档中的[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id5),获得`benchmark_train`工具,并配置环境变量。假设您编译出的端侧训练框架压缩包所在完整路径为`/path/mindspore-lite-{version}-train-{os}-{arch}.tar.gz`,解压并配置环境变量的命令如下: +- 配置环境变量:参考构建文档中的[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id5),获得`benchmark_train`工具,并配置环境变量。假设您编译出的端侧训练框架压缩包所在完整路径为`/path/mindspore-lite-{version}-{os}-{arch}.tar.gz`,解压并配置环境变量的命令如下: ```bash cd /path - tar xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz - export LD_LIBRARY_PATH=/path/mindspore-lite-{version}-train-{os}-{arch}/train/lib:/path/mindspore-lite-{version}-train-{os}-{arch}/train/third_party/libjpeg-turbo/lib:${LD_LIBRARY_PATH} + tar xvf mindspore-lite-{version}-{os}-{arch}.tar.gz + export LD_LIBRARY_PATH=/path/mindspore-lite-{version}-{os}-{arch}/runtime/lib:/path/mindspore-lite-{version}-{os}-{arch}/runtime/third_party/libjpeg-turbo/lib:${LD_LIBRARY_PATH} ``` -benchmark_train工具所在完整路径为`/path/mindspore-lite-{version}-train-{os}-{arch}/tools/benchmark_train/benchmark_train`。 +benchmark_train工具所在完整路径为`/path/mindspore-lite-{version}-{os}-{arch}/tools/benchmark_train/benchmark_train`。 
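环境变量配置完成后,可以先用`ldd`粗略检查`benchmark_train`的动态库依赖是否都已解析,以下仅为示意(路径沿用上文的占位符):

```bash
# 示意:检查benchmark_train的动态库依赖是否解析成功(路径为占位符)
cd /path/mindspore-lite-{version}-{os}-{arch}/tools/benchmark_train
ldd ./benchmark_train
```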
### 参数说明 diff --git a/tutorials/lite/source_zh_cn/use/build.md b/tutorials/lite/source_zh_cn/use/build.md index 5469b4695c4de4f8c9857bf570f92678a8a256bc..89379d51d5bc4273c4e39f81bbc08f44428b384b 100644 --- a/tutorials/lite/source_zh_cn/use/build.md +++ b/tutorials/lite/source_zh_cn/use/build.md @@ -1,6 +1,6 @@ # 编译MindSpore Lite -`Windows` `Linux` `Android` `环境准备` `中级` `高级` +`Windows` `Linux` `Mac` `Android` `iOS` `环境准备` `中级` `高级` @@ -9,27 +9,22 @@ - [环境要求](#环境要求) - [编译选项](#编译选项) - [编译示例](#编译示例) - - [端侧推理框架编译输出](#端侧推理框架编译输出) - - [模型转换工具converter目录结构说明](#推理模型转换工具converter目录结构说明) - - [代码生成工具codegen目录结构说明](#代码生成工具codegen目录结构说明) - - [模型混淆工具obfuscator目录结构说明](#模型混淆工具obfuscator目录结构说明) - - [Runtime及其他工具目录结构说明](#推理Runtime及其他工具目录结构说明) - - [端侧训练框架编译输出](#端侧训练框架编译输出) - - [训练Runtime及配套工具目录结构说明](#训练Runtime及配套工具目录结构说明) + - [目录结构](#目录结构) - [Windows环境编译](#windows环境编译) - [环境要求](#环境要求-1) - [编译选项](#编译选项-1) - [编译示例](#编译示例-1) - - [端侧推理框架编译输出](#端侧推理框架编译输出) - - [Runtime及配套工具目录结构说明](#Runtime及配套工具目录结构说明-1) - - [Docker环境编译](#docker环境编译) - - [环境准备](#环境准备) - - [下载镜像](#下载镜像) - - [创建容器](#创建容器) - - [进入容器](#进入容器) + - [目录结构](#目录结构-1) + - [macOS环境编译](#macOS环境编译) + - [环境要求](#环境要求-2) - [编译选项](#编译选项-2) - [编译示例](#编译示例-2) - - [编译输出](#编译输出) + - [目录结构](#目录结构-2) + - [Docker环境编译](#docker环境编译) + - [环境准备](#环境准备) + - [编译选项](#编译选项-3) + - [编译示例](#编译示例-3) + - [目录结构](#目录结构-3) @@ -37,69 +32,47 @@ 本章节介绍如何快速编译出MindSpore Lite。 -推理版本包含模块: +MindSpore Lite包含模块: | 模块 | 支持平台 | 说明 | | ------------------ | ----------------------- | --------------------------------- | | converter | Linux, Windows | 模型转换工具 | -| runtime(cpp、java) | Linux, Windows, Android | 模型推理框架(Windows平台不支持java版runtime) | +| runtime(cpp、java) | Linux, Windows, Android, iOS | 模型推理框架(Windows平台不支持java版runtime) | | benchmark | Linux, Windows, Android | 基准测试工具 | +| benchmark_train | Linux, Android | 性能测试和精度校验工具 | | cropper | Linux | libmindspore-lite.a静态库裁剪工具 | | minddata | Linux, Android | 图像处理库 | | codegen | Linux | 模型推理代码生成工具 | | obfuscator | Linux | 模型混淆工具 | -训练版本包含模块: - -| 模块 | 支持平台 | 说明 | -| --------------- | -------------- | --------------------------------- | -| converter | Linux | 模型转换工具 | -| runtime(cpp) | Linux, Android | 模型训练框架(暂不支持java) | -| cropper | Linux | libmindspore-lite.a静态库裁剪工具 | -| minddata | Linux, Android | 图像处理库 | -| benchmark_train | Linux, Android | 性能测试和精度校验工具 | - ## Linux环境编译 ### 环境要求 - 系统环境:Linux x86_64,推荐使用Ubuntu 18.04.02LTS -- runtime(cpp)编译依赖 - - [CMake](https://cmake.org/download/) >= 3.18.3 +- C++编译依赖 - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - - [Git](https://git-scm.com/downloads) >= 2.28.0 -- converter编译依赖 - [CMake](https://cmake.org/download/) >= 3.18.3 - - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - [Git](https://git-scm.com/downloads) >= 2.28.0 - - [Autoconf](http://ftp.gnu.org/gnu/autoconf/) >= 2.69 - - [Libtool](https://www.gnu.org/software/libtool/) >= 2.4.6 - - [LibreSSL](http://www.libressl.org/) >= 3.1.3 - - [Automake](https://www.gnu.org/software/automake/) >= 1.11.6 - - [Libevent](https://libevent.org) >= 2.0 - - [OpenSSL](https://www.openssl.org/) >= 1.1.1 -- runtime(java)编译依赖 - - [CMake](https://cmake.org/download/) >= 3.18.3 - - [GCC](https://gcc.gnu.org/releases.html) >= 7.3.0 - [Android_NDK](https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip) >= r20 - - [Git](https://git-scm.com/downloads) >= 
2.28.0 - - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools) + - 配置环境变量:`export ANDROID_NDK=NDK路径` + - [DDK](https://developer.huawei.com/consumer/cn/doc/development/hiai-Library/ddk-download-0000001053590180) = V500.010 + - 配置环境变量:`export HWHIAI_DDK=DDK路径` +- Java编译需要的额外依赖 - [Gradle](https://gradle.org/releases/) >= 6.6.1 + - 配置环境变量:`export GRADLE_HOME=GRADLE路径` + - 将bin目录添加到PATH中:`export PATH=${GRADLE_HOME}/bin:$PATH` - [OpenJDK](https://openjdk.java.net/install/) >= 1.8 - -> - 当安装完依赖项`Android_NDK`后,需配置环境变量:`export ANDROID_NDK=${NDK_PATH}/android-ndk-r20b`。 -> - 当安装完依赖项Gradle后,需将其安装路径增加到PATH当中:`export PATH=${GRADLE_PATH}/bin:$PATH`。 -> - 通过`Android command line tools`安装Android SDK,首先需要创建一个新目录,并将其路径配置到环境变量`${ANDROID_SDK_ROOT}`中,然后通过`sdkmanager`创建SDK:`./sdkmanager --sdk_root=${ANDROID_SDK_ROOT} "cmdline-tools;latest"`,最后通过`${ANDROID_SDK_ROOT}`目录下的`sdkmanager`接受许可证:`yes | ./sdkmanager --licenses`。 -> - 编译AAR需要依赖Android SDK Build-Tools、Android SDK Platform-Tools等Android SDK相关组件,如果环境中的Android SDK不存在相关组件,编译时会自动下载所需依赖。 -> - 编译NPU算子的时候需要下载[DDK V500.010](https://developer.huawei.com/consumer/cn/doc/development/hiai-Library/ddk-download-0000001053590180),并将压缩包解压后的目录设置为环境变量`${HWHIAI_DDK}`。 + - 配置环境变量:`export JAVA_HOME=JDK路径` + - 将bin目录添加到PATH中:`export PATH=${JAVA_HOME}/bin:$PATH` + - [Android SDK](https://developer.android.com/studio?hl=zh-cn#cmdline-tools) + - 创建一个新目录,配置环境变量`export ANDROID_SDK_ROOT=新建的目录` + - 下载`SDK Tools`,通过`sdkmanager`创建SDK:`./sdkmanager --sdk_root=${ANDROID_SDK_ROOT} "cmdline-tools;latest"` + - 通过`${ANDROID_SDK_ROOT}`目录下的`sdkmanager`接受许可证:`yes | ./sdkmanager --licenses` ### 编译选项 -MindSpore Lite提供编译脚本`build.sh`用于一键式编译,位于MindSpore根目录下,该脚本可用于MindSpore训练及推理的编译。 - -下面对`build.sh`的编译参数和`mindspore/lite/CMakeLists.txt`的选项进行说明。 +MindSpore根目录下的`build.sh`脚本可用于MindSpore Lite的编译。 #### `build.sh`的编译参数 @@ -120,9 +93,9 @@ MindSpore Lite提供编译脚本`build.sh`用于一键式编译,位于MindSpor | 选项 | 参数说明 | 取值范围 | 默认值 | | -------- | ----- | ---- | ---- | -| MSLITE_GPU_BACKEND | 设置GPU后端,仅在`-I arm64`时有效 | opencl、vulkan、cuda、off | opencl | +| MSLITE_GPU_BACKEND | 设置GPU后端,在`-I arm64`时仅opencl有效,在`-I x86_64`时仅tensorrt有效 | opencl、tensorrt、off | 在`-I arm64`时为opencl, 在`-I x86_64`时为off | | MSLITE_ENABLE_NPU | 是否编译NPU算子,仅在`-I arm64`或`-I arm32`时有效 | on、off | on | -| MSLITE_ENABLE_TRAIN | 是否编译训练版本 | on、off | off | +| MSLITE_ENABLE_TRAIN | 是否编译训练版本 | on、off | on | | MSLITE_ENABLE_SSE | 是否启用SSE指令集,仅在`-I x86_64`时有效 | on、off | off | | MSLITE_ENABLE_AVX | 是否启用AVX指令集,仅在`-I x86_64`时有效 | on、off | off | | MSLITE_ENABLE_CONVERTER | 是否编译模型转换工具,仅在`-I x86_64`时有效 | on、off | on | @@ -130,7 +103,6 @@ MindSpore Lite提供编译脚本`build.sh`用于一键式编译,位于MindSpor | MSLITE_ENABLE_TESTCASES | 是否编译测试用例 | on、off | off | > - 以上选项可通过设置同名环境变量或者`mindspore/lite/CMakeLists.txt`文件修改。 -> - 开启MSLITE_ENABLE_TRAIN只生成训练版本。 > - 修改选项后,添加`-i`参数进行增量编译不生效。 ### 编译示例 @@ -168,142 +140,66 @@ git clone https://gitee.com/mindspore/mindspore.git bash build.sh -A on -j32 ``` -### 端侧推理框架编译输出 - -执行编译指令后,会在`mindspore/output/`目录中生成如下文件: +最后,会在`output/`目录中生成如下文件: -- `mindspore-lite-{version}-inference-{os}-{arch}.tar.gz`:包含模型推理框架runtime(cpp)和配套工具。 +- `mindspore-lite-{version}-{os}-{arch}.tar.gz`:包含runtime和配套工具。 -- `mindspore-lite-maven-{version}.zip`:包含模型推理框架runtime(java)的AAR。 +- `mindspore-lite-maven-{version}.zip`:包含runtime(java)的AAR包。 > - version: 输出件版本号,与所编译的分支代码对应的版本一致。 > - os: 输出件应部署的操作系统。 > - arch: 输出件应部署的系统架构。 -执行解压缩命令,获取编译后的输出件: - -```bash -tar -xvf mindspore-lite-{version}-inference-{os}-{arch}.tar.gz -unzip mindspore-lite-maven-{version}.zip -``` - -#### 
模型转换工具converter目录结构说明 - -仅在`-I x86_64`编译选项下获得(推理和训练的目录结构相同)内容如下: - -```text -mindspore-lite-{version}-inference-linux-x64 -└── tools - └── converter - ├── include - │ └── registry # 自定义算子、模型解析、转换优化注册头文件 - ├── converter # 模型转换工具 - │ └── converter_lite # 可执行程序 - └── lib # 转换工具依赖的动态库 - ├── libglog.so.0 # Glog的动态库 - └── libmslite_converter_plugin.so # 注册插件的动态库 -``` - -#### 代码生成工具CodeGen目录结构说明 - -仅在`-I x86_64`编译选项下获得codegen可执行程序,在`-I arm64`和`-I arm32`编译选项下只生成codegen生成的推理代码所需要的算子库。 - -- `-I x86_64`编译选项下获得codegen,内容如下: - - ```text - mindspore-lite-{version}-inference-linux-x64 - └── tools - └── codegen # 代码生成工具 - ├── codegen # 可执行程序 - ├── include # 推理框架头文件 - │ ├── nnacl # nnacl 算子头文件 - │ └── wrapper - ├── lib - │ └── libwrapper.a # MindSpore Lite CodeGen生成代码依赖的部分算子静态库 - └── third_party - ├── include - │ └── CMSIS # ARM CMSIS NN 算子头文件 - └── lib - └── libcmsis_nn.a # ARM CMSIS NN 算子静态库 - ``` - -- `-I arm64`或`-I arm32`编译选项下获得codegen,内容如下: - - ```text - mindspore-lite-{version}-inference-android-{arch} - └── tools - └── codegen # 代码生成工具 - ├── include # 推理框架头文件 - │ ├── nnacl # nnacl 算子头文件 - │ └── wrapper - └── lib # 推理框架库 - └── libwrapper.a # MindSpore Lite CodeGen生成代码依赖的部分算子静态库 - ``` - -#### 模型混淆工具obfuscator目录结构说明 - -仅在`-I x86_64`编译选项下且`mindspore/mindspore/lite/CMakeLists.txt`中的`ENABLE_MODEL_OBF`选项开启时,获得msobfuscator可执行程序,内容如下: - -```text -mindspore-lite-{version}-inference-linux-x64 -└── tools - └── obfuscator # 模型混淆工具 - └── msobfuscator # 可执行程序 -``` - -#### Runtime及其他工具目录结构说明 - -推理框架可在`-I x86_64`、`-I arm64`、`-I arm32`和`-A java`编译选项下获得,内容如下: +### 目录结构 - 当编译选项为`-I x86_64`时: ```text - mindspore-lite-{version}-inference-linux-x64 - ├── inference - │ ├── include # 推理框架头文件 - │ │ └── registry # 自定义算子注册头文件 - │ └── lib # 推理框架库 - │ ├── libminddata-lite.so # 图像处理动态库文件 - │ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 - │ ├── libmindspore-lite-jni.so # MindSpore Lite推理框架的jni动态库 - │ ├── libmindspore-lite.so # MindSpore Lite推理框架的动态库 - │ ├── libmsdeobfuscator-lite.so # 混淆模型加载动态库文件,需开启`ENABLE_MODEL_OBF`选项。 - │ └── mindspore-lite-java.jar # MindSpore Lite推理框架jar包 + mindspore-lite-{version}-linux-x64 + ├── runtime + │ ├── include + │ ├── lib + │ │ ├── libminddata-lite.a # 图像处理静态库 + │ │ ├── libminddata-lite.so # 图像处理动态库 + │ │ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 + │ │ ├── libmindspore-lite-jni.so # MindSpore Lite推理框架的jni动态库 + │ │ ├── libmindspore-lite.so # MindSpore Lite推理框架的动态库 + │ │ ├── libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 + │ │ ├── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 + │ │ ├── libmsdeobfuscator-lite.so # 混淆模型加载动态库文件,需开启`ENABLE_MODEL_OBF`选项。 + │ │ └── mindspore-lite-java.jar # MindSpore Lite推理框架jar包 + │ └── third_party + │ └── libjpeg-turbo └── tools - ├── benchmark # 基准测试工具 - │ └── benchmark # 可执行程序 - ├── codegen # 代码生成工具 - │ ├── codegen # 可执行程序 - │ ├── include # 算子头文件 - │ ├── lib # 算子静态库 - │ └── third_party # ARM CMSIS NN算子库 - ├── converter # 模型转换工具 - ├── obfuscator # 模型混淆工具 - └── cropper # 库裁剪工具 - ├── cropper # 库裁剪工具可执行文件 - └── cropper_mapping_cpu.cfg # 裁剪cpu库所需的配置文件 + ├── benchmark # 基准测试工具 + ├── benchmark_train # 训练模型性能与精度调测工具 + ├── codegen # 代码生成工具 + ├── converter # 模型转换工具 + ├── obfuscator # 模型混淆工具 + └── cropper # 库裁剪工具 ``` - 当编译选项为`-I arm64`或`-I arm32`时: ```text - mindspore-lite-{version}-inference-android-{arch} - ├── inference - │ ├── include # 推理框架头文件 - │ │ └── registry # 自定义算子注册头文件 - │ ├── lib # 推理框架库 - │ │ ├── libminddata-lite.so # 图像处理动态库文件 - │ │ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 - │ │ ├── libmindspore-lite.so # MindSpore Lite推理框架的动态库 - │ │ └── 
libmsdeobfuscator-lite.so # 混淆模型加载动态库文件,需开启`ENABLE_MODEL_OBF`选项。 + mindspore-lite-{version}-android-{arch} + ├── runtime + │ ├── include + │ ├── lib + │ │ ├── libminddata-lite.a # 图像处理静态库 + │ │ ├── libminddata-lite.so # 图像处理动态库 + │ │ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 + │ │ ├── libmindspore-lite.so # MindSpore Lite推理框架的动态库 + │ │ ├── libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 + │ │ ├── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 + │ │ └── libmsdeobfuscator-lite.so # 混淆模型加载动态库文件,需开启`ENABLE_MODEL_OBF`选项。 │ └── third_party - │ └── hiai_ddk # NPU库,只存在于arm64包 + │ ├── hiai_ddk + │ └── libjpeg-turbo └── tools - ├── benchmark # 基准测试工具 - │ └── benchmark - └── codegen # 代码生成工具 - ├── include # 算子头文件 - └── lib # 算子静态库 + ├── benchmark # 基准测试工具 + ├── benchmark_train # 训练模型性能与精度调测工具 + └── codegen # 代码生成工具 ``` - 当编译选项为`-A on`时: @@ -316,67 +212,6 @@ mindspore-lite-{version}-inference-linux-x64 └── mindspore-lite-{version}.aar # MindSpore Lite推理框架aar包 ``` -### 端侧训练框架编译输出 - -如果开启了MSLITE_ENABLE_TRAIN选项,会生成训练runtime和配套工具,如下: - -`mindspore-lite-{version}-train-{os}-{arch}.tar.gz`:模型训练框架runtime和配套工具。 - -> - version: 输出件版本号,与所编译的分支代码对应的版本一致。 -> - os: 输出件应部署的操作系统。 -> - arch: 输出件应部署的系统架构。 - -执行解压缩命令,获取编译后的输出件: - -```bash -tar -xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz -``` - -#### 训练Runtime及配套工具目录结构说明 - -训练框架可在`-I x86_64`、`-I arm64`、`-I arm32`编译选项下获得对应不同硬件平台的版本,内容如下: - -- 当编译选项为`-I x86_64`时: - - ```text - mindspore-lite-{version}-train-linux-x64 - ├── tools - │   ├── benchmark_train # 训练模型性能与精度调测工具 - │   ├── converter # 模型转换工具 - │   └── cropper # 库裁剪工具 - │   ├── cropper # 库裁剪工具可执行文件 - │   └── cropper_mapping_cpu.cfg # 裁剪cpu库所需的配置文件 - └── train - ├── include # 训练框架头文件 - │ └── registry # 自定义算子注册头文件 - ├── lib # 训练框架库 - │   ├── libminddata-lite.so # 图像处理动态库文件 - │   ├── libmindspore-lite-jni.so # MindSpore Lite训练框架的jni动态库 - │   ├── libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 - │   ├── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 - │   └── mindspore-lite-java.jar # MindSpore Lite训练框架jar包 - └── third_party - └── libjpeg-turbo - ``` - -- 当编译选项为`-I arm64`或`-I arm32`时: - - ```text - mindspore-lite-{version}-train-android-{arch} - ├── tools - │   └── benchmark_train # 训练模型性能与精度调测工具 - └── train - ├── include # 训练框架头文件 - │ └── registry # 自定义算子注册头文件 - ├── lib # 训练框架库 - │   ├── libminddata-lite.so # 图像处理动态库文件 - │   ├── libmindspore-lite-train.a # MindSpore Lite训练框架的静态库 - │   └── libmindspore-lite-train.so # MindSpore Lite训练框架的动态库 - └── third_party - ├── hiai_ddk # NPU库,只存在于arm64包 - └── libjpeg-turbo - ``` - ## Windows环境编译 ### 环境要求 @@ -392,13 +227,27 @@ tar -xvf mindspore-lite-{version}-train-{os}-{arch}.tar.gz ### 编译选项 -MindSpore Lite提供编译脚本build.bat用于一键式编译,位于MindSpore根目录下,该脚本可用于MindSpore训练及推理的编译。下面对MindSpore Lite的编译选项进行说明。 +MindSpore根目录下的`build.bat`脚本可用于MindSpore Lite的编译。 + +#### `build.bat`的编译参数 | 参数 | 参数说明 | 是否必选 | | -------- | ----- | ---- | | lite | 设置该参数,则对MindSpore Lite工程进行编译 | 是 | | [n] | 设定编译时所用的线程数,否则默认设定为6线程 | 否 | +#### `mindspore/lite/CMakeLists.txt`的选项 + +| 选项 | 参数说明 | 取值范围 | 默认值 | +| -------- | ----- | ---- | ---- | +| MSLITE_ENABLE_SSE | 是否启用SSE指令集 | on、off | off | +| MSLITE_ENABLE_AVX | 是否启用AVX指令集 | on、off | off | +| MSLITE_ENABLE_CONVERTER | 是否编译模型转换工具 | on、off | on | +| MSLITE_ENABLE_TOOLS | 是否编译配套工具 | on、off | on | +| MSLITE_ENABLE_TESTCASES | 是否编译测试用例 | on、off | off | + +> - 以上选项可通过设置同名环境变量或者`mindspore/lite/CMakeLists.txt`文件修改。 + ### 编译示例 首先,使用git工具,从MindSpore代码仓下载源码。 @@ -421,83 +270,125 @@ call build.bat lite call build.bat lite 8 ``` -### 端侧推理框架编译输出 
+最后,会在`output/`目录中生成如下文件: -编译完成后,进入`mindspore/output/`目录,可查看编译后生成的文件。文件分为以下几种: - -- `mindspore-lite-{version}-inference-win-x64.zip`:包含模型推理框架runtime和配套工具。 +- `mindspore-lite-{version}-win-x64.zip`:包含模型推理框架runtime和配套工具。 > version:输出件版本号,与所编译的分支代码对应的版本一致。 -执行解压缩命令,获取编译后的输出件: - -```bat -unzip mindspore-lite-{version}-inference-win-x64.zip -``` - -#### Runtime及配套工具目录结构说明 - -Runtime及配套工具包括以下几部分: +### 目录结构 ```text -mindspore-lite-{version}-inference-win-x64 -├── inference -│   ├── include # 推理框架头文件 -│   │ └── registry # 自定义算子注册头文件 -│   └── lib -│   ├── libgcc_s_seh-1.dll # MinGW动态库 -│   ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 -│   ├── libmindspore-lite.dll # MindSpore Lite推理框架的动态库 -│   ├── libmindspore-lite.dll.a # MindSpore Lite推理框架的动态库的链接文件 -│   ├── libssp-0.dll # MinGW动态库 -│   ├── libstdc++-6.dll # MinGW动态库 -│   └── libwinpthread-1.dll # MinGW动态库 +mindspore-lite-{version}-win-x64 +├── runtime +│ ├── include +│ └── lib +│ ├── libgcc_s_seh-1.dll # MinGW动态库 +│ ├── libmindspore-lite.a # MindSpore Lite推理框架的静态库 +│ ├── libmindspore-lite.dll # MindSpore Lite推理框架的动态库 +│ ├── libmindspore-lite.dll.a # MindSpore Lite推理框架的动态库的链接文件 +│ ├── libssp-0.dll # MinGW动态库 +│ ├── libstdc++-6.dll # MinGW动态库 +│ └── libwinpthread-1.dll # MinGW动态库 └── tools ├── benchmark # 基准测试工具 - │   └── benchmark.exe # 可执行程序 └── converter # 模型转换工具 - ├── include - │ └── registry # 自定义算子、模型解析、转换优化注册头文件 - ├── converter - │   └── converter_lite.exe # 可执行程序 - └── lib - ├── libgcc_s_seh-1.dll # MinGW动态库 - ├── libglog.dll # Glog的动态库 - ├── libmslite_converter_plugin.dll # 注册插件的动态库 - ├── libmslite_converter_plugin.dll.a # 注册插件的动态库的链接文件 - ├── libssp-0.dll # MinGW动态库 - ├── libstdc++-6.dll # MinGW动态库 - └── libwinpthread-1.dll # MinGW动态库 ``` > 暂不支持在Windows进行端侧训练。 -## Docker环境编译 +## macOS环境编译 -### 环境准备 +### 环境要求 -#### 下载镜像 +- 系统环境:macOS 10.15.4及以上;64位。 -```bash -docker pull swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530 -``` +- 编译依赖 + - [CMake](https://cmake.org/download/) >= 3.18.3 + - [Xcode](https://developer.apple.com/xcode/download/cn) == 11.4.1 + - [Git](https://git-scm.com/downloads) >= 2.28.0 + +> - 编译脚本中会执行`git clone`获取第三方依赖库的代码。 + +### 编译选项 -> - 下载镜像前,请确保已经安装docker。 -> - docker镜像暂不支持Windows版本编译。 -> - 镜像里已安装好编译依赖的第三方库并且配置好环境变量。 +MindSpore根目录下的`build.sh`脚本可用于MindSpore Lite的编译。 -#### 创建容器 +#### `build.sh`的编译参数 + +| 参数 | 参数说明 | 取值范围 | 默认值 | +| -------- | ----- | ---- | ---- | +| -I | 选择目标架构 | arm64、arm32 | 无 | +| -j[n] | 设定编译时所用的线程数,否则默认设定为8线程 | Integer | 8 | + +### 编译示例 + +首先,在进行编译之前,需从MindSpore代码仓下载源码。 ```bash -docker run -tid --net=host --name=docker01 swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530 +git clone https://gitee.com/mindspore/mindspore.git ``` -#### 进入容器 +然后,在源码根目录下执行如下命令即可编译MindSpore Lite。 -```bash -docker exec -ti -u 0 docker01 bash +- 编译ARM64架构版本。 + + ```bash + bash build.sh -I arm64 -j8 + ``` + +- 编译ARM32架构版本。 + + ```bash + bash build.sh -I arm32 -j8 + ``` + +最后,会在`output/`目录中生成如下文件: + +- `mindspore-lite-{version}-{os}-{arch}.tar.gz`:包含模型推理框架runtime。 + +> - version: 输出件版本号,与所编译的分支代码对应的版本一致。 +> - os: 输出件应部署的操作系统。 +> - arch: 输出件应部署的系统架构。 + +### 目录结构 + +```text +mindspore-lite.framework +└── runtime + ├── Headers # 推理框架头文件 + ├── Info.plist # 配置文件 + └── mindspore-lite # 静态库 ``` +> 暂不支持在macOS进行端侧训练与转换工具。 + +## Docker环境编译 + +### 环境准备 + +- 下载镜像 + + ```bash + docker pull swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530 + ``` + + > - 下载镜像前,请确保已经安装docker。 + > - docker镜像暂不支持Windows版本编译。 + > - 
镜像里已安装好编译依赖的第三方库并且配置好环境变量。
+
+- 创建容器
+
+    ```bash
+    docker run -tid --net=host --name=docker01 swr.cn-south-1.myhuaweicloud.com/mindspore-build/mindspore-lite:ubuntu18.04.2-20210530
+    ```
+
+- 进入容器
+
+    ```bash
+    docker exec -ti -u 0 docker01 bash
+    ```
+
### 编译选项

参考[Linux环境编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux)
@@ -506,6 +397,6 @@ docker exec -ti -u 0 docker01 bash
参考[Linux环境编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux)
-### 编译输出
+### 目录结构
参考[Linux环境编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux)
diff --git a/tutorials/lite/source_zh_cn/use/converter_tool.md b/tutorials/lite/source_zh_cn/use/converter_tool.md
index 24fcd43b498d49f770d77d7cab143a1f1a0f1e38..0b57e82001c5b9e21aa67321bffdec05f0e13421 100644
--- a/tutorials/lite/source_zh_cn/use/converter_tool.md
+++ b/tutorials/lite/source_zh_cn/use/converter_tool.md
@@ -8,10 +8,12 @@
- [概述](#概述)
- [Linux环境使用说明](#linux环境使用说明)
  - [环境准备](#环境准备)
+  - [目录结构](#目录结构)
  - [参数说明](#参数说明)
  - [使用示例](#使用示例)
- [Windows环境使用说明](#windows环境使用说明)
  - [环境准备](#环境准备-1)
+  - [目录结构](#目录结构-1)
  - [参数说明](#参数说明-1)
  - [使用示例](#使用示例-1)
@@ -42,6 +44,19 @@ MindSpore Lite提供离线转换模型功能的工具,支持多种类型的模型
${PACKAGE_ROOT_PATH}是编译或下载得到的包解压后的路径。
+### 目录结构
+
+```text
+mindspore-lite-{version}-linux-x64
+└── tools
+    └── converter
+        ├── include
+        ├── converter                # 模型转换工具
+        │   └── converter_lite       # 可执行程序
+        └── lib                      # 转换工具依赖的动态库
+            └── libglog.so.0         # Glog的动态库
+```
+
### 参数说明

MindSpore Lite模型转换工具提供了多种参数设置,用户可根据需要来选择使用。此外,用户可输入`./converter_lite --help`获取实时帮助。
@@ -59,12 +74,29 @@ MindSpore Lite模型转换工具提供了多种参数设置,用户可根据需
| `--bitNum=` | 否 | 设定训练后量化(权重量化)的比特数,目前支持1bit~16bit量化 | \[1,16] | 8 |
| `--quantWeightSize=` | 否 | 设定参与训练后量化(权重量化)的卷积核尺寸阈值,若卷积核尺寸大于该值,则对此权重进行量化 | \[0,+∞) | 0 |
| `--quantWeightChannel=` | 否 | 设定参与训练后量化(权重量化)的卷积通道数阈值,若卷积通道数大于该值,则对此权重进行量化 | \[0,+∞) | 16 |
-| `--configFile=` | 否 | 1)可作为训练后量化(全量化)校准数据集配置文件路径。2)可作为转换器的配置文件路径,内含参数`plugin_path`与`disable_fusion`,均为非必选,前者为第三方库加载路径,如有多个请用`;`分隔,后者默认值为`off`,当为`on`时会关闭融合优化。 | - | - |
+| `--configFile=` | 否 | 1)可作为训练后量化(全量化)校准数据集配置文件路径;2)可作为转换器的配置文件路径。 | - | - |
+| `--fp16=` | 否 | 设定在模型序列化时是否需要将Float32数据格式的权重存储为Float16数据格式。 | on、off | off |
+| `--inputShape=` | 否 | 设定模型输入的维度,默认与原始模型的输入一致。对某些特定的模型可以进一步常量折叠,比如存在shape算子的模型,但是转换后的模型将失去动态shape的特性。例如:inTensorName: 1,32,32,4 | - | - |
> - 参数名和参数值之间用等号连接,中间不能有空格。
> - Caffe模型一般分为两个文件:`*.prototxt`模型结构,对应`--modelFile`参数;`*.caffemodel`模型权值,对应`--weightFile`参数。
> - 为保证权重量化的精度,建议`--bitNum`参数设定范围为8bit~16bit。
> - 全量化目前仅支持激活值8bit、权重8bit的量化方式。
+> - `--fp16`的优先级很低,比如如果开启了量化,那么对于已经量化的权重,`--fp16`不会再次生效。总而言之,该选项只会在序列化时对模型中的Float32的权重生效。
+
+`configFile`配置文件采用`key=value`的方式定义相关参数,可配置的`key`如下:
+
+| 参数名 | 属性 | 功能描述 | 参数类型 | 默认值 | 取值范围 |
+| -------- | ------- | ----- | ----- | ----- | ----- |
+| image_path | 全量化时必选 | 存放校准数据集的目录;如果模型有多个输入,请依次填写对应的数据所在目录,目录路径间请用`,`隔开 | String | - | 该目录存放可直接用于执行推理的输入数据。由于目前框架还不支持数据预处理,所有数据必须事先完成所需的转换,使得它们满足推理的输入要求 |
+| batch_count | 可选 | 使用的输入数目 | Integer | 100 | (0,+∞) |
+| method_x | 可选 | 网络层输入输出数据量化算法 | String | KL | KL、MAX_MIN、RemovalOutlier。
KL:基于[KL散度](http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf)对数据范围作量化校准。
MAX_MIN:基于最大值、最小值计算数据的量化参数。
RemovalOutlier:按照一定比例剔除数据的极大极小值,再计算量化参数。
在校准数据集与实际推理时的输入数据相吻合的情况下,推荐使用MAX_MIN;而在校准数据集噪声比较大的情况下,推荐使用KL或者RemovalOutlier |
+| thread_num | 可选 | 使用校准数据集执行推理流程时的线程数 | Integer | 1 | (0,+∞) |
+| bias_correction | 可选 | 是否对量化误差进行校正 | Boolean | false | true、false。使能后,能提升转换后的模型精度,建议设置为true |
+| plugin_path | 可选 | 第三方库加载路径 | String | - | 如有多个请用`;`分隔 |
+| disable_fusion | 可选 | 是否关闭融合优化 | String | off | off、on |
+
+### 使用示例

下面选取了几个常用示例,说明转换命令的使用方法。
@@ -137,6 +169,23 @@ MindSpore Lite模型转换工具提供了多种参数设置,用户可根据需
${PACKAGE_ROOT_PATH}是编译或下载得到的包解压后的路径。
+### 目录结构
+
+```text
+mindspore-lite-{version}-win-x64
+└── tools
+    └── converter # 模型转换工具
+        ├── include
+        ├── converter
+        │   └── converter_lite.exe # 可执行程序
+        └── lib
+            ├── libgcc_s_seh-1.dll # MinGW动态库
+            ├── libglog.dll # Glog的动态库
+            ├── libssp-0.dll # MinGW动态库
+            ├── libstdc++-6.dll # MinGW动态库
+            └── libwinpthread-1.dll # MinGW动态库
+```
+
### 参数说明

参考Linux环境模型转换工具的[参数说明](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_tool.html#id3)。
diff --git a/tutorials/lite/source_zh_cn/use/converter_train.md b/tutorials/lite/source_zh_cn/use/converter_train.md
index b205078cf399a4be82c1085ee7b22cbf4c4c23e6..ca562611067154c6e2182cf2797b004cedcaac8d 100644
--- a/tutorials/lite/source_zh_cn/use/converter_train.md
+++ b/tutorials/lite/source_zh_cn/use/converter_train.md
@@ -48,6 +48,10 @@ MindSpore Lite 模型转换工具提供了多个参数,目前工具仅支持Li
| `--modelFile=` | 是 | MINDIR模型文件名(包括路径) | - | - |
| `--outputFile=` | 是 | 输出模型文件名(包括路径)自动生成`.ms`后缀 | - | - |
| `--trainModel=true` | 是 | 是否是训练模式;如果要训练模型,必须为true | true, false | false |
+| `--quantType=` | 否 | 设置模型的量化类型。 | WeightQuant:权重量化(训练只支持此类型) | - |
+| `--bitNum=` | 否 | 设定训练后量化(权重量化)的比特数,目前支持1bit~16bit量化 | \[1,16] | 8 |
+| `--quantWeightSize=` | 否 | 设定参与训练后量化(权重量化)的卷积核尺寸阈值,若卷积核尺寸大于该值,则对此权重进行量化 | \[0,+∞) | 0 |
+| `--quantWeightChannel=` | 否 | 设定参与训练后量化(权重量化)的卷积通道数阈值,若卷积通道数大于该值,则对此权重进行量化 | \[0,+∞) | 16 |
> 参数名称和数值之间使用等号连接且不能有空格。
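按照上表参数,一条示意的训练模型转换命令如下(模型文件名为假设值):

```bash
./converter_lite --fmk=MINDIR --modelFile=lenet_tod.mindir --outputFile=lenet_tod --trainModel=true
```

转换成功后,会在当前目录生成`lenet_tod.ms`训练模型。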
diff --git a/tutorials/lite/source_zh_cn/use/cropper_tool.md b/tutorials/lite/source_zh_cn/use/cropper_tool.md
index 40afbdabaff0d7c83b08f997f3a24b0e711513bc..652085c588afbfb1085cbcd165fa6dabee3198bf 100644
--- a/tutorials/lite/source_zh_cn/use/cropper_tool.md
+++ b/tutorials/lite/source_zh_cn/use/cropper_tool.md
@@ -58,7 +58,7 @@ MindSpore Lite提供对Runtime的`libmindspore-lite.a`静态库裁剪工具,
- 通过文件夹的方式传入`ms`模型,将模型文件所在的文件夹路径传递给`modelFolderPath`参数,对arm64-cpu的`libmindspore-lite.a`静态库进行裁剪。
```bash
-./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a
+./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a
```
本例将读取`/model`文件夹中包含的所有`ms`模型,对arm64-cpu的`libmindspore-lite.a`静态库进行裁剪,并将裁剪后的`libmindspore-lite.a`静态库保存到`/mindspore-lite/lib/`目录。
@@ -66,7 +66,7 @@ MindSpore Lite提供对Runtime的`libmindspore-lite.a`静态库裁剪工具,
- 通过文件的方式传入`ms`模型,将模型文件所在的路径传递给`modelFile`参数,对arm64-cpu的`libmindspore-lite.a`静态库进行裁剪。
```bash
-./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a
+./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_cpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a
```
本例将根据`modelFile`传入的`ms`模型,对arm64-cpu的`libmindspore-lite.a`静态库进行裁剪,并将裁剪后的`libmindspore-lite.a`静态库保存到`/mindspore-lite/lib/`目录。
@@ -74,7 +74,7 @@ MindSpore Lite提供对Runtime的`libmindspore-lite.a`静态库裁剪工具,
- 通过文件夹的方式传入`ms`模型,将模型文件所在的文件夹路径传递给`modelFolderPath`参数,对arm64-gpu的`libmindspore-lite.a`静态库进行裁剪。
```bash
-./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a
+./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFolderPath=/model --outputFile=/mindspore-lite/lib/libmindspore-lite.a
```
本例将读取`/model`文件夹中包含的所有`ms`模型,对arm64-gpu的`libmindspore-lite.a`静态库进行裁剪,并将裁剪后的`libmindspore-lite.a`静态库保存到`/mindspore-lite/lib/`目录。
@@ -82,7 +82,7 @@ MindSpore Lite提供对Runtime的`libmindspore-lite.a`静态库裁剪工具,
- 通过文件的方式传入`ms`模型,将模型文件所在的路径传递给`modelFile`参数,对arm64-gpu的`libmindspore-lite.a`静态库进行裁剪。
```bash
-./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/inference/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a
+./cropper --packageFile=/mindspore-lite-{version}-android-aarch64/runtime/lib/libmindspore-lite.a --configFile=./cropper_mapping_gpu.cfg --modelFile=/model/lenet.ms,/model/retinaface.ms --outputFile=/mindspore-lite/lib/libmindspore-lite.a
```
本例将根据`modelFile`传入的`ms`模型,对arm64-gpu的`libmindspore-lite.a`静态库进行裁剪,并将裁剪后的`libmindspore-lite.a`静态库保存到`/mindspore-lite/lib/`目录。
\ No newline at end of file
diff --git a/tutorials/lite/source_zh_cn/use/micro.md b/tutorials/lite/source_zh_cn/use/micro.md
index 18f906e4a0bed29b7104f5ccfd0f9b38e1812a95..b2eb28d04907b894ce019317f66076db727bc8b2 100644
--- a/tutorials/lite/source_zh_cn/use/micro.md
+++ b/tutorials/lite/source_zh_cn/use/micro.md
@@ -7,9 +7,11 @@
- [在微控制器上执行推理](#在微控制器上执行推理)
    - [概述](#概述)
    - [获取codegen](#获取codegen)
+    - [目录结构](#目录结构)
    - [参数说明](#参数说明)
    - [使用步骤](#使用步骤)
    - [使用CodeGen在STM开发板上执行推理](#使用CodeGen在STM开发板上执行推理)
+    - [在轻鸿蒙上部署MobileNetV3](#在轻鸿蒙上部署MobileNetV3)
    - [更多详情](#更多详情)
@@ -37,6 +39,25 @@
> 目前模型生成工具仅支持在Linux x86_64架构下运行。
+## 目录结构
+
+```text
+mindspore-lite-{version}-linux-x64
+└── tools
+    └── codegen # 代码生成工具
+        ├── codegen          # 可执行程序
+        ├── include          # 推理框架头文件
+        │   ├── nnacl        # nnacl 算子头文件
+        │   └── wrapper
+        ├── lib
+        │   └── libwrapper.a # MindSpore Lite CodeGen生成代码依赖的部分算子静态库
+        └── third_party
+            ├── include
+            │   └── CMSIS    # ARM CMSIS NN 算子头文件
+            └── lib
+                └── libcmsis_nn.a # ARM CMSIS NN 算子静态库
+```
+
## 参数说明

详细参数说明如下:
@@ -274,13 +295,105 @@
jlinkgdbserver          # 启动jlinkgdbserver 选定target device为STM32F746I
jlinkRTTViewer          # 启动jlinkRTTViewer 选定target devices为STM32F746IG
arm-none-eabi-gdb       # 启动arm-gcc gdb服务
file build/target.elf   # 打开调测文件
target remote 127.0.0.1 # 连接jlink服务器
monitor reset           # 重置单板
monitor halt            # 挂起单板
load                    # 加载可执行文件到单板
c                       # 执行模型推理
```
+## 在轻鸿蒙上部署MobileNetV3
+
+1. 轻鸿蒙编译环境准备,需要安装gn/ninja/llvm等编译工具链,详细请参考[轻鸿蒙快速入门](https://device.harmonyos.com/cn/docs/start/introduce/oem_minitinier_environment_lin-0000001105407498)。
+
+2. 开发板环境配置:以Hi3516开发板为例,请参考轻鸿蒙快速入门的[开发步骤章节](https://device.harmonyos.com/cn/docs/start/introduce/oem_development_eq_3516-0000001105829366)。
+
+3. 需要组织的工程目录如下:
+
+    ```text
+    ├── benchmark
+    ├── CMakeLists.txt
+    ├── BUILD.gn         # 编译工程组织文件
+    └── src
+    ```
+
+4. 使用codegen编译[mobilenetv3模型](https://download.mindspore.cn/model_zoo/official/lite/mnist_lite/mnist.ms),生成对应轻鸿蒙平台的推理代码,命令为:
+
+    ```bash
+    ./codegen --modelPath=./mobilenetv3.ms --codePath=./ --target=ARM32A
+    ```
+
+5. 编写gn文件
+
+    ```text
+    import("//build/lite/config/component/lite_component.gni")
+    import("//build/lite/ndk/ndk.gni")
+
+    lite_component("mobilenetV3_benchmark") {
+      target_type = "executable"
+      sources = [
+        "benchmark/benchmark.cc",
+        "benchmark/load_input.c",
+        "benchmark/calib_output.cc",
+        "src/net.c",
+        "src/weight.c",
+        "src/session.cc",
+        "src/tensor.cc",
+      ]
+
+      features = []
+
+      include_dirs = [
+        "//foundation/ai/engine/test/mindspore_benchmark",
+        "//foundation/ai/engine/test/mindspore_benchmark/include",
+        "//foundation/ai/engine/test/mindspore_benchmark/mobilenetV3/benchmark",
+        "//foundation/ai/engine/test/mindspore_benchmark/mobilenetV3/src",
+      ]
+
+      ldflags = [
+        "-fno-strict-aliasing",
+        "-Wall",
+        "-pedantic",
+        "-std=gnu99",
+      ]
+
+      libs = [
+        "../lib/libmindspore-lite.a",
+        "../lib/libwrapper.a",
+      ]
+
+      defines = [ "NOT_USE_STL" ]
+      defines += [ "ENABLE_NEON" ]
+      defines += [ "ENABLE_ARM" ]
+      defines += [ "ENABLE_ARM32" ]
+
+      cflags = [
+        "-fno-strict-aliasing",
+        "-Wall",
+        "-pedantic",
+        "-std=gnu99",
+      ]
+    }
+    ```
+
+6. 编译benchmark,并执行,结果为:
+
+    ```text
+    ReadWeightData time: 0.00000ms
+    input 0: mobilenetV3_input.bin
+    ReadInputData time: 0.00000ms
+
+    loop count:3
+    total time: 756.13397ms, per time: 252.04466ms
+
+    outputs:
+    name: Reshape-110, DataType: 43, Elements: 1001, Shape: [1 1001 ], Data:
+    -0.583575, -0.359817, 0.536744, -1.843612, -0.849360, 0.147853, 0.402617, -1.016975, 0.737295, 1.312937
+    ===========run success========
+    total end to end time: 2124.91895ms
+    ```
+
## 更多详情

### [Linux_x86_64平台编译部署](https://gitee.com/mindspore/mindspore/tree/master/mindspore/lite/micro/example/mnist_x86)
diff --git a/tutorials/lite/source_zh_cn/use/nnie.md b/tutorials/lite/source_zh_cn/use/nnie.md
index d5ca59853d0f4a3229bec7ec3e0c99195f363282..de9e232ebcdb300cab9a82f64b7043987d40d984 100644
--- a/tutorials/lite/source_zh_cn/use/nnie.md
+++ b/tutorials/lite/source_zh_cn/use/nnie.md
@@ -10,8 +10,27 @@
    - [模型推理工具runtime目录结构说明](#模型推理工具Runtime目录结构说明)
- [工具使用](#工具使用)
    - [转换工具converter](#转换工具converter)
+        - [概述](#概述)
+        - [环境准备](#环境准备)
+        - [执行converter](#执行converter)
    - [推理工具runtime](#推理工具runtime)
+        - [概述](#概述)
+        - [环境准备](#环境准备)
+        - [执行benchmark](#执行benchmark)
- [集成使用](#集成使用)
+- [SVP工具链相关功能支持及注意事项(可选)](#SVP工具链相关功能支持及注意事项(可选))
+    - [image_list说明](#image_list说明)
+    - [image_type限制](#image_type限制)
+    - [板端运行输入Format须是NHWC](#板端运行输入Format须是NHWC)
+    - [image_list和roi_coordinate_file个数说明](#image_list和roi_coordinate_file个数说明)
+    - [prototxt中节点名_cpu后缀支持](#prototxt中节点名_cpu后缀支持)
+    - [prototxt中Custom算子支持](#prototxt中Custom算子支持)
+    - [prototxt中top域的_report后缀支持](#prototxt中top域的_report后缀支持)
+    - [inplace机制](#inplace机制)
+    - [多图片batch运行及多step运行](#多图片batch运行及多step运行)
+    - [节点名称的变动](#节点名称的变动)
+    - [proposal算子使用说明](#proposal算子使用说明)
+    - [分段机制说明及8段限制](#分段机制说明及8段限制)
@@ -22,15 +41,15 @@
### 模型转换工具converter目录结构说明
```text
-mindspore-lite-{version}-inference-linux-x64
+mindspore-lite-{version}-linux-x64
└── tools
    └── converter
        └── providers
-            └── 3516D           # 嵌入式板型号
-                ├── libmslite_nnie_converter.so      # 集成nnie的动态库
-                ├── libmslite_nnie_data_process.so   # 处理nnie输入数据的动态库
-                ├── libnnie_mapper.so                # 构建nnie wk文件的动态库
-                └── third_party                      # nnie依赖的三方动态库
+            └── Hi3516D         # 嵌入式板型号
+                ├── libmslite_nnie_converter.so      # 集成NNIE转换的动态库
+                ├── libmslite_nnie_data_process.so   # 处理NNIE输入数据的动态库
+                ├── libnnie_mapper.so # 
构建NNIE二进制文件的动态库 + └── third_party # NNIE依赖的三方动态库 ├── opencv-4.2.0 │ └── libopencv_xxx.so └── protobuf-3.9.0 @@ -38,114 +57,288 @@ mindspore-lite-{version}-inference-linux-x64 └── libprotoc.so ``` -上述是nnie的集成目录结构,转换工具converter的其余目录结构详情,见[模型转换工具converter目录结构说明](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#converter)。 +上述是NNIE的集成目录结构,转换工具converter的其余目录结构详情,见[模型转换工具](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_tool.html)。 ### 模型推理工具runtime目录结构说明 ```text mindspore-lite-{version}-linux-aarch32 └── providers - └── 3516D # 嵌入式板型号 - └── libmslite_nnie.so # 集成nnie的动态库 + └── Hi3516D # 嵌入式板型号 + └── libmslite_nnie.so # 集成NNIE的动态库 + └── libmslite_proposal.so # 集成proposal的样例动态库 ``` -上述是nnie的集成目录结构,推理工具runtime的其余目录结构详情,见[Runtime及其他工具目录结构说明](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#runtime)。 +上述是NNIE的集成目录结构,推理工具runtime的其余目录结构详情,见[目录结构](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)。 ## 工具使用 ### 转换工具converter -1. 进入**版本发布件根路径**。 +#### 概述 - ```text - cd mindspore-lite-{version}-inference-linux-x64 - ``` +MindSpore Lite提供离线转换模型功能的工具,将多种类型的模型(当前只支持Caffe)转换为可使用NNIE硬件加速推理的板端专属模型,可运行在Hi3516板上。 +通过转换工具转换成的NNIE`ms`模型,仅支持在关联的嵌入式板上,使用转换工具配套的Runtime推理框架执行推理。关于转换工具的更一般说明,可参考[推理模型转换](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_tool.html)。 - 若用户未进入**版本发件件根路径**,后续配置用户需按实际情况进行等价设置。 +#### 环境准备 -2. converter配置文件。 +使用MindSpore Lite模型转换工具,需要进行如下环境准备工作。 - 用户创建后缀为.cfg的converter配置文件(以converter.cfg指代),文件内容如下: +1. [下载](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/downloads.html)NNIE专用集成发布件(内含模型转换及推理工具),当前仅支持Linux。 - ```text - plugin_path=./tools/converter/providers/3516D/libmslite_nnie_converter.so # 用户请设置绝对路径 - ``` +2. 解压下载的包 -3. nnie配置文件。 + ```bash + tar -zxvf mindspore-lite-{version}-linux-x64.tar.gz + ``` - 用户需参照HiSVP开发指南(nnie提供)自行配置(以nnie.cfg指代)。 - 设定如下环境变量: + {version}是发布包的版本号。 - ```shell - export NNIE_CONFIG_PATH=nnie.cfg - ``` +3. 将转换工具需要的动态链接库加入环境变量LD_LIBRARY_PATH -4. converter环境变量设置。 + ```bash + export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${PACKAGE_ROOT_PATH}/tools/converter/lib:${PACKAGE_ROOT_PATH}/runtime/lib:${PACKAGE_ROOT_PATH}/tools/converter/providers/Hi3516D/third_party/opencv-4.2.0:${PACKAGE_ROOT_PATH}/tools/converter/providers/Hi3516D/third_party/protobuf-3.9.0 + ``` - ```shell - export NNIE_MAPPER_PATH=./tools/converter/providers/3516D/libnnie_mapper.so - export NNIE_DATA_PROCESS_PATH=./tools/converter/providers/3516D/libmslite_nnie_data_process.so - export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./tools/converter/lib:./tools/converter/providers/3516D/third_party/opencv-4.2.0:./tools/converter/providers/3516D/third_party/protobuf-3.9.0 - ``` + ${PACKAGE_ROOT_PATH}是解压得到的文件夹路径。 -5. benchmark环境变量设置。 +4. 使能NNIE模型转换 - 运行于x86_64系统上的benchmark是用来生成校正集的,以供nnie学习量化参数。用户需设置以下环境变量: + NNIE模型可以使用NNIE硬件以提高模型运行速度,用户需配置以下两点,以使能NNIE模型转换。 - ```shell - export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./inference/lib - export BENCHMARK_PATH=./tools/benchmark - ``` + - NNIE转换配置文件 -6. 
执行converter,当前只支持caffe。 + MindSpore Lite所需的NNIE转换配置文件,需参照海思提供的《HiSVP 开发指南》中表格`nnie_mapper 配置选项说明`来进行配置,以nnie.cfg指代此配置文件: - ```text - ./tools/converter/converter/converter_lite --fmk=CAFFE --modelFile=${model_name}.prototxt --weightFile=${model_name}.caffemodel --configFile=converter.cfg --outputFile=${model_name} - ``` + nnie.cfg文件的示例参考如下: + + ```text + [net_type] 0 + [image_list] ./input_nchw.txt + [image_type] 0 + [norm_type] 0 + [mean_file] null + ``` + + `input_nchw.txt`为被转换CAFFE模型的浮点文本格式的输入数据,详情请参照《HiSVP 开发指南》中的`image_list`说明。在配置文件中,配置选项caffemodel_file、prototxt_file、is_simulation、instructions_name不可配置,其他选项功能可正常配置。 + + - NNIE动态库路径配置(可选) + + 在NNIE转换时,通过参数configFile传入配置文件(`--configFile=./converter.cfg`)以使能NNIE转换,配置方式请参见[推理模型转换的参数说明](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_tool.html#id5)。在配置文件中,保存着NNIE动态库的相对路径,用户可手动修改该路径,默认不需修改即可。 + +#### 执行converter + +1. 进入转换目录 + + ```bash + cd ${PACKAGE_ROOT_PATH}/tools/converter/converter + ``` + +2. 配置环境变量(可选) + + 若已执行第1步,进入到转换目录,则此步无需配置,默认值将使能。若用户未进入转换目录,则需在环境变量中声明转换工具所依赖的so和benchmark二进制执行程序的路径,如下所示: + + ```bash + export NNIE_MAPPER_PATH=${PACKAGE_ROOT_PATH}/tools/converter/providers/Hi3516D/libnnie_mapper.so + export NNIE_DATA_PROCESS_PATH=${PACKAGE_ROOT_PATH}/tools/converter/providers/Hi3516D/libmslite_nnie_data_process.so + export BENCHMARK_PATH=${PACKAGE_ROOT_PATH}/tools/benchmark + ``` + + ${PACKAGE_ROOT_PATH}是下载得到的包解压后的路径。 + +2. 将nnie.cfg拷贝到转换目录并设置如下环境变量 + + ```bash + export NNIE_CONFIG_PATH=./nnie.cfg + ``` + +3. 执行converter,生成NNIE`ms`模型 - 参数modelFile、weightFile、configFile、outputFile用户按实际情况进行设置。 - 当用户在mindspore-lite-{version}-inference-linux-x64/tools/converter/converter目录下时,环境变量NNIE_MAPPER_PATH、NNIE_DATA_PROCESS_PATH、BENCHMARK_PATH可不设置。 + ```bash + ./converter_lite --fmk=CAFFE --modelFile=${model_name}.prototxt --weightFile=${model_name}.caffemodel --configFile=../converter.cfg --outputFile=${model_name} + ``` + + ${model_name}为模型文件名称,运行后的结果显示为: + + ```text + CONVERTER RESULT SUCCESS:0 + ``` + + 用户若想了解converter_lite转换工具的相关参数,可参考[参数说明](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_tool.html#id4)。 ### 推理工具runtime -以下是示例用法,用户可根据实际情况进行等价操作。 +#### 概述 + +得到转换模型后,可在关联的嵌入式板上,使用板子配套的Runtime推理框架执行推理。MindSpore Lite提供benchmark基准测试工具,它可以对MindSpore Lite模型前向推理的执行耗时进行定量分析(性能),还可以通过指定模型输出进行可对比的误差分析(精度)。 +关于推理工具的一般说明,可参考[benchmark](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/benchmark_tool.html)。 + +#### 环境准备 -1. 3516D板目录创建。 +以下为示例用法,用户可根据实际情况进行等价操作。 - ```text - mkdir /user/mindspore # 存放非库文件 - mkdir /user/mindspore/lib # 存放库文件 +1. Hi3516D板目录创建 + + 登陆板端,创建工作目录 + + ```bash + mkdir /user/mindspore # 存放benchmark执行文件及模型 + mkdir /user/mindspore/lib # 存放依赖库文件 ``` -2. 传输文件。 +2. 传输文件 + + 向板端传输benchmark工具、模型、so库。其中libnnie_proposal.so为MindSpore Lite提供的proposal算子实现样例so,若用户模型里含有自定义的proposal算子,用户需参考[proposal算子使用说明](#proposal算子使用说明)生成libnnie_proposal.so替换该so文件,以进行正确推理。 - ```text + ```bash scp libmindspore-lite.so libmslite_nnie.so libnnie_proposal.so root@${device_ip}:/user/mindspore/lib - scp benchmark ${model_name}.ms root@${device_ip}:/user/mindspore + scp benchmark ${model_path} root@${device_ip}:/user/mindspore + ``` + + ${model_path}为转换后ms模型文件路径 + +3. 设置动态库路径 + + NNIE模型的推理,还依赖海思提供NNIE相关板端动态库,包括:libnnie.so、libmpi.so、libVoiceEngine.so、libupvqe.so、libdnvqe.so。 + + 用户需在板端保存这些so,并将路径传递给LD_LIBRARY_PATH环境变量。 + 在示例中,这些so位于/usr/lib下,用户需按实际情况进行配置: + + ```bash + export LD_LIBRARY_PATH=/user/mindspore/lib:/user/lib:${LD_LIBRARY_PATH} ``` -3. 设置动态库路径。 +4. 
设置配置项(可选)
+
+    若用户模型含有proposal算子,需根据proposal算子实现情况,配置MAX_ROI_NUM环境变量:
+
+    ```bash
+    export MAX_ROI_NUM=300 # 单张图片支持roi区域的最大数量,范围:正整数,默认值:300。
+    ```
+
+    若用户模型为循环或lstm网络,需根据实际网络运行情况,配置TIME_STEP环境变量,其他要求[见多图片batch运行及多step运行](#多图片batch运行及多step运行):
+
+    ```bash
+    export TIME_STEP=1 # 循环或lstm网络运行的step数,范围:正整数,默认值:1。
+    ```
+
+    若板端含有多个NNIE硬件,用户可通过CORE_IDS环境变量指定模型运行在哪个NNIE设备上,
+    若模型被分段(用户可用netron打开模型,观察模型被分段情况),可依序分别配置每个分段运行在哪个设备上,未被配置分段运行在最后被配置的NNIE设备上:
+
+    ```bash
+    export CORE_IDS=0 # NNIE运行内核id,支持模型分段独立配置,使用逗号分隔(如export CORE_IDS=1,1),默认值:0
+    ```
+
+#### 执行benchmark
+
+```text
+cd /user/mindspore
+./benchmark --modelFile=${model_path}
+```
+
+${model_path}为转换后的ms模型文件路径。
+
+执行该命令,会生成模型的随机输入,并执行前向推理。有关benchmark的其他使用详情,如耗时分析与推理误差分析等,见[Benchmark使用](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/benchmark_tool.html)。
+
+有关模型的输入数据格式要求,见[SVP工具链相关功能支持及注意事项(可选)](#SVP工具链相关功能支持及注意事项(可选))。

## 集成使用

有关集成使用详情,见[集成c++接口](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/runtime_cpp.html)。
+
+> 用户设置Context[创建配置上下文](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/runtime_cpp.html#id3)时,需对provider_、provider_device_进行如下设置:
+> provider_ = "NNIE"
+> provider_device_ = "NNIE"
+
+## SVP工具链相关功能支持及注意事项(可选)
+
+在模型转换时,由NNIE_CONFIG_PATH环境变量声明的nnie.cfg文件,提供原先SVP工具链相关功能,支持除caffemodel_file、prototxt_file、is_simulation、instructions_name外其他字段的配置,相关注意事项如下:
+
+### 板端运行输入Format须是NHWC
+
+  转换后的`ms`模型只接受NHWC格式的数据输入,若image_type被声明为0,则接收NHWC格式的float32数据,若image_type被声明为1,则接收NHWC的uint8数据输入。
+
+### image_list说明
+
+  nnie.cfg中image_list字段含义与原先不变,当image_type声明为0时,按行提供chw格式数据,无论原先模型是否是nchw输入。
+
+### image_type限制
+
+  MindSpore Lite不支持image_type为3和5时的网络输入,用户设为0或1。
+
+### image_list和roi_coordinate_file个数说明
+
+  用户只需提供与模型输入个数相同数量的image_list,若模型中含有ROI Pooling或PSROI Pooling层,用户需提供roi_coordinate_file,数量与顺序和prototxt内的ROI Pooling或PSROI Pooling层的个数与顺序对应。
+
+### prototxt中节点名_cpu后缀支持
+
+  SVP工具链中,可通过在prototxt文件的节点名后使用_cpu后缀来声明cpu自定义算子。MindSpore Lite中忽略_cpu后缀,不做支持。用户若想重定义MindSpore Lite已有的算子实现或新增新的算子,可通过自定义算子注册的方式进行注册。
+
+### prototxt中Custom算子支持
+
+  SVP工具链中,通过在prototxt中声明custom层,实现推理时分段,并由用户实现cpu代码。在MindSpore Lite中,用户需在Custom层中增加type属性,并通过自定义算子注册的方式进行在线推理代码的注册。
+
+  Custom层的修改样例如下:
+
+  ```text
+  layer {
+    name: "custom1"
+    type: "Custom"
+    bottom: "conv1"
+    top: "custom1_1"
+    custom_param {
+      type: "MY_CUSTOM"
+      shape {
+        dim: 1
+        dim: 256
+        dim: 64
+        dim: 64
+      }
+    }
+  }
+  ```
+
+  在该示例中定义了一个MY_CUSTOM类型的自定义算子,推理时用户需注册一个类型为MY_CUSTOM的自定义算子。
+
+### prototxt中top域的_report后缀支持
+
+  MindSpore Lite在转换NNIE模型时,会将大部分的算子融合为NNIE运行的二进制文件,用户无法观察到中间算子的输出,通过在top域上添加“_report”后缀,转换构图时会将中间算子的输出添加到融合后的层输出中,若原先该算子便有输出(未被融合),则维持不变。
+
+  在推理运行时,用户可通过[回调运行](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/runtime_cpp.html#id15)得到中间算子输出。
+
+  MindSpore Lite解析_report的相应规则,及与[inplace机制](#inplace机制)的冲突解决,参照《HiSVP 开发指南》中的定义说明。
+
+### inplace机制
+
+  使用Inplace层写法,可运行芯片高效模式。转换工具默认将Prototxt中符合芯片支持Inplace层的所有层进行改写,用户如需关闭该功能,可通过如下环境声明:
+
+  ```bash
+  export NNIE_DISABLE_INPLACE_FUSION=off # 设置为on或未设置时,使能Inplace自动改写
+  ```
+
+  当自动改写被关闭时,若需对个别层使能芯片高效模式,可手动改写Prototxt里面的相应层。
+
+### 多图片batch运行及多step运行
+
+  
用户若需同时前向推理多个输入数据(多个图片),可通过[输入维度Resize](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/runtime_cpp.html#resize)将模型输入的第一维resize为输入数据个数。NNIE模型只支持对第一个维度('n'维)进行resize,其他维度('hwc')不可变。 + + 对于循环或lstm网络,用户需根据step值,配置TIME_STEP环境变量,同时resize模型输入。 + 设一次同时前向推理的数据的个数为input_num,对于序列数据输入的节点resize为input_num * step,非序列数据输入的节点resize为input_num。 + + 含有proposal算子的模型,不支持batch运行,不支持resize操作。 + +### 节点名称的变动 + + 模型转换为NNIE模型后,各节点名称可能发生变化,用户可通过netron打开模型,得到变化后的节点名。 + +### proposal算子使用说明 + + MindSpore Lite提供Proposal算子的样例代码,在该样例中,以自定义算子注册的方式实现proposal算子及该算子infer shape的注册。用户可将其修改为自身模型匹配的实现后,进行集成使用。 + > 你可以在这里下载完整的样例代码: + > + > + +### 分段机制说明及8段限制 + + 由于NNIE芯片支持的算子限制,在含有NNIE芯片不支持的算子时,需将模型分段为可支持层与不可支持层。 + 板端芯片支持最多8段的可支持层,当分段后的可支持层数量大于8段时,模型将无法运行,用户可通过netron观察Custom算子(其属性中含有type:NNIE),得到转换后的NNIE支持层数量。 diff --git a/tutorials/lite/source_zh_cn/use/npu_info.md b/tutorials/lite/source_zh_cn/use/npu_info.md index 2c9816ecee730465bb0dd89a366cd4215467fd2b..76ea406b43cfd5182b089e84a6096bb7f3d4921d 100644 --- a/tutorials/lite/source_zh_cn/use/npu_info.md +++ b/tutorials/lite/source_zh_cn/use/npu_info.md @@ -31,7 +31,8 @@ DDK包含了使用NPU的对外接口(包括模型构建、加载,计算等 它将在MindSpore源代码根目录下的output目录下构建出MindSpore Lite的包,其中包含NPU的动态库,libmindspore-lite动态库以及测试工具Benchmark。 ```bash -bash build.sh -I arm64 -e npu +export MSLITE_ENABLE_NPU=ON +bash build.sh -I arm64 -j8 ``` 有关编译详情见[Linux环境编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux)。 @@ -47,15 +48,12 @@ bash build.sh -I arm64 -e npu 配置好环境变量,将会动态加载libhiai.so, libhiai_ir.so, libhiai_ir_build.so。例如: ```bash - export LD_LIBRARY_PATH=mindspore-lite-{version}-inference-android-{arch}/inference/third_party/hiai_ddk/lib/:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=mindspore-lite-{version}-android-{arch}/runtime/third_party/hiai_ddk/lib/:$LD_LIBRARY_PATH ``` - Benchmark测试NPU推理 - 用户也可以使用MindSpore Lite的Benchmark工具测试NPU推理。 -编译出的Benchmark位置见[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#runtime)。 -将构建包传到具有NPU芯片(支持的芯片详情见[芯片与HUAWEI HiAI Version版本映射关系](https://developer.huawei.com/consumer/cn/doc/development/hiai-Guides/mapping-relationship-0000001052830507#ZH-CN_TOPIC_0000001052830507__section94427279718)) -的Android手机的`/data/local/tmp/`目录下,在手机上使用Benchmark工具测试NPU推理,示例如下: + 用户也可以使用MindSpore Lite的Benchmark工具测试NPU推理。将构建包传到具有NPU芯片的Android手机的`/data/local/tmp/`目录下,在手机上使用Benchmark工具测试NPU推理,示例如下: - 测性能 @@ -71,9 +69,8 @@ bash build.sh -I arm64 -e npu 有关Benchmark使用详情,见[Benchmark使用](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/benchmark_tool.html)。 -有关环境变量设置,需要根据[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#runtime)中编译选项为`-I arm64`或`-I arm32`时的目录结构, -将libmindspore-lite.so(目录为`mindspore-lite-{version}-inference-android-{arch}/inference/lib`)和 -NPU库(目录为`mindspore-lite-{version}-inference-android-{arch}/inference/third_party/hiai_ddk/lib/`)所在的目录加入`${LD_LIBRARY_PATH}`。 +有关环境变量设置,将libmindspore-lite.so(目录为`mindspore-lite-{version}-android-{arch}/runtime/lib`)和 +NPU库(目录为`mindspore-lite-{version}-android-{arch}/runtime/third_party/hiai_ddk/lib/`)所在的目录加入`${LD_LIBRARY_PATH}`。 ## 芯片支持 diff --git a/tutorials/lite/source_zh_cn/use/obfuscator_tool.md b/tutorials/lite/source_zh_cn/use/obfuscator_tool.md index bf5dd76e6bd95c8e4712a929a0e3868122301c9b..146c94184686fd8c57e555f84f8b30ee9fdb86e7 100644 --- a/tutorials/lite/source_zh_cn/use/obfuscator_tool.md +++ b/tutorials/lite/source_zh_cn/use/obfuscator_tool.md @@ -1,6 +1,6 @@ # 模型混淆工具 -`Linux` `模型混淆` `安全` `高级` +`Linux` `模型混淆` `高级` @@ -8,6 +8,7 @@ - [概述](#概述) - [Linux环境使用说明](#linux环境使用说明) - 
[环境准备](#环境准备) + - [目录结构](#目录结构) - [参数说明](#参数说明) - [使用示例](#使用示例) @@ -27,6 +28,15 @@ MindSpore Lite提供一个轻量级的离线模型混淆工具,可用于保护 - 参考构建文档中的[环境要求](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id1)和[编译示例](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#id3)编译x86_64版本。 +### 目录结构 + +```text +mindspore-lite-{version}-linux-x64 +└── tools + └── obfuscator # 模型混淆工具 + └── msobfuscator # 可执行程序 +``` + ### 参数说明 MindSpore Lite模型混淆工具提供了多种参数设置,用户可根据需要来选择使用。此外,用户可输入`./msobfuscator --help`获取实时帮助。 diff --git a/tutorials/lite/source_zh_cn/use/post_training_quantization.md b/tutorials/lite/source_zh_cn/use/post_training_quantization.md index ecf6577029b2593777d07a048022225f93f9f86e..7c778025020577b705f68758b31298e351620bac 100644 --- a/tutorials/lite/source_zh_cn/use/post_training_quantization.md +++ b/tutorials/lite/source_zh_cn/use/post_training_quantization.md @@ -95,16 +95,7 @@ MindSpore Lite训练后量化分为两类: | `--configFile=` | 必选 | 校准数据集配置文件路径 | String | - | - | | `--bitNum=` | 可选 | 设定全量化的比特数,目前支持1bit~8bit量化 | Integer | 8 | \[1,8] | -为了计算激活值的量化参数,用户需要提供校准数据集。校准数据集最好来自真实推理场景,能表征模型的实际输入情况,数量在100个左右。 -校准数据集配置文件采用`key=value`的方式定义相关参数,需要配置的`key`如下: - -| 参数名 | 属性 | 功能描述 | 参数类型 | 默认值 | 取值范围 | -| -------- | ------- | ----- | ----- | ----- | ----- | -| image_path | 必选 | 存放校准数据集的目录;如果模型有多个输入,请依次填写对应的数据所在目录,目录路径间请用`,`隔开 | String | - | 该目录存放可直接用于执行推理的输入数据。由于目前框架还不支持数据预处理,所有数据必须事先完成所需的转换,使得它们满足推理的输入要求 | -| batch_count | 可选 | 使用的输入数目 | Integer | 100 | (0,+∞) | -| method_x | 可选 | 网络层输入输出数据量化算法 | String | KL | KL、MAX_MIN、RemovalOutlier。
KL:基于[KL散度](http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf)对数据范围作量化校准。
MAX_MIN:基于最大值、最小值计算数据的量化参数。
RemovalOutlier:按照一定比例剔除数据的极大极小值,再计算量化参数。
在校准数据集与实际推理时的输入数据相吻合的情况下,推荐使用MAX_MIN;而在校准数据集噪声比较大的情况下,推荐使用KL或者RemovalOutlier | -| thread_num | 可选 | 使用校准数据集执行推理流程时的线程数 | Integer | 1 | (0,+∞) | -| bias_correction | 可选 | 是否对量化误差进行校正 | Boolean | false | true、flase。使能后,能提升转换后的模型精度,建议设置为true | +为了计算激活值的量化参数,用户需要提供校准数据集。校准数据集最好来自真实推理场景,能表征模型的实际输入情况,数量在100个左右。`configFile`的配置方式请参见[推理模型转换的参数说明](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/converter_tool.html#id5)。 > 对于多输入模型,要求不同输入数据分别存放在各自不同的目录,同时各自目录中的所有文件的文件名按照字典序递增排序后,能够一一对应。例如:模型有两个输入input0、input1,校准数据集共2组(batch_count=2);input0的对应数据存放在/dir/input0/目录,输入数据文件名为:data_1.bin、data_2.bin;input1的对应数据存放在/dir/input1/目录,输入数据文件名为:data_a.bin、data_b.bin,则认为(data_1.bin, data_a.bin)构成一组输入,(data_2.bin, data_b.bin)构成另一组输入。 diff --git a/tutorials/lite/source_zh_cn/use/runtime_cpp.md b/tutorials/lite/source_zh_cn/use/runtime_cpp.md index a397b5d7611c01610aff84e39b04387c507d70e1..486ca7f2d245e7b655c782c865f1252d31a0e4d6 100644 --- a/tutorials/lite/source_zh_cn/use/runtime_cpp.md +++ b/tutorials/lite/source_zh_cn/use/runtime_cpp.md @@ -1,6 +1,6 @@ # 使用C++接口执行推理 -`Windows` `Linux` `Android` `C++` `推理应用` `模型加载` `数据准备` `中级` `高级` +`Windows` `macOS` `Linux` `iOS` `Android` `C++` `推理应用` `模型加载` `数据准备` `中级` `高级` @@ -12,6 +12,7 @@ - [配置使用CPU后端](#配置使用cpu后端) - [配置使用GPU后端](#配置使用gpu后端) - [配置使用NPU后端](#配置使用npu后端) + - [配置使用TensorRT后端](#配置使用TensorRT后端) - [创建会话](#创建会话) - [图编译](#图编译) - [输入数据](#输入数据) @@ -77,7 +78,9 @@ if (model == nullptr) { ## 创建配置上下文 -上下文会保存会话所需的一些基本配置参数,用于指导图编译和图执行,如果用户通过`new`创建[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2),不再需要时,需要用户通过`delete`释放。一般在创建完[LiteSession](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/session.html#litesession)后,[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2)即可释放。其中[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2)所包含的参数定义如下: +上下文会保存会话所需的一些基本配置参数,用于指导图编译和图执行,如果用户通过`new`创建[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2),不再需要时,需要用户通过`delete`释放。一般在创建完[LiteSession](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/session.html#litesession)后,[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2)即可释放。 + +[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2)中包含的基本参数定义如下: - [thread_num_](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#thread-num):MindSpore Lite内置一个进程共享的线程池,推理时通过`thread_num_`指定线程池的最大线程数,默认为2线程。 - [allocator](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#allocator):MindSpore Lite支持动态内存分配和释放,如果没有指定`allocator`,推理时会生成一个默认的`allocator`,也可以通过[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#context)方法在多个[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2)中共享内存分配器,具体调用方式可参考[共享内存池](#共享内存池)的使用方式。 @@ -85,6 +88,10 @@ if (model == nullptr) { - [device_list_](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#device-list):MindSpore Lite支持异构推理,推理时的后端配置信息由[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2)中的`device_list_`指定,默认存放CPU的[DeviceContext](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#devicecontext)。在进行图编译时,会根据`device_list_`中不同的后端配置信息进行算子选型调度。目前仅支持两种异构,CPU和GPU异构或者CPU和NPU异构。当配置GPU的[DeviceContext](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#devicecontext)时,优先使用GPU推理;当配置NPU的[DeviceContext](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#devicecontext)时,优先使用NPU推理。 > `device_list_[0]`必须是CPU的`DeviceContext`, `device_list_[1]`是GPU的`DeviceContext`或者NPU的`DeviceContext`。暂时不支持同时设置CPU, 
GPU和NPU三个`DeviceContext`。
+>
+> 对于iOS设备,暂时只支持`device_list_[0]`为CPU的`DeviceContext`。
+
+[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2)中包含的高级参数定义如下:

### 配置线程数
@@ -119,6 +126,8 @@ cpu_device_info.enable_float16_ = true;
```
> Float16需要CPU为ARM v8.2架构的机型才能生效,其他不支持的机型和x86平台会自动回退到Float32执行。
+>
+> 对于iOS设备,只支持CPU后端运行,且暂时不支持CPU后端的Float16的执行。

### 配置使用GPU后端
@@ -166,6 +175,28 @@ npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3;
context->device_list_.push_back(npu_device_ctx);
```
+### 配置使用TensorRT后端
+
+当需要执行的后端为CPU和TensorRT的异构推理时,需要同时设置CPU和TensorRT的[DeviceContext](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#devicecontext),配置后将会优先使用TensorRT推理。
+
+下面示例[代码演示](https://gitee.com/mindspore/mindspore/blob/master/mindspore/lite/examples/runtime_cpp/main.cc#L120)如何创建CPU与TensorRT异构推理后端:
+
+```cpp
+auto context = std::make_shared<mindspore::lite::Context>();
+if (context == nullptr) {
+  std::cerr << "New context failed while running. " << std::endl;
+  return nullptr;
+}
+
+// If the GPU device context is set, the preferred backend is GPU (the GPU backend is TensorRT for x86_64 builds),
+// which means, if there is a GPU operator, it will run on the GPU first, otherwise it will run on the CPU.
+mindspore::lite::DeviceContext gpu_device_ctx{mindspore::lite::DT_GPU, {false}};
+// GPU use float16 operator as priority.
+gpu_device_ctx.device_info_.gpu_device_info_.enable_float16_ = true;
+// The GPU device context needs to be push_back into device_list to work.
+context->device_list_.push_back(gpu_device_ctx);
+```
+
## 创建会话

使用MindSpore Lite执行推理时,[LiteSession](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/session.html#litesession)是推理的主入口,通过[LiteSession](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/session.html#litesession)可以进行图编译、图执行。采用上一步创建得到的[Context](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#id2),调用[LiteSession](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/session.html#litesession)的静态[static LiteSession *CreateSession(const lite::Context *context)](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/session.html#createsession)方法来创建[LiteSession](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/session.html#litesession)。
@@ -580,6 +611,8 @@ if (session == nullptr) {
logcat -s "MS_LITE"
```
+> 对iOS设备暂不支持日志查看。
+
### 获取版本号

MindSpore Lite提供了[Version](https://www.mindspore.cn/doc/api_cpp/zh-CN/master/lite.html#version)方法可以获取版本号,包含在`include/version.h`头文件中,调用该方法可以得到当前MindSpore Lite的版本号。
diff --git a/tutorials/lite/source_zh_cn/use/runtime_train_cpp.md b/tutorials/lite/source_zh_cn/use/runtime_train_cpp.md
index 9a36cf517a2f03810a61a51bff0aef2cb66be438..b9b3ce0a3652260548ed488deb6e09aed542ba91 100644
--- a/tutorials/lite/source_zh_cn/use/runtime_train_cpp.md
+++ b/tutorials/lite/source_zh_cn/use/runtime_train_cpp.md
@@ -550,15 +550,21 @@ if (ret != RET_OK) {
### 保存模型
-MindSpore的`CkptSaver`类实际调用的是`SaveToFile`函数,当然你也可以直接调用`SaveToFile`来保存模型,`SaveToFile`原型如下:
+MindSpore的`CkptSaver`类实际调用的是`Export`函数,当然你也可以直接调用`Export`来保存模型,`Export`原型如下:
```cpp
  /// \brief Save the trained model into a flatbuffer file
  ///
-  /// \param[in] filename Filename to save flatbuffer to
+  /// \param[in] file_name Filename to save flatbuffer to
+  ///
+  /// \param[in] model_type ModelType to save train or inference
+  ///
+  /// \param[in] quant_type QuantizationType to save
+  ///
+  /// \param[in] format FormatType to save
  ///
  /// \return 0 on success or -1 in case of error
-  virtual int SaveToFile(const std::string &filename) const = 0;
+  virtual int Export(const std::string &file_name, lite::ModelType model_type = lite::MT_TRAIN, lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType format = lite::FT_FLATBUFFERS) const = 0;
```

保存的模型可继续用于训练或推理。
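基于上述原型,一个示意调用如下(假设`session`为已完成训练的会话指针,文件名为示例值):

```cpp
// 将训练后的模型保存为可继续训练的flatbuffer文件(示意用法)
int ret = session->Export("lenet_tod_trained.ms", mindspore::lite::MT_TRAIN);
if (ret != mindspore::lite::RET_OK) {
  std::cerr << "Export model failed." << std::endl;
}
```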
diff --git a/tutorials/lite/source_zh_cn/use/tensorrt_info.md b/tutorials/lite/source_zh_cn/use/tensorrt_info.md
new file mode 100644
index 0000000000000000000000000000000000000000..c8ad2d5bac231eaf6751bcb32e8c3eff8e6db248
--- /dev/null
+++ b/tutorials/lite/source_zh_cn/use/tensorrt_info.md
@@ -0,0 +1,74 @@
+# 集成TensorRT使用说明
+
+`TensorRT` `NVIDIA` `Linux` `环境准备` `算子支持` `中级` `高级`
+
+
+
+- [集成TensorRT使用说明](#集成TensorRT使用说明)
+    - [使用步骤](#使用步骤)
+        - [环境准备](#环境准备)
+        - [编译构建](#编译构建)
+        - [集成使用](#集成使用)
+    - [算子支持](#算子支持)
+
+
+
+
+
+## 使用步骤
+
+### 环境准备
+
+在基本的[环境准备](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html)之外,使用TensorRT需要集成CUDA、TensorRT。当前版本适配CUDA 10.1 和 TensorRT 6.0.1.5。
+
+安装[CUDA 10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base),并将安装后的目录设置为环境变量`${CUDA_HOME}`。构建脚本将使用这个环境变量寻找CUDA。
+
+下载[TensorRT 6.0.1.5](https://developer.nvidia.com/nvidia-tensorrt-6x-download),并将压缩包解压后的目录设置为环境变量`${TENSORRT_PATH}`。构建脚本将使用这个环境变量寻找TensorRT。
+
+### 编译构建
+
+在Linux环境下,使用MindSpore[源代码](https://gitee.com/mindspore/mindspore)根目录下的build.sh脚本可以构建集成TensorRT的MindSpore Lite包,先配置环境变量`MSLITE_GPU_BACKEND=tensorrt`,再执行编译命令如下,它将在MindSpore源代码根目录下的output目录下构建出MindSpore Lite的包,其中包含`libmindspore-lite.so`以及测试工具Benchmark。
+
+```bash
+bash build.sh -I x86_64
+```
+
+有关编译详情见[Linux环境编译](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#linux)。
+
+### 集成使用
+
+- 集成说明
+
+    开发者需要集成使用TensorRT功能时,需要注意:
+    - 在代码中[配置TensorRT后端](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/runtime_cpp.html#tensorrt),有关使用Runtime执行推理详情见[使用Runtime执行推理(C++)](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/runtime_cpp.html)。
+    - 编译执行可执行程序。如采用动态加载方式,参考[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#runtime)中编译选项为`-I x86_64`时的内容,需要配置的环境变量如下,将会动态加载相关的so。
+
+    ```bash
+    export LD_LIBRARY_PATH=mindspore-lite-{version}-{os}-{arch}/runtime/lib/:$LD_LIBRARY_PATH
+    export LD_LIBRARY_PATH=user-installed-tensorrt-path/lib/:$LD_LIBRARY_PATH
+    export LD_LIBRARY_PATH=user-installed-cuda-path/lib/:$LD_LIBRARY_PATH
+    ```
+
+- Benchmark测试TensorRT推理
+
+    用户也可以使用MindSpore Lite的Benchmark工具测试TensorRT推理。编译出的Benchmark位置见[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#runtime)。将构建包传到具有TensorRT环境(TensorRT 6.0.1.5)的设备上使用Benchmark工具测试TensorRT推理,示例如下:
+
+    - 测性能
+
+        ```bash
+        ./benchmark --device=GPU --modelFile=./models/test_benchmark.ms --timeProfiling=true
+        ```
+
+    - 测精度
+
+        ```bash
+        ./benchmark --device=GPU --modelFile=./models/test_benchmark.ms --inDataFile=./input/test_benchmark.bin --inputShapes=1,32,32,1 --accuracyThreshold=3 --benchmarkDataFile=./output/test_benchmark.out
+        ```
+
+    有关Benchmark使用详情,见[Benchmark使用](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/benchmark_tool.html)。
+
+    有关环境变量设置,需要根据[编译输出](https://www.mindspore.cn/tutorial/lite/zh-CN/master/use/build.html#runtime)中编译选项为`-I x86_64`时的目录结构,将`libmindspore-lite.so`(目录为`mindspore-lite-{version}-{os}-{arch}/runtime/lib`)、CUDA的`so`库所在的目录和TensorRT的`so`库所在的目录加入`${LD_LIBRARY_PATH}`。
+
+## 算子支持
+
+TensorRT算子支持见[Lite 算子支持](https://www.mindspore.cn/tutorial/lite/zh-CN/master/operator_list_lite.html)。
diff --git a/tutorials/requirements.txt b/tutorials/requirements.txt
index 6e8a6bc4c2d8fce7c05a891fab76504cad65a5c7..6d8cd70439820e16bc32c4abc93e948ba81dc01b 100644
--- a/tutorials/requirements.txt
+++ b/tutorials/requirements.txt
@@ -1,5 +1,5 @@
sphinx >= 2.2.1, <= 2.4.4
-recommonmark
+myst_parser == 0.14.0
 sphinx-markdown-tables
 sphinx_rtd_theme
 numpy
diff --git a/tutorials/source_en/autograd.md b/tutorials/source_en/autograd.md
index d830c76e3ae84ff38fa97c62837931b5c312caa4..7137e36bb3a79450a694635f2f4c9e634d065697 100644
--- a/tutorials/source_en/autograd.md
+++ b/tutorials/source_en/autograd.md
@@ -136,3 +136,58 @@ print(output)
 [[2.211 0.51 1.49 ]
  [5.588 2.68 4.07 ]]
 ```
+
+## Stop Gradient
+
+We can use `stop_gradient` to exclude certain operators from the gradient calculation. For example:
+
+```python
+import numpy as np
+import mindspore.nn as nn
+import mindspore.ops as ops
+from mindspore import Tensor
+from mindspore import ParameterTuple, Parameter
+from mindspore import dtype as mstype
+from mindspore.ops.functional import stop_gradient
+
+class Net(nn.Cell):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.matmul = ops.MatMul()
+
+    def construct(self, x, y):
+        out1 = self.matmul(x, y)
+        out2 = self.matmul(x, y)
+        out2 = stop_gradient(out2)
+        out = out1 + out2
+        return out
+
+class GradNetWrtX(nn.Cell):
+    def __init__(self, net):
+        super(GradNetWrtX, self).__init__()
+        self.net = net
+        self.grad_op = ops.GradOperation()
+
+    def construct(self, x, y):
+        gradient_function = self.grad_op(self.net)
+        return gradient_function(x, y)
+
+x = Tensor([[0.8, 0.6, 0.2], [1.8, 1.3, 1.1]], dtype=mstype.float32)
+y = Tensor([[0.11, 3.3, 1.1], [1.1, 0.2, 1.4], [1.1, 2.2, 0.3]], dtype=mstype.float32)
+output = GradNetWrtX(Net())(x, y)
+print(output)
+```
+
+```text
+ [[4.5, 2.7, 3.6],
+ [4.5, 2.7, 3.6]]
+```
+
+Here, we applied `stop_gradient` to `out2`, so `out2` makes no contribution to the gradient. If we delete `out2 = stop_gradient(out2)`, the result becomes:
+
+```text
+ [[9.0, 5.4, 7.2],
+ [9.0, 5.4, 7.2]]
+```
+
+Without `stop_gradient` applied to `out2`, `out2` contributes to the gradient in the same way as `out1`, so every value in the result is doubled.
\ No newline at end of file
diff --git a/tutorials/source_en/conf.py b/tutorials/source_en/conf.py
index 8556caa7cb5b4738f2944f47b9bf4e568dba86ea..92693976b790257f153290b67b7778d5754c9458 100644
--- a/tutorials/source_en/conf.py
+++ b/tutorials/source_en/conf.py
@@ -33,7 +33,7 @@ release = 'master'
 # ones.
 extensions = [
     'sphinx_markdown_tables',
-    'recommonmark',
+    'myst_parser',
     'nbsphinx',
     'sphinx.ext.mathjax',
     'IPython.sphinxext.ipython_console_highlighting'
diff --git a/tutorials/source_en/dataset.md b/tutorials/source_en/dataset.md
index e4ba917b3700a7c469318ccf08593119452c9758..725971e2a1c2dd89090b5c3306e82e203383abf7 100644
--- a/tutorials/source_en/dataset.md
+++ b/tutorials/source_en/dataset.md
@@ -168,7 +168,7 @@ plt.title(data['label'].asnumpy(), fontsize=20)
 plt.show()
 ```

-![png](./images/output_13_0.png)
+![png](./images/output_13_0.PNG)

 Define the data augmentation operator, perform the `Resize` and `RandomCrop` operations on the dataset, and insert the dataset into the data processing pipeline through `map` mapping.

@@ -189,6 +189,6 @@ plt.title(data['label'].asnumpy(), fontsize=20)
 plt.show()
 ```

-![png](./images/output_17_0.png)
+![png](./images/output_17_0.PNG)

 For more information, see [Data augmentation](https://www.mindspore.cn/doc/programming_guide/en/master/augmentation.html).
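+For reference, the augmentation step mentioned above can be sketched as follows. This is a minimal illustration rather than the tutorial's verified code: it assumes the `dataset` object and the `"image"` column created earlier in dataset.md, and the operator sizes are placeholder values:
+
+```python
+import mindspore.dataset.vision.c_transforms as c_vision
+
+# Illustrative operator parameters; adjust to your own data.
+resize_op = c_vision.Resize(size=(200, 200))
+crop_op = c_vision.RandomCrop(150)
+
+# Insert both operators into the data processing pipeline via `map`.
+dataset = dataset.map(operations=[resize_op, crop_op], input_columns=["image"])
+```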
diff --git a/tutorials/source_en/index.rst b/tutorials/source_en/index.rst index a76f16205607b0ce6a2a998571f5f684d7693e04..b661ceb6226e821acd06608142ba79bbcdaf12bc 100644 --- a/tutorials/source_en/index.rst +++ b/tutorials/source_en/index.rst @@ -3,11 +3,12 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Quick Start for MindSpore +MindSpore Tutorial ========================= .. toctree:: :maxdepth: 1 + :caption: Quick Start introduction quick_start diff --git a/tutorials/source_en/save_load_model.md b/tutorials/source_en/save_load_model.md index 9d25704bb16a471c2171c181daa4aaaf3039b251..2ab0e8104ec66c3e6d46835a68f86690ab3f5c61 100644 --- a/tutorials/source_en/save_load_model.md +++ b/tutorials/source_en/save_load_model.md @@ -37,7 +37,7 @@ You can configure the checkpoint policies as required. The following describes t ```python from mindspore.train.callback import ModelCheckpoint, CheckpointConfig -config_ck = CheckpointConfig(save_checkpoint_steps=32, keep_checkpoint_max=10) +config_ckpt = CheckpointConfig(save_checkpoint_steps=32, keep_checkpoint_max=10) ckpt_cb = ModelCheckpoint(prefix='resnet50', directory=None, config=config_ckpt) model.train(epoch_num, dataset, callbacks= ckpt_cb) ``` diff --git a/tutorials/source_zh_cn/autograd.ipynb b/tutorials/source_zh_cn/autograd.ipynb index 2ffc0211171530134b5998da7bab14cbc213be99..0076ebfd05c5644e263d861ce8b4090d8b5f0052 100644 --- a/tutorials/source_zh_cn/autograd.ipynb +++ b/tutorials/source_zh_cn/autograd.ipynb @@ -238,6 +238,104 @@ "output = GradNetWrtX(Net())(x, y) \n", "print(output)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 停止计算梯度\n", + "\n", + "我们可以使用`stop_gradient`来禁止网络内的算子对梯度的影响,例如:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[4.5 2.7 3.6]\n", + " [4.5 2.7 3.6]]\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import mindspore.nn as nn\n", + "import mindspore.ops as ops\n", + "from mindspore import Tensor\n", + "from mindspore import ParameterTuple, Parameter\n", + "from mindspore import dtype as mstype\n", + "from mindspore.ops.functional import stop_gradient\n", + "\n", + "class Net(nn.Cell):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.matmul = ops.MatMul()\n", + "\n", + " def construct(self, x, y):\n", + " out1 = self.matmul(x, y)\n", + " out2 = self.matmul(x, y)\n", + " out2 = stop_gradient(out2)\n", + " out = out1 + out2\n", + " return out\n", + "\n", + "class GradNetWrtX(nn.Cell):\n", + " def __init__(self, net):\n", + " super(GradNetWrtX, self).__init__()\n", + " self.net = net\n", + " self.grad_op = ops.GradOperation()\n", + "\n", + " def construct(self, x, y):\n", + " gradient_function = self.grad_op(self.net)\n", + " return gradient_function(x, y)\n", + "\n", + "x = Tensor([[0.8, 0.6, 0.2], [1.8, 1.3, 1.1]], dtype=mstype.float32)\n", + "y = Tensor([[0.11, 3.3, 1.1], [1.1, 0.2, 1.4], [1.1, 2.2, 0.3]], dtype=mstype.float32)\n", + "output = GradNetWrtX(Net())(x, y)\n", + "print(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "在这里我们对`out2`设置了`stop_gradient`, 所以`out2`没有对梯度计算有任何的贡献。 如果我们删除`out2 = stop_gradient(out2)`,那么输出值会变为:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[9.0 5.4 7.2]\n", 
+ " [9.0 5.4 7.2]]\n" + ] + } + ], + "source": [ + "output = GradNetWrtX(Net())(x, y)\n", + "print(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "在我们不对`out2`设置`stop_gradient`后, `out2`和`out1`会对梯度产生相同的贡献。 所以我们可以看到,结果中每一项的值都变为了原来的两倍。\n", + "\n" + ] } ], "metadata": { @@ -261,4 +359,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/tutorials/source_zh_cn/conf.py b/tutorials/source_zh_cn/conf.py index 7d3d33a6ca58c4a834ba5cdfd954f99f7ef3768e..1c8e0b67ff658ec904fef48f7205bbb8da5084c1 100644 --- a/tutorials/source_zh_cn/conf.py +++ b/tutorials/source_zh_cn/conf.py @@ -33,7 +33,7 @@ release = 'master' # ones. extensions = [ 'sphinx_markdown_tables', - 'recommonmark', + 'myst_parser', 'nbsphinx', 'sphinx.ext.mathjax', 'IPython.sphinxext.ipython_console_highlighting' diff --git a/tutorials/source_zh_cn/distributed_training.rst b/tutorials/source_zh_cn/distributed_training.rst new file mode 100644 index 0000000000000000000000000000000000000000..25e78a706f213cd05d9c99f18872a865a9d98be9 --- /dev/null +++ b/tutorials/source_zh_cn/distributed_training.rst @@ -0,0 +1,10 @@ +分布式训练 +=================== + +.. toctree:: + :maxdepth: 1 + + middleclass/distributed_training/distributed_training_ascend + middleclass/distributed_training/distributed_training_gpu + middleclass/distributed_training/distributed_training_model_parameters_saving_and_loading + middleclass/distributed_training/apply_parameter_server_training diff --git a/tutorials/source_zh_cn/index.rst b/tutorials/source_zh_cn/index.rst index 124a2f94379bb7aa5dea7f8c2eb934ebcb17591b..fd34aab0e759f078d57629e6a7432689209f89cd 100644 --- a/tutorials/source_zh_cn/index.rst +++ b/tutorials/source_zh_cn/index.rst @@ -9,7 +9,7 @@ MindSpore教程 .. toctree:: :glob: :maxdepth: 1 - :caption: 快速入门 + :caption: 入门教程 introduction quick_start @@ -26,4 +26,7 @@ MindSpore教程 :maxdepth: 1 :caption: 进阶教程 - custom \ No newline at end of file + custom + middleclass/pynative_mode_and_graph_mode/pynative_mode_and_graph_mode + distributed_training + inference_and_deploy \ No newline at end of file diff --git a/tutorials/source_zh_cn/inference_and_deploy.rst b/tutorials/source_zh_cn/inference_and_deploy.rst new file mode 100644 index 0000000000000000000000000000000000000000..da414a36bcd92f602289716591e1923fe8529ec0 --- /dev/null +++ b/tutorials/source_zh_cn/inference_and_deploy.rst @@ -0,0 +1,10 @@ +推理与部署 +=================== + +.. 
toctree:: + :maxdepth: 1 + + middleclass/inference/ascend910_inference + middleclass/inference/ascend310_inference + middleclass/inference/mindspore_serving_inference + middleclass/inference/mindspore_lite_inference diff --git a/tutorials/source_zh_cn/middleclass/custom/loss.ipynb b/tutorials/source_zh_cn/middleclass/custom/loss.ipynb index c998aaec7c730344e82355ce7a424a68e312289a..bb849f3f0247a4c8d72315b8c965c94889b529ff 100644 --- a/tutorials/source_zh_cn/middleclass/custom/loss.ipynb +++ b/tutorials/source_zh_cn/middleclass/custom/loss.ipynb @@ -70,7 +70,6 @@ "cell_type": "code", "execution_count": 1, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -82,10 +81,9 @@ "source": [ "import numpy as np\n", "import mindspore.ops as ops\n", - "from mindspore.nn import Loss\n", + "from mindspore.nn import LossBase\n", "\n", - "\n", - "class L1Loss(Loss):\n", + "class L1Loss(LossBase):\n", " def __init__(self, reduction=\"mean\"):\n", " super(L1Loss, self).__init__(reduction)\n", " self.abs = ops.Abs()\n", @@ -124,7 +122,6 @@ "cell_type": "code", "execution_count": 2, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -161,7 +158,6 @@ "cell_type": "code", "execution_count": 3, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -171,6 +167,7 @@ }, "outputs": [], "source": [ + "import numpy as np\n", "from mindspore.common.initializer import Normal\n", "import mindspore.nn as nn\n", "\n", @@ -198,7 +195,6 @@ "cell_type": "code", "execution_count": 4, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -224,7 +220,6 @@ "cell_type": "code", "execution_count": 5, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -258,7 +253,6 @@ "cell_type": "code", "execution_count": 6, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -271,16 +265,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "epoch: 1 step: 1, loss is 11.240622\n", - "epoch: 1 step: 2, loss is 10.386615\n", - "epoch: 1 step: 3, loss is 8.864252\n", - "epoch: 1 step: 4, loss is 11.2606\n", - "epoch: 1 step: 5, loss is 8.192026\n", - "epoch: 1 step: 6, loss is 8.621394\n", - "epoch: 1 step: 7, loss is 5.699249\n", - "epoch: 1 step: 8, loss is 6.912241\n", - "epoch: 1 step: 9, loss is 9.500487\n", - "epoch: 1 step: 10, loss is 5.641017\n" + "epoch: 1 step: 1, loss is 8.042041\n", + "epoch: 1 step: 2, loss is 10.255897\n", + "epoch: 1 step: 3, loss is 11.757828\n", + "epoch: 1 step: 4, loss is 10.169484\n", + "epoch: 1 step: 5, loss is 8.056287\n", + "epoch: 1 step: 6, loss is 7.80862\n", + "epoch: 1 step: 7, loss is 8.432369\n", + "epoch: 1 step: 8, loss is 8.16351\n", + "epoch: 1 step: 9, loss is 6.4131327\n", + "epoch: 1 step: 10, loss is 4.8829217\n" ] } ], diff --git a/tutorials/source_zh_cn/middleclass/custom/metric.md b/tutorials/source_zh_cn/middleclass/custom/metric.md index 33c8dbeed039930014d60ee5bc9413c5b20bee85..9dbcaa7280c74696aa43c12fa20e32f5ca938e7f 100644 --- a/tutorials/source_zh_cn/middleclass/custom/metric.md +++ b/tutorials/source_zh_cn/middleclass/custom/metric.md @@ -4,7 +4,6 @@ - [自定义评价指标](#自定义评价指标) - - [概述](#概述) - [Metrics自定义方法](#metrics自定义方法) - [导入Metric模块](#导入metric模块) - [定义Metrics](#定义metrics) @@ -12,9 +11,7 @@ - - -## 概述 + 评价指标(Metrics)可以用来评估模型结果的可信度。 @@ -36,16 +33,16 @@ MindSpore提供了多种Metrics评估指标,如:`accuracy`、`loss`、`preci ### 导入Metric模块 ```python -from mindspore.nn import Metric +import numpy as np +from mindspore._checkparam import 
Validator as validator +from .metric import Metric, rearrange_inputs ``` ### 定义Metrics `Dice`实际上计算了两个样本间的相似度,数学公式可以表达为: -$$ -dice = \frac{2 \times (pred \bigcap true)}{pred \bigcup true} -$$ +$$ dice = \frac{2 \times (pred \bigcap true)}{pred \bigcup true} $$ Dice的输入为两个尺度相同的Tensor, list或numpy,一个为预测值,一个为实际值。最后输出两个Tensor间的相似度计算值。其中为防止计算过程中分母为零,引入参数smooth,默认值为1e-5。 @@ -67,6 +64,7 @@ class Dice(Metric): self._dice_coeff_sum = 0 self._samples_num = 0 + @rearrange_inputs def update(self, *inputs): """更新内部计算结果""" @@ -103,7 +101,7 @@ class Dice(Metric): ### 在框架中导入Metrics -在同级目录中的[__init__.py](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/metrics/__init__.py)文件中,添加已经定义好的[Dice](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/metrics/dice.py): +在同级目录中的[\_\_init\_\_.py](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/metrics/__init__.py)文件中,添加已经定义好的[Dice](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/metrics/dice.py)。可以点击链接查看文件的具体位置,Metrics在框架中位于`mindspore/nn/metrics/`目录下: ```text __all__ = [ diff --git a/tutorials/source_zh_cn/middleclass/custom/operator.md b/tutorials/source_zh_cn/middleclass/custom/operator.md index 49f9c4d7ef86bf8fe9f8ad233fc0524bd5bb5234..ea502ec06ace18a489697180e67b0e808f08fbb5 100644 --- a/tutorials/source_zh_cn/middleclass/custom/operator.md +++ b/tutorials/source_zh_cn/middleclass/custom/operator.md @@ -19,7 +19,7 @@ - + ## 自定义算子开发 @@ -47,12 +47,7 @@ AI CPU算子是AI CPU负责执行昇腾处理器中海思 SoC 的CPU类算子( Elu是一类激活函数,数学公式表达如下: -$$ -ELU(x)=\begin{cases} -\alpha(e^x-1) \qquad if \quad x \leq 0 \\\\ -x \qquad\qquad\quad\ \ if \quad x > 0 -\end{cases} -$$ +$$ ELU(x)=\begin{cases} \alpha(e^x-1) \qquad if \quad x \leq 0 \\\\ x \qquad\qquad\quad\ \ if \quad x > 0 \end{cases} $$ Elu 算子的输入为 Tensor,数据类型为 float16 或 float32 ,输出为同种数据类型、同种 shape 的 Tensor。当前系数$\alpha$仅支持设定为 float 类型的“1.0”。详细说明可查看[API](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/ops/mindspore.ops.Elu.html)。 @@ -281,7 +276,7 @@ from mindspore.ops._op_impl.tbe import _elu_tbe #### AI CPU算子信息注册 -AI CPU 算子的注册文件放在`mindspore/ops/_op_impl/aicpu/`文件夹中,注册方式与AI CPU 算子类似。注册完后要在`mindspore/ops/_op_impl/aicpu/__init__.py`中导入注册好的算子信息。 +AI CPU 算子的注册文件放在`mindspore/ops/_op_impl/aicpu/`文件夹中,注册方式与AI Core 算子类似。注册完后要在`mindspore/ops/_op_impl/aicpu/__init__.py`中导入注册好的算子信息。 #### 反向算子信息注册 diff --git a/tutorials/source_zh_cn/middleclass/custom/train.ipynb b/tutorials/source_zh_cn/middleclass/custom/train.ipynb index 5bdc3f733e2ffae47b00d177ece3e3c4f73a06c1..765a5712c479e33b23b23d3971dcf5d6a34c6c00 100644 --- a/tutorials/source_zh_cn/middleclass/custom/train.ipynb +++ b/tutorials/source_zh_cn/middleclass/custom/train.ipynb @@ -193,7 +193,7 @@ " # 为反向传播设定系数\n", " sens = ops.Fill()(ops.DType()(loss), ops.Shape()(loss), self.sens)\n", " grads = self.grad(self.network, weights)(data, label, sens)\n", - " return ops.depend(loss, self.optimizer(grads))" + " return loss, self.optimizer(grads)" ] }, { @@ -284,4 +284,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/tutorials/source_zh_cn/middleclass/distributed_training/apply_parameter_server_training.md b/tutorials/source_zh_cn/middleclass/distributed_training/apply_parameter_server_training.md new file mode 100644 index 0000000000000000000000000000000000000000..324691cf3ba2bd44f34bb2d83344509fa79ee60f --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/distributed_training/apply_parameter_server_training.md @@ -0,0 +1,160 @@ +# 使用Parameter Server训练 + + + +- [使用Parameter 
Server训练](#使用parameter-server训练) + - [准备工作](#准备工作) + - [训练脚本准备](#训练脚本准备) + - [参数设置](#参数设置) + - [环境变量设置](#环境变量设置) + - [执行训练](#执行训练) + + + + + +Parameter Server(参数服务器)是分布式训练中一种广泛使用的架构,相较于同步的AllReduce训练方法,Parameter Server具有更好的灵活性、可扩展性以及节点容灾的能力。具体来讲,参数服务器既支持同步SGD,也支持异步SGD的训练算法;在扩展性上,将模型的计算与模型的更新分别部署在Worker和Server两类进程中,使得Worker和Server的资源可以独立地横向扩缩;另外,在大规模数据中心的环境下,计算设备、网络以及存储经常会出现各种故障而导致部分节点异常,而在参数服务器的架构下,能够较为容易地处理此类的故障而不会对训练中的任务产生影响。 + +在MindSpore的参数服务器实现中,采用了自研的通信框架作为基础架构,基于其提供的远程通信能力以及抽象的Send/Broadcast等原语,实现了同步SGD的分布式训练算法,另外结合Ascend和GPU中的高性能集合通信库(HCCL和NCCL),MindSpore还提供了Parameter Server和AllReduce的混合训练模式,支持将部分权重通过参数服务器进行存储和更新,其余权重仍然通过AllReduce算法进行训练。 + +在参数服务器的架构设计中,一共包含三个独立的组件,分别是Server、Worker和Scheduler,作用分别是: + +- Server:保存模型的权重和反向计算的梯度值,并使用优化器通过Worker上传的梯度值对模型进行更新。 + +- Worker:执行网络的正反向计算,反向计算的梯度值通过Push接口上传至Server中,通过Pull接口把Server更新好的模型下载到Worker本地。 + +- Scheduler:用于建立Server和Worker的通信关系。 + +## 准备工作 + +以LeNet在Ascend 910上使用Parameter Server训练为例: + +### 训练脚本准备 + +参考,使用[MNIST数据集](http://yann.lecun.com/exdb/mnist/),了解如何训练一个LeNet网络。 + +### 参数设置 + +1. 首先调用`mindspore.context.set_ps_context(enable_ps=True)`开启Parameter Server训练模式. + + - 此接口需在`mindspore.communication.management.init()`之前调用。 + - 若没有调用此接口,下面的[环境变量设置](#id5)则不会生效。 + - 调用`mindspore.context.reset_ps_context()`可以关闭Parameter Server训练模式。 + +2. 在本训练模式下,有以下两种调用接口方式以控制训练参数是否通过Parameter Server进行更新,并且可以控制参数初始化位置: + + - 通过`mindspore.nn.Cell.set_param_ps()`对`nn.Cell`中所有权重递归设置。 + - 通过`mindspore.Parameter.set_param_ps()`对此权重进行设置。 + - 被设置为通过Parameter Server更新的单个权重大小不得超过INT_MAX(2^31 - 1)字节。 + - 接口`set_param_ps`可接收一个`bool`型参数:`init_in_server`,表示该训练参数是否在Server端初始化,`init_in_server`默认值为`False`,表示在Worker上初始化该训练参数;当前仅支持`EmbeddingLookup`算子的训练参数`embedding_table`在Server端初始化,以解决超大shape的`embedding_table`在Worker上初始化导致内存不足的问题,该算子的`target`属性需要设置为'CPU'。在Server端初始化的训练参数将不再同步到Worker上,如果涉及到多Server训练并保存CheckPoint,则训练结束后每个Server均会保存一个CheckPoint。 + +3. 在[原训练脚本](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/lenet/train.py)基础上,设置LeNet模型所有权重通过Parameter Server训练: + + ```python + context.set_ps_context(enable_ps=True) + network = LeNet5(cfg.num_classes) + network.set_param_ps() + ``` + +4. 
[可选配置]针对超大shape的`embedding_table`,由于设备上存放不下全量的`embedding_table`,可以配置[EmbeddingLookup算子](https://www.mindspore.cn/doc/api_python/zh-CN/master/mindspore/nn/mindspore.nn.EmbeddingLookup.html)的`vocab_cache_size`,用于开启Parameter Server训练模式下`EmbeddingLookup`的cache功能,该功能使用`vocab_cache_size`大小的`embedding_table`在设备上训练,全量`embedding_table`存储在Server,将下批次训练用到的`embedding_table`提前换入到cache上,当cache放不下时则将过期的`embedding_table`放回到Server,以达到提升训练性能的目的;训练结束后,可在Server上导出CheckPoint,保存训练后的全量`embedding_table`。详细网络训练脚本参考。 + + ```python + context.set_auto_parallel_context(full_batch=True, parallel_mode=ParallelMode.AUTO_PARALLEL) + context.set_context(enable_sparse=True) + network = Net() + model = Model(network) + model.train(epoch, train_dataset, dataset_sink_mode=True) + ``` + + 其中, + + - `dataset_sink_mode`:是否开启数据下沉模式 ,为`True`时表明开启,通过数据集通道传递数据,该场景中必须设置为`True`(训练中推理也需要开启数据下沉模式)。 + - `full_batch`:是否全量导入数据集,为`True`时表明全量导入,每卡的数据相同,在多Worker场景中必须设置为`True`。 + - `parallel_mode`:并行模式,多Worker场景需要开启自动并行模式,设置`parallel_mode`=`ParallelMode.AUTO_PARALLEL`。 + - `enable_sparse`:是否开启sparse训练,默认值为`False`;为`True`时表示开启sparse训练;在Parameter Server模式下,所有开启cache的`EmbeddingLookup`算子的`sparse`参数应和`enable_sparse`值保持一致。 + +### 环境变量设置 + +MindSpore通过读取环境变量,控制Parameter Server训练,环境变量包括以下选项(其中`MS_SCHED_HOST`及`MS_SCHED_PORT`所有脚本需保持一致): + +```text +export MS_SERVER_NUM=1 # Server number +export MS_WORKER_NUM=1 # Worker number +export MS_SCHED_HOST=XXX.XXX.XXX.XXX # Scheduler IP address +export MS_SCHED_PORT=XXXX # Scheduler port +export MS_ROLE=MS_SCHED # The role of this process: MS_SCHED represents the scheduler, MS_WORKER represents the worker, MS_PSERVER represents the Server +``` + +## 执行训练 + +1. shell脚本 + + 提供Worker,Server和Scheduler三个角色对应的shell脚本,以启动训练: + + `Scheduler.sh`: + + ```bash + #!/bin/bash + export MS_SERVER_NUM=1 + export MS_WORKER_NUM=1 + export MS_SCHED_HOST=XXX.XXX.XXX.XXX + export MS_SCHED_PORT=XXXX + export MS_ROLE=MS_SCHED + python train.py --device_target=Ascend --data_path=path/to/dataset + ``` + + `Server.sh`: + + ```bash + #!/bin/bash + export MS_SERVER_NUM=1 + export MS_WORKER_NUM=1 + export MS_SCHED_HOST=XXX.XXX.XXX.XXX + export MS_SCHED_PORT=XXXX + export MS_ROLE=MS_PSERVER + python train.py --device_target=Ascend --data_path=path/to/dataset + ``` + + `Worker.sh`: + + ```bash + #!/bin/bash + export MS_SERVER_NUM=1 + export MS_WORKER_NUM=1 + export MS_SCHED_HOST=XXX.XXX.XXX.XXX + export MS_SCHED_PORT=XXXX + export MS_ROLE=MS_WORKER + python train.py --device_target=Ascend --data_path=path/to/dataset + ``` + + 最后分别执行: + + ```bash + sh Scheduler.sh > scheduler.log 2>&1 & + sh Server.sh > server.log 2>&1 & + sh Worker.sh > worker.log 2>&1 & + ``` + + 启动训练 + +2. 查看结果 + + 查看`scheduler.log`中Server与Worker通信日志: + + ```text + The server node id:b5d8a47c-46d7-49a5-aecf-d29d7f8b6124,node ip: 10.90.53.118,node port:46737 assign rank id:0 + The worker node id:55e86d4b-d717-4930-b414-ebd80082f541 assign rank id:1 + Start the scheduler node is successful! + ``` + + 说明Server、Worker与Scheduler通信建立成功。 + + 查看`worker.log`中训练结果: + + ```text + epoch: 1 step: 1, loss is 2.302287 + epoch: 1 step: 2, loss is 2.304071 + epoch: 1 step: 3, loss is 2.308778 + epoch: 1 step: 4, loss is 2.301943 + ... 
+ ``` diff --git a/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_ascend.md b/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_ascend.md new file mode 100644 index 0000000000000000000000000000000000000000..0550d718acc0bf6482a699e7c7ecdea59ef3699f --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_ascend.md @@ -0,0 +1,475 @@ +# 分布式并行训练 (Ascend) + + + +- [分布式并行训练 (Ascend)](#分布式并行训练-ascend) + - [概述](#概述) + - [准备环节](#准备环节) + - [下载数据集](#下载数据集) + - [配置分布式环境变量](#配置分布式环境变量) + - [调用集合通信库](#调用集合通信库) + - [数据并行模式加载数据集](#数据并行模式加载数据集) + - [定义网络](#定义网络) + - [定义损失函数及优化器](#定义损失函数及优化器) + - [定义损失函数](#定义损失函数) + - [定义优化器](#定义优化器) + - [训练网络](#训练网络) + - [运行脚本](#运行脚本) + - [多机多卡训练](#多机多卡训练) + + + + + +在深度学习中,当数据集和参数量的规模越来越大,训练所需的时间和硬件资源会随之增加,最后会变成制约训练的瓶颈。分布式并行训练,可以降低对内存、计算性能等硬件的需求,是进行训练的重要优化手段。根据并行的原理及模式不同,业界主流的并行类型有以下几种: + +- 数据并行(Data Parallel):对数据进行切分的并行模式,一般按照batch维度切分,将数据分配到各个计算单元(worker)中,进行模型计算。 + +- 模型并行(Model Parallel):对模型进行切分的并行模式。MindSpore中支持层内模型并行模式,即对参数切分后分配到各个计算单元中进行训练。 + +- 混合并行(Hybrid Parallel):指涵盖数据并行和模型并行的并行模式。 + +本篇教程我们主要讲解如何在Ascend 910 AI处理器硬件平台上,利用MindSpore通过下面两种并行模式训练ResNet-50网络。 + +- DATA_PARALLEL:数据并行模式。 + +- AUTO_PARALLEL:自动并行模式,融合了数据并行、模型并行及混合并行的1种分布式并行模式,可以自动建立代价模型,找到训练时间较短的并行策略,为用户选择1种并行模式。MindSpore提供了如下的两种不同的策略搜索算法: + +> 你可以在这里下载完整的样例代码: +> +> + +目录结构如下: + +```text +└─tutorial_code + ├─distributed_training + │ rank_table_16pcs.json + │ rank_table_8pcs.json + │ rank_table_2pcs.json + │ cell_wrapper.py + │ model_accu.py + │ resnet.py + │ resnet50_distributed_training.py + │ resnet50_distributed_training_gpu.py + │ resnet50_distributed_training_grad_accu.py + │ run.sh + │ run_gpu.sh + │ run_grad_accu.sh + │ run_cluster.sh +``` + +其中,`rank_table_16pcs.json`、`rank_table_8pcs.json`、`rank_table_2pcs.json`是配置当前多卡环境的组网信息文件。`resnet.py`、`resnet50_distributed_training.py`、`resnet50_distributed_training_gpu.py`和`resnet50_distributed_training_grad_accu.py`几个文件是定义网络结构的脚本。`run.sh`、`run_gpu.sh`、`run_grad_accu.sh`、`run_cluster.sh`是执行脚本。 + +## 准备环节 + +### 下载数据集 + +本样例采用`CIFAR-10`数据集,由10类32*32的彩色图片组成,每类包含6000张图片。其中训练集共50000张图片,测试集共10000张图片。 + +> `CIFAR-10`数据集下载链接:。 + +将数据集下载并解压到本地路径下,解压后的文件夹为`cifar-10-batches-bin`。 + +### 配置分布式环境变量 + +在裸机环境(对比云上环境,即本地有Ascend 910 AI 处理器)进行分布式训练时,需要配置当前多卡环境的组网信息文件。如果使用华为云环境,因为云服务本身已经做好了配置,可以跳过本小节。 + +以Ascend 910 AI处理器为例,1个8卡环境的json配置文件示例如下,本样例将该配置文件命名为`rank_table_8pcs.json`。2卡环境配置可以参考样例代码中的`rank_table_2pcs.json`文件。 + +```json +{ + "version": "1.0", + "server_count": "1", + "server_list": [ + { + "server_id": "10.155.111.140", + "device": [ + {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, + {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, + {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, + {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, + {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, + {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, + {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, + {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], + "host_nic_ip": "reserve" + } + ], + "status": "completed" +} +``` + +其中需要根据实际训练环境修改的参数项有: + +- `server_count`表示参与训练的机器数量。 +- `server_id`表示当前机器的IP地址。 +- `device_id`表示卡物理序号,即卡所在机器中的实际序号。 +- `device_ip`表示集成网卡的IP地址,可以在当前机器执行指令`cat /etc/hccn.conf`,`address_x`的键值就是网卡IP地址。 +- `rank_id`表示卡逻辑序号,固定从0开始编号。 + +### 调用集合通信库 + +MindSpore分布式并行训练的通信使用了华为集合通信库`Huawei Collective Communication Library`(以下简称HCCL),可以在Ascend 
AI处理器配套的软件包中找到。同时`mindspore.communication.management`中封装了HCCL提供的集合通信接口,方便用户配置分布式信息。 +> HCCL实现了基于Ascend AI处理器的多机多卡通信,有一些使用限制,我们列出使用分布式服务常见的,详细的可以查看HCCL对应的使用文档。 +> +> - 单机场景下支持1、2、4、8卡设备集群,多机场景下支持8*n卡设备集群。 +> - 每台机器的0-3卡和4-7卡各为1个组网,2卡和4卡训练时卡必须相连且不支持跨组网创建集群。 +> - 组建多机集群时需要保证各台机器使用同一交换机。 +> - 服务器硬件架构及操作系统需要是SMP(Symmetrical Multi-Processing,对称多处理器)处理模式。 + +下面是调用集合通信库样例代码: + +```python +import os +from mindspore import context +from mindspore.communication.management import init + +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=int(os.environ["DEVICE_ID"])) + init() + ... +``` + +其中, + +- `mode=context.GRAPH_MODE`:使用分布式训练需要指定运行模式为图模式(PyNative模式当前仅支持数据并行)。 +- `device_id`:卡的物理序号,即卡所在机器中的实际序号。 +- `init`:使能HCCL通信,并完成分布式训练初始化操作。 + +## 数据并行模式加载数据集 + +分布式训练时,数据是以数据并行的方式导入的。下面我们以CIFAR-10数据集为例,介绍以数据并行方式导入CIFAR-10数据集的方法,`data_path`是指数据集的路径,即`cifar-10-batches-bin`文件夹的路径。 + +```python +from mindspore import dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as C +import mindspore.dataset.vision.c_transforms as vision +from mindspore.communication.management import get_rank, get_group_size + +def create_dataset(data_path, repeat_num=1, batch_size=32, rank_id=0, rank_size=1): + resize_height = 224 + resize_width = 224 + rescale = 1.0 / 255.0 + shift = 0.0 + + # 获取rank_id与rank_size + rank_id = get_rank() + rank_size = get_group_size() + data_set = ds.Cifar10Dataset(data_path, num_shards=rank_size, shard_id=rank_id) + + # 定义数据的map操作 + random_crop_op = vision.RandomCrop((32, 32), (4, 4, 4, 4)) + random_horizontal_op = vision.RandomHorizontalFlip() + resize_op = vision.Resize((resize_height, resize_width)) + rescale_op = vision.Rescale(rescale, shift) + normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023)) + changeswap_op = vision.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + c_trans = [random_crop_op, random_horizontal_op] + c_trans += [resize_op, rescale_op, normalize_op, changeswap_op] + + # 对图片数据进行map操作 + data_set = data_set.map(operations=type_cast_op, input_columns="label") + data_set = data_set.map(operations=c_trans, input_columns="image") + + # 对数据集进行处理 + data_set = data_set.shuffle(buffer_size=10) + data_set = data_set.batch(batch_size=batch_size, drop_remainder=True) + data_set = data_set.repeat(repeat_num) + + return data_set +``` + +其中,与单机不同的是,在数据集接口需要传入`num_shards`和`shard_id`参数,分别对应卡的数量和逻辑序号,建议通过HCCL接口获取: + +- `get_rank`:获取当前设备在集群中的ID。 +- `get_group_size`:获取集群数量。 + +> 数据并行场景加载数据集时,建议对每卡指定相同的数据集文件,若是各卡加载的数据集不同,可能会影响计算精度。 + +## 定义网络 + +数据并行及自动并行模式下,网络定义方式与单机写法一致,可以参考[ResNet网络样例脚本](https://gitee.com/mindspore/docs/blob/master/tutorials/tutorial_code/resnet/resnet.py)。 + +## 定义损失函数及优化器 + +### 定义损失函数 + +自动并行以算子为粒度切分模型,通过算法搜索得到最优并行策略,所以与单机训练不同的是,为了有更好的并行训练效果,损失函数建议使用小算子来实现。 + +在Loss部分,我们采用`SoftmaxCrossEntropyWithLogits`的展开形式,即按照数学公式,将其展开为多个小算子进行实现,样例代码如下: + +```python +import mindspore.ops as ops +from mindspore import Tensor +from mindspore import dtype as mstype +import mindspore.nn as nn + +class SoftmaxCrossEntropyExpand(nn.Cell): + def __init__(self, sparse=False): + super(SoftmaxCrossEntropyExpand, self).__init__() + self.exp = ops.Exp() + self.sum = ops.ReduceSum(keep_dims=True) + self.onehot = ops.OneHot() + self.on_value = Tensor(1.0, mstype.float32) + self.off_value = Tensor(0.0, mstype.float32) + self.div = ops.RealDiv() + self.log = ops.Log() + self.sum_cross_entropy = ops.ReduceSum(keep_dims=False) + self.mul = ops.Mul() + self.mul2 = ops.Mul() + 
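+        # 注:这里刻意用细粒度的小算子拼出交叉熵的数学展开,
+        # 便于自动并行以算子为粒度搜索切分策略(见本节开头的说明)。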
self.mean = ops.ReduceMean(keep_dims=False) + self.sparse = sparse + self.max = ops.ReduceMax(keep_dims=True) + self.sub = ops.Sub() + + def construct(self, logit, label): + logit_max = self.max(logit, -1) + exp = self.exp(self.sub(logit, logit_max)) + exp_sum = self.sum(exp, -1) + softmax_result = self.div(exp, exp_sum) + if self.sparse: + label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value) + softmax_result_log = self.log(softmax_result) + loss = self.sum_cross_entropy((self.mul(softmax_result_log, label)), -1) + loss = self.mul2(ops.scalar_to_array(-1.0), loss) + loss = self.mean(loss, -1) + + return loss +``` + +### 定义优化器 + +采用`Momentum`优化器作为参数更新工具,这里定义与单机一致,不再展开,具体可以参考样例代码中的实现。 + +## 训练网络 + +`context.set_auto_parallel_context`是配置并行训练参数的接口,必须在初始化网络之前调用。常用参数包括: + +- `parallel_mode`:分布式并行模式,默认为单机模式`ParallelMode.STAND_ALONE`。可选数据并行`ParallelMode.DATA_PARALLEL`及自动并行`ParallelMode.AUTO_PARALLEL`。 +- `parameter_broadcast`:训练开始前自动广播0号卡上数据并行的参数权值到其他卡上,默认值为`False`。 +- `gradients_mean`:反向计算时,框架内部会将数据并行参数分散在多台机器的梯度值进行收集,得到全局梯度值后再传入优化器中更新。默认值为`False`,设置为True对应`allreduce_mean`操作,False对应`allreduce_sum`操作。 +- `device_num`和`global_rank`建议采用默认值,框架内会调用HCCL接口获取。 + +如脚本中存在多个网络用例,请在执行下个用例前调用`context.reset_auto_parallel_context`将所有参数还原到默认值。 + +在下面的样例中我们指定并行模式为自动并行,用户如需切换为数据并行模式只需将`parallel_mode`改为`DATA_PARALLEL`。 + +```python +from mindspore import context, Model +from mindspore.nn.optim.momentum import Momentum +from mindspore.train.callback import LossMonitor +from mindspore.context import ParallelMode +from resnet import resnet50 + +device_id = int(os.getenv('DEVICE_ID')) +context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") +context.set_context(device_id=device_id) # set device_id + +def test_train_cifar(epoch_size=10): + context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, gradients_mean=True) + loss_cb = LossMonitor() + dataset = create_dataset(data_path) + batch_size = 32 + num_classes = 10 + net = resnet50(batch_size, num_classes) + loss = SoftmaxCrossEntropyExpand(sparse=True) + opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9) + model = Model(net, loss_fn=loss, optimizer=opt) + model.train(epoch_size, dataset, callbacks=[loss_cb], dataset_sink_mode=True) +``` + +其中, + +- `dataset_sink_mode=True`:表示采用数据集的下沉模式,即训练的计算下沉到硬件平台中执行。 +- `LossMonitor`:能够通过回调函数返回Loss值,用于监控损失函数。 + +## 运行脚本 + +上述已将训练所需的脚本编辑好了,接下来通过命令调用对应的脚本。 + +目前MindSpore分布式执行采用单卡单进程运行方式,即每张卡上运行1个进程,进程数量与使用的卡的数量一致。其中,0卡在前台执行,其他卡放在后台执行。每个进程创建1个目录,用来保存日志信息以及算子编译信息。下面以使用8张卡的分布式训练脚本为例,演示如何运行脚本: + +```bash +#!/bin/bash + +echo "==============================================================================================================" +echo "Please run the script as: " +echo "bash run.sh DATA_PATH RANK_SIZE" +echo "For example: bash run.sh /path/dataset 8" +echo "It is better to use the absolute path." 
+echo "==============================================================================================================" +DATA_PATH=$1 +export DATA_PATH=${DATA_PATH} +RANK_SIZE=$2 + +EXEC_PATH=$(pwd) + +test_dist_8pcs() +{ + export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_8pcs.json + export RANK_SIZE=8 +} + +test_dist_2pcs() +{ + export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_2pcs.json + export RANK_SIZE=2 +} + +test_dist_${RANK_SIZE}pcs + +for((i=1;i<${RANK_SIZE};i++)) +do + rm -rf device$i + mkdir device$i + cp ./resnet50_distributed_training.py ./resnet.py ./device$i + cd ./device$i + export DEVICE_ID=$i + export RANK_ID=$i + echo "start training for device $i" + env > env$i.log + pytest -s -v ./resnet50_distributed_training.py > train.log$i 2>&1 & + cd ../ +done +rm -rf device0 +mkdir device0 +cp ./resnet50_distributed_training.py ./resnet.py ./device0 +cd ./device0 +export DEVICE_ID=0 +export RANK_ID=0 +echo "start training for device 0" +env > env0.log +pytest -s -v ./resnet50_distributed_training.py > train.log0 2>&1 +if [ $? -eq 0 ];then + echo "training success" +else + echo "training failed" + exit 2 +fi +cd ../ +``` + +脚本需要传入变量`DATA_PATH`和`RANK_SIZE`,分别表示数据集的绝对路径和卡的数量。 + +分布式相关的环境变量有, + +- `RANK_TABLE_FILE`:组网信息文件的路径。 +- `DEVICE_ID`:当前卡在机器上的实际序号。 +- `RANK_ID`:当前卡的逻辑序号。 + +其余环境变量请参考安装教程中的配置项。 + +运行时间大约在5分钟内,主要时间是用于算子的编译,实际训练时间在20秒内。用户可以通过`ps -ef | grep pytest`来监控任务进程。 + +日志文件保存到`rank`所对应的`device0`、 `device1`......目录下,`env.log`中记录了环境变量的相关信息,关于Loss部分结果保存在`train.log`中,示例如下: + +```text +epoch: 1 step: 156, loss is 2.0084016 +epoch: 2 step: 156, loss is 1.6407638 +epoch: 3 step: 156, loss is 1.6164391 +epoch: 4 step: 156, loss is 1.6838071 +epoch: 5 step: 156, loss is 1.6320667 +epoch: 6 step: 156, loss is 1.3098773 +epoch: 7 step: 156, loss is 1.3515002 +epoch: 8 step: 156, loss is 1.2943741 +epoch: 9 step: 156, loss is 1.2316195 +epoch: 10 step: 156, loss is 1.1533381 +``` + +## 多机多卡训练 + +前面的章节,对MindSpore的分布式训练进行了介绍,都是基于单机8卡的Ascend环境,使用多机进行分布式训练,可以更大地提升训练速度。 +在Ascend环境下,跨机器的NPU单元的通信与单机内各个NPU单元的通信一样,依旧是通过HCCL进行通信,区别在于,单机内的NPU单元天然的是互通的,而跨机器的则需要保证两台机器的网络是互通的。 +在确认了机器之间的NPU单元的网络是通畅后,配置多机的json配置文件,本教程以16卡的配置文件为例。需要注意的是,在多机的json文件配置中,要求rank_id的排序,与server_id的字典序一致。 + +```json +{ + "version": "1.0", + "server_count": "2", + "server_list": [ + { + "server_id": "10.155.111.140", + "device": [ + {"device_id": "0","device_ip": "192.1.27.6","rank_id": "0"}, + {"device_id": "1","device_ip": "192.2.27.6","rank_id": "1"}, + {"device_id": "2","device_ip": "192.3.27.6","rank_id": "2"}, + {"device_id": "3","device_ip": "192.4.27.6","rank_id": "3"}, + {"device_id": "4","device_ip": "192.1.27.7","rank_id": "4"}, + {"device_id": "5","device_ip": "192.2.27.7","rank_id": "5"}, + {"device_id": "6","device_ip": "192.3.27.7","rank_id": "6"}, + {"device_id": "7","device_ip": "192.4.27.7","rank_id": "7"}], + "host_nic_ip": "reserve" + }, + { + "server_id": "10.155.111.141", + "device": [ + {"device_id": "0","device_ip": "192.1.27.8","rank_id": "8"}, + {"device_id": "1","device_ip": "192.2.27.8","rank_id": "9"}, + {"device_id": "2","device_ip": "192.3.27.8","rank_id": "10"}, + {"device_id": "3","device_ip": "192.4.27.8","rank_id": "11"}, + {"device_id": "4","device_ip": "192.1.27.9","rank_id": "12"}, + {"device_id": "5","device_ip": "192.2.27.9","rank_id": "13"}, + {"device_id": "6","device_ip": "192.3.27.9","rank_id": "14"}, + {"device_id": "7","device_ip": "192.4.27.9","rank_id": "15"}], + "host_nic_ip": "reserve" + } + ], + "status": "completed" +} +``` + 
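+由于上述rank_id与server_id的排序约束容易在手工编辑时被破坏,下面给出一个简单的校验脚本示意(仅为辅助示例,并非MindSpore自带工具),用于检查rank_id的排序是否与server_id的字典序一致:
+
+```python
+import json
+
+# 读取组网信息文件,文件名与上文目录结构中的 rank_table_16pcs.json 对应
+with open("rank_table_16pcs.json") as f:
+    table = json.load(f)
+
+# 按 server_id 的字典序遍历各机器,要求 rank_id 从 0 开始连续递增
+expected = 0
+for server in sorted(table["server_list"], key=lambda s: s["server_id"]):
+    for device in server["device"]:
+        assert int(device["rank_id"]) == expected, f"rank_id 应为 {expected}"
+        expected += 1
+print("rank_id 排序与 server_id 字典序一致")
+```
+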
+准备好配置文件后,可以进行分布式多机训练脚本的组织,在以2机16卡为例,两台机器上编写的脚本与单机8卡的运行脚本类似,区别在于指定不同的rank_id变量。 + +```bash +#!/bin/bash + +echo "==============================================================================================================" +echo "Please run the script as: " +echo "bash run.sh DATA_PATH RANK_TABLE_FILE RANK_SIZE RANK_START" +echo "For example: bash run.sh /path/dataset /path/rank_table.json 16 0" +echo "It is better to use the absolute path." +echo "==============================================================================================================" + +execute_path=$(pwd) +echo ${execute_path} +script_self=$(readlink -f "$0") +self_path=$(dirname "${script_self}") +echo ${self_path} + +export DATA_PATH=$1 +export RANK_TABLE_FILE=$2 +export RANK_SIZE=$3 +RANK_START=$4 +DEVICE_START=0 +for((i=0;i<=7;i++)); +do + export RANK_ID=$[i+RANK_START] + export DEVICE_ID=$[i+DEVICE_START] + rm -rf ${execute_path}/device_$RANK_ID + mkdir ${execute_path}/device_$RANK_ID + cd ${execute_path}/device_$RANK_ID || exit + pytest -s ${self_path}/resnet50_distributed_training.py >train$RANK_ID.log 2>&1 & +done +``` + +上面列出的参考脚本,所要求的代码组织结构如下,脚本中会获取脚本所在路径以及命令执行的路径,并且将所有任务都置于后台执行。 + +```text +└─tutorial_code + ├─distributed_training + │ resnet50_distributed_training.py + │ run_cluster.sh +``` + +执行时,两台机器分别执行如下命令,其中rank_table.json按照本章节展示的16卡的分布式json文件参考配置。 + +```bash +# server0 +bash run.sh /path/dataset /path/rank_table.json 16 0 +# server1 +bash run.sh /path/dataset /path/rank_table.json 16 8 +``` diff --git a/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_gpu.md b/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_gpu.md new file mode 100644 index 0000000000000000000000000000000000000000..6d414e314f4f6f613466804285c39f211cdf3eb6 --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_gpu.md @@ -0,0 +1,150 @@ +# 分布式并行训练 (GPU) + + + +- [分布式并行训练 (GPU)](#分布式并行训练-gpu) + - [准备环节](#准备环节) + - [下载数据集](#下载数据集) + - [配置分布式环境](#配置分布式环境) + - [调用集合通信库](#调用集合通信库) + - [定义网络](#定义网络) + - [运行脚本](#运行脚本) + - [运行多机脚本](#运行多机脚本) + + + + + +本篇教程我们主要讲解,如何在GPU硬件平台上,利用MindSpore的数据并行及自动并行模式训练ResNet-50网络。 + +## 准备环节 + +### 下载数据集 + +本样例采用`CIFAR-10`数据集,由10类32*32的彩色图片组成,每类包含6000张图片。其中训练集共50000张图片,测试集共10000张图片。 + +> `CIFAR-10`数据集下载链接:。 + +将数据集下载并解压到本地路径下,解压后的文件夹为`cifar-10-batches-bin`。 + +### 配置分布式环境 + +- `OpenMPI-4.0.3`:MindSpore采用的多进程通信库。 + + OpenMPI-4.0.3源码下载地址:,选择`openmpi-4.0.3.tar.gz`下载。 + + 参考OpenMPI官网教程安装:。 + +- `NCCL-2.7.6`:Nvidia集合通信库。 + + NCCL-2.7.6下载地址:。 + + 参考NCCL官网教程安装:。 + +- 主机间免密登陆(涉及多机训练时需要)。若训练涉及多机,则需要配置多机间免密登陆,可参考以下步骤进行配置: + 1. 每台主机确定同一个用户作为登陆用户(不推荐root); + 2. 执行`ssh-keygen -t rsa -P ""`生成密钥; + 3. 执行`ssh-copy-id DEVICE-IP`设置需要免密登陆的机器IP; + 4. 执行`ssh DEVICE-IP`,若不需要输入密码即可登录,则说明以上配置成功; + 5. 在所有机器上执行以上命令,确保两两互通。 + +### 调用集合通信库 + +在GPU硬件平台上,MindSpore分布式并行训练的通信使用的是NCCL。 + +> GPU平台上,MindSpore暂不支持用户进行: +> +> `get_local_rank`、`get_local_size`、`get_world_rank_from_group_rank`、`get_group_rank_from_world_rank`、`create_group`操作。 + +下面是调用集合通信库的代码样例: + +```python +from mindspore import context +from mindspore.communication.management import init + +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + init("nccl") + ... 
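+    # 注:init("nccl") 成功后,即可通过 mindspore.communication.management 中的
+    # get_rank()/get_group_size() 获取本进程的rank号与进程总数(各进程由 mpirun 统一拉起)。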
+``` + +其中, + +- `mode=context.GRAPH_MODE`:使用分布式训练需要指定运行模式为图模式(PyNative模式不支持并行)。 +- `init("nccl")`:使能NCCL通信,并完成分布式训练初始化操作。 + +## 定义网络 + +在GPU硬件平台上,网络的定义和Ascend 910 AI处理器一致。 + +可以参考[ResNet网络样例脚本](https://gitee.com/mindspore/docs/blob/master/tutorials/tutorial_code/resnet/resnet.py) + +## 运行脚本 + +在GPU硬件平台上,MindSpore采用OpenMPI的`mpirun`进行分布式训练。下面以使用8张卡的分布式训练脚本为例,演示如何运行脚本: + +> 你可以在这里找到样例的运行脚本: +> +> 。 +> +> 如果通过root用户执行脚本,`mpirun`需要加上`--allow-run-as-root`参数。 + +```bash +#!/bin/bash + +echo "==============================================================================================================" +echo "Please run the script as: " +echo "bash run_gpu.sh DATA_PATH" +echo "For example: bash run_gpu.sh /path/dataset" +echo "It is better to use the absolute path." +echo "==============================================================================================================" +DATA_PATH=$1 +export DATA_PATH=${DATA_PATH} + +rm -rf device +mkdir device +cp ./resnet50_distributed_training.py ./resnet.py ./device +cd ./device +echo "start training" +mpirun -n 8 pytest -s -v ./resnet50_distributed_training.py > train.log 2>&1 & +``` + +脚本会在后台运行,日志文件会保存到device目录下,共跑了10个epoch,每个epoch有234个step,关于Loss部分结果保存在train.log中。将loss值grep出来后,示例如下: + +```text +epoch: 1 step: 1, loss is 2.3025854 +epoch: 1 step: 1, loss is 2.3025854 +epoch: 1 step: 1, loss is 2.3025854 +epoch: 1 step: 1, loss is 2.3025854 +epoch: 1 step: 1, loss is 2.3025854 +epoch: 1 step: 1, loss is 2.3025854 +epoch: 1 step: 1, loss is 2.3025854 +epoch: 1 step: 1, loss is 2.3025854 +``` + +## 运行多机脚本 + +若训练涉及多机,则需要额外在`mpirun`命令中设置多机配置。你可以直接在`mpirun`命令中用`-H`选项进行设置,比如`mpirun -n 16 -H DEVICE1_IP:8,DEVICE2_IP:8 python hello.py`,表示在ip为DEVICE1_IP和DEVICE2_IP的机器上分别起8个进程运行程序;或者也可以构造一个如下这样的hostfile文件,并将其路径传给`mpirun`的`--hostfile`的选项。hostfile文件每一行格式为`[hostname] slots=[slotnum]`,hostname可以是ip或者主机名。 + +```bash +DEVICE1 slots=8 +DEVICE2 slots=8 +``` + +两机十六卡的执行脚本如下,需要传入变量`DATA_PATH`和`HOSTFILE`,表示数据集的路径和hostfile文件的路径。更多mpirun的选项设置可见OpenMPI的官网。 + +```bash +#!/bin/bash + +DATA_PATH=$1 +HOSTFILE=$2 + +rm -rf device +mkdir device +cp ./resnet50_distributed_training.py ./resnet.py ./device +cd ./device +echo "start training" +mpirun -n 16 --hostfile $HOSTFILE -x DATA_PATH=$DATA_PATH -x PATH -mca pml ob1 pytest -s -v ./resnet50_distributed_training.py > train.log 2>&1 & +``` + +在GPU上进行分布式训练时,模型参数的保存和加载可参考[分布式训练模型参数保存和加载](https://www.mindspore.cn/tutorial/zh-CN/master/middleclass/distributed_training/distributed_training_model_parameters_saving_and_loading.html) diff --git a/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_model_parameters_saving_and_loading.md b/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_model_parameters_saving_and_loading.md new file mode 100644 index 0000000000000000000000000000000000000000..dc9e05285e3d343eb37a754e2ed25587ff88b81e --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/distributed_training/distributed_training_model_parameters_saving_and_loading.md @@ -0,0 +1,64 @@ +# 分布式训练模型参数保存和加载 + + + +- [分布式训练模型参数保存和加载](#分布式训练模型参数保存和加载) + - [自动并行模式](#自动并行模式) + - [数据并行模式](#数据并行模式) + + + + + +本章将会讲解在Ascend与GPU环境中进行分布式训练时,如何进行参数的保存与加载。涵盖的分布式训练模式包括自动并行(Auto Parallel)与数据并行(Data Parallel)。分布式训练进行模型参数的保存之前,需要先按照[Ascend分布式训练](https://www.mindspore.cn/tutorial/zh-CN/master/middleclass/distributed_training/distributed_training_ascend.html)、[GPU分布式训练](https://www.mindspore.cn/tutorial/zh-CN/master/middleclass/distributed_training/distributed_training_gpu.html)教程配置分布式环境变量和集合通信库。 + +## 自动并行模式 
+
+自动并行模式(Auto Parallel)下模型参数的保存和加载与非分布式训练的模型参数保存和加载用法相同,以[Ascend分布式训练](https://www.mindspore.cn/tutorial/zh-CN/master/middleclass/distributed_training/distributed_training_ascend.html)为例,只需在Ascend训练网络步骤中的`test_train_cifar`方法中添加配置`CheckpointConfig`和`ModelCheckpoint`,即可实现模型参数的保存,具体代码如下:
+
+```python
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
+
+def test_train_cifar(epoch_size=10):
+    context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, gradients_mean=True)
+    loss_cb = LossMonitor()
+    dataset = create_dataset(data_path)
+    batch_size = 32
+    num_classes = 10
+    net = resnet50(batch_size, num_classes)
+    loss = SoftmaxCrossEntropyExpand(sparse=True)
+    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
+    ckpt_config = CheckpointConfig()
+    ckpt_callback = ModelCheckpoint(prefix='auto_parallel', config=ckpt_config)
+    model = Model(net, loss_fn=loss, optimizer=opt)
+    model.train(epoch_size, dataset, callbacks=[loss_cb, ckpt_callback], dataset_sink_mode=True)
+```
+
+保存好checkpoint文件后,用户可以很方便地加载模型参数用于推理或再训练,如用于再训练场景时,可使用如下代码加载模型:
+
+```python
+from mindspore import load_checkpoint, load_param_into_net
+
+net = resnet50(batch_size=32, num_classes=10)
+# The parameter for load_checkpoint is a .ckpt file which has been successfully saved
+param_dict = load_checkpoint('...')
+load_param_into_net(net, param_dict)
+```
+
+checkpoint配置策略和保存方法可以参考[保存及加载模型](https://www.mindspore.cn/tutorial/zh-CN/master/save_load_model.html)。
+
+## 数据并行模式
+
+数据并行模式(Data Parallel)下checkpoint的使用方法和自动并行模式(Auto Parallel)一样,只需要将`test_train_cifar`中
+
+```python
+context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, gradients_mean=True)
+```
+
+修改为:
+
+```python
+context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
+```
+
+> 数据并行场景下加载模型参数时建议每卡加载相同的checkpoint文件,避免造成计算误差,或者可以打开`parameter_broadcast`开关将0号卡的参数广播到其他卡上。
diff --git a/tutorials/source_zh_cn/middleclass/inference/ascend310_inference.md b/tutorials/source_zh_cn/middleclass/inference/ascend310_inference.md
new file mode 100644
index 0000000000000000000000000000000000000000..1943a5f1295b4bbc9a7518ef0733952cf446af27
--- /dev/null
+++ b/tutorials/source_zh_cn/middleclass/inference/ascend310_inference.md
@@ -0,0 +1,146 @@
+# Ascend310处理器上推理MindIR模型
+
+
+
+- [Ascend310处理器上推理MindIR模型](#ascend310处理器上推理mindir模型)
+    - [推理代码介绍](#推理代码介绍)
+    - [构建脚本介绍](#构建脚本介绍)
+    - [编译并执行推理代码](#编译并执行推理代码)
+
+
+
+
+
+本文介绍如何在Ascend310处理器中推理MindIR模型。Ascend环境配置可参考[Ascend安装指南](https://www.mindspore.cn/install/),完整推理代码可参考[ascend310_resnet50_preprocess_sample](https://gitee.com/mindspore/docs/tree/master/tutorials/tutorial_code/ascend310_resnet50_preprocess_sample)。
+
+## 推理代码介绍
+
+推理部分使用了CPU算子来进行数据的预处理,然后完成推理。整体代码存放在`main.cc`文件中,现在对其中的功能实现进行说明。
+
+引用`mindspore`和`mindspore::dataset`的名字空间。
+
+```c++
+namespace ms = mindspore;
+namespace ds = mindspore::dataset;
+```
+
+环境初始化,指定硬件为Ascend 310,DeviceID为0:
+
+```c++
+auto context = std::make_shared<ms::Context>();
+auto ascend310_info = std::make_shared<ms::Ascend310DeviceInfo>();
+ascend310_info->SetDeviceID(0);
+context->MutableDeviceInfo().push_back(ascend310_info);
+```
+
+加载模型文件:
+
+```c++
+// 加载MindIR模型
+ms::Graph graph;
+ms::Status ret = ms::Serialization::Load(resnet_file, ms::ModelType::kMindIR, &graph);
+// 图编译
+ms::Model resnet50;
+ret = resnet50.Build(ms::GraphCell(graph), context);
+```
+
+获取模型所需输入信息:
+
+```c++
+std::vector<ms::MSTensor> model_inputs = resnet50.GetInputs();
+```
+
+加载图片文件:
+
+```c++
+ms::MSTensor ReadFile(const std::string &file);
+auto image = ReadFile(image_file);
+```
+
+图片预处理(使用CPU算子):
+
+```c++
+// 对图片进行解码,变为RGB格式,并重设尺寸
+std::shared_ptr<ds::TensorTransform> decode(new ds::vision::Decode());
+std::shared_ptr<ds::TensorTransform> resize(new ds::vision::Resize({256}));
+// 输入归一化
+std::shared_ptr<ds::TensorTransform> normalize(new ds::vision::Normalize(
+    {0.485 * 255, 0.456 * 255, 0.406 * 255}, {0.229 * 255, 0.224 * 255, 0.225 * 255}));
+// 剪裁图片
+std::shared_ptr<ds::TensorTransform> center_crop(new ds::vision::CenterCrop({224, 224}));
+// shape (H, W, C) 变为 shape (C, H, W)
+std::shared_ptr<ds::TensorTransform> hwc2chw(new ds::vision::HWC2CHW());
+
+// 定义preprocessor
+ds::Execute preprocessor({decode, resize, normalize, center_crop, hwc2chw});
+
+// 调用函数,获取处理后的图像
+ret = preprocessor(image, &image);
+```
+
+执行推理:
+
+```c++
+// 创建输入输出向量
+std::vector<ms::MSTensor> outputs;
+std::vector<ms::MSTensor> inputs;
+inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),
+                    image.Data().get(), image.DataSize());
+// 执行推理
+ret = resnet50.Predict(inputs, &outputs);
+```
+
+获取推理结果:
+
+```c++
+// 获取推理结果的最大概率
+std::cout << "Image: " << image_file << " infer result: " << GetMax(outputs[0]) << std::endl;
+```
+
+## 构建脚本介绍
+
+构建脚本用于构建用户程序,完整代码位于`CMakeLists.txt`,下面进行解释说明。
+
+为编译器添加头文件搜索路径:
+
+```cmake
+option(MINDSPORE_PATH "mindspore install path" "")
+include_directories(${MINDSPORE_PATH})
+include_directories(${MINDSPORE_PATH}/include)
+```
+
+在MindSpore中查找所需动态库:
+
+```cmake
+find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)
+file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)
+```
+
+使用指定的源文件生成目标可执行文件,并为目标文件链接MindSpore库:
+
+```cmake
+add_executable(resnet50_sample main.cc)
+target_link_libraries(resnet50_sample ${MS_LIB} ${MD_LIB})
+```
+
+## 编译并执行推理代码
+
+若运行完整推理代码[ascend310_resnet50_preprocess_sample](https://gitee.com/mindspore/docs/tree/master/tutorials/tutorial_code/ascend310_resnet50_preprocess_sample),可将实验的脚本下载至Ascend310环境中编译并执行。
+
+创建并进入工程目录`ascend310_resnet50_preprocess_sample`,执行`cmake`命令,其中`pip3`需要按照实际情况修改:
+
+```bash
+cmake . 
-DMINDSPORE_PATH=`pip3 show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath` +``` + +再执行`make`命令编译即可。 + +```bash +make +``` + +编译成功后,会获得`resnet50_sample`可执行文件。在工程目录`ascend310_resnet50_preprocess_sample`下创建`model`目录放置MindIR文件[resnet50_imagenet.mindir](https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/sample_resources/ascend310_resnet50_preprocess_sample/resnet50_imagenet.mindir)。此外,创建`test_data`目录用于存放待分类的图片,输入执行命令即可获取推理结果: + +```bash +./resnet50_sample +``` diff --git a/tutorials/source_zh_cn/middleclass/inference/ascend910_inference.ipynb b/tutorials/source_zh_cn/middleclass/inference/ascend910_inference.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..86b7a2d97975319a8e1da47907270bc97b232e9a --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/inference/ascend910_inference.ipynb @@ -0,0 +1,497 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Ascend910与GPU推理\n", + "\n", + "\n", + "\n", + "\n", + "本文将介绍如何在Ascend910和GPU硬件环境中,利用MindIR和Checkpoint执行推理。MindIR是MindSpore的统一模型文件,同时存储了网络结构和权重参数值,定义了可扩展的图结构以及算子的IR表示,消除了不同后端的模型差异,一般用于跨硬件平台执行推理任务。Checkpoint是训练参数,采用了Protocol Buffers格式,一般用于训练任务中断后恢复训练,或训练后的微调(Fine Tune)任务。\n", + "\n", + "下面将针对这两种情况,介绍如何使用MindSpore进行单卡推理。\n", + "\n", + "## 使用checkpoint格式文件单卡推理\n", + "\n", + "### 使用本地模型推理\n", + "\n", + "用户可以通过`load_checkpoint`和`load_param_into_net`接口从本地加载模型与参数,传入验证数据集后使用`model.eval`即可进行模型验证,使用`model.predict`可进行模型推理。在这里我们下载MindSpore Hub中已经预训练好的LeNet和MINIST数据集进行推理演示:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!mkdir -p ./datasets/MNIST_Data/test\n", + "!wget -NP ./datasets/MNIST_Data/test https://mindspore-website.obs.myhuaweicloud.com/notebook/datasets/mnist/t10k-labels-idx1-ubyte\n", + "!wget -NP ./datasets/MNIST_Data/test https://mindspore-website.obs.myhuaweicloud.com/notebook/datasets/mnist/t10k-images-idx3-ubyte\n", + "!tree ./datasets/MNIST_Data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!mkdir -p ./checkpoint\n", + "!wget -NP ./checkpoint https://download.mindspore.cn/model_zoo/r1.1/lenet_ascend_v111_offical_cv_mnist_bs32_acc98/lenet_ascend_v111_offical_cv_mnist_bs32_acc98.ckpt\n", + "!tree ./checkpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "配置运行所需信息,进行推理的数据处理:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import os\n", + "import argparse\n", + "from mindspore import context\n", + "\n", + "import mindspore.dataset as ds\n", + "import mindspore.dataset.transforms.c_transforms as C\n", + "import mindspore.dataset.vision.c_transforms as CV\n", + "from mindspore.dataset.vision import Inter\n", + "from mindspore import dtype as mstype\n", + "\n", + "parser = argparse.ArgumentParser(description='MindSpore Inference Example')\n", + "parser.add_argument('--device_target', type=str, default=\"GPU\", choices=['Ascend', 'GPU'])\n", + "\n", + "args = parser.parse_known_args()[0]\n", + "context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)\n", + "\n", + "def create_dataset(data_path, batch_size=32, repeat_size=1,\n", + " num_parallel_workers=1):\n", + " # 定义数据集\n", + " mnist_ds = 
ds.MnistDataset(data_path)\n", + " resize_height, resize_width = 32, 32\n", + " rescale = 1.0 / 255.0\n", + " shift = 0.0\n", + " rescale_nml = 1 / 0.3081\n", + " shift_nml = -1 * 0.1307 / 0.3081\n", + "\n", + " # 定义所需要操作的map映射\n", + " resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR)\n", + " rescale_nml_op = CV.Rescale(rescale_nml, shift_nml)\n", + " rescale_op = CV.Rescale(rescale, shift)\n", + " hwc2chw_op = CV.HWC2CHW()\n", + " type_cast_op = C.TypeCast(mstype.int32)\n", + "\n", + " # 使用map映射函数,将数据操作应用到数据集\n", + " mnist_ds = mnist_ds.map(operations=type_cast_op, input_columns=\"label\", num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(operations=resize_op, input_columns=\"image\", num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(operations=rescale_op, input_columns=\"image\", num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(operations=rescale_nml_op, input_columns=\"image\", num_parallel_workers=num_parallel_workers)\n", + " mnist_ds = mnist_ds.map(operations=hwc2chw_op, input_columns=\"image\", num_parallel_workers=num_parallel_workers)\n", + "\n", + " # 进行shuffle、batch操作\n", + " buffer_size = 10000\n", + " mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)\n", + " mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)\n", + "\n", + " return mnist_ds\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "创建LeNet模型:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import mindspore.nn as nn\n", + "from mindspore.common.initializer import Normal\n", + "\n", + "class LeNet5(nn.Cell):\n", + " \"\"\"\n", + " Lenet网络结构\n", + " \"\"\"\n", + " def __init__(self, num_class=10, num_channel=1):\n", + " super(LeNet5, self).__init__()\n", + " # 定义所需要的运算\n", + " self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid')\n", + " self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')\n", + " self.fc1 = nn.Dense(16 * 5 * 5, 120, weight_init=Normal(0.02))\n", + " self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))\n", + " self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))\n", + " self.relu = nn.ReLU()\n", + " self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)\n", + " self.flatten = nn.Flatten()\n", + "\n", + " def construct(self, x):\n", + " # 使用定义好的运算构建前向网络\n", + " x = self.conv1(x)\n", + " x = self.relu(x)\n", + " x = self.max_pool2d(x)\n", + " x = self.conv2(x)\n", + " x = self.relu(x)\n", + " x = self.max_pool2d(x)\n", + " x = self.flatten(x)\n", + " x = self.fc1(x)\n", + " x = self.relu(x)\n", + " x = self.fc2(x)\n", + " x = self.relu(x)\n", + " x = self.fc3(x)\n", + " return x\n", + "\n", + "# 实例化网络\n", + "net = LeNet5()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "在推理进行前,需要使用`load_checkpoint`和`load_param_into_net`接口从本地加载模型与参数。这样一来就可以使用本地模型完成后面的推理过程。" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from mindspore import load_checkpoint, load_param_into_net\n", + "ckpt_file_name = \"./checkpoint/lenet_ascend_v111_offical_cv_mnist_bs32_acc98.ckpt\"\n", + "param_dict = load_checkpoint(ckpt_file_name)\n", + "load_param_into_net(net, param_dict)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + 
"设置损失函数与优化器,并调用`model`接口创建对象:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from mindspore.nn import Accuracy\n", + "from mindspore import Model, Tensor\n", + "\n", + "net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction=\"mean\")\n", + "net_opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)\n", + "model = Model(net, net_loss, net_opt, metrics={\"Accuracy\": Accuracy()})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "下面调用`model.eval`接口执行验证过程:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "============== {'Accuracy': 0.9846754807692307} ==============\n" + ] + } + ], + "source": [ + "mnist_path = \"./datasets/MNIST_Data/test\"\n", + "ds_eval = create_dataset(os.path.join(mnist_path))\n", + "acc = model.eval(ds_eval, dataset_sink_mode=False)\n", + "print(\"============== {} ==============\".format(acc))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "调用`model.predict`接口执行验证过程,这里选取数据集中的一张图片进行预测:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted: \"6\", Actual: \"6\"\n" + ] + } + ], + "source": [ + "ds_eval = ds_eval.create_dict_iterator()\n", + "data = next(ds_eval)\n", + "\n", + "# images为测试图片,labels为测试图片的实际分类\n", + "images = data[\"image\"].asnumpy()\n", + "labels = data[\"label\"].asnumpy()\n", + "\n", + "# 使用函数model.predict预测image对应分类\n", + "output = model.predict(Tensor(data['image']))\n", + "predicted = np.argmax(output.asnumpy(), axis=1)\n", + "\n", + "# 输出预测分类与实际分类\n", + "print(f'Predicted: \"{predicted[0]}\", Actual: \"{labels[0]}\"')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 加载MindSpore Hub模型执行推理\n", + "\n", + "除了使用`load_checkpoint`和`load_param_into_net`从本地加载模型之外,也可以通过[安装MindSpore Hub](https://gitee.com/mindspore/hub/blob/r1.2/README_CN.md),通过`mindspore_hub.load`从云端加载模型参数执行推理。\n", + "\n", + "之前使用的加载本地模型的方法为:\n", + "\n", + "```python\n", + "from mindspore import load_checkpoint, load_param_into_net\n", + "ckpt_file_name = \"./checkpoint/lenet_ascend_v111_offical_cv_mnist_bs32_acc98.ckpt\"\n", + "param_dict = load_checkpoint(ckpt_file_name)\n", + "load_param_into_net(net, param_dict)\n", + "```\n", + "\n", + "可替换为`mindspore_hub.load`方法:\n", + "\n", + "```python\n", + "import mindspore_hub\n", + "model_uid = \"mindspore/ascend/1.2/lenet_v1.2\"\n", + "network = mindspore_hub.load(model_uid)\n", + "```\n", + "\n", + "## 使用C++接口推理MindIR格式文件\n", + "\n", + "本小节将介绍如何使用C++接口推理MINDIR格式的模型。完整代码可参考[ascend910_resnet50_preprocess_sample](https://gitee.com/mindspore/docs/tree/master/tutorials/tutorial_code/ascend910_resnet50_preprocess_sample)。\n", + "\n", + "### 推理代码介绍\n", + "\n", + "完成的推理代码为`main.cc`文件,现在对其中的功能实现进行说明。\n", + "\n", + "引用`mindspore`和`mindspore::dataset`的名字空间。\n", + "\n", + "```cpp\n", + "namespace ms = mindspore;\n", + "namespace ds = mindspore::dataset;\n", + "```\n", + "\n", + "环境初始化,指定硬件为Ascend 910,DeviceID为0:\n", + "\n", + "```c++\n", + "auto context = std::make_shared();\n", + "auto ascend910_info = std::make_shared();\n", + "ascend910_info->SetDeviceID(0);\n", + "context->MutableDeviceInfo().push_back(ascend910_info);\n", + "```\n", + "\n", + 
"加载模型文件:\n", + "\n", + "```cpp\n", + "// 加载 MindIR 模型\n", + "ms::Graph graph;\n", + "ms::Status ret = ms::Serialization::Load(resnet_file, ms::ModelType::kMindIR, &graph);\n", + "// 进行图编译\n", + "ms::Model resnet50;\n", + "ret = resnet50.Build(ms::GraphCell(graph), context);\n", + "```\n", + "\n", + "获取模型所需输入信息:\n", + "\n", + "```cpp\n", + "std::vector model_inputs = resnet50.GetInputs();\n", + "```\n", + "\n", + "加载图片文件:\n", + "\n", + "```cpp\n", + "ms::MSTensor ReadFile(const std::string &file);\n", + "auto image = ReadFile(image_file);\n", + "```\n", + "\n", + "图片预处理:\n", + "\n", + "```cpp\n", + "// 对图片进行解码,变为RGB格式,并重设尺寸\n", + "std::shared_ptr decode(new ds::vision::Decode());\n", + "std::shared_ptr resize(new ds::vision::Resize({256}));\n", + "// 输入归一化\n", + "std::shared_ptr normalize(new ds::vision::Normalize(\n", + " {0.485 * 255, 0.456 * 255, 0.406 * 255}, {0.229 * 255, 0.224 * 255, 0.225 * 255}));\n", + "// 剪裁图片\n", + "std::shared_ptr center_crop(new ds::vision::CenterCrop({224, 224}));\n", + "// shape (H, W, C) 变为 shape (C, H, W)\n", + "std::shared_ptr hwc2chw(new ds::vision::HWC2CHW());\n", + "\n", + "// 定义preprocessor\n", + "ds::Execute preprocessor({decode, resize, normalize, center_crop, hwc2chw});\n", + "\n", + "// 调用函数,获取处理后的图像\n", + "ret = preprocessor(image, &image);\n", + "```\n", + "\n", + "执行推理:\n", + "\n", + "```cpp\n", + "// 创建输入输出向量\n", + "std::vector outputs;\n", + "std::vector inputs;\n", + "\n", + "inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),\n", + " image.Data().get(), image.DataSize());\n", + "// 执行推理\n", + "ret = resnet50.Predict(inputs, &outputs);\n", + "```\n", + "\n", + "获取推理结果:\n", + "\n", + "```cpp\n", + "// 获取推理结果的最大可能性\n", + "std::cout << \"Image: \" << image_file << \" infer result: \" << GetMax(outputs[0]) << std::endl;\n", + "```\n", + "\n", + "### 构建脚本介绍\n", + "\n", + "构建脚本用于构建用户程序,完整代码位于`CMakeLists.txt` ,下面进行解释说明。\n", + "\n", + "为编译器添加头文件搜索路径:\n", + "\n", + "```cmake\n", + "option(MINDSPORE_PATH \"mindspore install path\" \"\")\n", + "include_directories(${MINDSPORE_PATH})\n", + "include_directories(${MINDSPORE_PATH}/include)\n", + "```\n", + "\n", + "在MindSpore中查找所需动态库:\n", + "\n", + "```cmake\n", + "find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)\n", + "file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)\n", + "```\n", + "\n", + "使用指定的源文件生成目标可执行文件,并为目标文件链接MindSpore库:\n", + "\n", + "```cmake\n", + "add_executable(resnet50_sample main.cc)\n", + "target_link_libraries(resnet50_sample ${MS_LIB} ${MD_LIB})\n", + "```\n", + "\n", + "\n", + "### 编译并执行推理代码\n", + "\n", + "可选择将实验的脚本下载至Ascend910环境中编译并执行。\n", + "\n", + "进入工程目录`ascend910_resnet50_preprocess_sample`,执行`cmake`命令,其中`pip3`需要按照实际情况修改::\n", + "\n", + "```bash\n", + "cmake . 
-DMINDSPORE_PATH=`pip3 show mindspore-ascend | grep Location | awk '{print $2\"/mindspore\"}' | xargs realpath`\n", + "```\n", + "\n", + "再执行`make`命令编译即可。\n", + "\n", + "```bash\n", + "make\n", + "```\n", + "\n", + "编译成功后,会获得`resnet50_sample`可执行文件。选取几张图片,输入执行命令即可获取推理结果:\n", + "\n", + "```bash\n", + "./resnet50_sample\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/tutorials/source_zh_cn/middleclass/inference/images/segmentation1.png b/tutorials/source_zh_cn/middleclass/inference/images/segmentation1.png new file mode 100644 index 0000000000000000000000000000000000000000..85456fdecf884d17707a5440676299c6a2af10ca Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/inference/images/segmentation1.png differ diff --git a/tutorials/source_zh_cn/middleclass/inference/images/segmentation2.png b/tutorials/source_zh_cn/middleclass/inference/images/segmentation2.png new file mode 100644 index 0000000000000000000000000000000000000000..e872283bf0adf4aa13d100078630ba7e1f4f1bb1 Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/inference/images/segmentation2.png differ diff --git a/tutorials/source_zh_cn/middleclass/inference/images/segmentation_apk.png b/tutorials/source_zh_cn/middleclass/inference/images/segmentation_apk.png new file mode 100644 index 0000000000000000000000000000000000000000..0d8c4daad80a4bf30854e85825195cebf022a61a Binary files /dev/null and b/tutorials/source_zh_cn/middleclass/inference/images/segmentation_apk.png differ diff --git a/tutorials/source_zh_cn/middleclass/inference/mindspore_lite_inference.md b/tutorials/source_zh_cn/middleclass/inference/mindspore_lite_inference.md new file mode 100644 index 0000000000000000000000000000000000000000..4667656b0ad91d6d172ca08b78b062c39ece7249 --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/inference/mindspore_lite_inference.md @@ -0,0 +1,227 @@ +# MindSpore Lite推理流程 + + + +- [MindSpore Lite推理流程](#mindspore-lite推理流程) + - [运行依赖](#运行依赖) + - [MindSpore Lite推理流程](#mindspore-lite推理流程) + - [在Android中部署Demo](#在android中部署demo) + + + + + +本文将会以图像分割Demo为例讲解如何使用MindSpore Lite进行推理。 + +文章包含以下几方面的说明: + +- MindSpore Lite推理流程及Java接口的使用 +- 如何在Android中部署Demo + +## 运行依赖 + +完成Demo部署需安装以下依赖项: + +- Android Studio >= 3.2 (推荐4.0以上版本) +- Android SDK >= 26 (Android Studio默认安装) +- JDK >= 1.8 (Android Studio默认安装) + +## MindSpore Lite推理流程 + +推理代码流程如下,完整代码请参见 [src/java/com/mindspore/imagesegmentation/TrackingMobile](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/lite/image_segmentation/app/src/main/java/com/mindspore/imagesegmentation/help/TrackingMobile.java)。 + +1. 
加载MindSpore Lite模型文件,构建上下文、会话以及用于推理的计算图。

   - 加载模型文件。

     ```java
     // 加载segment_model.ms模型
     Model model = new Model();
     if (!model.loadModel(Context, "segment_model.ms")) {
         Log.e(TAG, "Load Model failed");
         return;
     }
     ```

   - 创建并初始化配置,然后据此创建会话。

     ```java
     // 创建config并完成初始化
     MSConfig msConfig = new MSConfig();
     if (!msConfig.init(DeviceType.DT_CPU, threadNum, CpuBindMode.MID_CPU)) {
         Log.e(TAG, "Init context failed");
         return;
     }

     // 创建MindSpore Lite会话
     LiteSession session = new LiteSession();
     if (!session.init(msConfig)) {
         Log.e(TAG, "Create session failed");
         msConfig.free();
         return;
     }
     msConfig.free();
     ```

   - 编译模型,构建用于推理的计算图。

     ```java
     // 图编译
     if (!session.compileGraph(model)) {
         Log.e(TAG, "Compile graph failed");
         model.freeBuffer();
         return;
     }

     // 调用model.freeBuffer()后,模型无法被再次编译
     model.freeBuffer();
     ```

2. 将待检测图片数据转换为传入MindSpore模型的输入Tensor。

   ```java
   List<MSTensor> inputs = session.getInputs();
   if (inputs.size() != 1) {
       Log.e(TAG, "inputs.size() != 1");
       return null;
   }

   float resource_height = bitmap.getHeight();
   float resource_weight = bitmap.getWidth();

   ByteBuffer contentArray = BitmapUtils.bitmapToByteBuffer(bitmap, imageSize, imageSize, IMAGE_MEAN, IMAGE_STD);

   MSTensor inTensor = inputs.get(0);
   inTensor.setData(contentArray);
   ```

3. 运行会话,执行计算图。

   ```java
   if (!session.runGraph()) {
       Log.e(TAG, "Run graph failed");
       return null;
   }
   ```

4. 对输出数据进行处理。

   - 从输出Tensor中获取输出数据及其维度、批处理数、通道数等信息。

     ```java
     // 获取输出Tensor
     List<String> tensorNames = session.getOutputTensorNames();
     Map<String, MSTensor> outputs = session.getOutputMapByTensor();
     for (String tensorName : tensorNames) {
         MSTensor output = outputs.get(tensorName);
         if (output == null) {
             Log.e(TAG, "Can not find output " + tensorName);
             return null;
         }
         float[] results = output.getFloatData();
         float[] result = new float[output.elementsNum()];

         int batch = output.getShape()[0];
         int channel = output.getShape()[1];
         int weight = output.getShape()[2];
         int height = output.getShape()[3];
         int plane = weight * height;
     ```

   - 将NCHW格式转为NHWC格式,存入`float[] result`。

     ```java
     for (int n = 0; n < batch; n++) {
         for (int c = 0; c < channel; c++) {
             for (int hw = 0; hw < plane; hw++) {
                 result[n * channel * plane + hw * channel + c] = results[n * channel * plane + c * plane + hw];
             }
         }
     }
     ```

5. 对推理输出进行后处理,生成用于显示的图像数据。

   - 将`float[] result`数据转换成ByteBuffer数据格式。

     ```java
     ByteBuffer buffer = ByteBuffer.allocate(4 * result.length);
     FloatBuffer floatBuffer = buffer.asFloatBuffer();
     floatBuffer.put(result);
     return buffer;
     ```

   - 将ByteBuffer数据格式转成Bitmap。

     对推理得到的数据逐像素进行比对:若某像素得分最高的类别为PERSON,则取缩放后背景图中对应的像素;否则将该像素置为透明色(如下图所示)。

     ```java
     Bitmap.Config conf = Bitmap.Config.ARGB_8888;
     Bitmap maskBitmap = Bitmap.createBitmap(imageWidth, imageHeight, conf);
     Bitmap scaledBackgroundImage =
             BitmapUtils.scaleBitmapAndKeepRatio(backgroundImage, imageWidth, imageHeight);
     int[][] mSegmentBits = new int[imageWidth][imageHeight];
     inputBuffer.rewind();
     for (int y = 0; y < imageHeight; y++) {
         for (int x = 0; x < imageWidth; x++) {
             float maxVal = 0f;
             mSegmentBits[x][y] = 0;
             for (int i = 0; i < NUM_CLASSES; i++) {
                 float value = inputBuffer.getFloat((y * imageWidth * NUM_CLASSES + x * NUM_CLASSES + i) * 4);
                 if (i == 0 || value > maxVal) {
                     maxVal = value;
                     if (i == PERSON) {
                         mSegmentBits[x][y] = i;
                     } else {
                         mSegmentBits[x][y] = 0;
                     }
                 }
             }
             maskBitmap.setPixel(x, y, mSegmentBits[x][y] == 0 ? colors[0] : scaledBackgroundImage.getPixel(x, y));
         }
     }
     ```
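
   上述`inputBuffer.getFloat(...)`的下标计算可以这样理解:NHWC排布下,像素`(x, y)`的第`i`个类别得分是缓冲区中第`y*W*C + x*C + i`个float元素,乘以4是因为每个float占4个字节。下面给出一个与上文代码等价的示意辅助方法(假设性代码,仅用于说明偏移计算):

   ```java
   // 示意:计算NHWC排布下,像素(x, y)第i个类别得分在ByteBuffer中的字节偏移
   static int scoreOffset(int x, int y, int i, int imageWidth, int numClasses) {
       return (y * imageWidth * numClasses + x * numClasses + i) * Float.BYTES; // Float.BYTES == 4
   }
   ```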
+
+图1 推理前  图2 推理后
+
+6. 将推理后的图片与选择的背景图片相结合。
+
+   ```java
+   MainActivity.this.imgPreview.setDrawingCacheEnabled(true);
+   MainActivity.this.imgPreview.setBackground(isDemo ? getDrawable(IMAGES[selectedPosition]) : customBack);
+   MainActivity.this.imgPreview.setImageBitmap(foreground);
+   MainActivity.this.imgPreview.setDrawingCacheEnabled(false);
+   ```
+
+## 在Android中部署Demo
+
+如果不具备Android Studio等开发环境,我们提供了本示例对应的APK文件,可以扫描下方的二维码或直接下载[APK文件](https://download.mindspore.cn/model_zoo/official/lite/apk/segmentation/image_segmentation.apk),并部署到Android设备后使用。
+
+![apk](./images/segmentation_apk.png)
+
+如果已经安装了前面所说的运行依赖,可以通过Android Studio编译并运行[图像分割Demo](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/lite/image_segmentation)。
+
+运行结果如下图所示(以选取相册中某张头像图片为例)。
+
+![result1](./images/segmentation1.png)
+
+从相册中选取一张带有头像的图片。
+
+![result2](./images/segmentation2.png)
+
+选择九宫格中不同的背景图片,即可对人像的背景进行替换分割。
+
+图1 白色背景  图2 蓝色背景  图3 油画背景
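+
+> 提示:推理结束且不再使用会话与模型时,可释放其占用的内存。以下为示意写法(假设所用版本的Java接口提供`free`方法,请以实际API为准):
+
+```java
+// 示意:释放会话与模型资源
+session.free();
+model.free();
+```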
diff --git a/tutorials/source_zh_cn/middleclass/inference/mindspore_serving_inference.md b/tutorials/source_zh_cn/middleclass/inference/mindspore_serving_inference.md new file mode 100644 index 0000000000000000000000000000000000000000..d0e9903303a450217b85581e030aa0bb054b0230 --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/inference/mindspore_serving_inference.md @@ -0,0 +1,172 @@ +# 基于MindSpore Serving部署推理服务 + + + +- [基于MindSpore Serving部署推理服务](#基于mindspore-serving部署推理服务) + - [环境准备](#环境准备) + - [部署Serving推理服务](#部署serving推理服务) + - [配置服务](#配置服务) + - [启动服务](#启动服务) + - [执行推理](#执行推理) + + + + + +MindSpore Serving是一个轻量级、高性能的推理服务模块,旨在帮助MindSpore开发者在生产环境中高效部署在线推理服务。当用户使用MindSpore完成模型训练后,导出MindSpore模型,即可使用MindSpore Serving创建该模型的推理服务。 + +MindSpore Serving提供如下功能: + +- 加载模型文件生成推理引擎,提供推理功能; +- 预测请求和处理结果的消息交互,支持gPRC和RESTful两种请求方式; +- 预测接口调用,执行预测,返回预测结果; +- 模型的生命周期管理; +- 服务的生命周期管理; +- 多模型多版本的管理。 + +本文以一个简单的Add网络为例,演示MindSpore Serving的基础使用方法。可通过链接查看[Add网络推理源码](https://gitee.com/mindspore/serving/tree/master/example/tensor_add)。 + +## 环境准备 + +用户在使用MindSpore Serving前,可参考[安装指南](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E5%AE%89%E8%A3%85),与[MindSpore Serving环境变量配置](https://gitee.com/mindspore/serving/blob/master/README_CN.md#%E9%85%8D%E7%BD%AE%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F)部署MindSpore Serving环境。 + +## 部署Serving推理服务 + +### 配置服务 + +以Add用例为例,启动Serving服务需要如下文件: + +```shell +tensor_add +├── add/ +│   └── servable_config.py +│  └── 1/ +│   └── tensor_add.mindir +└── serving_server.py +``` + +- `tensor_add.mindir`为模型文件,放置在文件夹1下,1为版本号。不同的版本放置在不同的文件夹下,版本号需以纯数字串命名,默认配置下启动最大数值的版本号的模型文件。 +- `servable_config.py`为模型配置文件,定义了模型的处理函数,包括`add_common`和`add_cast`两个方法,`add_common`定义了输入为两个普通float32类型的加法操作,`add_cast`定义输入类型为其他类型,经过输入类型转换float32后的加法操作。 + +模型配置文件内容如下: + +```python +import numpy as np +from mindspore_serving.worker import register + + +def add_trans_datatype(x1, x2): + """预处理定义,本例中有两个输入和输出""" + return x1.astype(np.float32), x2.astype(np.float32) + +# 进行模型声明,其中declare_servable入参servable_file指示模型的文件名称,model_format指示模型的模型类别 +# 当with_batch_dim设定为False时, 仅支持2x2的Tensor +# 当with_batch_dim设定为True时, 可支持Nx2的Tensor, N的值由batch决定 +register.declare_servable(servable_file="tensor_add.mindir", model_format="MindIR", with_batch_dim=False) + + +# add_common方法定义 +# Servable方法的入参由Python方法的入参指定,Servable方法的出参由register_method的output_names指定 +@register.register_method(output_names=["y"]) +def add_common(x1, x2): # 仅支持float32类型的输入 + """add_common数据流定义,只调用模型推理""" + y = register.call_servable(x1, x2) + return y + + +# add_cast方法定义 +@register.register_method(output_names=["y"]) +def add_cast(x1, x2): + """add_cast数据流定义,调用预处理和模型推理""" + x1, x2 = register.call_preprocess(add_trans_datatype, x1, x2) # 将输入转换为 float32 + y = register.call_servable(x1, x2) + return y +``` + +### 启动服务 + +执行[serving_server.py](https://gitee.com/mindspore/serving/blob/master/example/tensor_add/serving_server.py),完成服务启动: + +```python +import os +import sys +from mindspore_serving import server + + +def start(): + servable_dir = os.path.dirname(os.path.realpath(sys.argv[0])) + servable_config = server.ServableStartConfig(servable_directory=servable_dir, servable_name="add", + device_ids=(0, 1)) + server.start_servables(servable_configs=servable_config) + + server.start_grpc_server(address="127.0.0.1:5500") + server.start_restful_server(address="127.0.0.1:1500") + + +if __name__ == "__main__": + start() + +``` + +上述启动脚本将在设备0和1上共加载和运行两个`add`推理副本,来自客户端的推理请求将被切割分流到两个推理副本。 + +当服务端打印日志`Serving RESTful server start success, listening on 
127.0.0.1:1500`时,表示Serving RESTful服务启动成功,推理模型已成功加载。 + +## 执行推理 + +客户端提供两种方式访问推理服务,一种是通过[gRPC方式](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_grpc.html),一种是通过[RESTful方式](https://www.mindspore.cn/tutorial/inference/zh-CN/master/serving_restful.html)。本文以gRPC方式为例,通过`client.py`执行推理。 + +```python +import numpy as np +from mindspore_serving.client import Client + + +def run_add_common(): + """调用add add_common""" + client = Client("localhost", 5500, "add", "add_common") + instances = [] + + # 例1 + x1 = np.asarray([[1, 1], [1, 1]]).astype(np.float32) + x2 = np.asarray([[1, 1], [1, 1]]).astype(np.float32) + instances.append({"x1": x1, "x2": x2}) + + # 例2 + x1 = np.asarray([[2, 2], [2, 2]]).astype(np.float32) + x2 = np.asarray([[2, 2], [2, 2]]).astype(np.float32) + instances.append({"x1": x1, "x2": x2}) + + # 例3 + x1 = np.asarray([[3, 3], [3, 3]]).astype(np.float32) + x2 = np.asarray([[3, 3], [3, 3]]).astype(np.float32) + instances.append({"x1": x1, "x2": x2}) + + result = client.infer(instances) + print(result) + + +def run_add_cast(): + client = Client("localhost", 5500, "add", "add_cast") + instances = [] + x1 = np.ones((2, 2), np.int32) + x2 = np.ones((2, 2), np.int32) + instances.append({"x1": x1, "x2": x2}) + result = client.infer(instances) + print(result) + + +if __name__ == '__main__': + run_add_common() + run_add_cast() +``` + +使用`mindspore_serving.client`定义的`Client`类,客户端定义两个用例,分别调用模型的两个方法,`run_add_common`用例为三对float32类型数组相加操作,`run_add_cast`用例计算两个int32数组相加操作。执行后显示如下返回值,三对float32类型相加结果合集和一对int32类型的相加结果,说明Serving服务已正确执行Add网络的推理。 + +```shell +[{'y': array([[2. , 2.], + [2., 2.]], dtype=float32)},{'y': array([[4. , 4.], + [4., 4.]], dtype=float32)},{'y': array([[6. , 6.], + [6., 6.]], dtype=float32)}] +[{'y': array([[2. , 2.], + [2., 2.]], dtype=float32)}] +``` diff --git a/tutorials/source_zh_cn/middleclass/pynative_mode_and_graph_mode/pynative_mode_and_graph_mode.ipynb b/tutorials/source_zh_cn/middleclass/pynative_mode_and_graph_mode/pynative_mode_and_graph_mode.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..7eefefd8ad5ecb0e199d4caf5af239531f6223f4 --- /dev/null +++ b/tutorials/source_zh_cn/middleclass/pynative_mode_and_graph_mode/pynative_mode_and_graph_mode.ipynb @@ -0,0 +1,465 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 动态图与静态图\n", + "\n", + "[![](https://gitee.com/mindspore/docs/raw/master/resource/_static/logo_source.png)](https://gitee.com/mindspore/docs/blob/master/tutorials/source_zh_cn/middleclass/pynative_mode_and_graph_mode/pynative_mode_and_graph_mode.ipynb)\n", + "\n", + "## 概述\n", + "\n", + "MindSpore支持两种运行模式:\n", + "\n", + "- Graph模式:静态图模式或者图模式,将神经网络模型编译成一整张图,然后下发执行。该模式利用图优化等技术提高运行性能,同时有助于规模部署和跨平台运行。\n", + "- PyNative模式:动态图模式,将神经网络中的各个算子逐一下发执行,方便用户编写和调试神经网络模型。\n", + "\n", + "默认情况下,MindSpore处于Graph模式,可以通过`context.set_context(mode=context.PYNATIVE_MODE)`切换为PyNative模式;同样地,MindSpore处于PyNative模式时,可以通过 `context.set_context(mode=context.GRAPH_MODE)`切换为Graph模式。\n", + "\n", + "Graph和PyNative两种模式的区别主要有:\n", + "\n", + "- 使用场景:Graph模式需要一开始就构建好网络结构,然后框架做整图优化和执行,比较适合网络固定没有变化,且需要高性能的场景。而PyNative模式逐行执行算子,支持单独求梯度。\n", + "\n", + "- 网络执行:Graph模式和PyNative模式在执行相同的网络和算子时,精度效果是一致的。由于Graph模式运用了图优化、计算图整图下沉等技术,Graph模式执行网络的性能和效率更高。\n", + "\n", + "- 代码调试:在脚本开发和网络流程调试中,推荐使用PyNative模式进行调试。在PyNative模式下,可以方便地设置断点,获取网络执行的中间结果,也可以通过pdb的方式对网络进行调试。而Graph模式无法设置断点,只能先指定算子进行打印,然后在网络执行完成后查看输出结果。\n", + "\n", + "下面以Graph模式为例,演示MindSpore单算子、普通函数、模型的执行方式,并进一步说明如何在PyNative模式下进行性能改进及梯度求取。\n", + "\n", + "## 执行方式\n", + "\n", + 
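+    "两种模式之间的切换只需一行代码。以下为示意写法(与上文概述一致):\n",
+    "\n",
+    "```python\n",
+    "from mindspore import context\n",
+    "\n",
+    "# 切换到PyNative模式;传入context.GRAPH_MODE则切换回Graph模式\n",
+    "context.set_context(mode=context.PYNATIVE_MODE)\n",
+    "```\n",
+    "\n",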
"这里演示在Graph模式和PyNative模式下,单算子、普通函数、模型的执行方式。\n", + "\n", + "> 在本案例的实际执行中,采取了MindSpore的默认方式`GRAPH_MODE`,用户也可以将其变更为`PYNATIVE_MODE`进行尝试。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import numpy as np\n", + "import mindspore.nn as nn\n", + "from mindspore import context, Tensor, ParameterTuple, ms_function\n", + "import mindspore.ops as ops\n", + "from mindspore import dtype as mstype\n", + "from mindspore.common.initializer import Normal\n", + "from mindspore.nn import Dense, WithLossCell, SoftmaxCrossEntropyWithLogits, Momentum\n", + "\n", + "# 设定为Graph模式,也可替换为PYNATIVE_MODE\n", + "context.set_context(mode=context.GRAPH_MODE, device_target=\"Ascend\")" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### 执行单算子" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[[[-0.02593261 0.01615404 0.01615404 0.01615404 0.01196378]\n", + " [-0.01535788 0.05602208 0.05602208 0.05602208 0.04094065]\n", + " [-0.01535788 0.05602208 0.05602208 0.05602208 0.04094065]\n", + " [-0.01535788 0.05602208 0.05602208 0.05602208 0.04094065]\n", + " [-0.01409336 0.04544117 0.04544117 0.04544117 0.0373004 ]]\n", + "\n", + " [[ 0.03874376 0.02201786 0.02201786 0.02201786 0.02687691]\n", + " [ 0.05751193 0.02690699 0.02690699 0.02690699 0.03515062]\n", + " [ 0.05751193 0.02690699 0.02690699 0.02690699 0.03515062]\n", + " [ 0.05751193 0.02690699 0.02690699 0.02690699 0.03515062]\n", + " [ 0.02599058 0.01130002 0.01130002 0.01130002 0.02304572]]\n", + "\n", + " [[-0.00022919 0.02640852 0.02640852 0.02640852 0.04932421]\n", + " [ 0.01657246 0.0705748 0.0705748 0.0705748 0.0874946 ]\n", + " [ 0.01657246 0.0705748 0.0705748 0.0705748 0.0874946 ]\n", + " [ 0.01657246 0.0705748 0.0705748 0.0705748 0.0874946 ]\n", + " [ 0.03821789 0.09614976 0.09614976 0.09614976 0.10491695]]\n", + "\n", + " [[ 0.0190958 0.02602289 0.02602289 0.02602289 0.01660084]\n", + " [ 0.03556763 0.06862713 0.06862713 0.06862713 0.02653556]\n", + " [ 0.03556763 0.06862713 0.06862713 0.06862713 0.02653556]\n", + " [ 0.03556763 0.06862713 0.06862713 0.06862713 0.02653556]\n", + " [ 0.00727296 0.04514674 0.04514674 0.04514674 0.01423099]]]]\n" + ] + } + ], + "source": [ + "# 打印Cond2d算子的输出\n", + "conv = nn.Conv2d(3, 4, 3, bias_init='zeros')\n", + "input_data = Tensor(np.ones([1, 3, 5, 5]).astype(np.float32))\n", + "output = conv(input_data)\n", + "print(output.asnumpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 执行普通函数\n", + "\n", + "将若干算子组合成一个函数,然后直接通过函数调用的方式执行这些算子,并打印相关结果,如下例所示。\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 4. 10. 
18.]\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import mindspore.nn as nn\n", + "import mindspore.ops as ops\n", + "from mindspore import context, Tensor\n", + "\n", + "class Net(nn.Cell):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.mul = ops.Mul()\n", + "\n", + " def construct(self, x, y):\n", + " return self.mul(x, y)\n", + "\n", + "x = Tensor(np.array([1.0, 2.0, 3.0]).astype(np.float32))\n", + "y = Tensor(np.array([4.0, 5.0, 6.0]).astype(np.float32))\n", + "\n", + "net = Net()\n", + "print(net(x, y))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 执行网络" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 4. 10. 18.]\n" + ] + } + ], + "source": [ + "class Net(nn.Cell):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.mul = ops.Mul()\n", + "\n", + " def construct(self, x, y):\n", + " return self.mul(x, y)\n", + "\n", + "x = Tensor(np.array([1.0, 2.0, 3.0]).astype(np.float32))\n", + "y = Tensor(np.array([4.0, 5.0, 6.0]).astype(np.float32))\n", + "\n", + "net = Net()\n", + "print(net(x, y))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## PyNative模式说明\n", + "\n", + "### 性能优化\n", + "\n", + "正如文章开头所说,Graph模式适合高性能的场景,但PyNative模式中也提供了性能优化的手段。MindSpore提供了Staging功能,该功能可以在PyNative模式下将Python函数或者Python类的方法编译成计算图,通过图优化等技术提高运行速度,是一种混合运行机制。Staging功能的使用通过`ms_function`装饰器达成,该装饰器会将将模块编译成计算图,在给定输入之后,以图的形式下发执行。如下例所示:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[2. 2. 2. 2.]\n", + " [2. 2. 2. 2.]\n", + " [2. 2. 2. 2.]\n", + " [2. 2. 2. 
2.]]\n" + ] + } + ], + "source": [ + "# 导入ms_function\n", + "from mindspore import ms_function\n", + "\n", + "# 仍设定为PyNative模式\n", + "context.set_context(mode=context.PYNATIVE_MODE, device_target=\"Ascend\")\n", + "\n", + "add = ops.Add()\n", + "\n", + "# 使用装饰器编译计算图\n", + "@ms_function\n", + "def add_fn(x, y):\n", + " res = add(x, y)\n", + " return res\n", + "\n", + "x = Tensor(np.ones([4, 4]).astype(np.float32))\n", + "y = Tensor(np.ones([4, 4]).astype(np.float32))\n", + "z = add_fn(x, y)\n", + "print(z.asnumpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "在加装了`ms_function`装饰器的函数中,如果包含不需要进行参数训练的算子(如`pooling`、`add`等算子),则这些算子可以在被装饰的函数中直接调用,如上例所示。如果被装饰的函数中包含了需要进行参数训练的算子(如`Convolution`、`BatchNorm`等算子),则这些算子必须在被装饰等函数之外完成实例化操作。" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[[[ 0.00994964 0.01850731 -0.05146599]\n", + " [ 0.02427048 -0.09082688 -0.00945184]\n", + " [ 0.02710651 -0.07322617 0.02594434]]\n", + "\n", + " [[ 0.00056772 -0.05043615 -0.03873939]\n", + " [-0.00445028 0.03694705 -0.03555503]\n", + " [ 0.07329068 -0.02026664 0.01922888]]\n", + "\n", + " [[ 0.02257145 -0.04093865 -0.00493869]\n", + " [ 0.01740007 0.02478302 0.02072578]\n", + " [ 0.05831327 -0.03933404 0.01767443]]\n", + "\n", + " [[-0.03954437 0.02160874 -0.00700614]\n", + " [ 0.03856367 -0.04015685 0.02508826]\n", + " [-0.0229507 -0.03803677 0.02813173]]]\n", + "\n", + "\n", + " [[[ 0.01678797 -0.02227589 -0.04470547]\n", + " [-0.05720481 -0.15464461 0.00911596]\n", + " [ 0.02566019 -0.04340314 0.03164666]]\n", + "\n", + " [[ 0.03300299 -0.05849815 0.05841954]\n", + " [-0.11595733 -0.01524522 0.02947116]\n", + " [ 0.05930116 0.00831041 -0.0466827 ]]\n", + "\n", + " [[-0.0797728 0.02910854 0.00766015]\n", + " [-0.01380327 -0.03338642 0.02625138]\n", + " [ 0.02279372 -0.00952736 0.02026749]]\n", + "\n", + " [[ 0.04039776 -0.05340278 -0.0083563 ]\n", + " [ 0.04991922 -0.05205034 -0.0058607 ]\n", + " [ 0.00686666 0.00064385 0.00301326]]]]\n" + ] + } + ], + "source": [ + "# Conv2d实例化操作\n", + "conv_obj = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=2, padding=0)\n", + "conv_obj.init_parameters_data()\n", + "\n", + "@ms_function\n", + "def conv_fn(x):\n", + " res = conv_obj(x)\n", + " return res\n", + "\n", + "input_data = np.random.randn(2, 3, 6, 6).astype(np.float32)\n", + "z = conv_fn(Tensor(input_data))\n", + "print(z.asnumpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 梯度求取\n", + "\n", + "PyNative模式中支持单独的梯度求取操作,下面演示如何利用这一特性调试网络模型。具体操作中可通过`GradOperation`求该函数或者网络所有的输入梯度。需要注意,输入类型仅支持Tensor。\n", + "\n", + "构建网络如下。" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "class LeNet5(nn.Cell):\n", + " \"\"\"\n", + " Lenet网络结构\n", + " \"\"\"\n", + " def __init__(self, num_class=10, num_channel=1):\n", + " super(LeNet5, self).__init__()\n", + " # 定义所需要的运算\n", + " self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid')\n", + " self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')\n", + " self.fc1 = nn.Dense(16 * 5 * 5, 120, weight_init=Normal(0.02))\n", + " self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))\n", + " self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))\n", + " self.relu = nn.ReLU()\n", + " self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)\n", + " self.flatten = nn.Flatten()\n", + "\n", + " def construct(self, x):\n", + " # 使用定义好的运算构建前向网络\n", 
+ " x = self.conv1(x)\n", + " x = self.relu(x)\n", + " x = self.max_pool2d(x)\n", + " x = self.conv2(x)\n", + " x = self.relu(x)\n", + " x = self.max_pool2d(x)\n", + " x = self.flatten(x)\n", + " x = self.fc1(x)\n", + " x = self.relu(x)\n", + " x = self.fc2(x)\n", + " x = self.relu(x)\n", + " x = self.fc3(x)\n", + " return x\n", + "\n", + "# 实例化网络\n", + "net = LeNet5()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "如上文所说,利用`GradOperation`求函数的输入梯度。" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "class GradWrap(nn.Cell):\n", + " \"\"\"求函数输入梯度\"\"\"\n", + " def __init__(self, network):\n", + " super(GradWrap, self).__init__(auto_prefix=False)\n", + " self.network = network\n", + " # 用Tuple的形式包装weight\n", + " self.weights = ParameterTuple(filter(lambda x: x.requires_grad, network.get_parameters()))\n", + "\n", + " def construct(self, x, label):\n", + " weights = self.weights\n", + " # 返回值为梯度\n", + " return ops.GradOperation(get_by_list=True)(self.network, weights)(x, label)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "在PyNative模式中进行网络训练。" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.3025854\n" + ] + } + ], + "source": [ + "# 设定优化器、损失函数\n", + "optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)\n", + "criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')\n", + "\n", + "# 通过WithLossCell获取Loss值\n", + "net_with_criterion = WithLossCell(net, criterion)\n", + "\n", + "# 调用GradWrap\n", + "train_network = GradWrap(net_with_criterion)\n", + "train_network.set_train()\n", + "\n", + "# 产生输入数据\n", + "input_data = Tensor(np.ones([32, 1, 32, 32]).astype(np.float32) * 0.01)\n", + "label = Tensor(np.ones([32]).astype(np.int32))\n", + "output = net(Tensor(input_data))\n", + "\n", + "# 利用前向网络计算loss\n", + "loss_output = criterion(output, label)\n", + "# 求得梯度\n", + "grads = train_network(input_data, label)\n", + "# 优化参数\n", + "success = optimizer(grads)\n", + "loss = loss_output.asnumpy()\n", + "print(loss)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "MindSpore1.2_gpu", + "language": "python", + "name": "liuxiao_gpu" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/tutorials/source_zh_cn/save_load_model.md b/tutorials/source_zh_cn/save_load_model.md index a48c1cc32d3312263805c103442da4d2aee41eb5..ab8fda01e8b83068931df33dd23070bd8c52c300 100644 --- a/tutorials/source_zh_cn/save_load_model.md +++ b/tutorials/source_zh_cn/save_load_model.md @@ -22,7 +22,7 @@ model.train(epoch_num, dataset, callbacks=ckpt_cb) ```python from mindspore.train.callback import ModelCheckpoint, CheckpointConfig -config_ck = CheckpointConfig(save_checkpoint_steps=32, keep_checkpoint_max=10) +config_ckpt = CheckpointConfig(save_checkpoint_steps=32, keep_checkpoint_max=10) ckpt_cb = ModelCheckpoint(prefix='resnet50', directory=None, config=config_ckpt) model.train(epoch_num, dataset, callbacks= ckpt_cb) ``` diff --git a/tutorials/training/requirements.txt b/tutorials/training/requirements.txt index 
0183c33c87e1685d958cee4a68cd14ad7aa069e9..3d37ac10fd903676ea245250fcaafb1c5ee5867b 100644 --- a/tutorials/training/requirements.txt +++ b/tutorials/training/requirements.txt @@ -1,5 +1,5 @@ sphinx >= 2.2.1, <= 2.4.4 -recommonmark +myst_parser == 0.14.0 sphinx-markdown-tables sphinx_rtd_theme nbsphinx diff --git a/tutorials/training/source_en/advanced_use/custom_debugging_info.md b/tutorials/training/source_en/advanced_use/custom_debugging_info.md index 9f6bed8d549ab89229e7f67ced78cecd2ab88dea..e7c4de801fd57a5eb3e1815c432aa64fe1de33a5 100644 --- a/tutorials/training/source_en/advanced_use/custom_debugging_info.md +++ b/tutorials/training/source_en/advanced_use/custom_debugging_info.md @@ -205,6 +205,8 @@ By invoking the `eval` method of `Accuracy`, you will obtain the calculation res You can understand how `Accuracy` runs by using the following code: ```python +from mindspore import Tensor +from mindspore.nn import Accuracy x = Tensor(np.array([[0.2, 0.5], [0.3, 0.1], [0.9, 0.6]])) y = Tensor(np.array([1, 0, 1])) metric = Accuracy() @@ -337,7 +339,8 @@ MindSpore uses glog to output logs. The following environment variables are comm The environment variable specifies the log output path. If `GLOG_logtostderr` is set to 0, value of this variable must be specified. If `GLOG_log_dir` is specified and the value of `GLOG_logtostderr` is 1, logs are output to the screen but not to a file. - Logs of C++ and Python will be output to different files. The file name of C++ log complies with the naming rule of `GLOG` log file. Here, the name is `mindspore.MachineName.UserName.log.LogLevel.Timestamp`. The file name of Python log is `mindspore.log`. + Logs of C++ and Python will be output to different files. The file name of C++ log complies with the naming rule of `GLOG` log file. Here, the name is `mindspore.MachineName.UserName.log.LogLevel.Timestamp`. The file name of Python log is `mindspore.log`. + `GLOG_log_dir` can only contains characters such as uppercase letters, lowercase letters, digits, ".", ":", "-", "_", "/" and "\\". 
- `MS_SUBMODULE_LOG_v` diff --git a/tutorials/training/source_en/advanced_use/custom_loss_function.md b/tutorials/training/source_en/advanced_use/custom_loss_function.md index 49898918fe7337d2e5001fa7edc18e558b7a27bf..3fbe07512125fd9ea6196b0c4e3d56f8394cd408 100644 --- a/tutorials/training/source_en/advanced_use/custom_loss_function.md +++ b/tutorials/training/source_en/advanced_use/custom_loss_function.md @@ -74,9 +74,9 @@ When the loss function is defined, the base class `Loss` of the loss function ca ```python import mindspore.ops as ops -from mindspore.nn import Loss +from mindspore.nn import LossBase -class L1Loss(Loss): +class L1Loss(LossBase): def __init__(self, reduction="mean"): super(L1Loss, self).__init__(reduction) self.abs = ops.Abs() @@ -182,7 +182,7 @@ import mindspore.nn as nn import mindspore.ops as ops from mindspore import Model from mindspore import dataset as ds -from mindspore.nn import Loss +from mindspore.nn import LossBase from mindspore.common.initializer import Normal from mindspore.train.callback import LossMonitor @@ -194,7 +194,7 @@ class LinearNet(nn.Cell): def construct(self, x): return self.fc(x) -class L1Loss(Loss): +class L1Loss(LossBase): def __init__(self, reduction="mean"): super(L1Loss, self).__init__(reduction) self.abs = ops.Abs() @@ -284,9 +284,9 @@ We will define a loss function `L1LossForMultiLabel` according to defined multil ```python import mindspore.ops as ops -from mindspore.nn import Loss +from mindspore.nn import LossBase -class L1LossForMultiLabel(Loss): +class L1LossForMultiLabel(LossBase): def __init__(self, reduction="mean"): super(L1LossForMultiLabel, self).__init__(reduction) self.abs = ops.Abs() @@ -372,7 +372,7 @@ import mindspore.nn as nn import mindspore.ops as ops from mindspore import Model from mindspore import dataset as ds -from mindspore.nn import Loss +from mindspore.nn import LossBase from mindspore.common.initializer import Normal from mindspore.train.callback import LossMonitor @@ -384,7 +384,7 @@ class LinearNet(nn.Cell): def construct(self, x): return self.fc(x) -class L1LossForMultiLabel(Loss): +class L1LossForMultiLabel(LossBase): def __init__(self, reduction="mean"): super(L1LossForMultiLabel, self).__init__(reduction) self.abs = ops.Abs() diff --git a/tutorials/training/source_en/advanced_use/custom_operator_gpu.md b/tutorials/training/source_en/advanced_use/custom_operator_gpu.md index a76aadaae3d9273403a1d5cb350e13e702f09f66..cbd7ebe4996042d33bc7fc803c2a6755cd4f998a 100644 --- a/tutorials/training/source_en/advanced_use/custom_operator_gpu.md +++ b/tutorials/training/source_en/advanced_use/custom_operator_gpu.md @@ -246,17 +246,78 @@ context.set_context(device_target='GPU') @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard -def test_TensroAdd(): +def test_TensorAdd(): x1 = Tensor(np.ones((3, 4), np.float32)) x2 = Tensor(np.ones((3, 4), np.float32)) y = ops.TensorAddV2()(x1, x2) print('result: ', y) ``` -When the command `pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py` executes, you can see the results meeting expectations: +When the command `pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py::test_TensorAdd` executes, you can see the results meeting expectations: ```text result: [[2. 2. 2. 2.] [2. 2. 2. 2.] [2. 2. 2. 2.]] ``` + +## Defining Operators' BProp Functions + +If an operator needs to support automatic differentiation, its back-propagation function (bprop) needs to be defined in its primitives. 
In the bprop function, you need to describe the reverse computation logic that derives the input gradients from the forward inputs, the forward outputs, and the output gradients. The reverse computation logic can be composed of built-in operators or custom reverse operators.
+
+Pay attention to the following points when defining an operator's bprop function:
+
+- The input parameters of the bprop function are, in order, the forward inputs, the forward outputs, and the output gradients. If the operator has multiple outputs, the forward outputs and output gradients are provided as tuples.
+- The return value of the bprop function is a tuple of input gradients, whose elements are in the same order as the forward input parameters. Even if there is only one input gradient, the return value must be a tuple.
+
+For example, the bprop function registered for `TensorAddV2` is:
+
+```python
+import mindspore.ops as ops
+from mindspore.ops._grad.grad_base import bprop_getters
+
+@bprop_getters.register(ops.TensorAddV2)
+def get_bprop_tensoraddv2(self):
+    """Generate bprop for TensorAddV2"""
+
+    def bprop(x, y, out, dout):
+        return dout, dout
+
+    return bprop
+```
+
+Define the bprop test case in the file `test_tensoraddv2_op.py`.
+
+```python
+import numpy as np
+import mindspore.nn as nn
+import mindspore.ops as ops
+from mindspore import Tensor
+
+class Net(nn.Cell):
+    """Forward network that wraps the custom TensorAddV2 operator."""
+    def __init__(self):
+        super(Net, self).__init__()
+        self.add = ops.TensorAddV2()
+
+    def construct(self, x1, x2):
+        return self.add(x1, x2)
+
+class Grad(nn.Cell):
+    def __init__(self, network):
+        super(Grad, self).__init__()
+        self.grad = ops.GradOperation(sens_param=True)
+        self.network = network
+
+    def construct(self, x1, x2, sens):
+        gout = self.grad(self.network)(x1, x2, sens)
+        return gout
+
+def test_grad_net():
+    x1 = Tensor(np.ones((3, 4), np.float32))
+    x2 = Tensor(np.ones((3, 4), np.float32))
+    sens = Tensor(np.arange(3 * 4).reshape(3, 4).astype(np.float32))
+    grad = Grad(Net())
+    dx = grad(x1, x2, sens)
+    print("dx[0]: ", dx[0].asnumpy())
+```
+
+Run the case:
+
+```bash
+pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py::test_grad_net
+```
+
+Running results:
+
+```text
+dx[0]: [[0. 1. 2. 3.]
+ [4. 5. 6. 7.]
+ [8. 9. 10. 11.]]
+```
+
diff --git a/tutorials/training/source_en/advanced_use/cv_resnet50_second_order_optimizer.md b/tutorials/training/source_en/advanced_use/cv_resnet50_second_order_optimizer.md
index 4cd71443b5411bf744d3c46ceda6da01d4c3931b..4f0e60a31ab3043785f4f3a5de326b1bb6bf893a 100644
--- a/tutorials/training/source_en/advanced_use/cv_resnet50_second_order_optimizer.md
+++ b/tutorials/training/source_en/advanced_use/cv_resnet50_second_order_optimizer.md
@@ -39,32 +39,26 @@ Based on the existing natural gradient algorithm, MindSpore development team use
 This tutorial describes how to use the second-order optimizer THOR provided by MindSpore to train the ResNet-50 v1.5 network and ImageNet dataset on Ascend 910 and GPU.
> Download address of the complete code example: - + Directory Structure of Code Examples ```text -├── resnet_thor +├── resnet ├── README.md ├── scripts ├── run_distribute_train.sh # launch distributed training for Ascend 910 - └── run_eval.sh # launch inference for Ascend 910 + ├── run_eval.sh # launch inference for Ascend 910 ├── run_distribute_train_gpu.sh # launch distributed training for GPU - └── run_eval_gpu.sh # launch inference for GPU + ├── run_eval_gpu.sh # launch inference for GPU ├── src - ├── crossentropy.py # CrossEntropy loss function ├── config.py # parameter configuration - ├── dataset_helper.py # dataset helper for minddata dataset - ├── grad_reducer_thor.py # grad reduce for thor - ├── model_thor.py # model for train - ├── resnet_thor.py # resnet50_thor backone - ├── thor.py # thor optimizer - ├── thor_layer.py # thor layer - └── dataset.py # data preprocessing + ├── dataset.py # data preprocessing + ├── CrossEntropySmooth.py # CrossEntropy loss function + ├── lr_generator.py # generate learning rate for every step + ├── resnet.py # ResNet50 backbone ├── eval.py # infer script ├── train.py # train script - ├── export.py # export checkpoint file into air file - └── mindspore_hub_conf.py # config file for mindspore hub repository ``` The overall execution process is as follows: @@ -121,21 +115,40 @@ import mindspore.dataset.vision.c_transforms as C import mindspore.dataset.transforms.c_transforms as C2 from mindspore.communication.management import init, get_rank, get_group_size -def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"): + +def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend", distribute=False, + enable_cache=False, cache_session_id=None): + """ + Create a training or evaluation ImageNet2012 dataset for ResNet50. + + Args: + dataset_path(string): the path of dataset. + do_train(bool): whether the dataset is used for training or evaluation. + repeat_num(int): the repeat times of dataset. Default: 1 + batch_size(int): the batch size of dataset. Default: 32 + target(str): the device target. Default: Ascend + distribute(bool): data for distribute or not. Default: False + enable_cache(bool): whether tensor caching service is used for evaluation. Default: False + cache_session_id(int): if enable_cache is set, cache session_id need to be provided. 
Default: None + + Returns: + dataset + """ if target == "Ascend": device_num, rank_id = _get_rank_info() - num_parallels = 8 else: - init() - rank_id = get_rank() - device_num = get_group_size() - num_parallels = 4 + if distribute: + init() + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 if device_num == 1: - data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=num_parallels, shuffle=True) + data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True) else: - data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=num_parallels, shuffle=True, - num_shards=device_num, shard_id=rank_id) + data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True, + num_shards=device_num, shard_id=rank_id) image_size = 224 mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] @@ -160,8 +173,18 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" type_cast_op = C2.TypeCast(mstype.int32) - data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=num_parallels) - data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallels) + data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8) + # only enable cache for eval + if do_train: + enable_cache = False + if enable_cache: + if not cache_session_id: + raise ValueError("A cache session_id must be provided to use cache.") + eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0) + data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8, + cache=eval_cache) + else: + data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) # apply batch operations data_set = data_set.batch(batch_size, drop_remainder=True) @@ -176,25 +199,18 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" ## Defining the Network -Use the ResNet-50 v1.5 network model as an example. Define the [ResNet-50 network](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/resnet/src/resnet.py), and replace the `Conv2d` and `Dense` operators with the operators customized by the second-order optimizer. - The defined network model is stored in the `src/resnet_thor.py` script in the source code, and the customized operators `Conv2d_thor` and `Dense_thor` are stored in the `src/thor_layer.py` script. - -- Use `Conv2d_thor` to replace `Conv2d` in the original network model. -- Use `Dense_thor` to replace `Dense` in the original network model. - -> The `Conv2d_thor` and `Dense_thor` operators customized by THOR are used to save the second-order matrix information in model training. The backbone of the newly defined network is the same as that of the original network model. +Use the ResNet-50 v1.5 network model as an example. Define the [ResNet-50 network](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/resnet/src/resnet.py). After the network is built, call the defined ResNet-50 in the `__main__` function. ```python ... -from src.resnet_thor import resnet50 +from src.resnet import resnet50 as resnet ... if __name__ == "__main__": ... - # define the net - net = resnet50(class_num=config.class_num, damping=damping, loss_scale=config.loss_scale, - frequency=config.frequency, batch_size=config.batch_size) + # define net + net = resnet(class_num=config.class_num) ... 
``` @@ -204,23 +220,23 @@ if __name__ == "__main__": Loss functions supported by MindSpore include `SoftmaxCrossEntropyWithLogits`, `L1Loss`, and `MSELoss`. The `SoftmaxCrossEntropyWithLogits` loss function is required by THOR. -The implementation procedure of the loss function is in the `src/crossentropy.py` script. A common trick in deep network model training, label smoothing, is used to improve the model tolerance to error label classification by smoothing real labels, thereby improving the model generalization capability. +The implementation procedure of the loss function is in the `src/CrossEntropySmooth.py` script. A common trick in deep network model training, label smoothing, is used to improve the model tolerance to error label classification by smoothing real labels, thereby improving the model generalization capability. ```python -class CrossEntropy(Loss): +class CrossEntropySmooth(LossBase): """CrossEntropy""" - def __init__(self, smooth_factor=0., num_classes=1000): - super(CrossEntropy, self).__init__() + def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000): + super(CrossEntropySmooth, self).__init__() self.onehot = ops.OneHot() + self.sparse = sparse self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) - self.ce = nn.SoftmaxCrossEntropyWithLogits() - self.mean = ops.ReduceMean(False) + self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction) def construct(self, logit, label): - one_hot_label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value) - loss = self.ce(logit, one_hot_label) - loss = self.mean(loss, 0) + if self.sparse: + label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value) + loss = self.ce(logit, label) return loss ``` @@ -228,14 +244,15 @@ Call the defined loss function in the `__main__` function. ```python ... -from src.crossentropy import CrossEntropy +from src.CrossEntropySmooth import CrossEntropySmooth ... if __name__ == "__main__": ... # define the loss function if not config.use_label_smooth: config.label_smooth_factor = 0.0 - loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + loss = CrossEntropySmooth(sparse=True, reduction="mean", + smooth_factor=config.label_smooth_factor, num_classes=config.class_num) ... ``` @@ -253,27 +270,29 @@ The meanings of parameters in the formula are as follows: - $F^{-1}$: FIM obtained from the network computation. - $\nabla E$: the first-order gradient value. -As shown in the parameter update formula, THOR needs to additionally compute an FIM of each layer, and the FIM of each layer is obtained through computation in the customized network model. The FIM can adaptively adjust the parameter update step and direction of each layer, accelerating convergence and reducing parameter optimization complexity. +As shown in the parameter update formula, THOR needs to additionally compute an FIM of each layer. The FIM can adaptively adjust the parameter update step and direction of each layer, accelerating convergence and reducing parameter optimization complexity. + +For more introduction of THOR optimizer, please see [THOR paper](https://www.aaai.org/AAAI21Papers/AAAI-6611.ChenM.pdf). 
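+
+For reference, the update rule discussed above can be restated compactly (using the symbols defined in the list above, with $\theta$ denoting the network parameters and $\alpha$ the learning rate):
+
+$$\theta^{t+1} = \theta^{t} - \alpha F^{-1}\nabla E$$
+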
+ +When calling the second-order optimizer THOR provided by MindSpore, THOR will automatically call the conversion interface to convert the Conv2d and Dense layers in the original network model into corresponding [Conv2dThor](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/layer/thor_layer.py) and [DenseThor](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/layer/thor_layer.py). +And the FIM of each layer is computed and saved in Conv2dThor and DenseThor. + +> Compared to the original network model, conversion network model has the same backbone and weights. ```python ... -if args_opt.device_target == "Ascend": - from src.thor import THOR -else: - from src.thor import THOR_GPU as THOR +from mindspore.nn.optim import thor ... - if __name__ == "__main__": ... - # learning rate setting - lr = get_model_lr(0, config.lr_init, config.lr_decay, config.lr_end_epoch, step_size, decay_epochs=39) + # learning rate setting and damping setting + from src.lr_generator import get_thor_lr, get_thor_damping + lr = get_thor_lr(0, config.lr_init, config.lr_decay, config.lr_end_epoch, step_size, decay_epochs=39) + damping = get_thor_damping(0, config.damping_init, config.damping_decay, 70, step_size) # define the optimizer - opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), config.momentum, - filter(lambda x: 'matrix_A' in x.name, net.get_parameters()), - filter(lambda x: 'matrix_G' in x.name, net.get_parameters()), - filter(lambda x: 'A_inv_max' in x.name, net.get_parameters()), - filter(lambda x: 'G_inv_max' in x.name, net.get_parameters()), - config.weight_decay, config.loss_scale) + split_indices = [26, 53] + opt = thor(net, Tensor(lr), Tensor(damping), config.momentum, config.weight_decay, config.loss_scale, + config.batch_size, split_indices=split_indices, frequency=config.frequency) ... ``` @@ -287,7 +306,7 @@ MindSpore provides the callback mechanism to execute customized logic during tra ```python ... -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor, LossMonitor +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor ... if __name__ == "__main__": ... @@ -305,23 +324,25 @@ if __name__ == "__main__": ### Configuring the Network Training -Use the `model.train` API provided by MindSpore to easily train the network. THOR reduces the computation workload and improves the computation speed by reducing the frequency of updating the second-order matrix. Therefore, the Model_Thor class is redefined to inherit the Model class provided by MindSpore. The parameter for controlling the frequency of updating the second-order matrix is added to the Model_Thor class. You can adjust this parameter to optimize the overall performance. +Use the `model.train` API provided by MindSpore to easily train the network. THOR reduces the computation workload and improves the computation speed by reducing the frequency of updating the second-order matrix. Therefore, the [ModelThor](https://gitee.com/mindspore/mindspore/blob/master/mindspore/train/train_thor/model_thor.py) class is redefined to inherit the Model class provided by MindSpore. The parameter of THOR for controlling the frequency of updating the second-order matrix can be obtained by the ModelThor class. You can adjust this parameter to optimize the overall performance. +MindSpore provides a one-click conversion interface from Model class to ModelThor class. ```python ... 
from mindspore import FixedLossScaleManager -from src.model_thor import Model_Thor as Model +from mindspore import Model +from mindspore.train.train_thor import ConvertModelUtils ... if __name__ == "__main__": ... loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) - if target == "Ascend": - model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', loss_scale_manager=loss_scale, - keep_batchnorm_fp32=False, metrics={'acc'}, frequency=config.frequency) - else: - model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'}, - amp_level="O2", keep_batchnorm_fp32=True, frequency=config.frequency) + model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics=metrics, + amp_level="O2", keep_batchnorm_fp32=False, eval_network=dist_eval_network) + if cfg.optimizer == "Thor": + model = ConvertModelUtils().convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt, + loss_scale_manager=loss_scale, metrics={'acc'}, + amp_level="O2", keep_batchnorm_fp32=False) ... ``` @@ -331,19 +352,20 @@ After the training script is defined, call the shell script in the `scripts` dir #### Ascend 910 -Currently, MindSpore distributed execution on Ascend uses the single-device single-process running mode. That is, one process runs on one device, and the number of total processes is the same as the number of devices that are being used. All processes are executed in the background. Create a directory named `train_parallel`+`device_id` for each process to store log information, operator compilation information, and training checkpoint files. The following takes the distributed training script for eight devices as an example to describe how to run the script: +Currently, MindSpore distributed execution on Ascend uses the single-device single-process running mode. That is, one process runs on one device, and the number of total processes is the same as the number of devices that are being used. All processes are executed in the background. Create a directory named `train_parallel`+`device_id` for each process to store log information, operator compilation information, and training checkpoint files. The following takes the distributed training script for eight devices as an example to describe how to run the script. -Run the script. +First configure the optimizer as 'Thor' in `src/config.py`, and then run the script: ```bash -sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [DEVICE_NUM] +bash run_distribute_train.sh ``` -Variables `RANK_TABLE_FILE`, `DATASET_PATH`, and `DEVICE_NUM` need to be transferred to the script. The meanings of variables are as follows: +Variables `resnet50`, `imagenet2012`, `RANK_TABLE_FILE` and `DATASET_PATH` need to be transferred to the script. The meanings of variables are as follows: +- `resnet50`: training network +- `imagenet2012`: training dataset - `RANK_TABLE_FILE`: path for storing the networking information file (about the rank table file, you can refer to [HCCL_TOOL](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)) - `DATASET_PATH`: training dataset path -- `DEVICE_NUM`: the actual number of running devices. For details about other environment variables, see configuration items in the installation guide. @@ -367,11 +389,12 @@ After the training is complete, the checkpoint file generated by each device is ```text └─train_parallel0 - ├─resnet-1_5004.ckpt - ├─resnet-2_5004.ckpt - │ ...... - ├─resnet-42_5004.ckpt - │ ...... 
+ ├─ckpt_0 + ├─resnet-1_5004.ckpt + ├─resnet-2_5004.ckpt + │ ...... + ├─resnet-42_5004.ckpt + │ ...... ``` In the preceding information, @@ -379,16 +402,18 @@ In the preceding information, #### GPU -On the GPU hardware platform, MindSpore uses `mpirun` of OpenMPI to perform distributed training. The process creates a directory named `train_parallel` to store log information and training checkpoint files. The following takes the distributed training script for eight devices as an example to describe how to run the script: +On the GPU hardware platform, MindSpore uses `mpirun` of OpenMPI to perform distributed training. The process creates a directory named `train_parallel` to store log information and training checkpoint files. The following takes the distributed training script for eight devices as an example to describe how to run the script. +First configure the optimizer as 'Thor' in `src/config.py`, and then run the script: ```bash -sh run_distribute_train_gpu.sh [DATASET_PATH] [DEVICE_NUM] +bash run_distribute_train_gpu.sh ``` -Variables `DATASET_PATH` and `DEVICE_NUM` need to be transferred to the script. The meanings of variables are as follows: +Variables `resnet50`, `imagenet2012` and `DATASET_PATH` need to be transferred to the script. The meanings of variables are as follows: +- `resnet50`: training network +- `imagenet2012`: training dataset - `DATASET_PATH`: training dataset path -- `DEVICE_NUM`: the actual number of running devices During GPU-based training, the `DEVICE_ID` environment variable is not required. Therefore, you do not need to call `int(os.getenv('DEVICE_ID'))` in the main training script to obtain the device ID or transfer `device_id` to `context`. You need to set `device_target` to `GPU` and call `init()` to enable the NCCL. @@ -444,23 +469,28 @@ if __name__ == "__main__": ... # define net net = resnet(class_num=config.class_num) - net.add_flags_recursive(thor=False) # load checkpoint param_dict = load_checkpoint(args_opt.checkpoint_path) - keys = list(param_dict.keys()) - for key in keys: - if "damping" in key: - param_dict.pop(key) load_param_into_net(net, param_dict) net.set_train(False) + # define loss + if args_opt.dataset == "imagenet2012": + if not config.use_label_smooth: + config.label_smooth_factor = 0.0 + loss = CrossEntropySmooth(sparse=True, reduction='mean', + smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + else: + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') + # define model model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) # eval model res = model.eval(dataset) print("result:", res, "ckpt=", args_opt.checkpoint_path) + ... ``` ### Inference @@ -472,11 +502,13 @@ After the inference network is defined, the shell script in the `scripts` direct On the Ascend 910 hardware platform, run the following inference command: ```bash -sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] +bash run_eval.sh ``` -Variables `DATASET_PATH` and `CHECKPOINT_PATH` need to be transferred to the script. The meanings of variables are as follows: +Variables `resnet50`, `imagenet2012`, `DATASET_PATH` and `CHECKPOINT_PATH` need to be transferred to the script. 
The meanings of variables are as follows:
+- `resnet50`: inference network
+- `imagenet2012`: inference dataset
 - `DATASET_PATH`: inference dataset path
 - `CHECKPOINT_PATH`: path for storing the checkpoint file
@@ -494,11 +526,13 @@ result: {'top_5_accuracy': 0.9295574583866837, 'top_1_accuracy': 0.7614436619718
 On the GPU hardware platform, run the following inference command:
 
 ```bash
-sh run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
+bash run_eval_gpu.sh
 ```
 
-Variables `DATASET_PATH` and `CHECKPOINT_PATH` need to be transferred to the script. The meanings of variables are as follows:
+Variables `resnet50`, `imagenet2012`, `DATASET_PATH` and `CHECKPOINT_PATH` need to be transferred to the script. The meanings of variables are as follows:
+- `resnet50`: inference network
+- `imagenet2012`: inference dataset
 - `DATASET_PATH`: inference dataset path
 - `CHECKPOINT_PATH`: path for storing the checkpoint file
diff --git a/tutorials/training/source_en/advanced_use/dashboard.md b/tutorials/training/source_en/advanced_use/dashboard.md
index 3d74e3bedac36bdc6ca60503b9d5c0321ec387c1..e3a58c80d04ea31746f08bcd8797e4096addeaf0 100644
--- a/tutorials/training/source_en/advanced_use/dashboard.md
+++ b/tutorials/training/source_en/advanced_use/dashboard.md
@@ -186,8 +186,8 @@ Figure 13 shows tensors recorded by a user in a form of a histogram. Click the u
     - MindInsight supports the column of tensor displayed on the front end up to 1000 columns for each query.
     - After the tensor is loaded, in the tensor-visible table view, you can view the maximum of 100,000 values. If the value obtained by the selected dimension query exceeds this limit, it cannot be displayed.
 
-5. Since tensor visualizatioin (`TensorSummary`) records raw tensor data, it requires a large amount of storage space. Before using `TensorSummary` and during training, please check that the system storage space is sufficient.
-   The storage space occupied by the tensor visualizatioin function can be reduced by the following methods:
+5. Since tensor visualization (`TensorSummary`) records raw tensor data, it requires a large amount of storage space. Before using `TensorSummary` and during training, please check that the system storage space is sufficient.
+   The storage space occupied by the tensor visualization function can be reduced by the following methods:
 
     1) Avoid using `TensorSummary` to record large tensors.
 
    2) Reduce the number of `TensorSummary` operators in the network.
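The `TensorSummary` advice above is easier to act on with a concrete picture of where the operator sits in a network. The following is a minimal sketch (layer sizes and the summary tag are illustrative), recording a single small intermediate tensor rather than every large activation:

```python
import mindspore.nn as nn
import mindspore.ops as ops

class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.dense = nn.Dense(32, 10)
        # TensorSummary records raw tensor data, so each recorded tensor costs storage
        self.tensor_summary = ops.TensorSummary()

    def construct(self, x):
        out = self.dense(x)
        # one TensorSummary call per recorded tensor; keep these few and small
        self.tensor_summary("dense_output", out)
        return out
```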
diff --git a/tutorials/training/source_en/advanced_use/debugger.rst b/tutorials/training/source_en/advanced_use/debugger.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c91dd4d20639b42e66fcafabecf3382d78bf0176
--- /dev/null
+++ b/tutorials/training/source_en/advanced_use/debugger.rst
@@ -0,0 +1,10 @@
+Debugger
+==================================
+
+MindSpore Debugger is a debugging tool for training in Graph Mode. It can be applied to visualize and analyze the intermediate computation results of the computational graph.
+
+.. toctree::
+  :maxdepth: 1
+
+  debugger_online
+  debugger_offline
diff --git a/tutorials/training/source_en/advanced_use/debugger_offline.md b/tutorials/training/source_en/advanced_use/debugger_offline.md
new file mode 100644
index 0000000000000000000000000000000000000000..384b977719f66c0b65282abb6f2d5c5161d2ad88
--- /dev/null
+++ b/tutorials/training/source_en/advanced_use/debugger_offline.md
@@ -0,0 +1,104 @@
+# Using the Offline Debugger
+
+`Linux` `Ascend` `GPU` `Model Optimization` `Intermediate` `Expert`
+
+
+
+- [Using the Offline Debugger](#using-the-offline-debugger)
+    - [Overview](#overview)
+    - [Operation Process](#operation-process)
+    - [Environment Preparation](#environment-preparation)
+    - [UI Introduction](#ui-introduction)
+    - [Usage Example](#usage-example)
+    - [Precautions](#precautions)
+
+
+
+
+
+## Overview
+
+MindSpore offline debugger is used to perform visualized debugging based on the trained dump data. It can be used to view and analyze the intermediate results of computational graph nodes.
+
+The offline debugger can connect to offline dump data for visualized analysis. It solves the problem that the online debugger cannot be used when memory overcommitment is not enabled.
+
+## Operation Process
+
+1. Prepare dump data. For details about how to use the dump function, see [Using Dump in the Graph Mode](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/dump_in_graph_mode.html).
+2. Start MindInsight and set summary-base-dir to the directory one or two levels above the path set in the dump configuration.
+3. Find the offline debugger entry in the summary list, and click Offline Debugger. The debugger UI is displayed, and debugging analysis starts.
+
+## Environment Preparation
+
+Use the dump function of MindSpore to prepare offline data. For details about how to use the dump function, see [Using Dump in the Graph Mode](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/dump_in_graph_mode.html).
+
+Start MindInsight and set summary-base-dir to the directory one or two levels above the path set in the dump configuration. Then, you can find the offline debugger entry on the UI.
+
+MindInsight startup command:
+
+```text
+mindinsight start --port {PORT} --summary-base-dir /path/to/father/directory/of/dump_dir
+```
+
+Or:
+
+```text
+mindinsight start --port {PORT} --summary-base-dir /path/to/grandfather/directory/of/dump_dir
+```
+
+Parameters are described as follows:
+
+|Name|Attribute|Description|Type|Default Value|Range|
+|---|---|---|---|---|---|
+|`--port {PORT}`|Optional|Specifies the port number of the web visualization service.|Integer|8080|1–65535|
+|`--summary-base-dir /path/to`|Mandatory|Specifies the directory one or two levels above the path set in the dump configuration. For example, if the path in the dump configuration file is set to `/home/workspace/data/dump_dir`, summary-base-dir can be set to `/home/workspace/data` or `/home/workspace`.|String|./|-|
+
+For details about more startup parameters, see [MindInsight Commands](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/mindinsight_commands.html).
+
+Open MindInsight and access the debugger UI from the offline debugger entry.
+
+![debugger_offline_entry](images/debugger_offline_entry.png)
+
+Figure 1: Offline debugger entry
+
+## UI Introduction
+
+The UI of the offline debugger is the same as that of the online debugger. For details about the online debugger UI, see [Debugger UI Introduction](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/debugger_online.html#debugger-ui-introduction).
+
+## Usage Example
+
+1. After the debugger environment is prepared, open the debugger UI, as shown in the following figure:
+
+    ![debugger_waiting](images/debugger_offline_waiting.png)
+
+    Figure 2: Debugger waiting for training connection
+
+    At this point, the debugger is in the state of loading offline data.
+
+2. Wait for a moment. A dialog box is displayed on the MindInsight UI, asking you whether to use the recommended watchpoints. The subsequent steps are the same as those for online debugging; see [Debugger Usage Example](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/debugger_online.html#debugger-usage-example).
+
+3. Compared with the online debugger, the offline debugger can reset the training step. As shown in Figure 3, click the edit icon on the right, and an edit box will appear, as shown in Figure 4. Enter the step id to be reset and click the tick icon.
+
+    ![debugger_offline_reset](images/debugger_offline_reset.png)
+
+    Figure 3: Resetting the training step
+
+    ![debugger_offline_edit](images/debugger_offline_edit.png)
+
+    Figure 4: Resetting the training step in the edit state
+
+## Precautions
+
+- Scenarios:
+    - The offline debugger does not support the CPU scenario currently.
+    - The offline debugger supports the single-node multi-device scenario. To analyze the multi-node multi-device scenario, you need to summarize the data of multiple nodes.
+    - The offline debugger does not support checking initial weights currently.
+
+- GPU scenario:
+    - Different from the online debugger, the offline debugger does not support node-by-node execution.
+
+- If there is more than one dump file for the same tensor in the same directory, the offline debugger only shows the latest one.
+- When using the offline debugger, make sure that the version numbers of MindInsight and MindSpore are the same.
+- Only watchpoints that have tensor values are rechecked.
+- The graph displayed by the debugger is the finally optimized execution graph. The called operator may have been integrated with other operators, or the name of the called operator is changed after optimization.
+- If the asynchronous dump data in the Ascend scenario is used, you can use the `convert_all_data_to_host` API of MindInsight DumpParser to convert the asynchronous dump data into `.npy` files to improve the data analysis efficiency. For details about how to use DumpParser, see [DumpParser Introduction](https://gitee.com/mindspore/mindinsight/tree/master/mindinsight/parser).
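Tying the environment preparation above together: the dump data is produced by a normal training run with a dump configuration, and MindInsight is then pointed one or two levels above the dump path. A minimal sketch (all paths and the script name are placeholders; `MINDSPORE_DUMP_CONFIG` is the environment variable that activates the dump function):

```bash
# data_dump.json sets its "path" field to /home/workspace/data/dump_dir
export MINDSPORE_DUMP_CONFIG=/home/workspace/data/data_dump.json
python train.py   # placeholder training script; dump data lands under dump_dir

# summary-base-dir is one (or two) levels above the dump path
mindinsight start --port 8080 --summary-base-dir /home/workspace/data
```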
diff --git a/tutorials/training/source_en/advanced_use/debugger.md b/tutorials/training/source_en/advanced_use/debugger_online.md
similarity index 87%
rename from tutorials/training/source_en/advanced_use/debugger.md
rename to tutorials/training/source_en/advanced_use/debugger_online.md
index 1e37320cce20ffb9637f2ccb6f440eccf69abf0d..cd58c524e698737aecee8ec5813a9b0cc418190f 100644
--- a/tutorials/training/source_en/advanced_use/debugger.md
+++ b/tutorials/training/source_en/advanced_use/debugger_online.md
@@ -22,17 +22,16 @@
 
-
+
 
 ## Overview
 
-MindSpore Debugger is a debugging tool for training in `Graph Mode`. It can be applied to visualize and analyze the intermediate computation results of the computational graph.
-
 In `Graph Mode` training, the computation results of intermediate nodes in the computational graph cannot be acquired from the Python layer, which makes it difficult for users to debug. By applying MindSpore Debugger, users can:
 
 - Visualize the computational graph on the UI and analyze the output of the graph node.
 - Set watchpoints to monitor training exceptions (for example, tensor overflow) and trace error causes.
 - Visualize and analyze the change of parameters, such as weights.
+- Visualize the mapping relationship between nodes and code.
 
 ## Operation Process
 
@@ -43,6 +42,8 @@ In `Graph Mode` training, the computation results of intermediate nodes in the c
 
 ## Debugger Environment Preparation
 
+### Launch MindInsight in Debugger Mode
+
 At first, install MindInsight and launch it in debugger mode. In debugger mode, MindSpore will send training information to the MindInsight debugger server, and users can analyze the information on the MindInsight UI.
 
 The command to launch MindInsight in debugger mode is as follows:
@@ -61,7 +62,9 @@ The Debugger related parameters:
 
 For more launch parameters, please refer to [MindInsight Commands](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/mindinsight_commands.html).
 
-Then, set `export ENABLE_MS_DEBUGGER=1` or `export ENABLE_MS_DEBUGGER=True` to specify the training is in the debugger mode, and set the debugger host and port to which the training is connected:
+### Run the Training Script in Debug Mode
+
+To run the training script in debug mode, you need to set `export ENABLE_MS_DEBUGGER=1` or `export ENABLE_MS_DEBUGGER=True` to specify that the training is in debugger mode, and set the debugger host and port to which the training is connected:
 `export MS_DEBUGGER_HOST=127.0.0.1` (the service address must be consistent with the MindInsight host address);
 `export MS_DEBUGGER_PORT=50051` (the port must be consistent with the MindInsight debugger-port).
 
@@ -69,11 +72,11 @@ If the memory space of your equipment is limited, you can use the memory reuse m
 
 In addition, do not use data offload mode during training (you need to set `dataset_sink_mode` in `model.train` to `False`) to ensure that the debugger can obtain the training information of each step.
 
-After the debugger environment is prepared, run the training script.
+After the environment and training script are prepared, run the training script.
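Taken together, the debugger-mode launch described above amounts to a handful of commands. The following is a minimal sketch (port numbers are examples, `train.py` is a placeholder for your own script, and the `MS_DEBUGGER_PARTIAL_MEM` line for the memory reuse mode is an assumption based on the memory note above):

```bash
# Start MindInsight with the debugger server enabled
mindinsight start --enable-debugger True --debugger-port 50051

# Point the training process at the debugger server
export ENABLE_MS_DEBUGGER=1
export MS_DEBUGGER_HOST=127.0.0.1   # must match the MindInsight host address
export MS_DEBUGGER_PORT=50051       # must match the MindInsight debugger-port
export MS_DEBUGGER_PARTIAL_MEM=1    # optional: memory reuse mode for limited device memory

# Remember to pass dataset_sink_mode=False to model.train in the script
python train.py
```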
 ## Debugger UI Introduction
 
-After the training is connected, you can view the training meta information such as a computational graph on the MindInsight Debugger UI which consists of the computational graph, node list, node information, watchpoint list, and watchpoint hit list.
+After the training is connected, you can view the training meta information such as a computational graph on the MindInsight Debugger UI, which consists of the computational graph, node list, node information, watchpoint list, watchpoint hit list, stack list, and stack information.
 The Debugger UI components are shown as follows.
 
 ![debugger_init_page](./images/debugger_init_page.png)
@@ -104,7 +107,7 @@ You can filter nodes by `Graph File` and `Node Type` under `Node List`, as shown
 
 ### Graph Node Details
 
-After clicking a graph node, you can view its detailed information in the lower part of the UI, including the output and input, training steps (`Step`), as well as data types (`DType`), shapes (`Shape`), and values (`Value`) of a tensor, as shown in Figure 2.
+After clicking a graph node, you can view its detailed information in the lower part of the UI, including the output and input, training steps (`Step`), as well as data types (`DType`), shapes (`Shape`), and values (`Value`) of a tensor, as shown in Figure 2. After clicking `Download` in the `Value` column, tensor values can be downloaded as a `.npy` file; the default directory is the `Downloads` folder.
 
 In the GPU environment, select and right-click an executable graph node, and choose `Run to This Node` from the shortcut menu to run the training script to the selected node (no more than one step).
@@ -164,6 +167,28 @@ Figure 5: Viewing hit watchpoints
 
 The hit watchpoints are displayed on the left of the UI. The hit nodes and watchpoint conditions are sorted based on the node execution sequence. Each record displays the configured threshold and the actual value. In addition, after you click a record, the corresponding node is displayed in the computational graph. You can view the node information to analyze the possible cause. Click `View` to enter the tensor check view. You can view the hit watchpoint information and optimization guide, as shown in Figure 6.
 
+### Stack List
+
+You can click the switch button in the upper left corner on the debugger UI to switch from `Node List` or `Watchpoint Hit List` to `Stack List`.
+
+You can view all stack information in the stack list. After you enter a keyword in the search box, the matched stack information is displayed. The list is displayed on multiple pages. You can click the page number at the bottom to quickly go to the corresponding page.
+
+Click an item in the list to go to the node list, where you can view the node related to the code.
+
+![debugger_stack_list](images/debugger_stack_list.png)
+
+Figure 6: Stack list
+
+### Stack Information
+
+When a node is located in the graph, click the `Stack Info` tab page below the graph to view the stack information related to the node.
+
+On the Stack Info tab page, click `Search` in a row to search for all nodes related to that row. The search result is automatically displayed in the node list.
+
+![debugger_stack_info](images/debugger_stack_info.png)
+
+Figure 7: Stack information
+
 ### Recheck
 
 To perform more detailed monitoring and analysis on a node, you can modify the node to be monitored, add or delete watchpoints, and then check the current step again. The `recheck` icon is in the upper right corner of the watchpoint list, as shown in Figure 3.
@@ -192,6 +217,8 @@ The `node information`, `current step`, and `statistics` are displayed on the to
 
 Based on the tensor relationship diagram, you can analyze which tensors are used to compute the current tensor and which constants are affected by the current tensor. Abbreviations of watchpoint conditions are displayed on the diagram, helping you quickly identify the propagation path of tensor issues. Each condition abbreviation can be found in "Setting Watchpoints".
 
+Tensors can be downloaded in the tensor check view. Users can download the desired tensor for in-depth analysis or processing.
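Since both the `Download` action in the graph node details and the tensor check view above produce standard `.npy` files, a downloaded tensor can be inspected offline with plain NumPy. A minimal sketch (the file name is a placeholder for whatever the UI saves):

```python
import numpy as np

# Placeholder name for a tensor downloaded from the debugger UI
tensor = np.load("Conv2D-op107.output.0.npy")
print(tensor.shape, tensor.dtype)
# Quick checks that often explain a hit watchpoint
print("has NaN:", np.isnan(tensor).any())
print("has Inf:", np.isinf(tensor).any())
```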
 ## Debugger Usage Example
 
 1. Prepare the debugger environment, and open the MindInsight Debugger UI.
@@ -219,7 +246,7 @@
 
 6. Trigger watchpoints, as shown in Figure 5.
 
-   After watchpoints are hit, you can view the corresponding node information, find the exception cause on the tensor check view, and modify the script to rectify the fault.
+   After watchpoints are hit, you can view the corresponding node information and stack information, find the exception cause on the tensor check view or download the tensor to analyze the exception, and then modify the script to rectify the fault.
 
 ## Notices
 
@@ -236,8 +263,8 @@
 
 - GPU Scenario:
     - In the GPU scenario, only the parameter nodes that meet requirements can be compared with the previous step. For example, nodes executed on the `next node`, nodes selected when `Run to This Node` is chosen, and nodes input as `watchpoints` can be compared. In other cases, the `Compare with Previous Step` function cannot be used.
-    - The previous step in the GPU scenario is a subgraph (not a complete graph). Therefore, when multiple graphs are rechecked on a GPU, only the current subgraph can be checked again.
+- When using the debugger, make sure that the version numbers of MindInsight and MindSpore are the same.
 - Recheck only watchpoints that have tensor values.
 - To check overflow during computation, you need to enable the overflow detection function of the asynchronous dump. For details about how to enable the function, see [Asynchronous Dump](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/custom_debugging_info.html#asynchronous-dump).
 - The graph displayed by the debugger is the finally optimized execution graph. The called operator may have been integrated with other operators, or the name of the called operator is changed after optimization.
diff --git a/tutorials/training/source_en/advanced_use/distributed_training_ascend.md b/tutorials/training/source_en/advanced_use/distributed_training_ascend.md
index d948a9277761516eacdcd624c2f12086650a5939..f5fb7b34bba5367fa95402061fd1923d95289cec 100644
--- a/tutorials/training/source_en/advanced_use/distributed_training_ascend.md
+++ b/tutorials/training/source_en/advanced_use/distributed_training_ascend.md
@@ -553,9 +553,9 @@ For model parameter saving and loading in Hybrid Parallel Mode, please refer to
 
 ## Multi-machine Training
 
-The previous chapters introduced the distributed training of MindSpore, which is based on the Ascend environment of a single machine with 8 cards. Using multiple machines for distributed training can greatly improve the training speed.
+The previous chapters introduced the distributed training of MindSpore, which is based on the Ascend environment of a single machine with multiple cards. Using multiple machines for distributed training can greatly improve the training speed.
 
 In the Ascend environment, communication between NPU units across machines works the same way as communication between NPU units within a single machine: it is still performed through HCCL. The difference is that the NPU units in a single machine are naturally interoperable, while cross-machine communication requires that the networks of the two machines are interoperable.
 
-After confirming that the network of the NPU unit between the machines is smooth, configure the json configuration file of multiple machines. This tutorial takes the configuration file of 16 cards as an example. It should be noted that in the json file configuration of multiple machines, the order of rank_id is required to be consistent with the lexicographic order of server_id.
+After confirming that the network between the NPU units of the machines is connected, configure the json configuration file of multiple machines. This tutorial takes the configuration file of 16 cards as an example; for a detailed description of the configuration file, refer to the single-machine multi-card section of this tutorial. It should be noted that in the json configuration of multiple machines, the order of rank_id is required to be consistent with the lexicographic order of server_id.
 
 ```json
 {
@@ -593,16 +593,17 @@
 }
 ```
 
-After preparing the configuration file, you can organize distributed multi-machine training scripts. Taking 2 machines with 16 cards as an example, the scripts written on the two machines are similar to the running scripts of a single machine with 8 cards. The difference is that different rank_id variables are specified.
+After preparing the configuration file, you can organize distributed multi-machine training scripts. Taking 2 machines with 16 cards as an example, the scripts written on the two machines are similar to the running scripts of a single machine with multiple cards. The difference is that different rank_id variables are specified.
 
 ```bash
 #!/bin/bash
 
 echo "=============================================================================================================="
 echo "Please run the script as: "
-echo "bash run.sh DATA_PATH RANK_TABLE_FILE RANK_SIZE RANK_START"
-echo "For example: bash run.sh /path/dataset /path/rank_table.json 16 0"
+echo "bash run_cluster.sh DATA_PATH RANK_TABLE_FILE RANK_SIZE RANK_START"
+echo "For example: bash run_cluster.sh /path/dataset /path/rank_table.json 16 0"
 echo "It is better to use the absolute path."
+echo "The time interval for executing the script on different machines should not exceed 120s"
 echo "=============================================================================================================="
 
 execute_path=$(pwd)
@@ -627,7 +628,7 @@ do
 done
 ```
 
-For the reference scripts listed above, the required code organization structure is as follows. The script will get the path of the script and the path of the command execution, and put all tasks in the background for execution.
+For the reference scripts listed above, the required code organization structure is as follows. The script will get the path of the script and the path of the command execution, and put all tasks in the background for execution. The code link can be obtained at the top of this tutorial.
 
 ```text
 └─tutorial_code
diff --git a/tutorials/training/source_en/advanced_use/distributed_training_tutorials.rst b/tutorials/training/source_en/advanced_use/distributed_training_tutorials.rst
index ac9d237c4204d9ea74185bd848e1ef0625a0be95..4f5092687175b1de5ae4170d97637bc6d3b94bba 100644
--- a/tutorials/training/source_en/advanced_use/distributed_training_tutorials.rst
+++ b/tutorials/training/source_en/advanced_use/distributed_training_tutorials.rst
@@ -10,12 +10,12 @@ In deep learning, the increasing number of datasets and parameters prolongs the
 
 MindSpore also provides the parallel distributed training function. It supports the following modes:
 
 - `DATA_PARALLEL`: data parallelism.
-- `AUTO_PARALLEL`: automatic parallelism, which integrates data parallelism, model parallelism, and hybrid parallelism. A cost model can be automatically created to find the parallel strategy with a relatively short training time and to select one parallel mode for users. MindSpore offers two different strategy search algorithms as follows:
+- `AUTO_PARALLEL`: automatic parallelism, which is under development and has only been validated on some specific networks. Auto parallel integrates data parallelism, model parallelism, and hybrid parallelism. A cost model can be automatically created to find the parallel strategy with a relatively short training time and to select one parallel mode for users. MindSpore offers two different strategy search algorithms as follows:
 
     - `dynamic_programming`: Dynamic programming search algorithm. The optimal strategy described by the cost model can be found, but it takes a long time to search for the parallel strategy of a huge network model. Its cost model refers to modeling the training time based on the memory-based computation and communication overheads of the Ascend 910 chip.
     - `recursive_programming`: Double recursive programming search algorithm. The optimal strategy can be generated instantly even for a large network or for a large-scale multi-device partitioning need. Its symbolic cost model can flexibly adapt to different accelerator clusters.
 
-- `HYBRID_PARALLEL`: On MindSpore, users manually split parameters to implement intra-layer model parallelism.
+- `HYBRID_PARALLEL`: On MindSpore, users manually split parameters based on communication primitives to implement intra-layer model parallelism.
 
 .. toctree::
    :maxdepth: 1
diff --git a/tutorials/training/source_en/advanced_use/dump_in_graph_mode.md b/tutorials/training/source_en/advanced_use/dump_in_graph_mode.md
index 58e1dea9f9de5fe2861384479407453debdfa7b5..f78b23e962b5e893ba9fa7f682130e8d9e0d4720 100644
--- a/tutorials/training/source_en/advanced_use/dump_in_graph_mode.md
+++ b/tutorials/training/source_en/advanced_use/dump_in_graph_mode.md
@@ -36,6 +36,18 @@ Aiming at the static graph mode, this tutorial introduces how to analyze and com
 
 ### Debugging Process
 
+Using dump to help debugging is divided into two steps: 1. data preparation; 2. data analysis.
+
+#### Data preparation
+
+The data preparation phase uses synchronous dump or asynchronous dump to generate dump data. See [Synchronous Dump Step](#synchronous-dump-step) and [Asynchronous Dump Step](#asynchronous-dump-step) for details.
+
+#### Data analysis
+
+If you have installed MindInsight, you can use the offline debugger of MindInsight to analyze the dump data. See [Using the Offline Debugger](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/debugger_offline.html) for the usage of the offline debugger.
+
+If MindInsight is not installed, you need to analyze the data through the following steps.
+
 1. Find the corresponding operator from the script. The Dump function needs to use the IR file of the final execution graph. The IR file can be viewed with the `vi` command. The IR file contains the full name of the operator, the dependency of the operator on the input and output of the computational graph, and the trace information from the operator to the corresponding script code. For the configuration of the Dump function, see [Synchronous Dump Step](#synchronous-dump-step) and [Asynchronous Dump Step](#asynchronous-dump-step). For the naming and directory structure of the IR file of the final execution graph, see [Synchronous Dump Data Object Directory](#synchronous-dump-data-object-directory) and [Asynchronous Dump Data Object Directory](#asynchronous-dump-data-object-directory). Then find the operator corresponding to the code in the script through the graph file; refer to [Synchronous Dump Data Analysis Sample](#synchronous-dump-data-analysis-sample) and [Asynchronous Dump Data Analysis Sample](#asynchronous-dump-data-analysis-sample).
@@ -71,11 +83,10 @@ MindSpore provides two modes: synchronous dump and asynchronous dump:
 
 The configuration files required for different modes and the data format of dump are different:
 
-- Synchronous mode takes up more memory than asynchronous mode, but it is easier to use.
-- Generally, for small and medium-sized networks (such as ResNet), it is recommended to use the synchronous dump mode first. When the network does not occupy much memory, please use synchronous dump first.If an error of insufficient device memory occurs after enabling synchronous dump, please use asynchronous dump in the next section.
 - When Dump is enabled on Ascend, the operator to Dump will automatically close memory reuse.
 - Synchronous Dump supports the graph mode on Ascend, GPU and CPU, and currently does not support PyNative mode.
 - Asynchronous Dump only supports graph mode on Ascend, not PyNative mode. Memory reuse will not be turned off when asynchronous dump is enabled.
+- The default is asynchronous mode. If synchronous mode is needed, `e2e_dump_settings` should be set in the configuration file.
 
 ## Synchronous Dump
 
@@ -89,7 +100,7 @@ The configuration files required for different modes and the data format of dump
         "dump_mode": 0,
         "path": "/absolute_path",
         "net_name": "ResNet50",
-        "iteration": 0,
+        "iteration": "0|5-8|100-120",
         "input_output": 0,
         "kernels": ["Default/Conv-op12"],
         "support_device": [0,1,2,3,4,5,6,7]
@@ -104,11 +115,11 @@ The configuration files required for different modes and the data format of dump
 
     - `dump_mode`: 0: dump all kernels in graph, 1: dump kernels in kernels list.
    - `path`: The absolute path to save dump data.
    - `net_name`: The net name, e.g. ResNet50.
-    - `iteration`: Specify the iterations to dump. Iteration should be set to 0 when dataset_sink_mode is False and data of every iteration will be dumped.
+    - `iteration`: Specify the iterations to dump; the type is string. Use "|" to separate the different intervals of steps to be saved. For example, "0|5-8|100-120" means dumping the data of the 1st step, the 6th to 9th steps, and the 101st to 121st steps (that is, the steps whose ids are 0, 5-8, and 100-120). If iteration is set to "all", data of every iteration will be dumped.
    - `input_output`: 0: dump input and output of kernel, 1: dump input of kernel, 2: dump output of kernel. This configuration parameter only supports Ascend and CPU; GPU can only dump the output of an operator.
    - `kernels`: List of operator names. Turn on the IR save switch `context.set_context(save_graphs=True)` and execute the network to obtain the operator name from the generated `trace_code_graph_{graph_id}` IR file. For details, please refer to [Saving IR](https://www.mindspore.cn/doc/note/en/master/design/mindspore/mindir.html#saving-ir).
    - `support_device`: Supported devices, default setting is `[0,1,2,3,4,5,6,7]`. You can specify specific device ids to dump specific device data. This configuration parameter is invalid on the CPU, because there is no concept of device on the CPU.
-    - `enable`: Enable Asynchronous Dump. If synchronous dump and asynchronous dump are enabled at the same time, only synchronous dump will take effect.
+    - `enable`: Enable synchronous dump.
    - `trans_flag`: Enable trans flag. Transform the device data format into NCHW.
If it is `True`, the data will be saved in the 4D format (NCHW) format on the Host side; if it is `False`, the data format on the Device side will be retained. This configuration parameter is invalid on the CPU, because there is no format conversion on the CPU. 2. Specify the json configuration file of Dump. @@ -134,7 +145,7 @@ The configuration files required for different modes and the data format of dump You can set `context.set_context(reserve_class_name_in_scope=False)` in your training script to avoid dump failure because of file name is too long. -4. Read and parse synchronous dump data through `numpy.fromfile`, refer to [Introduction to Synchronous Dump Data File](#introduction-to-synchronous-dump-data-file). +4. Read and parse synchronous dump data through `numpy.load`, refer to [Introduction to Synchronous Dump Data File](#introduction-to-synchronous-dump-data-file). ### Synchronous Dump Data Object Directory @@ -142,43 +153,44 @@ After starting the training, the data objects saved by the synchronous Dump incl ```text {path}/ - |-- {net_name}/ - |-- {device_id}/ - |-- iteration_{iteration}/ - -- {op_name}_{input_output_index}_{shape}_{data_type}_{format}.bin - … - |-- graphs/ - ms_output_trace_code_graph_{graph_id}.pb - ms_output_trace_code_graph_{graph_id}.ir - |-- execution_order/ - ms_execution_order_graph_{graph_id}.csv - - |-- .metadata/ - data_dump.json + - rank_{rank_id}/ + - .dump_metadata/ + - {net_name}/ + - {graph_id}/ + - {iteration_id}/ + {op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}.{input_output_index}.{slot}.{format}.npy + ... + - graphs/ + ms_output_trace_code_graph_{graph_id}.pb + ms_output_trace_code_graph_{graph_id}.ir + - execution_order/ + ms_execution_order_graph_{graph_id}.csv + ``` - `path`: the absolute path set in the `data_dump.json` configuration file. +- `rank_id`: the id of the logic device. - `net_name`: the network name set in the `data_dump.json` configuration file. -- `device_id`: the id of the training device. - `graph_id`: the id of the training graph. -- `iteration`: the iteration of the training. -- `operator_name`: the name of the operator. +- `iteration_id`: the iteration of the training. +- `op_type`: the type of the operator. +- `op_name`: the name of the operator. +- `task_id`: the id of the task. +- `stream_id`: the id of the stream. +- `timestamp`: the time stamp. - `input_output_index` : the index of input or output. For example, `output_0` means that the file is the data of the first output Tensor of the operator. -- `shape`: Tensor dimension information. -- `data_type`: the type of the data. +- `slot`: the id of the slot. - `format`: the format of the data. -When data dump is performed on the CPU, there is no directory level of `device_id`, because there is no concept of device on the CPU, and there are no `graphs`, `execution_order` and `.metadata` directories. - ### Introduction to Synchronous Dump Data File -The data file generated by the synchronous Dump is a binary file with the suffix `.bin`, and the file naming format is: +The data file generated by the synchronous Dump is a binary file with the suffix `.npy`, and the file naming format is: ```text -{operator_name}_{input_output_index}_{shape}_{data_type}_{format}.bin +{op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}.{input_output_index}.{slot}.{format}.npy ``` -According to the `Tensor` information provided by the file name, you can use `numpy.fromfile` to read the data and restore the `data_type` and `shape` of the original data. 
+Users can use the NumPy interface `numpy.load` to read the data.
 
 The suffixes of the final execution graph files generated by synchronous Dump are `.pb` and `.ir` respectively, and the file naming format is:
@@ -195,7 +207,7 @@ The suffix of the node execution sequence file generated by the synchronous Dump
 ms_execution_order_graph_{graph_id}.csv
 
-`.metadata` records the original training information, and `data_dump.json` saves the dump configuration set by the user.
+`.dump_metadata` records the original training information, and `data_dump.json` saves the dump configuration set by the user.
 
 ### Synchronous Dump Data Analysis Sample
 
@@ -320,36 +332,23 @@ The meanings of the lines in the file content shown above are as follows:
 
 Through the operator name and the input and output information, you can find the unique corresponding tensor data file. For example, if you want to view the dump file corresponding to the first output data of the Conv2D-op107 operator, you can obtain the following information:
 
-- `operator_name`: `Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107`. Based on the operator name declared in sequence number 2 in the graph, replace `/` with `--` to get it.
+- `operator_name`: `Conv2D-op107`.
 
-- `input_output_index`: `output_0` indicates that the file is the data of the first output Tensor of the operator.
+- `input_output_index`: `output.0` indicates that the file is the data of the first output Tensor of the operator.
 
-Search for the corresponding file name in the data object file directory saved by Dump:
-`Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107_output_0_shape_32_12_13_13_16_Float16_NC1HWC0.bin`.
-
-The following information can be obtained from the file name:
-
-- `shape`: The tensor dimension is `32_12_13_13_16`.
-
-- `data_type`: The data type is `Float16`.
+- `slot`: 0, this tensor only has one slot.
 
-- `format`: The data format is `NC1HWC0` (the data format to be saved can be modified through the Dump configuration file).
-
-When restoring data, first execute:
-
-```python
-import numpy
-numpy.fromfile("Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107_output_0_shape_32_12_13_13_16_Float16_NC1HWC0.bin", numpy.float16)
-```
+Search for the corresponding file name in the data object file directory saved by Dump:
+`Conv2d.Conv2D-op107.2.2.1623124369613540.output.0.DefaultFormat.npy`.
 
-One-dimensional array data is generated, and then execute:
+When restoring data, execute:
 
 ```python
 import numpy
-numpy.reshape(array, (32,12,13,13,16))
+numpy.load("Conv2d.Conv2D-op107.2.2.1623124369613540.output.0.DefaultFormat.npy")
 ```
 
-Restore to the original shape data.
+The data is restored in `numpy.array` format.
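Because the dump output is now a standard NumPy file, a few extra lines beyond the `numpy.load` call above are often useful when analyzing a tensor. A small sketch (file name taken from the example above):

```python
import numpy as np

# .npy files preserve both dtype and shape, so no manual reshape is needed
data = np.load("Conv2d.Conv2D-op107.2.2.1623124369613540.output.0.DefaultFormat.npy")
print(data.shape, data.dtype)
# Quick statistics that help spot anomalies such as overflow
print(data.min(), data.max(), data.mean())
```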
 ## Asynchronous Dump
 
@@ -367,13 +366,10 @@ Large networks (such as Bert Large) will cause memory overflow when using synch
         "dump_mode": 0,
         "path": "/absolute_path",
         "net_name": "ResNet50",
-        "iteration": 0,
+        "iteration": "0|5-8|100-120",
         "input_output": 0,
         "kernels": ["Default/Conv-op12"],
-        "support_device": [0,1,2,3,4,5,6,7]
-    },
-    "async_dump_settings": {
-        "enable": true,
+        "support_device": [0,1,2,3,4,5,6,7],
         "op_debug_mode": 0
     }
 }
@@ -382,7 +378,7 @@
 
    - `dump_mode`: 0: dump all kernels in graph, 1: dump kernels in kernels list.
    - `path`: The absolute path to save dump data.
    - `net_name`: The net name, e.g. ResNet50.
-    - `iteration`: Specify the iterations to dump. Iteration should be set to 0 when dataset_sink_mode is False and data of every iteration will be dumped.
+    - `iteration`: Specify the iterations to dump; the type is string. Use "|" to separate the different intervals of steps to be saved. For example, "0|5-8|100-120" means dumping the data of the 1st step, the 6th to 9th steps, and the 101st to 121st steps (that is, the steps whose ids are 0, 5-8, and 100-120; see the expansion sketch after the step list below). If iteration is set to "all", data of every iteration will be dumped.
    - `input_output`: When set to 0, it means to Dump the operator's input and output; setting it to 1 means to Dump the operator's input; setting it to 2 means to Dump the output of the operator.
    - `kernels`: List of operator names. Turn on the IR save switch `context.set_context(save_graphs=True)` and execute the network to obtain the operator name from the generated `trace_code_graph_{graph_id}` IR file. `kernels` only supports TBE operators, AiCPU operators and communication operators. The data of the input operator of a communication operation will be dumped if `kernels` is set to the name of the communication operator. For details, please refer to [Saving IR](https://www.mindspore.cn/doc/note/en/master/design/mindspore/mindir.html#saving-ir).
    - `support_device`: Supported devices, default setting is `[0,1,2,3,4,5,6,7]`. You can specify specific device ids to dump specific device data.
@@ -405,8 +401,6 @@
 
 4. Refer to [Asynchronous Dump Data Analysis Sample](#asynchronous-dump-data-analysis-sample) to analyze the Dump data file.
 
 - If you need to dump all or part of the operators, you can modify the `dump_mode` option in the json configuration file to 0 or 1.
-- If the data sink function is enabled (the `dataset_sink_mode` parameter in `model.train` or `DatasetHelper` is set to `True`), only the data of the one step specified in the configuration file can be dumped (in this case, `iteration 0` means the 0th step) and saved to the specified directory.
-- If the data sink function is not enabled (the `dataset_sink_mode` parameter in `model.train` or `DatasetHelper` is set to `False`), `iteration` in the configuration file must be specified as 0, and the data of all steps is stored in one directory, so multi-step data management is not supported. At this time, it is recommended to execute the step data dump only once (you can train only one step by modifying the script).
 - Using the Dump function will automatically generate the IR file of the final execution graph.
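To make the `iteration` interval syntax described above concrete, here is a purely illustrative helper (not part of MindSpore) that expands such a specification into explicit step ids:

```python
def expand_iteration_spec(spec):
    """Expand a dump iteration spec such as "0|5-8|100-120" into explicit step ids."""
    if spec == "all":
        return "all"
    ids = []
    for part in spec.split("|"):
        if "-" in part:
            start, end = part.split("-")
            ids.extend(range(int(start), int(end) + 1))
        else:
            ids.append(int(part))
    return ids

print(expand_iteration_spec("0|5-8|100-120")[:6])  # [0, 5, 6, 7, 8, 100]
```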
 ### Asynchronous Dump Data Object Directory
 
@@ -415,30 +409,29 @@ The data objects saved by asynchronous Dump include the final execution graph (`
 
 ```text
 {path}/
-    |-- {device_id}/
-        |-- {net_name}_graph_{graph_id}/
-            |-- {graph_id}/
-                |-- {iteration}/
-                    |-- {op_type}.{op_name}.{task_id}.{timestamp}
-                    …
-    |-- graphs/
+    - rank_{rank_id}/
+        - .dump_metadata/
+        - {net_name}/
+            - {graph_id}/
+                - {iteration_id}/
+                    {op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}
+                    ...
+        - graphs/
             ms_output_trace_code_graph_{graph_id}.pb
             ms_output_trace_code_graph_{graph_id}.ir
-    |-- execution_order/
+        - execution_order/
             ms_execution_order_graph_{graph_id}.csv
-
-    |-- .metadata/
-        data_dump.json
 ```
 
 - `path`: the absolute path set in the `data_dump.json` configuration file.
+- `rank_id`: the id of the logic device.
 - `net_name`: the network name set in the `data_dump.json` configuration file.
-- `device_id`: the id of the training device.
 - `graph_id`: the id of the training graph.
-- `iteration`: the iteration of the training.
+- `iteration_id`: the iteration of the training.
 - `op_type`: the type of the operator.
 - `op_name`: the name of the operator.
-- `taskid`: the id of the task.
+- `task_id`: the id of the task.
+- `stream_id`: the id of the stream.
 - `timestamp`: the time stamp.
 
 ### Introduction to Asynchronous Dump Data File
@@ -456,6 +449,9 @@ Take the Dump result of a simple network as an example: `Add.Default_Add-op1.2.1
 
 If ".", "/", "\", and spaces appear in `op_type` and `op_name`, they will be converted to underscores.
 
+The original data file generated by dump can also be parsed by using the data parsing tool DumpParser of MindInsight. Please refer to [DumpParser Introduction](https://gitee.com/mindspore/mindinsight/blob/master/mindinsight/parser/README.md) for the usage of DumpParser.
+The data format parsed by MindInsight is exactly the same as that of synchronous dump.
+
 The final execution graph file and node execution sequence file naming rules generated by asynchronous Dump are the same as those of synchronous Dump. You can refer to [Introduction to Synchronous Dump Data File](#introduction-to-synchronous-dump-data-file).
 
 ### Asynchronous Dump Data Analysis Sample
@@ -476,7 +472,7 @@ Through the asynchronous Dump function, the data files generated by the operator
     python ${The absolute path of msaccucmp.py} convert -d {file path of dump} -out {file path of output}
     ```
 
-    Or you can use `msaccucmp.py` to convert the format of dump file. Please see .
+    Or you can use `msaccucmp.py` to convert the format of dump file. Please see .
 
 For example, the data file generated by Dump is:
diff --git a/tutorials/training/source_en/advanced_use/hpc_sponge.md b/tutorials/training/source_en/advanced_use/hpc_sponge.md
index e882525f0e9824d2ea9ee6afd586e43675c22619..d95af25de99bab398b7196e2724213b31f456fbe 100644
--- a/tutorials/training/source_en/advanced_use/hpc_sponge.md
+++ b/tutorials/training/source_en/advanced_use/hpc_sponge.md
@@ -106,7 +106,7 @@ NVT 290k
     langevin_gamma=1.0,              # Gamma_ln for Langevin thermostat represents coupling strength between thermostat and system
     target_temperature=290,          # Target temperature
     write_information_interval=1000, # Output frequency
-    amber_irest=1,                   # Input style ; amber_irest=1 for using amber style input & rst7 file contains veclocity
+    amber_irest=0,                   # Input style ; amber_irest=1 for using amber style input & rst7 file contains velocity
     cut=10.0,                        # Nonbonded cutoff distance in Angstroms
@@ -116,7 +116,7 @@ NVT 290k
 - `thermostat`, represents the method of temperature control, `1` represents to use `Liujian-Langevin`.
 - `langevin_gamma`, represents the `Gamma_In` parameters in the thermostat.
 - `target_temperature`, represents the target temperature.
-- `amber_irest`, represents the input mode, `1` represents to use the amber mode to input, and files suffix `rst7` includes the attribute `veclocity`.
+- `amber_irest`, represents the input mode; `0` represents using the amber mode to input, and the file with suffix `rst7` does not include the attribute `velocity`.
 - `cut`, represents the distance of non-bonding interaction.
 
 ### Loading Data
@@ -199,7 +199,7 @@ After training, output file `ala_NVT_290_10ns.out` can be obtained, which record
 
 ```text
  _steps_ _TEMP_ _TOT_POT_ENE_ _BOND_ENE_ _ANGLE_ENE_ _DIHEDRAL_ENE_ _14LJ_ENE_ _14CF_ENE_ _LJ_ENE_ _CF_PME_ENE_
-      1 293.105 -6117.709 1204.406 7.096 4.491 3.456 44.018 1372.488 -8753.664
+      0 0.000 -5713.804 0.037 0.900 14.909 9.072 194.477 765.398 -6698.648
 ...
``` diff --git a/tutorials/training/source_en/advanced_use/images/cluster_flops.png b/tutorials/training/source_en/advanced_use/images/cluster_flops.png new file mode 100644 index 0000000000000000000000000000000000000000..88bfee623eed8c38d1c7ce09c7a69c43a344a4dc Binary files /dev/null and b/tutorials/training/source_en/advanced_use/images/cluster_flops.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_ask_recommend.png b/tutorials/training/source_en/advanced_use/images/debugger_ask_recommend.png old mode 100755 new mode 100644 index b25b57e86131544a8375eef42261833a480ffbab..f705ee0d8bd9629deee89274ac4b1c827c3983b2 Binary files a/tutorials/training/source_en/advanced_use/images/debugger_ask_recommend.png and b/tutorials/training/source_en/advanced_use/images/debugger_ask_recommend.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_init_page.png b/tutorials/training/source_en/advanced_use/images/debugger_init_page.png old mode 100755 new mode 100644 index 665321b7fe5978d8412b85901c0a3af9573766ac..3491b5b2e366994e9c47f53f1a1747ac670af3dd Binary files a/tutorials/training/source_en/advanced_use/images/debugger_init_page.png and b/tutorials/training/source_en/advanced_use/images/debugger_init_page.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_offline_edit.png b/tutorials/training/source_en/advanced_use/images/debugger_offline_edit.png new file mode 100644 index 0000000000000000000000000000000000000000..c7d097411a94d0014a3dc8d7604df8a145043a19 Binary files /dev/null and b/tutorials/training/source_en/advanced_use/images/debugger_offline_edit.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_offline_entry.png b/tutorials/training/source_en/advanced_use/images/debugger_offline_entry.png new file mode 100644 index 0000000000000000000000000000000000000000..2e4a4cd838e1363e417fccb92a7f26cb93726779 Binary files /dev/null and b/tutorials/training/source_en/advanced_use/images/debugger_offline_entry.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_offline_reset.png b/tutorials/training/source_en/advanced_use/images/debugger_offline_reset.png new file mode 100644 index 0000000000000000000000000000000000000000..9e482911185930fa6781cfd2d14835b9f7415af5 Binary files /dev/null and b/tutorials/training/source_en/advanced_use/images/debugger_offline_reset.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_offline_waiting.png b/tutorials/training/source_en/advanced_use/images/debugger_offline_waiting.png new file mode 100644 index 0000000000000000000000000000000000000000..7b08445e17abcae1c3587969a7bb0a91fbb15706 Binary files /dev/null and b/tutorials/training/source_en/advanced_use/images/debugger_offline_waiting.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_search_node_type.png b/tutorials/training/source_en/advanced_use/images/debugger_search_node_type.png old mode 100755 new mode 100644 index 21f5e8a25cb270caacd1fb152d6c1020b4d6f1e8..c51b0d377f8fa2e139fac9a04ab44ad7e199c4a3 Binary files a/tutorials/training/source_en/advanced_use/images/debugger_search_node_type.png and b/tutorials/training/source_en/advanced_use/images/debugger_search_node_type.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_set_watch_point.png b/tutorials/training/source_en/advanced_use/images/debugger_set_watch_point.png old mode 100755 new mode 100644 index 
baf9f57495be5c3492cb33b6a88d5d077ec70f2c..c147f53ad3238efd26a7d9a80667673674394918 Binary files a/tutorials/training/source_en/advanced_use/images/debugger_set_watch_point.png and b/tutorials/training/source_en/advanced_use/images/debugger_set_watch_point.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_stack_info.png b/tutorials/training/source_en/advanced_use/images/debugger_stack_info.png new file mode 100644 index 0000000000000000000000000000000000000000..4b61ca9e93f6673644a821ab7fdbe9d93997aff8 Binary files /dev/null and b/tutorials/training/source_en/advanced_use/images/debugger_stack_info.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_stack_list.png b/tutorials/training/source_en/advanced_use/images/debugger_stack_list.png new file mode 100644 index 0000000000000000000000000000000000000000..4bb8f4c79e5a0ffd849b0f85b75b231d5b4de481 Binary files /dev/null and b/tutorials/training/source_en/advanced_use/images/debugger_stack_list.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_tensor_view.png b/tutorials/training/source_en/advanced_use/images/debugger_tensor_view.png old mode 100755 new mode 100644 index 62d7c97318799fc0ed8fa1f4a2a418116326bf74..72036d91bba752f718dfe735625b784a84e5841c Binary files a/tutorials/training/source_en/advanced_use/images/debugger_tensor_view.png and b/tutorials/training/source_en/advanced_use/images/debugger_tensor_view.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_waiting.png b/tutorials/training/source_en/advanced_use/images/debugger_waiting.png old mode 100755 new mode 100644 index 47dfb444fdca544d3817e892d887a65689cd04b3..279f5278593938df78b28c47a52d1514851cb3aa Binary files a/tutorials/training/source_en/advanced_use/images/debugger_waiting.png and b/tutorials/training/source_en/advanced_use/images/debugger_waiting.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_watch_point_hit.png b/tutorials/training/source_en/advanced_use/images/debugger_watch_point_hit.png old mode 100755 new mode 100644 index e1da9aa506baeceda9bfe85a5b853bec415af7eb..1f921a5ca08073e9859e7e54518a2ee444d49093 Binary files a/tutorials/training/source_en/advanced_use/images/debugger_watch_point_hit.png and b/tutorials/training/source_en/advanced_use/images/debugger_watch_point_hit.png differ diff --git a/tutorials/training/source_en/advanced_use/images/debugger_watch_point_list.png b/tutorials/training/source_en/advanced_use/images/debugger_watch_point_list.png old mode 100755 new mode 100644 index 609186d4a80e6fe48551ae154e27b637158a9f60..94ec0883bcf57edc4bd5034f68e4bc92d4551ad8 Binary files a/tutorials/training/source_en/advanced_use/images/debugger_watch_point_list.png and b/tutorials/training/source_en/advanced_use/images/debugger_watch_point_list.png differ diff --git a/tutorials/training/source_en/advanced_use/implement_high_order_differentiation.md b/tutorials/training/source_en/advanced_use/implement_high_order_differentiation.md index 1ba59dc396ee0fa20b9f4645a80ef4ba32913aeb..ea597529238761d4accb6f595ce3eb57435db5ee 100644 --- a/tutorials/training/source_en/advanced_use/implement_high_order_differentiation.md +++ b/tutorials/training/source_en/advanced_use/implement_high_order_differentiation.md @@ -217,6 +217,61 @@ The output is as follows: [0. 0. 0. ]] ``` +## Stop Gradient + +We can use `stop_gradient` to disable calculation of gradient for certain operators. 
For example:
+
+```python
+import mindspore.nn as nn
+import mindspore.ops as ops
+from mindspore import Tensor
+from mindspore import dtype as mstype
+from mindspore.ops.functional import stop_gradient
+
+class Net(nn.Cell):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.matmul = ops.MatMul()
+
+    def construct(self, x, y):
+        out1 = self.matmul(x, y)
+        out2 = self.matmul(x, y)
+        # cut out2 off from the gradient computation
+        out2 = stop_gradient(out2)
+        out = out1 + out2
+        return out
+
+class GradNetWrtX(nn.Cell):
+    def __init__(self, net):
+        super(GradNetWrtX, self).__init__()
+        self.net = net
+        self.grad_op = ops.GradOperation()
+
+    def construct(self, x, y):
+        gradient_function = self.grad_op(self.net)
+        return gradient_function(x, y)
+
+x = Tensor([[0.8, 0.6, 0.2], [1.8, 1.3, 1.1]], dtype=mstype.float32)
+y = Tensor([[0.11, 3.3, 1.1], [1.1, 0.2, 1.4], [1.1, 2.2, 0.3]], dtype=mstype.float32)
+output = GradNetWrtX(Net())(x, y)
+print(output)
+```
+
+```text
+[[4.5, 2.7, 3.6],
+ [4.5, 2.7, 3.6]]
+```
+
+Here, we applied `stop_gradient` to `out2`, so `out2` makes no contribution to the gradient. If we delete `out2 = stop_gradient(out2)`, the result is:
+
+```text
+[[9.0, 5.4, 7.2],
+ [9.0, 5.4, 7.2]]
+```
+
+Without `stop_gradient` applied to `out2`, it makes the same contribution to the gradient as `out1`, so every value in the result doubles.
+
 ## High-order Derivation
 
 MindSpore can support high-order derivatives by computing derivatives for multiple times. The following uses several examples to describe how to compute derivatives.
diff --git a/tutorials/training/source_en/advanced_use/model_encrypt_protection.md b/tutorials/training/source_en/advanced_use/model_encrypt_protection.md
new file mode 100644
index 0000000000000000000000000000000000000000..e6deaf151e727b7bc7d6a676e4d34f8c3afce919
--- /dev/null
+++ b/tutorials/training/source_en/advanced_use/model_encrypt_protection.md
@@ -0,0 +1,5 @@
+# Model Encrypt Protection
+
+No English version is available right now; contributions are welcome.
+
+   
\ No newline at end of file
diff --git a/tutorials/training/source_en/advanced_use/parameterized_quantum_circuit.md b/tutorials/training/source_en/advanced_use/parameterized_quantum_circuit.md
index 127050fabf6d38e7e8157cbe3e02a7a99e18ebe4..1475a59eb2ec7b2bb59b4a4b600a11c9ef0311e6 100644
--- a/tutorials/training/source_en/advanced_use/parameterized_quantum_circuit.md
+++ b/tutorials/training/source_en/advanced_use/parameterized_quantum_circuit.md
@@ -201,7 +201,7 @@ In the following, we will construct a quantum neural network, whose encoder cons
 ![simple qnn](./images/simple_qnn.png)
 
 ```python
-from projectq.ops import QubitOperator
+from mindquantum.ops import QubitOperator
 
 @circuit_generator(2)
 def encoder(qubits):
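The quantum tutorials in this patch swap the `QubitOperator` import from `projectq.ops` to `mindquantum.ops`. As a quick sanity check that existing code keeps working after the swap, a minimal sketch (assuming the MindQuantum class keeps the projectq-style constructor, as the unchanged surrounding code suggests):

```python
from mindquantum.ops import QubitOperator

# Same constructor style as the projectq class: a Pauli string plus a coefficient
ham = QubitOperator('Z0 Z1', 0.5) + QubitOperator('X0', 0.3)
print(ham)
```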
diff --git a/tutorials/training/source_en/advanced_use/performance_profiling_ascend.md b/tutorials/training/source_en/advanced_use/performance_profiling_ascend.md
index ee9e753067a9a7b205786a7bc52deb227e22145b..00fab445e3d78e3adbf4de2b6d13223a82e3e5ff 100644
--- a/tutorials/training/source_en/advanced_use/performance_profiling_ascend.md
+++ b/tutorials/training/source_en/advanced_use/performance_profiling_ascend.md
@@ -113,6 +113,10 @@ In order to divide the stages, the Step Trace Component need to figure out the f
 
 The operator performance analysis component is used to display the execution time of the operators (AICORE/AICPU/HOSTCPU) during MindSpore run. The AICORE operator contains the information about calculation quantity.
 
+- AICORE: The AI Core operator is the main component of the computing core of the Ascend AI processor and is responsible for executing vector- and tensor-related computation-intensive operators. TBE (Tensor Boost Engine) is an extended operator development tool based on the TVM (Tensor Virtual Machine) framework, and users can use TBE to register AI Core operator information.
+- AICPU: The AI CPU operator is a kind of CPU operator (covering control operators, scalar and vector general-purpose computations, and so on) that the AI CPU in the HiSilicon SoC of the Ascend processor is responsible for executing. The same operator in MindSpore may have both an AI Core and an AI CPU implementation; the framework gives priority to the AI Core operator, and calls the AI CPU operator if there is no AI Core operator or the selection conditions are not met.
+- HOSTCPU: The host-side CPU is mainly responsible for distributing the graph or operators to the Ascend chip; operators can also be developed on the host-side CPU according to actual needs. The host CPU operator refers to an operator running on the host-side CPU.
+
 ![op_type_statistics.png](./images/op_type_statistics.PNG)
 
 Figure 3: Statistics for Operator Types
diff --git a/tutorials/training/source_en/advanced_use/performance_profiling_ascend_of_cluster.md b/tutorials/training/source_en/advanced_use/performance_profiling_ascend_of_cluster.md
index b6879ef8dfe720bdc73d17825723bc008260dddb..b7e2a31d4ebf044cdfcb8ca0ee360a3eea2511d5 100644
--- a/tutorials/training/source_en/advanced_use/performance_profiling_ascend_of_cluster.md
+++ b/tutorials/training/source_en/advanced_use/performance_profiling_ascend_of_cluster.md
@@ -305,6 +305,18 @@ This page shows the memory usage of the model on the **device side** in the para
 
 Figure 3: The page of cluster memory analysis
 
+### Cluster FLOPs Analysis
+
+This page shows the FLOPs data for each device in the parallel mode. The content of the page includes:
+
+- The distribution of cluster devices: which servers and which devices are used.
+- The relative size of FLOPs among cluster devices. The color of each device's rectangular block represents the ratio of that device's FLOPs to the maximum FLOPs among all devices.
+- Click on a device to jump to its operator time-consuming details page, which contains detailed FLOPs data.
+
+![cluster_flops.png](./images/cluster_flops.png)
+
+Figure 4: The page of cluster FLOPs analysis
+
 ## Specifications
 
 - To limit the data size generated by the Profiler, MindInsight suggests that for large neural networks, the profiled steps should be less than 10.
diff --git a/tutorials/training/source_en/advanced_use/qnn_for_nlp.md b/tutorials/training/source_en/advanced_use/qnn_for_nlp.md
index a7713553904e9ac30d266c246700c9aeeaad4f69..4ceab0304f7552f0605d0eee54b3c42e47934dfb 100644
--- a/tutorials/training/source_en/advanced_use/qnn_for_nlp.md
+++ b/tutorials/training/source_en/advanced_use/qnn_for_nlp.md
@@ -30,7 +30,7 @@ Import relevant dependencies of the tutorial.
```python import numpy as np import time -from projectq.ops import QubitOperator +from mindquantum.ops import QubitOperator import mindspore.ops as ops import mindspore.dataset as ds from mindspore import nn @@ -129,7 +129,7 @@ label = 2 # label need to encode label_bin = bin(label)[-1:1:-1].ljust(n_qubits,'0') # binary form of label label_array = np.array([int(i)*np.pi for i in label_bin]).astype(np.float32) # parameter value of encoder encoder = GenerateEncoderCircuit(n_qubits, prefix='e') # encoder circuit -encoder_para_names = encoder.parameter_resolver().para_name # parameter names of encoder +encoder_para_names = encoder.para_name # parameter names of encoder print("Label is: ", label) print("Binary label is: ", label_bin) @@ -286,8 +286,8 @@ def QEmbedding(num_embedding, embedding_dim, window, layers, n_threads): encoder.no_grad() circ += encoder circ += ansatz - encoder_param_name.extend(list(encoder.parameter_resolver())) - ansatz_param_name.extend(list(ansatz.parameter_resolver())) + encoder_param_name.extend(encoder.para_name) + ansatz_param_name.extend(ansatz.para_name) net = MindQuantumLayer(encoder_param_name, ansatz_param_name, circ, diff --git a/tutorials/training/source_en/advanced_use/summary_record.md b/tutorials/training/source_en/advanced_use/summary_record.md index fb23a82065d3bd5391a1895e60d64b70e77db471..ce105254b3a3d973bb01a1856baf9bcafad55400 100644 --- a/tutorials/training/source_en/advanced_use/summary_record.md +++ b/tutorials/training/source_en/advanced_use/summary_record.md @@ -303,7 +303,7 @@ class ConfusionMatrixCallback(Callback): ... confusion_matrix = ConfusionMatrixCallback(summary_dir='./summary_dir') -model.train(network, train_dataset=ds_train, callbacks=[confusion_matrix]) +model.train(epoch=2, train_dataset=ds_train, callbacks=[confusion_matrix]) ``` The above three ways support the record computational graph, loss value and other data. In addition, MindSpore also supports the saving of computational graph for other phases of training, through diff --git a/tutorials/training/source_en/conf.py b/tutorials/training/source_en/conf.py index 46072a4ffdbbe9a21ef7ac60e2fd3a34fe2dae08..63e90d4eae7aee7f3556d724086bd7e4788cff42 100644 --- a/tutorials/training/source_en/conf.py +++ b/tutorials/training/source_en/conf.py @@ -32,7 +32,7 @@ release = 'master' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'recommonmark', + 'myst_parser', 'sphinx_markdown_tables', 'nbsphinx', 'sphinx.ext.mathjax', diff --git a/tutorials/training/source_en/index.rst b/tutorials/training/source_en/index.rst index ddff262e3377f802ec5b47a9a5974d1e083912d6..6907c3d974fa383eec79b5b348e4f7b9ab908ec3 100644 --- a/tutorials/training/source_en/index.rst +++ b/tutorials/training/source_en/index.rst @@ -96,6 +96,7 @@ Train with MindSpore advanced_use/protect_user_privacy_with_suppress_privacy advanced_use/test_model_security_fuzzing advanced_use/test_model_security_membership_inference + advanced_use/model_encrypt_protection .. 
toctree:: :glob: diff --git a/tutorials/training/source_en/quick_start/quick_video.md b/tutorials/training/source_en/quick_start/quick_video.md index 5e3c593120361f32785b9a5bd018c068e620b7e9..7d9486306767dfbbe959a6fb303890748c709157 100644 --- a/tutorials/training/source_en/quick_start/quick_video.md +++ b/tutorials/training/source_en/quick_start/quick_video.md @@ -702,6 +702,38 @@ Provides video tutorials from installation to try-on, helping you quickly use Mi +## Network Migration + + + + + + ## Join the MindSpore Community diff --git a/tutorials/training/source_en/quick_start/quick_video/network_migration_process.md b/tutorials/training/source_en/quick_start/quick_video/network_migration_process.md new file mode 100644 index 0000000000000000000000000000000000000000..b1fba5c2d8bd47639ade44cb4e2b76de7e3f4297 --- /dev/null +++ b/tutorials/training/source_en/quick_start/quick_video/network_migration_process.md @@ -0,0 +1,9 @@ +# Network Migration Process + +[comment]: <> (This document contains Hands-on Tutorial Series. Gitee does not support display. Please check tutorials on the official website) + + + +**See More**: \ No newline at end of file diff --git a/tutorials/training/source_zh_cn/advanced_use/concept_drift_time_series.md b/tutorials/training/source_zh_cn/advanced_use/concept_drift_time_series.md new file mode 100644 index 0000000000000000000000000000000000000000..f92c69745201333e4fe93ae4618a331273244fa9 --- /dev/null +++ b/tutorials/training/source_zh_cn/advanced_use/concept_drift_time_series.md @@ -0,0 +1,136 @@ +# 实现时序数据概念漂移检测应用 + + + +- [实现时序数据概念漂移检测应用](#实现时序数据概念漂移检测应用) + - [概述](#概述) + - [准备环节](#准备环节) + - [下载数据集](#下载数据集) + - [导入Python库&模块](#导入python库模块) + - [数据处理](#数据处理) + - [初始化概念漂移检测模块](#初始化概念漂移检测模块) + - [启动概念漂移检测](#启动概念漂移检测) + - [查看结果](#查看结果) + + + + + +## 概述 + +概念漂移(Concept Drift)是AI学习领域的一种重要数据现象,表现为在线推理数据(实时分布) +与训练阶段(历史分布)不一致。概念漂移检测能够及时发现数据分布变化,提前预测模型失效征兆,对AI模型的及时调整具有重要意义。 + +概念漂移检测本质上是检测数据分布变化,本示例提出一种检测数据变化的方法,对比新窗口数据的特征是否足够偏离历史窗口特征,如若偏离程度大于某一阈值,则数据发生概念漂移。 + +本例会实现一个简单的时序数据概念漂移检测的功能,整体流程如下: + +1. 下载公开数据集或构造数据。 +2. 定义概念漂移类参数。 +3. 调用概念漂移检测函数。 +4. 
查看结果。
+
+> 你可以在这里找到完整可运行的样例代码: 。
+
+## 准备环节
+
+确保已经正确安装了MindSpore。如果没有,可以通过[MindSpore安装页面](https://www.mindspore.cn/install)进行安装。
+
+### 下载数据集
+
+示例中用到金融领域公开数据集:标普500指数记录的美国股市平均记录。
+> 数据集下载页面:。
+
+将数据集下载并解压到本地路径下,目录结构如下:
+
+```bash
+├── archive
+    ├── individual_stocks_5yr
+        ├──individual_stocks_5yr
+```
+
+数据路径:archive/individual_stocks_5yr/individual_stocks_5yr。文件夹内每一个csv文件为一组数据用例。
+
+### 导入Python库&模块
+
+在使用前,需要导入需要的Python库。
+
+```python
+import numpy
+import matplotlib
+import itertools
+import mindarmour
+```
+
+## 数据处理
+
+从数据路径:archive/individual_stocks_5yr/individual_stocks_5yr 中打开一个数据用例。
+
+```python
+import numpy as np
+DATA_FILE = r'archive/individual_stocks_5yr/individual_stocks_5yr/AEE_data.csv'
+data = np.loadtxt(DATA_FILE, str, delimiter=",")
+```
+
+`data`数据包含了`date`,`open`,`high`,`low`,`close`,`volume`,`Name`列,其中`open`,`high`,`low`,`close`,`volume`为数值列,可以选择数值列中的某一列或某几列进行概念漂移检测。
+
+```python
+data = data[1:, 2].astype('float64')  # 选择第2列
+```
+
+或
+
+```python
+data = data[1:, 2: 4].astype('float64')  # 选择第2-4列
+```
+
+为了方便样例使用,可以通过构造的方式获得数据,如下方代码所示。
+
+```python
+import numpy as np
+data = 5*np.random.rand(1000)
+data[200: 800] = 50*np.random.rand(600)
+```
+
+## 初始化概念漂移检测模块
+
+导入概念漂移检测模块,并进行初始化,示例代码如下:
+
+```python
+from mindarmour import ConceptDriftCheckTimeSeries
+
+concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10, step=10, threshold_index=1.5, need_label=False)
+```
+
+初始化参数含义:
+
+- `window_size(int)`:概念窗口。数值不小于10,如果给定输入数据`data`的长度,`window_size`范围在[10, 1/3*len(`data`)]之间。一般,如果时序数据为周期性函数,`window_size`的大小可以选择2-5倍的周期长度。举例,`data`的长度为1000,周期为30,那么`window_size`的范围可以在[10, 333],考虑到数据周期性,`window_size`可以取值90。
+- `rolling_window(int)`:平滑窗口。数值大小在[1, `window_size`]之间。默认值:10。
+- `step(int)`:窗口滑动步长。数值范围在[1, `window_size`]之间。默认值:10。
+- `threshold_index(float)`:阈值系数。阈值系数越高,阈值越大。默认值:1.5。
+- `need_label(bool)`:标签需求。False或True。如果为True,表明需要概念漂移标签。如果为False,则不需要概念漂移标签。默认值:False。
+
+## 启动概念漂移检测
+
+完成模块初始化后,调用概念漂移检测函数`concept_check`。
+
+```python
+drift_score, threshold, concept_drift_location = concept.concept_check(data)
+```
+
+返回值
+
+- `drift_score(numpy.ndarray)`:概念漂移分数。针对输入`data`,获得其发生概念漂移的置信分数。分数越高,概念漂移的可能性越大。
+- `threshold(float)`:概念漂移阈值。根据`threshold_index(float)`计算获得的阈值大小。
+- `concept_drift_location(list)`:概念漂移发生位置。返回概念漂移发生的x轴对应位置,通常为某个x轴区域。
+
+## 查看结果
+
+当执行完`concept.concept_check(data)`后,会将执行结果保存为pdf,命名为"concept_drift_check.pdf"。
+
+如下图所示:
+
+![概念漂移](./images/concept_drift_timeseries.JPG)
+
+**子图1**:用户输入的数据`data`。数据中发生概念漂移的位置用蓝色五星标出,红色虚线(竖直方向)表示概念漂移发生最明显的位置。
+**子图2**:概念漂移置信分数`drift_score`(针对子图1中的数据),分数越高,概念漂移的可能性越大。红色虚线表示判断概念漂移的阈值`threshold`,虚线之上的`drift_score`所对应的横轴位置,判定为发生概念漂移。`threshold`的大小可根据`threshold_index`进行调节。
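+
+作为小结,下面给出一段可以端到端运行的示意代码(仅为示意,使用上文构造的随机数据,参数与上文的初始化示例一致):
+
+```python
+import numpy as np
+from mindarmour import ConceptDriftCheckTimeSeries
+
+# 构造一段中间区间分布发生变化的时序数据
+data = 5*np.random.rand(1000)
+data[200: 800] = 50*np.random.rand(600)
+
+# 初始化概念漂移检测模块并执行检测
+concept = ConceptDriftCheckTimeSeries(window_size=100, rolling_window=10,
+                                      step=10, threshold_index=1.5, need_label=False)
+drift_score, threshold, concept_drift_location = concept.concept_check(data)
+
+# 输出阈值与漂移发生的位置区间,图形结果保存在concept_drift_check.pdf中
+print(threshold)
+print(concept_drift_location)
+```
diff --git a/tutorials/training/source_zh_cn/advanced_use/custom_debugging_info.md b/tutorials/training/source_zh_cn/advanced_use/custom_debugging_info.md index ca00b3782f4e9ddef7baf4ff8d003af3c22877a6..847f4669da3bea0409c692a4977551fff158002c 100644
--- a/tutorials/training/source_zh_cn/advanced_use/custom_debugging_info.md
+++ b/tutorials/training/source_zh_cn/advanced_use/custom_debugging_info.md
@@ -208,6 +208,8 @@ result = model.eval(ds_eval)
通过如下代码可以更清楚了解到`Accuracy`是如何运行的:
```python
+from mindspore import Tensor
+from mindspore.nn import Accuracy
x = Tensor(np.array([[0.2, 0.5], [0.3, 0.1], [0.9, 0.6]])) y = Tensor(np.array([1, 0, 1])) metric = Accuracy()
@@ -366,7 +368,8 @@ MindSpore采用glog来输出日志,常用的几个环境变量如下:
该环境变量指定日志输出的路径。 若`GLOG_logtostderr`的值为0,则必须设置此变量。 若指定了`GLOG_log_dir`且`GLOG_logtostderr`的值为1时,则日志输出到屏幕,不输出到文件。
-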
C++和Python的日志会被输出到不同的文件中,C++日志的文件名遵从`GLOG`日志文件的命名规则,这里是`mindspore.机器名.用户名.log.日志级别.时间戳.进程ID`,Python日志的文件名为`mindspore.log.进程ID`。 + C++和Python的日志会被输出到不同的文件中,C++日志的文件名遵从`GLOG`日志文件的命名规则,这里是`mindspore.机器名.用户名.log.日志级别.时间戳.进程ID`,Python日志的文件名为`mindspore.log.进程ID`。 + `GLOG_log_dir`只能包含大小写字母、数字、".", ":", "-", "_", "/", "\\"等字符。 - `MS_SUBMODULE_LOG_v` diff --git a/tutorials/training/source_zh_cn/advanced_use/custom_loss_function.md b/tutorials/training/source_zh_cn/advanced_use/custom_loss_function.md index 8541565b21986e21400fb7120af0d58b5e26b4ef..4673b26e071a57a8ab432c08db6188d8a065aa79 100644 --- a/tutorials/training/source_zh_cn/advanced_use/custom_loss_function.md +++ b/tutorials/training/source_zh_cn/advanced_use/custom_loss_function.md @@ -70,9 +70,9 @@ print(output) ```python import mindspore.ops as ops -from mindspore.nn import Loss +from mindspore.nn import LossBase -class L1Loss(Loss): +class L1Loss(LossBase): def __init__(self, reduction="mean"): super(L1Loss, self).__init__(reduction) self.abs = ops.Abs() @@ -178,7 +178,7 @@ import mindspore.nn as nn import mindspore.ops as ops from mindspore import Model from mindspore import dataset as ds -from mindspore.nn import Loss +from mindspore.nn import LossBase from mindspore.common.initializer import Normal from mindspore.train.callback import LossMonitor @@ -190,7 +190,7 @@ class LinearNet(nn.Cell): def construct(self, x): return self.fc(x) -class L1Loss(Loss): +class L1Loss(LossBase): def __init__(self, reduction="mean"): super(L1Loss, self).__init__(reduction) self.abs = ops.Abs() @@ -280,9 +280,9 @@ def create_multilabel_dataset(num_data, batch_size=16): ```python import mindspore.ops as ops -from mindspore.nn import Loss +from mindspore.nn import LossBase -class L1LossForMultiLabel(Loss): +class L1LossForMultiLabel(LossBase): def __init__(self, reduction="mean"): super(L1LossForMultiLabel, self).__init__(reduction) self.abs = ops.Abs() @@ -368,7 +368,7 @@ import mindspore.nn as nn import mindspore.ops as ops from mindspore import Model from mindspore import dataset as ds -from mindspore.nn import Loss +from mindspore.nn import LossBase from mindspore.common.initializer import Normal from mindspore.train.callback import LossMonitor @@ -380,7 +380,7 @@ class LinearNet(nn.Cell): def construct(self, x): return self.fc(x) -class L1LossForMultiLabel(Loss): +class L1LossForMultiLabel(LossBase): def __init__(self, reduction="mean"): super(L1LossForMultiLabel, self).__init__(reduction) self.abs = ops.Abs() diff --git a/tutorials/training/source_zh_cn/advanced_use/custom_operator_gpu.md b/tutorials/training/source_zh_cn/advanced_use/custom_operator_gpu.md index bd159ebb6b5596d616fd8f0e401012eb52c21029..1a67d6842c03ff5661d3161f70fbdd4d9557bd10 100644 --- a/tutorials/training/source_zh_cn/advanced_use/custom_operator_gpu.md +++ b/tutorials/training/source_zh_cn/advanced_use/custom_operator_gpu.md @@ -244,17 +244,77 @@ context.set_context(device_target='GPU') @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard -def test_TensroAdd(): +def test_TensorAdd(): x1 = Tensor(np.ones((3, 4), np.float32)) x2 = Tensor(np.ones((3, 4), np.float32)) y = ops.TensorAddV2()(x1, x2) print('result: ', y) ``` -通过`pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py`命令执行后,可以看到结果符合预期: +通过`pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py::test_TensorAdd`命令执行后,可以看到结果符合预期: ```text result: [[2. 2. 2. 2.] [2. 2. 2. 2.] [2. 2. 2. 
2.]]
```
+
+## 定义算子反向传播函数
+
+如果算子要支持自动微分,需要在其原语中定义其反向传播函数(bprop)。你需要在bprop中描述利用正向输入、正向输出和输出梯度得到输入梯度的反向计算逻辑。反向计算逻辑可以使用内置算子或自定义反向算子构成。
+
+定义算子反向传播函数时需注意以下几点:
+
+- bprop函数的入参顺序约定为正向的输入、正向的输出、输出梯度。若算子为多输出算子,正向输出和输出梯度将以元组的形式提供。
+- bprop函数的返回值形式约定为输入梯度组成的元组,元组中元素的顺序与正向输入参数顺序一致。即使只有一个输入梯度,返回值也要求是元组的形式。
+
+例如,`TensorAddV2`的反向原语为:
+
+```python
+import mindspore.ops as ops
+from mindspore.ops._grad.grad_base import bprop_getters
+
+@bprop_getters.register(ops.TensorAddV2)
+def get_bprop_tensoraddv2(self):
+    """Generate bprop for TensorAddV2"""
+
+    def bprop(x, y, out, dout):
+        # 加法算子的两个输入梯度均等于输出梯度
+        return dout, dout
+
+    return bprop
+```
+
+在`test_tensoraddv2_op.py`文件中定义反向用例。
+
+```python
+import mindspore.ops as ops
+
+class Grad(nn.Cell):
+    def __init__(self, network):
+        super(Grad, self).__init__()
+        self.grad = ops.GradOperation(sens_param=True)
+        self.network = network
+
+    def construct(self, x1, x2, sens):
+        gout = self.grad(self.network)(x1, x2, sens)
+        return gout
+
+def test_grad_net():
+    x1 = Tensor(np.ones((3, 4), np.float32))
+    x2 = Tensor(np.ones((3, 4), np.float32))
+    sens = Tensor(np.arange(3 * 4).reshape(3, 4).astype(np.float32))
+    # Net为封装TensorAddV2算子的正向网络(此处假设已在本文件中定义)
+    grad = Grad(Net())
+    dx = grad(x1, x2, sens)
+    print("dx[0]: ", dx[0].asnumpy())
+```
+
+执行用例:
+
+```bash
+pytest -s tests/st/ops/gpu/test_tensoraddv2_op.py::test_grad_net
+```
+
+执行结果:
+
+```text
+dx[0]: [[0. 1. 2. 3.]
+ [4. 5. 6. 7.]
+ [8. 9. 10. 11.]]
+```
diff --git a/tutorials/training/source_zh_cn/advanced_use/cv_resnet50_second_order_optimizer.md b/tutorials/training/source_zh_cn/advanced_use/cv_resnet50_second_order_optimizer.md index a459df4aafae10a7b981580b14cc8e8c45d854e9..8e9a6c3e0c8c56adb8c92bbe66e9b7858a4bc9c5 100644
--- a/tutorials/training/source_zh_cn/advanced_use/cv_resnet50_second_order_optimizer.md
+++ b/tutorials/training/source_zh_cn/advanced_use/cv_resnet50_second_order_optimizer.md
@@ -39,32 +39,26 @@ MindSpore开发团队在现有的自然梯度算法的基础上,对FIM矩阵 本篇教程将主要介绍如何在Ascend 910 以及GPU上,使用MindSpore提供的二阶优化器THOR训练ResNet50-v1.5网络和ImageNet数据集。
> 你可以在这里下载完整的示例代码:
- 。
+ 。
示例代码目录结构
```text
-├── resnet_thor
+├── resnet
    ├── README.md
    ├── scripts
        ├── run_distribute_train.sh         # launch distributed training for Ascend 910
-       └── run_eval.sh                     # launch inference for Ascend 910
+       ├── run_eval.sh                     # launch inference for Ascend 910
        ├── run_distribute_train_gpu.sh     # launch distributed training for GPU
-       └── run_eval_gpu.sh                 # launch inference for GPU
+       ├── run_eval_gpu.sh                 # launch inference for GPU
    ├── src
-       ├── crossentropy.py                 # CrossEntropy loss function
        ├── config.py                       # parameter configuration
-       ├── dataset_helper.py               # dataset helper for minddata dataset
-       ├── grad_reducer_thor.py            # grad reduce for thor
-       ├── model_thor.py                   # model for train
-       ├── resnet_thor.py                  # resnet50_thor backone
-       ├── thor.py                         # thor optimizer
-       ├── thor_layer.py                   # thor layer
-       └── dataset.py                      # data preprocessing
+       ├── dataset.py                      # data preprocessing
+       ├── CrossEntropySmooth.py           # CrossEntropy loss function
+       ├── lr_generator.py                 # generate learning rate for every step
+       ├── resnet.py                       # ResNet50 backbone
    ├── eval.py                             # infer script
    ├── train.py                            # train script
-   ├── export.py                           # export checkpoint file into air file
-   └── mindspore_hub_conf.py               # config file for mindspore hub repository
```
@@ -123,21 +117,40 @@ import mindspore.dataset.vision.c_transforms as C import mindspore.dataset.transforms.c_transforms as C2 from mindspore.communication.management import init, get_rank, get_group_size
-def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
+
+def create_dataset2(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend", distribute=False,
+
enable_cache=False, cache_session_id=None): + """ + Create a training or evaluation ImageNet2012 dataset for ResNet50. + + Args: + dataset_path(string): the path of dataset. + do_train(bool): whether the dataset is used for training or evaluation. + repeat_num(int): the repeat times of dataset. Default: 1 + batch_size(int): the batch size of dataset. Default: 32 + target(str): the device target. Default: Ascend + distribute(bool): data for distribute or not. Default: False + enable_cache(bool): whether tensor caching service is used for evaluation. Default: False + cache_session_id(int): if enable_cache is set, cache session_id need to be provided. Default: None + + Returns: + dataset + """ if target == "Ascend": device_num, rank_id = _get_rank_info() - num_parallels = 8 else: - init() - rank_id = get_rank() - device_num = get_group_size() - num_parallels = 4 + if distribute: + init() + rank_id = get_rank() + device_num = get_group_size() + else: + device_num = 1 if device_num == 1: - data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=num_parallels, shuffle=True) + data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True) else: - data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=num_parallels, shuffle=True, - num_shards=device_num, shard_id=rank_id) + data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True, + num_shards=device_num, shard_id=rank_id) image_size = 224 mean = [0.485 * 255, 0.456 * 255, 0.406 * 255] @@ -162,8 +175,18 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" type_cast_op = C2.TypeCast(mstype.int32) - data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=num_parallels) - data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=num_parallels) + data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8) + # only enable cache for eval + if do_train: + enable_cache = False + if enable_cache: + if not cache_session_id: + raise ValueError("A cache session_id must be provided to use cache.") + eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0) + data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8, + cache=eval_cache) + else: + data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) # apply batch operations data_set = data_set.batch(batch_size, drop_remainder=True) @@ -178,25 +201,18 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target=" ## 定义网络 -本示例中使用的网络模型为ResNet50-v1.5,先定义[ResNet50网络](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/resnet/src/resnet.py),然后使用二阶优化器自定义的算子替换`Conv2d`和 -和`Dense`算子。定义好的网络模型在在源码`src/resnet_thor.py`脚本中,自定义的算子`Conv2d_thor`和`Dense_thor`在`src/thor_layer.py`脚本中。 - -- 使用`Conv2d_thor`替换原网络模型中的`Conv2d` -- 使用`Dense_thor`替换原网络模型中的`Dense` - -> 使用THOR自定义的算子`Conv2d_thor`和`Dense_thor`是为了保存模型训练中的二阶矩阵信息,新定义的网络与原网络模型的backbone一致。 +本示例中使用的网络模型为ResNet50-v1.5,定义[ResNet50网络](https://gitee.com/mindspore/mindspore/blob/master/model_zoo/official/cv/resnet/src/resnet.py)。 网络构建完成以后,在`__main__`函数中调用定义好的ResNet50: ```python ... -from src.resnet_thor import resnet50 +from src.resnet import resnet50 as resnet ... if __name__ == "__main__": ... 
- # define the net - net = resnet50(class_num=config.class_num, damping=damping, loss_scale=config.loss_scale, - frequency=config.frequency, batch_size=config.batch_size) + # define net + net = resnet(class_num=config.class_num) ... ``` @@ -206,23 +222,23 @@ if __name__ == "__main__": MindSpore支持的损失函数有`SoftmaxCrossEntropyWithLogits`、`L1Loss`、`MSELoss`等。THOR优化器需要使用`SoftmaxCrossEntropyWithLogits`损失函数。 -损失函数的实现步骤在`src/crossentropy.py`脚本中。这里使用了深度网络模型训练中的一个常用trick:label smoothing,通过对真实标签做平滑处理,提高模型对分类错误标签的容忍度,从而可以增加模型的泛化能力。 +损失函数的实现步骤在`src/CrossEntropySmooth.py`脚本中。这里使用了深度网络模型训练中的一个常用trick:label smoothing,通过对真实标签做平滑处理,提高模型对分类错误标签的容忍度,从而可以增加模型的泛化能力。 ```python -class CrossEntropy(Loss): +class CrossEntropySmooth(LossBase): """CrossEntropy""" - def __init__(self, smooth_factor=0., num_classes=1000): - super(CrossEntropy, self).__init__() + def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000): + super(CrossEntropySmooth, self).__init__() self.onehot = ops.OneHot() + self.sparse = sparse self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32) - self.ce = nn.SoftmaxCrossEntropyWithLogits() - self.mean = ops.ReduceMean(False) + self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction) def construct(self, logit, label): - one_hot_label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value) - loss = self.ce(logit, one_hot_label) - loss = self.mean(loss, 0) + if self.sparse: + label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value) + loss = self.ce(logit, label) return loss ``` @@ -230,14 +246,15 @@ class CrossEntropy(Loss): ```python ... -from src.crossentropy import CrossEntropy +from src.CrossEntropySmooth import CrossEntropySmooth ... if __name__ == "__main__": ... # define the loss function if not config.use_label_smooth: config.label_smooth_factor = 0.0 - loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + loss = CrossEntropySmooth(sparse=True, reduction="mean", + smooth_factor=config.label_smooth_factor, num_classes=config.class_num) ... ``` @@ -255,27 +272,31 @@ $$ \theta^{t+1} = \theta^t + \alpha F^{-1}\nabla E$$ - $F^{-1}$:FIM矩阵,在网络中计算获得; - $\nabla E$:一阶梯度值。 -从参数更新公式中可以看出,THOR优化器需要额外计算的是每一层的FIM矩阵,每一层的FIM矩阵就是之前在自定义的网络模型中计算获得的。FIM矩阵可以对每一层参数更新的步长和方向进行自适应的调整,加速收敛的同时可以降低调参的复杂度。 +从参数更新公式中可以看出,THOR优化器需要额外计算的是每一层的FIM矩阵。FIM矩阵可以对每一层参数更新的步长和方向进行自适应的调整,加速收敛的同时可以降低调参的复杂度。 + +更多THOR优化器的介绍请参考:[THOR论文](https://www.aaai.org/AAAI21Papers/AAAI-6611.ChenM.pdf) + +在调用MindSpore封装的二阶优化器THOR时,优化器会自动调用转换接口,把之前定义好的ResNet50网络中的Conv2d层和Dense层分别转换成对应的[Conv2dThor](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/layer/thor_layer.py)和[DenseThor](https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/layer/thor_layer.py)。 +而在Conv2dThor和DenseThor中可以完成二阶信息矩阵的计算和存储。 + +> THOR优化器转换前后的网络backbone一致,网络参数保持不变。 + +在训练主脚本中调用THOR优化器: ```python ... -if args_opt.device_target == "Ascend": - from src.thor import THOR -else: - from src.thor import THOR_GPU as THOR +from mindspore.nn.optim import thor ... - if __name__ == "__main__": ... 
- # learning rate setting - lr = get_model_lr(0, config.lr_init, config.lr_decay, config.lr_end_epoch, step_size, decay_epochs=39) + # learning rate setting and damping setting + from src.lr_generator import get_thor_lr, get_thor_damping + lr = get_thor_lr(0, config.lr_init, config.lr_decay, config.lr_end_epoch, step_size, decay_epochs=39) + damping = get_thor_damping(0, config.damping_init, config.damping_decay, 70, step_size) # define the optimizer - opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), config.momentum, - filter(lambda x: 'matrix_A' in x.name, net.get_parameters()), - filter(lambda x: 'matrix_G' in x.name, net.get_parameters()), - filter(lambda x: 'A_inv_max' in x.name, net.get_parameters()), - filter(lambda x: 'G_inv_max' in x.name, net.get_parameters()), - config.weight_decay, config.loss_scale) + split_indices = [26, 53] + opt = thor(net, Tensor(lr), Tensor(damping), config.momentum, config.weight_decay, config.loss_scale, + config.batch_size, split_indices=split_indices, frequency=config.frequency) ... ``` @@ -289,7 +310,7 @@ MindSpore提供了callback机制,可以在训练过程中执行自定义逻辑 ```python ... -from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor, LossMonitor +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor ... if __name__ == "__main__": ... @@ -307,23 +328,25 @@ if __name__ == "__main__": ### 配置训练网络 -通过MindSpore提供的`model.train`接口可以方便地进行网络的训练。THOR优化器通过降低二阶矩阵更新频率,来减少计算量,提升计算速度,故重新定义一个Model_Thor类,继承MindSpore提供的Model类。在Model_Thor类中增加二阶矩阵更新频率控制参数,用户可以通过调整该参数,优化整体的性能。 +通过MindSpore提供的`model.train`接口可以方便地进行网络的训练。THOR优化器通过降低二阶矩阵更新频率,来减少计算量,提升计算速度,故重新定义一个[ModelThor](https://gitee.com/mindspore/mindspore/blob/master/mindspore/train/train_thor/model_thor.py)类,继承MindSpore提供的Model类。在ModelThor类中获取THOR的二阶矩阵更新频率控制参数,用户可以通过调整该参数,优化整体的性能。 +MindSpore提供Model类向ModelThor类的一键转换接口。 ```python ... from mindspore import FixedLossScaleManager -from src.model_thor import Model_Thor as Model +from mindspore import Model +from mindspore.train.train_thor import ConvertModelUtils ... if __name__ == "__main__": ... loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) - if target == "Ascend": - model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', loss_scale_manager=loss_scale, - keep_batchnorm_fp32=False, metrics={'acc'}, frequency=config.frequency) - else: - model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'}, - amp_level="O2", keep_batchnorm_fp32=True, frequency=config.frequency) + model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics=metrics, + amp_level="O2", keep_batchnorm_fp32=False, eval_network=dist_eval_network) + if cfg.optimizer == "Thor": + model = ConvertModelUtils().convert_to_thor_model(model=model, network=net, loss_fn=loss, optimizer=opt, + loss_scale_manager=loss_scale, metrics={'acc'}, + amp_level="O2", keep_batchnorm_fp32=False) ... 
``` @@ -333,19 +356,20 @@ if __name__ == "__main__": #### Ascend 910 -目前MindSpore分布式在Ascend上执行采用单卡单进程运行方式,即每张卡上运行1个进程,进程数量与使用的卡的数量一致。进程均放在后台执行,每个进程创建1个目录,目录名称为`train_parallel`+ `device_id`,用来保存日志信息,算子编译信息以及训练的checkpoint文件。下面以使用8张卡的分布式训练脚本为例,演示如何运行脚本: +目前MindSpore分布式在Ascend上执行采用单卡单进程运行方式,即每张卡上运行1个进程,进程数量与使用的卡的数量一致。进程均放在后台执行,每个进程创建1个目录,目录名称为`train_parallel`+ `device_id`,用来保存日志信息,算子编译信息以及训练的checkpoint文件。下面以使用8张卡的分布式训练脚本为例,演示如何运行脚本。 -使用以下命令运行脚本: +首先在`src/config.py`中将优化器配置为'Thor',然后使用以下命令运行脚本: ```bash -sh run_distribute_train.sh +bash run_distribute_train.sh ``` -脚本需要传入变量`RANK_TABLE_FILE`、`DATASET_PATH`和`DEVICE_NUM`,其中: +脚本需要传入变量`resnet50`、`imagenet2012`、`RANK_TABLE_FILE`和`DATASET_PATH`,其中: +- `resnet50`:训练的网络为ResNet50。 +- `imagenet2012`:训练使用的数据集为ImageNet2012数据集。 - `RANK_TABLE_FILE`:组网信息文件的路径。(rank table文件的生成,参考[HCCL_TOOL](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)) - `DATASET_PATH`:训练数据集路径。 -- `DEVICE_NUM`:实际的运行卡数。 其余环境变量请参考安装教程中的配置项。 @@ -369,11 +393,12 @@ epoch: 42 step: 5004, loss is 1.6453942 ```text └─train_parallel0 - ├─resnet-1_5004.ckpt - ├─resnet-2_5004.ckpt - │ ...... - ├─resnet-42_5004.ckpt - │ ...... + ├─ckpt_0 + ├─resnet-1_5004.ckpt + ├─resnet-2_5004.ckpt + │ ...... + ├─resnet-42_5004.ckpt + │ ...... ``` 其中, @@ -381,16 +406,19 @@ epoch: 42 step: 5004, loss is 1.6453942 #### GPU -在GPU硬件平台上,MindSpore采用OpenMPI的`mpirun`进行分布式训练,进程创建1个目录,目录名称为`train_parallel`,用来保存日志信息和训练的checkpoint文件。下面以使用8张卡的分布式训练脚本为例,演示如何运行脚本: +在GPU硬件平台上,MindSpore采用OpenMPI的`mpirun`进行分布式训练,进程创建1个目录,目录名称为`train_parallel`,用来保存日志信息和训练的checkpoint文件。下面以使用8张卡的分布式训练脚本为例,演示如何运行脚本。 + +首先在`src/config.py`中将优化器配置为'Thor',然后使用以下命令运行脚本: ```bash -sh run_distribute_train_gpu.sh +bash run_distribute_train_gpu.sh ``` -脚本需要传入变量`DATASET_PATH`和`DEVICE_NUM`,其中: +脚本需要传入变量`resnet50`、`imagenet2012`和`DATASET_PATH`,其中: +- `resnet50`:训练的网络为ResNet50。 +- `imagenet2012`:训练使用的数据集为ImageNet2012数据集。 - `DATASET_PATH`:训练数据集路径。 -- `DEVICE_NUM`:实际的运行卡数。 在GPU训练时,无需设置`DEVICE_ID`环境变量,因此在主训练脚本中不需要调用`int(os.getenv('DEVICE_ID'))`来获取卡的物理序号,同时`context`中也无需传入`device_id`。我们需要将device_target设置为GPU,并需要调用`init()`来使能NCCL。 @@ -447,23 +475,28 @@ if __name__ == "__main__": ... # define net net = resnet(class_num=config.class_num) - net.add_flags_recursive(thor=False) # load checkpoint param_dict = load_checkpoint(args_opt.checkpoint_path) - keys = list(param_dict.keys()) - for key in keys: - if "damping" in key: - param_dict.pop(key) load_param_into_net(net, param_dict) net.set_train(False) + # define loss + if args_opt.dataset == "imagenet2012": + if not config.use_label_smooth: + config.label_smooth_factor = 0.0 + loss = CrossEntropySmooth(sparse=True, reduction='mean', + smooth_factor=config.label_smooth_factor, num_classes=config.class_num) + else: + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') + # define model model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) # eval model res = model.eval(dataset) print("result:", res, "ckpt=", args_opt.checkpoint_path) + ... ``` ### 执行推理 @@ -475,11 +508,13 @@ if __name__ == "__main__": 在Ascend 910硬件平台上,推理的执行命令如下: ```bash -sh run_eval.sh +bash run_eval.sh ``` -脚本需要传入变量`DATASET_PATH`和`CHECKPOINT_PATH`,其中: +脚本需要传入变量`resnet50`、`imagenet2012`、`DATASET_PATH`和`CHECKPOINT_PATH`,其中: +- `resnet50`: 推理的网络为ResNet50. 
+- `imagenet2012`: 推理使用的数据集为ImageNet2012。
- `DATASET_PATH`:推理数据集路径。 - `CHECKPOINT_PATH`:保存的checkpoint路径。
@@ -497,11 +532,13 @@ result: {'top_5_accuracy': 0.9295574583866837, 'top_1_accuracy': 0.7614436619718 在GPU硬件平台上,推理的执行命令如下: ```bash
-sh run_eval_gpu.sh
+bash run_eval_gpu.sh
```
-脚本需要传入变量`DATASET_PATH`和`CHECKPOINT_PATH`,其中:
+脚本需要传入变量`resnet50`、`imagenet2012`、`DATASET_PATH`和`CHECKPOINT_PATH`,其中:
+- `resnet50`: 推理的网络为ResNet50.
+- `imagenet2012`: 推理使用的数据集为ImageNet2012。
- `DATASET_PATH`:推理数据集路径。 - `CHECKPOINT_PATH`:保存的checkpoint路径。
diff --git a/tutorials/training/source_zh_cn/advanced_use/debugger.rst b/tutorials/training/source_zh_cn/advanced_use/debugger.rst new file mode 100644 index 0000000000000000000000000000000000000000..bf2c1f3f80ec577de996dfd1c7a61bae365b53ea
--- /dev/null
+++ b/tutorials/training/source_zh_cn/advanced_use/debugger.rst
@@ -0,0 +1,10 @@
+调试器
+==================================
+
+MindSpore调试器是为图模式训练提供的调试工具,可以用来查看并分析计算图节点的中间结果。
+
+.. toctree::
+  :maxdepth: 1
+
+  debugger_online
+  debugger_offline
diff --git a/tutorials/training/source_zh_cn/advanced_use/debugger_offline.md b/tutorials/training/source_zh_cn/advanced_use/debugger_offline.md new file mode 100755 index 0000000000000000000000000000000000000000..ab0455965210d4af95ae204707f34ba48c19ee46
--- /dev/null
+++ b/tutorials/training/source_zh_cn/advanced_use/debugger_offline.md
@@ -0,0 +1,104 @@
+# 使用离线调试器
+
+`Linux` `Ascend` `GPU` `模型调优` `中级` `高级`
+
+
+
+- [使用离线调试器](#使用离线调试器)
+    - [概述](#概述)
+    - [操作流程](#操作流程)
+    - [离线调试器环境准备](#离线调试器环境准备)
+    - [离线调试器页面介绍](#离线调试器页面介绍)
+    - [使用离线调试器进行调试](#使用离线调试器进行调试)
+    - [注意事项](#注意事项)
+
+
+
+
+
+## 概述
+
+MindSpore离线调试器是基于训练的Dump数据进行可视化调试,可以用来查看并分析计算图节点的中间结果。
+
+离线调试器支持对接离线Dump数据,进行可视化分析,解决了开启内存复用时在线调试器无法使用的问题。
+
+## 操作流程
+
+1. 准备Dump数据。Dump的使用方式详见[使用Dump功能在Graph模式调试](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/dump_in_graph_mode.html) ;
+2. 启动MindInsight,指定summary-base-dir为dump配置中的{path}路径的上一层或上两层;
+3. 从训练列表中找到离线调试器入口,点击“离线调试器”,进入调试器页面,开始进行调试分析。
+
+## 离线调试器环境准备
+
+使用MindSpore的Dump功能准备离线数据。Dump的使用方式详见[使用Dump功能在Graph模式调试](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/dump_in_graph_mode.html) 。
+
+然后,启动MindInsight,指定summary-base-dir为dump配置中的{path}路径的上一层或上两层,即可在UI页面中查询到离线调试器的入口。
+
+MindInsight启动命令:
+
+```text
+mindinsight start --port {PORT} --summary-base-dir /path/to/father/directory/of/dump_dir
+```
+
+或者:
+
+```text
+mindinsight start --port {PORT} --summary-base-dir /path/to/grandfather/directory/of/dump_dir
+```
+
+参数含义如下:
+
+|参数名|属性|功能描述|参数类型|默认值|取值范围|
+|---|---|---|---|---|---|
+|`--port {PORT}`|可选|指定Web可视化服务端口。|Integer|8080|1~65535|
+|`--summary-base-dir /path/to`|必选|Dump配置中的{path}路径的上一层或上两层。例如,Dump配置文件中的path为“/home/workspace/data/dump_dir”,summary-base-dir可以设置为“/home/workspace/data”或“/home/workspace”。|String|./|-|
+
+更多启动参数请参考[MindInsight相关命令](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/mindinsight_commands.html)。
+
+然后,打开MindInsight页面,从离线调试器入口进入调试器界面。
+
+![debugger_offline_entry](images/debugger_offline_entry.png)
+
+图1: 离线调试器入口
+
+## 离线调试器页面介绍
+
+离线调试器界面与在线调试器相同。在线调试器的页面介绍详见[调试器页面介绍](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/debugger.html#id7) 。
+
+## 使用离线调试器进行调试
+
+1. 在调试器环境准备完成后,打开调试器界面,如下图所示:
+
+    ![debugger_offline_waiting](images/debugger_offline_waiting.png)
+
+    图2: 调试器等待训练连接
+
+    此时,调试器处于加载离线数据的状态。
+
+2. 稍等片刻,在MindInsight UI上可以看到弹窗,提示选择是否使用推荐监测点,接下来的使用步骤与在线调试相同,详见[使用调试器进行调试](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/debugger.html#id18) 。
+
+3. 
与在线调试器相比,离线调试器可以重置训练轮次。如图3所示,点击右边的编辑图标,就会出现一个编辑框,如图4所示,输入需要重置的轮次,点击对钩符号即可。
+
+    ![debugger_offline_reset](images/debugger_offline_reset.png)
+
+    图3: 重置训练轮次
+
+    ![debugger_offline_edit](images/debugger_offline_edit.png)
+
+    图4:重置训练轮次编辑状态
+
+## 注意事项
+
+- 场景支持:
+    - 离线调试器暂不支持CPU场景。
+    - 离线调试器支持单机多卡场景。若要分析多机多卡的场景,需要自行把多机数据汇总到一起。
+    - 离线调试器暂不支持初始权重的检查。
+
+- GPU场景:
+    - 与在线调试器不同,离线调试器不支持逐节点执行。
+
+- 使用离线调试器时要保证MindInsight和MindSpore的版本号相同。
+- 如果同一路径下存在多个相同张量的Dump文件,离线调试器只会显示最新的张量。
+- 重新检查只检查当前有张量值的监测点。
+- 调试器展示的图是优化后的最终执行图。调用的算子可能已经与其它算子融合,或者在优化后改变了名称。
+- 如果使用Ascend场景下的异步Dump数据,可以使用MindInsight的数据解析工具DumpParser的`convert_all_data_to_host`接口将异步Dump数据转换为`.npy`文件,从而提高数据分析效率。DumpParser的使用方式详见[DumpParser介绍](https://gitee.com/mindspore/mindinsight/tree/master/mindinsight/parser) 。
\ No newline at end of file
diff --git a/tutorials/training/source_zh_cn/advanced_use/debugger.md b/tutorials/training/source_zh_cn/advanced_use/debugger_online.md similarity index 85% rename from tutorials/training/source_zh_cn/advanced_use/debugger.md rename to tutorials/training/source_zh_cn/advanced_use/debugger_online.md index e0136e570ce29bfa133132a4f0957b321ae017d5..b9d30504cc4aa2d6f5bd6e4281af1996f95e1498 100755
--- a/tutorials/training/source_zh_cn/advanced_use/debugger.md
+++ b/tutorials/training/source_zh_cn/advanced_use/debugger_online.md
@@ -22,17 +22,16 @@
-
+
## 概述
-MindSpore调试器是为图模式训练提供的调试工具,可以用来查看并分析计算图节点的中间结果。
-
在MindSpore图模式的训练过程中,用户无法从Python层获取到计算图中间节点的结果,使得训练调试变得很困难。使用MindSpore调试器,用户可以:
- 在MindInsight调试器界面结合计算图,查看图节点的输出结果; - 设置监测点,监测训练异常情况(比如检查张量溢出),在异常发生时追踪错误原因; - 查看权重等参数的变化情况。
+- 查看图节点和源代码的对应关系。
## 操作流程
@@ -43,6 +42,8 @@
## 调试器环境准备
+### 以调试模式启动MindInsight
+
开始训练前,请先安装MindInsight,并以调试模式启动。调试模式下,MindSpore会将训练信息发送给MindInsight调试服务,用户可在MindInsight调试器界面进行查看和分析。 MindInsight调试服务启动命令:
@@ -61,7 +62,9 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER 更多启动参数请参考[MindInsight相关命令](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/mindinsight_commands.html)。
-然后,设置环境变量`export ENABLE_MS_DEBUGGER=1`或`export ENABLE_MS_DEBUGGER=True`,将训练指定为调试模式,并设置训练要连接的调试服务和端口:
+### 以调试模式运行训练脚本
+
+以调试模式运行训练脚本,需要设置环境变量`export ENABLE_MS_DEBUGGER=1`或`export ENABLE_MS_DEBUGGER=True`,将训练指定为调试模式,并设置训练要连接的调试服务和端口:
`export MS_DEBUGGER_HOST=127.0.0.1`(该服务地址需与MindInsight host一致); `export MS_DEBUGGER_PORT=50051`(该端口需与MindInsight debugger-port一致)。
@@ -69,11 +72,11 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER 此外,训练时不要使用数据下沉模式(需设置`model.train`中的`dataset_sink_mode`为`False`),以保证调试器可以获取每个轮次的训练信息。
-调试器环境准备完成后,运行训练脚本。
+环境变量和训练脚本准备完成后,运行训练脚本。
## 调试器页面介绍
-训练连接成功后,可以在MindInsight调试器界面查看计算图等训练元信息,调试器页面布局由计算图、节点列表、节点信息、监测点列表、监测点命中列表等部分组成。
+训练连接成功后,可以在MindInsight调试器界面查看计算图等训练元信息,调试器页面布局由计算图、节点列表、节点信息、监测点列表、监测点命中列表、堆栈列表、堆栈信息等部分组成。
![debugger_init_page](images/debugger_init_page.png)
@@ -98,7 +101,7 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER
### 节点信息
-点击计算图上的节点后,可以在UI下方查看该节点的详细信息,如图2所示。该部分展示了节点的输出和输入,训练的`轮次`数目,`张量`的`类型`、`形状`和`数值`等信息。
+点击计算图上的节点后,可以在UI下方查看该节点的详细信息,如图2所示。该部分展示了节点的输出和输入,训练的`轮次`数目,`张量`的`类型`、`形状`和`数值`等信息。点击`数值`信息里的`下载`,可以将该张量数据下载为.npy文件,默认在download文件夹下。
在GPU环境下,选中图上的某个可执行节点后,单击鼠标右键,可选择`运行到该节点`,代表将训练脚本运行到被选中节点(不超过一个`轮次`)。
@@ -158,6 +161,28 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER 图5展示了监测点触发后的展示页面,该页面和`节点列表`所在位置相同。触发的节点以及监控条件会按照节点的执行序排列,触发的监控条件上会显示该条件的设置值以及触发该条件的实际值。 另外,用户点击某一行,会在计算图中跳转到对应节点,可以进一步查看节点信息分析异常结果出现的原因。点击`查看`进入张量检查视图可以查看触发的监测点信息以及调优向导,如图6所示。
+### 
堆栈列表 + +通过调试器页面左上方的切换按钮,可以从`节点列表`或`命中的监测点`切换到`堆栈列表`。 + +在调试器的堆栈信息列表页,可以看到所有的堆栈信息列表,搜索框输入关键字,可以显示匹配的堆栈信息列表。列表分页显示。点击底部的页码,可以快速跳转到对应页。 + +点击列表中的某一项,可以自动跳转到节点列表,在节点列表可以看到与这一行代码相关的节点。 + +![debugger_stack_list](images/debugger_stack_list.png) + +图6: 堆栈列表 + +### 堆栈信息 + +在图上定位到某一个节点时,点击计算图下方的`堆栈信息`标签,会看到该节点对应的堆栈信息。 + +在堆栈信息标签下,点击某一行的搜索,可以搜索与这一行相关的所有节点,搜索结果会自动展示在节点列表中。 + +![debugger_stack_info](images/debugger_stack_info.png) + +图7: 堆栈信息 + ### 重新检查 为了更详细地对节点进行监测分析,用户可以在修改监测点的节点,添加删除监测点后对当前轮次重新检查。`重新检查`按钮位于监测点列表右上角,如图3所示。 @@ -175,23 +200,25 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER ![debugger_tensor_view](images/debugger_tensor_view.png) -图6: 查看`张量`值 +图8: 查看张量值 一些`张量`的维度过多,无法直接在主页进行展示。用户可以点击对应的`查看`按钮,在弹出的张量检查视图中查看`张量`值的详细信息。 -如图6所示,张量检查视图将`张量`值展示在UI的中上位置,用户可以进行`维度选择`,点击`显示当前step`,`显示上一step`和`显示对比结果`对张量进行显示和对比(当前仅支持参数节点与上一轮次对比)。此外,用户可以设置切片进行`维度选择`来显示相应维度的`张量`。 +如图8所示,张量检查视图将`张量`值展示在UI的中上位置,用户可以进行`维度选择`,点击`显示当前step`,`显示上一step`和`显示对比结果`对张量进行显示和对比(当前仅支持参数节点与上一轮次对比)。此外,用户可以设置切片进行`维度选择`来显示相应维度的`张量`。 视图的最上方展示了`节点信息`、`当前轮次`以及`统计信息`;视图的左侧展示了调优向导,当监测点命中时,将显示命中信息和相关的调优建议;视图的下方展示了张量关系图以及详细的`节点信息`。 通过张量关系图,可以分析当前张量是通过哪些张量计算出来的,还可以分析当前张量影响到了哪些常量。张量图中标注了命中监测点的条件的缩写,方便用户快速识别张量问题的传播路径。每个条件的缩写可以在“设置监测点”一节中查到。 +张量检查视图也提供了下载功能,用户可以将需要的张量下载,进行深入的处理分析。 + ## 使用调试器进行调试 1. 在调试器环境准备完成后,打开调试器界面,如下图所示: ![debugger_waiting](images/debugger_waiting.png) - 图7: 调试器等待训练连接 + 图9: 调试器等待训练连接 此时,调试器处于等待训练启动和连接的状态。 @@ -201,7 +228,7 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER ![debugger_ask_recommend](images/debugger_ask_recommend.png) - 图8: 等待用户选择是否使用推荐监测点 + 图10: 等待用户选择是否使用推荐监测点 4. 稍后可以看到计算图显示在调试器界面,见图1。 @@ -212,7 +239,7 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER 6. 监测点触发,见图5。 - 监测点触发后,用户查看对应的节点信息,通过张量检查视图找出异常原因,修改脚本,修复问题。 + 监测点触发后,用户查看对应的节点信息和堆栈信息,通过张量检查视图找出异常原因,或者下载张量以后再通过离线分析来找出异常原因,修改脚本,修复问题。 ## 注意事项 @@ -229,8 +256,8 @@ mindinsight start --port {PORT} --enable-debugger True --debugger-port {DEBUGGER - GPU场景: - 在GPU场景下,只有满足条件的参数节点可以与自身的上一轮次结果作对比:使用`下一个节点`执行过的节点、使用`运行到该节点`时选中的节点、作为`监测点`输入的参数节点。其他情况均无法使用`上一轮次对比`功能。 - - 由于GPU上一个轮次是一个子图(而非完整的图),GPU上多图做重新检查时,只能重新检查当前的子图。 +- 使用调试器时要保证MindInsight和MindSpore的版本号相同。 - 重新检查只检查当前有张量值的监测点。 - 检查计算过程溢出需要用户开启异步Dump的全部溢出检测功能,开启方式请参照[异步Dump功能介绍](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/custom_debugging_info.html#id5) - 调试器展示的图是优化后的最终执行图。调用的算子可能已经与其它算子融合,或者在优化后改变了名称。 diff --git a/tutorials/training/source_zh_cn/advanced_use/distributed_training_ascend.md b/tutorials/training/source_zh_cn/advanced_use/distributed_training_ascend.md index e57537adf2a3821c64a36da12e420b6907a19db1..7c76f1158ac3c30652e049f85e2ac401d52e0900 100644 --- a/tutorials/training/source_zh_cn/advanced_use/distributed_training_ascend.md +++ b/tutorials/training/source_zh_cn/advanced_use/distributed_training_ascend.md @@ -555,9 +555,9 @@ ckpt_config = CheckpointConfig(keep_checkpoint_max=1, integrated_save=False) ## 多机多卡训练 -前面的章节,对MindSpore的分布式训练进行了介绍,都是基于单机8卡的Ascend环境,使用多机进行分布式训练,可以更大地提升训练速度。 +前面的章节,对MindSpore的分布式训练进行了介绍,都是基于单机多卡的Ascend环境,使用多机进行分布式训练,可以更大地提升训练速度。 在Ascend环境下,跨机器的NPU单元的通信与单机内各个NPU单元的通信一样,依旧是通过HCCL进行通信,区别在于,单机内的NPU单元天然的是互通的,而跨机器的则需要保证两台机器的网络是互通的。 -在确认了机器之间的NPU单元的网络是通畅后,配置多机的json配置文件,本教程以16卡的配置文件为例。需要注意的是,在多机的json文件配置中,要求rank_id的排序,与server_id的字典序一致。 +在确认了机器之间的NPU单元的网络是通畅后,配置多机的json配置文件,本教程以16卡的配置文件为例,详细的配置文件说明可以参照本教程单机多卡部分的介绍。需要注意的是,在多机的json文件配置中,要求rank_id的排序,与server_id的字典序一致。 ```json { @@ -595,16 +595,17 @@ ckpt_config = CheckpointConfig(keep_checkpoint_max=1, 
integrated_save=False) } ``` -准备好配置文件后,可以进行分布式多机训练脚本的组织,在以2机16卡为例,两台机器上编写的脚本与单机8卡的运行脚本类似,区别在于指定不同的rank_id变量。 +准备好配置文件后,可以进行分布式多机训练脚本的组织,在以2机16卡为例,两台机器上编写的脚本与单机多卡的运行脚本类似,区别在于指定不同的rank_id变量。 ```bash #!/bin/bash echo "==============================================================================================================" echo "Please run the script as: " -echo "bash run.sh DATA_PATH RANK_TABLE_FILE RANK_SIZE RANK_START" -echo "For example: bash run.sh /path/dataset /path/rank_table.json 16 0" +echo "bash run_cluster.sh DATA_PATH RANK_TABLE_FILE RANK_SIZE RANK_START" +echo "For example: bash run_cluster.sh /path/dataset /path/rank_table.json 16 0" echo "It is better to use the absolute path." +echo "The time interval between multiple machines to execute the script should not exceed 120s" echo "==============================================================================================================" execute_path=$(pwd) @@ -629,7 +630,7 @@ do done ``` -上面列出的参考脚本,所要求的代码组织结构如下,脚本中会获取脚本所在路径以及命令执行的路径,并且将所有任务都置于后台执行。 +上面列出的参考脚本,所要求的代码组织结构如下,脚本中会获取脚本所在路径以及命令执行的路径,并且将所有任务都置于后台执行,完整的代码链接请于本教程置顶处获取。 ```text └─tutorial_code @@ -642,7 +643,7 @@ done ```bash # server0 -bash run.sh /path/dataset /path/rank_table.json 16 0 +bash run_cluster.sh /path/dataset /path/rank_table.json 16 0 # server1 -bash run.sh /path/dataset /path/rank_table.json 16 8 +bash run_cluster.sh /path/dataset /path/rank_table.json 16 8 ``` diff --git a/tutorials/training/source_zh_cn/advanced_use/distributed_training_tutorials.rst b/tutorials/training/source_zh_cn/advanced_use/distributed_training_tutorials.rst index 504f487d74228a2ad9ddc6e6ee7d56e0da33f656..c175d75f6c2a5626e3480a05bf17bbaa01aa41a1 100644 --- a/tutorials/training/source_zh_cn/advanced_use/distributed_training_tutorials.rst +++ b/tutorials/training/source_zh_cn/advanced_use/distributed_training_tutorials.rst @@ -10,13 +10,13 @@ 当前MindSpore也提供分布式并行训练的功能。它支持了多种模式包括: - `DATA_PARALLEL`:数据并行模式。 -- `AUTO_PARALLEL`:自动并行模式,融合了数据并行、模型并行及混合并行的1种分布式并行模式,可以自动建立代价模型,找到训练时间较短的并行策略,为用户选择1种并行模式。MindSpore提供了如下的两种不同的策略搜索算法: +- `AUTO_PARALLEL`:自动并行模式,该模式为实验特性,当前只在部分网络验证。自动并行融合了数据并行、模型并行及混合并行,可以自动建立代价模型,找到训练时间较短的并行策略,为用户选择1种并行模式。MindSpore提供了如下的两种不同的策略搜索算法: - `dynamic_programming` :动态规划策略搜索算法。能够搜索出代价模型刻画的最优策略,但在搜索巨大网络模型的并行策略时耗时较长。其代价模型是围绕Ascend 910芯片基于内存的计算开销和通信开销对训练时间建模。 - `recursive_programming` :双递归策略搜索算法。对于巨大网络以及大规模多卡切分能够保证瞬间生成最优策略。其基于符号运算的代价模型可以自由适配不同的加速器集群。 - `SEMI_AUTO_PARALLEL`:半自动并行模式,相较于自动并行,该模式需要用户对算子手动配置切分策略实现并行。 -- `HYBRID_PARALLEL`:在MindSpore中特指用户通过手动切分模型实现混合并行的场景。 +- `HYBRID_PARALLEL`:在MindSpore中特指用户通过手动切分模型并基于通信原语实现混合并行的场景。 .. toctree:: :maxdepth: 1 diff --git a/tutorials/training/source_zh_cn/advanced_use/dump_in_graph_mode.md b/tutorials/training/source_zh_cn/advanced_use/dump_in_graph_mode.md index a4f284f97022ea613898fbcf0bb0accc7d13c25b..023fb46cad69372245c0769b537fa21b71952283 100644 --- a/tutorials/training/source_zh_cn/advanced_use/dump_in_graph_mode.md +++ b/tutorials/training/source_zh_cn/advanced_use/dump_in_graph_mode.md @@ -36,17 +36,29 @@ ### 调试过程 +使用Dump来帮助调试分为两个步骤:1、数据准备;2、数据分析。 + +#### 数据准备 + +数据准备阶段使用同步Dump或异步Dump来生成Dump数据。使用方法详见[同步Dump操作步骤](#id7)和[异步Dump操作步骤](#id12)。 + +#### 数据分析 + +如果用户已经安装了MindInsight, 可以使用MindInsight的离线调试器来分析。离线调试器的使用方法详见[使用离线调试器](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/debugger_offline.html) 。 + +如果没有安装MindInsight,需要通过以下步骤来分析数据。 + 1. 
从脚本找到对应的算子
-   使用Dump功能将自动生成最终执行图的IR文件(IR文件中包含了算子全名,和算子在计算图中输入和输出的依赖,也包含从算子到相应脚本代码的Trace信息),IR文件可以用`vi`命令查看,Dump功能的配置见[同步Dump操作步骤](#id5)和[异步Dump操作步骤](#id10),Dump输出的目录结构见[同步Dump数据对象目录](#id6)和[异步Dump数据对象目录](#id11)。然后通过图文件找到脚本中代码对应的算子,参考[同步Dump数据分析样例](#id8)和[异步Dump数据数据分析样例](#id13)。
+   使用Dump功能将自动生成最终执行图的IR文件(IR文件中包含了算子全名,和算子在计算图中输入和输出的依赖,也包含从算子到相应脚本代码的Trace信息),IR文件可以用`vi`命令查看,Dump功能的配置见[同步Dump操作步骤](#id7)和[异步Dump操作步骤](#id12),Dump输出的目录结构见[同步Dump数据对象目录](#id8)和[异步Dump数据对象目录](#id13)。然后通过图文件找到脚本中代码对应的算子,参考[同步Dump数据分析样例](#id10)和[异步Dump数据分析样例](#id15)。
2. 从算子到Dump数据
-   在了解脚本和算子的映射关系后,可以确定想要分析的算子名称,从而找到算子对应的dump文件,参考[同步Dump数据对象目录](#id6)和[异步Dump数据对象目录](#id11)。
+   在了解脚本和算子的映射关系后,可以确定想要分析的算子名称,从而找到算子对应的dump文件,参考[同步Dump数据对象目录](#id8)和[异步Dump数据对象目录](#id13)。
3. 分析Dump数据
-   通过解析Dump数据,可以与其他第三方框架进行对比。同步Dump数据格式参考[同步Dump数据文件介绍](#id7),异步Dump数据格式参考[异步Dump数据文件介绍](#id12)。
+   通过解析Dump数据,可以与其他第三方框架进行对比。同步Dump数据格式参考[同步Dump数据文件介绍](#id9),异步Dump数据格式参考[异步Dump数据文件介绍](#id14)。
### 适用场景
@@ -71,11 +83,10 @@ MindSpore提供了同步Dump与异步Dump两种模式: 不同模式所需要的配置文件和dump出来的数据格式不同:
-- 同步模式较异步模式会占用更多内存,但易用性更好。
-- 一般对于中小型网络(如ResNet)等,推荐优先使用同步Dump模式。在网络占用内存不大的情况下,请优先使用同步Dump。若开启同步Dump后,因为模型过大导致需要的内存超过系统限制,再使用异步Dump。
- 在Ascend上开启同步Dump的时候,待Dump的算子会自动关闭内存复用。 - 同步Dump目前支持Ascend、GPU和CPU上的图模式,暂不支持PyNative模式。 - 异步Dump仅支持Ascend上的图模式,不支持PyNative模式。开启异步Dump的时候不会关闭内存复用。
+- 默认使用异步Dump模式,如果要使用同步Dump模式,需要在配置文件中设置"e2e_dump_settings"。
## 同步Dump
@@ -89,7 +100,7 @@
    "dump_mode": 0,
    "path": "/absolute_path",
    "net_name": "ResNet50",
-    "iteration": 0,
+    "iteration": "0|5-8|100-120",
    "input_output": 0,
    "kernels": ["Default/Conv-op12"],
    "support_device": [0,1,2,3,4,5,6,7]
@@ -104,11 +115,11 @@
   - `dump_mode`:设置成0,表示Dump出该网络中的所有算子;设置成1,表示Dump`"kernels"`里面指定的算子。
   - `path`:Dump保存数据的绝对路径。
   - `net_name`:自定义的网络名称,例如:"ResNet50"。
-   - `iteration`:指定需要Dump的迭代,若设置成0,表示Dump所有的迭代。
-   - `input_output`:设置成0,表示Dump出算子的输入和算子的输出;设置成1,表示Dump出算子的输入;设置成2,表示Dump出算子的输出。该配置参数仅支持Ascend和CPU,GPU只能Dump算子的输出。
+   - `iteration`:指定需要Dump数据的迭代。类型为str,用“|”分离要保存的不同区间的step的数据。如"0|5-8|100-120"表示Dump参数初始值,第1个,第6个到第9个,第101个到第121个step的数据。指定“all”,表示Dump所有迭代的数据。
+   - `input_output`:设置成0,表示Dump出算子的输入和算子的输出;设置成1,表示Dump出算子的输入;设置成2,表示Dump出算子的输出。
   - `kernels`:算子的名称列表。开启IR保存开关`context.set_context(save_graphs=True)`并执行用例,从生成的IR文件`trace_code_graph_{graph_id}`中获取算子名称。详细说明可以参照教程:[如何保存IR](https://www.mindspore.cn/doc/note/zh-CN/master/design/mindspore/mindir.html#ir)。
   - `support_device`:支持的设备,默认设置成0到7即可;在分布式训练场景下,需要dump个别设备上的数据,可以只在`support_device`中指定需要Dump的设备Id。该配置参数在CPU上无效,因为CPU下没有device这个概念。
-   - `enable`:开启E2E Dump,如果同时开启同步Dump和异步Dump,那么只有同步Dump会生效。
+   - `enable`:开启E2E Dump。
   - `trans_flag`:开启格式转换。将设备上的数据格式转换成NCHW格式。若为`True`,则数据会以Host侧的4D格式(NCHW)格式保存;若为`False`,则保留Device侧的数据格式。该配置参数在CPU上无效,因为CPU上没有format转换。
2. 设置Dump环境变量,指定Dump的json配置文件。
可以在训练脚本中设置`context.set_context(reserve_class_name_in_scope=False)`,避免Dump文件名称过长导致Dump数据文件生成失败。
-4. 通过`numpy.fromfile`读取和解析同步Dump数据,参考[同步Dump数据文件介绍](#id7)。
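+4. 通过`numpy.load`读取和解析同步Dump数据,参考[同步Dump数据文件介绍](#id9)。
+
+    把上面的步骤串起来,大致流程如下面的最小示意所示(仅为示意:假设Dump配置保存在`/path/to/data_dump.json`,环境变量名`MINDSPORE_DUMP_CONFIG`与示例文件名请以实际版本的文档和Dump输出为准):
+
+    ```python
+    import os
+    # 需要在启动训练之前设置Dump配置文件路径
+    os.environ['MINDSPORE_DUMP_CONFIG'] = '/path/to/data_dump.json'
+
+    # ……执行训练之后,读取某个算子输出对应的Dump数据文件……
+    import numpy as np
+    tensor = np.load("Conv2D.Conv2D-op107.2.2.1623124369613540.output.0.DefaultFormat.npy")
+    print(tensor.shape, tensor.dtype)
+    ```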
### 同步Dump数据对象目录 启动训练后,同步Dump保存的数据对象目录结构如下:
```text
{path}/
-    |-- {net_name}/
-        |-- {device_id}/
-            |-- iteration_{iteration}/
-                -- {op_name}_{input_output_index}_{shape}_{data_type}_{format}.bin
-                …
-        |-- graphs/
-            ms_output_trace_code_graph_{graph_id}.pb
-            ms_output_trace_code_graph_{graph_id}.ir
-        |-- execution_order/
-            ms_execution_order_graph_{graph_id}.csv
-
-    |-- .metadata/
-        data_dump.json
+    - rank_{rank_id}/
+        - .dump_metadata/
+        - {net_name}/
+            - {graph_id}/
+                - {iteration_id}/
+                    {op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}.{input_output_index}.{slot}.{format}.npy
+                    ...
+        - graphs/
+            ms_output_trace_code_graph_{graph_id}.pb
+            ms_output_trace_code_graph_{graph_id}.ir
+        - execution_order/
+            ms_execution_order_graph_{graph_id}.csv
```
- `path`:`data_dump.json`配置文件中设置的绝对路径。
-- `net_name`:`data_dump.json`配置文件中设置的网络名称。
-- `device_id`:训练的卡号。
+- `rank_id`: 逻辑卡号。
+- `net_name`:`data_dump.json`配置文件中设置的网络名称。
- `graph_id`:训练的图标号。
-- `iteration`:训练的轮次。
-- `operator_name`:算子名称。
-- `input_output_index` :输入或输出标号,例如`output_0`表示该文件是该算子的第1个输出Tensor的数据。
-- `shape`: 张量维度信息。
-- `data_type`: 数据类型。
+- `iteration_id`:训练的轮次。
+- `op_type`:算子类型。
+- `op_name`:算子名称。
+- `task_id`:任务标号。
+- `stream_id`:流标号。
+- `timestamp`:时间戳。
+- `input_output_index`:输入或输出标号,例如`output.0`表示该文件是该算子的第1个输出Tensor的数据。
+- `slot`:slot标号。
- `format`: 数据格式。
-在CPU上进行数据dump时,没有`device_id`这个目录层级,因为CPU上没有device这个概念,也没有`graphs`、`execution_order`和`.metadata`目录。
-
### 同步Dump数据文件介绍
-同步Dump生成的数据文件是后缀名为`.bin`的二进制文件,文件命名格式为:
+同步Dump生成的数据文件是后缀名为`.npy`的文件,文件命名格式为:
```text
-{operator_name}_{input_output_index}_{shape}_{data_type}_{format}.bin
+{op_type}.{op_name}.{task_id}.{stream_id}.{timestamp}.{input_output_index}.{slot}.{format}.npy
```
-根据文件名提供的`Tensor`信息,可以用`numpy.fromfile`读取数据,并还原原始数据的`data_type`和`shape`。
+可以用Numpy的`numpy.load`接口读取数据。
同步Dump生成的最终执行图文件后缀名分别为`.pb`和`.ir`,文件命名格式为: ```text ms_output_trace_code_graph_{graph_id}.pb ms_output_trace_code_graph_{graph_id}.ir ``` ... ms_execution_order_graph_{graph_id}.csv ```
-`.metadata`记录了训练的原信息,其中`data_dump.json`保存了用户设置的dump配置。
+`.dump_metadata`记录了训练的原信息,其中`data_dump.json`保存了用户设置的dump配置。
### 同步Dump数据分析样例
@@ -322,35 +333,23 @@ IsFeatureMapOutput: true, IsFeatureMapInputList: (0), pri_format: NC1HWC0} 通过算子名称和输入输出信息,可以查找到唯一对应的Tensor数据文件。比如,若要查看Conv2D-op107算子的第1个输出数据对应的Dump文件,可获取以下信息:
-- `operator_name`:`Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107`。基于图中序号2声明的算子名称,将其中的`/`替换为`--`可得。
+- `operator_name`:`Conv2D-op107`。
-- `input_output_index` :`output_0`表示该文件是该算子的第1个输出Tensor的数据。
+- `input_output_index`:`output.0`表示该文件是该算子的第1个输出Tensor的数据。
-在Dump保存的数据对象文件目录下搜索到相应的文件名:
-`Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107_output_0_shape_32_12_13_13_16_Float16_NC1HWC0.bin`。
-从文件名中可以得知以下信息:
+- `slot`:0,该算子的输出只有一个slot。
-- `shape`: 张量维度是`32_12_13_13_16`。
-
-- `data_type`: 数据类型为`Float16`。
-
-- `format`: 数据格式为`NC1HWC0`(可通过Dump配置文件修改要保存的数据格式)。
-
-还原数据的时候,首先通过执行:
-
-```python
-import numpy
-numpy.fromfile("Default--network-WithLossCell--_backbone-AlexNet--conv3-Conv2d--Conv2D-op107_output_0_shape_32_12_13_13_16_Float16_NC1HWC0.bin", numpy.float16)
-```
+在Dump保存的数据对象文件目录下搜索到相应的文件名:
+`Conv2D.Conv2D-op107.2.2.1623124369613540.output.0.DefaultFormat.npy`。
-生成一维array数据,再通过执行:
+还原数据的时候,通过执行:
```python import numpy
-numpy.reshape(array,
(32,12,13,13,16)) "dump_mode": 0, "path": "/absolute_path", "net_name": "ResNet50", - "iteration": 0, + "iteration": "0|5-8|100-120", "input_output": 0, "kernels": ["Default/Conv-op12"], - "support_device": [0,1,2,3,4,5,6,7] - }, - "async_dump_settings": { - "enable": true, + "support_device": [0,1,2,3,4,5,6,7], "op_debug_mode": 0 } } @@ -383,11 +379,10 @@ numpy.reshape(array, (32,12,13,13,16)) - `dump_mode`:设置成0,表示Dump出改网络中的所有算子;设置成1,表示Dump`"kernels"`里面指定的算子。 - `path`:Dump保存数据的绝对路径。 - `net_name`:自定义的网络名称,例如:"ResNet50"。 - - `iteration`:指定需要Dump的迭代。非数据下沉模式下,`iteration`需要设置成0,并且会Dump出每个迭代的数据。 + - `iteration`:指定需要Dump的迭代。类型为str,用“|”分离要保存的不同区间的step的数据。如"0|5-8|100-120"表示Dump参数初始值,第1个,第6个到第9个, 第101个到第121个step的数据。指定“all”,表示Dump所有迭代的数据。 - `input_output`:设置成0,表示Dump出算子的输入和算子的输出;设置成1,表示Dump出算子的输入;设置成2,表示Dump出算子的输出。 - `kernels`:算子的名称列表。开启IR保存开关`context.set_context(save_graphs=True)`并执行用例,从生成的`trace_code_graph_{graph_id}`IR文件中获取算子名称。`kernels`仅支持TBE算子、AiCPU算子、通信算子,若设置成通信算子的名称,将会Dump出通信算子的输入算子的数据。详细说明可以参照教程:[如何保存IR](https://www.mindspore.cn/doc/note/zh-CN/master/design/mindspore/mindir.html#ir)。 - `support_device`:支持的设备,默认设置成0到7即可;在分布式训练场景下,需要dump个别设备上的数据,可以只在`support_device`中指定需要Dump的设备Id。 - - `enable`:开启异步Dump,如果同时开启同步Dump和异步Dump,那么只有同步Dump会生效。 - `op_debug_mode`:该属性用于算子溢出调试,设置成0,表示不开启溢出;设置成1,表示开启AiCore溢出检测;设置成2,表示开启Atomic溢出检测;设置成3,表示开启全部溢出检测功能。在Dump数据的时候请设置成0,若设置成其他值,则只会Dump溢出算子的数据。 2. 设置数据Dump的环境变量。 @@ -403,13 +398,11 @@ numpy.reshape(array, (32,12,13,13,16)) 可以在训练脚本中设置`context.set_context(reserve_class_name_in_scope=False)`,避免Dump文件名称过长导致Dump数据文件生成失败。 -4. 参考[异步Dump数据分析样例](#id13)解析Dump数据文件。 +4. 参考[异步Dump数据分析样例](#id15)解析Dump数据文件。 注意: - 若需要dump全量或部分算子,则可以修改json配置文件中的`dump_mode`选项为0或1。 -- 若开启数据下沉功能(设置`model.train`或`DatasetHelper`中的`dataset_sink_mode`参数为`True`),只能dump出配置文件里指定的一个step的数据(此时`iteration 0`表示第0个step),并保存到指定目录下。 -- 若不开启数据下沉功能(设置`model.train`或`DatasetHelper`中的`dataset_sink_mode`参数为`False`),配置文档里`iteration`必须指定为0,所有step的数据都保存在一个目录中,无法支持多step的数据管理。此时建议只执行一次step的数据Dump(可以通过修改脚本只训练一个step)。 - 使用Dump功能将自动生成最终执行图的IR文件。 ### 异步Dump数据对象目录 @@ -418,37 +411,36 @@ numpy.reshape(array, (32,12,13,13,16)) ```text {path}/ - |-- {device_id}/ - |-- {new_name}_graph_{graph_id}/ - |-- {graph_id}/ - |-- {iteration}/ - |-- {op_type}.{op_name}.{task_id}.{timestamp} - … - |-- graphs/ + - rank_{rank_id}/ + - .dump_metadata/ + - {net_name}/ + - {graph_id}/ + - {iteration_id}/ + {op_type}.{op_name}.{task_id}.{stream_id}.{timestamp} + ... 
+        - graphs/
+            ms_output_trace_code_graph_{graph_id}.pb
+            ms_output_trace_code_graph_{graph_id}.ir
+        - execution_order/
+            ms_execution_order_graph_{graph_id}.csv
```
-- `path`:`data_dump.json`文件中设置的绝对路径。
-- `net_name`:`data_dump.json`文件中设置的网络名称。
-- `device_id`:训练的卡号。
+- `path`:`data_dump.json`配置文件中设置的绝对路径。
+- `rank_id`: 逻辑卡号。
+- `net_name`:`data_dump.json`配置文件中设置的网络名称。
- `graph_id`:训练的图标号。
-- `iteration`:训练的轮次。
+- `iteration_id`:训练的轮次。
- `op_type`:算子类型。 - `op_name`:算子名称。
-- `taskid`:任务标号。
+- `task_id`:任务标号。
+- `stream_id`:流标号。
- `timestamp`:时间戳。
### 异步Dump数据文件介绍
-启动训练后,异步Dump生成的原始数据文件是protobuf格式的文件,需要用到海思Run包中自带的数据解析工具进行解析,详见[如何查看dump数据文件](https://support.huaweicloud.com/tg-Inference-cann/atlasaccuracy_16_0014.html) 。
+启动训练后,异步Dump生成的原始数据文件是protobuf格式的文件,需要用到海思Run包中自带的数据解析工具进行解析,详见[如何查看dump数据文件](https://support.huawei.com/enterprise/zh/doc/EDOC1100191946/8d6ddc58) 。
-数据在Device侧的格式可能和Host侧计算图中的定义不同,异步Dump的数据格式为Device侧格式,如果想要转为Host侧格式,可以参考[如何进行dump数据文件Format转换](https://support.huaweicloud.com/tg-Inference-cann/atlasaccuracy_16_0013.html) 。
+数据在Device侧的格式可能和Host侧计算图中的定义不同,异步Dump的数据格式为Device侧格式,如果想要转为Host侧格式,可以参考[如何进行dump数据文件Format转换](https://support.huawei.com/enterprise/zh/doc/EDOC1100191946/fa6aecce) 。
异步Dump生成的数据文件命名规则如下:
@@ -459,7 +451,9 @@
如果`op_type`和`op_name`中出现了“.”、“/”、“\”、空格时,会转换为下划线表示。
-异步Dump生成的最终执行图文件和节点执行序文件命名规则与同步Dump相同,可以参考[同步Dump数据文件介绍](#id7)。
+Dump生成的原始数据文件也可以使用MindInsight的数据解析工具DumpParser解析,DumpParser的使用方式详见[DumpParser介绍](https://gitee.com/mindspore/mindinsight/tree/master/mindinsight/parser) 。MindInsight解析出来的数据格式与同步dump的数据格式完全相同。
+
+异步Dump生成的最终执行图文件和节点执行序文件命名规则与同步Dump相同,可以参考[同步Dump数据文件介绍](#id9)。
### 异步Dump数据分析样例 通过以下步骤可以解析Dump数据文件: python ${The absolute path of msaccucmp.py} convert -d {file path of dump} -out {file path of output} ```
-   若需要转换数据格式,可参考使用说明链接 。
+   若需要转换数据格式,可参考使用说明链接 。
如Dump生成的数据文件为:
diff --git a/tutorials/training/source_zh_cn/advanced_use/hpc_sponge.md b/tutorials/training/source_zh_cn/advanced_use/hpc_sponge.md index 869dd138f6a31bda1653e13b9007a3bcafd253e6..381fb8aad8d0763ff677b69be957c407243410e6 100644
--- a/tutorials/training/source_zh_cn/advanced_use/hpc_sponge.md
+++ b/tutorials/training/source_zh_cn/advanced_use/hpc_sponge.md
@@ -101,7 +101,7 @@ NVT 290k
    langevin_gamma=1.0,              # Gamma_ln for Langevin thermostat represents coupling strength between thermostat and system
    target_temperature=290,          # Target temperature
    write_information_interval=1000, # Output frequency
-    amber_irest=1,                   # Input style ; amber_irest=1 for using amber style input & rst7 file contains veclocity
+    amber_irest=0,                   # Input style ; amber_irest=1 for using amber style input & rst7 file contains velocity
    cut=10.0,                        # Nonbonded cutoff distance in Angstroms
```
@@ -111,7 +111,7 @@ NVT 290k
- `thermostat`,表示控温方法,`1`表示采用的是`Liujian-Langevin`方法。 - `langevin_gamma`,表示控温器中的`Gamma_ln`参数。 - `target_temperature`,表示目标温度。
-- `amber_irest`,表示输入方式,`1`表示使用amber方式输入,并且`rst7`文件中包含`veclocity`属性。
+- `amber_irest`,表示输入方式,`0`表示使用amber方式输入,`rst7`文件中不包含`velocity`属性。
- `cut`,表示非键相互作用的距离。
### 加载数据
@@ -191,7 +191,7 @@ python main.py --i /path/NVT_290_10ns.in \
```text
_steps_ _TEMP_ _TOT_POT_ENE_ _BOND_ENE_ _ANGLE_ENE_ _DIHEDRAL_ENE_ _14LJ_ENE_ _14CF_ENE_ _LJ_ENE_ _CF_PME_ENE_
-        1 293.105 -6117.709 1204.406 7.096 4.491 3.456 44.018 1372.488 -8753.664
+        0 0.000 -5713.804 0.037 0.900 14.909 9.072 194.477 765.398 -6698.648
...
``` diff --git a/tutorials/training/source_zh_cn/advanced_use/images/cluster_flops.png b/tutorials/training/source_zh_cn/advanced_use/images/cluster_flops.png new file mode 100644 index 0000000000000000000000000000000000000000..e3d6ebe68d0c056e463a8e582bfc8dc684e19e87 Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/cluster_flops.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/concept_drift_timeseries.JPG b/tutorials/training/source_zh_cn/advanced_use/images/concept_drift_timeseries.JPG new file mode 100644 index 0000000000000000000000000000000000000000..3b6cb11c8990228adb01b519f6b88be47ed9424e Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/concept_drift_timeseries.JPG differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_ask_recommend.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_ask_recommend.png old mode 100755 new mode 100644 index 95af3226a83d5def01c3d0cb8d17ba1e3315e638..dedae2d5e8389cff5864fa9b60e15491d4293e42 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_ask_recommend.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_ask_recommend.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_init_page.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_init_page.png old mode 100755 new mode 100644 index dc724b0c58e5e0ea9c3b775f83ea76a2349de9d1..2df2e8aaef6caedfc864d50f50dfc6dc288b0ede Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_init_page.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_init_page.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_edit.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_edit.png new file mode 100644 index 0000000000000000000000000000000000000000..bf6afec15b8802d167528d6a36e93d73500c339b Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_edit.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_entry.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_entry.png new file mode 100644 index 0000000000000000000000000000000000000000..6b2a4b903e04df6e9e9f1e9d18d87edc7d98c235 Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_entry.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_reset.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_reset.png new file mode 100644 index 0000000000000000000000000000000000000000..f91b269e3f2f106f9896123d76445b0c6b7eb65b Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_reset.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_waiting.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_waiting.png new file mode 100644 index 0000000000000000000000000000000000000000..0965774233dba9df038eecbebb4b312e2f0a7b6d Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_offline_waiting.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_search_node_type.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_search_node_type.png old mode 100755 new mode 100644 index 
8e23df5ae9eaea10baea89f54360b18eed1aeb0d..1d0789310928faf5062284e648231d5ae5d1a458 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_search_node_type.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_search_node_type.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_set_watch_point.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_set_watch_point.png old mode 100755 new mode 100644 index ab135b12017e1b6918460a52502398849b803fa4..1b4a9c49f5e568a58c006f0c5e5606e333574064 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_set_watch_point.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_set_watch_point.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_info.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_info.png new file mode 100644 index 0000000000000000000000000000000000000000..9da7c2af41f87fb33a3d4230f57240e90cf1be77 Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_info.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_list.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_list.png new file mode 100644 index 0000000000000000000000000000000000000000..02dd27420b7b8041f1ad0921c588196c31b61283 Binary files /dev/null and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_stack_list.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_tensor_view.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_tensor_view.png old mode 100755 new mode 100644 index d7bc1fcbbc1e767be07c374d3d298e43160a637d..c209a0971bbc256d8f51d8fb50ea97bb0109e967 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_tensor_view.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_tensor_view.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_waiting.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_waiting.png old mode 100755 new mode 100644 index 63bb4d6066fb81eb1629ba7ac545f69114296ff4..6e83e34d9ddbcf4231e4a8eb19d853ae0928eae0 Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_waiting.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_waiting.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/images/debugger_watch_point_hit.png b/tutorials/training/source_zh_cn/advanced_use/images/debugger_watch_point_hit.png old mode 100755 new mode 100644 index 87dfeb02082e7a518bd6cba31224b73d5d7bdd6e..ed62727543880b0b8f3252d83359d9000ab6db2d Binary files a/tutorials/training/source_zh_cn/advanced_use/images/debugger_watch_point_hit.png and b/tutorials/training/source_zh_cn/advanced_use/images/debugger_watch_point_hit.png differ diff --git a/tutorials/training/source_zh_cn/advanced_use/implement_high_order_differentiation.md b/tutorials/training/source_zh_cn/advanced_use/implement_high_order_differentiation.md index 6d76aaacc1d572acee0cc1ed73f2f01eef12276f..2f3e8b3225aecb20fdc1ce121bcac35e0a79b809 100644 --- a/tutorials/training/source_zh_cn/advanced_use/implement_high_order_differentiation.md +++ b/tutorials/training/source_zh_cn/advanced_use/implement_high_order_differentiation.md @@ -217,6 +217,61 @@ print(output) [0. 0. 0. 
]] ``` +## 停止计算梯度 + +我们可以使用`stop_gradient`来屏蔽网络内某个算子对梯度计算的影响,例如: + +```python +import numpy as np +import mindspore.nn as nn +import mindspore.ops as ops +from mindspore import Tensor +from mindspore import ParameterTuple, Parameter +from mindspore import dtype as mstype +from mindspore.ops.functional import stop_gradient + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.matmul = ops.MatMul() + + def construct(self, x, y): + out1 = self.matmul(x, y) + out2 = self.matmul(x, y) + out2 = stop_gradient(out2) + out = out1 + out2 + return out + +class GradNetWrtX(nn.Cell): + def __init__(self, net): + super(GradNetWrtX, self).__init__() + self.net = net + self.grad_op = ops.GradOperation() + + def construct(self, x, y): + gradient_function = self.grad_op(self.net) + return gradient_function(x, y) + +x = Tensor([[0.8, 0.6, 0.2], [1.8, 1.3, 1.1]], dtype=mstype.float32) +y = Tensor([[0.11, 3.3, 1.1], [1.1, 0.2, 1.4], [1.1, 2.2, 0.3]], dtype=mstype.float32) +output = GradNetWrtX(Net())(x, y) +print(output) +``` + +```text + [[4.5, 2.7, 3.6], + [4.5, 2.7, 3.6]] +``` + +在这里我们对`out2`设置了`stop_gradient`,所以`out2`没有对梯度计算作出任何贡献。如果删除`out2 = stop_gradient(out2)`,那么输出值会变为: + +```text + [[9.0, 5.4, 7.2], + [9.0, 5.4, 7.2]] +``` + +在不对`out2`设置`stop_gradient`的情况下,`out2`和`out1`会对梯度产生相同的贡献,所以可以看到,结果中每一项的值都变为了原来的两倍。 + ## 高阶求导 MindSpore可通过多次求导的方式支持高阶导数,下面通过几类例子展开阐述。 diff --git a/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts.md b/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts.md index fc26fb08856d084288f3f7fa3004c02993d7f701..fbba48dd39f42bc7a9bce6b959ba83ab0fa0c00b 100644 --- a/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts.md +++ b/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts.md @@ -1,10 +1,10 @@ -# 迁移第三方框架训练脚本 +# 迁移脚本全流程 `Linux` `Ascend` `GPU` `CPU` `全流程` `初级` `中级` `高级` -- [迁移第三方框架训练脚本](#迁移第三方框架训练脚本) +- [迁移脚本全流程](#迁移脚本全流程) - [概述](#概述) - [准备环节](#准备环节) - [算子评估](#算子评估) diff --git a/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts_mindconverter.md b/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts_mindconverter.md index ea67ec864f99e60e4e3403a3c082efc8c8300110..844370f993879492da7ca131516e11f1046b43a6 100644 --- a/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts_mindconverter.md +++ b/tutorials/training/source_zh_cn/advanced_use/migrate_3rd_scripts_mindconverter.md @@ -1,10 +1,10 @@ -# 使用工具迁移第三方框架脚本 +# 使用工具迁移模型定义脚本 `Linux` `Ascend` `模型开发` `初级` -- [使用工具迁移第三方框架脚本](#使用工具迁移第三方框架脚本) +- [使用工具迁移模型定义脚本](#使用工具迁移模型定义脚本) - [概述](#概述) - [安装](#安装) - [用法](#用法) diff --git a/tutorials/training/source_zh_cn/advanced_use/migrate_script.rst b/tutorials/training/source_zh_cn/advanced_use/migrate_script.rst index dea5b119a20a7530207b977dec670d25f8072f1d..4d669822cd3ba38a2b8f47a7596f5f79624e5815 100644 --- a/tutorials/training/source_zh_cn/advanced_use/migrate_script.rst +++ b/tutorials/training/source_zh_cn/advanced_use/migrate_script.rst @@ -1,4 +1,4 @@ -迁移第三方框架训练脚本 +迁移第三方框架脚本 ======================== ..
toctree:: diff --git a/tutorials/training/source_zh_cn/advanced_use/nlp.rst b/tutorials/training/source_zh_cn/advanced_use/nlp.rst index 37d9606b68dc72b1259f10a5ee6c3afc872842cc..8d92025b769fbc88c575780e0fbb102eccce57b6 100644 --- a/tutorials/training/source_zh_cn/advanced_use/nlp.rst +++ b/tutorials/training/source_zh_cn/advanced_use/nlp.rst @@ -6,3 +6,4 @@ nlp_sentimentnet nlp_bert_poetry + concept_drift_time_series diff --git a/tutorials/training/source_zh_cn/advanced_use/parameterized_quantum_circuit.ipynb b/tutorials/training/source_zh_cn/advanced_use/parameterized_quantum_circuit.ipynb index e4fb4a830f943205a59364ac662a509f9da14498..5b851f5546ac0c6dc31cbe640428c42250001a98 100644 --- a/tutorials/training/source_zh_cn/advanced_use/parameterized_quantum_circuit.ipynb +++ b/tutorials/training/source_zh_cn/advanced_use/parameterized_quantum_circuit.ipynb @@ -311,7 +311,7 @@ "metadata": {}, "outputs": [], "source": [ - "from projectq.ops import QubitOperator\n", + "from mindquantum.ops import QubitOperator\n", "\n", "@circuit_generator(2)\n", "def encoder(qubits):\n", diff --git a/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend.md b/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend.md index 3f330ac98a7ad4e392437c5fd6bd52d864d7cb23..37bc8a1c34bab9ce15da36d60bc66fca56085914 100644 --- a/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend.md +++ b/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend.md @@ -116,6 +116,10 @@ profiler.analyse() 使用算子性能分析组件可以对MindSpore运行过程中的各个算子的执行时间进行统计展示(包括AICORE、AICPU、HOSTCPU算子)。其中AICORE算子包含计算量相关信息。 +- AICORE算子:AI Core 算子是昇腾 AI 处理器计算核心的主要构成,负责执行向量和张量相关的计算密集型算子。TBE(Tensor Boost Engine)是一种在TVM(Tensor Virtual Machine)框架基础上扩展的算子开发工具,用户可使用 TBE 进行 AI Core 算子信息注册。 +- AICPU算子:AI CPU算子是由AI CPU负责执行的、昇腾处理器海思 SoC 中的CPU类算子(包括控制算子、标量和向量等通用计算)。MindSpore中同一个算子可能会同时拥有 AI Core 算子和AI CPU算子,框架会优先选择 AI Core 算子,在没有对应 AI Core 算子或者不满足选择条件的场景下,才会调用AI CPU算子。 +- HOSTCPU算子:Host侧CPU主要负责将图或者算子下发到昇腾芯片,根据实际需求也可以在Host侧CPU上开发算子。HOSTCPU算子特指运行在Host侧CPU上的算子。 + ![op_type_statistics.png](./images/op_type_statistics.PNG) 图3:算子类别统计分析 diff --git a/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend_of_cluster.md b/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend_of_cluster.md index 491710ca88b6c0062e8812c64580b7911c5c53dc..67da23ce20d230909b70b919a2f9106714960aac 100644 --- a/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend_of_cluster.md +++ b/tutorials/training/source_zh_cn/advanced_use/performance_profiling_ascend_of_cluster.md @@ -300,7 +300,7 @@ pip install /usr/local/Ascend/tools/hccl_parser-{version}-py3-none-any.whl - 集群设备的分布情况,使用了哪些服务器的哪些设备。 - 集群设备的内存峰值情况,即内存峰值与可用内存占比。 -- 点击某张设备,可以跳转至该设备的内存详情页面。 +- 点击某个设备,可以跳转至该设备的内存详情页面。 ![cluster_memory.png](./images/cluster_memory.png) @@ -308,6 +308,18 @@ pip install /usr/local/Ascend/tools/hccl_parser-{version}-py3-none-any.whl > 内存使用情况分析暂不支持异构训练场景。 +### 集群FLOPs热力图分析 + +该页面展示了并行场景下,每个设备的FLOPs(浮点运算次数)数据,热力图反映了设备之间FLOPs的相对大小。页面内容包括: + +- 集群设备的分布情况,使用了哪些服务器的哪些设备。 +- 集群设备之间FLOPs的相对大小,每个设备对应矩形块颜色代表当前设备FLOPs与所有设备中最大FLOPs的比值。 +- 点击某个设备,可以跳转至该设备的算子耗时详情页面,含有FLOPs的详细数据。 + +![cluster_flops.png](./images/cluster_flops.png) + +图4:集群FLOPs概览页面 + ## 规格 - 为了控制性能测试时生成数据的大小,大型网络建议性能调试的step数目限制在10以内。 diff --git a/tutorials/training/source_zh_cn/advanced_use/qnn_for_nlp.ipynb b/tutorials/training/source_zh_cn/advanced_use/qnn_for_nlp.ipynb index
cafa44d1e4195ffd6ecc1da1af3334f7bf164627..0f197fe3e9e4ffdd7ddcdff40eadb7ffd597e674 100644 --- a/tutorials/training/source_zh_cn/advanced_use/qnn_for_nlp.ipynb +++ b/tutorials/training/source_zh_cn/advanced_use/qnn_for_nlp.ipynb @@ -29,7 +29,7 @@ "source": [ "import numpy as np\n", "import time\n", - "from projectq.ops import QubitOperator\n", + "from mindquantum.ops import QubitOperator\n", "import mindspore.ops as ops\n", "import mindspore.dataset as ds\n", "from mindspore import nn\n", @@ -204,7 +204,7 @@ "label_bin = bin(label)[-1:1:-1].ljust(n_qubits,'0') # binary form of label\n", "label_array = np.array([int(i)*np.pi for i in label_bin]).astype(np.float32) # parameter value of encoder\n", "encoder = GenerateEncoderCircuit(n_qubits, prefix='e') # encoder circuit\n", - "encoder_para_names = encoder.parameter_resolver().para_name # parameter names of encoder\n", + "encoder_para_names = encoder.para_name # parameter names of encoder\n", "\n", "print(\"Label is: \", label)\n", "print(\"Binary label is: \", label_bin)\n", @@ -421,8 +421,8 @@ " encoder.no_grad()\n", " circ += encoder\n", " circ += ansatz\n", - " encoder_param_name.extend(list(encoder.parameter_resolver()))\n", - " ansatz_param_name.extend(list(ansatz.parameter_resolver()))\n", + " encoder_param_name.extend(encoder.para_name)\n", + " ansatz_param_name.extend(ansatz.para_name)\n", " net = MindQuantumLayer(encoder_param_name,\n", " ansatz_param_name,\n", " circ,\n", @@ -842,4 +842,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/tutorials/training/source_zh_cn/advanced_use/summary_record.md b/tutorials/training/source_zh_cn/advanced_use/summary_record.md index 21f9006a7b9a4887dc61a741210436fe83bb73a9..5cefbd25fee9b6a184bb870cb869e803cce14ef0 100644 --- a/tutorials/training/source_zh_cn/advanced_use/summary_record.md +++ b/tutorials/training/source_zh_cn/advanced_use/summary_record.md @@ -307,7 +307,7 @@ class ConfusionMatrixCallback(Callback): ... confusion_matrix = ConfusionMatrixCallback(summary_dir='./summary_dir') -model.train(network, train_dataset=ds_train, callbacks=[confusion_matrix]) +model.train(epoch=2, train_dataset=ds_train, callbacks=[confusion_matrix]) ``` 上面的三种方式,支持记录计算图, 损失值等多种数据。除此以外,MindSpore还支持保存训练中其他阶段的计算图,通过 diff --git a/tutorials/training/source_zh_cn/conf.py b/tutorials/training/source_zh_cn/conf.py index aa4b98834d2001250bd2adc3442aabc4e228f9dd..1f2f7e7363bd0f0bd5d5cfa7842c6157e162c669 100644 --- a/tutorials/training/source_zh_cn/conf.py +++ b/tutorials/training/source_zh_cn/conf.py @@ -31,7 +31,7 @@ release = 'master' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'recommonmark', + 'myst_parser', 'sphinx_markdown_tables', 'nbsphinx', 'sphinx.ext.mathjax', diff --git a/tutorials/training/source_zh_cn/index.rst b/tutorials/training/source_zh_cn/index.rst index fbee3e070cc17fce93a01987cc143af2c81f52db..348cdd1a1e5e5a126760e6dc66ed56579cd17b11 100644 --- a/tutorials/training/source_zh_cn/index.rst +++ b/tutorials/training/source_zh_cn/index.rst @@ -98,6 +98,7 @@ advanced_use/test_model_security_fuzzing advanced_use/test_model_security_membership_inference advanced_use/model_encrypt_protection + advanced_use/concept_drift_time_series .. toctree:: :glob: @@ -384,7 +385,7 @@
- 使用工具迁移第三方框架脚本 + 使用工具迁移模型定义脚本
MindConverter是一款将PyTorch模型脚本转换至MindSpore的脚本迁移工具。结合转换报告的提示信息,用户对转换后脚本进行微小改动,即可快速将PyTorch模型脚本迁移至MindSpore。 @@ -396,7 +397,7 @@
- 迁移第三方框架训练脚本 + 迁移脚本全流程
本教程介绍如何将已有的TensorFlow、PyTorch等的网络迁移到MindSpore,包括主要步骤和操作建议,帮助你快速进行网络迁移。 diff --git a/tutorials/training/source_zh_cn/quick_start/quick_start.ipynb b/tutorials/training/source_zh_cn/quick_start/quick_start.ipynb index c6f4e5f8e6e6ddd8453bd441f2088da7d664807e..938b7b2262fa659a96d51dbe97ea39b3ca2058e8 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_start.ipynb +++ b/tutorials/training/source_zh_cn/quick_start/quick_start.ipynb @@ -51,9 +51,9 @@ "\n", "我们示例中用到的`MNIST`数据集是由10类$28*28$的灰度图片组成,训练数据集包含60000张图片,测试数据集包含10000张图片。\n", "\n", - "在Jupyter Notebook中执行如下命令下载MNIST数据集。\n", + "> MNIST数据集下载页面:。页面提供4个数据集下载链接,其中前2个文件是训练数据需要,后2个文件是测试结果需要。\n", "\n", - "> MNIST数据集下载页面:。页面提供4个数据集下载链接,其中前2个文件是训练数据需要,后2个文件是测试结果需要。" + "在Jupyter Notebook中执行如下命令下载MNIST数据集。" ] }, { diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video.md b/tutorials/training/source_zh_cn/quick_start/quick_video.md index 8e1de8f507d8a8ca5fc3ee99f96ea65b814e9795..3a24d879ec7b5faecd66e9f03146b10c09b70b66 100644 --- a/tutorials/training/source_zh_cn/quick_start/quick_video.md +++ b/tutorials/training/source_zh_cn/quick_start/quick_video.md @@ -750,6 +750,38 @@ +## 网络迁移 + + + + + + ## 参与社区 diff --git a/tutorials/training/source_zh_cn/quick_start/quick_video/network_migration_process.md b/tutorials/training/source_zh_cn/quick_start/quick_video/network_migration_process.md new file mode 100644 index 0000000000000000000000000000000000000000..8c25b3f20f3b5e5618ec7b4dd587f03bb2249607 --- /dev/null +++ b/tutorials/training/source_zh_cn/quick_start/quick_video/network_migration_process.md @@ -0,0 +1,9 @@ +# 网络迁移流程 + +[comment]: <> (本文档中包含手把手系列视频,码云Gitee不支持展示,请于官方网站对应教程中查看) + + + +**更多内容**: \ No newline at end of file diff --git a/tutorials/training/source_zh_cn/use/defining_the_network.md b/tutorials/training/source_zh_cn/use/defining_the_network.md index da32937232104cb16c4551326a1fdf303158a9a9..538427cd8d0fa464a1beded94a19f4e9b7bfa0c3 100644 --- a/tutorials/training/source_zh_cn/use/defining_the_network.md +++ b/tutorials/training/source_zh_cn/use/defining_the_network.md @@ -18,7 +18,7 @@ - MindSpore提供了迁移第三方训练框架的脚本,支持将已有的TensorFlow、PyTorch等的网络迁移到MindSpore,帮助你快速进行网络迁移。 - 通过[迁移第三方框架训练脚本](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/migrate_script.html)了解详细帮助信息。 + 通过[迁移脚本全流程](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/migrate_script.html)了解详细帮助信息。 - MindSpore支持使用开发深度学习模型的逻辑进行概率编程,还提供深度概率学习的工具箱,构建贝叶斯神经网络。 diff --git a/tutorials/tutorial_code/ascend310_single_op_sample/main.cc b/tutorials/tutorial_code/ascend310_single_op_sample/main.cc index ead314976c8293e9bb3327433fbd3f9bc2371515..d7f8e9dfa34a4091a204231cd2bf859797db1ed6 100644 --- a/tutorials/tutorial_code/ascend310_single_op_sample/main.cc +++ b/tutorials/tutorial_code/ascend310_single_op_sample/main.cc @@ -59,7 +59,7 @@ int main() { inputs.emplace_back(origin_inputs[0].Name(), origin_inputs[0].DataType(), origin_inputs[0].Shape(), input_data_1.data(), sizeof(float) * input_data_1.size()); inputs.emplace_back(origin_inputs[1].Name(), origin_inputs[1].DataType(), origin_inputs[1].Shape(), - input_data_1.data(), sizeof(float) * input_data_1.size()); + input_data_2.data(), sizeof(float) * input_data_2.size()); // infer ret = tensor_add.Predict(inputs, &outputs); diff --git a/tutorials/tutorial_code/debugging_info/src/dataset.py b/tutorials/tutorial_code/debugging_info/src/dataset.py index eac47f1b9f5942481b26986265340e5a3608e5f3..cc7ec9bb71305cf4ad330e5d6ce859e766802484 100644 --- 
a/tutorials/tutorial_code/debugging_info/src/dataset.py +++ b/tutorials/tutorial_code/debugging_info/src/dataset.py @@ -44,6 +44,10 @@ class CustomDataSet: self.repeat_count = 1 self.batch_data_size = (self.batch_size,) + image_size + def get_batch_size(self): + """get batch size""" + return self.batch_size + def get_dataset_size(self): """get dataset size""" return int(self.size / self.batch_size) diff --git a/tutorials/tutorial_code/nnie_proposal/CMakeLists.txt b/tutorials/tutorial_code/nnie_proposal/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e1b05050300901e3f0360c9d73c0841b63ce9d7 --- /dev/null +++ b/tutorials/tutorial_code/nnie_proposal/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.14) +project(nnie_proposal) + +set(MSLIB_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_patry/") +execute_process( + COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} . + OUTPUT_VARIABLE result + WORKING_DIRECTORY ${MSLIB_DIR}) +execute_process( + COMMAND ${CMAKE_COMMAND} --build . + WORKING_DIRECTORY ${MSLIB_DIR}) + +#execute_process(COMMAND tar xzf "ms-prefix/src/mindspore-lite-1.0.1-runtime-arm64-cpu.tar.gz" WORKING_DIRECTORY ${MSLIB_DIR}) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/ms-prefix/src/ms/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/ms-prefix/src/third_patry/flatbuffers/include) + +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src SRC) + +add_library(nnie_proposal SHARED + ${SRC}) diff --git a/tutorials/tutorial_code/nnie_proposal/src/proposal.cc b/tutorials/tutorial_code/nnie_proposal/src/proposal.cc new file mode 100644 index 0000000000000000000000000000000000000000..1461b302ab4a24c42072484d20693a85517e1e9b --- /dev/null +++ b/tutorials/tutorial_code/nnie_proposal/src/proposal.cc @@ -0,0 +1,650 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/proposal.h" +#include +#include +#include +#include "include/errorcode.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; + +namespace mindspore { +namespace proposal { + +uint32_t RpnTmpBufSize(uint32_t num_ratio_anchors, uint32_t num_scale_anchors, uint32_t input_height, + uint32_t input_width) { + uint32_t anchors_num = num_ratio_anchors * num_scale_anchors * input_height * input_width; + uint32_t anchors_size = sizeof(uint32_t) * COORDI_NUM * anchors_num; + uint32_t bbox_delta_size = anchors_size; + uint32_t proposal_size = sizeof(uint32_t) * PROPOSAL_WIDTH * anchors_num; + uint32_t ratio_anchors_size = sizeof(float) * num_ratio_anchors * COORDI_NUM; + uint32_t scale_anchors_size = sizeof(float) * num_ratio_anchors * num_scale_anchors * COORDI_NUM; + uint32_t score_size = sizeof(float) * anchors_num * 2; + uint32_t stack_size = sizeof(Stack) * anchors_num; + uint32_t total_size = + anchors_size + bbox_delta_size + proposal_size + ratio_anchors_size + scale_anchors_size + score_size + stack_size; + return total_size; +} + +static float exp_coef[10][16] = { + {1.0f, 1.00024f, 1.00049f, 1.00073f, 1.00098f, 1.00122f, 1.00147f, 1.00171f, 1.00196f, 1.0022f, 1.00244f, 1.00269f, + 1.00293f, 1.00318f, 1.00342f, 1.00367f}, + {1.0f, 1.00391f, 1.00784f, 1.01179f, 1.01575f, 1.01972f, 1.02371f, 1.02772f, 1.03174f, 1.03578f, 1.03984f, 1.04391f, + 1.04799f, 1.05209f, 1.05621f, 1.06034f}, + {1.0f, 1.06449f, 1.13315f, 1.20623f, 1.28403f, 1.36684f, 1.45499f, 1.54883f, 1.64872f, 1.75505f, 1.86825f, 1.98874f, + 2.117f, 2.25353f, 2.39888f, 2.55359f}, + {1.0f, 2.71828f, 7.38906f, 20.0855f, 54.5981f, 148.413f, 403.429f, 1096.63f, 2980.96f, 8103.08f, 22026.5f, 59874.1f, + 162755.0f, 442413.0f, 1.2026e+006f, 3.26902e+006f}, + {1.0f, 8.88611e+006f, 7.8963e+013f, 7.01674e+020f, 6.23515e+027f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, + 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, + 5.54062e+034f}, + {1.0f, 0.999756f, 0.999512f, 0.999268f, 0.999024f, 0.99878f, 0.998536f, 0.998292f, 0.998049f, 0.997805f, 0.997562f, + 0.997318f, 0.997075f, 0.996831f, 0.996588f, 0.996345f}, + {1.0f, 0.996101f, 0.992218f, 0.98835f, 0.984496f, 0.980658f, 0.976835f, 0.973027f, 0.969233f, 0.965455f, 0.961691f, + 0.957941f, 0.954207f, 0.950487f, 0.946781f, 0.94309f}, + {1.0f, 0.939413f, 0.882497f, 0.829029f, 0.778801f, 0.731616f, 0.687289f, 0.645649f, 0.606531f, 0.569783f, 0.535261f, + 0.502832f, 0.472367f, 0.443747f, 0.416862f, 0.391606f}, + {1.0f, 0.367879f, 0.135335f, 0.0497871f, 0.0183156f, 0.00673795f, 0.00247875f, 0.000911882f, 0.000335463f, + 0.00012341f, 4.53999e-005f, 1.67017e-005f, 6.14421e-006f, 2.26033e-006f, 8.31529e-007f, 3.05902e-007f}, + {1.0f, 1.12535e-007f, 1.26642e-014f, 1.42516e-021f, 1.60381e-028f, 1.80485e-035f, 2.03048e-042f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}}; +static float QuickExp(int32_t value) { + if (value & 0x80000000) { + value = ~value + 0x00000001; + return exp_coef[5][value & 0x0000000F] * exp_coef[6][(value >> 4) & 0x0000000F] * + exp_coef[7][(value >> 8) & 0x0000000F] * exp_coef[8][(value >> 12) & 0x0000000F] * + exp_coef[9][(value >> 16) & 0x0000000F]; + } else { + return exp_coef[0][value & 0x0000000F] * exp_coef[1][(value >> 4) & 0x0000000F] * + exp_coef[2][(value >> 8) & 0x0000000F] * exp_coef[3][(value >> 12) & 0x0000000F] * + exp_coef[4][(value >> 16) & 0x0000000F]; + } +} + +static int32_t SoftMax(float *src, uint32_t num) { + float max = 0; + float sum = 0; + uint32_t i 
= 0; + + for (i = 0; i < num; ++i) { + if (max < src[i]) { + max = src[i]; + } + } + + for (i = 0; i < num; ++i) { + src[i] = QuickExp((int32_t)((src[i] - max) * QUANT_BASE)); + sum += src[i]; + } + + for (i = 0; i < num; ++i) { + src[i] /= sum; + } + return RET_OK; +} +static void Argswap(int32_t *src1, int32_t *src2) { + for (uint32_t i = 0; i < PROPOSAL_WIDTH; i++) { + int32_t tmp = src1[i]; + src1[i] = src2[i]; + src2[i] = tmp; + } +} + +static int32_t NonRecursiveArgQuickSort(int32_t *array, int32_t low, int32_t high, Stack *stack, int32_t max_num) { + int32_t top = 0; + stack[top].min_ = low; + stack[top].max_ = high; + + while (top > -1) { + low = stack[top].min_; + high = stack[top].max_; + int32_t i = low; + int32_t j = high; + + int32_t key_confidence = array[PROPOSAL_WIDTH * low + 4]; + top--; + while (i < j) { + while ((i < j) && (key_confidence > array[j * PROPOSAL_WIDTH + 4])) { + j--; + } + if (i < j) { + Argswap(&array[i * PROPOSAL_WIDTH], &array[j * PROPOSAL_WIDTH]); + i++; + } + + while ((i < j) && (key_confidence < array[i * PROPOSAL_WIDTH + 4])) { + i++; + } + if (i < j) { + Argswap(&array[i * PROPOSAL_WIDTH], &array[j * PROPOSAL_WIDTH]); + j--; + } + } + + if (low <= max_num) { + if (low < i - 1) { + top++; + stack[top].min_ = low; + stack[top].max_ = i - 1; + } + + if (high > i + 1) { + top++; + stack[top].min_ = i + 1; + stack[top].max_ = high; + } + } + } + return RET_OK; +} + +static int32_t FilterLowScoreBbox(int32_t *proposals, uint32_t anchors_num, uint32_t filter_thresh, + uint32_t *num_after_filter) { + uint32_t proposal_cnt = anchors_num; + + if (filter_thresh > 0) { + uint32_t i; + for (i = 0; i < anchors_num; i++) { + if (proposals[PROPOSAL_WIDTH * i + 4] < (int32_t)filter_thresh) { + proposals[PROPOSAL_WIDTH * i + 5] = 1; + } + } + + proposal_cnt = 0; + for (i = 0; i < anchors_num; i++) { + if (0 == proposals[PROPOSAL_WIDTH * i + 5]) { + proposals[PROPOSAL_WIDTH * proposal_cnt] = proposals[PROPOSAL_WIDTH * i]; + proposals[PROPOSAL_WIDTH * proposal_cnt + 1] = proposals[PROPOSAL_WIDTH * i + 1]; + proposals[PROPOSAL_WIDTH * proposal_cnt + 2] = proposals[PROPOSAL_WIDTH * i + 2]; + proposals[PROPOSAL_WIDTH * proposal_cnt + 3] = proposals[PROPOSAL_WIDTH * i + 3]; + proposals[PROPOSAL_WIDTH * proposal_cnt + 4] = proposals[PROPOSAL_WIDTH * i + 4]; + proposals[PROPOSAL_WIDTH * proposal_cnt + 5] = proposals[PROPOSAL_WIDTH * i + 5]; + proposal_cnt++; + } + } + } + *num_after_filter = proposal_cnt; + return RET_OK; +} + +static int32_t SVP_NNIE_Overlap(int32_t x_min1, int32_t y_min1, int32_t x_max1, int32_t y_max1, int32_t x_min2, + int32_t y_min2, int32_t x_max2, int32_t y_max2, int32_t *area_sum, + int32_t *area_inter) { + /*** Check the input, and change the Return value ***/ + int32_t inter = 0; + int32_t total = 0; + int32_t x_min = 0; + int32_t y_min = 0; + int32_t x_max = 0; + int32_t y_max = 0; + int32_t area1 = 0; + int32_t area2 = 0; + int32_t inter_width = 0; + int32_t inter_height = 0; + + x_min = MAX(x_min1, x_min2); + y_min = MAX(y_min1, y_min2); + x_max = MIN(x_max1, x_max2); + y_max = MIN(y_max1, y_max2); + + inter_width = x_max - x_min + 1; + inter_height = y_max - y_min + 1; + + inter_width = (inter_width >= 0) ? inter_width : 0; + inter_height = (inter_height >= 0) ? 
inter_height : 0; + + inter = inter_width * inter_height; + area1 = (x_max1 - x_min1 + 1) * (y_max1 - y_min1 + 1); + area2 = (x_max2 - x_min2 + 1) * (y_max2 - y_min2 + 1); + + total = area1 + area2 - inter; + + *area_sum = total; + *area_inter = inter; + return RET_OK; +} + +static int32_t SVP_NNIE_NonMaxSuppression(int32_t *proposals, uint32_t anchors_num, uint32_t nms_thresh, + uint32_t max_roi_num) { + /****** define variables *******/ + int32_t x_min1; + int32_t y_min1; + int32_t x_max1; + int32_t y_max1; + int32_t x_min2; + int32_t y_min2; + int32_t x_max2; + int32_t y_max2; + int32_t s32AreaTotal = 0; + int32_t area_inter = 0; + uint32_t i; + uint32_t j; + uint32_t num = 0; + bool bNoOverlap; + for (i = 0; i < anchors_num && num < max_roi_num; i++) { + if (proposals[PROPOSAL_WIDTH * i + 5] == 0) { + num++; + x_min1 = proposals[PROPOSAL_WIDTH * i]; + y_min1 = proposals[PROPOSAL_WIDTH * i + 1]; + x_max1 = proposals[PROPOSAL_WIDTH * i + 2]; + y_max1 = proposals[PROPOSAL_WIDTH * i + 3]; + for (j = i + 1; j < anchors_num; j++) { + if (proposals[PROPOSAL_WIDTH * j + 5] == 0) { + x_min2 = proposals[PROPOSAL_WIDTH * j]; + y_min2 = proposals[PROPOSAL_WIDTH * j + 1]; + x_max2 = proposals[PROPOSAL_WIDTH * j + 2]; + y_max2 = proposals[PROPOSAL_WIDTH * j + 3]; + bNoOverlap = (x_min2 > x_max1) || (x_max2 < x_min1) || (y_min2 > y_max1) || (y_max2 < y_min1); + if (bNoOverlap) { + continue; + } + (void)SVP_NNIE_Overlap(x_min1, y_min1, x_max1, y_max1, x_min2, y_min2, x_max2, y_max2, &s32AreaTotal, + &area_inter); + if (area_inter * QUANT_BASE > ((int32_t)nms_thresh * s32AreaTotal)) { + if (proposals[PROPOSAL_WIDTH * i + 4] >= proposals[PROPOSAL_WIDTH * j + 4]) { + proposals[PROPOSAL_WIDTH * j + 5] = 1; + } else { + proposals[PROPOSAL_WIDTH * i + 5] = 1; + } + } + } + } + } + } + return RET_OK; +} + +static void Rpn(float **inputs, uint32_t num_ratio_anchors, uint32_t num_scale_anchors, uint32_t *scales, + uint32_t *ratios, uint32_t ori_image_height, uint32_t ori_image_width, uint32_t *inputs_height, + uint32_t *inputs_width, uint32_t *inputs_channel, uint32_t inputs_stride, uint32_t max_rois, + uint32_t min_size, uint32_t spatial_scale, uint32_t nms_thresh, uint32_t filter_thresh, + uint32_t num_before_nms, char *pu32MemPool, float *proposal_result, uint32_t dst_stride, + uint32_t *num_rois) { +#if 1 + /******************** define parameters ****************/ + uint32_t size; + int32_t *anchors = nullptr; + int32_t *bbox_delta = nullptr; + int32_t *proposals = nullptr; + int32_t *ptr1 = nullptr; + int32_t *ptr2 = nullptr; + int32_t *ptr3 = nullptr; + uint32_t num_after_filter = 0; + uint32_t num_anchors; + float base_w; + float base_h; + float base_x_ctr; + float base_y_ctr; + float *ratio_anchors = nullptr; + float *f32_ptr = nullptr; + float *f32_ptr2 = nullptr; + float *scale_anchors = nullptr; + float *scores = nullptr; + float f32_size; + uint32_t pixel_interval; + uint32_t src_bbox_index; + uint32_t src_fg_prob_index; + uint32_t src_bg_prob_index; + uint32_t src_bbox_bias; + uint32_t src_prob_bias; + uint32_t des_box; + uint32_t bg_blob_size; + uint32_t anchors_per_pixel; + uint32_t map_size; + uint32_t line_size; + int32_t proposal_width; + int32_t proposal_height; + uint32_t roi_count; + Stack *stack = nullptr; + uint32_t c; + uint32_t h; + uint32_t w; + uint32_t i; + uint32_t j; + uint32_t p; + uint32_t q; + uint32_t z; + uint32_t base_anchor[4] = {0, 0, (min_size - 1), (min_size - 1)}; + + /*********************************** Faster RCNN *********************************************/ + 
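+  // Layout note: the caller-supplied MemPool is carved into consecutive regions
+  // in exactly the order RpnTmpBufSize() sums their sizes: anchors, bbox deltas,
+  // proposals, ratio anchors, scale anchors, scores, then the quick-sort stack.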
/********* calculate the start pointer of each part in MemPool *********/ + anchors = reinterpret_cast(pu32MemPool); + num_anchors = num_ratio_anchors * num_scale_anchors * (inputs_height[0] * inputs_width[0]); + size = COORDI_NUM * num_anchors; + pu32MemPool += size * sizeof(int32_t); + + bbox_delta = reinterpret_cast(pu32MemPool); + pu32MemPool += size * sizeof(int32_t); + + proposals = reinterpret_cast(pu32MemPool); + size = PROPOSAL_WIDTH * num_anchors; + pu32MemPool += size * sizeof(int32_t); + + ratio_anchors = reinterpret_cast(static_cast(pu32MemPool)); + f32_ptr = reinterpret_cast(static_cast(pu32MemPool)); + size = num_ratio_anchors * COORDI_NUM; + f32_ptr = f32_ptr + size; + + scale_anchors = f32_ptr; + size = num_scale_anchors * num_ratio_anchors * COORDI_NUM; + f32_ptr = f32_ptr + size; + + scores = f32_ptr; + size = num_anchors * SCORE_NUM; + f32_ptr = f32_ptr + size; + + stack = reinterpret_cast(f32_ptr); + + /********************* Generate the base anchor ***********************/ + base_w = static_cast(base_anchor[2] - base_anchor[0] + 1); + base_h = static_cast(base_anchor[3] - base_anchor[1] + 1); + base_x_ctr = static_cast(base_anchor[0] + ((base_w - 1) * 0.5)); + base_y_ctr = static_cast(base_anchor[1] + ((base_h - 1) * 0.5)); + + /*************** Generate Ratio Anchors for the base anchor ***********/ + f32_ptr = ratio_anchors; + f32_size = base_w * base_h; + for (i = 0; i < num_ratio_anchors; i++) { + float f32_ratios = static_cast(ratios[i]) / QUANT_BASE; + base_w = sqrt(f32_size / f32_ratios); + base_w = static_cast(1.0 * ((base_w) >= 0 ? (int32_t)(base_w + HALF_VAL) : (int32_t)(base_w - HALF_VAL))); + base_h = base_w * f32_ratios; + base_h = static_cast(1.0 * ((base_h) >= 0 ? (int32_t)(base_h + HALF_VAL) : (int32_t)(base_h - HALF_VAL))); + + *f32_ptr++ = static_cast(base_x_ctr - ((base_w - 1) * HALF_VAL)); + *(f32_ptr++) = static_cast(base_y_ctr - ((base_h - 1) * HALF_VAL)); + *(f32_ptr++) = static_cast(base_x_ctr + ((base_w - 1) * HALF_VAL)); + *(f32_ptr++) = static_cast(base_y_ctr + ((base_h - 1) * HALF_VAL)); + } + + /********* Generate Scale Anchors for each Ratio Anchor **********/ + f32_ptr = ratio_anchors; + f32_ptr2 = scale_anchors; + /* Generate Scale Anchors for one pixel */ + for (i = 0; i < num_ratio_anchors; i++) { + for (j = 0; j < num_scale_anchors; j++) { + base_w = *(f32_ptr + 2) - *(f32_ptr) + 1; + base_h = *(f32_ptr + 3) - *(f32_ptr + 1) + 1; + base_x_ctr = static_cast(*(f32_ptr) + ((base_w - 1) * HALF_VAL)); + base_y_ctr = static_cast(*(f32_ptr + 1) + ((base_h - 1) * HALF_VAL)); + + *(f32_ptr2++) = + static_cast(base_x_ctr - ((base_w * (static_cast(scales[j]) / QUANT_BASE) - 1) * HALF_VAL)); + *(f32_ptr2++) = + static_cast(base_y_ctr - ((base_h * (static_cast(scales[j]) / QUANT_BASE) - 1) * HALF_VAL)); + *(f32_ptr2++) = + static_cast(base_x_ctr + ((base_w * (static_cast(scales[j]) / QUANT_BASE) - 1) * HALF_VAL)); + *(f32_ptr2++) = + static_cast(base_y_ctr + ((base_h * (static_cast(scales[j]) / QUANT_BASE) - 1) * HALF_VAL)); + } + f32_ptr += COORDI_NUM; + } + + /******************* Copy the anchors to every pixel in the feature map ******************/ + ptr1 = anchors; + pixel_interval = QUANT_BASE / spatial_scale; + + for (p = 0; p < inputs_height[0]; p++) { + for (q = 0; q < inputs_width[0]; q++) { + f32_ptr2 = scale_anchors; + for (z = 0; z < num_scale_anchors * num_ratio_anchors; z++) { + *(ptr1++) = (int32_t)(q * pixel_interval + *(f32_ptr2++)); + *(ptr1++) = (int32_t)(p * pixel_interval + *(f32_ptr2++)); + *(ptr1++) = (int32_t)(q * 
pixel_interval + *(f32_ptr2++)); + *(ptr1++) = (int32_t)(p * pixel_interval + *(f32_ptr2++)); + } + } + } + + /********** do transpose, convert the blob from (M,C,H,W) to (M,H,W,C) **********/ + map_size = inputs_height[1] * inputs_stride / sizeof(uint32_t); + anchors_per_pixel = num_ratio_anchors * num_scale_anchors; + bg_blob_size = anchors_per_pixel * map_size; + line_size = inputs_stride / sizeof(uint32_t); + src_prob_bias = 0; + src_bbox_bias = 0; + + for (c = 0; c < inputs_channel[1]; c++) { + for (h = 0; h < inputs_height[1]; h++) { + for (w = 0; w < inputs_width[1]; w++) { + src_bbox_index = src_bbox_bias + c * map_size + h * line_size + w; + src_bg_prob_index = src_prob_bias + (c / COORDI_NUM) * map_size + h * line_size + w; + src_fg_prob_index = bg_blob_size + src_bg_prob_index; + + des_box = (anchors_per_pixel) * (h * inputs_width[1] + w) + c / COORDI_NUM; + + uint32_t des_bbox_delta_index = COORDI_NUM * des_box + c % COORDI_NUM; + bbox_delta[des_bbox_delta_index] = (int32_t)(inputs[1][src_bbox_index] * QUANT_BASE); + + uint32_t des_score_index = (SCORE_NUM)*des_box; + scores[des_score_index] = inputs[0][src_bg_prob_index]; + scores[des_score_index + 1] = inputs[0][src_fg_prob_index]; + } + } + } + + /************************* do softmax ****************************/ + f32_ptr = scores; + for (i = 0; i < num_anchors; i++) { + SoftMax(f32_ptr, SCORE_NUM); + f32_ptr += SCORE_NUM; + } + + /************************* BBox Transform *****************************/ + /* use parameters from Conv3 to adjust the coordinates of anchors */ + for (i = 0; i < num_anchors; i++) { + ptr1 = anchors; + ptr1 = ptr1 + COORDI_NUM * i; + ptr2 = proposals; + ptr2 = ptr2 + PROPOSAL_WIDTH * i; + ptr3 = bbox_delta; + ptr3 = ptr3 + COORDI_NUM * i; + f32_ptr = scores; + f32_ptr = f32_ptr + i * (SCORE_NUM); + + proposal_width = *(ptr1 + 2) - *(ptr1) + 1; + proposal_height = *(ptr1 + 3) - *(ptr1 + 1) + 1; + int32_t proposal_center_x = *(ptr1) + (int32_t)(proposal_width * HALF_VAL); + int32_t proposal_center_y = *(ptr1 + 1) + (int32_t)(proposal_height * HALF_VAL); + int32_t pred_center_x = (int32_t)((static_cast(*(ptr3)) / QUANT_BASE) * proposal_width + proposal_center_x); + int32_t pred_center_y = + (int32_t)((static_cast(*(ptr3 + 1)) / QUANT_BASE) * proposal_height + proposal_center_y); + + int32_t pred_w = (int32_t)(proposal_width * QuickExp((int32_t)(*(ptr3 + 2)))); + int32_t pred_h = (int32_t)(proposal_height * QuickExp((int32_t)(*(ptr3 + 3)))); + *(ptr2) = (int32_t)(pred_center_x - HALF_VAL * pred_w); + *(ptr2 + 1) = (int32_t)(pred_center_y - HALF_VAL * pred_h); + *(ptr2 + 2) = (int32_t)(pred_center_x + HALF_VAL * pred_w); + *(ptr2 + 3) = (int32_t)(pred_center_y + HALF_VAL * pred_h); + *(ptr2 + 4) = (int32_t)(*(f32_ptr + 1) * QUANT_BASE); + *(ptr2 + 5) = 0; + } + + /************************ clip bbox *****************************/ + for (i = 0; i < num_anchors; i++) { + ptr1 = proposals; + ptr1 = ptr1 + PROPOSAL_WIDTH * i; + *ptr1 = MAX(MIN(*ptr1, (int32_t)ori_image_width - 1), 0); + *(ptr1 + 1) = MAX(MIN(*(ptr1 + 1), (int32_t)ori_image_height - 1), 0); + *(ptr1 + 2) = MAX(MIN(*(ptr1 + 2), (int32_t)ori_image_width - 1), 0); + *(ptr1 + 3) = MAX(MIN(*(ptr1 + 3), (int32_t)ori_image_height - 1), 0); + } + + /************ remove the bboxes which are too small *************/ + for (i = 0; i < num_anchors; i++) { + ptr1 = proposals; + ptr1 = ptr1 + PROPOSAL_WIDTH * i; + proposal_width = *(ptr1 + 2) - *(ptr1) + 1; + proposal_height = *(ptr1 + 3) - *(ptr1 + 1) + 1; + if (proposal_width < (int32_t)min_size || 
proposal_height < (int32_t)min_size) { + *(ptr1 + 5) = 1; + } + } + + /********** remove low score bboxes ************/ + (void)FilterLowScoreBbox(proposals, num_anchors, filter_thresh, &num_after_filter); + + /********** sort ***********/ + (void)NonRecursiveArgQuickSort(proposals, 0, num_after_filter - 1, stack, static_cast(num_before_nms)); + num_after_filter = (num_after_filter < num_before_nms) ? num_after_filter : num_before_nms; + + /* do nms to remove highly overlapped bbox */ + (void)SVP_NNIE_NonMaxSuppression(proposals, num_after_filter, nms_thresh, max_rois); /* function NMS */ + + /************** write the final result to output ***************/ + roi_count = 0; + for (i = 0; i < num_after_filter; i++) { + ptr1 = proposals; + ptr1 = ptr1 + PROPOSAL_WIDTH * i; + if (*(ptr1 + 5) == 0) { + /*In this sample,the output Roi coordinates will be input in hardware, + so the type coordinates are convert to HI_S20Q12*/ + proposal_result[dst_stride / sizeof(uint32_t) * roi_count] = *ptr1; + proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 1] = *(ptr1 + 1); + proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 2] = *(ptr1 + 2); + proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 3] = *(ptr1 + 3); + roi_count++; + } + if (roi_count >= max_rois) { + break; + } + } + + *num_rois = roi_count; +#endif +} + +int32_t ProposalInit(ProposalParam *param, std::vector *inputs, uint32_t max_roi_num, + uint32_t ori_image_height, uint32_t ori_image_width) { + uint32_t tmp_buf_size = 0; + uint32_t bbox_buf_size = 0; + uint32_t total_size = 0; + param->max_roi_num_ = max_roi_num; + + param->num_ratio_anchors_ = 1; + param->num_scale_anchors_ = 9; + param->scales_[0] = 1.5 * QUANT_BASE; + param->scales_[1] = 2.1 * QUANT_BASE; + param->scales_[2] = 2.9 * QUANT_BASE; + param->scales_[3] = 4.1 * QUANT_BASE; + param->scales_[4] = 5.8 * QUANT_BASE; + param->scales_[5] = 8.0 * QUANT_BASE; + param->scales_[6] = 11.3 * QUANT_BASE; + param->scales_[7] = 15.8 * QUANT_BASE; + param->scales_[8] = 22.1 * QUANT_BASE; + param->ratios_[0] = 2.44 * QUANT_BASE; + + param->ori_image_height_ = ori_image_height; + param->ori_image_width_ = ori_image_width; + param->min_size_ = 16; + param->spatial_scale_ = (uint32_t)(0.0625 * QUANT_BASE); + param->nms_thresh_ = (uint32_t)(0.7 * QUANT_BASE); + param->filter_thresh_ = 0; + param->num_before_nms_ = 6000; + + param->rpn_bounding_box_.chn_ = 1; + param->rpn_bounding_box_.height_ = max_roi_num; + param->rpn_bounding_box_.width_ = COORDI_NUM; + param->rpn_bounding_box_.stride_ = COORDI_NUM * sizeof(float); + param->rpn_bounding_box_.num_ = 1; + if (inputs->size() < 2) { + LOGE("inputs tensor size error."); + return RET_ERROR; + } + + for (int i = 0; i < 2; i++) { + auto input_data_type = inputs->at(i)->data_type(); + if (input_data_type == mindspore::kNumberTypeFloat32) { + auto ptr_shape = (*inputs)[i]->shape(); + // (*inputs)[i]->Format() 检查下format + if ((ptr_shape.size() == 4)) { + param->inputs_height_[i] = ptr_shape[2]; + param->inputs_width_[i] = ptr_shape[3]; + param->inputs_channel_[i] = ptr_shape[1]; + if (0 == i) { + param->inputs_stride_ = ptr_shape[3] * sizeof(float); + } + } + } + } + + tmp_buf_size = RpnTmpBufSize(param->num_ratio_anchors_, param->num_scale_anchors_, param->inputs_height_[0], + param->inputs_width_[0]); + + bbox_buf_size = param->rpn_bounding_box_.num_ * param->rpn_bounding_box_.height_ * param->rpn_bounding_box_.stride_; + total_size = tmp_buf_size + bbox_buf_size; + + if (param->rpn_tmp_buf_ != nullptr) { + 
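+    // Re-init path: release the buffer from a previous ProposalInit() call before allocating a new one.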
free(param->rpn_tmp_buf_); + param->rpn_tmp_buf_ = nullptr; + } + param->rpn_tmp_buf_ = malloc(total_size); + if (param->rpn_tmp_buf_ == nullptr) { + LOGE("malloc buf fail."); + return RET_ERROR; + } + param->rpn_bounding_box_.data_ = reinterpret_cast(param->rpn_tmp_buf_) + tmp_buf_size; + + return RET_OK; +} + +int32_t ProposalRun(std::vector *inputs, + std::vector *outputs, ProposalParam *param) { + if (inputs->size() < 2) { + LOGE("inputs tensor size error."); + return RET_ERROR; + } + if (outputs->size() != 1) { + LOGE("outputs tensor size error."); + return RET_ERROR; + } + for (int i = 0; i < 2; i++) { + auto input_data_type = inputs->at(i)->data_type(); + if (input_data_type == mindspore::kNumberTypeFloat32) { + param->inputs_[i] = reinterpret_cast((*inputs)[i]->MutableData()); + } + } + auto output_data_type = (*outputs)[0]->data_type(); + if (output_data_type != mindspore::kNumberTypeFloat32) { + LOGE("outputs tensor data type error."); + return RET_ERROR; + } + + Rpn(param->inputs_, param->num_ratio_anchors_, param->num_scale_anchors_, param->scales_, param->ratios_, + param->ori_image_height_, param->ori_image_width_, param->inputs_height_, param->inputs_width_, + param->inputs_channel_, param->inputs_stride_, param->max_roi_num_, param->min_size_, param->spatial_scale_, + param->nms_thresh_, param->filter_thresh_, param->num_before_nms_, reinterpret_cast(param->rpn_tmp_buf_), + reinterpret_cast(param->rpn_bounding_box_.data_), param->rpn_bounding_box_.stride_, + ¶m->rpn_bounding_box_.height_); + + std::vector shape{static_cast(param->rpn_bounding_box_.height_), COORDI_NUM}; + (*outputs)[0]->set_shape(shape); + auto output_data = (*outputs)[0]->MutableData(); + memcpy(output_data, param->rpn_bounding_box_.data_, param->rpn_bounding_box_.height_ * COORDI_NUM * sizeof(float)); + + return RET_OK; +} + +void ProposalDeInit(ProposalParam *param) { + if (param->rpn_tmp_buf_ != 0) { + free(param->rpn_tmp_buf_); + param->rpn_tmp_buf_ = 0; + } +} +} // namespace proposal +} // namespace mindspore diff --git a/tutorials/tutorial_code/nnie_proposal/src/proposal.h b/tutorials/tutorial_code/nnie_proposal/src/proposal.h new file mode 100644 index 0000000000000000000000000000000000000000..19ec43e25c3906dfc64ef1f1482157bf255f9654 --- /dev/null +++ b/tutorials/tutorial_code/nnie_proposal/src/proposal.h @@ -0,0 +1,93 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_ +#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_ +#include +#include "include/ms_tensor.h" + +#define LOG_TAG1 "Proposal" +#define LOGE(format, ...) \ + do { \ + if (1) { \ + fprintf(stderr, "\n[ERROR] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \ + fprintf(stderr, format, ##__VA_ARGS__); \ + } \ + } while (0) + +#define LOGW(format, ...) 
\ + do { \ + if (1) { \ + fprintf(stderr, "\n[Warning] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \ + fprintf(stderr, format, ##__VA_ARGS__); \ + } \ + } while (0) + +namespace mindspore { +namespace proposal { + +typedef struct { + uint32_t stride_; + void *data_; + uint32_t num_; + uint32_t width_; + uint32_t height_; + uint32_t chn_; +} RpnBoundingBox; + +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define HALF_VAL 0.5f /*the half value*/ +#define COORDI_NUM 4 /*coordinate numbers*/ +#define PROPOSAL_WIDTH 6 /*the number of proposal values*/ +#define QUANT_BASE 4096 /*the base value*/ +#define SCORE_NUM 2 /*the num of RPN scores*/ + +typedef struct { + uint32_t scales_[9]; + uint32_t ratios_[9]; + uint32_t inputs_height_[2]; + uint32_t inputs_width_[2]; + uint32_t inputs_channel_[2]; + uint32_t inputs_stride_; + uint32_t num_ratio_anchors_; + uint32_t num_scale_anchors_; + uint32_t ori_image_height_; + uint32_t ori_image_width_; + uint32_t min_size_; + uint32_t spatial_scale_; + uint32_t nms_thresh_; + uint32_t filter_thresh_; + uint32_t max_roi_num_; + uint32_t num_before_nms_; + float *inputs_[2]; + void *rpn_tmp_buf_; + RpnBoundingBox rpn_bounding_box_; +} ProposalParam; + +typedef struct { + int32_t min_; + int32_t max_; +} Stack; + +int32_t ProposalInit(ProposalParam *param, std::vector *inputs, uint32_t max_roi_num, + uint32_t ori_image_height, uint32_t ori_image_width); +int32_t ProposalRun(std::vector *inputs, + std::vector *outputs, ProposalParam *param); +void ProposalDeInit(ProposalParam *param); +} // namespace proposal +} // namespace mindspore +#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_ diff --git a/tutorials/tutorial_code/nnie_proposal/src/proposal_fp32.cc b/tutorials/tutorial_code/nnie_proposal/src/proposal_fp32.cc new file mode 100644 index 0000000000000000000000000000000000000000..a90b61870d65b9ec9d47bf00e73ce0d99d82221e --- /dev/null +++ b/tutorials/tutorial_code/nnie_proposal/src/proposal_fp32.cc @@ -0,0 +1,195 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/proposal_fp32.h" +#include +#include +#include "include/schema/model_generated.h" +#include "include/registry/register_kernel.h" +#include "include/errorcode.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; +#define MAX_SIZE 1024 + +namespace mindspore { +namespace proposal { +int ProposalCPUKernel::Prepare() { + if (inputs_.size() < 2) { + LOGE("inputs tensor num error."); + return RET_ERROR; + } + if (outputs_.size() != 1) { + LOGE("outputs tensor num error."); + return RET_ERROR; + } + std::vector inputs_name = {"rpn_cls_score", "rpn_bbox_pred"}; + std::vector inputs; + for (size_t i = 0; i < inputs_name.size(); i++) { + bool find_flag = false; + for (auto &input : inputs_) { + if (input->tensor_name() == inputs_name[i]) { + inputs.push_back(input); + find_flag = true; + break; + } + } + if (!find_flag) { + for (auto &input : inputs_) { + if (std::find(inputs.begin(), inputs.end(), input) != inputs.end()) { + continue; + } + inputs.push_back(input); + LOGW("input tensor name diff '%s' vs '%s'.", inputs_name[i].c_str(), input->tensor_name().c_str()); + break; + } + } + } + if (inputs.size() != inputs_name.size()) { + LOGE("inputs size error."); + return RET_ERROR; + } + + this->set_inputs(inputs); + if (inputs[0]->shape()[0] != 1) { + LOGE("proposal only support input num == 1."); + return RET_ERROR; + } + + outputs_[0]->set_tensor_name("proposal"); + + int max_roi_num_int = 300; + auto *max_roi_num = std::getenv("MAX_ROI_NUM"); + if (max_roi_num != nullptr) { + auto iter = + std::find_if(max_roi_num, max_roi_num + strlen(max_roi_num), [](char val) { return val < '0' || val > '9'; }); + if (iter != max_roi_num) { + *iter = '\0'; + max_roi_num_int = atoi(max_roi_num); + } else { + LOGW("MAX_ROI_NUM ENV is invalid, now set to default value %d", max_roi_num_int); + } + } else { + LOGW("MAX_ROI_NUM ENV is not set, now set to default value %d", max_roi_num_int); + } + + return ProposalInit(&proposal_param_, &inputs_, max_roi_num_int, image_height_, image_weight_); +} + +int ProposalCPUKernel::ReSize() { + if (inputs_[0]->shape()[0] != 1) { + LOGE("proposal only support input num == 1."); + return RET_ERROR; + } + return RET_OK; +} + +int ProposalCPUKernel::Execute() { return ProposalRun(&inputs_, &outputs_, &proposal_param_); } + +ProposalCPUKernel::~ProposalCPUKernel() { ProposalDeInit(&proposal_param_); } + +bool GetCustomAttr(char *buf, int buf_size, const mindspore::schema::Custom *op, const std::string &attr) { + int attr_size; + for (size_t i = 0; i < op->attr()->size(); i++) { + if (op->attr()->Get(i)->name()->str() == attr) { + auto output_info = op->attr()->Get(i)->data(); + attr_size = static_cast(output_info->size()); + if (attr_size >= buf_size) { + LOGE("attr size too big"); + return false; + } + for (int j = 0; j < attr_size; j++) { + buf[j] = static_cast(output_info->Get(j)); + } + buf[attr_size] = 0; + return true; + } + } + return false; +} + +std::shared_ptr ProposalCreateKernel( + const std::vector &inputs, const std::vector &outputs, + const mindspore::schema::Primitive *primitive, const mindspore::lite::Context *ctx) { + if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) { + LOGE("Primitive type is not PrimitiveType_Custom"); + return nullptr; + } + + auto op = primitive->value_as_Custom(); + if (op->attr()->size() < 1) { + LOGE("There are at least 1 attribute of Custom"); + return nullptr; + } + int64_t ndims; + int64_t image_height; + int64_t image_width; + 
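+  // Custom attrs arrive as raw byte arrays; GetCustomAttr() copies each into buf as a
+  // C string, and the strtol() calls below reject any value with trailing non-digit characters.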
+ char *res = nullptr; + char buf[MAX_SIZE]; + if (GetCustomAttr(buf, MAX_SIZE, op, "proposal_id")) { + res = nullptr; + ndims = strtol(buf, &res, 10); + if ((*res) != 0) { + LOGE("Get attr id data fail"); + return nullptr; + } + } else { + LOGE("Proposal Custom op should have id"); + return nullptr; + } + + if (GetCustomAttr(buf, MAX_SIZE, op, "image_height")) { + res = nullptr; + image_height = strtol(buf, &res, 10); + if ((*res) != 0) { + LOGE("Get attr id data fail"); + return nullptr; + } + } else { + LOGE("Proposal Custom op should have image_height"); + return nullptr; + } + if (GetCustomAttr(buf, MAX_SIZE, op, "image_width")) { + res = nullptr; + image_width = strtol(buf, &res, 10); + if ((*res) != 0) { + LOGE("Get attr id data fail"); + return nullptr; + } + } else { + LOGE("Proposal Custom op should have image_width"); + return nullptr; + } + + auto kernel = std::make_shared(inputs, outputs, primitive, ctx, ndims, image_height, image_width); + // auto kernel = new (std::nothrow) ProposalCPUKernel(inputs, outputs, primitive, ctx, ndims, image_height, + // image_width); + if (kernel == nullptr) { + LOGE("new custom kernel is nullptr"); + return nullptr; + } + return kernel; +} +} // namespace proposal +} // namespace mindspore + +namespace mindspore { +namespace kernel { +REGISTER_CUSTOM_KERNEL(CPU, NNIE, kNumberTypeFloat32, Proposal, proposal::ProposalCreateKernel) +} // namespace kernel +} // namespace mindspore diff --git a/tutorials/tutorial_code/nnie_proposal/src/proposal_fp32.h b/tutorials/tutorial_code/nnie_proposal/src/proposal_fp32.h new file mode 100644 index 0000000000000000000000000000000000000000..ed2d47aa78ed42982bb023fa7026504b84f5a43d --- /dev/null +++ b/tutorials/tutorial_code/nnie_proposal/src/proposal_fp32.h @@ -0,0 +1,52 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_ +#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_ + +#include +#include "include/schema/model_generated.h" +#include "include/context.h" +#include "include/kernel.h" +#include "src/proposal.h" + +using mindspore::kernel::Kernel; +namespace mindspore { +namespace proposal { +class ProposalCPUKernel : public Kernel { + public: + ProposalCPUKernel(const std::vector &inputs, + const std::vector &outputs, + const mindspore::schema::Primitive *primitive, const mindspore::lite::Context *ctx, int id, + int image_height, int image_width) + : Kernel(inputs, outputs, primitive, ctx), id_(id), image_height_(image_height), image_weight_(image_width) {} + + ~ProposalCPUKernel() override; + + int Prepare() override; + int ReSize() override; + int Execute() override; + + private: + proposal::ProposalParam proposal_param_ = {0}; + int64_t id_; + int64_t image_height_; + int64_t image_weight_; +}; +} // namespace proposal +} // namespace mindspore + +#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_ diff --git a/tutorials/tutorial_code/nnie_proposal/src/proposal_infer.cc b/tutorials/tutorial_code/nnie_proposal/src/proposal_infer.cc new file mode 100644 index 0000000000000000000000000000000000000000..b689b3df0eb1f45d5adc3661a18b5fc2e4fb02be --- /dev/null +++ b/tutorials/tutorial_code/nnie_proposal/src/proposal_infer.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/proposal_infer.h" +#include +#include +#include "include/errorcode.h" +#include "src/proposal.h" + +using mindspore::kernel::KernelInterface; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; + +namespace mindspore { +namespace proposal { +std::shared_ptr ProposalInferCreater() { + auto infer = std::make_shared(); + if (infer == nullptr) { + LOGE("new custom infer is nullptr"); + return nullptr; + } + + return infer; +} +int ProposalInterface::Infer(const std::vector &inputs, + const std::vector &outputs, + const mindspore::schema::Primitive *primitive) { + if (inputs.size() != 2) { + LOGE("Inputs size less than 2"); + return RET_ERROR; + } + if (outputs.size() == 0) { + LOGE("Outputs size 0"); + return RET_ERROR; + } + if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) { + LOGE("Primitive type is not PrimitiveType_Custom"); + return RET_ERROR; + } + + size_t id = 0; + while (id < outputs.size()) { + // TODO: to be completed + // outputs[id]->format_ = input->format_; + // outputs[id]->data_type_ = kNumberTypeFloat32; + // set type to int + std::vector shape{-1, COORDI_NUM}; + outputs[id]->set_shape(shape); + id++; + } + return RET_OK; +} +} // namespace proposal +} // namespace mindspore +namespace mindspore { +namespace kernel { +// static KernelInterfaceReg a(aa, schema::PrimitiveType_Custom, CustomInferCreater); +REGISTER_CUSTOM_KERNEL_INTERFACE(NNIE, Proposal, proposal::ProposalInferCreater); +} // namespace kernel +} // namespace mindspore diff --git a/tutorials/tutorial_code/nnie_proposal/src/proposal_infer.h b/tutorials/tutorial_code/nnie_proposal/src/proposal_infer.h new file mode 100644 index 0000000000000000000000000000000000000000..b384817af368975d7c890c7350f9cc41e54be639 --- /dev/null +++ b/tutorials/tutorial_code/nnie_proposal/src/proposal_infer.h @@ -0,0 +1,36 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_ +#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_ +#include +#include "include/registry/kernel_interface.h" + +namespace mindspore { +namespace proposal { +class ProposalInterface : public mindspore::kernel::KernelInterface { + public: + ProposalInterface() {} + + ~ProposalInterface() = default; + + int Infer(const std::vector &inputs, + const std::vector &outputs, + const mindspore::schema::Primitive *primitive) override; +}; +} // namespace proposal +} // namespace mindspore +#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_ diff --git a/tutorials/tutorial_code/nnie_proposal/third_patry/CMakeLists.txt b/tutorials/tutorial_code/nnie_proposal/third_patry/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5aa873e18c2ec91a7063795ec9feec66c9cb9782 --- /dev/null +++ b/tutorials/tutorial_code/nnie_proposal/third_patry/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.14) +project(ms_download) + +include(ExternalProject) +set(MS_VESION "1.3.0") +message(STATUS "download mindspore-lite-${MS_VESION}-runtime-arm64-cpu") + +SET(MS_URL https://ms-release.obs.cn-north-4.myhuaweicloud.com/${MS_VESION}/lite/android_aarch64/mindspore-lite-${MS_VESION}-runtime-arm64-cpu.tar.gz) + +ExternalProject_Add( + ms + URL ${MS_URL} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" # Skip annoying updates for every build + # Disable install step + INSTALL_COMMAND "" +) + +add_custom_target(ms_download ALL + DEPENDS ms + )
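+
+# Note: with only URL set, ExternalProject_Add downloads and unpacks the archive
+# into ms-prefix/src/ms when this sub-build runs; the configure/build/install
+# steps are left empty because the tarball appears to ship a prebuilt MindSpore
+# Lite runtime, which the top-level CMakeLists.txt consumes via include paths.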