Setting up a local MindSpore environment

Create a new virtual environment

# anaconda prompt
conda create -n mindspore_py37 python=3.7.5
# Official: conda create -c conda-forge -n mindspore_py37 python=3.7.5
conda activate mindspore_py37

Install MindSpore

  • On Windows, only the CPU version of MindSpore is available
  • Install MindSpore 2.2.14; the command follows the MindSpore installation guide
# The mindspore_py37 virtual environment is already activated at this point
conda install mindspore-cpu=2.2.14 -c mindspore -c conda-forge
# Originally planned to install 1.7.0, because the Huawei Cloud server image is:
# 'mindspore1.7.0-cuda10.1-py3.7-ubuntu18.04'

# Verify the installation
python -c "import mindspore;mindspore.run_check()"

# Output on a successful install
MindSpore version: 2.2.14
The result of multiplication calculation is correct, MindSpore has been installed successfully!

Some commonly used commands

# List the configured Jupyter kernels
jupyter kernelspec list
# Check whether ipykernel is available in the environment
python -m ipykernel --version
# Remove an unused kernel
jupyter kernelspec remove KERNEL_NAME
# Remove an unused conda environment
conda remove -n ENV_NAME --all

Adding the env as a Jupyter kernel

  • Register the mindspore_py37 virtual environment as a Jupyter kernel
conda activate mindspore_py37
# Install ipykernel in the current environment
conda install ipykernel
python -m ipykernel install --user --name mindspore_py37 --display-name "mindspore_py37"

Bug: "The kernel is starting, please wait"

Cause: the installed pyzmq version is too high.

conda activate mindspore_py37
# Check the installed versions
pip list
# Uninstall, then install a pinned version
pip uninstall pyzmq
pip install pyzmq==23.0

Result: after restarting Jupyter Notebook and selecting the mindspore_py37 kernel, the kernel starts successfully.

Bug: "DLL load failed: The specified module could not be found"

Cause: the matching Python interpreter cannot be located. Different Python versions are installed in different virtual environments; if you open a cmd terminal in the project folder, launch Jupyter Notebook directly from there, and then switch to the virtual environment's kernel inside the notebook, the interpreter location cannot be resolved.

Fix, step 1:

Check the Anaconda3 environment-variable configuration (the part in the purple box is what the setup tutorial refers to).

(screenshot: Anaconda3 environment-variable entries)

Fix, step 2:

Since multiple Python interpreters exist, activate the target virtual environment first and then open Jupyter Notebook.

(screenshot: multiple Python interpreters found on the system)

Activate the virtual environment you intend to use first, then start Jupyter Notebook.
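
For example, a minimal sequence assuming the mindspore_py37 environment created above (the project path below is purely illustrative):

# Anaconda Prompt
conda activate mindspore_py37
# change to the notebook folder first (illustrative path)
cd D:\projects\mindspore-demo
jupyter notebook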

Reference: "The kernel is starting, please wait" & "DLL load failed: The specified module could not be found"


ResNet - MindSpore - pretrain

Download the data

import requests
import zipfile
import os

# Download the CIFAR-10 dataset
url = "https://obs.dualstack.cn-north-4.myhuaweicloud.com/mindspore-website/notebook/datasets/cifar10.zip"
local_filename = "cifar10.zip"

print("Downloading CIFAR-10 dataset...")
response = requests.get(url)
with open(local_filename, 'wb') as f:
    f.write(response.content)
print("Download complete.")

# Unzip the archive
print("Unzipping CIFAR-10 dataset...")
with zipfile.ZipFile(local_filename, 'r') as zip_ref:
    zip_ref.extractall("./datasets")
print("Unzip complete.")

# Inspect the extracted directory structure
def print_directory_structure(dir_path):
    for root, dirs, files in os.walk(dir_path):
        level = root.replace(dir_path, '').count(os.sep)
        indent = ' ' * 4 * level
        print('{}{}/'.format(indent, os.path.basename(root)))
        sub_indent = ' ' * 4 * (level + 1)
        for f in files:
            print('{}{}'.format(sub_indent, f))

print("Directory structure:")
print_directory_structure("./datasets/cifar10")
# Downloading CIFAR-10 dataset...
# Download complete.
# Unzipping CIFAR-10 dataset...
# Unzip complete.
# Directory structure:
# cifar10/
#     test/
#         test_batch.bin
#     train/
#         batches.meta.txt
#         data_batch_1.bin
#         data_batch_2.bin
#         data_batch_3.bin
#         data_batch_4.bin
#         data_batch_5.bin

Preprocessing & visualization

import mindspore.nn as nn
from mindspore import dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
from mindspore import context
import numpy as np
import matplotlib.pyplot as plt

# Windows only provides the CPU version of MindSpore
# Tested: MindSpore 1.7.0 can train on GPU (on the Huawei Cloud server image)
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
  • Create the dataset with data augmentation
def create_dataset(data_home, repeat_num=1, batch_size=32, do_train=True, device_target="GPU"):
    """
    create data for next use such as training or inferring
    """

    cifar_ds = ds.Cifar10Dataset(data_home, num_parallel_workers=8, shuffle=True)

    c_trans = []
    if do_train:
        c_trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    c_trans += [
        C.Resize((224, 224)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    cifar_ds = cifar_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
    cifar_ds = cifar_ds.map(operations=c_trans, input_columns="image", num_parallel_workers=8)

    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_num)

    return cifar_ds
  • Data visualization
ds_train_path = "./datasets/cifar10/train/"
dataset_show = create_dataset(ds_train_path)
with open(ds_train_path + "batches.meta.txt", "r", encoding="utf-8") as f:
    all_name = [name.replace("\n", "") for name in f.readlines()]

iterator_show = dataset_show.create_dict_iterator()
dict_data = next(iterator_show)
images = dict_data["image"].asnumpy()
labels = dict_data["label"].asnumpy()
count = 1
%matplotlib inline
for i in images:
    plt.subplot(4, 8, count)
    # images[0].shape is (3, 224, 224); transpose to (224, 224, 3) for plt.imshow().
    picture_show = np.transpose(i, (1, 2, 0))
    picture_show = picture_show / np.amax(picture_show)
    picture_show = np.clip(picture_show, 0, 1)
    plt.title(all_name[labels[count - 1]])
    picture_show = np.array(picture_show, np.float32)
    plt.imshow(picture_show)
    count += 1
    plt.axis("off")

print("The dataset size is:", dataset_show.get_dataset_size())
print("The batch tensor is:", images.shape)
plt.show()

(output: a 4 × 8 grid of CIFAR-10 sample images labeled with their class names)

Define the convolutional neural network

  • Use the ResNet-50 model; download the model definition file resnet.py
import requests

# Download the resnet.py file
url = "https://obs.dualstack.cn-north-4.myhuaweicloud.com/mindspore-website/notebook/source-codes/resnet.py"
local_filename = "resnet.py"

print("Downloading resnet.py...")
response = requests.get(url)
with open(local_filename, 'wb') as f:
    f.write(response.content)
print("Download complete.")
from resnet import resnet50

net = resnet50(batch_size=32, num_classes=10)

Loss function & optimizer

import mindspore.nn as nn
from mindspore.nn import SoftmaxCrossEntropyWithLogits

ls = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)

Training & saving the model files

from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
from mindspore import load_checkpoint, load_param_into_net
import os
from mindspore import Model


model = Model(net, loss_fn=ls, optimizer=opt, metrics={'acc'})
# As for train, users could use model.train

epoch_size = 10
ds_train_path = "./datasets/cifar10/train/"
model_path = "./models/ckpt/mindspore_vision_application/"
os.system('rm -f {0}*.ckpt {0}*.meta {0}*.pb'.format(model_path))

dataset = create_dataset(ds_train_path)
batch_num = dataset.get_dataset_size()
config_ck = CheckpointConfig(save_checkpoint_steps=batch_num, keep_checkpoint_max=35)
ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10", directory=model_path, config=config_ck)
loss_cb = LossMonitor(142)
model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])
  • Huawei Cloud server
  • Image: mindspore1.7.0-cuda10.1-py3.7-ubuntu18.04
  • Spec: GPU: 1 * Tnt004 (16GB) | CPU: 8 cores, 32GB
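
The checkpoint files written by ModelCheckpoint can be restored with the load_checkpoint / load_param_into_net functions already imported above, for example before re-running evaluation after a kernel restart. A minimal sketch; the checkpoint filename is illustrative, so substitute one actually produced under model_path:

# Restore trained weights into the network.
# "train_resnet_cifar10-10_1562.ckpt" is an illustrative filename; use a real
# .ckpt file found in model_path.
param_dict = load_checkpoint(model_path + "train_resnet_cifar10-10_1562.ckpt")
load_param_into_net(net, param_dict)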


Validate model accuracy

ds_eval_path = "./datasets/cifar10/test/"
eval_dataset = create_dataset(ds_eval_path, do_train=False)
res = model.eval(eval_dataset)
print("result: ", res)
