diff --git a/code/my_project.py b/code/my_project.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fd1d71b461554d3e5d8df7ce91986ff23b1b43b
--- /dev/null
+++ b/code/my_project.py
@@ -0,0 +1,377 @@
+# Part 1: Voice-assistant LLM demo (calls the official cloud APIs) -- complete code
+from huaweicloud_sis.client.tts_client import TtsCustomizationClient
+from huaweicloud_sis.client.asr_client import AsrCustomizationClient
+from huaweicloud_sis.bean.tts_request import TtsCustomRequest
+from huaweicloud_sis.bean.asr_request import AsrCustomShortRequest
+from huaweicloud_sis.bean.sis_config import SisConfig
+from huaweicloud_sis.exception.exceptions import ClientException
+from huaweicloud_sis.exception.exceptions import ServerException
+from huaweicloud_sis.utils import io_utils
+from openai import OpenAI
+import speech_recognition as sr
+import tempfile
+import os
+import json
+import playsound
+
+# Huawei Cloud configuration (fill in your own credentials; never commit real
+# AK/SK values to a repository)
+AK = 'your_access_key_here'
+SK = 'your_secret_key_here'
+REGION = 'cn-east-3'
+PROJECT_ID = 'your_project_id_here'
+
+# DeepSeek configuration
+DEEPSEEK_API_KEY = "your_deepseek_api_key_here"  # replace with your actual DeepSeek API key
+
+# Speech-recognition (ASR) configuration
+ASR_AUDIO_FORMAT = 'wav'
+ASR_PROPERTY = 'chinese_8k_common'
+
+# Speech-synthesis (TTS) configuration
+TTS_PROPERTY = 'chinese_xiaoyan_common'
+TTS_AUDIO_FORMAT = 'wav'
+TTS_SAMPLE_RATE = '8000'
+
+# Initialize the speech recognizer and microphone
+r = sr.Recognizer()
+mic = sr.Microphone()
+
+def record_audio():
+    """Record audio from the microphone and save it to a temporary WAV file."""
+    try:
+        with mic as source:
+            print("Please speak:")
+            r.adjust_for_ambient_noise(source, duration=0.5)
+            audio = r.listen(source, timeout=5)
+
+        # Save the recording to a temporary file
+        fd, temp_path = tempfile.mkstemp(suffix='.wav')
+        with os.fdopen(fd, 'wb') as f:
+            f.write(audio.get_wav_data())
+        return temp_path
+    except Exception as e:
+        print(f"Recording error: {str(e)}")
+        return None
+
+def speech_to_text(audio_path):
+    """Speech recognition via the Huawei Cloud ASR service; returns None on failure."""
+    try:
+        # Initialize the ASR client
+        config = SisConfig()
+        config.set_connect_timeout(10)
+        config.set_read_timeout(10)
+        asr_client = AsrCustomizationClient(AK, SK, REGION, PROJECT_ID, sis_config=config)
+
+        # Build the request
+        data = io_utils.encode_file(audio_path)
+        asr_request = AsrCustomShortRequest(ASR_AUDIO_FORMAT, ASR_PROPERTY, data)
+        asr_request.set_add_punc('yes')
+        asr_request.set_digit_norm('yes')
+        asr_request.set_need_word_info('no')
+
+        # Send the request and fetch the result
+        result = asr_client.get_short_response(asr_request)
+        print("ASR result:", json.dumps(result, indent=2, ensure_ascii=False))
+
+        # Parse the result
+        if 'result' in result and 'text' in result['result']:
+            return result['result']['text']
+        print("Speech recognition failed")
+        return None
+
+    except ClientException as e:
+        print(f"Client exception: {str(e)}")
+        return None
+    except ServerException as e:
+        print(f"Server exception: {str(e)}")
+        return None
+    except Exception as e:
+        print(f"Speech recognition error: {str(e)}")
+        return None
+
+def text_to_speech(text):
+    """Speech synthesis via the Huawei Cloud TTS service; returns the WAV path or None."""
+    try:
+        # Initialize the TTS client
+        config = SisConfig()
+        config.set_connect_timeout(10)
+        config.set_read_timeout(10)
+        tts_client = TtsCustomizationClient(AK, SK, REGION, PROJECT_ID, sis_config=config)
+
+        # Create a temporary file to hold the synthesized audio
+        fd, temp_path = tempfile.mkstemp(suffix='.wav')
+        os.close(fd)
+
+        # Build the request
+        tts_request = TtsCustomRequest(text)
+        tts_request.set_property(TTS_PROPERTY)
+        tts_request.set_audio_format(TTS_AUDIO_FORMAT)
+        tts_request.set_sample_rate(TTS_SAMPLE_RATE)
+        tts_request.set_volume(50)
+        tts_request.set_saved(True)
+        tts_request.set_saved_path(temp_path)
+
+        # Send the request
+        result = tts_client.get_ttsc_response(tts_request)
+        print("TTS result:", json.dumps(result, indent=2, ensure_ascii=False))
+
+        return temp_path
+
+    except ClientException as e:
+        print(f"Client exception: {str(e)}")
+        return None
+    except ServerException as e:
+        print(f"Server exception: {str(e)}")
+        return None
+    except Exception as e:
+        print(f"Speech synthesis error: {str(e)}")
+        return None
+
+def chat_with_ai(text, conversation_history=None):
+    """Chat with the DeepSeek model, carrying a bounded conversation history."""
+    if conversation_history is None:  # avoid a mutable default argument
+        conversation_history = []
+    try:
+        # Initialize an OpenAI client (the DeepSeek API is OpenAI-compatible)
+        client = OpenAI(
+            api_key=DEEPSEEK_API_KEY,
+            base_url="https://api.deepseek.com"
+        )
+
+        # Build the message list, starting with the system prompt
+        # ("You are a helpful AI assistant; answer user questions concisely.")
+        messages = [
+            {"role": "system", "content": "你是一个有帮助的AI助手,请用简洁明了的语言回答用户问题。"}
+        ]
+
+        # Append the prior turns
+        messages.extend(conversation_history)
+
+        # Append the current user input
+        messages.append({"role": "user", "content": text})
+
+        # Call the DeepSeek API
+        response = client.chat.completions.create(
+            model="deepseek-chat",
+            messages=messages,
+            stream=False
+        )
+
+        # Extract the assistant's reply
+        ai_response = response.choices[0].message.content
+
+        # Update the history, capping its length so it cannot grow without bound
+        updated_history = conversation_history[-8:] if len(conversation_history) > 8 else conversation_history.copy()
+        updated_history.append({"role": "user", "content": text})
+        updated_history.append({"role": "assistant", "content": ai_response})
+
+        return ai_response, updated_history
+
+    except Exception as e:
+        print(f"AI chat error: {str(e)}")
+        # Fallback reply: "Sorry, I cannot answer that right now."
+        return "抱歉,我现在无法回答这个问题。", conversation_history
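+# A minimal text-only smoke test for chat_with_ai (a sketch, not part of the
+# original flow): it bypasses the microphone and ASR entirely, so it is a quick
+# way to confirm that DEEPSEEK_API_KEY and network access work before wiring up
+# audio. The helper name and sample question below are illustrative only;
+# call _smoke_test_chat() manually when needed.
+def _smoke_test_chat():
+    history = []
+    reply, history = chat_with_ai("你好,请介绍一下你自己。", history)  # "Hi, please introduce yourself."
+    print("Smoke-test reply:", reply)
+    print("History length:", len(history))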
+ print(f"客户端异常: {str(e)}") + return None + except ServerException as e: + print(f"服务端异常: {str(e)}") + return None + except Exception as e: + print(f"语音合成异常: {str(e)}") + return None + +def chat_with_ai(text, conversation_history=[]): + """与DeepSeek AI对话""" + try: + # 初始化OpenAI客户端(兼容DeepSeek API) + client = OpenAI( + api_key=DEEPSEEK_API_KEY, + base_url="https://api.deepseek.com" + ) + + # 构建对话历史 + messages = [ + {"role": "system", "content": "你是一个有帮助的AI助手,请用简洁明了的语言回答用户问题。"} + ] + + # 添加历史对话 + messages.extend(conversation_history) + + # 添加当前用户输入 + messages.append({"role": "user", "content": text}) + + # 调用DeepSeek API + response = client.chat.completions.create( + model="deepseek-chat", + messages=messages, + stream=False + ) + + # 获取AI回复 + ai_response = response.choices[0].message.content + + # 更新对话历史(限制历史长度避免过长) + updated_history = conversation_history[-8:] if len(conversation_history) > 8 else conversation_history.copy() + updated_history.append({"role": "user", "content": text}) + updated_history.append({"role": "assistant", "content": ai_response}) + + return ai_response, updated_history + + except Exception as e: + print(f"AI对话异常: {str(e)}") + return "抱歉,我现在无法回答这个问题。", conversation_history + +def main(): + """主函数:录音->识别->AI对话->合成->播放""" + conversation_history = [] # 初始化对话历史 + + while True: + # 1. 录音 + audio_path = record_audio() + if not audio_path: + print("录音失败,请重试") + continue + + try: + # 2. 语音识别 + print("正在识别...") + recognized_text = speech_to_text(audio_path) + print(f"识别结果: {recognized_text}") + + # 检查是否要退出 + if any(exit_cmd in recognized_text for exit_cmd in ["退出", "结束", "停止", "再见"]): + print("对话结束") + break + + # 3. 与AI对话 + print("正在与AI对话...") + ai_response, conversation_history = chat_with_ai(recognized_text, conversation_history) + print(f"AI回复: {ai_response}") + + # 4. 语音合成 + print("正在合成语音...") + tts_path = text_to_speech(ai_response) + + if tts_path and os.path.exists(tts_path): + # 5. 
+# Load the base model
+model = AutoModelForCausalLM.from_pretrained('./DeepSeek-R1-Distill-Qwen-1.5B',
+                                             ms_dtype=mindspore.bfloat16, device_map=0)
+
+# Required when gradient checkpointing is enabled
+model.enable_input_require_grads()
+
+# Inference before fine-tuning (check the base model's behavior)
+model = model.npu()
+prompt = "你是谁"  # "Who are you?"
+# System prompt: "You are now a professional and responsible doctor"
+inputs = tokenizer.apply_chat_template([{"role": "system", "content": "现在你是一名专业且有责任心的医生"}, {"role": "user", "content": prompt}],
+                                       add_generation_prompt=True,
+                                       tokenize=True,
+                                       return_tensors="ms",
+                                       return_dict=True
+                                       ).to('npu')  # keep the inputs on the same NPU device as the model
+
+gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
+with core.no_grad():
+    outputs = model.generate(**inputs, **gen_kwargs)
+    outputs = outputs[:, inputs['input_ids'].shape[1]:]
+    print("Inference before fine-tuning:", tokenizer.decode(outputs[0], skip_special_tokens=True))
+
+# Configure LoRA
+config = LoraConfig(
+    task_type=TaskType.CAUSAL_LM,
+    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+    inference_mode=False,  # training mode
+    r=8,  # LoRA rank
+    lora_alpha=32,  # LoRA alpha; see the LoRA paper for its role
+    lora_dropout=0.1  # dropout ratio
+)
+
+# Inspect the model before and after adding LoRA (optional)
+print("Model without LoRA:\n", model)
+model = get_peft_model(model, config)
+print('=' * 50)
+print("Model with LoRA:\n", model)
+# Report the fraction of trainable parameters
+model.print_trainable_parameters()
+
+# Training hyperparameters
+args = TrainingArguments(
+    output_dir="./my_output_1.5bf/Qwen2.5_instruct_lora",
+    per_device_train_batch_size=4,
+    gradient_accumulation_steps=1,
+    logging_steps=10,
+    num_train_epochs=1,
+    save_steps=100,
+    learning_rate=1e-4,
+    save_on_each_node=True,
+)
+
+# # --- Quick pipeline-validation settings (optional; swap in for args above) ---
+# args = TrainingArguments(
+#     output_dir="./my_output_1.5bf/Qwen2.5_instruct_lora",  # use a fresh output dir to avoid overwriting
+#     # Speed up batching
+#     per_device_train_batch_size=32,  # as large as memory allows, to cut I/O round-trips
+#     gradient_accumulation_steps=1,  # no gradient accumulation for a faster validation run
+#     # Trim unnecessary overhead
+#     logging_steps=10,  # log every 10 steps to watch progress
+#     num_train_epochs=1,
+#     save_steps=100,  # save once during the run to exercise checkpointing
+#     # Keep or simplify the rest
+#     learning_rate=1e-4,  # the learning rate barely affects speed, so keep it
+#     save_on_each_node=True,
+# )
+
+# Instantiate the Trainer and start training
+trainer = Trainer(
+    model=model,
+    args=args,
+    train_dataset=tokenized_id,
+    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
+)
+
+trainer.train()
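+# Optionally merge the LoRA weights into the base model for standalone deployment
+# (a sketch, assuming the installed peft version provides merge_and_unload();
+# "./merged_model" is a hypothetical output path):
+# merged_model = trainer.model.merge_and_unload()
+# merged_model.save_pretrained("./merged_model")
+# tokenizer.save_pretrained("./merged_model")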
+# Inference after fine-tuning (check the effect of LoRA)
+# Load the base model plus the LoRA weights
+model_path = './DeepSeek-R1-Distill-Qwen-1.5B'
+lora_path = './my_output_1.5bf/Qwen2.5_instruct_lora/checkpoint-98'  # replace with your actual LoRA checkpoint path
+
+# Reload the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+
+# Reload the base model
+model = AutoModelForCausalLM.from_pretrained(model_path, ms_dtype=mindspore.bfloat16, trust_remote_code=True).eval()
+
+# Load the LoRA weights
+model = PeftModel.from_pretrained(model, model_id=lora_path)
+
+# Move to the device
+model = model.npu()
+
+# Test the fine-tuned model
+prompt = "男科问题一般有啥"  # "What are typical andrology problems?"
+# System prompt: "You are now a doctor"
+inputs = tokenizer.apply_chat_template([{"role": "system", "content": "现在你是一名医生"}, {"role": "user", "content": prompt}],
+                                       add_generation_prompt=True,
+                                       tokenize=True,
+                                       return_tensors="ms",
+                                       return_dict=True
+                                       ).to('npu')  # keep the inputs on the same NPU device as the model
+
+gen_kwargs = {"max_length": 250, "do_sample": True, "top_k": 1}
+with core.no_grad():
+    outputs = model.generate(**inputs, **gen_kwargs)
+    outputs = outputs[:, inputs['input_ids'].shape[1]:]
+print("Inference after fine-tuning:", tokenizer.decode(outputs[0], skip_special_tokens=True))
diff --git "a/deliverables/Labubu\351\230\237+\345\237\272\344\272\216DeepSeek-R1-Distill-Qwen-1.5B\347\232\204\344\270\252\344\272\272\345\214\273\347\226\227\345\257\271\350\257\235\345\244\247\346\250\241\345\236\213.pdf" "b/deliverables/Labubu\351\230\237+\345\237\272\344\272\216DeepSeek-R1-Distill-Qwen-1.5B\347\232\204\344\270\252\344\272\272\345\214\273\347\226\227\345\257\271\350\257\235\345\244\247\346\250\241\345\236\213.pdf"
new file mode 100644
index 0000000000000000000000000000000000000000..8a6c21b81e2cf8dd6ba06a7cab2b537eea669b0e
Binary files /dev/null and "b/deliverables/Labubu\351\230\237+\345\237\272\344\272\216DeepSeek-R1-Distill-Qwen-1.5B\347\232\204\344\270\252\344\272\272\345\214\273\347\226\227\345\257\271\350\257\235\345\244\247\346\250\241\345\236\213.pdf" differ