feat:tts 合成音频

1ddc9ae4 · mingyard · c7e4ad25 · 1ddc9ae4 · 1ddc9ae4 · 1ddc9ae4
Commit 1ddc9ae4 authored Jan 16, 2025 by mingyard
7 changed files
--- a/src/common/utils/constants.ts
+++ b/src/common/utils/constants.ts
@@ -169,3 +169,19 @@ export const FanYiGouTargetLanguages = {
  EN: 'en',
  ZH: 'zh',
 };
+
+// Voices available for TTS synthesis; the voiceList endpoint returns this array unmodified.
+export const TTS_VOICE_LIST = [
+  {
+    uuid: '3e7a6cb0-3bae-4af9-bb7d-abc9f1c5ebe6', // id the textToSpeech endpoint matches against the request's `speaker`
+    speaker: 'xiaoyan', // speaker value forwarded to the TTS service
+    name: '小燕', // presumably the display name — not read by the code shown here
+    provider: 9, // provider code forwarded to the TTS service
+  },
+  {
+    uuid: 'ea70ee2c-94eb-4a32-a77d-89452fdef367',
+    speaker: 'xiaoyu',
+    name: '小宇',
+    provider: 9,
+  },
+];
--- a/src/config/app.ts
+++ b/src/config/app.ts
@@ -41,6 +41,10 @@ interface AppConfig {
    /** 应用私钥 */
    privateKey: string;
  };
+
+  obs: {
+    endpoint: string;
+  };
 }

 const server = env.APP_SERVER ?? 'http://localhost:3000';
@@ -71,4 +75,8 @@ export const app: AppConfig = {
    publicKey: env.APP_RSA_PUBLIC_KEY ?? '',
    privateKey: env.APP_RSA_PRIVATE_KEY ?? '',
  },
+
+  obs: {
+    endpoint: env.APP_OBS_ENDPOINT ?? 'https://resources.laihua.com',
+  },
 };
--- a/src/config/service.ts
+++ b/src/config/service.ts
@@ -3,6 +3,7 @@ import { env } from './env';
 export interface ServiceConfig {
  tts: {
    txAsrTokenUrl: string;
+    serviceUrl: string;
  };
  fanYiGou: {
    endpoint: string;
@@ -14,6 +15,7 @@ export interface ServiceConfig {
 export const service: ServiceConfig = {
  tts: {
    txAsrTokenUrl: env.TTS_TX_ASR_TOKEN_URL ?? '',
+    serviceUrl: env.TTS_SERVICE_URL ?? '',
  },
  fanYiGou: {
    endpoint: env.FAN_YI_GOU_ENDPOINT ?? 'https://www.fanyigou.com',

--- a/src/controller/translate/dto/baseDto.ts
+++ b/src/controller/translate/dto/baseDto.ts
+export class TTSDto { // Payload that TranslateService.tts posts to the TTS service.
+  text: string; // text to synthesize
+  speaker: string; // speaker name (the controller passes the voice's `speaker`, not its uuid)
+  // Defaults to provider 9 (the original note calls this the elevenLabs provider — TODO confirm).
+  provider: number = 9;
+  // Speech speed; defaults to 5.
+  speed?: number = 5;
+}
--- a/src/controller/translate/dto/req/textToSpeechReq.dto.ts
+++ b/src/controller/translate/dto/req/textToSpeechReq.dto.ts
+import { ApiProperty } from '@nestjs/swagger';
+import { Expose } from 'class-transformer';
+import { IsNotEmpty, IsNumber, IsOptional, IsString } from 'class-validator';
+
+export class TextToSpeechReqDto { // Request body accepted by the textToSpeech endpoint.
+  @ApiProperty({
+    description: '要转换为语音的文本',
+    type: String,
+    example: '要转换为语音的文本',
+  })
+  @Expose()
+  @IsNotEmpty()
+  @IsString()
+  text: string; // text to synthesize; validated as a non-empty string
+
+  @ApiProperty({
+    description: '语音发音人Id',
+    type: String,
+    example: 'uuid',
+  })
+  @Expose()
+  @IsNotEmpty()
+  @IsString()
+  speaker: string; // uuid of a TTS_VOICE_LIST entry; the controller looks the voice up by this value
+
+  @ApiProperty({
+    description: '语音速度',
+    type: Number,
+    required: false,
+    default: 5,
+    example: 5,
+  })
+  @Expose()
+  @IsOptional()
+  @IsNumber()
+  speed?: number = 5; // optional speech speed; the field initializer defaults it to 5
+}
--- a/src/controller/translate/translate.controller.ts
+++ b/src/controller/translate/translate.controller.ts
@@ -18,6 +18,10 @@ import { TranslateImageReqDto } from './dto/req/translateImageReq.dto';
 import { TranslateProgressReqDto } from './dto/req/translateProgressReq.dto';
 import { ApiResponseInterceptor } from '@/common/interceptor/api.response.interceptor';
 import { Response } from 'express';
+import { TextToSpeechReqDto } from './dto/req/textToSpeechReq.dto';
+import { TTS_VOICE_LIST } from '@/common/utils/constants';
+import { BadRequestError } from '@/common/exception/badRequest/BadRequestError';
+import { config } from '@/config';

 @ApiTags('translate')
 @UseInterceptors(ApiResponseInterceptor)
@@ -162,4 +166,59 @@ export class TranslateController {
  ): Promise<any> {
    return await this.translateService.downloadImage(dto.taskId, res);
  }
+
+  /**
+   * Synthesizes speech for `dto.text` using the voice whose uuid equals `dto.speaker`.
+   * Responds with the audio URL string: the OBS endpoint joined with the filename from the TTS service.
+   * @throws BadRequestError when the uuid matches no voice; errors from TranslateService.tts propagate.
+   */
+  @Post('textToSpeech')
+  @ApiOperation({ summary: 'text合成语音' })
+  @ApiResponse({
+    status: 200,
+    description: '成功返回合成语音文件',
+    example: {
+      code: 200,
+      message: 'success',
+      data: 'http://xxx.com/xxx.mp3',
+    },
+  })
+  @Auth()
+  async textToSpeech(@Body() dto: TextToSpeechReqDto): Promise<string> {
+    const voice = TTS_VOICE_LIST.find((item) => item.uuid === dto.speaker);
+    if (!voice) {
+      throw BadRequestError.default('音色不存在');
+    }
+    // Clients send the voice uuid; the TTS service is given the speaker name and provider code instead.
+    const result = await this.translateService.tts({
+      ...dto,
+      speaker: voice.speaker,
+      provider: voice.provider,
+    });
+    return `${config.obs.endpoint}/${result.filename}`;
+  }
+
+  // Lists the voices supported for speech synthesis by returning the TTS_VOICE_LIST constant.
+  @Get('voiceList')
+  @ApiOperation({ summary: '获取语音合成支持的语音列表' })
+  @ApiResponse({
+    status: 200,
+    description: '成功返回支持的语音列表',
+    example: {
+      code: 200,
+      message: 'success',
+      data: [
+        {
+          uuid: '3e7a6cb0-3bae-4af9-bb7d-abc9f1c5ebe6',
+          speaker: 'xiaoyan',
+          name: '小燕',
+          provider: 9,
+        },
+      ],
+    },
+  })
+  @Auth()
+  async getVoiceList(): Promise<any> {
+    return TTS_VOICE_LIST; // each entry's `uuid` is the value textToSpeech accepts as `speaker`
+  }
 }
--- a/src/controller/translate/translate.service.ts
+++ b/src/controller/translate/translate.service.ts
@@ -13,6 +13,7 @@ import * as FormData from 'form-data';
 import { TranslateImageReqDto } from './dto/req/translateImageReq.dto';
 import { Response } from 'express';
 import axios, { AxiosResponse } from 'axios';
+import { TTSDto } from './dto/baseDto';

 @Injectable()
 export class TranslateService {
@@ -206,4 +207,25 @@ export class TranslateService {
    // 将文件流返回给前端
    response.data.pipe(res);
  }
+
+  /**
+   * Posts the synthesis request to the configured TTS service and returns its `data` payload.
+   * @param dto text, speaker, provider and optional speed; spread into the JSON request body.
+   * @returns the service's `data` object; the controller reads `filename` from it.
+   * @throws BadRequestError when the response carries no `data.filename`.
+   */
+  async tts(dto: TTSDto): Promise<any> {
+    const result = await axiosPostRequest(
+      config.service.tts.serviceUrl,
+      { ...dto },
+      { headers: { 'Content-Type': 'application/json' } },
+    );
+
+    // A present filename is the success signal, so fail only when it (or `data`) is missing.
+    if (!result?.data?.filename) {
+      throw BadRequestError.default('语音合成失败');
+    }
+
+    return result.data;
+  }
 }