From b3581f494fd0ddd1a8e41e0acf6dbedd4a58b3ed Mon Sep 17 00:00:00 2001 From: yuanhe Date: Mon, 11 May 2026 10:33:54 +0800 Subject: [PATCH 1/3] fix: auto-select compatible sample rate for opus/pcmu formats opus defaults to 24000 Hz, pcmu_raw/pcmu_wav default to 8000 Hz. Explicit --sample-rate still takes precedence. --- src/commands/speech/synthesize.ts | 4 +-- src/utils/audio-formats.ts | 10 +++++++ test/commands/speech/synthesize.test.ts | 39 +++++++++++++++++++++++++ test/utils/audio-formats.test.ts | 19 ++++++++++++ 4 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/commands/speech/synthesize.ts b/src/commands/speech/synthesize.ts index 4c32b84..3589952 100644 --- a/src/commands/speech/synthesize.ts +++ b/src/commands/speech/synthesize.ts @@ -8,7 +8,7 @@ import { detectOutputFormat, formatOutput } from '../../output/formatter'; import { saveAudioOutput } from '../../output/audio'; import { writeFileSync } from 'fs'; import { readTextFromPathOrStdin } from '../../utils/fs'; -import { T2A_FORMATS, formatList, validateAudioFormat, validateT2AStreaming } from '../../utils/audio-formats'; +import { T2A_FORMATS, formatList, validateAudioFormat, validateT2AStreaming, t2aDefaultSampleRate } from '../../utils/audio-formats'; import type { Config } from '../../config/schema'; import type { GlobalFlags } from '../../types/flags'; import type { SpeechRequest, SpeechResponse } from '../../types/api'; @@ -81,7 +81,7 @@ export default defineCommand({ }, audio_setting: { format: (flags.format as string) || 'mp3', - sample_rate: (flags.sampleRate as number) ?? 32000, + sample_rate: (flags.sampleRate as number) ?? t2aDefaultSampleRate(ext, 32000), bitrate: (flags.bitrate as number) ?? 128000, channel: (flags.channels as number) ?? 
1, }, diff --git a/src/utils/audio-formats.ts b/src/utils/audio-formats.ts index 5545961..e55fee0 100644 --- a/src/utils/audio-formats.ts +++ b/src/utils/audio-formats.ts @@ -20,6 +20,16 @@ export function validateAudioFormat(format: string, formats: readonly string[]): } } +const T2A_SAMPLE_RATE: Partial<Record<T2AFormat, number>> = { + opus: 24000, + pcmu_raw: 8000, + pcmu_wav: 8000, +}; + +export function t2aDefaultSampleRate(format: string, fallback: number): number { + return T2A_SAMPLE_RATE[format as T2AFormat] ?? fallback; +} + export function validateT2AStreaming(format: string, stream: boolean): void { if (stream && format === 'wav') { throw new CLIError( diff --git a/test/commands/speech/synthesize.test.ts b/test/commands/speech/synthesize.test.ts index cf8db6b..8a49063 100644 --- a/test/commands/speech/synthesize.test.ts +++ b/test/commands/speech/synthesize.test.ts @@ -259,4 +259,43 @@ describe('speech synthesize format validation', () => { synthesizeCommand.execute(config, { ...flags, format: 'wav', stream: true }), ).rejects.toThrow('wav format is not supported in streaming'); }); + + it('defaults opus sample rate to 24000', async () => { + const originalLog = console.log; + let output = ''; + console.log = (msg: string) => { output += msg; }; + try { + await synthesizeCommand.execute(config, { ...flags, format: 'opus' }); + const parsed = JSON.parse(output); + expect(parsed.request.audio_setting.sample_rate).toBe(24000); + } finally { + console.log = originalLog; + } + }); + + it('defaults pcmu_wav sample rate to 8000', async () => { + const originalLog = console.log; + let output = ''; + console.log = (msg: string) => { output += msg; }; + try { + await synthesizeCommand.execute(config, { ...flags, format: 'pcmu_wav' }); + const parsed = JSON.parse(output); + expect(parsed.request.audio_setting.sample_rate).toBe(8000); + } finally { + console.log = originalLog; + } + }); + + it('respects explicit --sample-rate even for opus', async () => { + const originalLog = console.log; 
+ let output = ''; + console.log = (msg: string) => { output += msg; }; + try { + await synthesizeCommand.execute(config, { ...flags, format: 'opus', sampleRate: 16000 }); + const parsed = JSON.parse(output); + expect(parsed.request.audio_setting.sample_rate).toBe(16000); + } finally { + console.log = originalLog; + } + }); }); \ No newline at end of file diff --git a/test/utils/audio-formats.test.ts b/test/utils/audio-formats.test.ts index a4f6369..e9120a5 100644 --- a/test/utils/audio-formats.test.ts +++ b/test/utils/audio-formats.test.ts @@ -5,6 +5,7 @@ import { formatList, validateAudioFormat, validateT2AStreaming, + t2aDefaultSampleRate, } from '../../src/utils/audio-formats'; describe('audio-formats', () => { @@ -52,4 +53,22 @@ describe('audio-formats', () => { expect(formatList(['a', 'b', 'c'])).toBe('a, b, c'); }); }); + + describe('t2aDefaultSampleRate', () => { + it('returns 24000 for opus', () => { + expect(t2aDefaultSampleRate('opus', 32000)).toBe(24000); + }); + + it('returns 8000 for pcmu_raw', () => { + expect(t2aDefaultSampleRate('pcmu_raw', 32000)).toBe(8000); + }); + + it('returns 8000 for pcmu_wav', () => { + expect(t2aDefaultSampleRate('pcmu_wav', 32000)).toBe(8000); + }); + + it('returns fallback for mp3', () => { + expect(t2aDefaultSampleRate('mp3', 32000)).toBe(32000); + }); + }); }); From de6094bd9cdb98d09e9d3a628079ffb8ada7cf8d Mon Sep 17 00:00:00 2001 From: yuanhe Date: Mon, 11 May 2026 10:44:11 +0800 Subject: [PATCH 2/3] =?UTF-8?q?fix:=20remove=20flac=20from=20music=20forma?= =?UTF-8?q?ts=20=E2=80=94=20API=20rejects=20it?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real API testing confirmed music only supports mp3, wav, pcm. 
--- src/utils/audio-formats.ts | 2 +- test/commands/music/generate.test.ts | 2 +- test/utils/audio-formats.test.ts | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils/audio-formats.ts b/src/utils/audio-formats.ts index e55fee0..956ea76 100644 --- a/src/utils/audio-formats.ts +++ b/src/utils/audio-formats.ts @@ -2,7 +2,7 @@ import { CLIError } from '../errors/base'; import { ExitCode } from '../errors/codes'; export const T2A_FORMATS = ['mp3', 'pcm', 'flac', 'wav', 'pcmu_raw', 'pcmu_wav', 'opus'] as const; -export const MUSIC_FORMATS = ['mp3', 'wav', 'pcm', 'flac'] as const; +export const MUSIC_FORMATS = ['mp3', 'wav', 'pcm'] as const; export type T2AFormat = (typeof T2A_FORMATS)[number]; export type MusicFormat = (typeof MUSIC_FORMATS)[number]; diff --git a/test/commands/music/generate.test.ts b/test/commands/music/generate.test.ts index 04738e0..e5d0b16 100644 --- a/test/commands/music/generate.test.ts +++ b/test/commands/music/generate.test.ts @@ -190,7 +190,7 @@ describe('music generate command', () => { ).rejects.toThrow('Invalid audio format "opus"'); }); - it.each(['mp3', 'wav', 'pcm', 'flac'])( + it.each(['mp3', 'wav', 'pcm'])( 'accepts %s format in dry-run', async (fmt) => { const origLog = console.log; diff --git a/test/utils/audio-formats.test.ts b/test/utils/audio-formats.test.ts index e9120a5..b055002 100644 --- a/test/utils/audio-formats.test.ts +++ b/test/utils/audio-formats.test.ts @@ -22,12 +22,12 @@ describe('audio-formats', () => { }); describe('MUSIC_FORMATS', () => { - it.each(['mp3', 'wav', 'pcm', 'flac'] as const)( + it.each(['mp3', 'wav', 'pcm'] as const)( 'accepts %s', (fmt) => expect(() => validateAudioFormat(fmt, MUSIC_FORMATS)).not.toThrow(), ); - it.each(['opus', 'pcmu_raw', 'pcmu_wav', 'aac'])( + it.each(['opus', 'pcmu_raw', 'pcmu_wav', 'flac', 'aac'])( 'rejects %s', (fmt) => expect(() => validateAudioFormat(fmt, MUSIC_FORMATS)).toThrow(/Invalid audio format/), ); From 
5ac960b7f9be599a4c806b78ac04591e4b2d263f Mon Sep 17 00:00:00 2001 From: yuanhe Date: Mon, 11 May 2026 10:45:41 +0800 Subject: [PATCH 3/3] chore: bump version to 1.0.13 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index a6155f6..7bb20f1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "mmx-cli", - "version": "1.0.12", + "version": "1.0.13", "description": "CLI for the MiniMax AI Platform", "type": "module", "engines": {