
Recording

flowchart LR
  subgraph Recording
    MIC[🎤 Microphone] --> CAP[Audio Capture]
    CAP --> BUF[Ring Buffer]
    BUF --> VAD[Voice Activity Detection]
    VAD --> WAV[WAV Encoder]
  end
  subgraph Storage
    WAV --> FS[📁 File System]
    WAV --> DB[(SQLite Metadata)]
  end

| Format    | Extension | Use Case              | Compression       |
|-----------|-----------|-----------------------|-------------------|
| WAV       | .wav      | Recording, archival   | None (lossless)   |
| WebM/Opus | .webm     | Browser recording     | Lossy (efficient) |
| AAC       | .m4a      | iOS compatibility     | Lossy             |
| FLAC      | .flac     | High-quality archival | Lossless          |
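MediaRecorder can only capture into containers the browser supports (WebM/Opus in Chromium and Firefox, AAC/MP4 in Safari), so WAV and FLAC files are typically produced by converting afterwards. A minimal sketch of picking a capture container at runtime; the pickRecordingMimeType helper and its fallback order are illustrative, not part of the codebase:

// Pick the best supported capture container for MediaRecorder.
// The fallback order below is an assumption, not a documented policy.
const RECORDING_MIME_TYPES = [
  'audio/webm;codecs=opus', // preferred in Chromium/Firefox
  'audio/mp4',              // Safari/iOS (AAC)
];

export const pickRecordingMimeType = (): string | undefined =>
  RECORDING_MIME_TYPES.find((type) => MediaRecorder.isTypeSupported(type));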

interface AudioConfig {
  sampleRate: 16000 | 44100 | 48000; // Default: 16000 (optimal for STT)
  channels: 1 | 2;                   // Default: 1 (mono)
  bitsPerSample: 16 | 24;            // Default: 16
  bufferSize: number;                // Default: 4096 samples
}
| Use Case            | Sample Rate | Channels | Bits   |
|---------------------|-------------|----------|--------|
| STT (transcription) | 16000 Hz    | Mono     | 16-bit |
| Archival            | 44100 Hz    | Stereo   | 24-bit |
| Quick notes         | 16000 Hz    | Mono     | 16-bit |
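The presets from the table map directly onto AudioConfig values. A sketch; the AUDIO_PRESETS name and the preset keys are illustrative:

// Presets matching the table above (names are illustrative).
const AUDIO_PRESETS: Record<'stt' | 'archival' | 'quickNote', AudioConfig> = {
  stt:       { sampleRate: 16000, channels: 1, bitsPerSample: 16, bufferSize: 4096 },
  archival:  { sampleRate: 44100, channels: 2, bitsPerSample: 24, bufferSize: 4096 },
  quickNote: { sampleRate: 16000, channels: 1, bitsPerSample: 16, bufferSize: 4096 },
};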

src/hooks/audio/useRecording.ts
import { useState, useRef } from 'react';
// AudioConfig is the shared audio configuration type shown above.

interface UseRecordingReturn {
  // State
  isRecording: boolean;
  isPaused: boolean;
  duration: number;   // seconds
  audioLevel: number; // 0-100 (for visualisation)
  // Actions
  startRecording: () => Promise<void>;
  stopRecording: () => Promise<Blob>;
  pauseRecording: () => void;
  resumeRecording: () => void;
  // Errors
  error: Error | null;
  permissionDenied: boolean;
}
export const useRecording = (config?: Partial<AudioConfig>): UseRecordingReturn => {
  const [isRecording, setIsRecording] = useState(false);
  const [isPaused, setIsPaused] = useState(false);
  const [duration, setDuration] = useState(0);
  const [audioLevel, setAudioLevel] = useState(0);
  const [error, setError] = useState<Error | null>(null);
  const [permissionDenied, setPermissionDenied] = useState(false);

  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);

  // Duration timer (minimal implementation; the hook only referenced this helper)
  const startDurationTimer = () => {
    timerRef.current = setInterval(() => setDuration((d) => d + 1), 1000);
  };
  const stopDurationTimer = () => {
    if (timerRef.current) clearInterval(timerRef.current);
    timerRef.current = null;
  };

  // Audio level monitoring via an AnalyserNode, mapped to 0-100
  // (sketch of the helper the hook referenced but did not define)
  const startAudioLevelMonitoring = (stream: MediaStream) => {
    const audioContext = new AudioContext();
    const analyser = audioContext.createAnalyser();
    audioContext.createMediaStreamSource(stream).connect(analyser);
    const data = new Uint8Array(analyser.frequencyBinCount);
    const tick = () => {
      if (mediaRecorderRef.current?.state === 'inactive') {
        void audioContext.close();
        return;
      }
      analyser.getByteTimeDomainData(data);
      let peak = 0;
      for (let i = 0; i < data.length; i++) peak = Math.max(peak, Math.abs(data[i] - 128));
      setAudioLevel(Math.round((peak / 128) * 100));
      requestAnimationFrame(tick);
    };
    requestAnimationFrame(tick);
  };

  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: config?.sampleRate ?? 16000,
          channelCount: config?.channels ?? 1,
          echoCancellation: true,
          noiseSuppression: true,
        },
      });
      const mediaRecorder = new MediaRecorder(stream, {
        mimeType: 'audio/webm;codecs=opus',
      });
      mediaRecorder.ondataavailable = (e) => {
        if (e.data.size > 0) {
          chunksRef.current.push(e.data);
        }
      };
      mediaRecorder.start(1000); // Emit a chunk every 1 second
      mediaRecorderRef.current = mediaRecorder;
      setIsRecording(true);
      // Start duration timer
      startDurationTimer();
      // Start audio level monitoring
      startAudioLevelMonitoring(stream);
    } catch (err) {
      const e = err as Error;
      if (e.name === 'NotAllowedError') {
        setPermissionDenied(true);
      }
      setError(e);
      throw e;
    }
  };

  const stopRecording = (): Promise<Blob> => {
    return new Promise((resolve, reject) => {
      const mediaRecorder = mediaRecorderRef.current;
      if (!mediaRecorder) {
        reject(new Error('No active recording'));
        return;
      }
      mediaRecorder.onstop = () => {
        const blob = new Blob(chunksRef.current, { type: 'audio/webm' });
        chunksRef.current = [];
        setIsRecording(false);
        setIsPaused(false);
        setDuration(0);
        stopDurationTimer();
        resolve(blob);
      };
      mediaRecorder.stop();
      mediaRecorder.stream.getTracks().forEach((track) => track.stop());
    });
  };

  // Pause/resume map directly onto the MediaRecorder API
  const pauseRecording = () => {
    mediaRecorderRef.current?.pause();
    stopDurationTimer();
    setIsPaused(true);
  };
  const resumeRecording = () => {
    mediaRecorderRef.current?.resume();
    startDurationTimer();
    setIsPaused(false);
  };

  return {
    isRecording,
    isPaused,
    duration,
    audioLevel,
    startRecording,
    stopRecording,
    pauseRecording,
    resumeRecording,
    error,
    permissionDenied,
  };
};

# macOS
~/Library/Application Support/Vista/
├── recordings/ # Original audio files
├── processed/ # Compressed/converted files
├── transcripts/ # Text output cache
└── temp/ # Processing workspace
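Recording files are addressed by paths relative to this application data directory (see the path column in the schema below). A minimal sketch of resolving them; resolveRecordingPath is an illustrative helper and the base directory is supplied by the app shell:

// Resolve a recording's relative path against the app data directory
// (e.g. ~/Library/Application Support/Vista on macOS).
export const resolveRecordingPath = (appDataDir: string, relativePath: string): string =>
  `${appDataDir.replace(/\/+$/, '')}/${relativePath.replace(/^\/+/, '')}`;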
CREATE TABLE recordings (
  id TEXT PRIMARY KEY,
  visit_id TEXT REFERENCES visits(visit_id),
  patient_id TEXT REFERENCES patients(patient_id),
  created_by TEXT REFERENCES users(user_id) ON DELETE SET NULL,

  -- File info
  path TEXT NOT NULL,    -- Relative path from app data dir
  bytes INTEGER,         -- File size
  duration_ms INTEGER,   -- Duration in milliseconds
  sha256 TEXT UNIQUE,    -- Checksum for deduplication

  -- Audio metadata
  codec TEXT,            -- pcm, opus, aac, etc.
  sample_rate INTEGER,   -- 16000, 44100, etc.
  channels INTEGER,      -- 1=mono, 2=stereo

  -- Processing status
  status TEXT NOT NULL CHECK(status IN (
    'recording',    -- Currently recording
    'saving',       -- Being written to disk
    'indexed',      -- Saved, ready for processing
    'transcribing', -- STT in progress
    'linked',       -- Associated with visit
    'done',         -- Fully processed
    'failed',       -- Error occurred
    'orphaned',     -- No visit association
    'retryable',    -- Failed but can retry
    'deleted'       -- Audio removed by retention cleanup (see cleanup_old_recordings)
  )),

  error_code TEXT,
  error_summary TEXT,
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Indexes
CREATE INDEX idx_recordings_visit ON recordings(visit_id);
CREATE INDEX idx_recordings_status ON recordings(status);
CREATE INDEX idx_recordings_sha ON recordings(sha256);
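The sha256 column drives deduplication: the UNIQUE constraint prevents the same audio from being stored twice. A sketch of computing the checksum in the renderer with the Web Crypto API before handing the blob to the backend; the sha256Hex helper is illustrative:

// Compute the SHA-256 checksum used for deduplication (hex-encoded).
export const sha256Hex = async (blob: Blob): Promise<string> => {
  const digest = await crypto.subtle.digest('SHA-256', await blob.arrayBuffer());
  return Array.from(new Uint8Array(digest))
    .map((b) => b.toString(16).padStart(2, '0'))
    .join('');
};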

src/components/audio/RecordingControls.tsx
import React from 'react';
import { useRecording } from '../../hooks/audio/useRecording';
// Button, the icon components, AudioLevelMeter, audioService and formatDuration
// are project-internal modules; their import paths are omitted here.

interface RecordingControlsProps {
  visitId: string;
  onRecordingComplete: (recordingId: string) => void;
}

export const RecordingControls: React.FC<RecordingControlsProps> = ({
  visitId,
  onRecordingComplete,
}) => {
  const {
    isRecording,
    isPaused,
    duration,
    audioLevel,
    startRecording,
    stopRecording,
    pauseRecording,
    resumeRecording,
  } = useRecording();

  const handleStop = async () => {
    const audioBlob = await stopRecording();
    // Save to backend
    const recording = await audioService.saveRecording(visitId, audioBlob);
    onRecordingComplete(recording.id);
  };

  return (
    <div className="flex items-center gap-4">
      {!isRecording ? (
        <Button onClick={startRecording} variant="primary">
          <MicrophoneIcon /> Start Recording
        </Button>
      ) : (
        <>
          <Button onClick={handleStop} variant="danger">
            <StopIcon /> Stop
          </Button>
          <Button onClick={isPaused ? resumeRecording : pauseRecording}>
            {isPaused ? <PlayIcon /> : <PauseIcon />}
          </Button>
          <div className="flex items-center gap-2">
            <AudioLevelMeter level={audioLevel} />
            <span className="font-mono">{formatDuration(duration)}</span>
          </div>
        </>
      )}
    </div>
  );
};
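formatDuration is referenced above but not shown; a plausible implementation, assuming it renders whole seconds as m:ss:

// Format a duration in seconds as "m:ss" (assumed shape of the shared helper).
export const formatDuration = (seconds: number): string => {
  const m = Math.floor(seconds / 60);
  const s = Math.floor(seconds % 60);
  return `${m}:${s.toString().padStart(2, '0')}`;
};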

VAD (Voice Activity Detection) automatically detects speech in the audio signal.

src/hooks/audio/useVoiceActivityDetection.ts
import { useState } from 'react';

interface VADConfig {
  threshold: number;          // -50 to 0 dB (default: -45)
  minSilenceDuration: number; // ms before stopping (default: 1500)
  bufferSize: number;         // Audio buffer size (default: 4096)
  smoothingFrames: number;    // Noise reduction (default: 3)
}

export const useVoiceActivityDetection = (config: VADConfig) => {
  const [isVoiceActive, setIsVoiceActive] = useState(false);
  const [currentLevel, setCurrentLevel] = useState(-Infinity);

  // Adaptive thresholds based on environment
  const getAdaptiveThreshold = (environment: 'quiet' | 'normal' | 'noisy') => {
    switch (environment) {
      case 'quiet': return -50;  // Sensitive detection
      case 'normal': return -45; // Balanced (default)
      case 'noisy': return -35;  // Less sensitive
    }
  };

  const processAudioFrame = (audioData: Float32Array) => {
    // Calculate RMS energy
    const rms = Math.sqrt(
      audioData.reduce((sum, sample) => sum + sample * sample, 0) / audioData.length
    );
    // Convert to dB
    const db = 20 * Math.log10(rms);
    setCurrentLevel(db);
    // Compare to threshold
    const isSpeech = db > config.threshold;
    setIsVoiceActive(isSpeech);
    return isSpeech;
  };

  return {
    isVoiceActive,
    currentLevel,
    processAudioFrame,
  };
};
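The hook only classifies frames; something has to feed it audio. A sketch of polling time-domain frames from a Web Audio AnalyserNode; startVadPolling is an illustrative helper, not part of the codebase:

// Feed microphone frames into the VAD hook via an AnalyserNode.
export const startVadPolling = (
  stream: MediaStream,
  processAudioFrame: (frame: Float32Array) => boolean,
) => {
  const audioContext = new AudioContext();
  const analyser = audioContext.createAnalyser();
  analyser.fftSize = 4096; // matches the default VAD bufferSize
  audioContext.createMediaStreamSource(stream).connect(analyser);

  const frame = new Float32Array(analyser.fftSize);
  let stopped = false;
  const poll = () => {
    if (stopped) return;
    analyser.getFloatTimeDomainData(frame); // time-domain samples in [-1, 1]
    processAudioFrame(frame);
    requestAnimationFrame(poll);
  };
  requestAnimationFrame(poll);

  // Returns a cleanup function that stops polling and releases the context.
  return () => {
    stopped = true;
    void audioContext.close();
  };
};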

interface AudioRetentionSettings {
  retentionDays: number;          // 7, 14, 30, 90, 365
  deleteOnVisitFinalize: boolean;
  keepTranscriptsOnly: boolean;   // Delete audio, keep text
}
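Illustrative defaults; the concrete values below are assumptions, not product policy:

// Example retention settings (values are assumptions for illustration only).
const DEFAULT_RETENTION: AudioRetentionSettings = {
  retentionDays: 30,
  deleteOnVisitFinalize: false,
  keepTranscriptsOnly: true, // keep the transcript, drop the raw audio
};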
use std::fs;
use sqlx::SqlitePool;

// CleanupResult, Recording and Error are project-level types defined elsewhere.
pub async fn cleanup_old_recordings(
    pool: &SqlitePool,
    retention_days: i32,
) -> Result<CleanupResult, Error> {
    let cutoff = chrono::Utc::now() - chrono::Duration::days(retention_days as i64);

    // Find recordings older than the retention period
    let old_recordings = sqlx::query_as!(
        Recording,
        "SELECT * FROM recordings WHERE created_at < ? AND status = 'done'",
        cutoff.to_rfc3339()
    )
    .fetch_all(pool)
    .await?;

    let mut deleted_count = 0;
    let mut freed_bytes = 0u64;

    for recording in old_recordings {
        // Delete the file from the filesystem
        // (path is stored relative to the app data dir; resolve it against that dir in practice)
        if let Ok(metadata) = fs::metadata(&recording.path) {
            freed_bytes += metadata.len();
            fs::remove_file(&recording.path)?;
        }

        // Mark the row as deleted and clear the stale path (the column is NOT NULL)
        sqlx::query!(
            "UPDATE recordings SET status = 'deleted', path = '' WHERE id = ?",
            recording.id
        )
        .execute(pool)
        .await?;

        deleted_count += 1;
    }

    Ok(CleanupResult { deleted_count, freed_bytes })
}