
Transcript Processing

flowchart LR
    subgraph Input
        AUDIO[🎤 Audio] --> STT[Speech-to-Text]
    end
    subgraph Processing
        STT --> RAW[Raw Transcript]
        RAW --> CLEAN[Text Cleanup]
        CLEAN --> DIAR[+ Diarization]
        DIAR --> MERGE[Merge & Format]
    end
    subgraph Output
        MERGE --> DB[(SQLite)]
        MERGE --> SOAP[SOAP Generation]
        MERGE --> UI[UI Display]
    end

CREATE TABLE transcripts (
    id TEXT PRIMARY KEY,
    recording_id TEXT NOT NULL UNIQUE REFERENCES recordings(id) ON DELETE CASCADE,
    -- STT metadata
    engine TEXT,            -- 'libraxis', 'mlx', 'openai'
    lang TEXT,              -- 'pl', 'en'
    status TEXT NOT NULL,   -- 'pending', 'processing', 'done', 'failed'
    -- Content
    text TEXT,              -- Full transcript text
    segments_json TEXT,     -- JSON array of segments with timing
    -- Error handling
    error_code TEXT,
    error_message TEXT,
    -- Audit
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Indexes
CREATE INDEX ix_transcripts_status ON transcripts(status);
CREATE INDEX ix_transcripts_recording ON transcripts(recording_id);

stateDiagram-v2
    [*] --> pending: Recording saved
    pending --> processing: STT job started
    processing --> done: Transcription complete
    processing --> failed: Error occurred
    failed --> processing: Retry
    done --> [*]
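
The same lifecycle can be mirrored on the frontend as a union type. A minimal sketch; the type and helper names below are illustrative, not taken from the codebase:

// Transcript lifecycle states, mirroring the state diagram above.
// The names TranscriptStatus and canRetry are illustrative.
type TranscriptStatus = 'pending' | 'processing' | 'done' | 'failed';

// Only a failed transcription can be retried; 'done' is terminal.
const canRetry = (status: TranscriptStatus): boolean => status === 'failed';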

interface TranscriptSegment {
  id: number;
  start: number;           // Start time in seconds
  end: number;             // End time in seconds
  text: string;            // Segment text
  speaker?: string;        // Speaker ID (if diarization)
  confidence: number;      // 0-1 confidence score
  words?: WordTimestamp[]; // Word-level timing
}

interface WordTimestamp {
  word: string;
  start: number;
  end: number;
  confidence: number;
}
{
  "segments": [
    {
      "id": 0,
      "start": 0.0,
      "end": 4.5,
      "text": "Dzień dobry, co się dzieje z Burkiem?",
      "speaker": "SPEAKER_00",
      "confidence": 0.95
    },
    {
      "id": 1,
      "start": 4.8,
      "end": 12.3,
      "text": "Od wczoraj nie chce jeść i jest bardzo ospały.",
      "speaker": "SPEAKER_01",
      "confidence": 0.92
    }
  ]
}
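
The segments_json column stores this structure as a string, so the frontend has to parse it back into TranscriptSegment[]. A minimal sketch, assuming the stored value may be NULL and may be either a bare array or an object wrapped in a segments key:

// Parse the segments_json column into typed segments.
// Assumes the value is either a bare array or { "segments": [...] }.
const parseSegments = (segmentsJson: string | null): TranscriptSegment[] => {
  if (!segmentsJson) return [];
  try {
    const parsed = JSON.parse(segmentsJson);
    return Array.isArray(parsed) ? parsed : parsed.segments ?? [];
  } catch {
    // Corrupted JSON should not break the UI; fall back to plain-text display.
    return [];
  }
};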

const cleanTranscript = (rawText: string): string => {
  let text = rawText;

  // 1. Remove filler words (common Polish verbal fillers)
  const fillers = ['eee', 'yyy', 'hmm', 'no', 'znaczy'];
  fillers.forEach(filler => {
    text = text.replace(new RegExp(`\\b${filler}\\b`, 'gi'), '');
  });

  // 2. Fix common STT errors (word boundaries prevent partial-word matches)
  const corrections: Record<string, string> = {
    'vista': 'Vista',
    'soap': 'SOAP',
    // Add more medical terms
  };
  Object.entries(corrections).forEach(([wrong, correct]) => {
    text = text.replace(new RegExp(`\\b${wrong}\\b`, 'gi'), correct);
  });

  // 3. Normalize whitespace
  text = text.replace(/\s+/g, ' ').trim();

  // 4. Remove stray spaces before punctuation
  text = text.replace(/\s+([.,!?])/g, '$1');

  return text;
};
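
An illustrative input/output pair for the cleanup step (the sample sentence is made up):

// Fillers are stripped, whitespace collapsed, punctuation re-attached.
const raw = 'Dzień dobry , eee co się dzieje   z Burkiem ?';
const cleaned = cleanTranscript(raw);
// => "Dzień dobry, co się dzieje z Burkiem?"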

#[tauri::command]
pub async fn link_transcript_to_visit(
    db: State<'_, Database>,
    visit_id: String,
    recording_id: String,
) -> Result<(), String> {
    // Get the transcript for this recording
    let transcript = sqlx::query_as!(
        Transcript,
        "SELECT * FROM transcripts WHERE recording_id = ?",
        recording_id
    )
    .fetch_one(&db.pool)
    .await
    .map_err(|e| e.to_string())?;

    // Update the visit with the transcript text
    sqlx::query!(
        "UPDATE visits SET
            transcript = ?,
            raw_transcript = ?,
            transcript_updated_at = CURRENT_TIMESTAMP
         WHERE visit_id = ?",
        transcript.text,
        transcript.text, // raw_transcript keeps the original, unedited text
        visit_id
    )
    .execute(&db.pool)
    .await
    .map_err(|e| e.to_string())?;

    // Update the recording status
    sqlx::query!(
        "UPDATE recordings SET status = 'linked', visit_id = ? WHERE id = ?",
        visit_id,
        recording_id
    )
    .execute(&db.pool)
    .await
    .map_err(|e| e.to_string())?;

    Ok(())
}
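
On the frontend the command is called through Tauri's invoke; a minimal sketch (the import path depends on the Tauri version in use):

import { invoke } from '@tauri-apps/api/core'; // '@tauri-apps/api/tauri' on Tauri v1

// Link a finished transcript to a visit. Argument keys are camelCase on the
// JS side and are mapped to the command's snake_case parameters by Tauri.
const linkTranscript = async (visitId: string, recordingId: string): Promise<void> => {
  await invoke('link_transcript_to_visit', { visitId, recordingId });
};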

src/components/audio/TranscriptionViewer.tsx
interface TranscriptionViewerProps {
  visitId: string;
  onTranscriptChange?: (text: string) => void;
}

export const TranscriptionViewer: React.FC<TranscriptionViewerProps> = ({
  visitId,
  onTranscriptChange,
}) => {
  const { transcript, isLoading, segments } = useVisitTranscript(visitId);
  const [isEditing, setIsEditing] = useState(false);
  const [editedText, setEditedText] = useState('');

  if (isLoading) {
    return <TranscriptionSkeleton />;
  }

  if (!transcript) {
    return (
      <EmptyState
        icon={<MicrophoneIcon />}
        title="Brak transkrypcji"
        description="Nagraj wizytę, aby wygenerować transkrypcję"
      />
    );
  }

  return (
    <div className="space-y-4">
      <div className="flex justify-between items-center">
        <h3 className="font-medium">Transkrypcja</h3>
        <Button
          variant="ghost"
          size="sm"
          onClick={() => {
            // Seed the editor with the current transcript when entering edit mode
            if (!isEditing) setEditedText(transcript);
            setIsEditing(!isEditing);
          }}
        >
          {isEditing ? 'Podgląd' : 'Edytuj'}
        </Button>
      </div>

      {isEditing ? (
        <Textarea
          value={editedText}
          onChange={(e) => setEditedText(e.target.value)}
          rows={10}
          className="font-mono text-sm"
        />
      ) : segments ? (
        <TranscriptWithSpeakers segments={segments} />
      ) : (
        <div className="prose dark:prose-invert">
          <p>{transcript}</p>
        </div>
      )}

      {isEditing && (
        <div className="flex gap-2">
          <Button onClick={() => handleSave(editedText)}>
            Zapisz zmiany
          </Button>
          <Button variant="ghost" onClick={() => setIsEditing(false)}>
            Anuluj
          </Button>
        </div>
      )}
    </div>
  );
};
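
handleSave is referenced above but not shown. A minimal sketch, defined inside the component so it can reach visitId and the state setters; the save_transcript_edit command name is an assumption, not a confirmed backend API:

// Sketch of the save handler used above. 'save_transcript_edit' is an assumed
// command name; wire it to whatever backend command persists transcript edits.
const handleSave = async (text: string) => {
  await invoke('save_transcript_edit', { visitId, transcript: text });
  onTranscriptChange?.(text);
  setIsEditing(false);
};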
src/components/audio/TranscriptionProgress.tsx
interface TranscriptionProgressProps {
  jobId: string;
  onComplete: (transcript: Transcript) => void;
}

export const TranscriptionProgress: React.FC<TranscriptionProgressProps> = ({
  jobId,
  onComplete,
}) => {
  const { status, progress, error } = useTranscriptionStatus(jobId);

  return (
    <div className="flex items-center gap-4 p-4 bg-gray-50 dark:bg-gray-800 rounded-lg">
      {status === 'processing' && (
        <>
          <Spinner />
          <div className="flex-1">
            <p className="font-medium">Transkrypcja w toku...</p>
            <Progress value={progress} className="mt-2" />
          </div>
        </>
      )}
      {status === 'done' && (
        <>
          <CheckIcon className="text-green-500" />
          <p>Transkrypcja zakończona</p>
        </>
      )}
      {status === 'failed' && (
        <>
          <XIcon className="text-red-500" />
          <div>
            <p className="font-medium text-red-600">Błąd transkrypcji</p>
            <p className="text-sm text-gray-500">{error}</p>
          </div>
          <Button size="sm" onClick={handleRetry}>
            Ponów
          </Button>
        </>
      )}
    </div>
  );
};
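
useTranscriptionStatus is not shown above. A minimal polling sketch; the get_transcription_status command name and its payload shape are assumptions:

import { useEffect, useState } from 'react';
import { invoke } from '@tauri-apps/api/core'; // '@tauri-apps/api/tauri' on Tauri v1

interface TranscriptionStatus {
  status: 'pending' | 'processing' | 'done' | 'failed';
  progress: number; // 0-100
  error?: string;
}

// Polls the backend every 2 s until the job reaches a terminal state.
// 'get_transcription_status' is an assumed command name.
export const useTranscriptionStatus = (jobId: string): TranscriptionStatus => {
  const [state, setState] = useState<TranscriptionStatus>({ status: 'pending', progress: 0 });

  useEffect(() => {
    const timer = setInterval(async () => {
      const next = await invoke<TranscriptionStatus>('get_transcription_status', { jobId });
      setState(next);
      if (next.status === 'done' || next.status === 'failed') clearInterval(timer);
    }, 2000);
    return () => clearInterval(timer);
  }, [jobId]);

  return state;
};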

// Export transcript as a plain text file
const exportAsText = (transcript: string, visitId: string) => {
  const blob = new Blob([transcript], { type: 'text/plain' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = `transcript-${visitId}.txt`;
  a.click();
  URL.revokeObjectURL(url); // Release the object URL once the download is triggered
};

// Export with timestamps
const exportWithTimestamps = (segments: TranscriptSegment[]) => {
  const lines = segments.map(seg =>
    `[${formatTime(seg.start)} - ${formatTime(seg.end)}] ${seg.speaker || ''}: ${seg.text}`
  );
  return lines.join('\n');
};

// Export as SRT subtitles
const exportAsSRT = (segments: TranscriptSegment[]) => {
  return segments.map((seg, idx) =>
    `${idx + 1}\n${formatSRTTime(seg.start)} --> ${formatSRTTime(seg.end)}\n${seg.text}\n`
  ).join('\n');
};
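
formatTime and formatSRTTime are referenced above but not defined; one possible implementation (a sketch, not necessarily the project's actual helpers):

// Helpers assumed by the export functions above.
// formatTime    -> "MM:SS" for the human-readable export.
// formatSRTTime -> "HH:MM:SS,mmm" as required by the SRT format.
const pad = (n: number, width = 2) => String(Math.floor(n)).padStart(width, '0');

const formatTime = (seconds: number): string =>
  `${pad(seconds / 60)}:${pad(seconds % 60)}`;

const formatSRTTime = (seconds: number): string => {
  const ms = Math.floor((seconds % 1) * 1000);
  const h = Math.floor(seconds / 3600);
  const m = Math.floor((seconds % 3600) / 60);
  const s = Math.floor(seconds % 60);
  return `${pad(h)}:${pad(m)}:${pad(s)},${String(ms).padStart(3, '0')}`;
};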

| Error Code | Description | Solution |
| --- | --- | --- |
| STT_UNAVAILABLE | No STT provider available | Check internet/MLX |
| AUDIO_TOO_SHORT | Recording < 1 second | Record longer audio |
| AUDIO_CORRUPTED | Cannot decode audio | Re-record |
| TIMEOUT | STT took too long | Retry with shorter audio |
| LANGUAGE_UNSUPPORTED | Language not supported | Use PL or EN |
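
For display in the UI these codes can be mapped to user-facing messages; an illustrative mapping (the Polish copy below is example text, not the app's actual strings):

// Illustrative mapping of backend error codes to user-facing messages.
// Keys mirror the table above; the messages are example copy.
const TRANSCRIPTION_ERROR_MESSAGES: Record<string, string> = {
  STT_UNAVAILABLE: 'Brak dostępnego silnika STT. Sprawdź połączenie internetowe lub MLX.',
  AUDIO_TOO_SHORT: 'Nagranie jest krótsze niż 1 sekunda. Nagraj dłuższy fragment.',
  AUDIO_CORRUPTED: 'Nie można odczytać pliku audio. Nagraj wizytę ponownie.',
  TIMEOUT: 'Transkrypcja trwała zbyt długo. Spróbuj z krótszym nagraniem.',
  LANGUAGE_UNSUPPORTED: 'Nieobsługiwany język. Użyj polskiego lub angielskiego.',
};

A failed job can be re-queued, up to three attempts, with the retry command below: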
#[tauri::command]
pub async fn retry_transcription(
    db: State<'_, Database>,
    recording_id: String,
) -> Result<TranscriptionJob, String> {
    // Check retry count
    let job = get_job_for_recording(&db, &recording_id).await?;
    if job.attempts >= 3 {
        return Err("Maximum retry attempts reached".to_string());
    }

    // Re-queue the job and bump the attempt counter
    sqlx::query!(
        "UPDATE jobs SET status = 'queued', attempts = attempts + 1 WHERE id = ?",
        job.id
    )
    .execute(&db.pool)
    .await
    .map_err(|e| e.to_string())?;

    Ok(TranscriptionJob {
        job_id: job.id,
        status: "queued".to_string(),
    })
}
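
The handleRetry handler referenced in TranscriptionProgress can call this command through invoke; a minimal sketch, assuming the recording id is available to the component (e.g. passed as a prop):

// Sketch of the retry handler used by the "Ponów" button.
// Calls the retry_transcription command shown above.
const handleRetry = async (recordingId: string) => {
  try {
    await invoke('retry_transcription', { recordingId });
  } catch (err) {
    // Surfaces "Maximum retry attempts reached" after the third attempt.
    console.error('Retry failed:', err);
  }
};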