# Run this script from ~/kaldi/egs/aishell/v1.
# Variable assignments (quotes may be omitted; there must be NO spaces around '=').
data=/export/a05/xna/data
data_url=www.openslr.org/resources/33
# Source shared configuration into the current shell (similar to an 'import'):
# cmd.sh defines $train_cmd etc., path.sh sets up the Kaldi binary PATH.
. ./cmd.sh
. ./path.sh
# Abort the script as soon as any command fails.
set -e # exit on error
# Download and extract the corpus (both parts are needed: audio + resources).
local/download_and_untar.sh $data $data_url data_aishell
local/download_and_untar.sh $data $data_url resource_aishell
# download_and_untar.sh <download/output dir> <download url> <corpus part>
# Data preparation: builds the Kaldi data dirs (wav.scp, utt2spk, ...) from
# the extracted wav files and transcripts.
local/aishell_data_prep.sh $data/data_aishell/wav $data/data_aishell/transcript
# Extract MFCC features (a large-capacity disk is recommended for storing MFCCs).
# Each wav is first split into many short frames; each frame is represented by
# a vector of numbers — the MFCC is that vector.
mfccdir=mfcc
for x in train test; do
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 data/$x exp/make_mfcc/$x $mfccdir
# make_mfcc.sh <data dir> <log dir> <mfcc output dir>
# Compute per-frame voice-activity-detection (VAD) decisions
# (presumably energy-based — see sid/compute_vad_decision.sh).
sid/compute_vad_decision.sh --nj 10 --cmd "$train_cmd" data/$x exp/make_mfcc/$x $mfccdir
# compute_vad_decision.sh <data dir> <log dir> <vad output dir>
utils/fix_data_dir.sh data/$x
# fix_data_dir.sh takes a single argument: the data dir to clean up.
done
# Train a diagonal-covariance UBM with 1024 Gaussians.
sid/train_diag_ubm.sh --nj 10 --cmd "$train_cmd" --num-threads 16 \
data/train 1024 exp/diag_ubm_1024
# Train a full-covariance UBM, initialized from the diagonal UBM.
sid/train_full_ubm.sh --nj 10 --cmd "$train_cmd" data/train \
exp/diag_ubm_1024 exp/full_ubm_1024
# Train the i-vector extractor on top of the full UBM.
sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 10G" \
--num-iters 5 exp/full_ubm_1024/final.ubm data/train \
exp/extractor_1024
# Extract i-vectors for the training set.
sid/extract_ivectors.sh --cmd "$train_cmd" --nj 10 \
exp/extractor_1024 data/train exp/ivector_train_1024
# Train the PLDA model on the training-set i-vectors (length-normalized first).
$train_cmd exp/ivector_train_1024/log/plda.log \
ivector-compute-plda ark:data/train/spk2utt \
'ark:ivector-normalize-length scp:exp/ivector_train_1024/ivector.scp ark:- |' \
exp/ivector_train_1024/plda
# --- ↓ split the test set into enroll and eval ↓ ---
mkdir -p data/test/enroll data/test/eval
# Copy the shared lists into both subsets; each cp must be a single line —
# a newline inside the brace expansion would terminate the command early.
cp data/test/{spk2utt,feats.scp,vad.scp} data/test/enroll
cp data/test/{spk2utt,feats.scp,vad.scp} data/test/eval
local/split_data_enroll_eval.py data/test/utt2spk data/test/enroll/utt2spk data/test/eval/utt2spk
# split_data_enroll_eval.py
# splits the test utt2spk into an enrollment set and an evaluation set.
# Input: test utt2spk; Output: enroll utt2spk, eval utt2spk.
# For each speaker, three utterances are randomly selected for enrollment
# and the remaining utterances are used for evaluation.
trials=data/test/aishell_speaker_ver.lst
local/produce_trials.py data/test/eval/utt2spk $trials
# local/produce_trials.py
# generates the trials file: "uttid spkid target|nontarget"
utils/fix_data_dir.sh data/test/enroll
utils/fix_data_dir.sh data/test/eval
# utils/fix_data_dir.sh
# This script makes sure that only the segments present in
# all of "feats.scp", "wav.scp" [if present], segments [if present]
# text, and utt2spk are present in any of them.
# It puts the original contents of data-dir into
# data-dir/.backup
# --- ↑ split the test set into enroll and eval ↑ ---
# Extract i-vectors for the enrollment set.
sid/extract_ivectors.sh --cmd "$train_cmd" --nj 10 \
exp/extractor_1024 data/test/enroll exp/ivector_enroll_1024
# Extract i-vectors for the evaluation set.
sid/extract_ivectors.sh --cmd "$train_cmd" --nj 10 \
exp/extractor_1024 data/test/eval exp/ivector_eval_1024
# Score every trial with the PLDA model (the awk swaps the trial columns
# into the <enroll-spk> <eval-utt> order the binary expects).
$train_cmd exp/ivector_eval_1024/log/plda_score.log \
ivector-plda-scoring --num-utts=ark:exp/ivector_enroll_1024/num_utts.ark \
exp/ivector_train_1024/plda \
ark:exp/ivector_enroll_1024/spk_ivector.ark \
"ark:ivector-normalize-length scp:exp/ivector_eval_1024/ivector.scp ark:- |" \
"cat '$trials' | awk '{print \\\$2, \\\$1}' |" exp/trials_out
# ~kaldi/src/ivectorbin/ivector-plda-scoring.cc
# Uses the PLDA model to compute trial scores (log-likelihood ratios).
# Input (trials file): "<key1> <key2>\n"
# Output: "<key1> <key2> [<score>]\n"
# Compute the equal error rate (EER): pair each trial's score with its
# target/nontarget label and feed the pairs to compute-eer.
awk '{print $3}' exp/trials_out | paste - $trials | awk '{print $1, $4}' | compute-eer -
# Expected result:
# Scoring against data/test/aishell_speaker_ver.lst
# Equal error rate is 0.140528%, at threshold -12.018
exit 0
# --- Notes ---
# Basic Linux shell knowledge used above, for example:
#   $var            - variable expansion
#   <name>=<value>  - assignment (no spaces around '=')
#   set -e          - shell option: exit as soon as a command fails
#   . <file>        - source (load) a file into the current shell
# Most Kaldi speaker-recognition recipes follow this same structure.
# Reference: