Update prepare_audio_v2.sh

Fix a bug where the km labels were dumped without first learning the k-means model.
This commit is contained in:
Jinjing Zhou 2022-07-27 23:05:18 -07:00 committed by GitHub
parent 0c5731f921
commit d29a12fc9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -63,6 +63,7 @@ mkdir -p $tgt_dir/mfcc
# Build k-means labels from MFCC features of the training split:
#   1. dump MFCC features into $tgt_dir/mfcc
#   2. learn a k-means model from those features (written to cls${dim})
#   3. dump per-utterance k-means labels using that model
#   4. copy the resulting label file next to the split as <split>.km
# Consider splitting the corpus into chunks for a large corpus; see HuBERT preprocessing for more details.
# NOTE(review): the literal "1" / "1 0" arguments look like shard-count / shard-rank
# (single shard, rank 0), matching the "_0_1" suffix copied below -- confirm
# against the simple_kmeans scripts.
# All expansions are quoted so paths containing spaces or glob characters survive intact.
python "$FAIRSEQ_ROOT/examples/hubert/simple_kmeans/dump_mfcc_feature.py" \
  "$tgt_dir" "$train_split" 1 0 "$tgt_dir/mfcc"
# The model file cls${dim} must exist before dump_km_label.py is given it below.
python "$FAIRSEQ_ROOT/examples/hubert/simple_kmeans/learn_kmeans.py" \
  "${tgt_dir}/mfcc" "${train_split}" 1 "${tgt_dir}/mfcc/cls${dim}" "${dim}"
python "$FAIRSEQ_ROOT/examples/hubert/simple_kmeans/dump_km_label.py" \
  "$tgt_dir/mfcc" "$train_split" "$tgt_dir/mfcc/cls${dim}" 1 0 "$tgt_dir/mfcc/cls${dim}_idx"
cp "$tgt_dir/mfcc/cls${dim}_idx/${train_split}_0_1.km" "$tgt_dir/$train_split.km"