From d840cc0b225393170b79514f99ea5f4c30c52d03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaros=C5=82aw=20Dzikowski?= Date: Thu, 1 Apr 2021 18:39:30 +0200 Subject: [PATCH 01/10] Ported eval_abx.sh script to CPC --- cpc/model.py | 1 - scripts/embeddings_abx.py | 138 +++++++++++++++++++++++++++++++++++ scripts/eval_abx.sh | 146 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 284 insertions(+), 1 deletion(-) create mode 100644 scripts/embeddings_abx.py create mode 100755 scripts/eval_abx.sh diff --git a/cpc/model.py b/cpc/model.py index adb5354..65fee55 100644 --- a/cpc/model.py +++ b/cpc/model.py @@ -304,7 +304,6 @@ def __init__(self, def forward(self, batchData, label): cFeature, encodedData, label = self.cpc(batchData, label) cFeature = self.nullspace(cFeature) - encodedData = self.nullspace(encodedData) return cFeature, encodedData, label diff --git a/scripts/embeddings_abx.py b/scripts/embeddings_abx.py new file mode 100644 index 0000000..8e68a8c --- /dev/null +++ b/scripts/embeddings_abx.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 -u +# !/usr/bin/env python3 -u +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import logging +import os +import sys +import argparse +from itertools import chain +from pathlib import Path +import time +import copy +import numpy as np +import soundfile as sf + +from cpc.feature_loader import loadModel, FeatureModule + +import torch +import torch.nn as nn +import torch.nn.functional as F + +logging.basicConfig( + format="%(asctime)s | %(levelname)s | %(name)s | %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=os.environ.get("LOGLEVEL", "INFO").upper(), + stream=sys.stdout, +) +logger = logging.getLogger("zerospeech2021 abx") + +def parse_args(): + # Run parameters + parser = argparse.ArgumentParser() + parser.add_argument("path_checkpoint", type=str, + help="Path to the trained fairseq wav2vec2.0 model.") + parser.add_argument("path_data", type=str, + help="Path to the dataset that we want to compute ABX for.") + parser.add_argument("path_output_dir", type=str, + help="Path to the output directory.") + parser.add_argument("--debug", action="store_true", + help="Load only a very small amount of files for " + "debugging purposes.") + parser.add_argument("--cpu", action="store_true", + help="Run on a cpu machine.") + parser.add_argument("--file_extension", type=str, default="wav", + help="Extension of the audio files in the dataset (default: wav).") + parser.add_argument("--no_test", action="store_true", + help="Don't compute embeddings for test-* parts of dataset") + parser.add_argument('--gru_level', type=int, default=-1, + help='Hidden level of the LSTM autoregressive model to be taken' + '(default: -1, last layer).') + parser.add_argument('--nullspace', action='store_true', + help="Additionally load nullspace") + return parser.parse_args() + +def main(): + # Parse and print args + args = parse_args() + logger.info(args) + + # Load the model + print("") + print(f"Loading model from {args.path_checkpoint}") + + if args.gru_level is not None and args.gru_level > 0: + updateConfig = argparse.Namespace(nLevelsGRU=args.gru_level) + else: + 
updateConfig = None + + model = loadModel([args.path_checkpoint], load_nullspace=args.nullspace, updateConfig=updateConfig)[0] + + if args.gru_level is not None and args.gru_level > 0: + # Keep hidden units at LSTM layers on sequential batches + if args.nullspace: + model.cpc.gAR.keepHidden = True + else: + model.gAR.keepHidden = True + + device = "cuda" if torch.cuda.is_available() and not args.cpu else "cpu" + + # Register the hooks + layer_outputs = {} + def get_layer_output(name): + def hook(model, input, output): + if type(output) is tuple: + layer_outputs[name] = output[0].detach().squeeze(1).cpu().numpy() + elif type(output) is dict: + layer_outputs[name] = output["x"].detach().squeeze(0).cpu().numpy() + else: + layer_outputs[name] = output.detach().squeeze(0).cpu().numpy() + return hook + + layer_names = [] + layer_name = os.path.basename(os.path.dirname(args.path_checkpoint)) + layer_names.append(layer_name) + if not args.nullspace: + model.gAR.register_forward_hook(get_layer_output(layer_name)) + else: + model.nullspace.register_forward_hook(get_layer_output(layer_name)) + + model = model.eval().to(device) + print("Model loaded!") + print(model) + + # Extract values from chosen layers and save them to files + phonetic = "phonetic" + datasets_path = os.path.join(args.path_data, phonetic) + datasets = os.listdir(datasets_path) + datasets = [dataset for dataset in datasets if not args.no_test or not dataset.startswith("test")] + print(datasets) + + with torch.no_grad(): + for dataset in datasets: + print("> {}".format(dataset)) + dataset_path = os.path.join(datasets_path, dataset) + files = [f for f in os.listdir(dataset_path) if f.endswith(args.file_extension)] + for i, f in enumerate(files): + print("Progress {:2.1%}".format(i / len(files)), end="\r") + input_f = os.path.join(dataset_path, f) + x, sample_rate = sf.read(input_f) + x = torch.tensor(x).float().reshape(1,1,-1).to(device) + output = model(x, None)[0] + + for layer_name, value in 
layer_outputs.items(): + output_dir = os.path.join(args.path_output_dir, layer_name, phonetic, dataset) + Path(output_dir).mkdir(parents=True, exist_ok=True) + out_f = os.path.join(output_dir, os.path.splitext(f)[0] + ".txt") + np.savetxt(out_f, value) + +if __name__ == "__main__": + #import ptvsd + #ptvsd.enable_attach(('0.0.0.0', 7310)) + #print("Attach debugger now") + #ptvsd.wait_for_attach() + main() + diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh new file mode 100755 index 0000000..5475b39 --- /dev/null +++ b/scripts/eval_abx.sh @@ -0,0 +1,146 @@ +########## CHANGE THIS ################## +ZEROSPEECH_EVAL_ENV=zerospeech2021 # Where the zerospeech2021-evaluate is installed +CPC_ENV=202010-fairseq-c11 +CONDA_PATH=/pio/scratch/2/i273233/miniconda3 +######################################### + +DATASET_PATH=false +ORIGINAL_DATASET_PATH=false +CHECKPOINT_PATH=false +OUTPUT_DIR=false +NULLSPACE=false +NO_TEST=false + +print_usage() { + echo -e "Usage: ./eval_abx.sh" + echo -e "\t-d DATASET_PATH" + echo -e "\t-r ORIGINAL_DATASET_PATH" + echo -e "\t-c CHECKPOINT_PATH" + echo -e "\t-o OUTPUT_DIR" + echo -e "OPTIONAL FLAGS:" + echo -e "\t-n (Load a model with nullspace)" + echo -e "\t-a CONDA_PATH" + echo -e "\t-e CPC_ENV" + echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)" + echo -e "\t-t (Do not compute embeddings for test set)" +} + +while getopts 'd:r:c:o:na:e:z:t' flag; do + case "${flag}" in + d) DATASET_PATH="${OPTARG}" ;; + r) ORIGINAL_DATASET_PATH="${OPTARG}" ;; + c) CHECKPOINT_PATH="${OPTARG}" ;; + o) OUTPUT_DIR="${OPTARG}" ;; + n) NULLSPACE=true ;; + a) CONDA_PATH="${OPTARG}" ;; + e) CPC_ENV="${OPTARG}" ;; + z) ZEROSPEECH_EVAL_ENV="${OPTARG}" ;; + t) NO_TEST=true ;; + *) print_usage + exit 1 ;; + esac +done + +echo $DATASET_PATH $ORIGINAL_DATASET_PATH $CHECKPOINT_PATH $OUTPUT_DIR $NULLSPACE $CONDA_PATH $CPC_ENV $ZEROSPEECH_EVAL_ENV $NO_TEST + +if [[ $DATASET_PATH == false || 
$ORIGINAL_DATASET_PATH == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false ]] +then + echo "Either DATASET_PATH or ORIGINAL_DATASET_PATH or CHECKPOINT_PATH or OUTPUT_DIR is not set." + print_usage + exit 1 +fi + +SCRIPT_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" + +results=$OUTPUT_DIR/results +embeddings=$OUTPUT_DIR/embeddings +mkdir -p embeddings + +source $CONDA_PATH/etc/profile.d/conda.sh +SAVED_ENV=$(conda info | sed -n 's/\( \)*active environment : //p') +echo SAVED_ENV: $SAVED_ENV + +ENV_TO_ACTIVATE=$CPC_ENV +conda activate $ENV_TO_ACTIVATE + +params="" +if [[ $NULLSPACE == true ]] +then + params="${params} --nullspace" +fi + +if [[ $NO_TEST == true ]] +then + params="${params} --no_test" +fi +echo "Params: $params" + +echo "$SCRIPT_PATH/embeddings_abx.py" +python $SCRIPT_PATH/embeddings_abx.py $CHECKPOINT_PATH $DATASET_PATH $embeddings --gru_level 2 $params + +directories=("dev-clean" "dev-other") +if [[ $NO_TEST == false ]] +then + directories+=("test-clean" "test-other") +fi +echo "Directories: ${directories[@]}" + +for i in `basename -a $(ls -d $embeddings/*/)` +do + for directory in ${directories[@]} + do + for file in `ls $embeddings/$i/phonetic/$directory` + do + filename_no_ext="${file%.*}" + if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]] + then + rm $embeddings/$i/phonetic/$directory/$file + fi + done + done +done + +conda activate $ZEROSPEECH_EVAL_ENV + +frame_shift="0.01" +echo "Frame shift is ${frame_shift}s" + +metrics=("cosine" "euclidean") +for metric in ${metrics[@]} +do + cat > $embeddings/$metric.yaml << EOF +author: LSTM Baseline +affiliation: EHESS, ENS, PSL Research Univerity, CNRS and Inria +description: > + CPC-big (trained on librispeech 960), kmeans (trained on librispeech 100), + LSTM. See https://zerospeech.com/2021 for more details. 
+open_source: true +train_set: librispeech 100 and 960 +gpu_budget: 60 +parameters: + phonetic: + metric: ${metric} + frame_shift: ${frame_shift} +EOF + + for i in `basename -a $(ls -d $embeddings/*/)` + do + cp $embeddings/$metric.yaml $embeddings/$i/meta.yaml + #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $DATASET_PATH $embeddings/$i + #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i + #zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i + zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i + done +done + +for metric in ${metrics[@]} +do + for i in `basename -a $(ls -d $embeddings/*/)` + do + echo $i $metric + cat $results/$metric/$i/score_phonetic.csv + echo + done +done > $OUTPUT_DIR/combined_results.txt + +conda activate $SAVED_ENV \ No newline at end of file From 257583535562d47d135731cc8fa68e962a0a5a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaros=C5=82aw=20Dzikowski?= Date: Fri, 2 Apr 2021 00:09:34 +0200 Subject: [PATCH 02/10] Modified finetune_nullspace.sh so that you can provide your own arguments --- finetune_nullspace.sh | 61 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh index e250170..8874cac 100755 --- a/finetune_nullspace.sh +++ b/finetune_nullspace.sh @@ -1,30 +1,77 @@ -SAVE_DIR="/pio/scratch/1/i273233/linear_separability/cpc/gru_level2/cpc_official" SPEAKERS="speakers_factorized" PHONEMES="phonemes_nullspace" SPEAKERS_NULLSPACE="speakers_nullspace" -DIM_INTER=$1 +DATASET_PATH=false +TRAIN_SET=false +VALIDATION_SET=false +CHECKPOINT_PATH=false +OUTPUT_DIR=false +DIM_INBETWEEN=false FROM_STEP=$SPEAKERS -if [[ $# -ge 2 ]]; then - 
FROM_STEP=$2 +PHONES_PATH=false + +print_usage() { + echo -e "Usage: ./finetune_nullspace.sh" + echo -e "\t-d DATASET_PATH" + echo -e "\t-t TRAIN_SET" + echo -e "\t-v VALIDATION_SET" + echo -e "\t-c CHECKPOINT_PATH" + echo -e "\t-o OUTPUT_DIR" + echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)" + echo -e "OPTIONAL FLAGS:" + echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)" + echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. You don't need it if you start from $SPEAKERS_NULLSPACE)" +} + +while getopts 'd:t:v:c:o:n:f:p:' flag; do + case "${flag}" in + d) DATASET_PATH="${OPTARG}" ;; + t) TRAIN_SET="${OPTARG}" ;; + v) VALIDATION_SET="${OPTARG}" ;; + c) CHECKPOINT_PATH="${OPTARG}" ;; + o) OUTPUT_DIR="${OPTARG}" ;; + n) DIM_INBETWEEN="${OPTARG}" ;; + f) FROM_STEP="${OPTARG}" ;; + p) PHONES_PATH="${OPTARG}" ;; + *) print_usage + exit 1 ;; + esac +done + +echo $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH $OUTPUT_DIR $DIM_INBETWEEN $FROM_STEP $PHONES_PATH + +if [[ $DATASET_PATH == false || $TRAIN_SET == false || $VALIDATION_SET == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false || $DIM_INBETWEEN == false || ( $PHONES_PATH == false && $FROM_STEP != $SPEAKERS ) ]] +then + echo "Either DATASET_PATH, TRAIN_SET, VALIDATION_SET, CHECKPOINT_PATH, OUTPUT_DIR or DIM_INBETWEEN is not set or there are invalid PHONES_PATH and FROM_STEP." 
+ print_usage + exit 1 fi +mkdir -p $OUTPUT_DIR + case $FROM_STEP in $SPEAKERS) echo $SPEAKERS - mkdir -p ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/log.txt + mkdir -p ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2 ;& $PHONEMES) echo $PHONEMES - mkdir -p ${SAVE_DIR}_${PHONEMES}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${PHONEMES}_${DIM_INTER} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $zd/LibriSpeech/alignments2/converted_aligned_phones.txt --path_speakers_factorized ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/checkpoint_9.pt --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${PHONEMES}_${DIM_INTER}/log.txt + mkdir -p ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONES_PATH --path_speakers_factorized 
${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2 ;& $SPEAKERS_NULLSPACE) echo $SPEAKERS_NULLSPACE - mkdir -p ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/checkpoint_9.pt --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER}/log.txt + mkdir -p ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2 ;; *) echo "Invalid from step: ${FROM_STEP} while it should be either ${SPEAKERS}, ${PHONEMES} or ${SPEAKERS_NULLSPACE}" ;; esac +echo "Checkpoint with nullspace is located in ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}/checkpoint_9.pt" +echo "The results of all the experiments are located in ${OUTPUT_DIR}/DIRECTORY/checkpoint_logs.json" + exit 0 \ No newline at end of file From 0c5635a60937af9147c622b08449b086f0232b53 Mon Sep 17 00:00:00 2001 From: Piotr Pusz Date: Sat, 3 Apr 2021 19:55:01 +0200 Subject: [PATCH 03/10] updated nullspace scripts with dataset audio format option, added summary on top of readme, deleted old scripts (moving to zs2021 repo) --- README.md | 11 ++++++++- ...push_nonullspace_phoneme_classification.sh | 16 ------------- 
...erpush_nullspace_phoneme_classification.sh | 18 --------------- finetune_nullspace.sh | 23 +++++++++++++++---- scripts/eval_abx.sh | 7 ++++-- 5 files changed, 33 insertions(+), 42 deletions(-) delete mode 100755 centerpush_nonullspace_phoneme_classification.sh delete mode 100755 centerpush_nullspace_phoneme_classification.sh diff --git a/README.md b/README.md index 63d47f8..7de9f27 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,12 @@ +this repo is based on `CPC_audio` () repo, but it also contains: +- part of the code used for University of Wroclaw ZeroSpeech2021 submission (our modifications and also merged files from: - `criterion/clustering/`, - `feature_loder.py -> buildFeature_batch`, - `scripts/`) +- code used for CPC-CTC paper + +Below original README updated with some of our modifications; part of our code is also described in `cpc/README.md`. + +-------------------------------------------------------------------------------------------------- + + # CPC_audio This code implements the Contrast Predictive Coding algorithm on audio data, as described in the paper [Unsupervised Pretraining Transfers well Across Languages](https://arxiv.org/abs/2002.02848). This is an unsupervised method to train audio features directly from the raw waveform. @@ -123,7 +132,7 @@ Will evaluate the speaker separability of the concatenation of the features from `--gru_level` controls from which layer of autoregressive part of CPC to extract the features. By default it's the last one. -Nullspaces: +### Nullspaces: To conduct the nullspace experiment, first classify speakers using two factorized matrices `A` (`DIM_EMBEDDING` x `DIM_INBETWEEN`) and `B` (`DIM_INBETWEEN` x `SPEAKERS`). You'll want to extract `A'`, the nullspace of matrix `A` (of size `DIM_EMBEDDING` x (`DIM_EMBEDDING` - `DIM_INBETWEEN`)), to make the embeddings less sensitive to speakers. 
```bash diff --git a/centerpush_nonullspace_phoneme_classification.sh b/centerpush_nonullspace_phoneme_classification.sh deleted file mode 100755 index e858084..0000000 --- a/centerpush_nonullspace_phoneme_classification.sh +++ /dev/null @@ -1,16 +0,0 @@ - -for deg in 0 0.2 0.3 0.4 0.5 0.6 0.7 -do - echo $deg - mkdir ${centerpushDir}/phoneme_classif_nonull_${deg}/ - python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ \ - $zd/LibriSpeech/labels_split/train_split_100.txt \ - $zd/LibriSpeech/labels_split/test_split_100.txt \ - $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt \ - --centerpushFile $zd/checkpoints/CPC-big-kmeans50/clustering_kmeans50/clustering_CPC_big_kmeans50.pt \ - --centerpushDeg $deg \ - --pathCheckpoint ${centerpushDir}/phoneme_classif_nonull_${deg}/ \ - --mode phonemes --max_size_loaded 40000000 --n_process_loader 2 \ - --model cpc --pathPhone $zd/LibriSpeech/alignments2/converted_aligned_phones.txt \ - --gru_level 2 --batchSizeGPU 32 | tee ${centerpushDir}/phoneme_classif_nonull_${deg}/log.txt -done \ No newline at end of file diff --git a/centerpush_nullspace_phoneme_classification.sh b/centerpush_nullspace_phoneme_classification.sh deleted file mode 100755 index 5d5c268..0000000 --- a/centerpush_nullspace_phoneme_classification.sh +++ /dev/null @@ -1,18 +0,0 @@ - -for deg in 0 0.2 0.3 0.4 0.5 0.6 0.7 -do - echo $deg - mkdir ${centerpushDir}/phoneme_classif_null_${deg}/ - python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ \ - $zd/LibriSpeech/labels_split/train_split_100.txt \ - $zd/LibriSpeech/labels_split/test_split_100.txt \ - $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt \ - --centerpushFile $cpcClustDir/checkpoints/clustering_CPC_big_kmeans50_nullspace_64/clustering_CPC_big_kmeans50_nullspace_64.pt \ - --centerpushDeg $deg \ - --pathCheckpoint ${centerpushDir}/phoneme_classif_null_${deg}/ \ - --mode phonemes_nullspace --max_size_loaded 40000000 --n_process_loader 2 \ - --model 
cpc --pathPhone $zd/LibriSpeech/alignments2/converted_aligned_phones.txt \ - --path_speakers_factorized $nullspaceDir/linear_separability/cpc/gru_level2/cpc_official_speakers_factorized_64/checkpoint_9.pt \ - --dim_inter 64 --gru_level 2 --batchSizeGPU 32 | tee ${centerpushDir}/phoneme_classif_null_${deg}/log.txt -done - diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh index 8874cac..52eb6ac 100755 --- a/finetune_nullspace.sh +++ b/finetune_nullspace.sh @@ -10,6 +10,7 @@ OUTPUT_DIR=false DIM_INBETWEEN=false FROM_STEP=$SPEAKERS PHONES_PATH=false +AUDIO_FORMAT=flac print_usage() { echo -e "Usage: ./finetune_nullspace.sh" @@ -20,8 +21,9 @@ print_usage() { echo -e "\t-o OUTPUT_DIR" echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)" echo -e "OPTIONAL FLAGS:" - echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)" + echo -e "\t-s FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)" echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. 
You don't need it if you start from $SPEAKERS_NULLSPACE)" + echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" } while getopts 'd:t:v:c:o:n:f:p:' flag; do @@ -32,8 +34,9 @@ while getopts 'd:t:v:c:o:n:f:p:' flag; do c) CHECKPOINT_PATH="${OPTARG}" ;; o) OUTPUT_DIR="${OPTARG}" ;; n) DIM_INBETWEEN="${OPTARG}" ;; - f) FROM_STEP="${OPTARG}" ;; + s) FROM_STEP="${OPTARG}" ;; p) PHONES_PATH="${OPTARG}" ;; + f) AUDIO_FORMAT=${OPTARG} ;; *) print_usage exit 1 ;; esac @@ -54,17 +57,27 @@ case $FROM_STEP in $SPEAKERS) echo $SPEAKERS mkdir -p ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} - python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2 + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH \ + --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS \ + --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2 --file_extension .$AUDIO_FORMAT ;& $PHONEMES) echo $PHONEMES mkdir -p ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} - python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONES_PATH --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2 + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH \ + --pathCheckpoint ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES \ + --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONES_PATH \ + --path_speakers_factorized 
${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt \ + --dim_inter $DIM_INBETWEEN --gru_level 2 --file_extension .$AUDIO_FORMAT ;& $SPEAKERS_NULLSPACE) echo $SPEAKERS_NULLSPACE mkdir -p ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} - python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2 + python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH \ + --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE \ + --max_size_loaded 40000000 --n_process_loader 2 --model cpc \ + --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt \ + --dim_inter $DIM_INBETWEEN --gru_level 2 --file_extension .$AUDIO_FORMAT ;; *) echo "Invalid from step: ${FROM_STEP} while it should be either ${SPEAKERS}, ${PHONEMES} or ${SPEAKERS_NULLSPACE}" diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh index 5475b39..07cd2ed 100755 --- a/scripts/eval_abx.sh +++ b/scripts/eval_abx.sh @@ -10,6 +10,7 @@ CHECKPOINT_PATH=false OUTPUT_DIR=false NULLSPACE=false NO_TEST=false +AUDIO_FORMAT=flac print_usage() { echo -e "Usage: ./eval_abx.sh" @@ -23,6 +24,7 @@ print_usage() { echo -e "\t-e CPC_ENV" echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)" echo -e "\t-t (Do not compute embeddings for test set)" + echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" } while getopts 'd:r:c:o:na:e:z:t' flag; do @@ -36,6 +38,7 @@ while getopts 'd:r:c:o:na:e:z:t' flag; do e) CPC_ENV="${OPTARG}" ;; z) ZEROSPEECH_EVAL_ENV="${OPTARG}" ;; t) NO_TEST=true ;; + f) AUDIO_FORMAT=${OPTARG} ;; *) print_usage 
exit 1 ;; esac @@ -76,7 +79,7 @@ fi echo "Params: $params" echo "$SCRIPT_PATH/embeddings_abx.py" -python $SCRIPT_PATH/embeddings_abx.py $CHECKPOINT_PATH $DATASET_PATH $embeddings --gru_level 2 $params +python $SCRIPT_PATH/embeddings_abx.py $CHECKPOINT_PATH $DATASET_PATH $embeddings --gru_level 2 --file_extension $AUDIO_FORMAT $params directories=("dev-clean" "dev-other") if [[ $NO_TEST == false ]] @@ -92,7 +95,7 @@ do for file in `ls $embeddings/$i/phonetic/$directory` do filename_no_ext="${file%.*}" - if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]] + if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.$AUDIO_FORMAT" ]] then rm $embeddings/$i/phonetic/$directory/$file fi From 9308a331721d9268664ff010a7e47f2248657139 Mon Sep 17 00:00:00 2001 From: Piotr Pusz Date: Sat, 3 Apr 2021 20:05:57 +0200 Subject: [PATCH 04/10] fixed soundfile config in environment.yml --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 2786201..9c9eb34 100644 --- a/environment.yml +++ b/environment.yml @@ -14,8 +14,8 @@ dependencies: - tqdm - nose - cython - - pysoundfile - pip: + - soundfile - progressbar2 - matplotlib - torchaudio From 79f4f2e86c6fec75e93e3aae26e365d34a6fa322 Mon Sep 17 00:00:00 2001 From: Piotr Pusz Date: Tue, 6 Apr 2021 15:00:44 +0200 Subject: [PATCH 05/10] adding Jarek's script for creating flattened LibriSpeech and nullspace help output update --- finetune_nullspace.sh | 12 ++--- scripts/create_ls_dataset_for_abx_eval.py | 55 +++++++++++++++++++++++ scripts/eval_abx.sh | 6 +-- 3 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 scripts/create_ls_dataset_for_abx_eval.py diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh index 52eb6ac..2463c7c 100755 --- a/finetune_nullspace.sh +++ b/finetune_nullspace.sh @@ -14,19 +14,19 @@ AUDIO_FORMAT=flac print_usage() { echo -e "Usage: ./finetune_nullspace.sh" - echo -e "\t-d 
DATASET_PATH" - echo -e "\t-t TRAIN_SET" - echo -e "\t-v VALIDATION_SET" + echo -e "\t-d DATASET_PATH (E.g. LIBRISPEECH_DATASET_PATH/train-clean-100)" + echo -e "\t-t TRAIN_SET (E.g. LIBRISPEECH_TRAIN_CLEAN_100_TRAIN_SPLIT_FILE_PATH)" + echo -e "\t-v VALIDATION_SET (E.g. LIBRISPEECH_TRAIN_CLEAN_100_TEST_SPLIT_FILE_PATH)" echo -e "\t-c CHECKPOINT_PATH" echo -e "\t-o OUTPUT_DIR" echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)" + echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset)" echo -e "OPTIONAL FLAGS:" - echo -e "\t-s FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)" - echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. You don't need it if you start from $SPEAKERS_NULLSPACE)" + echo -e "\t-s FROM_STEP (From which step do you want to start. Order: $SPEAKERS [default] -> $PHONEMES -> $SPEAKERS_NULLSPACE)" echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" } -while getopts 'd:t:v:c:o:n:f:p:' flag; do +while getopts 'd:t:v:c:o:n:s:p:f' flag; do case "${flag}" in d) DATASET_PATH="${OPTARG}" ;; t) TRAIN_SET="${OPTARG}" ;; diff --git a/scripts/create_ls_dataset_for_abx_eval.py b/scripts/create_ls_dataset_for_abx_eval.py new file mode 100644 index 0000000..92b77da --- /dev/null +++ b/scripts/create_ls_dataset_for_abx_eval.py @@ -0,0 +1,55 @@ +import os +import sys +import shutil +import argparse +from pathlib import Path +import numpy as np +import soundfile as sf + +def parse_args(): + # Run parameters + parser = argparse.ArgumentParser() + parser.add_argument("librispeech_path", type=str, + help="Path to the root directory of LibriSpeech.") + parser.add_argument("zerospeech_dataset_path", type=str, + help="Path to the ZeroSpeech dataset.") + parser.add_argument("target_path", type=str, + help="Path to the output directory.") + parser.add_argument("--file_extension", type=str, 
default="flac", + help="Extension of the audio files in the dataset (default: flac).") + return parser.parse_args() + +def main(): + # Parse and print args + args = parse_args() + #logger.info(args) + + phonetic = "phonetic" + datasets = ["dev-clean", "dev-other", "test-clean", "test-other"] + + for dataset in datasets: + print("> {}".format(dataset)) + target_dirname = os.path.join(args.target_path, phonetic, dataset) + Path(target_dirname).mkdir(parents=True, exist_ok=True) + + librispeech_dirname = os.path.join(args.librispeech_path, dataset) + files = [(filename, dirname) for dirname, _, files in os.walk(librispeech_dirname, followlinks=True) for filename in files if filename.endswith(args.file_extension)] + for i, (filename, dirname) in enumerate(files): + print("Progress {:2.1%}".format(i / len(files)), end="\r") + input_path = os.path.join(dirname, filename) + output_path = os.path.join(target_dirname, os.path.splitext(filename)[0] + ".wav") + data, sample_rate = sf.read(input_path) + sf.write(output_path, data, sample_rate) + + if dataset.startswith("dev"): + source_item_path = os.path.join(args.zerospeech_dataset_path, phonetic, dataset, dataset + ".item") + target_item_path = os.path.join(target_dirname, dataset + ".item") + shutil.copy(source_item_path, target_item_path) + + +if __name__ == "__main__": + #import ptvsd + #ptvsd.enable_attach(('0.0.0.0', 7310)) + #print("Attach debugger now") + #ptvsd.wait_for_attach() + main() \ No newline at end of file diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh index 07cd2ed..b8d8a87 100755 --- a/scripts/eval_abx.sh +++ b/scripts/eval_abx.sh @@ -14,12 +14,12 @@ AUDIO_FORMAT=flac print_usage() { echo -e "Usage: ./eval_abx.sh" - echo -e "\t-d DATASET_PATH" + echo -e "\t-d DATASET_PATH (Either ZEROSPEECH_DATASET_PATH or LIBRISPEECH_FLATTENED_DATASET_PATH [Or anything that has directory structure of these two with dev-*.item files from ZEROSPEECH_DATASET_PATH])" echo -e "\t-r ORIGINAL_DATASET_PATH" echo -e "\t-c 
CHECKPOINT_PATH" echo -e "\t-o OUTPUT_DIR" echo -e "OPTIONAL FLAGS:" - echo -e "\t-n (Load a model with nullspace)" + echo -e "\t-n (Provide this flag if you want to load a model with nullspace)" echo -e "\t-a CONDA_PATH" echo -e "\t-e CPC_ENV" echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)" @@ -27,7 +27,7 @@ print_usage() { echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" } -while getopts 'd:r:c:o:na:e:z:t' flag; do +while getopts 'd:r:c:o:n:a:e:z:t:f' flag; do case "${flag}" in d) DATASET_PATH="${OPTARG}" ;; r) ORIGINAL_DATASET_PATH="${OPTARG}" ;; From 35bce6e71f6550491f6652113b49d6f043093688 Mon Sep 17 00:00:00 2001 From: Piotr Pusz Date: Tue, 6 Apr 2021 16:17:38 +0200 Subject: [PATCH 06/10] forgotten and badly merged changes from Jarek's commit --- finetune_nullspace.sh | 2 +- scripts/embeddings_abx.py | 5 ----- scripts/eval_abx.sh | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh index 2463c7c..892213c 100755 --- a/finetune_nullspace.sh +++ b/finetune_nullspace.sh @@ -26,7 +26,7 @@ print_usage() { echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" } -while getopts 'd:t:v:c:o:n:s:p:f' flag; do +while getopts 'd:t:v:c:o:n:s:p:f:' flag; do case "${flag}" in d) DATASET_PATH="${OPTARG}" ;; t) TRAIN_SET="${OPTARG}" ;; diff --git a/scripts/embeddings_abx.py b/scripts/embeddings_abx.py index 8e68a8c..4b6ab62 100644 --- a/scripts/embeddings_abx.py +++ b/scripts/embeddings_abx.py @@ -1,9 +1,4 @@ #!/usr/bin/env python3 -u -# !/usr/bin/env python3 -u -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
import logging import os diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh index b8d8a87..e9cdc1f 100755 --- a/scripts/eval_abx.sh +++ b/scripts/eval_abx.sh @@ -27,7 +27,7 @@ print_usage() { echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" } -while getopts 'd:r:c:o:n:a:e:z:t:f' flag; do +while getopts 'd:r:c:o:n:a:e:z:t:f:' flag; do case "${flag}" in d) DATASET_PATH="${OPTARG}" ;; r) ORIGINAL_DATASET_PATH="${OPTARG}" ;; From 6a3630e8e55da650ff75924c3f3b4420a28455f7 Mon Sep 17 00:00:00 2001 From: Piotr Pusz Date: Tue, 6 Apr 2021 16:36:51 +0200 Subject: [PATCH 07/10] another bash args fix --- scripts/eval_abx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh index e9cdc1f..ef1e8f6 100755 --- a/scripts/eval_abx.sh +++ b/scripts/eval_abx.sh @@ -27,7 +27,7 @@ print_usage() { echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" } -while getopts 'd:r:c:o:n:a:e:z:t:f:' flag; do +while getopts 'd:r:c:o:na:e:z:tf:' flag; do case "${flag}" in d) DATASET_PATH="${OPTARG}" ;; r) ORIGINAL_DATASET_PATH="${OPTARG}" ;; From 8e0c7ad0d21d03f32fbf1e5cc89460d9da4e05cf Mon Sep 17 00:00:00 2001 From: Piotr Pusz Date: Wed, 7 Apr 2021 00:28:34 +0200 Subject: [PATCH 08/10] flattened LS file extension fix and --force-cpu for nullspace ABX eval because of GPU overflow --- scripts/create_ls_dataset_for_abx_eval.py | 2 +- scripts/eval_abx.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/create_ls_dataset_for_abx_eval.py b/scripts/create_ls_dataset_for_abx_eval.py index 92b77da..3f04770 100644 --- a/scripts/create_ls_dataset_for_abx_eval.py +++ b/scripts/create_ls_dataset_for_abx_eval.py @@ -37,7 +37,7 @@ def main(): for i, (filename, dirname) in enumerate(files): print("Progress {:2.1%}".format(i / len(files)), end="\r") input_path = os.path.join(dirname, filename) - output_path = os.path.join(target_dirname, os.path.splitext(filename)[0] + ".wav") + 
output_path = os.path.join(target_dirname, os.path.splitext(filename)[0] + "." + args.file_extension) data, sample_rate = sf.read(input_path) sf.write(output_path, data, sample_rate) diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh index ef1e8f6..5c52534 100755 --- a/scripts/eval_abx.sh +++ b/scripts/eval_abx.sh @@ -132,7 +132,7 @@ EOF #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $DATASET_PATH $embeddings/$i #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i #zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i - zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i + zerospeech2021-evaluate --force-cpu -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i done done From 8406a1a75ae320f12c370f444534e715a0481ee0 Mon Sep 17 00:00:00 2001 From: Piotr Pusz Date: Wed, 7 Apr 2021 12:04:51 +0200 Subject: [PATCH 09/10] fixing bug I made in eval_abx --- finetune_nullspace.sh | 2 +- scripts/eval_abx.sh | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh index 892213c..5d1ec30 100755 --- a/finetune_nullspace.sh +++ b/finetune_nullspace.sh @@ -23,7 +23,7 @@ print_usage() { echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset)" echo -e "OPTIONAL FLAGS:" echo -e "\t-s FROM_STEP (From which step do you want to start. 
Order: $SPEAKERS [default] -> $PHONEMES -> $SPEAKERS_NULLSPACE)" - echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" + echo -e "\t-f audio files format in -d dataset (without a dot)" } while getopts 'd:t:v:c:o:n:s:p:f:' flag; do diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh index 5c52534..de6247f 100755 --- a/scripts/eval_abx.sh +++ b/scripts/eval_abx.sh @@ -24,7 +24,7 @@ print_usage() { echo -e "\t-e CPC_ENV" echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)" echo -e "\t-t (Do not compute embeddings for test set)" - echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)" + echo -e "\t-f audio files format in -d dataset (without a dot)" } while getopts 'd:r:c:o:na:e:z:tf:' flag; do @@ -95,7 +95,7 @@ do for file in `ls $embeddings/$i/phonetic/$directory` do filename_no_ext="${file%.*}" - if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.$AUDIO_FORMAT" ]] + if [[ ! 
-f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]] # here ALWAYS wav as it is for ZS dataset then rm $embeddings/$i/phonetic/$directory/$file fi @@ -146,4 +146,6 @@ do done done > $OUTPUT_DIR/combined_results.txt -conda activate $SAVED_ENV \ No newline at end of file +if [ $SAVED_ENV != None ]; then + conda activate $SAVED_ENV +fi \ No newline at end of file From 87d5739abe77b9277720767b1d9b086ae863633e Mon Sep 17 00:00:00 2001 From: Piotr Pusz Date: Wed, 7 Apr 2021 18:43:38 +0200 Subject: [PATCH 10/10] fixed pushing to closest centers for linear_separability with nullspace - as only c_feature are being projected --- cpc/eval/linear_separability.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpc/eval/linear_separability.py b/cpc/eval/linear_separability.py index 36a1011..20c065c 100644 --- a/cpc/eval/linear_separability.py +++ b/cpc/eval/linear_separability.py @@ -39,7 +39,8 @@ def train_step(feature_maker, criterion, data_loader, optimizer, label_key="spea if centerpushSettings: centers, pushDeg = centerpushSettings c_feature = utils.pushToClosestForBatch(c_feature, centers, deg=pushDeg) - encoded_data = utils.pushToClosestForBatch(encoded_data, centers, deg=pushDeg) + # [!] ONLY c_features are projected into nullspace, so encoded_data is of no use with nullspace currently + #encoded_data = utils.pushToClosestForBatch(encoded_data, centers, deg=pushDeg) all_losses, all_acc = criterion(c_feature, encoded_data, label) totLoss = all_losses.sum() @@ -70,7 +71,8 @@ def val_step(feature_maker, criterion, data_loader, label_key="speaker", centerp if centerpushSettings: centers, pushDeg = centerpushSettings c_feature = utils.pushToClosestForBatch(c_feature, centers, deg=pushDeg) - encoded_data = utils.pushToClosestForBatch(encoded_data, centers, deg=pushDeg) + # [!] 
ONLY c_features are projected into nullspace, so encoded_data is of no use with nullspace currently + #encoded_data = utils.pushToClosestForBatch(encoded_data, centers, deg=pushDeg) all_losses, all_acc = criterion(c_feature, encoded_data, label) logs["locLoss_val"] += np.asarray([all_losses.mean().item()])