From d840cc0b225393170b79514f99ea5f4c30c52d03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaros=C5=82aw=20Dzikowski?= <jarekdzikowski1337@gmail.com>
Date: Thu, 1 Apr 2021 18:39:30 +0200
Subject: [PATCH 01/10] Ported eval_abx.sh script to CPC

---
 cpc/model.py              |   1 -
 scripts/embeddings_abx.py | 138 +++++++++++++++++++++++++++++++++++
 scripts/eval_abx.sh       | 146 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 284 insertions(+), 1 deletion(-)
 create mode 100644 scripts/embeddings_abx.py
 create mode 100755 scripts/eval_abx.sh

diff --git a/cpc/model.py b/cpc/model.py
index adb5354..65fee55 100644
--- a/cpc/model.py
+++ b/cpc/model.py
@@ -304,7 +304,6 @@ def __init__(self,
     def forward(self, batchData, label):
         cFeature, encodedData, label = self.cpc(batchData, label)
         cFeature = self.nullspace(cFeature)
-        encodedData = self.nullspace(encodedData)
         return cFeature, encodedData, label
 
 
diff --git a/scripts/embeddings_abx.py b/scripts/embeddings_abx.py
new file mode 100644
index 0000000..8e68a8c
--- /dev/null
+++ b/scripts/embeddings_abx.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3 -u
+# !/usr/bin/env python3 -u
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import os
+import sys
+import argparse
+from itertools import chain
+from pathlib import Path
+import time
+import copy
+import numpy as np
+import soundfile as sf
+
+from cpc.feature_loader import loadModel, FeatureModule
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+logging.basicConfig(
+    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    level=os.environ.get("LOGLEVEL", "INFO").upper(),
+    stream=sys.stdout,
+)
+logger = logging.getLogger("zerospeech2021 abx")
+
+def parse_args():
+    """Parse the command-line arguments of the ABX embedding-extraction script."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("path_checkpoint", type=str,
+                        help="Path to the trained CPC model checkpoint.")
+    parser.add_argument("path_data", type=str,
+                        help="Path to the dataset that we want to compute ABX for.")
+    parser.add_argument("path_output_dir", type=str,
+                        help="Path to the output directory.")
+    parser.add_argument("--debug", action="store_true",
+                        help="Load only a very small amount of files for "
+                        "debugging purposes.")
+    parser.add_argument("--cpu", action="store_true",
+                        help="Run on a cpu machine.")
+    parser.add_argument("--file_extension", type=str, default="wav",
+                        help="Extension of the audio files in the dataset (default: wav).")
+    parser.add_argument("--no_test", action="store_true",
+                        help="Don't compute embeddings for test-* parts of dataset")
+    parser.add_argument('--gru_level', type=int, default=-1,
+                        help='Hidden level of the LSTM autoregressive model to be taken '
+                        '(default: -1, last layer).')
+    parser.add_argument('--nullspace', action='store_true',
+                        help="Additionally load nullspace")
+    return parser.parse_args()
+
+def main():
+    """Run the CPC model over every phonetic/* audio file and save the hooked layer's output as text."""
+    args = parse_args()
+    logger.info(args)
+
+    # Load the model
+    print("")
+    print(f"Loading model from {args.path_checkpoint}")
+
+    if args.gru_level is not None and args.gru_level > 0:
+        updateConfig = argparse.Namespace(nLevelsGRU=args.gru_level)
+    else:
+        updateConfig = None
+
+    model = loadModel([args.path_checkpoint], load_nullspace=args.nullspace, updateConfig=updateConfig)[0]
+
+    if args.gru_level is not None and args.gru_level > 0:
+        # Keep hidden units at LSTM layers on sequential batches
+        if args.nullspace:
+            model.cpc.gAR.keepHidden = True
+        else:
+            model.gAR.keepHidden = True
+
+    device = "cuda" if torch.cuda.is_available() and not args.cpu else "cpu"
+
+    # Register the hooks; each forward pass overwrites layer_outputs[name]
+    layer_outputs = {}
+    def get_layer_output(name):
+        def hook(model, input, output):
+            if isinstance(output, tuple):
+                layer_outputs[name] = output[0].detach().squeeze(1).cpu().numpy()
+            elif isinstance(output, dict):
+                layer_outputs[name] = output["x"].detach().squeeze(0).cpu().numpy()
+            else:
+                layer_outputs[name] = output.detach().squeeze(0).cpu().numpy()
+        return hook
+
+    layer_names = []
+    layer_name = os.path.basename(os.path.dirname(os.path.abspath(args.path_checkpoint)))
+    layer_names.append(layer_name)
+    if not args.nullspace:
+        model.gAR.register_forward_hook(get_layer_output(layer_name))
+    else:
+        model.nullspace.register_forward_hook(get_layer_output(layer_name))
+
+    model = model.eval().to(device)
+    print("Model loaded!")
+    print(model)
+
+    # Extract values from chosen layers and save them to files
+    phonetic = "phonetic"
+    datasets_path = os.path.join(args.path_data, phonetic)
+    datasets = os.listdir(datasets_path)
+    datasets = [dataset for dataset in datasets if not args.no_test or not dataset.startswith("test")]
+    print(datasets)
+
+    with torch.no_grad():
+        for dataset in datasets:
+            print("> {}".format(dataset))
+            dataset_path = os.path.join(datasets_path, dataset)
+            files = [f for f in os.listdir(dataset_path) if f.endswith(args.file_extension)]
+            for i, f in enumerate(files):
+                print("Progress {:2.1%}".format(i / len(files)), end="\r")
+                input_f = os.path.join(dataset_path, f)
+                x, sample_rate = sf.read(input_f)
+                x = torch.tensor(x).float().reshape(1,1,-1).to(device)
+                model(x, None)  # result unused: the forward hook stores the layer output
+
+                for layer_name, value in layer_outputs.items():
+                    output_dir = os.path.join(args.path_output_dir, layer_name, phonetic, dataset)
+                    Path(output_dir).mkdir(parents=True, exist_ok=True)
+                    out_f = os.path.join(output_dir, os.path.splitext(f)[0] + ".txt")
+                    np.savetxt(out_f, value)
+
+if __name__ == "__main__":
+    #import ptvsd
+    #ptvsd.enable_attach(('0.0.0.0', 7310))
+    #print("Attach debugger now")
+    #ptvsd.wait_for_attach()
+    main()
+
diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh
new file mode 100755
index 0000000..5475b39
--- /dev/null
+++ b/scripts/eval_abx.sh
@@ -0,0 +1,146 @@
+########## CHANGE THIS ##################
+ZEROSPEECH_EVAL_ENV=zerospeech2021 # Where the zerospeech2021-evaluate is installed
+CPC_ENV=202010-fairseq-c11
+CONDA_PATH=/pio/scratch/2/i273233/miniconda3
+#########################################
+
+DATASET_PATH=false
+ORIGINAL_DATASET_PATH=false
+CHECKPOINT_PATH=false
+OUTPUT_DIR=false
+NULLSPACE=false
+NO_TEST=false
+
+print_usage() {
+  echo -e "Usage: ./eval_abx.sh"
+  echo -e "\t-d DATASET_PATH"
+  echo -e "\t-r ORIGINAL_DATASET_PATH"
+  echo -e "\t-c CHECKPOINT_PATH"
+  echo -e "\t-o OUTPUT_DIR"
+  echo -e "OPTIONAL FLAGS:"
+  echo -e "\t-n (Load a model with nullspace)"
+  echo -e "\t-a CONDA_PATH"
+  echo -e "\t-e CPC_ENV"
+  echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)"
+  echo -e "\t-t (Do not compute embeddings for test set)"
+}
+
+while getopts 'd:r:c:o:na:e:z:t' flag; do
+    case "${flag}" in
+        d) DATASET_PATH="${OPTARG}" ;;
+        r) ORIGINAL_DATASET_PATH="${OPTARG}" ;;
+        c) CHECKPOINT_PATH="${OPTARG}" ;;
+        o) OUTPUT_DIR="${OPTARG}" ;;
+        n) NULLSPACE=true ;;
+        a) CONDA_PATH="${OPTARG}" ;;
+        e) CPC_ENV="${OPTARG}" ;;
+        z) ZEROSPEECH_EVAL_ENV="${OPTARG}" ;;
+        t) NO_TEST=true ;;
+        *) print_usage
+           exit 1 ;;
+    esac
+done
+
+echo $DATASET_PATH $ORIGINAL_DATASET_PATH $CHECKPOINT_PATH $OUTPUT_DIR $NULLSPACE $CONDA_PATH $CPC_ENV $ZEROSPEECH_EVAL_ENV $NO_TEST
+
+if [[ $DATASET_PATH == false || $ORIGINAL_DATASET_PATH == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false ]]
+then
+    echo "Either DATASET_PATH or ORIGINAL_DATASET_PATH or CHECKPOINT_PATH or OUTPUT_DIR is not set."
+    print_usage
+    exit 1
+fi
+
+SCRIPT_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+
+results=$OUTPUT_DIR/results
+embeddings=$OUTPUT_DIR/embeddings
+mkdir -p "$embeddings"  # create the embeddings dir under OUTPUT_DIR, not a literal ./embeddings
+
+source $CONDA_PATH/etc/profile.d/conda.sh
+SAVED_ENV=$(conda info | sed -n 's/\( \)*active environment : //p')
+echo SAVED_ENV: $SAVED_ENV
+
+ENV_TO_ACTIVATE=$CPC_ENV
+conda activate $ENV_TO_ACTIVATE
+
+params=""
+if [[ $NULLSPACE == true ]]
+then
+    params="${params} --nullspace"
+fi
+
+if [[ $NO_TEST == true ]]
+then
+    params="${params} --no_test"
+fi
+echo "Params: $params"
+
+echo "$SCRIPT_PATH/embeddings_abx.py"
+python $SCRIPT_PATH/embeddings_abx.py $CHECKPOINT_PATH $DATASET_PATH $embeddings --gru_level 2 $params
+
+directories=("dev-clean" "dev-other")
+if [[ $NO_TEST == false ]]
+then
+    directories+=("test-clean" "test-other")
+fi
+echo "Directories: ${directories[@]}"
+
+for i in `basename -a $(ls -d $embeddings/*/)`
+do
+    for directory in ${directories[@]}
+    do 
+        for file in `ls $embeddings/$i/phonetic/$directory` 
+        do 
+            filename_no_ext="${file%.*}" 
+            if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]] 
+            then 
+                rm $embeddings/$i/phonetic/$directory/$file 
+            fi
+        done
+    done 
+done
+
+conda activate $ZEROSPEECH_EVAL_ENV
+
+frame_shift="0.01"
+echo "Frame shift is ${frame_shift}s"
+
+metrics=("cosine" "euclidean")
+for metric in ${metrics[@]}
+do
+    cat > $embeddings/$metric.yaml << EOF
+author: LSTM Baseline
+affiliation: EHESS, ENS, PSL Research Univerity, CNRS and Inria
+description: >
+  CPC-big (trained on librispeech 960), kmeans (trained on librispeech 100),
+  LSTM. See https://zerospeech.com/2021 for more details.
+open_source: true
+train_set: librispeech 100 and 960
+gpu_budget: 60
+parameters:
+  phonetic:
+    metric: ${metric}
+    frame_shift: ${frame_shift}
+EOF
+
+    for i in `basename -a $(ls -d $embeddings/*/)`
+    do
+        cp $embeddings/$metric.yaml $embeddings/$i/meta.yaml
+        #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $DATASET_PATH $embeddings/$i
+        #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
+        #zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
+        zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
+    done
+done
+
+for metric in ${metrics[@]}
+do
+    for i in `basename -a $(ls -d $embeddings/*/)`
+    do 
+        echo $i $metric
+        cat $results/$metric/$i/score_phonetic.csv
+        echo
+    done
+done > $OUTPUT_DIR/combined_results.txt
+
+conda activate $SAVED_ENV
\ No newline at end of file

From 257583535562d47d135731cc8fa68e962a0a5a76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaros=C5=82aw=20Dzikowski?= <jarekdzikowski1337@gmail.com>
Date: Fri, 2 Apr 2021 00:09:34 +0200
Subject: [PATCH 02/10] Modified finetune_nullspace.sh so that you can provide
 your own arguments

---
 finetune_nullspace.sh | 61 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh
index e250170..8874cac 100755
--- a/finetune_nullspace.sh
+++ b/finetune_nullspace.sh
@@ -1,30 +1,77 @@
-SAVE_DIR="/pio/scratch/1/i273233/linear_separability/cpc/gru_level2/cpc_official"
 SPEAKERS="speakers_factorized"
 PHONEMES="phonemes_nullspace"
 SPEAKERS_NULLSPACE="speakers_nullspace"
 
-DIM_INTER=$1
+DATASET_PATH=false
+TRAIN_SET=false
+VALIDATION_SET=false
+CHECKPOINT_PATH=false
+OUTPUT_DIR=false
+DIM_INBETWEEN=false
 FROM_STEP=$SPEAKERS
-if [[ $# -ge 2 ]]; then
-    FROM_STEP=$2
+PHONES_PATH=false
+
+print_usage() {
+  echo -e "Usage: ./finetune_nullspace.sh"
+  echo -e "\t-d DATASET_PATH"
+  echo -e "\t-t TRAIN_SET"
+  echo -e "\t-v VALIDATION_SET"
+  echo -e "\t-c CHECKPOINT_PATH"
+  echo -e "\t-o OUTPUT_DIR"
+  echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)"
+  echo -e "OPTIONAL FLAGS:"
+  echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
+  echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. You don't need it if you start from $SPEAKERS_NULLSPACE)"
+}
+
+while getopts 'd:t:v:c:o:n:f:p:' flag; do
+    case "${flag}" in
+        d) DATASET_PATH="${OPTARG}" ;;
+        t) TRAIN_SET="${OPTARG}" ;;
+        v) VALIDATION_SET="${OPTARG}" ;;
+        c) CHECKPOINT_PATH="${OPTARG}" ;;
+        o) OUTPUT_DIR="${OPTARG}" ;;
+        n) DIM_INBETWEEN="${OPTARG}" ;;
+        f) FROM_STEP="${OPTARG}" ;;
+        p) PHONES_PATH="${OPTARG}" ;;
+        *) print_usage
+           exit 1 ;;
+    esac
+done
+
+echo $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH $OUTPUT_DIR $DIM_INBETWEEN $FROM_STEP $PHONES_PATH
+
+if [[ $DATASET_PATH == false || $TRAIN_SET == false || $VALIDATION_SET == false || $CHECKPOINT_PATH == false || $OUTPUT_DIR == false || $DIM_INBETWEEN == false || ( $PHONES_PATH == false && $FROM_STEP != "$SPEAKERS_NULLSPACE" ) ]]
+then
+    echo "Either DATASET_PATH, TRAIN_SET, VALIDATION_SET, CHECKPOINT_PATH, OUTPUT_DIR or DIM_INBETWEEN is not set, or PHONES_PATH is missing although FROM_STEP runs the $PHONEMES step."
+    print_usage
+    exit 1
 fi
 
+mkdir -p $OUTPUT_DIR
+
 case $FROM_STEP in
 $SPEAKERS)
     echo $SPEAKERS
-    mkdir -p ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/log.txt
+    mkdir -p ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}
+    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2
     ;&
 $PHONEMES)
     echo $PHONEMES
-    mkdir -p ${SAVE_DIR}_${PHONEMES}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${PHONEMES}_${DIM_INTER} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $zd/LibriSpeech/alignments2/converted_aligned_phones.txt --path_speakers_factorized ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/checkpoint_9.pt --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${PHONEMES}_${DIM_INTER}/log.txt
+    mkdir -p ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}
+    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONES_PATH --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2
     ;&
 $SPEAKERS_NULLSPACE)
     echo $SPEAKERS_NULLSPACE
-    mkdir -p ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER} && python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ $zd/LibriSpeech/labels_split/train_split_100.txt $zd/LibriSpeech/labels_split/test_split_100.txt $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt --pathCheckpoint ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${SAVE_DIR}_${SPEAKERS}_${DIM_INTER}/checkpoint_9.pt --dim_inter $DIM_INTER --gru_level 2 | tee ${SAVE_DIR}_${SPEAKERS_NULLSPACE}_${DIM_INTER}/log.txt
+    mkdir -p ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN}
+    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2
     ;;
 *)
     echo "Invalid from step: ${FROM_STEP} while it should be either ${SPEAKERS}, ${PHONEMES} or ${SPEAKERS_NULLSPACE}"
     ;;
 esac
 
+echo "Checkpoint with nullspace is located in ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}/checkpoint_9.pt"
+echo "The results of all the experiments are located in ${OUTPUT_DIR}/DIRECTORY/checkpoint_logs.json"
+
 exit 0
\ No newline at end of file

From 0c5635a60937af9147c622b08449b086f0232b53 Mon Sep 17 00:00:00 2001
From: Piotr Pusz <petropusz@gmail.com>
Date: Sat, 3 Apr 2021 19:55:01 +0200
Subject: [PATCH 03/10] updated nullspace scripts with dataset audio format
 option, added summary on top of readme, deleted old scripts (moving to zs2021
 repo)

---
 README.md                                     | 11 ++++++++-
 ...push_nonullspace_phoneme_classification.sh | 16 -------------
 ...erpush_nullspace_phoneme_classification.sh | 18 ---------------
 finetune_nullspace.sh                         | 23 +++++++++++++++----
 scripts/eval_abx.sh                           |  7 ++++--
 5 files changed, 33 insertions(+), 42 deletions(-)
 delete mode 100755 centerpush_nonullspace_phoneme_classification.sh
 delete mode 100755 centerpush_nullspace_phoneme_classification.sh

diff --git a/README.md b/README.md
index 63d47f8..7de9f27 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,12 @@
+This repo is based on the `CPC_audio` repo (<https://github.com/facebookresearch/CPC_audio>), but it also contains:
+- part of the code used for the University of Wroclaw ZeroSpeech2021 submission (our modifications and also merged files from: <https://github.com/facebookresearch/CPC_audio/tree/zerospeech> - `criterion/clustering/`, <https://github.com/tuanh208/CPC_audio/tree/zerospeech> - `feature_loader.py -> buildFeature_batch`, <https://github.com/bootphon/zerospeech2021_baseline> - `scripts/`)
+- code used for CPC-CTC paper
+
+Below is the original README, updated with some of our modifications; part of our code is also described in `cpc/README.md`.
+
+--------------------------------------------------------------------------------------------------
+
+
 # CPC_audio
 
 This code implements the Contrast Predictive Coding algorithm on audio data, as described in the paper [Unsupervised Pretraining Transfers well Across Languages](https://arxiv.org/abs/2002.02848). This is an unsupervised method to train audio features directly from the raw waveform.
@@ -123,7 +132,7 @@ Will evaluate the speaker separability of the concatenation of the features from
 
 `--gru_level` controls from which layer of autoregressive part of CPC to extract the features. By default it's the last one.
 
-Nullspaces:
+### Nullspaces:
 
 To conduct the nullspace experiment, first classify speakers using two factorized matrices `A` (`DIM_EMBEDDING` x `DIM_INBETWEEN`) and `B` (`DIM_INBETWEEN` x `SPEAKERS`). You'll want to extract `A'`, the nullspace of matrix `A` (of size `DIM_EMBEDDING` x (`DIM_EMBEDDING` - `DIM_INBETWEEN`)), to make the embeddings less sensitive to speakers. 
 ```bash 
diff --git a/centerpush_nonullspace_phoneme_classification.sh b/centerpush_nonullspace_phoneme_classification.sh
deleted file mode 100755
index e858084..0000000
--- a/centerpush_nonullspace_phoneme_classification.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-
-for deg in 0 0.2 0.3 0.4 0.5 0.6 0.7
-do
-    echo $deg
-    mkdir ${centerpushDir}/phoneme_classif_nonull_${deg}/
-    python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ \
-    $zd/LibriSpeech/labels_split/train_split_100.txt \
-    $zd/LibriSpeech/labels_split/test_split_100.txt \
-    $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt \
-    --centerpushFile $zd/checkpoints/CPC-big-kmeans50/clustering_kmeans50/clustering_CPC_big_kmeans50.pt \
-    --centerpushDeg $deg \
-    --pathCheckpoint ${centerpushDir}/phoneme_classif_nonull_${deg}/ \
-    --mode phonemes --max_size_loaded 40000000 --n_process_loader 2 \
-    --model cpc --pathPhone $zd/LibriSpeech/alignments2/converted_aligned_phones.txt \
-    --gru_level 2 --batchSizeGPU 32 | tee ${centerpushDir}/phoneme_classif_nonull_${deg}/log.txt
-done
\ No newline at end of file
diff --git a/centerpush_nullspace_phoneme_classification.sh b/centerpush_nullspace_phoneme_classification.sh
deleted file mode 100755
index 5d5c268..0000000
--- a/centerpush_nullspace_phoneme_classification.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-
-for deg in 0 0.2 0.3 0.4 0.5 0.6 0.7
-do
-    echo $deg
-    mkdir ${centerpushDir}/phoneme_classif_null_${deg}/
-    python cpc/eval/linear_separability.py $zd/LibriSpeech/train-clean-100/ \
-    $zd/LibriSpeech/labels_split/train_split_100.txt \
-    $zd/LibriSpeech/labels_split/test_split_100.txt \
-    $zd/checkpoints/CPC-big-kmeans50/cpc_ll6k/checkpoint_32.pt \
-    --centerpushFile $cpcClustDir/checkpoints/clustering_CPC_big_kmeans50_nullspace_64/clustering_CPC_big_kmeans50_nullspace_64.pt \
-    --centerpushDeg $deg \
-    --pathCheckpoint ${centerpushDir}/phoneme_classif_null_${deg}/ \
-    --mode phonemes_nullspace --max_size_loaded 40000000 --n_process_loader 2 \
-    --model cpc --pathPhone $zd/LibriSpeech/alignments2/converted_aligned_phones.txt \
-    --path_speakers_factorized $nullspaceDir/linear_separability/cpc/gru_level2/cpc_official_speakers_factorized_64/checkpoint_9.pt \
-    --dim_inter 64 --gru_level 2 --batchSizeGPU 32 | tee ${centerpushDir}/phoneme_classif_null_${deg}/log.txt
-done
-
diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh
index 8874cac..52eb6ac 100755
--- a/finetune_nullspace.sh
+++ b/finetune_nullspace.sh
@@ -10,6 +10,7 @@ OUTPUT_DIR=false
 DIM_INBETWEEN=false
 FROM_STEP=$SPEAKERS
 PHONES_PATH=false
+AUDIO_FORMAT=flac
 
 print_usage() {
   echo -e "Usage: ./finetune_nullspace.sh"
@@ -20,8 +21,9 @@ print_usage() {
   echo -e "\t-o OUTPUT_DIR"
   echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)"
   echo -e "OPTIONAL FLAGS:"
-  echo -e "\t-f FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
+  echo -e "\t-s FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
   echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. You don't need it if you start from $SPEAKERS_NULLSPACE)"
+  echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
 }
 
 while getopts 'd:t:v:c:o:n:f:p:' flag; do
@@ -32,8 +34,9 @@ while getopts 'd:t:v:c:o:n:f:p:' flag; do
         c) CHECKPOINT_PATH="${OPTARG}" ;;
         o) OUTPUT_DIR="${OPTARG}" ;;
         n) DIM_INBETWEEN="${OPTARG}" ;;
-        f) FROM_STEP="${OPTARG}" ;;
+        s) FROM_STEP="${OPTARG}" ;;
         p) PHONES_PATH="${OPTARG}" ;;
+        f) AUDIO_FORMAT=${OPTARG} ;;
         *) print_usage
            exit 1 ;;
     esac
@@ -54,17 +57,27 @@ case $FROM_STEP in
 $SPEAKERS)
     echo $SPEAKERS
     mkdir -p ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}
-    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2
+    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH \
+    --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN} --mode $SPEAKERS \
+    --max_size_loaded 40000000 --n_process_loader 2 --model cpc --dim_inter $DIM_INBETWEEN --gru_level 2 --file_extension .$AUDIO_FORMAT
     ;&
 $PHONEMES)
     echo $PHONEMES
     mkdir -p ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN}
-    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONES_PATH --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2
+    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH \
+    --pathCheckpoint ${OUTPUT_DIR}/${PHONEMES}_${DIM_INBETWEEN} --mode $PHONEMES \
+    --max_size_loaded 40000000 --n_process_loader 2 --model cpc --pathPhone $PHONES_PATH \
+    --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt \
+    --dim_inter $DIM_INBETWEEN --gru_level 2 --file_extension .$AUDIO_FORMAT
     ;&
 $SPEAKERS_NULLSPACE)
     echo $SPEAKERS_NULLSPACE
     mkdir -p ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN}
-    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE --max_size_loaded 40000000 --n_process_loader 2 --model cpc --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt --dim_inter $DIM_INBETWEEN --gru_level 2
+    python cpc/eval/linear_separability.py $DATASET_PATH $TRAIN_SET $VALIDATION_SET $CHECKPOINT_PATH \
+    --pathCheckpoint ${OUTPUT_DIR}/${SPEAKERS_NULLSPACE}_${DIM_INBETWEEN} --mode $SPEAKERS_NULLSPACE \
+    --max_size_loaded 40000000 --n_process_loader 2 --model cpc \
+    --path_speakers_factorized ${OUTPUT_DIR}/${SPEAKERS}_${DIM_INBETWEEN}/checkpoint_9.pt \
+    --dim_inter $DIM_INBETWEEN --gru_level 2 --file_extension .$AUDIO_FORMAT
     ;;
 *)
     echo "Invalid from step: ${FROM_STEP} while it should be either ${SPEAKERS}, ${PHONEMES} or ${SPEAKERS_NULLSPACE}"
diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh
index 5475b39..07cd2ed 100755
--- a/scripts/eval_abx.sh
+++ b/scripts/eval_abx.sh
@@ -10,6 +10,7 @@ CHECKPOINT_PATH=false
 OUTPUT_DIR=false
 NULLSPACE=false
 NO_TEST=false
+AUDIO_FORMAT=flac
 
 print_usage() {
   echo -e "Usage: ./eval_abx.sh"
@@ -23,6 +24,7 @@ print_usage() {
   echo -e "\t-e CPC_ENV"
   echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)"
   echo -e "\t-t (Do not compute embeddings for test set)"
+  echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
 }
 
 while getopts 'd:r:c:o:na:e:z:t' flag; do
@@ -36,6 +38,7 @@ while getopts 'd:r:c:o:na:e:z:t' flag; do
         e) CPC_ENV="${OPTARG}" ;;
         z) ZEROSPEECH_EVAL_ENV="${OPTARG}" ;;
         t) NO_TEST=true ;;
+        f) AUDIO_FORMAT=${OPTARG} ;;
         *) print_usage
            exit 1 ;;
     esac
@@ -76,7 +79,7 @@ fi
 echo "Params: $params"
 
 echo "$SCRIPT_PATH/embeddings_abx.py"
-python $SCRIPT_PATH/embeddings_abx.py $CHECKPOINT_PATH $DATASET_PATH $embeddings --gru_level 2 $params
+python $SCRIPT_PATH/embeddings_abx.py $CHECKPOINT_PATH $DATASET_PATH $embeddings --gru_level 2 --file_extension $AUDIO_FORMAT $params
 
 directories=("dev-clean" "dev-other")
 if [[ $NO_TEST == false ]]
@@ -92,7 +95,7 @@ do
         for file in `ls $embeddings/$i/phonetic/$directory` 
         do 
             filename_no_ext="${file%.*}" 
-            if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]] 
+            if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.$AUDIO_FORMAT" ]] 
             then 
                 rm $embeddings/$i/phonetic/$directory/$file 
             fi

From 9308a331721d9268664ff010a7e47f2248657139 Mon Sep 17 00:00:00 2001
From: Piotr Pusz <petropusz@gmail.com>
Date: Sat, 3 Apr 2021 20:05:57 +0200
Subject: [PATCH 04/10] fixed soundfile config in environment.yml

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 2786201..9c9eb34 100644
--- a/environment.yml
+++ b/environment.yml
@@ -14,8 +14,8 @@ dependencies:
   - tqdm
   - nose
   - cython
-  - pysoundfile
   - pip:
+    - soundfile
     - progressbar2
     - matplotlib
     - torchaudio

From 79f4f2e86c6fec75e93e3aae26e365d34a6fa322 Mon Sep 17 00:00:00 2001
From: Piotr Pusz <petropusz@gmail.com>
Date: Tue, 6 Apr 2021 15:00:44 +0200
Subject: [PATCH 05/10] adding Jarek's script for creating flattened
 LibriSpeech and nullspace help output update

---
 finetune_nullspace.sh                     | 12 ++---
 scripts/create_ls_dataset_for_abx_eval.py | 55 +++++++++++++++++++++++
 scripts/eval_abx.sh                       |  6 +--
 3 files changed, 64 insertions(+), 9 deletions(-)
 create mode 100644 scripts/create_ls_dataset_for_abx_eval.py

diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh
index 52eb6ac..2463c7c 100755
--- a/finetune_nullspace.sh
+++ b/finetune_nullspace.sh
@@ -14,19 +14,19 @@ AUDIO_FORMAT=flac
 
 print_usage() {
   echo -e "Usage: ./finetune_nullspace.sh"
-  echo -e "\t-d DATASET_PATH"
-  echo -e "\t-t TRAIN_SET"
-  echo -e "\t-v VALIDATION_SET"
+  echo -e "\t-d DATASET_PATH (E.g. LIBRISPEECH_DATASET_PATH/train-clean-100)"
+  echo -e "\t-t TRAIN_SET (E.g. LIBRISPEECH_TRAIN_CLEAN_100_TRAIN_SPLIT_FILE_PATH)"
+  echo -e "\t-v VALIDATION_SET (E.g. LIBRISPEECH_TRAIN_CLEAN_100_TEST_SPLIT_FILE_PATH)"
   echo -e "\t-c CHECKPOINT_PATH"
   echo -e "\t-o OUTPUT_DIR"
   echo -e "\t-n DIM_INBETWEEN (Dimension of nullspace will be DIM_EMBEDDING - DIM_INBETWEEN)"
+  echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset)"
   echo -e "OPTIONAL FLAGS:"
-  echo -e "\t-s FROM_STEP (From which step do you want to start. Order: $SPEAKERS -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
-  echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset. You don't need it if you start from $SPEAKERS_NULLSPACE)"
+  echo -e "\t-s FROM_STEP (From which step do you want to start. Order: $SPEAKERS [default] -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
   echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
 }
 
-while getopts 'd:t:v:c:o:n:f:p:' flag; do
+while getopts 'd:t:v:c:o:n:s:p:f:' flag; do
     case "${flag}" in
         d) DATASET_PATH="${OPTARG}" ;;
         t) TRAIN_SET="${OPTARG}" ;;
diff --git a/scripts/create_ls_dataset_for_abx_eval.py b/scripts/create_ls_dataset_for_abx_eval.py
new file mode 100644
index 0000000..92b77da
--- /dev/null
+++ b/scripts/create_ls_dataset_for_abx_eval.py
@@ -0,0 +1,55 @@
+import os
+import sys
+import shutil
+import argparse
+from pathlib import Path
+import numpy as np
+import soundfile as sf
+
+def parse_args():
+    # Run parameters
+    parser = argparse.ArgumentParser()
+    parser.add_argument("librispeech_path", type=str,
+                        help="Path to the root directory of LibriSpeech.")
+    parser.add_argument("zerospeech_dataset_path", type=str,
+                        help="Path to the ZeroSpeech dataset.")
+    parser.add_argument("target_path", type=str,
+                        help="Path to the output directory.")
+    parser.add_argument("--file_extension", type=str, default="flac",
+                          help="Extension of the audio files in the dataset (default: flac).")
+    return parser.parse_args()
+
+def main():
+    # Parse and print args
+    args = parse_args()
+    #logger.info(args)
+
+    phonetic = "phonetic"
+    datasets = ["dev-clean", "dev-other", "test-clean", "test-other"]
+
+    for dataset in datasets:
+        print("> {}".format(dataset))
+        target_dirname = os.path.join(args.target_path, phonetic, dataset)
+        Path(target_dirname).mkdir(parents=True, exist_ok=True)
+
+        librispeech_dirname = os.path.join(args.librispeech_path, dataset)
+        files = [(filename, dirname) for dirname, _, files in os.walk(librispeech_dirname, followlinks=True) for filename in files if filename.endswith(args.file_extension)]
+        for i, (filename, dirname) in enumerate(files):
+            print("Progress {:2.1%}".format(i / len(files)), end="\r")
+            input_path = os.path.join(dirname, filename)
+            output_path = os.path.join(target_dirname, os.path.splitext(filename)[0] + ".wav")
+            data, sample_rate = sf.read(input_path)
+            sf.write(output_path, data, sample_rate)
+
+        if dataset.startswith("dev"):
+            source_item_path = os.path.join(args.zerospeech_dataset_path, phonetic, dataset, dataset + ".item")
+            target_item_path = os.path.join(target_dirname, dataset + ".item")
+            shutil.copy(source_item_path, target_item_path)
+
+
+if __name__ == "__main__":
+    #import ptvsd
+    #ptvsd.enable_attach(('0.0.0.0', 7310))
+    #print("Attach debugger now")
+    #ptvsd.wait_for_attach()
+    main()
\ No newline at end of file
diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh
index 07cd2ed..b8d8a87 100755
--- a/scripts/eval_abx.sh
+++ b/scripts/eval_abx.sh
@@ -14,12 +14,12 @@ AUDIO_FORMAT=flac
 
 print_usage() {
   echo -e "Usage: ./eval_abx.sh"
-  echo -e "\t-d DATASET_PATH"
+  echo -e "\t-d DATASET_PATH (Either ZEROSPEECH_DATASET_PATH or LIBRISPEECH_FLATTENED_DATASET_PATH [Or anything that has directory structure of these two with dev-*.item files from ZEROSPEECH_DATASET_PATH])"
   echo -e "\t-r ORIGINAL_DATASET_PATH"
   echo -e "\t-c CHECKPOINT_PATH"
   echo -e "\t-o OUTPUT_DIR"
   echo -e "OPTIONAL FLAGS:"
-  echo -e "\t-n (Load a model with nullspace)"
+  echo -e "\t-n (Provide this flag if you want to load a model with nullspace)"
   echo -e "\t-a CONDA_PATH"
   echo -e "\t-e CPC_ENV"
   echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)"
@@ -27,7 +27,7 @@ print_usage() {
   echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
 }
 
-while getopts 'd:r:c:o:na:e:z:t' flag; do
+while getopts 'd:r:c:o:n:a:e:z:t:f' flag; do
     case "${flag}" in
         d) DATASET_PATH="${OPTARG}" ;;
         r) ORIGINAL_DATASET_PATH="${OPTARG}" ;;

From 35bce6e71f6550491f6652113b49d6f043093688 Mon Sep 17 00:00:00 2001
From: Piotr Pusz <petropusz@gmail.com>
Date: Tue, 6 Apr 2021 16:17:38 +0200
Subject: [PATCH 06/10] forgotten and badly merged changes from Jarek's commit

---
 finetune_nullspace.sh     | 2 +-
 scripts/embeddings_abx.py | 5 -----
 scripts/eval_abx.sh       | 2 +-
 3 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh
index 2463c7c..892213c 100755
--- a/finetune_nullspace.sh
+++ b/finetune_nullspace.sh
@@ -26,7 +26,7 @@ print_usage() {
   echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
 }
 
-while getopts 'd:t:v:c:o:n:s:p:f' flag; do
+while getopts 'd:t:v:c:o:n:s:p:f:' flag; do
     case "${flag}" in
         d) DATASET_PATH="${OPTARG}" ;;
         t) TRAIN_SET="${OPTARG}" ;;
diff --git a/scripts/embeddings_abx.py b/scripts/embeddings_abx.py
index 8e68a8c..4b6ab62 100644
--- a/scripts/embeddings_abx.py
+++ b/scripts/embeddings_abx.py
@@ -1,9 +1,4 @@
 #!/usr/bin/env python3 -u
-# !/usr/bin/env python3 -u
-# Copyright (c) Facebook, Inc. and its affiliates.
-#
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
 
 import logging
 import os
diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh
index b8d8a87..e9cdc1f 100755
--- a/scripts/eval_abx.sh
+++ b/scripts/eval_abx.sh
@@ -27,7 +27,7 @@ print_usage() {
   echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
 }
 
-while getopts 'd:r:c:o:n:a:e:z:t:f' flag; do
+while getopts 'd:r:c:o:n:a:e:z:t:f:' flag; do
     case "${flag}" in
         d) DATASET_PATH="${OPTARG}" ;;
         r) ORIGINAL_DATASET_PATH="${OPTARG}" ;;

From 6a3630e8e55da650ff75924c3f3b4420a28455f7 Mon Sep 17 00:00:00 2001
From: Piotr Pusz <petropusz@gmail.com>
Date: Tue, 6 Apr 2021 16:36:51 +0200
Subject: [PATCH 07/10] another bash args fix

---
 scripts/eval_abx.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh
index e9cdc1f..ef1e8f6 100755
--- a/scripts/eval_abx.sh
+++ b/scripts/eval_abx.sh
@@ -27,7 +27,7 @@ print_usage() {
   echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
 }
 
-while getopts 'd:r:c:o:n:a:e:z:t:f:' flag; do
+while getopts 'd:r:c:o:na:e:z:tf:' flag; do
     case "${flag}" in
         d) DATASET_PATH="${OPTARG}" ;;
         r) ORIGINAL_DATASET_PATH="${OPTARG}" ;;

From 8e0c7ad0d21d03f32fbf1e5cc89460d9da4e05cf Mon Sep 17 00:00:00 2001
From: Piotr Pusz <petropusz@gmail.com>
Date: Wed, 7 Apr 2021 00:28:34 +0200
Subject: [PATCH 08/10] flattened LS file extension fix and --force-cpu for
 nullspace ABX eval because of GPU overflow

---
 scripts/create_ls_dataset_for_abx_eval.py | 2 +-
 scripts/eval_abx.sh                       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/create_ls_dataset_for_abx_eval.py b/scripts/create_ls_dataset_for_abx_eval.py
index 92b77da..3f04770 100644
--- a/scripts/create_ls_dataset_for_abx_eval.py
+++ b/scripts/create_ls_dataset_for_abx_eval.py
@@ -37,7 +37,7 @@ def main():
         for i, (filename, dirname) in enumerate(files):
             print("Progress {:2.1%}".format(i / len(files)), end="\r")
             input_path = os.path.join(dirname, filename)
-            output_path = os.path.join(target_dirname, os.path.splitext(filename)[0] + ".wav")
+            output_path = os.path.join(target_dirname, os.path.splitext(filename)[0] + "." + args.file_extension)
             data, sample_rate = sf.read(input_path)
             sf.write(output_path, data, sample_rate)
 
diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh
index ef1e8f6..5c52534 100755
--- a/scripts/eval_abx.sh
+++ b/scripts/eval_abx.sh
@@ -132,7 +132,7 @@ EOF
         #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $DATASET_PATH $embeddings/$i
         #zerospeech2021-evaluate -j 12 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
         #zerospeech2021-evaluate -j 20 -o $results/$metric/$i --force-cpu --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
-        zerospeech2021-evaluate -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
+        zerospeech2021-evaluate --force-cpu -j 20 -o $results/$metric/$i --no-lexical --no-syntactic --no-semantic $ORIGINAL_DATASET_PATH $embeddings/$i
     done
 done
 

From 8406a1a75ae320f12c370f444534e715a0481ee0 Mon Sep 17 00:00:00 2001
From: Piotr Pusz <petropusz@gmail.com>
Date: Wed, 7 Apr 2021 12:04:51 +0200
Subject: [PATCH 09/10] fixing bug I made in eval_abx

---
 finetune_nullspace.sh | 2 +-
 scripts/eval_abx.sh   | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/finetune_nullspace.sh b/finetune_nullspace.sh
index 892213c..5d1ec30 100755
--- a/finetune_nullspace.sh
+++ b/finetune_nullspace.sh
@@ -23,7 +23,7 @@ print_usage() {
   echo -e "\t-p PHONES_PATH (Path to the file containing phonemes for the entire dataset)"
   echo -e "OPTIONAL FLAGS:"
   echo -e "\t-s FROM_STEP (From which step do you want to start. Order: $SPEAKERS [default] -> $PHONEMES -> $SPEAKERS_NULLSPACE)"
-  echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
+  echo -e "\t-f audio files format in -d dataset (without a dot)"
 }
 
 while getopts 'd:t:v:c:o:n:s:p:f:' flag; do
diff --git a/scripts/eval_abx.sh b/scripts/eval_abx.sh
index 5c52534..de6247f 100755
--- a/scripts/eval_abx.sh
+++ b/scripts/eval_abx.sh
@@ -24,7 +24,7 @@ print_usage() {
   echo -e "\t-e CPC_ENV"
   echo -e "\t-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)"
   echo -e "\t-t (Do not compute embeddings for test set)"
-  echo -e "\t-f audio files format in LibriSpeech dataset (without a dot)"
+  echo -e "\t-f audio files format in -d dataset (without a dot)"
 }
 
 while getopts 'd:r:c:o:na:e:z:tf:' flag; do
@@ -95,7 +95,7 @@ do
         for file in `ls $embeddings/$i/phonetic/$directory` 
         do 
             filename_no_ext="${file%.*}" 
-            if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.$AUDIO_FORMAT" ]] 
+            if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]] # here ALWAYS wav as it is for ZS dataset 
             then 
                 rm $embeddings/$i/phonetic/$directory/$file 
             fi
@@ -146,4 +146,6 @@ do
     done
 done > $OUTPUT_DIR/combined_results.txt
 
-conda activate $SAVED_ENV
\ No newline at end of file
+if [ $SAVED_ENV != None ]; then
+    conda activate $SAVED_ENV
+fi
\ No newline at end of file

From 87d5739abe77b9277720767b1d9b086ae863633e Mon Sep 17 00:00:00 2001
From: Piotr Pusz <petropusz@gmail.com>
Date: Wed, 7 Apr 2021 18:43:38 +0200
Subject: [PATCH 10/10] fixed pushing to closest centers for
 linear_separability with nullspace - as only c_feature is projected

---
 cpc/eval/linear_separability.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/cpc/eval/linear_separability.py b/cpc/eval/linear_separability.py
index 36a1011..20c065c 100644
--- a/cpc/eval/linear_separability.py
+++ b/cpc/eval/linear_separability.py
@@ -39,7 +39,8 @@ def train_step(feature_maker, criterion, data_loader, optimizer, label_key="spea
         if centerpushSettings:
             centers, pushDeg = centerpushSettings
             c_feature = utils.pushToClosestForBatch(c_feature, centers, deg=pushDeg)
-            encoded_data = utils.pushToClosestForBatch(encoded_data, centers, deg=pushDeg)
+            # [!] ONLY c_features are projected into nullspace, so encoded_data is of no use with nullspace currently
+            #encoded_data = utils.pushToClosestForBatch(encoded_data, centers, deg=pushDeg)
         all_losses, all_acc = criterion(c_feature, encoded_data, label)
 
         totLoss = all_losses.sum()
@@ -70,7 +71,8 @@ def val_step(feature_maker, criterion, data_loader, label_key="speaker", centerp
             if centerpushSettings:
                 centers, pushDeg = centerpushSettings
                 c_feature = utils.pushToClosestForBatch(c_feature, centers, deg=pushDeg)
-                encoded_data = utils.pushToClosestForBatch(encoded_data, centers, deg=pushDeg)
+                # [!] ONLY c_features are projected into nullspace, so encoded_data is of no use with nullspace currently
+                #encoded_data = utils.pushToClosestForBatch(encoded_data, centers, deg=pushDeg)
             all_losses, all_acc = criterion(c_feature, encoded_data, label)
 
             logs["locLoss_val"] += np.asarray([all_losses.mean().item()])