diff --git a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm index 70865465e9..b0edf65793 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm +++ b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm @@ -134,10 +134,10 @@ our $ENSEMBL_GENOMES_PORT = "4157"; our $ENSEMBL_GENOMES_USER = "anonymous"; ## Vertebrates -our $HOMO_SAPIENS_CORE = "homo_sapiens_core_110_38"; -our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_110_38"; -our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_110_38"; -our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_110_38"; +our $HOMO_SAPIENS_CORE = "homo_sapiens_core_111_38"; +our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_111_38"; +our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_111_38"; +our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_111_38"; #our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38"; #our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38"; #our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38"; diff --git a/cellbase-app/app/scripts/ensembl-scripts/protein_function_prediction_matrices.pl b/cellbase-app/app/scripts/ensembl-scripts/protein_function_prediction_matrices.pl index de55722396..b1f4004f2c 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/protein_function_prediction_matrices.pl +++ b/cellbase-app/app/scripts/ensembl-scripts/protein_function_prediction_matrices.pl @@ -6,6 +6,10 @@ use Digest::MD5 qw(md5 md5_hex md5_base64); use JSON; +#use lib "~/appl/cellbase/build/scripts/ensembl-scripts/"; +#use lib "~/soft/ensembl-variation/modules/"; +#use lib "~/soft/ensembl/modules/"; + use DB_CONFIG; my $species = 'Homo sapiens'; @@ -87,6 +91,37 @@ #} #print join("=", $polyphen2->get_prediction(1, 'G'))."\n"; +################################################################## + +# Get the current time +my ($sec, $min, $hour, $mday, $mon, $year) = localtime(); +# Adjust the year and month values (year is years since 1900, and month is 0-based) + +$year += 1900; +$mon += 1; + +# Format the date and time +my $formatted_date = sprintf("%04d%02d%02d_%02d%02d%02d", $year, $mon, $mday, $hour, $min, $sec); + +my $jsonVersion = {}; +$jsonVersion->{"date"} = $formatted_date; +$jsonVersion->{"data"} = "protein_substitution_predictions"; +$jsonVersion->{"version"} = "Ensembl 104"; +my @urls = (); +push @urls, "ensembldb.ensembl.org:3306"; +$jsonVersion->{"url"} = \@urls; + +print "Generating the JSON file for the Sift version.\n"; +$jsonVersion->{"name"} = "sift"; +open(FILE, ">".$outdir."/siftVersion.json") || die "error opening file\n"; +print FILE to_json($jsonVersion) . "\n"; +close(FILE); + +print "Generating the JSON file for the PolyPhen version\n"; +$jsonVersion->{"name"} = "polyphen"; +open(FILE, ">".$outdir."/polyphenVersion.json") || die "error opening file\n"; +print FILE to_json($jsonVersion) . 
"\n"; +close(FILE); my ($translation, $seq, $md5seq, @preds, @all_predictions); #my @transcripts = @{$transcript_adaptor->fetch_all_by_biotype('protein_coding')}; @@ -126,42 +161,56 @@ ## HASH ## my $effect = {}; + $effect->{"chromosome"} = $trans->seq_region_name; $effect->{"transcriptId"} = $trans->stable_id; - $effect->{"checksum"} = $md5seq; - $effect->{"size"} = length($seq); foreach my $u (@{ $trans->get_all_xrefs('Uniprot/SWISSPROT') }){ $effect->{"uniprotId"} = $u->display_id(); } + $effect->{"source"} = "polyphen"; my $polyphen2 = $prot_function_adaptor->fetch_polyphen_predictions_by_translation_md5($md5seq); - for(my $i=1; $i<=length($seq); $i++) { - foreach (my $j=0; $j < @aa_code; $j++) { - if(defined $polyphen2) { + if(defined $polyphen2) { + for(my $i=1; $i<=length($seq); $i++) { + $effect->{"aaPosition"} = $i; + my @scores = (); + foreach (my $j=0; $j < @aa_code; $j++) { @preds = $polyphen2->get_prediction($i, $aa_code[$j]); - $effect->{"aaPositions"}->{$i}->{$aa_code[$j]}->{"pe"} = $effect_code{$preds[0]}; - $effect->{"aaPositions"}->{$i}->{$aa_code[$j]}->{"ps"} = $preds[1]; + if(defined $preds[0] || defined $preds[1]) { + push @scores, {"aaAlternate" => $aa_code[$j], "score" => $preds[1], "effect" => $preds[0]}; + $effect->{"scores"} = \@scores; + } + } + if(@scores) { + print FILE to_json($effect)."\n"; } } } - my $sift = $prot_function_adaptor->fetch_sift_predictions_by_translation_md5($md5seq); - for(my $i=1; $i<=length($seq); $i++) { - foreach (my $j=0; $j < @aa_code; $j++) { - if(defined $sift) { - @preds = $sift->get_prediction($i, $aa_code[$j]); - $effect->{"aaPositions"}->{$i}->{$aa_code[$j]}->{"se"} = $effect_code{$preds[0]}; - $effect->{"aaPositions"}->{$i}->{$aa_code[$j]}->{"ss"} = $preds[1]; - } - } - } - print FILE to_json($effect)."\n"; + $effect->{"source"} = "sift"; + my $sift = $prot_function_adaptor->fetch_sift_predictions_by_translation_md5($md5seq); + if(defined $sift) { + for(my $i=1; $i<=length($seq); $i++) { + $effect->{"aaPosition"} = $i; + my @scores = (); + foreach (my $j=0; $j < @aa_code; $j++) { + @preds = $sift->get_prediction($i, $aa_code[$j]); + if(defined $preds[0] || defined $preds[1]) { + push @scores, {"aaAlternate" => $aa_code[$j], "score" => $preds[1], "effect" => $preds[0]}; + $effect->{"scores"} = \@scores; + } + } + if(@scores) { + print FILE to_json($effect)."\n"; + } + } + } } } close(FILE); ## GZip output to save space in Amazon AWS -# exec("gzip prot_func_pred_chr_".$chrom->seq_region_name); + exec("gzip " . $outdir . "/prot_func_pred_chr_" . $chr->seq_region_name . ".json"); } sub print_parameters { diff --git a/cellbase-app/app/scripts/mirtarbase/fix-gene-symbol.sh b/cellbase-app/app/scripts/mirtarbase/fix-gene-symbol.sh new file mode 100755 index 0000000000..38c7d1efa2 --- /dev/null +++ b/cellbase-app/app/scripts/mirtarbase/fix-gene-symbol.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# The original MirTarBase hsa_MTI.xlsx contains invalid Gene Symbols in 793 lines. 
+# To fix it, the file first has to be converted to a CSV file, i.e.: hsa_MTI.csv +# +# After converting to CSV, we can see the errors in the Gene Symbols (column 4) of the original file, +# e.g.: 06-mar: +# MIRT050267,hsa-miR-25-3p,Homo sapiens,06-mar,10299,Homo sapiens,CLASH,Functional MTI (Weak),23622248 +# MIRT051174,hsa-miR-16-5p,Homo sapiens,06-mar,10299,Homo sapiens,CLASH,Functional MTI (Weak),23622248 +# +# This script fixes those lines and converts column 4 to a valid Gene Symbol: +# +# MIRT050267,hsa-miR-25-3p,Homo sapiens,MARCHF6,10299,Homo sapiens,CLASH,Functional MTI (Weak),23622248 +# MIRT051174,hsa-miR-16-5p,Homo sapiens,MARCHF6,10299,Homo sapiens,CLASH,Functional MTI (Weak),23622248 + +# Check the number of parameters +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <csv_file>" + exit 1 +fi + +# Check CSV file +csv_file="$1" +if [ ! -f "$csv_file" ]; then + echo "CSV file '$csv_file' does not exist." + exit 1 +fi + +# Fix gene symbols +while IFS=$'\t' read -r c1 c2 c3 c4 c5 c6 c7 c8 c9 || [[ -n "$c1" ]]; do + # Apply the conditions + if [ "$c5" = "10299" ]; then + c4="MARCHF6" + elif [ "$c5" = "51257" ]; then + c4="MARCHF2" + elif [ "$c5" = "54708" ]; then + c4="MARCHF5" + elif [ "$c5" = "54996" ]; then + c4="MTARC2" + elif [ "$c5" = "55016" ]; then + c4="MARCHF1" + elif [ "$c5" = "57574" ]; then + c4="MARCHF4" + elif [ "$c5" = "64757" ]; then + c4="MTARC1" + elif [ "$c5" = "64844" ]; then + c4="MARCHF7" + elif [ "$c5" = "92979" ]; then + c4="MARCHF9" + elif [ "$c5" = "115123" ]; then + c4="MARCHF3" + elif [ "$c5" = "220972" ]; then + c4="MARCHF8" + elif [ "$c5" = "441061" ]; then + c4="MARCHF11" + fi + + # Print the fixed line + echo -e "$c1\t$c2\t$c3\t$c4\t$c5\t$c6\t$c7\t$c8\t$c9" +done < "$csv_file" diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java index 6049ef9b4b..4f830c6e43 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java @@ -19,11 +19,14 @@ import com.beust.jcommander.*; import org.opencb.cellbase.app.cli.CliOptionsParser; import org.opencb.cellbase.core.api.key.ApiKeyQuota; +import org.opencb.cellbase.lib.EtlCommons; import java.util.HashMap; import java.util.List; import java.util.Map; +import static org.opencb.cellbase.lib.EtlCommons.*; + /** * Created by imedina on 03/02/15.
*/ @@ -87,12 +90,15 @@ public class DownloadCommandOptions { @ParametersDelegate public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download: genome, gene, " - + "variation, variation_functional_score, regulation, protein, conservation, " - + "clinical_variants, repeats, svs, pubmed and 'all' to download everything", required = true, arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download: " + GENOME_DATA + "," + GENE_DATA + + "," + VARIATION_FUNCTIONAL_SCORE_DATA + "," + REGULATION_DATA + "," + PROTEIN_DATA + "," + CONSERVATION_DATA + "," + + CLINICAL_VARIANT_DATA + "," + REPEATS_DATA + "," + ONTOLOGY_DATA + "," + PUBMED_DATA + "," + PHARMACOGENOMICS_DATA + + "," + PGS_DATA + "," + REVEL_DATA + "," + ALPHAMISSENSE_DATA + "; or use 'all' to download everything", required = true, + arity = 1) public String data; - @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1) + @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, + arity = 1) public String outputDirectory; } @@ -102,9 +108,11 @@ public class BuildCommandOptions { @ParametersDelegate public CommonCommandOptions commonOptions = commonCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to build: genome, genome_info, " - + "gene, variation, variation_functional_score, regulation, protein, ppi, conservation, drug, " - + "clinical_variants, repeats, svs, splice_score, pubmed. 'all' builds everything.", required = true, arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to build: " + GENOME_DATA + "," + GENE_DATA + "," + + VARIATION_FUNCTIONAL_SCORE_DATA + "," + REGULATION_DATA + "," + PROTEIN_DATA + "," + CONSERVATION_DATA + "," + + CLINICAL_VARIANT_DATA + "," + REPEATS_DATA + "," + ONTOLOGY_DATA + "," + SPLICE_SCORE_DATA + "," + PUBMED_DATA + "," + + PHARMACOGENOMICS_DATA + "," + PGS_DATA + "," + REVEL_DATA + "," + ALPHAMISSENSE_DATA + "; or use 'all' to build" + + " everything", required = true, arity = 1) public String data; @Parameter(names = {"-s", "--species"}, description = "Name of the species to be built, valid formats include 'Homo sapiens' or 'hsapiens'", required = false, arity = 1) @@ -190,8 +198,9 @@ public class LoadCommandOptions { public CommonCommandOptions commonOptions = commonCommandOptions; @Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation," - + " conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed, pharmacogenomics." 
- + " 'all' loads everything", required = true, arity = 1) + + " conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed, pharmacogenomics," + + " protein_functional_prediction, missense_variation_functional_score, alphamissense; and 'all' loads everything", + required = true, arity = 1) public String data; @Parameter(names = {"-i", "--input"}, required = true, arity = 1, @@ -237,8 +246,8 @@ public class ExportCommandOptions { public CommonCommandOptions commonOptions = commonCommandOptions; @Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation, " - + "conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed. 'all' " - + " loads everything", required = true, arity = 1) + + EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA + ", conservation, regulation, protein, clinical_variants, repeats," + + " regulatory_pfm, splice_score, pubmed. 'all' export everything", required = true, arity = 1) public String data; @Parameter(names = {"--db", "--database"}, description = "Database name, e.g., cellbase_hsapiens_grch38_v5", required = true, diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java index 10c43d637c..fecf57c08a 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java @@ -98,10 +98,10 @@ public static void main(String[] args) { commandExecutor.execute(); } catch (IOException | URISyntaxException | CellBaseException e) { commandExecutor.getLogger().error("Error: " + e.getMessage()); + e.printStackTrace(); System.exit(1); } } } } - } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java index 8c0d477023..380cbdaaba 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java @@ -17,11 +17,14 @@ package org.opencb.cellbase.app.cli.admin.executors; import com.beust.jcommander.ParameterException; -import org.apache.commons.lang.StringUtils; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.lang3.StringUtils; import org.opencb.cellbase.app.cli.CommandExecutor; import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer; import org.opencb.cellbase.core.serializer.CellBaseSerializer; @@ -33,12 +36,16 @@ import java.io.File; import java.io.IOException; -import java.nio.file.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; import java.util.Arrays; import java.util.Collections; import java.util.List; -import static org.opencb.cellbase.lib.EtlCommons.PHARMGKB_DATA; +import static org.opencb.cellbase.core.utils.SpeciesUtils.getSpeciesShortname; +import static 
org.opencb.cellbase.lib.EtlCommons.*; /** * Created by imedina on 03/02/15. @@ -51,11 +58,16 @@ public class BuildCommandExecutor extends CommandExecutor { private Path downloadFolder = null; // /_/download private boolean normalize = true; - private File ensemblScriptsFolder; + private SpeciesConfiguration.Assembly assembly; + private String ensemblRelease; private boolean flexibleGTFParsing; private SpeciesConfiguration speciesConfiguration; + private static final List<String> VALID_SOURCES_TO_BUILD = Arrays.asList(GENOME_DATA, GENE_DATA, VARIATION_FUNCTIONAL_SCORE_DATA, + REGULATION_DATA, PROTEIN_DATA, CONSERVATION_DATA, CLINICAL_VARIANT_DATA, REPEATS_DATA, ONTOLOGY_DATA, SPLICE_SCORE_DATA, + PUBMED_DATA, PHARMACOGENOMICS_DATA, REVEL_DATA, ALPHAMISSENSE_DATA, PGS_DATA); + public BuildCommandExecutor(AdminCliOptionsParser.BuildCommandOptions buildCommandOptions) { super(buildCommandOptions.commonOptions.logLevel, buildCommandOptions.commonOptions.conf); @@ -63,16 +75,20 @@ public BuildCommandExecutor(AdminCliOptionsParser.BuildCommandOptions buildComma this.output = Paths.get(buildCommandOptions.outputDirectory); normalize = !buildCommandOptions.skipNormalize; - this.ensemblScriptsFolder = new File(System.getProperty("basedir") + "/bin/ensembl-scripts/"); this.flexibleGTFParsing = buildCommandOptions.flexibleGTFParsing; } - /** * Parse specific 'build' command options. + * + * @throws CellBaseException Exception */ - public void execute() { + public void execute() throws CellBaseException { + String data = null; try { + // Check data sources + List<String> dataList = checkDataSources(); + // Output directory needs to be created if it doesn't exist if (!Files.exists(output)) { Files.createDirectories(output); } @@ -82,7 +98,9 @@ public void execute() { if (speciesConfiguration == null) { throw new CellBaseException("Invalid species: '" + buildCommandOptions.species + "'"); } - SpeciesConfiguration.Assembly assembly = null; + if (!StringUtils.isEmpty(buildCommandOptions.assembly)) { assembly = SpeciesUtils.getAssembly(speciesConfiguration, buildCommandOptions.assembly); if (assembly == null) { @@ -92,7 +110,10 @@ public void execute() { assembly = SpeciesUtils.getDefaultAssembly(speciesConfiguration); } - String spShortName = SpeciesUtils.getSpeciesShortname(speciesConfiguration); + String ensemblVersion = assembly.getEnsemblVersion(); + ensemblRelease = "release-" + ensemblVersion.split("_")[0]; + + String spShortName = getSpeciesShortname(speciesConfiguration); String spAssembly = assembly.getName().toLowerCase(); Path spFolder = output.resolve(spShortName + "_" + spAssembly); // /_/download @@ -106,262 +127,211 @@ public void execute() { makeDir(buildFolder); } - if (buildCommandOptions.data != null) { - String[] buildOptions; - if (buildCommandOptions.data.equals("all")) { - buildOptions = speciesConfiguration.getData().toArray(new String[0]); - } else { - buildOptions = buildCommandOptions.data.split(","); + CellBaseBuilder parser; + for (int i = 0; i < dataList.size(); i++) { + data = dataList.get(i); + switch (data) { + case GENOME_DATA: + parser = buildGenomeSequence(); + break; + case GENE_DATA: + parser = buildGene(); + break; + case VARIATION_FUNCTIONAL_SCORE_DATA: + parser = buildCadd(); + break; + case REVEL_DATA: + parser = buildRevel(); + break; + case REGULATION_DATA: + parser = buildRegulation(); + break; + case PROTEIN_DATA: + parser = buildProtein(); + break; + case CONSERVATION_DATA: + parser = buildConservation(); + break; + case 
CLINICAL_VARIANT_DATA: + parser = buildClinicalVariants(); + break; + case REPEATS_DATA: + parser = buildRepeats(); + break; + case ONTOLOGY_DATA: + parser = buildObo(); + break; + case SPLICE_SCORE_DATA: + parser = buildSplice(); + break; + case PUBMED_DATA: + parser = buildPubMed(); + break; + case PHARMACOGENOMICS_DATA: + parser = buildPharmacogenomics(); + break; + case PGS_DATA: + parser = buildPolygenicScores(); + break; + case ALPHAMISSENSE_DATA: + parser = buildAlphaMissense(); + break; + default: + throw new IllegalArgumentException("Value '" + data + "' is not allowed for the data parameter." + + " Valid values are: " + StringUtils.join(VALID_SOURCES_TO_BUILD, ",") + "; or use 'all' to build" + + " everything"); } - for (int i = 0; i < buildOptions.length; i++) { - String buildOption = buildOptions[i]; - - logger.info("Building '{}' data", buildOption); - CellBaseBuilder parser = null; - switch (buildOption) { -// case EtlCommons.GENOME_INFO_DATA: -// buildGenomeInfo(); -// break; - case EtlCommons.GENOME_DATA: - parser = buildGenomeSequence(); - break; - case EtlCommons.GENE_DATA: - parser = buildGene(); - break; - case EtlCommons.REFSEQ_DATA: - parser = buildRefSeq(); - break; - case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: - parser = buildCadd(); - break; - case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: - parser = buildRevel(); - break; - case EtlCommons.REGULATION_DATA: - parser = buildRegulation(); - break; - case EtlCommons.PROTEIN_DATA: - parser = buildProtein(); - break; -// case EtlCommons.PPI_DATA: -// parser = getInteractionParser(); -// break; - case EtlCommons.CONSERVATION_DATA: - parser = buildConservation(); - break; - case EtlCommons.CLINICAL_VARIANTS_DATA: - parser = buildClinicalVariants(); - break; - case EtlCommons.REPEATS_DATA: - parser = buildRepeats(); - break; - case EtlCommons.OBO_DATA: - parser = buildObo(); - break; - case EtlCommons.SPLICE_SCORE_DATA: - parser = buildSplice(); - break; - case EtlCommons.PUBMED_DATA: - parser = buildPubMed(); - break; - case EtlCommons.PHARMACOGENOMICS_DATA: - parser = buildPharmacogenomics(); - break; - default: - logger.error("Build option '" + buildCommandOptions.data + "' is not valid"); - break; - } - - if (parser != null) { - try { - parser.parse(); - } catch (Exception e) { - logger.error("Error executing 'build' command " + buildCommandOptions.data + ": " + e.getMessage(), e); - } - parser.disconnect(); - } + if (parser != null) { + parser.parse(); + parser.disconnect(); } } - } catch (ParameterException e) { - logger.error("Error parsing build command line parameters: " + e.getMessage(), e); - } catch (IOException | CellBaseException e) { - logger.error(e.getMessage()); + } catch (Exception e) { + String msg = "Error executing the command 'build'"; + if (StringUtils.isNotEmpty(data)) { + msg += ". The last data being built was '" + data + "'"; + } + throw new CellBaseException(msg + ": " + e.getMessage(), e); } } - private CellBaseBuilder buildRepeats() { - Path repeatsFilesDir = downloadFolder.resolve(EtlCommons.REPEATS_FOLDER); - copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.TRF_VERSION_FILE))); - copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.GSD_VERSION_FILE))); - copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.WM_VERSION_FILE))); - // TODO: chunk size is not really used in ConvervedRegionParser, remove? 
- CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, EtlCommons.REPEATS_JSON); - return new RepeatsBuilder(repeatsFilesDir, serializer); + private CellBaseBuilder buildRepeats() throws CellBaseException { + // Sanity check + Path repeatsDownloadPath = downloadFolder.resolve(REPEATS_DATA); + List<Path> versionPaths = Arrays.asList(repeatsDownloadPath.resolve(getDataVersionFilename(TRF_DATA)), + repeatsDownloadPath.resolve(getDataVersionFilename(GSD_DATA)), + repeatsDownloadPath.resolve(getDataVersionFilename(WM_DATA))); + copyVersionFiles(versionPaths, buildFolder.resolve(REPEATS_DATA)); + + // Create serializer and return the repeats builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder.resolve(REPEATS_DATA), REPEATS_BASENAME); + return new RepeatsBuilder(repeatsDownloadPath, serializer, configuration); } - private CellBaseBuilder buildObo() { - Path oboDir = downloadFolder.resolve(EtlCommons.OBO_DATA); - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, EtlCommons.OBO_JSON); - return new OntologyBuilder(oboDir, serializer); + private CellBaseBuilder buildObo() throws CellBaseException { + Path oboDownloadPath = downloadFolder.resolve(ONTOLOGY_DATA); + Path oboBuildPath = buildFolder.resolve(ONTOLOGY_DATA); + List<Path> versionPaths = Arrays.asList(oboDownloadPath.resolve(getDataVersionFilename(HPO_OBO_DATA)), + oboDownloadPath.resolve(getDataVersionFilename(GO_OBO_DATA)), + oboDownloadPath.resolve(getDataVersionFilename(DOID_OBO_DATA)), + oboDownloadPath.resolve(getDataVersionFilename(MONDO_OBO_DATA))); + copyVersionFiles(versionPaths, oboBuildPath); + + // Create serializer and return the ontology builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(oboBuildPath, OBO_BASENAME); + return new OntologyBuilder(oboDownloadPath, serializer); } + /** + * @deprecated Use the new copyVersionFiles(List<Path>, Path) instead. + */ + @Deprecated private void copyVersionFiles(List<Path> pathList) { for (Path path : pathList) { try { Files.copy(path, downloadFolder.resolve(path.getFileName()), StandardCopyOption.REPLACE_EXISTING); } catch (IOException e) { - logger.warn("Version file {} not found - skipping", path.toString()); + logger.warn("Version file {} not found - skipping", path); } } } -// private void buildGenomeInfo() { -// /** -// * To get some extra info about the genome such as chromosome length or cytobands -// * we execute the following script. -// */ -// try { -// String outputFileName = downloadFolder.resolve("genome_info.json").toAbsolutePath().toString(); -// List args = new ArrayList<>(); -// args.addAll(Arrays.asList("--species", speciesConfigurathtion.getScientificName(), -// "--assembly", buildCommandOptions.assembly == null ? 
getDefaultHumanAssembly() : buildCommandOptions.assembly, -// "-o", outputFileName, -// "--ensembl-libs", configuration.getDownload().getEnsembl().getLibs())); -// if (!configuration.getSpecies().getVertebrates().contains(speciesConfiguration) -// && !speciesConfiguration.getScientificName().equals("Drosophila melanogaster")) { -// args.add("--phylo"); -// args.add("no-vertebrate"); -// } -// -// String geneInfoLogFileName = downloadFolder.resolve("genome_info.log").toAbsolutePath().toString(); -// -// boolean downloadedGenomeInfo; -// downloadedGenomeInfo = EtlCommons.runCommandLineProcess(ensemblScriptsFolder, "./genome_info.pl", args, geneInfoLogFileName); -// -// if (downloadedGenomeInfo) { -// logger.info(outputFileName + " created OK"); -// } else { -// logger.error("Genome info for " + speciesConfiguration.getScientificName() + " cannot be downloaded"); -// } -// } catch (IOException | InterruptedException e) { -// e.printStackTrace(); -// } -// } - - private CellBaseBuilder buildGenomeSequence() { - copyVersionFiles(Collections.singletonList(downloadFolder.resolve("genome/genomeVersion.json"))); - Path fastaFile = getFastaReferenceGenome(); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "genome_sequence"); - return new GenomeSequenceFastaBuilder(fastaFile, serializer); + private CellBaseBuilder buildGenomeSequence() throws CellBaseException { + // Sanity check + Path genomeVersionPath = downloadFolder.resolve(GENOME_DATA).resolve(getDataVersionFilename(GENOME_DATA)); + copyVersionFiles(Collections.singletonList(genomeVersionPath), buildFolder.resolve(GENOME_DATA)); + + // Get FASTA path + Path fastaPath = getFastaReferenceGenome(); + + // Create serializer and return the genome builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder.resolve(GENOME_DATA), GENOME_DATA); + return new GenomeSequenceFastaBuilder(fastaPath, serializer); } private CellBaseBuilder buildGene() throws CellBaseException { - Path geneFolderPath = downloadFolder.resolve("gene"); - copyVersionFiles(Arrays.asList(geneFolderPath.resolve("dgidbVersion.json"), - geneFolderPath.resolve("ensemblCoreVersion.json"), geneFolderPath.resolve("uniprotXrefVersion.json"), - geneFolderPath.resolve("geneExpressionAtlasVersion.json"), - geneFolderPath.resolve("hpoVersion.json"), geneFolderPath.resolve("disgenetVersion.json"), - geneFolderPath.resolve("gnomadVersion.json"))); - Path genomeFastaFilePath = getFastaReferenceGenome(); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "gene"); - return new GeneBuilder(geneFolderPath, genomeFastaFilePath, speciesConfiguration, flexibleGTFParsing, serializer); + return new GeneBuilder(downloadFolder.resolve(GENE_DATA), buildFolder.resolve(GENE_DATA), speciesConfiguration, flexibleGTFParsing); } - private CellBaseBuilder buildRefSeq() { - Path refseqFolderPath = downloadFolder.resolve("refseq"); - copyVersionFiles(Arrays.asList(refseqFolderPath.resolve("refSeqVersion.json"))); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "refseq"); - return new RefSeqGeneBuilder(refseqFolderPath, speciesConfiguration, serializer); - } + private CellBaseBuilder buildCadd() throws CellBaseException { + // Sanity check + Path caddDownloadPath = downloadFolder.resolve(VARIATION_FUNCTIONAL_SCORE_DATA).resolve(CADD_DATA); + Path caddBuildPath = buildFolder.resolve(VARIATION_FUNCTIONAL_SCORE_DATA).resolve(CADD_DATA); + 
copyVersionFiles(Collections.singletonList(caddDownloadPath.resolve(getDataVersionFilename(CADD_DATA))), caddBuildPath); - private CellBaseBuilder buildCadd() { - Path variationFunctionalScorePath = downloadFolder.resolve("variation_functional_score"); - copyVersionFiles(Arrays.asList(variationFunctionalScorePath.resolve("caddVersion.json"))); - Path caddFilePath = variationFunctionalScorePath.resolve("whole_genome_SNVs.tsv.gz"); - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "cadd"); - return new CaddScoreBuilder(caddFilePath, serializer); + // Create the file serializer and the CADD score builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(caddBuildPath, CADD_DATA); + return new CaddScoreBuilder(caddDownloadPath, serializer); } - private CellBaseBuilder buildRevel() { - Path missensePredictionScorePath = downloadFolder.resolve(EtlCommons.MISSENSE_VARIATION_SCORE_DATA); - copyVersionFiles(Arrays.asList(missensePredictionScorePath.resolve("revelVersion.json"))); - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, EtlCommons.MISSENSE_VARIATION_SCORE_DATA); - return new RevelScoreBuilder(missensePredictionScorePath, serializer); - } + private CellBaseBuilder buildRevel() throws CellBaseException { + // Sanity check + Path revelDownloadPath = downloadFolder.resolve(MISSENSE_VARIATION_SCORE_DATA).resolve(REVEL_DATA); + Path revelBuildPath = buildFolder.resolve(MISSENSE_VARIATION_SCORE_DATA).resolve(REVEL_DATA); + copyVersionFiles(Collections.singletonList(revelDownloadPath.resolve(getDataVersionFilename(REVEL_DATA))), revelBuildPath); - private CellBaseBuilder buildRegulation() { - Path regulatoryRegionFilesDir = downloadFolder.resolve("regulation"); - copyVersionFiles(Collections.singletonList(regulatoryRegionFilesDir.resolve("ensemblRegulationVersion.json"))); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "regulatory_region"); - return new RegulatoryFeatureBuilder(regulatoryRegionFilesDir, serializer); + // Create the file serializer and the Revel score builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(revelBuildPath, REVEL_DATA); + return new RevelScoreBuilder(revelDownloadPath, serializer); } - private CellBaseBuilder buildProtein() { - Path proteinFolder = downloadFolder.resolve("protein"); - copyVersionFiles(Arrays.asList(proteinFolder.resolve("uniprotVersion.json"), - proteinFolder.resolve("interproVersion.json"))); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "protein"); - return new ProteinBuilder(proteinFolder.resolve("uniprot_chunks"), - downloadFolder.resolve("protein").resolve("protein2ipr.dat.gz"), speciesConfiguration.getScientificName(), serializer); - } + private CellBaseBuilder buildRegulation() throws CellBaseException { + // Sanity check + Path regulationDownloadPath = downloadFolder.resolve(REGULATION_DATA); + Path regulationBuildPath = buildFolder.resolve(REGULATION_DATA); + copyVersionFiles(Arrays.asList(regulationDownloadPath.resolve(getDataVersionFilename(REGULATORY_BUILD_DATA)), + regulationDownloadPath.resolve(getDataVersionFilename(MOTIF_FEATURES_DATA))), regulationBuildPath); - private void getProteinFunctionPredictionMatrices(SpeciesConfiguration sp, Path geneFolder) - throws IOException, InterruptedException { - logger.info("Downloading protein function prediction matrices ..."); - - // run protein_function_prediction_matrices.pl - String proteinFunctionProcessLogFile = 
geneFolder.resolve("protein_function_prediction_matrices.log").toString(); - List args = Arrays.asList("--species", sp.getScientificName(), "--outdir", geneFolder.toString(), - "--ensembl-libs", configuration.getDownload().getEnsembl().getLibs()); - - boolean proteinFunctionPredictionMatricesObtaines = EtlCommons.runCommandLineProcess(ensemblScriptsFolder, - "./protein_function_prediction_matrices.pl", - args, - proteinFunctionProcessLogFile); - - // check output - if (proteinFunctionPredictionMatricesObtaines) { - logger.info("Protein function prediction matrices created OK"); - } else { - logger.error("Protein function prediction matrices for " + sp.getScientificName() + " cannot be downloaded"); - } + // Create the file serializer and the regulatory feature builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(regulationBuildPath, REGULATORY_REGION_BASENAME); + return new RegulatoryFeatureBuilder(regulationDownloadPath, serializer); } - private CellBaseBuilder getInteractionParser() { - Path proteinFolder = downloadFolder.resolve("protein"); - Path psimiTabFile = proteinFolder.resolve("intact.txt"); - copyVersionFiles(Arrays.asList(proteinFolder.resolve("intactVersion.json"))); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "protein_protein_interaction"); - return new InteractionBuilder(psimiTabFile, speciesConfiguration.getScientificName(), serializer); + private CellBaseBuilder buildProtein() throws CellBaseException { + // Sanity check + Path proteinDownloadPath = downloadFolder.resolve(PROTEIN_DATA); + Path proteinBuildPath = buildFolder.resolve(PROTEIN_DATA); + copyVersionFiles(Arrays.asList(proteinDownloadPath.resolve(getDataVersionFilename(UNIPROT_DATA)), + proteinDownloadPath.resolve(getDataVersionFilename(INTERPRO_DATA))), proteinBuildPath); + + // Create the file serializer and the protein builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(proteinBuildPath, PROTEIN_DATA); + return new ProteinBuilder(proteinDownloadPath, speciesConfiguration.getScientificName(), serializer); } - private CellBaseBuilder buildConservation() { - Path conservationFilesDir = downloadFolder.resolve("conservation"); - copyVersionFiles(Arrays.asList(conservationFilesDir.resolve("gerpVersion.json"), - conservationFilesDir.resolve("phastConsVersion.json"), - conservationFilesDir.resolve("phyloPVersion.json"))); - // TODO: chunk size is not really used in ConvervedRegionParser, remove? 
+ private CellBaseBuilder buildConservation() throws CellBaseException { + // Sanity check + Path conservationDownloadPath = downloadFolder.resolve(CONSERVATION_DATA); + Path conservationBuildPath = buildFolder.resolve(CONSERVATION_DATA); + copyVersionFiles(Arrays.asList(conservationDownloadPath.resolve(getDataVersionFilename(GERP_DATA)), + conservationDownloadPath.resolve(getDataVersionFilename(PHASTCONS_DATA)), + conservationDownloadPath.resolve(getDataVersionFilename(PHYLOP_DATA))), conservationBuildPath); + int conservationChunkSize = MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder); - return new ConservationBuilder(conservationFilesDir, conservationChunkSize, serializer); + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(conservationBuildPath); + return new ConservationBuilder(conservationDownloadPath, conservationChunkSize, serializer); } - private CellBaseBuilder buildClinicalVariants() { - Path clinicalVariantFolder = downloadFolder.resolve(EtlCommons.CLINICAL_VARIANTS_FOLDER); - copyVersionFiles(Arrays.asList(clinicalVariantFolder.resolve("clinvarVersion.json"))); - copyVersionFiles(Arrays.asList(clinicalVariantFolder.resolve("gwasVersion.json"))); - - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, - EtlCommons.CLINICAL_VARIANTS_JSON_FILE.replace(".json.gz", ""), true); - return new ClinicalVariantBuilder(clinicalVariantFolder, normalize, getFastaReferenceGenome(), + private CellBaseBuilder buildClinicalVariants() throws CellBaseException { + // Sanity check + Path clinicalDownloadPath = downloadFolder.resolve(CLINICAL_VARIANT_DATA); + Path clinicalBuildPath = buildFolder.resolve(CLINICAL_VARIANT_DATA); + copyVersionFiles(Arrays.asList(clinicalDownloadPath.resolve(getDataVersionFilename(CLINVAR_DATA)), + clinicalDownloadPath.resolve(getDataVersionFilename(COSMIC_DATA)), + clinicalDownloadPath.resolve(getDataVersionFilename(HGMD_DATA)), + clinicalDownloadPath.resolve(getDataVersionFilename(GWAS_DATA))), clinicalBuildPath); + + // Create the file serializer and the clinical variants builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(clinicalBuildPath, CLINICAL_VARIANTS_BASENAME, true); + return new ClinicalVariantBuilder(clinicalDownloadPath, normalize, getFastaReferenceGenome(), buildCommandOptions.assembly == null ? getDefaultHumanAssembly() : buildCommandOptions.assembly, - serializer); + configuration, serializer); } private String getDefaultHumanAssembly() { for (SpeciesConfiguration species : configuration.getSpecies().getVertebrates()) { - if (species.getId().equals("hsapiens")) { + if (species.getId().equals(HSAPIENS_NAME)) { return species.getAssemblies().get(0).getName(); } } @@ -370,19 +340,30 @@ private String getDefaultHumanAssembly() { + "configuration file. 
No hsapiens data found within the configuration.json file"); } - private Path getFastaReferenceGenome() { - Path fastaFile = null; - try { - DirectoryStream<Path> stream = Files.newDirectoryStream(downloadFolder.resolve("genome"), entry -> { - return entry.toString().endsWith(".fa"); - }); - for (Path entry : stream) { - fastaFile = entry; + private Path getFastaReferenceGenome() throws CellBaseException { + // Check FASTA and unzip if necessary + String ensemblUrl = getEnsemblUrl(configuration.getDownload().getEnsembl(), ensemblRelease, ENSEMBL_PRIMARY_FA_FILE_ID, + getSpeciesShortname(speciesConfiguration), assembly.getName(), null); + String fastaFilename = Paths.get(ensemblUrl).getFileName().toString(); + Path fastaPath = downloadFolder.resolve(GENOME_DATA).resolve(fastaFilename); + if (fastaPath.toFile().exists()) { + // Gunzip + logger.info("Gunzip file: {}", fastaPath); + try { + EtlCommons.runCommandLineProcess(null, "gunzip", Collections.singletonList(fastaPath.toString()), null); + } catch (IOException e) { + throw new CellBaseException("Error executing gunzip in FASTA file " + fastaPath, e); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("Error executing gunzip in FASTA file " + fastaPath, e); } - } catch (IOException e) { - e.printStackTrace(); } - return fastaFile; + fastaPath = downloadFolder.resolve(GENOME_DATA).resolve(fastaFilename.replace(".gz", "")); + if (!fastaPath.toFile().exists()) { + throw new CellBaseException("FASTA file " + fastaPath + " does not exist after executing gunzip"); + } + return fastaPath; } private CellBaseBuilder buildSplice() throws IOException { @@ -402,39 +383,136 @@ private CellBaseBuilder buildSplice() throws IOException { return new SpliceBuilder(spliceInputFolder, serializer); } - private CellBaseBuilder buildPubMed() throws IOException { - Path pubmedInputFolder = downloadFolder.resolve(EtlCommons.PUBMED_DATA); - Path pubmedOutputFolder = buildFolder.resolve(EtlCommons.PUBMED_DATA); - if (!pubmedOutputFolder.toFile().exists()) { - pubmedOutputFolder.toFile().mkdirs(); + private CellBaseBuilder buildPubMed() throws CellBaseException { + // Sanity check + Path pubMedDownloadPath = downloadFolder.resolve(PUBMED_DATA); + Path pubMedBuildPath = buildFolder.resolve(PUBMED_DATA); + copyVersionFiles(Collections.singletonList(pubMedDownloadPath.resolve(getDataVersionFilename(PUBMED_DATA))), pubMedBuildPath); + + // Create the file serializer and the PubMed builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pubMedBuildPath); + return new PubMedBuilder(pubMedDownloadPath, serializer, configuration); + } + + private CellBaseBuilder buildPharmacogenomics() throws CellBaseException { + // Sanity check + Path pharmGkbDownloadPath = downloadFolder.resolve(PHARMACOGENOMICS_DATA).resolve(PHARMGKB_DATA); + Path pharmGkbBuildPath = buildFolder.resolve(PHARMACOGENOMICS_DATA).resolve(PHARMGKB_DATA); + copyVersionFiles(Arrays.asList(pharmGkbDownloadPath.resolve(getDataVersionFilename(PHARMGKB_DATA))), pharmGkbBuildPath); + + // Create the file serializer and the PharmGKB builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pharmGkbBuildPath); + return new PharmGKBBuilder(pharmGkbDownloadPath, serializer); + } + + private void checkVersionFiles(List<Path> versionPaths) throws CellBaseException { + ObjectReader dataSourceReader = new ObjectMapper().readerFor(DataSource.class); + for (Path versionPath : versionPaths) { + if 
(!versionPath.toFile().exists()) { + throw new CellBaseException("Version file " + versionPath + " does not exist: this file is mandatory for version control"); + } + try { + DataSource dataSource = dataSourceReader.readValue(versionPath.toFile()); + if (StringUtils.isEmpty(dataSource.getVersion())) { + throw new CellBaseException("Missing version in file " + versionPath + ": a version must be specified in the" + " file"); + } + } catch (IOException e) { + throw new CellBaseException("Error parsing the version file " + versionPath, e); + } } + } - logger.info("Copying PubMed version file..."); - if (pubmedInputFolder.resolve(EtlCommons.PUBMED_VERSION_FILENAME).toFile().exists()) { - Files.copy(pubmedInputFolder.resolve(EtlCommons.PUBMED_VERSION_FILENAME), - pubmedOutputFolder.resolve(EtlCommons.PUBMED_VERSION_FILENAME), - StandardCopyOption.REPLACE_EXISTING); + private void copyVersionFiles(List<Path> versionPaths, Path targetPath) throws CellBaseException { + // Check version files before copying them + checkVersionFiles(versionPaths); + if (!targetPath.toFile().exists()) { + try { + Files.createDirectories(targetPath); + } catch (IOException e) { + throw new CellBaseException("Error creating folder " + targetPath, e); + } } - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pubmedOutputFolder); - return new PubMedBuilder(pubmedInputFolder, serializer); + for (Path versionPath : versionPaths) { + try { + Files.copy(versionPath, targetPath.resolve(versionPath.getFileName()), StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + throw new CellBaseException("Error copying version file " + versionPath + " to " + targetPath, e); + } + // Sanity check after copying + if (!targetPath.resolve(versionPath.getFileName()).toFile().exists()) { + throw new CellBaseException("Something wrong happened when copying version file " + versionPath + " to " + targetPath); + } + } + } + + private List<String> checkDataSources() { + if (StringUtils.isEmpty(buildCommandOptions.data)) { + throw new IllegalArgumentException("Missing data parameter. Valid values are: " + + StringUtils.join(VALID_SOURCES_TO_BUILD, ",") + "; or use 'all' to build everything"); + } + List<String> dataList = Arrays.asList(buildCommandOptions.data.split(",")); + for (String data : dataList) { + switch (data) { + case GENOME_DATA: + case GENE_DATA: + case REFSEQ_DATA: + case VARIATION_FUNCTIONAL_SCORE_DATA: + case MISSENSE_VARIATION_SCORE_DATA: + case REVEL_DATA: + case REGULATION_DATA: + case PROTEIN_DATA: + case CONSERVATION_DATA: + case CLINICAL_VARIANT_DATA: + case REPEATS_DATA: + case ONTOLOGY_DATA: + case SPLICE_SCORE_DATA: + case PUBMED_DATA: + case PHARMACOGENOMICS_DATA: + case ALPHAMISSENSE_DATA: + case PGS_DATA: + break; + default: + throw new IllegalArgumentException("Value '" + data + "' is not allowed for the data parameter. 
Valid values are: " + + StringUtils.join(VALID_SOURCES_TO_BUILD, ",") + "; or use 'all' to build everything"); + } + } + return dataList; } - private CellBaseBuilder buildPharmacogenomics() throws IOException { - Path inFolder = downloadFolder.resolve(EtlCommons.PHARMACOGENOMICS_DATA); - Path outFolder = buildFolder.resolve(EtlCommons.PHARMACOGENOMICS_DATA); + private CellBaseBuilder buildPolygenicScores() throws IOException { + Path inFolder = downloadFolder.resolve(EtlCommons.PGS_DATA); + Path outFolder = buildFolder.resolve(EtlCommons.PGS_DATA); if (!outFolder.toFile().exists()) { outFolder.toFile().mkdirs(); } - logger.info("Copying PharmGKB version file..."); - if (inFolder.resolve(PHARMGKB_DATA).resolve(EtlCommons.PHARMGKB_VERSION_FILENAME).toFile().exists()) { - Files.copy(inFolder.resolve(PHARMGKB_DATA).resolve(EtlCommons.PHARMGKB_VERSION_FILENAME), - outFolder.resolve(EtlCommons.PHARMGKB_VERSION_FILENAME), + logger.info("Copying PGS version file..."); + if (inFolder.resolve(PGS_CATALOG_VERSION_FILENAME).toFile().exists()) { + Files.copy(inFolder.resolve(PGS_CATALOG_VERSION_FILENAME), outFolder.resolve(PGS_CATALOG_VERSION_FILENAME), StandardCopyOption.REPLACE_EXISTING); } - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(outFolder); - return new PharmGKBBuilder(inFolder, serializer); + String basename = PolygenicScoreBuilder.VARIANT_POLYGENIC_SCORE_FILENAME.split("\\.")[0]; + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(outFolder, basename); + return new PolygenicScoreBuilder(PGS_CATALOG_NAME, configuration.getDownload().getPgs().getVersion(), inFolder, serializer); + } + + private CellBaseBuilder buildAlphaMissense() throws IOException { + Path inputFolder = downloadFolder.resolve(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); + Path outputFolder = buildFolder.resolve(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); + if (!outputFolder.toFile().exists()) { + outputFolder.toFile().mkdirs(); + } + + logger.info("Copying AlphaMissense version file..."); + if (inputFolder.resolve(EtlCommons.ALPHAMISSENSE_VERSION_FILENAME).toFile().exists()) { + Files.copy(inputFolder.resolve(EtlCommons.ALPHAMISSENSE_VERSION_FILENAME), + outputFolder.resolve(EtlCommons.ALPHAMISSENSE_VERSION_FILENAME), StandardCopyOption.REPLACE_EXISTING); + } + + File alphaMissenseFile = inputFolder.resolve(EtlCommons.ALPHAMISSENSE_RAW_FILENAME).toFile(); + String basename = EtlCommons.ALPHAMISSENSE_JSON_FILENAME.replace(".json.gz", ""); + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(outputFolder, basename); + return new AlphaMissenseBuilder(alphaMissenseFile, serializer); } } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java index f8197e6558..faf383ba26 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java @@ -16,26 +16,22 @@ package org.opencb.cellbase.app.cli.admin.executors; -import com.beust.jcommander.ParameterException; import org.apache.commons.lang3.StringUtils; -import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.cellbase.app.cli.CommandExecutor; import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser; -import org.opencb.cellbase.core.config.SpeciesConfiguration; import 
org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.core.utils.SpeciesUtils; -import org.opencb.cellbase.lib.EtlCommons; import org.opencb.cellbase.lib.download.AbstractDownloadManager; import org.opencb.cellbase.lib.download.DownloadFile; import org.opencb.cellbase.lib.download.Downloader; -import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import static org.opencb.cellbase.lib.EtlCommons.*; + /** * Created by imedina on 03/02/15. */ @@ -44,6 +40,10 @@ public class DownloadCommandExecutor extends CommandExecutor { private AdminCliOptionsParser.DownloadCommandOptions downloadCommandOptions; private Path outputDirectory; + private static final List<String> VALID_SOURCES_TO_DOWNLOAD = Arrays.asList(GENOME_DATA, GENE_DATA, VARIATION_FUNCTIONAL_SCORE_DATA, + REGULATION_DATA, PROTEIN_DATA, CONSERVATION_DATA, CLINICAL_VARIANT_DATA, REPEATS_DATA, ONTOLOGY_DATA, PUBMED_DATA, + PHARMACOGENOMICS_DATA, REVEL_DATA, ALPHAMISSENSE_DATA, PGS_DATA); + public DownloadCommandExecutor(AdminCliOptionsParser.DownloadCommandOptions downloadCommandOptions) { super(downloadCommandOptions.commonOptions.logLevel, downloadCommandOptions.commonOptions.conf); @@ -52,88 +52,103 @@ public DownloadCommandExecutor(AdminCliOptionsParser.DownloadCommandOptions down } /** - * Execute specific 'download' command options. + * Process CellBase command 'download'. + * + * @throws CellBaseException Exception */ - public void execute() { + public void execute() throws CellBaseException { try { String species = downloadCommandOptions.speciesAndAssemblyOptions.species; String assembly = downloadCommandOptions.speciesAndAssemblyOptions.assembly; List<DownloadFile> downloadFiles = new ArrayList<>(); - List<String> dataList = getDataList(species); + List<String> dataList = checkDataSources(); Downloader downloader = new Downloader(species, assembly, outputDirectory, configuration); for (String data : dataList) { switch (data) { - case EtlCommons.GENOME_DATA: + case GENOME_DATA: downloadFiles.addAll(downloader.downloadGenome()); break; - case EtlCommons.GENE_DATA: + case GENE_DATA: downloadFiles.addAll(downloader.downloadGene()); break; -// case EtlCommons.VARIATION_DATA: -// downloadManager.downloadVariation(); -// break; - case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: + case VARIATION_FUNCTIONAL_SCORE_DATA: downloadFiles.addAll(downloader.downloadCaddScores()); break; - case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: - downloadFiles.addAll(downloader.downloadPredictionScores()); + case REVEL_DATA: + downloadFiles.addAll(downloader.downloadRevelScores()); break; - case EtlCommons.REGULATION_DATA: + case REGULATION_DATA: downloadFiles.addAll(downloader.downloadRegulation()); break; - case EtlCommons.PROTEIN_DATA: + case PROTEIN_DATA: downloadFiles.addAll(downloader.downloadProtein()); break; - case EtlCommons.CONSERVATION_DATA: + case CONSERVATION_DATA: downloadFiles.addAll(downloader.downloadConservation()); break; - case EtlCommons.CLINICAL_VARIANTS_DATA: + case CLINICAL_VARIANT_DATA: downloadFiles.addAll(downloader.downloadClinicalVariants()); break; -// case EtlCommons.STRUCTURAL_VARIANTS_DATA: -// downloadFiles.add(downloadManager.downloadStructuralVariants()); -// break; - case EtlCommons.REPEATS_DATA: + case REPEATS_DATA: downloadFiles.addAll(downloader.downloadRepeats()); break; - case EtlCommons.OBO_DATA: + case ONTOLOGY_DATA: downloadFiles.addAll(downloader.downloadOntologies()); break; - case EtlCommons.PUBMED_DATA: + case 
PUBMED_DATA: downloadFiles.addAll(downloader.downloadPubMed()); break; - case EtlCommons.PHARMACOGENOMICS_DATA: + case PHARMACOGENOMICS_DATA: downloadFiles.addAll(downloader.downloadPharmKGB()); break; - default: - System.out.println("Value \"" + data + "\" is not allowed for the data parameter. Allowed values" - + " are: {genome, gene, gene_disease_association, variation, variation_functional_score," - + " regulation, protein, conservation, clinical_variants, ontology, pubmed}"); + case ALPHAMISSENSE_DATA: + downloadFiles.addAll(downloader.downloadAlphaMissense()); + break; + case PGS_DATA: + downloadFiles.addAll(downloader.downloadPolygenicScores()); break; + default: + throw new IllegalArgumentException("Value '" + data + "' is not allowed for the data parameter. Valid values are: " + + StringUtils.join(VALID_SOURCES_TO_DOWNLOAD, ",") + "; or use 'all' to download everything"); } } AbstractDownloadManager.writeDownloadLogFile(outputDirectory, downloadFiles); - } catch (ParameterException | IOException | CellBaseException | InterruptedException | NoSuchMethodException - | FileFormatException e) { - logger.error("Error in 'download' command line: " + e.getMessage()); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("Error executing command line 'download': " + e.getMessage(), e); + } catch (Exception e) { + e.printStackTrace(); + throw new CellBaseException("Error executing command line 'download': " + e.getMessage(), e); } } - private List<String> getDataList(String species) throws CellBaseException { - if (StringUtils.isEmpty(downloadCommandOptions.data) || downloadCommandOptions.data.equals("all")) { - return SpeciesUtils.getSpeciesConfiguration(configuration, species).getData(); - } else { - return Arrays.asList(downloadCommandOptions.data.split(",")); + private List<String> checkDataSources() { + if (StringUtils.isEmpty(downloadCommandOptions.data)) { + throw new IllegalArgumentException("Missing data parameter. Valid values are: " + + StringUtils.join(VALID_SOURCES_TO_DOWNLOAD, ",") + "; or use 'all' to download everything"); } - } - - @Deprecated - private List<String> getDataList(SpeciesConfiguration sp) { - List<String> dataList; - if (downloadCommandOptions.data.equals("all")) { - dataList = sp.getData(); - } else { - dataList = Arrays.asList(downloadCommandOptions.data.split(",")); + List<String> dataList = Arrays.asList(downloadCommandOptions.data.split(",")); + for (String data : dataList) { + switch (data) { + case GENOME_DATA: + case GENE_DATA: + case VARIATION_FUNCTIONAL_SCORE_DATA: + case MISSENSE_VARIATION_SCORE_DATA: + case REVEL_DATA: + case REGULATION_DATA: + case PROTEIN_DATA: + case CONSERVATION_DATA: + case CLINICAL_VARIANT_DATA: + case REPEATS_DATA: + case ONTOLOGY_DATA: + case PUBMED_DATA: + case PHARMACOGENOMICS_DATA: + case ALPHAMISSENSE_DATA: + case PGS_DATA: + break; + default: + throw new IllegalArgumentException("Value '" + data + "' is not allowed for the data parameter. 
Valid values are: " + + StringUtils.join(VALID_SOURCES_TO_DOWNLOAD, ",") + "; or use 'all' to download everything"); + } } return dataList; } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java index 72f992f344..791cc599ef 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java @@ -82,11 +82,9 @@ public ExportCommandExecutor(AdminCliOptionsParser.ExportCommandOptions exportCo this.assembly = splits[2]; if (exportCommandOptions.data.equals("all")) { - this.dataToExport = new String[]{EtlCommons.GENOME_DATA, EtlCommons.GENE_DATA, EtlCommons.REFSEQ_DATA, - EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA, - EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA, - EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA, - OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PHARMACOGENOMICS_DATA}; + this.dataToExport = new String[]{GENOME_DATA, GENE_DATA, REFSEQ_DATA, CONSERVATION_DATA, REGULATION_DATA, PROTEIN_DATA, + PROTEIN_SUBSTITUTION_PREDICTION_DATA, VARIATION_DATA, VARIATION_FUNCTIONAL_SCORE_DATA, CLINICAL_VARIANT_DATA, + REPEATS_DATA, ONTOLOGY_DATA, SPLICE_SCORE_DATA, PHARMACOGENOMICS_DATA}; } else { this.dataToExport = exportCommandOptions.data.split(","); } @@ -200,38 +198,6 @@ public void execute() throws CellBaseException { counterMsg = counter + " CADD items"; break; } - case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: { - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output); - ProteinManager proteinManager = managerFactory.getProteinManager(species, assembly); - Map> positionMap = new HashMap<>(); - for (Variant variant : variants) { - if (!positionMap.containsKey(variant.getChromosome())) { - positionMap.put(variant.getChromosome(), new ArrayList<>()); - } - positionMap.get(variant.getChromosome()).add(variant.getStart()); - if (positionMap.get(variant.getChromosome()).size() >= 200) { - CellBaseDataResult results = proteinManager - .getMissenseVariantFunctionalScores(variant.getChromosome(), - positionMap.get(variant.getChromosome()), null, dataRelease); - counter += writeExportedData(results.getResults(), "missense_variation_functional_score", serializer); - positionMap.put(variant.getChromosome(), new ArrayList<>()); - } - } - - // Process map - for (Map.Entry> entry : positionMap.entrySet()) { - if (CollectionUtils.isEmpty(entry.getValue())) { - continue; - } - CellBaseDataResult results = proteinManager - .getMissenseVariantFunctionalScores(entry.getKey(), entry.getValue(), null, dataRelease); - counter += writeExportedData(results.getResults(), "missense_variation_functional_score", serializer); - } - serializer.close(); - - counterMsg = counter + " missense variation functional scores"; - break; - } case EtlCommons.CONSERVATION_DATA: { // Export data CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output); @@ -271,7 +237,7 @@ public void execute() throws CellBaseException { counterMsg = counter + " proteins"; break; } - case EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA: { + case EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA: { ProteinManager proteinManager = 
managerFactory.getProteinManager(species, assembly); Map> transcriptsMap = new HashMap<>(); for (Gene gene : genes) { @@ -290,10 +256,10 @@ public void execute() throws CellBaseException { } serializer.close(); - counterMsg = counter + " protein functional predictions"; + counterMsg = counter + " protein substitution predictions"; break; } - case EtlCommons.CLINICAL_VARIANTS_DATA: { + case EtlCommons.CLINICAL_VARIANT_DATA: { counter = exportClinicalVariantData(regions); counterMsg = counter + " clinical variants"; break; @@ -309,7 +275,7 @@ public void execute() throws CellBaseException { counterMsg = counter + " repeats"; break; } - case OBO_DATA: { + case ONTOLOGY_DATA: { counter = exportOntologyData(); counterMsg = counter + " ontology items"; break; @@ -424,7 +390,7 @@ private String exportPharmacogenomicsData(List genes) private int exportClinicalVariantData(List regions) throws CellBaseException, QueryException, IllegalAccessException, IOException { - String baseFilename = CLINICAL_VARIANTS_DATA + ".full"; + String baseFilename = CLINICAL_VARIANT_DATA + ".full"; CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output, baseFilename); ClinicalManager clinicalManager = managerFactory.getClinicalManager(species, assembly); ClinicalVariantQuery query = new ClinicalVariantQuery(); @@ -449,7 +415,7 @@ private int exportClinicalVariantData(List regions) throws CellBaseExcep private int exportOntologyData() throws CellBaseException, IOException { int counter = 0; - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output, OBO_DATA); + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output, ONTOLOGY_DATA); OntologyManager ontologyManager = managerFactory.getOntologyManager(species, assembly); CellBaseIterator iterator = ontologyManager.iterator(new OntologyQuery()); while (iterator.hasNext()) { diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java index 5a8fd9417b..480e7ef09d 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java @@ -24,6 +24,7 @@ import org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.EtlCommons; +import org.opencb.cellbase.lib.builders.PolygenicScoreBuilder; import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor; import org.opencb.cellbase.lib.indexer.IndexManager; import org.opencb.cellbase.lib.loader.LoadRunner; @@ -44,6 +45,8 @@ import java.util.List; import java.util.concurrent.ExecutionException; +import static org.opencb.cellbase.lib.EtlCommons.*; + /** * Created by imedina on 03/02/15. 
*/ @@ -79,9 +82,9 @@ public LoadCommandExecutor(AdminCliOptionsParser.LoadCommandOptions loadCommandO loadOptions = new String[]{EtlCommons.GENOME_DATA, EtlCommons.GENE_DATA, EtlCommons.REFSEQ_DATA, EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA, EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA, - EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA, - EtlCommons.OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PUBMED_DATA, - EtlCommons.PHARMACOGENOMICS_DATA}; + EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANT_DATA, EtlCommons.REPEATS_DATA, + EtlCommons.ONTOLOGY_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, + EtlCommons.PUBMED_DATA, EtlCommons.PHARMACOGENOMICS_DATA, EtlCommons.PGS_DATA}; } else { loadOptions = loadCommandOptions.data.split(","); } @@ -198,16 +201,21 @@ public void execute() throws CellBaseException { } case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: { // Load data - loadIfExists(input.resolve("missense_variation_functional_score.json.gz"), - "missense_variation_functional_score"); + Path path = input.resolve(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); + loadIfExists(path.resolve(EtlCommons.MISSENSE_VARIATION_SCORE_JSON_FILENAME), + EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); // Create index - createIndex("missense_variation_functional_score"); + createIndex(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); // Update release (collection and sources) - List sources = new ArrayList<>(Collections.singletonList(input.resolve("revelVersion.json"))); - dataReleaseManager.update(dataRelease, "missense_variation_functional_score", - EtlCommons.MISSENSE_VARIATION_SCORE_DATA, sources); + dataReleaseManager.update(dataRelease, EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA, + REVEL_DATA, Collections.singletonList(path.resolve(EtlCommons.REVEL_VERSION_FILENAME))); + break; + } + case EtlCommons.ALPHAMISSENSE_DATA: { + // Load data, create index and update release + loadAlphaMissense(); break; } case EtlCommons.CONSERVATION_DATA: { @@ -255,7 +263,7 @@ public void execute() throws CellBaseException { loadProteinFunctionalPrediction(); break; } - case EtlCommons.CLINICAL_VARIANTS_DATA: { + case EtlCommons.CLINICAL_VARIANT_DATA: { // Load data, create index and update release loadClinical(); break; @@ -268,7 +276,7 @@ public void execute() throws CellBaseException { // case EtlCommons.STRUCTURAL_VARIANTS_DATA: // loadStructuralVariants(); // break; - case EtlCommons.OBO_DATA: { + case EtlCommons.ONTOLOGY_DATA: { // Load data loadIfExists(input.resolve("ontology.json.gz"), "ontology"); @@ -281,7 +289,7 @@ public void execute() throws CellBaseException { input.resolve(EtlCommons.GO_VERSION_FILE), input.resolve(EtlCommons.DO_VERSION_FILE) )); - dataReleaseManager.update(dataRelease, "ontology", EtlCommons.OBO_DATA, sources); + dataReleaseManager.update(dataRelease, "ontology", EtlCommons.ONTOLOGY_DATA, sources); break; } case EtlCommons.SPLICE_SCORE_DATA: { @@ -289,7 +297,7 @@ public void execute() throws CellBaseException { loadSpliceScores(); break; } - case EtlCommons.PUBMED_DATA: { + case PUBMED_DATA: { // Load data, create index and update release loadPubMed(); break; @@ -299,6 +307,11 @@ public void execute() throws CellBaseException { loadPharmacogenomica(); break; } + case EtlCommons.PGS_DATA: { + // Load data, create index and update release + loadPolygenicScores(); + 
break; + } default: logger.warn("Not valid 'data'. We should not reach this point"); break; @@ -427,19 +440,52 @@ private void loadProteinFunctionalPrediction() throws NoSuchMethodException, Int InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException, IOException, CellBaseException, LoaderException { // Load data - DirectoryStream stream = Files.newDirectoryStream(input, + Path path = input.resolve(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); + DirectoryStream stream = Files.newDirectoryStream(path, entry -> entry.getFileName().toString().startsWith("prot_func_pred_")); for (Path entry : stream) { logger.info("Loading file '{}'", entry); - loadRunner.load(input.resolve(entry.getFileName()), "protein_functional_prediction", dataRelease); + loadRunner.load(path.resolve(entry.getFileName()), EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA, dataRelease); } // Create index - createIndex("protein_functional_prediction"); + createIndex(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); // Update release (collection and sources) - dataReleaseManager.update(dataRelease, "protein_functional_prediction", null, null); + String sourceName = null; + List sourceUrls = new ArrayList<>(); + if (path.resolve(EtlCommons.SIFT_VERSION_FILENAME).toFile().exists()) { + sourceUrls.add(path.resolve(EtlCommons.SIFT_VERSION_FILENAME)); + sourceName = EtlCommons.SIFT_SOURCE_NAME; + } + dataReleaseManager.update(dataRelease, EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA, sourceName, sourceUrls); + + sourceUrls = new ArrayList<>(); + if (path.resolve(EtlCommons.POLYPHEN_VERSION_FILENAME).toFile().exists()) { + sourceUrls.add(path.resolve(EtlCommons.POLYPHEN_VERSION_FILENAME)); + sourceName = EtlCommons.POLYPHEN_SOURCE_NAME; + } + dataReleaseManager.update(dataRelease, EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA, sourceName, sourceUrls); + } + + private void loadAlphaMissense() throws NoSuchMethodException, InterruptedException, ExecutionException, + InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException, + IOException, CellBaseException, LoaderException { + Path proteinSubstitutionPath = input.resolve(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); + + // Load data + Path alphamissensePath = proteinSubstitutionPath.resolve(EtlCommons.ALPHAMISSENSE_JSON_FILENAME); + logger.info("Loading file '{}'", alphamissensePath); + loadRunner.load(alphamissensePath, EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA, dataRelease); + + // Create index + createIndex(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA); + + // Update release (collection and sources) + List sources = Collections.singletonList(proteinSubstitutionPath.resolve(EtlCommons.ALPHAMISSENSE_VERSION_FILENAME)); + dataReleaseManager.update(dataRelease, EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA, + EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA, sources); } private void loadClinical() throws FileNotFoundException { @@ -459,7 +505,7 @@ private void loadClinical() throws FileNotFoundException { input.resolve("cosmicVersion.json"), input.resolve("gwasVersion.json") )); - dataReleaseManager.update(dataRelease, "clinical_variants", EtlCommons.CLINICAL_VARIANTS_DATA, sources); + dataReleaseManager.update(dataRelease, "clinical_variants", EtlCommons.CLINICAL_VARIANT_DATA, sources); } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException | IllegalAccessException | ExecutionException | IOException | InterruptedException | 
CellBaseException e) { logger.error(e.toString()); @@ -484,9 +530,9 @@ private void loadRepeats() { // Update release (collection and sources) List sources = new ArrayList<>(Arrays.asList( - input.resolve(EtlCommons.TRF_VERSION_FILE), - input.resolve(EtlCommons.GSD_VERSION_FILE), - input.resolve(EtlCommons.WM_VERSION_FILE) + input.resolve(getDataVersionFilename(TRF_DATA)), + input.resolve(getDataVersionFilename(GSD_DATA)), + input.resolve(getDataVersionFilename(WM_DATA)) )); dataReleaseManager.update(dataRelease, "repeats", EtlCommons.REPEATS_DATA, sources); } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException @@ -536,7 +582,7 @@ private void loadSpliceScores(Path spliceFolder) throws IOException, ExecutionEx } private void loadPubMed() throws CellBaseException { - Path pubmedPath = input.resolve(EtlCommons.PUBMED_DATA); + Path pubmedPath = input.resolve(PUBMED_DATA); if (Files.exists(pubmedPath)) { // Load data @@ -544,7 +590,7 @@ private void loadPubMed() throws CellBaseException { if (file.isFile() && (file.getName().endsWith("gz"))) { logger.info("Loading file '{}'", file.getName()); try { - loadRunner.load(file.toPath(), EtlCommons.PUBMED_DATA, dataRelease); + loadRunner.load(file.toPath(), PUBMED_DATA, dataRelease); } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException | IllegalAccessException | ExecutionException | IOException | InterruptedException | LoaderException e) { logger.error("Error loading file '{}': {}", file.getName(), e.toString()); @@ -552,11 +598,11 @@ private void loadPubMed() throws CellBaseException { } } // Create index - createIndex(EtlCommons.PUBMED_DATA); + createIndex(PUBMED_DATA); // Update release (collection and sources) - List sources = Collections.singletonList(pubmedPath.resolve(EtlCommons.PUBMED_VERSION_FILENAME)); - dataReleaseManager.update(dataRelease, EtlCommons.PUBMED_DATA, EtlCommons.PUBMED_DATA, sources); + List sources = Collections.singletonList(pubmedPath.resolve(EtlCommons.getDataVersionFilename(PUBMED_DATA))); + dataReleaseManager.update(dataRelease, PUBMED_DATA, PUBMED_DATA, sources); } else { logger.warn("PubMed folder {} not found", pubmedPath); } @@ -585,10 +631,53 @@ private void loadPharmacogenomica() throws IOException, CellBaseException { createIndex(EtlCommons.PHARMACOGENOMICS_DATA); // Update release (collection and sources) - List sources = Collections.singletonList(pharmaPath.resolve(EtlCommons.PHARMGKB_VERSION_FILENAME)); + List sources = Collections.singletonList(pharmaPath.resolve(getDataVersionFilename(PHARMGKB_DATA))); dataReleaseManager.update(dataRelease, EtlCommons.PHARMACOGENOMICS_DATA, EtlCommons.PHARMACOGENOMICS_DATA, sources); } + private void loadPolygenicScores() throws NoSuchMethodException, InterruptedException, ExecutionException, InstantiationException, + IllegalAccessException, InvocationTargetException, ClassNotFoundException, IOException, CellBaseException, LoaderException { + Path pgsPath = input.resolve(EtlCommons.PGS_DATA); + + if (!Files.exists(pgsPath)) { + logger.warn("Polygenic scores (PGS) folder {} not found to load", pgsPath); + return; + } + + // Load common polygenic scores data + Path pathToLoad = pgsPath.resolve(PolygenicScoreBuilder.COMMON_POLYGENIC_SCORE_FILENAME); + logger.info("Loading file '{}'", pathToLoad.toFile().getName()); + try { + loadRunner.load(pathToLoad, EtlCommons.PGS_COMMON_COLLECTION, dataRelease); + } catch (ClassNotFoundException | NoSuchMethodException | 
InstantiationException | InvocationTargetException
+                | IllegalAccessException | ExecutionException | IOException | InterruptedException | CellBaseException
+                | LoaderException e) {
+            logger.error("Error loading file '{}': {}", pathToLoad.toFile().getName(), e.toString());
+        }
+
+        // Load variant polygenic scores data
+        pathToLoad = pgsPath.resolve(PolygenicScoreBuilder.VARIANT_POLYGENIC_SCORE_FILENAME);
+        logger.info("Loading file '{}'", pathToLoad.toFile().getName());
+        try {
+            loadRunner.load(pathToLoad, EtlCommons.PGS_VARIANT_COLLECTION, dataRelease);
+        } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException
+                | IllegalAccessException | ExecutionException | IOException | InterruptedException | CellBaseException
+                | LoaderException e) {
+            logger.error("Error loading file '{}': {}", pathToLoad.toFile().getName(), e.toString());
+        }
+
+        // Create index
+        createIndex(EtlCommons.PGS_COMMON_COLLECTION);
+        createIndex(EtlCommons.PGS_VARIANT_COLLECTION);
+
+        // Update release (collection and sources)
+        List<Path> sources = new ArrayList<>(Arrays.asList(
+                input.resolve(EtlCommons.PGS_DATA + "/" + EtlCommons.PGS_CATALOG_VERSION_FILENAME)
+        ));
+        dataReleaseManager.update(dataRelease, EtlCommons.PGS_VARIANT_COLLECTION, EtlCommons.PGS_DATA, sources);
+        dataReleaseManager.update(dataRelease, EtlCommons.PGS_COMMON_COLLECTION, null, null);
+    }
+
     private void createIndex(String collection) {
         if (!createIndexes) {
             return;
diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PolygenicScoreQuery.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PolygenicScoreQuery.java
new file mode 100644
index 0000000000..106b01e1fe
--- /dev/null
+++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PolygenicScoreQuery.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2015-2020 OpenCB
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
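The new loadPolygenicScores() above follows the same three-step contract as the other loaders: load each JSON file into its collection, create the indexes, then register the source in the data release. A condensed sketch of that contract, with the collaborators reduced to functional interfaces so it stands alone (the concrete filenames live in PolygenicScoreBuilder constants not shown in this hunk, so the names below are placeholders):

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.List;

public final class PgsLoadFlow {
    interface Loader { void load(Path file, String collection) throws Exception; }
    interface Indexer { void createIndex(String collection); }
    interface ReleaseUpdater { void update(String collection, String data, List<Path> sources); }

    public static void load(Path pgsPath, Loader loader, Indexer indexer, ReleaseUpdater release) throws Exception {
        if (!Files.exists(pgsPath)) {
            return; // mirrors the early return when the PGS folder is missing
        }
        // 1. Load both JSON files, each into its own collection
        loader.load(pgsPath.resolve("common_polygenic_scores.json.gz"), "common_polygenic_scores");
        loader.load(pgsPath.resolve("variant_polygenic_scores.json.gz"), "variant_polygenic_scores");
        // 2. Create the indexes for both collections
        indexer.createIndex("common_polygenic_scores");
        indexer.createIndex("variant_polygenic_scores");
        // 3. Register the PGS Catalog version file as the source of this data
        release.update("variant_polygenic_scores", "polygenic_score",
                Collections.singletonList(pgsPath.resolve("pgsCatalogVersion.json")));
    }
}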
+ */
+
+package org.opencb.cellbase.core.api;
+
+import org.opencb.cellbase.core.api.query.AbstractQuery;
+import org.opencb.cellbase.core.api.query.QueryException;
+import org.opencb.cellbase.core.api.query.QueryParameter;
+
+import java.util.List;
+import java.util.Map;
+
+public class PolygenicScoreQuery extends AbstractQuery {
+
+    @QueryParameter(id = "id")
+    private List<String> ids;
+
+    @QueryParameter(id = "name")
+    private List<String> names;
+
+    @QueryParameter(id = "source", allowedValues = {"PGS Catalog"})
+    private List<String> sources;
+
+    public PolygenicScoreQuery() {
+    }
+
+    public PolygenicScoreQuery(Map<String, String> params) throws QueryException {
+        super(params);
+
+        objectMapper.readerForUpdating(this);
+        objectMapper.readerFor(PolygenicScoreQuery.class);
+        objectWriter = objectMapper.writerFor(PolygenicScoreQuery.class);
+    }
+
+    @Override
+    protected void validateQuery() throws QueryException {
+        // Nothing to do
+        return;
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder sb = new StringBuilder("PolygenicScoreQuery{");
+        sb.append("ids=").append(ids);
+        sb.append(", names=").append(names);
+        sb.append(", sources=").append(sources);
+        sb.append(", limit=").append(limit);
+        sb.append(", skip=").append(skip);
+        sb.append(", count=").append(count);
+        sb.append(", sort='").append(sort).append('\'');
+        sb.append(", order=").append(order);
+        sb.append(", facet='").append(facet).append('\'');
+        sb.append(", includes=").append(includes);
+        sb.append(", excludes=").append(excludes);
+        sb.append('}');
+        return sb.toString();
+    }
+
+    public List<String> getIds() {
+        return ids;
+    }
+
+    public PolygenicScoreQuery setIds(List<String> ids) {
+        this.ids = ids;
+        return this;
+    }
+
+    public List<String> getNames() {
+        return names;
+    }
+
+    public PolygenicScoreQuery setNames(List<String> names) {
+        this.names = names;
+        return this;
+    }
+
+    public List<String> getSources() {
+        return sources;
+    }
+
+    public PolygenicScoreQuery setSources(List<String> sources) {
+        this.sources = sources;
+        return this;
+    }
+}
diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java
index 19f1606c91..8f2d714d26 100644
--- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java
+++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java
@@ -16,7 +16,7 @@
 
 package org.opencb.cellbase.core.config;
 
-import java.util.List;
+import java.util.Map;
 
 /**
  * Created by imedina on 19/08/16.
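A quick usage sketch for the PolygenicScoreQuery class added above; the REST parameter names come from its @QueryParameter annotations, and the two IDs are illustrative values in the PGS Catalog format:

import java.util.Arrays;

public class PolygenicScoreQueryExample {
    public static void main(String[] args) {
        // Query two polygenic scores by ID, restricted to the "PGS Catalog" source
        PolygenicScoreQuery query = new PolygenicScoreQuery()
                .setIds(Arrays.asList("PGS000001", "PGS000002"))
                .setSources(Arrays.asList("PGS Catalog"));
        System.out.println(query); // uses the toString() defined above
    }
}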
@@ -28,16 +28,11 @@ public class DownloadProperties { private URLProperties hgnc; private URLProperties cancerHotspot; private URLProperties refSeq; - private URLProperties refSeqFasta; - private URLProperties refSeqProteinFasta; - private URLProperties refSeqCdna; private URLProperties maneSelect; private URLProperties lrg; - private URLProperties geneUniprotXref; private URLProperties geneExpressionAtlas; private URLProperties mirbase; - private URLProperties mirbaseReadme; private URLProperties targetScan; private URLProperties miRTarBase; private URLProperties uniprot; @@ -45,16 +40,12 @@ public class DownloadProperties { private URLProperties intact; private URLProperties interpro; private URLProperties interproRelNotes; - private URLProperties conservation; + private URLProperties phastCons; + private URLProperties phylop; private URLProperties gerp; private URLProperties clinvar; - private URLProperties clinvarVariation; - private URLProperties clinvarSummary; - private URLProperties clinvarVariationAllele; - private URLProperties clinvarEfoTerms; - private URLProperties iarctp53; - private URLProperties docm; - private URLProperties docmVersion; + private URLProperties cosmic; + private URLProperties hgmd; private URLProperties dgv; private URLProperties simpleRepeats; private URLProperties windowMasker; @@ -76,6 +67,8 @@ public class DownloadProperties { private URLProperties revel; private URLProperties pubmed; private URLProperties pharmGKB; + private URLProperties alphaMissense; + private URLProperties pgs; public EnsemblProperties getEnsembl() { return ensembl; @@ -131,15 +124,6 @@ public DownloadProperties setMirbase(URLProperties mirbase) { return this; } - public URLProperties getMirbaseReadme() { - return mirbaseReadme; - } - - public DownloadProperties setMirbaseReadme(URLProperties mirbaseReadme) { - this.mirbaseReadme = mirbaseReadme; - return this; - } - public URLProperties getTargetScan() { return targetScan; } @@ -203,12 +187,21 @@ public DownloadProperties setInterproRelNotes(URLProperties interproRelNotes) { return this; } - public URLProperties getConservation() { - return conservation; + public URLProperties getPhastCons() { + return phastCons; + } + + public DownloadProperties setPhastCons(URLProperties phastCons) { + this.phastCons = phastCons; + return this; + } + + public URLProperties getPhylop() { + return phylop; } - public DownloadProperties setConservation(URLProperties conservation) { - this.conservation = conservation; + public DownloadProperties setPhylop(URLProperties phylop) { + this.phylop = phylop; return this; } @@ -230,65 +223,24 @@ public DownloadProperties setClinvar(URLProperties clinvar) { return this; } - public URLProperties getClinvarVariation() { - return clinvarVariation; + public URLProperties getCosmic() { + return cosmic; } - public DownloadProperties setClinvarVariation(URLProperties clinvarVariation) { - this.clinvarVariation = clinvarVariation; + public DownloadProperties setCosmic(URLProperties cosmic) { + this.cosmic = cosmic; return this; } - public URLProperties getClinvarSummary() { - return clinvarSummary; + public URLProperties getHgmd() { + return hgmd; } - public DownloadProperties setClinvarSummary(URLProperties clinvarSummary) { - this.clinvarSummary = clinvarSummary; + public DownloadProperties setHgmd(URLProperties hgmd) { + this.hgmd = hgmd; return this; } - public URLProperties getClinvarVariationAllele() { - return clinvarVariationAllele; - } - - public void setClinvarVariationAllele(URLProperties 
clinvarVariationAllele) { - this.clinvarVariationAllele = clinvarVariationAllele; - } - - public URLProperties getClinvarEfoTerms() { - return clinvarEfoTerms; - } - - public DownloadProperties setClinvarEfoTerms(URLProperties clinvarEfoTerms) { - this.clinvarEfoTerms = clinvarEfoTerms; - return this; - } - - public URLProperties getIarctp53() { - return iarctp53; - } - - public void setIarctp53(URLProperties iarctp53) { - this.iarctp53 = iarctp53; - } - - public URLProperties getDocm() { - return docm; - } - - public void setDocm(URLProperties docm) { - this.docm = docm; - } - - public URLProperties getDocmVersion() { - return docmVersion; - } - - public void setDocmVersion(URLProperties docmVersion) { - this.docmVersion = docmVersion; - } - public URLProperties getDgv() { return dgv; } @@ -447,19 +399,6 @@ public DownloadProperties setRefSeq(URLProperties refSeq) { return this; } - public URLProperties getRefSeqFasta() { - return refSeqFasta; - } - - public DownloadProperties setRefSeqFasta(URLProperties refSeqFasta) { - this.refSeqFasta = refSeqFasta; - return this; - } - - public URLProperties getRefSeqProteinFasta() { - return refSeqProteinFasta; - } - public URLProperties getRevel() { return revel; } @@ -487,17 +426,21 @@ public DownloadProperties setPharmGKB(URLProperties pharmGKB) { return this; } - public DownloadProperties setRefSeqProteinFasta(URLProperties refSeqProteinFasta) { - this.refSeqProteinFasta = refSeqProteinFasta; + public URLProperties getAlphaMissense() { + return alphaMissense; + } + + public DownloadProperties setAlphaMissense(URLProperties alphaMissense) { + this.alphaMissense = alphaMissense; return this; } - public URLProperties getRefSeqCdna() { - return refSeqCdna; + public URLProperties getPgs() { + return pgs; } - public DownloadProperties setRefSeqCdna(URLProperties refSeqCdna) { - this.refSeqCdna = refSeqCdna; + public DownloadProperties setPgs(URLProperties pgs) { + this.pgs = pgs; return this; } @@ -572,7 +515,7 @@ public static class URLProperties { private String host; private String version; - private List files; + private Map files; public String getHost() { return host; @@ -591,14 +534,13 @@ public URLProperties setVersion(String version) { return this; } - public List getFiles() { + public Map getFiles() { return files; } - public URLProperties setFiles(List files) { + public URLProperties setFiles(Map files) { this.files = files; return this; } - } } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java index 884c63f2ae..422a52b0d4 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java @@ -22,5 +22,8 @@ public CellBaseException(String msg) { super(msg); } + public CellBaseException(String msg, Throwable e) { + super(msg, e); + } } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataRelease.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataRelease.java index 5674839aa8..47a694c5d8 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataRelease.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataRelease.java @@ -21,14 +21,9 @@ public class DataRelease { private int release; private String date; - /** - * @deprecated it is maintained to back-compatibility with previous CellBase versions to v5.5 - 
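With URLProperties.files now a Map rather than a List, downloaders can look entries up by a stable file ID (the *_FILE_ID constants added to EtlCommons further below) instead of by position. A minimal sketch, assuming only the getHost()/getFiles() accessors defined above; the helper class is hypothetical:

public final class DownloadFileResolver {

    // Resolve the full URL for one file of a source, e.g. the ClinVar summary file.
    // The fileId ("SUMMARY", "GWAS", ...) must match a key under 'files' in configuration.yml.
    // Some entries are already absolute URLs (e.g. EFO_TERMS, PGS_METADATA), so pass those through.
    public static String resolveUrl(DownloadProperties.URLProperties props, String fileId) {
        String file = props.getFiles().get(fileId);
        if (file == null) {
            throw new IllegalArgumentException("File ID not found in configuration: " + fileId);
        }
        if (file.startsWith("http://") || file.startsWith("https://") || file.startsWith("ftp://")) {
            return file;
        }
        return props.getHost() + file;
    }
}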
*/
-    @Deprecated
-    private boolean active;
     private List<String> activeByDefaultIn;
     private Map<String, String> collections;
-    private List<DataReleaseSource> sources;
+    private List<DataSource> sources;
 
     public DataRelease() {
         this.activeByDefaultIn = Collections.emptyList();
@@ -37,7 +32,7 @@ public DataRelease() {
     }
 
     public DataRelease(int release, String date, List<String> activeByDefaultIn, Map<String, String> collections,
-                       List<DataReleaseSource> sources) {
+                       List<DataSource> sources) {
         this.release = release;
         this.date = date;
         this.activeByDefaultIn = activeByDefaultIn;
@@ -75,15 +70,6 @@ public DataRelease setDate(String date) {
         return this;
     }
 
-    public boolean isActive() {
-        return active;
-    }
-
-    public DataRelease setActive(boolean active) {
-        this.active = active;
-        return this;
-    }
-
     public List<String> getActiveByDefaultIn() {
         return activeByDefaultIn;
     }
@@ -102,11 +88,11 @@ public DataRelease setCollections(Map<String, String> collections) {
         return this;
     }
 
-    public List<DataReleaseSource> getSources() {
+    public List<DataSource> getSources() {
         return sources;
     }
 
-    public DataRelease setSources(List<DataReleaseSource> sources) {
+    public DataRelease setSources(List<DataSource> sources) {
         this.sources = sources;
         return this;
    }
diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataReleaseSource.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataReleaseSource.java
deleted file mode 100644
index 3a42de9374..0000000000
--- a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataReleaseSource.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright 2015-2020 OpenCB
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package org.opencb.cellbase.core.models; - -import java.util.List; -import java.util.Objects; - -public class DataReleaseSource { - private String name; - private String version; - private String data; - private String date; - private List url; - - public DataReleaseSource() { - } - - public DataReleaseSource(String name, String version, String data, String date, List url) { - this.name = name; - this.version = version; - this.data = data; - this.date = date; - this.url = url; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("DataReleaseSource{"); - sb.append("name='").append(name).append('\''); - sb.append(", version='").append(version).append('\''); - sb.append(", data='").append(data).append('\''); - sb.append(", date='").append(date).append('\''); - sb.append(", url=").append(url); - sb.append('}'); - return sb.toString(); - } - - public String getName() { - return name; - } - - public DataReleaseSource setName(String name) { - this.name = name; - return this; - } - - public String getVersion() { - return version; - } - - public DataReleaseSource setVersion(String version) { - this.version = version; - return this; - } - - public String getData() { - return data; - } - - public DataReleaseSource setData(String data) { - this.data = data; - return this; - } - - public String getDate() { - return date; - } - - public DataReleaseSource setDate(String date) { - this.date = date; - return this; - } - - public List getUrl() { - return url; - } - - public DataReleaseSource setUrl(List url) { - this.url = url; - return this; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - DataReleaseSource that = (DataReleaseSource) o; - return Objects.equals(name, that.name) - && Objects.equals(version, that.version) - && Objects.equals(data, that.data) - && Objects.equals(date, that.date) - && Objects.equals(url, that.url); - } - - @Override - public int hashCode() { - return Objects.hash(name, version, data, date, url); - } -} diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataSource.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataSource.java new file mode 100644 index 0000000000..f716412a03 --- /dev/null +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataSource.java @@ -0,0 +1,98 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
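The DataSource model that replaces DataReleaseSource is defined just below. As a sketch, the version JSON files referenced throughout the loaders (siftVersion.json, pgsCatalogVersion.json, and so on) could be deserialized straight into it with Jackson, assuming the JSON field names line up with the new model, which is not guaranteed for files written by older CellBase versions:

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.IOException;

public class DataSourceReader {
    public static DataSource read(File versionFile) throws IOException {
        // Version files are small JSON documents; map them onto the new model
        return new ObjectMapper().readValue(versionFile, DataSource.class);
    }
}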
+ */ + +package org.opencb.cellbase.core.models; + +import java.util.ArrayList; +import java.util.List; + +public class DataSource { + + private String name; + private String category; + private String version; + private String downloadDate; + private List urls; + + public DataSource() { + this.urls = new ArrayList<>(); + } + + public DataSource(String name, String category, String version, String downloadDate, List urls) { + this.name = name; + this.category = category; + this.version = version; + this.downloadDate = downloadDate; + this.urls = urls; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("DataSourceDescr{"); + sb.append("name='").append(name).append('\''); + sb.append(", category='").append(category).append('\''); + sb.append(", version='").append(version).append('\''); + sb.append(", downloadDate='").append(downloadDate).append('\''); + sb.append(", urls=").append(urls); + sb.append('}'); + return sb.toString(); + } + + public String getName() { + return name; + } + + public DataSource setName(String name) { + this.name = name; + return this; + } + + public String getCategory() { + return category; + } + + public DataSource setCategory(String category) { + this.category = category; + return this; + } + + public String getVersion() { + return version; + } + + public DataSource setVersion(String version) { + this.version = version; + return this; + } + + public String getDownloadDate() { + return downloadDate; + } + + public DataSource setDownloadDate(String downloadedDate) { + this.downloadDate = downloadedDate; + return this; + } + + public List getUrls() { + return urls; + } + + public DataSource setUrls(List urls) { + this.urls = urls; + return this; + } +} diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index f24827532c..7b5e60b91c 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -45,6 +45,7 @@ server: port: "${CELLBASE.SERVER.REST.PORT}" defaultOutdir: "/tmp" download: + ## Genomic and Gene information ensembl: database: host: ensembldb.ensembl.org:3306 @@ -52,7 +53,16 @@ download: password: '' libs: "${CELLBASE.ENSEMBL.LIBS}" url: - host: ftp://ftp.ensembl.org/pub + host: https://ftp.ensembl.org/pub/ + files: + # New Homo sapiens assemblies contain too many ALT regions, so we download 'primary_assembly' file instead + PRIMARY_FA: "release-put_release_here/fasta/put_species_here/dna/put_capital_species_here.put_assembly_here.dna.primary_assembly.fa.gz" + GTF: "release-put_release_here/gtf/put_species_here/put_capital_species_here.put_assembly_here.put_release_here.gtf.gz" + PEP_FA: "release-put_release_here/fasta/put_species_here/pep/put_capital_species_here.put_assembly_here.pep.all.fa.gz" + CDNA_FA: "release-put_release_here/fasta/put_species_here/cdna/put_capital_species_here.put_assembly_here.cdna.all.fa.gz" + REGULATORY_BUILD: "release-put_release_here/regulation/put_species_here/put_species_here.put_assembly_here.Regulatory_Build.regulatory_features.20221007.gff.gz" + MOTIF_FEATURES: "release-put_release_here/regulation/put_species_here/MotifFeatures/put_species_here.put_assembly_here.motif_features.gff.gz" + MOTIF_FEATURES_INDEX: "release-put_release_here/regulation/put_species_here/MotifFeatures/put_species_here.put_assembly_here.motif_features.gff.gz.tbi" ensemblGenomes: database: host: mysql-eg-publicsql.ebi.ac.uk:4157 @@ -61,164 +71,239 @@ download: libs: 
"${CELLBASE.ENSEMBL.LIBS}" url: host: ftp://ftp.ensemblgenomes.org/pub - hgnc: - host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt - version: 2023-11-01 - cancerHotspot: - host: https://www.cancerhotspots.org/files/hotspots_v2.xls - version: "v2" refSeq: - host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz - refSeqFasta: - host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz - refSeqProteinFasta: - host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_protein.faa.gz - refSeqCdna: - host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz + host: https://ftp.ncbi.nih.gov/refseq/ + version: "2023-10-11" + files: + GENOMIC_GTF: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz + GENOMIC_FNA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz + PROTEIN_FAA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_protein.faa.gz + RNA_FNA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz maneSelect: -# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_0.93/MANE.GRCh38.v0.93.summary.txt.gz -# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz - host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.1/MANE.GRCh38.v1.1.summary.txt.gz - version: "1.1" + host: https://ftp.ncbi.nlm.nih.gov/refseq/ + version: "1.2" + files: + MANE_SELECT: MANE/MANE_human/release_1.2/MANE.GRCh38.v1.2.summary.txt.gz lrg: - host: http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt + host: http://ftp.ebi.ac.uk/ version: "2021-03-30" + files: + LRG: pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt + hgnc: + host: https://ftp.ebi.ac.uk/ + version: "2024-04-01" + files: + HGNC: pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2024-04-01.txt + cancerHotspot: + host: https://www.cancerhotspots.org/ + version: "v2" + files: + CANCER_HOTSPOT: files/hotspots_v2.xls + dgidb: + host: https://old.dgidb.org/ + version: "2022-02-01" + files: + DGIDB: data/monthly_tsvs/2022-Feb/interactions.tsv geneUniprotXref: - host: http://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ - version: "2023-11-08" + host: http://ftp.uniprot.org/ + version: "2024-03-27" + files: + UNIPROT_XREF: pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz geneExpressionAtlas: - host: ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz + host: https://ftp.ebi.ac.uk/ + version: "2.0.14" + files: + GENE_EXPRESSION_ATLAS: pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz + hpo: + ## NOTE: Download manually from here now + version: "2024-04-26" + host: https://hpo.jax.org/app/data/annotations + disgenet: + host: https://www.disgenet.org/ + version: "7.0 (January 2020)" + files: + DISGENET: static/disgenet_ap1/files/downloads/all_gene_disease_associations.tsv.gz + gnomadConstraints: + host: https://storage.googleapis.com/ + version: "2.1.1" + files: + GNOMAD_CONSTRAINTS: 
gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz + goAnnotation: + host: http://geneontology.org/ + files: + GO_ANNOTATION: gene-associations/goa_human.gaf.gz + pgs: + host: https://www.pgscatalog.org/ + version: "Dec. 15, 2023" + files: + PGS_METADATA: https://ftp.ebi.ac.uk/pub/databases/spot/pgs/metadata/pgs_all_metadata_scores.csv + + ## Regulation mirbase: - host: ftp://mirbase.org/pub/mirbase/CURRENT/miRNA.xls.gz - mirbaseReadme: - host: ftp://mirbase.org/pub/mirbase/CURRENT/README + host: https://www.mirbase.org/ + version: "22.1" + files: + MIRBASE: download/miRNA.dat targetScan: host: http://hgdownload.cse.ucsc.edu/goldenPath/ miRTarBase: - host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx + host: https://mirtarbase.cuhk.edu.cn/ version: "9.0" + files: + # This file contains errors and has to be fixed before building + # check the script cellbase-app/app/scripts/mirtarbase/fix-gene-symbol.sh + MIRTARBASE: ~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx ## Protein Data uniprot: - host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz - version: "2023-11-08" - uniprotRelNotes: - host: https://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt - version: "2023-11-08" + host: https://ftp.uniprot.org/ + version: "2024-03-27" + files: + UNIPROT: pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz interpro: - host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/protein2ipr.dat.gz - version: "2023-11-08" - interproRelNotes: - host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt + host: https://ftp.ebi.ac.uk/ + version: "2024-03-27" + files: + INTERPRO: pub/databases/interpro/current_release/protein2ipr.dat.gz intact: - host: https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt - version: "2023-10-07" + host: https://ftp.ebi.ac.uk/ + version: "2024-02-16" + files: + INTACT: pub/databases/intact/current/psimitab/intact.txt ## Conservation Scores - conservation: - host: https://hgdownload.cse.ucsc.edu/goldenPath/ + phastCons: + ## The CellBase downloader will change put_assembly_here by the assembly, e.g. hg38; and put_chromosome_here by the chromosomes: 1,2,..X,Y,M + host: https://hgdownload.cse.ucsc.edu/ + version: "2022-08-30" + files: + PHASTCONS: goldenPath/put_assembly_here/phastCons470way/put_assembly_here.470way.phastCons/chrput_chromosome_here.phastCons470way.wigFix.gz + phylop: + ## The CellBase downloader will change put_assembly_here by the assembly, e.g. 
hg38; and put_chromosome_here by the chromosomes: 1,2,..X,Y,M + host: https://hgdownload.cse.ucsc.edu/ version: "2022-08-30" + files: + PHYLOP: goldenPath/put_assembly_here/phyloP470way/put_assembly_here.470way.phyloP/chrput_chromosome_here.phyloP470way.wigFix.gz gerp: - host: http://ftp.ensembl.org/pub/release-110/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + host: http://ftp.ensembl.org/ version: "2023-05-17" + files: + GERP: pub/release-111/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + + ## Clinical Variant clinvar: -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2023-12.xml.gz - version: "2023-12-01" - clinvarVariation: -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2023-12.xml.gz - clinvarSummary: - host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz - version: "2023-12-01" - clinvarVariationAllele: - host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz - version: "2023-12-01" - clinvarEfoTerms: - host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv - iarctp53: - host: http://p53.iarc.fr/ajax/Zipper.ashx - docm: - host: http://docm.info/api/ - docmVersion: - host: http://docm.info + host: https://ftp.ncbi.nlm.nih.gov/ + version: "2024-02" + files: + FULL_RELEASE: pub/clinvar/xml/RCV_xml_old_format/ClinVarFullRelease_2024-02.xml.gz + SUMMARY: pub/clinvar/tab_delimited/variant_summary.txt.gz + ALLELE: pub/clinvar/tab_delimited/variation_allele.txt.gz + EFO_TERMS: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv + cosmic: + ## To be downloaded manually + host: https://cancer.sanger.ac.uk/cosmic/ + version: "v99" + files: + COSMIC: CosmicMutantExport.tsv.gz + hgmd: + ## To be downloaded manually + host: https://www.hgmd.cf.ac.uk/ + version: "2020-03" + files: + HGMD: hgmd.vcf + gwasCatalog: + ## Download file from https://www.ebi.ac.uk/gwas/docs/file-downloads to find the real version, which is 'e111_r2024-04-22' + host: https://ftp.ebi.ac.uk/ + version: "2024-04-22" + files: + GWAS: pub/databases/gwas/releases/2024/04/22/gwas-catalog-associations_ontology-annotated.tsv + DBSNP: All.vcf.gz + dgv: host: http://dgv.tcag.ca/v106/docs simpleRepeats: - host: http://hgdownload.cse.ucsc.edu/goldenPath + host: http://hgdownload.cse.ucsc.edu/ + files: + ## The CellBase downloader will change put_assembly_here by the assembly, e.g. hg38 + SIMPLE_REPEATS: goldenPath/put_assembly_here/database/simpleRepeat.txt.gz windowMasker: - host: http://hgdownload.cse.ucsc.edu/goldenPath + host: http://hgdownload.cse.ucsc.edu/ + files: + ## The CellBase downloader will change put_assembly_here by the assembly, e.g. 
hg38 + WINDOW_MASKER: goldenPath/put_assembly_here/database/windowmaskerSdust.txt.gz genomicSuperDups: - host: http://hgdownload.cse.ucsc.edu/goldenPath - gwasCatalog: -# host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv -# version: "1.0.2 associations_e106_r2022-05-17" - host: ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/2023/12/21/gwas-catalog-associations.tsv - version: "23-12-21" - hpo: - ## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations - host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt - disgenet: - host: https://www.disgenet.org/static/disgenet_ap1/files/downloads + host: http://hgdownload.cse.ucsc.edu/ files: - - all_gene_disease_associations.tsv.gz - - readme.txt - dgidb: - host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv - version: "2022-02-01" + ## The CellBase downloader will change put_assembly_here by the assembly, e.g. hg38 + GENOMIC_SUPER_DUPS: goldenPath/put_assembly_here/database/genomicSuperDups.txt.gz + + ## Variant Pathogenic Prediction + revel: + host: https://zenodo.org/ + version: "1.3" + files: + REVEL: record/7072866/files/revel-v1.3_all_chromosomes.zip + alphaMissense: + host: https://github.com/google-deepmind/alphamissense + version: "Aug. 3, 2023" + files: + ALPHAMISSENSE: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg38.tsv.gz cadd: - ## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP! -# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz - host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz - version: "1.7-pre" + host: https://krishna.gs.washington.edu/ + version: "1.7" + files: + CADD: download/CADD/v1.7/GRCh38/whole_genome_SNVs.tsv.gz + reactome: host: http://www.reactome.org/download/current/biopax.zip - gnomadConstraints: - host: https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz - version: "2.1.1" + + ## OBO Ontologies hpoObo: - host: http://purl.obolibrary.org/obo/hp.obo - version: "2023-12-01" + host: http://purl.obolibrary.org/obo/ + ## The version is retrieved from the OBO file + files: + HPO: hp.obo goObo: - host: http://purl.obolibrary.org/obo/go/go-basic.obo - version: "2023-12-01" + host: http://purl.obolibrary.org/obo/ + ## The version is retrieved from the OBO file + files: + GO: go/go-basic.obo doidObo: - host: http://purl.obolibrary.org/obo/doid.obo - version: "2023-12-01" + host: http://purl.obolibrary.org/obo/ + ## The version is retrieved from the OBO file + files: + DOID: doid.obo mondoObo: - host: http://purl.obolibrary.org/obo/mondo.obo - version: "2023-12-01" - goAnnotation: - host: http://geneontology.org/gene-associations/goa_human.gaf.gz - revel: - host: https://zenodo.org/record/7072866/files/revel-v1.3_all_chromosomes.zip + host: http://purl.obolibrary.org/obo/ + ## The version is retrieved from the OBO file + files: + MONDO: mondo.obo + + ## Others pubmed: host: https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/ + version: 2024 files: - - pubmed22n[1..1114..4].xml.gz + PUBMED_REGEX: pubmed24n[1..1219..4].xml.gz pharmGKB: - host: https://www.pharmgkb.org/downloads + host: https://api.pharmgkb.org/v1/download/file/data/ version: v1 files: - - 
https://api.pharmgkb.org/v1/download/file/data/genes.zip - - https://api.pharmgkb.org/v1/download/file/data/chemicals.zip - - https://api.pharmgkb.org/v1/download/file/data/variants.zip - - https://api.pharmgkb.org/v1/download/file/data/guidelineAnnotations.json.zip - - https://api.pharmgkb.org/v1/download/file/data/variantAnnotations.zip - - https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip - - https://api.pharmgkb.org/v1/download/file/data/clinicalVariants.zip - - https://api.pharmgkb.org/v1/download/file/data/drugLabels.zip - - https://api.pharmgkb.org/v1/download/file/data/relationships.zip + GENES: genes.zip + CHEMICALS: chemicals.zip + VARIANTS: variants.zip + GUIDELINE_ANNOTATIONS: guidelineAnnotations.json.zip + VARIANT_ANNOTATIONS: variantAnnotations.zip + CLINICAL_ANNOTATIONS: clinicalAnnotations.zip + CLINICAL_VARIANTS: clinicalVariants.zip + DRUG_LABELS: drugLabels.zip + RELATIONSHIPS: relationships.zip species: vertebrates: - id: hsapiens scientificName: Homo sapiens assemblies: - - ensemblVersion: '110_38' + - ensemblVersion: '111_38' name: GRCh38 - ensemblVersion: '82_37' name: GRCh37 diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 71964bb36e..50fb973a8c 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -185,6 +185,11 @@ junit-platform-engine test + + org.apache.commons + commons-csv + 1.0 + diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 124ac6e6fc..9abfac8b6b 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -16,9 +16,12 @@ package org.opencb.cellbase.lib; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.Level; import org.apache.logging.log4j.core.config.Configurator; +import org.opencb.cellbase.core.config.DownloadProperties; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.lib.download.DownloadFile; import org.opencb.commons.utils.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -26,120 +29,485 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; /** * Created by fjlopez on 03/06/16. 
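The configuration above leans on two filename conventions: the put_*_here markers that the downloader substitutes (the PUT_*_HERE_MARK constants defined below), and a bracketed range in the PubMed entry. A sketch of both substitutions; the range semantics are my reading of pubmed24n[1..1219..4].xml.gz, i.e. pubmed24n0001.xml.gz through pubmed24n1219.xml.gz with 4-digit zero padding:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public final class DownloadFilenames {

    // Replace the put_*_here markers with concrete values, e.g. release 111 / hg38
    public static String substitute(String template, String release, String assembly) {
        return template
                .replace("put_release_here", release)
                .replace("put_assembly_here", assembly);
    }

    // Expand e.g. "pubmed24n[1..1219..4].xml.gz" into zero-padded concrete filenames
    public static List<String> expandRange(String template) {
        Matcher m = Pattern.compile("\\[(\\d+)\\.\\.(\\d+)\\.\\.(\\d+)]").matcher(template);
        if (!m.find()) {
            return Collections.singletonList(template);
        }
        int from = Integer.parseInt(m.group(1));
        int to = Integer.parseInt(m.group(2));
        int width = Integer.parseInt(m.group(3));
        List<String> names = new ArrayList<>();
        for (int i = from; i <= to; i++) {
            names.add(template.substring(0, m.start())
                    + String.format("%0" + width + "d", i)
                    + template.substring(m.end()));
        }
        return names;
    }
}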
*/ -public class EtlCommons { +public final class EtlCommons { + // Commons + public static final String XLSX_EXTENSION = ".xlsx"; + public static final String CSV_EXTENSION = ".csv"; + public static final String TBI_EXTENSION = ".tbi"; + public static final String FAI_EXTENSION = ".fai"; + + public static final String OK_LOG_MESSAGE = "Ok."; + + // Ensembl + public static final String ENSEMBL_DATA = "ensembl"; + public static final String PUT_RELEASE_HERE_MARK = "put_release_here"; + public static final String PUT_SPECIES_HERE_MARK = "put_species_here"; + public static final String PUT_CAPITAL_SPECIES_HERE_MARK = "put_capital_species_here"; + public static final String PUT_ASSEMBLY_HERE_MARK = "put_assembly_here"; + public static final String PUT_CHROMOSOME_HERE_MARK = "put_chromosome_here"; + // Must match the configuration file + public static final String ENSEMBL_PRIMARY_FA_FILE_ID = "PRIMARY_FA"; + public static final String ENSEMBL_GTF_FILE_ID = "GTF"; + public static final String ENSEMBL_PEP_FA_FILE_ID = "PEP_FA"; + public static final String ENSEMBL_CDNA_FA_FILE_ID = "CDNA_FA"; + public static final String ENSEMBL_REGULATORY_BUILD_FILE_ID = "REGULATORY_BUILD"; + public static final String ENSEMBL_MOTIF_FEATURES_FILE_ID = "MOTIF_FEATURES"; + public static final String ENSEMBL_MOTIF_FEATURES_INDEX_FILE_ID = "MOTIF_FEATURES_INDEX"; + + public static final String HOMO_SAPIENS_NAME= "Homo sapiens"; + public static final String HSAPIENS_NAME= "hsapiens"; + + public static final String GRCH38_NAME = "GRCh38"; + public static final String GRCH37_NAME = "GRCh37"; + public static final String HG38_NAME = "hg38"; + public static final String HG19_NAME = "hg19"; + + public static final String SUFFIX_VERSION_FILENAME = "Version.json"; + + // Genome public static final String GENOME_DATA = "genome"; + + // Gene public static final String GENE_DATA = "gene"; + public static final String ENSEMBL_GENE_BASENAME = "ensemblGene"; + public static final String GENE_ANNOTATION_DATA = "gene_annotation"; + public static final String GENE_DISEASE_ANNOTATION_DATA = "gene_disease_annotation"; + + // RefSeq public static final String REFSEQ_DATA = "refseq"; - public static final String GENE_DISEASE_ASSOCIATION_DATA = "gene_disease_association"; + public static final String REFSEQ_GENE_BASENAME = "refSeqGene"; + // Must match the configuration file + public static final String REFSEQ_GENOMIC_GTF_FILE_ID = "GENOMIC_GTF"; + public static final String REFSEQ_GENOMIC_FNA_FILE_ID = "GENOMIC_FNA"; + public static final String REFSEQ_PROTEIN_FAA_FILE_ID = "PROTEIN_FAA"; + public static final String REFSEQ_RNA_FNA_FILE_ID = "RNA_FNA"; + + // Gene annotation + // - MANE Select + public static final String MANE_SELECT_DATA = "MANE Select"; + // Must match the configuration file + public static final String MANE_SELECT_FILE_ID = "MANE_SELECT"; + // - LRG + public static final String LRG_DATA = "lrg"; + // Must match the configuration file + public static final String LRG_FILE_ID = "LRG"; + // - HGNC + public static final String HGNC_DATA = "hgnc"; + // Must match the configuration file + public static final String HGNC_FILE_ID = "HGNC"; + // - Cancer HotSpot + public static final String CANCER_HOTSPOT_DATA = "cancer_hotspot"; + // Must match the configuration file + public static final String CANCER_HOTSPOT_FILE_ID = "CANCER_HOTSPOT"; + // - DGID (drug) + public static final String DGIDB_DATA = "dgidb"; + // Must match the configuration file + public static final String DGIDB_FILE_ID = "DGIDB"; + // - UniProt Xref + 
 public static final String UNIPROT_XREF_DATA = "uniprot_xref";
+    // Must match the configuration file
+    public static final String UNIPROT_XREF_FILE_ID = "UNIPROT_XREF";
+    // - Gene Expression Atlas
+    public static final String GENE_EXPRESSION_ATLAS_DATA = "gene_expression_atlas";
+    // Must match the configuration file
+    public static final String GENE_EXPRESSION_ATLAS_FILE_ID = "GENE_EXPRESSION_ATLAS";
+    // - Gene Disease Annotation
+    public static final String GENE_DISEASE_ANNOTATION_NAME = "Gene Disease Annotation";
+    // - HPO
+    public static final String HPO_DATA = "hpo";
+    // - DISGENET
+    public static final String DISGENET_DATA = "disgenet";
+    // Must match the configuration file
+    public static final String DISGENET_FILE_ID = "DISGENET";
+    // - gnomAD Constraints
+    public static final String GNOMAD_CONSTRAINTS_DATA = "gnomad_constraints";
+    // Must match the configuration file
+    public static final String GNOMAD_CONSTRAINTS_FILE_ID = "GNOMAD_CONSTRAINTS";
+    // - GO Annotation
+    public static final String GO_ANNOTATION_DATA = "go_annotation";
+    // Must match the configuration file
+    public static final String GO_ANNOTATION_FILE_ID = "GO_ANNOTATION";
 public static final String VARIATION_DATA = "variation";
-    public static final String VARIATION_FUNCTIONAL_SCORE_DATA = "variation_functional_score";
-    public static final String MISSENSE_VARIATION_SCORE_DATA = "missense_variation_functional_score";
-    public static final String REGULATION_DATA = "regulation";
-    public static final String PROTEIN_DATA = "protein";
-    public static final String CONSERVATION_DATA = "conservation";
-    public static final String CLINICAL_VARIANTS_DATA = "clinical_variants";
 public static final String SPLICE_SCORE_DATA = "splice_score";
+    // PGS (polygenic scores)
+    public static final String PGS_NAME = "Polygenic Scores";
+    public static final String PGS_DATA = "polygenic_score";
+    public static final String PGS_COMMON_COLLECTION = "common_polygenic_scores";
+    public static final String PGS_VARIANT_COLLECTION = "variant_polygenic_scores";
+    // PGS Catalog
+    public static final String PGS_CATALOG_NAME = "PGS Catalog";
+    public static final String PGS_CATALOG_VERSION_FILENAME = "pgsCatalog" + SUFFIX_VERSION_FILENAME;
+    // Must match the configuration file
+    public static final String PGS_CATALOG_METADATA_FILE_ID = "PGS_METADATA";
+
+    // Pharmacogenomics
 public static final String PHARMACOGENOMICS_DATA = "pharmacogenomics";
-    public static final String PHARMGKB_NAME = "PharmGKB";
+    // PharmGKB
 public static final String PHARMGKB_DATA = "pharmgkb";
-    public static final String PHARMGKB_VERSION_FILENAME = "pharmgkbVersion.json";
-
-    public static final String CLINICAL_VARIANTS_FOLDER = "clinicalVariant";
-    public static final String CLINVAR_VERSION = "2022.11";
-    public static final String CLINVAR_DATE = "2022-11";
-    public static final String CLINVAR_XML_FILE = "ClinVarFullRelease_2022-11.xml.gz";
-    public static final String CLINVAR_EFO_FILE = "ClinVar_Traits_EFO_Names.csv";
-    public static final String CLINVAR_SUMMARY_FILE = "variant_summary.txt.gz";
-    public static final String CLINVAR_VARIATION_ALLELE_FILE = "variation_allele.txt.gz";
-    public static final String IARCTP53_FILE = "IARC-TP53.zip";
-    public static final String GWAS_FILE = "gwas_catalog.tsv";
-    public static final String COSMIC_FILE = "CosmicMutantExport.tsv.gz";
-    public static final String DBSNP_FILE = "All.vcf.gz";
-
-    public static final String STRUCTURAL_VARIANTS_DATA = "svs";
-    public static final String REPEATS_DATA = "repeats";
-    public static final String OBO_DATA = "ontology";
-    public static final String HPO_FILE = "hp.obo";
-    public static final String GO_FILE = "go-basic.obo";
-    public static final String DOID_FILE = "doid.obo";
-    public static final String MONDO_FILE = "mondo.obo";
-    public static final String PFM_DATA = "regulatory_pfm";
+    // Must match the configuration file
+    public static final String PHARMGKB_GENES_FILE_ID = "GENES";
+    public static final String PHARMGKB_CHEMICALS_FILE_ID = "CHEMICALS";
+    public static final String PHARMGKB_VARIANTS_FILE_ID = "VARIANTS";
+    public static final String PHARMGKB_GUIDELINE_ANNOTATIONS_FILE_ID = "GUIDELINE_ANNOTATIONS";
+    public static final String PHARMGKB_VARIANT_ANNOTATIONS_FILE_ID = "VARIANT_ANNOTATIONS";
+    public static final String PHARMGKB_CLINICAL_ANNOTATIONS_FILE_ID = "CLINICAL_ANNOTATIONS";
+    public static final String PHARMGKB_CLINICAL_VARIANTS_FILE_ID = "CLINICAL_VARIANTS";
+    public static final String PHARMGKB_DRUG_LABELS_FILE_ID = "DRUG_LABELS";
+    public static final String PHARMGKB_RELATIONSHIPS_FILE_ID = "RELATIONSHIPS";
-    // Build specific data options
-    public static final String GENOME_INFO_DATA = "genome_info";
-    public static final String DISGENET_DATA = "disgenet";
-    public static final String HPO_DATA = "hpo";
-    public static final String CADD_DATA = "cadd";
-    public static final String PPI_DATA = "ppi";
-    public static final String DRUG_DATA = "drug";
+    // Missense variation functional score
+    public static final String MISSENSE_VARIATION_SCORE_DATA = "missense_variation_functional_score";
+
+    // Clinical variants data
+    public static final String CLINICAL_VARIANT_DATA = "clinical_variant";
+    public static final String CLINICAL_VARIANTS_BASENAME = "clinicalVariant";
+    // ClinVar
 public static final String CLINVAR_DATA = "clinvar";
-    public static final String DOCM_DATA = "docm";
+    public static final String CLINVAR_CHUNKS_SUBDIRECTORY = "clinvar_chunks";
+    // Must match the configuration file
+    public static final String CLINVAR_FULL_RELEASE_FILE_ID = "FULL_RELEASE";
+    public static final String CLINVAR_SUMMARY_FILE_ID = "SUMMARY";
+    public static final String CLINVAR_ALLELE_FILE_ID = "ALLELE";
+    public static final String CLINVAR_EFO_TERMS_FILE_ID = "EFO_TERMS";
+    // COSMIC
 public static final String COSMIC_DATA = "cosmic";
-    public static final String GWAS_DATA = "gwas";
-    public static final String IARCTP53_GERMLINE_FILE = "germlineMutationDataIARC TP53 Database, R20.txt";
-    public static final String IARCTP53_GERMLINE_REFERENCES_FILE = "germlineMutationReferenceIARC TP53 Database, R20.txt";
-    public static final String IARCTP53_SOMATIC_FILE = "somaticMutationDataIARC TP53 Database, R20.txt";
-    public static final String IARCTP53_SOMATIC_REFERENCES_FILE = "somaticMutationReferenceIARC TP53 Database, R20.txt";
+    // Must match the configuration file
+    public static final String COSMIC_FILE_ID = "COSMIC";
+    // HGMD
 public static final String HGMD_DATA = "hgmd";
+    // Must match the configuration file
+    public static final String HGMD_FILE_ID = "HGMD";
+    // GWAS
+    public static final String GWAS_DATA = "gwas";
+    // Must match the configuration file
+    public static final String GWAS_FILE_ID = "GWAS";
+    public static final String GWAS_DBSNP_FILE_ID = "DBSNP";
-    public static final String PUBMED_DATA = "pubmed";
+    // Repeats
+    public static final String REPEATS_DATA = "repeats";
+    public static final String REPEATS_BASENAME = "repeats";
+    /**
+     * @deprecated (when refactoring downloaders, builders and loaders)
+     */
+    @Deprecated
+    public static final String REPEATS_JSON = "repeats";
+    // Simple repeats
+    public static final String TRF_DATA = "trf";
+    // Must match the configuration file
+    public static final String SIMPLE_REPEATS_FILE_ID = "SIMPLE_REPEATS";
+    // Genomic super duplications
+    public static final String GSD_DATA = "gsd";
+    // Must match the configuration file
+    public static final String GENOMIC_SUPER_DUPS_FILE_ID = "GENOMIC_SUPER_DUPS";
+    // Window masker
+    public static final String WM_DATA = "wm";
+    // Must match the configuration file
+    public static final String WINDOW_MASKER_FILE_ID = "WINDOW_MASKER";
+
+    // Ontology
+    public static final String ONTOLOGY_DATA = "ontology";
+    public static final String OBO_BASENAME = "ontology";
+    // HPO
+    public static final String HPO_OBO_DATA = "hpo";
+    // Must match the configuration file
+    public static final String HPO_OBO_FILE_ID = "HPO";
+    // GO
+    public static final String GO_OBO_DATA = "go";
+    // Must match the configuration file
+    public static final String GO_OBO_FILE_ID = "GO";
+    // DOID
+    public static final String DOID_OBO_DATA = "doid";
+    // Must match the configuration file
+    public static final String DOID_OBO_FILE_ID = "DOID";
+    // MONDO
+    public static final String MONDO_OBO_DATA = "mondo";
+    // Must match the configuration file
+    public static final String MONDO_OBO_FILE_ID = "MONDO";
+
+
+    public static final String PFM_DATA = "regulatory_pfm";
+
+    // Variation functional score
+    public static final String VARIATION_FUNCTIONAL_SCORE_DATA = "variation_functional_score";
+    // CADD scores
+    public static final String CADD_DATA = "cadd";
+    public static final String CADD_RAW_DATA = "cadd_raw";
+    public static final String CADD_SCALED_DATA = "cadd_scaled";
+    // Must match the configuration file
+    public static final String CADD_FILE_ID = "CADD";
+
+    // Regulation
+    public static final String REGULATION_DATA = "regulation";
+    public static final String REGULATORY_PFM_BASENAME = "regulatory_pfm";
+    public static final String REGULATORY_REGION_BASENAME = "regulatory_region";
+    // Regulatory build and motif features (see Ensembl files: regulatory build and motif features files)
+    public static final String REGULATORY_BUILD_DATA = "regulatory_build";
+    // Motif features (see Ensembl files)
+    public static final String MOTIF_FEATURES_DATA = "motif_features";
+    // miRBase
+    public static final String MIRBASE_DATA = "mirbase";
+    // Must match the configuration file
+    public static final String MIRBASE_FILE_ID = "MIRBASE";
+    // miRTarBase
+    public static final String MIRTARBASE_DATA = "mirtarbase";
+    // Must match the configuration file
+    public static final String MIRTARBASE_FILE_ID = "MIRTARBASE";
-    // Load specific data options
+    // Protein substitution predictions consist of sift, polyphen, revel and alphamissense
+    public static final String PROTEIN_SUBSTITUTION_PREDICTION_DATA = "protein_substitution_predictions";
+    // Sift and polyphen
 public static final String PROTEIN_FUNCTIONAL_PREDICTION_DATA = "protein_functional_prediction";
+    public static final String SIFT_SOURCE_NAME = "Sift";
+    public static final String POLYPHEN_SOURCE_NAME = "PolyPhen";
+    public static final String SIFT_VERSION_FILENAME = "siftVersion.json";
+    public static final String POLYPHEN_VERSION_FILENAME = "polyphenVersion.json";
+    // Revel
+    public static final String REVEL_DATA = "revel";
+    // Must match the configuration file
+    public static final String REVEL_FILE_ID = "REVEL";
+    @Deprecated
+    public static final String MISSENSE_VARIATION_SCORE_JSON_FILENAME = "missense_variation_functional_score.json.gz";
+    @Deprecated
+    public static final String REVEL_RAW_FILENAME = "revel-v1.3_all_chromosomes.zip";
+    @Deprecated
+    public static final String REVEL_JSON_FILENAME = "revel-v1.3_all_chromosomes.json.gz";
+    @Deprecated
+    public static final String REVEL_VERSION_FILENAME = "revelVersion.json";
+    // AlphaMissense
+    public static final String ALPHAMISSENSE_DATA = "alphamissense";
+    // Must match the configuration file
+    public static final String ALPHAMISSENSE_FILE_ID = "ALPHAMISSENSE";
+    @Deprecated
+    public static final String ALPHAMISSENSE_RAW_FILENAME = "AlphaMissense_hg38.tsv.gz";
+    @Deprecated
+    public static final String ALPHAMISSENSE_JSON_FILENAME = "alphamissense_hg38.json.gz";
+    @Deprecated
+    public static final String ALPHAMISSENSE_VERSION_FILENAME = "alphamissenseVersion.json";
-    // Path and file names
-    public static final String GERP_SUBDIRECTORY = "gerp";
+    // Protein
+    public static final String PROTEIN_DATA = "protein";
+    // UniProt
+    public static final String UNIPROT_DATA = "uniprot";
+    public static final String UNIPROT_CHUNKS_SUBDIRECTORY = "uniprot_chunks";
+    // Must match the configuration file
+    public static final String UNIPROT_FILE_ID = "UNIPROT";
+    // InterPro
+    public static final String INTERPRO_DATA = "interpro";
+    // Must match the configuration file
+    public static final String INTERPRO_FILE_ID = "INTERPRO";
+    // IntAct
+    public static final String INTACT_DATA = "intact";
+    // Must match the configuration file
+    public static final String INTACT_FILE_ID = "INTACT";
+
+    // Conservation scores
+    public static final String CONSERVATION_DATA = "conservation";
+    // GERP
+    public static final String GERP_DATA = "gerp";
+    // Must match the configuration file
+    public static final String GERP_FILE_ID = "GERP";
+    // PHASTCONS
+    public static final String PHASTCONS_DATA = "phastCons";
+    // Must match the configuration file
+    public static final String PHASTCONS_FILE_ID = "PHASTCONS";
+    // PHYLOP
+    public static final String PHYLOP_DATA = "phylop";
+    // Must match the configuration file
+    public static final String PHYLOP_FILE_ID = "PHYLOP";
+
+    // Splice scores
 public static final String MMSPLICE_SUBDIRECTORY = "mmsplice";
-    public static final String MMSPLICE_VERSION_FILENAME = "mmspliceVersion.json";
+    public static final String MMSPLICE_VERSION_FILENAME = MMSPLICE_SUBDIRECTORY + SUFFIX_VERSION_FILENAME;
 public static final String SPLICEAI_SUBDIRECTORY = "spliceai";
-    public static final String SPLICEAI_VERSION_FILENAME = "spliceaiVersion.json";
+    public static final String SPLICEAI_VERSION_FILENAME = SPLICEAI_SUBDIRECTORY + SUFFIX_VERSION_FILENAME;
-    // binary bigwig file
+    /**
+     * @deprecated (when refactoring downloaders, builders and loaders)
+     */
+    @Deprecated
 public static final String GERP_FILE = "gerp_conservation_scores.homo_sapiens.GRCh38.bw";
-    // bigwig file manually transformed to bedGraph file
-    public static final String GERP_PROCESSED_FILE = "gerp.bedGraph.gz"; //"gerp_conservation_scores.homo_sapiens.GRCh38.bedGraph.gz";
 public static final String CLINICAL_VARIANTS_JSON_FILE = "clinical_variants.json.gz";
 public static final String CLINICAL_VARIANTS_ANNOTATED_JSON_FILE = "clinical_variants.full.json.gz";
-    public static final String DOCM_FILE = "docm.json.gz";
 public static final String DOCM_NAME = "DOCM";
-    public static final String STRUCTURAL_VARIANTS_FOLDER = "structuralVariants";
-    public static final String DGV_FILE = "dgv.txt";
-    public static final String DGV_VERSION_FILE = "dgvVersion.json";
-    public static final String STRUCTURAL_VARIANTS_JSON = "structuralVariants";
-    public static final String TRF_FILE = "simpleRepeat.txt.gz";
-    public static final String TRF_VERSION_FILE = "simpleRepeat.json";
-    public static final String GSD_FILE = "genomicSuperDups.txt.gz";
-    public static final String GSD_VERSION_FILE = "genomicSuperDups.json";
-    public static final String WM_FILE = "windowMasker.txt.gz";
-    public static final String WM_VERSION_FILE = "windowMasker.json";
-    public static final String REPEATS_FOLDER = "genome";
-    public static final String REPEATS_JSON = "repeats";
-    public static final String OBO_JSON = "ontology";
-    public static final String HPO_VERSION_FILE = "hpoVersion.json";
-    public static final String GO_VERSION_FILE = "goVersion.json";
-    public static final String DO_VERSION_FILE = "doVersion.json";
+    public static final String HPO_VERSION_FILE = "hpo" + SUFFIX_VERSION_FILENAME;
+    public static final String GO_VERSION_FILE = "go" + SUFFIX_VERSION_FILENAME;
+    public static final String DO_VERSION_FILE = "do" + SUFFIX_VERSION_FILENAME;
+    public static final String MONDO_VERSION_FILE = "mondo" + SUFFIX_VERSION_FILENAME;
+
+    public static final String HGMD_FILE = "hgmd.vcf";
-    public static final String PUBMED_VERSION_FILENAME = "pubmedVersion.json";
-    public static final String REGULATORY_FEATURES_FILE = "Regulatory_Build.regulatory_features.gff.gz";
-    public static final String MOTIF_FEATURES_FILE = "motif_features.gff.gz";
+    // PubMed
+    public static final String PUBMED_DATA = "pubmed";
+    // Must match the configuration file
+    public static final String PUBMED_REGEX_FILE_ID = "PUBMED_REGEX";
+
+    // Utilities maps
+    private static Map<String, String> dataNamesMap = new HashMap<>();
+    private static Map<String, String> dataCategoriesMap = new HashMap<>();
+    private static Map<String, String> dataVersionFilenamesMap = new HashMap<>();
+
+    static {
+
+        // Populate data names map
+        dataNamesMap.put(ENSEMBL_DATA, "Ensembl");
+        dataNamesMap.put(REFSEQ_DATA, "RefSeq");
+        dataNamesMap.put(GENOME_DATA, "Genome");
+        dataNamesMap.put(GENE_DATA, "Gene");
+        dataNamesMap.put(GENE_ANNOTATION_DATA, "Gene Annotation");
+        dataNamesMap.put(MANE_SELECT_DATA, "MANE Select");
+        dataNamesMap.put(LRG_DATA, "LRG");
+        dataNamesMap.put(HGNC_DATA, "HGNC Gene");
+        dataNamesMap.put(CANCER_HOTSPOT_DATA, "Cancer HotSpot");
+        dataNamesMap.put(DGIDB_DATA, "DGIdb");
+        dataNamesMap.put(UNIPROT_XREF_DATA, "UniProt Xref");
+        dataNamesMap.put(GENE_EXPRESSION_ATLAS_DATA, "Gene Expression Atlas");
+        dataNamesMap.put(GENE_DISEASE_ANNOTATION_DATA, "Gene Disease Annotation");
+        dataNamesMap.put(HPO_DATA, "HPO");
+        dataNamesMap.put(DISGENET_DATA, "DisGeNet");
+        dataNamesMap.put(GNOMAD_CONSTRAINTS_DATA, "gnomAD Constraint");
+        dataNamesMap.put(GO_ANNOTATION_DATA, "EBI Gene Ontology Annotation");
+        dataNamesMap.put(PROTEIN_DATA, "Protein");
+        dataNamesMap.put(UNIPROT_DATA, "UniProt");
+        dataNamesMap.put(INTERPRO_DATA, "InterPro");
+        dataNamesMap.put(INTACT_DATA, "IntAct");
+        dataNamesMap.put(CONSERVATION_DATA, "Conservation");
+        dataNamesMap.put(GERP_DATA, "GERP++");
+        dataNamesMap.put(PHASTCONS_DATA, "PhastCons");
+        dataNamesMap.put(PHYLOP_DATA, "PhyloP");
+        dataNamesMap.put(REPEATS_DATA, "Repeats");
+        dataNamesMap.put(TRF_DATA, "Tandem Repeats Finder");
+        dataNamesMap.put(WM_DATA, "Window Masker");
+        dataNamesMap.put(GSD_DATA, "Genomic Super Duplications");
+        dataNamesMap.put(REGULATION_DATA, "Regulation");
+        dataNamesMap.put(REGULATORY_BUILD_DATA, "Regulatory Build");
+        dataNamesMap.put(MOTIF_FEATURES_DATA, "Motif Features");
+        dataNamesMap.put(MIRBASE_DATA, "miRBase");
+        dataNamesMap.put(MIRTARBASE_DATA, "miRTarBase");
+        dataNamesMap.put(ONTOLOGY_DATA, "Ontology");
+        dataNamesMap.put(HPO_OBO_DATA, "HPO");
+        dataNamesMap.put(GO_OBO_DATA, "GO");
+        dataNamesMap.put(DOID_OBO_DATA, "DOID");
+        dataNamesMap.put(MONDO_OBO_DATA, "Mondo");
+        dataNamesMap.put(PUBMED_DATA, "PubMed");
+        dataNamesMap.put(PHARMACOGENOMICS_DATA, "Pharmacogenomics");
+        dataNamesMap.put(PHARMGKB_DATA, "PharmGKB");
+        dataNamesMap.put(VARIATION_FUNCTIONAL_SCORE_DATA, "Variant Functional Score");
+        dataNamesMap.put(CADD_DATA, "CADD");
+        dataNamesMap.put(MISSENSE_VARIATION_SCORE_DATA, "Missense Variation Score");
+        dataNamesMap.put(REVEL_DATA, "Revel");
+        dataNamesMap.put(CLINICAL_VARIANT_DATA, "Clinical Variant");
+        dataNamesMap.put(CLINVAR_DATA, "ClinVar");
+        dataNamesMap.put(COSMIC_DATA, "Cosmic");
+        dataNamesMap.put(HGMD_DATA, "HGMD");
+        dataNamesMap.put(GWAS_DATA, "GWAS Catalog");
+
+        // Populate data categories map
+        dataCategoriesMap.put(ENSEMBL_DATA, "Gene");
+        dataCategoriesMap.put(REFSEQ_DATA, "Gene");
+        dataCategoriesMap.put(GENOME_DATA, dataNamesMap.get(ENSEMBL_DATA));
+        dataCategoriesMap.put(MANE_SELECT_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(LRG_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(HGNC_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(CANCER_HOTSPOT_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(DGIDB_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(UNIPROT_XREF_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(GENE_EXPRESSION_ATLAS_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(HPO_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(DISGENET_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(GNOMAD_CONSTRAINTS_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(GO_ANNOTATION_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA));
+        dataCategoriesMap.put(UNIPROT_DATA, dataNamesMap.get(PROTEIN_DATA));
+        dataCategoriesMap.put(INTERPRO_DATA, dataNamesMap.get(PROTEIN_DATA));
+        dataCategoriesMap.put(INTACT_DATA, dataNamesMap.get(PROTEIN_DATA));
+        dataCategoriesMap.put(GERP_DATA, dataNamesMap.get(CONSERVATION_DATA));
+        dataCategoriesMap.put(PHASTCONS_DATA, dataNamesMap.get(CONSERVATION_DATA));
+        dataCategoriesMap.put(PHYLOP_DATA, dataNamesMap.get(CONSERVATION_DATA));
+        dataCategoriesMap.put(TRF_DATA, dataNamesMap.get(REPEATS_DATA));
+        dataCategoriesMap.put(WM_DATA, dataNamesMap.get(REPEATS_DATA));
+        dataCategoriesMap.put(GSD_DATA, dataNamesMap.get(REPEATS_DATA));
+        dataCategoriesMap.put(REGULATORY_BUILD_DATA, dataNamesMap.get(REGULATION_DATA));
+        dataCategoriesMap.put(MOTIF_FEATURES_DATA, dataNamesMap.get(REGULATION_DATA));
+        dataCategoriesMap.put(MIRBASE_DATA, dataNamesMap.get(REGULATION_DATA));
+        dataCategoriesMap.put(MIRTARBASE_DATA, dataNamesMap.get(REGULATION_DATA));
+        dataCategoriesMap.put(HPO_OBO_DATA, dataNamesMap.get(ONTOLOGY_DATA));
+        dataCategoriesMap.put(GO_OBO_DATA, dataNamesMap.get(ONTOLOGY_DATA));
+        dataCategoriesMap.put(DOID_OBO_DATA, dataNamesMap.get(ONTOLOGY_DATA));
+        dataCategoriesMap.put(MONDO_OBO_DATA, dataNamesMap.get(ONTOLOGY_DATA));
+        dataCategoriesMap.put(PUBMED_DATA, "Publication");
+        dataCategoriesMap.put(PHARMGKB_DATA, dataNamesMap.get(PHARMACOGENOMICS_DATA));
+        dataCategoriesMap.put(CADD_DATA, dataNamesMap.get(VARIATION_FUNCTIONAL_SCORE_DATA));
+        dataCategoriesMap.put(REVEL_DATA, dataNamesMap.get(MISSENSE_VARIATION_SCORE_DATA));
+        dataCategoriesMap.put(CLINVAR_DATA, dataNamesMap.get(CLINICAL_VARIANT_DATA));
+        dataCategoriesMap.put(COSMIC_DATA, dataNamesMap.get(CLINICAL_VARIANT_DATA));
+        dataCategoriesMap.put(HGMD_DATA, dataNamesMap.get(CLINICAL_VARIANT_DATA));
+        dataCategoriesMap.put(GWAS_DATA, dataNamesMap.get(CLINICAL_VARIANT_DATA));
+
+        // Populate data version filenames map
+        dataVersionFilenamesMap.put(ENSEMBL_DATA, "ensemblCore" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(REFSEQ_DATA, "refSeqCore" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(GENOME_DATA, "genome" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(MANE_SELECT_DATA, "maneSelect" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(LRG_DATA, "lrg" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(HGNC_DATA, "hgnc" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(CANCER_HOTSPOT_DATA, "cancerHotSpot" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(DGIDB_DATA, "dgidb" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(UNIPROT_XREF_DATA, "uniProtXref" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(GENE_EXPRESSION_ATLAS_DATA, "geneExpressionAtlas" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(HPO_DATA, "hpo" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(DISGENET_DATA, "disGeNet" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(GNOMAD_CONSTRAINTS_DATA, "gnomadConstraints" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(GO_ANNOTATION_DATA, "goAnnotation" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(UNIPROT_DATA, "uniProt" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(INTERPRO_DATA, "interPro" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(INTACT_DATA, "intAct" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(GERP_DATA, "gerp" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(PHASTCONS_DATA, "phastCons" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(PHYLOP_DATA, "phyloP" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(TRF_DATA, "simpleRepeat" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(WM_DATA, "windowMasker" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(GSD_DATA, "genomicSuperDups" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(REGULATORY_BUILD_DATA, "regulatoryBuild" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(MOTIF_FEATURES_DATA, "motifFeatures" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(MIRBASE_DATA, "mirBase" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(MIRTARBASE_DATA, "mirTarBase" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(HPO_OBO_DATA, "hpoObo" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(GO_OBO_DATA, "goObo" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(DOID_OBO_DATA, "doidObo" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(MONDO_OBO_DATA, "mondoObo" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(PUBMED_DATA, "pubMed" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(PHARMGKB_DATA, "pharmGKB" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(CADD_DATA, "cadd" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(REVEL_DATA, "revel" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(ALPHAMISSENSE_DATA, "alphaMissense" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(CLINVAR_DATA, "clinVar" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(COSMIC_DATA, "cosmic" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(HGMD_DATA, "hgmd" + SUFFIX_VERSION_FILENAME);
+        dataVersionFilenamesMap.put(GWAS_DATA, "gwas" + SUFFIX_VERSION_FILENAME);
+    }
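The three utility maps populated above back the getDataName, getDataCategory and getDataVersionFilename helpers defined further down in this class. A minimal usage sketch (assuming, as the mmsplice/spliceai renames above suggest, that SUFFIX_VERSION_FILENAME is "Version.json"):

    String name = EtlCommons.getDataName(EtlCommons.CLINVAR_DATA);                   // "ClinVar"
    String category = EtlCommons.getDataCategory(EtlCommons.CLINVAR_DATA);           // "Clinical Variant"
    String versionFile = EtlCommons.getDataVersionFilename(EtlCommons.CLINVAR_DATA); // "clinVarVersion.json"

Each helper throws a CellBaseException for an unknown key rather than returning null.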
+
+    private EtlCommons() {
+        throw new IllegalStateException("Utility class");
+    }

 public static boolean runCommandLineProcess(File workingDirectory, String binPath, List<String> args, String logFilePath)
-            throws IOException, InterruptedException {
-        // This small hack allow to configure the appropriate Logger level from the command line, this is done
-        // by setting the DEFAULT_LOG_LEVEL_KEY before the logger object is created.
-//        org.apache.log4j.Logger rootLogger = LogManager.getRootLogger();
-//        ConsoleAppender stderr = (ConsoleAppender) rootLogger.getAppender("stdout");
-//        stderr.setThreshold(Level.toLevel("debug"));
+            throws IOException, InterruptedException, CellBaseException {
 Configurator.setRootLevel(Level.INFO);
@@ -147,18 +515,21 @@ public static boolean runCommandLineProcess(File workingDirectory, String binPat
 ProcessBuilder builder = getProcessBuilder(workingDirectory, binPath, args, logFilePath);
-        logger.debug("Executing command: " + StringUtils.join(builder.command(), " "));
+        if (logger.isDebugEnabled()) {
+            logger.debug("Executing command: {}", StringUtils.join(builder.command(), " "));
+        }
 Process process = builder.start();
 process.waitFor();

 // Check process output
-        boolean executedWithoutErrors = true;
-        int genomeInfoExitValue = process.exitValue();
-        if (genomeInfoExitValue != 0) {
-            logger.warn("Error executing {}, error code: {}. More info in log file: {}", binPath, genomeInfoExitValue, logFilePath);
-            executedWithoutErrors = false;
+        if (process.exitValue() != 0) {
+            String msg = "Error executing command '" + binPath + "'; args = " + args + ", error code = " + process.exitValue()
+                    + ". More info in log file: " + logFilePath;
+            logger.error(msg);
+            throw new CellBaseException(msg);
 }
-        return executedWithoutErrors;
+
+        return true;
 }

 private static ProcessBuilder getProcessBuilder(File workingDirectory, String binPath, List<String> args, String logFilePath) {
@@ -203,7 +574,132 @@ public static Long countFileLines(Path filePath) throws IOException {
         }
         return nLines;
     }
+
+    public static String getEnsemblUrl(DownloadProperties.EnsemblProperties props, String ensemblRelease, String fileId, String species,
+                                       String assembly, String chromosome) throws CellBaseException {
+        if (!props.getUrl().getFiles().containsKey(fileId)) {
+            throw new CellBaseException(getMissingFileIdMessage(fileId));
+        }
+        String url = props.getUrl().getHost() + props.getUrl().getFiles().get(fileId);
+
+        // Change release, species, assembly, chromosome if necessary
+        if (StringUtils.isNotEmpty(ensemblRelease)) {
+            url = url.replace(PUT_RELEASE_HERE_MARK, ensemblRelease.split("-")[1]);
+        }
+        if (StringUtils.isNotEmpty(species)) {
+            url = url.replace(PUT_SPECIES_HERE_MARK, species);
+            url = url.replace(PUT_CAPITAL_SPECIES_HERE_MARK, Character.toUpperCase(species.charAt(0)) + species.substring(1));
+        }
+        if (StringUtils.isNotEmpty(assembly)) {
+            url = url.replace(PUT_ASSEMBLY_HERE_MARK, assembly);
+        }
+        if (StringUtils.isNotEmpty(chromosome)) {
+            url = url.replace(PUT_CHROMOSOME_HERE_MARK, chromosome);
+        }
+        return url;
+    }
+
+    public static String getUrl(DownloadProperties.URLProperties props, String fileId) throws CellBaseException {
+        return getUrl(props, fileId, null, null, null);
+    }
+
+    public static String getUrl(DownloadProperties.URLProperties props, String fileId, String species, String assembly, String chromosome)
+            throws CellBaseException {
+        if (!props.getFiles().containsKey(fileId)) {
+            throw new CellBaseException(getMissingFileIdMessage(fileId));
+        }
+        String url;
+        String filesValue = props.getFiles().get(fileId);
+        if (filesValue.startsWith("https://") || filesValue.startsWith("http://") || filesValue.startsWith("ftp://")) {
+            url = filesValue;
+        } else {
+            url = props.getHost() + filesValue;
+        }
+        if (StringUtils.isNotEmpty(species)) {
+            url = url.replace(PUT_SPECIES_HERE_MARK, species);
+        }
+        if (StringUtils.isNotEmpty(assembly)) {
+            url = url.replace(PUT_ASSEMBLY_HERE_MARK, assembly);
+        }
+        if (StringUtils.isNotEmpty(chromosome)) {
+            url = url.replace(PUT_CHROMOSOME_HERE_MARK, chromosome);
+        }
+        return url;
+    }
+
+    public static String getFilename(String prefix, String chromosome) {
+        return prefix + "_" + chromosome;
+    }
+
+    public static boolean isExecutableAvailable(String executable) throws IOException, InterruptedException {
+        ProcessBuilder processBuilder = new ProcessBuilder("which", executable);
+        Process process = processBuilder.start();
+
+        StringBuilder output = new StringBuilder();
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                output.append(line).append("\n");
+            }
+        }
+
+        int exitCode = process.waitFor();
+
+        // If the exit code is 0 the executable is installed (its path is output.toString().trim()); otherwise it is not
+        return (exitCode == 0);
+    }
+
+    public static String getFilenameFromProps(DownloadProperties.URLProperties props, String fileId) throws CellBaseException {
+        if (!props.getFiles().containsKey(fileId)) {
+            throw new CellBaseException(getMissingFileIdMessage(fileId));
+        }
+        return getFilenameFromUrl(props.getFiles().get(fileId));
+    }
+
+    public static String getFilenameFromUrl(String url) {
+        return Paths.get(url).getFileName().toString();
+    }
+
+    public static void checkDirectory(Path path, String name) throws CellBaseException {
+        if (path == null) {
+            throw new CellBaseException(name + " directory is null");
+        }
+        if (!Files.exists(path)) {
+            throw new CellBaseException(name + " directory " + path + " does not exist");
+        }
+        if (!Files.isDirectory(path)) {
+            throw new CellBaseException(name + " directory " + path + " is not a directory");
+        }
+    }
+
+    private static String getMissingFileIdMessage(String fileId) {
+        return "File ID " + fileId + " is missing in the DownloadProperties.URLProperties within the CellBase configuration file";
+    }
+
+    public static String getDataName(String data) throws CellBaseException {
+        if (!dataNamesMap.containsKey(data)) {
+            throw new CellBaseException("Name not found for data '" + data + "'");
+        }
+        return dataNamesMap.get(data);
+    }
+
+    public static String getDataCategory(String data) throws CellBaseException {
+        if (!dataCategoriesMap.containsKey(data)) {
+            throw new CellBaseException("Category not found for data '" + data + "'");
+        }
+        return dataCategoriesMap.get(data);
+    }
+
+    public static String getDataVersionFilename(String data) throws CellBaseException {
+        if (!dataVersionFilenamesMap.containsKey(data)) {
+            throw new CellBaseException("Version filename not found for data '" + data + "'");
+        }
+        return dataVersionFilenamesMap.get(data);
+    }
+
+    public static List<String> getUrls(List<DownloadFile> downloadFiles) {
+        return downloadFiles.stream().map(DownloadFile::getUrl).collect(Collectors.toList());
+    }
 }
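getUrl and getEnsemblUrl build download URLs by plain string replacement of the PUT_*_HERE_MARK placeholders in the configured templates. A sketch of the intended expansion, with hypothetical template and marker values (the real marker constants are defined elsewhere in EtlCommons):

    // Hypothetical values, for illustration only
    String template = "https://ftp.example.org/put_species_here/put_assembly_here/data.txt.gz";
    String url = template
            .replace("put_species_here", "homo_sapiens")   // PUT_SPECIES_HERE_MARK
            .replace("put_assembly_here", "grch38");       // PUT_ASSEMBLY_HERE_MARK
    // url == "https://ftp.example.org/homo_sapiens/grch38/data.txt.gz"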
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/AlphaMissenseBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/AlphaMissenseBuilder.java
new file mode 100644
index 0000000000..475a91d315
--- /dev/null
+++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/AlphaMissenseBuilder.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2015-2020 OpenCB
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.opencb.cellbase.lib.builders;
+
+import com.fasterxml.jackson.databind.MapperFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import org.apache.commons.lang3.StringUtils;
+import org.opencb.biodata.models.core.ProteinSubstitutionPrediction;
+import org.opencb.biodata.models.core.ProteinSubstitutionPredictionScore;
+import org.opencb.cellbase.core.serializer.CellBaseFileSerializer;
+import org.opencb.cellbase.lib.builders.utils.RocksDBUtils;
+import org.opencb.commons.utils.FileUtils;
+import org.rocksdb.Options;
+import org.rocksdb.RocksDB;
+import org.rocksdb.RocksIterator;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class AlphaMissenseBuilder extends CellBaseBuilder {
+
+    private File alphaMissenseFile;
+    private CellBaseFileSerializer fileSerializer;
+
+    private RocksDB rdb;
+
+    private static final String AA_CHANGE_PATTERN = "^([A-Z])(\\d+)([A-Z])$";
+    private final Pattern aaChangePattern = Pattern.compile(AA_CHANGE_PATTERN);
+
+    private static ObjectMapper mapper;
+    private static ObjectReader predictionReader;
+    private static ObjectWriter jsonObjectWriter;
+
+    private static final String SOURCE = "alphamissense";
+
+    static {
+        mapper = new ObjectMapper();
+        mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
+        predictionReader = mapper.readerFor(ProteinSubstitutionPrediction.class);
+        jsonObjectWriter = mapper.writer();
+    }
+
+    public AlphaMissenseBuilder(File alphaMissenseFile, CellBaseFileSerializer serializer) {
+        super(serializer);
+
+        this.fileSerializer = serializer;
+        this.alphaMissenseFile = alphaMissenseFile;
+
+        logger = LoggerFactory.getLogger(AlphaMissenseBuilder.class);
+    }
+
+    @Override
+    public void parse() throws Exception {
+        logger.info("Parsing AlphaMissense file: {} ...", alphaMissenseFile.getName());
+
+        // Sanity check
+        FileUtils.checkFile(alphaMissenseFile.toPath());
+
+        Object[] dbConnection = RocksDBUtils.getDBConnection(serializer.getOutdir().resolve("alphamissense-rdb.idx").toString(), true);
+        rdb = (RocksDB) dbConnection[0];
+        Options dbOption = (Options) dbConnection[1];
+        String dbLocation = (String) dbConnection[2];
+
+        // AlphaMissense file reader
+        BufferedReader br = FileUtils.newBufferedReader(alphaMissenseFile.toPath());
+        String line;
+        int counter = 0;
+        while ((line = br.readLine()) != null) {
+            if (!line.startsWith("#")) {
+                //  0      1    2    3    4       5           6              7                8                 9
+                //  CHROM  POS  REF  ALT  genome  uniprot_id  transcript_id  protein_variant  am_pathogenicity  am_class
+                String[] split = line.split("\t", -1);
+
+                String chrom = null;
+                int position;
+                String reference;
+                String alternate = null;
+                String transcriptId;
+                String uniprotId;
+                int aaPosition;
+                String aaReference;
+                String aaAlternate;
+
+                if (StringUtils.isNotEmpty(split[0])) {
+                    chrom = split[0].replace("chr", "");
+                }
+                if (StringUtils.isNotEmpty(split[1])) {
+                    position = Integer.parseInt(split[1]);
+                } else {
+                    logger.warn("Missing field 'position', skipping line: {}", line);
+                    continue;
+                }
+                if (StringUtils.isNotEmpty(split[2])) {
+                    reference = split[2];
+                } else {
+                    logger.warn("Missing field 'reference', skipping line: {}", line);
+                    continue;
+                }
+                if (StringUtils.isNotEmpty(split[3])) {
+                    alternate = split[3];
+                }
+                if (StringUtils.isNotEmpty(split[6])) {
+                    transcriptId = split[6].split("\\.")[0];
+                } else {
+                    logger.warn("Missing field 'transcript_id', skipping line: {}", line);
+                    continue;
+                }
+                if (StringUtils.isNotEmpty(split[5])) {
+                    uniprotId = split[5];
+                } else {
+                    logger.warn("Missing field 'uniprot_id', skipping line: {}", line);
+                    continue;
+                }
+                if (StringUtils.isNotEmpty(split[7])) {
+                    Matcher matcher = aaChangePattern.matcher(split[7]);
+                    if (matcher.matches()) {
+                        aaReference = matcher.group(1);
+                        aaPosition = Integer.parseInt(matcher.group(2));
+                        aaAlternate = matcher.group(3);
+                    } else {
+                        logger.warn("Error parsing field 'protein_variant' = {}, skipping line: {}", split[7], line);
+                        continue;
+                    }
+                } else {
+                    logger.warn("Missing field 'protein_variant', skipping line: {}", line);
+                    continue;
+                }
+
+                // Create protein substitution score
+                ProteinSubstitutionPredictionScore score = new ProteinSubstitutionPredictionScore();
+                score.setAlternate(alternate);
+                score.setAaAlternate(aaAlternate);
+                if (StringUtils.isNotEmpty(split[8])) {
+                    score.setScore(Double.parseDouble(split[8]));
+                }
+                if (StringUtils.isNotEmpty(split[9])) {
+                    score.setEffect(split[9]);
+                }
+
+                // Creating and/or updating protein substitution prediction
+                ProteinSubstitutionPrediction prediction;
+                String key = transcriptId + "_" + uniprotId + "_" + position + "_" + reference + "_" + aaPosition + "_" + aaReference;
+                byte[] dbContent = rdb.get(key.getBytes());
+                if (dbContent == null) {
+                    prediction = new ProteinSubstitutionPrediction(chrom, position, reference, transcriptId, uniprotId, aaPosition,
+                            aaReference, SOURCE, null, Collections.singletonList(score));
+                } else {
+                    prediction = predictionReader.readValue(dbContent);
+                    prediction.getScores().add(score);
+                }
+                rdb.put(key.getBytes(), jsonObjectWriter.writeValueAsBytes(prediction));
+
+                // Log messages
+                counter++;
+                if (counter % 10000 == 0) {
+                    logger.info("{} AlphaMissense predictions parsed", counter);
+                }
+            }
+        }
+
+        // Serialize/write the protein substitution predictions saved in the RocksDB index
+        serializeRDB(rdb);
+        RocksDBUtils.closeIndex(rdb, dbOption, dbLocation);
+        serializer.close();
+
+        logger.info("Parsed AlphaMissense file: {}. Done!", alphaMissenseFile.getName());
+    }
+
+    private void serializeRDB(RocksDB rdb) throws IOException {
+        // DO NOT change the name of the rocksIterator variable - for some unexplainable reason Java VM crashes if it's
+        // named "iterator"
+        RocksIterator rocksIterator = rdb.newIterator();
+
+        logger.info("Reading from RocksDB index and serializing to {}.json.gz", serializer.getOutdir().resolve(serializer.getFileName()));
+        int counter = 0;
+        for (rocksIterator.seekToFirst(); rocksIterator.isValid(); rocksIterator.next()) {
+//            logger.info("variant = {}", new String(rocksIterator.key()));
+            ProteinSubstitutionPrediction prediction = predictionReader.readValue(rocksIterator.value());
+            serializer.serialize(prediction);
+            counter++;
+            if (counter % 10000 == 0) {
+                logger.info("{} written", counter);
+            }
+        }
+        serializer.close();
+        logger.info("Done.");
+    }
+}
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java
index b593f44901..d0597c4c2a 100644
--- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java
+++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java
@@ -17,32 +17,33 @@
 package org.opencb.cellbase.lib.builders;

 import org.opencb.biodata.models.core.GenomicScoreRegion;
+import org.opencb.cellbase.core.exception.CellBaseException;
 import org.opencb.cellbase.core.serializer.CellBaseSerializer;
 import org.opencb.commons.utils.FileUtils;
-import org.slf4j.LoggerFactory;

 import java.io.BufferedReader;
+import java.io.File;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;

+import static org.opencb.cellbase.lib.EtlCommons.*;
+
 /**
  * Created by imedina on 06/11/15.
  */
 public class CaddScoreBuilder extends CellBaseBuilder {

-    private Path caddFilePath;
+    private Path caddDownloadPath;

     private static final int CHUNK_SIZE = 1000;
     private static final int DECIMAL_RESOLUTION = 100;

-    public CaddScoreBuilder(Path caddFilePath, CellBaseSerializer serializer) {
+    public CaddScoreBuilder(Path caddDownloadPath, CellBaseSerializer serializer) {
         super(serializer);
-        this.caddFilePath = caddFilePath;
-
-        logger = LoggerFactory.getLogger(ConservationBuilder.class);
+        this.caddDownloadPath = caddDownloadPath;
     }

     /* Example:
@@ -57,14 +58,25 @@ public CaddScoreBuilder(Path caddFilePath, CellBaseSerializer serializer) {
      */
     @Override
     public void parse() throws Exception {
-        FileUtils.checkPath(caddFilePath);
+        String dataName = getDataName(CADD_DATA);
+        String dataCategory = getDataCategory(CADD_DATA);
+
+        logger.info(CATEGORY_BUILDING_LOG_MESSAGE, dataCategory, dataName);
+
+        // Sanity check
+        checkDirectory(caddDownloadPath, dataName);
+
+        // Check CADD files
+        List<File> caddFiles = checkFiles(dataSourceReader.readValue(caddDownloadPath.resolve(getDataVersionFilename(CADD_DATA)).toFile()),
+                caddDownloadPath, dataName);
+        if (caddFiles.size() != 1) {
+            throw new CellBaseException("One " + dataName + " file is expected, but currently there are " + caddFiles.size() + " files");
+        }

-        BufferedReader bufferedReader = FileUtils.newBufferedReader(caddFilePath);
         List<Long> rawValues = new ArrayList<>(CHUNK_SIZE);
         List<Long> scaledValues = new ArrayList<>(CHUNK_SIZE);
         int start = 1;
-//        int end = 1999;
         int end = CHUNK_SIZE - 1;
         String line;
         String[] fields = new String[0];
@@ -72,8 +84,8 @@ public void parse() throws Exception {
         int lineCount = 0;
         int counter = 1;
         int serializedChunks = 0;
-        int previousPosition = 0;
-        int newPosition = 0;
+        int prevPos = 0;
+        int newPos = 0;
         String chromosome = null;
         String[] nucleotides = new String[]{"A", "C", "G", "T"};
@@ -81,127 +93,102 @@
         long scaledLongValue = 0;
         Map<String, Float> rawScoreValuesMap = new HashMap<>();
         Map<String, Float> scaledScoreValuesMap = new HashMap<>();
-        while ((line = bufferedReader.readLine()) != null) {
-            if (!line.startsWith("#")) {
-                fields = line.split("\t");
-                newPosition = Integer.parseInt(fields[1]);
-//                if (fields[0].equals("1") && fields[1].equals("249240621")) {
-//                if (fields[0].equals("1") && fields[1].equals("69100")) {
-//                if (fields[0].equals("1") && fields[1].equals("144854598")) {
-//                    logger.debug("line {} reached", line);
-//                    logger.debug("Associated chunk count {}", serializedChunks);
-//                    logger.debug("start {}", start);
-//                    logger.debug("end {}", end);
-//                    logger.debug("chunk size {}", CHUNK_SIZE);
-//                }
-                // this only happens the first time, when we start reading the file
-                if (chromosome == null) {
-                    logger.info("Parsing chr {} ", fields[0]);
-                    chromosome = fields[0];
-
-                    start = newPosition;
-                    previousPosition = newPosition;
-                    end = start + CHUNK_SIZE - 2;
-                }
-                if (!chromosome.equals(fields[0])) {
-                    logger.info("Parsing chr {} ", fields[0]);
-                    // both raw and scaled are serialized
-                    GenomicScoreRegion<Long> genomicScoreRegion =
-                            new GenomicScoreRegion<>(chromosome, start, previousPosition, "cadd_raw", rawValues);
-                    serializer.serialize(genomicScoreRegion);
-
-                    genomicScoreRegion = new GenomicScoreRegion<>(chromosome, start, previousPosition, "cadd_scaled", scaledValues);
-                    serializer.serialize(genomicScoreRegion);
-
-                    serializedChunks++;
-                    chromosome = fields[0];
-                    start = newPosition;
-//                    end = CHUNK_SIZE - 1;
-                    end = start + CHUNK_SIZE - 2;
-
-                    counter = 0;
-                    rawValues.clear();
-                    scaledValues.clear();
-//                    rawLongValue = 0;
-//                    lineCount = 0;
-//                    rawScoreValuesMap.clear();
-//                    scaledScoreValuesMap.clear();
-                // The series of cadd scores is not continuous through the whole chromosome
-                } else if (end < newPosition || (newPosition - previousPosition) > 1) {
-                    // both raw and scaled are serialized
-                    GenomicScoreRegion<Long> genomicScoreRegion
-                            = new GenomicScoreRegion<>(fields[0], start, previousPosition, "cadd_raw", rawValues);
-                    serializer.serialize(genomicScoreRegion);
-
-                    genomicScoreRegion
-                            = new GenomicScoreRegion<>(fields[0], start, previousPosition, "cadd_scaled", scaledValues);
-                    serializer.serialize(genomicScoreRegion);
-
-                    serializedChunks++;
-                    start = newPosition;
-//                    start = end + 1;
-//                    end += CHUNK_SIZE;
-                    end = (start / CHUNK_SIZE) * CHUNK_SIZE + CHUNK_SIZE - 1;
-
-                    counter = 0;
-                    rawValues.clear();
-                    scaledValues.clear();
-                }
+        logger.info(PARSING_LOG_MESSAGE, caddFiles.get(0));
+        try (BufferedReader bufferedReader = FileUtils.newBufferedReader(caddFiles.get(0).toPath())) {
+            while ((line = bufferedReader.readLine()) != null) {
+                if (!line.startsWith("#")) {
+                    fields = line.split("\t");
+                    newPos = Integer.parseInt(fields[1]);
+                    String message = "chrom. " + fields[0];
+                    // This only happens the first time, when we start reading the file
+                    if (chromosome == null) {
+                        logger.info(PARSING_LOG_MESSAGE, message);
+                        chromosome = fields[0];
+
+                        start = newPos;
+                        prevPos = newPos;
+                        end = start + CHUNK_SIZE - 2;
+                    }

-                rawScoreValuesMap.put(fields[3], Float.valueOf(fields[4]));
-                scaledScoreValuesMap.put(fields[3], Float.valueOf(fields[5]));
-
-                if (++lineCount == 3) {
-//                    if (fields[0].equals("1") && fields[1].equals("249240621")) {
-//                    if (fields[0].equals("1") && fields[1].equals("69100")) {
-//                    if (fields[0].equals("1") && fields[1].equals("144854598")) {
-//                        logger.info("offset: {}", rawValues.size());
-//                    }
-
-                    for (String nucleotide : nucleotides) {
-                        // raw CADD score values can be negative, we add 10 to make positive
-                        float a = rawScoreValuesMap.getOrDefault(nucleotide, 10f) + 10.0f;
-                        v = (short) (a * DECIMAL_RESOLUTION);
-                        rawLongValue = (rawLongValue << 16) | v;
-
-                        // scaled CADD scores are always positive
-                        a = scaledScoreValuesMap.getOrDefault(nucleotide, 0f);
-                        v = (short) (a * DECIMAL_RESOLUTION);
-                        scaledLongValue = (scaledLongValue << 16) | v;
+                    if (!chromosome.equals(fields[0])) {
+                        logger.info(PARSING_LOG_MESSAGE, message);
+
+                        // Both raw and scaled are serialized
+                        GenomicScoreRegion<Long> genomicScoreRegion = new GenomicScoreRegion<>(chromosome, start, prevPos, CADD_RAW_DATA,
+                                rawValues);
+                        serializer.serialize(genomicScoreRegion);
+
+                        genomicScoreRegion = new GenomicScoreRegion<>(chromosome, start, prevPos, CADD_SCALED_DATA, scaledValues);
+                        serializer.serialize(genomicScoreRegion);
+
+                        serializedChunks++;
+                        chromosome = fields[0];
+                        start = newPos;
+                        end = start + CHUNK_SIZE - 2;
+
+                        counter = 0;
+                        rawValues.clear();
+                        scaledValues.clear();
+                    // The series of cadd scores is not continuous through the whole chromosome
+                    } else if (end < newPos || (newPos - prevPos) > 1) {
+                        // Both raw and scaled are serialized
+                        GenomicScoreRegion<Long> genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, prevPos, CADD_RAW_DATA,
+                                rawValues);
+                        serializer.serialize(genomicScoreRegion);
+
+                        genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, prevPos, CADD_SCALED_DATA, scaledValues);
+                        serializer.serialize(genomicScoreRegion);
+
+                        serializedChunks++;
+                        start = newPos;
+                        end = (start / CHUNK_SIZE) * CHUNK_SIZE + CHUNK_SIZE - 1;
+
+                        counter = 0;
+                        rawValues.clear();
+                        scaledValues.clear();
                     }
-//                if (rawLongValue < 0 || scaledLongValue < 0) {
-//                    logger.error("raw/scaled Long Values cannot be 0");
-//                    logger.error("Last read line {}", line);
-//                    System.exit(1);
-//                }
-                rawValues.add(rawLongValue);
-                scaledValues.add(scaledLongValue);
-
-                counter++;
-                rawLongValue = 0;
-                lineCount = 0;
-                rawScoreValuesMap.clear();
-                scaledScoreValuesMap.clear();
+
+                    rawScoreValuesMap.put(fields[3], Float.valueOf(fields[4]));
+                    scaledScoreValuesMap.put(fields[3], Float.valueOf(fields[5]));
+
+                    if (++lineCount == 3) {
+                        for (String nucleotide : nucleotides) {
+                            // Raw CADD score values can be negative, we add 10 to make positive
+                            float a = rawScoreValuesMap.getOrDefault(nucleotide, 10f) + 10.0f;
+                            v = (short) (a * DECIMAL_RESOLUTION);
+                            rawLongValue = (rawLongValue << 16) | v;
+
+                            // Scaled CADD scores are always positive
+                            a = scaledScoreValuesMap.getOrDefault(nucleotide, 0f);
+                            v = (short) (a * DECIMAL_RESOLUTION);
+                            scaledLongValue = (scaledLongValue << 16) | v;
+                        }
+
+                        rawValues.add(rawLongValue);
+                        scaledValues.add(scaledLongValue);
+
+                        counter++;
+                        rawLongValue = 0;
+                        lineCount = 0;
+                        rawScoreValuesMap.clear();
+                        scaledScoreValuesMap.clear();
+                    }
+                    prevPos = newPos;
                 }
-                previousPosition = newPosition;
             }
-        }

-        // Last chunks can be incomplete for both raw and scaled are serialized
-//        GenomicScoreRegion genomicScoreRegion =
-//                new GenomicScoreRegion<>(fields[0], start, start + rawValues.size() - 1, "cadd_raw", rawValues);
-        GenomicScoreRegion<Long> genomicScoreRegion =
-                new GenomicScoreRegion<>(fields[0], start, newPosition, "cadd_raw", rawValues);
-        serializer.serialize(genomicScoreRegion);
+            // The last chunks can be incomplete, so both raw and scaled are serialized here
+            GenomicScoreRegion<Long> genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, newPos, CADD_RAW_DATA, rawValues);
+            serializer.serialize(genomicScoreRegion);
+
+            genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, newPos, CADD_SCALED_DATA, scaledValues);
+            serializer.serialize(genomicScoreRegion);

-//        genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, start + scaledValues.size() - 1, "cadd_scaled", scaledValues);
-        genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, newPosition, "cadd_scaled", scaledValues);
-        serializer.serialize(genomicScoreRegion);
+            serializer.close();
+        }
+        logger.info(PARSING_DONE_LOG_MESSAGE, caddFiles.get(0));

-        serializer.close();
-        bufferedReader.close();
-        logger.info("Parsing finished.");
+        logger.info(CATEGORY_BUILDING_DONE_LOG_MESSAGE, dataCategory, dataName);
     }
 }
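CaddScoreBuilder packs the scores of the four alternate alleles at one position into a single long, 16 bits per nucleotide in A/C/G/T order, after multiplying by DECIMAL_RESOLUTION (100); raw scores are shifted by +10 first so the stored 16-bit value stays positive. A sketch of the packing, together with a plausible decoding (the decoder is not part of this patch):

    // Packing, mirroring the builder
    float[] rawScores = {0.12f, -1.50f, 0.75f, 2.30f};  // raw CADD scores for A, C, G, T
    long packed = 0;
    for (float s : rawScores) {
        short v = (short) ((s + 10.0f) * 100);          // +10 shift; 100 = DECIMAL_RESOLUTION
        packed = (packed << 16) | v;
    }
    // Plausible decoding: slot 0 = "A", ..., slot 3 = "T"
    int slot = 3;
    short stored = (short) ((packed >>> (16 * (3 - slot))) & 0xFFFF);
    float rawScore = stored / 100.0f - 10.0f;           // ~2.30f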
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CellBaseBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CellBaseBuilder.java
index 79e5b7e58b..fe1b5fe648 100644
--- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CellBaseBuilder.java
+++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CellBaseBuilder.java
@@ -16,34 +16,145 @@
 package org.opencb.cellbase.lib.builders;

+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
+import org.apache.commons.lang3.StringUtils;
+import org.opencb.cellbase.core.config.DownloadProperties;
+import org.opencb.cellbase.core.exception.CellBaseException;
+import org.opencb.cellbase.core.models.DataSource;
 import org.opencb.cellbase.core.serializer.CellBaseSerializer;
+import org.opencb.cellbase.lib.EtlCommons;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.opencb.cellbase.lib.EtlCommons.*;
+
 /**
  * Created by imedina on 30/08/14.
  */
 public abstract class CellBaseBuilder {

 protected CellBaseSerializer serializer;

+    protected ObjectReader dataSourceReader = new ObjectMapper().readerFor(DataSource.class);
+
+    protected boolean checked;

 protected Logger logger;

+    public static final String CHECKING_BEFORE_BUILDING_LOG_MESSAGE = "Checking files before building {} ...";
+    public static final String CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE = "Checking {} done!";
+
+    public static final String BUILDING_LOG_MESSAGE = "Building {} ...";
+    public static final String BUILDING_DONE_LOG_MESSAGE = "Building done!";
+
+    public static final String CATEGORY_BUILDING_LOG_MESSAGE = "Building {}/{} ...";
+    public static final String CATEGORY_BUILDING_DONE_LOG_MESSAGE = "Building done!";
+
+    public static final String PARSING_LOG_MESSAGE = "Parsing {} ...";
+    public static final String PARSING_DONE_LOG_MESSAGE = "Parsing done!";
+
 public CellBaseBuilder(CellBaseSerializer serializer) {
 logger = LoggerFactory.getLogger(this.getClass());

 this.serializer = serializer;
-        //this.serializer.open();
+
+        this.checked = false;
 }

 public abstract void parse() throws Exception;

 public void disconnect() {
-        try {
-            serializer.close();
-        } catch (Exception e) {
-            logger.error("Disconnecting serializer: " + e.getMessage());
+        if (serializer != null) {
+            try {
+                serializer.close();
+            } catch (Exception e) {
+                logger.error("Error closing serializer:\n" + StringUtils.join(e.getStackTrace(), "\n"));
+            }
+        }
+    }
+
+    protected File checkFile(String data, DownloadProperties.URLProperties props, String fileId, Path targetPath) throws CellBaseException {
+        logger.info("Checking file {}/{} ...", getDataName(data), fileId);
+        if (!props.getFiles().containsKey(fileId)) {
+            throw new CellBaseException("File ID " + fileId + " does not exist in the configuration file in the section '" + data + "'");
+        }
+        if (!Files.exists(targetPath)) {
+            throw new CellBaseException("Folder does not exist " + targetPath);
+        }
+
+        String filename = Paths.get(props.getFiles().get(fileId)).getFileName().toString();
+        Path filePath = targetPath.resolve(filename);
+        if (!Files.exists(filePath)) {
+            throw new CellBaseException(getDataName(data) + " file " + filePath + " does not exist");
 }
+        logger.info("Ok.");
+        return filePath.toFile();
 }

+    protected List<File> checkFiles(String data, Path downloadPath, int expectedFiles) throws CellBaseException, IOException {
+        return checkFiles(getDataName(data), data, downloadPath, expectedFiles);
+    }
+
+    protected List<File> checkFiles(String label, String data, Path downloadPath, int expectedFiles) throws CellBaseException, IOException {
+        List<File> files = checkFiles(dataSourceReader.readValue(downloadPath.resolve(getDataVersionFilename(data)).toFile()),
+                downloadPath, label);
+        if (files.size() != expectedFiles) {
+            throw new CellBaseException(expectedFiles + " " + label + " files are expected at " + downloadPath + ", but currently there"
+                    + " are " + files.size() + " files");
+        }
+        return files;
+    }
+
+    protected List<File> checkFiles(DataSource dataSource, Path targetPath, String name) throws CellBaseException {
+        logger.info("Checking {} folder and files ...", name);
+        if (!targetPath.toFile().exists()) {
+            throw new CellBaseException(name + " folder does not exist " + targetPath);
+        }
+
+        List<File> files = new ArrayList<>();
+
+        List<String> filenames = dataSource.getUrls().stream().map(u -> Paths.get(u).getFileName().toString()).collect(Collectors.toList());
+        for (String filename : filenames) {
+            File file = targetPath.resolve(filename).toFile();
+            if (!file.exists()) {
+                throw new CellBaseException("File " + file + " does not exist");
+            } else {
+                files.add(file);
+            }
+        }
+        logger.info("Ok.");
+        return files;
+    }
+
+    protected Path getIndexFastaReferenceGenome(Path fastaPath) throws CellBaseException {
+        Path indexFastaPath = Paths.get(fastaPath + FAI_EXTENSION);
+        if (!Files.exists(indexFastaPath)) {
+            // Index FASTA file
+            logger.info("Indexing FASTA file {} ...", fastaPath);
+            String errorMsg = "Error executing 'samtools faidx' for FASTA file ";
+            try {
+                List<String> params = Arrays.asList("faidx", fastaPath.toString());
+                EtlCommons.runCommandLineProcess(null, "samtools", params, null);
+            } catch (IOException e) {
+                throw new CellBaseException(errorMsg + fastaPath, e);
+            } catch (InterruptedException e) {
+                // Restore interrupted state...
+                Thread.currentThread().interrupt();
+                throw new CellBaseException(errorMsg + fastaPath, e);
+            }
+            if (!Files.exists(indexFastaPath)) {
+                throw new CellBaseException("The FASTA file " + fastaPath + " could not be indexed. Please try to index it manually!");
+            }
+        }
+        return indexFastaPath;
+    }
 }
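getIndexFastaReferenceGenome delegates the actual indexing to samtools via runCommandLineProcess; the command it issues is equivalent to running samtools faidx on the FASTA file, which writes the .fai index next to the input. A usage sketch with a hypothetical path:

    // Hypothetical path, for illustration only
    Path fastaPath = Paths.get("/data/genome/Homo_sapiens.GRCh38.fa");
    EtlCommons.runCommandLineProcess(null, "samtools", Arrays.asList("faidx", fastaPath.toString()), null);
    // On success samtools creates /data/genome/Homo_sapiens.GRCh38.fa.fai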
cannot" + + " be read"); } - /* - * GERP is downloaded from Ensembl as a bigwig file. The library we have doesn't seem to parse - * this file correctly, so we transform the file into a bedGraph format which is human readable. - */ - Path gerpFolderPath = conservedRegionPath.resolve(EtlCommons.GERP_SUBDIRECTORY); - if (gerpFolderPath.toFile().exists()) { - logger.debug("Parsing GERP data ..."); - gerpParser(gerpFolderPath); - } else { - logger.debug("GERP data not found: " + gerpFolderPath.toString()); + // Check GERP folder and files + Path gerpPath = conservedRegionPath.resolve(GERP_DATA); + DataSource dataSource = dataSourceReader.readValue(conservedRegionPath.resolve(getDataVersionFilename(GERP_DATA)).toFile()); + List gerpFiles = checkFiles(dataSource, gerpPath, getDataName(GERP_DATA)); + + // Check PhastCons folder and files + Path phastConsPath = conservedRegionPath.resolve(PHASTCONS_DATA); + dataSource = dataSourceReader.readValue(conservedRegionPath.resolve(getDataVersionFilename(PHASTCONS_DATA)).toFile()); + List phastConsFiles = checkFiles(dataSource, phastConsPath, getDataName(PHASTCONS_DATA)); + + // Check PhyloP folder and files + Path phylopPath = conservedRegionPath.resolve(PHYLOP_DATA); + dataSource = dataSourceReader.readValue(conservedRegionPath.resolve(getDataVersionFilename(PHYLOP_DATA)).toFile()); + List phylopFiles = checkFiles(dataSource, phylopPath, getDataName(PHYLOP_DATA)); + + // GERP is downloaded from Ensembl as a bigwig file. The library we have doesn't seem to parse + // this file correctly, so we transform the file into a bedGraph format which is human-readable. + if (gerpFiles.size() != 1) { + throw new CellBaseException("Only one " + getDataName(GERP_DATA) + " file is expected, but currently there are " + + gerpFiles.size() + " files"); } + File bigwigFile = gerpFiles.get(0); + File bedgraphFile = Paths.get(gerpFiles.get(0).getAbsolutePath() + ".bedgraph").toFile(); + String exec = "bigWigToBedGraph"; + if (!bedgraphFile.exists()) { + try { + if (isExecutableAvailable(exec)) { + EtlCommons.runCommandLineProcess(null, exec, Arrays.asList(bigwigFile.toString(), bedgraphFile.toString()), null); + } else { + throw new CellBaseException(exec + " not found in your system, install it to build " + getDataName(GERP_DATA) + + ". It is available at http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/"); + } + } catch (IOException e) { + throw new CellBaseException("Error executing " + exec + " in BIGWIG file " + bigwigFile, e); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("" + e.getMessage(), e); + } + if (!bedgraphFile.exists()) { + throw new CellBaseException("Something happened when executing " + exec + " in BIGWIG file " + bigwigFile + "; the BED" + + " graph file was not generated. Please, check " + exec); + } + } + gerpParser(bedgraphFile.toPath()); - /* - * UCSC phastCons and phylop are stored in the same format. They are processed together. - */ + // UCSC phastCons and phylop are stored in the same format. They are processed together. 
Map files = new HashMap<>(); String chromosome; Set chromosomes = new HashSet<>(); - // Reading all files in phastCons folder - DirectoryStream directoryStream = Files.newDirectoryStream(conservedRegionPath.resolve("phastCons"), "*.wigFix.gz"); - for (Path path : directoryStream) { - chromosome = path.getFileName().toString().split("\\.")[0].replace("chr", ""); + // Process PhastCons filenames + for (File file : phastConsFiles) { + chromosome = file.getName().split("\\.")[0].replace("chr", ""); chromosomes.add(chromosome); - files.put(chromosome + "phastCons", path); + files.put(chromosome + PHASTCONS_DATA, file.toPath()); } - // Reading all files in phylop folder - directoryStream = Files.newDirectoryStream(conservedRegionPath.resolve("phylop"), "*.wigFix.gz"); - for (Path path : directoryStream) { - chromosome = path.getFileName().toString().split("\\.")[0].replace("chr", ""); + // Process PhyloP filenames + for (File file : phylopFiles) { + chromosome = file.getName().split("\\.")[0].replace("chr", ""); chromosomes.add(chromosome); - files.put(chromosome + "phylop", path); + files.put(chromosome + PHYLOP_DATA, file.toPath()); } - /* - * Now we can iterate over all the chromosomes found and process the files - */ - logger.debug("Chromosomes found '{}'", chromosomes.toString()); + // Now we can iterate over all the chromosomes found and process the files + logger.debug("Chromosomes found '{}'", chromosomes); for (String chr : chromosomes) { - logger.debug("Processing chromosome '{}', file '{}'", chr, files.get(chr + "phastCons")); - processWigFixFile(files.get(chr + "phastCons"), "phastCons"); + logger.debug("Processing chromosome '{}', file '{}'", chr, files.get(chr + PHASTCONS_DATA)); + processWigFixFile(files.get(chr + PHASTCONS_DATA), PHASTCONS_DATA); - logger.debug("Processing chromosome '{}', file '{}'", chr, files.get(chr + "phylop")); - processWigFixFile(files.get(chr + "phylop"), "phylop"); + logger.debug("Processing chromosome '{}', file '{}'", chr, files.get(chr + PHYLOP_DATA)); + processWigFixFile(files.get(chr + PHYLOP_DATA), PHYLOP_DATA); } + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(CONSERVATION_DATA)); } - private void gerpParser(Path gerpFolderPath) throws IOException, CellBaseException { - Path gerpProcessFilePath = gerpFolderPath.resolve(EtlCommons.GERP_PROCESSED_FILE); - logger.info("parsing {}", gerpProcessFilePath); - BufferedReader bufferedReader = FileUtils.newBufferedReader(gerpProcessFilePath); - - String line; - int startOfBatch = 0; - int previousEndValue = 0; - String chromosome = null; - String previousChromosomeValue = null; - - List conservationScores = new ArrayList<>(chunkSize); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - - // file is wrong. throw an exception instead? 
-                if (fields.length != 4) {
-                    logger.error("skipping invalid line: " + line.length());
-                    continue;
-                }
+    private void gerpParser(Path gerpProcessFilePath) throws IOException, CellBaseException {
+        logger.info(PARSING_LOG_MESSAGE, gerpProcessFilePath);
-                chromosome = fields[0];
+        try (BufferedReader bufferedReader = FileUtils.newBufferedReader(gerpProcessFilePath)) {
+            String line;
+            int startOfBatch = 0;
+            int previousEndValue = 0;
+            String chromosome = null;
+            String previousChromosomeValue = null;
-                // new chromosome, store batch
-                if (previousChromosomeValue != null && !previousChromosomeValue.equals(chromosome)) {
-                    storeScores(startOfBatch, previousChromosomeValue, conservationScores);
+            List<Float> conservationScores = new ArrayList<>(chunkSize);
+            while ((line = bufferedReader.readLine()) != null) {
+                String[] fields = line.split("\t");
-                    // reset values for current batch
-                    startOfBatch = 0;
-                }
+                // Checking line
+                if (fields.length != 4) {
+                    throw new CellBaseException("Invalid " + getDataName(GERP_DATA) + " line (expecting 4 columns): " + fields.length
+                            + " items: " + line);
+                }
-                // reset chromosome for next entry
-                previousChromosomeValue = chromosome;
+                chromosome = fields[0];
-                // file is american! starts at zero, add one
-                int start = Integer.parseInt(fields[1]) + 1;
-                // inclusive
-                int end = Integer.parseInt(fields[2]) + 1;
+                // New chromosome, store batch
+                if (previousChromosomeValue != null && !previousChromosomeValue.equals(chromosome)) {
+                    storeScores(startOfBatch, previousChromosomeValue, conservationScores);
-                // start coordinate for this batch of 2,000
-                if (startOfBatch == 0) {
-                    startOfBatch = start;
-                    previousEndValue = 0;
-                }
+                    // Reset values for current batch
+                    startOfBatch = 0;
+                }
-                // if there is a gap between the last entry and this one.
-                if (previousEndValue != 0 && (start - previousEndValue) != 0) {
-                    // gap is too big! store what we already have before processing more
-                    if (start - previousEndValue >= chunkSize) {
-                        // we have a full batch, store
-                        storeScores(startOfBatch, chromosome, conservationScores);
+                // Reset chromosome for next entry
+                previousChromosomeValue = chromosome;
-                        // reset batch to start at this record
+                // bedGraph is 0-based: starts at zero, add one to convert to 1-based
+                int start = Integer.parseInt(fields[1]) + 1;
+                // Exclusive upper bound (the bedGraph end coordinate is already exclusive)
+                int end = Integer.parseInt(fields[2]) + 1;
+
+                // Start coordinate for this batch of 2,000
+                if (startOfBatch == 0) {
                     startOfBatch = start;
-                } else {
-                    // fill in the gap with zeroes
-                    // don't overfill the batch
-                    while (previousEndValue < start && conservationScores.size() < chunkSize) {
-                        conservationScores.add((float) 0);
-                        previousEndValue++;
+                    previousEndValue = 0;
+                }
+
+                // If there is a gap between the last entry and this one
+                if (previousEndValue != 0 && (start - previousEndValue) != 0) {
+                    // Gap is too big! store what we already have before processing more
+                    if (start - previousEndValue >= chunkSize) {
+                        // We have a full batch, store
+                        storeScores(startOfBatch, chromosome, conservationScores);
+
+                        // Reset batch to start at this record
+                        startOfBatch = start;
+                    } else {
+                        // Fill in the gap with zeroes, don't overfill the batch
+                        while (previousEndValue < start && conservationScores.size() < chunkSize) {
+                            conservationScores.add((float) 0);
+                            previousEndValue++;
+                        }
+
+                        // We have a full batch, store
+                        if (conservationScores.size() == chunkSize) {
+                            storeScores(startOfBatch, chromosome, conservationScores);
+
+                            // Reset: start a new batch
+                            startOfBatch = start;
+                        }
+                    }
+                }
-                        // we have a full batch, store
+                // Reset value
+                previousEndValue = end;
+
+                // Score for these coordinates
+                String score = fields[3];
+
+                // Add the score for each coordinate included in the range start-end
+                while (start < end) {
+                    // We have a full batch: store
                     if (conservationScores.size() == chunkSize) {
                         storeScores(startOfBatch, chromosome, conservationScores);
-                        // reset. start a new batch
+                        // Reset: start a new batch
                         startOfBatch = start;
                     }
-                }
-            }
-            // reset value
-            previousEndValue = end;
+                    // Add score to batch
+                    conservationScores.add(Float.valueOf(score));
-            // score for these coordinates
-            String score = fields[3];
+                    // Increment coordinate
+                    start++;
+                }
-            // add the score for each coordinate included in the range start-end
-            while (start < end) {
-                // we have a full batch, store
+                // We have a full batch: store
                 if (conservationScores.size() == chunkSize) {
                     storeScores(startOfBatch, chromosome, conservationScores);
-                    // reset. start a new batch
-                    startOfBatch = start;
+                    // Reset: start a new batch
+                    startOfBatch = 0;
                 }
-
-                // add score to batch
-                conservationScores.add(Float.valueOf(score));
-
-                // increment coordinate
-                start++;
             }
-
-            // we have a full batch, store
-            if (conservationScores.size() == chunkSize) {
+            // We need to serialize the last chunk that might be incomplete
+            if (!conservationScores.isEmpty()) {
                 storeScores(startOfBatch, chromosome, conservationScores);
-
-                // reset, start a new batch
-                startOfBatch = 0;
             }
         }
-        // we need to serialize the last chunk that might be incomplete
-        if (!conservationScores.isEmpty()) {
-            storeScores(startOfBatch, chromosome, conservationScores);
-        }
-        bufferedReader.close();
+
+        logger.info(PARSING_DONE_LOG_MESSAGE, gerpProcessFilePath);
    }

    private void storeScores(int startOfBatch, String chromosome, List<Float> conservationScores) throws CellBaseException {
-        // if this is a small batch, fill in the missing coordinates with 0
+        // If this is a small batch, fill in the missing coordinates with 0
        while (conservationScores.size() < chunkSize) {
            conservationScores.add((float) 0);
        }
        if (conservationScores.size() != chunkSize) {
-            throw new CellBaseException("invalid chunk size " + conservationScores.size() + " for " + chromosome + ":" + startOfBatch);
+            throw new CellBaseException("Invalid chunk size " + conservationScores.size() + " for " + chromosome + ":" + startOfBatch);
        }
-        GenomicScoreRegion<Float> conservationScoreRegion = new GenomicScoreRegion(chromosome, startOfBatch,
-                startOfBatch + conservationScores.size() - 1, "gerp", conservationScores);
+        GenomicScoreRegion<Float> conservationScoreRegion = new GenomicScoreRegion<>(chromosome, startOfBatch,
+                startOfBatch + conservationScores.size() - 1, GERP_DATA, conservationScores);
        fileSerializer.serialize(conservationScoreRegion, getOutputFileName(chromosome));
-        // reset
+        // Reset
        conservationScores.clear();
    }

-// @Deprecated
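(The commented-out parser removed below predates this bedGraph-based approach.) To make the coordinate handling in gerpParser above concrete: bedGraph records are 0-based and half-open, so the parser shifts both bounds by one and then repeats the score once per covered base. A self-contained sketch of that arithmetic:

public class GerpCoordinatesExample {
    public static void main(String[] args) {
        // A bedGraph record "1  999  1002  2.5" is 0-based and half-open,
        // i.e. it scores 1-based positions 1000, 1001 and 1002
        int bedStart = 999;
        int bedEnd = 1002;
        float score = 2.5f;
        int start = bedStart + 1; // first 1-based position covered: 1000
        int end = bedEnd + 1;     // exclusive bound, mirroring gerpParser's while (start < end)
        for (int pos = start; pos < end; pos++) {
            System.out.println("position " + pos + " -> " + score);
        }
    }
}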
-// private void gerpParser(Path gerpFolderPath) throws IOException, InterruptedException { -// logger.info("Uncompressing {}", gerpFolderPath.resolve(EtlCommons.GERP_FILE)); -// List tarArgs = Arrays.asList("-xvzf", gerpFolderPath.resolve(EtlCommons.GERP_FILE).toString(), -// "--overwrite", "-C", gerpFolderPath.toString()); -// EtlCommons.runCommandLineProcess(null, "tar", tarArgs, null); -// -// DirectoryStream pathDirectoryStream = Files.newDirectoryStream(gerpFolderPath, "*.rates"); -// boolean filesFound = false; -// for (Path path : pathDirectoryStream) { -// filesFound = true; -// logger.info("Processing file '{}'", path.getFileName().toString()); -// String[] chromosome = path.getFileName().toString().replaceFirst("chr", "").split("\\."); -// BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(String.valueOf(path)))); -// String line; -// int start = 1; -// int end = 1999; -// int counter = 1; -// String[] fields; -// List val = new ArrayList<>(chunkSize); -// while ((line = bufferedReader.readLine()) != null) { -// fields = line.split("\t"); -// val.add(Float.valueOf(fields[1])); -// counter++; -// if (counter == chunkSize) { -//// ConservationScoreRegion conservationScoreRegion = new ConservationScoreRegion(chromosome[0], start, end, "gerp", -// val); -// GenomicScoreRegion conservationScoreRegion = -// new GenomicScoreRegion<>(chromosome[0], start, end, "gerp", val); -// fileSerializer.serialize(conservationScoreRegion, getOutputFileName(chromosome[0])); -// -// start = end + 1; -// end += chunkSize; -// -// counter = 0; -// val.clear(); -// } -// } -// -// // we need to serialize the last chunk that might be incomplete -//// ConservationScoreRegion conservationScoreRegion = -//// new ConservationScoreRegion(chromosome[0], start, start + val.size() - 1, "gerp", val); -// GenomicScoreRegion conservationScoreRegion = -// new GenomicScoreRegion<>(chromosome[0], start, start + val.size() - 1, "gerp", val); -// fileSerializer.serialize(conservationScoreRegion, getOutputFileName(chromosome[0])); -// -// bufferedReader.close(); -// } -// -// if (!filesFound) { -// logger.warn("No GERP++ files were found. 
Please check that the original file {} is there, that it was" -// + " properly decompressed and that the *.rates files are present", -// gerpFolderPath.resolve(EtlCommons.GERP_FILE)); -// } -// } - private void processWigFixFile(Path inGzPath, String conservationSource) throws IOException { - BufferedReader bufferedReader = FileUtils.newBufferedReader(inGzPath); - - String line; - String chromosome = ""; -// int start = 0, end = 0; - int start = 0; - float value; - Map attributes = new HashMap<>(); -// ConservedRegion conservedRegion = null; - List values = new ArrayList<>(); -// ConservationScoreRegion conservedRegion = null; - GenomicScoreRegion conservedRegion = null; - - while ((line = bufferedReader.readLine()) != null) { - if (line.startsWith("fixedStep")) { - //new group, save last - if (conservedRegion != null) { -// conservedRegion.setEnd(end); -// conservedRegion = new ConservationScoreRegion(chromosome, start, end, conservationSource, values); - conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, - conservationSource, values); - fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); - } + logger.info(PARSING_LOG_MESSAGE, inGzPath); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(inGzPath)) { + + String line; + String chromosome = ""; + int start = 0; + float value; + Map attributes = new HashMap<>(); + List values = new ArrayList<>(); + GenomicScoreRegion conservedRegion = null; + + while ((line = bufferedReader.readLine()) != null) { + if (line.startsWith("fixedStep")) { + // New group, save last + if (conservedRegion != null) { + conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, + conservationSource, values); + fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); + } -// offset = 0; - attributes.clear(); - String[] attrFields = line.split(" "); - String[] attrKeyValue; - for (String attrField : attrFields) { - if (!attrField.equalsIgnoreCase("fixedStep")) { - attrKeyValue = attrField.split("="); - attributes.put(attrKeyValue[0].toLowerCase(), attrKeyValue[1]); + attributes.clear(); + String[] attrFields = line.split(" "); + String[] attrKeyValue; + for (String attrField : attrFields) { + if (!attrField.equalsIgnoreCase("fixedStep")) { + attrKeyValue = attrField.split("="); + attributes.put(attrKeyValue[0].toLowerCase(), attrKeyValue[1]); + } } - } - chromosome = formatChromosome(attributes); - start = Integer.parseInt(attributes.get("start")); -// end = Integer.parseInt(attributes.get("start")); - - values = new ArrayList<>(2000); - } else { - int startChunk = start / MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; -// end++; - int endChunk = (start + values.size()) / MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; - // This is the endChunk if current read score is - // appended to the array (otherwise it would be - // start + values.size() - 1). If this endChunk is - // different from the startChunk means that current - // conserved region must be dumped and current - // score must be associated to next chunk. Main - // difference to what there was before is that if - // the fixedStep starts on the last position of a - // chunk e.g. 
1999, the chunk must be created with - // just that score - the chunk was left empty with - // the old code - if (startChunk != endChunk) { -// conservedRegion = new ConservationScoreRegion(chromosome, start, end - 1, conservationSource, values); - conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, - conservationSource, values); - fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); - start = start + values.size(); - values.clear(); - } + chromosome = formatChromosome(attributes); + start = Integer.parseInt(attributes.get("start")); - value = Float.parseFloat(line.trim()); - values.add(value); + values = new ArrayList<>(2000); + } else { + int startChunk = start / MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; + int endChunk = (start + values.size()) / MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; + // This is the endChunk if current read score is appended to the array (otherwise it would be start + values.size() + // - 1). If this endChunk is different from the startChunk means that current conserved region must be dumped and + // current score must be associated to next chunk. Main difference to what there was before is that if the fixedStep + // starts on the last position of a chunk e.g. 1999, the chunk must be created with just that score - the chunk was + // left empty with the old code + if (startChunk != endChunk) { + conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, conservationSource, + values); + fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); + start = start + values.size(); + values.clear(); + } + + value = Float.parseFloat(line.trim()); + values.add(value); + } } + + // Write last + conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, conservationSource, values); + fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); } - //write last -// conservedRegion = new ConservationScoreRegion(chromosome, start, end, conservationSource, values); - conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, conservationSource, - values); - fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); - bufferedReader.close(); + logger.info(PARSING_DONE_LOG_MESSAGE, inGzPath); } private String getOutputFileName(String chromosome) { @@ -379,13 +341,18 @@ private String getOutputFileName(String chromosome) { } String outputFileName = outputFileNames.get(chromosome); if (outputFileName == null) { - outputFileName = "conservation_" + chromosome; + outputFileName = getFilename(CONSERVATION_DATA, chromosome); outputFileNames.put(chromosome, outputFileName); } return outputFileName; } - // phylop and phastcons list the chromosome as M instead of the standard MT. replace. + /** + * Remove chr from the chromosome name; and phylop and phastcons list the chromosome as M instead of the standard MT, replace it. 
+ * + * @param attributes Attributes map with the chromosome name + * @return The new chromosome name + */ private String formatChromosome(Map attributes) { String chromosome = attributes.get("chrom").replace("chr", ""); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilder.java new file mode 100644 index 0000000000..d6b935fa52 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilder.java @@ -0,0 +1,956 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.builders; + +import htsjdk.tribble.readers.TabixReader; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.feature.gff.Gff2; +import org.opencb.biodata.formats.feature.gtf.Gtf; +import org.opencb.biodata.formats.feature.gtf.io.GtfReader; +import org.opencb.biodata.formats.io.FileFormatException; +import org.opencb.biodata.models.core.*; +import org.opencb.biodata.tools.sequence.FastaIndex; +import org.opencb.cellbase.core.ParamConstants; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.SpeciesConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; +import org.opencb.cellbase.core.serializer.CellBaseSerializer; +import org.rocksdb.RocksDBException; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Collectors; + +import static org.opencb.cellbase.lib.EtlCommons.*; + +public class EnsemblGeneBuilder extends CellBaseBuilder { + + private Path downloadPath; + private SpeciesConfiguration speciesConfiguration; + private boolean flexibleGTFParsing; + private CellBaseConfiguration configuration; + + private Map transcriptDict; + private Map exonDict; + + private Path gtfFile; + private Path proteinFastaFile; + private Path cDnaFastaFile; + private Path geneDescriptionFile; + private Path xrefsFile; + private Path hgncFile; + private Path maneFile; + private Path lrgFile; + private Path uniprotIdMappingFile; + private Path tfbsFile; + private Path tabixFile; + private Path geneExpressionFile; + private Path geneDrugFile; + private Path hpoFile; + private Path disgenetFile; + private Path genomeSequenceFilePath; + private Path gnomadFile; + private Path geneOntologyAnnotationFile; + private Path miRBaseFile; + private Path miRTarBaseFile; + private Path cancerGeneCensusFile; + private Path cancerHostpotFile; + private Path ensemblCanonicalFile; + private Path tso500File; + private Path eglhHaemOncFile; + + // source for genes is either ensembl or refseq + private final String SOURCE = ParamConstants.QueryParams.ENSEMBL.key(); + + private int geneCounter; + private ArrayList geneList; + private String geneName; + private 
int transcriptCounter; + private ArrayList transcriptList; + private String transcriptName; + private int exonCounter; + private String feature; + private Gtf nextGtfToReturn; + + public EnsemblGeneBuilder(Path downloadPath, SpeciesConfiguration speciesConfiguration, boolean flexibleGTFParsing, + CellBaseSerializer serializer) { + super(serializer); + + this.downloadPath = downloadPath; + this.speciesConfiguration = speciesConfiguration; + this.flexibleGTFParsing = flexibleGTFParsing; + + transcriptDict = new HashMap<>(250000); + exonDict = new HashMap<>(8000000); + } + + public void check() throws Exception { + if (checked) { + return; + } + + String ensemblGeneLabel = getDataName(ENSEMBL_DATA) + " " + getDataName(GENE_DATA); + logger.info(CHECKING_BEFORE_BUILDING_LOG_MESSAGE, ensemblGeneLabel); + + // Sanity check + checkDirectory(downloadPath, ensemblGeneLabel); + if (!Files.exists(serializer.getOutdir())) { + try { + Files.createDirectories(serializer.getOutdir()); + } catch (IOException e) { + throw new CellBaseException("Error creating folder " + serializer.getOutdir(), e); + } + } + + // Check Ensembl files + List files = checkFiles(ensemblGeneLabel, ENSEMBL_DATA, downloadPath, 3); + gtfFile = files.stream().filter(f -> f.getName().contains(".gtf")).findFirst().get().toPath(); + proteinFastaFile = files.stream().filter(f -> f.getName().contains(".pep.all.fa")).findFirst().get().toPath(); + cDnaFastaFile = files.stream().filter(f -> f.getName().contains(".cdna.all.fa")).findFirst().get().toPath(); + + // Check common files + // geneDescriptionFile = + // xrefsFile = + maneFile = checkFiles(MANE_SELECT_DATA, downloadPath.getParent(), 1).get(0).toPath(); + lrgFile = checkFiles(LRG_DATA, downloadPath.getParent(), 1).get(0).toPath(); + hgncFile = checkFiles(HGNC_DATA, downloadPath.getParent(), 1).get(0).toPath(); + cancerHostpotFile = checkFiles(CANCER_HOTSPOT_DATA, downloadPath.getParent(), 1).get(0).toPath(); + geneDrugFile = checkFiles(DGIDB_DATA, downloadPath.getParent(), 1).get(0).toPath(); + uniprotIdMappingFile = checkFiles(UNIPROT_XREF_DATA, downloadPath.getParent(), 1).get(0).toPath(); + geneExpressionFile = checkFiles(GENE_EXPRESSION_ATLAS_DATA, downloadPath.getParent(), 1).get(0).toPath(); + // hpoFile = checkFiles(HPO_DATA, downloadPath.getParent(), 1); + disgenetFile = checkFiles(DISGENET_DATA, downloadPath.getParent(), 1).get(0).toPath(); + gnomadFile = checkFiles(GNOMAD_CONSTRAINTS_DATA, downloadPath.getParent(), 1).get(0).toPath(); + geneOntologyAnnotationFile = checkFiles(GO_ANNOTATION_DATA, downloadPath.getParent(), 1).get(0).toPath(); + // ensemblCanonicalFile = ; + // cancerGeneCensus = + // tso500File = + // eglhHaemOncFile = + + // Check regulation files + // Motif features + files = checkFiles(ensemblGeneLabel, MOTIF_FEATURES_DATA, downloadPath.getParent().getParent().resolve(REGULATION_DATA), 2); + if (files.get(0).getName().endsWith("tbi")) { + tabixFile = files.get(0).toPath(); + tfbsFile = files.get(1).toPath(); + } else { + tabixFile = files.get(1).toPath(); + tfbsFile = files.get(0).toPath(); + } + // mirbase + miRBaseFile = checkFiles(MIRBASE_DATA, downloadPath.getParent().getParent().resolve(REGULATION_DATA), 1).get(0).toPath(); + + // mirtarbase + // The downloaded .xlsx file contains errors and it has to be fixed manually + logger.info("Checking {} folder and files", getDataName(MIRTARBASE_DATA)); + Path downloadRegulationPath = downloadPath.getParent().getParent().resolve(REGULATION_DATA); + List mirTarBaseFiles = ((DataSource) 
dataSourceReader.readValue(downloadRegulationPath.resolve(
+                getDataVersionFilename(MIRTARBASE_DATA)).toFile())).getUrls().stream().map(u -> Paths.get(u).getFileName().toString())
+                .collect(Collectors.toList());
+        if (mirTarBaseFiles.size() != 1) {
+            throw new CellBaseException("One " + getDataName(MIRTARBASE_DATA) + " file is expected at " + downloadRegulationPath
+                    + ", but currently there are " + mirTarBaseFiles.size() + " files");
+        }
+        // The hsa_MTI.xlsx is fixed and converted to hsa_MTI.csv manually
+        if (!mirTarBaseFiles.get(0).endsWith(XLSX_EXTENSION)) {
+            throw new CellBaseException("A " + XLSX_EXTENSION + " " + getDataName(MIRTARBASE_DATA) + " file is expected at "
+                    + downloadRegulationPath + ", but currently it is named " + mirTarBaseFiles.get(0));
+        }
+        miRTarBaseFile = downloadRegulationPath.resolve(mirTarBaseFiles.get(0).replace(XLSX_EXTENSION, CSV_EXTENSION));
+        if (!Files.exists(miRTarBaseFile)) {
+            throw new CellBaseException("The " + getDataName(MIRTARBASE_DATA) + " fixed file " + miRTarBaseFile + " does not exist");
+        }
+
+        // Check genome fasta file
+        genomeSequenceFilePath = checkFiles(GENOME_DATA, downloadPath.getParent().getParent().resolve(GENOME_DATA), 1).get(0).toPath();
+
+        logger.info(CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE, ensemblGeneLabel);
+        checked = true;
+    }
+
+    public void parse() throws Exception {
+        check();
+
+        Gene gene = null;
+        Transcript transcript;
+        Exon exon = null;
+        int cdna = 1;
+        int cds = 1;
+
+        EnsemblGeneBuilderIndexer indexer = new EnsemblGeneBuilderIndexer(serializer.getOutdir());
+
+        try {
+            // Process files and put values in RocksDB
+            indexer.index(geneDescriptionFile, xrefsFile, hgncFile, maneFile, lrgFile, uniprotIdMappingFile,
+                    proteinFastaFile, cDnaFastaFile, speciesConfiguration.getScientificName(), geneExpressionFile,
+                    geneDrugFile, hpoFile, disgenetFile, gnomadFile, geneOntologyAnnotationFile, miRBaseFile,
+                    miRTarBaseFile, cancerGeneCensusFile, cancerHostpotFile, ensemblCanonicalFile,
+                    tso500File, eglhHaemOncFile);
+
+            TabixReader tabixReader = null;
+            if (!Files.exists(tfbsFile) || !Files.exists(tabixFile)) {
+                logger.error("Tfbs or tabix file not found.
Download them and try again."); + } else { + tabixReader = new TabixReader(tfbsFile.toAbsolutePath().toString(), tabixFile.toAbsolutePath().toString()); + } + + // Preparing the fasta file for fast accessing +// System.out.println("genomeSequenceFilePath.toString() = " + genomeSequenceFilePath.toString()); + FastaIndex fastaIndex = new FastaIndex(genomeSequenceFilePath); + + // Empty transcript and exon dictionaries + transcriptDict.clear(); + exonDict.clear(); + + logger.info(PARSING_LOG_MESSAGE, gtfFile); + GtfReader gtfReader = new GtfReader(gtfFile); + + // Gene->Transcript->Feature->GTF line + Map>> gtfMap = null; + if (flexibleGTFParsing) { + gtfMap = loadGTFMap(gtfReader); + initializePointers(gtfMap); + } + + Gtf gtf; + while ((gtf = getGTFEntry(gtfReader, gtfMap)) != null) { + + if (gtf.getFeature().equals("gene") || gtf.getFeature().equals("transcript") + || gtf.getFeature().equals("UTR") || gtf.getFeature().equals("Selenocysteine")) { + continue; + } + + String geneId = gtf.getAttributes().get("gene_id"); + String transcriptId = gtf.getAttributes().get("transcript_id"); + String geneName = gtf.getAttributes().get("gene_name"); + if (newGene(gene, geneId)) { + // If new geneId is different from the current then we must serialize before data new gene + if (gene != null) { + serializer.serialize(gene); + } + + GeneAnnotation geneAnnotation = new GeneAnnotation(indexer.getExpression(geneId), indexer.getDiseases(geneName), + indexer.getDrugs(geneName), indexer.getConstraints(geneId), indexer.getMirnaTargets(geneName), + indexer.getCancerGeneCensus(geneName), indexer.getCancerHotspot(geneName)); + + gene = new Gene(geneId, geneName, gtf.getSequenceName().replaceFirst("chr", ""), + gtf.getStart(), gtf.getEnd(), gtf.getStrand(), gtf.getAttributes().get("gene_version"), + gtf.getAttributes().get("gene_biotype"), "KNOWN", SOURCE, indexer.getDescription(geneId), + new ArrayList<>(), indexer.getMirnaGene(transcriptId), geneAnnotation); + } + + // Check if Transcript exist in the Gene Set of transcripts + if (!transcriptDict.containsKey(transcriptId)) { + transcript = getTranscript(gene, indexer, tabixReader, gtf, transcriptId); + } else { + transcript = gene.getTranscripts().get(transcriptDict.get(transcriptId)); + } + + // At this point gene and transcript objects are set up + // Update gene and transcript genomic coordinates, start must be the + // lower, and end the higher + updateTranscriptAndGeneCoords(transcript, gene, gtf); + + String transcriptIdWithoutVersion = transcript.getId().split("\\.")[0]; + if (gtf.getFeature().equalsIgnoreCase("exon")) { + // Obtaining the exon sequence + String exonId = gtf.getAttributes().get("exon_id") + "." 
+ gtf.getAttributes().get("exon_version"); + String exonSequence = fastaIndex.query(gtf.getSequenceName(), gtf.getStart(), gtf.getEnd()); + + exon = new Exon(exonId, gtf.getSequenceName().replaceFirst("chr", ""), + gtf.getStart(), gtf.getEnd(), gtf.getStrand(), 0, 0, 0, 0, 0, 0, -1, Integer.parseInt(gtf + .getAttributes().get("exon_number")), exonSequence); + transcript.getExons().add(exon); + + exonDict.put(transcriptIdWithoutVersion + "_" + exon.getExonNumber(), exon); + if (gtf.getAttributes().get("exon_number").equals("1")) { + cdna = 1; + cds = 1; + } else { + // with every exon we update cDNA length with the previous exon length + cdna += exonDict.get(transcriptIdWithoutVersion + "_" + (exon.getExonNumber() - 1)).getEnd() + - exonDict.get(transcriptIdWithoutVersion + "_" + (exon.getExonNumber() - 1)).getStart() + 1; + } + } else { + exon = exonDict.get(transcriptIdWithoutVersion + "_" + exon.getExonNumber()); + if (gtf.getFeature().equalsIgnoreCase("CDS")) { + // Protein ID is only present in CDS lines + String proteinId = gtf.getAttributes().get("protein_id") != null + ? gtf.getAttributes().get("protein_id") + "." + gtf.getAttributes().get("protein_version") + : ""; + transcript.setProteinId(proteinId); + transcript.setProteinSequence(indexer.getProteinFasta(proteinId)); + + if (gtf.getStrand().equals("+") || gtf.getStrand().equals("1")) { + // CDS states the beginning of coding start + exon.setGenomicCodingStart(gtf.getStart()); + exon.setGenomicCodingEnd(gtf.getEnd()); + + // cDNA coordinates + exon.setCdnaCodingStart(gtf.getStart() - exon.getStart() + cdna); + exon.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); + // Set cdnaCodingEnd to prevent those cases without stop_codon + + transcript.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); + exon.setCdsStart(cds); + exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + + // increment in the coding length + cds += gtf.getEnd() - gtf.getStart() + 1; + transcript.setCdsLength(cds - 1); // Set cdnaCodingEnd to prevent those cases without stop_codon + + exon.setPhase(Integer.parseInt(gtf.getFrame())); + + if (transcript.getGenomicCodingStart() == 0 || transcript.getGenomicCodingStart() > gtf.getStart()) { + transcript.setGenomicCodingStart(gtf.getStart()); + } + if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) { + transcript.setGenomicCodingEnd(gtf.getEnd()); + } + // only first time + if (transcript.getCdnaCodingStart() == 0) { + transcript.setCdnaCodingStart(gtf.getStart() - exon.getStart() + cdna); + } + // strand - + } else { + // CDS states the beginning of coding start + exon.setGenomicCodingStart(gtf.getStart()); + exon.setGenomicCodingEnd(gtf.getEnd()); + // cDNA coordinates + // cdnaCodingStart points to the same base position than genomicCodingEnd + exon.setCdnaCodingStart(exon.getEnd() - gtf.getEnd() + cdna); + // cdnaCodingEnd points to the same base position than genomicCodingStart + exon.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); + // Set cdnaCodingEnd to prevent those cases without stop_codon + transcript.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); + exon.setCdsStart(cds); + exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + + // increment in the coding length + cds += gtf.getEnd() - gtf.getStart() + 1; + transcript.setCdsLength(cds - 1); // Set cdnaCodingEnd to prevent those cases without stop_codon + exon.setPhase(Integer.parseInt(gtf.getFrame())); + + if (transcript.getGenomicCodingStart() == 0 || 
transcript.getGenomicCodingStart() > gtf.getStart()) { + transcript.setGenomicCodingStart(gtf.getStart()); + } + if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) { + transcript.setGenomicCodingEnd(gtf.getEnd()); + } + // only first time + if (transcript.getCdnaCodingStart() == 0) { + // cdnaCodingStart points to the same base position than genomicCodingEnd + transcript.setCdnaCodingStart(exon.getEnd() - gtf.getEnd() + cdna); + } + } + + } +// if (gtf.getFeature().equalsIgnoreCase("start_codon")) { +// // nothing to do +// System.out.println("Empty block, this should be redesigned"); +// } + if (gtf.getFeature().equalsIgnoreCase("stop_codon")) { + // setCdnaCodingEnd = false; // stop_codon found, cdnaCodingEnd will be set here, + // no need to set it at the beginning of next feature + if (exon.getStrand().equals("+")) { + updateStopCodingDataPositiveExon(exon, cdna, cds, gtf); + + cds += gtf.getEnd() - gtf.getStart(); + // If stop_codon appears, overwrite values + transcript.setGenomicCodingEnd(gtf.getEnd()); + transcript.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); + transcript.setCdsLength(cds - 1); + + } else { + updateNegativeExonCodingData(exon, cdna, cds, gtf); + + cds += gtf.getEnd() - gtf.getStart(); + // If stop_codon appears, overwrite values + transcript.setGenomicCodingStart(gtf.getStart()); + // cdnaCodingEnd points to the same base position than genomicCodingStart + transcript.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); + transcript.setCdsLength(cds - 1); + } + } + } + } + + // last gene must be serialized + serializer.serialize(gene); + + // Close + gtfReader.close(); + serializer.close(); + fastaIndex.close(); + indexer.close(); + + logger.info(PARSING_DONE_LOG_MESSAGE, gtfFile); + } catch (Exception e) { + indexer.close(); + throw e; + } + } + + private Transcript getTranscript(Gene gene, EnsemblGeneBuilderIndexer indexer, TabixReader tabixReader, Gtf gtf, String transcriptId) + throws IOException, RocksDBException { + Map gtfAttributes = gtf.getAttributes(); + + // To match Ensembl, we set the ID as transcript+version. This also matches the Ensembl website. + String transcriptIdWithVersion = transcriptId + "." + gtfAttributes.get("transcript_version"); + String biotype = gtfAttributes.get("transcript_biotype") != null ? gtfAttributes.get("transcript_biotype") : ""; + String transcriptChromosome = gtf.getSequenceName().replaceFirst("chr", ""); + List transcriptTfbses = getTranscriptTfbses(gtf, transcriptChromosome, tabixReader); + + List ontologyAnnotations = getOntologyAnnotations(indexer.getXrefs(transcriptId), indexer); + TranscriptAnnotation transcriptAnnotation = new TranscriptAnnotation(ontologyAnnotations, indexer.getConstraints(transcriptId)); + + Transcript transcript = new Transcript(transcriptIdWithVersion, gtfAttributes.get("transcript_name"), transcriptChromosome, + gtf.getStart(), gtf.getEnd(), gtf.getStrand(), biotype, "KNOWN", + 0, 0, 0, 0, 0, + indexer.getCdnaFasta(transcriptIdWithVersion), "", "", "", + gtfAttributes.get("transcript_version"), SOURCE, new ArrayList<>(), indexer.getXrefs(transcriptId), transcriptTfbses, + new HashSet<>(), transcriptAnnotation); + + // Adding Ids appearing in the GTF to the xrefs is required, since for some unknown reason the ENSEMBL + // Perl API often doesn't return all genes resulting in an incomplete xrefs.txt file. 
We must ensure + // that the xrefs array contains all ids present in the GTF file + addGtfXrefs(transcript, gene, gtfAttributes); + + // Add HGNC ID mappings, with this we can know which Ensembl and Refseq transcripts match to HGNC ID + String hgncId = indexer.getHgncId(gene.getName()); + if (StringUtils.isNotEmpty(hgncId)) { + transcript.getXrefs().add(new Xref(hgncId, "hgnc_id", "HGNC ID")); + } + + // Add MANE Select mappings, with this we can know which Ensembl and Refseq transcripts match according to MANE + for (String suffix: Arrays.asList("refseq", "refseq_protein")) { + String maneRefSeq = indexer.getMane(transcriptIdWithVersion, suffix); + if (StringUtils.isNotEmpty(maneRefSeq)) { + transcript.getXrefs().add(new Xref(maneRefSeq, "mane_select_" + suffix, + "MANE Select RefSeq" + (suffix.contains("_") ? " Protein" : ""))); + } + } + + // Add LRG mappings, with this we can know which Ensembl and Refseq transcripts match according to LRG + String lrgRefSeq = indexer.getLrg(transcriptIdWithVersion, "refseq"); + if (StringUtils.isNotEmpty(lrgRefSeq)) { + transcript.getXrefs().add(new Xref(lrgRefSeq, "lrg_refseq", "LRG RefSeq")); + } + + // Add Flags + // 1. GTF tags + String tags = gtf.getAttributes().get("tag"); + if (StringUtils.isNotEmpty(tags)) { + transcript.getFlags().addAll(Arrays.asList(tags.split(","))); + } + // 2. TSL + String supportLevel = gtfAttributes.get("transcript_support_level"); + if (StringUtils.isNotEmpty(supportLevel)) { + // split on space so "5 (assigned to previous version 3)" and "5" both become "TSL:5" + String truncatedSupportLevel = supportLevel.split(" ")[0]; + transcript.getFlags().add("TSL:" + truncatedSupportLevel); + } + // 3. MANE Flag + String maneFlag = indexer.getMane(transcriptIdWithVersion, "flag"); + if (StringUtils.isNotEmpty(maneFlag)) { + transcript.getFlags().add(maneFlag); + } + // 4. LRG Flag + String lrg = indexer.getLrg(transcriptIdWithVersion, "ensembl"); + if (StringUtils.isNotEmpty(lrg)) { + transcript.getFlags().add("LRG"); + } else { + for (Xref xref : transcript.getXrefs()) { + if (xref.getId().startsWith("LRG_") && xref.getId().contains("t")) { + transcript.getFlags().add("LRG"); + } + } + } + // 5. Ensembl Canonical + String canonicalFlag = indexer.getCanonical(transcriptIdWithVersion); + if (StringUtils.isNotEmpty(canonicalFlag)) { + transcript.getFlags().add(canonicalFlag); + } + + // 6. TSO500 and EGLH HaemOnc + String maneRefSeq = indexer.getMane(transcriptIdWithVersion, "refseq"); + if (StringUtils.isNotEmpty(maneRefSeq)) { + String tso500Flag = indexer.getTSO500(maneRefSeq.split("\\.")[0]); + if (StringUtils.isNotEmpty(tso500Flag)) { + transcript.getFlags().add(tso500Flag); + } + + String eglhHaemOncFlag = indexer.getEGLHHaemOnc(maneRefSeq.split("\\.")[0]); + if (StringUtils.isNotEmpty(eglhHaemOncFlag)) { + transcript.getFlags().add(eglhHaemOncFlag); + } + } + + gene.getTranscripts().add(transcript); + + // Do not change order!! 
size()-1 is the index of the transcript ID + transcriptDict.put(transcriptId, gene.getTranscripts().size() - 1); + return transcript; + } + + private List getOntologyAnnotations(List xrefs, EnsemblGeneBuilderIndexer indexer) + throws IOException, RocksDBException { + if (xrefs == null || indexer == null) { + return null; + } + List annotations = new ArrayList<>(); + for (Xref xref : xrefs) { + if (xref.getDbName().equals("uniprotkb_acc")) { + String key = xref.getId(); + if (key != null && indexer.getOntologyAnnotations(key) != null) { + annotations.addAll(indexer.getOntologyAnnotations(key)); + } + } + } + return annotations; + } + + private void updateNegativeExonCodingData(Exon exon, int cdna, int cds, Gtf gtf) { + // we need to increment 3 nts, the stop_codon length. + exon.setGenomicCodingStart(gtf.getStart()); + // cdnaCodingEnd points to the same base position than genomicCodingStart + exon.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); + exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + + // If the STOP codon corresponds to the first three nts of the exon then no CDS will be defined + // in the gtf -as technically the STOP codon is non-coding- and we must manually set coding + // starts + if (exon.getGenomicCodingEnd() == 0) { + exon.setGenomicCodingEnd(exon.getGenomicCodingStart() + 2); + } + if (exon.getCdnaCodingStart() == 0) { + exon.setCdnaCodingStart(exon.getCdnaCodingEnd() - 2); + } + if (exon.getCdsStart() == 0) { + exon.setCdsStart(exon.getCdsEnd() - 2); + } + } + + private void updateStopCodingDataPositiveExon(Exon exon, int cdna, int cds, Gtf gtf) { + // we need to increment 3 nts, the stop_codon length. + exon.setGenomicCodingEnd(gtf.getEnd()); + exon.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); + exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + + // If the STOP codon corresponds to the first three nts of the exon then no CDS will be defined + // in the gtf -as technically the STOP codon is non-coding- and we must manually set coding + // starts + if (exon.getGenomicCodingStart() == 0) { + exon.setGenomicCodingStart(exon.getGenomicCodingEnd() - 2); + } + if (exon.getCdnaCodingStart() == 0) { + exon.setCdnaCodingStart(exon.getCdnaCodingEnd() - 2); + } + if (exon.getCdsStart() == 0) { + exon.setCdsStart(exon.getCdsEnd() - 2); + } + } + + private void addGtfXrefs(Transcript transcript, Gene gene, Map gtfAttributes) { + if (transcript.getXrefs() == null) { + transcript.setXrefs(new ArrayList<>()); + } + + transcript.getXrefs().add(new Xref(gene.getId(), "ensembl_gene", "Ensembl Gene")); + transcript.getXrefs().add(new Xref(transcript.getId(), "ensembl_transcript", "Ensembl Transcript")); + + // Some non-coding genes do not have Gene names + if (StringUtils.isNotEmpty(gene.getName())) { + transcript.getXrefs().add(new Xref(gene.getName(), "hgnc_symbol", "HGNC Symbol")); + transcript.getXrefs().add(new Xref(transcript.getName(), "ensembl_transcript_name", "Ensembl Transcript Name")); + } + + if (gtfAttributes.get("ccds_id") != null) { + transcript.getXrefs().add(new Xref(gtfAttributes.get("ccds_id"), "ccds_id", "CCDS")); + } + } + + private void initializePointers(Map>> gtfMap) { + geneCounter = 0; + geneList = new ArrayList<>(gtfMap.keySet()); + geneName = geneList.get(geneCounter); + transcriptCounter = 0; + transcriptList = new ArrayList<>(gtfMap.get(geneName).keySet()); + transcriptName = transcriptList.get(transcriptCounter); + exonCounter = 0; + feature = "exon"; + nextGtfToReturn = (Gtf) ((List) 
gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); + } + + private Gtf getGTFEntry(GtfReader gtfReader, Map>> gtfMap) throws FileFormatException { + // Flexible parsing is deactivated, return next line + if (gtfMap == null) { + return gtfReader.read(); + // Flexible parsing activated, carefully select next line to return + } else { + // No more genes/features to return + if (nextGtfToReturn == null) { + return null; + } + Gtf gtfToReturn = nextGtfToReturn; + if (feature.equals("exon")) { +// gtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); + if (gtfMap.get(geneName).get(transcriptName).containsKey("cds")) { + nextGtfToReturn = getExonCDSLine(((Gtf) ((List) gtfMap.get(geneName) + .get(transcriptName).get("exon")).get(exonCounter)).getStart(), + ((Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter)).getEnd(), + (List) gtfMap.get(geneName).get(transcriptName).get("cds")); + if (nextGtfToReturn != null) { + feature = "cds"; + return gtfToReturn; + } + } + // if no cds was found for this exon, get next exon + getFeatureFollowsExon(gtfMap); + return gtfToReturn; + } + if (feature.equals("cds") || feature.equals("stop_codon")) { + getFeatureFollowsExon(gtfMap); + return gtfToReturn; + } + if (feature.equals("start_codon")) { + feature = "stop_codon"; + nextGtfToReturn = (Gtf) gtfMap.get(geneName).get(transcriptName).get("stop_codon"); + return gtfToReturn; + } + // The only accepted features that should appear in the gtfMap are exon, cds, start_codon and stop_codon + throw new FileFormatException("Execution cannot reach this point"); + } + } + + private Gtf getExonCDSLine(Integer exonStart, Integer exonEnd, List cdsList) { + for (Object cdsObject : cdsList) { + int cdsStart = ((Gtf) cdsObject).getStart(); + int cdsEnd = ((Gtf) cdsObject).getEnd(); + if (cdsStart <= exonEnd && cdsEnd >= exonStart) { + return (Gtf) cdsObject; + } + } + return null; + } + + private void getFeatureFollowsExon(Map>> gtfMap) { + exonCounter++; + if (exonCounter == ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).size() + || feature.equals("stop_codon")) { + // If last returned feature was a stop_codon or no start_codon is provided for this transcript, + // next transcript must be selected + if (!feature.equals("stop_codon") && gtfMap.get(geneName).get(transcriptName).containsKey("start_codon")) { + feature = "start_codon"; + nextGtfToReturn = (Gtf) gtfMap.get(geneName).get(transcriptName).get("start_codon"); + } else { + transcriptCounter++; + // No more transcripts in this gene, check if there are more genes + if (transcriptCounter == gtfMap.get(geneName).size()) { + geneCounter++; + // No more genes available, end parsing + if (geneCounter == gtfMap.size()) { + nextGtfToReturn = null; + feature = null; + // Still more genes to parse, select next one + } else { + geneName = geneList.get(geneCounter); + transcriptCounter = 0; + transcriptList = new ArrayList<>(gtfMap.get(geneName).keySet()); + } + } + // Check if a new gene was selected - null would indicate there're no more genes + if (nextGtfToReturn != null) { + transcriptName = transcriptList.get(transcriptCounter); + exonCounter = 0; + feature = "exon"; + nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); + } + } + } else { + feature = "exon"; + nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); + } + } + + private Map>> loadGTFMap(GtfReader 
gtfReader) throws FileFormatException { + Map>> gtfMap = new HashMap<>(); + Gtf gtf; + while ((gtf = gtfReader.read()) != null) { + if (gtf.getFeature().equals("gene") || gtf.getFeature().equals("transcript") + || gtf.getFeature().equals("UTR") || gtf.getFeature().equals("Selenocysteine")) { + continue; + } + + // Get GTF lines associated with this gene - create a new Map of GTF entries if it's a new gene + String geneId = gtf.getAttributes().get("gene_id"); + // Transcript -> feature -> GTF line + Map> gtfMapGeneEntry; + if (gtfMap.containsKey(geneId)) { + gtfMapGeneEntry = gtfMap.get(geneId); + } else { + gtfMapGeneEntry = new HashMap(); + gtfMap.put(geneId, gtfMapGeneEntry); + } + + // Get GTF lines associated with this transcript - create a new Map of GTF entries if it's a new gene + String transcriptId = gtf.getAttributes().get("transcript_id"); + Map gtfMapTranscriptEntry; + if (gtfMapGeneEntry.containsKey(transcriptId)) { + gtfMapTranscriptEntry = gtfMapGeneEntry.get(transcriptId); + } else { + gtfMapTranscriptEntry = new HashMap(); + gtfMapGeneEntry.put(transcriptId, gtfMapTranscriptEntry); + } + + addGTFLineToGTFMap(gtfMapTranscriptEntry, gtf); + + } + + // Exon number is mandatory for the parser to be able to properly generate the gene data model + if (!exonNumberPresent(gtfMap)) { + setExonNumber(gtfMap); + } + + return gtfMap; + } + + private boolean exonNumberPresent(Map>> gtfMap) { + Map> geneGtfMap = gtfMap.get(gtfMap.keySet().iterator().next()); + return ((Gtf) ((List) geneGtfMap.get(geneGtfMap.keySet().iterator().next()).get("exon")).get(0)) + .getAttributes().containsKey("exon_number"); + } + + private void setExonNumber(Map>> gtfMap) { + for (String gene : gtfMap.keySet()) { + for (String transcript : gtfMap.get(gene).keySet()) { + List exonList = (List) gtfMap.get(gene).get(transcript).get("exon"); + Collections.sort(exonList, (e1, e2) -> Integer.valueOf(e1.getStart()).compareTo(e2.getStart())); + if (exonList.get(0).getStrand().equals("+")) { + int exonNumber = 1; + for (Gtf gtf : exonList) { + gtf.getAttributes().put("exon_number", String.valueOf(exonNumber)); + exonNumber++; + } + } else { + int exonNumber = exonList.size(); + for (Gtf gtf : exonList) { + gtf.getAttributes().put("exon_number", String.valueOf(exonNumber)); + exonNumber--; + } + } + } + } + } + + private void addGTFLineToGTFMap(Map gtfMapTranscriptEntry, Gtf gtf) { + // Add exon/cds GTF line to the corresponding gene entry in the map + String featureType = gtf.getFeature().toLowerCase(); + if (featureType.equals("exon") || featureType.equals("cds")) { + List gtfList; + // Check if there were exons already stored + if (gtfMapTranscriptEntry.containsKey(featureType)) { + gtfList = (List) gtfMapTranscriptEntry.get(featureType); + } else { + gtfList = new ArrayList<>(); + gtfMapTranscriptEntry.put(featureType, gtfList); + } + gtfList.add(gtf); + // Only one start/stop codon can be stored per transcript - no need to check if the "start_codon"/"stop_codon" + // keys are already there + } else if (featureType.equals("start_codon") || featureType.equals("stop_codon")) { + gtfMapTranscriptEntry.put(featureType, gtf); + } + } + + private List getTranscriptTfbses(Gtf transcript, String chromosome, TabixReader tabixReader) throws IOException { + if (tabixReader == null) { + return null; + } + List transcriptTfbses = null; + + int transcriptStart = transcript.getStart(); + int transcriptEnd = transcript.getEnd(); + + + String line; + TabixReader.Iterator iter = tabixReader.query(chromosome, transcriptStart, 
transcriptEnd);
+        while ((line = iter.next()) != null) {
+            String[] elements = line.split("\t");
+
+            String sequenceName = elements[0];
+            String source = elements[1];
+            String feature = elements[2];
+            int start = Integer.parseInt(elements[3]);
+            int end = Integer.parseInt(elements[4]);
+            String score = elements[5];
+            String strand = elements[6];
+            String frame = elements[7];
+            String attribute = elements[8];
+
+            if (strand.equals(transcript.getStrand())) {
+                continue;
+            }
+
+            if (transcript.getStrand().equals("+")) {
+                if (start > transcript.getStart() + 500) {
+                    break;
+                } else if (end > transcript.getStart() - 2500) {
+                    Gff2 tfbs = new Gff2(sequenceName, source, feature, start, end, score, strand, frame, attribute);
+                    transcriptTfbses = addTranscriptTfbstoList(tfbs, transcript, chromosome, transcriptTfbses);
+                }
+            } else {
+                // Transcript in negative strand
+                if (start > transcript.getEnd() + 2500) {
+                    break;
+                } else if (start > transcript.getEnd() - 500) {
+                    Gff2 tfbs = new Gff2(sequenceName, source, feature, start, end, score, strand, frame, attribute);
+                    transcriptTfbses = addTranscriptTfbstoList(tfbs, transcript, chromosome, transcriptTfbses);
+                }
+            }
+        }
+
+        return transcriptTfbses;
+    }
+
+    protected List<TranscriptTfbs> addTranscriptTfbstoList(Gff2 tfbs, Gtf transcript, String chromosome,
+                                                           List<TranscriptTfbs> transcriptTfbses) {
+        if (transcriptTfbses == null) {
+            transcriptTfbses = new ArrayList<>();
+        }
+
+        // binding_matrix_stable_id=ENSPFM0542;epigenomes_with_experimental_evidence=SK-N.%2CMCF-7%2CH1-hESC_3%2CHCT116;
+        // stable_id=ENSM00208374688;transcription_factor_complex=TEAD4::ESRRB
+        String[] attributes = tfbs.getAttribute().split(";");
+
+        String id = null;
+        String pfmId = null;
+        List<String> transcriptionFactors = null;
+
+        for (String attributePair : attributes) {
+            String[] attributePairArray = attributePair.split("=");
+            switch (attributePairArray[0]) {
+                case "binding_matrix_stable_id":
+                    pfmId = attributePairArray[1];
+                    break;
+                case "stable_id":
+                    id = attributePairArray[1];
+                    break;
+                case "transcription_factor_complex":
+                    transcriptionFactors = Arrays.asList(attributePairArray[1].split("(::)|(%2C)"));
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        transcriptTfbses.add(new TranscriptTfbs(id, pfmId, tfbs.getFeature(), transcriptionFactors, chromosome, tfbs.getStart(),
+                tfbs.getEnd(), getRelativeTranscriptTfbsStart(tfbs, transcript), getRelativeTranscriptTfbsEnd(tfbs, transcript),
+                Float.parseFloat(tfbs.getScore())));
+        return transcriptTfbses;
+    }
+
+    private Integer getRelativeTranscriptTfbsStart(Gff2 tfbs, Gtf transcript) {
+        Integer relativeStart;
+        if (transcript.getStrand().equals("+")) {
+            if (tfbs.getStart() < transcript.getStart()) {
+                relativeStart = tfbs.getStart() - transcript.getStart();
+            } else {
+                relativeStart = tfbs.getStart() - transcript.getStart() + 1;
+            }
+        } else {
+            // Negative strand transcript
+            if (tfbs.getEnd() > transcript.getEnd()) {
+                relativeStart = transcript.getEnd() - tfbs.getEnd();
+            } else {
+                relativeStart = transcript.getEnd() - tfbs.getEnd() + 1;
+            }
+        }
+        return relativeStart;
+    }
+
+    private Integer getRelativeTranscriptTfbsEnd(Gff2 tfbs, Gtf transcript) {
+        Integer relativeEnd;
+        if (transcript.getStrand().equals("+")) {
+            if (tfbs.getEnd() < transcript.getStart()) {
+                relativeEnd = tfbs.getEnd() - transcript.getStart();
+            } else {
+                relativeEnd = tfbs.getEnd() - transcript.getStart() + 1;
+            }
+        } else {
+            if (tfbs.getStart() > transcript.getEnd()) {
+                relativeEnd = transcript.getEnd() - tfbs.getStart();
+            } else {
+                relativeEnd = transcript.getEnd() -
tfbs.getStart() + 1; + } + } + return relativeEnd; + } + + + + private boolean newGene(Gene previousGene, String newGeneId) { + return previousGene == null || !newGeneId.equals(previousGene.getId()); + } + + private void updateTranscriptAndGeneCoords(Transcript transcript, Gene gene, Gtf gtf) { + if (transcript.getStart() > gtf.getStart()) { + transcript.setStart(gtf.getStart()); + } + if (transcript.getEnd() < gtf.getEnd()) { + transcript.setEnd(gtf.getEnd()); + } + if (gene.getStart() > gtf.getStart()) { + gene.setStart(gtf.getStart()); + } + if (gene.getEnd() < gtf.getEnd()) { + gene.setEnd(gtf.getEnd()); + } + } + + private void getGtfFileFromGeneDirectoryPath(Path geneDirectoryPath) { + for (String fileName : geneDirectoryPath.toFile().list()) { + if (fileName.endsWith(".gtf") || fileName.endsWith(".gtf.gz")) { + gtfFile = geneDirectoryPath.resolve(fileName); + break; + } + } + } + + private void getProteinFastaFileFromGeneDirectoryPath(Path geneDirectoryPath) { + for (String fileName : geneDirectoryPath.toFile().list()) { + if (fileName.endsWith(".pep.all.fa") || fileName.endsWith(".pep.all.fa.gz")) { + proteinFastaFile = geneDirectoryPath.resolve(fileName); + break; + } + } + } + + private void getCDnaFastaFileFromGeneDirectoryPath(Path geneDirectoryPath) { + for (String fileName : geneDirectoryPath.toFile().list()) { + if (fileName.endsWith(".cdna.all.fa") || fileName.endsWith(".cdna.all.fa.gz")) { + cDnaFastaFile = geneDirectoryPath.resolve(fileName); + break; + } + } + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilderIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilderIndexer.java index fb67c19b8b..10f54e2ea1 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilderIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilderIndexer.java @@ -16,27 +16,44 @@ package org.opencb.cellbase.lib.builders; +import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.commons.lang3.StringUtils; -import org.apache.poi.hssf.usermodel.HSSFSheet; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.ss.usermodel.*; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.opencb.biodata.formats.feature.mirbase.MirBaseParser; +import org.opencb.biodata.formats.feature.mirbase.MirBaseParserCallback; import org.opencb.biodata.formats.gaf.GafParser; import org.opencb.biodata.formats.io.FileFormatException; +import org.opencb.biodata.models.core.FeatureOntologyTermAnnotation; +import org.opencb.biodata.models.core.MiRnaGene; +import org.opencb.biodata.models.core.MirnaTarget; import org.opencb.biodata.models.core.Xref; -import org.opencb.biodata.models.core.*; -import org.opencb.biodata.models.variant.avro.*; +import org.opencb.biodata.models.variant.avro.Constraint; +import org.opencb.biodata.models.variant.avro.Expression; +import org.opencb.biodata.models.variant.avro.ExpressionCall; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.commons.utils.FileUtils; +import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; 
+import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.zip.GZIPInputStream; -public class EnsemblGeneBuilderIndexer extends GeneBuilderIndexer{ +import static org.opencb.cellbase.lib.EtlCommons.ENSEMBL_DATA; +import static org.opencb.cellbase.lib.builders.CellBaseBuilder.PARSING_DONE_LOG_MESSAGE; +import static org.opencb.cellbase.lib.builders.CellBaseBuilder.PARSING_LOG_MESSAGE; + +public class EnsemblGeneBuilderIndexer extends GeneBuilderIndexer { private static final String DESCRIPTION_SUFFIX = "_description"; private static final String XREF_SUFFIX = "_xref"; @@ -56,12 +73,12 @@ public void index(Path geneDescriptionFile, Path xrefsFile, Path hgncFile, Path Path proteinFastaFile, Path cDnaFastaFile, String species, Path geneExpressionFile, Path geneDrugFile, Path hpoFile, Path disgenetFile, Path gnomadFile, Path geneOntologyAnnotationFile, Path miRBaseFile, Path miRTarBaseFile, Path cancerGeneGensusFile, Path cancerHostpotFile, Path canonicalFile, Path tso500File, Path eglhHaemOncFile) - throws IOException, RocksDBException, FileFormatException { - indexDescriptions(geneDescriptionFile); - indexXrefs(xrefsFile, uniprotIdMappingFile); + throws IOException, RocksDBException, FileFormatException, CellBaseException { +// indexDescriptions(geneDescriptionFile); +// indexXrefs(xrefsFile, uniprotIdMappingFile); indexHgncIdMapping(hgncFile); - indexManeMapping(maneFile, "ensembl"); - indexLrgMapping(lrgFile, "ensembl"); + indexManeMapping(maneFile, ENSEMBL_DATA); + indexLrgMapping(lrgFile, ENSEMBL_DATA); indexProteinSequences(proteinFastaFile); indexCdnaSequences(cDnaFastaFile); indexExpression(species, geneExpressionFile); @@ -69,13 +86,13 @@ public void index(Path geneDescriptionFile, Path xrefsFile, Path hgncFile, Path indexDiseases(hpoFile, disgenetFile); indexConstraints(gnomadFile); indexOntologyAnnotations(geneOntologyAnnotationFile); - indexMiRBase(miRBaseFile); + indexMiRBase(species, miRBaseFile); indexMiRTarBase(miRTarBaseFile); - indexCancerGeneCensus(cancerGeneGensusFile); +// indexCancerGeneCensus(cancerGeneGensusFile); indexCancerHotspot(cancerHostpotFile); - indexCanonical(canonicalFile); - indexTSO500(tso500File); - indexEGLHHaemOnc(eglhHaemOncFile); +// indexCanonical(canonicalFile); +// indexTSO500(tso500File); +// indexEGLHHaemOnc(eglhHaemOncFile); } private void indexDescriptions(Path geneDescriptionFile) throws IOException, RocksDBException { @@ -233,129 +250,6 @@ public List getExpression(String id) throws RocksDBException, IOExce return rocksDbManager.getExpression(rocksdb, key); } - private void indexDrugs(Path geneDrugFile) throws IOException, RocksDBException { - if (geneDrugFile != null && Files.exists(geneDrugFile) && Files.size(geneDrugFile) > 0) { - logger.info("Loading gene-drug interaction data from '{}'", geneDrugFile); - BufferedReader br = FileUtils.newBufferedReader(geneDrugFile); - - // Skip header - br.readLine(); - - int lineCounter = 1; - String line; - String currentGene = ""; - List drugs = new ArrayList<>(); - while ((line = br.readLine()) != null) { - String[] parts = line.split("\t"); - String geneName = parts[0]; - if (currentGene.equals("")) { - currentGene = geneName; - } else if (!currentGene.equals(geneName)) { - rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); - drugs = new ArrayList<>(); - currentGene = geneName; - } - - String source = null; - if (parts.length >= 4) { - source = parts[3]; - } - - String interactionType = null; - if (parts.length >= 5) { - interactionType = parts[4]; - } 
- - String drugName = null; - if (parts.length >= 8) { - // if drug name column is empty, use drug claim name instead - drugName = StringUtils.isEmpty(parts[7]) ? parts[6] : parts[7]; - } - if (StringUtils.isEmpty(drugName)) { - // no drug name - continue; - } - - String chemblId = null; - if (parts.length >= 9) { - chemblId = parts[8]; - } - - List publications = new ArrayList<>(); - if (parts.length >= 10 && parts[9] != null) { - publications = Arrays.asList(parts[9].split(",")); - } - - GeneDrugInteraction drug = new GeneDrugInteraction( - geneName, drugName, source, null, null, interactionType, chemblId, publications); - drugs.add(drug); - lineCounter++; - } - br.close(); - // update last gene - rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); - } else { - logger.warn("Gene drug file " + geneDrugFile + " not found"); - logger.warn("Ignoring " + geneDrugFile); - } - } - - public List getDrugs(String id) throws RocksDBException, IOException { - String key = id + DRUGS_SUFFIX; - return rocksDbManager.getDrugs(rocksdb, key); - } - - private void indexDiseases(Path hpoFilePath, Path disgenetFilePath) throws IOException, RocksDBException { - Map> geneDiseaseAssociationMap = new HashMap<>(50000); - String line; - - if (hpoFilePath != null && hpoFilePath.toFile().exists() && Files.size(hpoFilePath) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFilePath)) { - // skip first header line - bufferedReader.readLine(); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - String omimId = fields[6]; - String geneSymbol = fields[3]; - String hpoId = fields[0]; - String diseaseName = fields[1]; - GeneTraitAssociation disease = - new GeneTraitAssociation(omimId, diseaseName, hpoId, 0f, 0, new ArrayList<>(), new ArrayList<>(), "hpo"); - addValueToMapElement(geneDiseaseAssociationMap, geneSymbol, disease); - } - } - } - - if (disgenetFilePath != null && disgenetFilePath.toFile().exists() && Files.size(disgenetFilePath) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(disgenetFilePath)) { - // skip first header line - bufferedReader.readLine(); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - String diseaseId = fields[4]; - String diseaseName = fields[5]; - String score = fields[9]; - String numberOfPubmeds = fields[13].trim(); - String numberOfSNPs = fields[14]; - String source = fields[15]; - GeneTraitAssociation disease = new GeneTraitAssociation(diseaseId, diseaseName, "", Float.parseFloat(score), - Integer.parseInt(numberOfPubmeds), Collections.singletonList(numberOfSNPs), Collections.singletonList(source), - "disgenet"); - addValueToMapElement(geneDiseaseAssociationMap, fields[1], disease); - } - } - } - - for (Map.Entry> entry : geneDiseaseAssociationMap.entrySet()) { - rocksDbManager.update(rocksdb, entry.getKey() + DISEASE_SUFFIX, entry.getValue()); - } - } - - public List getDiseases(String id) throws RocksDBException, IOException { - String key = id + DISEASE_SUFFIX; - return rocksDbManager.getDiseases(rocksdb, key); - } - private void indexConstraints(Path gnomadFile) throws IOException, RocksDBException { if (gnomadFile != null && Files.exists(gnomadFile) && Files.size(gnomadFile) > 0) { logger.info("Loading OE scores from '{}'", gnomadFile); @@ -384,7 +278,7 @@ private void indexConstraints(Path gnomadFile) throws IOException, RocksDBExcept rocksDbManager.update(rocksdb, transcriptIdentifier + CONSTRAINT_SUFFIX, constraints); if 
("TRUE".equalsIgnoreCase(canonical)) { - rocksDbManager.update(rocksdb, geneIdentifier + CONSTRAINT_SUFFIX, constraints); + rocksDbManager.update(rocksdb, geneIdentifier + CONSTRAINT_SUFFIX, constraints); } } br.close(); @@ -432,66 +326,13 @@ public List getOntologyAnnotations(String id) thr return rocksDbManager.getOntologyAnnotations(rocksdb, key); } - private void indexMiRBase(Path miRBaseFile) throws IOException, RocksDBException { - if (miRBaseFile != null && Files.exists(miRBaseFile) && Files.size(miRBaseFile) > 0) { - logger.info("Loading mirna from '{}'", miRBaseFile); - FileInputStream fileInputStream = new FileInputStream(miRBaseFile.toFile()); - HSSFWorkbook workbook = new HSSFWorkbook(fileInputStream); - HSSFSheet sheet = workbook.getSheetAt(0); - Iterator iterator = sheet.iterator(); - while (iterator.hasNext()) { - Row currentRow = iterator.next(); - Iterator cellIterator = currentRow.iterator(); - - org.apache.poi.ss.usermodel.Cell cell = cellIterator.next(); - String miRBaseAccession = cell.getStringCellValue(); - - cell = cellIterator.next(); - String miRBaseID = cell.getStringCellValue(); - - cell = cellIterator.next(); - String status = cell.getStringCellValue(); - - cell = cellIterator.next(); - String sequence = cell.getStringCellValue(); + private void indexMiRBase(String species, Path miRBaseFile) throws IOException { + logger.info(PARSING_LOG_MESSAGE, miRBaseFile); - cell = cellIterator.next(); - String mature1Accession = cell.getStringCellValue(); + MirBaseCallback callback = new MirBaseCallback(rocksdb, rocksDbManager); + MirBaseParser.parse(miRBaseFile, species, callback); - cell = cellIterator.next(); - String mature1Id = cell.getStringCellValue(); - - cell = cellIterator.next(); - String mature1Sequence = cell.getStringCellValue(); - - String mature2Accession = ""; - String mature2Id = ""; - String mature2Sequence = ""; - if (cellIterator.hasNext()) { - cell = cellIterator.next(); - mature2Accession = cell.getStringCellValue(); - - cell = cellIterator.next(); - mature2Id = cell.getStringCellValue(); - - cell = cellIterator.next(); - mature2Sequence = cell.getStringCellValue(); - } - - MiRnaGene miRNAGene = new MiRnaGene(miRBaseAccession, miRBaseID, status, sequence, new ArrayList<>()); - int cdnaStart = sequence.indexOf(mature1Sequence); - int cdnaEnd = cdnaStart + mature1Sequence.length(); - miRNAGene.addMiRNAMature(mature1Accession, mature1Id, mature1Sequence, cdnaStart, cdnaEnd); - - cdnaStart = sequence.indexOf(mature2Sequence); - cdnaEnd = cdnaStart + mature2Sequence.length(); - miRNAGene.addMiRNAMature(mature2Accession, mature2Id, mature2Sequence, cdnaStart, cdnaEnd); - - rocksDbManager.update(rocksdb, miRBaseID + MIRBASE_SUFFIX, miRNAGene); - } - } else { - logger.error("mirna file not found"); - } + logger.info(PARSING_DONE_LOG_MESSAGE, miRBaseFile); } public MiRnaGene getMirnaGene(String transcriptId) throws RocksDBException, IOException { @@ -509,117 +350,11 @@ public MiRnaGene getMirnaGene(String transcriptId) throws RocksDBException, IOEx return null; } - private void indexMiRTarBase(Path miRTarBaseFile) throws IOException, RocksDBException { - if (miRTarBaseFile != null && Files.exists(miRTarBaseFile) && Files.size(miRTarBaseFile) > 0) { - logger.info("Loading mirna targets from '{}'", miRTarBaseFile); - FileInputStream file = new FileInputStream(miRTarBaseFile.toFile()); - Workbook workbook = new XSSFWorkbook(file); - Sheet sheet = workbook.getSheetAt(0); - Iterator iterator = sheet.iterator(); - String currentMiRTarBaseId = null; - String 
currentMiRNA = null; - String currentGene = null; - List targetGenes = new ArrayList<>(); - Map> geneToMirna = new HashMap<>(); - while (iterator.hasNext()) { - Row currentRow = iterator.next(); - - Iterator cellIterator = currentRow.iterator(); - Cell cell = cellIterator.next(); - - // Iterate columns - String miRTarBaseId = cell.getStringCellValue(); - - // skip header - if (miRTarBaseId.startsWith("miRTarBase")) { - continue; - } - - if (currentMiRTarBaseId == null) { - currentMiRTarBaseId = miRTarBaseId; - } - - cell = cellIterator.next(); - String miRNA = cell.getStringCellValue(); - if (currentMiRNA == null) { - currentMiRNA = miRNA; - } - - // Skip species - cellIterator.next(); - - // Read target gene - cell = cellIterator.next(); - String geneName = cell.getStringCellValue(); - if (currentGene == null) { - currentGene = geneName; - } - - // Skip entrez gene - cellIterator.next(); - // Skip species - cellIterator.next(); - - if (!miRTarBaseId.equals(currentMiRTarBaseId) || !geneName.equals(currentGene)) { - // new entry, store current one - MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, targetGenes); - addValueToMapElement(geneToMirna, currentGene, miRnaTarget); - targetGenes = new ArrayList<>(); - currentGene = geneName; - currentMiRTarBaseId = miRTarBaseId; - currentMiRNA = miRNA; - } - - // experiment - cell = cellIterator.next(); - String experiment = cell.getStringCellValue(); - - // support type - cell = cellIterator.next(); - String supportType = cell.getStringCellValue(); - - // pubmed - cell = cellIterator.next(); - String pubmed; - // seems to vary, so check both - if (cell.getCellType().equals(CellType.NUMERIC)) { -// pubmed = String.valueOf(cell.getNumericCellValue()); - pubmed = Integer.toString(Double.valueOf(cell.getNumericCellValue()).intValue()); - } else { - pubmed = cell.getStringCellValue(); - } - - targetGenes.add(new TargetGene(experiment, supportType, pubmed)); - } - - // parse last entry - MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, - targetGenes); - addValueToMapElement(geneToMirna, currentGene, miRnaTarget); - - for (Map.Entry> entry : geneToMirna.entrySet()) { - rocksDbManager.update(rocksdb, entry.getKey() + MIRTARBASE_SUFFIX, entry.getValue()); - } - } else { - logger.error("mirtarbase file not found"); - } - } - public List getMirnaTargets(String geneName) throws RocksDBException, IOException { String key = geneName + MIRTARBASE_SUFFIX; return rocksDbManager.getMirnaTargets(rocksdb, key); } - private static void addValueToMapElement(Map> map, String key, T value) { - if (map.containsKey(key)) { - map.get(key).add(value); - } else { - List valueList = new ArrayList<>(); - valueList.add(value); - map.put(key, valueList); - } - } - protected void indexCanonical(Path canonocalFile) throws IOException, RocksDBException { // Gene Transcript Canonical // ENSG00000210049.1 ENST00000387314.1 1 @@ -652,4 +387,30 @@ public String getCanonical(String transcriptId) throws RocksDBException, IOExcep } return new String(bytes); } + + // Implementation of the MirBaseParserCallback function + public class MirBaseCallback implements MirBaseParserCallback { + + private RocksDB rocksDB; + private RocksDbManager rocksDbManager; + private Logger logger; + + public MirBaseCallback(RocksDB rocksDB, RocksDbManager rocksDbManager) { + this.rocksDB = rocksDB; + this.rocksDbManager = rocksDbManager; + this.logger = LoggerFactory.getLogger(this.getClass()); + } + + @Override + public boolean 
processMiRnaGene(MiRnaGene miRnaGene) {
+            try {
+                rocksDbManager.update(rocksDB, miRnaGene.getId() + MIRBASE_SUFFIX, miRnaGene);
+            } catch (JsonProcessingException | RocksDBException e) {
+                logger.warn("Something went wrong while processing miRNA gene {}: {}", miRnaGene.getId(),
+                        StringUtils.join(e.getStackTrace(), "\t"));
+                return false;
+            }
+            return true;
+        }
+    }
 }
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java
index cd0863a259..970f73e05a 100644
--- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java
+++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java
@@ -16,904 +16,54 @@ package org.opencb.cellbase.lib.builders;
-import htsjdk.tribble.readers.TabixReader;
-import org.apache.commons.lang3.StringUtils;
-import org.opencb.biodata.formats.feature.gff.Gff2;
-import org.opencb.biodata.formats.feature.gtf.Gtf;
-import org.opencb.biodata.formats.feature.gtf.io.GtfReader;
-import org.opencb.biodata.formats.io.FileFormatException;
-import org.opencb.biodata.models.core.*;
-import org.opencb.biodata.tools.sequence.FastaIndex;
-import org.opencb.cellbase.core.ParamConstants;
 import org.opencb.cellbase.core.config.SpeciesConfiguration;
 import org.opencb.cellbase.core.exception.CellBaseException;
-import org.opencb.cellbase.core.serializer.CellBaseSerializer;
-import org.rocksdb.RocksDBException;
+import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer;
-import java.io.IOException;
-import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.*;
-public class GeneBuilder extends CellBaseBuilder {
+import static org.opencb.cellbase.lib.EtlCommons.*;
-    private Map<String, Integer> transcriptDict;
-    private Map<String, Exon> exonDict;
+public class GeneBuilder extends CellBaseBuilder {
-    private Path gtfFile;
-    private Path proteinFastaFile;
-    private Path cDnaFastaFile;
-    private Path geneDescriptionFile;
-    private Path xrefsFile;
-    private Path hgncFile;
-    private Path maneFile;
-    private Path lrgFile;
-    private Path uniprotIdMappingFile;
-    private Path tfbsFile;
-    private Path tabixFile;
-    private Path geneExpressionFile;
-    private Path geneDrugFile;
-    private Path hpoFile;
-    private Path disgenetFile;
-    private Path genomeSequenceFilePath;
-    private Path gnomadFile;
-    private Path geneOntologyAnnotationFile;
-    private Path miRBaseFile;
-    private Path miRTarBaseFile;
-    private Path cancerGeneCensusFile;
-    private Path cancerHostpotFile;
-    private Path ensemblCanonicalFile;
-    private Path tso500File;
-    private Path eglhHaemOncFile;
-    private boolean flexibleGTFParsing;
+    private EnsemblGeneBuilder ensemblGeneBuilder;
+    private RefSeqGeneBuilder refSeqGeneBuilder;
-    // source for genes is either ensembl or refseq
-    private final String SOURCE = ParamConstants.QueryParams.ENSEMBL.key();
-    private SpeciesConfiguration speciesConfiguration;
+    public GeneBuilder(Path downloadPath, Path buildPath, SpeciesConfiguration speciesConfiguration, boolean flexibleGTFParsing)
+            throws CellBaseException {
+        super(null);
-    private int geneCounter;
-    private ArrayList<String> geneList;
-    private String geneName;
-    private int transcriptCounter;
-    private ArrayList<String> transcriptList;
-    private String transcriptName;
-    private int exonCounter;
-    private String feature;
-    private Gtf nextGtfToReturn;
+        // Create Ensembl gene builder
+        CellBaseJsonFileSerializer ensemblGeneSerializer = new CellBaseJsonFileSerializer(buildPath.resolve(ENSEMBL_DATA),
ENSEMBL_GENE_BASENAME); + this.ensemblGeneBuilder = new EnsemblGeneBuilder(downloadPath.resolve(ENSEMBL_DATA), speciesConfiguration, flexibleGTFParsing, + ensemblGeneSerializer); - public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, SpeciesConfiguration speciesConfiguration, - CellBaseSerializer serializer) throws CellBaseException { - this(geneDirectoryPath, genomeSequenceFastaFile, speciesConfiguration, false, serializer); + // Create RefSeq gene builder + CellBaseJsonFileSerializer refSeqGeneSerializer = new CellBaseJsonFileSerializer(buildPath.resolve(REFSEQ_DATA), + REFSEQ_GENE_BASENAME); + this.refSeqGeneBuilder = new RefSeqGeneBuilder(downloadPath.resolve(REFSEQ_DATA), speciesConfiguration, refSeqGeneSerializer); } - public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, SpeciesConfiguration speciesConfiguration, - boolean flexibleGTFParsing, CellBaseSerializer serializer) throws CellBaseException { - this(null, geneDirectoryPath.resolve("description.txt"), - geneDirectoryPath.resolve("xrefs.txt"), - geneDirectoryPath.resolve("hgnc_complete_set_2023-11-01.txt"), - geneDirectoryPath.resolve("MANE.GRCh38.v1.1.summary.txt.gz"), - geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"), - geneDirectoryPath.resolve("idmapping_selected.tab.gz"), - geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz"), - geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz.tbi"), - geneDirectoryPath.resolve("allgenes_updown_in_organism_part.tab.gz"), - geneDirectoryPath.resolve("dgidb.tsv"), - geneDirectoryPath.resolve("phenotype_to_genes.txt"), - geneDirectoryPath.resolve("all_gene_disease_associations.tsv.gz"), - geneDirectoryPath.resolve("gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz"), - geneDirectoryPath.resolve("goa_human.gaf.gz"), - geneDirectoryPath.getParent().resolve("regulation/miRNA.xls"), - geneDirectoryPath.getParent().resolve("regulation/hsa_MTI.xlsx"), - geneDirectoryPath.resolve("cancer-gene-census.tsv"), - geneDirectoryPath.resolve("hotspots_v2.xls"), - geneDirectoryPath.resolve("ensembl_canonical.txt"), - geneDirectoryPath.resolve("TSO500_transcripts.txt"), - geneDirectoryPath.resolve("EGLH_HaemOnc_transcripts.txt"), - genomeSequenceFastaFile, - speciesConfiguration, flexibleGTFParsing, serializer); + public void check() throws Exception { + // Check Ensembl requirements + ensemblGeneBuilder.check(); - getGtfFileFromGeneDirectoryPath(geneDirectoryPath); - getProteinFastaFileFromGeneDirectoryPath(geneDirectoryPath); - getCDnaFastaFileFromGeneDirectoryPath(geneDirectoryPath); - } - - public GeneBuilder(Path gtfFile, Path geneDescriptionFile, Path xrefsFile, Path hgncFile, Path maneFile, - Path lrgFile, Path uniprotIdMappingFile, Path tfbsFile, Path tabixFile, Path geneExpressionFile, - Path geneDrugFile, Path hpoFile, Path disgenetFile, Path gnomadFile, - Path geneOntologyAnnotationFile, Path miRBaseFile, Path miRTarBaseFile, Path cancerGeneCensusFile, - Path cancerHostpotFile, Path ensemblCanonicalFile, Path tso500File, Path eglhHaemOncFile, - Path genomeSequenceFilePath, SpeciesConfiguration speciesConfiguration, boolean flexibleGTFParsing, - CellBaseSerializer serializer) { - super(serializer); - - this.gtfFile = gtfFile; - this.geneDescriptionFile = geneDescriptionFile; - this.xrefsFile = xrefsFile; - this.hgncFile = hgncFile; - this.maneFile = maneFile; - this.lrgFile = lrgFile; - this.uniprotIdMappingFile = uniprotIdMappingFile; - this.tfbsFile = tfbsFile; - this.tabixFile = tabixFile; - 
this.geneExpressionFile = geneExpressionFile; - this.geneDrugFile = geneDrugFile; - this.hpoFile = hpoFile; - this.disgenetFile = disgenetFile; - this.gnomadFile = gnomadFile; - this.geneOntologyAnnotationFile = geneOntologyAnnotationFile; - this.miRBaseFile = miRBaseFile; - this.miRTarBaseFile = miRTarBaseFile; - this.cancerGeneCensusFile = cancerGeneCensusFile; - this.cancerHostpotFile = cancerHostpotFile; - this.ensemblCanonicalFile = ensemblCanonicalFile; - this.tso500File = tso500File; - this.eglhHaemOncFile = eglhHaemOncFile; - this.genomeSequenceFilePath = genomeSequenceFilePath; - this.speciesConfiguration = speciesConfiguration; - this.flexibleGTFParsing = flexibleGTFParsing; - - transcriptDict = new HashMap<>(250000); - exonDict = new HashMap<>(8000000); + // Check RefSeq requirements + refSeqGeneBuilder.check(); } + @Override public void parse() throws Exception { - Gene gene = null; - Transcript transcript; - Exon exon = null; - int cdna = 1; - int cds = 1; - EnsemblGeneBuilderIndexer indexer = new EnsemblGeneBuilderIndexer(gtfFile.getParent()); - - try { - // process files and put values in rocksdb - indexer.index(geneDescriptionFile, xrefsFile, hgncFile, maneFile, lrgFile, uniprotIdMappingFile, - proteinFastaFile, cDnaFastaFile, speciesConfiguration.getScientificName(), geneExpressionFile, - geneDrugFile, hpoFile, disgenetFile, gnomadFile, geneOntologyAnnotationFile, miRBaseFile, - miRTarBaseFile, cancerGeneCensusFile, cancerHostpotFile, ensemblCanonicalFile, - tso500File, eglhHaemOncFile); - - TabixReader tabixReader = null; - if (!Files.exists(tfbsFile) || !Files.exists(tabixFile)) { - logger.error("Tfbs or tabix file not found. Download them and try again."); - } else { - tabixReader = new TabixReader(tfbsFile.toAbsolutePath().toString(), tabixFile.toAbsolutePath().toString()); - } - - // Preparing the fasta file for fast accessing -// System.out.println("genomeSequenceFilePath.toString() = " + genomeSequenceFilePath.toString()); - FastaIndex fastaIndex = new FastaIndex(genomeSequenceFilePath); - - // Empty transcript and exon dictionaries - transcriptDict.clear(); - exonDict.clear(); - logger.info("Parsing gtf..."); - GtfReader gtfReader = new GtfReader(gtfFile); - - // Gene->Transcript->Feature->GTF line - Map>> gtfMap = null; - if (flexibleGTFParsing) { - gtfMap = loadGTFMap(gtfReader); - initializePointers(gtfMap); - } - - Gtf gtf; - while ((gtf = getGTFEntry(gtfReader, gtfMap)) != null) { - - if (gtf.getFeature().equals("gene") || gtf.getFeature().equals("transcript") - || gtf.getFeature().equals("UTR") || gtf.getFeature().equals("Selenocysteine")) { - continue; - } - - String geneId = gtf.getAttributes().get("gene_id"); - String transcriptId = gtf.getAttributes().get("transcript_id"); - String geneName = gtf.getAttributes().get("gene_name"); - if (newGene(gene, geneId)) { - // If new geneId is different from the current then we must serialize before data new gene - if (gene != null) { - serializer.serialize(gene); - } - - GeneAnnotation geneAnnotation = new GeneAnnotation(indexer.getExpression(geneId), indexer.getDiseases(geneName), - indexer.getDrugs(geneName), indexer.getConstraints(geneId), indexer.getMirnaTargets(geneName), - indexer.getCancerGeneCensus(geneName), indexer.getCancerHotspot(geneName)); - - gene = new Gene(geneId, geneName, gtf.getSequenceName().replaceFirst("chr", ""), - gtf.getStart(), gtf.getEnd(), gtf.getStrand(), gtf.getAttributes().get("gene_version"), - gtf.getAttributes().get("gene_biotype"), "KNOWN", SOURCE, indexer.getDescription(geneId), 
- new ArrayList<>(), indexer.getMirnaGene(transcriptId), geneAnnotation); - } - - // Check if Transcript exist in the Gene Set of transcripts - if (!transcriptDict.containsKey(transcriptId)) { - transcript = getTranscript(gene, indexer, tabixReader, gtf, transcriptId); - } else { - transcript = gene.getTranscripts().get(transcriptDict.get(transcriptId)); - } - - // At this point gene and transcript objects are set up - // Update gene and transcript genomic coordinates, start must be the - // lower, and end the higher - updateTranscriptAndGeneCoords(transcript, gene, gtf); - - String transcriptIdWithoutVersion = transcript.getId().split("\\.")[0]; - if (gtf.getFeature().equalsIgnoreCase("exon")) { - // Obtaining the exon sequence - String exonId = gtf.getAttributes().get("exon_id") + "." + gtf.getAttributes().get("exon_version"); - String exonSequence = fastaIndex.query(gtf.getSequenceName(), gtf.getStart(), gtf.getEnd()); - - exon = new Exon(exonId, gtf.getSequenceName().replaceFirst("chr", ""), - gtf.getStart(), gtf.getEnd(), gtf.getStrand(), 0, 0, 0, 0, 0, 0, -1, Integer.parseInt(gtf - .getAttributes().get("exon_number")), exonSequence); - transcript.getExons().add(exon); - - exonDict.put(transcriptIdWithoutVersion + "_" + exon.getExonNumber(), exon); - if (gtf.getAttributes().get("exon_number").equals("1")) { - cdna = 1; - cds = 1; - } else { - // with every exon we update cDNA length with the previous exon length - cdna += exonDict.get(transcriptIdWithoutVersion + "_" + (exon.getExonNumber() - 1)).getEnd() - - exonDict.get(transcriptIdWithoutVersion + "_" + (exon.getExonNumber() - 1)).getStart() + 1; - } - } else { - exon = exonDict.get(transcriptIdWithoutVersion + "_" + exon.getExonNumber()); - if (gtf.getFeature().equalsIgnoreCase("CDS")) { - // Protein ID is only present in CDS lines - String proteinId = gtf.getAttributes().get("protein_id") != null - ? gtf.getAttributes().get("protein_id") + "." 
+ gtf.getAttributes().get("protein_version") - : ""; - transcript.setProteinId(proteinId); - transcript.setProteinSequence(indexer.getProteinFasta(proteinId)); - - if (gtf.getStrand().equals("+") || gtf.getStrand().equals("1")) { - // CDS states the beginning of coding start - exon.setGenomicCodingStart(gtf.getStart()); - exon.setGenomicCodingEnd(gtf.getEnd()); - - // cDNA coordinates - exon.setCdnaCodingStart(gtf.getStart() - exon.getStart() + cdna); - exon.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); - // Set cdnaCodingEnd to prevent those cases without stop_codon - - transcript.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); - exon.setCdsStart(cds); - exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); - - // increment in the coding length - cds += gtf.getEnd() - gtf.getStart() + 1; - transcript.setCdsLength(cds - 1); // Set cdnaCodingEnd to prevent those cases without stop_codon - - exon.setPhase(Integer.parseInt(gtf.getFrame())); - - if (transcript.getGenomicCodingStart() == 0 || transcript.getGenomicCodingStart() > gtf.getStart()) { - transcript.setGenomicCodingStart(gtf.getStart()); - } - if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) { - transcript.setGenomicCodingEnd(gtf.getEnd()); - } - // only first time - if (transcript.getCdnaCodingStart() == 0) { - transcript.setCdnaCodingStart(gtf.getStart() - exon.getStart() + cdna); - } - // strand - - } else { - // CDS states the beginning of coding start - exon.setGenomicCodingStart(gtf.getStart()); - exon.setGenomicCodingEnd(gtf.getEnd()); - // cDNA coordinates - // cdnaCodingStart points to the same base position than genomicCodingEnd - exon.setCdnaCodingStart(exon.getEnd() - gtf.getEnd() + cdna); - // cdnaCodingEnd points to the same base position than genomicCodingStart - exon.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); - // Set cdnaCodingEnd to prevent those cases without stop_codon - transcript.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); - exon.setCdsStart(cds); - exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); - - // increment in the coding length - cds += gtf.getEnd() - gtf.getStart() + 1; - transcript.setCdsLength(cds - 1); // Set cdnaCodingEnd to prevent those cases without stop_codon - exon.setPhase(Integer.parseInt(gtf.getFrame())); - - if (transcript.getGenomicCodingStart() == 0 || transcript.getGenomicCodingStart() > gtf.getStart()) { - transcript.setGenomicCodingStart(gtf.getStart()); - } - if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) { - transcript.setGenomicCodingEnd(gtf.getEnd()); - } - // only first time - if (transcript.getCdnaCodingStart() == 0) { - // cdnaCodingStart points to the same base position than genomicCodingEnd - transcript.setCdnaCodingStart(exon.getEnd() - gtf.getEnd() + cdna); - } - } - - } -// if (gtf.getFeature().equalsIgnoreCase("start_codon")) { -// // nothing to do -// System.out.println("Empty block, this should be redesigned"); -// } - if (gtf.getFeature().equalsIgnoreCase("stop_codon")) { - // setCdnaCodingEnd = false; // stop_codon found, cdnaCodingEnd will be set here, - // no need to set it at the beginning of next feature - if (exon.getStrand().equals("+")) { - updateStopCodingDataPositiveExon(exon, cdna, cds, gtf); - - cds += gtf.getEnd() - gtf.getStart(); - // If stop_codon appears, overwrite values - transcript.setGenomicCodingEnd(gtf.getEnd()); - transcript.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); - 
transcript.setCdsLength(cds - 1); - - } else { - updateNegativeExonCodingData(exon, cdna, cds, gtf); - - cds += gtf.getEnd() - gtf.getStart(); - // If stop_codon appears, overwrite values - transcript.setGenomicCodingStart(gtf.getStart()); - // cdnaCodingEnd points to the same base position than genomicCodingStart - transcript.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); - transcript.setCdsLength(cds - 1); - } - } - } - } - - // last gene must be serialized - serializer.serialize(gene); - - // cleaning - gtfReader.close(); - serializer.close(); - fastaIndex.close(); - indexer.close(); - } catch (Exception e) { - indexer.close(); - throw e; - } - } - - private Transcript getTranscript(Gene gene, EnsemblGeneBuilderIndexer indexer, TabixReader tabixReader, Gtf gtf, String transcriptId) - throws IOException, RocksDBException { - Map gtfAttributes = gtf.getAttributes(); - - // To match Ensembl, we set the ID as transcript+version. This also matches the Ensembl website. - String transcriptIdWithVersion = transcriptId + "." + gtfAttributes.get("transcript_version"); - String biotype = gtfAttributes.get("transcript_biotype") != null ? gtfAttributes.get("transcript_biotype") : ""; - String transcriptChromosome = gtf.getSequenceName().replaceFirst("chr", ""); - List transcriptTfbses = getTranscriptTfbses(gtf, transcriptChromosome, tabixReader); - - List ontologyAnnotations = getOntologyAnnotations(indexer.getXrefs(transcriptId), indexer); - TranscriptAnnotation transcriptAnnotation = new TranscriptAnnotation(ontologyAnnotations, indexer.getConstraints(transcriptId)); - - Transcript transcript = new Transcript(transcriptIdWithVersion, gtfAttributes.get("transcript_name"), transcriptChromosome, - gtf.getStart(), gtf.getEnd(), gtf.getStrand(), biotype, "KNOWN", - 0, 0, 0, 0, 0, - indexer.getCdnaFasta(transcriptIdWithVersion), "", "", "", - gtfAttributes.get("transcript_version"), SOURCE, new ArrayList<>(), indexer.getXrefs(transcriptId), transcriptTfbses, - new HashSet<>(), transcriptAnnotation); - - // Adding Ids appearing in the GTF to the xrefs is required, since for some unknown reason the ENSEMBL - // Perl API often doesn't return all genes resulting in an incomplete xrefs.txt file. We must ensure - // that the xrefs array contains all ids present in the GTF file - addGtfXrefs(transcript, gene, gtfAttributes); - - // Add HGNC ID mappings, with this we can know which Ensembl and Refseq transcripts match to HGNC ID - String hgncId = indexer.getHgncId(gene.getName()); - if (StringUtils.isNotEmpty(hgncId)) { - transcript.getXrefs().add(new Xref(hgncId, "hgnc_id", "HGNC ID")); - } - - // Add MANE Select mappings, with this we can know which Ensembl and Refseq transcripts match according to MANE - for (String suffix: Arrays.asList("refseq", "refseq_protein")) { - String maneRefSeq = indexer.getMane(transcriptIdWithVersion, suffix); - if (StringUtils.isNotEmpty(maneRefSeq)) { - transcript.getXrefs().add(new Xref(maneRefSeq, "mane_select_" + suffix, - "MANE Select RefSeq" + (suffix.contains("_") ? " Protein" : ""))); - } - } - - // Add LRG mappings, with this we can know which Ensembl and Refseq transcripts match according to LRG - String lrgRefSeq = indexer.getLrg(transcriptIdWithVersion, "refseq"); - if (StringUtils.isNotEmpty(lrgRefSeq)) { - transcript.getXrefs().add(new Xref(lrgRefSeq, "lrg_refseq", "LRG RefSeq")); - } - - // Add Flags - // 1. 
GTF tags - String tags = gtf.getAttributes().get("tag"); - if (StringUtils.isNotEmpty(tags)) { - transcript.getFlags().addAll(Arrays.asList(tags.split(","))); - } - // 2. TSL - String supportLevel = gtfAttributes.get("transcript_support_level"); - if (StringUtils.isNotEmpty(supportLevel)) { - // split on space so "5 (assigned to previous version 3)" and "5" both become "TSL:5" - String truncatedSupportLevel = supportLevel.split(" ")[0]; - transcript.getFlags().add("TSL:" + truncatedSupportLevel); - } - // 3. MANE Flag - String maneFlag = indexer.getMane(transcriptIdWithVersion, "flag"); - if (StringUtils.isNotEmpty(maneFlag)) { - transcript.getFlags().add(maneFlag); - } - // 4. LRG Flag - String lrg = indexer.getLrg(transcriptIdWithVersion, "ensembl"); - if (StringUtils.isNotEmpty(lrg)) { - transcript.getFlags().add("LRG"); - } else { - for (Xref xref : transcript.getXrefs()) { - if (xref.getId().startsWith("LRG_") && xref.getId().contains("t")) { - transcript.getFlags().add("LRG"); - } - } - } - // 5. Ensembl Canonical - String canonicalFlag = indexer.getCanonical(transcriptIdWithVersion); - if (StringUtils.isNotEmpty(canonicalFlag)) { - transcript.getFlags().add(canonicalFlag); - } - - // 6. TSO500 and EGLH HaemOnc - String maneRefSeq = indexer.getMane(transcriptIdWithVersion, "refseq"); - if (StringUtils.isNotEmpty(maneRefSeq)) { - String tso500Flag = indexer.getTSO500(maneRefSeq.split("\\.")[0]); - if (StringUtils.isNotEmpty(tso500Flag)) { - transcript.getFlags().add(tso500Flag); - } - - String eglhHaemOncFlag = indexer.getEGLHHaemOnc(maneRefSeq.split("\\.")[0]); - if (StringUtils.isNotEmpty(eglhHaemOncFlag)) { - transcript.getFlags().add(eglhHaemOncFlag); - } - } - - gene.getTranscripts().add(transcript); - - // Do not change order!! size()-1 is the index of the transcript ID - transcriptDict.put(transcriptId, gene.getTranscripts().size() - 1); - return transcript; - } - - private List getOntologyAnnotations(List xrefs, EnsemblGeneBuilderIndexer indexer) - throws IOException, RocksDBException { - if (xrefs == null || indexer == null) { - return null; - } - List annotations = new ArrayList<>(); - for (Xref xref : xrefs) { - if (xref.getDbName().equals("uniprotkb_acc")) { - String key = xref.getId(); - if (key != null && indexer.getOntologyAnnotations(key) != null) { - annotations.addAll(indexer.getOntologyAnnotations(key)); - } - } - } - return annotations; - } - - private void updateNegativeExonCodingData(Exon exon, int cdna, int cds, Gtf gtf) { - // we need to increment 3 nts, the stop_codon length. - exon.setGenomicCodingStart(gtf.getStart()); - // cdnaCodingEnd points to the same base position than genomicCodingStart - exon.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); - exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); - - // If the STOP codon corresponds to the first three nts of the exon then no CDS will be defined - // in the gtf -as technically the STOP codon is non-coding- and we must manually set coding - // starts - if (exon.getGenomicCodingEnd() == 0) { - exon.setGenomicCodingEnd(exon.getGenomicCodingStart() + 2); - } - if (exon.getCdnaCodingStart() == 0) { - exon.setCdnaCodingStart(exon.getCdnaCodingEnd() - 2); - } - if (exon.getCdsStart() == 0) { - exon.setCdsStart(exon.getCdsEnd() - 2); - } - } - - private void updateStopCodingDataPositiveExon(Exon exon, int cdna, int cds, Gtf gtf) { - // we need to increment 3 nts, the stop_codon length. 
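These stop-codon coordinate updates are easiest to sanity-check with concrete numbers. A rough worked example with assumed values (an exon starting at genomic position 100, running cDNA offset cdna=1, running CDS offset cds=201, a 3-nt stop codon at 301..303; the figures are illustrative only, not taken from any real transcript):

public class StopCodonMath {
    public static void main(String[] args) {
        int exonStart = 100;   // genomic start of the current exon
        int cdna = 1;          // cDNA offset at this point of the parse
        int cds = 201;         // CDS offset at this point of the parse
        int stopStart = 301;   // stop_codon GTF start
        int stopEnd = 303;     // stop_codon GTF end (3 nt total)

        // The same formulas as the assignments in updateStopCodingDataPositiveExon():
        int genomicCodingEnd = stopEnd;                  // 303
        int cdnaCodingEnd = stopEnd - exonStart + cdna;  // 303 - 100 + 1 = 204
        int cdsEnd = stopEnd - stopStart + cds;          // 303 - 301 + 201 = 203

        System.out.printf("genomicCodingEnd=%d, cdnaCodingEnd=%d, cdsEnd=%d%n",
                genomicCodingEnd, cdnaCodingEnd, cdsEnd);
    }
}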
- exon.setGenomicCodingEnd(gtf.getEnd()); - exon.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); - exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + logger.info(BUILDING_LOG_MESSAGE, getDataName(GENE_DATA)); - // If the STOP codon corresponds to the first three nts of the exon then no CDS will be defined - // in the gtf -as technically the STOP codon is non-coding- and we must manually set coding - // starts - if (exon.getGenomicCodingStart() == 0) { - exon.setGenomicCodingStart(exon.getGenomicCodingEnd() - 2); - } - if (exon.getCdnaCodingStart() == 0) { - exon.setCdnaCodingStart(exon.getCdnaCodingEnd() - 2); - } - if (exon.getCdsStart() == 0) { - exon.setCdsStart(exon.getCdsEnd() - 2); - } - } - - private void addGtfXrefs(Transcript transcript, Gene gene, Map gtfAttributes) { - if (transcript.getXrefs() == null) { - transcript.setXrefs(new ArrayList<>()); - } - - transcript.getXrefs().add(new Xref(gene.getId(), "ensembl_gene", "Ensembl Gene")); - transcript.getXrefs().add(new Xref(transcript.getId(), "ensembl_transcript", "Ensembl Transcript")); - - // Some non-coding genes do not have Gene names - if (StringUtils.isNotEmpty(gene.getName())) { - transcript.getXrefs().add(new Xref(gene.getName(), "hgnc_symbol", "HGNC Symbol")); - transcript.getXrefs().add(new Xref(transcript.getName(), "ensembl_transcript_name", "Ensembl Transcript Name")); - } - - if (gtfAttributes.get("ccds_id") != null) { - transcript.getXrefs().add(new Xref(gtfAttributes.get("ccds_id"), "ccds_id", "CCDS")); - } - } - - private void initializePointers(Map>> gtfMap) { - geneCounter = 0; - geneList = new ArrayList<>(gtfMap.keySet()); - geneName = geneList.get(geneCounter); - transcriptCounter = 0; - transcriptList = new ArrayList<>(gtfMap.get(geneName).keySet()); - transcriptName = transcriptList.get(transcriptCounter); - exonCounter = 0; - feature = "exon"; - nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); - } - - private Gtf getGTFEntry(GtfReader gtfReader, Map>> gtfMap) throws FileFormatException { - // Flexible parsing is deactivated, return next line - if (gtfMap == null) { - return gtfReader.read(); - // Flexible parsing activated, carefully select next line to return - } else { - // No more genes/features to return - if (nextGtfToReturn == null) { - return null; - } - Gtf gtfToReturn = nextGtfToReturn; - if (feature.equals("exon")) { -// gtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); - if (gtfMap.get(geneName).get(transcriptName).containsKey("cds")) { - nextGtfToReturn = getExonCDSLine(((Gtf) ((List) gtfMap.get(geneName) - .get(transcriptName).get("exon")).get(exonCounter)).getStart(), - ((Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter)).getEnd(), - (List) gtfMap.get(geneName).get(transcriptName).get("cds")); - if (nextGtfToReturn != null) { - feature = "cds"; - return gtfToReturn; - } - } - // if no cds was found for this exon, get next exon - getFeatureFollowsExon(gtfMap); - return gtfToReturn; - } - if (feature.equals("cds") || feature.equals("stop_codon")) { - getFeatureFollowsExon(gtfMap); - return gtfToReturn; - } - if (feature.equals("start_codon")) { - feature = "stop_codon"; - nextGtfToReturn = (Gtf) gtfMap.get(geneName).get(transcriptName).get("stop_codon"); - return gtfToReturn; - } - // The only accepted features that should appear in the gtfMap are exon, cds, start_codon and stop_codon - throw new FileFormatException("Execution cannot reach this 
point"); - } - } - - private Gtf getExonCDSLine(Integer exonStart, Integer exonEnd, List cdsList) { - for (Object cdsObject : cdsList) { - int cdsStart = ((Gtf) cdsObject).getStart(); - int cdsEnd = ((Gtf) cdsObject).getEnd(); - if (cdsStart <= exonEnd && cdsEnd >= exonStart) { - return (Gtf) cdsObject; - } - } - return null; - } - - private void getFeatureFollowsExon(Map>> gtfMap) { - exonCounter++; - if (exonCounter == ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).size() - || feature.equals("stop_codon")) { - // If last returned feature was a stop_codon or no start_codon is provided for this transcript, - // next transcript must be selected - if (!feature.equals("stop_codon") && gtfMap.get(geneName).get(transcriptName).containsKey("start_codon")) { - feature = "start_codon"; - nextGtfToReturn = (Gtf) gtfMap.get(geneName).get(transcriptName).get("start_codon"); - } else { - transcriptCounter++; - // No more transcripts in this gene, check if there are more genes - if (transcriptCounter == gtfMap.get(geneName).size()) { - geneCounter++; - // No more genes available, end parsing - if (geneCounter == gtfMap.size()) { - nextGtfToReturn = null; - feature = null; - // Still more genes to parse, select next one - } else { - geneName = geneList.get(geneCounter); - transcriptCounter = 0; - transcriptList = new ArrayList<>(gtfMap.get(geneName).keySet()); - } - } - // Check if a new gene was selected - null would indicate there're no more genes - if (nextGtfToReturn != null) { - transcriptName = transcriptList.get(transcriptCounter); - exonCounter = 0; - feature = "exon"; - nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); - } - } - } else { - feature = "exon"; - nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); - } - } - - private Map>> loadGTFMap(GtfReader gtfReader) throws FileFormatException { - Map>> gtfMap = new HashMap<>(); - Gtf gtf; - while ((gtf = gtfReader.read()) != null) { - if (gtf.getFeature().equals("gene") || gtf.getFeature().equals("transcript") - || gtf.getFeature().equals("UTR") || gtf.getFeature().equals("Selenocysteine")) { - continue; - } - - // Get GTF lines associated with this gene - create a new Map of GTF entries if it's a new gene - String geneId = gtf.getAttributes().get("gene_id"); - // Transcript -> feature -> GTF line - Map> gtfMapGeneEntry; - if (gtfMap.containsKey(geneId)) { - gtfMapGeneEntry = gtfMap.get(geneId); - } else { - gtfMapGeneEntry = new HashMap(); - gtfMap.put(geneId, gtfMapGeneEntry); - } - - // Get GTF lines associated with this transcript - create a new Map of GTF entries if it's a new gene - String transcriptId = gtf.getAttributes().get("transcript_id"); - Map gtfMapTranscriptEntry; - if (gtfMapGeneEntry.containsKey(transcriptId)) { - gtfMapTranscriptEntry = gtfMapGeneEntry.get(transcriptId); - } else { - gtfMapTranscriptEntry = new HashMap(); - gtfMapGeneEntry.put(transcriptId, gtfMapTranscriptEntry); - } - - addGTFLineToGTFMap(gtfMapTranscriptEntry, gtf); - - } - - // Exon number is mandatory for the parser to be able to properly generate the gene data model - if (!exonNumberPresent(gtfMap)) { - setExonNumber(gtfMap); - } - - return gtfMap; - } - - private boolean exonNumberPresent(Map>> gtfMap) { - Map> geneGtfMap = gtfMap.get(gtfMap.keySet().iterator().next()); - return ((Gtf) ((List) geneGtfMap.get(geneGtfMap.keySet().iterator().next()).get("exon")).get(0)) - .getAttributes().containsKey("exon_number"); - } - - private 
void setExonNumber(Map>> gtfMap) { - for (String gene : gtfMap.keySet()) { - for (String transcript : gtfMap.get(gene).keySet()) { - List exonList = (List) gtfMap.get(gene).get(transcript).get("exon"); - Collections.sort(exonList, (e1, e2) -> Integer.valueOf(e1.getStart()).compareTo(e2.getStart())); - if (exonList.get(0).getStrand().equals("+")) { - int exonNumber = 1; - for (Gtf gtf : exonList) { - gtf.getAttributes().put("exon_number", String.valueOf(exonNumber)); - exonNumber++; - } - } else { - int exonNumber = exonList.size(); - for (Gtf gtf : exonList) { - gtf.getAttributes().put("exon_number", String.valueOf(exonNumber)); - exonNumber--; - } - } - } - } - } - - private void addGTFLineToGTFMap(Map gtfMapTranscriptEntry, Gtf gtf) { - // Add exon/cds GTF line to the corresponding gene entry in the map - String featureType = gtf.getFeature().toLowerCase(); - if (featureType.equals("exon") || featureType.equals("cds")) { - List gtfList; - // Check if there were exons already stored - if (gtfMapTranscriptEntry.containsKey(featureType)) { - gtfList = (List) gtfMapTranscriptEntry.get(featureType); - } else { - gtfList = new ArrayList<>(); - gtfMapTranscriptEntry.put(featureType, gtfList); - } - gtfList.add(gtf); - // Only one start/stop codon can be stored per transcript - no need to check if the "start_codon"/"stop_codon" - // keys are already there - } else if (featureType.equals("start_codon") || featureType.equals("stop_codon")) { - gtfMapTranscriptEntry.put(featureType, gtf); - } - } + // Check folders and files before building + check(); - private List getTranscriptTfbses(Gtf transcript, String chromosome, TabixReader tabixReader) throws IOException { - if (tabixReader == null) { - return null; - } - List transcriptTfbses = null; - - int transcriptStart = transcript.getStart(); - int transcriptEnd = transcript.getEnd(); - - - String line; - TabixReader.Iterator iter = tabixReader.query(chromosome, transcriptStart, transcriptEnd); - while ((line = iter.next()) != null) { - String[] elements = line.split("\t"); - - String sequenceName = elements[0]; - String source = elements[1]; - String feature = elements[2]; - int start = Integer.parseInt(elements[3]); - int end = Integer.parseInt(elements[4]); - String score = elements[5]; - String strand = elements[6]; - String frame = elements[7]; - String attribute = elements[8]; - - if (strand.equals(transcript.getStrand())) { - continue; - } - - if (transcript.getStrand().equals("+")) { - if (start > transcript.getStart() + 500) { - break; - } else if (end > transcript.getStart() - 2500) { - Gff2 tfbs = new Gff2(sequenceName, source, feature, start, end, score, strand, frame, attribute); - transcriptTfbses = addTranscriptTfbstoList(tfbs, transcript, chromosome, transcriptTfbses); - } - } else { - // transcript in negative strand - if (start > transcript.getEnd() + 2500) { - break; - } else if (start > transcript.getEnd() - 500) { - Gff2 tfbs = new Gff2(sequenceName, source, feature, start, end, score, strand, frame, attribute); - transcriptTfbses = addTranscriptTfbstoList(tfbs, transcript, chromosome, transcriptTfbses); - } - } - } - - return transcriptTfbses; - } - - protected List addTranscriptTfbstoList(Gff2 tfbs, Gtf transcript, String chromosome, - List transcriptTfbses) { - if (transcriptTfbses == null) { - transcriptTfbses = new ArrayList<>(); - } - - // binding_matrix_stable_id=ENSPFM0542;epigenomes_with_experimental_evidence=SK-N.%2CMCF-7%2CH1-hESC_3%2CHCT116; - // stable_id=ENSM00208374688;transcription_factor_complex=TEAD4::ESRRB - 
String[] attributes = tfbs.getAttribute().split(";"); - - String id = null; - String pfmId = null; - List transciptionFactors = null; - - for (String attributePair : attributes) { - String[] attributePairArray = attributePair.split("="); - switch(attributePairArray[0]) { - case "binding_matrix_stable_id": - pfmId = attributePairArray[1]; - break; - case "stable_id": - id = attributePairArray[1]; - break; - case "transcription_factor_complex": - transciptionFactors = Arrays.asList(attributePairArray[1].split("(::)|(%2C)")); - break; - default: - break; - } - } - - transcriptTfbses.add(new TranscriptTfbs(id, pfmId, tfbs.getFeature(), transciptionFactors, chromosome, tfbs.getStart(), - tfbs.getEnd(), getRelativeTranscriptTfbsStart(tfbs, transcript), getRelativeTranscriptTfbsEnd(tfbs, transcript), - Float.parseFloat(tfbs.getScore()))); - return transcriptTfbses; - } - - private Integer getRelativeTranscriptTfbsStart(Gff2 tfbs, Gtf transcript) { - Integer relativeStart; - if (transcript.getStrand().equals("+")) { - if (tfbs.getStart() < transcript.getStart()) { - relativeStart = tfbs.getStart() - transcript.getStart(); - } else { - relativeStart = tfbs.getStart() - transcript.getStart() + 1; - } - } else { - // negative strand transcript - if (tfbs.getEnd() > transcript.getEnd()) { - relativeStart = transcript.getEnd() - tfbs.getEnd(); - } else { - relativeStart = transcript.getEnd() - tfbs.getEnd() + 1; - } - } - return relativeStart; - } - - private Integer getRelativeTranscriptTfbsEnd(Gff2 tfbs, Gtf transcript) { - Integer relativeEnd; - if (transcript.getStrand().equals("+")) { - if (tfbs.getEnd() < transcript.getStart()) { - relativeEnd = tfbs.getEnd() - transcript.getStart(); - } else { - relativeEnd = tfbs.getEnd() - transcript.getStart() + 1; - } - } else { - if (tfbs.getStart() > transcript.getEnd()) { - relativeEnd = transcript.getEnd() - tfbs.getStart(); - } else { - relativeEnd = transcript.getEnd() - tfbs.getStart() + 1; - } - } - return relativeEnd; - } - - - - private boolean newGene(Gene previousGene, String newGeneId) { - return previousGene == null || !newGeneId.equals(previousGene.getId()); - } - - private void updateTranscriptAndGeneCoords(Transcript transcript, Gene gene, Gtf gtf) { - if (transcript.getStart() > gtf.getStart()) { - transcript.setStart(gtf.getStart()); - } - if (transcript.getEnd() < gtf.getEnd()) { - transcript.setEnd(gtf.getEnd()); - } - if (gene.getStart() > gtf.getStart()) { - gene.setStart(gtf.getStart()); - } - if (gene.getEnd() < gtf.getEnd()) { - gene.setEnd(gtf.getEnd()); - } - } - - private void getGtfFileFromGeneDirectoryPath(Path geneDirectoryPath) { - for (String fileName : geneDirectoryPath.toFile().list()) { - if (fileName.endsWith(".gtf") || fileName.endsWith(".gtf.gz")) { - gtfFile = geneDirectoryPath.resolve(fileName); - break; - } - } - } - - private void getProteinFastaFileFromGeneDirectoryPath(Path geneDirectoryPath) { - for (String fileName : geneDirectoryPath.toFile().list()) { - if (fileName.endsWith(".pep.all.fa") || fileName.endsWith(".pep.all.fa.gz")) { - proteinFastaFile = geneDirectoryPath.resolve(fileName); - break; - } - } - } + // Build Ensembl/RefSeq genes + ensemblGeneBuilder.parse(); + refSeqGeneBuilder.parse(); - private void getCDnaFastaFileFromGeneDirectoryPath(Path geneDirectoryPath) { - for (String fileName : geneDirectoryPath.toFile().list()) { - if (fileName.endsWith(".cdna.all.fa") || fileName.endsWith(".cdna.all.fa.gz")) { - cDnaFastaFile = geneDirectoryPath.resolve(fileName); - break; - } - } + 
logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(GENE_DATA)); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderIndexer.java index 285236ba60..b8941cc448 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderIndexer.java @@ -24,9 +24,10 @@ import org.opencb.biodata.formats.sequence.fasta.Fasta; import org.opencb.biodata.formats.sequence.fasta.io.FastaReader; import org.opencb.biodata.models.clinical.ClinicalProperty; -import org.opencb.biodata.models.core.CancerHotspot; -import org.opencb.biodata.models.core.CancerHotspotVariant; -import org.opencb.biodata.models.core.GeneCancerAssociation; +import org.opencb.biodata.models.core.*; +import org.opencb.biodata.models.variant.avro.GeneDrugInteraction; +import org.opencb.biodata.models.variant.avro.GeneTraitAssociation; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.commons.utils.FileUtils; import org.rocksdb.Options; import org.rocksdb.RocksDB; @@ -42,8 +43,14 @@ import java.util.*; import java.util.stream.Collectors; +import static org.opencb.cellbase.lib.EtlCommons.*; +import static org.opencb.cellbase.lib.builders.CellBaseBuilder.PARSING_DONE_LOG_MESSAGE; +import static org.opencb.cellbase.lib.builders.CellBaseBuilder.PARSING_LOG_MESSAGE; + public class GeneBuilderIndexer { + public static final String ROCKSDB_FOLDER = "rocksdb.idx"; + protected RocksDB rocksdb; protected RocksDbManager rocksDbManager; protected Logger logger; @@ -69,7 +76,7 @@ public GeneBuilderIndexer(Path genePath) { private void init(Path genePath) { rocksDbManager = new RocksDbManager(); - dbLocation = genePath.resolve("integration.idx").toString(); + dbLocation = genePath.resolve(ROCKSDB_FOLDER).toString(); rocksdb = rocksDbManager.getDBConnection(dbLocation); dbOption = new Options().setCreateIfMissing(true); @@ -77,18 +84,14 @@ private void init(Path genePath) { } protected void indexCdnaSequences(Path cDnaFastaFile) throws IOException, FileFormatException, RocksDBException { - logger.info("Loading RefSeq's cDNA sequences..."); - FileUtils.checkPath(cDnaFastaFile); - if (Files.size(cDnaFastaFile) > 0) { - FastaReader fastaReader = new FastaReader(cDnaFastaFile); - Fasta fasta; - while ((fasta = fastaReader.read()) != null) { - rocksDbManager.update(rocksdb, fasta.getId() + CDNA_SEQUENCE_SUFFIX, fasta.getSeq()); - } - fastaReader.close(); - } else { - logger.warn("RefSeq's cDNA sequences not loaded"); + logger.info(PARSING_LOG_MESSAGE, cDnaFastaFile); + FastaReader fastaReader = new FastaReader(cDnaFastaFile); + Fasta fasta; + while ((fasta = fastaReader.read()) != null) { + rocksDbManager.update(rocksdb, fasta.getId() + CDNA_SEQUENCE_SUFFIX, fasta.getSeq()); } + fastaReader.close(); + logger.info(PARSING_DONE_LOG_MESSAGE, cDnaFastaFile); } public String getCdnaFasta(String id) throws RocksDBException { @@ -96,18 +99,14 @@ public String getCdnaFasta(String id) throws RocksDBException { } protected void indexProteinSequences(Path proteinFastaFile) throws IOException, FileFormatException, RocksDBException { - logger.info("Loading ENSEMBL's protein sequences..."); - FileUtils.checkPath(proteinFastaFile); - if (Files.size(proteinFastaFile) > 0) { - FastaReader fastaReader = new FastaReader(proteinFastaFile); - Fasta fasta; - while ((fasta = fastaReader.read()) != null) { - 
rocksDbManager.update(rocksdb, fasta.getId() + PROTEIN_SEQUENCE_SUFFIX, fasta.getSeq()); - } - fastaReader.close(); - } else { - logger.warn("ENSEMBL's protein sequences not loaded"); + logger.info(PARSING_LOG_MESSAGE, proteinFastaFile); + FastaReader fastaReader = new FastaReader(proteinFastaFile); + Fasta fasta; + while ((fasta = fastaReader.read()) != null) { + rocksDbManager.update(rocksdb, fasta.getId() + PROTEIN_SEQUENCE_SUFFIX, fasta.getSeq()); } + fastaReader.close(); + logger.info(PARSING_DONE_LOG_MESSAGE, proteinFastaFile); } protected String getProteinFasta(String id) throws RocksDBException { @@ -115,22 +114,18 @@ protected String getProteinFasta(String id) throws RocksDBException { } protected void indexHgncIdMapping(Path hgncMappingFile) throws IOException, RocksDBException { - // #hgnc_id symbol name locus_group locus_type status location location_sortable ... - logger.info("Indexing HGNC ID mapping data ..."); - - // We only need the first two columns: hgnc_id -> symbol - if (hgncMappingFile != null && Files.exists(hgncMappingFile) && Files.size(hgncMappingFile) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(hgncMappingFile)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - String[] fields = line.split("\t", -1); - rocksDbManager.update(rocksdb, fields[1] + HGNC_ID_SUFFIX, fields[0]); - line = bufferedReader.readLine(); - } + logger.info(PARSING_LOG_MESSAGE, hgncMappingFile); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(hgncMappingFile)) { + String line = bufferedReader.readLine(); + // We only need the first two columns: hgnc_id -> symbol + // #hgnc_id symbol name locus_group locus_type status location location_sortable ... + while (StringUtils.isNotEmpty(line)) { + String[] fields = line.split("\t", -1); + rocksDbManager.update(rocksdb, fields[1] + HGNC_ID_SUFFIX, fields[0]); + line = bufferedReader.readLine(); } - } else { - logger.warn("HGNC ID mapping file " + hgncMappingFile + " not found"); } + logger.info(PARSING_DONE_LOG_MESSAGE, hgncMappingFile); } public String getHgncId(String id) throws RocksDBException { @@ -138,29 +133,25 @@ public String getHgncId(String id) throws RocksDBException { } protected void indexManeMapping(Path maneMappingFile, String referenceId) throws IOException, RocksDBException { + logger.info(PARSING_LOG_MESSAGE, maneMappingFile); + int idColumn = referenceId.equalsIgnoreCase(ENSEMBL_DATA) ? 7 : 5; + // #NCBI_GeneID Ensembl_Gene HGNC_ID symbol name RefSeq_nuc RefSeq_prot Ensembl_nuc Ensembl_prot // MANE_status GRCh38_chr chr_start chr_end chr_strand - logger.info("Indexing MANE mapping data ..."); - - if (maneMappingFile != null && Files.exists(maneMappingFile) && Files.size(maneMappingFile) > 0) { - int idColumn = referenceId.equalsIgnoreCase("ensembl") ? 
7 : 5; -// BufferedReader bufferedReader = FileUtils.newBufferedReader(maneMappingFile); - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(maneMappingFile)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - String[] fields = line.split("\t", -1); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_refseq", fields[5]); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_refseq_protein", fields[6]); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_ensembl", fields[7]); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_ensembl_protein", fields[8]); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_flag", fields[9]); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(maneMappingFile)) { + String line = bufferedReader.readLine(); + while (StringUtils.isNotEmpty(line)) { + String[] fields = line.split("\t", -1); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_refseq", fields[5]); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_refseq_protein", fields[6]); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_ensembl", fields[7]); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_ensembl_protein", fields[8]); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_flag", fields[9]); - line = bufferedReader.readLine(); - } + line = bufferedReader.readLine(); } - } else { - logger.warn("MANE mapping file " + maneMappingFile + " not found"); } + logger.info(PARSING_DONE_LOG_MESSAGE, maneMappingFile); } public String getMane(String id, String field) throws RocksDBException { @@ -168,30 +159,27 @@ public String getMane(String id, String field) throws RocksDBException { } protected void indexLrgMapping(Path lrgMappingFile, String referenceId) throws IOException, RocksDBException { + logger.info(PARSING_LOG_MESSAGE, lrgMappingFile); + // # Last modified: 30-03-2021@22:00:06 // # LRG HGNC_SYMBOL REFSEQ_GENOMIC LRG_TRANSCRIPT REFSEQ_TRANSCRIPT ENSEMBL_TRANSCRIPT CCDS // LRG_1 COL1A1 NG_007400.1 t1 NM_000088.3 ENST00000225964.10 CCDS11561.1 - logger.info("Indexing LRG mapping data ..."); - - if (lrgMappingFile != null && Files.exists(lrgMappingFile) && Files.size(lrgMappingFile) > 0) { - int idColumn = referenceId.equalsIgnoreCase("ensembl") ? 5 : 4; - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(lrgMappingFile)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - if (!line.startsWith("#")) { - String[] fields = line.split("\t", -1); - String id = fields[idColumn]; - if (StringUtils.isNotEmpty(id) && !id.equals("-")) { - rocksDbManager.update(rocksdb, id + LRG_SUFFIX + "_refseq", fields[4]); - rocksDbManager.update(rocksdb, id + LRG_SUFFIX + "_ensembl", fields[5]); - } + int idColumn = referenceId.equalsIgnoreCase("ensembl") ? 
5 : 4; + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(lrgMappingFile)) { + String line = bufferedReader.readLine(); + while (StringUtils.isNotEmpty(line)) { + if (!line.startsWith("#")) { + String[] fields = line.split("\t", -1); + String id = fields[idColumn]; + if (StringUtils.isNotEmpty(id) && !id.equals("-")) { + rocksDbManager.update(rocksdb, id + LRG_SUFFIX + "_refseq", fields[4]); + rocksDbManager.update(rocksdb, id + LRG_SUFFIX + "_ensembl", fields[5]); } - line = bufferedReader.readLine(); } + line = bufferedReader.readLine(); } - } else { - logger.warn("LRG mapping file " + lrgMappingFile + " not found"); } + logger.info(PARSING_DONE_LOG_MESSAGE, lrgMappingFile); } public String getLrg(String id, String field) throws RocksDBException { @@ -199,6 +187,8 @@ public String getLrg(String id, String field) throws RocksDBException { } protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBException { + logger.info(PARSING_LOG_MESSAGE, cgcFile); + Map tissuesMap = new HashMap<>(); tissuesMap.put("E", "epithelial"); tissuesMap.put("L", "leukaemia/lymphoma"); @@ -224,10 +214,8 @@ protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBEx mutationTypesMap.put("Mis", "missense"); mutationTypesMap.put("PromoterMis", "missense"); - logger.info("Indexing CANCER GENE CENSUS data ..."); - if (cgcFile != null && Files.exists(cgcFile) && Files.size(cgcFile) > 0) { + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(cgcFile)) { // Skip the first header line - BufferedReader bufferedReader = FileUtils.newBufferedReader(cgcFile); bufferedReader.readLine(); GeneCancerAssociation cancerGeneAssociation; @@ -237,9 +225,9 @@ protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBEx // Find Ensembl Gene Id in the last comma-separated column List synonyms = StringUtils.isNotEmpty(fields[19]) ? Arrays.stream(fields[19] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replaceAll("\"", "") + .replaceAll(" ", "") + .split(",")) .collect(Collectors.toList()) : Collections.emptyList(); @@ -264,44 +252,44 @@ protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBEx : Collections.emptyList(); List tissues = StringUtils.isNotEmpty(fields[12]) ? Arrays.stream(fields[12] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replaceAll("\"", "") + .replaceAll(" ", "") + .split(",")) .map(tissuesMap::get) .collect(Collectors.toList()) : Collections.emptyList(); List modeOfInheritance = StringUtils.isNotEmpty(fields[13]) ? fields[13].equalsIgnoreCase("Dom/Rec") - ? Arrays.asList(moiMap.get("Dom"), moiMap.get("Rec")) - : Collections.singletonList(moiMap.get(fields[13])) + ? Arrays.asList(moiMap.get("Dom"), moiMap.get("Rec")) + : Collections.singletonList(moiMap.get(fields[13])) : Collections.emptyList(); List roleInCancer = StringUtils.isNotEmpty(fields[14]) ? Arrays.stream(fields[14] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replaceAll("\"", "") + .replaceAll(" ", "") + .split(",")) .map(roleInCancerMap::get) .collect(Collectors.toList()) : Collections.emptyList(); List mutationTypes = StringUtils.isNotEmpty(fields[15]) ? Arrays.stream(fields[15] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replaceAll("\"", "") + .replaceAll(" ", "") + .split(",")) .map(mutationTypesMap::get) .collect(Collectors.toList()) : Collections.emptyList(); List translocationPartners = StringUtils.isNotEmpty(fields[16]) ? 
Arrays.stream(fields[16] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replaceAll("\"", "") + .replaceAll(" ", "") + .split(",")) .collect(Collectors.toList()) : Collections.emptyList(); List otherSyndromes = StringUtils.isNotEmpty(fields[18]) ? Arrays.stream(fields[18] - .replaceAll("\"", "") - .split("; ")) + .replaceAll("\"", "") + .split("; ")) .collect(Collectors.toList()) : Collections.emptyList(); @@ -312,10 +300,9 @@ protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBEx rocksDbManager.update(rocksdb, fields[0] + CANCER_GENE_CENSUS_SUFFIX, cancerGeneAssociation); } } - bufferedReader.close(); - } else { - logger.warn("CANCER GENE CENSUS file " + cgcFile + " not found"); } + + logger.info(PARSING_DONE_LOG_MESSAGE, cgcFile); } public List getCancerGeneCensus(String geneName) throws RocksDBException, IOException { @@ -324,97 +311,102 @@ public List getCancerGeneCensus(String geneName) throws R } public void indexCancerHotspot(Path cancerHotspot) throws IOException, RocksDBException { + logger.info(PARSING_LOG_MESSAGE, cancerHotspot); + // Store all cancer hotspot (different gene and aminoacid position) for each gene in the same key Map> visited = new HashMap<>(); - FileInputStream fileInputStream = new FileInputStream(cancerHotspot.toFile()); - HSSFWorkbook workbook = new HSSFWorkbook(fileInputStream); - HSSFSheet sheet = workbook.getSheetAt(0); - Iterator iterator = sheet.iterator(); - iterator.next(); - while (iterator.hasNext()) { - Row currentRow = iterator.next(); - String geneName = currentRow.getCell(0).toString(); - - if (currentRow.getCell(1).toString().contains("splice")) { - continue; - } - int aminoAcidPosition = Integer.parseInt(currentRow.getCell(1).toString()); - - CancerHotspot ch = null; - // Check if ch object already exist - if (visited.containsKey(geneName)) { - for (CancerHotspot hotspot : visited.get(geneName)) { - if (hotspot.getAminoacidPosition() == aminoAcidPosition) { - ch = hotspot; - break; - } - } - } - // If not exist we create new ch - if (ch == null) { - ch = new CancerHotspot(); - ch.setScores(new HashMap<>()); - ch.setCancerTypeCount(new HashMap<>()); - ch.setOrganCount(new HashMap<>()); - ch.setVariants(new ArrayList<>()); - - // Parse new row - ch.setGeneName(geneName); - ch.setAminoacidPosition(aminoAcidPosition); - ch.getScores().put("log10Pvalue", Double.parseDouble(currentRow.getCell(2).toString())); - ch.setNumMutations(Integer.parseInt(currentRow.getCell(3).toString())); - - String[] cancerCountSplit = currentRow.getCell(11).toString().split("\\|"); - for (String cancerCount : cancerCountSplit) { - String[] split = cancerCount.split(":"); - ch.getCancerTypeCount().put(split[0], Integer.parseInt(split[2])); + try (FileInputStream fileInputStream = new FileInputStream(cancerHotspot.toFile())) { + HSSFWorkbook workbook = new HSSFWorkbook(fileInputStream); + HSSFSheet sheet = workbook.getSheetAt(0); + Iterator iterator = sheet.iterator(); + iterator.next(); + while (iterator.hasNext()) { + Row currentRow = iterator.next(); + String geneName = currentRow.getCell(0).toString(); + + if (currentRow.getCell(1).toString().contains("splice")) { + continue; } + int aminoAcidPosition = Integer.parseInt(currentRow.getCell(1).toString()); - String[] organCountSplit = currentRow.getCell(12).toString().split("\\|"); - for (String organCount : organCountSplit) { - String[] split = organCount.split(":"); - ch.getOrganCount().put(split[0], Integer.parseInt(split[2])); + CancerHotspot ch = null; + // Check if ch object 
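
All of the indexers in this class follow one RocksDB convention: the key is an entity ID concatenated with a per-dataset suffix, and the value is the mapped field, so every lookup is a single point read. Below is a minimal, self-contained sketch of that pattern; the suffix constant, database path, and example IDs are illustrative (not the indexer's actual constants), and a recent rocksdbjni on the classpath is assumed.

import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

public class SuffixKeyIndexSketch {
    // Illustrative suffix; the real indexer defines its own *_SUFFIX constants
    private static final String HGNC_ID_SUFFIX = "_hgncid";

    public static void main(String[] args) throws RocksDBException {
        RocksDB.loadLibrary();
        Options options = new Options().setCreateIfMissing(true);
        RocksDB db = RocksDB.open(options, "/tmp/suffix-index-sketch");

        // Write: one key per (id, dataset); the value is the mapped field
        db.put(("BRCA2" + HGNC_ID_SUFFIX).getBytes(), "HGNC:1101".getBytes());

        // Read: recompose the key; a null result means the id was never indexed
        byte[] value = db.get(("BRCA2" + HGNC_ID_SUFFIX).getBytes());
        System.out.println(value == null ? "not indexed" : new String(value));

        db.close();
    }
}
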
already exist + if (visited.containsKey(geneName)) { + for (CancerHotspot hotspot : visited.get(geneName)) { + if (hotspot.getAminoacidPosition() == aminoAcidPosition) { + ch = hotspot; + break; + } + } } - ch.getScores().put("mutability", Double.parseDouble(currentRow.getCell(14).toString())); - ch.getScores().put("muProtein", Double.parseDouble(currentRow.getCell(15).toString())); - ch.setAnalysis(Arrays.asList(currentRow.getCell(17).toString().split(","))); - ch.getScores().put("qvalue", Double.parseDouble(currentRow.getCell(18).toString())); - ch.getScores().put("qvaluePancan", Double.parseDouble(currentRow.getCell(20).toString())); - ch.setAminoacidReference(currentRow.getCell(35).toString()); - ch.getScores().put("qvalueCancerType", Double.parseDouble(currentRow.getCell(36).toString())); - ch.setCancerType(currentRow.getCell(37).toString()); + // If not exist we create new ch + if (ch == null) { + ch = new CancerHotspot(); + ch.setScores(new HashMap<>()); + ch.setCancerTypeCount(new HashMap<>()); + ch.setOrganCount(new HashMap<>()); + ch.setVariants(new ArrayList<>()); + + // Parse new row + ch.setGeneName(geneName); + ch.setAminoacidPosition(aminoAcidPosition); + ch.getScores().put("log10Pvalue", Double.parseDouble(currentRow.getCell(2).toString())); + ch.setNumMutations(Integer.parseInt(currentRow.getCell(3).toString())); + + String[] cancerCountSplit = currentRow.getCell(11).toString().split("\\|"); + for (String cancerCount : cancerCountSplit) { + String[] split = cancerCount.split(":"); + ch.getCancerTypeCount().put(split[0], Integer.parseInt(split[2])); + } - if (visited.containsKey(geneName)) { - // Gene exists but no this aminoacid position - visited.get(geneName).add(ch); - } else { - // New gene found - visited.put(geneName, new ArrayList<>(Collections.singletonList(ch))); + String[] organCountSplit = currentRow.getCell(12).toString().split("\\|"); + for (String organCount : organCountSplit) { + String[] split = organCount.split(":"); + ch.getOrganCount().put(split[0], Integer.parseInt(split[2])); + } + + ch.getScores().put("mutability", Double.parseDouble(currentRow.getCell(14).toString())); + ch.getScores().put("muProtein", Double.parseDouble(currentRow.getCell(15).toString())); + ch.setAnalysis(Arrays.asList(currentRow.getCell(17).toString().split(","))); + ch.getScores().put("qvalue", Double.parseDouble(currentRow.getCell(18).toString())); + ch.getScores().put("qvaluePancan", Double.parseDouble(currentRow.getCell(20).toString())); + ch.setAminoacidReference(currentRow.getCell(35).toString()); + ch.getScores().put("qvalueCancerType", Double.parseDouble(currentRow.getCell(36).toString())); + ch.setCancerType(currentRow.getCell(37).toString()); + + if (visited.containsKey(geneName)) { + // Gene exists but no this aminoacid position + visited.get(geneName).add(ch); + } else { + // New gene found + visited.put(geneName, new ArrayList<>(Collections.singletonList(ch))); + } } - } - // Add cancer hotspot variant information - CancerHotspotVariant cancerHotspotVariant = new CancerHotspotVariant(); - cancerHotspotVariant.setSampleCount(new HashMap<>()); + // Add cancer hotspot variant information + CancerHotspotVariant cancerHotspotVariant = new CancerHotspotVariant(); + cancerHotspotVariant.setSampleCount(new HashMap<>()); - String[] alternateCountSplit = currentRow.getCell(8).toString().split(":"); - cancerHotspotVariant.setAminoacidAlternate(alternateCountSplit[0]); - cancerHotspotVariant.setCount(Integer.parseInt(alternateCountSplit[1])); + String[] alternateCountSplit = 
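
The hotspot spreadsheet packs several counters into single cells, pipe-separated between entries and colon-separated within each entry, which is why the code here splits on "\\|" and then on ":". A small sketch of that decoding under assumed cell layouts; the example cell values are invented, and countIndex picks which colon-separated token holds the number (2 for the cancer-type and organ cells, 1 for the sample-count cells).

import java.util.HashMap;
import java.util.Map;

public class PackedCountCellSketch {
    // Decodes e.g. "skcm:tcga:37|lusc:tcga:5" into {skcm=37, lusc=5}
    static Map<String, Integer> parseCounts(String cell, int countIndex) {
        Map<String, Integer> counts = new HashMap<>();
        for (String entry : cell.split("\\|")) {
            String[] tokens = entry.split(":");
            counts.put(tokens[0], Integer.parseInt(tokens[countIndex]));
        }
        return counts;
    }

    public static void main(String[] args) {
        System.out.println(parseCounts("skcm:tcga:37|lusc:tcga:5", 2));
        System.out.println(parseCounts("V600E:12|V600K:3", 1));
    }
}
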
currentRow.getCell(8).toString().split(":"); + cancerHotspotVariant.setAminoacidAlternate(alternateCountSplit[0]); + cancerHotspotVariant.setCount(Integer.parseInt(alternateCountSplit[1])); - String[] sampleSplit = currentRow.getCell(38).toString().split("\\|"); - for (String sampleCount : sampleSplit) { - String[] sampleCountSplit = sampleCount.split(":"); - cancerHotspotVariant.getSampleCount().put(sampleCountSplit[0], Integer.parseInt(sampleCountSplit[1])); + String[] sampleSplit = currentRow.getCell(38).toString().split("\\|"); + for (String sampleCount : sampleSplit) { + String[] sampleCountSplit = sampleCount.split(":"); + cancerHotspotVariant.getSampleCount().put(sampleCountSplit[0], Integer.parseInt(sampleCountSplit[1])); + } + ch.getVariants().add(cancerHotspotVariant); } - ch.getVariants().add(cancerHotspotVariant); } - fileInputStream.close(); for (String geneName : visited.keySet()) { rocksDbManager.update(rocksdb, geneName + CANCER_HOTSPOT_SUFFIX, visited.get(geneName)); } + + logger.info(PARSING_DONE_LOG_MESSAGE, cancerHotspot); } public List getCancerHotspot(String geneName) throws RocksDBException, IOException { @@ -422,29 +414,25 @@ public List getCancerHotspot(String geneName) throws RocksDBExcep return rocksDbManager.getCancerHotspot(rocksdb, key); } - protected void indexTSO500(Path tso500Path) throws IOException, RocksDBException { - // Gene Ref Seq - // FAS NM_000043 - // AR NM_000044 - logger.info("Indexing TSO500 data ..."); - - if (tso500Path != null && Files.exists(tso500Path) && Files.size(tso500Path) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(tso500Path)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - if (!line.startsWith("#")) { - String[] fields = line.split("\t", -1); - if (fields.length == 2) { - rocksDbManager.update(rocksdb, fields[1] + TSO500_SUFFIX, "TSO500"); - } + logger.info(PARSING_LOG_MESSAGE, tso500Path); + + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(tso500Path)) { + String line = bufferedReader.readLine(); + // Gene Ref Seq + // FAS NM_000043 + // AR NM_000044 + while (StringUtils.isNotEmpty(line)) { + if (!line.startsWith("#")) { + String[] fields = line.split("\t", -1); + if (fields.length == 2) { + rocksDbManager.update(rocksdb, fields[1] + TSO500_SUFFIX, "TSO500"); } - line = bufferedReader.readLine(); } + line = bufferedReader.readLine(); } - } else { - logger.warn("Ensembl TSO500 mapping file " + tso500Path + " not found"); } + logger.info(PARSING_DONE_LOG_MESSAGE, tso500Path); } public String getTSO500(String transcriptId) throws RocksDBException { @@ -456,29 +444,25 @@ public String getTSO500(String transcriptId) throws RocksDBException { return new String(bytes); } - protected void indexEGLHHaemOnc(Path eglhHaemOncPath) throws IOException, RocksDBException { - // Gene Ref Seq - // GNB1 NM_002074.4 - // CSF3R NM_000760.3 - logger.info("Indexing EGLH HaemOnc data ..."); - - if (eglhHaemOncPath != null && Files.exists(eglhHaemOncPath) && Files.size(eglhHaemOncPath) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(eglhHaemOncPath)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - if (!line.startsWith("#")) { - String[] fields = line.split("\t", -1); - if (fields.length == 2) { - rocksDbManager.update(rocksdb, fields[1].split("\\.")[0] + EGLH_HAEMONC_SUFFIX, "EGLH_HaemOnc"); - } + logger.info(PARSING_LOG_MESSAGE, eglhHaemOncPath); + + try (BufferedReader bufferedReader = 
FileUtils.newBufferedReader(eglhHaemOncPath)) { + String line = bufferedReader.readLine(); + // Gene Ref Seq + // GNB1 NM_002074.4 + // CSF3R NM_000760.3 + while (StringUtils.isNotEmpty(line)) { + if (!line.startsWith("#")) { + String[] fields = line.split("\t", -1); + if (fields.length == 2) { + rocksDbManager.update(rocksdb, fields[1].split("\\.")[0] + EGLH_HAEMONC_SUFFIX, "EGLH_HaemOnc"); } - line = bufferedReader.readLine(); } + line = bufferedReader.readLine(); } - } else { - logger.warn("Ensembl EGLH HaemOnc mapping file " + eglhHaemOncPath + " not found"); } + logger.info(PARSING_DONE_LOG_MESSAGE, eglhHaemOncPath); } public String getEGLHHaemOnc(String transcriptId) throws RocksDBException { @@ -510,4 +494,219 @@ protected void close() throws IOException { rocksDbManager.closeIndex(rocksdb, dbOption, dbLocation); } + protected void indexDrugs(Path geneDrugFile) throws IOException, RocksDBException { + logger.info(PARSING_LOG_MESSAGE, geneDrugFile); + + String currentGene = ""; + List drugs = new ArrayList<>(); + + try (BufferedReader br = FileUtils.newBufferedReader(geneDrugFile)) { + // Skip header + br.readLine(); + + int lineCounter = 1; + String line; + while ((line = br.readLine()) != null) { + String[] parts = line.split("\t"); + String geneName = parts[0]; + if (currentGene.equals("")) { + currentGene = geneName; + } else if (!currentGene.equals(geneName)) { + rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); + drugs = new ArrayList<>(); + currentGene = geneName; + } + + String source = null; + if (parts.length >= 4) { + source = parts[3]; + } + + String interactionType = null; + if (parts.length >= 5) { + interactionType = parts[4]; + } + + String drugName = null; + if (parts.length >= 8) { + // if drug name column is empty, use drug claim name instead + drugName = StringUtils.isEmpty(parts[7]) ? 
parts[6] : parts[7]; + } + if (StringUtils.isEmpty(drugName)) { + // no drug name + continue; + } + + String chemblId = null; + if (parts.length >= 9) { + chemblId = parts[8]; + } + + List publications = new ArrayList<>(); + if (parts.length >= 10 && parts[9] != null) { + publications = Arrays.asList(parts[9].split(",")); + } + + GeneDrugInteraction drug = new GeneDrugInteraction( + geneName, drugName, source, null, null, interactionType, chemblId, publications); + drugs.add(drug); + lineCounter++; + } + } + // update last gene + rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); + + logger.info(PARSING_DONE_LOG_MESSAGE, geneDrugFile); + } + + protected void indexDiseases(Path hpoFilePath, Path disgenetFilePath) throws IOException, RocksDBException { + + Map> geneDiseaseAssociationMap = new HashMap<>(50000); + + String line; + + // HPO +// logger.info(PARSING_LOG_MESSAGE, hpoFilePath); +// try (BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFilePath)) { +// // Skip first header line +// bufferedReader.readLine(); +// while ((line = bufferedReader.readLine()) != null) { +// String[] fields = line.split("\t"); +// String omimId = fields[6]; +// String geneSymbol = fields[3]; +// String hpoId = fields[0]; +// String diseaseName = fields[1]; +// GeneTraitAssociation disease = +// new GeneTraitAssociation(omimId, diseaseName, hpoId, 0f, 0, new ArrayList<>(), new ArrayList<>(), HPO_DATA); +// addValueToMapElement(geneDiseaseAssociationMap, geneSymbol, disease); +// } +// } +// logger.info(PARSING_DONE_LOG_MESSAGE, hpoFilePath); + + // DisGeNet + logger.info(PARSING_LOG_MESSAGE, disgenetFilePath); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(disgenetFilePath)) { + // Skip first header line + bufferedReader.readLine(); + while ((line = bufferedReader.readLine()) != null) { + String[] fields = line.split("\t"); + String diseaseId = fields[4]; + String diseaseName = fields[5]; + String score = fields[9]; + String numberOfPubmeds = fields[13].trim(); + String numberOfSNPs = fields[14]; + String source = fields[15]; + GeneTraitAssociation disease = new GeneTraitAssociation(diseaseId, diseaseName, "", Float.parseFloat(score), + Integer.parseInt(numberOfPubmeds), Arrays.asList(numberOfSNPs), Arrays.asList(source), DISGENET_DATA); + addValueToMapElement(geneDiseaseAssociationMap, fields[1], disease); + } + } + logger.info(PARSING_DONE_LOG_MESSAGE, disgenetFilePath); + + for (Map.Entry> entry : geneDiseaseAssociationMap.entrySet()) { + rocksDbManager.update(rocksdb, entry.getKey() + DISEASE_SUFFIX, entry.getValue()); + } + } + + protected void indexMiRTarBase(Path miRTarBaseFile) throws IOException, RocksDBException, CellBaseException { + logger.info(PARSING_LOG_MESSAGE, miRTarBaseFile); + + try (BufferedReader reader = Files.newBufferedReader(miRTarBaseFile)) { + String line; + // Skip header line + reader.readLine(); + + String currentMiRTarBaseId = null; + String currentMiRNA = null; + String currentGene = null; + List targetGenes = new ArrayList<>(); + Map> geneToMirna = new HashMap<>(); + + while ((line = reader.readLine()) != null) { + String[] field = line.split("\t", -1); + if (field.length != 9) { + throw new CellBaseException("Invalid number of columns " + field.length + " (expected 9 columns) parsing file " + + miRTarBaseFile + ". 
Line: " + line); + } + + // #0: miRTarBase ID + String miRTarBaseId = field[0]; + if (currentMiRTarBaseId == null) { + currentMiRTarBaseId = miRTarBaseId; + } + + // #1: miRNA + String miRNA = field[1]; + if (currentMiRNA == null) { + currentMiRNA = miRNA; + } + + // #2: Species (miRNA) + + // #3: Target Gene + String geneName = field[3]; + if (currentGene == null) { + currentGene = geneName; + } + + // #4: Target Gene (Entrez ID) + // #5: Species (Target Gene) + + if (!miRTarBaseId.equals(currentMiRTarBaseId) || !geneName.equals(currentGene)) { + // new entry, store current one + MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, targetGenes); + addValueToMapElement(geneToMirna, currentGene, miRnaTarget); + targetGenes = new ArrayList<>(); + currentGene = geneName; + currentMiRTarBaseId = miRTarBaseId; + currentMiRNA = miRNA; + } + + // #6: Experiments + String experiment = field[6]; + + // #7: Support Type + String supportType = field[7]; + + // #8: pubmed + String pubmed = field[8]; + + targetGenes.add(new TargetGene(experiment, supportType, pubmed)); + } + + // parse last entry + MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, MIRTARBASE_DATA, currentMiRNA, targetGenes); + addValueToMapElement(geneToMirna, currentGene, miRnaTarget); + + for (Map.Entry> entry : geneToMirna.entrySet()) { + rocksDbManager.update(rocksdb, entry.getKey() + MIRTARBASE_SUFFIX, entry.getValue()); + } + } + logger.info(PARSING_DONE_LOG_MESSAGE, miRTarBaseFile); + } + + protected static void addValueToMapElement(Map> map, String key, T value) { + if (map.containsKey(key)) { + map.get(key).add(value); + } else { + List valueList = new ArrayList<>(); + valueList.add(value); + map.put(key, valueList); + } + } + + protected List getDrugs(String id) throws RocksDBException, IOException { + String key = id + DRUGS_SUFFIX; + return rocksDbManager.getDrugs(rocksdb, key); + } + + protected List getDiseases(String id) throws RocksDBException, IOException { + String key = id + DISEASE_SUFFIX; + return rocksDbManager.getDiseases(rocksdb, key); + } + + protected List getMirnaTargets(String geneName) throws RocksDBException, IOException { + String key = geneName + MIRTARBASE_SUFFIX; + return rocksDbManager.getMirnaTargets(rocksdb, key); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java index 1eabf8975a..b14d20b54c 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java @@ -19,60 +19,70 @@ import org.opencb.biodata.formats.obo.OboParser; import org.opencb.biodata.models.core.OntologyTerm; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.EtlCommons; import org.opencb.commons.utils.FileUtils; import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; import java.nio.file.Path; import java.util.List; +import static org.opencb.cellbase.lib.EtlCommons.*; + public class OntologyBuilder extends CellBaseBuilder { - private Path hpoFile; - private Path goFile; - private Path doidFile; - private Path mondoFile; + private Path oboDownloadPath; - public OntologyBuilder(Path oboDirectoryPath, CellBaseSerializer serializer) { + public OntologyBuilder(Path oboDownloadPath, CellBaseSerializer serializer) { 
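
Both indexDrugs and indexMiRTarBase above rely on the same streaming idiom: the input TSV is assumed to be grouped by its leading key, so a batch is flushed whenever the key changes, and once more after the loop for the trailing group (the "parse last entry" step). A self-contained sketch of the idiom, with println standing in for rocksDbManager.update and invented row values:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class RunGroupingSketch {
    public static void main(String[] args) {
        List<String[]> rows = Arrays.asList(
                new String[]{"MIRT000002", "interaction-a"},
                new String[]{"MIRT000002", "interaction-b"},
                new String[]{"MIRT000006", "interaction-c"});

        String currentKey = null;
        List<String> batch = new ArrayList<>();
        for (String[] row : rows) {
            if (currentKey != null && !currentKey.equals(row[0])) {
                // Key changed: flush the finished group (stand-in for rocksDbManager.update)
                System.out.println(currentKey + " -> " + batch);
                batch = new ArrayList<>();
            }
            currentKey = row[0];
            batch.add(row[1]);
        }
        if (currentKey != null) {
            // Don't forget the trailing group after the loop ends
            System.out.println(currentKey + " -> " + batch);
        }
    }
}
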
super(serializer); - hpoFile = oboDirectoryPath.resolve(EtlCommons.HPO_FILE); - goFile = oboDirectoryPath.resolve(EtlCommons.GO_FILE); - doidFile = oboDirectoryPath.resolve(EtlCommons.DOID_FILE); - mondoFile = oboDirectoryPath.resolve(EtlCommons.MONDO_FILE); + this.oboDownloadPath = oboDownloadPath; } @Override public void parse() throws Exception { - BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFile); - OboParser parser = new OboParser(); - List terms = parser.parseOBO(bufferedReader, "Human Phenotype Ontology"); - for (OntologyTerm term : terms) { - term.setSource("HP"); - serializer.serialize(term); - } + logger.info(BUILDING_LOG_MESSAGE, getDataName(ONTOLOGY_DATA)); - bufferedReader = FileUtils.newBufferedReader(goFile); - terms = parser.parseOBO(bufferedReader, "Gene Ontology"); - for (OntologyTerm term : terms) { - term.setSource("GO"); - serializer.serialize(term); - } + // Sanity check + checkDirectory(oboDownloadPath, getDataName(REGULATION_DATA)); - bufferedReader = FileUtils.newBufferedReader(doidFile); - terms = parser.parseOBO(bufferedReader, "Human Disease Ontology"); - for (OntologyTerm term : terms) { - term.setSource("DOID"); - serializer.serialize(term); - } + // Check ontology files + List hpoFiles = checkOboFiles(oboDownloadPath.resolve(getDataVersionFilename(HPO_OBO_DATA)), getDataName(HPO_OBO_DATA)); + List goFiles = checkOboFiles(oboDownloadPath.resolve(getDataVersionFilename(GO_OBO_DATA)), getDataName(GO_OBO_DATA)); + List doidFiles = checkOboFiles(oboDownloadPath.resolve(getDataVersionFilename(DOID_OBO_DATA)), getDataName(DOID_OBO_DATA)); + List mondoFiles = checkOboFiles(oboDownloadPath.resolve(getDataVersionFilename(MONDO_OBO_DATA)), getDataName(MONDO_OBO_DATA)); - bufferedReader = FileUtils.newBufferedReader(mondoFile); - terms = parser.parseOBO(bufferedReader, "Mondo Ontology"); - for (OntologyTerm term : terms) { - term.setSource("MONDO"); - serializer.serialize(term); - } + // Parse OBO files and build + parseOboFile(hpoFiles.get(0), HPO_OBO_DATA); + parseOboFile(goFiles.get(0), GO_OBO_DATA); + parseOboFile(doidFiles.get(0), DOID_OBO_DATA); + parseOboFile(mondoFiles.get(0), MONDO_OBO_DATA); + // Close serializer serializer.close(); + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(ONTOLOGY_DATA)); + } + + private void parseOboFile(File oboFile, String data) throws IOException { + logger.info(PARSING_LOG_MESSAGE, oboFile); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(oboFile.toPath())) { + OboParser parser = new OboParser(); + List terms = parser.parseOBO(bufferedReader, data); + for (OntologyTerm term : terms) { + serializer.serialize(term); + } + } + logger.info(PARSING_DONE_LOG_MESSAGE, oboFile); + } + + private List checkOboFiles(Path versionFilePath, String name) throws IOException, CellBaseException { + List files = checkFiles(dataSourceReader.readValue(versionFilePath.toFile()), oboDownloadPath, getDataName(ONTOLOGY_DATA) + + "/" + name); + if (files.size() != 1) { + throw new CellBaseException("One " + name + " file is expected, but currently there are " + files.size() + " files"); + } + return files; } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java index 1f7a4836ca..1a0ba2e7d3 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java @@ -23,13 +23,17 @@ 
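
checkOboFiles above enforces a fail-fast invariant: each ontology version file must resolve to exactly one OBO file before parsing starts, with the same error message shape as the builder. A generic stand-in sketch of that check, assuming a plain directory listing instead of CellBase's DataSource metadata; the glob and directory are hypothetical.

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class SingleFileCheckSketch {
    // Resolve candidate files and insist on exactly one, failing early with a
    // descriptive message instead of parsing an ambiguous input set
    static Path requireSingleFile(Path dir, String nameRegex, String dataName) throws Exception {
        try (Stream<Path> stream = Files.list(dir)) {
            List<Path> files = stream
                    .filter(p -> p.getFileName().toString().matches(nameRegex))
                    .collect(Collectors.toList());
            if (files.size() != 1) {
                throw new IllegalStateException("One " + dataName + " file is expected, but currently there are "
                        + files.size() + " files");
            }
            return files.get(0);
        }
    }

    public static void main(String[] args) throws Exception {
        System.out.println(requireSingleFile(Paths.get("/tmp/obo"), "hp\\.obo(\\.gz)?", "HPO"));
    }
}
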
import org.opencb.biodata.models.core.Xref; import org.opencb.biodata.models.pharma.*; import org.opencb.biodata.models.pharma.guideline.BasicObject; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.cellbase.lib.EtlCommons; import org.opencb.commons.utils.FileUtils; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; @@ -37,8 +41,7 @@ public class PharmGKBBuilder extends CellBaseBuilder { - private final Path inputDir; - private final Path pharmGKBDir; + private final Path pharmGkbDownloadPath; private static final String CHEMICALS_BASENAME = "chemicals"; private static final String CHEMICALS_TSV_FILENAME = "chemicals.tsv"; @@ -88,21 +91,25 @@ public class PharmGKBBuilder extends CellBaseBuilder { private static final String PHARMGKB_LAST_UPDATE_DATE_KEY = "PHARMGKB_LAST_UPDATE_DATE"; private static final String PHARMGKB_IS_VIP_KEY = "PHARMGKB_IS_VIP"; - public PharmGKBBuilder(Path inputDir, CellBaseFileSerializer serializer) { + public PharmGKBBuilder(Path parmGkbDownloadPath, CellBaseFileSerializer serializer) { super(serializer); - - this.inputDir = inputDir; - this.pharmGKBDir = inputDir.resolve(PHARMGKB_DATA); + this.pharmGkbDownloadPath = parmGkbDownloadPath; } @Override public void parse() throws Exception { - // Check input folder - FileUtils.checkDirectory(inputDir); + logger.info(BUILDING_LOG_MESSAGE, getDataName(PHARMGKB_DATA)); + + // Sanity check + checkDirectory(pharmGkbDownloadPath, getDataName(PHARMGKB_DATA)); - // PharmGKB - FileUtils.checkDirectory(pharmGKBDir); - logger.info("Parsing {} files and building the data models...", PHARMGKB_NAME); + // Check PharmGKB files + DataSource dataSource = dataSourceReader.readValue(pharmGkbDownloadPath.resolve(getDataVersionFilename(PHARMGKB_DATA)).toFile()); + List pharmGkbFiles = checkFiles(dataSource, pharmGkbDownloadPath, getDataCategory(PHARMGKB_DATA) + "/" + + getDataName(PHARMGKB_DATA)); + + // Unzip downloaded file + unzipDownloadedFiles(pharmGkbFiles); // Parse chemical file Map chemicalsMap = parseChemicalFile(); @@ -113,8 +120,6 @@ public void parse() throws Exception { // Parse gene file parseGeneFile(chemicalsMap); - logger.info("Parsing {} files finished.", PHARMGKB_NAME); - // Generation the pharmacogenomics JSON file logger.info("Writing {} JSON file to {} ...", PHARMACOGENOMICS_DATA, serializer.getOutdir()); int counter = 0; @@ -125,11 +130,14 @@ public void parse() throws Exception { } } serializer.close(); - logger.info("Writing {} JSON file done!", PHARMACOGENOMICS_DATA); + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(PHARMGKB_DATA)); } private Map parseChemicalFile() throws IOException { - Path chemicalsFile = pharmGKBDir.resolve(CHEMICALS_BASENAME).resolve(CHEMICALS_TSV_FILENAME); + Path chemicalsFile = serializer.getOutdir().resolve(CHEMICALS_BASENAME).resolve(CHEMICALS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, chemicalsFile); + Map chemicalsMap = new HashMap<>(); try (BufferedReader br = FileUtils.newBufferedReader(chemicalsFile)) { // Skip first line, i.e. 
the header line @@ -146,7 +154,7 @@ private Map parseChemicalFile() throws IOException { // Label Has Dosing Info Has Rx Annotation RxNorm Identifiers ATC Identifiers PubChem Compound Identifiers PharmaChemical pharmaChemical = new PharmaChemical() .setId(fields[0]) - .setSource(PHARMGKB_NAME) + .setSource(PHARMGKB_DATA) .setName(fields[1]) .setSmiles(fields[7]) .setInChI(fields[8]); @@ -177,6 +185,7 @@ private Map parseChemicalFile() throws IOException { } logger.info("Number of Chemical items read {}", chemicalsMap.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, chemicalsFile); return chemicalsMap; } @@ -192,8 +201,9 @@ private void parseClinicalAnnotationFiles(Map chemicalsM Map> variantMap = parseVariantFile(); // clinical_annotations.tsv - try (BufferedReader br = FileUtils.newBufferedReader(pharmGKBDir.resolve(CLINICAL_ANNOTATIONS_BASENAME) - .resolve(CLINICAL_ANNOTATIONS_TSV_FILENAME))) { + Path clinAnnotPath = serializer.getOutdir().resolve(CLINICAL_ANNOTATIONS_BASENAME).resolve(CLINICAL_ANNOTATIONS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, clinAnnotPath); + try (BufferedReader br = FileUtils.newBufferedReader(clinAnnotPath)) { // Skip first line, i.e. the header line String line = br.readLine(); while ((line = br.readLine()) != null) { @@ -278,6 +288,7 @@ private void parseClinicalAnnotationFiles(Map chemicalsM } } } + logger.info(PARSING_DONE_LOG_MESSAGE, clinAnnotPath); // Update the clinical annotation map by parsing the clinical annotation evidences parseClinicalAnnotationEvidenceFile(variantAnnotationMap); @@ -300,7 +311,9 @@ private void parseClinicalAnnotationFiles(Map chemicalsM private Map> parseVariantFile() throws IOException { Map> variantMap = new HashMap<>(); // Parse the variant file (i.e., variants.tsv) - Path varPath = pharmGKBDir.resolve(VARIANTS_BASENAME).resolve(VARIANTS_TSV_FILENAME); + Path varPath = serializer.getOutdir().resolve(VARIANTS_BASENAME).resolve(VARIANTS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, varPath); + try (BufferedReader br = FileUtils.newBufferedReader(varPath)) { // Skip first line, i.e. the header line String line = br.readLine(); @@ -367,6 +380,7 @@ private Map> parseVariantFile() throws IOException { } logger.info("Number of variants = {}", variantMap.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, varPath); return variantMap; } @@ -385,7 +399,8 @@ private void parseClinicalAnnotationEvidenceFile(Map variantAnnotationMap) throws IOException { // Parse the clinical annotation alleles file (i.e., clinical_ann_alleles.tsv) - Path allelesPath = pharmGKBDir.resolve(CLINICAL_ANNOTATIONS_BASENAME).resolve(CLINICAL_ANN_ALLELES_TSV_FILENAME); + Path allelesPath = serializer.getOutdir().resolve(CLINICAL_ANNOTATIONS_BASENAME).resolve(CLINICAL_ANN_ALLELES_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, allelesPath); try (BufferedReader br = FileUtils.newBufferedReader(allelesPath)) { // Skip first line, i.e. 
the header line String line = br.readLine(); @@ -502,12 +522,14 @@ private void parseClinicalAnnotationAlleleFile(Map variantAssociationMap) throws IOException { // For CellBase, variant association corresponds to PharmGKB variant annotation // Parse the variant annotation file (i.e., var_drug_ann.tsv) - Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(VARIANT_ANNOTATIONS_TSV_FILENAME); + Path varDrugPath = serializer.getOutdir().resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(VARIANT_ANNOTATIONS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, varDrugPath); int counter = 0; try (BufferedReader br = FileUtils.newBufferedReader(varDrugPath)) { // Skip first line, i.e. the header line @@ -562,6 +584,7 @@ private void parseVariantAnnotationFile(Map va } } logger.info("Number of variant annotations = {}", counter); + logger.info(PARSING_DONE_LOG_MESSAGE, varDrugPath); } private Map parseGuidelineAnnotationFiles() throws IOException { @@ -571,7 +594,7 @@ private Map parseGuidelineAnnotationFiles() t ObjectReader objectReader = mapper.readerFor(PharmaGuidelineAnnotation.class); // Parse the guideline annotations JSON files - Path guidelinesPath = pharmGKBDir.resolve(GUIDELINE_ANNOTATIONS_BASENAME); + Path guidelinesPath = serializer.getOutdir().resolve(GUIDELINE_ANNOTATIONS_BASENAME); FileUtils.checkDirectory(guidelinesPath); for (File file : Objects.requireNonNull(guidelinesPath.toFile().listFiles())) { if (file.getName().endsWith("json")) { @@ -593,7 +616,8 @@ private Map parseGuidelineAnnotationFiles() t private Map parseDrugLabelAnnotationFile() throws IOException { Map drugLabelAnnotationMap = new HashMap<>(); // Parse the drug labels annotations file (i.e., drugLabels.tsv) - Path drugLabelPath = pharmGKBDir.resolve(DRUG_LABELS_BASENAME).resolve(DRUG_LABELS_TSV_FILENAME); + Path drugLabelPath = serializer.getOutdir().resolve(DRUG_LABELS_BASENAME).resolve(DRUG_LABELS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, drugLabelPath); try (BufferedReader br = FileUtils.newBufferedReader(drugLabelPath)) { // Skip first line, i.e. the header line String line = br.readLine(); @@ -631,12 +655,15 @@ private Map parseDrugLabelAnnotationFile() th } logger.info("Number of drug label annotations = {}", drugLabelAnnotationMap.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, drugLabelPath); return drugLabelAnnotationMap; } private void parsePhenotypeAnnotationFile(Map variantAssociationMap) throws IOException { // Parse the variant annotation file (i.e., var_pheno_ann.tsv) - Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(PHENOTYPE_ANNOTATIONS_TSV_FILENAME); + Path varDrugPath = serializer.getOutdir().resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(PHENOTYPE_ANNOTATIONS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, varDrugPath); + int counter = 0; try (BufferedReader br = FileUtils.newBufferedReader(varDrugPath)) { // Skip first line, i.e. 
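
The guideline annotations above are one JSON document per file, all read through a single pre-configured Jackson ObjectReader. A sketch of that shape; GuidelineDoc is a hypothetical stand-in POJO for PharmaGuidelineAnnotation, and the directory is illustrative.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import java.io.File;
import java.util.Objects;

public class JsonDirReaderSketch {
    // Hypothetical document shape; Jackson binds public fields by default
    public static class GuidelineDoc {
        public String id;
        public String summary;
    }

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        // One reusable reader avoids re-configuring the mapper per file
        ObjectReader reader = mapper.readerFor(GuidelineDoc.class);
        File dir = new File("/tmp/guidelines");
        for (File file : Objects.requireNonNull(dir.listFiles())) {
            if (file.getName().endsWith("json")) {
                GuidelineDoc doc = reader.readValue(file);
                System.out.println(doc.id + ": " + doc.summary);
            }
        }
    }
}
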
the header line @@ -691,11 +718,13 @@ private void parsePhenotypeAnnotationFile(Map } } logger.info("Number of phenotype annotations = {}", counter); + logger.info(PARSING_DONE_LOG_MESSAGE, varDrugPath); } private void parseFunctionalAnnotationFile(Map variantAssociationMap) throws IOException { // Parse the variant annotation file (i.e., var_fa_ann.tsv) - Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(FUNCTIONAL_ANNOTATIONS_TSV_FILENAME); + Path varDrugPath = serializer.getOutdir().resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(FUNCTIONAL_ANNOTATIONS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, varDrugPath); int counter = 0; try (BufferedReader br = FileUtils.newBufferedReader(varDrugPath)) { // Skip first line, i.e. the header line @@ -751,12 +780,14 @@ private void parseFunctionalAnnotationFile(Map } } logger.info("Number of variant annotations = {}", counter); + logger.info(PARSING_DONE_LOG_MESSAGE, varDrugPath); } private void parseStudyParameterFile(Map variantAssociationMap) throws IOException { Map> studyParametersMap = new HashMap<>(); // Parse the study parameters file (i.e., study_parameters.tsv) - Path studyParamsPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(STUDY_PARAMETERS_TSV_FILENAME); + Path studyParamsPath = serializer.getOutdir().resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(STUDY_PARAMETERS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, studyParamsPath); try (BufferedReader br = FileUtils.newBufferedReader(studyParamsPath)) { // Skip first line, i.e. the header line String line = br.readLine(); @@ -807,6 +838,7 @@ private void parseStudyParameterFile(Map varia } } logger.info("Number of study parameters lines = {}", studyParametersMap.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, studyParamsPath); for (Map.Entry> entry : studyParametersMap.entrySet()) { if (variantAssociationMap.containsKey(entry.getKey())) { @@ -861,7 +893,8 @@ private void parseGeneFile(Map chemicalsMap) throws IOEx // Parse the genes file (i.e., genes.tsv) Map geneAnnotationMapByPgkbGeneId = new HashMap<>(); - Path genesPath = pharmGKBDir.resolve(GENES_BASENAME).resolve(GENES_TSV_FILENAME); + Path genesPath = serializer.getOutdir().resolve(GENES_BASENAME).resolve(GENES_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, genesPath); try (BufferedReader br = FileUtils.newBufferedReader(genesPath)) { // Skip first line, i.e. the header line String line = br.readLine(); @@ -940,13 +973,15 @@ private void parseGeneFile(Map chemicalsMap) throws IOEx } logger.info("Number of parsed genes = {}", geneAnnotationMapByPgkbGeneId.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, genesPath); } private void parseChemicalGeneRelationships(Map> pgkbGeneIdMapByChemicalName, Map geneAnnotationMapByPgkbGeneId) throws IOException { int counter = 0; // Parse the genes file (i.e., relationships.tsv) - Path relationshipsPath = pharmGKBDir.resolve(RELATIONSHIPS_BASENAME).resolve(RELATIONSHIPS_TSV_FILENAME); + Path relationshipsPath = serializer.getOutdir().resolve(RELATIONSHIPS_BASENAME).resolve(RELATIONSHIPS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, relationshipsPath); try (BufferedReader br = FileUtils.newBufferedReader(relationshipsPath)) { // Skip first line, i.e. 
the header line String line = br.readLine(); @@ -986,6 +1021,7 @@ private void parseChemicalGeneRelationships(Map> pgkbGeneIdM } } logger.info("Number of parsed {}-{} relationships = {}", GENE_ENTITY, CHEMICAL_ENTITY, counter); + logger.info(PARSING_DONE_LOG_MESSAGE, relationshipsPath); } private List stringFieldToList(String field) { @@ -1011,6 +1047,29 @@ private boolean isHaplotype(String value) { } private List getHaplotypeList(String value) { - return Arrays.stream(value.split(",")).map(s -> s.trim()).collect(Collectors.toList()); + return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toList()); + } + + private void unzipDownloadedFiles(List pharmGkbFiles) throws CellBaseException { + // Unzip + for (File pharmGgkFile : pharmGkbFiles) { + logger.info("Unzip file: {}", pharmGgkFile); + try { + String outPath = serializer.getOutdir().resolve(pharmGgkFile.getName().split("\\.")[0]).toString(); + List params = Arrays.asList("-d", outPath, "-o", pharmGgkFile.toString()); + EtlCommons.runCommandLineProcess(null, "unzip", params, Paths.get(outPath + ".log").toString()); + } catch (CellBaseException e) { + if (pharmGgkFile.getName().contains(GUIDELINE_ANNOTATIONS_BASENAME)) { + // It fails because of long filenames, so it does not raise any exception + logger.warn(e.getMessage()); + } + } catch (IOException e) { + throw new CellBaseException("Error executing unzip in file " + pharmGgkFile, e); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("Error executing unzip in file " + pharmGgkFile, e); + } + } } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PolygenicScoreBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PolygenicScoreBuilder.java new file mode 100644 index 0000000000..9e326013fc --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PolygenicScoreBuilder.java @@ -0,0 +1,569 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.lib.builders; + +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectWriter; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.core.pgs.CommonPolygenicScore; +import org.opencb.biodata.models.core.pgs.PgsCohort; +import org.opencb.biodata.models.core.pgs.PolygenicScore; +import org.opencb.biodata.models.core.pgs.VariantPolygenicScore; +import org.opencb.biodata.models.variant.avro.OntologyTermAnnotation; +import org.opencb.biodata.models.variant.avro.PubmedReference; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.commons.utils.FileUtils; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.RocksIterator; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; + +public class PolygenicScoreBuilder extends CellBaseBuilder { + + private String source; + private String version; + + private Path pgsDir; + private CellBaseFileSerializer fileSerializer; + + protected Map rdbConnectionPerChrom = new HashMap<>(); + + protected static ObjectMapper mapper; + protected static ObjectReader varPgsReader; + protected static ObjectWriter jsonObjectWriter; + + public static final String COMMON_POLYGENIC_SCORE_FILENAME = "common_polygenic_score.json.gz"; + public static final String VARIANT_POLYGENIC_SCORE_FILENAME = "variant_polygenic_score.json.gz"; + + private static final String RSID_COL = "rsID"; + private static final String CHR_NAME_COL = "chr_name"; + private static final String EFFECT_ALLELE_COL = "effect_allele"; + private static final String OTHER_ALLELE_COL = "other_allele"; + private static final String EFFECT_WEIGHT_COL = "effect_weight"; + private static final String ALLELEFREQUENCY_EFFECT_COL = "allelefrequency_effect"; + private static final String ODDS_RATIO_COL = "OR"; + private static final String HAZARD_RATIO_COL = "HR"; + private static final String LOCUS_NAME_COL = "locus_name"; + private static final String IS_HAPLOTYPE_COL = "is_haplotype"; + private static final String IS_DIPLOTYPE_COL = "is_diplotype"; + private static final String IMPUTATION_METHOD_COL = "imputation_method"; + private static final String VARIANT_DESCRIPTION_COL = "variant_description"; + private static final String INCLUSION_CRITERIA_COL = "inclusion_criteria"; + private static final String IS_INTERACTION_COL = "is_interaction"; + private static final String IS_DOMINANT_COL = "is_dominant"; + private static final String IS_RECESSIVE_COL = "is_recessive"; + private static final String DOSAGE_0_WEIGHT_COL = "dosage_0_weight"; + private static final String DOSAGE_1_WEIGHT_COL = "dosage_1_weight"; + private static final String DOSAGE_2_WEIGHT_COL = "dosage_2_weight"; + private static final String HM_RSID_COL = "hm_rsID"; + private static final String HM_CHR_COL = "hm_chr"; + private static final String HM_POS_COL = "hm_pos"; + private static final String HM_INFEROTHERALLELE_COL = "hm_inferOtherAllele"; + + public static final String SAMPLE_SET_KEY = "Sample Set"; + public static final String ODDS_RATIO_KEY = "Odds 
ratio";
+    public static final String HAZARD_RATIO_KEY = "Hazard ratio";
+    public static final String BETA_KEY = "Beta";
+    public static final String AUROC_KEY = "AUROC"; // Area Under the Receiver-Operating Characteristic Curve (AUROC)
+    public static final String CINDEX_KEY = "C-index"; // Concordance Statistic (C-index)
+    public static final String OTHER_KEY = "Other metric";
+    private static final String EFFECT_WEIGHT_KEY = "Effect weight";
+    private static final String ALLELE_FREQUENCY_EFFECT_KEY = "Allele frequency effect";
+    private static final String LOCUS_NAME_KEY = "Locus name";
+    private static final String IS_HAPLOTYPE_KEY = "Haplotype";
+    private static final String IS_DIPLOTYPE_KEY = "Diplotype";
+    private static final String IMPUTATION_METHOD_KEY = "Imputation method";
+    private static final String VARIANT_DESCRIPTION_KEY = "Variant description";
+    private static final String INCLUSION_CRITERIA_KEY = "Score inclusion criteria";
+    private static final String IS_INTERACTION_KEY = "Interaction";
+    private static final String IS_DOMINANT_KEY = "Dominant inheritance model";
+    private static final String IS_RECESSIVE_KEY = "Recessive inheritance model";
+    private static final String DOSAGE_0_WEIGHT_KEY = "Effect weight with 0 copies of the effect allele";
+    private static final String DOSAGE_1_WEIGHT_KEY = "Effect weight with 1 copy of the effect allele";
+    private static final String DOSAGE_2_WEIGHT_KEY = "Effect weight with 2 copies of the effect allele";
+
+    private static final Set<String> VALID_CHROMOSOMES = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
+            "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "MT", "M"));
+
+    static {
+        mapper = new ObjectMapper();
+        mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);
+        varPgsReader = mapper.readerFor(VariantPolygenicScore.class);
+        jsonObjectWriter = mapper.writer();
+    }
+
+    public PolygenicScoreBuilder(String source, String version, Path pgsDir, CellBaseFileSerializer serializer) {
+        super(serializer);
+
+        this.source = source;
+        this.version = version;
+
+        this.fileSerializer = serializer;
+        this.pgsDir = pgsDir;
+
+        logger = LoggerFactory.getLogger(PolygenicScoreBuilder.class);
+    }
+
+    @Override
+    public void parse() throws Exception {
+        // Check input folder
+        FileUtils.checkPath(pgsDir);
+
+        logger.info("Parsing polygenic score (PGS) files...");
+
+        BufferedWriter bw = FileUtils.newBufferedWriter(serializer.getOutdir().resolve(COMMON_POLYGENIC_SCORE_FILENAME));
+
+        for (File file : pgsDir.toFile().listFiles()) {
+            if (file.isFile()) {
+                if (file.getName().endsWith(".txt.gz")) {
+                    logger.info("Processing PGS file: {}", file.getName());
+
+                    String pgsId = null;
+                    Map<String, Integer> columnPos = new HashMap<>();
+
+                    BufferedReader br = FileUtils.newBufferedReader(file.toPath());
+                    String line;
+                    while ((line = br.readLine()) != null) {
+                        if (line.startsWith("#")) {
+                            if (line.startsWith("#pgs_id=")) {
+                                pgsId = line.split("=")[1].trim();
+                                // Sanity check
+                                if (!file.getName().startsWith(pgsId)) {
+                                    throw new CellBaseException("Error parsing file " + file.getName() + ": pgs_id mismatch");
+                                }
+                            }
+                        } else if (line.startsWith(RSID_COL) || line.startsWith(CHR_NAME_COL)) {
+                            String[] fields = line.split("\t");
+                            for (int i = 0; i < fields.length; i++) {
+                                columnPos.put(fields[i], i);
+                            }
+                        } else {
+                            // Sanity check
+                            if (pgsId == null) {
+                                throw new CellBaseException("Error parsing file " + file.getName() + ": pgs_id is null");
+                            }
+                            saveVariantPolygenicScore(line, columnPos,
pgsId); + } + } + br.close(); + } else if (file.getName().endsWith("_metadata.tar.gz")) { + processPgsMetadataFile(file, bw); + } + } + } + + // Serialize/write the saved variant polygenic scores in the RocksDB + serializeRDB(); + serializer.close(); + + // Close PGS file (with common attributes) + bw.close(); + + logger.info("Parsing PGS files finished."); + } + + private void processPgsMetadataFile(File metadataFile, BufferedWriter bw) throws IOException, CellBaseException { + String pgsId = metadataFile.getName().split("_")[0]; + + Path tmp = pgsDir.resolve("tmp"); + if (!tmp.toFile().exists()) { + tmp.toFile().mkdirs(); + } + + String command = "tar -xzf " + metadataFile.getAbsolutePath() + " -C " + tmp.toAbsolutePath(); + Process process = Runtime.getRuntime().exec(command); + + // Wait for the process to complete + int exitCode; + try { + exitCode = process.waitFor(); + } catch (InterruptedException e) { + throw new IOException("Error waiting for the process to complete.", e); + } + + // Check the exit code + if (exitCode != 0) { + throw new IOException("Error executing the command. Exit code: " + exitCode); + } + + // Create PGS object, with the common fields + CommonPolygenicScore pgs = new CommonPolygenicScore(); + pgs.setId(pgsId); + pgs.setSource(source); + pgs.setVersion(version); + + String line; + String[] field; + BufferedReader br; + // PGSxxxxx_metadata_publications.csv + br = FileUtils.newBufferedReader(tmp.resolve(pgsId + "_metadata_publications.csv")); + // Skip first line + line = br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 3 4 5 6 + // PGS Publication/Study (PGP) ID First Author Title Journal Name Publication Date Release Date Authors + // 7 8 + // digital object identifier (doi) PubMed ID (PMID) + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + pgs.getPubmedRefs().add(new PubmedReference(strings.get(8), strings.get(2), strings.get(3), strings.get(4), null)); + } + + // PGSxxxxx_metadata_efo_traits.csv + br = FileUtils.newBufferedReader(tmp.resolve(pgsId + "_metadata_efo_traits.csv")); + // Skip first line + line = br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 3 + // Ontology Trait ID Ontology Trait Label Ontology Trait Description Ontology URL + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + pgs.getTraits().add(new OntologyTermAnnotation(strings.get(0), strings.get(1), strings.get(2), "EFO", strings.get(3), + new HashMap<>())); + } + + // PGSxxxxx_metadata_scores.csv + br = FileUtils.newBufferedReader(tmp.resolve(pgsId + "_metadata_scores.csv")); + // Skip first line + line = br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 3 4 + // Polygenic Score (PGS) ID PGS Name Reported Trait Mapped Trait(s) (EFO label) Mapped Trait(s) (EFO ID) + // 5 6 7 8 + // PGS Development Method PGS Development Details/Relevant Parameters Original Genome Build Number of Variants + // 9 10 11 12 13 + // Number of Interaction Terms Type of Variant Weight PGS Publication (PGP) ID Publication (PMID) Publication (doi) + // 14 15 + // Score and results match the original publication Ancestry Distribution (%) - Source of Variant Associations (GWAS) + // 16 17 18 19 + // Ancestry Distribution (%) - Score Development/Training Ancestry Distribution (%) - PGS Evaluation FTP link Release Date + 
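
These metadata CSVs quote fields that themselves contain commas (for example "Ancestry (e.g. French, Chinese)"), so each line is handed to commons-csv through a StringReader instead of a bare String.split(","). A minimal sketch of that per-line parse; the example line is invented.

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import java.io.StringReader;

public class CsvLineSketch {
    public static void main(String[] args) throws Exception {
        // A naive split(",") would shear the quoted author field apart
        String line = "PGS000001,Breast cancer,\"Mavaddat N, et al.\",25855707";
        try (CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(line))) {
            CSVRecord record = parser.getRecords().get(0);
            System.out.println(record.get(2)); // -> Mavaddat N, et al.
        }
    }
}
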
// 19 + // License/Terms of Use + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + // Sanity check + if (!pgsId.equals(strings.get(0))) { + throw new CellBaseException("Mismatch PGS ID when parsing file " + pgsId + "_metadata_scores.csv"); + } + if (StringUtils.isNotEmpty(pgs.getName())) { + throw new CellBaseException("More than one PGS in file " + pgsId + "_metadata_scores.csv"); + } + pgs.setName(strings.get(1)); + } + + // TODO: PGSxxxxx_metadata_score_development_samples.csv + // 0 1 2 3 4 + // Polygenic Score (PGS) ID Stage of PGS Development Number of Individuals Number of Cases Number of Controls + // 5 6 7 8 + // Percent of Participants Who are Male Sample Age Broad Ancestry Category "Ancestry (e.g. French, Chinese)" + // 9 10 11 12 + // Country of Recruitment Additional Ancestry Description Phenotype Definitions and Methods Followup Time + // 13 13 14 15 16 + // GWAS Catalog Study ID (GCST...) Source PubMed ID (PMID) Source DOI Cohort(s) Additional Sample/Cohort Information + + // PGSxxxxx_metadata_performance_metrics.csv + br = FileUtils.newBufferedReader(tmp.resolve(pgsId + "_metadata_performance_metrics.csv")); + // Skip first line + line = br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 3 4 + // PGS Performance Metric (PPM) ID Evaluated Score PGS Sample Set (PSS) PGS Publication (PGP) ID Reported Trait + // 5 6 7 8 + // Covariates Included in the Model PGS Performance: Other Relevant Information Publication (PMID) Publication (doi) + // 9 10 11 12 + // Hazard Ratio (HR) Odds Ratio (OR) Beta Area Under the Receiver-Operating Characteristic Curve (AUROC) + // 13 14 + // Concordance Statistic (C-index) Other Metric(s) + + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + + // Sanity check + if (!pgsId.equals(strings.get(1))) { + continue; + } + + Map values = new HashMap<>(); + if (StringUtils.isNotEmpty(strings.get(2))) { + values.put(SAMPLE_SET_KEY, strings.get(2)); + } + if (StringUtils.isNotEmpty(strings.get(9))) { + values.put(HAZARD_RATIO_KEY, strings.get(9)); + } + if (StringUtils.isNotEmpty(strings.get(10))) { + values.put(ODDS_RATIO_KEY, strings.get(10)); + } + if (StringUtils.isNotEmpty(strings.get(11))) { + values.put(BETA_KEY, strings.get(11)); + } + if (StringUtils.isNotEmpty(strings.get(12))) { + values.put(AUROC_KEY, strings.get(12)); + } + if (StringUtils.isNotEmpty(strings.get(13))) { + values.put(CINDEX_KEY, strings.get(13)); + } + if (StringUtils.isNotEmpty(strings.get(14))) { + values.put(OTHER_KEY, strings.get(14)); + } + pgs.getValues().add(values); + } + + // TODO: PGSxxxxx_metadata_evaluation_sample_sets.csv + // 0 1 2 3 4 + // PGS Sample Set (PSS) Polygenic Score (PGS) ID Number of Individuals Number of Cases Number of Controls + // 5 6 7 + // Percent of Participants Who are Male Sample Age,Broad Ancestry Category "Ancestry (e.g.French, Chinese)" + // 8 9 10 11 + // Country of Recruitment Additional Ancestry Description Phenotype Definitions and Methods Followup Time + // 12 13 14 15 16 + // GWAS Catalog Study ID (GCST...) 
Source PubMed ID (PMID) Source DOI Cohort(s) Additional Sample/Cohort Information + + // PGSxxxxx_metadata_cohorts.csv + br = FileUtils.newBufferedReader(tmp.resolve(pgsId + "_metadata_cohorts.csv")); + // Skip first line + line = br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 + // Cohort ID Cohort Name Previous/other/additional names + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + pgs.getCohorts().add(new PgsCohort(strings.get(0), strings.get(1), strings.get(2))); + } + + // Create PGS object, with the common fields + bw.write(jsonObjectWriter.writeValueAsString(pgs)); + bw.write("\n"); + + // Clean tmp folder + for (File tmpFile : tmp.toFile().listFiles()) { + tmpFile.delete(); + } + } + + private void saveVariantPolygenicScore(String line, Map columnPos, String pgsId) + throws RocksDBException, IOException { + String chrom; + int position; + String effectAllele; + String otherAllele; + + String[] field = line.split("\t", -1); + + if (columnPos.containsKey(HM_CHR_COL)) { + chrom = field[columnPos.get(HM_CHR_COL)]; + if (!VALID_CHROMOSOMES.contains(chrom)) { + // Only chromosomes are processed; no contigs, e.g.: 8_KI270821v1_alt, 11_KI270927v1_alt, 12_GL877875v1_alt,... + return; + } + } else { + logger.warn("Missing field '{}', skipping line: {}", HM_CHR_COL, line); + return; + } + if (columnPos.containsKey(HM_POS_COL)) { + try { + position = Integer.parseInt(field[columnPos.get(HM_POS_COL)]); + } catch (NumberFormatException e) { + logger.warn("Invalid field '{}' (value = {}), skipping line: {}", HM_POS_COL, field[columnPos.get(HM_POS_COL)], line); + return; + } + } else { + logger.warn("Missing field '{}', skipping line: {}", HM_POS_COL, line); + return; + } + if (columnPos.containsKey(EFFECT_ALLELE_COL)) { + effectAllele = field[columnPos.get(EFFECT_ALLELE_COL)]; + } else { + logger.warn("Missing field '{}', skipping line: {}", EFFECT_ALLELE_COL, line); + return; + } + if (columnPos.containsKey(HM_INFEROTHERALLELE_COL) && StringUtils.isNotEmpty(field[columnPos.get(HM_INFEROTHERALLELE_COL)])) { + otherAllele = field[columnPos.get(HM_INFEROTHERALLELE_COL)]; + } else if (columnPos.containsKey(OTHER_ALLELE_COL)) { + otherAllele = field[columnPos.get(OTHER_ALLELE_COL)]; + } else { + logger.warn("Missing fields '{}' and '{}' (at least one is mandatory), skipping line: {}", HM_INFEROTHERALLELE_COL, + OTHER_ALLELE_COL, line); + return; + } + + // Create polygenic score + Map values = new HashMap<>(); + if (columnPos.containsKey(EFFECT_WEIGHT_COL)) { + values.put(EFFECT_WEIGHT_KEY, field[columnPos.get(EFFECT_WEIGHT_COL)]); + } + if (columnPos.containsKey(ALLELEFREQUENCY_EFFECT_COL)) { + values.put(ALLELE_FREQUENCY_EFFECT_KEY, field[columnPos.get(ALLELEFREQUENCY_EFFECT_COL)]); + } + if (columnPos.containsKey(ODDS_RATIO_COL)) { + values.put(ODDS_RATIO_KEY, field[columnPos.get(ODDS_RATIO_COL)]); + } + if (columnPos.containsKey(HAZARD_RATIO_COL)) { + values.put(HAZARD_RATIO_KEY, field[columnPos.get(HAZARD_RATIO_COL)]); + } + if (columnPos.containsKey(LOCUS_NAME_COL)) { + values.put(LOCUS_NAME_KEY, field[columnPos.get(LOCUS_NAME_COL)]); + } + if (columnPos.containsKey(IS_HAPLOTYPE_COL)) { + values.put(IS_HAPLOTYPE_KEY, field[columnPos.get(IS_HAPLOTYPE_COL)]); + } + if (columnPos.containsKey(IS_DIPLOTYPE_COL)) { + values.put(IS_DIPLOTYPE_KEY, field[columnPos.get(IS_DIPLOTYPE_COL)]); + } + if (columnPos.containsKey(IMPUTATION_METHOD_COL)) { + 
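
The scoring files do not all carry the same columns, which is why the header was turned into a name-to-index map earlier in parse() and every optional column is guarded with containsKey here. A compact sketch of that header-driven lookup, with an invented header and row:

import java.util.HashMap;
import java.util.Map;

public class HeaderColumnMapSketch {
    public static void main(String[] args) {
        String header = "chr_name\tchr_position\teffect_allele\teffect_weight";
        String row = "1\t5743196\tT\t-0.0024";

        // Build name -> index once from the header line
        Map<String, Integer> columnPos = new HashMap<>();
        String[] headerFields = header.split("\t");
        for (int i = 0; i < headerFields.length; i++) {
            columnPos.put(headerFields[i], i);
        }

        // Guard every optional column, so files with different column subsets share one code path
        String[] fields = row.split("\t", -1);
        if (columnPos.containsKey("effect_weight")) {
            System.out.println("effect_weight = " + fields[columnPos.get("effect_weight")]);
        }
        if (columnPos.containsKey("OR")) { // absent here, silently skipped
            System.out.println("OR = " + fields[columnPos.get("OR")]);
        }
    }
}
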
values.put(IMPUTATION_METHOD_KEY, field[columnPos.get(IMPUTATION_METHOD_COL)]); + } + if (columnPos.containsKey(VARIANT_DESCRIPTION_COL)) { + values.put(VARIANT_DESCRIPTION_KEY, field[columnPos.get(VARIANT_DESCRIPTION_COL)]); + } + if (columnPos.containsKey(INCLUSION_CRITERIA_COL)) { + values.put(INCLUSION_CRITERIA_KEY, field[columnPos.get(INCLUSION_CRITERIA_COL)]); + } + if (columnPos.containsKey(IS_INTERACTION_COL)) { + values.put(IS_INTERACTION_KEY, field[columnPos.get(IS_INTERACTION_COL)]); + } + if (columnPos.containsKey(IS_DOMINANT_COL)) { + values.put(IS_DOMINANT_KEY, field[columnPos.get(IS_DOMINANT_COL)]); + } + if (columnPos.containsKey(IS_RECESSIVE_COL)) { + values.put(IS_RECESSIVE_KEY, field[columnPos.get(IS_RECESSIVE_COL)]); + } + if (columnPos.containsKey(DOSAGE_0_WEIGHT_COL)) { + values.put(DOSAGE_0_WEIGHT_KEY, field[columnPos.get(DOSAGE_0_WEIGHT_COL)]); + } + if (columnPos.containsKey(DOSAGE_1_WEIGHT_COL)) { + values.put(DOSAGE_1_WEIGHT_KEY, field[columnPos.get(DOSAGE_1_WEIGHT_COL)]); + } + if (columnPos.containsKey(DOSAGE_2_WEIGHT_COL)) { + values.put(DOSAGE_2_WEIGHT_KEY, field[columnPos.get(DOSAGE_2_WEIGHT_COL)]); + } + + // Creating and/or updating variant polygenic score + VariantPolygenicScore varPgs; + RocksDB rdb = getRocksDB(chrom); + String key = chrom + ":" + position + ":" + otherAllele + ":" + effectAllele; + byte[] dbContent = rdb.get(key.getBytes()); + if (dbContent == null) { + varPgs = new VariantPolygenicScore(chrom, position, otherAllele, effectAllele, + Collections.singletonList(new PolygenicScore(pgsId, values))); + } else { + varPgs = varPgsReader.readValue(dbContent); + varPgs.getPolygenicScores().add(new PolygenicScore(pgsId, values)); + } + rdb.put(key.getBytes(), jsonObjectWriter.writeValueAsBytes(varPgs)); + } + + private void serializeRDB() throws IOException { + for (Map.Entry entry : rdbConnectionPerChrom.entrySet()) { + RocksDB rdb = (RocksDB) entry.getValue()[0]; + Options dbOption = (Options) entry.getValue()[1]; + String dbLocation = (String) entry.getValue()[2]; + + // DO NOT change the name of the rocksIterator variable - for some unexplainable reason Java VM crashes if it's + // named "iterator" + RocksIterator rocksIterator = rdb.newIterator(); + + logger.info("Reading from RocksDB index ({}) and serializing to {}.json.gz", dbLocation, + serializer.getOutdir().resolve(serializer.getFileName())); + int counter = 0; + for (rocksIterator.seekToFirst(); rocksIterator.isValid(); rocksIterator.next()) { + VariantPolygenicScore varPgs = varPgsReader.readValue(rocksIterator.value()); + serializer.serialize(varPgs); + counter++; + if (counter % 10000 == 0) { + logger.info("{} written", counter); + } + } + closeIndex(rdb, dbOption, dbLocation); + } + } + + private void closeIndex(RocksDB rdb, Options dbOption, String dbLocation) throws IOException { + if (rdb != null) { + rdb.close(); + } + if (dbOption != null) { + dbOption.dispose(); + } + if (dbLocation != null && Files.exists(Paths.get(dbLocation))) { + org.apache.commons.io.FileUtils.deleteDirectory(new File(dbLocation)); + } + } + + private Object[] getDBConnection(String dbLocation, boolean forceCreate) { + boolean indexingNeeded = forceCreate || !Files.exists(Paths.get(dbLocation)); + // a static method that loads the RocksDB C++ library. + RocksDB.loadLibrary(); + // the Options class contains a set of configurable DB options + // that determines the behavior of a database. 
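+        // Note: each RocksDB index acts as an on-disk map keyed by
+        // "chrom:pos:otherAllele:effectAllele" (one index per chromosome, see getRocksDBConnection
+        // below), so scores from many large PGS scoring files can be merged per variant without
+        // holding them all in memory; an existing index is reopened read-only unless forceCreate is set.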
+ Options options = new Options().setCreateIfMissing(true); + +// options.setMaxBackgroundCompactions(4); +// options.setMaxBackgroundFlushes(1); +// options.setCompressionType(CompressionType.NO_COMPRESSION); +// options.setMaxOpenFiles(-1); +// options.setIncreaseParallelism(4); +// options.setCompactionStyle(CompactionStyle.LEVEL); +// options.setLevelCompactionDynamicLevelBytes(true); + + RocksDB db = null; + try { + // a factory method that returns a RocksDB instance + if (indexingNeeded) { + db = RocksDB.open(options, dbLocation); + } else { + db = RocksDB.openReadOnly(options, dbLocation); + } + // do something + } catch (RocksDBException e) { + // do some error handling + e.printStackTrace(); + System.exit(1); + } + + return new Object[]{db, options, dbLocation, indexingNeeded}; + } + + private Object[] getRocksDBConnection(String chrom) { + if (!rdbConnectionPerChrom.containsKey(chrom) || rdbConnectionPerChrom.get(chrom) == null) { + Object[] dbConnection = getDBConnection(pgsDir.resolve("rdb-" + chrom + ".idx").toString(), true); + rdbConnectionPerChrom.put(chrom, dbConnection); + } + return rdbConnectionPerChrom.get(chrom); + } + + private RocksDB getRocksDB(String chrom) { + return (RocksDB) getRocksDBConnection(chrom)[0]; + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ProteinBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ProteinBuilder.java index 0369a0e6aa..d8246241e4 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ProteinBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ProteinBuilder.java @@ -21,6 +21,8 @@ import com.fasterxml.jackson.databind.ObjectWriter; import org.opencb.biodata.formats.protein.uniprot.UniProtParser; import org.opencb.biodata.formats.protein.uniprot.v202003jaxb.*; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.serializer.CellBaseSerializer; import org.opencb.commons.utils.FileUtils; import org.rocksdb.Options; @@ -34,54 +36,71 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; import java.math.BigInteger; import java.nio.file.Files; import java.nio.file.Path; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; +import java.util.*; + +import static org.opencb.cellbase.lib.EtlCommons.*; public class ProteinBuilder extends CellBaseBuilder { - private Path uniprotFilesDir; - private Path interproFilePath; + private Path proteinPath; private String species; - private Map proteinMap; - protected Logger logger = LoggerFactory.getLogger(this.getClass()); - public ProteinBuilder(Path uniprotFilesDir, String species, CellBaseSerializer serializer) { - this(uniprotFilesDir, null, species, serializer); - } - - public ProteinBuilder(Path uniprotFilesDir, Path interproFilePath, String species, CellBaseSerializer serializer) { + public ProteinBuilder(Path proteinPath, String species, CellBaseSerializer serializer) { super(serializer); - this.uniprotFilesDir = uniprotFilesDir; - this.interproFilePath = interproFilePath; + this.proteinPath = proteinPath; this.species = species; } @Override - public void parse() throws IOException { + public void parse() throws CellBaseException, IOException { + logger.info(BUILDING_LOG_MESSAGE, getDataName(PROTEIN_DATA)); + + // Sanity check + checkDirectory(proteinPath, getDataName(PROTEIN_DATA)); + + // Check UniProt 
file + DataSource dataSource = dataSourceReader.readValue(proteinPath.resolve(getDataVersionFilename(UNIPROT_DATA)).toFile()); + List uniProtFiles = checkFiles(dataSource, proteinPath, getDataCategory(UNIPROT_DATA) + "/" + getDataName(UNIPROT_DATA)); + if (uniProtFiles.size() != 1) { + throw new CellBaseException("Only one " + getDataName(UNIPROT_DATA) + " file is expected, but currently there are " + + uniProtFiles.size() + " files"); + } - if (uniprotFilesDir == null || !Files.exists(uniprotFilesDir)) { - throw new IOException("File '" + uniprotFilesDir + "' not valid"); + // Check InterPro file + dataSource = dataSourceReader.readValue(proteinPath.resolve(getDataVersionFilename(INTERPRO_DATA)).toFile()); + List interProFiles = checkFiles(dataSource, proteinPath, getDataCategory(INTERPRO_DATA) + "/" + getDataName(INTERPRO_DATA)); + if (interProFiles.size() != 1) { + throw new CellBaseException("Only one " + getDataName(INTERPRO_DATA) + " file is expected, but currently there are " + + interProFiles.size() + " files"); } - RocksDB rocksDb = getDBConnection(); + // Prepare UniProt data by splitting data in chunks + Path uniProtChunksPath = serializer.getOutdir().resolve(UNIPROT_CHUNKS_SUBDIRECTORY); + logger.info("Split {} file {} into chunks at {}", getDataName(UNIPROT_DATA), uniProtFiles.get(0).getName(), uniProtChunksPath); + Files.createDirectories(uniProtChunksPath); + splitUniprot(proteinPath.resolve(uniProtFiles.get(0).getName()), uniProtChunksPath); + + // Prepare RocksDB + RocksDB rocksDb = getDBConnection(uniProtChunksPath); ObjectMapper mapper = new ObjectMapper(); mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); ObjectWriter jsonObjectWriter = mapper.writerFor(Entry.class); - proteinMap = new HashMap<>(30000); -// UniProtParser up = new UniProtParser(); + Map proteinMap = new HashMap<>(30000); + + // Parsing files try { - File[] files = uniprotFilesDir.toFile().listFiles((dir, name) -> name.endsWith(".xml") || name.endsWith(".xml.gz")); + File[] files = uniProtChunksPath.toFile().listFiles((dir, name) -> name.endsWith(".xml") || name.endsWith(".xml.gz")); for (File file : files) { + logger.info(PARSING_LOG_MESSAGE, file); Uniprot uniprot = (Uniprot) UniProtParser.loadXMLInfo(file.toString(), UniProtParser.UNIPROT_CONTEXT); for (Entry entry : uniprot.getEntry()) { @@ -89,16 +108,16 @@ public void parse() throws IOException { for (OrganismNameType organismNameType : entry.getOrganism().getName()) { entryOrganism = organismNameType.getValue(); if (entryOrganism.equals(species)) { -// proteinMap.put(entry.getAccession().get(0), entry); rocksDb.put(entry.getAccession().get(0).getBytes(), jsonObjectWriter.writeValueAsBytes(entry)); } } } + logger.info(PARSING_DONE_LOG_MESSAGE, file); } logger.debug("Number of proteins stored in map: '{}'", proteinMap.size()); - if (interproFilePath != null && Files.exists(interproFilePath)) { - BufferedReader interproBuffereReader = FileUtils.newBufferedReader(interproFilePath); + logger.info(PARSING_LOG_MESSAGE, interProFiles.get(0)); + try (BufferedReader interproBuffereReader = FileUtils.newBufferedReader(interProFiles.get(0).toPath())) { Set hashSet = new HashSet<>(proteinMap.keySet()); Set visited = new HashSet<>(30000); @@ -114,7 +133,6 @@ public void parse() throws IOException { iprAdded = false; BigInteger start = BigInteger.valueOf(Integer.parseInt(fields[4])); BigInteger end = BigInteger.valueOf(Integer.parseInt(fields[5])); -// for (FeatureType featureType : proteinMap.get(fields[0]).getFeature()) { byte[] bytes = 
rocksDb.get(fields[0].getBytes()); Entry entry = mapper.readValue(bytes, Entry.class); for (FeatureType featureType : entry.getFeature()) { @@ -145,7 +163,6 @@ public void parse() throws IOException { locationType.setEnd(positionType2); featureType.setLocation(locationType); -// proteinMap.get(fields[0]).getFeature().add(featureType); bytes = rocksDb.get(fields[0].getBytes()); entry = mapper.readValue(bytes, Entry.class); entry.getFeature().add(featureType); @@ -158,11 +175,13 @@ public void parse() throws IOException { } if (++numInterProLinesProcessed % 10000000 == 0) { - logger.debug("{} InterPro lines processed. {} unique proteins processed", - numInterProLinesProcessed, numUniqueProteinsProcessed); + logger.debug("{} {} lines processed. {} unique proteins processed", numInterProLinesProcessed, + getDataName(INTERPRO_DATA), numUniqueProteinsProcessed); } } - interproBuffereReader.close(); + logger.info(PARSING_DONE_LOG_MESSAGE, interProFiles.get(0)); + } catch (IOException e) { + throw new CellBaseException("Error parsing " + getDataName(INTERPRO_DATA) + " file: " + interProFiles.get(0), e); } // Serialize and save results @@ -173,24 +192,70 @@ public void parse() throws IOException { } rocksDb.close(); - } catch (JAXBException | RocksDBException e) { - e.printStackTrace(); + } catch (JAXBException | RocksDBException | IOException e) { + throw new CellBaseException("Error parsing " + getDataName(PROTEIN_DATA) + " files", e); } + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(PROTEIN_DATA)); } - private RocksDB getDBConnection() { - // a static method that loads the RocksDB C++ library. + private RocksDB getDBConnection(Path uniProtChunksPath) throws CellBaseException { + // A static method that loads the RocksDB C++ library RocksDB.loadLibrary(); - // the Options class contains a set of configurable DB options - // that determines the behavior of a database. 
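+        // As in the polygenic score builder above, RocksDB serves as a temporary on-disk
+        // key-value index; here it maps each UniProt accession to its parsed Entry object,
+        // so InterPro annotations can be attached without keeping every entry in memory.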
+        // The Options class contains a set of configurable DB options that determines the behavior of a database
         Options options = new Options().setCreateIfMissing(true);
         try {
-            return RocksDB.open(options, uniprotFilesDir.resolve("integration.idx").toString());
+            return RocksDB.open(options, uniProtChunksPath.resolve("integration.idx").toString());
         } catch (RocksDBException e) {
-            // do some error handling
-            e.printStackTrace();
-            System.exit(1);
+            throw new CellBaseException("Error preparing RocksDB", e);
+        }
+    }
+
+    private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOException {
+        PrintWriter pw = null;
+        try (BufferedReader br = FileUtils.newBufferedReader(uniprotFilePath)) {
+            StringBuilder header = new StringBuilder();
+            boolean beforeEntry = true;
+            boolean inEntry = false;
+            int count = 0;
+            int chunk = 0;
+            String line;
+            while ((line = br.readLine()) != null) {
+                if (line.trim().startsWith("<entry ")) {
+                    inEntry = true;
+                    beforeEntry = false;
+                    if (count % 10000 == 0) {
+                        // Start a new chunk file, repeating the original XML header
+                        pw = new PrintWriter(Files.newBufferedWriter(splitOutdirPath.resolve("chunk_" + chunk + ".xml")));
+                        pw.println(header.toString().trim());
+                    }
+                    count++;
+                }
+                if (beforeEntry) {
+                    header.append(line).append("\n");
+                }
+                if (inEntry) {
+                    pw.println(line);
+                }
+                if (line.trim().startsWith("</entry>")) {
+                    inEntry = false;
+                    if (count % 10000 == 0) {
+                        // Close the current chunk every 10000 entries
+                        if (pw != null) {
+                            pw.print("</uniprot>");
+                            pw.close();
+                        }
+                        chunk++;
+                    }
+                }
+            }
+            pw.print("</uniprot>");
+            pw.close();
+        } finally {
+            if (pw != null) {
+                pw.close();
+            }
         }
-        return null;
     }
 }
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PubMedBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PubMedBuilder.java
index 8aba7c9dda..348d22a07d 100644
--- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PubMedBuilder.java
+++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PubMedBuilder.java
@@ -16,63 +16,71 @@
 package org.opencb.cellbase.lib.builders;
 
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.ObjectWriter;
 import org.opencb.biodata.formats.pubmed.PubMedParser;
 import org.opencb.biodata.formats.pubmed.v233jaxb.PubmedArticle;
 import org.opencb.biodata.formats.pubmed.v233jaxb.PubmedArticleSet;
+import org.opencb.cellbase.core.config.CellBaseConfiguration;
+import org.opencb.cellbase.core.exception.CellBaseException;
 import org.opencb.cellbase.core.serializer.CellBaseFileSerializer;
+import org.opencb.cellbase.lib.download.PubMedDownloadManager;
 import org.opencb.commons.utils.FileUtils;
-import org.slf4j.LoggerFactory;
 
-import java.io.File;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.List;
 
+import static org.opencb.cellbase.lib.EtlCommons.PUBMED_DATA;
+import static org.opencb.cellbase.lib.EtlCommons.getDataName;
+
 public class PubMedBuilder extends CellBaseBuilder {
 
-    private Path pubmedDir;
-    private CellBaseFileSerializer fileSerializer;
+    private Path pubMedDownloadPath;
+    private CellBaseConfiguration configuration;
 
-    public PubMedBuilder(Path pubmedDir, CellBaseFileSerializer serializer) {
+    public PubMedBuilder(Path pubMedDownloadPath, CellBaseFileSerializer serializer, CellBaseConfiguration configuration) {
         super(serializer);
-
-        this.fileSerializer = serializer;
-        this.pubmedDir = pubmedDir;
-
-        logger = LoggerFactory.getLogger(PubMedBuilder.class);
+        this.pubMedDownloadPath = pubMedDownloadPath;
+        this.configuration = configuration;
     }
 
     @Override
     public void parse() throws Exception {
-        // Check input folder
-        FileUtils.checkPath(pubmedDir);
+        logger.info(BUILDING_LOG_MESSAGE, getDataName(PUBMED_DATA));
 
-        logger.info("Parsing PubMed files...");
+        // Check input folder
+        FileUtils.checkPath(pubMedDownloadPath);
 
-        for (File file : pubmedDir.toFile().listFiles()) {
-            if (file.isFile() && (file.getName().endsWith("gz") || file.getName().endsWith("xml"))) {
-                String name = 
file.getName().split("\\.")[0]; + // Check PubMed files before parsing them + List pubMedFilenames = PubMedDownloadManager.getPubMedFilenames(configuration.getDownload().getPubmed()); + for (String pubMedFilename : pubMedFilenames) { + Path pubMedPath = pubMedDownloadPath.resolve(pubMedFilename); + if (!Files.exists(pubMedPath)) { + throw new CellBaseException("Expected PubMed file " + pubMedFilename + ", but it was not found at " + pubMedDownloadPath); + } + } + for (String pubMedFilename : pubMedFilenames) { + Path pubMedPath = pubMedDownloadPath.resolve(pubMedFilename); + String basename = pubMedFilename.split("\\.")[0]; - ObjectWriter objectWriter = new ObjectMapper().writerFor(PubmedArticle.class); - PubmedArticleSet pubmedArticleSet = (PubmedArticleSet) PubMedParser.loadXMLInfo(file.getAbsolutePath()); + PubmedArticleSet pubmedArticleSet = (PubmedArticleSet) PubMedParser.loadXMLInfo(pubMedPath.toAbsolutePath().toString()); - List objects = pubmedArticleSet.getPubmedArticleOrPubmedBookArticle(); - logger.info("Parsing PubMed file {} of {} articles ...", file.getName(), objects.size()); - int counter = 0; - for (Object object : objects) { - PubmedArticle pubmedArticle = (PubmedArticle) object; - fileSerializer.serialize(pubmedArticle, name); - if (++counter % 2000 == 0) { - logger.info("\t\t" + counter + " articles"); - } + List objects = pubmedArticleSet.getPubmedArticleOrPubmedBookArticle(); + logger.info(PARSING_LOG_MESSAGE, pubMedPath); + int counter = 0; + for (Object object : objects) { + PubmedArticle pubmedArticle = (PubmedArticle) object; + ((CellBaseFileSerializer) serializer).serialize(pubmedArticle, basename); + if (++counter % 2000 == 0) { + logger.info("{} articles", counter); } - fileSerializer.close(); - logger.info("\t\tDone: " + counter + " articles."); } + serializer.close(); + + String logMsg = pubMedPath + " (" + counter + " articles)"; + logger.info(PARSING_DONE_LOG_MESSAGE, logMsg); } - logger.info("Parsing PubMed files finished."); + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(PUBMED_DATA)); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java index 48b0cd1d0d..8f03a801f2 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java @@ -24,26 +24,43 @@ import org.opencb.cellbase.core.ParamConstants; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.serializer.CellBaseSerializer; import org.rocksdb.RocksDBException; +import java.io.File; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.*; +import java.util.stream.Collectors; + +import static org.opencb.cellbase.lib.EtlCommons.*; public class RefSeqGeneBuilder extends CellBaseBuilder { + private Path downloadPath; + private Map transcriptDict; private Map exonDict; private Path gtfFile; private Path fastaFile; - private Path proteinFastaFile, cdnaFastaFile; - private Path maneFile, lrgFile, disgenetFile, hpoFile, geneDrugFile, miRTarBaseFile; - private Path cancerGeneCensus, cancerHotspot; - private Path tso500File, eglhHaemOncFile; + private Path proteinFastaFile; + private Path cdnaFastaFile; + private Path maneFile; + 
private Path lrgFile; + private Path disgenetFile; + private Path hpoFile; + private Path geneDrugFile; + private Path miRTarBaseFile; + private Path cancerGeneCensus; + private Path cancerHotspot; + private Path tso500File; + private Path eglhHaemOncFile; private SpeciesConfiguration speciesConfiguration; private static final Map REFSEQ_CHROMOSOMES = new HashMap<>(); - private final String status = "KNOWN"; + private static final String KNOWN_STATUS = "KNOWN"; private static final String SOURCE = ParamConstants.QueryParams.REFSEQ.key(); private Gene gene = null; private Transcript transcript = null; @@ -52,85 +69,95 @@ public class RefSeqGeneBuilder extends CellBaseBuilder { // sometimes there are two stop codons (eg NM_018159.4). Only parse the first one, skip the second private boolean seenStopCodon = false; - - public RefSeqGeneBuilder(Path refSeqDirectoryPath, SpeciesConfiguration speciesConfiguration, CellBaseSerializer serializer) { + public RefSeqGeneBuilder(Path downloadPath, SpeciesConfiguration speciesConfiguration, CellBaseSerializer serializer) { super(serializer); + this.downloadPath = downloadPath; this.speciesConfiguration = speciesConfiguration; - getGtfFileFromDirectoryPath(refSeqDirectoryPath); - getFastaFileFromDirectoryPath(refSeqDirectoryPath); - getProteinFastaFileFromDirectoryPath(refSeqDirectoryPath); - getCdnaFastaFileFromDirectoryPath(refSeqDirectoryPath); - setAnnotationFiles(refSeqDirectoryPath); - transcriptDict = new HashMap<>(250000); exonDict = new HashMap<>(8000000); } - private void setAnnotationFiles(Path refSeqDirectoryPath) { - Path geneDirectoryPath = refSeqDirectoryPath.getParent().resolve("gene"); - maneFile = geneDirectoryPath.resolve("MANE.GRCh38.v1.0.summary.txt.gz"); - lrgFile = geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"); - geneDrugFile = geneDirectoryPath.resolve("dgidb.tsv"); - disgenetFile = geneDirectoryPath.resolve("all_gene_disease_associations.tsv.gz"); - hpoFile = geneDirectoryPath.resolve("phenotype_to_genes.txt"); - cancerGeneCensus = geneDirectoryPath.resolve("cancer-gene-census.tsv"); - cancerHotspot = geneDirectoryPath.resolve("hotspots_v2.xls"); - tso500File = geneDirectoryPath.resolve("TSO500_transcripts.txt"); - eglhHaemOncFile = geneDirectoryPath.resolve("EGLH_HaemOnc_transcripts.txt"); - miRTarBaseFile = refSeqDirectoryPath.getParent().resolve("regulation/hsa_MTI.xlsx"); - } - - private void getGtfFileFromDirectoryPath(Path refSeqDirectoryPath) { - for (String fileName : refSeqDirectoryPath.toFile().list()) { - if (fileName.endsWith(".gtf") || fileName.endsWith(".gtf.gz")) { - gtfFile = refSeqDirectoryPath.resolve(fileName); - break; - } + public void check() throws Exception { + if (checked) { + return; } - } - private void getFastaFileFromDirectoryPath(Path refSeqDirectoryPath) { - for (String fileName : refSeqDirectoryPath.toFile().list()) { - if (fileName.endsWith("genomic.fna") || fileName.endsWith("genomic.fna.gz")) { - fastaFile = refSeqDirectoryPath.resolve(fileName); - break; - } - } - } + String refSeqGeneLabel = getDataName(REFSEQ_DATA) + " " + getDataName(GENE_DATA); + logger.info(CHECKING_BEFORE_BUILDING_LOG_MESSAGE, refSeqGeneLabel); - private void getProteinFastaFileFromDirectoryPath(Path refSeqDirectoryPath) { - for (String fileName : refSeqDirectoryPath.toFile().list()) { - if (fileName.endsWith(".faa") || fileName.endsWith(".faa.gz")) { - proteinFastaFile = refSeqDirectoryPath.resolve(fileName); - break; + // Sanity check + checkDirectory(downloadPath, refSeqGeneLabel); + if 
(!Files.exists(serializer.getOutdir())) {
+            try {
+                Files.createDirectories(serializer.getOutdir());
+            } catch (IOException e) {
+                throw new CellBaseException("Error creating folder " + serializer.getOutdir(), e);
             }
         }
-    }
-
-    private void getCdnaFastaFileFromDirectoryPath(Path refSeqDirectoryPath) {
-        for (String fileName : refSeqDirectoryPath.toFile().list()) {
-            if (fileName.endsWith("cdna.fna") || fileName.endsWith("cdna.fna.gz")) {
-                cdnaFastaFile = refSeqDirectoryPath.resolve(fileName);
-                break;
-            }
-        }
+        // Check RefSeq files
+        List<File> files = checkFiles(refSeqGeneLabel, REFSEQ_DATA, downloadPath, 4);
+        gtfFile = files.stream().filter(f -> f.getName().contains(".gtf")).findFirst().get().toPath();
+        proteinFastaFile = files.stream().filter(f -> f.getName().contains("_protein")).findFirst().get().toPath();
+        cdnaFastaFile = files.stream().filter(f -> f.getName().contains("_rna")).findFirst().get().toPath();
+        fastaFile = files.stream().filter(f -> f.getName().contains("_genomic.fna")).findFirst().get().toPath();
+
+        // Check common files
+        maneFile = checkFiles(MANE_SELECT_DATA, downloadPath.getParent(), 1).get(0).toPath();
+        lrgFile = checkFiles(LRG_DATA, downloadPath.getParent(), 1).get(0).toPath();
+        cancerHotspot = checkFiles(CANCER_HOTSPOT_DATA, downloadPath.getParent(), 1).get(0).toPath();
+        geneDrugFile = checkFiles(DGIDB_DATA, downloadPath.getParent(), 1).get(0).toPath();
+        // hpoFile = checkFiles(HPO_DATA, downloadPath.getParent(), 1);
+        disgenetFile = checkFiles(DISGENET_DATA, downloadPath.getParent(), 1).get(0).toPath();
+        // cancerGeneCensus = ;
+        // tso500File = ;
+        // eglhHaemOncFile = ;
+
+        // Check regulation files
+        // miRTarBase
+        // The downloaded .xlsx file contains errors and has to be fixed manually
+        logger.info("Checking {} folder and files", getDataName(MIRTARBASE_DATA));
+        Path downloadRegulationPath = downloadPath.getParent().getParent().resolve(REGULATION_DATA);
+        List<String> mirTarBaseFiles = ((DataSource) dataSourceReader.readValue(downloadRegulationPath.resolve(
+                getDataVersionFilename(MIRTARBASE_DATA)).toFile())).getUrls().stream().map(u -> Paths.get(u).getFileName().toString())
+                .collect(Collectors.toList());
+        if (mirTarBaseFiles.size() != 1) {
+            throw new CellBaseException("One " + getDataName(MIRTARBASE_DATA) + " file is expected at " + downloadRegulationPath
+                    + ", but currently there are " + mirTarBaseFiles.size() + " files");
+        }
+        // The hsa_MTI.xlsx is fixed and converted to hsa_MTI.csv manually
+        if (!mirTarBaseFiles.get(0).endsWith(XLSX_EXTENSION)) {
+            throw new CellBaseException("A " + XLSX_EXTENSION + " " + getDataName(MIRTARBASE_DATA) + " file is expected at "
+                    + downloadRegulationPath + ", but currently it is named " + mirTarBaseFiles.get(0));
+        }
+        miRTarBaseFile = downloadRegulationPath.resolve(mirTarBaseFiles.get(0).replace(XLSX_EXTENSION, CSV_EXTENSION));
+        if (!Files.exists(miRTarBaseFile)) {
+            throw new CellBaseException("The " + getDataName(MIRTARBASE_DATA) + " fixed file " + miRTarBaseFile + " does not exist");
+        }
+
+        logger.info(CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE, refSeqGeneLabel);
+        checked = true;
     }
 
     public void parse() throws Exception {
+        check();
+
         // Preparing the fasta file for fast accessing
         FastaIndex fastaIndex = null;
         if (fastaFile != null) {
             fastaIndex = new FastaIndex(fastaFile);
         }
 
-        // index protein sequences for later
+        // Index protein sequences for later
+        logger.info("Indexing gene annotation for {} ...", getDataName(REFSEQ_DATA));
        RefSeqGeneBuilderIndexer indexer = new 
RefSeqGeneBuilderIndexer(gtfFile.getParent()); indexer.index(maneFile, lrgFile, proteinFastaFile, cdnaFastaFile, geneDrugFile, hpoFile, disgenetFile, miRTarBaseFile, cancerGeneCensus, cancerHotspot, tso500File, eglhHaemOncFile); + logger.info("Indexing done for {}", getDataName(REFSEQ_DATA)); - logger.info("Parsing RefSeq gtf..."); + logger.info(PARSING_LOG_MESSAGE, gtfFile); GtfReader gtfReader = new GtfReader(gtfFile); Gtf gtf; @@ -164,22 +191,24 @@ public void parse() throws Exception { } } - // add xrefs to last transcript + // Add xrefs to last transcript addXrefs(transcript, geneDbxrefs, exonDbxrefs); - // last gene must be serialized + // Last gene must be serialized store(); - // cleaning + // Close gtfReader.close(); serializer.close(); if (fastaIndex != null) { fastaIndex.close(); } indexer.close(); + + logger.info(PARSING_DONE_LOG_MESSAGE, gtfFile); } - // store right before parsing the previous gene, or the very last gene. + // Store right before parsing the previous gene, or the very last gene. private void store() { serializer.serialize(gene); reset(); @@ -235,7 +264,7 @@ private void parseGene(Gtf gtf, String chromosome, RefSeqGeneBuilderIndexer inde null, indexer.getMirnaTargets(geneName), indexer.getCancerGeneCensus(geneName), indexer.getCancerHotspot(geneName)); gene = new Gene(geneId, geneName, chromosome, gtf.getStart(), gtf.getEnd(), gtf.getStrand(), "1", geneBiotype, - status, SOURCE, geneDescription, new ArrayList<>(), null, geneAnnotation); + KNOWN_STATUS, SOURCE, geneDescription, new ArrayList<>(), null, geneAnnotation); geneDbxrefs = parseXrefs(gtf); } @@ -567,7 +596,7 @@ private Transcript getTranscript(Gtf gtf, String chromosome, String transcriptId if ("mRNA".equals(biotype)) { biotype = "protein_coding"; } - transcript = new Transcript(transcriptId, name, chromosome, gtf.getStart(), gtf.getEnd(), gtf.getStrand(), biotype, status, + transcript = new Transcript(transcriptId, name, chromosome, gtf.getStart(), gtf.getEnd(), gtf.getStrand(), biotype, KNOWN_STATUS, 0, 0, 0, 0, 0, indexer.getCdnaFasta(transcriptId), "", "", "", version, SOURCE, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), new HashSet<>(), new TranscriptAnnotation()); @@ -644,6 +673,20 @@ private String getSequenceName(String fullSequenceName) { return fullSequenceName; } +// private void setAnnotationFiles(Path refSeqDirectoryPath) { +// Path geneDirectoryPath = refSeqDirectoryPath.getParent().resolve("gene"); +// maneFile = geneDirectoryPath.resolve("MANE.GRCh38.v1.0.summary.txt.gz"); +// lrgFile = geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"); +// geneDrugFile = geneDirectoryPath.resolve("dgidb.tsv"); +// disgenetFile = geneDirectoryPath.resolve("all_gene_disease_associations.tsv.gz"); +// hpoFile = geneDirectoryPath.resolve("phenotype_to_genes.txt"); +// cancerGeneCensus = geneDirectoryPath.resolve("cancer-gene-census.tsv"); +// cancerHotspot = geneDirectoryPath.resolve("hotspots_v2.xls"); +// tso500File = geneDirectoryPath.resolve("TSO500_transcripts.txt"); +// eglhHaemOncFile = geneDirectoryPath.resolve("EGLH_HaemOnc_transcripts.txt"); +// miRTarBaseFile = refSeqDirectoryPath.getParent().resolve("regulation/hsa_MTI.xlsx"); +// } + static { REFSEQ_CHROMOSOMES.put("NC_000001", "1"); REFSEQ_CHROMOSOMES.put("NC_000002", "2"); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilderIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilderIndexer.java index 45520161f5..9aae170ce2 100644 --- 
a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilderIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilderIndexer.java @@ -16,25 +16,16 @@ package org.opencb.cellbase.lib.builders; -import org.apache.commons.lang.StringUtils; -import org.apache.poi.ss.usermodel.*; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.opencb.biodata.formats.io.FileFormatException; -import org.opencb.biodata.models.core.MirnaTarget; -import org.opencb.biodata.models.core.TargetGene; -import org.opencb.biodata.models.variant.avro.GeneDrugInteraction; -import org.opencb.biodata.models.variant.avro.GeneTraitAssociation; -import org.opencb.commons.utils.FileUtils; +import org.opencb.cellbase.core.exception.CellBaseException; import org.rocksdb.RocksDBException; -import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; -public class RefSeqGeneBuilderIndexer extends GeneBuilderIndexer{ +import static org.opencb.cellbase.lib.EtlCommons.REFSEQ_DATA; + +public class RefSeqGeneBuilderIndexer extends GeneBuilderIndexer { public RefSeqGeneBuilderIndexer(Path refSeqDirectoryPath) { super(refSeqDirectoryPath); @@ -42,249 +33,17 @@ public RefSeqGeneBuilderIndexer(Path refSeqDirectoryPath) { public void index(Path maneFile, Path lrgFile, Path proteinFastaFile, Path cDnaFastaFile, Path geneDrugFile, Path hpoFilePath, Path disgenetFile, Path miRTarBaseFile, Path cancerGeneGensus, Path cancerHotspot, Path tso500File, - Path eglhHaemOncFile) throws IOException, RocksDBException, FileFormatException { - indexManeMapping(maneFile, "refseq"); - indexLrgMapping(lrgFile, "refseq"); + Path eglhHaemOncFile) throws IOException, RocksDBException, FileFormatException, CellBaseException { + indexManeMapping(maneFile, REFSEQ_DATA); + indexLrgMapping(lrgFile, REFSEQ_DATA); indexProteinSequences(proteinFastaFile); indexCdnaSequences(cDnaFastaFile); indexDrugs(geneDrugFile); indexDiseases(hpoFilePath, disgenetFile); indexMiRTarBase(miRTarBaseFile); - indexCancerGeneCensus(cancerGeneGensus); +// indexCancerGeneCensus(cancerGeneGensus); indexCancerHotspot(cancerHotspot); - indexTSO500(tso500File); - indexEGLHHaemOnc(eglhHaemOncFile); - } - - private void indexDrugs(Path geneDrugFile) throws IOException, RocksDBException { - if (geneDrugFile != null && Files.exists(geneDrugFile) && Files.size(geneDrugFile) > 0) { - logger.info("Loading gene-drug interaction data from '{}'", geneDrugFile); - BufferedReader br = FileUtils.newBufferedReader(geneDrugFile); - - // Skip header - br.readLine(); - - int lineCounter = 1; - String line; - String currentGene = ""; - List drugs = new ArrayList<>(); - while ((line = br.readLine()) != null) { - String[] parts = line.split("\t"); - String geneName = parts[0]; - if (currentGene.equals("")) { - currentGene = geneName; - } else if (!currentGene.equals(geneName)) { - rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); - drugs = new ArrayList<>(); - currentGene = geneName; - } - - String source = null; - if (parts.length >= 4) { - source = parts[3]; - } - - String interactionType = null; - if (parts.length >= 5) { - interactionType = parts[4]; - } - - String drugName = null; - if (parts.length >= 8) { - // if drug name column is empty, use drug claim name instead - drugName = StringUtils.isEmpty(parts[7]) ? 
parts[6] : parts[7]; - } - if (StringUtils.isEmpty(drugName)) { - // no drug name - continue; - } - - String chemblId = null; - if (parts.length >= 9) { - chemblId = parts[8]; - } - - List publications = new ArrayList<>(); - if (parts.length >= 10 && parts[9] != null) { - publications = Arrays.asList(parts[9].split(",")); - } - - GeneDrugInteraction drug = new GeneDrugInteraction( - geneName, drugName, source, null, null, interactionType, chemblId, publications); - drugs.add(drug); - lineCounter++; - } - br.close(); - // update last gene - rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); - } else { - logger.warn("Gene drug file " + geneDrugFile + " not found"); - logger.warn("Ignoring " + geneDrugFile); - } - } - - public List getDrugs(String id) throws RocksDBException, IOException { - String key = id + DRUGS_SUFFIX; - return rocksDbManager.getDrugs(rocksdb, key); - } - - private void indexDiseases(Path hpoFilePath, Path disgenetFilePath) throws IOException, RocksDBException { - Map> geneDiseaseAssociationMap = new HashMap<>(50000); - - String line; - if (hpoFilePath != null && hpoFilePath.toFile().exists() && Files.size(hpoFilePath) > 0) { - BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFilePath); - // skip first header line - bufferedReader.readLine(); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - String omimId = fields[6]; - String geneSymbol = fields[3]; - String hpoId = fields[0]; - String diseaseName = fields[1]; - GeneTraitAssociation disease = - new GeneTraitAssociation(omimId, diseaseName, hpoId, 0f, 0, new ArrayList<>(), new ArrayList<>(), "hpo"); - addValueToMapElement(geneDiseaseAssociationMap, geneSymbol, disease); - } - bufferedReader.close(); - } - - if (disgenetFilePath != null && disgenetFilePath.toFile().exists() && Files.size(disgenetFilePath) > 0) { - BufferedReader bufferedReader = FileUtils.newBufferedReader(disgenetFilePath); - // skip first header line - bufferedReader.readLine(); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - String diseaseId = fields[4]; - String diseaseName = fields[5]; - String score = fields[9]; - String numberOfPubmeds = fields[13].trim(); - String numberOfSNPs = fields[14]; - String source = fields[15]; - GeneTraitAssociation disease = new GeneTraitAssociation(diseaseId, diseaseName, "", Float.parseFloat(score), - Integer.parseInt(numberOfPubmeds), Arrays.asList(numberOfSNPs), Arrays.asList(source), "disgenet"); - addValueToMapElement(geneDiseaseAssociationMap, fields[1], disease); - } - bufferedReader.close(); - } - - for (Map.Entry> entry : geneDiseaseAssociationMap.entrySet()) { - rocksDbManager.update(rocksdb, entry.getKey() + DISEASE_SUFFIX, entry.getValue()); - } - } - - public List getDiseases(String id) throws RocksDBException, IOException { - String key = id + DISEASE_SUFFIX; - return rocksDbManager.getDiseases(rocksdb, key); - } - - private void indexMiRTarBase(Path miRTarBaseFile) throws IOException, RocksDBException { - if (miRTarBaseFile != null && Files.exists(miRTarBaseFile) && Files.size(miRTarBaseFile) > 0) { - logger.info("Loading mirna targets from '{}'", miRTarBaseFile); - FileInputStream file = new FileInputStream(miRTarBaseFile.toFile()); - Workbook workbook = new XSSFWorkbook(file); - Sheet sheet = workbook.getSheetAt(0); - Iterator iterator = sheet.iterator(); - String currentMiRTarBaseId = null; - String currentMiRNA = null; - String currentGene = null; - List targetGenes = new 
ArrayList(); - Map> geneToMirna = new HashMap(); - while (iterator.hasNext()) { - - Row currentRow = iterator.next(); - Iterator cellIterator = currentRow.iterator(); - - Cell cell = cellIterator.next(); - String miRTarBaseId = cell.getStringCellValue(); - - // skip header - if (miRTarBaseId.startsWith("miRTarBase")) { - continue; - } - - if (currentMiRTarBaseId == null) { - currentMiRTarBaseId = miRTarBaseId; - } - - cell = cellIterator.next(); - String miRNA = cell.getStringCellValue(); - if (currentMiRNA == null) { - currentMiRNA = miRNA; - } - - // species - cellIterator.next(); - - cell = cellIterator.next(); - String geneName = cell.getStringCellValue(); - if (currentGene == null) { - currentGene = geneName; - } - - // entrez - cellIterator.next(); - // species - cellIterator.next(); - - if (!miRTarBaseId.equals(currentMiRTarBaseId) || !geneName.equals(currentGene)) { - // new entry, store current one - MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, - targetGenes); - addValueToMapElement(geneToMirna, currentGene, miRnaTarget); - targetGenes = new ArrayList(); - currentGene = geneName; - currentMiRTarBaseId = miRTarBaseId; - currentMiRNA = miRNA; - } - - // experiment - cell = cellIterator.next(); - String experiment = cell.getStringCellValue(); - - // support type - cell = cellIterator.next(); - String supportType = cell.getStringCellValue(); - - // pubmeds - cell = cellIterator.next(); - String pubmed = null; - // seems to vary, so check both - if (cell.getCellType().equals(CellType.NUMERIC)) { - pubmed = String.valueOf(cell.getNumericCellValue()); - } else { - pubmed = cell.getStringCellValue(); - } - - targetGenes.add(new TargetGene(experiment, supportType, pubmed)); - } - - // parse last entry - MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, - targetGenes); - addValueToMapElement(geneToMirna, currentGene, miRnaTarget); - - for (Map.Entry> entry : geneToMirna.entrySet()) { - rocksDbManager.update(rocksdb, entry.getKey() + MIRTARBASE_SUFFIX, entry.getValue()); - } - } else { - logger.error("mirtarbase file not found"); - } +// indexTSO500(tso500File); +// indexEGLHHaemOnc(eglhHaemOncFile); } - - public List getMirnaTargets(String geneName) throws RocksDBException, IOException { - String key = geneName + MIRTARBASE_SUFFIX; - return rocksDbManager.getMirnaTargets(rocksdb, key); - } - - private static void addValueToMapElement(Map> map, String key, T value) { - if (map.containsKey(key)) { - map.get(key).add(value); - } else { - List valueList = new ArrayList<>(); - valueList.add(value); - map.put(key, valueList); - } - } - } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryFeatureBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryFeatureBuilder.java index 03fc3a1cd6..83eccb9885 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryFeatureBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryFeatureBuilder.java @@ -16,60 +16,152 @@ package org.opencb.cellbase.lib.builders; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.feature.gff.Gff2; import org.opencb.biodata.formats.feature.gff.io.Gff2Reader; import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.biodata.models.core.RegulatoryFeature; +import org.opencb.biodata.models.core.RegulatoryPfm; +import 
org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; +import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer; import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.EtlCommons; +import java.io.File; import java.io.IOException; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.opencb.cellbase.lib.EtlCommons.*; public class RegulatoryFeatureBuilder extends CellBaseBuilder { - private final Path gffFile; - protected Set regulatoryFeatureSet; + private Path regulationPath; + + private Set regulatoryFeatureSet; - public RegulatoryFeatureBuilder(Path regulatoryDirectoryPath, CellBaseSerializer serializer) { + public RegulatoryFeatureBuilder(Path regulationPath, CellBaseSerializer serializer) { super(serializer); - gffFile = regulatoryDirectoryPath.resolve(EtlCommons.REGULATORY_FEATURES_FILE); + this.regulationPath = regulationPath; } @Override public void parse() throws Exception { - logger.info("Parsing regulatory features..."); - if (Files.exists(gffFile)) { - parseGffFile(gffFile); - } else { - logger.warn("No regulatory features GFF file found {}", EtlCommons.REGULATORY_FEATURES_FILE); - logger.warn("Skipping regulatory features GFF file parsing. Regulatory feature data models will not be built."); + logger.info(BUILDING_LOG_MESSAGE, getDataName(REGULATION_DATA)); + + // Sanity check + checkDirectory(regulationPath, getDataName(REGULATION_DATA)); + + // Check build regulatory files + DataSource dataSource = dataSourceReader.readValue(regulationPath.resolve(getDataVersionFilename(REGULATORY_BUILD_DATA)).toFile()); + List regulatoryFiles = checkFiles(dataSource, regulationPath, getDataCategory(REGULATORY_BUILD_DATA) + "/" + + getDataName(REGULATORY_BUILD_DATA)); + if (regulatoryFiles.size() != 1) { + throw new CellBaseException("One " + getDataName(REGULATORY_BUILD_DATA) + " file is expected, but currently there are " + + regulatoryFiles.size() + " files"); } + + // Check motif features files + dataSource = dataSourceReader.readValue(regulationPath.resolve(getDataVersionFilename(MOTIF_FEATURES_DATA)).toFile()); + List motifFeaturesFiles = checkFiles(dataSource, regulationPath, getDataCategory(MOTIF_FEATURES_DATA) + "/" + + getDataName(MOTIF_FEATURES_DATA)); + if (motifFeaturesFiles.size() != 2) { + throw new CellBaseException("Two " + getDataName(MOTIF_FEATURES_DATA) + " files are expected, but currently there are " + + motifFeaturesFiles.size() + " files"); + } + + // Downloading and building pfm matrices + File motifFile = motifFeaturesFiles.get(0).getName().endsWith("tbi") ? 
motifFeaturesFiles.get(1) : motifFeaturesFiles.get(0); + loadPfmMatrices(motifFile.toPath(), serializer.getOutdir()); + + // Parse regulatory build features + parseGffFile(regulatoryFiles.get(0).toPath()); + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(REGULATION_DATA)); } protected void parseGffFile(Path regulatoryFeatureFile) throws IOException, NoSuchMethodException, FileFormatException { + logger.info(PARSING_LOG_MESSAGE, regulatoryFeatureFile); + + // Create and populate regulatory feature set regulatoryFeatureSet = new HashSet<>(); - if (regulatoryFeatureFile != null && Files.exists(regulatoryFeatureFile) && !Files.isDirectory(regulatoryFeatureFile) - && Files.size(regulatoryFeatureFile) > 0) { - Gff2Reader regulatoryFeatureReader = new Gff2Reader(regulatoryFeatureFile); + try (Gff2Reader regulatoryFeatureReader = new Gff2Reader(regulatoryFeatureFile)) { Gff2 feature; while ((feature = regulatoryFeatureReader.read()) != null) { regulatoryFeatureSet.add(feature); } - regulatoryFeatureReader.close(); } - int i = 0; // Serialize and save results for (Gff2 feature : regulatoryFeatureSet) { - // ID=TF_binding_site:ENSR00000243312; + // In order to get the ID we split the attribute format: ID=TF_binding_site:ENSR00000243312; .... String id = feature.getAttribute().split(";")[0].split(":")[1]; RegulatoryFeature regulatoryFeature = new RegulatoryFeature(id, feature.getSequenceName(), feature.getFeature(), feature.getStart(), feature.getEnd()); serializer.serialize(regulatoryFeature); } serializer.close(); + + logger.info(PARSING_DONE_LOG_MESSAGE, regulatoryFeatureFile); + } + + private void loadPfmMatrices(Path motifGffFile, Path buildFolder) throws IOException, NoSuchMethodException, FileFormatException, + InterruptedException { + Path regulatoryPfmPath = buildFolder.resolve(REGULATORY_PFM_BASENAME + ".json.gz"); + logger.info("Downloading and building PFM matrices in {} from {} ...", regulatoryPfmPath, motifGffFile); + if (Files.exists(regulatoryPfmPath)) { + logger.info("{} is already built", regulatoryPfmPath); + return; + } + + Set motifIds = new HashSet<>(); + try (Gff2Reader motifsFeatureReader = new Gff2Reader(motifGffFile)) { + Gff2 tfbsMotifFeature; + Pattern filePattern = Pattern.compile("ENSPFM(\\d+)"); + while ((tfbsMotifFeature = motifsFeatureReader.read()) != null) { + String pfmId = getMatrixId(filePattern, tfbsMotifFeature); + if (StringUtils.isNotEmpty(pfmId)) { + motifIds.add(pfmId); + } + } + } + + ObjectMapper mapper = new ObjectMapper(); + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, REGULATORY_PFM_BASENAME, true); + if (logger.isInfoEnabled()) { + logger.info("Looking up {} PFMs", motifIds.size()); + } + for (String pfmId : motifIds) { + String urlString = "https://rest.ensembl.org/species/homo_sapiens/binding_matrix/" + pfmId + + "?unit=frequencies;content-type=application/json"; + URL url = new URL(urlString); + RegulatoryPfm regulatoryPfm = mapper.readValue(url, RegulatoryPfm.class); + serializer.serialize(regulatoryPfm); + // https://github.com/Ensembl/ensembl-rest/wiki/Rate-Limits + TimeUnit.MILLISECONDS.sleep(250); + } + serializer.close(); + + logger.info("Downloading and building PFM matrices at {} done.", regulatoryPfmPath); + } + + private String getMatrixId(Pattern pattern, Gff2 tfbsMotifFeature) { + Matcher matcher = pattern.matcher(tfbsMotifFeature.getAttribute()); + if (matcher.find()) { + return matcher.group(0); + } + return null; + } + + public Set getRegulatoryFeatureSet() { + return regulatoryFeatureSet; } } 
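The fixed 250 ms pause in loadPfmMatrices() keeps the PFM downloads under the Ensembl REST rate
limit in the normal case. A hypothetical hardening (sketched below with invented names, not part
of this patch) would also honour the Retry-After header that rest.ensembl.org sends with HTTP 429
responses, as described in the rate-limits page linked in the code above:

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;

/** Minimal sketch: fetch a REST URL, retrying after the server-suggested delay on HTTP 429. */
public final class PoliteFetch {

    public static String get(String urlString) throws IOException, InterruptedException {
        for (int attempt = 0; attempt < 3; attempt++) {
            HttpURLConnection conn = (HttpURLConnection) new URL(urlString).openConnection();
            conn.setRequestProperty("Accept", "application/json");
            if (conn.getResponseCode() == 429) {
                // Retry-After is expressed in seconds; fall back to 1 s if the header is missing
                String retryAfter = conn.getHeaderField("Retry-After");
                TimeUnit.SECONDS.sleep(retryAfter != null ? Long.parseLong(retryAfter.trim()) : 1L);
                continue;
            }
            try (InputStream is = conn.getInputStream()) {
                return new String(is.readAllBytes(), StandardCharsets.UTF_8);
            }
        }
        throw new IOException("Rate-limit retries exhausted for " + urlString);
    }
}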
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryRegionBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryRegionBuilder.java deleted file mode 100644 index 3727ac4a69..0000000000 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryRegionBuilder.java +++ /dev/null @@ -1,607 +0,0 @@ -/* - * Copyright 2015-2020 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.cellbase.lib.builders; - -import org.opencb.biodata.models.core.RegulatoryFeature; -import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.EtlCommons; -import org.opencb.commons.utils.FileUtils; - -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.sql.*; -import java.util.*; - -/** - * User: fsalavert. - * Date: 4/10/13 - * Time: 10:14 AM - */ -@Deprecated -public class RegulatoryRegionBuilder extends CellBaseBuilder { - - private static final int CHUNK_SIZE = 2000; - private static final String REGULATORY_FEATURES = "regulatory_features"; - @Deprecated - private static final String DEPRECATED_MOTIF_FEATURES = "deprecated_motif_features"; - private static final String MOTIF_FEATURES = "motif_features"; - private static final String FEATURE_TYPE = "feature_type"; - private static final String ID = "id"; - private static final String BINDING_MATRIX = "binding_matrix"; - private static final String MOTIF_FEATURE_TYPE = "motif_feature_type"; - private Path regulatoryRegionPath; - - public RegulatoryRegionBuilder(Path regulatoryRegionFilesDir, CellBaseSerializer serializer) { - super(serializer); - - this.regulatoryRegionPath = regulatoryRegionFilesDir; - - } - - public void createSQLiteRegulatoryFiles(Path regulatoryRegionPath) - throws SQLException, IOException, ClassNotFoundException, NoSuchMethodException { - List gffColumnNames = Arrays.asList("seqname", "source", "feature", "start", "end", "score", "strand", "frame", "group"); - List gffColumnTypes = Arrays.asList("TEXT", "TEXT", "TEXT", "INT", "INT", "TEXT", "TEXT", "TEXT", "TEXT"); - - // Path regulatoryRegionPath = regulationDir.toPath(); - - Path filePath; - - filePath = regulatoryRegionPath.resolve(EtlCommons.REGULATORY_FEATURES_FILE); - createSQLiteRegulatoryFiles(filePath, REGULATORY_FEATURES, gffColumnNames, gffColumnTypes); - - filePath = regulatoryRegionPath.resolve(EtlCommons.MOTIF_FEATURES_FILE); - createSQLiteRegulatoryFiles(filePath, MOTIF_FEATURES, gffColumnNames, gffColumnTypes); - - // TODO: REMOVE - // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DEPRECATED - filePath = regulatoryRegionPath.resolve("AnnotatedFeatures.gff.gz"); - createSQLiteRegulatoryFiles(filePath, "annotated_features", gffColumnNames, gffColumnTypes); - - - filePath = regulatoryRegionPath.resolve("MotifFeatures.gff.gz"); - createSQLiteRegulatoryFiles(filePath, DEPRECATED_MOTIF_FEATURES, gffColumnNames, gffColumnTypes); 
- - - filePath = regulatoryRegionPath.resolve("RegulatoryFeatures_MultiCell.gff.gz"); - createSQLiteRegulatoryFiles(filePath, "regulatory_features_multicell", gffColumnNames, gffColumnTypes); - // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< DEPRECATED - - - -// GFFColumnNames = Arrays.asList("seqname", "source", "feature", "start", "end", "score", "strand", "frame"); -// GFFColumnTypes = Arrays.asList("TEXT", "TEXT", "TEXT", "INT", "INT", "TEXT", "TEXT", "TEXT"); - filePath = regulatoryRegionPath.resolve("mirna_uniq.gff.gz"); - if (Files.exists(filePath)) { - createSQLiteRegulatoryFiles(filePath, "mirna_uniq", gffColumnNames, gffColumnTypes); - } - - } - - @Override - public void parse() throws SQLException, IOException, ClassNotFoundException, NoSuchMethodException { - if (regulatoryRegionPath == null || !Files.exists(regulatoryRegionPath) || !Files.isDirectory(regulatoryRegionPath)) { - throw new IOException("Regulation directory whether does not exist, is not a directory or cannot be read"); - } - - // Create the SQLite databases - createSQLiteRegulatoryFiles(regulatoryRegionPath); - - String chunkIdSuffix = CHUNK_SIZE / 1000 + "k"; - - Path regulatoryFilePath = regulatoryRegionPath.resolve(EtlCommons.REGULATORY_FEATURES_FILE + ".db"); - Path motifFilePath = regulatoryRegionPath.resolve(EtlCommons.MOTIF_FEATURES_FILE + ".db"); - Path annotatedFilePath = regulatoryRegionPath.resolve("AnnotatedFeatures.gff.gz.db"); - Path deprecatedMotifFilePath = regulatoryRegionPath.resolve("MotifFeatures.gff.gz.db"); - Path deprecatedRegulatoryFilePath = regulatoryRegionPath.resolve("RegulatoryFeatures_MultiCell.gff.gz.db"); - Path mirnaFilePath = regulatoryRegionPath.resolve("mirna_uniq.gff.gz.db"); - - List filePaths = Arrays.asList(regulatoryFilePath, motifFilePath, annotatedFilePath, - deprecatedMotifFilePath, deprecatedRegulatoryFilePath); - List tableNames = Arrays.asList(REGULATORY_FEATURES, MOTIF_FEATURES, "annotated_features", - DEPRECATED_MOTIF_FEATURES, "regulatory_features_multicell"); - - if (Files.exists(mirnaFilePath)) { - filePaths.add(mirnaFilePath); - tableNames.add("mirna_uniq"); - } - - // Fetching and joining all chromosomes found in the different databases - Set setChr = new HashSet<>(); - setChr.addAll(getChromosomesList(regulatoryFilePath, REGULATORY_FEATURES)); - setChr.addAll(getChromosomesList(motifFilePath, MOTIF_FEATURES)); - setChr.addAll(getChromosomesList(annotatedFilePath, "annotated_features")); - setChr.addAll(getChromosomesList(deprecatedMotifFilePath, DEPRECATED_MOTIF_FEATURES)); - setChr.addAll(getChromosomesList(deprecatedRegulatoryFilePath, "regulatory_features_multicell")); - if (Files.exists(mirnaFilePath)) { - setChr.addAll(getChromosomesList(mirnaFilePath, "mirna_uniq")); - } - - List chromosomes = new ArrayList<>(setChr); - List regulatoryFeatures; - HashSet chunksHash; - for (String chromosome : chromosomes) { - for (int i = 0; i < tableNames.size(); i++) { - chunksHash = new HashSet<>(); - regulatoryFeatures = queryChromosomesRegulatoryDB(filePaths.get(i), tableNames.get(i), chromosome); - for (RegulatoryFeature regulatoryFeature : regulatoryFeatures) { - int firstChunkId = getChunkId(regulatoryFeature.getStart(), CHUNK_SIZE); - int lastChunkId = getChunkId(regulatoryFeature.getEnd(), CHUNK_SIZE); - - List chunkIds = new ArrayList<>(); - String chunkId; - for (int j = firstChunkId; j <= lastChunkId; j++) { - chunkId = chromosome + "_" + j + "_" + chunkIdSuffix; - chunkIds.add(chunkId); - //count chunks - if (!chunksHash.contains(j)) { - 
chunksHash.add(j); - } - } -// regulatoryFeature.setChunkIds(chunkIds); - - // remove 'chr' prefix -// if (genericFeature.getChromosome() != null) { -// genericFeature.setSequenceName(genericFeature.getSequenceName().replace("chr", "")); -// } - serializer.serialize(regulatoryFeature); - } - } - } - } - - - public void createSQLiteRegulatoryFiles(Path filePath, String tableName, List columnNames, List columnTypes) - throws ClassNotFoundException, IOException, SQLException { - int limitRows = 100000; - int batchCount = 0; - - if (!Files.exists(filePath) || Files.size(filePath) == 0) { - return; - } - - Path dbPath = Paths.get(filePath.toString() + ".db"); - if (Files.exists(dbPath) && Files.size(dbPath) > 0) { - return; - } - - BufferedReader br = FileUtils.newBufferedReader(filePath); - - Class.forName("org.sqlite.JDBC"); - Connection conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath.toString()); - conn.setAutoCommit(false); //Set false to perform commits manually and increase performance on insertion - - //Create table query - Statement createTables = conn.createStatement(); - - StringBuilder sbQuery = new StringBuilder(); - sbQuery.append("CREATE TABLE if not exists " + tableName + "("); - for (int i = 0; i < columnNames.size(); i++) { //columnNames and columnTypes must have the same size - sbQuery.append("'" + columnNames.get(i) + "' " + columnTypes.get(i) + ","); - } - sbQuery.deleteCharAt(sbQuery.length() - 1); - sbQuery.append(")"); - - System.out.println(sbQuery.toString()); - createTables.executeUpdate(sbQuery.toString()); - - //Prepare insert query - sbQuery = new StringBuilder(); - sbQuery.append("INSERT INTO " + tableName + "("); - for (int i = 0; i < columnNames.size(); i++) { - sbQuery.append("'" + columnNames.get(i) + "',"); - } - sbQuery.deleteCharAt(sbQuery.length() - 1); - sbQuery.append(") values ("); - sbQuery.append(repeat("?,", columnNames.size())); - sbQuery.deleteCharAt(sbQuery.length() - 1); - sbQuery.append(")"); - System.out.println(sbQuery.toString()); - - PreparedStatement ps = conn.prepareStatement(sbQuery.toString()); - - //Read file - String line = null; - while ((line = br.readLine()) != null) { - - insertByType(ps, getFields(line, tableName), columnTypes); - ps.addBatch(); - batchCount++; - - //commit batch - if (batchCount % limitRows == 0 && batchCount != 0) { - ps.executeBatch(); - conn.commit(); - } - - } - br.close(); - - //Execute last Batch - ps.executeBatch(); - conn.commit(); - - //Create index - System.out.println("creating indices..."); - createTables.executeUpdate("CREATE INDEX " + tableName + "_seqname_idx on " + tableName + "(" + columnNames.get(0) + ")"); - System.out.println("indices created."); - - conn.commit(); - conn.close(); - } - - public List getChromosomesList(Path dbPath, String tableName) throws IOException { - - try { - FileUtils.checkFile(dbPath); - } catch (IOException e) { - logger.warn(e.getMessage()); - return Collections.emptyList(); - } - - List chromosomes = new ArrayList<>(); - try { - Class.forName("org.sqlite.JDBC"); - Connection conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath.toString()); - - Statement query = conn.createStatement(); - ResultSet rs = query.executeQuery("select distinct(seqname) from " + tableName); -// ResultSet rs = query.executeQuery("select distinct(seqname) from " + tableName + " where seqname like 'chr%'"); - - while (rs.next()) { - chromosomes.add(rs.getString(1)); - } - conn.close(); - - } catch (ClassNotFoundException | SQLException e) { - e.printStackTrace(); - } - return 
chromosomes; - } - - public List<RegulatoryFeature> queryChromosomesRegulatoryDB(Path dbPath, String tableName, String chromosome) { - - try { - FileUtils.checkFile(dbPath); - } catch (IOException e) { - logger.warn(e.getMessage()); - return Collections.emptyList(); - } - - Connection conn; - List<RegulatoryFeature> regulatoryFeatures = new ArrayList<>(); - try { - Class.forName("org.sqlite.JDBC"); - conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath.toString()); - - Statement query = conn.createStatement(); - ResultSet rs = query.executeQuery("select * from " + tableName + " where seqname='" + chromosome + "'"); -// ResultSet rs = query.executeQuery("select * from " + tableName + " where seqname='chr" + chromosome + "'"); - while (rs.next()) { - regulatoryFeatures.add(getDeprecatedRegulatoryFeature(rs, tableName)); - } - conn.close(); - - } catch (ClassNotFoundException | SQLException e) { - e.printStackTrace(); - } - return regulatoryFeatures; - } - - public static List<RegulatoryFeature> queryRegulatoryDB(Path dbPath, String tableName, String chrFile, int start, int end) { - Connection conn = null; - List<RegulatoryFeature> regulatoryFeatures = new ArrayList<>(); - try { - Class.forName("org.sqlite.JDBC"); - conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath.toString()); - - Statement query = conn.createStatement(); - ResultSet rs = query.executeQuery("select * from " + tableName + " where start<=" + end + " AND end>=" + start); - - while (rs.next()) { - regulatoryFeatures.add(getDeprecatedRegulatoryFeature(rs, tableName)); - } - conn.close(); - - } catch (ClassNotFoundException | SQLException e) { - e.printStackTrace(); - } - return regulatoryFeatures; - } - - private static RegulatoryFeature getDeprecatedRegulatoryFeature(ResultSet rs, String tableName) throws SQLException { - RegulatoryFeature regulatoryFeature = null; - switch (tableName.toLowerCase()) { - case REGULATORY_FEATURES: - regulatoryFeature = getRegulatoryFeature(rs); - break; - case MOTIF_FEATURES: - regulatoryFeature = getMotifFeature(rs); - break; - case "annotated_features": - regulatoryFeature = getAnnotatedFeature(rs); - break; - case "regulatory_features_multicell": - regulatoryFeature = getDeprecatedRegulatoryFeature(rs); - break; - case DEPRECATED_MOTIF_FEATURES: - regulatoryFeature = getDeprecatedMotifFeature(rs); - break; - case "mirna_uniq": - regulatoryFeature = getMirnaFeature(rs); - break; - default: - break; - } - return regulatoryFeature; - } - - private static RegulatoryFeature getMotifFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map<String, String> groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3)); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - - // Seems weird that the motif_feature_type property is used to fill the Name field. 
However, this is how - // it was being done for the previous ENSEMBL files - regulatoryFeature.setName(groupFields.get(MOTIF_FEATURE_TYPE)); - - regulatoryFeature.setMatrix(groupFields.get(BINDING_MATRIX)); - - return regulatoryFeature; - } - - private static RegulatoryFeature getRegulatoryFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map<String, String> groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setId(groupFields.get(ID)); - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(groupFields.get(FEATURE_TYPE).replace(" ", "_")); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - - return regulatoryFeature; - } - - private static RegulatoryFeature getAnnotatedFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map<String, String> groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3)); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - regulatoryFeature.setFrame(rs.getString(8)); - - regulatoryFeature.setName(groupFields.get("name")); - regulatoryFeature.setAlias(groupFields.get("alias")); - regulatoryFeature.setFeatureClass(groupFields.get("class")); - regulatoryFeature.getCellTypes().add(groupFields.get("cell_type")); - - return regulatoryFeature; - } - - @Deprecated - private static RegulatoryFeature getDeprecatedRegulatoryFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map<String, String> groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3)); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - regulatoryFeature.setFrame(rs.getString(8)); - regulatoryFeature.setFrame(rs.getString(9)); - - return regulatoryFeature; - } - - @Deprecated - private static RegulatoryFeature getDeprecatedMotifFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map<String, String> groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3) + "_motif"); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - regulatoryFeature.setFrame(rs.getString(8)); - - String[] split = groupFields.get("name").split(":"); - regulatoryFeature.setName(split[0]); - regulatoryFeature.setMatrix(split[1]); - - return regulatoryFeature; - } - - private static RegulatoryFeature 
getMirnaFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map<String, String> groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3)); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - regulatoryFeature.setFrame(rs.getString(8)); - - regulatoryFeature.setFeatureClass("microRNA"); - regulatoryFeature.setName(groupFields.get("name")); - - return regulatoryFeature; - } - - private static Map<String, String> getGroupFields(String group) { - //process group column - Map<String, String> groupFields = new HashMap<>(); - String[] attributeFields = group.split(";"); - String[] attributeKeyValue; - for (String attributeField : attributeFields) { - attributeKeyValue = attributeField.trim().split("="); - groupFields.put(attributeKeyValue[0].toLowerCase(), attributeKeyValue[1]); - } - return groupFields; - } - - - public static List<String> getFields(String line, String tableName) { - List<String> fields = new ArrayList<>(); - switch (tableName.toLowerCase()) { - case REGULATORY_FEATURES: - fields = getRegulatoryFeaturesFields(line); - break; - case MOTIF_FEATURES: - fields = getMotifFeaturesFields(line); - break; - case "annotated_features": - fields = getAnnotatedFeaturesFields(line); - break; - case "regulatory_features_multicell": - fields = getRegulatoryFeaturesFields(line); - break; - case DEPRECATED_MOTIF_FEATURES: - fields = getMotifFeaturesFields(line); - break; - case "mirna_uniq": - fields = getMirnaFeaturesFields(line); - break; - default: - break; - } - return fields; - } - - @Deprecated - public static List<String> getAnnotatedFeaturesFields(String line) { - String[] fields = line.split("\t"); - fields[0] = fields[0].replace("chr", ""); - return Arrays.asList(fields); - } - - public static List<String> getRegulatoryFeaturesFields(String line) { - String[] fields = line.split("\t"); - fields[0] = fields[0].replace("chr", ""); - return Arrays.asList(fields); - } - - public static List<String> getMotifFeaturesFields(String line) { - String[] fields = line.split("\t"); - fields[0] = fields[0].replace("chr", ""); - return Arrays.asList(fields); - } - - public static List<String> getMirnaFeaturesFields(String line) { - String[] fields = line.split("\t"); - fields[0] = fields[0].replace("chr", ""); - return Arrays.asList(fields); - } - - public static void insertByType(PreparedStatement ps, List<String> fields, List<String> types) throws SQLException { - //Datatypes In SQLite Version 3 -> http://www.sqlite.org/datatype3.html - String raw; - String type; - if (types.size() == fields.size()) { - for (int i = 0; i < fields.size(); i++) { //columnNames and columnTypes must have same size - int sqliteIndex = i + 1; - raw = fields.get(i); - type = types.get(i); - - switch (type) { - case "INTEGER": - case "INT": - ps.setInt(sqliteIndex, Integer.parseInt(raw)); - break; - case "REAL": - ps.setFloat(sqliteIndex, Float.parseFloat(raw)); - break; - case "TEXT": - ps.setString(sqliteIndex, raw); - break; - default: - ps.setString(sqliteIndex, raw); - break; - } - } - } - - } - - public String repeat(String s, int n) { - if (s == null) { - return null; - } - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < n; i++) { - sb.append(s); - } - return sb.toString(); - } - - private int getChunkId(int position, int 
chunksize) { - if (chunksize <= 0) { - return position / CHUNK_SIZE; - } else { - return position / chunksize; - } - } - - private int getChunkStart(int id, int chunksize) { - if (chunksize <= 0) { - return (id == 0) ? 1 : id * CHUNK_SIZE; - } else { - return (id == 0) ? 1 : id * chunksize; - } - } - - private int getChunkEnd(int id, int chunksize) { - if (chunksize <= 0) { - return (id * CHUNK_SIZE) + CHUNK_SIZE - 1; - } else { - return (id * chunksize) + chunksize - 1; - } - } -} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RepeatsBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RepeatsBuilder.java index d37765e0b6..5ffabf747b 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RepeatsBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RepeatsBuilder.java @@ -18,8 +18,10 @@ import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.avro.Repeat; -import org.opencb.cellbase.lib.EtlCommons; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.cellbase.lib.EtlCommons; import org.opencb.commons.ProgressLogger; import org.opencb.commons.utils.FileUtils; @@ -27,56 +29,78 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.opencb.cellbase.lib.EtlCommons.*; /** * Created by fjlopez on 05/05/17. */ public class RepeatsBuilder extends CellBaseBuilder { - private static final String TRF = "trf"; - private static final String GSD = "genomicSuperDup"; - private static final String WM = "windowMasker"; + + private CellBaseConfiguration configuration; + private final Path filesDir; - public RepeatsBuilder(Path filesDir, CellBaseFileSerializer serializer) { + public RepeatsBuilder(Path filesDir, CellBaseFileSerializer serializer, CellBaseConfiguration configuration) { super(serializer); this.filesDir = filesDir; + this.configuration = configuration; } @Override public void parse() throws Exception { + logger.info(BUILDING_LOG_MESSAGE, getDataName(REPEATS_DATA)); + + // Sanity check + checkDirectory(filesDir, getDataName(REPEATS_DATA)); - logger.info("Parsing repeats..."); - if (Files.exists(filesDir.resolve(EtlCommons.TRF_FILE))) { - parseTrfFile(filesDir.resolve(EtlCommons.TRF_FILE)); - } else { - logger.warn("No TRF file found {}", EtlCommons.TRF_FILE); - logger.warn("Skipping TRF file parsing. TRF data models will not be built."); + // Check Simple Repeats (TRF) filename + String trfFilename = Paths.get(configuration.getDownload().getSimpleRepeats().getFiles().get(SIMPLE_REPEATS_FILE_ID)).getFileName() + .toString(); + if (!Files.exists(filesDir.resolve(trfFilename))) { + throw new CellBaseException(getMessageMissingFile(TRF_DATA, trfFilename, filesDir)); } - if (Files.exists(filesDir.resolve(EtlCommons.GSD_FILE))) { - parseGsdFile(filesDir.resolve(EtlCommons.GSD_FILE)); - } else { - logger.warn("No Genomic Super Duplications file found {}", EtlCommons.GSD_FILE); - logger.warn("Skipping Genomic Super Duplications file parsing. 
" - + "Genomic Super Duplications data models will not be built."); + // Check Genomic Super Duplications (GSD) file + String gsdFilename = Paths.get(configuration.getDownload().getGenomicSuperDups().getFiles().get(GENOMIC_SUPER_DUPS_FILE_ID)) + .getFileName().toString(); + if (!Files.exists(filesDir.resolve(gsdFilename))) { + throw new CellBaseException(getMessageMissingFile(GSD_DATA, gsdFilename, filesDir)); } - if (Files.exists(filesDir.resolve(EtlCommons.WM_FILE))) { - parseWmFile(filesDir.resolve(EtlCommons.WM_FILE)); - } else { - logger.warn("No WindowMasker file found {}", EtlCommons.WM_FILE); - logger.warn("Skipping WindowMasker file parsing. WindowMasker data models will not be built."); + // Check Window Masker (WM) file + String wmFilename = Paths.get(configuration.getDownload().getWindowMasker().getFiles().get(WINDOW_MASKER_FILE_ID)).getFileName() + .toString(); + if (!Files.exists(filesDir.resolve(wmFilename))) { + throw new CellBaseException(getMessageMissingFile(WM_DATA, wmFilename, filesDir)); } - logger.info("Done."); + + // Parse TRF file + logger.info(BUILDING_LOG_MESSAGE, getDataName(TRF_DATA)); + parseTrfFile(filesDir.resolve(trfFilename)); + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(TRF_DATA)); + + // Parse GSD file + logger.info(BUILDING_LOG_MESSAGE, getDataName(GSD_DATA)); + parseGsdFile(filesDir.resolve(gsdFilename)); + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(GSD_DATA)); + + // Parse WM file + logger.info(BUILDING_LOG_MESSAGE, getDataName(WM_DATA)); + parseWmFile(filesDir.resolve(wmFilename)); + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(WM_DATA)); + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(REPEATS_DATA)); } - private void parseTrfFile(Path filePath) throws IOException { + private void parseTrfFile(Path filePath) throws IOException, CellBaseException { try (BufferedReader bufferedReader = FileUtils.newBufferedReader(filePath)) { String line = bufferedReader.readLine(); - ProgressLogger progressLogger = new ProgressLogger("Parsed TRF lines:", - () -> EtlCommons.countFileLines(filePath), 200).setBatchSize(10000); + ProgressLogger progressLogger = new ProgressLogger(getMessageParsedLines(TRF_DATA), () -> EtlCommons.countFileLines(filePath), + 200).setBatchSize(10000); while (line != null) { serializer.serialize(parseTrfLine(line)); line = bufferedReader.readLine(); @@ -90,15 +114,15 @@ private Repeat parseTrfLine(String line) { return new Repeat(null, Region.normalizeChromosome(parts[1]), Integer.valueOf(parts[2]) + 1, Integer.valueOf(parts[3]), Integer.valueOf(parts[5]), Integer.valueOf(parts[7]), - Float.valueOf(parts[6]), Float.valueOf(parts[8]) / 100, Float.valueOf(parts[10]), parts[16], TRF); + Float.valueOf(parts[6]), Float.valueOf(parts[8]) / 100, Float.valueOf(parts[10]), parts[16], TRF_DATA); } - private void parseGsdFile(Path filePath) throws IOException { + private void parseGsdFile(Path filePath) throws IOException, CellBaseException { try (BufferedReader bufferedReader = FileUtils.newBufferedReader(filePath)) { String line = bufferedReader.readLine(); - ProgressLogger progressLogger = new ProgressLogger("Parsed GSD lines:", - () -> EtlCommons.countFileLines(filePath), 200).setBatchSize(10000); + ProgressLogger progressLogger = new ProgressLogger(getMessageParsedLines(GSD_DATA), () -> EtlCommons.countFileLines(filePath), + 200).setBatchSize(10000); while (line != null) { serializer.serialize(parseGSDLine(line)); line = bufferedReader.readLine(); @@ -112,16 +136,16 @@ private Repeat parseGSDLine(String line) { return 
new Repeat(parts[11], Region.normalizeChromosome(parts[1]), Integer.valueOf(parts[2]) + 1, Integer.valueOf(parts[3]), null, null, 2f, Float.valueOf(parts[26]), null, - null, GSD); + null, GSD_DATA); } - private void parseWmFile(Path filePath) throws IOException { + private void parseWmFile(Path filePath) throws IOException, CellBaseException { try (BufferedReader bufferedReader = FileUtils.newBufferedReader(filePath)) { String line = bufferedReader.readLine(); - ProgressLogger progressLogger = new ProgressLogger("Parsed WM lines:", - () -> EtlCommons.countFileLines(filePath), 200).setBatchSize(10000); + ProgressLogger progressLogger = new ProgressLogger(getMessageParsedLines(WM_DATA), () -> EtlCommons.countFileLines(filePath), + 200).setBatchSize(10000); while (line != null) { serializer.serialize(parseWmLine(line)); line = bufferedReader.readLine(); @@ -134,6 +158,16 @@ private Repeat parseWmLine(String line) { String[] parts = line.split("\t"); return new Repeat(parts[4].replace("\t", ""), Region.normalizeChromosome(parts[1]), - Integer.valueOf(parts[2]) + 1, Integer.valueOf(parts[3]), null, null, null, null, null, null, WM); + Integer.valueOf(parts[2]) + 1, Integer.valueOf(parts[3]), null, null, null, null, null, null, WM_DATA); + } + + private String getMessageMissingFile(String data, String filename, Path folder) throws CellBaseException { + return getDataName(data) + " file " + filename + " does not exist at " + folder; } + + private String getMessageParsedLines(String data) throws CellBaseException { + return "Parsed " + getDataName(data) + " lines:"; + } + } + diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RevelScoreBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RevelScoreBuilder.java index 2ccf0cb2a1..06f38f28f0 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RevelScoreBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RevelScoreBuilder.java @@ -19,8 +19,8 @@ import org.opencb.biodata.models.core.MissenseVariantFunctionalScore; import org.opencb.biodata.models.core.TranscriptMissenseVariantFunctionalScore; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.slf4j.LoggerFactory; import java.io.*; import java.nio.file.Path; @@ -30,75 +30,95 @@ import java.util.zip.ZipFile; import java.util.zip.ZipInputStream; +import static org.opencb.cellbase.lib.EtlCommons.*; + public class RevelScoreBuilder extends CellBaseBuilder { - private Path revelFilePath = null; - private static final String SOURCE = "revel"; + private Path revelDownloadPath = null; - public RevelScoreBuilder(Path revelDirectoryPath, CellBaseSerializer serializer) { + public RevelScoreBuilder(Path revelDownloadPath, CellBaseSerializer serializer) { super(serializer); - this.revelFilePath = revelDirectoryPath.resolve("revel-v1.3_all_chromosomes.zip"); - logger = LoggerFactory.getLogger(ConservationBuilder.class); - + this.revelDownloadPath = revelDownloadPath; } @Override - public void parse() throws IOException { - logger.error("processing Revel file at " + revelFilePath.toAbsolutePath()); - ZipInputStream zis = new ZipInputStream(new FileInputStream(String.valueOf(revelFilePath))); + public void parse() throws IOException, CellBaseException { + String dataName = getDataName(REVEL_DATA); + String dataCategory = getDataCategory(REVEL_DATA); + + logger.info(CATEGORY_BUILDING_LOG_MESSAGE, dataCategory, dataName); + + // Sanity check + 
checkDirectory(revelDownloadPath, dataName); + + // Check REVEL files + List<File> revelFiles = checkFiles(dataSourceReader.readValue(revelDownloadPath.resolve(getDataVersionFilename(REVEL_DATA)) + .toFile()), revelDownloadPath, dataName); + if (revelFiles.size() != 1) { + throw new CellBaseException("One " + dataName + " file is expected, but currently there are " + revelFiles.size() + " files"); + } + + logger.info(PARSING_LOG_MESSAGE, revelFiles.get(0)); + + ZipInputStream zis = new ZipInputStream(new FileInputStream(String.valueOf(revelFiles.get(0)))); ZipEntry zipEntry = zis.getNextEntry(); - ZipFile zipFile = new ZipFile(String.valueOf(revelFilePath)); + ZipFile zipFile = new ZipFile(revelFiles.get(0).toString()); InputStream inputStream = zipFile.getInputStream(zipEntry); - BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); - - // skip header - String line = bufferedReader.readLine(); - String[] fields = null; - String lastEntry = null; - String currentEntry = null; - List<TranscriptMissenseVariantFunctionalScore> scores = new ArrayList<>(); - MissenseVariantFunctionalScore predictions = null; - while ((line = bufferedReader.readLine()) != null) { - fields = line.split(","); - String chromosome = fields[0]; - if (".".equalsIgnoreCase(fields[2])) { - // 1,12855835,.,C,A,A,D,0.175 - // skip if invalid position - continue; - } - int position = Integer.parseInt(fields[2]); - String reference = fields[3]; - String alternate = fields[4]; - String aaReference = fields[5]; - String aaAlternate = fields[6]; - double score = Double.parseDouble(fields[7]); - - currentEntry = chromosome + position; - - // new chromosome + position, store previous entry - if (lastEntry != null && !currentEntry.equals(lastEntry)) { - serializer.serialize(predictions); - scores = new ArrayList<>(); - predictions = null; + try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream))) { + // Skip header + bufferedReader.readLine(); + String[] fields; + String lastEntry = null; + String currentEntry; + List<TranscriptMissenseVariantFunctionalScore> scores = new ArrayList<>(); + MissenseVariantFunctionalScore predictions = null; + String line; + while ((line = bufferedReader.readLine()) != null) { + fields = line.split(","); + String chromosome = fields[0]; + if (".".equalsIgnoreCase(fields[2])) { + // 1,12855835,.,C,A,A,D,0.175 + // skip if invalid position + continue; + } + int position = Integer.parseInt(fields[2]); + String reference = fields[3]; + String alternate = fields[4]; + String aaReference = fields[5]; + String aaAlternate = fields[6]; + double score = Double.parseDouble(fields[7]); + + currentEntry = chromosome + position; + + // new chromosome + position, store previous entry + if (lastEntry != null && !currentEntry.equals(lastEntry)) { + serializer.serialize(predictions); + scores = new ArrayList<>(); + predictions = null; + } + + if (predictions == null) { + predictions = new MissenseVariantFunctionalScore(chromosome, position, reference, REVEL_DATA, scores); + } + + TranscriptMissenseVariantFunctionalScore predictedScore = new TranscriptMissenseVariantFunctionalScore("", alternate, + aaReference, aaAlternate, score); + scores.add(predictedScore); + lastEntry = chromosome + position; } - if (predictions == null) { - predictions = new MissenseVariantFunctionalScore(chromosome, position, reference, SOURCE, scores); - } - - TranscriptMissenseVariantFunctionalScore predictedScore = new TranscriptMissenseVariantFunctionalScore("", - alternate, aaReference, aaAlternate, score); - scores.add(predictedScore); - lastEntry = 
chromosome + position; + // Serialise last entry + serializer.serialize(predictions); } - // serialise last entry - serializer.serialize(predictions); + logger.info(PARSING_DONE_LOG_MESSAGE, revelFiles.get(0)); + // Close zis.close(); zipFile.close(); inputStream.close(); - bufferedReader.close(); + + logger.info(CATEGORY_BUILDING_DONE_LOG_MESSAGE, dataCategory, dataName); } }
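The REVEL parser above relies on the input CSV being sorted by chromosome and position: consecutive rows for the same locus are collected into a single MissenseVariantFunctionalScore and the group is serialized as soon as the locus key changes. A minimal, self-contained sketch of that grouping pattern (hypothetical Row record and made-up values, not the builder's actual API):

    import java.util.ArrayList;
    import java.util.List;

    public class GroupByLocus {
        // Hypothetical stand-in for one REVEL CSV row: chromosome, position, alternate allele, score
        record Row(String chromosome, int position, String alternate, double score) {}

        public static void main(String[] args) {
            List<Row> rows = List.of(
                    new Row("1", 12855835, "A", 0.175),
                    new Row("1", 12855835, "G", 0.230),  // same locus: accumulate
                    new Row("1", 12855897, "T", 0.051)); // new locus: flush previous group

            String lastKey = null;
            List<Row> group = new ArrayList<>();
            for (Row row : rows) {
                String key = row.chromosome() + ":" + row.position();
                if (lastKey != null && !key.equals(lastKey)) {
                    System.out.println(lastKey + " -> " + group.size() + " alternate(s)"); // serialize group
                    group = new ArrayList<>();
                }
                group.add(row);
                lastKey = key;
            }
            if (!group.isEmpty()) {
                System.out.println(lastKey + " -> " + group.size() + " alternate(s)"); // serialize last group
            }
        }
    }

Note that the builder keys each group by plain concatenation (chromosome + position), so "1" + 12855835 and "11" + 2855835 yield the same string; this is safe only because the file is sorted and keys are compared between consecutive rows. A separator, as in the sketch, makes the key unambiguous.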
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RocksDbManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RocksDbManager.java index cf8351cc54..3a178b4828 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RocksDbManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RocksDbManager.java @@ -60,8 +60,11 @@ public RocksDB getDBConnection(String dbLocation) { Options options = new Options().setCreateIfMissing(true); RocksDB db = null; try { + if (!Files.exists(Paths.get(dbLocation))) { + Files.createDirectories(Paths.get(dbLocation)); + } return RocksDB.open(options, dbLocation); - } catch (RocksDBException e) { + } catch (RocksDBException | IOException e) { // do some error handling e.printStackTrace(); System.exit(1); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java index a31bd8d5e6..951ea5c530 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java @@ -41,11 +41,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.opencb.cellbase.lib.EtlCommons.CLINVAR_DATE; -import static org.opencb.cellbase.lib.EtlCommons.CLINVAR_VERSION; - -//import org.opencb.biodata.formats.variant.clinvar.v24jaxb.*; - /** * Created by fjlopez on 28/09/16. */ @@ -78,11 +73,15 @@ public class ClinVarIndexer extends ClinicalIndexer { private static final String DIPLOTYPE = "Diplotype"; private static final String VARIANT = "Variant"; private static final char CLINICAL_SIGNIFICANCE_SEPARATOR = '/'; + private final Path clinvarXMLFiles; private final Path clinvarSummaryFile; private final Path clinvarVariationAlleleFile; private final Path clinvarEFOFile; + + private final String version; private final String assembly; + private int numberSomaticRecords = 0; private int numberGermlineRecords = 0; private int numberNoDiseaseTrait = 0; @@ -94,15 +93,15 @@ public class ClinVarIndexer extends ClinicalIndexer { private static final Set<ModeOfInheritance> RECESSIVE_TERM_SET = new HashSet<>(Arrays.asList(ModeOfInheritance.biallelic)); - public ClinVarIndexer(Path clinvarXMLFiles, Path clinvarSummaryFile, Path clinvarVariationAlleleFile, - Path clinvarEFOFile, boolean normalize, Path genomeSequenceFilePath, String assembly, - RocksDB rdb) throws IOException { + public ClinVarIndexer(Path clinvarXMLFiles, Path clinvarSummaryFile, Path clinvarVariationAlleleFile, Path clinvarEFOFile, + String version, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) throws IOException { super(genomeSequenceFilePath); this.rdb = rdb; this.clinvarXMLFiles = clinvarXMLFiles; this.clinvarSummaryFile = clinvarSummaryFile; this.clinvarVariationAlleleFile = clinvarVariationAlleleFile; this.clinvarEFOFile = clinvarEFOFile; + this.version = version; this.normalize = normalize; this.genomeSequenceFilePath = genomeSequenceFilePath; this.assembly = assembly; @@ -310,7 +309,7 @@ private void addNewEntries(VariantAnnotation variantAnnotation, String variation String mateVariantString, String clinicalHaplotypeString, Map<String, EFO> traitsToEfoTermsMap) { - EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, CLINVAR_VERSION, CLINVAR_DATE); + EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, version, null); // Create a set to avoid situations like germline;germline;germline List<AlleleOrigin> alleleOrigin = null; if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_ORIGIN_COLUMN])) { @@ -391,7 +390,7 @@ private void addNewEntries(VariantAnnotation variantAnnotation, PublicSetType pu throws JsonProcessingException { List<Property> additionalProperties = new ArrayList<>(3); - EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, CLINVAR_VERSION, CLINVAR_DATE); + EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, version, null); // String accession = publicSet.getReferenceClinVarAssertion().getClinVarAccession().getAcc(); VariantClassification variantClassification = getVariantClassification( diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalIndexer.java index bbe33017fd..3f6e87b89c 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalIndexer.java @@ -83,7 +83,7 @@ public ClinicalIndexer(Path genomeSequenceFilePath) throws IOException { .setDecomposeMNVs(true); if (genomeSequenceFilePath != null) { - logger.info("Enabling left aligning by using sequence at {}", genomeSequenceFilePath.toString()); + logger.info("Enabling left aligning by using sequence at {}", genomeSequenceFilePath); 
variantNormalizerConfig.enableLeftAlign(genomeSequenceFilePath.toString()); } else { logger.info("Left alignment is NOT enabled."); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalVariantBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalVariantBuilder.java index f574133ad7..e3c7ab3ff8 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalVariantBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalVariantBuilder.java @@ -19,165 +19,159 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.EtlCommons; import org.opencb.cellbase.lib.builders.CellBaseBuilder; +import org.opencb.commons.utils.FileUtils; import org.rocksdb.Options; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.rocksdb.RocksIterator; -import java.io.File; -import java.io.IOException; +import java.io.*; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import static org.opencb.cellbase.lib.EtlCommons.*; + /** * Created by fjlopez on 26/09/16. */ public class ClinicalVariantBuilder extends CellBaseBuilder { - private final Path clinvarXMLFile; - private final Path clinvarSummaryFile; - private final Path clinvarVariationAlleleFile; - private final Path clinvarEFOFile; - private final Path cosmicFile; - private final Path gwasFile; - private final Path dbsnpFile; + private final Path clinicalVariantPath; private final String assembly; - private final Path iarctp53GermlineFile; - private final Path iarctp53SomaticFile; - private final Path iarctp53GermlineReferencesFile; - private final Path iarctp53SomaticReferencesFile; private final Path genomeSequenceFilePath; - private final Path docmFile; - private final Path hgmdFile; - private boolean normalize = true; + private boolean normalize; - public ClinicalVariantBuilder(Path clinicalVariantFolder, boolean normalize, Path genomeSequenceFilePath, - String assembly, CellBaseSerializer serializer) { - this(clinicalVariantFolder.resolve(EtlCommons.CLINVAR_XML_FILE), - clinicalVariantFolder.resolve(EtlCommons.CLINVAR_SUMMARY_FILE), - clinicalVariantFolder.resolve(EtlCommons.CLINVAR_VARIATION_ALLELE_FILE), - clinicalVariantFolder.resolve(EtlCommons.CLINVAR_EFO_FILE), - clinicalVariantFolder.resolve(EtlCommons.COSMIC_FILE), - clinicalVariantFolder.resolve(EtlCommons.GWAS_FILE), - clinicalVariantFolder.resolve(EtlCommons.DBSNP_FILE), - clinicalVariantFolder.resolve("datasets/" + EtlCommons.IARCTP53_GERMLINE_FILE), - clinicalVariantFolder.resolve("datasets/" + EtlCommons.IARCTP53_GERMLINE_REFERENCES_FILE), - clinicalVariantFolder.resolve("datasets/" + EtlCommons.IARCTP53_SOMATIC_FILE), - clinicalVariantFolder.resolve("datasets/" + EtlCommons.IARCTP53_SOMATIC_REFERENCES_FILE), - clinicalVariantFolder.resolve(EtlCommons.DOCM_FILE), - clinicalVariantFolder.resolve(EtlCommons.HGMD_FILE), - normalize, - genomeSequenceFilePath, assembly, serializer); - } + private Path clinvarFullReleaseFilePath; + private Path clinvarSummaryFilePath; + private Path clinvarVariationAlleleFilePath; + private Path clinvarEFOFilePath; 
+ private Path cosmicFilePath; + private Path hgmdFilePath; + private Path gwasFilePath; + private Path gwasDbSnpFilePath; + + private final CellBaseConfiguration configuration; - public ClinicalVariantBuilder(Path clinvarXMLFile, Path clinvarSummaryFile, Path clinvarVariationAlleleFile, - Path clinvarEFOFile, Path cosmicFile, Path gwasFile, Path dbsnpFile, - Path iarctp53GermlineFile, Path iarctp53GermlineReferencesFile, - Path iarctp53SomaticFile, Path iarctp53SomaticReferencesFile, Path docmFile, Path hgmdFile, - boolean normalize, Path genomeSequenceFilePath, String assembly, - CellBaseSerializer serializer) { + public ClinicalVariantBuilder(Path clinicalVariantFolder, boolean normalize, Path genomeSequenceFilePath, + String assembly, CellBaseConfiguration configuration, CellBaseSerializer serializer) { super(serializer); - this.clinvarXMLFile = clinvarXMLFile; - this.clinvarSummaryFile = clinvarSummaryFile; - this.clinvarVariationAlleleFile = clinvarVariationAlleleFile; - this.clinvarEFOFile = clinvarEFOFile; - this.cosmicFile = cosmicFile; - this.gwasFile = gwasFile; - this.dbsnpFile = dbsnpFile; - this.iarctp53GermlineFile = iarctp53GermlineFile; - this.iarctp53GermlineReferencesFile = iarctp53GermlineReferencesFile; - this.iarctp53SomaticFile = iarctp53SomaticFile; - this.iarctp53SomaticReferencesFile = iarctp53SomaticReferencesFile; - this.docmFile = docmFile; - this.hgmdFile = hgmdFile; + this.clinicalVariantPath = clinicalVariantFolder; this.normalize = normalize; this.genomeSequenceFilePath = genomeSequenceFilePath; this.assembly = assembly; + this.configuration = configuration; } - public void parse() throws IOException, RocksDBException { + public void check() throws CellBaseException, IOException { + if (checked) { + return; + } + + logger.info(CHECKING_BEFORE_BUILDING_LOG_MESSAGE, getDataName(CLINICAL_VARIANT_DATA)); + + // Sanity check + checkDirectory(clinicalVariantPath, getDataName(CLINICAL_VARIANT_DATA)); + if (!Files.exists(serializer.getOutdir())) { + try { + Files.createDirectories(serializer.getOutdir()); + } catch (IOException e) { + throw new CellBaseException("Error creating folder " + serializer.getOutdir(), e); + } + } + + // Check genome file + logger.info("Checking genome FASTA file ..."); + if (!Files.exists(genomeSequenceFilePath)) { + throw new CellBaseException("Genome file path does not exist " + genomeSequenceFilePath); + } + logger.info(OK_LOG_MESSAGE); + logger.info("Checking index for genome FASTA file ..."); + getIndexFastaReferenceGenome(genomeSequenceFilePath); + logger.info(OK_LOG_MESSAGE); + + // Check ClinVar files + clinvarFullReleaseFilePath = checkFile(CLINVAR_DATA, configuration.getDownload().getClinvar(), CLINVAR_FULL_RELEASE_FILE_ID, + clinicalVariantPath).toPath(); + clinvarSummaryFilePath = checkFile(CLINVAR_DATA, configuration.getDownload().getClinvar(), CLINVAR_SUMMARY_FILE_ID, + clinicalVariantPath).toPath(); + clinvarVariationAlleleFilePath = checkFile(CLINVAR_DATA, configuration.getDownload().getClinvar(), CLINVAR_ALLELE_FILE_ID, + clinicalVariantPath).toPath(); + clinvarEFOFilePath = checkFile(CLINVAR_DATA, configuration.getDownload().getClinvar(), CLINVAR_EFO_TERMS_FILE_ID, + clinicalVariantPath).toPath(); + + // Check COSMIC file + cosmicFilePath = checkFiles(COSMIC_DATA, clinicalVariantPath, 1).get(0).toPath(); + + // Check HGMD file + hgmdFilePath = checkFiles(HGMD_DATA, clinicalVariantPath, 1).get(0).toPath(); + + // Check GWAS files + gwasFilePath = checkFiles(GWAS_DATA, clinicalVariantPath, 1).get(0).toPath(); + String 
dbSnpFilename = Paths.get(configuration.getDownload().getGwasCatalog().getFiles().get(GWAS_DBSNP_FILE_ID)).getFileName() + .toString(); + gwasDbSnpFilePath = clinicalVariantPath.resolve(dbSnpFilename); + if (!Files.exists(gwasDbSnpFilePath)) { + throw new CellBaseException("Could not build clinical variants: the dbSNP file " + dbSnpFilename + " is missing at " + + clinicalVariantPath); + } + if (!Files.exists(clinicalVariantPath.resolve(dbSnpFilename + TBI_EXTENSION))) { + throw new CellBaseException("Could not build clinical variants: the dbSNP tabix file " + dbSnpFilename + TBI_EXTENSION + + " is missing at " + clinicalVariantPath); + } + + logger.info(CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE, getDataName(CLINICAL_VARIANT_DATA)); + checked = true; + } + + public void parse() throws IOException, RocksDBException, CellBaseException { + check(); + + // Prepare ClinVar chunk files before building (if necessary) + Path chunksPath = serializer.getOutdir().resolve(CLINVAR_CHUNKS_SUBDIRECTORY); + if (Files.notExists(chunksPath)) { + Files.createDirectories(chunksPath); + logger.info("Splitting ClinVar file {} into {} ...", clinvarFullReleaseFilePath, chunksPath); + splitClinvar(clinvarFullReleaseFilePath, chunksPath); + logger.info(OK_LOG_MESSAGE); + } RocksDB rdb = null; Options dbOption = null; String dbLocation = null; try { - Object[] dbConnection = getDBConnection(clinvarXMLFile.getParent().toString() + "/integration.idx", true); + Object[] dbConnection = getDBConnection(clinicalVariantPath.toString() + "/integration.idx", true); rdb = (RocksDB) dbConnection[0]; dbOption = (Options) dbConnection[1]; dbLocation = (String) dbConnection[2]; // COSMIC - // IMPORTANT: COSMIC must be indexed first (before ClinVar, IARC TP53, DOCM, HGMD,...)!!! - if (this.cosmicFile != null && Files.exists(this.cosmicFile)) { - CosmicIndexer cosmicIndexer = new CosmicIndexer(cosmicFile, normalize, genomeSequenceFilePath, assembly, rdb); - cosmicIndexer.index(); - } else { - logger.warn("Cosmic file {} missing. Skipping Cosmic data", cosmicFile); - } + // IMPORTANT: COSMIC must be indexed first (before ClinVar, HGMD,...)!!! + CosmicIndexer cosmicIndexer = new CosmicIndexer(cosmicFilePath, configuration.getDownload().getCosmic().getVersion(), + normalize, genomeSequenceFilePath, assembly, rdb); + cosmicIndexer.index(); // ClinVar - if (this.clinvarXMLFile != null && this.clinvarSummaryFile != null - && this.clinvarVariationAlleleFile != null && Files.exists(clinvarXMLFile) - && Files.exists(clinvarSummaryFile) && Files.exists(clinvarVariationAlleleFile)) { - ClinVarIndexer clinvarIndexer = new ClinVarIndexer(clinvarXMLFile.getParent().resolve("clinvar_chunks"), clinvarSummaryFile, - clinvarVariationAlleleFile, clinvarEFOFile, normalize, genomeSequenceFilePath, assembly, rdb); - clinvarIndexer.index(); - } else { - logger.warn("One or more of required ClinVar files are missing. 
Skipping ClinVar data.\n" - + "Please, ensure that these two files exist:\n" - + "{}\n" - + "{}", this.clinvarXMLFile.toString(), this.clinvarSummaryFile.toString()); - } - - // IARC TP53 - if (this.iarctp53GermlineFile != null && this.iarctp53SomaticFile != null - && Files.exists(iarctp53GermlineFile) && Files.exists(iarctp53SomaticFile)) { - IARCTP53Indexer iarctp53Indexer = new IARCTP53Indexer(iarctp53GermlineFile, - iarctp53GermlineReferencesFile, iarctp53SomaticFile, iarctp53SomaticReferencesFile, - normalize, genomeSequenceFilePath, assembly, rdb); - iarctp53Indexer.index(); - } else { - logger.warn("One or more of required IARCTP53 files are missing. Skipping IARCTP53 data."); - } - - // DOCM - if (this.docmFile != null && Files.exists(docmFile)) { - DOCMIndexer docmIndexer = new DOCMIndexer(docmFile, normalize, genomeSequenceFilePath, assembly, rdb); - docmIndexer.index(); - } else { - logger.warn("The DOCM file {} is missing. Skipping DOCM data.", docmFile); - } + ClinVarIndexer clinvarIndexer = new ClinVarIndexer(serializer.getOutdir().resolve(CLINVAR_CHUNKS_SUBDIRECTORY), + clinvarSummaryFilePath, clinvarVariationAlleleFilePath, clinvarEFOFilePath, configuration.getDownload().getClinvar() + .getVersion(), normalize, genomeSequenceFilePath, assembly, rdb); + clinvarIndexer.index(); // HGMD - if (this.hgmdFile != null && Files.exists(hgmdFile)) { - HGMDIndexer hgmdIndexer = new HGMDIndexer(hgmdFile, normalize, genomeSequenceFilePath, assembly, rdb); - hgmdIndexer.index(); - } else { - logger.warn("The HGMD file {} is missing. Skipping HGMD data.", hgmdFile); - } + HGMDIndexer hgmdIndexer = new HGMDIndexer(hgmdFilePath, configuration.getDownload().getHgmd().getVersion(), normalize, + genomeSequenceFilePath, assembly, rdb); + hgmdIndexer.index(); // GWAS catalog - if (gwasFile != null && Files.exists(gwasFile)) { - if (dbsnpFile != null && Files.exists(dbsnpFile)) { - Path tabixFile = Paths.get(dbsnpFile.toAbsolutePath() + ".tbi"); - if (tabixFile != null && Files.exists(tabixFile)) { - GwasIndexer gwasIndexer = new GwasIndexer(gwasFile, dbsnpFile, genomeSequenceFilePath, assembly, rdb); - gwasIndexer.index(); - } else { - logger.warn("The dbSNP tabix file {} is missing. Skipping GWAS catalog data.", tabixFile); - } - } else { - logger.warn("The dbSNP file {} is missing. Skipping GWAS catalog data.", dbsnpFile); - } - } else { - logger.warn("The GWAS catalog file {} is missing. Skipping GWAS catalog data.", gwasFile); - } + GwasIndexer gwasIndexer = new GwasIndexer(gwasFilePath, gwasDbSnpFilePath, genomeSequenceFilePath, assembly, rdb); + gwasIndexer.index(); + // Serialize serializeRDB(rdb); closeIndex(rdb, dbOption, dbLocation); serializer.close(); @@ -186,7 +180,6 @@ public void parse() throws IOException, RocksDBException { serializer.close(); throw e; } - } private void serializeRDB(RocksDB rdb) throws IOException { @@ -223,7 +216,7 @@ private Variant parseVariantFromVariantId(String variantId) { return new Variant(parts[0].trim(), Integer.parseInt(parts[1].trim()), parts[2], parts[3]); } } catch (Exception e) { - logger.warn(e.getMessage() + ". Impossible to create the variant object from the variant ID: " + variantId); + logger.warn("{}. 
Impossible to create the variant object from the variant ID: {}", e.getMessage(), variantId); return null; } } @@ -275,4 +268,53 @@ private Object[] getDBConnection(String dbLocation, boolean forceCreate) { } + private void splitClinvar(Path clinvarXmlFilePath, Path splitOutdirPath) throws IOException { + PrintWriter pw = null; + try (BufferedReader br = FileUtils.newBufferedReader(clinvarXmlFilePath)) { + StringBuilder header = new StringBuilder(); + boolean beforeEntry = true; + boolean inEntry = false; + int count = 0; + int chunk = 0; + String line; + while ((line = br.readLine()) != null) { + if (line.trim().startsWith("<ClinVarSet ")) { + inEntry = true; + beforeEntry = false; + if (count % 10000 == 0) { + pw = new PrintWriter(new BufferedWriter(new FileWriter(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile()))); + pw.println(header.toString().trim()); + } + count++; + } else if (beforeEntry) { + header.append(line).append("\n"); + } + if (inEntry) { + pw.println(line); + } + if (line.trim().startsWith("</ClinVarSet>")) { + inEntry = false; + if (count % 10000 == 0) { + if (pw != null) { + pw.print("</ReleaseSet>"); + pw.close(); + } + chunk++; + } + } + } + if (pw != null) { + pw.print("</ReleaseSet>"); + pw.close(); + } + } finally { + if (pw != null) { + pw.close(); + } + } + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java index f8d2f16d15..51be2b6f31 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java @@ -37,12 +37,12 @@ public class CosmicIndexer extends ClinicalIndexer { private final Path cosmicFile; + private final String version; private final String assembly; + private Pattern mutationGRCh37GenomePositionPattern; private Pattern snvPattern; - private static final String COSMIC_VERSION = "v95"; - private static final int GENE_NAMES_COLUMN = 0; private static final int HGNC_COLUMN = 3; private static final int PRIMARY_SITE_COLUMN = 7; @@ -84,10 +84,12 @@ public class CosmicIndexer extends ClinicalIndexer { private int rocksDBNewVariants = 0; private int rocksDBUpdateVariants = 0; - public CosmicIndexer(Path cosmicFile, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) throws IOException { + public CosmicIndexer(Path cosmicFile, String version, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) + throws IOException { super(genomeSequenceFilePath); this.cosmicFile = cosmicFile; + this.version = version; this.normalize = normalize; this.assembly = assembly; this.rdb = rdb; @@ -469,7 +471,7 @@ private EvidenceEntry buildCosmic(String[] fields) { String id = fields[ID_COLUMN]; String url = "https://cancer.sanger.ac.uk/cosmic/search?q=" + id; - EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.COSMIC_DATA, COSMIC_VERSION, null); + EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.COSMIC_DATA, version, null); SomaticInformation somaticInformation = getSomaticInformation(fields); List<GenomicFeature> genomicFeatureList = getGenomicFeature(fields); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/DOCMIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/DOCMIndexer.java index b77f238432..a150e042dd 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/DOCMIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/DOCMIndexer.java @@ -178,7 +178,7 @@ private VariantAnnotation parseVariantAnnotation(Map<String, Object> map) { List<String> bibliography = getBibliography(evidenceEntry); bibliography.add(PMID + diseaseMap.get(SOURCE_PUBMED_ID)); } else { - EvidenceSource evidenceSource = new 
EvidenceSource(EtlCommons.DOCM_DATA, null, null); + EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.DOCM_NAME, null, null); HeritableTrait heritableTrait = new HeritableTrait((String) diseaseMap.get(DISEASE), null); List genomicFeatureList = getGenomicFeature(map); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/HGMDIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/HGMDIndexer.java index d2ce12dee8..f132f4b9e8 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/HGMDIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/HGMDIndexer.java @@ -36,15 +36,17 @@ */ public class HGMDIndexer extends ClinicalIndexer { private final Path hgmdFile; + private final String version; private final String assembly; - public HGMDIndexer(Path hgmdFile, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) + public HGMDIndexer(Path hgmdFile, String version, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) throws IOException { super(genomeSequenceFilePath); - this.rdb = rdb; - this.assembly = assembly; this.hgmdFile = hgmdFile; + this.version = version; this.normalize = normalize; + this.assembly = assembly; + this.rdb = rdb; } public void index() throws RocksDBException, IOException { @@ -93,7 +95,7 @@ private void parseHgmdInfo(Variant variant) { } // Source - entry.setSource(new EvidenceSource(EtlCommons.HGMD_DATA, "2020.3", "2020")); + entry.setSource(new EvidenceSource(EtlCommons.HGMD_DATA, version, null)); // Assembly entry.setAssembly(assembly); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/utils/RocksDBUtils.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/utils/RocksDBUtils.java new file mode 100644 index 0000000000..f6183e3040 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/utils/RocksDBUtils.java @@ -0,0 +1,68 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.builders.utils; + +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +public class RocksDBUtils { + + public static void closeIndex(RocksDB rdb, Options dbOption, String dbLocation) throws IOException { + if (rdb != null) { + rdb.close(); + } + if (dbOption != null) { + dbOption.dispose(); + } + if (dbLocation != null && Files.exists(Paths.get(dbLocation))) { + org.apache.commons.io.FileUtils.deleteDirectory(new File(dbLocation)); + } + } + + public static Object[] getDBConnection(String dbLocation, boolean forceCreate) throws RocksDBException { + boolean indexingNeeded = forceCreate || !Files.exists(Paths.get(dbLocation)); + // a static method that loads the RocksDB C++ library. 
+ RocksDB.loadLibrary(); + // the Options class contains a set of configurable DB options + // that determines the behavior of a database. + Options options = new Options().setCreateIfMissing(true); + +// options.setMaxBackgroundCompactions(4); +// options.setMaxBackgroundFlushes(1); +// options.setCompressionType(CompressionType.NO_COMPRESSION); +// options.setMaxOpenFiles(-1); +// options.setIncreaseParallelism(4); +// options.setCompactionStyle(CompactionStyle.LEVEL); +// options.setLevelCompactionDynamicLevelBytes(true); + + RocksDB db; + // a factory method that returns a RocksDB instance + if (indexingNeeded) { + db = RocksDB.open(options, dbLocation); + } else { + db = RocksDB.openReadOnly(options, dbLocation); + } + + return new Object[]{db, options, dbLocation, indexingNeeded}; + } +}
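The new RocksDBUtils helper returns an untyped Object[] holding, in order, the RocksDB handle, the Options object, the database location and the indexingNeeded flag, so callers have to unpack it by position. A short usage sketch under that assumption (hypothetical path, error handling elided):

    import org.opencb.cellbase.lib.builders.utils.RocksDBUtils;
    import org.rocksdb.Options;
    import org.rocksdb.RocksDB;
    import org.rocksdb.RocksDBException;

    import java.io.IOException;

    public class RocksDbUsageExample {
        public static void main(String[] args) throws RocksDBException, IOException {
            // Positions follow the array built in getDBConnection:
            // [0] RocksDB handle, [1] Options, [2] dbLocation, [3] indexingNeeded
            Object[] connection = RocksDBUtils.getDBConnection("/tmp/integration.idx", true);
            RocksDB rdb = (RocksDB) connection[0];
            Options options = (Options) connection[1];
            String dbLocation = (String) connection[2];
            boolean indexingNeeded = (Boolean) connection[3];
            try {
                if (indexingNeeded) {
                    rdb.put("key".getBytes(), "value".getBytes());
                }
                byte[] value = rdb.get("key".getBytes());
                System.out.println(value == null ? "missing" : new String(value));
            } finally {
                // Closes the handle, disposes the options and deletes the index directory
                RocksDBUtils.closeIndex(rdb, options, dbLocation);
            }
        }
    }

A small holder class would avoid the positional casts; the Object[] shape is presumably kept so the utility stays call-compatible with the private getDBConnection that ClinicalVariantBuilder already uses.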
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java index a4ade6603e..7ac8bcf800 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java @@ -24,10 +24,11 @@ import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; -import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.DownloadProperties; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.utils.SpeciesUtils; import org.opencb.cellbase.lib.EtlCommons; import org.slf4j.Logger; @@ -46,11 +47,15 @@ import java.time.LocalDateTime; import java.util.*; -public class AbstractDownloadManager { +import static org.opencb.cellbase.lib.EtlCommons.*; - private static final String DGV_NAME = "DGV"; +public abstract class AbstractDownloadManager { - private static final String GNOMAD_NAME = "gnomAD"; + protected static final String DOWNLOADING_LOG_MESSAGE = "Downloading {} ..."; + protected static final String DOWNLOADING_DONE_LOG_MESSAGE = "Ok. {}"; + protected static final String CATEGORY_DOWNLOADING_LOG_MESSAGE = "Downloading {}/{} ..."; + protected static final String CATEGORY_DOWNLOADING_DONE_LOG_MESSAGE = "Ok. {}/{}"; + protected static final String DOWNLOADING_FROM_TO_LOG_MESSAGE = "Downloading {} to {} ..."; protected String species; protected String assembly; @@ -66,15 +71,23 @@ public class AbstractDownloadManager { protected Path downloadFolder; protected Path downloadLogFolder; // /download/log protected Path buildFolder; // /_/generated-json + + protected ObjectReader dataSourceReader; + protected ObjectWriter dataSourceWriter; + protected Logger logger; - public AbstractDownloadManager(String species, String assembly, Path outdir, CellBaseConfiguration configuration) + protected AbstractDownloadManager(String species, String assembly, Path outdir, CellBaseConfiguration configuration) throws IOException, CellBaseException { this.species = species; this.assembly = assembly; this.outdir = outdir; this.configuration = configuration; + ObjectMapper jsonObjectMapper = new ObjectMapper(); + this.dataSourceReader = jsonObjectMapper.readerFor(DataSource.class); + this.dataSourceWriter = jsonObjectMapper.writerFor(DataSource.class); + this.init(); } @@ -104,47 +117,22 @@ private void init() throws CellBaseException, IOException { // Prepare outdir Path speciesFolder = outdir.resolve(speciesShortName + "_" + assemblyConfiguration.getName().toLowerCase()); downloadFolder = outdir.resolve(speciesFolder + "/download"); - logger.info("Creating download dir " + downloadFolder.toString()); + logger.info("Creating download dir {}", downloadFolder); Files.createDirectories(downloadFolder); downloadLogFolder = outdir.resolve(speciesFolder + "/download/log"); - logger.info("Creating download log dir " + downloadLogFolder.toString()); + logger.info("Creating download log dir {}", downloadLogFolder); Files.createDirectories(downloadLogFolder); // /_/generated_json buildFolder = outdir.resolve(speciesFolder + "/generated_json"); - logger.info("Creating build dir " + buildFolder.toString()); + logger.info("Creating build dir {}", buildFolder); Files.createDirectories(buildFolder); - logger.info("Processing species " + speciesConfiguration.getScientificName()); + logger.info("Processing species {}", speciesConfiguration.getScientificName()); } - public List<DownloadFile> download() throws IOException, InterruptedException, NoSuchMethodException, FileFormatException { - return null; - } - -// public DownloadFile downloadStructuralVariants() throws IOException, InterruptedException { -// if (!speciesHasInfoToDownload(speciesConfiguration, "svs")) { -// return null; -// } -// if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { -// logger.info("Downloading DGV data ..."); -// -// Path structuralVariantsFolder = downloadFolder.resolve(EtlCommons.STRUCTURAL_VARIANTS_FOLDER); -// Files.createDirectories(structuralVariantsFolder); -// String sourceFilename = (assemblyConfiguration.getName().equalsIgnoreCase("grch37") ? 
"GRCh37_hg19" : "GRCh38_hg38") -// + "_variants_2016-05-15.txt"; -// String url = configuration.getDownload().getDgv().getHost() + "/" + sourceFilename; -// saveVersionData(EtlCommons.STRUCTURAL_VARIANTS_DATA, DGV_NAME, getDGVVersion(sourceFilename), getTimeStamp(), -// Collections.singletonList(url), structuralVariantsFolder.resolve(EtlCommons.DGV_VERSION_FILE)); -// return downloadFile(url, structuralVariantsFolder.resolve(EtlCommons.DGV_FILE).toString()); -// } -// return null; -// } - -// private String getDGVVersion(String sourceFilename) { -// return sourceFilename.split("\\.")[0].split("_")[3]; -// } + public abstract List download() throws IOException, InterruptedException, CellBaseException; protected boolean speciesHasInfoToDownload(SpeciesConfiguration sp, String info) { boolean hasInfo = true; @@ -155,32 +143,148 @@ protected boolean speciesHasInfoToDownload(SpeciesConfiguration sp, String info) return hasInfo; } - protected String getTimeStamp() { - return new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime()); + protected DownloadFile downloadAndSaveDataSource(DownloadProperties.URLProperties props, String fileId, String data, Path outPath) + throws IOException, InterruptedException, CellBaseException { + return downloadAndSaveDataSource(props, fileId, data, null, outPath); + } + + protected DownloadFile downloadAndSaveDataSource(DownloadProperties.URLProperties props, String fileId, String data, String chromosome, + Path outPath) throws IOException, InterruptedException, CellBaseException { + String versionFilename = getDataVersionFilename(data); + + // Download file + DownloadFile downloadFile = downloadDataSource(props, fileId, chromosome, outPath); + + // Save data source + saveDataSource(data, props.getVersion(), getTimeStamp(), Collections.singletonList(downloadFile.getUrl()), + outPath.resolve(versionFilename)); + + return downloadFile; + } + + @Deprecated + protected DownloadFile downloadAndSaveDataSource(DownloadProperties.URLProperties props, String fileId, String name, String category, + String versionFilename, Path outPath) + throws IOException, InterruptedException, CellBaseException { + return downloadAndSaveDataSource(props, fileId, name, category, null, versionFilename, outPath); + } + + @Deprecated + protected DownloadFile downloadAndSaveDataSource(DownloadProperties.URLProperties props, String fileId, String name, String category, + String chromosome, String versionFilename, Path outPath) + throws IOException, InterruptedException, CellBaseException { + // Download file + DownloadFile downloadFile = downloadDataSource(props, fileId, chromosome, outPath); + + // Save data source + saveDataSource(name, category, props.getVersion(), getTimeStamp(), Collections.singletonList(downloadFile.getUrl()), + outPath.resolve(versionFilename)); + + return downloadFile; + } + + protected DownloadFile downloadAndSaveEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, String data, + Path outPath) throws IOException, InterruptedException, CellBaseException { + return downloadAndSaveEnsemblDataSource(ensemblProps, fileId, data, null, outPath); + } + + protected DownloadFile downloadAndSaveEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, String data, + String chromosome, Path outPath) + throws IOException, InterruptedException, CellBaseException { + // Download file + DownloadFile downloadFile = downloadEnsemblDataSource(ensemblProps, fileId, chromosome, outPath); + + // Save data 
+        saveDataSource(data, "(" + getDataName(ENSEMBL_DATA) + " " + ensemblVersion + ")", getTimeStamp(),
+                Collections.singletonList(downloadFile.getUrl()), outPath.resolve(getDataVersionFilename(data)));
+
+        return downloadFile;
+    }
+
+    @Deprecated
+    protected DownloadFile downloadAndSaveEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, String name,
+                                                            String category, String chromosome, String versionFilename, Path outPath)
+            throws IOException, InterruptedException, CellBaseException {
+        // Download file
+        DownloadFile downloadFile = downloadEnsemblDataSource(ensemblProps, fileId, chromosome, outPath);
+
+        // Save data source
+        saveDataSource(name, category, "(Ensembl " + ensemblVersion + ")", getTimeStamp(), Collections.singletonList(downloadFile.getUrl()),
+                outPath.resolve(versionFilename));
+
+        return downloadFile;
+    }
+
+    protected DownloadFile downloadDataSource(DownloadProperties.URLProperties props, String fileId, Path outPath)
+            throws IOException, InterruptedException, CellBaseException {
+        return downloadDataSource(props, fileId, null, outPath);
+    }
+
+    protected DownloadFile downloadDataSource(DownloadProperties.URLProperties props, String fileId,
+                                              String chromosome, Path outPath)
+            throws IOException, InterruptedException, CellBaseException {
+        String url = EtlCommons.getUrl(props, fileId, species, assembly, chromosome);
+        File outFile = outPath.resolve(getFilenameFromUrl(url)).toFile();
+        logger.info(DOWNLOADING_FROM_TO_LOG_MESSAGE, url, outFile);
+        DownloadFile downloadFile = downloadFile(url, outFile.toString());
+        logger.info(OK_LOG_MESSAGE);
+        return downloadFile;
     }
 
-    protected void saveVersionData(String data, String name, String version, String date, List<String> url, Path outputFilePath)
+    protected DownloadFile downloadEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, Path outPath)
+            throws IOException, InterruptedException, CellBaseException {
+        return downloadEnsemblDataSource(ensemblProps, fileId, null, outPath);
+    }
+
+    protected DownloadFile downloadEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, String chromosome,
+                                                     Path outPath) throws IOException, InterruptedException, CellBaseException {
+        String url = EtlCommons.getEnsemblUrl(ensemblProps, ensemblRelease, fileId, speciesShortName, assemblyConfiguration.getName(),
+                chromosome);
+        File outFile = outPath.resolve(getFilenameFromUrl(url)).toFile();
+        logger.info(DOWNLOADING_FROM_TO_LOG_MESSAGE, url, outFile);
+        DownloadFile downloadFile = downloadFile(url, outFile.toString());
+        logger.info(OK_LOG_MESSAGE);
+        return downloadFile;
+    }
+
+    protected void saveDataSource(String data, String version, String date, List<String> urls, Path versionFilePath)
+            throws IOException, CellBaseException {
+        String name = getDataName(data);
+        String category = getDataCategory(data);
+        DataSource dataSource = new DataSource(name, category, version, date, urls);
+
+        if (StringUtils.isEmpty(version)) {
+            logger.warn("Version missing for data source {}/{}, using the date as version: {}", category, name, date);
+            dataSource.setVersion(date);
+        }
+
+        dataSourceWriter.writeValue(versionFilePath.toFile(), dataSource);
+    }
+
+    @Deprecated
+    protected void saveDataSource(String name, String category, String version, String date, List<String> urls, Path versionFilePath)
             throws IOException {
-        Map<String, Object> versionDataMap = new HashMap<>();
-        versionDataMap.put("data", data);
-        versionDataMap.put("name", name);
-        versionDataMap.put("version", version);
-        versionDataMap.put("date", date);
-        versionDataMap.put("url", url);
+        DataSource dataSource = new DataSource(name, category, version, date, urls);
 
-        ObjectMapper jsonObjectMapper = new ObjectMapper();
-        jsonObjectMapper.writeValue(outputFilePath.toFile(), versionDataMap);
+        if (StringUtils.isEmpty(version)) {
+            logger.warn("Version missing for data source {}/{}, using the date as version: {}", category, name, date);
+            dataSource.setVersion(date);
+        }
+
+        dataSourceWriter.writeValue(versionFilePath.toFile(), dataSource);
+    }
+
+    protected String getTimeStamp() {
+        return new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime());
     }
 
     protected String getLine(Path readmePath, int lineNumber) {
         Files.exists(readmePath);
-        try {
-            BufferedReader reader = Files.newBufferedReader(readmePath, Charset.defaultCharset());
+        try (BufferedReader reader = Files.newBufferedReader(readmePath, Charset.defaultCharset())) {
             String line = null;
             for (int i = 0; i < lineNumber; i++) {
                 line = reader.readLine();
             }
-            reader.close();
             return line;
         } catch (IOException e) {
             e.printStackTrace();
@@ -216,14 +320,12 @@ protected String getPhylo(SpeciesConfiguration sp) {
         }
     }
 
-
-
-    protected DownloadFile downloadFile(String url, String outputFileName) throws IOException, InterruptedException {
+    protected DownloadFile downloadFile(String url, String outputFileName) throws IOException, InterruptedException, CellBaseException {
        return downloadFile(url, outputFileName, null);
     }
 
     protected DownloadFile downloadFile(String url, String outputFileName, List<String> wgetAdditionalArgs)
-            throws IOException, InterruptedException {
+            throws IOException, InterruptedException, CellBaseException {
         DownloadFile downloadFileInfo = new DownloadFile(url, outputFileName, Timestamp.valueOf(LocalDateTime.now()).toString());
         Long startTime = System.currentTimeMillis();
         if (Paths.get(outputFileName).toFile().exists()) {
@@ -251,7 +353,7 @@ private void setDownloadStatusAndMessage(String outputFileName, DownloadFile dow
             } else {
                 downloadFile.setStatus(DownloadFile.Status.ERROR);
                 downloadFile.setMessage("Expected downloaded file size " + downloadFile.getExpectedFileSize()
-                    + ", Actual file size " + downloadFile.getActualFileSize());
+                        + ", Actual file size " + downloadFile.getActualFileSize());
             }
         } else {
             downloadFile.setMessage("See full error message in " + outputLog);
@@ -277,54 +379,42 @@ private boolean validateDownloadFile(DownloadFile downloadFile, String outputFil
     private long getExpectedFileSize(String outputFileLog) {
         try (BufferedReader reader = new BufferedReader(new FileReader(outputFileLog))) {
-            String line = null;
+            String line;
             while ((line = reader.readLine()) != null) {
                 // looking for: Length: 13846591 (13M)
                 if (line.startsWith("Length:")) {
                     String[] parts = line.split("\\s");
-                    return Long.valueOf(parts[1]);
+                    return Long.parseLong(parts[1]);
                 }
             }
         } catch (Exception e) {
-            logger.info("Error getting expected file size " + e.getMessage());
+            logger.info("Error getting expected file size {}", e.getMessage());
         }
         return -1;
     }
 
-    protected String getVersionFromVersionLine(Path path, String tag) {
-        Files.exists(path);
-        try {
-            BufferedReader reader = Files.newBufferedReader(path, Charset.defaultCharset());
-            String line = reader.readLine();
-            // There shall be a line at the README.txt containing the version.
-            // e.g. The files in the current directory contain the data corresponding to the latest release
-            // (version 4.0, April 2016). ...
-            while (line != null) {
-                // tag specifies a certain string that must be found within the line supposed to contain the version
-                // info
-                if (line.contains(tag)) {
-                    String version = line.split("\\(")[1].split("\\)")[0];
-                    reader.close();
-                    return version;
-                }
-                line = reader.readLine();
-            }
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
-        return null;
-    }
-
     private String getEnsemblURL(SpeciesConfiguration sp) {
         // We need to find which is the correct Ensembl host URL.
         // This can differ depending on whether the species is a vertebrate.
-        String ensemblHostUrl;
         if (configuration.getSpecies().getVertebrates().contains(sp)) {
-            ensemblHostUrl = configuration.getDownload().getEnsembl().getUrl().getHost();
+            return configuration.getDownload().getEnsembl().getUrl().getHost();
+        } else {
+            return configuration.getDownload().getEnsemblGenomes().getUrl().getHost();
+        }
+    }
+
+    @Deprecated
+    protected String getUrl(DownloadProperties.URLProperties props, String fileId) throws CellBaseException {
+        if (!props.getFiles().containsKey(fileId)) {
+            throw new CellBaseException("File ID " + fileId + " is missing in the DownloadProperties.URLProperties within the CellBase"
+                    + " configuration file");
+        }
+        String filesValue = props.getFiles().get(fileId);
+        if (filesValue.startsWith("https://") || filesValue.startsWith("http://") || filesValue.startsWith("ftp://")) {
+            return filesValue;
         } else {
-            ensemblHostUrl = configuration.getDownload().getEnsemblGenomes().getUrl().getHost();
+            return props.getHost() + filesValue;
         }
-        return ensemblHostUrl;
     }
 }
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AlphaMissenseDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AlphaMissenseDownloadManager.java
new file mode 100644
index 0000000000..721a02b599
--- /dev/null
+++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AlphaMissenseDownloadManager.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2015-2020 OpenCB
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.opencb.cellbase.lib.download;
+
+import org.opencb.cellbase.core.config.CellBaseConfiguration;
+import org.opencb.cellbase.core.exception.CellBaseException;
+import org.opencb.cellbase.lib.EtlCommons;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.List;
+
+import static org.opencb.cellbase.lib.EtlCommons.*;
+
+public class AlphaMissenseDownloadManager extends AbstractDownloadManager {
+
+    public AlphaMissenseDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration)
+            throws IOException, CellBaseException {
+        super(species, assembly, targetDirectory, configuration);
+    }
+
+    @Override
+    public List<DownloadFile> download() throws IOException, InterruptedException, CellBaseException {
+        logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(ALPHAMISSENSE_DATA));
+
+        Path alphaMissensePath = downloadFolder.resolve(EtlCommons.PROTEIN_SUBSTITUTION_PREDICTION_DATA);
+        Files.createDirectories(alphaMissensePath);
+
+        // Download AlphaMissense file
+        DownloadFile downloadFile = downloadAndSaveDataSource(configuration.getDownload().getAlphaMissense(), ALPHAMISSENSE_FILE_ID,
+                ALPHAMISSENSE_DATA, alphaMissensePath);
+
+        logger.info(DOWNLOADING_DONE_LOG_MESSAGE, getDataName(ALPHAMISSENSE_DATA));
+
+        return Collections.singletonList(downloadFile);
+    }
+}
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/CaddDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/CaddDownloadManager.java
index e0cae1250e..0b0d09f412 100644
--- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/CaddDownloadManager.java
+++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/CaddDownloadManager.java
@@ -18,7 +18,6 @@
 
 import org.opencb.cellbase.core.config.CellBaseConfiguration;
 import org.opencb.cellbase.core.exception.CellBaseException;
-import org.opencb.cellbase.lib.EtlCommons;
 
 import java.io.IOException;
 import java.nio.file.Files;
@@ -26,36 +25,36 @@
 import java.util.Collections;
 import java.util.List;
 
+import static org.opencb.cellbase.lib.EtlCommons.*;
+
 public class CaddDownloadManager extends AbstractDownloadManager {
 
-    private static final String CADD_NAME = "CADD";
 
     public CaddDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration)
             throws IOException, CellBaseException {
         super(species, assembly, targetDirectory, configuration);
     }
 
     @Override
-    public List<DownloadFile> download() throws IOException, InterruptedException {
-        return Collections.singletonList(downloadCaddScores());
-    }
-
-    public DownloadFile downloadCaddScores() throws IOException, InterruptedException {
-        if (!speciesHasInfoToDownload(speciesConfiguration, "variation_functional_score")) {
-            return null;
+    public List<DownloadFile> download() throws IOException, InterruptedException, CellBaseException {
+        logger.info(CATEGORY_DOWNLOADING_LOG_MESSAGE, getDataCategory(CADD_DATA), getDataName(CADD_DATA));
+
+        if (!speciesHasInfoToDownload(speciesConfiguration, VARIATION_FUNCTIONAL_SCORE_DATA)
+                || !speciesConfiguration.getScientificName().equals(HOMO_SAPIENS_NAME)) {
+            logger.info("{}/{} not supported for species {}", getDataCategory(CADD_DATA), getDataName(CADD_DATA),
+                    speciesConfiguration.getScientificName());
+            return Collections.emptyList();
         }
-        if (speciesConfiguration.getScientificName().equals("Homo sapiens")) {
-            logger.info("Downloading CADD scores information ...");
-            Path variationFunctionalScoreFolder = downloadFolder.resolve("variation_functional_score");
-            Files.createDirectories(variationFunctionalScoreFolder);
+        // Create the CADD download path
+        Path caddDownloadPath = downloadFolder.resolve(VARIATION_FUNCTIONAL_SCORE_DATA).resolve(CADD_DATA);
+        Files.createDirectories(caddDownloadPath);
 
-            // Downloads CADD scores
-            String url = configuration.getDownload().getCadd().getHost();
+        // Download CADD and save data source
+        DownloadFile downloadFile = downloadAndSaveDataSource(configuration.getDownload().getCadd(), CADD_FILE_ID, CADD_DATA,
+                caddDownloadPath);
 
-            saveVersionData(EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, CADD_NAME, url.split("/")[5], getTimeStamp(),
-                    Collections.singletonList(url), variationFunctionalScoreFolder.resolve("caddVersion.json"));
-            return downloadFile(url, variationFunctionalScoreFolder.resolve("whole_genome_SNVs.tsv.gz").toString());
-        }
-        return null;
+        logger.info(CATEGORY_DOWNLOADING_DONE_LOG_MESSAGE, getDataCategory(CADD_DATA), getDataName(CADD_DATA));
+
+        return Collections.singletonList(downloadFile);
     }
 }
diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java
index 580a855a19..9fd0e7562c 100644
--- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java
+++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java
@@ -20,26 +20,18 @@
 import org.opencb.cellbase.core.config.DownloadProperties;
 import org.opencb.cellbase.core.exception.CellBaseException;
 import org.opencb.cellbase.lib.EtlCommons;
-import org.opencb.commons.utils.FileUtils;
 
-import javax.ws.rs.client.Client;
-import javax.ws.rs.client.ClientBuilder;
-import javax.ws.rs.client.WebTarget;
-import java.io.*;
-import java.net.URI;
+import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
 
-public class ClinicalDownloadManager extends AbstractDownloadManager {
-
-    private static final String CLINVAR_NAME = "ClinVar";
-    private static final String GWAS_NAME = "GWAS catalog";
-    private static final String IARCTP53_NAME = "IARC TP53 Database";
+import static org.opencb.cellbase.lib.EtlCommons.*;
 
+public class ClinicalDownloadManager extends AbstractDownloadManager {
 
     public ClinicalDownloadManager(String species, String assembly, Path outdir, CellBaseConfiguration configuration)
             throws IOException, CellBaseException {
@@ -47,204 +39,63 @@ public ClinicalDownloadManager(String species, String assembly, Path outdir, Cel
     }
 
     @Override
-    public List<DownloadFile> download() throws IOException, InterruptedException {
+    public List<DownloadFile> download() throws IOException, InterruptedException, CellBaseException {
         List<DownloadFile> downloadFiles = new ArrayList<>();
         downloadFiles.addAll(downloadClinical());
         return downloadFiles;
     }
 
-    public List<DownloadFile> downloadClinical() throws IOException, InterruptedException {
-        if (speciesConfiguration.getScientificName().equals("Homo sapiens")) {
-//            if (assemblyConfiguration.getName() == null) {
-//                throw new ParameterException("Assembly must be provided for downloading clinical variants data."
-//                        + " Please, specify either --assembly GRCh37 or --assembly GRCh38");
-//            }
-
-            logger.info("Downloading clinical information ...");
-
-            String url;
-            List<DownloadFile> downloadFiles = new ArrayList<>();
-
-            Path clinicalFolder = downloadFolder.resolve(EtlCommons.CLINICAL_VARIANTS_FOLDER);
-            Files.createDirectories(clinicalFolder);
-            logger.info("\t\tDownloading ClinVar files ...");
-
-            List<String> clinvarUrls = new ArrayList<>(3);
-            url = configuration.getDownload().getClinvar().getHost();
-
-            downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_XML_FILE).toString()));
-            clinvarUrls.add(url);
-
-            url = configuration.getDownload().getClinvarEfoTerms().getHost();
-            downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_EFO_FILE).toString()));
-            clinvarUrls.add(url);
-
-            url = configuration.getDownload().getClinvarSummary().getHost();
-            downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_SUMMARY_FILE).toString()));
-            clinvarUrls.add(url);
-
-            url = configuration.getDownload().getClinvarVariationAllele().getHost();
-            downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_VARIATION_ALLELE_FILE).toString()));
-            clinvarUrls.add(url);
-            saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, CLINVAR_NAME, getClinVarVersion(), getTimeStamp(), clinvarUrls,
-                    clinicalFolder.resolve("clinvarVersion.json"));
-
-            // Gwas catalog
-            logger.info("\t\tDownloading GWAS catalog file ...");
-            DownloadProperties.URLProperties gwasCatalog = configuration.getDownload().getGwasCatalog();
-            url = gwasCatalog.getHost();
-            downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.GWAS_FILE).toString()));
-            saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, GWAS_NAME, gwasCatalog.getVersion(), getTimeStamp(),
-                    Collections.singletonList(url), clinicalFolder.resolve("gwasVersion.json"));
-
-//            List<String> hgvsList = getDocmHgvsList();
-//            if (!hgvsList.isEmpty()) {
-//                downloadDocm(hgvsList, clinicalFolder.resolve(EtlCommons.DOCM_FILE));
-//                downloadFiles.add(downloadFile(configuration.getDownload().getDocmVersion().getHost(),
-//                        clinicalFolder.resolve("docmIndex.html").toString()));
-//                saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.DOCM_NAME,
-//                        getDocmVersion(clinicalFolder.resolve("docmIndex.html")), getTimeStamp(),
-//                        Arrays.asList(configuration.getDownload().getDocm().getHost() + "v1/variants.json",
-//                                configuration.getDownload().getDocm().getHost() + "v1/variants/{hgvs}.json"),
-//                        clinicalFolder.resolve("docmVersion.json"));
-//            } else {
-//                logger.warn("No DOCM variants found for assembly {}. Please double-check that this is the correct "
-//                        + "assembly", assemblyConfiguration.getName());
-//            }
-
-            // I am only able to download these files manually
-//            if (assemblyConfiguration.getName().equalsIgnoreCase("grch38")) {
-//                url = configuration.getDownload().getIarctp53().getHost();
-//                downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.IARCTP53_FILE).toString()));
-//
-//                ZipFile zipFile = new ZipFile(clinicalFolder.resolve(EtlCommons.IARCTP53_FILE).toString());
-//                Enumeration<? extends ZipEntry> entries = zipFile.entries();
-//                while (entries.hasMoreElements()) {
-//                    ZipEntry entry = entries.nextElement();
-//                    File entryDestination = new File(clinicalFolder.toFile(), entry.getName());
-//                    if (entry.isDirectory()) {
-//                        entryDestination.mkdirs();
-//                    } else {
-//                        entryDestination.getParentFile().mkdirs();
-//                        InputStream in = zipFile.getInputStream(entry);
-//                        OutputStream out = new FileOutputStream(entryDestination);
-//                        IOUtils.copy(in, out);
-//                        IOUtils.closeQuietly(in);
-//                        out.close();
-//                    }
-//                }
-//                saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, IARCTP53_NAME,
-//                        getVersionFromVersionLine(clinicalFolder.resolve("Disclaimer.txt"),
-//                                "The version of the database should be identified"), getTimeStamp(),
-//                        Collections.singletonList(url), clinicalFolder.resolve("iarctp53Version.json"));
-//            }
-
-            if (Files.notExists(clinicalFolder.resolve("clinvar_chunks"))) {
-                Files.createDirectories(clinicalFolder.resolve("clinvar_chunks"));
-                splitClinvar(clinicalFolder.resolve(EtlCommons.CLINVAR_XML_FILE), clinicalFolder.resolve("clinvar_chunks"));
-            }
-
-            return downloadFiles;
-        }
-        return null;
-    }
-
-    private void splitClinvar(Path clinvarXmlFilePath, Path splitOutdirPath) throws IOException {
-        BufferedReader br = FileUtils.newBufferedReader(clinvarXmlFilePath);
-        PrintWriter pw = null;
-        StringBuilder header = new StringBuilder();
-        boolean beforeEntry = true;
-        boolean inEntry = false;
-        int count = 0;
-        int chunk = 0;
-        String line;
-        while ((line = br.readLine()) != null) {
-            if (line.trim().startsWith("<ClinVarSet ")) {
-                inEntry = true;
-                beforeEntry = false;
-                if (count % 10000 == 0) {
-                    pw = new PrintWriter(new FileOutputStream(splitOutdirPath.resolve("chunk_" + chunk + ".xml").toFile()));
-                    pw.println(header.toString().trim());
-                }
-                count++;
-            }
-            if (beforeEntry) {
-                header.append(line).append("\n");
-            }
-            if (inEntry) {
-                pw.println(line);
-            }
-            if (line.trim().startsWith("</ClinVarSet>")) {
-                inEntry = false;
-                if (count % 10000 == 0) {
-                    pw.print("</ReleaseSet>");
-                    pw.close();
-                    chunk++;
-                }
-            }
+    public List<DownloadFile> downloadClinical() throws IOException, InterruptedException, CellBaseException {
+        logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(CLINICAL_VARIANT_DATA));
+        if (!speciesConfiguration.getScientificName().equals(HOMO_SAPIENS_NAME)) {
+            logger.info("{} not supported for the species {}", getDataName(CLINICAL_VARIANT_DATA),
+                    speciesConfiguration.getScientificName());
+            return Collections.emptyList();
         }
-        pw.print("</ReleaseSet>");
-        pw.close();
-        br.close();
-    }
-
-    private String getDocmVersion(Path docmIndexHtml) {
-        return getVersionFromVersionLine(docmIndexHtml, "