11. Run Command in CGAP Project

11.1. Make Single Data Source File

11.1.1. Run VEP

./bin/ensembl-vep-release-99/vep \
        -i input_novel_indels.vcf \
        -o input_novel_indels.vep.vcf \
        --hgvs \
        --fasta GRCh38_full_analysis_set_plus_decoy_hla.fa \
        --assembly GRCh38 \
        --use_given_ref \
        --offline \
        --cache_version 99 \
        --dir_cache ./bin/nonindexed_vep_cache/homo_sapiens_vep \
        --everything \
        --force_overwrite \
        --vcf \
        --plugin MaxEntScan,./bin/VEP_plugins-release-99/fordownload \
        --plugin TSSDistance \
        --dir_plugins ./bin/VEP_plugins-release-99 \
        --plugin SpliceRegion,Extended

11.1.2. Download and Preprocess Source for Micro Annotation

mutanno download \
        -source_path datasource_directory \
        -source gnomad \
        -version latest \
        -refversion hg38

mutanno download \
        -source_path datasource_directory \
        -source clinvar \
        -version latest \
        -refversion hg38

mutanno preprocess \
        -infile datasource_directory/SPLICEAI/spliceai_scores.raw.snv.hg38.vcf.gz \
        -ds datastructure_microannot_v0.4.4.json \
        -out datasource_directory/additional_novel_indels.vep.microannot.mti \
        -spliceai2mti

mutanno makedata \
        -ds data_structure.json \
        -out single_datasource_file.tsi \
        -vartype SNV \
        -blocksize 1000

11.1.3. Download and Preprocess Source for Full Annotation

mutanno download \
        -source_path datasource_directory \
        -source gnomAD \
        -version latest \
        -refversion hg38

mutanno download \
        -source_path datasource_directory \
        -source CLINVAR \
        -version latest \
        -refversion hg38

mutanno download \
        -source_path datasource_directory \
        -source UK10K \
        -version latest \
        -refversion hg38

mutanno makedata \
        -ds data_structure.json \
        -out single_datasource_file.tsi \
        -vartype SNV \
        -blocksize 1000

11.1.4. Download Data Source for Gene Annotation

mutanno download \
        -source_path datasource_directory \
        -source gnomAD \
        -version latest \
        -refversion hg38

mutanno download \
        -source_path datasource_directory \
        -source CLINVAR \
        -version latest \
        -refversion hg38

mutanno download \
        -source_path datasource_directory \
        -source UK10K \
        -version latest \
        -refversion hg38

11.2. Convert VEP to .mti for novel InDels

11.2.1. Run VEP

./bin/ensembl-vep-release-99/vep \
        -i input_novel_indels.vcf \
        -o input_novel_indels.vep.vcf \
        --hgvs \
        --fasta GRCh38_full_analysis_set_plus_decoy_hla.fa \
        --assembly GRCh38 \
        --use_given_ref \
        --offline \
        --cache_version 99 \
        --dir_cache ./bin/nonindexed_vep_cache/homo_sapiens_vep \
        --everything \
        --force_overwrite \
        --vcf \
        --plugin MaxEntScan,./bin/VEP_plugins-release-99/fordownload \
        --plugin TSSDistance \
        --dir_plugins ./bin/VEP_plugins-release-99 \
        --plugin SpliceRegion,Extended

11.2.2. Make Additional .mti for Novel InDels (Micro Annotation)

mutanno preprocess \
        -infile input_novel_indels.vep.vcf \
        -ds datastructure_microannot_v0.4.4.json \
        -out additional_novel_indels.vep.microannot.mti \
        -vep2mti

bgzip -c additional_novel_indels.vep.microannot.mti > additional_novel_indels.vep.microannot.mti.gz
tabix -f -p vcf additional_novel_indels.vep.microannot.mti.gz;

11.2.3. Make Additional .mti for Novel InDels (Full Annotation)

mutanno preprocess \
        -infile input_novel_indels.vep.vcf \
        -out additional_novel_indels.vep.fullannot.mti \
        -vep2mti

bgzip -c additional_novel_indels.vep.fullannot.mti > additional_novel_indels.vep.fullannot.mti.gz
tabix -f -p vcf additional_novel_indels.vep.fullannot.mti.gz;

11.3. Annotation

11.3.1. Run Micro-Annotation

mutanno annot \
        -vcf input.vcf \
        -ds datastructure_microannot_v0.4.5.json \
        -out output.annot.vcf \
        -sourcefile microannot_datasource.v0.4.4_200614.mti.gz additional.mti.gz \
            additional_novel_indels.vep.microannot.mti.gz \
        -split_multi_allelic_variant \
        -genoinfo \
        -use_raw_source

Example input VCF (column header and one record):
 #CHROM      POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA12877_sample  NA12878_sample  NA12879_sample
 chr2        55544025        rs1045910       A       G       3047.94 .       AC=4;AF=0.667;AN=6;BaseQRankSum=0.502;DB;DP=148;ExcessHet=3.01;FS=1.374;MLEAC=4;MLEAF=0.667;MQ=60.00;MQRankSum=0.00;QD=20.59;ReadPosRankSum=0.549;SOR=0.709     GT:AD:DP:GQ:PL  0/1:27,20:47:99:534,0,756       1/1:0,50:50:99:1717,150,0       0/1:23,28:51:99:810,0,621

The same record in the annotated output (output.annot.vcf):
 #CHROM      POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA12877_sample  NA12878_sample  NA12879_sample
 chr2        55544025        rs1045910       A       G       3047.94 .       AC=4;AF=0.667;AN=6;BaseQRankSum=0.502;DB;DP=148;ExcessHet=3.01;FS=1.374;MLEAC=4;MLEAF=0.667;MQ=60.00;MQRankSum=0.00;QD=20.59;ReadPosRankSum=0.549;SOR=0.709;SAMPLEGENO=0/1|A/G|27/20|NA12877_sample,1/1|G/G|0/50|NA12878_sample,0/1|A/G|23/28|NA12879_sample;VEP=ENSG00000163001|ENST00000339012|Transcript|missense_variant|CFAP36|protein_coding,ENSG00000163001|ENST00000349456|Transcript|missense_variant|CFAP36|protein_coding,ENSG00000163001|ENST00000406691|Transcript|downstream_gene_variant|CFAP36|protein_coding,ENSG00000163001|ENST00000407816|Transcript|missense_variant~splice_region_variant|CFAP36|protein_coding,ENSG00000163001|ENST00000481791|Transcript|non_coding_transcript_exon_variant|CFAP36|retained_intron,ENSG00000163001|ENST00000490934|Transcript|non_coding_transcript_exon_variant|CFAP36|processed_transcript,ENSG00000275052|ENST00000611717|Transcript|downstream_gene_variant|PPP4R3B|protein_coding,ENSG00000275052|ENST00000616288|Transcript|downstream_gene_variant|PPP4R3B|protein_coding,ENSG00000275052|ENST00000616407|Transcript|downstream_gene_variant|PPP4R3B|protein_coding;gnomADgenome=9.40488e-01;SpliceAI=0.10       GT:AD:DP:GQ:PL  0/1:27,20:47:99:534,0,756       1/1:0,50:50:99:1717,150,0       0/1:23,28:51:99:810,0,621

11.3.2. Run Full-Annotation

mutanno annot \
        -vcf input.vcf \
        -ds datastructure_fullannot_v0.4.8.json \
        -out output.vcf \
        -sourcefile fullannot_source_file.mti.gz \
            additional_novel_indels.vep.fullannot.mti.gz \
        -hg19 \
        -chain hg38ToHg19.over.chain.gz \
        -clean_tag MUTANNO SpliceAI CLINVAR gnomADgenome

11.4. Gene annotation

mutanno download \
        -source_path datasource_directory \
        -source all \
        -version latest \
        -refversion hg38 \
        -websource mutanno

mutanno makedata \
        -ds tests/data/datastructure_gene_v0.4.6ds.json \
        -out mvp_gene_datasource_v0.4.6.coding_gene_main_chrom \
        -vartype CODING_GENE_MAIN_CHROM \
        -outtype json

gzip -c mvp_gene_datasource_v0.4.6.coding_gene_main_chrom.json > mvp_gene_datasource_v0.4.6.coding_gene_main_chrom.json.gz