This is SNPfiltR v.1.0.2
Detailed usage information is available at: devonderaad.github.io/SNPfiltR/
If you use SNPfiltR in your published work, please cite the following papers:
DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 22, 2443-2453. http://doi.org/10.1111/1755-0998.13618
Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
Code
library(vcfR)
***** *** vcfR *** *****
This is vcfR 1.15.0
browseVignettes('vcfR') # Documentation
citation('vcfR') # Citation
***** ***** ***** *****
Code
library(ggplot2)library(stringr)library(StAMPP)
Loading required package: pegas
Loading required package: ape
Attaching package: 'pegas'
The following object is masked from 'package:ape':
mst
The following objects are masked from 'package:vcfR':
getINFO, write.vcf
Registered S3 method overwritten by 'ade4':
method from
print.amova pegas
Code
library(adegenet)
Loading required package: ade4
Attaching package: 'ade4'
The following object is masked from 'package:pegas':
amova
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ dplyr::where() masks ape::where()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#reorder sampling file to match order of samples in vcfsample.info<-sample.info[match(colnames(vcfR@gt)[-1], sample.info$Desired_Sequence_Name),]sample.info$Desired_Sequence_Name ==colnames(vcfR@gt)[-1]
#add in locality to sample name for PCA labelpopmap$new_id<-c("Honduras_ao_ANSP_90568","Belize_BC_107","Belize_BC_108","Belize_BC_109","Belize_BC_A112","Belize_BC_A113","Belize_BC_A114","Belize_BC_A115","Belize_BC_A116","Belize_BC_A117","Belize_BC_A118","Tabasco_LSUMZ_23890","Oaxaca_LSUMZ_33050","Chiapas_LSUMZ_39731","Colima_LSUMZ_43831","Colima_LSUMZ_43832","SCP_MLZ_1105","Tamaulipas_MLZ_32244","Veracruz_MLZ_35920","Tamaulipas_MLZ_39530","Tamaulipas_MLZ_40633","Tamaulipas_MLZ_40634","Tamaulipas_MLZ_40635","San_Luis_Potosi_MLZ_41497","Oaxaca_MLZ_45517","Tamaulipas_MLZ_48333","Guerrero_MLZ_50773","Guerrero_MLZ_50774","Guerrero_MLZ_50775","Veracruz_MLZ_59507","SCP_MLZ_70063","SCP_MLZ_70074","SCP_SP_1","SCP_SP_2","SCP_SP_3","SCP_SP_4","SCP_SP_5","SCP_SP_6","SCP_SP_7","SCP_SP_8","SCP_SP_831","SCP_SP_832","SCP_SP_833","SCP_SP_834","SCP_SP_835","SCP_SP_836","SCP_SP_837","SCP_SP_838","SCP_SP_839","SCP_SP_840","Tabasco_UMMZ_103984","Michoacán_UMMZ_130517","Nayarit_UMMZ_95618","Nayarit_UMMZ_95619")popmap
#outliers: Tamaulipas_MLZ_32244 appears to be contaminated above - removedvcfR <- vcfR[, !colnames(vcfR@gt) %in%c("ao_MLZ_32244")]#subset popmap to only include retained individualspopmap<-popmap[popmap$id %in%colnames(vcfR@gt),]#check missing datamiss<-missing_by_sample(vcfR)
No popmap provided
Code
vcfR.trim<-missing_by_sample(vcfR, cutoff =0.80)
7 samples are above a 0.8 missing data cutoff, and were removed from VCF
Code
#check missing by SNPmissing_by_snp(vcfR.trim)
cutoff is not specified, exploratory visualizations will be generated
Warning in ggridges::geom_density_ridges(jittered_points = TRUE, position =
"raincloud", : Ignoring unknown parameters: `size`
#labeled pca#subset popmap to only include retained individualspopmap<-popmap[popmap$id %in%colnames(vcfR.trim.80@gt),]#convert each to genlightgena<-vcfR2genlight(vcfR.trim.80)#perform PCAdi.pca<-glPca(gena, nf=4)#isolate PCA scores as a dataframedi.pca.scores<-as.data.frame(di.pca$scores)#reorder sampling file to match alphabetical orderpopmap<-popmap %>%slice(order(factor(id, levels =rownames(di.pca.scores))))#check that they all matchrownames(di.pca.scores) == popmap$id