library("fitdistrplus") library("doParallel") library("data.table") #load the file that descibes all the possible SNVs on the target panel Panel_subs=as.data.frame(fread(file.choose(),header=T,sep="\t")) Panel_subs=Panel_subs[,c(1,2,4,5)] colnames(Panel_subs)=c(“Chr”,”Start”,”Ref”,”Alt”) #load all the samples from Varscan (ONLY MAPQ filter is being used) d="/F1_FILES/" # CHANGE PATH HERE. IT SHOULD CONTAIN THE PATH TO THE FOLDER THAT CONTAINS THE FILES FROM PART4 f=list.files(d,pattern = "F1") #POSITION SPECIFIC ERROR CORRECTION IS BEING DONE ON THE F1 FILES VAF=as.data.frame(matrix(data=NA,nrow=dim(Panel_subs)[1],ncol=4+length(f))) colnames(VAF)[1:4]=c("Chrom","Position","Ref","VarAllele") VAF[,1:4]=as.data.frame(cbind(Panel_subs$Chr,Panel_subs$Start,Panel_subs$Ref,Panel_subs$Alt)) i=4 for (n in f){ i=i+1 print(i-4) assign("x",as.data.frame(fread(paste0(d,n),header=T,sep="\t"))) x=x[which(x$VarAllele!=""),] index=which((x$MapQual1<59 & x$MapQual1!=0) | (x$MapQual2<59 & x$MapQual2!=0)) if(length(index)>0){x=x[-index,]} #REMOVE POSTIONS WITH LOW MAPING QUALITY index=match(paste(x$Chrom,x$Position,x$Ref,x$VarAllele),paste(VAF$Chrom,VAF$Position,VAF$Ref,VAF$VarAllele)) VAF[index[which(!is.na(index))],i]=x$Reads2[which(!is.na(index))]/ (x$Reads1[which(!is.na(index))]+x$Reads2[which(!is.na(index))]) colnames(VAF)[i]=paste0(substr(n,0,12)) }