File: AssemblyStats.R

package info (click to toggle)
r-bioc-cner 1.26.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 18,216 kB
  • sloc: ansic: 23,458; makefile: 6
file content (25 lines) | stat: -rw-r--r-- 646 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
### -----------------------------------------------------------------
### N50 calculation for an assembly
### Exported!
## Compute the NXX statistic (e.g. N50, N90) of an assembly file.
##
## filepath: path to the assembly; the suffix selects the reader
##           (.2bit via TwoBitFile(), .fa/.fasta via fasta.seqlengths()).
##           The suffix is matched case-insensitively.
## XX:       a single number in [0, 100], the percentage of the total
##           assembly length that must be covered (default 50, i.e. N50).
##
## Returns the length of the sequence at which the cumulative sum of the
## lengths, taken from longest to shortest, first reaches XX percent of the
## total length.
## Stops with an error for an unsupported suffix or an invalid XX.
NXX <- function(filepath, XX=50){
  ## Reject invalid thresholds up front: values above 100 can never be
  ## reached by the cumulative fraction and would silently yield NA.
  if(!is.numeric(XX) || length(XX) != 1L || is.na(XX) || XX < 0 || XX > 100){
    stop("XX must be a single number between 0 and 100!")
  }
  ## Compute the suffix once; lower-case it so that e.g. ".FA" is accepted.
  suffix <- tolower(file_ext(filepath))
  if(suffix == "2bit"){
    lengths <- seqlengths(TwoBitFile(filepath))
  }else if(suffix %in% c("fa", "fasta")){
    lengths <- fasta.seqlengths(filepath)
  }else{
    stop("The suffix can only be .2bit, .fa, .fasta!")
  }
  ## Longest first; as.numeric() drops the names and converts to double
  ## before the lengths are summed.
  lengths <- as.numeric(sort(lengths, decreasing=TRUE))
  ## First position where the cumulative fraction reaches the threshold.
  index <- which(cumsum(lengths) / sum(lengths) >= XX/100)[1]
  return(lengths[index])
}

## N50 of the assembly file `fn`: NXX() evaluated at the 50 percent threshold.
N50 <- function(fn){
  NXX(filepath=fn, XX=50)
}

## N90 of the assembly file `fn`: NXX() evaluated at the 90 percent threshold.
N90 <- function(fn){
  NXX(filepath=fn, XX=90)
}