1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
## Function to extract tip and edge labels from newick formatted strings
## useful when the tree is too small to be read in by ape/rncl.
## tr needs to be a newick formatted tree string
## - missing tips are removed (OK for OTL as it won't happen)
tree_to_labels <- function(tr, remove_quotes = TRUE) {
n_right <- unlist(gregexpr("\\)", tr))
n_left <- unlist(gregexpr("\\(", tr))
if (n_right[1] == -1) n_right <- 0 else n_right <- length(n_right)
if (n_left[1] == -1) n_left <- 0 else n_left <- length(n_left)
if (!identical(n_right, n_left)) {
stop("invalid newick string, numbers of ( and ) don't match")
}
## remove white spaces
tr <- gsub("\\s+", "", tr)
## remove branch lengths
tr <- gsub(":[0-9]+(\\.[0-9]+)?", "", tr)
## TODO?: remove comments
if (n_right < 1) {
## if only 1 tip
tip_lbl <- gsub(";$", "", tr)
edge_lbl <- character(0)
} else {
## extract edge labels
edge_lbl <- unlist(strsplit(tr, ")"))
edge_lbl <- grep("^[^\\(]", edge_lbl, value = T)
edge_lbl <- gsub("(,|;).*$", "", edge_lbl)
edge_lbl <- edge_lbl[nzchar(edge_lbl)]
## extract tips
tip_lbl <- unlist(strsplit(tr, ","))
tip_lbl <- gsub("^\\(*", "", tip_lbl)
tip_lbl <- gsub("\\).*$", "", tip_lbl)
tip_lbl <- tip_lbl[nzchar(tip_lbl)]
}
if (remove_quotes) {
tip_lbl <- gsub("^(\\\"|\\\')(.+)(\\\'|\\\")$", "\\2", tip_lbl)
}
list(tip_label = tip_lbl, edge_label = edge_lbl)
}
|