1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
|
#' List color schemes available in ggseqlogo
#'
#' @param v If true, font names are printed to stderr. Otherwise, color scheme names are returned as a character vector
#' @export
list_col_schemes <- function(v=T){
col_schemes = c('auto', 'chemistry', 'chemistry2','hydrophobicity', 'nucleotide', 'nucleotide2',
'base_pairing', 'clustalx', 'taylor')
if(!v) return(col_schemes)
message('Available ggseqlogo color schemes:')
for(f in col_schemes) message('\t', f)
}
# Get color scheme
# @param col_scheme name of color scheme
# @param seq_type sequence type of color scheme
get_col_scheme = function(col_scheme, seq_type='auto'){
# Check if user-defined color scheme
if(is.data.frame(col_scheme)){
if(!'ggseqlogo_cs' %in% class(col_scheme))
stop('Colour scheme must be generated using "make_col_scheme" function')
return(col_scheme)
}
# Get ambigious colour scheme
col_scheme = match.arg(col_scheme, list_col_schemes(F))
# Get default color scheme for sequence type
if(col_scheme == 'auto'){
if(seq_type == 'auto') stop('"col_scheme" and "seq_type" cannot both be "auto"')
col_scheme = switch(tolower(seq_type), aa = 'chemistry',
dna = 'nucleotide', rna = 'nucleotide',
other='nucleotide')
}
# Pick from default color schemes
cs = switch(col_scheme,
# Color scheme based on chemistry of amino acids
chemistry2 = data.frame(
letter = c('G', 'S', 'T', 'Y', 'C', 'N', 'Q', 'K', 'R', 'H', 'D', 'E', 'P', 'A', 'W', 'F', 'L', 'I', 'M', 'V'),
group = c(rep('Polar', 5), rep('Neutral', 2), rep('Basic', 3), rep('Acidic', 2), rep('Hydrophobic', 8)),
col = c(rep('#058644', 5), rep('#720091', 2), rep('#0046C5', 3), rep('#C5003E', 2), rep('#2E2E2E', 8)),
stringsAsFactors = F
),
# Color scheme based on chemistry of amino acids
chemistry = data.frame(
letter = c('G', 'S', 'T', 'Y', 'C', 'N', 'Q', 'K', 'R', 'H', 'D', 'E', 'P', 'A', 'W', 'F', 'L', 'I', 'M', 'V'),
group = c(rep('Polar', 5), rep('Neutral', 2), rep('Basic', 3), rep('Acidic', 2), rep('Hydrophobic', 8)),
col = c(rep('#109648', 5), rep('#5E239D', 2), rep('#255C99', 3), rep('#D62839', 2), rep('#221E22', 8)),
stringsAsFactors = F
),
# Hydrophobicity index (PMID: 7108955) from -4.5 to 4.5
hydrophobicity = data.frame(
letter = c('I', 'V', 'L', 'F', 'C', 'M', 'A', 'G', 'T', 'W',
'S', 'Y', 'P', 'H', 'D', 'E', 'N', 'Q', 'K', 'R'),
group = c(4.5, 4.2, 3.8, 2.8, 2.5, 1.9, 1.8, -0.4, -0.7, -0.9, -0.8,
-1.3, -1.6, -3.2, -3.5, -3.5, -3.5, -3.5, -3.9, -4.5),
stringsAsFactors=F
),
# Colour based on nucleotide
nucleotide2 = data.frame(
letter = c('A', 'C', 'G', 'T', 'U'),
col = c('darkgreen', 'blue', 'orange', 'red', 'red'),
stringsAsFactors = F
),
#alt red BA1200
nucleotide = data.frame(
letter = c('A', 'C', 'G', 'T', 'U'),
col = c('#109648', '#255C99', '#F7B32B', '#D62839', '#D62839'),
stringsAsFactors = F
),
base_pairing = data.frame(
letter = c('A', 'T', 'U', 'G', 'C'),
group = c(rep('Weak bonds', 3), rep('Strong bonds', 2)),
col = c(rep('darkorange', 3), rep('blue', 2)),
stringsAsFactors = F
),
# ClustalX color scheme:
# http://www.jalview.org/help/html/colourSchemes/clustal.html
clustalx = data.frame(
letter = c('W', 'L', 'V', 'I', 'M', 'F', 'A', 'R', 'K', 'T', 'S', 'N', 'Q', 'D', 'E', 'H', 'Y', 'C', 'G', 'P'),
col = c(rep('#197FE5', 7), rep('#E53319', 2), rep('#19CC19', 4), rep('#CC4CCC', 2),
rep('#19B2B2', 2), '#E57F7F', '#E5994C', '#B0B000'),
stringsAsFactors = F
),
# Taylor color scheme (PMID: 9342138)
taylor = data.frame(
letter = c('D','S','T','G','P','C','A','V','I','L','M','F','Y','W','H','R','K','N','Q','E'),
col = c('#FF0000','#FF3300','#FF6600','#FF9900','#FFCC00','#FFFF00','#CCFF00','#99FF00',
'#66FF00','#33FF00','#00FF00','#00FF66','#00FFCC','#00CCFF','#0066FF','#0000FF',
'#6600FF','#CC00FF','#FF00CC','#FF0066'),
stringsAsFactors = F
)
)
if(!'group' %in% names(cs)) cs$group = cs$letter
# Set attributes
attr(cs, 'cs_label') = col_scheme
class(cs) = c('data.frame','ggseqlogo_cs')
return(cs)
}
#' Create new sequence logo color scheme
#'
#' @param chars Vector of one letter characters
#' @param groups Vector of groups for letters with same length as chars (optional if cols parameter is provided)
#' @param cols Vector of colors with same length as chars (optional if values parameter is provided)
#' @param values Vector of numerical values with same length as chars
#' @param name Name of color scheme
#'
#' @export
#'
#' @importFrom grDevices col2rgb
#' @examples
#'
#' # Discrete color scheme examples
#' cs1 = make_col_scheme(chars=c('A', 'T', 'G', 'C'), groups=c('g1', 'g1', 'g2', 'g2'),
#' cols=c('red', 'red', 'blue', 'blue'), name='custom1')
#'
#' cs2 = make_col_scheme(chars=c('A', 'T', 'G', 'C'), cols=c('red', 'red', 'blue', 'blue'),
#' name='custom2')
#'
#' # Quantitative color scheme
#' cs3 = make_col_scheme(chars=c('A', 'T', 'G', 'C'), values=1:4, name='custom3')
make_col_scheme <- function(chars=NULL, groups=NULL, cols=NULL, values=NULL, name=''){
if(is.null(chars) | any(nchar(chars) != 1) | !is.character(chars))
stop('"chars" must be a character vector of one letter characters')
if(is.null(values)){
# Discrete colour scheme
# Error check lengths
if(length(chars) != length(cols)) stop('"chars" and "cols" must have same length')
# Error check types
if(!is.character(cols)) stop('"cols" must be a character vector')
# Check valid colours
tmp = col2rgb(cols); rm(tmp)
if(is.null(groups)) groups = chars
cs = data.frame( letter=chars, group=groups, col=cols, stringsAsFactors = F )
}else{
# Quantitative color scheme
if(length(chars) != length(values)) stop('"chars" and "values" must have same length')
cs = data.frame( letter=chars, group=values, stringsAsFactors=F )
}
# Remove duplicate letters
cs = cs[!duplicated(cs$letter),]
# Set attributes
attr(cs, 'cs_label') = name
class(cs) = c('data.frame','ggseqlogo_cs')
return(cs)
}
|