1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
|
/* Copyright (c) 2011 Peter Troshin
*
* JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
*
* This library is free software; you can redistribute it and/or modify it under the terms of the
* Apache License version 2 as published by the Apache Software Foundation
*
* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
* License for more details.
*
* A copy of the license is in apache_license.txt. It is also available here:
* @see: http://www.apache.org/licenses/LICENSE-2.0.txt
*
* Any republication or derived work distributed in source code form
* must include this copyright and license notice.
*/
package compbio.data.sequence;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import compbio.util.SysPrefs;
import compbio.util.annotation.Immutable;
/**
* A FASTA formatted sequence. Please note that this class does not make any
* assumptions as to what sequence it stores e.g. it could be nucleotide,
* protein or even gapped alignment sequence! The only guarantee it makes is
* that the sequence does not contain white space characters e.g. spaces, new
* lines etc
*
* @author pvtroshin
*
* @version 1.0 September 2009
*/
@XmlAccessorType(XmlAccessType.FIELD)
@Immutable
public class FastaSequence {

    /**
     * Number of sequence letters per line produced by
     * {@link #getFormattedFasta()}. Static, so it is not part of the JAXB
     * field binding.
     */
    private static final int DEFAULT_LINE_WIDTH = 80;

    /**
     * Sequence id. Not final (and possibly {@code null}) only because JAXB
     * populates it reflectively after calling the no-argument constructor.
     */
    private String id;

    // TODO what about gapped sequence here! should be indicated
    /**
     * The sequence letters, as returned by
     * {@link SequenceUtil#cleanSequence(String)} at construction time. May be
     * {@code null} when the instance was created via the no-argument
     * constructor.
     */
    private String sequence;

    FastaSequence() {
        // Default constructor for JaxB
    }

    /**
     * Creates a sequence record. The sequence is passed through
     * {@link SequenceUtil#cleanSequence(String)} before being stored, which is
     * expected to strip any whitespace characters from it.
     *
     * @param id
     *            the sequence identifier, stored as is
     * @param sequence
     *            the raw sequence letters
     */
    public FastaSequence(String id, String sequence) {
        this.id = id;
        this.sequence = SequenceUtil.cleanSequence(sequence);
    }

    /**
     * Gets the value of id
     *
     * @return the value of id
     */
    public String getId() {
        return this.id;
    }

    /**
     * Gets the value of sequence
     *
     * @return the value of sequence
     */
    public String getSequence() {
        return this.sequence;
    }

    /**
     * Counts the non-overlapping matches of a regular expression in a string,
     * scanning from left to right.
     *
     * @param theString
     *            the text to search, must not be {@code null}
     * @param theRegExp
     *            the regular expression to look for, must not be {@code null}
     * @return the number of matches found, zero if there are none
     * @throws java.util.regex.PatternSyntaxException
     *             if {@code theRegExp} is not a valid regular expression
     */
    public static int countMatchesInSequence(final String theString,
            final String theRegExp) {
        final Pattern p = Pattern.compile(theRegExp);
        final Matcher m = p.matcher(theString);
        int cnt = 0;
        while (m.find()) {
            cnt++;
        }
        return cnt;
    }

    /**
     * Returns the sequence letters wrapped at 80 characters per line. Note
     * that, despite the name, the result contains no {@code >id} header line;
     * it is exactly {@code getFormatedSequence(80)}.
     *
     * @return the sequence broken into lines of 80 letters, or an empty string
     *         if there is no sequence
     */
    public String getFormattedFasta() {
        return getFormatedSequence(DEFAULT_LINE_WIDTH);
    }

    /**
     *
     * @return one line name, next line sequence, no matter what the sequence
     *         length is
     */
    public String getOnelineFasta() {
        String fasta = ">" + getId() + SysPrefs.newlinechar;
        fasta += getSequence() + SysPrefs.newlinechar;
        return fasta;
    }

    /**
     * Format sequence per width letter in one string. Without spaces.
     * <p>
     * Lines are separated by a single {@code "\n"} character; no line
     * separator is added after the last line, even when the sequence length is
     * an exact multiple of {@code width}.
     *
     * @param width
     *            the maximum number of letters per line, must be positive
     * @return multiple line formated sequence, one line width letters length;
     *         an empty string if this object holds no sequence
     * @throws IllegalArgumentException
     *             if {@code width} is zero or negative
     */
    public String getFormatedSequence(final int width) {
        if (sequence == null) {
            return "";
        }
        // An explicit check instead of an assert: assertions are normally
        // disabled, and a zero width used to surface as an ArithmeticException.
        if (width <= 0) {
            throw new IllegalArgumentException(
                    "Line width must be positive, but was " + width);
        }
        final int length = sequence.length();
        final StringBuilder sb = new StringBuilder(length);
        int start = 0;
        // Emit every full line that is followed by more letters. Comparing
        // the remaining length (rather than start + width) cannot overflow.
        while (length - start > width) {
            sb.append(sequence, start, start + width).append('\n');
            start += width;
        }
        // Last (possibly full, possibly empty) line, without a trailing
        // newline.
        sb.append(sequence, start, length);
        return sb.toString();
    }

    /**
     *
     * @return sequence length, zero if this object holds no sequence
     */
    public int getLength() {
        return (this.sequence == null) ? 0 : this.sequence.length();
    }

    /**
     * Same as oneLineFasta
     */
    @Override
    public String toString() {
        return this.getOnelineFasta();
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: the id is hashed case
     * sensitively, the sequence case insensitively.
     */
    @Override
    public int hashCode() {
        final int prime = 17;
        int result = 1;
        result = prime * result + ((id == null) ? 0 : id.hashCode());
        result = prime * result + caseInsensitiveHash(sequence);
        return result;
    }

    /**
     * Two sequences are equal when their ids are equal (case sensitive) and
     * their sequence letters are equal ignoring case. {@code null} ids or
     * sequences are only equal to {@code null}.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof FastaSequence)) {
            // also covers obj == null
            return false;
        }
        final FastaSequence other = (FastaSequence) obj;
        final boolean sameId = (id == null)
                ? other.id == null
                : id.equals(other.id);
        if (!sameId) {
            return false;
        }
        return (sequence == null)
                ? other.sequence == null
                : sequence.equalsIgnoreCase(other.sequence);
    }

    /**
     * Hashes a string so that any two strings considered equal by
     * {@link String#equalsIgnoreCase(String)} produce the same value.
     */
    private static int caseInsensitiveHash(final String s) {
        if (s == null) {
            return 0;
        }
        int hash = 0;
        for (int i = 0; i < s.length(); i++) {
            // Same per-character folding that equalsIgnoreCase applies:
            // upper-case first, then lower-case the result.
            final char folded = Character.toLowerCase(
                    Character.toUpperCase(s.charAt(i)));
            hash = 31 * hash + folded;
        }
        return hash;
    }
}
|