1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
|
package fastx
import (
"io"
"github.com/shenwei356/bio/seq"
)
// GetSeqNames returns the names of all records in a FASTA/Q file, in the
// order they are read.
//
// It sets the package-level seq.ValidateSeq to false before reading and does
// not restore it.
//
// If the reader cannot be created, or a read fails with anything other than
// io.EOF, a nil slice and the error are returned. When the first read already
// returns io.EOF, the result is an empty, non-nil slice.
func GetSeqNames(file string) ([]string, error) {
	names := []string{}
	seq.ValidateSeq = false
	reader, err := NewDefaultReader(file)
	if err != nil {
		// Report the failure; returning a nil error here would make an
		// unreadable file indistinguishable from success.
		return nil, err
	}
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		// string(...) copies the name out of the record.
		names = append(names, string(record.Name))
	}
	return names, nil
}
// GetSeqNumber returns the number of records in a FASTA/Q file.
//
// It sets the package-level seq.ValidateSeq to false before reading and does
// not restore it.
//
// If the reader cannot be created, or a read fails with anything other than
// io.EOF, 0 and the error are returned.
func GetSeqNumber(file string) (int, error) {
	n := 0
	seq.ValidateSeq = false
	reader, err := NewDefaultReader(file)
	if err != nil {
		// Report the failure; returning a nil error here would make an
		// unreadable file look like a file with zero records.
		return 0, err
	}
	for {
		_, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return 0, err
		}
		n++
	}
	return n, nil
}
// GetSeqs returns the fastx records of a file.
// When alphabet is nil or seq.Unlimit, the alphabet is detected automatically.
// When idRegexp is "", the default idRegexp ( ^([^\s]+)\s? ) is used.
// bufferSize and chunkSize are passed unchanged to the reader's ChunkChan.
//
// On failure, the records collected so far (possibly none) are returned
// together with the error.
func GetSeqs(file string, alphabet *seq.Alphabet, bufferSize int, chunkSize int, idRegexp string) ([]*Record, error) {
	records := []*Record{}
	reader, err := NewReader(alphabet, file, idRegexp)
	if err != nil {
		return records, err
	}
	for chunk := range reader.ChunkChan(bufferSize, chunkSize) {
		// The err from NewReader is always nil at this point, so testing it
		// here could never fire; a read failure has to be taken from the
		// chunk itself.
		// NOTE(review): returning early stops draining the channel — confirm
		// the reader's producer tolerates an abandoned consumer.
		if chunk.Err != nil {
			return records, chunk.Err
		}
		records = append(records, chunk.Data...)
	}
	return records, nil
}
// GetSeqsMap loads the records of a file through GetSeqs and indexes them by
// record name. All arguments are forwarded to GetSeqs unchanged.
//
// When several records share a name, the one read last is kept. If GetSeqs
// fails, an empty map is returned together with its error.
func GetSeqsMap(file string, alphabet *seq.Alphabet, bufferSize int, chunkSize int, idRegexp string) (map[string]*Record, error) {
	all, err := GetSeqs(file, alphabet, bufferSize, chunkSize, idRegexp)
	if err != nil {
		return make(map[string]*Record), err
	}

	byName := make(map[string]*Record)
	for i := range all {
		rec := all[i]
		byName[string(rec.Name)] = rec
	}
	return byName, nil
}
// GuessAlphabet opens the file with a default reader, reads one record, and
// returns the reader's alphabet along with whether the reader reports FASTQ.
//
// If that first read returns io.EOF, the reader's alphabet, false and io.EOF
// are returned. Any other failure (creating the reader or reading) yields a
// nil alphabet, false and the error.
func GuessAlphabet(file string) (*seq.Alphabet, bool, error) {
	reader, err := NewDefaultReader(file)
	if err != nil {
		return nil, false, err
	}

	if _, err = reader.Read(); err != nil {
		if err != io.EOF {
			return nil, false, err
		}
		return reader.Alphabet(), false, io.EOF
	}

	return reader.Alphabet(), reader.IsFastq, nil
}
|