File: genomefile.go

package info (click to toggle)
ggd-utils 0.0.7%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 188 kB
  • sloc: sh: 63; makefile: 14
file content (114 lines) | stat: -rw-r--r-- 2,345 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// read genome files
package ggd_utils

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"regexp"
	"strconv"
	"strings"

	"github.com/shenwei356/xopen"
)

// GenomeFile has the Lengths and Orders of the chromosomes.
type GenomeFile struct {
	Lengths map[string]int
	Order   map[string]int
	path    string
	ReMap   map[string]string
}

// Less checks if one chromosome occurs before the other.
func (g *GenomeFile) Less(a, b string) bool {
	return g.Order[a] <= g.Order[b]
}

func readChromosomMappings(fname string) map[string]string {
	if fname == "" {
		return nil
	}
	rdr, err := xopen.Ropen(fname)
	result := make(map[string]string)
	if err != nil {
		log.Fatalf("[gsort] unable to open chromosome maping file: %s", fname)
	}

	warned := false
	for {
		line, err := rdr.ReadString('\n')
		if len(line) > 0 {
			toks := strings.Split(strings.TrimSpace(line), "\t")
			if len(toks) == 1 {
				toks = append(toks, "[unknown]"+toks[0])
				if !warned {
					log.Printf("[gsort] warning unmappable chromosome: %s.", toks[0])
					warned = true
				}
			}
			result[toks[0]] = toks[1]
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Printf("[gsort] error reading chromosome maping file: %s. %s", fname, err)
		}
	}
	return result
}

// ReadGenomeFile returns a GenomeFile struct
func ReadGenomeFile(path string, chromsomeMappings string) (*GenomeFile, error) {

	rdr, err := xopen.Ropen(path)
	if err != nil {
		return nil, err
	}
	gf := &GenomeFile{path: path, Lengths: make(map[string]int, 50), Order: make(map[string]int, 50)}
	defer rdr.Close()
	gf.ReMap = readChromosomMappings(chromsomeMappings)

	space := regexp.MustCompile("\\s+")
	// allow us to bypass a header. found indicates when we have a usable line.
	found := false

	for {
		line, err := rdr.ReadBytes('\n')
		line = bytes.TrimSpace(line)
		if len(line) > 0 {
			if line[0] == '#' {
				continue
			}
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		if len(line) == 0 {
			continue
		}

		toks := space.Split(string(line), -1)
		chrom := toks[0]
		length, err := strconv.Atoi(toks[1])
		if err != nil {
			if !found {
				continue
			}
			return nil, err
		}
		found = true
		gf.Lengths[chrom] = length
		gf.Order[chrom] = len(gf.Order)

	}
	if !found {
		return nil, fmt.Errorf("no usable lengths found for %s\n", path)
	}
	return gf, nil
}