File: sequtils.c

package info (click to toggle)
staden 2.0.0%2Bb11-5
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 21,568 kB
  • sloc: ansic: 240,605; tcl: 65,360; cpp: 12,854; makefile: 11,201; sh: 2,952; fortran: 2,033; perl: 63; awk: 46
file content (138 lines) | stat: -rw-r--r-- 3,044 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#include "base_comp.h"
#include "text_output.h"
#include "edge.h"
#include "dna_utils.h"
#include "seq_results.h"

/*
 * Emit the four rows (symbol, count, percentage, mass) of the amino acid
 * table for columns first .. last-1 through vmessage().
 *
 * The newline that terminates the final (mass) row is deliberately NOT
 * written here; the caller emits it so that it can choose the spacing
 * that follows the table.
 */
static void print_aa_rows(const char *codes,
                          const double *comp,
                          const double *mass,
                          int seq_length,
                          int first,
                          int last)
{
    int col;

    /* one-letter symbol of each column */
    vmessage("AA ");
    for (col = first; col < last; col++) {
        vmessage(" %-5c", codes[col]);
    }
    vmessage("\n");

    /* count per column */
    vmessage("N  ");
    for (col = first; col < last; col++) {
        vmessage("%-6g", comp[col]);
    }
    vmessage("\n");

    /* count as a percentage of the examined length */
    vmessage("%%  ");
    for (col = first; col < last; col++) {
        vmessage("%-6.1f", comp[col]/seq_length*100.0);
    }
    vmessage("\n");

    /* mass per column */
    vmessage("M  ");
    for (col = first; col < last; col++) {
        vmessage("%-6.0f", mass[col]);
    }
}

/*
 * Report the composition of part of a sequence via vmessage().
 *
 * seq_name       name printed in the heading line
 * sequence       sequence data; the examined region begins at
 *                sequence[start-1]
 * start, end     range printed in the heading; end - start + 1 characters
 *                are examined
 * seq_structure  0 prints "linear", any other value prints "circular"
 *                (only used for DNA)
 * seq_type       1 selects the DNA report, any other value the protein
 *                report
 *
 * For DNA the five values filled in by get_base_comp() are printed as
 * counts and percentages labelled A, C, G, T and '-', followed by the mass
 * returned by get_base_comp_mass() for the first four of them.  For
 * protein the 25 values from get_aa_comp() and get_aa_comp_mass() are
 * printed as two tables of 13 and 12 columns.
 */
void sequence_info(char *seq_name,
		   char *sequence,
		   int start,
		   int end,
		   int seq_structure,
		   int seq_type)
{
    enum { N_BASES = 5, N_AA = 25, AA_FIRST_TABLE = 13 };
    static const char aa_codes[] = "ABCDEFGHIKLMNPQRSTVWYZX*-";
    double base_comp[N_BASES];
    double base_pct[N_BASES];
    double aa_comp[N_AA];
    double aa_mass[N_AA];
    char *region = sequence + (start - 1);
    int seq_length = end - start + 1;
    int k;

    vmessage("Sequence %s: %d to %d\n", seq_name, start, end);

    if (seq_type != 1) {
        vmessage("Protein\n");
        set_char_set(PROTEIN);
        get_aa_comp(region, seq_length, aa_comp);
        get_aa_comp_mass(aa_comp, aa_mass);

        /* first table is followed by a blank line, the second is not */
        print_aa_rows(aa_codes, aa_comp, aa_mass, seq_length,
                      0, AA_FIRST_TABLE);
        vmessage("\n\n");
        print_aa_rows(aa_codes, aa_comp, aa_mass, seq_length,
                      AA_FIRST_TABLE, N_AA);
        vmessage("\n");
        return;
    }

    if (seq_structure != 0) {
        vmessage("circular ");
    } else {
        vmessage("linear ");
    }

    vmessage("DNA\n");
    set_char_set(DNA);
    get_base_comp(region, seq_length, base_comp);

    for (k = 0; k < N_BASES; k++) {
        base_pct[k] = base_comp[k]/seq_length*100.0;
    }

    vmessage("Sequence composition\n");
    vmessage("\tA %d (%.2f%%) C %d (%.2f%%) G %d (%.2f%%) T %d (%.2f%%) - %d (%.2f%%)\n",
             (int)base_comp[0], base_pct[0],
             (int)base_comp[1], base_pct[1],
             (int)base_comp[2], base_pct[2],
             (int)base_comp[3], base_pct[3],
             (int)base_comp[4], base_pct[4]);
    vmessage("Mass %f\n", get_base_comp_mass((int)base_comp[0],
                                             (int)base_comp[1],
                                             (int)base_comp[2],
                                             (int)base_comp[3]));
}

/*
 * Return the mass of the registered sequence seq_num.
 *
 * When GetSeqType() reports 1 the sequence is handled with the base
 * composition routines: the first four values filled in by
 * get_base_comp() are truncated to int and passed to get_base_comp_mass().
 * For any other type the 25 values produced by get_aa_comp_mass() are
 * summed.
 */
double get_seq_mass (int seq_num)
{
    double total;
    int idx;

    if (GetSeqType(seq_num) != 1) {
        double comp[25];
        double per_entry[25];

        get_aa_comp(GetSeqSequence(seq_num), GetSeqLength(seq_num), comp);
        get_aa_comp_mass(comp, per_entry);

        /* accumulate in ascending index order */
        total = 0.0;
        idx = 0;
        while (idx < 25) {
            total += per_entry[idx];
            idx++;
        }
        return total;
    }

    {
        double counts[5];

        get_base_comp(GetSeqSequence(seq_num), GetSeqLength(seq_num),
                      counts);

        total = get_base_comp_mass((int)counts[0],
                                   (int)counts[1],
                                   (int)counts[2],
                                   (int)counts[3]);
    }
    return total;
}