File: check_parse_phylip.c

package info (click to toggle)
gubbins 3.4.3-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 11,008 kB
  • sloc: python: 5,347; ansic: 5,080; sh: 242; makefile: 130; cpp: 27
file content (94 lines) | stat: -rw-r--r-- 3,419 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <check.h>
#include "check_parse_phylip.h"
#include "helper_methods.h"
#include "parse_phylip.h"


// Loads the small three-sample fixture and walks through the parse_phylip
// accessors: sample count, name -> index lookup, sample-name retrieval,
// filtered/rotated base extraction, per-column SNP detection, and in-place
// editing of a single base.
//
// Typed Check assertions (ck_assert_int_eq / ck_assert_str_eq) are used so a
// failing run reports the actual and expected values rather than only the
// text of the failed expression.
START_TEST (phylip_read_in_small_file)
{
  load_sequences_from_multifasta_file("../tests/data/small_phylip_file.aln");

  // Sample bookkeeping: three samples, indexed in the order asserted below.
  ck_assert_int_eq( number_of_samples_from_parse_phylip(), 3);
  ck_assert_int_eq( find_sequence_index_from_sample_name("2956_6_1"), 0);
  ck_assert_int_eq( find_sequence_index_from_sample_name("2956_6_2"), 1);
  ck_assert_int_eq( find_sequence_index_from_sample_name("2956_6_3"), 2);

  // NOTE(review): the callee fills in the pointers; who owns the pointed-to
  // storage is not visible from this file - confirm whether it must be freed.
  char *sample_names[3];
  get_sample_names_from_parse_phylip(sample_names);
  ck_assert_str_eq( sample_names[0], "2956_6_1");
  ck_assert_str_eq( sample_names[1], "2956_6_2");
  ck_assert_str_eq( sample_names[2], "2956_6_3");

  // Five-column reference with three non-'*' entries; three output strings
  // are requested and each is expected to be three characters long.
  // NOTE(review): presumably '*' marks columns to drop and each output string
  // holds one retained column across the samples - confirm in parse_phylip.c.
  // Ownership of the returned strings is likewise not visible here.
  char *reference_bases = "*ACG*";
  char *filtered_bases_for_snps[3];

  filter_sequence_bases_and_rotate(reference_bases, filtered_bases_for_snps, 3);
  ck_assert_str_eq( filtered_bases_for_snps[0], "AAT");
  ck_assert_str_eq( filtered_bases_for_snps[1], "CGT");
  ck_assert_str_eq( filtered_bases_for_snps[2], "GGT");

  // Column 0 is reported SNP-free against reference 'A'; columns 1 and 2 are not.
  ck_assert_int_eq( does_column_contain_snps(0, 'A'), 0);
  ck_assert_int_eq( does_column_contain_snps(1, 'A'), 1);
  ck_assert_int_eq( does_column_contain_snps(2, 'A'), 1);
  // bad reference base
  ck_assert_int_eq( does_column_contain_snps(0, 'X'), 1);

  // NOTE(review): the callee is given no buffer length; 10 bytes is enough for
  // the 5-base sequences expected here plus the terminator.
  char sequence_bases[10];
  get_sequence_for_sample_name(sequence_bases, "2956_6_2");
  ck_assert_str_eq( sequence_bases, "AAGGC");

  // Overwrite one base of sequence index 1 (2956_6_2 per the lookup above) at
  // offset 4 and confirm the edit is visible when the sequence is re-read.
  update_sequence_base('X', 1, 4);
  get_sequence_for_sample_name(sequence_bases, "2956_6_2");
  ck_assert_str_eq( sequence_bases, "AAGGX");
}
END_TEST

// Loads an alignment containing gap ('-') and 'N' characters, checks the
// per-column SNP detection results on it, then checks gap propagation from
// two children into a parent sequence.
//
// Typed Check assertions are used so failures print the actual values.
START_TEST (phylip_read_in_file_with_gaps)
{
  load_sequences_from_multifasta_file("../tests/data/alignment_with_gaps.aln");

  // Expected SNP flag for each (column, reference base) pair in the fixture.
  ck_assert_int_eq( does_column_contain_snps(0, 'A'), 0);
  ck_assert_int_eq( does_column_contain_snps(1, '-'), 0);
  ck_assert_int_eq( does_column_contain_snps(2, '-'), 0);
  ck_assert_int_eq( does_column_contain_snps(3, 'T'), 0);
  // Column 4 is flagged whether the reference is a base or a gap.
  ck_assert_int_eq( does_column_contain_snps(4, 'G'), 1);
  ck_assert_int_eq( does_column_contain_snps(4, '-'), 1);
  ck_assert_int_eq( does_column_contain_snps(5, 'N'), 0);

  // Check you can fill in parent bases with gaps if all children have the same gap at the same place
  int child_indices[2] = {1,2};
  fill_in_unambiguous_gaps_in_parent_from_children(0, child_indices, 2);

  // NOTE(review): presumably "2956_6_1" is sequence index 0 (the parent passed
  // above) in this fixture - confirm against the .aln file.
  char sequence_bases[10];
  get_sequence_for_sample_name(sequence_bases, "2956_6_1");
  ck_assert_str_eq( sequence_bases, "A-N-CT");
}
END_TEST

// Loads an alignment whose parent sequence contains gaps, runs the
// fill-in-from-children routine with sequence index 2 as the parent and
// indices 0 and 1 as its children, and checks the resulting sequence stored
// under the sample name "parent".
//
// ck_assert_str_eq is used so a failure prints the actual sequence.
START_TEST (phylip_fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap)
{
  load_sequences_from_multifasta_file("../tests/data/alignment_with_gap_in_parent.aln");

  int child_indices[2] = {0,1};
  fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap(2, child_indices, 2);

  // NOTE(review): presumably "parent" is sequence index 2 in this fixture -
  // confirm against the .aln file.
  char sequence_bases[10];
  get_sequence_for_sample_name(sequence_bases, "parent");
  ck_assert_str_eq( sequence_bases, "AC--");
}
END_TEST



// Builds the Check suite for the phylip parsing tests: one test case,
// "phylip_files", holding the three tests defined in this file, registered
// in the order they are defined. Returns the newly created suite.
Suite * parse_phylip_suite(void)
{
  Suite *phylip_suite = suite_create ("Parsing a phylip file");
  TCase *phylip_case = tcase_create ("phylip_files");

  // Register each test with the single test case.
  tcase_add_test (phylip_case, phylip_read_in_small_file);
  tcase_add_test (phylip_case, phylip_read_in_file_with_gaps);
  tcase_add_test (phylip_case, phylip_fill_in_unambiguous_bases_in_parent_from_children_where_parent_has_a_gap);

  // Attach the populated case to the suite before handing it back.
  suite_add_tcase (phylip_suite, phylip_case);

  return phylip_suite;
}