File: AssemblyStatistics.t

package info (click to toggle)
roary 3.13.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 3,944 kB
  • sloc: perl: 10,536; sh: 211; makefile: 9
file content (121 lines) | stat: -rw-r--r-- 4,562 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use Test::Files;

# Put ./lib (resolved against the current working directory) at the front of
# @INC during compilation, so it is searched first by the module load below.
BEGIN { unshift( @INC, './lib' ) }

# Import the test functions and check that the module under test can be
# loaded. use_ok() is itself reported as a test.
BEGIN {
    use Test::Most;
    use_ok('Bio::Roary::AssemblyStatistics');
}

# --- Fixture: t/data/input_block_spreadsheet.csv ---------------------------
# $obj is declared at file scope because later sections of this script
# reassign it to objects built from other fixtures.
my $obj = Bio::Roary::AssemblyStatistics->new( spreadsheet => 't/data/input_block_spreadsheet.csv' );
ok( $obj, 'initialise spreadsheet' );

# The keys of _genes_to_rows, sorted with Perl's default string sort.
is_deeply(
    [ sort keys %{ $obj->_genes_to_rows } ],
    [
        qw(
          SBOV29371 SBOV38871 SBOV43201  STY3593    STY4162    bcsC_1     betC_2     comM_2
          dmsA4_1   dosC      dsbA_3     fadH_1     fimD_3     fliB_2     fliF       ftsN
          gatY_1    glfT2     group_1000 group_1001 group_1004 group_1006 group_1009 group_220
          group_277 group_281 group_283  group_284  group_288  hemD       hsrA_2     icsA
          kdpD      ligB_1    marT_1     nepI       rffH       rpoS       selA_1     speC_3
          sptP      srgB      stp        tmcA       tub        yadA       ybbW_1     yhaO_2
          yicJ_1    yigZ
        )
    ],
    'all gene rows available'
);

# ordered_genes is compared element by element, so the order below matters.
is_deeply(
    $obj->ordered_genes,
    [
        qw(
          dmsA4_1    group_1000 group_1001 SBOV43201 dosC      stp    fliB_2 fliF
          dsbA_3     srgB       fimD_3     betC_2    tmcA      tub    rffH   hemD
          group_1006 STY3593    group_1004 yigZ      group_220 glfT2  kdpD   speC_3
          ybbW_1     sptP       SBOV29371  rpoS      fadH_1    yhaO_2 bcsC_1 STY4162
          yadA       ligB_1     icsA       marT_1    selA_1    nepI   gatY_1 SBOV38871
          group_288  hsrA_2     group_281  group_283 group_284 yicJ_1 ftsN   group_277
          group_1009 comM_2
        )
    ],
    'ordered genes'
);

# Expected mapping from sample name to column index, listed by index.
is_deeply(
    $obj->sample_names_to_column_index,
    {
        oneblock             => 14,
        oneblockrev          => 15,
        contigwithgaps       => 16,
        nocontigs            => 17,
        threeblocks          => 18,
        threeblocksinversion => 19,
    },
    'sample names to column index'
);

# Per-sample block statistics.
# Each case is [ sample name, expected statistics, test description ].
my @block_cases = (
    [ 'oneblock',       { num_blocks => 1,  largest_block_size => 50 }, 'one block' ],
    [ 'oneblockrev',    { num_blocks => 1,  largest_block_size => 50 }, 'one block reversed' ],
    [ 'contigwithgaps', { num_blocks => 1,  largest_block_size => 50 }, 'one block where there are gaps everywhere' ],
    [ 'nocontigs',      { num_blocks => 50, largest_block_size => 1 },  'no contiguous blocks' ],
    [ 'threeblocks',    { num_blocks => 3,  largest_block_size => 21 }, 'three blocks' ],
    [
        'threeblocksinversion',
        { num_blocks => 3, largest_block_size => 20 },
        'three blocks with an inversion in the middle'
    ],
);
for my $case (@block_cases) {
    my ( $sample_name, $expected_stats, $description ) = @{$case};
    is_deeply( $obj->_sample_statistics($sample_name), $expected_stats, $description );
}

# For this fixture every counted gene is expected in the 'core' category.
is_deeply( $obj->gene_category_count, { core => 50 }, 'Gene category counts' );

# --- Fixture: t/data/gene_category_count.csv, no core_definition given -----
$obj = Bio::Roary::AssemblyStatistics->new( spreadsheet => 't/data/gene_category_count.csv' );
ok( $obj, 'initialise spreadsheet with variable numbers of genes in samples' );

# Expected number of genes per category when the constructor is given only
# the spreadsheet.
my %default_category_counts = (
    core      => 1,
    soft_core => 1,
    shell     => 24,
    cloud     => 4,
);
is_deeply( $obj->gene_category_count, \%default_category_counts, 'Categories as expected' );

# Produce the summary and compare summary_statistics.txt with the stored
# expected file.
ok( $obj->create_summary_output, 'create output file' );
compare_ok(
    'summary_statistics.txt',
    't/data/expected_summary_statistics.txt',
    'summary statistics as expected'
);


# --- Fixture: t/data/gene_category_count.csv, core_definition => 0.9667 ----
$obj = Bio::Roary::AssemblyStatistics->new(
    spreadsheet     => 't/data/gene_category_count.csv',
    core_definition => 0.9667,
);
ok( $obj, 'initialise spreadsheet with core of 96.67%' );

# With this core_definition one gene is expected in 'core' and one in
# 'soft_core'.
my %category_counts_at_9667 = (
    core      => 1,
    soft_core => 1,
    shell     => 24,
    cloud     => 4,
);
is_deeply( $obj->gene_category_count, \%category_counts_at_9667, 'Categories as expected with cd of 96.67%' );

# --- Fixture: t/data/gene_category_count.csv, core_definition => 0.9666 ----
$obj = Bio::Roary::AssemblyStatistics->new(
    spreadsheet     => 't/data/gene_category_count.csv',
    core_definition => 0.9666,
);
ok( $obj, 'initialise spreadsheet with core of 96.66%' );

# With this core_definition two genes are expected in 'core' and the result
# has no 'soft_core' key at all.
my %category_counts_at_9666 = (
    core  => 2,
    shell => 24,
    cloud => 4,
);
is_deeply( $obj->gene_category_count, \%category_counts_at_9666, 'Categories as expected with cd of 96.66%' );


# Remove summary_statistics.txt, the file this script compares against
# t/data/expected_summary_statistics.txt after calling create_summary_output.
# The cleanup is registered in an END block so that it also runs when an
# earlier statement dies, which a plain trailing unlink would never reach.
# unlink does not modify $?, so the exit status set by the test framework is
# left untouched.
END { unlink 'summary_statistics.txt' }

done_testing();