package Data::CompactReadonly::V0::Node;
our $VERSION = '0.1.0';

use warnings;
use strict;

use Fcntl qw(:seek);
use Scalar::Type qw(is_* bool_supported);

use Devel::StackTrace;
use Data::CompactReadonly::V0::Text;
use Data::Dumper;

# return the root node. assumes the $fh is pointing at the start of the node header
sub _init {
    my($class, %args) = @_;
    my $self = bless(\%args, $class);
    $self->{root} = $self;
    return $self->_node_at_current_offset();
}

# write the root node to the file and, recursively, its children
sub _create {
    my($class, %args) = @_;
    die("fell through to Data::CompactReadonly::V0::Node::_create when creating a $class\n")
        if($class ne __PACKAGE__);

    $class->_type_class(
        from_data => $args{data}
    )->_create(%args);
}

# stash (in memory) of everything that we've seen while writing the database,
# with a pointer to their location in the file so that it can be re-used. We
# even stash stringified Dicts/Arrays, which can eat a TON of memory. Yes, we
# seem to need to local()ise the config vars in each sub.
sub _stash_already_seen {
    my($class, %args) = @_;
    local $Data::Dumper::Indent   = 0;
    local $Data::Dumper::Sortkeys = 1;
    if(bool_supported && is_bool($args{data})) {
        $args{globals}->{already_seen}->{
            $args{data} ? 'bt' : 'bf'
        } = tell($args{fh});
    } elsif(defined($args{data})) {
        $args{globals}->{already_seen}->{d}->{
            ref($args{data}) ? Dumper($args{data}) : $args{data}
        } = tell($args{fh});
    } else {
        $args{globals}->{already_seen}->{u} = tell($args{fh});
    }
}

# look in the stash for data that we've seen before and get a pointer to it
sub _get_already_seen {
    my($class, %args) = @_;
    local $Data::Dumper::Indent   = 0;
    local $Data::Dumper::Sortkeys = 1;

    if(bool_supported && is_bool($args{data})) {
        return
            $args{data} ? $args{globals}->{already_seen}->{bt}
                        : $args{globals}->{already_seen}->{bf}
    } elsif(defined($args{data})) {
        return
            $args{globals}->{already_seen}->{d}->{
                ref($args{data}) ? Dumper($args{data}) : $args{data}
            }
    } else {
        return $args{globals}->{already_seen}->{u};
    }
}

sub _get_next_free_ptr {
    my($class, %args) = @_;
    return $args{globals}->{next_free_ptr};
}

sub _set_next_free_ptr {
    my($class, %args) = @_;
    $args{globals}->{next_free_ptr} = tell($args{fh});
}

# in case the database isn't at the beginning of a file, eg in __DATA__
sub _db_base {
    my $self = shift;
    return $self->_root()->{db_base};
}

sub _fast_collections {
    my $self = shift;
    return $self->_root()->{'fast_collections'};
}

sub _tied {
    my $self = shift;
    return $self->_root()->{'tie'};
}

# figure out what type the node is from the node specifier byte, then call
# the class's _init to get it to read itself from the db
sub _node_at_current_offset {
    my $self = shift;

    # for performance, cache the filehandle in this object
    $self->{_fh} ||= $self->_fh();
    my $type_class = $self->_type_class(from_byte => $self->_bytes_at_current_offset(1));
    return $type_class->_init(root => $self->_root(), offset => tell($self->{_fh}) - $self->_db_base());
}

# what's the minimum number of bytes required to store this int?
sub _bytes_required_for_int {
    no warnings 'portable'; # perl worries about 32 bit machines. I don't.
    my($class, $int) = @_;
    return
        $int <= 0xff               ? 1 : # Byte
        $int <= 0xffff             ? 2 : # Short
        $int <= 0xffffff           ? 3 : # Medium
        $int <= 0xffffffff         ? 4 : # Long
        $int <= 0xffffffffffffffff ? 8 : # Huge
                                     9;  # 9 or greater signals too big for 64 bits
}

# given the number of elements in a Collection, figure out what the appropriate
# class is to represent it. NB that only Byte/Short/Medium/Long are allowed, we
# don't allow Huge numbers of elements in a Collection
sub _sub_type_for_collection_of_length {
    my($class, $length) = @_;
    my $bytes = $class->_bytes_required_for_int($length);
    return $bytes == 1 ? 'Byte' :
           $bytes == 2 ? 'Short' :
           $bytes == 3 ? 'Medium' :
           $bytes == 4 ? 'Long' :
                         undef;
}

# given a blob of text, figure out its type
sub _text_type_for_data {
    my($class, $data) = @_;
    return 'Text::'.do {
        $class->_sub_type_for_collection_of_length(
            length(Data::CompactReadonly::V0::Text->_text_to_bytes($data))
        ) || die("$class: Invalid: Text too long");
    };
}

# work out what node type is required to represent a piece of data
sub _type_map_from_data {
    my($class, $data) = @_;
    return !defined($data)
             ? 'Scalar::Null' :
           (bool_supported && is_bool($data))
             ? 'Scalar::'.($data ? 'True' : 'False') :
           ref($data) eq 'ARRAY'
             ? 'Array::'.do { $class->_sub_type_for_collection_of_length(1 + $#{$data}) ||
                              die("$class: Invalid: Array too long");
                         } :
           ref($data) eq 'HASH'
             ? 'Dictionary::'.do { $class->_sub_type_for_collection_of_length(scalar(keys %{$data})) ||
                                   die("$class: Invalid: Dictionary too long");
                         } :
           is_integer($data)
             ? do {
                 my $neg = $data < 0 ? 'Negative' : '';
                 my $bytes = $class->_bytes_required_for_int(abs($data));
                 $bytes == 1 ? "Scalar::${neg}Byte"   :
                 $bytes == 2 ? "Scalar::${neg}Short"  :
                 $bytes == 3 ? "Scalar::${neg}Medium" :
                 $bytes == 4 ? "Scalar::${neg}Long"   :
                 $bytes <  9 ? "Scalar::${neg}Huge"
                             : "Scalar::Float64"
               } :
           is_number($data)
             ? 'Scalar::Float64' :
           !ref($data)
             ? $class->_text_type_for_data($data)
             : die("Can't yet create from '$data'\n");
}

my $type_by_bits = {
    0b00 => 'Text',
    0b01 => 'Array',
    0b10 => 'Dictionary',
    0b11 => 'Scalar'
};
my $subtype_by_bits = { 
    0b0000 => 'Byte',      0b0001 => 'NegativeByte',
    0b0010 => 'Short',     0b0011 => 'NegativeShort',
    0b0100 => 'Medium',    0b0101 => 'NegativeMedium',
    0b0110 => 'Long',      0b0111 => 'NegativeLong',
    0b1000 => 'Huge',      0b1001 => 'NegativeHuge',
    0b1010 => 'Null',
    0b1011 => 'Float64',
    0b1100 => 'True',
    0b1101 => 'False',
    (map { $_ => 'Reserved' } (0b1110 .. 0b1111))
};
my $bits_by_type    = { reverse %{$type_by_bits} };
my $bits_by_subtype = { reverse %{$subtype_by_bits} };

# used by classes when serialising themselves to figure out what their
# type specifier byte should be
sub _type_byte_from_class {
    my $class = shift;
    $class =~ /.*::([^:]+)::([^:]+)/;
    my($type, $subtype) = ($1, $2);
    return chr(
        ($bits_by_type->{$type}       << 6) +
        ($bits_by_subtype->{$subtype} << 2)
    );
}

# work out what node type is represented by a given node specifier byte
sub _type_map_from_byte {
    my $class   = shift;
    my $in_type = ord(shift());

    my $type        = $type_by_bits->{$in_type >> 6};
    my $scalar_type = $subtype_by_bits->{($in_type & 0b111100) >> 2};

    die(sprintf("$class: Invalid type: 0b%08b: Reserved\n", $in_type))
        if($scalar_type eq 'Reserved');
    die(sprintf("$class: Invalid type: 0b%08b: length $scalar_type\n", $in_type))
        if($type ne 'Scalar' && $scalar_type =~ /^(Null|Float64|Negative|Huge|True|False)/);
    return join('::', $type, $scalar_type);
}

# get a class name (having loaded the relevant class) either from_data
# (when writing a file) or from_byte (when reading a file)
sub _type_class {
    my($class, $from, $in_type) = @_;
    my $map_method = "_type_map_$from";
    my $type_name = "Data::CompactReadonly::V0::".$class->$map_method($in_type);
    unless($type_name->VERSION()) {
        eval "use $type_name";
        die($@) if($@);
    }
    return $type_name;
}

# read N bytes from the current offset
sub _bytes_at_current_offset {
    my($self, $bytes) = @_;
    # for performance, cache the filehandle in this object
    $self->{_fh} ||= $self->_fh();
    my $tell = tell($self->{_fh});
    my $chars_read = read($self->{_fh}, my $data, $bytes);

    if(!defined($chars_read)) {
        die(
            "$self: read() failed to read $bytes bytes at offset $tell: $!\n".
            Devel::StackTrace->new()->as_string()
        );
    } elsif($chars_read != $bytes) {
        die(
            "$self: read() tried to read $bytes bytes at offset $tell, got $chars_read: $!\n".
            Devel::StackTrace->new()->as_string()
        );
    }
    return $data;
}

# this is a monstrous evil - TODO instantiate classes when writing!
# seek to a particular point in the *database* (not in the file). If the
# pointer has gone too far for the current pointer size, die. This will be
# caught in Data::CompactReadonly::V0->create(), the pointer size incremented, and it will
# try again from the start
sub _seek {
    my $self = shift;
    if($#_ == 0) { # for when reading
        my $to = shift;
        # for performance, cache the filehandle in this object
        $self->{_fh} ||= $self->_fh();
        seek($self->{_fh}, $self->_db_base() + $to, SEEK_SET);
    } else { # for when writing
        my %args = @_;
        die($self->_ptr_blown())
            if($args{pointer} >= 256 ** $args{ptr_size});
        seek($args{fh}, $args{pointer}, SEEK_SET);
    }
}

sub _ptr_blown { "pointer out of range" }

# the offset of the current node
sub _offset {
    my $self = shift;
    return $self->{offset};
}

sub _root {
    my $self = shift;
    return $self->{root};
}

# the filehandle, currently only used when reading, see the TODO above
# for _seek
sub _fh {
    my $self = shift;
    return $self->_root()->{fh};
}

sub _ptr_size {
    my $self = shift;
    return $self->_root()->{ptr_size};
}

1;
