# $Id: obo_text.pm,v 1.16 2008/01/22 23:54:45 cmungall Exp $
#
# This GO module is maintained by Chris Mungall <cjm@fruitfly.org>
#
# see also - http://www.geneontology.org
#          - http://www.godatabase.org/dev
#
# You may distribute this module under the same terms as perl itself

=head1 NAME

  GO::Handlers::obo_text - OBO text handler 

=head1 SYNOPSIS

  use GO::Handlers::obo_text

=cut

=head1 DESCRIPTION

transforms OBO XML events into OBO Text

L<http://www.geneontology.org/GO.format.html#oboflat>

=head1 PUBLIC METHODS - 

=cut

# makes objects from parser events

package GO::Handlers::obo_text;
use Data::Stag qw(:all);
use GO::Parsers::ParserEventNames;
use base qw(GO::Handlers::base);
use strict qw(vars refs);

sub s_obo {
    my $self = shift;
    #$self->SUPER::s_obo(@_);
    return;
}

sub e_header {
    my $self = shift;
    my $hdr = shift;

    my $fmt = stag_get($hdr,'format-version');
    $self->tag("format-version"=> 
               (stag_sget($hdr,'format-version') || '1.2'));
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = 
      localtime(time);
    $self->tag('date'=>sprintf("%02d:%02d:%04d %02d:%02d",
                               $mday,$mon+1,$year+1900,$hour,$min));
    $self->tag('autogenerated-by'=>$0);
    foreach (stag_tnodes($hdr)) {
        $self->tag(stag_name($_), _obo_escape($_->data));
    }
    my @sts = stag_get($hdr,'synonymtypedef');
    foreach (@sts) {
        my $scope = stag_sget($_,'scope');
        $self->tag(synonymtypedef => sprintf("%s \"%s\" %s",
                                             stag_sget($_,ID),
                                             stag_sget($_,NAME) || '',
                                             ($scope ? uc($scope) : '')));
    }

    my @ssdefs = stag_get($hdr,'subsetdef');
    foreach (@ssdefs) {
        $self->tag(subsetdef => sprintf("%s \"%s\"",
                                        stag_sget($_,ID),
                                        stag_sget($_,NAME)));
    }
    $self->{__emitted_header} = 1;

    $self->print("\n");
    return;
}

sub e_typedef {
    my $self = shift;
    my $t = shift;
    $self->stanza('Typedef', $t);
}

sub e_term {
    my $self = shift;
    my $t = shift;
    if (!$self->{__emitted_header}) {
        $self->e_header(stag_new(HEADER,[]));
    }
    $self->stanza('Term', $t);
}

sub e_annotation {
    my $self = shift;
    my $t = shift;
    if (!$self->{__emitted_header}) {
        $self->e_header(stag_new(HEADER,[]));
    }
    $self->stanza('Annotation', $t);
}

sub e_instance {
    my $self = shift;
    my $t = shift;
    if (!$self->{__emitted_header}) {
        $self->e_header(stag_new(HEADER,[]));
    }
    $self->stanza('Instance', $t);
}

sub stanza {
    my $self = shift;
    my $stanza = shift;
    my $t = shift;
    $self->print("[$stanza]\n");
    my @BOOLEAN_TAGS =
      (
       IS_ANONYMOUS,
       IS_OBSOLETE,
       IS_CYCLIC,
       IS_TRANSITIVE,
       IS_SYMMETRIC,
       IS_ANTI_SYMMETRIC,
       IS_REFLEXIVE,
       IS_METADATA_TAG,
      );
    my @TAGS =
      (ID,
       NAME,
       ALT_ID,
       NAMESPACE,
       DEF,
       COMMENT,
       SUBSET,
       IS_A ,
       RELATIONSHIP,
       UNION_OF,
       INTERSECTION_OF,
       SYNONYM,
       PROPERTY_VALUE,
       XREF_ANALOG,
       XREF_UNKNOWN,
       'object',
       @BOOLEAN_TAGS,
      );
    my %IS_BOOLEAN = map { ($_=>1) } @BOOLEAN_TAGS;
    my @IGNORE = qw(is_root);
    foreach my $tag (@IGNORE) {
        stag_unset($t, $tag);
    }
    foreach my $tag (@TAGS) {
        my @vals = stag_get($t, $tag);
        next unless @vals;

        if ($tag eq DEF) {
            my $def = shift @vals;
            my $defstr = $def->get_defstr;
            my $qn = stag_sget($t, "$tag/@");
            $self->tag(def => _obo_escape($defstr), [$def->get_dbxref], $qn);
        }
        elsif ($tag eq RELATIONSHIP) {
            $self->tag(relationship => sprintf("%s %s",
                                               $_->sget_type,
                                               $_->sget_to),
                       undef,
                       $_->sget('@'))
              foreach @vals;
        }
        elsif ($tag eq INTERSECTION_OF) {
            $self->tag(intersection_of => sprintf("%s %s",
                                                  $_->sget_type,
                                                  $_->sget_to),
                       undef,
                       $_->sget('@'))
              foreach @vals;
        }
        elsif ($tag eq UNION_OF) {
            $self->tag(union_of => sprintf("%s %s",
                                           $_->sget_type,
                                           $_->sget_to),
                       undef,
                       $_->sget('@'))
              foreach @vals;
        }
        elsif ($tag eq SYNONYM) {
            foreach my $syn (@vals) {
                my $type = $syn->sget('@/synonym_type');
                my $scope = $syn->sget('@/scope');
                my @vals = (quote($syn->sget_synonym_text));
                push(@vals,uc($scope)) if $scope;
                push(@vals,$type) if $type;
                $self->tag($tag,
                           join(' ',@vals),
                           [$syn->get_dbxref]);
            }
        }
        elsif ($tag eq XREF_ANALOG) {
            $self->tag('xref', dbxref($_),undef,$_->sget('@'))
              foreach @vals;
        }
        elsif ($tag eq PROPERTY_VALUE) {
            foreach (@vals) {
                my $dt = $_->sget_datatype;
                if ($dt) {
                    $self->tag('property_value' => sprintf("%s %s %s",
                                                           $_->sget_type,
                                                           quote($_->sget_value),
                                                           $dt));
                }
                else {
                    $self->tag('property_value' => sprintf("%s %s",
                                                           $_->sget_type,
                                                           $_->sget_to));
                }
            }
        }
        elsif ($tag eq 'object') {
            # experimental: obof1.3
            $self->tag('object' => $self->obo_id(@vals));
        }
        elsif ($IS_BOOLEAN{$tag}) {
            $self->tag($tag, $vals[0] ? "true" : "false");
        }
        else {
            foreach (@vals) {
                if (ref($_)) {
                    $self->tag($tag, $_->sget('.'),undef,$_->sget('@'))
                }
                else {
                    $self->tag($tag, _obo_escape($_));
                }
            }
        }
        stag_unset($t, $tag);
    }
    my @tnodes = stag_tnodes($t);
    $self->tag($_->name, _obo_escape($_->data))
      foreach @tnodes;

    my @ntnodes = stag_ntnodes($t);
    if (@ntnodes) {
        print STDERR $_->xml foreach @ntnodes;
        $self->throw( "unknown elements");
    }

    $self->print("\n");

}

sub obo_id {
    my $self = shift;
    my $v = shift;
    if (ref($v)) {
        my $isect = $v->sget_intersection;
        if ($isect) {
            my @links = $isect->get_link;
            my @genus = grep {!$_->get_type} @links;
            my @diffs = grep {$_->get_type} @links;
            my $s =
              join('^',
                   (map {$self->obo_id($_->sget_to)} @genus),
                   (map {
                       sprintf("%s(%s)",$_->sget_type,$self->obo_id($_->sget_to))
                   } @diffs));
            return $s;
        }
        else {
        }
    }
    else {
        return $v;
    }
}

sub tag {
    my $self = shift;
    my ($t, $v, $xrefsr, $qualsr) = @_;
    my @xrefs = @{$xrefsr || []};
    return unless defined $v;
    if ($t eq DEF) {
        $v=quote($v);
    }
    my $xrefl = '';
    if ($xrefsr) {
	$xrefl =
	  ' ['.join(', ',
		   map {
		       dbxref($_);
		   } @xrefs).']';
    }
    my $ql = '';
    if ($qualsr) {
        my %qh = stag_pairs($qualsr);
        $ql = ' {'.join(
                        ', ',
                        map {
                            "$_=".quote($qh{$_})
                        } keys %qh
                       ).'}';
    }
    $self->printf("%s: %s$xrefl$ql\n", $t, $v);
    return;
}

sub _obo_escape {
    my $s=shift;
    $s =~ s/\\/\\\\/;
    $s =~ s/([\{\}])/\\$1/g;
    $s;
}

sub dbxref {
    my $x = shift;
    if (ref($x)) {
        my $xref = $x->sget_dbname . ':' . $x->sget_acc;
        my $name = $x->sget_name;
        if (defined($name)) {
            $name =~ s/\"/\\\"/g;
            $xref." \"$name\"";
        }
        else {
            $xref;
        }
    }
    else {
        $x;
    }
}

sub safe {
    my $word = shift;
    $word =~ s/ /_/g;
    $word =~ s/\-/_/g;
    $word =~ s/\'/prime/g;
    $word =~ tr/a-zA-Z0-9_//cd;
    $word =~ s/^([0-9])/_$1/;
    $word;
}

sub quote {
    my $word = shift;
    #$word =~ s/,/\\,/g;  ## no longer required
    $word =~ s/\"/\\\"/g;
    "\"$word\"";
}

# -- EXPERIMENTAL CODE --
# obo format for gene_assocs

# we are hardcoding aspects here; this is OK, only for
# gene_assoc file which is GO specific
our %ASPECT_IDX =
  (F => 'has_activity',
   P => 'involved_in',
   C => 'localised_to'
  );

sub e_prod {
    my $self = shift;
    my $prod = shift;

    my $proddb = $self->up_to('dbset')->get_proddb;
    
    my $acc = $prod->get_prodacc;
    my $id = "$proddb:$acc";
    my $type = $prod->get_prodtype || 'gene_product';
    $self->print("!! ***************************** \n");
    $self->print("!! Gene Product: $id \n");
    $self->print("!! ***************************** \n");
    $self->print("[$type]\n");
    $self->tag(id=>$id);
    $self->tag(dbname=>$proddb);
    $self->tag(acc=>$acc);
    $self->tag(symbol=>$prod->sget_prodsymbol);
    $self->tag(name=>$prod->sget_prodname);
    $self->tag(synonym=>$_) foreach $prod->sget_prodsyn;
    $self->tag(has_taxon=>'NCBI:'.$prod->sget_prodtaxa);
    $self->print("\n");

    my @assocs = $prod->get_assoc;
    foreach my $assoc (@assocs) {
        my $termacc = $assoc->get_termacc;
        my $aspect = $assoc->get_aspect;
        my $ns = $ASPECT_IDX{$aspect};
        $self->print("[gene_product_annotation]\n");
        $self->tag(involves_gene_product=>$id);
        $self->tag($ns=>$termacc);
        $self->tag($_=>'true') foreach $assoc->get_qualifier;
        $self->tag(date=>$assoc->sget_assocdate);
        $self->tag(source_db=>$assoc->sget_source_db);
        my @evs = $assoc->get_evidence;
        foreach my $ev (@evs) {
            $self->tag(has_evidence=>$ev->sget_evcode, $ev->get_ref);
            $self->tag(with=>$_) foreach $ev->get_with;
        }
        $self->print("\n");
    }
    $self->print("!! //\n\n");
    
}

sub dbxrefstr {

}

1;
