File: CrossRef.pm

package info (click to toggle)
latexml 0.8.7-2
links: PTS, VCS
area: main
in suites: bookworm
size: 29,128 kB
sloc: xml: 98,982; perl: 29,706; sh: 179; javascript: 28; makefile: 15
file content (924 lines) | stat: -rw-r--r-- 41,675 bytes
# /=====================================================================\ #
# |  LaTeXML::Post::CrossRef                                            | #
# | Scan for ID's etc                                                   | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #

package LaTeXML::Post::CrossRef;
use strict;
use warnings;
use LaTeXML::Util::Pathname;
use LaTeXML::Common::XML;
use LaTeXML::Common::Error;
use LaTeXML::Post::UnicodeMath;
use charnames qw(:full);
use LaTeXML::Post;
use base qw(LaTeXML::Post::Processor);

my $NBSP = pack('U', 0xA0);    # CONSTANT

sub new {
  my ($class, %options) = @_;
  my $self = $class->SUPER::new(%options);
  $$self{db}       = $options{db};
  $$self{urlstyle} = $options{urlstyle};
##  $$self{toc_show} = ($options{number_sections} ? "typerefnum title" : "title");
  # Default format for ltx:ref's within TOC's
  $$self{toc_show} = 'toctitle';
  # Default format for regular ltx:ref's
  # [BTW: Does number_sections really still make sense?]
  $$self{ref_show}       = ($options{number_sections}        ? "refnum"                 : "title");
  $$self{min_ref_length} = (defined $options{min_ref_length} ? $options{min_ref_length} : 1);
  $$self{ref_join} = (defined $options{ref_join} ? $options{ref_join} : " \x{2023} "); # or " in " or ... ?
  $$self{navigation_toc} = $options{navigation_toc};
  return $self; }

sub process {
  my ($self, $doc, $root) = @_;
  local %LaTeXML::Post::CrossRef::MISSING = ();

  my $navtoc   = $$self{navigation_toc};  # If a navigation toc requested, put a toc in nav to fill in
  my $doctitle = $self->generateDocumentTile($doc);
  my $nav      = $doc->findnode('//ltx:navigation');
  if (!$nav && ($navtoc || (defined $doctitle))) {    # Need navigation block
    $doc->addNodes($doc->getDocumentElement, ['ltx:navigation', {}]);
    $nav = $doc->findnode('//ltx:navigation'); }
  $doc->addNodes($nav, ['ltx:TOC', { format => $navtoc }]) if $navtoc;
  $doc->addNodes($nav, ['ltx:title', {}, $doctitle]) if defined $doctitle;

  $self->fill_in_relations($doc);
  $self->fill_in_tocs($doc);
  $self->fill_in_frags($doc);
  $self->fill_in_glossaryrefs($doc);
  $self->fill_in_refs($doc);
  $self->fill_in_RDFa_refs($doc);
  $self->fill_in_bibrefs($doc);
  $self->fill_in_mathlinks($doc);
  $self->copy_resources($doc);

  if (keys %LaTeXML::Post::CrossRef::MISSING) {
    my $tempid = 0;
    foreach my $severity (qw(error warn info)) {
      my @msgs = ();
      foreach my $type (sort keys %{ $LaTeXML::Post::CrossRef::MISSING{$severity} }) {
        my @items = keys %{ $LaTeXML::Post::CrossRef::MISSING{$severity}{$type} };
        $tempid ||= grep { $_ eq 'TEMPORARY_DOCUMENT_ID' } @items;
        my @args = ('expected', 'ids', undef,
          "Missing $type: " . join(',', @items),
          ($tempid ? "[Note TEMPORARY_DOCUMENT_ID is a stand-in ID for the main document.]" : ()));
        if    ($severity eq 'error') { Error(@args); }
        elsif ($severity eq 'warn')  { Warn(@args); }
        elsif ($severity eq 'info')  { Info(@args); } } } }
  return $doc; }

sub note_missing {
  my ($self, $severity, $type, $key) = @_;
  $LaTeXML::Post::CrossRef::MISSING{$severity}{$type}{$key}++;
  return; }

sub fill_in_relations {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  if (my $id = $doc->getDocumentElement->getAttribute('xml:id')) {
    if (my $entry = $db->lookup("ID:" . $id)) {
      # First, add the basic relations
      my $x;
      # Apparently, "up", "up up", "up up up" is the desired form for html5
      my $xentry = $entry;
      my $rel    = 'up';
      while (($x = $xentry->getValue('parent')) && ($xentry = $db->lookup("ID:" . $x))) {
        if ($xentry->getValue('title')) {    # it's interesting if it has a title (INCONSISTENT!!!)
          ### NOT pageid, like the others, because of the sleasy link to \part in dlmf!!!
          $doc->addNavigation($rel => $xentry->getValue('id'));
          $rel .= ' up'; } }
      if ($xentry && ($id ne $xentry->getValue('pageid'))) {
        $doc->addNavigation(start => $xentry->getValue('pageid')); }
      if (my $prev = $self->findPreviousPage($entry)) {    # previous page
        $doc->addNavigation(prev => $prev->getValue('pageid')); }
      if (my $next = $self->findNextPage($entry)) {
        $doc->addNavigation(next => $next->getValue('pageid')); }

      # Now, dig around for other interesting related documents
      # Use the entry types themselves for the relations
      $xentry = $entry;
      # Firstly, look at siblings of this page, then at siblings of parent,
      # then those of grandparent, etc.
      # In a large/complex site, this gets way too much. But how to prune?
      while ($xentry = $self->getParentPage($xentry)) {
        # any siblings of (grand)parent are "interesting" structural elements
        # OR, even more interesting: the index, bibliography, glossary related to current page!
        foreach my $sib ($self->getChildPages($xentry)) {
          my $sib_id = $sib->getValue('pageid');
          next if $sib_id eq $id;
          if ($sib->getValue('primary')) {    # If a primary page
                                              # Use the element name (w/o prefix) as the relation !!!!
            my $sib_rel = $sib->getValue('type'); $sib_rel =~ s/^(\w+)://;
            $doc->addNavigation($sib_rel => $sib_id); }
          else {                              # Else, consider it as some sort of sidebar.
            $doc->addNavigation('sidebar' => $sib_id); } } }
      # Then Look at (only?) 1st level of pages below this one.
      foreach my $child ($self->getChildPages($entry)) {
        my $child_id = $child->getValue('pageid');
        if ($child->getValue('primary')) {    # If a primary page
                                              # Use the element name (w/o prefix) as the relation !!!!
          my $child_rel = $child->getValue('type'); $child_rel =~ s/^(\w+)://;
          $doc->addNavigation($child_rel => $child_id); }
        else {                                # Else, consider it as some sort of sidebar.
          $doc->addNavigation('sidebar' => $child_id); } }
  } }
  return; }

sub findPreviousPage {
  my ($self, $entry) = @_;
  my $page = $entry->getValue('pageid');
  # Look at parent's entry, and get the list of our siblings
  if (my $pentry = $self->getParentPage($entry)) {
    my @sibs = $self->getChildPages($pentry);
    while (@sibs && $sibs[-1]->getValue('pageid') ne $page) {    # peel off following sibs
      pop(@sibs); }
    return unless @sibs && $sibs[-1]->getValue('pageid') eq $page;    # Broken database?
    pop(@sibs);                                                       # Now skip our own entry ($id)
    @sibs = grep { $_->getValue('primary') } @sibs;
    # If there IS a preceding sibling, find it's rightmost descendant
    while (@sibs) {
      $pentry = $sibs[-1];
      @sibs   = grep { $_->getValue('primary') } $self->getChildPages($pentry); }
    return $pentry; }                                                 # Return deepest page found
  return; }

sub findNextPage {
  my ($self, $entry) = @_;
  # Return first child page, if any
  my @ch = grep { $_->getValue('primary') } $self->getChildPages($entry);
  return $ch[0] if @ch;
  my $page = $entry->getValue('pageid');
  # Look at parent's entry, and get the list of siblings
  while ($entry = $self->getParentPage($entry)) {
    my @sibs = $self->getChildPages($entry);
    while (@sibs && $sibs[0]->getValue('pageid') ne $page) {    # peel off preceding sibs, till found,
      shift(@sibs); }
    return unless @sibs && ($sibs[0]->getValue('pageid') eq $page);    # Broken database?
    shift(@sibs);                                                      # remove our own entry ($id)
    @sibs = grep { $_->getValue('primary') } @sibs;                    # Skip uninteresting pages
    return $sibs[0] if @sibs;
    $page = $entry->getValue('pageid'); }
  return; }

sub getParentPage {
  my ($self, $entry) = @_;
  my $x;
  return ($x = $entry->getValue('pageid')) && ($x = $$self{db}->lookup("ID:" . $x))
    && ($x = $x->getValue('parent')) && ($x = $$self{db}->lookup("ID:" . $x))
    && ($x = $x->getValue('pageid')) && ($x = $$self{db}->lookup("ID:" . $x))
    && $x; }

sub getRootPage {
  my ($self, $entry) = @_;
  my $x    = $entry;
  my $root = $entry;
  while (($x = $x->getValue('parent')) && ($x = $$self{db}->lookup("ID:" . $x))
    && ($x = $x->getValue('pageid')) && ($x = $$self{db}->lookup("ID:" . $x))) {
    $root = $x; }
  return $root; }

# Assuming this entry is for a page, find the closest descendants that are (distinct) pages
sub getChildPages {
  my ($self, $entry) = @_;
  my $page = $entry->getValue('pageid');
  my @p    = ();
  foreach my $ch (@{ $entry->getValue('children') || [] }) {
    if (my $e = $$self{db}->lookup("ID:" . $ch)) {
      if (my $p = $e->getValue('pageid')) {    # if valid page
        push(@p, ($p ne $page ? ($e) : $self->getChildPages($e))); } } }
  return @p; }

# this is probably the same as "Interesting" for the above relations.
# To make it more extensible, it really should be integrated into the database?
# Eg. "sectional" things might mark their entries specially?
my $normaltoctypes = { map { ($_ => 1) }    # CONSTANT
    qw (ltx:document ltx:part ltx:chapter
    ltx:section ltx:subsection ltx:subsubsection
    ltx:paragraph ltx:subparagraph
    ltx:index ltx:bibliography ltx:glossary ltx:appendix) };

sub fill_in_tocs {
  my ($self, $doc) = @_;
  my $n = 0;
  foreach my $toc ($doc->findnodes('descendant::ltx:TOC[not(ltx:toclist)]')) {
    $n++;
    my $selector = $toc->getAttribute('select');
    my $types;
    if ($selector) {
      $types = { map { ($_ => 1) } split(/\s*\|\s*/, $selector) }; }
    # global vs children of THIS or Document node?
    my $id     = $doc->getDocumentElement->getAttribute('xml:id');
    my $scope  = $toc->getAttribute('scope')  || 'current';
    my $format = $toc->getAttribute('format') || 'normal';
    my $lists;
    if (my $listname = $toc->getAttribute('lists')) {
      $lists = { map { $_ => 1 } split(/\s/, $listname) }; }
    else {
      $lists = { toc => 1 }; }
    if ($scope eq 'global') {
      if (my $entry = $$self{db}->lookup("ID:" . $id)) {
        if (my $root = $self->getRootPage($entry)) {
          $id = $root->getValue('pageid'); } } }
    my $show = $toc->getAttribute('show') || $$self{toc_show};
    my @list = ();
    if (!$format || ($format =~ /^normal/)) {
      @list = $self->gentoc($doc, $id, $show, $lists, $types); }
    elsif ($format eq 'context') {
      $lists = { toc => 1 };
      @list  = $self->gentoc_context($doc, $id, $show, $lists, $types); }
    $doc->addNodes($toc, ['ltx:toclist', {}, @list]) if @list; }
  Debug("Filled in $n TOCs") if $LaTeXML::DEBUG{crossref};
  return; }

# generate TOC for $id & its children,
# providing that those objects are of appropriate type.
# Returns a list of 0 or more ltx:tocentry's (possibly containing ltx:toclist's)
# Note that parent/child relationships stored in ObjectDB can also reflect less
# `interesting' objects like para or p style paragraphs, and such.
sub gentoc {
  my ($self, $doc, $id, $show, $lists, $types, $localto, $selfid) = @_;
  if (my $entry = $$self{db}->lookup("ID:$id")) {
    my @kids = ();
    if ((!defined $localto) || (($entry->getValue('location') || '') eq $localto)) {
      @kids = map { $self->gentoc($doc, $_, $show, $lists, $types, $localto, $selfid) }
        @{ $entry->getValue('children') || [] }; }
    my $type = $entry->getValue('type');
    my $role = $entry->getValue('role');
    if (($types ? ($type = $entry->getValue('type')) && $$types{$type} : 1)
      && inlist_match($lists, $entry->getValue('inlist'))) {
      return $self->gentocentry($doc, $entry, $selfid, $show, @kids); }
    else {
      return @kids; } }
  else {
    return (); } }

sub inlist_match {
  my ($listsa, $listsb) = @_;
  return ($listsa && $listsb && grep { $$listsb{$_} } keys %$listsa); }

# Experimental show pattern:  before < filling > after
sub gentocentry {
  my ($self, $doc, $entry, $selfid, $show, @children) = @_;
  my $id       = $entry->getValue('id');
  my $type     = $entry->getValue('type');
  my $typename = $type; $typename =~ s/^ltx://;
  my ($before, $after);
  if ($show =~ /^(.*?)\<(.*?)$/) { $before = $1; $show  = $2; }
  if ($show =~ /^(.*?)\>(.*?)$/) { $show   = $1; $after = $2; }
  # Good candidate for before = thumbnail
  return (['ltx:tocentry',
      { class => "ltx_tocentry_$typename"
          . (defined $selfid && ($selfid eq $id) ? ' ltx_ref_self' : "") },
      ($before ? $self->generateRef_simple($doc, $id, $before) : ()),
      ['ltx:ref', { show => $show, idref => $id }],
      ($after    ? $self->generateRef_simple($doc, $id, $after)                       : ()),
      (@children ? (['ltx:toclist', { class => "ltx_toclist_$typename" }, @children]) : ())]); }

# Generate a "context" TOC, that shows what's on the current page,
# but also shows the page in the context of it's siblings & ancestors.
# This is useful for putting in a navigation bar.
sub gentoc_context {
  my ($self, $doc, $id, $show, $lists, $types) = @_;
  if (my $entry = $$self{db}->lookup("ID:$id")) {
    # Generate Downward TOC covering items WITHIN the current page.
    my @navtoc = $self->gentoc($doc, $id, $show, $lists, $types, $entry->getValue('location') || '', $id);
    # Then enclose it upwards along with siblings & ancestors
    my $p_id;
    while (($p_id = $entry->getValue('parent')) && ($entry = $$self{db}->lookup("ID:$p_id"))) {
      @navtoc =
        map {
        ($_->getValue('id') eq $id
          ? @navtoc
          : $self->gentocentry($doc, $_, undef, $show)) }
        grep { $$normaltoctypes{ $_->getValue('type') } }    # or should we use @inlist???
        map  { $$self{db}->lookup("ID:$_") }
        @{ $entry->getValue('children') || [] };
      if (($types ? $$types{ $entry->getValue('type') } : 1)
        && $entry->getValue('parent')) {
        @navtoc = ($self->gentocentry($doc, $entry, undef, $show, @navtoc)); }
      $id = $p_id; }
    return @navtoc; }
  else {
    return (); } }

sub fill_in_frags {
  my ($self, $doc) = @_;
  my $n  = 0;
  my $db = $$self{db};
  # Any nodes with an ID will get a fragid;
  # This is the id/name that will be used within xhtml/html.
  foreach my $node ($doc->findnodes('//@xml:id')) {
    if (my $entry = $db->lookup("ID:" . $node->value)) {
      if (my $fragid = $entry->getValue('fragid')) {
        $n++;
        $node->parentNode->setAttribute(fragid => $fragid); } } }
  Debug("Filled in fragment $n ids") if $LaTeXML::DEBUG{crossref};
  return; }

# Fill in content text for any <... @idref..>'s or @labelref
sub fill_in_refs {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  my $n  = 0;
  foreach my $ref ($doc->findnodes('descendant::*[@idref or @labelref]')) {
    my $tag = $doc->getQName($ref);
    next if $tag eq 'ltx:XMRef';    # Blech; list those TO fill-in, or list those to exclude?
    my $id   = $ref->getAttribute('idref');
    my $show = $ref->getAttribute('show');
    $show = $$self{ref_show} unless $show;
    if (!$id) {
      if (my $label = $ref->getAttribute('labelref')) {
        my $entry;
        if (($entry = $db->lookup($label)) && ($id = $entry->getValue('id'))) {
          $ref->setAttribute(idref => $id); }
        else {
          $self->note_missing('warn', 'Target for Label', $label);
          my $cl = $ref->getAttribute('class');
          $ref->setAttribute(class => ($cl ? $cl . ' ltx_missing_label' : 'ltx_missing_label'));
          if (!$ref->textContent) {
            $doc->addNodes($ref, $label);    # Just to reassure (?) readers.
            $ref->setAttribute(broken => 1); }
    } } }

    if ($id) {
      $n++;
      if (!$ref->getAttribute('href')) {
        if (my $url = $self->generateURL($doc, $id)) {
          $ref->setAttribute(href => $url); } }
      if (!$ref->getAttribute('title')) {
        if (my $titlestring = $self->generateTitle($doc, $id)) {
          $ref->setAttribute(title => $titlestring); } }
      if (!$ref->textContent && !element_nodes($ref)
        && !(($tag eq 'ltx:graphics') || ($tag eq 'ltx:picture'))) {
        my $is_nameref = ($ref->getAttribute('class') || '') =~ 'ltx_refmacro_nameref';
        $doc->addNodes($ref, $self->generateRef($doc, $id, $show, $is_nameref)); }
      if (my $entry = $$self{db}->lookup("ID:$id")) {
        $ref->setAttribute(stub => 1) if $entry->getValue('stub'); }
  } }
  Debug("Filled in $n refs") if $LaTeXML::DEBUG{crossref};
  return; }

# similar sorta thing for RDF about & resource labels & ids
sub fill_in_RDFa_refs {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  my $n  = 0;
  foreach my $key (qw(about resource)) {
    foreach my $ref ($doc->findnodes('descendant::*[@' . $key . 'idref or @' . $key . 'labelref]')) {
      my $id = $ref->getAttribute($key . 'idref');
      if (!$id) {
        if (my $label = $ref->getAttribute($key . 'labelref')) {
          my $entry;
          if (($entry = $db->lookup($label)) && ($id = $entry->getValue('id'))) {
            $ref->setAttribute($key . 'idref' => $id); }
          else {
            $self->note_missing('warn', "Target for $key Label", $label);
      } } }
      if ($id) {
        $n++;
        if (!$ref->getAttribute($key)) {
          if ($db->lookup("ID:" . $id)) {    # RDF "id" need not be real, valid, ids!!!
            if (my $url = $self->generateURL($doc, $id)) {
              $ref->setAttribute($key => $url); } }
          else {
            $ref->setAttribute($key => '#' . $id); } }
  } } }
  set_RDFa_prefixes($doc->getDocument, {});    # what prefixes??
  Debug("Filled in $n RDFa refs") if $LaTeXML::DEBUG{crossref};
  return; }

sub fill_in_mathlinks {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  my $n  = 0;
  foreach my $sym ($doc->findnodes('descendant::*[@decl_id or @meaning]')) {
    my $tag = $doc->getQName($sym);
    next if $tag eq 'ltx:XMRef';          # Blech; list those TO fill-in, or list those to exclude?
    next if $sym->hasAttribute('href');
    my $decl_id = $sym->getAttribute('decl_id');
    my $meaning = $sym->getAttribute('meaning');
    my $entry;
    if ($decl_id
      && !$doc->findnodes('ancestor::ltx:glossaryphrase | ancestor::ltx:declare[@type]', $sym)) {
      $entry = $$self{db}->lookup("DECLARATION:local:$decl_id"); }
    elsif ($meaning) {
      $entry = $$self{db}->lookup("DECLARATION:global:$meaning"); }
    if ($entry) {
      $n++;
      ## HACK: DLMF copies $meaning to ltxx:meaning for search indexing
      ## This should evolve into using (future) mml @mathrole?
##      if ($meaning && $$doc{namespaces}{ltxx}) {
##        $$node[1]{'ltxx:meaning'} = $meaning; }
      if (my $id = $self->getIDForDeclaration($entry)) {    # Where defined
        $sym->setAttribute(href => $self->generateURL($doc, $id));
        if (my $tag = $entry->getValue('tag:short') || $entry->getValue('description')) {
          $sym->setAttribute(title => getTextContent($doc, $tag)); }
  } } }
  Debug("Filled in $n math links") if $LaTeXML::DEBUG{crossref};
  return; }

# Given a declaration entry (ltx:declare, or ltx:mark or ...)
# Return the id of an appropriate link target.
# Basically this is the parent, except (DLMF specific?) it should be a table ROW, not CELL
# Or the numbered equationgroup, not the unnumbered equation
sub getIDForDeclaration {
  my ($self, $entry) = @_;
  if (my $pid = $entry && $entry->getValue('parent')) {
    if (my $pentry = $$self{db}->lookup("ID:$pid")) {
      my $ptype = $pentry->getValue('type') || '';
      # If definition is in a table cell, the correct id will be that of the row
      if ($ptype eq 'ltx:td') {
        if (my $gpid = $pentry->getValue('parent')) {
          return $gpid; } }
      # If definition is in unnumbered equation within an equation group, use id of the group
      elsif (($ptype eq 'ltx:equation') && !$pentry->getValue('refnum')) {
        if (my $gpid = $pentry->getValue('parent')) {
          if (my $gpentry = $$self{db}->lookup("ID:$gpid")) {
            my $gptype = $gpentry->getValue('type') || '';
            if ($gptype eq 'ltx:equationgroup') {
              return $gpid; } } } }
    }
    return $pid; } }

sub fill_in_glossaryrefs {
  my ($self, $doc) = @_;
  my $n = 0;
  foreach my $ref ($doc->findnodes('descendant::ltx:glossaryref')) {
    $n++;
    my $key   = $ref->getAttribute('key');
    my $list  = $ref->getAttribute('inlist');
    my $show  = $ref->getAttribute('show') || 'name';
    my $entry = $$self{db}->lookup(join(':', 'GLOSSARY', $list, $key));
    if ($entry) {
      my $title = $entry->getValue('phrase:definition');
      if (!$ref->getAttribute('title') && $title) {
        $ref->setAttribute(title => $title->textContent); }
      if (my $id = $entry->getValue('id')) {
        $ref->setAttribute(idref => $id); }
      if (!$ref->textContent && !element_nodes($ref)) {
        my @stuff = $self->generateGlossaryRefTitle($doc, $entry, $show);
        if (@stuff) {
          $doc->addNodes($ref, @stuff); }
        else {
          $self->note_missing('warn', "Glossary contents ($show) for key", $key);
          $doc->addNodes($ref, $key);
          $doc->addClass($ref, 'ltx_missing'); } } }
    else {
      $self->note_missing('warn', "Glossary Entry for key", $key); }
    if (!$ref->textContent && !element_nodes($ref)) {
      $doc->addNodes($ref, $key);
      $doc->addClass($ref, 'ltx_missing'); } }
  return; }

# Needs to evolve into the combined stuff that we had in DLMF.
# (eg. concise author/year combinations for multiple bibrefs)
sub fill_in_bibrefs {
  my ($self, $doc) = @_;
  my $n = 0;
  foreach my $bibref ($doc->findnodes('descendant::ltx:bibref')) {
    $n++;
    $doc->replaceNode($bibref, $self->make_bibcite($doc, $bibref)); }
  Debug("Filled in $n bibrefs") if $LaTeXML::DEBUG{crossref};
  return; }

# Given a list of bibkeys, construct links to them.
# Mostly tuned to author-year style.
# Combines when multiple bibitems share the same authors.
sub make_bibcite {
  my ($self, $doc, $bibref) = @_;

  my @keys         = grep { $_ } split(/,/, $bibref->getAttribute('bibrefs') || '');
  my $show         = $bibref->getAttribute('show');
  my @preformatted = $bibref->childNodes();
  if ($show && ($show eq 'none') && !@preformatted) {
    $show = 'refnum'; }
  if (!$show) {
    $show = 'refnum'; }
  if ($show eq 'nothing') {    # Ad Hoc support for \nocite!t
    return (); }
  my $sep     = $bibref->getAttribute('separator')   || ',';
  my $yysep   = $bibref->getAttribute('yyseparator') || ',';
  my @phrases = element_nodes($bibref);    # get the ltx;bibrefphrase's in the bibref!
                                           # Collect all the data from the bibliography
  my @data    = ();
  my @lists   = split(/\s+/, $bibref->getAttribute('inlist') || 'bibliography');
  foreach my $key (@keys) {
    my ($bentry, $id, $entry);
    foreach my $list (@lists) {            # Find the first of the lists that contains this bibkey
      $bentry = $$self{db}->lookup("BIBLABEL:" . $list . ':' . $key);
      last if $bentry; }
    if ($bentry
      && ($id    = $bentry->getValue('id'))
      && ($entry = $$self{db}->lookup("ID:$id"))) {
      my $authors  = $entry->getValue('authors');
      my $fauthors = $entry->getValue('fullauthors');
      my $keytag   = $entry->getValue('keytag');
      my $year     = $entry->getValue('year');
      my $typetag  = $entry->getValue('typetag');
      my $number   = $entry->getValue('number');
      my $title    = $entry->getValue('title');
      my $refnum   = $entry->getValue('refnum');        # This come's from the \bibitem, w/o BibTeX
      my ($rawyear, $suffix);

      my $titlestring = undef;
      if (defined $title) {
        $titlestring = $title->textContent;
        $titlestring =~ s/^\s+//;        # Trim leading whitespace
        $titlestring =~ s/\s+$//;        # and trailing
        $titlestring =~ s/\s+/ /gs; }    # and normalize all other whitespace.
      if ($year && ($year->textContent) =~ /^(\d\d\d\d)(\w)$/) {
        ($rawyear, $suffix) = ($1, $2); }
      $show = 'refnum' unless ($show eq 'none') || $authors || $fauthors || $keytag; # Disable author-year format!
                                                                                     # fullnames ?
      push(@data, {
          key         => $key,
          authors     => [$doc->trimChildNodes($authors  || $fauthors || $keytag)],
          fullauthors => [$doc->trimChildNodes($fauthors || $authors  || $keytag)],
          authortext  => ($authors || $fauthors ? ($authors || $fauthors)->textContent : ''),
          year        => [$doc->trimChildNodes($year || $typetag)],
          rawyear     => $rawyear,
          suffix      => $suffix,
          number      => [$doc->trimChildNodes($number)],
          refnum      => [$doc->trimChildNodes($refnum)],
          title       => [$doc->trimChildNodes($title || $keytag)],
          attr        => { idref => $id,
            href => orNull($self->generateURL($doc, $id)),
            ($titlestring ? (title => $titlestring) : ()) } }); }
    else {
      $self->note_missing('warn', 'Entry for citation', $key);
      push(@data, { key => $key, refnum => [$key], title => [$key], year => [],
          attr => { idref => $key, title => $key, class => "ltx_missing_citation" } });
  } }
  my $checkdups = ($show =~ /author/i) && ($show =~ /(year|number)/i);
  my @refs      = ();
  my $saveshow  = $show;
  while (@data) {
    my $datum  = shift(@data);
    my $didref = 0;
    my @stuff  = ();
    $show = $saveshow;
    if (($show eq 'none') && @preformatted) {
      @stuff = @preformatted; $show = ''; }
    elsif ($$datum{attr}{class} && ($$datum{attr}{class} eq 'ltx_missing_citation')) {
      @stuff  = (['ltx:ref', $$datum{attr}, $$datum{key}]);
      $didref = 1;
      $show   = '';
    }

    # Add delimeters for parsing...
    $show =~ s/(\w)year/$1\{\}year/gi;
    $show =~ s/(\w)phrase/$1\{\}phrase/gi;
    $show =~ s/phrase(\d)(\w)/phrase$1\{\}$2/gi;
    while ($show) {
      if ($show =~ s/^(\w+)//) {
        my $role = lc($1); $role =~ s/s$//;    # remove trailing plural
        if ($role eq 'author') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{authors} })); }
        elsif ($role eq 'fullauthor') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{fullauthors} })); }
        elsif ($role eq 'title') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{title} })); }
        elsif ($role eq 'refnum') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{refnum} })); }
        elsif ($role =~ /^phrase(\d)$/) {
          # HACK! Avoid empty () from situations where we've set the show (CITE_STYLE) too early
          # and don't actually have author-year information!
          my $n = $1;
          if (($n == 1) && ($show =~ /^\{\}year\{\}phrase2/i) && !scalar(@{ $$datum{year} })
            && (!$phrases[0] || (length($phrases[0]->textContent) <= 1))
            && (!$phrases[1] || (length($phrases[1]->textContent) <= 1))) {
            $show =~ s/^\{\}year\{\}phrase2//i; }
          else {
            push(@stuff, $phrases[$n - 1]->childNodes) if $phrases[$n - 1]; } }
        elsif ($role eq 'year') {
          if (!$$datum{year}) {
            $self->note_missing('warn', 'Date for citation', $$datum{key}); }
          elsif (@{ $$datum{year} }) {
            push(@stuff, ['ltx:ref', $$datum{attr}, @{ $$datum{year} }]);
            $didref = 1;
            while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
              my $next = shift(@data);
              push(@stuff, $yysep, ' ');
              if ((($$datum{rawyear} || 'no_year_1') eq ($$next{rawyear} || 'no_year_2')) && $$next{suffix}) {
                push(@stuff, ['ltx:ref', $$next{attr}, $$next{suffix}]); }
              else {
                push(@stuff, ['ltx:ref', $$next{attr}, @{ $$next{year} }]); } } } }
        elsif ($role eq 'number') {
          push(@stuff, ['ltx:ref', $$datum{attr}, @{ $$datum{number} }]);
          $didref = 1;
          while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
            my $next = shift(@data);
            push(@stuff, $yysep, ' ', ['ltx:ref', $$next{attr}, @{ $$next{number} }]); } }
        elsif ($role eq 'super') {
          my @r = ();
          push(@r, ['ltx:ref', $$datum{attr}, @{ $$datum{number} }]);
          $didref = 1;
          while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
            my $next = shift(@data);
            push(@r, $yysep, ' ', ['ltx:ref', $$next{attr}, @{ $$next{number} }]); }
          push(@stuff, ['ltx:sup', {}, @r]); }
        else {
          Info('unexpected', $role, $doc, "CITE ignoring show key '$role'"); } }
      elsif ($show =~ s/^\{([^\}]*)\}//) {    # pass-thru literal, quoted with {}
        push(@stuff, $1) if $1; }
      elsif ($show =~ s/^~//) {               # Pass-thru spaces
        push(@stuff, $NBSP) if @stuff; }
      elsif ($show =~ s/^(\s+)//) {           # Pass-thru spaces
        push(@stuff, $1) if @stuff; }
      elsif ($show =~ s/^(\W+)//) {           # Pass-thru non show keywords
        push(@stuff, $1); } }
    push(@refs,
      (@refs   ? ($sep, ' ') : ()),
      ($didref ? @stuff      : (['ltx:ref', $$datum{attr}, @stuff]))); }
  return @refs; }

sub generateURL {
  my ($self, $doc, $id) = @_;
  my ($object, $location);
  if ($object = $$self{db}->lookup("ID:" . $id)) {
    if ($location = $object->getValue('location')) {
      my $doclocation = $doc->siteRelativeDestination;
      my $pathdir     = pathname_directory($doclocation);
      my $url         = pathname_relative(($location =~ m|^/| ? $location : '/' . $location),
        ($pathdir =~ m|^/| ? $pathdir : '/' . $pathdir));
      my $extension = $$self{extension} || 'xml';
      my $urlstyle  = $$self{urlstyle}  || 'file';
      if ($urlstyle eq 'server') {
        # Remove trailing index.$extension but be careful not to leave url empty! (then it's "self")
        $url =~ s/(^|\/)index.\Q$extension\E$/($1 ? $1 : '.\/')/e; }
      elsif ($urlstyle eq 'negotiated') {
        $url =~ s/\.\Q$extension\E$//;    # Remove trailing $extension
        $url =~ s/(^|\/)index$/$1/;       # AND trailing index
      }
      $url = '.' unless $url;
      if (my $fragid = $object->getValue('fragid')) {
        $url = '' if ($url eq '.') or ($location eq $doclocation);
        $url .= '#' . $fragid; }
      elsif ($location eq $doclocation) {
        $url = ''; }
      return pathname_to_url($url); }
    else {
      $self->note_missing('warn', 'File location for ID', $id); } }
  else {
    $self->note_missing('warn', 'DB Entry for ID', $id); }
  return; }

# Generate the contents of a <ltx:ref> of the given id.
# show is a string containing substrings 'type', 'refnum' and 'title'
# (standing for the type prefix, refnum and title of the id'd object)
# and any other random characters; the
sub generateRef {
  my ($self, $doc, $reqid, $reqshow, $is_nameref) = @_;
  my $pending = '';
  my @stuff;
  # Try the requested show pattern, and if it fails, try a fallback of just the title or refnum
  foreach my $show (($reqshow, ($reqshow !~ /title/ ? "title" : "refnum"))) {
    my $id = $reqid;
    # Start with requested ID, add some from parent(s), if needed/until to make "useful" link content
    while (my $entry = $id && $$self{db}->lookup("ID:$id")) {
      if (my @s = $self->generateRef_aux($doc, $entry, $show, $is_nameref)) {
        push(@stuff, $pending) if $pending;
        push(@stuff, @s);
        return @stuff if $self->checkRefContent($doc, @stuff);
        $pending = $$self{ref_join}; } # inside/outside this brace determines if text can START with the join.
      $id = $entry->getValue('parent'); } }
  if (!@stuff) {                       # Try first child for a title-less document?
    if (my $entry = $$self{db}->lookup("ID:$reqid")) {
      if (($entry->getValue('type') || '') eq 'ltx:document') {
        foreach my $c (@{ $entry->getValue('children') }) {
          if (my $centry = $$self{db}->lookup("ID:$c")) {
            if (my @s = $self->generateRef_aux($doc, $centry, $reqshow, $is_nameref)) {
              push(@stuff, @s); last; } } } } } }
  if (@stuff) {
    return @stuff; }
  else {
    $self->note_missing('info', 'Usable title for ID', $reqid);
    return ($reqid); } }    # id is crummy, but better than "?"... or?

# Just return the reqshow value for $reqid, or nothing
sub generateRef_simple {
  my ($self, $doc, $reqid, $reqshow) = @_;
  my $pending = '';
  my @stuff;
  if (my $entry = $reqshow && $reqid && $$self{db}->lookup("ID:$reqid")) {
    return $self->generateRef_aux($doc, $entry, $reqshow); }
  return (); }

# Check if the proposed content of a <ltx:ref> is "Good Enough"
# (long enough, unique enough to give reader feedback,...)
sub checkRefContent {
  my ($self, $doc, @stuff) = @_;
  # Length? having _some_ actual text ?
  my $s = text_content(@stuff);
  # Could compare a minum length
  # But perhaps this is better: check that there's some "text", not just symbols!
  $s =~ s/\bin\s+//g;
  return ($s =~ /\w/ ? 1 : 0); }

sub text_content {
  my (@stuff) = @_;
  return join('', map { text_content_aux($_) } @stuff); }

sub text_content_aux {
  my ($n) = @_;
  my $r = ref $n;
  if (!$r) {
    return $n; }
  elsif ($r eq 'ARRAY') {
    my ($t, $a, @c) = @$n;
    return text_content(@c); }
  elsif ($r =~ /^XML::/) {
    return $n->textContent; }
  else {
    return $n; } }

my %ref_fallbacks = (    # Alternative fields, when not found
  typerefnum  => [qw(refnum)],
  toctitle    => [qw(title toccaption)],
  title       => [qw(toccaption)],
  rawtoctitle => [qw(toctitle title toccaption)],
  rawtitle    => [qw(title toccaption)],
);

# Generate text to fill in an ltx:ref from a database entry for some object.
# The show pattern indicates what data to use; usually a single keyword
# (or keywords separated by spaces, ~ or {} enclosed literal text)
# The keywords are things like refnum, title, caption, etc
# (possibly coming from ltx:tag or other data; see Scan)
sub generateRef_aux {
  my ($self, $doc, $entry, $show, $is_nameref) = @_;
  my @stuff = ();
  my $OK    = 0;
  while ($show) {
    if ($show =~ s/^(\w+)//) {    # peel off next keyword
      my $key   = lc($1);
      my $class = ($key =~ /title/ ? 'ltx_ref_title' : 'ltx_ref_tag');
      my @keys  = ($key, 'tag:' . $key,
        ($ref_fallbacks{$key} ? @{ $ref_fallbacks{$key} } : ()));
      my $value;
      foreach my $k (@keys) {     # lookup the data for that keyword (or an alternative)
        $value = $entry->getValue($k);
        last if $value; }
      if ($value) {
        $OK = 1;
        if ($is_nameref) {
          # yank out the tag if this is nameref
          my ($first_child) = element_nodes($value);
          $first_child->unbindNode if $first_child && ($doc->getQName($first_child) eq 'ltx:tag'); }
        push(@stuff, ['ltx:text', { class => $class }, $self->prepRefText($doc, $value)]); } }
    elsif ($show =~ s/^\{([^\}]*)\}//) {    # pass-thru literal, quoted with {}
      push(@stuff, $1) if $1; }
    elsif ($show =~ s/^~//) {               # Pass-thru spaces
      push(@stuff, $NBSP) if @stuff; }
    elsif ($show =~ s/^(\s+)//) {           # Pass-thru spaces
      push(@stuff, $1) if @stuff; }
    elsif ($show =~ s/^(\W+)//) {           # Pass-thru non show keywords
      push(@stuff, $1); } }
  # Maybe nothing found for this entry (probably retry on parent?)
  return ($OK ? @stuff : ()); }

sub prepRefText {
  my ($self, $doc, $title) = @_;
  return $doc->cloneNodes($doc->trimChildNodes($self->fillInTitle($doc, $title))); }

sub prepRawRefText {
  my ($self, $doc, $title) = @_;
  my $node = $self->prepRefText($doc, $title);
  if ($doc->getQName($node) =~ /^ltx:(?:toc)title$/) {    # Trim tags from titles
    my ($first) = element_nodes($node);
    if ($first && ($doc->getQName($first) eq 'ltx:tag')) {
      $node->removeChild($first); } }
  return $node; }

# Generate a title for this document.
sub generateDocumentTile {
  my ($self, $doc) = @_;
  my $title;
  if (my $docid = $doc->getDocumentElement->getAttribute('xml:id')) {    # If has a id?
    $title = $self->generateTitle($doc, $docid); }
  return $title if (defined $title) && ($title ne '');
  if (my $node = $doc->findnode('//ltx:title | //ltx:toctitle | //ltx:caption | //ltx:toccaption')) {
    $title = getTextContent($doc, $node); }
  return $title if (defined $title) && ($title ne '');
  return; }

# Generate a title string for ltx:ref
sub generateTitle {
  my ($self, $doc, $id) = @_;
  # Add author, if any ???
  my $string    = "";
  my $altstring = "";
  while (my $entry = $id && $$self{db}->lookup("ID:$id")) {
    my $title = $self->fillInTitle($doc,
      $entry->getValue('title') || $entry->getValue('typerefnum') || $entry->getValue('refnum'));
    $title = getTextContent($doc, $title) if $title && ref $title;
    if ($title) {
      $string .= $$self{ref_join} if $string;
      $string .= $title; }
    $id = $entry->getValue('parent'); }
  return $string || $altstring; }

sub getTextContent {
  my ($doc, $title) = @_;
  $title = getTextContent_rec($doc, $title) if $title && ref $title;
  $title =~ s/^\s+//s  if $title;    # Trim leading whitespace
  $title =~ s/\s+$//s  if $title;    # and trailing
  $title =~ s/\s+/ /gs if $title;    # and normalize all other whitespace.
  return $title; }

sub getTextContent_rec {
  my ($doc, $node) = @_;
  my $type = $node->nodeType;
  if ($type == XML_TEXT_NODE) {
    return $node->textContent; }
  elsif ($type == XML_ELEMENT_NODE) {
    my $tag = $doc->getQName($node);
    if ($tag eq 'ltx:tag') {
      return ($node->getAttribute('open') || '')
        . $node->textContent    # assuming no nested ltx:tag
        . ($node->getAttribute('close') || ''); }
    elsif ($tag eq 'ltx:Math') {
      return unicodemath($doc, $node); }
    else {
      return join('', map { getTextContent_rec($doc, $_); } $node->childNodes); } }
  elsif ($type == XML_DOCUMENT_FRAG_NODE) {
    return join('', map { getTextContent_rec($doc, $_); } $node->childNodes); }
  else {
    return ''; } }

# Fill in any embedded ltx:ref's & ltx:cite's within a title
sub fillInTitle {
  my ($self, $doc, $title) = @_;
  return $title unless $title && ref $title;
  # Fill in any nested ref's!
  foreach my $ref ($doc->findnodes('descendant::ltx:ref[@idref or @labelref]', $title)) {
    next if $ref->textContent;
    my $show = $ref->getAttribute('show');
    $show = $$self{ref_show} unless $show;
    my $refentry;
    if (my $id = $ref->getAttribute('idref')) {
      $refentry = $$self{db}->lookup("ID:$id"); }
    elsif (my $label = $ref->getAttribute('labelref')) {
      $refentry = $$self{db}->lookup($label);
      if ($id = $refentry->getValue('id')) {
        $refentry = $$self{db}->lookup("ID:$id"); } }
    if ($refentry) {
      $doc->replaceNode($ref, $self->generateRef_aux($doc, $refentry, $show)); } }
  # Fill in (replace, actually) any embedded citations.
  foreach my $bibref ($doc->findnodes('descendant::ltx:bibref', $title)) {
    $doc->replaceNode($bibref, $self->make_bibcite($doc, $bibref)); }
  foreach my $break ($doc->findnodes('descendant::ltx:break', $title)) {
    $doc->replaceNode($break, ['ltx:text', {}, " "]); }
  return $title; }

sub generateGlossaryRefTitle {
  my ($self, $doc, $entry, $show) = @_;
  my $phrases = $entry->getValue('phrases');
  my @stuff   = ();
  if (my $phrase = $entry->getValue('phrase:' . $show)) {
    push(@stuff, ['ltx:text', { class => 'ltx_glossary_' . $show },
        $self->prepRefText($doc, $phrase)]); }
  elsif ($show =~ /^(\w+)-plural$/) {
    my $sh = $1;
    if (my $phrase = $entry->getValue('phrase:' . $sh)) {
      push(@stuff, ['ltx:text', { class => 'ltx_glossary_' . $show },
          $self->prepRefText($doc, $phrase), 's']); } }
  elsif ($show =~ /^(\w+)-indefinite$/) {
    my $sh = $1;
    if (my $phrase = $entry->getValue('phrase:' . $sh)) {
      my $s   = $phrase->textContent;
      my $art = ($s =~ /^[aeiou]/i ? 'an ' : 'a ');
      push(@stuff, ['ltx:text', { class => 'ltx_glossary_' . $show },
          $art, $self->prepRefText($doc, $phrase)]); } }
  return @stuff; }

sub orNull {
  return (grep { defined } @_) ? @_ : undef; }

# Possibly this needs support from Scan, as well?
# to manage resources, record in Manifest, something like that?
sub copy_resources {
  my ($self, $doc) = @_;
  # Copy any "resources" linked from the document
  my $paths = [$doc->getSearchPaths];
  foreach my $n ($doc->findnodes('//ltx:ref[@href and not(@idref) and not(@labelref)]')) {
    my $url = $n->getAttribute('href');
    if ($url !~ /^(\w+:|\/)/) {    # relative path? (No explicit protocol, or absolute)
      if (my $src = pathname_find($url, paths => $paths)) {    # AND if file exists there.
        my $dst = $doc->checkDestination($url);
        pathname_copy($src, $dst);
  } } }
  return; }

# ================================================================================
1;