1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
|
/*
* doi.c
*
* doi_to_url()
* Handle outputting DOI as a URL (Endnote and RIS formats)
* 1) Prepend http://dx.doi.org as necessary
* 2) Check for overlap with pre-existing URL for the DOI
*
* is_doi()
* Check for DOI buried in another field.
*
* Copyright (c) Chris Putnam 2008-2009
*
* Source code released under the GPL
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "newstr.h"
#include "fields.h"
/*
 * construct_url()
 *
 * Build the URL form of an identifier into id_url.
 *
 *   prefix - base URL placed in front of a bare identifier
 *   id     - identifier as stored in the record
 *   id_url - receives the result
 *
 * An identifier that already starts with "http:" or "https:" (compared
 * without regard to case) is taken to be a complete URL and is copied
 * to id_url as is.  Otherwise id_url becomes prefix, then a '/' unless
 * the identifier itself starts with one, then the identifier.
 */
static void
construct_url( char *prefix, newstr *id, newstr *id_url )
{
	int already_url = !strncasecmp( id->data, "http:", 5 ) ||
	                  !strncasecmp( id->data, "https:", 6 );

	if ( already_url ) {
		newstr_newstrcpy( id_url, id );
		return;
	}

	newstr_strcpy( id_url, prefix );
	/* avoid a doubled separator when the identifier brings its own '/' */
	if ( id->data[0]!='/' ) newstr_addchar( id_url, '/' );
	newstr_newstrcat( id_url, id );
}
/*
 * url_exists()
 *
 * Report whether info already has a field whose tag equals urltag and
 * whose value equals doi_url exactly (both compared with strcmp()).
 *
 * Returns 1 if such a field is found, 0 otherwise.  A NULL urltag
 * always yields 0.
 */
static int
url_exists( fields *info, char *urltag, newstr *doi_url )
{
	int k;

	if ( !urltag ) return 0;

	for ( k=0; k<info->nfields; ++k ) {
		if ( !strcmp( info->tag[k].data, urltag ) &&
		     !strcmp( info->data[k].data, doi_url->data ) )
			return 1;
	}

	return 0;
}
/*
 * doi_to_url()
 *
 * Build the URL for the DOI held in field n of info and leave it in
 * doi_url, using "http://dx.doi.org" as the base.
 *
 * If url_exists() reports that a field tagged urltag already holds
 * exactly this URL, doi_url is emptied instead, so the caller does not
 * output a duplicate.
 */
void
doi_to_url( fields *info, int n, char *urltag, newstr *doi_url )
{
	newstr *doi = &( info->data[n] );

	newstr_empty( doi_url );
	construct_url( "http://dx.doi.org", doi, doi_url );

	if ( !url_exists( info, urltag, doi_url ) ) return;

	/* already present under urltag: report nothing */
	newstr_empty( doi_url );
}
/*
 * pmid_to_url()
 *
 * Build the URL for the PubMed identifier held in field n of info and
 * leave it in pmid_url, using "http://www.ncbi.nlm.nih.gov/pubmed" as
 * the base.
 *
 * If url_exists() reports that a field tagged urltag already holds
 * exactly this URL, pmid_url is emptied instead, so the caller does
 * not output a duplicate.
 */
void
pmid_to_url( fields *info, int n, char *urltag, newstr *pmid_url )
{
	newstr *pmid = &( info->data[n] );

	newstr_empty( pmid_url );
	construct_url( "http://www.ncbi.nlm.nih.gov/pubmed", pmid, pmid_url );

	if ( !url_exists( info, urltag, pmid_url ) ) return;

	/* already present under urltag: report nothing */
	newstr_empty( pmid_url );
}
/*
 * arxiv_to_url()
 *
 * Build the URL for the arXiv identifier held in field n of info and
 * leave it in arxiv_url, using "http://arxiv.org/abs" as the base.
 *
 * If url_exists() reports that a field tagged urltag already holds
 * exactly this URL, arxiv_url is emptied instead, so the caller does
 * not output a duplicate.
 */
void
arxiv_to_url( fields *info, int n, char *urltag, newstr *arxiv_url )
{
	newstr *arxiv = &( info->data[n] );

	newstr_empty( arxiv_url );
	construct_url( "http://arxiv.org/abs", arxiv, arxiv_url );

	if ( !url_exists( info, urltag, arxiv_url ) ) return;

	/* already present under urltag: report nothing */
	newstr_empty( arxiv_url );
}
/*
 * string_pattern()
 *
 * Check whether the beginning of s matches pattern.  Only the first
 * strlen(pattern) characters of s are examined; whatever follows them
 * is ignored.
 *
 * Rules for the pattern:
 *   '#'       = any single decimal digit
 *   isalpha() = match precisely (matchcase!=0) or match regardless of
 *               case (matchcase==0)
 *   all others must match precisely
 *
 * Returns 1 on a match, 0 otherwise (including when s is shorter than
 * pattern).
 */
static int
string_pattern( char *s, char *pattern, int matchcase )
{
	unsigned char pc, sc;
	size_t i;

	for ( i=0; pattern[i]!='\0'; ++i ) {
		/* the <ctype.h> functions require values in unsigned char
		 * range; a plain char may be negative for non-ASCII bytes */
		pc = (unsigned char) pattern[i];
		sc = (unsigned char) s[i];

		if ( sc=='\0' ) return 0; /* s is too short */

		if ( pc=='#' ) {
			if ( !isdigit( sc ) ) return 0;
		} else if ( !matchcase && isalpha( pc ) ) {
			if ( tolower( pc )!=tolower( sc ) ) return 0;
		} else {
			if ( pc!=sc ) return 0;
		}
	}

	return 1;
}
/*
 * is_doi()
 *
 * Check whether s begins with a DOI of the form "##.####/...",
 * optionally preceded by one of the recognized "doi:" labels (letters
 * in the label are matched without regard to case).
 *
 * Returns the offset within s at which the DOI itself starts, or -1 if
 * s does not begin with any recognized form.
 *
 * Science Direct is now doing "M3 - doi: DOI: 10.xxxx/xxxxx", hence
 * the doubled-label form.
 */
int
is_doi( char *s )
{
	/* tried in order; offset is the length of the label before the DOI */
	static const struct {
		char *pattern;
		int   offset;
	} forms[] = {
		{ "##.####/",            0 },
		{ "doi:##.####/",        4 },
		{ "doi: ##.####/",       5 },
		{ "doi: DOI: ##.####/", 10 },
	};
	const int nforms = (int) ( sizeof( forms ) / sizeof( forms[0] ) );
	int k;

	for ( k=0; k<nforms; ++k ) {
		if ( string_pattern( s, forms[k].pattern, 0 ) )
			return forms[k].offset;
	}

	return -1;
}
|