File: doi.c

package info (click to toggle)
bibutils 4.8-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 3,512 kB
  • ctags: 1,340
  • sloc: ansic: 72,394; csh: 216; makefile: 117
file content (113 lines) | stat: -rw-r--r-- 2,826 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*
 * doi.c
 *
 * doi_to_url()
 * Handle outputting DOI as a URL (Endnote and RIS formats)
 *     1) Prepend http://dx.doi.org as necessary
 *     2) Check for overlap with pre-existing URL for the DOI
 *
 * is_doi()
 * Check for DOI buried in another field.
 *
 * Copyright (c) Chris Putnam 2008-2009
 *
 * Source code released under the GPL
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include "newstr.h"
#include "fields.h"

/*
 * construct_url()
 *
 * Build the URL for an identifier and store it in id_url.
 *
 *   prefix - resolver base URL without a trailing slash
 *            (e.g. "http://dx.doi.org")
 *   id     - identifier text; may already be a complete URL
 *   id_url - receives the result
 *
 * An identifier that already starts with "http:" or "https:" (compared
 * without regard to case) is taken to be a complete URL and is copied to
 * id_url as is.  Anything else is placed after prefix, with a '/'
 * inserted between the two unless the identifier brings its own leading
 * slash.
 */
static void
construct_url( char *prefix, newstr *id, newstr *id_url )
{
	int is_url;

	/* "https:" must be recognized as well, otherwise a secure URL would
	 * end up with the resolver prefix glued in front of it. */
	is_url = !strncasecmp( id->data, "http:", 5 ) ||
	         !strncasecmp( id->data, "https:", 6 );
	if ( is_url ) {
		newstr_newstrcpy( id_url, id );
		return;
	}

	newstr_strcpy( id_url, prefix );
	if ( id->data[0]!='/' ) newstr_addchar( id_url, '/' );
	newstr_newstrcat( id_url, id );
}

/*
 * url_exists()
 *
 * Look for a field in info whose tag equals urltag and whose data equals
 * doi_url->data (both compared with strcmp(), so case matters).
 *
 * Returns 1 when such a field is present, 0 otherwise.  A NULL urltag
 * means there is nothing to compare against and always gives 0.
 */
static int
url_exists( fields *info, char *urltag, newstr *doi_url )
{
	int k;

	if ( !urltag ) return 0;

	for ( k = 0; k < info->nfields; k++ ) {
		if ( !strcmp( info->tag[k].data, urltag ) &&
		     !strcmp( info->data[k].data, doi_url->data ) )
			return 1;
	}
	return 0;
}

/*
 * doi_to_url()
 *
 * Turn the DOI held in field n of info into a URL via construct_url(),
 * using "http://dx.doi.org" as the resolver prefix.  If url_exists()
 * reports that the same URL is already stored under urltag, doi_url is
 * cleared again with newstr_empty() so the caller does not emit it twice.
 */
void
doi_to_url( fields *info, int n, char *urltag, newstr *doi_url )
{
	newstr *doi;

	newstr_empty( doi_url );
	doi = &(info->data[n]);
	construct_url( "http://dx.doi.org", doi, doi_url );

	if ( !url_exists( info, urltag, doi_url ) ) return;
	newstr_empty( doi_url );
}

/*
 * pmid_to_url()
 *
 * Turn the PubMed identifier held in field n of info into a URL via
 * construct_url(), using "http://www.ncbi.nlm.nih.gov/pubmed" as the
 * prefix.  If url_exists() reports that the same URL is already stored
 * under urltag, pmid_url is cleared again with newstr_empty().
 */
void
pmid_to_url( fields *info, int n, char *urltag, newstr *pmid_url )
{
	newstr *pmid;

	newstr_empty( pmid_url );
	pmid = &(info->data[n]);
	construct_url( "http://www.ncbi.nlm.nih.gov/pubmed", pmid, pmid_url );

	if ( !url_exists( info, urltag, pmid_url ) ) return;
	newstr_empty( pmid_url );
}

/*
 * arxiv_to_url()
 *
 * Turn the arXiv identifier held in field n of info into a URL via
 * construct_url(), using "http://arxiv.org/abs" as the prefix.  If
 * url_exists() reports that the same URL is already stored under urltag,
 * arxiv_url is cleared again with newstr_empty().
 */
void
arxiv_to_url( fields *info, int n, char *urltag, newstr *arxiv_url )
{
	newstr *arxiv;

	newstr_empty( arxiv_url );
	arxiv = &(info->data[n]);
	construct_url( "http://arxiv.org/abs", arxiv, arxiv_url );

	if ( !url_exists( info, urltag, arxiv_url ) ) return;
	newstr_empty( arxiv_url );
}

/*
 * string_pattern()
 *
 * Test whether the beginning of s matches pattern.  Only the first
 * strlen(pattern) characters of s are examined; whatever follows them
 * is ignored.
 *
 * Rules for the pattern:
 *   '#' = any decimal digit
 *   isalpha() = match precisely (matchcase!=0) or match regardless of
 *   	case (matchcase==0)
 *   all others must match precisely
 *
 * Returns 1 on a match and 0 otherwise, including when s is shorter
 * than pattern.
 */
static int
string_pattern( const char *s, const char *pattern, int matchcase )
{
	size_t patlen, i;
	unsigned char sc, pc;

	patlen = strlen( pattern );
	if ( strlen( s ) < patlen ) return 0; /* too short */

	for ( i=0; i<patlen; ++i ) {
		/* The <ctype.h> functions are only defined for values that
		 * fit in an unsigned char (or EOF); plain char may be
		 * negative for non-ASCII bytes, so convert first. */
		sc = (unsigned char) s[i];
		pc = (unsigned char) pattern[i];

		if ( pc=='#' ) {
			if ( !isdigit( sc ) ) return 0;
		} else if ( !matchcase && isalpha( pc ) ) {
			if ( tolower( pc )!=tolower( sc ) ) return 0;
		} else {
			if ( pc!=sc ) return 0;
		}
	}
	return 1;
}

/*
 * is_doi()
 *
 * Check whether s starts with a DOI, optionally introduced by a "doi:"
 * style lead-in (letters compared without regard to case).
 *
 * Returns the offset into s at which the DOI number itself begins, i.e.
 * the length of the recognized lead-in (0, 4, 5 or 10), or -1 if s does
 * not start with any of the recognized forms.
 *
 * The longest form exists because science direct is now doing
 * "M3  - doi: DOI: 10.xxxx/xxxxx".
 */
int
is_doi( char *s )
{
	static struct {
		char *pattern;
		int   offset;
	} forms[] = {
		{ "##.####/",           0  },
		{ "doi:##.####/",       4  },
		{ "doi: ##.####/",      5  },
		{ "doi: DOI: ##.####/", 10 },
	};
	int k, nforms;

	nforms = (int)( sizeof( forms ) / sizeof( forms[0] ) );
	for ( k = 0; k < nforms; k++ ) {
		if ( string_pattern( s, forms[k].pattern, 0 ) )
			return forms[k].offset;
	}
	return -1;
}