File: rdf_heuristics.c

package info (click to toggle)
redland 1.0.7-1
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 27,592 kB
  • ctags: 12,328
  • sloc: ansic: 79,017; xml: 25,115; sh: 10,162; yacc: 5,985; lex: 3,682; makefile: 3,260; perl: 2,814; cpp: 59
file content (247 lines) | stat: -rw-r--r-- 5,554 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
/* -*- Mode: c; c-basic-offset: 2 -*-
 *
 * rdf_heuristics.c - Heuristic routines to guess things about RDF
 *
 * $Id: rdf_heuristics.c 12692 2007-09-29 21:03:22Z laalto $
 *
 * Copyright (C) 2000-2006, David Beckett http://purl.org/net/dajobe/
 * Copyright (C) 2000-2004, University of Bristol, UK http://www.bristol.ac.uk/
 * 
 * This package is Free Software and part of Redland http://librdf.org/
 * 
 * It is licensed under the following three licenses as alternatives:
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 * 
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 * 
 * See LICENSE.html or LICENSE.txt at the top of this package for the
 * complete terms and further detail along with the license texts for
 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
 * 
 * 
 */


#ifdef HAVE_CONFIG_H
#include <rdf_config.h>
#endif

#ifdef WIN32
#include <win32_rdf_config.h>
#endif

#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>
#include <limits.h> /* for LONG_MAX */

#ifdef HAVE_STDLIB_H
#include <stdlib.h> /* for strtol */
#endif

#include <redland.h>


#ifndef STANDALONE
/**
 * librdf_heuristic_is_blank_node:
 * @node: node string to inspect (may be NULL)
 *
 * Guess whether a node string is a blank node identifier.
 *
 * A string is taken to be a blank node identifier when it starts with
 * the two characters "_:", as used by N-Triples, N3 and related syntaxes.
 * A NULL @node is never a blank node.
 *
 * Return value: non 0 if node is probably a blank node identifier
 **/

int
librdf_heuristic_is_blank_node(const char *node)
{
  if(!node)
    return 0;

  /* the second character is only examined once the first one matched,
   * so an empty string is never read past its terminator */
  if(node[0] != '_')
    return 0;

  return node[1] == ':';
}


/**
 * librdf_heuristic_get_blank_node:
 * @node: node string to inspect (may be NULL)
 *
 * Extract the blank node identifier from a node string.
 *
 * For a string of the form _:ABC this returns a pointer to the part
 * after the "_:" prefix (ABC).  The result points into @node; nothing
 * is copied or allocated.
 *
 * Return value: the blank node identifier string or NULL if the node does not seem to be a blank node identifier
 **/

const char*
librdf_heuristic_get_blank_node(const char *node)
{
  if(!librdf_heuristic_is_blank_node(node))
    return NULL;

  /* step over the leading "_:" */
  return node+2;
}


/**
 * librdf_heuristic_object_is_literal:
 * @object: string object to guess type (may be NULL)
 *
 * Try to guess if an object string is a literal or a resource.
 *
 * The object is assumed NOT to be a literal when either:
 *   - it looks like a blank node identifier (starts with "_:"), or
 *   - it looks like a URI: a run of isalnum() characters immediately
 *     followed by ':' with no isspace() character anywhere in the
 *     rest of the string.
 *
 * This will be fooled by literals of form 'thing:non-blank-thing' but
 * is good enough.
 *
 * A NULL @object is reported as not a literal.
 * 
 * Return value: non 0 if object is probably a literal
 **/

int
librdf_heuristic_object_is_literal(const char *object) 
{
  const unsigned char *p;

  /* nothing to inspect; also avoids dereferencing NULL below */
  if(!object)
    return 0;

  if(librdf_heuristic_is_blank_node(object))
    return 0;

  /* The <ctype.h> functions require a value representable as
   * unsigned char (or EOF); walking the string as unsigned char keeps
   * bytes >= 0x80 from being passed as negative values. */
  p=(const unsigned char*)object;

  /* Skip the leading alphanumeric run (the would-be URI scheme) */
  while(*p && isalnum(*p))
    p++;

  /* Better be a ':' - otherwise assume the worst, a literal */
  if(*p != ':')
    return 1;

  /* Any whitespace in the rest of the string means it is a literal */
  for(; *p; p++)
    if(isspace(*p))
      return 1;

  /* reached end, not a literal (by this heuristic) */
  return 0;
}


/**
 * librdf_heuristic_gen_name:
 * @name: the name
 *
 * Generate a new name from an existing name.
 * 
 * If @name ends in decimal digits, those digits are read as a number
 * and replaced by that number plus one (leading zeros are not kept).
 * Otherwise "1" is appended.  An empty @name yields "1".
 *
 * When @name consists only of digits, its first character is kept as
 * part of the fixed prefix and only the following digits are treated
 * as the counter (so "5" becomes "51" and "99997" becomes "99998").
 *
 * The result is allocated with LIBRDF_MALLOC and must be released by
 * the caller with LIBRDF_FREE.
 * 
 * Return value: a new name or NULL on failure (allocation failure, or
 * the trailing number is too large to be incremented)
 **/
char *
librdf_heuristic_gen_name(const char *name) 
{
  char *new_name;
  const char *p;
  size_t len;
  size_t prefix_len;
  size_t digits;
  long l=0L;
  long rest;
  
  LIBRDF_ASSERT_OBJECT_POINTER_RETURN_VALUE(name, cstring, NULL);

  len=strlen(name);

  /* By default the whole name is the prefix and the counter starts at 0 */
  prefix_len=len;

  /* Only look for a trailing number when there is a last character;
   * this keeps an empty name from reading before the start of the string.
   * The unsigned char casts are required by <ctype.h>. */
  if(len && isdigit((unsigned char)name[len-1])) {
    /* Move p to last non number char (or to the first char of name) */
    p=name+len-1;
    while(p>name && isdigit((unsigned char)*p))
      p--;

    /* everything after p is digits (possibly none) */
    l=strtol(p+1, (char**)NULL, 10);
    prefix_len=(size_t)(p-name)+1;
  }

  /* strtol saturates at LONG_MAX; incrementing that would overflow */
  if(l == LONG_MAX)
    return NULL;
  l++;

  /* Count the decimal digits of the new number.  Note l is never 0. */
  digits=0;
  for(rest=l; rest; rest /= 10)
    digits++;

  new_name=(char*)LIBRDF_MALLOC(cstring, prefix_len+digits+1); /* +1 for \0 */
  if(!new_name)
    return NULL;

  /* prefix first, then the number (sprintf adds the terminating \0) */
  memcpy(new_name, name, prefix_len);
  sprintf(new_name+prefix_len, "%ld", l);
  return new_name;
}

#endif


/* TEST CODE */


#ifdef STANDALONE

/* one more prototype */
int main(int argc, char *argv[]);


/*
 * Test driver: for each seed name, repeatedly feed the previous result
 * back into librdf_heuristic_gen_name() NAMES_COUNT times, freeing each
 * intermediate name.  Exits with 1 if any generation failed, else 0.
 */
int
main(int argc, char *argv[]) 
{
  const char *test_names[]={"test", "abc123", "99997", NULL};
  const char *program=librdf_basename((const char*)argv[0]);
  int failed=0;
  int idx;
  
#define NAMES_COUNT 11

  for(idx=0; test_names[idx]; idx++) {
    /* starts as the (static) seed, afterwards always a generated name */
    char *current=(char*)test_names[idx];
    int round;
    
#if LIBRDF_DEBUG > 1
    fprintf(stdout, "%s: Generating %d new names from '%s'\n", program, 
            NAMES_COUNT, current);
#endif
  
    for(round=0; round < NAMES_COUNT; round++) {
      char *next;
      
#if LIBRDF_DEBUG > 1
      fprintf(stdout, "Generating name from '%s'\n", current);
#endif
      next=librdf_heuristic_gen_name(current);
      if(!next) {
        fprintf(stdout, "%s: Failed to generate name from '%s'\n", program, current);
        failed=1;
        break;
      }
#if LIBRDF_DEBUG > 1
      fprintf(stdout, "  result was '%s'\n", next);
#endif
      
      /* the seed itself is a string constant and must not be freed */
      if(current != test_names[idx])
        LIBRDF_FREE(cstring, (char*)current);

      /* the new name is the input for the next round */
      current=next;
    }

    /* release the last generated name, if there was one */
    if(current != test_names[idx])
      LIBRDF_FREE(cstring, current);
  }

  return failed;
}

#endif