/* -*- Mode: c; c-basic-offset: 2 -*-
*
* raptor_www_libxml.c - Raptor WWW retrieval via libxml2
*
* Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/
* Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/
*
* This package is Free Software and part of Redland http://librdf.org/
*
* It is licensed under the following three licenses as alternatives:
* 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
* 2. GNU General Public License (GPL) V2 or any newer version
* 3. Apache License, V2.0 or any newer version
*
* You may not use this file except in compliance with at least one of
* the above three licenses.
*
* See LICENSE.html or LICENSE.txt at the top of this package for the
* complete terms and further detail along with the license texts for
* the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
*
*
*/
#ifdef HAVE_CONFIG_H
#include <raptor_config.h>
#endif
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
/* Raptor includes */
#include "raptor2.h"
#include "raptor_internal.h"
#ifdef RAPTOR_WWW_LIBXML
/*
 * raptor_www_libxml_init - set up the libxml2 nanohttp backend for @www
 * @www: WWW object whose libxml context pointer is reset
 *
 * Clears www->ctxt so that no HTTP context is recorded yet and calls
 * xmlNanoHTTPInit() to initialise the libxml2 nanohttp module.
 */
void
raptor_www_libxml_init(raptor_www *www)
{
  /* No request has been opened yet */
  www->ctxt = NULL;

  xmlNanoHTTPInit();
}
/*
 * raptor_www_libxml_free - tear down the libxml2 nanohttp backend
 * @www: WWW object (not read or modified by this function)
 *
 * Calls xmlNanoHTTPCleanup(), the counterpart of the xmlNanoHTTPInit()
 * call made in raptor_www_libxml_init().
 */
void
raptor_www_libxml_free(raptor_www *www)
{
  /* Nothing stored in www is released here */
  (void)www;

  xmlNanoHTTPCleanup();
}
/*
 * raptor_www_libxml_fetch - retrieve www->uri using libxml2 nanohttp
 * @www: WWW object describing the request and receiving the results
 *
 * Steps performed:
 *  - if www->proxy is set, hands it to xmlNanoHTTPScanProxy()
 *  - builds an extra-headers string from www->http_accept,
 *    www->user_agent and www->cache_control (whichever are set)
 *  - opens the request with xmlNanoHTTPMethod(), storing the context in
 *    www->ctxt and the reported content type in www->type
 *  - passes the content type to the www->content_type callback (if any)
 *    and then releases it with xmlFree()
 *  - records the HTTP status in www->status_code
 *  - reads the body in RAPTOR_WWW_BUFFER_SIZE chunks into www->buffer,
 *    adding to www->total_bytes and passing each chunk to the
 *    www->write_bytes callback (if any)
 *
 * Return value: 1 if the headers cannot be allocated, if the request
 * cannot be opened, or if www->failed is set after the content type
 * callback; otherwise the value of www->failed after the body was read.
 */
int
raptor_www_libxml_fetch(raptor_www *www)
{
  /* Only user_agent is a bare value and needs its header name added */
  static const char ua_prefix[] = "User-Agent: ";
  const size_t ua_prefix_len = sizeof(ua_prefix) - 1;
  char* headers = NULL;

  if(www->proxy)
    xmlNanoHTTPScanProxy(www->proxy);

  /* Build the extra headers if ANY of the three optional fields is set;
   * cache_control on its own must not be dropped.
   */
  if(www->http_accept || www->user_agent || www->cache_control) {
    size_t accept_len = 0;
    size_t ua_len = 0;
    size_t cc_len = 0;
    size_t len = 0;
    char *p;

    /* First pass: compute the total size, each header ends with \r\n */
    if(www->http_accept) {
      accept_len = strlen(www->http_accept);
      len += accept_len + 2; /* \r\n */
    }

    if(www->user_agent) {
      ua_len = strlen(www->user_agent);
      len += ua_prefix_len + ua_len + 2; /* "User-Agent: " + \r\n */
    }

    if(www->cache_control) {
      cc_len = strlen(www->cache_control);
      len += cc_len + 2; /* \r\n */
    }

    headers = RAPTOR_MALLOC(char*, len + 1);
    if(!headers)
      return 1;

    /* Second pass: copy the pieces.  http_accept and cache_control are
     * copied verbatim, so they are expected to already be complete
     * header lines (name and value).
     */
    p = headers;
    if(www->http_accept) {
      memcpy(p, www->http_accept, accept_len);
      p += accept_len;
      *p++ = '\r';
      *p++ = '\n';
    }

    if(www->user_agent) {
      memcpy(p, ua_prefix, ua_prefix_len);
      p += ua_prefix_len;
      memcpy(p, www->user_agent, ua_len);
      p += ua_len;
      *p++ = '\r';
      *p++ = '\n';
    }

    if(www->cache_control) {
      memcpy(p, www->cache_control, cc_len);
      p += cc_len;
      *p++ = '\r';
      *p++ = '\n';
    }

    *p = '\0';
  }

  www->ctxt = xmlNanoHTTPMethod((const char*)raptor_uri_as_string(www->uri),
                                NULL, /* HTTP method (default GET) */
                                NULL, /* input string */
                                &www->type,
                                headers,
                                0); /* input length - ilen */

  /* The header string is not needed once the request has been opened */
  if(headers)
    RAPTOR_FREE(char*, headers);

  if(!www->ctxt)
    return 1;

  if(www->type) {
    if(www->content_type) {
      www->content_type(www, www->content_type_userdata, www->type);
      if(www->failed) {
        /* Release the content type on this early exit too, matching
         * the normal path below, so it is neither leaked nor left as
         * a stale pointer in www.
         */
        xmlFree(www->type);
        www->type = NULL;
        xmlNanoHTTPClose(www->ctxt);
        return 1;
      }
    }
    xmlFree(www->type);
    www->type = NULL;
  }

  www->status_code = xmlNanoHTTPReturnCode(www->ctxt);

  while(1) {
    int len = xmlNanoHTTPRead(www->ctxt, www->buffer, RAPTOR_WWW_BUFFER_SIZE);

    /* A negative result ends the loop; www->failed is not changed here */
    if(len < 0)
      break;

    www->total_bytes += len;

    if(www->write_bytes)
      www->write_bytes(www, www->write_bytes_userdata, www->buffer, len, 1);

    /* A short read is treated as the end of the content */
    if(len < RAPTOR_WWW_BUFFER_SIZE || www->failed)
      break;
  }

  xmlNanoHTTPClose(www->ctxt);

  return www->failed;
}
#endif /* #ifdef RAPTOR_WWW_LIBXML */