File: http_get2.c

package info (click to toggle)
wget2 2.2.0%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 22,468 kB
  • sloc: ansic: 121,166; sh: 11,559; makefile: 878; xml: 182; sed: 16
file content (147 lines) | stat: -rw-r--r-- 4,742 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
 * Copyright (c) 2013 Tim Ruehsen
 * Copyright (c) 2015-2024 Free Software Foundation, Inc.
 *
 * This file is part of libwget.
 *
 * Libwget is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Libwget is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
 *
 *
 * Example for retrieving and parsing an HTTP URI
 *
 * Changelog
 * 16.01.2013  Tim Ruehsen  created
 *
 * Simple demonstration of how to download a URI.
 *
 */

#include <wget.h>

// Feature switch: when defined, the sections of main() guarded by
// '#ifdef COOKIE_SUPPORT' (cookie database setup, 'Cookie' request header,
// storing of received cookies) are compiled in.
#define COOKIE_SUPPORT

/**
 * Example program: send a single HTTP GET request for http://www.example.org
 * and print the response header and body through the libwget info logger.
 *
 * Steps performed:
 *   1. route the libwget debug/error/info loggers to stderr/stderr/stdout
 *   2. parse the URL and build a GET request with a few extra headers
 *   3. (COOKIE_SUPPORT only) load the cookie store and add a 'Cookie' header
 *   4. open the connection, send the request, read the response
 *   5. (COOKIE_SUPPORT only) store the received cookies back to "cookies.txt"
 *   6. print header + body, then release everything that was acquired
 *
 * Returns 0 when a response was received and printed, 1 on any failure
 * (a message is written to the error logger in that case).
 */
int main(void)
{
	wget_iri *uri;
	wget_http_connection *conn = NULL;
	wget_http_request *req;
	wget_http_response *resp;
#ifdef COOKIE_SUPPORT
	wget_cookie_db *cookies = NULL;
	const char *cookie_string;
#endif
	int rc = 1; // pessimistic default, set to 0 only after the response was printed

	// We want the libwget debug messages be printed to STDERR.
	// From here on, we can call wget_debug_printf, etc.
	wget_logger_set_stream(wget_get_logger(WGET_LOGGER_DEBUG), stderr);

	// We want the libwget error messages be printed to STDERR.
	// From here on, we can call wget_error_printf, etc.
	wget_logger_set_stream(wget_get_logger(WGET_LOGGER_ERROR), stderr);

	// We want the libwget info messages be printed to STDOUT.
	// From here on, we can call wget_info_printf, etc.
	wget_logger_set_stream(wget_get_logger(WGET_LOGGER_INFO), stdout);

	wget_net_init(); // needed for Windows Sockets

	// 1. parse the URL into a URI
	//    if you want use a non-ascii (international) domain, the second
	//    parameter should be the character encoding of this file (e.g. "iso-8859-1")
	uri = wget_iri_parse("http://www.example.org", NULL);
	if (!uri) {
		wget_error_printf("Failed to parse the URL\n");
		goto out_net;
	}

	// 2. create a HTTP/1.1 GET request.
	//    the only default header is 'Host: www.example.org' (taken from uri)
	req = wget_http_create_request(uri, "GET");
	if (!req) {
		wget_error_printf("Failed to create the HTTP request\n");
		goto out_uri;
	}

	// 3. add HTTP headers as you wish
	wget_http_add_header(req, "User-Agent", "TheUserAgent/0.5");

	// libwget also supports gzip'ed or deflated response bodies
	wget_http_add_header(req, "Accept-Encoding", "gzip, deflate");
	wget_http_add_header(req, "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
	wget_http_add_header(req, "Accept-Language", "en-us,en;q=0.5");

	// keep the raw response header so that it can be printed below
	wget_http_request_set_int(req, WGET_HTTP_RESPONSE_KEEPHEADER, 1);

	// use keep-alive if you want to send more requests on the same connection
	// wget_http_add_header(req, "Connection", "keep-alive");

	// you need cookie support ? just #define COOKIE_SUPPORT or remove the #ifdef/#endif
	// 'keep_session_cookies' should be 0 or 1
#ifdef COOKIE_SUPPORT
	// init cookie database
	cookies = wget_cookie_db_init(NULL);
	wget_cookie_set_keep_session_cookies(cookies, 1);

	// load public suffixes for cookie validation from file (instead of using internal PSL data)
	// just works if Wget has been compiled with libpsl
	wget_cookie_db_load_psl(cookies, "public_suffixes.txt");

	// load cookie-store
	wget_cookie_db_load(cookies, "cookies.txt");

	// enrich the HTTP request with the uri-related cookies we have
	if ((cookie_string = wget_cookie_create_request_header(cookies, uri))) {
		wget_http_add_header(req, "Cookie", cookie_string);
		wget_xfree(cookie_string);
	}
#endif

	// 4. establish connection to the host/port given by uri
	// well, we could have done this directly after wget_iri_parse(), since
	// wget_http_open() works semi-async and returns immediately after domain name lookup.
	wget_http_open(&conn, uri);
	if (!conn) {
		wget_error_printf("Failed to open the connection\n");
		goto out;
	}

	if (wget_http_send_request(conn, req) != 0) {
		wget_error_printf("Failed to send the HTTP request\n");
		goto out;
	}

	resp = wget_http_get_response(conn);
	if (!resp) {
		wget_error_printf("Failed to read the HTTP response\n");
		goto out;
	}

	// server doesn't support or want keep-alive
	if (!resp->keep_alive)
		wget_http_close(&conn);

#ifdef COOKIE_SUPPORT
	// check and normalization of received cookies
	wget_cookie_normalize_cookies(uri, resp->cookies);

	// put cookies into cookie-store (also known as cookie-jar)
	wget_cookie_store_cookies(cookies, resp->cookies);

	// save cookie-store to file
	wget_cookie_db_save(cookies, "cookies.txt");
#endif

	// let's assume the body isn't binary (doesn't contain \0)
	// A response is not guaranteed to carry a body (or a kept header),
	// so fall back to an empty string instead of dereferencing NULL.
	wget_info_printf("%s%s\n",
		resp->header ? resp->header->data : "",
		resp->body ? resp->body->data : "");

	wget_http_free_response(&resp);
	rc = 0;

out:
	// reached with 'req' and 'uri' valid; 'conn' may already be closed (NULL)
#ifdef COOKIE_SUPPORT
	wget_cookie_db_free(&cookies);
#endif
	wget_http_close(&conn);
	wget_http_free_request(&req);

out_uri:
	wget_iri_free(&uri);

out_net:
	wget_net_deinit(); // needed for Windows Sockets

	return rc;
}