File: xml_encoding.c

package info (click to toggle)
referencer 1.2.2-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 4,028 kB
  • ctags: 2,265
  • sloc: ansic: 32,973; cpp: 12,149; python: 1,314; xml: 1,258; sh: 1,154; makefile: 252
file content (61 lines) | stat: -rw-r--r-- 1,360 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
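/* xml_encoding.c
 *
 * Determine the character set named by the encoding attribute of an
 * XML declaration ("<?xml ... ?>").
 */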
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "newstr.h"
#include "newstr_conv.h"
#include "xml.h"

/*
 * xml_getencodingr()
 *
 * Walk node, its children (->down) and its following siblings (->next)
 * looking for "xml" tags that carry an "encoding" attribute.
 *
 * An attribute value of "UTF-8" (compared case-insensitively) maps to
 * CHARSET_UNICODE; any other value is looked up with get_charset().
 * A value that resolves to CHARSET_UNKNOWN produces a warning on stderr.
 *
 * Each node is examined first, then its children, then its next sibling.
 * The last recognized encoding in that order is returned, or
 * CHARSET_UNKNOWN if none was recognized.
 */
static int
xml_getencodingr( xml *node )
{
	int found = CHARSET_UNKNOWN;
	xml *cur = node;

	/* siblings are handled by this loop, children by recursion */
	do {
		int charset = CHARSET_UNKNOWN;

		if ( xml_tagexact( cur, "xml" ) ) {
			newstr *enc = xml_getattrib( cur, "encoding" );
			if ( enc && enc->data ) {
				if ( strcasecmp( enc->data, "UTF-8" ) != 0 )
					charset = get_charset( enc->data );
				else
					charset = CHARSET_UNICODE;
				if ( charset==CHARSET_UNKNOWN )
					fprintf( stderr, "Warning: did not recognize "
						"encoding '%s'\n", enc->data );
			}
		}
		/* a later recognized encoding replaces an earlier one */
		if ( charset!=CHARSET_UNKNOWN ) found = charset;

		if ( cur->down ) {
			charset = xml_getencodingr( cur->down );
			if ( charset!=CHARSET_UNKNOWN ) found = charset;
		}

		cur = cur->next;
	} while ( cur );

	return found;
}

/*
 * xml_getencoding()
 *
 * Look for an XML declaration ("<?xml ... ?>" or "<?XML ... ?>") in s,
 * parse a copy of it, and report the character set named by its
 * encoding attribute.
 *
 * When a complete declaration is found (the opening "<?xml"/"<?XML"
 * followed by a closing "?>"), that span is also passed to
 * newstr_segdel() to strip it from s, whether or not an encoding was
 * recognized. Otherwise s is left untouched.
 *
 * Returns the value produced by xml_getencodingr() for the parsed
 * declaration, or CHARSET_UNKNOWN if s is NULL, s holds no data, or no
 * complete declaration is present.
 */
int
xml_getencoding( newstr *s )
{
	newstr descriptor;
	xml descriptxml;
	int file_charset;
	char *p, *q;

	/* strstr() must not be handed a NULL string; a newstr's data
	 * pointer can be NULL (xml_getencodingr() checks for the same) */
	if ( !s || !s->data ) return CHARSET_UNKNOWN;

	/* only the all-lowercase and all-uppercase spellings are matched */
	p = strstr( s->data, "<?xml" );
	if ( !p ) p = strstr( s->data, "<?XML" );
	if ( !p ) return CHARSET_UNKNOWN;

	q = strstr( p, "?>" );
	if ( !q ) return CHARSET_UNKNOWN;

	/* q+2 is one past the closing "?>" */
	newstr_init( &descriptor );
	newstr_segcpy( &descriptor, p, q+2 );

	xml_init( &descriptxml );
	xml_tree( descriptor.data, &descriptxml );
	file_charset = xml_getencodingr( &descriptxml );
	xml_free( &descriptxml );
	newstr_free( &descriptor );

	/* drop the declaration from the caller's string */
	newstr_segdel( s, p, q+2 );

	return file_charset;
}