File: jstrans.c

package info (click to toggle)
pavuk 0.9.35-2.1
  • links: PTS
  • area: main
  • in suites: lenny, squeeze
  • size: 4,720 kB
  • ctags: 3,824
  • sloc: ansic: 51,779; sh: 3,468; makefile: 363
file content (94 lines) | stat: -rw-r--r-- 2,044 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/***************************************************************************/
/*    This code is part of WWW grabber called pavuk                        */
/*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
/*    Distributed under GPL 2 or later                                     */
/***************************************************************************/

#include <string.h>
#include <stdlib.h>

#include "config.h"
#include "jstrans.h"
#include "tools.h"

#ifdef HAVE_REGEX

/*
 * Release a transformation rule and everything it holds.
 *
 * The compiled regular expression is handed to re_free(); the transform,
 * tag and attrib strings and finally the structure itself are released
 * with _free().
 *
 * Passing NULL is allowed and does nothing, so callers can free
 * unconditionally (same convention as free()).
 */
void js_transform_free(js_transform_t *jt)
{
  if(!jt)
    return;

  re_free(jt->re);
  _free(jt->transform);
  _free(jt->tag);
  _free(jt->attrib);
  _free(jt);
}

/*
 * Build a new transformation rule.
 *
 *   pattern   - regular expression source; must be non-NULL and non-empty
 *   transform - replacement template; must be non-NULL and non-empty
 *   tag       - tag name the rule applies to; must be non-NULL
 *   attrib    - attribute name the rule applies to; must be non-NULL
 *   type      - stored verbatim in the rule
 *
 * Returns the new rule, or NULL when an argument is rejected or when
 * re_make() cannot compile the pattern. The three strings are copied with
 * tl_strdup(), so the caller keeps ownership of its arguments. The result
 * is meant to be released with js_transform_free().
 */
js_transform_t *js_transform_new(const char *pattern, const char *transform,
const char *tag, const char *attrib, int type)
{
  js_transform_t *jt;
  re_entry *compiled;

  /* pattern and transform must carry at least one character */
  if(!pattern || !*pattern)
    return NULL;
  if(!transform || !*transform)
    return NULL;

  /* tag and attrib only need to be present; they may be empty */
  if(!tag || !attrib)
    return NULL;

  /* compile first, so nothing has to be undone if the pattern is bad */
  compiled = re_make(pattern);
  if(!compiled)
    return NULL;

  jt = _malloc(sizeof(*jt));
  jt->type = type;
  jt->re = compiled;
  jt->transform = tl_strdup(transform);
  jt->tag = tl_strdup(tag);
  jt->attrib = tl_strdup(attrib);

  return jt;
}

/*
 * Check whether rule jt applies to the tag text `tag`.
 *
 * A rule whose tag starts with '*' matches every tag. Otherwise the first
 * character of `tag` is skipped (presumably the opening '<' - confirm
 * against the callers) and the name that follows, up to the first blank,
 * tab, CR, LF or '>', is compared case-insensitively with the rule's tag
 * name; both must have the same length.
 *
 * Returns TRUE on match, FALSE otherwise.
 */
int js_transform_match_tag(js_transform_t *jt, const char *tag)
{
  const char *name;
  size_t name_len;

  if(jt->tag[0] == '*')
    return TRUE;

  name = tag + 1;
  name_len = strcspn(name, " \t\r\n>");

  /* different lengths can never be equal names */
  if(name_len != strlen(jt->tag))
    return FALSE;

  return strncasecmp(name, jt->tag, name_len) ? FALSE : TRUE;
}

/*
 * Convert the run of ndigits decimal digits starting at `digits` into a
 * subpattern index.
 *
 * Returns the index when it lies in 0..nsub, otherwise -1. The bound is
 * checked while accumulating, so the value can never overflow an int.
 * An empty digit run yields index 0.
 */
static int js_transform_ref_index(const char *digits, size_t ndigits, int nsub)
{
  int idx = 0;
  size_t i;

  for(i = 0; i < ndigits; i++)
  {
    int d = digits[i] - '0';

    /* reject as soon as idx * 10 + d would exceed nsub */
    if(d > nsub || idx > (nsub - d) / 10)
      return -1;

    idx = idx * 10 + d;
  }

  return (idx <= nsub) ? idx : -1;
}

/*
 * Expand the rule's transform template against a matched attribute value.
 *
 *   jt   - rule whose transform string is used as the template
 *   attr - the text the offsets in subs refer to
 *   nsub - highest subpattern index that may be referenced
 *   subs - offset pairs: subs[2*n] is the start and subs[2*n+1] the end
 *          of subpattern n inside attr
 *
 * Every "$<digits>" in the template is replaced by the text of that
 * subpattern; all other characters are copied literally. A '$' that is
 * not followed by a digit refers to subpattern 0. References above nsub,
 * and subpatterns whose offsets are negative or inverted (e.g. a group
 * that did not take part in the match), expand to nothing.
 *
 * Only the digits directly after '$' are interpreted. The previous
 * atoi()-based parsing also accepted whitespace and a sign, which made
 * "$-1" index subs[] with a negative number and made the number be
 * emitted a second time as literal text.
 *
 * Returns a string built with tl_strndup()/tl_str_nappend(); releasing it
 * is presumably up to the caller - confirm against the callers.
 */
char *js_transform_apply(js_transform_t *jt, const char *attr, int nsub,
int *subs)
{
  char *rv;
  int n, l;

  /* literal text in front of the first reference */
  l = strcspn(jt->transform, "$");
  rv = tl_strndup(jt->transform, l);

  do
  {
    if(jt->transform[l] == '$')
    {
      const char *digits = jt->transform + l + 1;
      size_t ndigits = strspn(digits, "0123456789");

      n = js_transform_ref_index(digits, ndigits, nsub);
      if(n >= 0)
      {
        int so = subs[2 * n];
        int eo = subs[2 * n + 1];

        /* skip unset or malformed offset pairs instead of reading
           outside attr or passing a negative length */
        if(so >= 0 && eo >= so)
          rv = tl_str_nappend(rv, attr + so, eo - so);
      }

      /* consume '$' plus exactly the digits that were interpreted */
      l += 1 + (int) ndigits;
    }

    /* literal text up to the next reference or the end of the template */
    n = strcspn(jt->transform + l, "$");
    rv = tl_str_nappend(rv, jt->transform + l, n);
    l += n;
  }
  while(jt->transform[l]);

  return rv;
}

#endif