File: SQL

package info (click to toggle)
htcheck 1%3A1.2.3-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 3,836 kB
  • ctags: 2,885
  • sloc: cpp: 14,380; sh: 7,363; php: 3,089; ansic: 1,068; makefile: 303
file content (130 lines) | stat: -rw-r--r-- 5,200 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
Common SQL statements for ht://Check
------------------------------------

Copyright (c) 1999-2004 Comune di Prato - Prato - Italy
Some Portions Copyright (c) 1995-2003 The ht://Dig Group <www.htdig.org>
Author: Gabriele Bartolini - Prato - Italy <angusgb@users.sourceforge.net>
$Id: SQL,v 1.11 2003/12/30 09:38:29 angusgb Exp $

ht://Check is distributed under the GNU General Public License (GPL).
See the COPYING file for license information.

ht://Check is a world-wide-web utility for an intranet or small internet.

-------------------------------------------------------------------
Retrieve all the anchors (A name="anchorname"):

 SELECT HtmlAttribute.IDUrl, HtmlAttribute.Content FROM HtmlAttribute,
  HtmlStatement WHERE  HtmlStatement.IDUrl=HtmlAttribute.IDUrl AND 
  HtmlAttribute.TagPosition=HtmlStatement.TagPosition AND
  HtmlAttribute.Attribute='name' AND HtmlStatement.Tag='a';

Retrieve all the anchor found and not after LinkResult has been set by
 the program with the Scheduler::SetLinkResults() method:
 SELECT DISTINCT Link.IDUrlSrc, Link.IDUrlDest, Link.TagPosition,
  Link.AttrPosition, Link.Anchor, TmpAnchors.IDUrl
  FROM Link LEFT JOIN TmpAnchors ON
  Link.IDUrlDest=TmpAnchors.IDUrl
  AND TmpAnchors.Anchor=Link.Anchor
  WHERE Link.Anchor != '' AND Link.LinkResult='OK'
  
Retrieve all the linked URL:

 SELECT Link.*, Url.StatusCode
  FROM Link LEFT JOIN Url on Link.IDUrlDest=Url.IDUrl
  WHERE Link.LinkResult='NotChecked' 
  
Retrieve all of the groups of LinkResult with the retrieval Status:
 SELECT Link.LinkResult, Schedule.Status, COUNT(*)
  FROM Link, Schedule
  WHERE Link.IDUrlDest=Schedule.IDUrl
  GROUP BY Link.LinkResult, Schedule.Status

Retrieve all of the groups of LinkResult:
 SELECT Link.LinkResult, COUNT(*)
  FROM Link GROUP BY Link.LinkResult

Retrieve all the broken links (1st version - Without redirected URLs, but
 with HtmlAttribute too):
 SELECT UrlSrc.IDUrl as IDUrlSrc, UrlDest.IDUrl as IDUrlDest,
  UrlSrc.Url as UrlSrc, UrlDest.Url as UrlDest,
  UrlDest.StatusCode, UrlDest.ReasonPhrase,
  UrlDest.ConnStatus, Link.LinkType,
  HtmlStatement.Statement, HtmlAttribute.Attribute, HtmlAttribute.Content
  FROM Url UrlDest, Url UrlSrc, Link, HtmlStatement, HtmlAttribute
  WHERE Link.LinkResult = 'Broken'
  AND HtmlStatement.IDUrl = Link.IDUrlSrc
  AND HtmlStatement.TagPosition = Link.TagPosition
  AND HtmlAttribute.IDUrl = Link.IDUrlSrc
  AND HtmlAttribute.TagPosition = Link.TagPosition
  AND HtmlAttribute.AttrPosition = Link.AttrPosition
  AND UrlSrc.IDUrl = Link.IDUrlSrc
  AND UrlDest.IDUrl = Link.IDUrlDest
  ORDER BY UrlSrc, UrlDest, Link.TagPosition, Link.AttrPosition

Retrieve all the broken links (2nd version with redirected Urls but withou
 HtmlAttribute table):
 SELECT UrlSrc.IDUrl as IDUrlSrc, UrlDest.IDUrl as IDUrlDest,
  UrlSrc.Url as UrlSrc, UrlDest.Url as UrlDest,
  UrlDest.StatusCode, UrlDest.ReasonPhrase,
  UrlDest.ConnStatus, Link.LinkType,
  HtmlStatement.Statement
  FROM Url UrlDest, Url UrlSrc, Link
  LEFT JOIN HtmlStatement
  ON HtmlStatement.IDUrl = Link.IDUrlSrc
  AND HtmlStatement.TagPosition = Link.TagPosition
  WHERE Link.LinkResult = 'Broken'
  AND UrlSrc.IDUrl = Link.IDUrlSrc
  AND UrlDest.IDUrl = Link.IDUrlDest
  ORDER BY UrlSrc, UrlDest, Link.TagPosition, Link.AttrPosition

Retrieve all the anchors not found:
 SELECT UrlSrc.IDUrl as IDUrlSrc, UrlDest.IDUrl as IDUrlDest,
  UrlSrc.Url as UrlSrc, UrlDest.Url as UrlDest,
  Link.LinkType, Link.Anchor, HtmlStatement.Statement
  FROM Url UrlDest, Url UrlSrc, Link
  LEFT JOIN HtmlStatement
  ON HtmlStatement.IDUrl = Link.IDUrlSrc
  AND HtmlStatement.TagPosition = Link.TagPosition
  WHERE Link.LinkResult = 'AnchorNotFound'
  AND UrlSrc.IDUrl = Link.IDUrlSrc
  AND UrlDest.IDUrl = Link.IDUrlDest
  ORDER BY UrlSrc, UrlDest, Link.TagPosition, Link.AttrPosition

Retrieve all the Urls linked to from inside a URL pattern outside
  SELECT Source.Url, Dest.Url, Statement
  FROM Url Source, Schedule Dest, Link
  LEFT JOIN HtmlStatement
  ON HtmlStatement.IDUrl = Link.IDUrlSrc
  AND HtmlStatement.TagPosition = Link.TagPosition
  WHERE Dest.IDUrl = Link.IDUrlDest
  AND Source.IDUrl = Link.IDUrlSrc
  AND Source.Url like 'pattern%'
  AND Dest.Url not like 'pattern%'
  
Retrieve filenames lengths
  SELECT Url, instr(reverse(Url),'/') -1 as FileLength,
  instr(reverse(Url),'.') -1 as ExtensionLength,
  instr(reverse(Url),'/') - instr(reverse(Url),'.') as NameLength
  FROM Url

Retrieve all the documents that don't respect a filename lenght
 of 8 chars and an extension bigger than 3 chars (not query string):
  SELECT Url
  FROM Url
  WHERE Url NOT REGEXP '^.*/(([A-Za-z0-9_-]{1,8}\.[A-Za-z]{1,3})?|.*\\?.+)$' 

Retrieve all the image reference tags which both found and not found
 image alternative files.
SELECT Url.Url, HtmlStatement.Statement, HtmlAttribute.Attribute
 FROM HtmlStatement, Url
 LEFT JOIN HtmlAttribute ON
 HtmlStatement.IDUrl = HtmlAttribute.IDUrl
  AND HtmlStatement.TagPosition = HtmlAttribute.TagPosition
  AND HtmlAttribute.Attribute = 'ALT'
 WHERE Tag='IMG'
  AND Url.IDUrl = HtmlStatement.IDUrl
 ORDER BY Attribute, Url, HtmlStatement.TagPosition


-------------------------------------------------------------------