File: utils.go

package info (click to toggle)
golang-github-spdx-gordf 0.0~git20221230.b735bd5-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 324 kB
  • sloc: makefile: 4
file content (304 lines) | stat: -rw-r--r-- 10,502 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
package rdfwriter

import (
	"fmt"
	"github.com/spdx/gordf/rdfloader/parser"
	"github.com/spdx/gordf/uri"
	"strings"
)

// returns an adjacency list from a list of triples
// Params:
//   triples: might be unordered
// Output:
//    adjList: adjacency list which maps subject to object for each triple
func GetAdjacencyList(triples []*parser.Triple) (adjList map[*parser.Node][]*parser.Node) {
	// triples are analogous to the edges of a graph.
	// For a (Subject, Predicate, Object) triple,
	// it forms a directed edge from Subject to Object
	// Graphically,
	//                          predicate
	//             (Subject) ---------------> (Object)

	// initialising the adjacency list:
	adjList = make(map[*parser.Node][]*parser.Node)
	for _, triple := range triples {
		// create a new entry in the adjList if the key is not already seen.
		if adjList[triple.Subject] == nil {
			adjList[triple.Subject] = []*parser.Node{}
		}

		// the key is already seen and we can directly append the child
		adjList[triple.Subject] = append(adjList[triple.Subject], triple.Object)

		// ensure that there is a key entry for all the children.
		if adjList[triple.Object] == nil {
			adjList[triple.Object] = []*parser.Node{}
		}
	}
	return adjList
}

// Params:
//   triples: might be unordered
// Output:
//    recoveryDS: subject to triple mapping that will help retrieve the
//                triples after sorting the Subject: Object pairs.
func GetNodeToTriples(triples []*parser.Triple) (recoveryDS map[string][]*parser.Triple) {
	// triples are analogous to the edges of a graph.
	// For a (Subject, Predicate, Object) triple,
	// it forms a directed edge from Subject to Object
	// Graphically,
	//                          predicate
	//             (Subject) ---------------> (Object)

	// initialising the recoveryDS:
	recoveryDS = make(map[string][]*parser.Triple)
	for _, triple := range triples {
		// create a new entry in the recoverDS if the key is not already seen.
		if recoveryDS[triple.Subject.String()] == nil {
			recoveryDS[triple.Subject.String()] = []*parser.Triple{}
		}

		// the key is already seen and we can directly append the child
		recoveryDS[triple.Subject.String()] = append(recoveryDS[triple.Subject.String()], triple)

		// ensure that there is a key entry for all the children.
		if recoveryDS[triple.Object.String()] == nil {
			recoveryDS[triple.Object.String()] = []*parser.Triple{}
		}
	}
	return removeDuplicateTriples(recoveryDS)
}

func getUniqueTriples(triples []*parser.Triple) []*parser.Triple {
	set := map[string]*parser.Triple{}
	for _, triple := range triples {
		set[triple.Hash()] = triple
	}
	var retList []*parser.Triple
	for key := range set {
		retList = append(retList, set[key])
	}
	return retList
}

func removeDuplicateTriples(nodeToTriples map[string][]*parser.Triple) map[string][]*parser.Triple {
	retMap := map[string][]*parser.Triple{}
	for key := range nodeToTriples {
		retMap[key] = getUniqueTriples(nodeToTriples[key])
	}
	return retMap
}

// same as dfs function. Just that after each every neighbor of the node is visited, it is appended in a queue.
// Params:
//     node: Current node to perform dfs on.
//     lastIdx: index where a new node should be added in the resultList
//     visited: if visited[node] is true, we've already serviced the node before.
//     resultList: list of all the nodes after topological sorting.
func topologicalSortHelper(node *parser.Node, lastIndex *int, adjList map[*parser.Node][]*parser.Node, visited *map[*parser.Node]bool, resultList *[]*parser.Node) (err error) {
	if node == nil {
		return
	}

	// checking if the node exist in the graph
	_, exists := adjList[node]
	if !exists {
		return fmt.Errorf("node%v doesn't exist in the graph", *node)
	}
	if (*visited)[node] {
		// this node is already visited.
		// the program enters here when the graph has at least one cycle..
		return
	}

	// marking current node as visited
	(*visited)[node] = true

	// visiting all the neighbors of the node and it's children recursively
	for _, neighbor := range adjList[node] {
		// recurse neighbor only if and only if it is not visited yet.
		if !(*visited)[neighbor] {
			err = topologicalSortHelper(neighbor, lastIndex, adjList, visited, resultList)
			if err != nil {
				return err
			}
		}
	}

	if *lastIndex >= len(adjList) {
		// there is at least one node which is a neighbor of some node
		// whose entry doesn't exist in the adjList
		return fmt.Errorf("found more nodes than the number of keys in the adjacency list")
	}

	// appending from left to right to get a reverse sorted output
	(*resultList)[*lastIndex] = node
	*lastIndex++
	return nil
}

// A wrapper function to initialize the data structures required by the
// topological sort algorithm. It provides an interface to directly get the
// sorted triples without knowing the internal variables required for sorting.
// Note: it sorts in reverse order.
// Params:
//   adjList   : adjacency list: a map with key as the node and value as a
//  			 list of it's neighbor nodes.
// Assumes: all the nodes in the graph are present in the adjList keys.
func topologicalSort(adjList map[*parser.Node][]*parser.Node) ([]*parser.Node, error) {
	// variable declaration
	numberNodes := len(adjList)
	resultList := make([]*parser.Node, numberNodes) //  this will be returned
	visited := make(map[*parser.Node]bool, numberNodes)
	lastIndex := 0

	// iterate through nodes and perform a dfs starting from that node.
	for node := range adjList {
		if !visited[node] {
			err := topologicalSortHelper(node, &lastIndex, adjList, &visited, &resultList)
			if err != nil {
				return resultList, err
			}
		}
	}
	return resultList, nil
}

// Interface for user to provide a list of triples and get the
// sorted one as the output
func TopologicalSortTriples(triples []*parser.Triple) (sortedTriples []*parser.Triple, err error) {
	adjList := GetAdjacencyList(triples)
	recoveryDS := GetNodeToTriples(triples)
	sortedNodes, err := topologicalSort(adjList)
	if err != nil {
		return sortedTriples, fmt.Errorf("error sorting the triples: %v", err)
	}

	// initialized a slice
	sortedTriples = []*parser.Triple{}

	for _, subjectNode := range sortedNodes {
		// append all the triples associated with the subjectNode
		for _, triple := range recoveryDS[subjectNode.String()] {
			sortedTriples = append(sortedTriples, triple)
		}
	}
	return sortedTriples, nil
}

func DisjointSet(triples []*parser.Triple) map[*parser.Node]*parser.Node {
	nodeStringMap := map[string]*parser.Node{}
	parentString := map[string]*parser.Node{}
	for _, triple := range triples {
		parentString[triple.Object.String()] = triple.Subject
		nodeStringMap[triple.Object.String()] = triple.Object
		if _, exists := parentString[triple.Subject.String()]; !exists {
			parentString[triple.Subject.String()] = nil
			nodeStringMap[triple.Subject.String()] = triple.Subject
		}
	}

	parent := make(map[*parser.Node]*parser.Node)
	for keyString := range parentString {
		node := nodeStringMap[keyString]
		parent[node] = parentString[keyString]
	}
	return parent
}

// a schemaDefinition is a dictionary which maps the abbreviation defined in the root tag.
// for example: if the root tag is =>
//      <rdf:RDF
//		    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>
// the schemaDefinition will contain:
//    {"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}
// this function will output a reverse map that is:
//    {"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf"}
func invertSchemaDefinition(schemaDefinition map[string]uri.URIRef) map[string]string {
	invertedMap := make(map[string]string)
	for abbreviation := range schemaDefinition {
		_uri := schemaDefinition[abbreviation]
		invertedMap[strings.Trim(_uri.String(), "#")] = abbreviation
	}
	return invertedMap
}

// return true if the target is in the given list
func any(target string, list []string) bool {
	for _, s := range list {
		if s == target {
			return true
		}
	}
	return false
}

// from the inverted schema definition, returns the name of the prefix used for
// the rdf name space. Return defaults to "rdf"
func getRDFNSAbbreviation(invSchemaDefinition map[string]string) string {
	rdfNSAbbrev := "rdf"
	if abbrev, exists := invSchemaDefinition[parser.RDFNS]; exists {
		rdfNSAbbrev = abbrev
	}
	return rdfNSAbbrev
}

// given an expanded uri, returns abbreviated form for the same.
// For example:
// http://www.w3.org/1999/02/22-rdf-syntax-ns#Description will be abbreviated to rdf:Description
func shortenURI(uri string, invSchemaDefinition map[string]string) (string, error) {
	// Logic: Every uri with a fragment created by the uri.URIRef has if of
	// type baseURI#fragment. This function splits the uri by # character and
	// replaces the baseURI with the abbreviated form from the inverseSchemaDefinition

	splitIndex := strings.LastIndex(uri, "#")
	if splitIndex == -1 {
		return "", fmt.Errorf("uri doesn't have two parts of type schemaName:tagName. URI: %s", uri)
	}

	baseURI := strings.Trim(uri[:splitIndex], "#")
	fragment := strings.TrimSuffix(uri[splitIndex+1:], "#") // removing the trailing #.
	fragment = strings.TrimSpace(fragment)
	if len(fragment) == 0 {
		return "", fmt.Errorf(`fragment "%v" doesn't exist`, fragment)
	}
	if abbrev, exists := invSchemaDefinition[baseURI]; exists {
		if abbrev == "" {
			return fragment, nil
		}
		return fmt.Sprintf("%s:%s", abbrev, fragment), nil
	}
	return "", fmt.Errorf("declaration of URI(%s) not found in the schemaDefinition", baseURI)
}

// from a given adjacency list, return a list of root-nodes which will be used
// to generate string forms of the nodes to be written.
func GetRootNodes(triples []*parser.Triple) (rootNodes []*parser.Node) {

	// In a disjoint set, indices with root nodes will point to nil
	// that means, if disjointSet[node] is nil, the node has no parent
	// and it is one of the root nodes.
	var parent map[*parser.Node]*parser.Node
	parent = DisjointSet(triples)

	for node := range parent {
		if parent[node] == nil {
			rootNodes = append(rootNodes, node)
		}
	}
	return rootNodes
}

// returns the triples that are not associated with tags of schemaName "rdf".
func getRestTriples(triples []*parser.Triple) (restTriples []*parser.Triple) {
	rdfTypeURI := parser.RDFNS + "type"
	rdfNodeIDURI := parser.RDFNS + "nodeID"
	for _, triple := range triples {
		if !any(triple.Predicate.ID, []string{rdfNodeIDURI, rdfTypeURI}) {
			restTriples = append(restTriples, triple)
		}
	}
	return restTriples
}