1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
|
package rdfwriter
import (
"fmt"
"github.com/spdx/gordf/rdfloader/parser"
"github.com/spdx/gordf/uri"
"strings"
)
// returns an adjacency list from a list of triples
// Params:
// triples: might be unordered
// Output:
// adjList: adjacency list which maps subject to object for each triple
func GetAdjacencyList(triples []*parser.Triple) (adjList map[*parser.Node][]*parser.Node) {
// triples are analogous to the edges of a graph.
// For a (Subject, Predicate, Object) triple,
// it forms a directed edge from Subject to Object
// Graphically,
// predicate
// (Subject) ---------------> (Object)
// initialising the adjacency list:
adjList = make(map[*parser.Node][]*parser.Node)
for _, triple := range triples {
// create a new entry in the adjList if the key is not already seen.
if adjList[triple.Subject] == nil {
adjList[triple.Subject] = []*parser.Node{}
}
// the key is already seen and we can directly append the child
adjList[triple.Subject] = append(adjList[triple.Subject], triple.Object)
// ensure that there is a key entry for all the children.
if adjList[triple.Object] == nil {
adjList[triple.Object] = []*parser.Node{}
}
}
return adjList
}
// Params:
// triples: might be unordered
// Output:
// recoveryDS: subject to triple mapping that will help retrieve the
// triples after sorting the Subject: Object pairs.
func GetNodeToTriples(triples []*parser.Triple) (recoveryDS map[string][]*parser.Triple) {
// triples are analogous to the edges of a graph.
// For a (Subject, Predicate, Object) triple,
// it forms a directed edge from Subject to Object
// Graphically,
// predicate
// (Subject) ---------------> (Object)
// initialising the recoveryDS:
recoveryDS = make(map[string][]*parser.Triple)
for _, triple := range triples {
// create a new entry in the recoverDS if the key is not already seen.
if recoveryDS[triple.Subject.String()] == nil {
recoveryDS[triple.Subject.String()] = []*parser.Triple{}
}
// the key is already seen and we can directly append the child
recoveryDS[triple.Subject.String()] = append(recoveryDS[triple.Subject.String()], triple)
// ensure that there is a key entry for all the children.
if recoveryDS[triple.Object.String()] == nil {
recoveryDS[triple.Object.String()] = []*parser.Triple{}
}
}
return removeDuplicateTriples(recoveryDS)
}
func getUniqueTriples(triples []*parser.Triple) []*parser.Triple {
set := map[string]*parser.Triple{}
for _, triple := range triples {
set[triple.Hash()] = triple
}
var retList []*parser.Triple
for key := range set {
retList = append(retList, set[key])
}
return retList
}
func removeDuplicateTriples(nodeToTriples map[string][]*parser.Triple) map[string][]*parser.Triple {
retMap := map[string][]*parser.Triple{}
for key := range nodeToTriples {
retMap[key] = getUniqueTriples(nodeToTriples[key])
}
return retMap
}
// same as dfs function. Just that after each every neighbor of the node is visited, it is appended in a queue.
// Params:
// node: Current node to perform dfs on.
// lastIdx: index where a new node should be added in the resultList
// visited: if visited[node] is true, we've already serviced the node before.
// resultList: list of all the nodes after topological sorting.
func topologicalSortHelper(node *parser.Node, lastIndex *int, adjList map[*parser.Node][]*parser.Node, visited *map[*parser.Node]bool, resultList *[]*parser.Node) (err error) {
if node == nil {
return
}
// checking if the node exist in the graph
_, exists := adjList[node]
if !exists {
return fmt.Errorf("node%v doesn't exist in the graph", *node)
}
if (*visited)[node] {
// this node is already visited.
// the program enters here when the graph has at least one cycle..
return
}
// marking current node as visited
(*visited)[node] = true
// visiting all the neighbors of the node and it's children recursively
for _, neighbor := range adjList[node] {
// recurse neighbor only if and only if it is not visited yet.
if !(*visited)[neighbor] {
err = topologicalSortHelper(neighbor, lastIndex, adjList, visited, resultList)
if err != nil {
return err
}
}
}
if *lastIndex >= len(adjList) {
// there is at least one node which is a neighbor of some node
// whose entry doesn't exist in the adjList
return fmt.Errorf("found more nodes than the number of keys in the adjacency list")
}
// appending from left to right to get a reverse sorted output
(*resultList)[*lastIndex] = node
*lastIndex++
return nil
}
// A wrapper function to initialize the data structures required by the
// topological sort algorithm. It provides an interface to directly get the
// sorted triples without knowing the internal variables required for sorting.
// Note: it sorts in reverse order.
// Params:
// adjList : adjacency list: a map with key as the node and value as a
// list of it's neighbor nodes.
// Assumes: all the nodes in the graph are present in the adjList keys.
func topologicalSort(adjList map[*parser.Node][]*parser.Node) ([]*parser.Node, error) {
// variable declaration
numberNodes := len(adjList)
resultList := make([]*parser.Node, numberNodes) // this will be returned
visited := make(map[*parser.Node]bool, numberNodes)
lastIndex := 0
// iterate through nodes and perform a dfs starting from that node.
for node := range adjList {
if !visited[node] {
err := topologicalSortHelper(node, &lastIndex, adjList, &visited, &resultList)
if err != nil {
return resultList, err
}
}
}
return resultList, nil
}
// Interface for user to provide a list of triples and get the
// sorted one as the output
func TopologicalSortTriples(triples []*parser.Triple) (sortedTriples []*parser.Triple, err error) {
adjList := GetAdjacencyList(triples)
recoveryDS := GetNodeToTriples(triples)
sortedNodes, err := topologicalSort(adjList)
if err != nil {
return sortedTriples, fmt.Errorf("error sorting the triples: %v", err)
}
// initialized a slice
sortedTriples = []*parser.Triple{}
for _, subjectNode := range sortedNodes {
// append all the triples associated with the subjectNode
for _, triple := range recoveryDS[subjectNode.String()] {
sortedTriples = append(sortedTriples, triple)
}
}
return sortedTriples, nil
}
func DisjointSet(triples []*parser.Triple) map[*parser.Node]*parser.Node {
nodeStringMap := map[string]*parser.Node{}
parentString := map[string]*parser.Node{}
for _, triple := range triples {
parentString[triple.Object.String()] = triple.Subject
nodeStringMap[triple.Object.String()] = triple.Object
if _, exists := parentString[triple.Subject.String()]; !exists {
parentString[triple.Subject.String()] = nil
nodeStringMap[triple.Subject.String()] = triple.Subject
}
}
parent := make(map[*parser.Node]*parser.Node)
for keyString := range parentString {
node := nodeStringMap[keyString]
parent[node] = parentString[keyString]
}
return parent
}
// a schemaDefinition is a dictionary which maps the abbreviation defined in the root tag.
// for example: if the root tag is =>
// <rdf:RDF
// xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>
// the schemaDefinition will contain:
// {"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}
// this function will output a reverse map that is:
// {"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf"}
func invertSchemaDefinition(schemaDefinition map[string]uri.URIRef) map[string]string {
invertedMap := make(map[string]string)
for abbreviation := range schemaDefinition {
_uri := schemaDefinition[abbreviation]
invertedMap[strings.Trim(_uri.String(), "#")] = abbreviation
}
return invertedMap
}
// return true if the target is in the given list
func any(target string, list []string) bool {
for _, s := range list {
if s == target {
return true
}
}
return false
}
// from the inverted schema definition, returns the name of the prefix used for
// the rdf name space. Return defaults to "rdf"
func getRDFNSAbbreviation(invSchemaDefinition map[string]string) string {
rdfNSAbbrev := "rdf"
if abbrev, exists := invSchemaDefinition[parser.RDFNS]; exists {
rdfNSAbbrev = abbrev
}
return rdfNSAbbrev
}
// given an expanded uri, returns abbreviated form for the same.
// For example:
// http://www.w3.org/1999/02/22-rdf-syntax-ns#Description will be abbreviated to rdf:Description
func shortenURI(uri string, invSchemaDefinition map[string]string) (string, error) {
// Logic: Every uri with a fragment created by the uri.URIRef has if of
// type baseURI#fragment. This function splits the uri by # character and
// replaces the baseURI with the abbreviated form from the inverseSchemaDefinition
splitIndex := strings.LastIndex(uri, "#")
if splitIndex == -1 {
return "", fmt.Errorf("uri doesn't have two parts of type schemaName:tagName. URI: %s", uri)
}
baseURI := strings.Trim(uri[:splitIndex], "#")
fragment := strings.TrimSuffix(uri[splitIndex+1:], "#") // removing the trailing #.
fragment = strings.TrimSpace(fragment)
if len(fragment) == 0 {
return "", fmt.Errorf(`fragment "%v" doesn't exist`, fragment)
}
if abbrev, exists := invSchemaDefinition[baseURI]; exists {
if abbrev == "" {
return fragment, nil
}
return fmt.Sprintf("%s:%s", abbrev, fragment), nil
}
return "", fmt.Errorf("declaration of URI(%s) not found in the schemaDefinition", baseURI)
}
// from a given adjacency list, return a list of root-nodes which will be used
// to generate string forms of the nodes to be written.
func GetRootNodes(triples []*parser.Triple) (rootNodes []*parser.Node) {
// In a disjoint set, indices with root nodes will point to nil
// that means, if disjointSet[node] is nil, the node has no parent
// and it is one of the root nodes.
var parent map[*parser.Node]*parser.Node
parent = DisjointSet(triples)
for node := range parent {
if parent[node] == nil {
rootNodes = append(rootNodes, node)
}
}
return rootNodes
}
// returns the triples that are not associated with tags of schemaName "rdf".
func getRestTriples(triples []*parser.Triple) (restTriples []*parser.Triple) {
rdfTypeURI := parser.RDFNS + "type"
rdfNodeIDURI := parser.RDFNS + "nodeID"
for _, triple := range triples {
if !any(triple.Predicate.ID, []string{rdfNodeIDURI, rdfTypeURI}) {
restTriples = append(restTriples, triple)
}
}
return restTriples
}
|