1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
(: simple_dump.xquery - perform a simple dump of the contents of a FreeDict dictionary
Originally by Piotr Bański (bansp at o2.pl), 01-nov-2010.
License: GNU GPL ver. 3.0 or any later version.
$Id$
This script expects an external parameter $lg_pair, but you may safely set that to ''
and edit the value of $my_lg_pair instead.
Initially, it was only supposed to match the headword(s) with their equivalents,
but I got slightly carried away. Still, this is supposed to be a simple dump,
so it skips a lot of potential details.
:)
(: Unprefixed element names in the path expressions below (TEI, text, body, entry, ...)
   are resolved in the TEI namespace. :)
declare default element namespace "http://www.tei-c.org/ns/1.0";
(: Prefix used for the functions declared in this module.
   NOTE(review): the URI has three slashes after "http:". It is only used as a local
   prefix binding here, so this is harmless, but confirm whether "http://www.functx.com"
   was intended. :)
declare namespace functx = "http:///www.functx.com";
(: Processor option in the saxon namespace requesting the text output method. :)
declare option saxon:output "method=text";
(: System-internal: placeholder for the version-control keyword.
   Not referenced anywhere in the visible part of this file. :)
declare variable $my_svn_id as xs:string := "$Id$";
(: Fallback language pair, used when the external $lg_pair is the empty string.
   Set this to the pair of languages that you want to process. :)
declare variable $my_lg_pair as xs:string := "srp-eng";
(: When true(), the bracketed grammar list is left out of every output line
   (an even simpler dump). :)
declare variable $skip_gram as xs:boolean := false();
(: Language pair supplied by the caller; must be bound, but may be ''. :)
declare variable $lg_pair as xs:string external;
(: Returns the language pair to process: the external $lg_pair when it is a
   non-empty string, otherwise the in-file default $my_lg_pair. :)
declare function functx:get_lg_pair() as xs:string {
  if (string-length($lg_pair) gt 0)
  then $lg_pair
  else $my_lg_pair
};
(: Builds the relative location of the dictionary source for the selected
   language pair, in the form ../../<pair>/<pair>.tei :)
declare function functx:get_filename() as xs:string {
  (: get_lg_pair() yields exactly one string, so this "for" binds it once :)
  for $pair in functx:get_lg_pair()
  return concat('../../', $pair, '/', $pair, '.tei')
};
(: Builds the first line of the dump:
     "Dump of <pair>.tei on <YYYY-MM-DD> at <hh:mm:ss> "
   Returns a single string ending in one space.

   The date and time are taken from the current date/time with the timezone
   removed (not shifted). Cutting the lexical form at '+' or '.' is not used here
   because it gives an empty string when the implicit timezone is UTC ("Z") or
   negative ("-05:00"), or when the time has no fractional seconds. :)
declare function functx:header() as xs:string {
  (: passing () as the timezone strips it without changing the local value :)
  let $today := string(adjust-date-to-timezone(current-date(), ()))
  (: the lexical form of xs:time always starts with hh:mm:ss, i.e. 8 characters;
     anything after that is the optional fractional-seconds part :)
  let $now := substring(string(adjust-time-to-timezone(current-time(), ())), 1, 8)
  return concat('Dump of ', functx:get_lg_pair(), '.tei on ', $today, ' at ', $now, ' ')
};
(: Produces one text line per <entry> found under TEI/text/body of the dictionary
   file, ordered by the lower-cased first headword.

   Each line has the form
     headword(s) [gram|gram] equivalents      when grammar info exists and is not skipped
     headword(s) -- equivalents               otherwise
   where headwords and equivalents are comma-separated.

   NOTE(review): the declared result is xs:string+, so a document with no matching
   entries would presumably raise a type error rather than return nothing. :)
declare function functx:process() as xs:string+ {
  for $e in doc(functx:get_filename())/TEI/text/body/entry
  (: all orthographic forms of the entry :)
  let $headwords := $e/form/orth
  (: whitespace-normalized text of every child of gramGrp :)
  let $grammar := $e/gramGrp/*/normalize-space(.)
  (: translation quotes and sense definitions, in document order :)
  let $equivalents := $e//cit[@type='trans']/quote | $e//sense/def
  (: plain dash when grammar is suppressed or absent, bracketed list otherwise :)
  let $separator :=
    if ($skip_gram or empty($grammar))
    then ' -- '
    else concat(' [', string-join($grammar, '|'), '] ')
  order by lower-case($headwords[1])
  return concat(
    string-join($headwords, ', '),
    $separator,
    normalize-space(string-join($equivalents, ', '))
  )
};
(: Query body: the header followed by all entry lines joined with a single space. :)
concat(functx:header(), string-join(functx:process(), ' '))
|