# File: apertium-cy-en.cy-en.rlx
#
# package info (click to toggle)
# apertium-cy-en 0.1.1~r57554-7
# links: PTS, VCS
# area: main
# in suites: bullseye
# size: 9,364 kB
# sloc: xml: 199; makefile: 105; sh: 57
# file content (698 lines) | stat: -rw-r--r-- 24,453 bytes
# parent folder | download | duplicates (3)
################### IMPORTANT #####################
# If a rule is not having any effect, or having the wrong effect, it is likely that it is not 
# defined tightly enough.  Redo it more strictly, and it should have the desired effect.
# Note that "casting" rules [SUBSTITUTE] should come after REMOVE and SELECT rules.
##################################################

# to-dos
# check and remove if nec items in "apostrophes" section of cydix

#####################################
# Initial configuration
#####################################

DELIMITERS = "<.>" "<!>" "<?>" (sent); # end-of-sentence markers

# the LIST statements give Apertium tags a mnemonic to be used in the rules

# mutations for individual letters
# first the surface form regex, then the base form regex
# Each parenthesised pair is a composite tag: a reading matches only if its
# word form matches the first regex AND its base form matches the second.
# The "ri" suffix marks the tag as a case-insensitive regular expression.

# soft mutation
# surface <- base: g <- c, d <- t, f <- b, w <- gw, b <- p, dd <- d, f <- m
# NOTE(review): the patterns only look at the initial letter(s), so e.g.
# "<d.*>" also matches word forms beginning with "dd" - confirm this is intended.
LIST MSoftC = ("<g.*>"ri "c.*"ri)  ("<d.*>"ri "t.*"ri) ("<f.*>"ri "b.*"ri)  ("<w.*>"ri "gw.*"ri) ("<b.*>"ri "p.*"ri) ("<dd.*>"ri "d.*"ri) ("<f.*>"ri "m.*"ri);

# aspirate mutation
# surface <- base: ch <- c, th <- t
# NOTE(review): "c.*" also matches base forms that themselves begin with "ch",
# so an unmutated word such as "chi" fits the first pair too.
LIST MAspC = ("<ch.*>"ri "c.*"ri) ("<th.*>"ri "t.*"ri);

# nasal mutation
# surface <- base: n <- d, m <- b
LIST MNasC = ("<n.*>"ri "d.*"ri) ("<m.*>"ri "b.*"ri);

# nouns

LIST NC = n; 				# common nouns
LIST NP = (np top) (np ant) (np cog);		# proper nouns (toponyms, anthroponyms, cognomens)
LIST Top = (np top);		# toponyms (place names)
LIST Ant = (np ant);		# anthroponyms
LIST Cog = (np cog);		# cognomens (surnames)
LIST Acr = (n acr);		# nouns tagged acr (presumably acronyms - cf. the "AC" example in the noun tweaking rules)

# gender tags
LIST MF = mf;		# masculine/feminine
LIST M = m;			# masculine
LIST F = f;			# feminine

# pronouns

LIST Prn = (prn tn) (prn subj);		# pronoun (only the tn and subj subtypes are listed)
LIST PrnSubj = (prn subj);		# subject pronoun
LIST PrnObj = (prn obj);		# object pronoun

# person/number tag pairs, independent of part of speech
LIST P1Sg = (p1 sg);		# first person singular - verb agreement
LIST P3Sg = (p3 sg);		# third person singular - verb agreement
LIST P1Pl = (p1 pl);		# first person plural - verb agreement
LIST P2Pl = (p2 pl);		# second person plural - verb agreement
LIST P3Pl = (p3 pl);		# third person plural - verb agreement

# subject pronouns by person and number (third singular also by gender)
LIST prn_subj_1sg = (prn subj p1 sg);
LIST prn_subj_2sg = (prn subj p2 sg);
LIST prn_subj_3sg_m = (prn subj p3 sg m);
LIST prn_subj_3sg_f = (prn subj p3 sg f);
LIST prn_subj_1pl = (prn subj p1 pl);
LIST prn_subj_2pl = (prn subj p2 pl);
LIST prn_subj_3pl = (prn subj p3 pl);

# same definition as prn_subj_1sg, under the CamelCase name used by the SETs below
LIST PrnSubjP1Sg = (prn subj p1 sg);
#LIST PrnSubjP1Sg = (prn obj p1 sg);

# subject pronouns combined with a person/number list
SET PrnSubjP3Sg = PrnSubj + P3Sg;
SET PrnSubjP1Pl = PrnSubj + P1Pl;
SET PrnSubjP2Pl = PrnSubj + P2Pl;
SET PrnSubjP3Pl = PrnSubj + P3Pl;

# verbs

LIST V = vaux vblex vbser vbmod;		 	# any verb: auxiliary, lexical, "to be", modal
#LIST VblexInf = (vblex inf);		# verb in the infinitive
LIST Inf = inf;		# infinitive (any word class carrying the inf tag)
LIST VblexPrs = (vblex prs);		# verb in the present subjunctive
LIST Imp = imp;		# imperative
LIST VbMod = (vbmod);		# modal verb (eg gallu)
LIST Oes = (vbser sp); 		# oes

LIST Vpart = (vpart neg) (vpart aff) (vpart itg); # Verbal particles

# Verb readings restricted by person and number.
# These have to be SETs: "+" is a set operator and is only interpreted in a
# SET definition.  Inside a LIST, "V" and "+" are read as literal tags, which
# leaves the bare (pN num) composite as the only effective member - and that
# matches any reading with that person and number, not just verbs.
SET vb_1sg = V + (p1 sg);
SET vb_2sg = V + (p2 sg);
SET vb_3sg = V + (p3 sg);
SET vb_1pl = V + (p1 pl);
SET vb_2pl = V + (p2 pl);
SET vb_3pl = V + (p3 pl);

LIST vb_inf = (vblex inf);		# lexical verb in the infinitive

# forms of bod ("to be")
LIST be_1_sg = (vbser p1 sg);
LIST be_any = (vbser);
LIST be_pres_3sg = (vbser pres p3 sg);

# same combinations as vb_1sg / vb_2pl, built from the named person lists
SET VbP1Sg = V + P1Sg;
SET VbP2Pl = V + P2Pl;

# determiners and possessive pronouns

LIST Det = (det);		# determiner
LIST Def = def;		# definite
# ? can above two be deleted in view of below ?
LIST DetDef = (det def);		# definite determiner (article)
LIST DetPos = (det pos);		# possessive determiner
LIST DetDem = (det dem);		# demonstrative determiner
LIST DetItg = (det itg);		# interrogative determiner
LIST DetQnt = (det qnt);		# quantifying determiner

#SET DetPosNE = DetPos - (+ei);


# adjectives and numerals

# (adj) alone already covers (adj sint); both are listed to make the intent explicit
LIST Adj = (adj sint) (adj); 		# all adjectives (synthetic + non-synthetic)
LIST Num = num;				# numeral


# prepositions

LIST Pr = pr;		# preposition
LIST Prep_i = ("i" pr);		# the preposition "i" (to)
# both capitalisations of the base form are listed explicitly, since plain
# quoted tags (without the i flag) are case-sensitive
LIST Prep_yn_ôl = ("yn ôl" pr) ("Yn ôl" pr);		# the preposition "yn ôl"
LIST prep_yn = ("yn" pr);		# the preposition "yn" (in)


# adverbs

LIST Adv = adv;		# adverb
# Composite tag: a reading must carry both adv and itg.  Without the
# parentheses the list would match any adverb or any interrogative.
LIST Int = (adv itg);		# interrogative adverb (eg where?)


# conjunctions

LIST CC = cnjcoo; 			# co-ordinate conjunction
LIST CS = cnjsub;			# subordinate conjunction

# Either kind of conjunction.  OR is set union; "+" would instead require a
# single reading to be tagged both cnjcoo and cnjsub.
SET Conj = CC OR CS;


# interjections

LIST Interj = ij;			# interjection


# Semantic sets

# Names of week nights ("nos ...") and week days ("dydd ...") as noun readings.
# The base forms are matched case-insensitively (ri); the "#" inside each
# quoted base form is part of the lemma as written here, not a comment marker.
LIST Days = ("nos# Lun"ri n) ("nos# Fawrth"ri n) ("nos# Fercher"ri n) ("nos# Iau"ri n) ("nos# Wener"ri n) ("nos# Sadwrn"ri n) ("nos# Sul"ri n) ("dydd# Llun"ri n) ("dydd# Mawrth"ri n) ("dydd# Mercher"ri n) ("dydd# Iau"ri n) ("dydd# Gwener"ri n) ("dydd# Sadwrn"ri n) ("dydd# Sul"ri n);


#####################################
# Morphological disambiguation
#####################################
SECTION



# Mutations
#####################################

#Early removal of impossible aspirate mutations
#Gobeithio bod popeth yn iawn gyda chi
# Drops an aspirate-mutation reading unless the previous word is "ei", the
# conjunction "a", "â" or "chwech".
# NOTE(review): the rule name says "sm" although the target is the aspirate list.
REMOVE:R_sm_notpostprep  MAspC IF (NOT -1 ("ei")) (NOT -1 ("a" cnjcoo)) (NOT -1 ("â")) (NOT -1 ("chwech"));

# naeth o ddarllen y llythyr
# remove nasally mutated verbs, which are impossible
# ("mod" and "maen" are exempted by the two NOT 0 conditions)
REMOVE:R_nm_whereverb  MNasC (0 V) (NOT 0 ("mod")) (NOT 0 ("maen"));
# this needs improvement - all conjugated verbs beginning with a nasal will also be removed
# ideally, we need mutated words to be marked with a tag saying they are mutated, so that we can remove them based on that

#mynd â chi ar
# When the cohort is also a possible subject pronoun, drop the
# aspirate-mutation reading after a lexical verb or after a noun.
# Both rules share the name R_aspC_imposs.
REMOVE:R_aspC_imposs MAspC IF (-1 (vblex)) (0 MAspC) (0 PrnSubj);
REMOVE:R_aspC_imposs MAspC IF (-1 (n)) (0 MAspC) (0 PrnSubj);
# The ambiguity in sense still needs to be resolved somehow.

#Rhai gwynion, rhai gwinau,
# no soft-mutation reading directly after an adjective
REMOVE MSoftC IF (-1 Adj);
# seems too greedy

# pobl y de
# no soft-mutation reading directly after the definite article
REMOVE MSoftC IF (-1 DetDef);
# seems too greedy

#cyfarfod nos
# no nasal-mutation reading for a feminine noun that follows a masculine noun
REMOVE MNasC IF (0 (n f)) (-1 (n m));

#o ryw fath
# no soft-mutation reading for a masculine noun that follows a common noun
REMOVE MSoftC IF (0 (n m)) (-1 NC);

#Bryd hynny
# no soft-mutation reading when a pronoun follows
REMOVE MSoftC IF (1 Prn);



# Sentence-based rules
#####################################

# a fydd o'n mynd (and he'll be going) - spoken for a mi fydd o'n mynd or a bydd o'n mynd
# seems impossible to disambiguate from a_vpart+SM except by looking for a question-mark at the end of the sentence
#SELECT:S_a_cnjcoo (cnjcoo) (@1 ("a"ri)) (NOT @-1 ("?" sent));
# in theory, this would select a_cnj if the sentence does not end in a question mark
# but it doesn't work - the 2nd condition never seems to apply



# Pre-verbal particles
#####################################

# delete negative verbal particle reading if a verb does not follow
# Mi welodd y bachgen ni a'n ffrindiau ar y traeth.
REMOVE:R_ni_vpart ("ni" vpart) (NOT 1 V);

# a yw Apertium yn gallu ...
# choose a vpart if a precedes yw
# The following verb must not be an infinitive.  The tag used throughout this
# grammar is "inf" (see LIST Inf); the former "(infin)" matched no reading, so
# the exclusion never took effect.
SELECT:S_a_vpart (vpart itg) (1 V) (NOT 1 Inf);

# this needs to be checked and perhaps broken down into smaller rules
# choose verbal partical if sandwiched between Conj + verb
# conditions: the previous word is a conjunction or is not a lexical verb; the
# next word is a verb of any class but has no infinitive, noun or numeral
# reading; the word itself has no co-ordinating-conjunction reading
SELECT Vpart IF ((-1 CC) OR (-1 CS) OR (NOT -1 (vblex))) ((1 (vblex)) OR (1 (vbser)) OR (1 (vbmod)) OR (1 (vaux))) (NOT 1 (vblex inf)) (NOT 1 (n)) (NOT 1 Num) (NOT 0 CC);

# SELECT Vpart IF ( (-1 (cm)) OR (NOT -1 (vblex)) ) ((1 (vblex)) OR (1 (vbser))) (NOT 1 (vblex inf)) (NOT 1 Num) (NOT 0 CC);
# commented out because it interferes with R_oes_age
# needs to be broken down

# after a verbal particle, choose the (vbser cns) reading
SELECT (vbser cns) (-1 Vpart);

# choose det.def after a verb and before a noun
# (done by removing the competing verbal-particle reading)
REMOVE:R_yr_vpart Vpart (-1 V) (1 NC);

# choose the interrogative "a" instead of the conjunctive "a" if oes follows
# this could probably be broadened out
# a oes lle yma? (is there room (a place) here?) - *and there are where here
REMOVE:R_a_cnjcoo ("a" cnjcoo) (1 Oes);
# Oops - gives "did this place there are"
# With above "lle yma" rules, we now get "did place there are here"
# not ideal, but slightly better than original "and there are where here"
# where is "did" coming from? "did" is never used with "be" in English!!
# and where is the plural coming from?
# "place there is here" would be acceptable

#Mae hen wlad fy'n nhadau
# choose the particle reading of a {particle, possessive determiner} cohort
# when the previous word is unambiguously (-1C) a possessive determiner
SELECT (part) IF (-1C DetPos) (0 (part)) (0 DetPos);


# Determiners
#####################################

# drop the determiner reading of "yna"/"yma" unless the definite article
# stands two words to the left
REMOVE:R_yna_det ("yna" det) OR ("yma" det) (NOT -2 DetDef);
# as analogue of rule below - can they be combined?
# mae 'na le yma (there is a place here) - *that place is here.
# with above rule, we get "is there place here" - acceptable if we can get rid of question word-order
# perhaps keep this rule, but do a multiword for "mae 'na = there is", since it is a bit odd, and Englishy
# compare (better) "mae lle yma" - "*place is here"
#REMOVE ("yma" det) IF (NOT -2 DetDef);
# choose the "where" (adv, itg) sense for lle only if it is followed by a verb
# a oes lle yma? (is there room (a place) here?) - *and there are where here.
REMOVE:R_lle_where ("lle" itg) IF (NOT 1 V);
# Oops - gives "this place" for "lle yma"
# we need the REMOVE yma rule to handle this, and it needs to come before this rule
# or we get "this place there are"
# with both in this order, we get "and place there are here"

# mark a possessive determiner as being masculine if followed by soft mutation
SUBSTITUTE:M_mf_m_presoft (mf) (m) DetPos (1 MSoftC) (NOT 0 ("<o'i>"));
# what's with the last condition?
# (it skips cohorts whose word form is "o'i"; the reason is not recorded here)
# and feminine if followed by aspirate mutation.
SUBSTITUTE:M_mf_f_preasp (mf) (f) DetPos (1 MAspC);

#gwerthir llyfrau yma
# drop a demonstrative reading unless a definite article occurs two or more
# words to the left, scanning leftwards and stopping at the first verb
REMOVE DetDem IF (NOT -2* DetDef BARRIER V) ;
# may not be required now - R_yna_det

#y llyfr yma
# after a common noun, drop the adverb and pronoun readings of a word that
# can also be a demonstrative
REMOVE Adv IF (-1 NC) (0 Adv) (0 DetDem);
REMOVE Prn IF (-1 NC) (0 Prn) (0 DetDem);
# may not be required now - R_yna_det



# Adjectives
#####################################
# "iawn": keep the adjective reading when the word directly follows a common
# noun or the preposition "yn".
# e.g. boi iawn; mae'n iawn rŵan
SELECT:S_iawn_adj ("iawn" adj)
	IF ((-1 NC) OR (-1 prep_yn));
# disabled alternative that removed the adverb reading instead:
#REMOVE:R_iawn_adv ("iawn" adv) ( (-1 NC) OR (-1 ("yn" part)) );



# Adverbs
#####################################
# "cyn": keep the adverb reading when an adjective or an adverb follows.
SELECT:S_cyn_adv ("cyn" adv)
	IF ((1 Adj) OR (1 Adv));


# Pronouns
#####################################

# Edrychwn ymlaen i glywed ganddoch.
# remove the prn_1sg reading if the preceding word is not a verb and we have an infinitive following
REMOVE:R_prn1sg_preinfin_postnonvb prn_subj_1sg (NOT -1 V) (1 vb_inf);

# delete a subject pronoun if preceded by an infinitive
# i'w weld fo
# NOT: mynd â chi ar
# (the NOT 0 MAspC condition is what spares cohorts like "chi" in the second example)
REMOVE:R_prn_postinfin PrnSubj IF (-1 vb_inf) (NOT 0 MAspC);

# delete a subject pronoun if preceded by an adjective and followed by a proper noun
# ??
REMOVE PrnSubj IF (1 NP) (-1 Adj);

# convert iddyn nhw to a subject in subordinate clauses after cyn
# Mi ddaeth y dyn yn ôl cyn iddyn nhw fynd.

# Mi welodd y bachgen ni a'n ffrindiau ar y traeth.
#REMOVE:R_ni_subj ("ni"ri prn subj) IF (NOT -1 vb_1pl);
# UPDATE - for some inexplicable reason this stopped working; even adjusting the location made no difference, so changed to a SUBSTITUTE rule
# The seven rules below recast a subject pronoun as an object pronoun
# (subj -> obj) when the previous word is neither a verb form in the matching
# vb_* set nor the auxiliary "ddaru".
SUBSTITUTE:M_subj_obj_1sg (subj) (obj) prn_subj_1sg (NOT 0 vb_1sg) (NOT -1 vb_1sg) (NOT -1 ("<ddaru>" "ddaru" vaux past)) (NOT @1 (prn));
# the (NOT 0 vb_1sg) is to cover dwi'n - this decomposes to vbser+prn+stative, but since CG sees all of these as the current focus
# it will not fire this rule, since the previous (-1) item is not in fact the verb
# NOTE(review): only the 1sg rule carries this extra (NOT 0 ...) condition.
SUBSTITUTE:M_subj_obj_2sg (subj) (obj) prn_subj_2sg (NOT -1 vb_2sg) (NOT -1 ("<ddaru>" "ddaru" vaux past)) (NOT @1 (prn));
SUBSTITUTE:M_subj_obj_3sg_m (subj) (obj) prn_subj_3sg_m (NOT -1 vb_3sg) (NOT -1 ("<ddaru>" "ddaru" vaux past)) (NOT @1 (prn));
SUBSTITUTE:M_subj_obj_3sg_f (subj) (obj) prn_subj_3sg_f (NOT -1 vb_3sg) (NOT -1 ("<ddaru>" "ddaru" vaux past)) (NOT @1 (prn));
SUBSTITUTE:M_subj_obj_1pl (subj) (obj) prn_subj_1pl (NOT -1 vb_1pl) (NOT -1 ("<ddaru>" "ddaru" vaux past)) (NOT @1 (prn));
SUBSTITUTE:M_subj_obj_2pl (subj) (obj) prn_subj_2pl (NOT -1 vb_2pl) (NOT -1 ("<ddaru>" "ddaru" vaux past)) (NOT @1 (prn));
SUBSTITUTE:M_subj_obj_3pl (subj) (obj) prn_subj_3pl (NOT -1 vb_3pl) (NOT -1 ("<ddaru>" "ddaru" vaux past)) (NOT @1 (prn));
# the last clause says not to apply the rule if a pronoun is the first word in the sentence
# eg Hi yw'r ferch a welais ddoe
# NOTE(review): @1 is an absolute position, so the clause blocks the rule for
# every pronoun in such a sentence, not only the sentence-initial one.
# may need some tweaking


# Interrogatives
#####################################

# "sut" means "how" before a verb and "what sort of" before a noun, so drop
# the determiner reading before a verb and the adverb reading before a common
# noun.  The ri flags match the base form case-insensitively, which is needed
# to catch a capitalised sentence-initial "Sut".
REMOVE:R_sut_det_preverb ("sut"ri det) IF (1 V);
REMOVE:R_sut_adv_prenoun ("sut"ri adv) IF (1 NC);

# "sawl" directly after a form of bod is adjectival, not interrogative.
# e.g. Mae sawl anifail yn y maes
REMOVE:R_sawl_interrogative ("sawl" itg) IF (-1 be_any);

# The plural also has to be propagated to a following "sydd".
# "sydd" is entered in cydix as a+bod pres, 3p, mf, sg, and that subreading
# (+bod) can be used directly as a tag.  Turn its sg into pl whenever "sawl"
# occurs anywhere to the left before a sent cohort.
SUBSTITUTE:M_sg_pl_sydd (sg) (pl) (+bod sg) (-1* ("sawl"ri) BARRIER (sent));
# Not perfect, because you could have:
#   mi welodd y ffermwr sawl anifail yr oedd yn eu hoffi
# so the rule needs to be tightened, but it seems unlikely that the adverbial
# and adjectival uses of sawl in a subordinate clause can be told apart this way:
#   dwn i ddim sawl gwaith yr aeth i'r lle

# faint o + noun.sing   --> how much + noun
# faint o + noun.plural --> how many + noun
#
# Disabled - these are too greedy and impact on sut:
#SUBSTITUTE (sp) (sg) DetItg (1* (n sg) BARRIER (sent))
#SUBSTITUTE (sp) (pl) DetItg (1* (n pl) BARRIER (sent))
#SUBSTITUTE (sp) (sp) DetItg (1* (n sp) BARRIER (sent))



# Prepositions
#####################################

# Prefer the preposition "i" over the 1sg subject pronoun "i" when the
# previous word is not in vb_1sg.
#   aeth i gael y pres   - he went to get the money
#   aethom i gael y pres - we went to get the money
# The first is alright without this rule, the second isn't.
# NOTE(review): nothing in the conditions checks for a following infinitive,
# although both examples have one.
SELECT:S_i_prep Prep_i
	IF (0 Prep_i) (0 PrnSubjP1Sg)
	   (NOT -1 vb_1sg);
# More work is needed to differentiate between
#   cerddaf i'r dre      - I'll walk to the town (prep + yr)
# and
#   wela i'r dyn fory    - I'll see the man tomorrow (prn + yr)
# May not even be possible without some marker of case.

# "tra" is an adverb only before an adjective.
# e.g. tra bod
REMOVE:R_tra_adv ("tra" adv)
	IF (NOT 1 Adj);

# "yn ôl" before a personal name is the preposition, so drop the adverb reading.
# e.g. Yn ôl Ms Thomas
REMOVE:R_ynôl_adv Adv
	IF (0 Prep_yn_ôl) (1 Ant);
# needs expansion - perhaps put two meanings in the dictionary?

# After the definite article, "cae" is the masculine noun rather than a verb.
# e.g. dawnsio yn y cae
SELECT:S_cae_n ("cae" n m)
	IF (0 NC) (0 V)
	   (-1 DetDef);

# No subject pronoun between a preposition and a numeral.
# e.g. mewn i un system
REMOVE PrnSubj
	IF (-1 Pr) (1 Num);
# check re above prn_subj rules

# In a {preposition, pronoun} cohort that does not follow a verb and stands
# before a noun, proper noun, adjective or numeral, drop the pronoun reading.
#   ymwelwyr o tramor
#   un o hen
#   ôl i 1880
REMOVE:R_prn_subj PrnSubj
	IF (0 Pr) (0 PrnSubj)
	   (NOT -1 V)
	   ((1 NC) OR (1 NP) OR (1 Adj) OR (1 Num));
REMOVE:R_prn_obj PrnObj
	IF (0 Pr) (0 PrnObj)
	   (NOT -1 V)
	   ((1 NC) OR (1 NP) OR (1 Adj) OR (1 Num));

# stative
# choose the particle reading of "yn" before a numeral
SELECT:S_yn_stative ("yn" part) (1 Num);
#SUBSTITUTE:M_pr_stative_yn (pr) (stative) ("yn" pr) (1 Num) (2 ("oed"ri));

# postnum
# chwech o ddynion
# choose the postnum reading of "o" after a numeral
SELECT:S_o_postnum ("o" postnum) (-1 Num) (NOT -1 (sp));
# the NOT sp works, but is probably a bit fragile
# it prevents the rule tripping on things like "dyn 41 oed o Abertawe"
# num + oed probably needs to be looked at again some time

# cyn iddyn nhw
# note that these two rules are co-dependent
# we use i_prep to decide that cyn is cnjsub
# and then use cyn_cnjsub to choose the subj form of i_prep
# this depends on adding subject entries for i in the same way that we already have object entries
#SELECT:S_cyn_cnjsub_prei ("cyn" cnjsub) (1 Prep_i);
#SELECT:S_subj_postcyn PrnSubj (-1 ("cyn"ri cnjsub));
# had to revert - clashes with M_subj_obj_xyy.
# recast obj as subj on a (+prpers obj) reading when an infinitive follows
SUBSTITUTE:M_obj_subj_posti_preinf (obj) (subj) (+prpers obj) (1 vb_inf);
# this works better, and is applicable to things other than cyn, eg erbyn, wedi, etc.
# could be tightened by adding (-1 Time_prep)

# gan feddwl amdani, gan gynnwys
# noun gets chosen

# wrth fynd i'r ffenestr
# select i_prep, not i_prn
SELECT:S_i_postinfin ("i" pr) (-1 vb_inf);

#i ba raddau
# select i_prep, not i_prn
SELECT:S_i_prepa ("i"ri pr) (1 ("pa" prn itg));
# need to use ri or it won't catch the capital I

# math o, fathau o
# delete the bath sense before o_pr- this is not perfect
REMOVE:R_bath_preo ("bath" n) (1 ("o" pr));

# mae o'n cynnig ateb
# select the particle reading of yn before an infinitive, and cast the infinitive to a gerund
# (the actual deletion of yn is not done by these rules)
# NOTE(review): the rule name S_yn_stative is also used by the numeral rule
# at the top of this block.
SELECT:S_yn_stative ("yn" part) (1 (inf));
SUBSTITUTE:M_infin_vn_postyn (inf) (ger) (inf) (-1 ("yn" part));





# Verbs - bod
#####################################

# This is horrible:
# in a {noun, bod} cohort followed by a quantifier and then the definite
# article, keep the bod reading.
# e.g. nid oes llawer o'r rheiny ar ôl
SELECT (vbser)
	IF (0 NC) (0 (vbser))
	   (1 DetQnt) (2 DetDef);

# "maen" before a subject pronoun is not the noun (stone).
# e.g. Ac maen nhw wedi
REMOVE:R_maen_stone NC
	IF (0 ("maen")) (1 PrnSubj);

# Select bod, not tasu: drop the lexical verb "tasu" unless a verbal particle
# precedes it.
# e.g. taswn i yn dy le di
REMOVE:R_tasu_wherecond ("tasu" vblex)
	IF (NOT -1 (vpart));
# not ideal, because it will not choose "tasu" (stack) *unless* it has mi, fe, a, ni in front, but OK.



# Verbs - agreement
#####################################

# choose a 1p.sg pronoun if it is preceded by 1p.sg bod
# rydw i'n anfon - i'n should be seen as pronoun + yn, not as prep + ein - I am sending
SELECT:S_i_1sg PrnSubjP1Sg IF (-1 be_1_sg) (0 Prep_i) (0 PrnSubjP1Sg);

#Rydan ni wedi sefydlu meini prawf newydd, ystyried y ffordd ymlaen. 
# choose a 1p.pl subject pronoun if the previous word has a 1p.pl reading and a verb reading
SELECT:S_1pl_prn PrnSubjP1Pl IF (-1 P1Pl) (-1 V);
# ?? Is this required?  I think the problems with ni were due to incorrect entries for dod and dianc, and these have been fixed - check.

# choose a 2p.pl pronoun if it is preceded by a 2p.pl verb
# rydych chi yn mynd
SELECT:S_2pl_prn_post2plvb PrnSubjP2Pl IF (0 NC) (0 PrnSubj) (-1 VbP2Pl);

# Remove a pronoun reading if it doesn't concord with the verb
# Better would be: 
#  REMOVE (Pron) IF (-1 V $$PERNUM)(NOT 0 $$PERNUM);
#  when $$PERNUM = (p1 sg) | (p2 sg) | ...

# NOTE(review): in the five rules below the person/number tests on 0 and -1
# are separate context tests, so they can be satisfied by any reading of the
# cohort - not necessarily the PrnSubj reading being removed, nor the same
# reading of the preceding word that satisfies (-1 V).
REMOVE PrnSubj IF (-1 V) ((-1 (p2) OR (p3))) (0 (p1)); 
REMOVE PrnSubj IF (-1 V) ((-1 (p1) OR (p3))) (0 (p2)); 
REMOVE PrnSubj IF (-1 V) ((-1 (p1) OR (p2))) (0 (p3));
REMOVE PrnSubj IF (-1 V) (-1 (pl)) (0 (sg));
REMOVE PrnSubj IF (-1 V) (-1 (sg)) (0 (pl));

# no verb reading directly after the definite article
REMOVE V IF (-1 DetDef);
# isn't this duplicating something else?

#aeth fo
# in a {subject pronoun, bod prs} cohort after a past-tense lexical verb, keep the pronoun
SELECT PrnSubj IF (0 PrnSubj) (0 (vbser prs)) (-1 (vblex past));



# Verbs - lexical
#####################################

# delete a verb from a {verb, noun} cohort if preceded by "preposition + the"
# yn y golchi
# (cohorts that also have an Acr reading are left alone)
REMOVE:R_vb_postdef V IF (0 V) (0 NC) (NOT 0 Acr) (-1 DetDef) (-2 Pr);

# See 1.3.36 -- we currently just drop subjunctives and imperatives
# needs to be revised
# NOTE(review): the third rule only drops imperatives from cohorts that also
# have a (vblex past) reading.
REMOVE:R_subjunctive_vb VblexPrs IF (0 V); 
REMOVE:R_imperative_be Imp IF (0 (vbser)); 
REMOVE:R_imperative_vb Imp IF (0 (vblex past)); 

# delete medd (say) if preceded by an obj prn
# drain ac ysgall mall a'i medd
# NOTE(review): the condition actually tests for a possessive determiner (det pos).
REMOVE:R_medd_say ("medd" vblex inf) (-1 (det pos));
#REMOVE:R_a_cnjcoo_preprn ("a" cnjcoo) (1 (prn obj p3 m sg));
#SELECT:S_a_rel_preprn ("a" rel) (1 (prn obj p3 m sg));

# cyfathrebu dwyieithog
# cast an infinitive as a verbal noun when an adj follows
SUBSTITUTE:M_infin_vn_preadj (inf) (ger) (vblex) (1 (adj));

# cyfrannu at gaffael
# cast an infinitive as a verbal noun when at precedes
# needed to comment out cyfrannu#at multiword for this particular example to work
# there seems no way of triggering the rule by using a subset of the multiword
SUBSTITUTE:M_infin_vn_postat (inf) (ger) (vblex) (-1 ("at" pr));

#d w i'n meddwl
# cast an infinitive as a verbal noun when a contracted yn particle (+yn part) precedes
# NOTE(review): the rule name M_infin_vn_postyn is also used by a rule in the
# Prepositions section.
SUBSTITUTE:M_infin_vn_postyn (inf) (ger) (vblex) (-1 (+yn part));


# gweler y nodiadau
# remove impers tag to avoid passive in the imperative
#SUBSTITUTE:M_impers_imp (imp impers pl) (imp) (vblex);



# Verbs - periphrastic constructions
#####################################

# SUBSTITUTE:S_poss_subj_prebodwedi (det pos sp) (prn subj) DetPos (1 (vbser)) (2 ("wedi" pr)) (3 vb_inf);

# ei fod wedi taflu (that he threw)
# delete wedi, cast infin to past, and delete bod
# Step by step: choose the particle reading of wedi between infinitive bod and
# a verb; recast the infinitive as past when it follows "possessive + bod +
# wedi"; choose the vrbnull reading of bod before the wedi particle.
SELECT:S_wedi_peri ("wedi" part) (-1 (vbser inf)) (1 V);
SUBSTITUTE:M_infin_past_postwedi (inf) (past) (inf) (-3 (det pos)) (-2 (vbser)) (-1 ("wedi" part));
SELECT:S_bod_vrbnull ("bod" vrbnull) (1 ("wedi" part));
# also need to cast poss to subj, but this doesn't seem to be possible

# wedi ei seilio (based)
# delete wedi, cast infin to past participle, and delete poss
# NOTE(review): both rules below reuse the names of the rules above
# (S_wedi_peri, M_infin_past_postwedi) although the second one produces pp,
# not past.
SELECT:S_wedi_peri ("wedi" part) (1 (det pos)) (2 V);
SUBSTITUTE:M_infin_past_postwedi (inf) (pp) (inf)  (-2 ("wedi" part)) (-1 (det pos));



# Nouns
#####################################

# delete a noun from a {noun, pronoun} cohort if preceded by an infinitive and followed by a preposition
# i'w weld fo yn y dref
# NOTE(review): the condition (-1 V) accepts any verb form, not only infinitives.
REMOVE NC IF (-1 V) (0 NC) (0 PrnSubj) (1 Pr);
# this doesn't seem to work - check

# delete a noun from a {vbmod, noun} cohort if preceded by a preposition
# this may be cast too widely - let's see ...
# yn gallu dod (able to come) - *in capacity come
REMOVE NC IF (-1 Pr) (0 NC) (0 VbMod);

# delete a noun from a {verb, noun} cohort if preceded by a verbal particle
# nad oes (that there is not) - *not age
REMOVE:R_oes_age ("oes" n f) IF (-1 Vpart) (0 NC) (0 V);
# needed to tighten this to specify oes

# Trodd Peter Law fwyafrif Llafur o 19,000 i fwyafrif o 9,000 iddo ef.
# choose the surname reading after an unambiguous (-1C) first name, unless the
# word also has a lexical-verb reading
SELECT Cog IF (-1C Ant) (NOT 0 (vblex));


# Nouns - tweaking
#####################################

#Edwina Hart
# choose the first-name reading when a surname follows
SELECT (np ant) IF (1 (np cog));

#Ieuan Wyn Jones AC
# choose the (n acr) reading directly after a surname
SELECT (n acr) IF (-1 (np cog));

# Mae'r modd y mae gweithwyr mudol
# nad oedd modd
# Dim modd ei
# no nasal-mutation reading after the definite article, a verb or a masculine noun
REMOVE MNasC IF ((-1 DetDef) OR (-1 V) OR (-1 (n m)));

#Sioned Haf o Gymdeithas yr Iaith
# between two proper nouns, drop the pronoun reading of a {preposition, pronoun} cohort
REMOVE PrnSubj IF (-1 (np)) (0 Pr) (0 PrnSubj) (1 (np));

#ym Medi 1999
#REMOVE (vblex) IF (-1 Pr) (0 NC) (NOT 0 Acr) (0 V);
# too broad - seems to interfere with M_infin_vn_postyn

#54 acer o barcio 
# choose the preposition reading between a common noun and a verb
SELECT Pr IF (-1 NC) (1 V);

#yn sicr o gynyddu
# no subject pronoun between an adjective and a verb
REMOVE PrnSubj IF (-1 Adj) (1 V);

#ysgolion o'r fath -- no idea why this doesn't work with MSoftC list
# same f <- b pair as in MSoftC, written inline: drop that reading for a
# masculine singular noun after the definite article
REMOVE ("<f.*>"ri "b.*"ri) IF (0 (n m sg)) (-1 DetDef);  




# Conjunctions
#####################################

# "na" is the subordinating conjunction when a comparative adjective occurs
# somewhere to the left, with no determiner in between.
# e.g. well o lawer na'r
SELECT:S_na_cnjsub_postcomp ("na" cnjsub)
	IF (-1* (adj comp) BARRIER Det);

# "a" between two first names is the co-ordinating conjunction.
# e.g. Anne a Bev
SELECT:S_a_cnjcoo_tweennames ("a" cnjcoo)
	IF (-1 (np ant)) (1 (np ant));

# "a" with no verb on either side is the co-ordinating conjunction.
# e.g. yn ystod y dydd a gyda'r nos
SELECT:S_a_cnjcoo_noverb ("a" cnjcoo)
	IF (NOT -1 V) (NOT 1 V);





# Chunking
#####################################

# prosesau ieithyddol ac anieithyddol
# two adjectives joined by a[c]
#ADD (beginAP) (adj) (1 ("a" cnjcoo)) (2 (adj));
#ADD (endAP) (adj) (-2 (adj))(-1 ("a" cnjcoo));
# this works, but commented out for now




#####################################
# Lexical and translation disambiguation and rules
# Note: this needs more work
#####################################
SECTION

# After "sawl", the Welsh noun will always be singular, but the English one plural.
SUBSTITUTE:M_sg_pl_postsawl (sg) (pl) NC (-1 ("sawl"ri));

# distinguish between "lonely" and "only"
# adds the pred tag to "unig" when a noun, proper noun, preposition,
# determiner or (cm) cohort follows
ADD (pred) ("unig" adj) ((1 (n)) OR (1 (np)) OR (1 (pr)) OR (1 Det) OR (1 (cm)));

# distinguish between "work" and "time"
REMOVE (n f) (0 ("gwaith")) (0 MSoftC);
# perhaps better to use the meaning tag approach?
# hmm - does this make sense?  all it says is remove the time meaning when you have soft mutation - needs more
REMOVE (n f) (0 ("gwaith")) (NOT -1 Num);

# drop the feminine noun reading of "llif" after an adjective
REMOVE (n f) (0 ("llif")) (-1 Adj);

# "ni" directly after an infinitive is not the negative verbal particle
REMOVE (vpart neg) (0 ("ni")) (-1 Inf);

# choose the "direction" sense of "cyfeiriad" (not address) if preceded by trafod
SUBSTITUTE:M_cyfeiriad_direction_posttrafod ("cyfeiriad"ri n m) ("cyfeiriad" n m S1) NC (-1 ("trafod"ri vblex));
# this doesn't handle cases where trafod is mutated ...

# choose the "board" sense of "bwrdd" (not table) if detdef or noun follows
#SUBSTITUTE:M_bwrdd_board_prenoun ("bwrdd"ri n m) ("bwrdd" n m S1) NC ( (NOT 1 ("pren"ri n)) OR (NOT 1 ("top"ri adj)) );

# get rid of noun reading of trafod if it's followed by cyfeiriad
# we could probably change this to NC, because it's of wider application
REMOVE:R_trafod_n ("trafod"ri n m) IF (1 ("cyfeiriad"ri n m S1));

# this chooses the "be able" sense of "gallu" (modal verb, not the noun) if preceded by yn
# The target has to be the modal reading (VbMod): the tags being replaced
# include vbmod, so with the former target NC the rule could only be applied to
# noun readings, which do not carry that tag.
# NOTE(review): assumes the bilingual dictionary has a matching S1 entry for
# gallu vbmod - confirm before release.
SUBSTITUTE ("gallu"ri vbmod) ("gallu" vbmod S1) VbMod (-1 prep_yn);

# dyn o Abertawe
# choose the "from" sense of o (not of) if it is followed by a placename
SUBSTITUTE:M_o_from_pretop ("o" pr) ("o" pr S1) ("o" pr) (1 Top);
#MAP:M_o_from_pretop (@A:from) ("o" pr) (1 Top)

# o nos Fawrth
# choose the "from" sense of o (not of) if it is followed by a dayname
SUBSTITUTE:M_o_from_preday ("o" pr) ("o" pr S1) ("o" pr) (1 Days);

# choose the "way" sense of "ffordd" when in the vicinity of "gwahanol"
# mewn gwahanol ffyrdd
SUBSTITUTE:M_ffordd_way_gwahanol ("ffordd" n f) ("ffordd" n f S1) ("ffordd" n f) ( (1 ("gwahanol" adj)) OR (-1 ("gwahanol" adj)) );
# not perfect, because it would apply in other contexts too

# datblygodd y cwricwlwm i gynnwys mathemateg
# after the preposition "i" and before a common noun, "gynnwys" is the infinitive
SELECT ("<gynnwys>" vblex inf) IF (-1 ("i" pr)) (1 NC);


####
# Some things extracted from evaluation of cy-en 0.1 test sentences
#
# TODO:
#       Disambiguation of 'oes'
#       Disambiguation of 'yn'
#	Disambiguation of 'sawl'
#	Disambiguation of 'a'
#       Selection of bigger/more/greater/larger from 'mawr/mwyaf'