File: tagger.properties

package info (click to toggle)
uima-addons 2.3.1-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 10,044 kB
  • sloc: java: 48,492; xml: 42,522; javascript: 53; makefile: 8; sh: 8
file content (75 lines) | stat: -rw-r--r-- 3,131 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#  
#    http://www.apache.org/licenses/LICENSE-2.0
#  
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.
 
######## This is the default tagger.properties file.
######## It is used for training and testing only;
######## the configuration for tagging is set directly in the descriptor "HmmTagger.xml".

###########  ONLY FOR TRAINING #####################################################


####### FILE OR DIRECTORY CONTAINING TRAINING CORPUS:
####### can be specified either as an absolute or as a relative path
####### e.g. FILE = ../../tueba_tigerFormat.txt or FILE = C:/Data/tueba.txt
FILE = 


##########################  BOTH FOR TRAINING AND EVALUATION  ################################

######## THESE ARE THE DEFAULT MODEL FILES FOR GERMAN AND ENGLISH
######## You can either uncomment one of them, if you want to replace the given model with your own,

#MODEL_FILE = resources/german/TuebaModel.dat
MODEL_FILE = resources/english/BrownModel.dat

######## or specify a completely different name
# MODEL_FILE = 

######## If mapping of tags is desired, uncomment the following
DO_MAPPING = true


#######	EXAMPLES OF MAPPING CLASSES 

## Basic mapping for the Brown corpus (NLTK distribution) tagset: reduces the 473 tags to 93
MAPPING = org.apache.uima.examples.tagger.trainAndTest.TagMappingBrown

## Basic mapping for the STTS tagset: maps the 54 tags onto about 15 basic classes plus punctuation
#MAPPING = org.apache.uima.examples.tagger.trainAndTest.GrobMappingTueba

## If you implement your own mapping, specify the fully qualified Java class name here, in the same manner as above
#MAPPING = 

######## If the corpus is in a different format and cannot be read with the provided readers,
######## specify here the fully qualified Java class name of your own reader (see examples below)

#CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.TT_FormatReader
CORPUS_READER = org.apache.uima.examples.tagger.trainAndTest.BrownReader

#CORPUS_READER = 

#################      ONLY FOR EVALUATION   ###############################

######### GOLD STANDARD CORPUS FILE: 
######### can be specified as an absolute or as a relative path 
##e.g. GOLD_STANDARD = ../../tueba_tigerFormat.txt or GOLD_STANDARD = C:/Data/tueba.txt

GOLD_STANDARD = 

######### Specify here whether to test a bigram or a trigram model (the default is a trigram model, N=3)
N=3