forked from SciSharp/BotSharp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWordNetEngine.cs
More file actions
151 lines (133 loc) · 6.22 KB
/
WordNetEngine.cs
File metadata and controls
151 lines (133 loc) · 6.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
//Copyright (C) 2006 Richard J. Northedge
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
using System;
using System.Collections.Generic;
namespace BotSharp.MachineLearning
{
/// <summary>
/// Abstract base class for WordNet lookup engines. Derived classes supply the
/// actual lookups (parts of speech, index words, synsets, relation types and
/// exception forms); this class adds base-form resolution on top of them by
/// running a sequence of morphological operations.
/// </summary>
public abstract class WordNetEngine
{
    // Operation pipeline used by GetBaseForms(lemma, partOfSpeech). Built on
    // first use and reused afterwards; no locking is performed around it.
    private Morph.IOperation[] mDefaultOperations;

    /// <summary>
    /// Zero-length string array exposed to derived classes.
    /// </summary>
    protected string[] mEmpty = new string[0];

    /// <summary>
    /// Returns part-of-speech names; the set returned is defined by the implementing engine.
    /// </summary>
    public abstract string[] GetPartsOfSpeech();

    /// <summary>
    /// Returns part-of-speech names for the given lemma, as defined by the implementing engine.
    /// </summary>
    /// <param name="lemma">The word to look up.</param>
    public abstract string[] GetPartsOfSpeech(string lemma);

    /// <summary>
    /// Returns the index words for the given part of speech.
    /// </summary>
    /// <param name="partOfSpeech">The part of speech to enumerate.</param>
    public abstract IndexWord[] GetAllIndexWords(string partOfSpeech);

    /// <summary>
    /// Returns the index word for a lemma and part of speech.
    /// </summary>
    /// <param name="lemma">The word to look up.</param>
    /// <param name="partOfSpeech">The part of speech to look the word up under.</param>
    /// <returns>
    /// The matching index word. <see cref="LookupIndexWordOperation"/> treats a
    /// null result as "no entry for this lemma".
    /// </returns>
    public abstract IndexWord GetIndexWord(string lemma, string partOfSpeech);

    /// <summary>
    /// Returns the synsets for the given lemma.
    /// </summary>
    /// <param name="lemma">The word to look up.</param>
    public abstract Synset[] GetSynsets(string lemma);

    /// <summary>
    /// Returns the synsets for the given lemma and part of speech.
    /// </summary>
    /// <param name="lemma">The word to look up.</param>
    /// <param name="partOfSpeech">The part of speech to look the word up under.</param>
    public abstract Synset[] GetSynsets(string lemma, string partOfSpeech);

    /// <summary>
    /// Returns the relation types for the given lemma and part of speech.
    /// </summary>
    /// <param name="lemma">The word to look up.</param>
    /// <param name="partOfSpeech">The part of speech to look the word up under.</param>
    public abstract RelationType[] GetRelationTypes(string lemma, string partOfSpeech);

    /// <summary>
    /// Returns a single synset selected by lemma, part of speech and sense number.
    /// </summary>
    /// <param name="lemma">The word to look up.</param>
    /// <param name="partOfSpeech">The part of speech to look the word up under.</param>
    /// <param name="senseNumber">The sense to select (numbering base is defined by the implementation).</param>
    public abstract Synset GetSynset(string lemma, string partOfSpeech, int senseNumber);

    /// <summary>
    /// A morphological step that inspects <paramref name="lemma"/> and appends any
    /// base forms it finds to <paramref name="baseForms"/>.
    /// </summary>
    /// <param name="lemma">The word being analysed.</param>
    /// <param name="partOfSpeech">The part of speech the word is analysed under.</param>
    /// <param name="baseForms">Accumulator that the step adds its results to.</param>
    public delegate void MorphologicalProcessOperation(string lemma, string partOfSpeech, List<string> baseForms);

    /// <summary>
    /// Collects base forms by invoking a single caller-supplied morphological process.
    /// </summary>
    /// <param name="lemma">The word to analyse.</param>
    /// <param name="partOfSpeech">The part of speech to analyse the word under.</param>
    /// <param name="morphologicalProcess">The process that fills the result list.</param>
    /// <returns>The base forms the process added, in insertion order.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="morphologicalProcess"/> is null.
    /// </exception>
    public string[] GetBaseForms(string lemma, string partOfSpeech, MorphologicalProcessOperation morphologicalProcess)
    {
        if (morphologicalProcess == null)
        {
            throw new ArgumentNullException("morphologicalProcess");
        }

        var collected = new List<string>();
        morphologicalProcess(lemma, partOfSpeech, collected);
        return collected.ToArray();
    }

    /// <summary>
    /// Collects base forms by executing each supplied operation, in array order,
    /// against one shared result list.
    /// </summary>
    /// <param name="lemma">The word to analyse.</param>
    /// <param name="partOfSpeech">The part of speech to analyse the word under.</param>
    /// <param name="operations">The operations to execute; every one of them is run.</param>
    /// <returns>The base forms accumulated by all operations.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="operations"/> is null.
    /// </exception>
    public string[] GetBaseForms(string lemma, string partOfSpeech, Morph.IOperation[] operations)
    {
        if (operations == null)
        {
            throw new ArgumentNullException("operations");
        }

        var collected = new List<string>();
        foreach (Morph.IOperation operation in operations)
        {
            operation.Execute(lemma, partOfSpeech, collected);
        }
        return collected.ToArray();
    }

    /// <summary>
    /// Collects base forms using the default operation pipeline, which is created
    /// the first time this overload is called and cached for later calls.
    /// </summary>
    /// <param name="lemma">The word to analyse.</param>
    /// <param name="partOfSpeech">The part of speech to analyse the word under.</param>
    /// <returns>The base forms accumulated by the default operations.</returns>
    public string[] GetBaseForms(string lemma, string partOfSpeech)
    {
        if (mDefaultOperations == null)
        {
            mDefaultOperations = CreateDefaultOperations();
        }
        return GetBaseForms(lemma, partOfSpeech, mDefaultOperations);
    }

    /// <summary>
    /// Delegate that adds every exception form of the lemma (as reported by
    /// <see cref="GetExceptionForms"/>) that is not already in the result list.
    /// A null result from <see cref="GetExceptionForms"/> is treated as "no forms".
    /// A new delegate instance is returned on each access.
    /// </summary>
    public MorphologicalProcessOperation LookupExceptionsOperation
    {
        get
        {
            return (lemma, partOfSpeech, baseForms) =>
            {
                string[] exceptionForms = GetExceptionForms(lemma, partOfSpeech);
                if (exceptionForms == null)
                {
                    // Nothing to add; avoids a NullReferenceException in the loop below.
                    return;
                }

                foreach (string form in exceptionForms)
                {
                    if (!baseForms.Contains(form))
                    {
                        baseForms.Add(form);
                    }
                }
            };
        }
    }

    /// <summary>
    /// Delegate that adds the lemma itself to the result list when it is not
    /// already present and <see cref="GetIndexWord"/> returns a non-null entry for it.
    /// A new delegate instance is returned on each access.
    /// </summary>
    public MorphologicalProcessOperation LookupIndexWordOperation
    {
        get
        {
            return (lemma, partOfSpeech, baseForms) =>
            {
                // Check the list first so the index lookup is skipped for known forms.
                if (baseForms.Contains(lemma))
                {
                    return;
                }

                if (GetIndexWord(lemma, partOfSpeech) != null)
                {
                    baseForms.Add(lemma);
                }
            };
        }
    }

    /// <summary>
    /// Creates a synset for the given part of speech and synset offset.
    /// </summary>
    /// <param name="partOfSpeech">The part of speech of the synset.</param>
    /// <param name="synsetOffset">The offset identifying the synset (interpretation is up to the implementation).</param>
    protected internal abstract Synset CreateSynset(string partOfSpeech, int synsetOffset);

    /// <summary>
    /// Returns the exception forms for a lemma and part of speech; consumed by
    /// <see cref="LookupExceptionsOperation"/>.
    /// </summary>
    /// <param name="lemma">The word to look up.</param>
    /// <param name="partOfSpeech">The part of speech to look the word up under.</param>
    protected internal abstract string[] GetExceptionForms(string lemma, string partOfSpeech);

    // Builds the default pipeline: an exceptions lookup, then a suffix-detaching
    // operation, then a tokenizer operation configured with " " and "-".
    // Objects are constructed in the same order as before the extraction.
    private Morph.IOperation[] CreateDefaultOperations()
    {
        // One map instance is shared by both suffix-detaching operations.
        Dictionary<string, string[][]> suffixMap = CreateSuffixMap();

        // Suffix detacher that the tokenizer runs as one of its token operations.
        var tokenSuffixDetacher = new Morph.DetachSuffixesOperation(suffixMap);
        tokenSuffixDetacher.AddDelegate(Morph.DetachSuffixesOperation.Operations, new Morph.IOperation[]
        {
            new Morph.LookupIndexWordOperation(this),
            new Morph.LookupExceptionsOperation(this)
        });

        var tokenizer = new Morph.TokenizerOperation(this, new string[] { " ", "-" });
        tokenizer.AddDelegate(Morph.TokenizerOperation.TokenOperations, new Morph.IOperation[]
        {
            new Morph.LookupIndexWordOperation(this),
            new Morph.LookupExceptionsOperation(this),
            tokenSuffixDetacher
        });

        // Suffix detacher that is a top-level step of the pipeline.
        var suffixDetacher = new Morph.DetachSuffixesOperation(suffixMap);
        suffixDetacher.AddDelegate(Morph.DetachSuffixesOperation.Operations, new Morph.IOperation[]
        {
            new Morph.LookupIndexWordOperation(this),
            new Morph.LookupExceptionsOperation(this)
        });

        return new Morph.IOperation[]
        {
            new Morph.LookupExceptionsOperation(this),
            suffixDetacher,
            tokenizer
        };
    }

    // Builds the per-part-of-speech suffix table handed to Morph.DetachSuffixesOperation.
    // NOTE(review): each pair is presumably { suffix to strip, ending to append } -
    // confirm against Morph.DetachSuffixesOperation.
    private static Dictionary<string, string[][]> CreateSuffixMap()
    {
        var suffixMap = new Dictionary<string, string[][]>();

        suffixMap.Add("noun", new string[][]
        {
            new string[] { "s", "" },
            new string[] { "ses", "s" },
            new string[] { "xes", "x" },
            new string[] { "zes", "z" },
            new string[] { "ches", "ch" },
            new string[] { "shes", "sh" },
            new string[] { "men", "man" },
            new string[] { "ies", "y" }
        });

        suffixMap.Add("verb", new string[][]
        {
            new string[] { "s", "" },
            new string[] { "ies", "y" },
            new string[] { "es", "e" },
            new string[] { "es", "" },
            new string[] { "ed", "e" },
            new string[] { "ed", "" },
            new string[] { "ing", "e" },
            new string[] { "ing", "" }
        });

        suffixMap.Add("adjective", new string[][]
        {
            new string[] { "er", "" },
            new string[] { "est", "" },
            new string[] { "er", "e" },
            new string[] { "est", "e" }
        });

        return suffixMap;
    }
}
}