ZGen  0.2.0
a linearization system for natural language.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
score_context.h
Go to the documentation of this file.
1 #ifndef __ZGEN_SHIFTREDUCE_MODEL_SCORE_CONTEXT_H__
2 #define __ZGEN_SHIFTREDUCE_MODEL_SCORE_CONTEXT_H__
3 
5 #include "utils/math_utils.h" // for bin
6 
// Evaluates to true when index `x` lies in the half-open range [0, N).
// `N` is not a parameter: a variable named N must be in scope where the macro
// is expanded. `x` appears twice in the expansion, so pass a side-effect-free
// expression.
#define _LEGEAL_RANGE_(x) ((0 <= (x)) && (N > (x)))
8 
// Copies the word form, POS tag and dependency label of one token into the
// three variables prefix##w, prefix##p and prefix##l.
// `prefix` must also be the name of a variable holding the token index, and a
// variable `item` (with ref->forms, postags[] and deprels[]) must be in scope
// at the expansion site. The form lookup is bounds-checked via .at().
// The expansion deliberately has no trailing semicolon: the caller's `;`
// terminates the do/while, so the macro behaves as one statement and can be
// the sole body of an if/else.
#define __SET_CTX(prefix) do { \
  prefix##w = (item.ref->forms).at(prefix); \
  prefix##p = item.postags[prefix]; \
  prefix##l = item.deprels[prefix]; \
} while (0)
14 
// Resets prefix##w, prefix##p and prefix##l to the "none" sentinels
// kNoneWord, kNonePostag and kNoneDeprel (which must be visible at the
// expansion site).
// The expansion deliberately has no trailing semicolon: the caller's `;`
// terminates the do/while, so the macro behaves as one statement and can be
// the sole body of an if/else.
#define __CLEAR_CTX(prefix) do { \
  prefix##w = kNoneWord; \
  prefix##p = kNonePostag; \
  prefix##l = kNoneDeprel; \
} while (0)
20 
// Stores the child/descendant counts of one token into prefix##la, prefix##ra
// (left/right child counts, copied as-is) and prefix##ls, prefix##rs
// (left/right descendant counts, passed through utils::bin first).
// `prefix` must also be the name of a variable holding the token index, and
// `item` and `utils` must be visible at the expansion site.
// The expansion deliberately has no trailing semicolon: the caller's `;`
// terminates the do/while, so the macro behaves as one statement and can be
// the sole body of an if/else.
#define __SET_CNT(prefix) do { \
  prefix##la = item.nr_left_children[prefix]; \
  prefix##ra = item.nr_right_children[prefix]; \
  prefix##ls = utils::bin(item.nr_left_descendant[prefix]); \
  prefix##rs = utils::bin(item.nr_right_descendant[prefix]); \
} while (0)
27 
// Zeroes the four count variables prefix##la, prefix##ra, prefix##ls and
// prefix##rs.
// The expansion deliberately has no trailing semicolon: the caller's `;`
// terminates the do/while, so the macro behaves as one statement and can be
// the sole body of an if/else.
#define __CLEAR_CNT(prefix) do { \
  prefix##la = 0; \
  prefix##ra = 0; \
  prefix##ls = 0; \
  prefix##rs = 0; \
} while (0)
34 
35 
36 namespace ZGen {
37 
38 namespace ShiftReduce {
39 
40 namespace eg = ZGen::Engine;
41 namespace utils= ZGen::Utility;
42 
46 
47 struct ScoreContext {
48  ScoreContext(const StateItem& item) {
49  int N = item.ref->size();
50  int S0 = item.top0;
51 
52  const sentence_t& forms = item.ref->forms;
53 
54  if (S0 >= 0) {
55  _is_begin_state = false;
56 
57  S0w = forms.at(S0);
58  S0p = item.postags[S0];
59  __SET_CNT(S0);
60 
61  if ( _LEGEAL_RANGE_(item.left_most_child[S0]) ) {
62  int S0ld = item.left_most_child[S0];
63  __SET_CTX(S0ld);
64  S0S0ldDist = utils::bin(item.rank[S0ld]- item.rank[S0]);
65 
66  if ( _LEGEAL_RANGE_(item.left_2nd_most_child[S0]) ) {
67  int S0l2d = item.left_2nd_most_child[S0];
68  __SET_CTX(S0l2d);
69  } else {
70  __CLEAR_CTX(S0l2d);
71  }
72 
73  if ( _LEGEAL_RANGE_(item.left_most_child[S0ld]) ) {
74  int S0ldd = item.left_most_child[S0ld];
75  __SET_CTX(S0ldd);
76  } else {
77  __CLEAR_CTX(S0ldd);
78  }
79  } else {
80  __CLEAR_CTX(S0ld);
81  __CLEAR_CTX(S0l2d);
82  __CLEAR_CTX(S0ldd);
83  S0S0ldDist = 0;
84  }
85 
86  if ( _LEGEAL_RANGE_(item.right_most_child[S0]) ) {
87  int S0rd = item.right_most_child[S0];
88  __SET_CTX(S0rd);
89  S0S0rdDist = utils::bin(item.rank[S0rd] - item.rank[S0]);
90 
91  if ( _LEGEAL_RANGE_(item.right_2nd_most_child[S0]) ) {
92  int S0r2d = item.right_2nd_most_child[S0];
93  __SET_CTX(S0r2d);
94  } else {
95  __CLEAR_CTX(S0l2d);
96  }
97 
98  if ( _LEGEAL_RANGE_(item.right_most_child[S0rd]) ) {
99  int S0rdd = item.right_most_child[S0rd];
100  __SET_CTX(S0rdd);
101  } else {
102  __CLEAR_CTX(S0rdd);
103  }
104  } else {
105  __CLEAR_CTX(S0rd);
106  __CLEAR_CTX(S0r2d);
107  __CLEAR_CTX(S0rdd);
108  S0S0rdDist = 0;
109  }
110  } else {
111  // The given state is begin state.
112  _is_begin_state = true;
113  //
116  __CLEAR_CNT(S0);
117  __CLEAR_CTX(S0ld);
118  __CLEAR_CTX(S0l2d);
119  __CLEAR_CTX(S0ldd);
120  __CLEAR_CTX(S0rd);
121  __CLEAR_CTX(S0r2d);
122  __CLEAR_CTX(S0rdd);
123  }
124 
125  if (item.stack.size() > 2) {
126  _has_S1 = true;
127  int S1 = item.top1;
128 
129  S1w = forms.at(S1);
130  S1p = item.postags[S1];
131  __SET_CNT(S1);
132  S0S1Dist = utils::bin(item.rank[S1]- item.rank[S0]);
133 
134  if ( _LEGEAL_RANGE_(item.left_most_child[S1]) ) {
135  int S1ld = item.left_most_child[S1];
136  __SET_CTX(S1ld);
137  S1S1ldDist = utils::bin(item.rank[S1ld]- item.rank[S1]);
138 
139  if ( _LEGEAL_RANGE_(item.left_2nd_most_child[S1]) ) {
140  int S1l2d = item.left_2nd_most_child[S1];
141  __SET_CTX(S1l2d);
142  } else {
143  __CLEAR_CTX(S1l2d);
144  }
145 
146  if ( _LEGEAL_RANGE_(item.left_most_child[S1ld]) ) {
147  int S1ldd = item.left_most_child[S1ld];
148  __SET_CTX(S1ldd);
149  } else {
150  __CLEAR_CTX(S1ldd);
151  }
152  } else {
153  __CLEAR_CTX(S1ld);
154  __CLEAR_CTX(S1l2d);
155  __CLEAR_CTX(S1ldd);
156  S1S1ldDist = 0;
157  }
158 
159  if ( _LEGEAL_RANGE_(item.right_most_child[S1]) ) {
160  int S1rd = item.right_most_child[S1];
161  __SET_CTX(S1rd);
162  S1S1rdDist = utils::bin(item.rank[S1rd]- item.rank[S1]);
163  if ( _LEGEAL_RANGE_(item.right_2nd_most_child[S1]) ) {
164  int S1r2d = item.right_2nd_most_child[S1];
165  __SET_CTX(S1r2d);
166  } else {
167  __CLEAR_CTX(S1r2d);
168  }
169 
170  if ( _LEGEAL_RANGE_(item.right_most_child[S1rd]) ) {
171  int S1rdd = item.right_most_child[S1rd];
172  __SET_CTX(S1rdd);
173  } else {
174  __CLEAR_CTX(S1rdd);
175  }
176  } else {
177  __CLEAR_CTX(S1rd);
178  __CLEAR_CTX(S1r2d);
179  __CLEAR_CTX(S1rdd);
180  S1S1rdDist = 0;
181  }
182  } else {
183  _has_S1 = false;
184  S1w = kNoneWord;
185  S1p = kNonePostag;
186  __CLEAR_CNT(S1);
187  __CLEAR_CTX(S1ld);
188  __CLEAR_CTX(S1l2d);
189  __CLEAR_CTX(S1ldd);
190  __CLEAR_CTX(S1rd);
191  __CLEAR_CTX(S1r2d);
192  __CLEAR_CTX(S1rdd);
193  S0S1Dist = 0;
194  }
195 
196  // SHIFTED words
197  // item.word_sequence.size() should be equal to item.postag_sequence.size()
198  int C = item.word_sequence.size();
199 
200  W0 = item.word_sequence[C - 1];
201  P0 = item.postag_sequence[C - 1];
202 
203  _has_W1 = false;
204  if (C > 1) {
205  _has_W1 = true;
206  W1 = item.word_sequence[C - 2];
207  P1 = item.postag_sequence[C - 2];
208 
209  if (C > 2) {
210  W2 = item.word_sequence[C - 3];
211  P2 = item.postag_sequence[C - 3];
212  } else {
213  W2 = kNoneWord;
214  P2 = kNonePostag;
215  }
216  } else {
217  W1 = (W2 = kNoneWord);
218  P1 = (P2 = kNonePostag);
219  }
220  }
221 
222  bool is_begin_state() const {
223  return _is_begin_state;
224  }
225 
226  bool has_S1() const {
227  return _has_S1;
228  }
229 
230  bool has_W1() const {
231  return _has_W1;
232  }
233 
234  bool has_W2() const {
235  return _has_W2;
236  }
237 
240 
244  int S0la, S0ra;
245  int S0ls, S0rs;
246 
250  int S1la, S1ra;
251  int S1ls, S1rs;
252 
255 
256  int S0S1Dist;
258 };
259 
261 
262 } // end of namespace ShiftReduce
263 } // end of namespace ZGen
264 
265 #undef __SET_CTX
266 #undef __CLEAR_CTX
267 #undef __SET_CNT
268 #undef __CLEAR_CNT
269 
270 #endif // __ZGEN_SHIFTREDUCE_MODEL_SCORE_CONTEXT_H__
int S1S1rdDist
Definition: score_context.h:257
Definition: score_context.h:47
deprel_t S1ldl
Definition: score_context.h:249
#define _LEGEAL_RANGE_(x)
Definition: score_context.h:7
word_t S0l2dw
Definition: score_context.h:241
bool is_begin_state() const
Definition: score_context.h:222
#define __SET_CNT(prefix)
Definition: score_context.h:21
word_t S0lddw
Definition: score_context.h:241
postag_t S0rdp
Definition: score_context.h:242
Definition: word_alphabet.h:63
postag_t S1lddp
Definition: score_context.h:248
Definition: state.h:15
Definition: postag_alphabet.h:33
deprel_t S1r2dl
Definition: score_context.h:249
postag_t S0rddp
Definition: score_context.h:242
deprel_t S0lddl
Definition: score_context.h:243
int S0S0rdDist
Definition: score_context.h:257
int S0ls
Definition: score_context.h:245
Definition: word_alphabet.h:63
std::vector< word_t > word_sequence
The sequence of words leading by the current state.
Definition: state.h:59
word_t S0rddw
Definition: score_context.h:241
postag_t P0
Definition: score_context.h:254
int right_most_child[kMaxNumberOfWords]
CACHE: use to store the right most child for the word.
Definition: state.h:95
int S0S1Dist
Definition: score_context.h:256
int S0ra
Definition: score_context.h:244
int S0rs
Definition: score_context.h:245
const int kNonePostag
Definition: score_context.h:44
int S1rs
Definition: score_context.h:251
int postags[kMaxNumberOfWords]
The postags cache.
Definition: state.h:71
#define __SET_CTX(prefix)
Definition: score_context.h:9
postag_t S1rdp
Definition: score_context.h:248
int word_t
Definition: instance.h:11
deprel_t S1rddl
Definition: score_context.h:249
deprel_t S0r2dl
Definition: score_context.h:243
bool has_W2() const
Definition: score_context.h:234
postag_t S1ldp
Definition: score_context.h:248
const dependency_t * ref
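Pointer to the input dependency instance; its forms and size() are read through it. (The generated text here, "The second top word.", appears to belong to top1.)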
Definition: state.h:51
word_t S1w
Definition: score_context.h:247
#define __CLEAR_CTX(prefix)
Definition: score_context.h:15
int S0la
Definition: score_context.h:244
postag_t S1r2dp
Definition: score_context.h:248
deprel_t S0rddl
Definition: score_context.h:243
postag_t S0l2dp
Definition: score_context.h:242
word_t W0
Definition: score_context.h:253
#define __CLEAR_CNT(prefix)
Definition: score_context.h:28
std::vector< postag_t > postag_sequence
The sequence of postags leading by the current state.
Definition: state.h:62
word_t S0ldw
Definition: score_context.h:241
int left_most_child[kMaxNumberOfWords]
CACHE: use to store the left most child for the word.
Definition: state.h:92
deprel_t S0l2dl
Definition: score_context.h:243
postag_t S1rddp
Definition: score_context.h:248
postag_t S0p
Definition: score_context.h:242
word_t S1lddw
Definition: score_context.h:247
bool has_W1() const
Definition: score_context.h:230
std::vector< word_t > sentence_t
Definition: instance.h:14
int bin(int x)
Definition: math_utils.cc:6
const int kNoneWord
Definition: score_context.h:43
word_t S1l2dw
Definition: score_context.h:247
deprel_t S1l2dl
Definition: score_context.h:249
ScoreContext ctx_t
Definition: score_context.h:260
deprel_t S1lddl
Definition: score_context.h:249
postag_t S1p
Definition: score_context.h:248
postag_t P1
Definition: score_context.h:254
std::vector< word_t > forms
Definition: instance.h:21
word_t S0rdw
Definition: score_context.h:241
postag_t S0ldp
Definition: score_context.h:242
Definition: postag_alphabet.h:33
int S1ls
Definition: score_context.h:251
size_t size() const
Definition: instance.cc:83
postag_t S0lddp
Definition: score_context.h:242
int deprel_t
Definition: instance.h:13
int postag_t
Definition: instance.h:12
word_t S0r2dw
Definition: score_context.h:241
std::vector< int > stack
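The stack of the current state. (The generated text here, "The score for the current state.", appears to belong to a different member.)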
Definition: state.h:56
int top1
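The second top word in the stack of the current state. (The generated text here, "The top word in the stack of the current state.", appears to belong to top0.)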
Definition: state.h:50
deprel_t S0ldl
Definition: score_context.h:243
word_t S1rdw
Definition: score_context.h:247
Definition: deprel_alphabet.h:31
int left_2nd_most_child[kMaxNumberOfWords]
CACHE: use to store the 2nd-left-most child.
Definition: state.h:98
word_t W1
Definition: score_context.h:253
word_t S1r2dw
Definition: score_context.h:247
word_t W2
Definition: score_context.h:253
int right_2nd_most_child[kMaxNumberOfWords]
CACHE: use to store the 2nd-right-most child.
Definition: state.h:101
deprel_t S1rdl
Definition: score_context.h:249
word_t S0w
Definition: score_context.h:241
bool _has_S1
Definition: score_context.h:239
const int kNoneDeprel
Definition: score_context.h:45
deprel_t S0rdl
Definition: score_context.h:243
bool _is_begin_state
Definition: score_context.h:238
postag_t S0r2dp
Definition: score_context.h:242
ScoreContext(const StateItem &item)
Definition: score_context.h:48
int S1la
Definition: score_context.h:250
bool has_S1() const
Definition: score_context.h:226
bool _has_W2
Definition: score_context.h:239
bool _has_W1
Definition: score_context.h:239
postag_t P2
Definition: score_context.h:254
postag_t S1l2dp
Definition: score_context.h:248
int S0S0ldDist
Definition: score_context.h:257
int S1ra
Definition: score_context.h:250
int top0
Definition: state.h:49
int rank[kMaxNumberOfWords]
The rank of the form, with word considered.
Definition: state.h:68
word_t S1rddw
Definition: score_context.h:247
word_t S1ldw
Definition: score_context.h:247
int S1S1ldDist
Definition: score_context.h:257