00001 #include "osmHyp.h"
00002 #include <sstream>
00003
00004 using namespace std;
00005 using namespace lm::ngram;
00006
00007 namespace Moses
00008 {
00009 osmState::osmState(const State & val)
00010 :j(0)
00011 ,E(0)
00012 {
00013 lmState = val;
00014
00015 }
00016
00017 void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
00018 {
00019 gap.clear();
00020 gap = gapVal;
00021 j = jVal;
00022 E = eVal;
00023 }
00024
00025 size_t osmState::hash() const
00026 {
00027 size_t ret = j;
00028
00029 boost::hash_combine(ret, E);
00030 boost::hash_combine(ret, gap);
00031 boost::hash_combine(ret, lmState.length);
00032
00033 return ret;
00034 }
00035
00036 bool osmState::operator==(const FFState& otherBase) const
00037 {
00038 const osmState &other = static_cast<const osmState&>(otherBase);
00039 if (j != other.j)
00040 return false;
00041 if (E != other.E)
00042 return false;
00043 if (gap != other.gap)
00044 return false;
00045 if (lmState.length != other.lmState.length)
00046 return false;
00047
00048 return true;
00049 }
00050
00051 std::string osmState :: getName() const
00052 {
00053
00054 return "done";
00055 }
00056
00058
00059 osmHypothesis :: osmHypothesis()
00060 {
00061 opProb = 0;
00062 gapWidth = 0;
00063 gapCount = 0;
00064 openGapCount = 0;
00065 deletionCount = 0;
00066 gapCount = 0;
00067 j = 0;
00068 E = 0;
00069 gap.clear();
00070 }
00071
00072 void osmHypothesis :: setState(const FFState* prev_state)
00073 {
00074
00075 if(prev_state != NULL) {
00076
00077 j = static_cast <const osmState *> (prev_state)->getJ();
00078 E = static_cast <const osmState *> (prev_state)->getE();
00079 gap = static_cast <const osmState *> (prev_state)->getGap();
00080 lmState = static_cast <const osmState *> (prev_state)->getLMState();
00081 }
00082 }
00083
00084 osmState * osmHypothesis :: saveState()
00085 {
00086
00087 osmState * statePtr = new osmState(lmState);
00088 statePtr->saveState(j,E,gap);
00089 return statePtr;
00090 }
00091
00092 int osmHypothesis :: isTranslationOperation(int x)
00093 {
00094 if (operations[x].find("_JMP_BCK_") != -1)
00095 return 0;
00096
00097 if (operations[x].find("_JMP_FWD_") != -1)
00098 return 0;
00099
00100 if (operations[x].find("_CONT_CEPT_") != -1)
00101 return 0;
00102
00103 if (operations[x].find("_INS_GAP_") != -1)
00104 return 0;
00105
00106 return 1;
00107
00108 }
00109
00110 void osmHypothesis :: removeReorderingOperations()
00111 {
00112 gapCount = 0;
00113 deletionCount = 0;
00114 openGapCount = 0;
00115 gapWidth = 0;
00116
00117 std::vector <std::string> tupleSequence;
00118
00119 for (int x = 0; x < operations.size(); x++) {
00120
00121
00122 if(isTranslationOperation(x) == 1) {
00123 tupleSequence.push_back(operations[x]);
00124 }
00125
00126 }
00127
00128 operations.clear();
00129 operations = tupleSequence;
00130 }
00131
00132 void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
00133 {
00134
00135 opProb = 0;
00136 State currState = lmState;
00137 State temp;
00138
00139 for (size_t i = 0; i<operations.size(); i++) {
00140 temp = currState;
00141 opProb += ptrOp.Score(temp,operations[i],currState);
00142 }
00143
00144 lmState = currState;
00145
00146
00147 }
00148
00149
00150 int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
00151 {
00152
00153 int firstOG =-1;
00154
00155 for(int nd = 0; nd < coverageVector.size(); nd++) {
00156 if(coverageVector[nd]==0) {
00157 firstOG = nd;
00158 return firstOG;
00159 }
00160 }
00161
00162 return firstOG;
00163
00164 }
00165
00166 string osmHypothesis :: intToString(int num)
00167 {
00168 return SPrint(num);
00169
00170 }
00171
00172 void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , Bitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
00173 {
00174
00175 int gFlag = 0;
00176 int gp = 0;
00177 int ans;
00178
00179
00180 if ( j < j1) {
00181
00182 if(coverageVector.GetValue(j)==0) {
00183 operations.push_back("_INS_GAP_");
00184 gFlag++;
00185 gap[j]="Unfilled";
00186 }
00187 if (j == E) {
00188 j = j1;
00189 } else {
00190 operations.push_back("_JMP_FWD_");
00191 j=E;
00192 }
00193 }
00194
00195 if (j1 < j) {
00196
00197 if(j < E && coverageVector.GetValue(j)==0) {
00198 operations.push_back("_INS_GAP_");
00199 gFlag++;
00200 gap[j]="Unfilled";
00201 }
00202
00203 j=closestGap(gap,j1,gp);
00204 operations.push_back("_JMP_BCK_"+ intToString(gp));
00205
00206
00207
00208
00209 if(j==j1)
00210 gap[j]="Filled";
00211 }
00212
00213 if (j < j1) {
00214 operations.push_back("_INS_GAP_");
00215 gap[j] = "Unfilled";
00216 gFlag++;
00217 j=j1;
00218 }
00219
00220 if(contFlag == 0) {
00221
00222 if(english == "_TRANS_SLF_") {
00223 operations.push_back("_TRANS_SLF_");
00224 } else {
00225 operations.push_back("_TRANS_" + english + "_TO_" + german);
00226 }
00227
00228
00229 ans = coverageVector.GetFirstGapPos();
00230
00231 if (ans != -1)
00232 gapWidth += j - ans;
00233
00234 } else if (contFlag == 2) {
00235
00236 operations.push_back("_INS_" + german);
00237 ans = coverageVector.GetFirstGapPos();
00238
00239 if (ans != -1)
00240 gapWidth += j - ans;
00241 deletionCount++;
00242 } else {
00243 operations.push_back("_CONT_CEPT_");
00244 }
00245
00246
00247 coverageVector.SetValue(j,1);
00248 j+=1;
00249
00250 if(E<j)
00251 E=j;
00252
00253 if (gFlag > 0)
00254 gapCount++;
00255
00256 openGapCount += getOpenGaps();
00257
00258
00259 if (j < coverageVector.GetSize()) {
00260 if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
00261 j1 = j;
00262 german = currF[j1-startIndex];
00263 english = "_INS_";
00264 generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
00265 }
00266 }
00267
00268 }
00269
00270 void osmHypothesis :: print()
00271 {
00272 for (int i = 0; i< operations.size(); i++) {
00273 cerr<<operations[i]<<" ";
00274
00275 }
00276
00277 cerr<<endl<<endl;
00278
00279 cerr<<"Operation Probability "<<opProb<<endl;
00280 cerr<<"Gap Count "<<gapCount<<endl;
00281 cerr<<"Open Gap Count "<<openGapCount<<endl;
00282 cerr<<"Gap Width "<<gapWidth<<endl;
00283 cerr<<"Deletion Count "<<deletionCount<<endl;
00284
00285 cerr<<"_______________"<<endl;
00286 }
00287
00288 int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
00289 {
00290
00291 int dist=1172;
00292 int value=-1;
00293 int temp=0;
00294 gp=0;
00295 int opGap=0;
00296
00297 map <int,string> :: iterator iter;
00298
00299 iter=gap.end();
00300
00301 do {
00302 iter--;
00303
00304
00305 if(iter->first==j1 && iter->second== "Unfilled") {
00306 opGap++;
00307 gp = opGap;
00308 return j1;
00309
00310 }
00311
00312 if(iter->second =="Unfilled") {
00313 opGap++;
00314 temp = iter->first - j1;
00315
00316 if(temp<0)
00317 temp=temp * -1;
00318
00319 if(dist>temp && iter->first < j1) {
00320 dist=temp;
00321 value=iter->first;
00322 gp=opGap;
00323 }
00324 }
00325
00326
00327 } while(iter!=gap.begin());
00328
00329 return value;
00330 }
00331
00332
00333
00334 int osmHypothesis :: getOpenGaps()
00335 {
00336 map <int,string> :: iterator iter;
00337
00338 int nd = 0;
00339 for (iter = gap.begin(); iter!=gap.end(); iter++) {
00340 if(iter->second == "Unfilled")
00341 nd++;
00342 }
00343
00344 return nd;
00345
00346 }
00347
00348 void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
00349 {
00350
00351 operations.push_back("_DEL_" + english);
00352 currTargetIndex++;
00353
00354 while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) {
00355 currTargetIndex++;
00356 }
00357
00358 if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end()) {
00359 english = currE[currTargetIndex];
00360 generateDeleteOperations(english,currTargetIndex,doneTargetIndexes);
00361 }
00362
00363 }
00364
00365 void osmHypothesis :: computeOSMFeature(int startIndex , Bitmap & coverageVector)
00366 {
00367
00368 set <int> doneTargetIndexes;
00369 set <int> eSide;
00370 set <int> fSide;
00371 set <int> :: iterator iter;
00372 string english;
00373 string source;
00374 int j1;
00375 int targetIndex = 0;
00376 doneTargetIndexes.clear();
00377
00378
00379 if (targetNullWords.size() != 0) {
00380 iter = targetNullWords.begin();
00381
00382 if (*iter == startIndex) {
00383
00384 j1 = startIndex;
00385 source = currF[j1-startIndex];
00386 english = "_INS_";
00387 generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
00388 }
00389 }
00390
00391 if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
00392 english = currE[targetIndex];
00393 generateDeleteOperations(english,targetIndex, doneTargetIndexes);
00394 }
00395
00396
00397 for (size_t i = 0; i < ceptsInPhrase.size(); i++) {
00398 source = "";
00399 english = "";
00400
00401 fSide = ceptsInPhrase[i].first;
00402 eSide = ceptsInPhrase[i].second;
00403
00404 iter = eSide.begin();
00405 targetIndex = *iter;
00406 english += currE[*iter];
00407 iter++;
00408
00409 for (; iter != eSide.end(); iter++) {
00410 if(*iter == targetIndex+1)
00411 targetIndex++;
00412 else
00413 doneTargetIndexes.insert(*iter);
00414
00415 english += "^_^";
00416 english += currE[*iter];
00417 }
00418
00419 iter = fSide.begin();
00420 source += currF[*iter];
00421 iter++;
00422
00423 for (; iter != fSide.end(); iter++) {
00424 source += "^_^";
00425 source += currF[*iter];
00426 }
00427
00428 iter = fSide.begin();
00429 j1 = *iter + startIndex;
00430 iter++;
00431
00432 generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
00433
00434
00435 for (; iter != fSide.end(); iter++) {
00436 j1 = *iter + startIndex;
00437 generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
00438 }
00439
00440 targetIndex++;
00441
00442 while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) {
00443 targetIndex++;
00444 }
00445
00446 if(sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
00447 english = currE[targetIndex];
00448 generateDeleteOperations(english,targetIndex, doneTargetIndexes);
00449 }
00450 }
00451
00452
00453
00454
00455
00456 }
00457
00458 void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
00459 {
00460 set <int> :: iterator iter;
00461
00462 int sz = eSide.size();
00463 vector <int> t;
00464
00465 for (iter = eSide.begin(); iter != eSide.end(); iter++) {
00466 t = tS[*iter];
00467
00468 for (size_t i = 0; i < t.size(); i++) {
00469 fSide.insert(t[i]);
00470 }
00471
00472 }
00473
00474 for (iter = fSide.begin(); iter != fSide.end(); iter++) {
00475
00476 t = sT[*iter];
00477
00478 for (size_t i = 0 ; i<t.size(); i++) {
00479 eSide.insert(t[i]);
00480 }
00481
00482 }
00483
00484 if (eSide.size () > sz) {
00485 getMeCepts(eSide,fSide,tS,sT);
00486 }
00487
00488 }
00489
00490 void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
00491 {
00492
00493 std::map <int , vector <int> > sT;
00494 std::map <int , vector <int> > tS;
00495 std::set <int> eSide;
00496 std::set <int> fSide;
00497 std::set <int> :: iterator iter;
00498 std :: map <int , vector <int> > :: iterator iter2;
00499 std :: pair < set <int> , set <int> > cept;
00500 int src;
00501 int tgt;
00502
00503
00504 for (size_t i = 0; i < align.size(); i+=2) {
00505 src = align[i];
00506 tgt = align[i+1];
00507 tS[tgt].push_back(src);
00508 sT[src].push_back(tgt);
00509 }
00510
00511 for (int i = startIndex; i<= endIndex; i++) {
00512 if (sT.find(i-startIndex) == sT.end()) {
00513 targetNullWords.insert(i);
00514 }
00515 }
00516
00517 for (int i = 0; i < targetPhraseLength; i++) {
00518 if (tS.find(i) == tS.end()) {
00519 sourceNullWords.insert(i);
00520 }
00521 }
00522
00523
00524 while (tS.size() != 0 && sT.size() != 0) {
00525
00526 iter2 = tS.begin();
00527
00528 eSide.clear();
00529 fSide.clear();
00530 eSide.insert (iter2->first);
00531
00532 getMeCepts(eSide, fSide, tS , sT);
00533
00534 for (iter = eSide.begin(); iter != eSide.end(); iter++) {
00535 iter2 = tS.find(*iter);
00536 tS.erase(iter2);
00537 }
00538
00539 for (iter = fSide.begin(); iter != fSide.end(); iter++) {
00540 iter2 = sT.find(*iter);
00541 sT.erase(iter2);
00542 }
00543
00544 cept = make_pair (fSide , eSide);
00545 ceptsInPhrase.push_back(cept);
00546 }
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586 }
00587
00588 void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
00589 {
00590 scores.clear();
00591 scores.push_back(opProb);
00592
00593 if (numFeatures == 1)
00594 return;
00595
00596 scores.push_back(gapWidth);
00597 scores.push_back(gapCount);
00598 scores.push_back(openGapCount);
00599 scores.push_back(deletionCount);
00600 }
00601
00602
00603 }
00604