00001 #include <cstdio>
00002 #include <iostream>
00003 #include <fstream>
00004 #include <vector>
00005 #include <string>
00006 #include <cstdlib>
00007 #include <cassert>
00008 #include <cstring>
00009 #include <sstream>
00010 #include <map>
00011 #include <set>
00012 #include <vector>
00013 #include <limits>
00014
00015 #include "tables-core.h"
00016 #include "InputFileStream.h"
00017 #include "OutputFileStream.h"
00018 #include "PhraseExtractionOptions.h"
00019 #include "SentenceAlignmentWithSyntax.h"
00020 #include "SyntaxNode.h"
00021 #include "moses/Util.h"
00022
00023 using namespace std;
00024 using namespace MosesTraining;
00025
00026 namespace MosesTraining
00027 {
00028
00029
00030 typedef pair <int, int> HPhraseVertex;
00031
00032
00033
00034 typedef pair<HPhraseVertex, HPhraseVertex> HPhrase;
00035
00036
00037 typedef vector < HPhrase > HPhraseVector;
00038
00039
00040
00041 typedef map <int, set<int> > HSentenceVertices;
00042
00043 REO_POS getOrientWordModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
00044 int, int, int, int, int, int, int,
00045 bool (*)(int, int), bool (*)(int, int));
00046 REO_POS getOrientPhraseModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
00047 int, int, int, int, int, int, int,
00048 bool (*)(int, int), bool (*)(int, int),
00049 const HSentenceVertices &, const HSentenceVertices &);
00050 REO_POS getOrientHierModel(SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool,
00051 int, int, int, int, int, int, int,
00052 bool (*)(int, int), bool (*)(int, int),
00053 const HSentenceVertices &, const HSentenceVertices &,
00054 const HSentenceVertices &, const HSentenceVertices &,
00055 REO_POS);
00056
00057 void insertVertex(HSentenceVertices &, int, int);
00058 void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &,
00059 int, int, int, int);
00060 string getOrientString(REO_POS, REO_MODEL_TYPE);
00061
00062 bool ge(int, int);
00063 bool le(int, int);
00064 bool lt(int, int);
00065
00066 bool isAligned (SentenceAlignmentWithSyntax &, int, int);
00067
00068 int sentenceOffset = 0;
00069
00070
00071 class ExtractTask
00072 {
00073 public:
00074 ExtractTask(
00075 size_t id, SentenceAlignmentWithSyntax &sentence,
00076 PhraseExtractionOptions &initoptions,
00077 Moses::OutputFileStream &extractFile,
00078 Moses::OutputFileStream &extractFileInv,
00079 Moses::OutputFileStream &extractFileOrientation,
00080 Moses::OutputFileStream &extractFileContext,
00081 Moses::OutputFileStream &extractFileContextInv):
00082 m_sentence(sentence),
00083 m_options(initoptions),
00084 m_extractFile(extractFile),
00085 m_extractFileInv(extractFileInv),
00086 m_extractFileOrientation(extractFileOrientation),
00087 m_extractFileContext(extractFileContext),
00088 m_extractFileContextInv(extractFileContextInv) {}
00089 void Run();
00090 private:
00091 vector< string > m_extractedPhrases;
00092 vector< string > m_extractedPhrasesInv;
00093 vector< string > m_extractedPhrasesOri;
00094 vector< string > m_extractedPhrasesSid;
00095 vector< string > m_extractedPhrasesContext;
00096 vector< string > m_extractedPhrasesContextInv;
00097 void extractBase();
00098 void extract();
00099 void addPhrase(int, int, int, int, const std::string &);
00100 void writePhrasesToFile();
00101 bool checkPlaceholders(int startE, int endE, int startF, int endF) const;
00102 bool isPlaceholder(const string &word) const;
00103 bool checkTargetConstituentBoundaries(int startE, int endE, int startF, int endF,
00104 ostringstream &outextractstrPhraseProperties) const;
00105 void getOrientationInfo(int startE, int endE, int startF, int endF,
00106 const HSentenceVertices& inTopLeft,
00107 const HSentenceVertices& inTopRight,
00108 const HSentenceVertices& inBottomLeft,
00109 const HSentenceVertices& inBottomRight,
00110 const HSentenceVertices& outTopLeft,
00111 const HSentenceVertices& outTopRight,
00112 const HSentenceVertices& outBottomLeft,
00113 const HSentenceVertices& outBottomRight,
00114 std::string &orientationInfo) const;
00115
00116 SentenceAlignmentWithSyntax &m_sentence;
00117 const PhraseExtractionOptions &m_options;
00118 Moses::OutputFileStream &m_extractFile;
00119 Moses::OutputFileStream &m_extractFileInv;
00120 Moses::OutputFileStream &m_extractFileOrientation;
00121 Moses::OutputFileStream &m_extractFileContext;
00122 Moses::OutputFileStream &m_extractFileContextInv;
00123 };
00124 }
00125
00126 int main(int argc, char* argv[])
00127 {
00128 cerr << "PhraseExtract v1.5, written by Philipp Koehn et al." << std::endl
00129 << "phrase extraction from an aligned parallel corpus" << std::endl;
00130
00131 if (argc < 6) {
00132 cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
00133 cerr << "| --OnlyOutputSpanInfo | --NoTTable | --GZOutput | --IncludeSentenceId | --SentenceOffset n | --InstanceWeights filename ";
00134 cerr << "| --TargetConstituentConstrained | --TargetConstituentBoundaries ]" << std::endl;
00135 exit(1);
00136 }
00137
00138 Moses::OutputFileStream extractFile;
00139 Moses::OutputFileStream extractFileInv;
00140 Moses::OutputFileStream extractFileOrientation;
00141 Moses::OutputFileStream extractFileContext;
00142 Moses::OutputFileStream extractFileContextInv;
00143 const char* const &fileNameE = argv[1];
00144 const char* const &fileNameF = argv[2];
00145 const char* const &fileNameA = argv[3];
00146 const string fileNameExtract = string(argv[4]);
00147 PhraseExtractionOptions options(atoi(argv[5]));
00148
00149 for(int i=6; i<argc; i++) {
00150 if (strcmp(argv[i],"--OnlyOutputSpanInfo") == 0) {
00151 options.initOnlyOutputSpanInfo(true);
00152 } else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
00153 options.initOrientationFlag(true);
00154 } else if (strcmp(argv[i],"--TargetConstituentConstrained") == 0) {
00155 options.initTargetConstituentConstrainedFlag(true);
00156 } else if (strcmp(argv[i],"--TargetConstituentBoundaries") == 0) {
00157 options.initTargetConstituentBoundariesFlag(true);
00158 } else if (strcmp(argv[i],"--FlexibilityScore") == 0) {
00159 options.initFlexScoreFlag(true);
00160 } else if (strcmp(argv[i],"--SingleWordHeuristic") == 0) {
00161 options.initSingleWordHeuristicFlag(true);
00162 } else if (strcmp(argv[i],"--NoTTable") == 0) {
00163 options.initTranslationFlag(false);
00164 } else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
00165 options.initIncludeSentenceIdFlag(true);
00166 } else if (strcmp(argv[i], "--SentenceOffset") == 0) {
00167 if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
00168 cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
00169 exit(1);
00170 }
00171 sentenceOffset = atoi(argv[++i]);
00172 } else if (strcmp(argv[i], "--GZOutput") == 0) {
00173 options.initGzOutput(true);
00174 } else if (strcmp(argv[i], "--InstanceWeights") == 0) {
00175 if (i+1 >= argc) {
00176 cerr << "extract: syntax error, used switch --InstanceWeights without file name" << endl;
00177 exit(1);
00178 }
00179 options.initInstanceWeightsFile(argv[++i]);
00180 } else if (strcmp(argv[i], "--Debug") == 0) {
00181 options.debug = true;
00182 } else if(strcmp(argv[i],"--model") == 0) {
00183 if (i+1 >= argc) {
00184 cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
00185 exit(1);
00186 }
00187 char* modelParams = argv[++i];
00188 char* modelName = strtok(modelParams, "-");
00189 char* modelType = strtok(NULL, "-");
00190
00191
00192
00193 if(strcmp(modelName, "wbe") == 0) {
00194 options.initWordModel(true);
00195 if(strcmp(modelType, "msd") == 0)
00196 options.initWordType(REO_MSD);
00197 else if(strcmp(modelType, "mslr") == 0)
00198 options.initWordType(REO_MSLR);
00199 else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
00200 options.initWordType(REO_MONO);
00201 else {
00202 cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
00203 exit(1);
00204 }
00205 } else if(strcmp(modelName, "phrase") == 0) {
00206 options.initPhraseModel(true);
00207 if(strcmp(modelType, "msd") == 0)
00208 options.initPhraseType(REO_MSD);
00209 else if(strcmp(modelType, "mslr") == 0)
00210 options.initPhraseType(REO_MSLR);
00211 else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
00212 options.initPhraseType(REO_MONO);
00213 else {
00214 cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
00215 exit(1);
00216 }
00217 } else if(strcmp(modelName, "hier") == 0) {
00218 options.initHierModel(true);
00219 if(strcmp(modelType, "msd") == 0)
00220 options.initHierType(REO_MSD);
00221 else if(strcmp(modelType, "mslr") == 0)
00222 options.initHierType(REO_MSLR);
00223 else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
00224 options.initHierType(REO_MONO);
00225 else {
00226 cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
00227 exit(1);
00228 }
00229 } else {
00230 cerr << "extract: syntax error, unknown reordering model: " << modelName << endl;
00231 exit(1);
00232 }
00233
00234 options.initAllModelsOutputFlag(true);
00235 } else if (strcmp(argv[i], "--Placeholders") == 0) {
00236 ++i;
00237 string str = argv[i];
00238 Moses::Tokenize(options.placeholders, str.c_str(), ",");
00239 } else {
00240 cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'" << std::endl;
00241 exit(1);
00242 }
00243 }
00244
00245
00246
00247 if(options.isOrientationFlag() && !options.isAllModelsOutputFlag()) {
00248 options.initWordModel(true);
00249 options.initWordType(REO_MSD);
00250 }
00251
00252
00253 Moses::InputFileStream eFile(fileNameE);
00254 Moses::InputFileStream fFile(fileNameF);
00255 Moses::InputFileStream aFile(fileNameA);
00256
00257 istream *eFileP = &eFile;
00258 istream *fFileP = &fFile;
00259 istream *aFileP = &aFile;
00260
00261 istream *iwFileP = NULL;
00262 auto_ptr<Moses::InputFileStream> instanceWeightsFile;
00263 if (options.getInstanceWeightsFile().length()) {
00264 instanceWeightsFile.reset(new Moses::InputFileStream(options.getInstanceWeightsFile()));
00265 iwFileP = instanceWeightsFile.get();
00266 }
00267
00268
00269 if (options.isTranslationFlag()) {
00270 string fileNameExtractInv = fileNameExtract + ".inv" + (options.isGzOutput()?".gz":"");
00271 extractFile.Open( (fileNameExtract + (options.isGzOutput()?".gz":"")).c_str());
00272 extractFileInv.Open(fileNameExtractInv.c_str());
00273 }
00274 if (options.isOrientationFlag()) {
00275 string fileNameExtractOrientation = fileNameExtract + ".o" + (options.isGzOutput()?".gz":"");
00276 extractFileOrientation.Open(fileNameExtractOrientation.c_str());
00277 }
00278 if (options.isFlexScoreFlag()) {
00279 string fileNameExtractContext = fileNameExtract + ".context" + (options.isGzOutput()?".gz":"");
00280 string fileNameExtractContextInv = fileNameExtract + ".context.inv" + (options.isGzOutput()?".gz":"");
00281 extractFileContext.Open(fileNameExtractContext.c_str());
00282 extractFileContextInv.Open(fileNameExtractContextInv.c_str());
00283 }
00284
00285
00286 set< string > targetLabelCollection, sourceLabelCollection;
00287 map< string, int > targetTopLabelCollection, sourceTopLabelCollection;
00288 const bool targetSyntax = true;
00289
00290 int i = sentenceOffset;
00291
00292 string englishString, foreignString, alignmentString, weightString;
00293
00294 while (getline(*eFileP, englishString)) {
00295
00296 i++;
00297 if (i%10000 == 0) cerr << "." << flush;
00298
00299 getline(*fFileP, foreignString);
00300 getline(*aFileP, alignmentString);
00301 if (iwFileP) {
00302 getline(*iwFileP, weightString);
00303 }
00304
00305 SentenceAlignmentWithSyntax sentence
00306 (targetLabelCollection, sourceLabelCollection,
00307 targetTopLabelCollection, sourceTopLabelCollection,
00308 targetSyntax, false);
00309
00310
00311 if (options.isOnlyOutputSpanInfo()) {
00312 cout << "LOG: SRC: " << foreignString << endl;
00313 cout << "LOG: TGT: " << englishString << endl;
00314 cout << "LOG: ALT: " << alignmentString << endl;
00315 cout << "LOG: PHRASES_BEGIN:" << endl;
00316 }
00317 if (sentence.create( englishString.c_str(),
00318 foreignString.c_str(),
00319 alignmentString.c_str(),
00320 weightString.c_str(),
00321 i, false)) {
00322 if (options.placeholders.size()) {
00323 sentence.invertAlignment();
00324 }
00325 ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation, extractFileContext, extractFileContextInv);
00326 task->Run();
00327 delete task;
00328
00329 }
00330 if (options.isOnlyOutputSpanInfo()) cout << "LOG: PHRASES_END:" << endl;
00331 }
00332
00333 eFile.Close();
00334 fFile.Close();
00335 aFile.Close();
00336
00337
00338 if (!options.isOnlyOutputSpanInfo()) {
00339 if (options.isTranslationFlag()) {
00340 extractFile.Close();
00341 extractFileInv.Close();
00342
00343 }
00344 if (options.isOrientationFlag()) {
00345 extractFileOrientation.Close();
00346 }
00347
00348 if (options.isFlexScoreFlag()) {
00349 extractFileContext.Close();
00350 extractFileContextInv.Close();
00351 }
00352 }
00353
00354
00355 cerr << endl;
00356 }
00357
00358 namespace MosesTraining
00359 {
00360 void ExtractTask::Run()
00361 {
00362 extract();
00363 writePhrasesToFile();
00364 m_extractedPhrases.clear();
00365 m_extractedPhrasesInv.clear();
00366 m_extractedPhrasesOri.clear();
00367 m_extractedPhrasesSid.clear();
00368 m_extractedPhrasesContext.clear();
00369 m_extractedPhrasesContextInv.clear();
00370
00371 }
00372
00373 void ExtractTask::extract()
00374 {
00375 int countE = m_sentence.target.size();
00376 int countF = m_sentence.source.size();
00377
00378 HPhraseVector inboundPhrases;
00379
00380 HSentenceVertices inTopLeft;
00381 HSentenceVertices inTopRight;
00382 HSentenceVertices inBottomLeft;
00383 HSentenceVertices inBottomRight;
00384
00385 HSentenceVertices outTopLeft;
00386 HSentenceVertices outTopRight;
00387 HSentenceVertices outBottomLeft;
00388 HSentenceVertices outBottomRight;
00389
00390 bool relaxLimit = m_options.isHierModel();
00391
00392
00393
00394 for (int startE=0; startE<countE; startE++) {
00395 for (int endE=startE;
00396 (endE<countE && (relaxLimit || endE<startE+m_options.maxPhraseLength));
00397 endE++) {
00398
00399 int minF = std::numeric_limits<int>::max();
00400 int maxF = -1;
00401 vector< int > usedF = m_sentence.alignedCountS;
00402 for (int ei=startE; ei<=endE; ei++) {
00403 for (size_t i=0; i<m_sentence.alignedToT[ei].size(); i++) {
00404 int fi = m_sentence.alignedToT[ei][i];
00405 if (fi<minF) {
00406 minF = fi;
00407 }
00408 if (fi>maxF) {
00409 maxF = fi;
00410 }
00411 usedF[ fi ]--;
00412 }
00413 }
00414
00415 if (maxF >= 0 &&
00416 (relaxLimit || maxF-minF < m_options.maxPhraseLength)) {
00417
00418
00419 bool out_of_bounds = false;
00420 for (int fi=minF; fi<=maxF && !out_of_bounds; fi++)
00421 if (usedF[fi]>0) {
00422
00423 out_of_bounds = true;
00424 }
00425
00426
00427 if (!out_of_bounds) {
00428
00429 for (int startF=minF;
00430 (startF>=0 &&
00431 (relaxLimit || startF>maxF-m_options.maxPhraseLength) &&
00432 (startF==minF || m_sentence.alignedCountS[startF]==0));
00433 startF--) {
00434
00435 for (int endF=maxF;
00436 (endF<countF &&
00437 (relaxLimit || endF<startF+m_options.maxPhraseLength) &&
00438 (endF==maxF || m_sentence.alignedCountS[endF]==0));
00439 endF++) {
00440
00441 if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) {
00442 inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE),
00443 HPhraseVertex(endF,endE)));
00444 insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight,
00445 startF, startE, endF, endE);
00446 } else {
00447 insertPhraseVertices(outTopLeft, outTopRight, outBottomLeft, outBottomRight,
00448 startF, startE, endF, endE);
00449 }
00450 }
00451 }
00452 }
00453 }
00454 }
00455 }
00456
00457 std::string orientationInfo = "";
00458
00459 for (size_t i = 0; i < inboundPhrases.size(); i++) {
00460
00461 int startF = inboundPhrases[i].first.first;
00462 int startE = inboundPhrases[i].first.second;
00463 int endF = inboundPhrases[i].second.first;
00464 int endE = inboundPhrases[i].second.second;
00465
00466 getOrientationInfo(startE, endE, startF, endF,
00467 inTopLeft, inTopRight, inBottomLeft, inBottomRight,
00468 outTopLeft, outTopRight, outBottomLeft, outBottomRight,
00469 orientationInfo);
00470
00471 addPhrase(startE, endE, startF, endF, orientationInfo);
00472 }
00473
00474 if (m_options.isSingleWordHeuristicFlag()) {
00475
00476 m_sentence.invertAlignment();
00477 for (int ei=0; ei<countE; ei++) {
00478 for (size_t i=0; i<m_sentence.alignedToT[ei].size(); i++) {
00479 int fi = m_sentence.alignedToT[ei][i];
00480 if ((m_sentence.alignedToT[ei].size() > 1) || (m_sentence.alignedToS[fi].size() > 1)) {
00481
00482 if (m_options.isOrientationFlag()) {
00483 getOrientationInfo(ei, ei, fi, fi,
00484 inTopLeft, inTopRight, inBottomLeft, inBottomRight,
00485 outTopLeft, outTopRight, outBottomLeft, outBottomRight,
00486 orientationInfo);
00487 }
00488
00489 addPhrase(ei, ei, fi, fi, orientationInfo);
00490 }
00491 }
00492 }
00493 }
00494 }
00495
00496 void ExtractTask::getOrientationInfo(int startE, int endE, int startF, int endF,
00497 const HSentenceVertices& inTopLeft,
00498 const HSentenceVertices& inTopRight,
00499 const HSentenceVertices& inBottomLeft,
00500 const HSentenceVertices& inBottomRight,
00501 const HSentenceVertices& outTopLeft,
00502 const HSentenceVertices& outTopRight,
00503 const HSentenceVertices& outBottomLeft,
00504 const HSentenceVertices& outBottomRight,
00505 std::string &orientationInfo) const
00506 {
00507 REO_POS wordPrevOrient=UNKNOWN, wordNextOrient=UNKNOWN;
00508 REO_POS phrasePrevOrient=UNKNOWN, phraseNextOrient=UNKNOWN;
00509 REO_POS hierPrevOrient=UNKNOWN, hierNextOrient=UNKNOWN;
00510
00511 bool connectedLeftTopP = isAligned( m_sentence, startF-1, startE-1 );
00512 bool connectedRightTopP = isAligned( m_sentence, endF+1, startE-1 );
00513 bool connectedLeftTopN = isAligned( m_sentence, endF+1, endE+1 );
00514 bool connectedRightTopN = isAligned( m_sentence, startF-1, endE+1 );
00515
00516 const int countF = m_sentence.source.size();
00517
00518 if (m_options.isWordModel()) {
00519 wordPrevOrient = getOrientWordModel(m_sentence, m_options.isWordType(),
00520 connectedLeftTopP, connectedRightTopP,
00521 startF, endF, startE, endE, countF, 0, 1,
00522 &ge, <);
00523 wordNextOrient = getOrientWordModel(m_sentence, m_options.isWordType(),
00524 connectedLeftTopN, connectedRightTopN,
00525 endF, startF, endE, startE, 0, countF, -1,
00526 <, &ge);
00527 }
00528 if (m_options.isPhraseModel()) {
00529 phrasePrevOrient = getOrientPhraseModel(m_sentence, m_options.isPhraseType(),
00530 connectedLeftTopP, connectedRightTopP,
00531 startF, endF, startE, endE, countF-1, 0, 1, &ge, <, inBottomRight, inBottomLeft);
00532 phraseNextOrient = getOrientPhraseModel(m_sentence, m_options.isPhraseType(),
00533 connectedLeftTopN, connectedRightTopN,
00534 endF, startF, endE, startE, 0, countF-1, -1, <, &ge, inBottomLeft, inBottomRight);
00535 }
00536 if (m_options.isHierModel()) {
00537 hierPrevOrient = getOrientHierModel(m_sentence, m_options.isHierType(),
00538 connectedLeftTopP, connectedRightTopP,
00539 startF, endF, startE, endE, countF-1, 0, 1, &ge, <, inBottomRight, inBottomLeft, outBottomRight, outBottomLeft, phrasePrevOrient);
00540 hierNextOrient = getOrientHierModel(m_sentence, m_options.isHierType(),
00541 connectedLeftTopN, connectedRightTopN,
00542 endF, startF, endE, startE, 0, countF-1, -1, <, &ge, inBottomLeft, inBottomRight, outBottomLeft, outBottomRight, phraseNextOrient);
00543 }
00544
00545 if (m_options.isWordModel()) {
00546 orientationInfo = getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType());
00547 } else {
00548 orientationInfo = " | " +
00549 ((m_options.isPhraseModel())? getOrientString(phrasePrevOrient, m_options.isPhraseType()) + " " + getOrientString(phraseNextOrient, m_options.isPhraseType()) : "") + " | " +
00550 ((m_options.isHierModel())? getOrientString(hierPrevOrient, m_options.isHierType()) + " " + getOrientString(hierNextOrient, m_options.isHierType()) : "");
00551 }
00552 }
00553
00554
00555 REO_POS getOrientWordModel(SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
00556 bool connectedLeftTop, bool connectedRightTop,
00557 int startF, int endF, int startE, int endE, int countF, int zero, int unit,
00558 bool (*ge)(int, int), bool (*lt)(int, int) )
00559 {
00560
00561 if( connectedLeftTop && !connectedRightTop)
00562 return LEFT;
00563 if(modelType == REO_MONO)
00564 return UNKNOWN;
00565 if (!connectedLeftTop && connectedRightTop)
00566 return RIGHT;
00567 if(modelType == REO_MSD)
00568 return UNKNOWN;
00569 for(int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit)
00570 connectedLeftTop = isAligned(sentence, indexF, startE-unit);
00571 for(int indexF=endF+2*unit; (*lt)(indexF,countF) && !connectedRightTop; indexF=indexF+unit)
00572 connectedRightTop = isAligned(sentence, indexF, startE-unit);
00573 if(connectedLeftTop && !connectedRightTop)
00574 return DRIGHT;
00575 else if(!connectedLeftTop && connectedRightTop)
00576 return DLEFT;
00577 return UNKNOWN;
00578 }
00579
00580
00581 REO_POS getOrientPhraseModel (SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
00582 bool connectedLeftTop, bool connectedRightTop,
00583 int startF, int endF, int startE, int endE, int countF, int zero, int unit,
00584 bool (*ge)(int, int), bool (*lt)(int, int),
00585 const HSentenceVertices & inBottomRight, const HSentenceVertices & inBottomLeft)
00586 {
00587
00588 HSentenceVertices::const_iterator it;
00589
00590 if((connectedLeftTop && !connectedRightTop) ||
00591
00592
00593 ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
00594 it->second.find(startF-unit) != it->second.end()))
00595 return LEFT;
00596 if(modelType == REO_MONO)
00597 return UNKNOWN;
00598 if((!connectedLeftTop && connectedRightTop) ||
00599 ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() && it->second.find(endF + unit) != it->second.end()))
00600 return RIGHT;
00601 if(modelType == REO_MSD)
00602 return UNKNOWN;
00603 connectedLeftTop = false;
00604 for(int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit)
00605 if ((connectedLeftTop = ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
00606 it->second.find(indexF) != it->second.end())))
00607 return DRIGHT;
00608 connectedRightTop = false;
00609 for(int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit)
00610 if ((connectedRightTop = ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
00611 it->second.find(indexF) != it->second.end())))
00612 return DLEFT;
00613 return UNKNOWN;
00614 }
00615
00616
00617 REO_POS getOrientHierModel (SentenceAlignmentWithSyntax & sentence, REO_MODEL_TYPE modelType,
00618 bool connectedLeftTop, bool connectedRightTop,
00619 int startF, int endF, int startE, int endE, int countF, int zero, int unit,
00620 bool (*ge)(int, int), bool (*lt)(int, int),
00621 const HSentenceVertices & inBottomRight, const HSentenceVertices & inBottomLeft,
00622 const HSentenceVertices & outBottomRight, const HSentenceVertices & outBottomLeft,
00623 REO_POS phraseOrient)
00624 {
00625
00626 HSentenceVertices::const_iterator it;
00627
00628 if(phraseOrient == LEFT ||
00629 (connectedLeftTop && !connectedRightTop) ||
00630
00631
00632 ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
00633 it->second.find(startF-unit) != it->second.end()) ||
00634 ((it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
00635 it->second.find(startF-unit) != it->second.end()))
00636 return LEFT;
00637 if(modelType == REO_MONO)
00638 return UNKNOWN;
00639 if(phraseOrient == RIGHT ||
00640 (!connectedLeftTop && connectedRightTop) ||
00641 ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
00642 it->second.find(endF + unit) != it->second.end()) ||
00643 ((it = outBottomLeft.find(startE - unit)) != outBottomLeft.end() &&
00644 it->second.find(endF + unit) != it->second.end()))
00645 return RIGHT;
00646 if(modelType == REO_MSD)
00647 return UNKNOWN;
00648 if(phraseOrient != UNKNOWN)
00649 return phraseOrient;
00650 connectedLeftTop = false;
00651 for(int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit) {
00652 if((connectedLeftTop = (it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
00653 it->second.find(indexF) != it->second.end()) ||
00654 (connectedLeftTop = (it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
00655 it->second.find(indexF) != it->second.end()))
00656 return DRIGHT;
00657 }
00658 connectedRightTop = false;
00659 for(int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit) {
00660 if((connectedRightTop = (it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
00661 it->second.find(indexF) != it->second.end()) ||
00662 (connectedRightTop = (it = outBottomLeft.find(startE - unit)) != outBottomLeft.end() &&
00663 it->second.find(indexF) != it->second.end()))
00664 return DLEFT;
00665 }
00666 return UNKNOWN;
00667 }
00668
00669 bool isAligned ( SentenceAlignmentWithSyntax &sentence, int fi, int ei )
00670 {
00671 if (ei == -1 && fi == -1)
00672 return true;
00673 if (ei <= -1 || fi <= -1)
00674 return false;
00675 if ((size_t)ei == sentence.target.size() && (size_t)fi == sentence.source.size())
00676 return true;
00677 if ((size_t)ei >= sentence.target.size() || (size_t)fi >= sentence.source.size())
00678 return false;
00679 for(size_t i=0; i<sentence.alignedToT[ei].size(); i++)
00680 if (sentence.alignedToT[ei][i] == fi)
00681 return true;
00682 return false;
00683 }
00684
// "Greater than or equal" as a plain function, usable as a function pointer.
bool ge(int first, int second)
{
  return !(first < second);
}
00689
// "Less than or equal" as a plain function, usable as a function pointer.
bool le(int first, int second)
{
  return !(second < first);
}
00694
// "Strictly less than" as a plain function, usable as a function pointer.
bool lt(int first, int second)
{
  return second > first;
}
00699
00700 void insertVertex( HSentenceVertices & corners, int x, int y )
00701 {
00702 set<int> tmp;
00703 tmp.insert(x);
00704 pair< HSentenceVertices::iterator, bool > ret = corners.insert( pair<int, set<int> > (y, tmp) );
00705 if (ret.second == false) {
00706 ret.first->second.insert(x);
00707 }
00708 }
00709
00710 void insertPhraseVertices(
00711 HSentenceVertices & topLeft,
00712 HSentenceVertices & topRight,
00713 HSentenceVertices & bottomLeft,
00714 HSentenceVertices & bottomRight,
00715 int startF, int startE, int endF, int endE)
00716 {
00717
00718 insertVertex(topLeft, startF, startE);
00719 insertVertex(topRight, endF, startE);
00720 insertVertex(bottomLeft, startF, endE);
00721 insertVertex(bottomRight, endF, endE);
00722 }
00723
00724 string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
00725 {
00726 switch(orient) {
00727 case LEFT:
00728 return "mono";
00729 break;
00730 case RIGHT:
00731 return "swap";
00732 break;
00733 case DRIGHT:
00734 return "dright";
00735 break;
00736 case DLEFT:
00737 return "dleft";
00738 break;
00739 case UNKNOWN:
00740 switch(modelType) {
00741 case REO_MONO:
00742 return "nomono";
00743 break;
00744 case REO_MSD:
00745 return "other";
00746 break;
00747 case REO_MSLR:
00748 return "dright";
00749 break;
00750 }
00751 break;
00752 }
00753 return "";
00754 }
00755
00756
00757 bool ExtractTask::checkTargetConstituentBoundaries(int startE, int endE, int startF, int endF,
00758 ostringstream &outextractstrPhraseProperties) const
00759 {
00760 if (m_options.isTargetConstituentBoundariesFlag()) {
00761 outextractstrPhraseProperties << " {{TargetConstituentBoundariesLeft ";
00762 }
00763
00764 bool validTargetConstituentBoundaries = false;
00765 bool outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = true;
00766
00767 if (m_options.isTargetConstituentBoundariesFlag()) {
00768 if (startE==0) {
00769 outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = false;
00770 outextractstrPhraseProperties << "BOS_";
00771 }
00772 }
00773
00774 if (!m_sentence.targetTree.HasNodeStartingAtPosition(startE)) {
00775
00776 validTargetConstituentBoundaries = false;
00777
00778 } else {
00779
00780 const std::vector< SyntaxNode* >& startingNodes = m_sentence.targetTree.GetNodesByStartPosition(startE);
00781 for ( std::vector< SyntaxNode* >::const_reverse_iterator iter = startingNodes.rbegin(); iter != startingNodes.rend(); ++iter ) {
00782 if ( (*iter)->end == endE ) {
00783 validTargetConstituentBoundaries = true;
00784 if (!m_options.isTargetConstituentBoundariesFlag()) {
00785 break;
00786 }
00787 }
00788 if (m_options.isTargetConstituentBoundariesFlag()) {
00789 if (outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst) {
00790 outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = false;
00791 } else {
00792 outextractstrPhraseProperties << "<";
00793 }
00794 outextractstrPhraseProperties << (*iter)->label;
00795 }
00796 }
00797 }
00798
00799 if (m_options.isTargetConstituentBoundariesFlag()) {
00800 if (outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst) {
00801 outextractstrPhraseProperties << "<";
00802 }
00803 outextractstrPhraseProperties << "}}";
00804 }
00805
00806
00807 if (m_options.isTargetConstituentConstrainedFlag() && !validTargetConstituentBoundaries) {
00808
00809 bool relaxedValidTargetConstituentBoundaries = false;
00810 int relaxedStartE = startE;
00811 int relaxedEndE = endE;
00812 const std::string punctuation = ",;.:!?";
00813 while ( (relaxedStartE < endE) &&
00814 (m_sentence.target[relaxedStartE].size() == 1) &&
00815 (punctuation.find(m_sentence.target[relaxedStartE].at(0)) != std::string::npos) ) {
00816 ++relaxedStartE;
00817 }
00818 while ( (relaxedEndE > relaxedStartE) &&
00819 (m_sentence.target[relaxedEndE].size() == 1) &&
00820 (punctuation.find(m_sentence.target[relaxedEndE].at(0)) != std::string::npos) ) {
00821 --relaxedEndE;
00822 }
00823
00824 if ( (relaxedStartE != startE) || (relaxedEndE !=endE) ) {
00825 const std::vector< SyntaxNode* >& startingNodes = m_sentence.targetTree.GetNodesByStartPosition(relaxedStartE);
00826 for ( std::vector< SyntaxNode* >::const_reverse_iterator iter = startingNodes.rbegin();
00827 (iter != startingNodes.rend() && !relaxedValidTargetConstituentBoundaries);
00828 ++iter ) {
00829 if ( (*iter)->end == relaxedEndE ) {
00830 relaxedValidTargetConstituentBoundaries = true;
00831 }
00832 }
00833 }
00834
00835 if (!relaxedValidTargetConstituentBoundaries) {
00836 return false;
00837 }
00838 }
00839
00840
00841 if (m_options.isTargetConstituentBoundariesFlag()) {
00842
00843 outextractstrPhraseProperties << " {{TargetConstituentBoundariesRightAdjacent ";
00844 outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = true;
00845
00846 if (endE==(int)m_sentence.target.size()-1) {
00847
00848 outextractstrPhraseProperties << "EOS_";
00849 outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = false;
00850
00851 } else {
00852
00853 const std::vector< SyntaxNode* >& adjacentNodes = m_sentence.targetTree.GetNodesByStartPosition(endE+1);
00854 for ( std::vector< SyntaxNode* >::const_reverse_iterator iter = adjacentNodes.rbegin(); iter != adjacentNodes.rend(); ++iter ) {
00855 if (outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst) {
00856 outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst = false;
00857 } else {
00858 outextractstrPhraseProperties << "<";
00859 }
00860 outextractstrPhraseProperties << (*iter)->label;
00861 }
00862 }
00863
00864 if (outextractstrPhrasePropertyTargetConstituentBoundariesIsFirst) {
00865 outextractstrPhraseProperties << "<";
00866 }
00867 outextractstrPhraseProperties << "}}";
00868 }
00869
00870 return true;
00871 }
00872
00873
00874 void ExtractTask::addPhrase( int startE, int endE, int startF, int endF,
00875 const std::string &orientationInfo)
00876 {
00877 ostringstream outextractstrPhraseProperties;
00878 if (m_options.isTargetConstituentBoundariesFlag() || m_options.isTargetConstituentConstrainedFlag()) {
00879 bool isTargetConstituentCovered = checkTargetConstituentBoundaries(startE, endE, startF, endF, outextractstrPhraseProperties);
00880 if (m_options.isTargetConstituentBoundariesFlag() && !isTargetConstituentCovered) {
00881 return;
00882 }
00883 }
00884
00885 if (m_options.placeholders.size() && !checkPlaceholders(startE, endE, startF, endF)) {
00886 return;
00887 }
00888
00889 if (m_options.isOnlyOutputSpanInfo()) {
00890 cout << startF << " " << endF << " " << startE << " " << endE << std::endl;
00891 return;
00892 }
00893
00894 ostringstream outextractstr;
00895 ostringstream outextractstrInv;
00896 ostringstream outextractstrOrientation;
00897
00898 if (m_options.debug) {
00899 outextractstr << "sentenceID=" << m_sentence.sentenceID << " ";
00900 outextractstrInv << "sentenceID=" << m_sentence.sentenceID << " ";
00901 outextractstrOrientation << "sentenceID=" << m_sentence.sentenceID << " ";
00902 }
00903
00904
00905 for(int fi=startF; fi<=endF; fi++) {
00906 if (m_options.isTranslationFlag()) outextractstr << m_sentence.source[fi] << " ";
00907 if (m_options.isOrientationFlag()) outextractstrOrientation << m_sentence.source[fi] << " ";
00908 }
00909 if (m_options.isTranslationFlag()) outextractstr << "||| ";
00910 if (m_options.isOrientationFlag()) outextractstrOrientation << "||| ";
00911
00912
00913
00914 for(int ei=startE; ei<=endE; ei++) {
00915
00916 if (m_options.isTranslationFlag()) {
00917 outextractstr << m_sentence.target[ei] << " ";
00918 outextractstrInv << m_sentence.target[ei] << " ";
00919 }
00920
00921 if (m_options.isOrientationFlag()) {
00922 outextractstrOrientation << m_sentence.target[ei] << " ";
00923 }
00924 }
00925 if (m_options.isTranslationFlag()) outextractstr << "|||";
00926 if (m_options.isTranslationFlag()) outextractstrInv << "||| ";
00927 if (m_options.isOrientationFlag()) outextractstrOrientation << "||| ";
00928
00929
00930
00931 if (m_options.isTranslationFlag()) {
00932 for(int fi=startF; fi<=endF; fi++)
00933 outextractstrInv << m_sentence.source[fi] << " ";
00934 outextractstrInv << "|||";
00935 }
00936
00937
00938 if (m_options.isTranslationFlag()) {
00939 if (m_options.isSingleWordHeuristicFlag() && (startE==endE) && (startF==endF)) {
00940 outextractstr << " 0-0";
00941 outextractstrInv << " 0-0";
00942 } else {
00943 for(int ei=startE; ei<=endE; ei++) {
00944 for(unsigned int i=0; i<m_sentence.alignedToT[ei].size(); i++) {
00945 int fi = m_sentence.alignedToT[ei][i];
00946 outextractstr << " " << fi-startF << "-" << ei-startE;
00947 outextractstrInv << " " << ei-startE << "-" << fi-startF;
00948 }
00949 }
00950 }
00951 }
00952
00953 if (m_options.isOrientationFlag())
00954 outextractstrOrientation << orientationInfo;
00955
00956 if (m_options.isIncludeSentenceIdFlag()) {
00957 outextractstr << " ||| " << m_sentence.sentenceID;
00958 }
00959
00960 if (m_options.getInstanceWeightsFile().length()) {
00961 if (m_options.isTranslationFlag()) {
00962 outextractstr << " ||| " << m_sentence.weightString;
00963 outextractstrInv << " ||| " << m_sentence.weightString;
00964 }
00965 if (m_options.isOrientationFlag()) {
00966 outextractstrOrientation << " ||| " << m_sentence.weightString;
00967 }
00968 }
00969
00970 outextractstr << outextractstrPhraseProperties.str();
00971
00972
00973
00974 if (m_options.isFlexScoreFlag()) {
00975
00976 ostringstream outextractstrContext;
00977 ostringstream outextractstrContextInv;
00978
00979 for(int fi=startF; fi<=endF; fi++) {
00980 outextractstrContext << m_sentence.source[fi] << " ";
00981 }
00982 outextractstrContext << "||| ";
00983
00984
00985 for(int ei=startE; ei<=endE; ei++) {
00986 outextractstrContext << m_sentence.target[ei] << " ";
00987 outextractstrContextInv << m_sentence.target[ei] << " ";
00988 }
00989 outextractstrContext << "||| ";
00990 outextractstrContextInv << "||| ";
00991
00992 for(int fi=startF; fi<=endF; fi++)
00993 outextractstrContextInv << m_sentence.source[fi] << " ";
00994
00995 outextractstrContextInv << "|||";
00996
00997 string strContext = outextractstrContext.str();
00998 string strContextInv = outextractstrContextInv.str();
00999
01000 ostringstream outextractstrContextRight(strContext, ostringstream::app);
01001 ostringstream outextractstrContextRightInv(strContextInv, ostringstream::app);
01002
01003
01004 outextractstrContext << "< ";
01005 if (startF == 0) outextractstrContext << "<s>";
01006 else outextractstrContext << m_sentence.source[startF-1];
01007
01008 outextractstrContextInv << " < ";
01009 if (startE == 0) outextractstrContextInv << "<s>";
01010 else outextractstrContextInv << m_sentence.target[startE-1];
01011
01012
01013 outextractstrContextRight << "> ";
01014 if (endF+1 == (int)m_sentence.source.size()) outextractstrContextRight << "<s>";
01015 else outextractstrContextRight << m_sentence.source[endF+1];
01016
01017 outextractstrContextRightInv << " > ";
01018 if (endE+1 == (int)m_sentence.target.size()) outextractstrContextRightInv << "<s>";
01019 else outextractstrContextRightInv << m_sentence.target[endE+1];
01020
01021 outextractstrContext << std::endl;
01022 outextractstrContextInv << std::endl;
01023 outextractstrContextRight << std::endl;
01024 outextractstrContextRightInv << std::endl;
01025
01026 m_extractedPhrasesContext.push_back(outextractstrContext.str());
01027 m_extractedPhrasesContextInv.push_back(outextractstrContextInv.str());
01028 m_extractedPhrasesContext.push_back(outextractstrContextRight.str());
01029 m_extractedPhrasesContextInv.push_back(outextractstrContextRightInv.str());
01030 }
01031
01032 if (m_options.isTranslationFlag()) outextractstr << std::endl;
01033 if (m_options.isTranslationFlag()) outextractstrInv << std::endl;
01034 if (m_options.isOrientationFlag()) outextractstrOrientation << std::endl;
01035
01036
01037 m_extractedPhrases.push_back(outextractstr.str());
01038 m_extractedPhrasesInv.push_back(outextractstrInv.str());
01039 m_extractedPhrasesOri.push_back(outextractstrOrientation.str());
01040 }
01041
01042
01043 void ExtractTask::writePhrasesToFile()
01044 {
01045
01046 ostringstream outextractFile;
01047 ostringstream outextractFileInv;
01048 ostringstream outextractFileOrientation;
01049 ostringstream outextractFileContext;
01050 ostringstream outextractFileContextInv;
01051
01052 for(vector<string>::const_iterator phrase=m_extractedPhrases.begin(); phrase!=m_extractedPhrases.end(); phrase++) {
01053 outextractFile<<phrase->data();
01054 }
01055 for(vector<string>::const_iterator phrase=m_extractedPhrasesInv.begin(); phrase!=m_extractedPhrasesInv.end(); phrase++) {
01056 outextractFileInv<<phrase->data();
01057 }
01058 for(vector<string>::const_iterator phrase=m_extractedPhrasesOri.begin(); phrase!=m_extractedPhrasesOri.end(); phrase++) {
01059 outextractFileOrientation<<phrase->data();
01060 }
01061 for(vector<string>::const_iterator phrase=m_extractedPhrasesContext.begin(); phrase!=m_extractedPhrasesContext.end(); phrase++) {
01062 outextractFileContext<<phrase->data();
01063 }
01064 for(vector<string>::const_iterator phrase=m_extractedPhrasesContextInv.begin(); phrase!=m_extractedPhrasesContextInv.end(); phrase++) {
01065 outextractFileContextInv<<phrase->data();
01066 }
01067
01068 m_extractFile << outextractFile.str();
01069 m_extractFileInv << outextractFileInv.str();
01070 m_extractFileOrientation << outextractFileOrientation.str();
01071 if (m_options.isFlexScoreFlag()) {
01072 m_extractFileContext << outextractFileContext.str();
01073 m_extractFileContextInv << outextractFileContextInv.str();
01074 }
01075 }
01076
01077
01078
01079 void ExtractTask::extractBase()
01080 {
01081 ostringstream outextractFile;
01082 ostringstream outextractFileInv;
01083
01084 int countF = m_sentence.source.size();
01085 for(int startF=0; startF<countF; startF++) {
01086 for(int endF=startF;
01087 (endF<countF && endF<startF+m_options.maxPhraseLength);
01088 endF++) {
01089 for(int fi=startF; fi<=endF; fi++) {
01090 outextractFile << m_sentence.source[fi] << " ";
01091 }
01092 outextractFile << "|||" << endl;
01093 }
01094 }
01095
01096 int countE = m_sentence.target.size();
01097 for(int startE=0; startE<countE; startE++) {
01098 for(int endE=startE;
01099 (endE<countE && endE<startE+m_options.maxPhraseLength);
01100 endE++) {
01101 for(int ei=startE; ei<=endE; ei++) {
01102 outextractFileInv << m_sentence.target[ei] << " ";
01103 }
01104 outextractFileInv << "|||" << endl;
01105 }
01106 }
01107 m_extractFile << outextractFile.str();
01108 m_extractFileInv << outextractFileInv.str();
01109
01110 }
01111
01112
01113 bool ExtractTask::checkPlaceholders(int startE, int endE, int startF, int endF) const
01114 {
01115 for (int pos = startF; pos <= endF; ++pos) {
01116 const string &sourceWord = m_sentence.source[pos];
01117 if (isPlaceholder(sourceWord)) {
01118 if (m_sentence.alignedToS.at(pos).size() != 1) {
01119 return false;
01120 } else {
01121
01122 int targetPos = m_sentence.alignedToS.at(pos).at(0);
01123 const string &otherWord = m_sentence.target[targetPos];
01124 if (!isPlaceholder(otherWord)) {
01125 return false;
01126 }
01127 }
01128 }
01129 }
01130
01131 for (int pos = startE; pos <= endE; ++pos) {
01132 const string &targetWord = m_sentence.target[pos];
01133 if (isPlaceholder(targetWord)) {
01134 if (m_sentence.alignedToT.at(pos).size() != 1) {
01135 return false;
01136 } else {
01137
01138 int sourcePos = m_sentence.alignedToT.at(pos).at(0);
01139 const string &otherWord = m_sentence.source[sourcePos];
01140 if (!isPlaceholder(otherWord)) {
01141 return false;
01142 }
01143 }
01144 }
01145 }
01146 return true;
01147 }
01148
01149 bool ExtractTask::isPlaceholder(const string &word) const
01150 {
01151 for (size_t i = 0; i < m_options.placeholders.size(); ++i) {
01152 const string &placeholder = m_options.placeholders[i];
01153 if (word == placeholder) {
01154 return true;
01155 }
01156 }
01157 return false;
01158 }
01159
01160 }