#include <sstream>
#include <assert.h>
#include <cstdlib>
#include <cstring>
#include <map>
#include <set>
#include <vector>
#include <algorithm>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/unordered_map.hpp>
#include "ScoreFeature.h"
#include "tables-core.h"
#include "ExtractionPhrasePair.h"
#include "score.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "moses/Util.h"
Go to the source code of this file.
Namespaces | |
namespace | MosesTraining |
Defines | |
#define | COC_MAX 10 |
Functions | |
std::vector< float > | MosesTraining::orientationClassPriorsL2R (4, 0) |
std::vector< float > | MosesTraining::orientationClassPriorsR2L (4, 0) |
void | processLine (std::string line, int lineID, bool includeSentenceIdFlag, int &sentenceId, PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment, std::string &additionalPropertiesString, float &count, float &pcfgSum) |
void | writeCountOfCounts (const std::string &fileNameCountOfCounts) |
void | writeLeftHandSideLabelCounts (const boost::unordered_map< std::string, float > &countsLabelLHS, const boost::unordered_map< std::string, boost::unordered_map< std::string, float > * > &jointCountsLabelLHS, const std::string &fileNameLeftHandSideSourceLabelCounts, const std::string &fileNameLeftHandSideTargetSourceLabelCounts) |
void | writeLabelSet (const std::set< std::string > &labelSet, const std::string &fileName) |
void | processPhrasePairs (std::vector< ExtractionPhrasePair * > &phrasePairsWithSameSource, std::ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLogProb) |
void | outputPhrasePair (const ExtractionPhrasePair &phrasePair, float, int, std::ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog) |
double | computeLexicalTranslation (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource) |
double | computeUnalignedPenalty (const ALIGNMENT *alignmentTargetToSource) |
void | loadOrientationPriors (const std::string &fileNamePhraseOrientationPriors, std::vector< float > &orientationClassPriorsL2R, std::vector< float > &orientationClassPriorsR2L) |
void | loadFunctionWords (const std::string &fileNameFunctionWords) |
double | computeUnalignedFWPenalty (const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource) |
int | calcCrossedNonTerm (const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource) |
void | printSourcePhrase (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, std::ostream &out) |
void | printTargetPhrase (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, std::ostream &out) |
void | invertAlignment (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) |
size_t | NumNonTerminal (const PHRASE *phraseSource) |
int | main (int argc, char *argv[]) |
bool | calcCrossedNonTerm (size_t targetPos, size_t sourcePos, const ALIGNMENT *alignmentTargetToSource) |
Variables | |
LexicalTable | MosesTraining::lexTable |
bool | MosesTraining::inverseFlag = false |
bool | MosesTraining::pcfgFlag = false |
bool | MosesTraining::phraseOrientationFlag = false |
bool | MosesTraining::treeFragmentsFlag = false |
bool | MosesTraining::partsOfSpeechFlag = false |
bool | MosesTraining::sourceSyntaxLabelsFlag = false |
bool | MosesTraining::sourceSyntaxLabelCountsLHSFlag = false |
bool | MosesTraining::targetSyntacticPreferencesFlag = false |
bool | MosesTraining::unpairedExtractFormatFlag = false |
bool | MosesTraining::conditionOnTargetLhsFlag = false |
bool | MosesTraining::wordAlignmentFlag = true |
bool | MosesTraining::goodTuringFlag = false |
bool | MosesTraining::kneserNeyFlag = false |
bool | MosesTraining::logProbFlag = false |
int | MosesTraining::negLogProb = 1 |
bool | MosesTraining::lexFlag = true |
bool | MosesTraining::unalignedFlag = false |
bool | MosesTraining::unalignedFWFlag = false |
bool | MosesTraining::crossedNonTerm = false |
bool | MosesTraining::spanLength = false |
bool | MosesTraining::ruleLength = false |
bool | MosesTraining::nonTermContext = false |
bool | MosesTraining::nonTermContextTarget = false |
bool | MosesTraining::targetConstituentBoundariesFlag = false |
int | MosesTraining::countOfCounts [COC_MAX+1] |
int | MosesTraining::totalDistinct = 0 |
float | MosesTraining::minCount = 0 |
float | MosesTraining::minCountHierarchical = 0 |
bool | MosesTraining::phraseOrientationPriorsFlag = false |
boost::unordered_map < std::string, float > | MosesTraining::sourceLHSCounts |
boost::unordered_map < std::string, boost::unordered_map < std::string, float > * > | MosesTraining::targetLHSAndSourceLHSJointCounts |
std::set< std::string > | MosesTraining::sourceLabelSet |
std::map< std::string, size_t > | MosesTraining::sourceLabels |
std::vector< std::string > | MosesTraining::sourceLabelsByIndex |
std::set< std::string > | MosesTraining::partsOfSpeechSet |
boost::unordered_map < std::string, float > | MosesTraining::targetSyntacticPreferencesLHSCounts |
boost::unordered_map < std::string, boost::unordered_map < std::string, float > * > | MosesTraining::ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts |
std::set< std::string > | MosesTraining::targetSyntacticPreferencesLabelSet |
std::map< std::string, size_t > | MosesTraining::targetSyntacticPreferencesLabels |
std::vector< std::string > | MosesTraining::targetSyntacticPreferencesLabelsByIndex |
std::set< std::string > | functionWordList |
#define COC_MAX 10
Definition at line 62 of file score-main.cpp.
Referenced by main(), outputPhrasePair(), and writeCountOfCounts().
bool calcCrossedNonTerm (size_t targetPos, size_t sourcePos, const ALIGNMENT *alignmentTargetToSource)
Definition at line 1076 of file score-main.cpp.
int calcCrossedNonTerm (const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource)
Definition at line 1100 of file score-main.cpp.
References MosesTraining::Vocabulary::getWord(), isNonTerminal(), and MosesTraining::vcbT.
Referenced by outputPhrasePair().
double computeLexicalTranslation (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource)
Definition at line 1171 of file score-main.cpp.
References MosesTraining::Vocabulary::getWordID(), lexTable, MosesTraining::LexicalTable::permissiveLookup(), and MosesTraining::vcbS.
Referenced by outputPhrasePair().
double computeUnalignedFWPenalty (const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource)
Definition at line 1135 of file score-main.cpp.
References functionWordList, MosesTraining::Vocabulary::getWord(), and MosesTraining::vcbT.
Referenced by outputPhrasePair().
double computeUnalignedPenalty (const ALIGNMENT *alignmentTargetToSource)
Definition at line 1120 of file score-main.cpp.
Referenced by outputPhrasePair().
void invertAlignment (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment)
Definition at line 1293 of file score-main.cpp.
References begin, end, and MosesTraining::hierarchicalFlag.
Referenced by printSourcePhrase().
void loadFunctionWords (const std::string &fileNameFunctionWords)
Definition at line 1149 of file score-main.cpp.
References Moses::InputFileStream::Close(), functionWordList, and Moses::Tokenize().
Referenced by main().
void loadOrientationPriors (const std::string &fileNamePhraseOrientationPriors, std::vector< float > &orientationClassPriorsL2R, std::vector< float > &orientationClassPriorsR2L)
Definition at line 989 of file score-main.cpp.
References Moses::InputFileStream::Close(), count, key, and starts_with().
Referenced by main().
int main (int argc, char *argv[])
Definition at line 129 of file score-main.cpp.
References MosesTraining::ExtractionPhrasePair::Add(), MosesTraining::ExtractionPhrasePair::AddProperties(), MosesTraining::ScoreFeatureManager::addPropertiesToPhrasePair(), COC_MAX, MosesTraining::conditionOnTargetLhsFlag, MosesTraining::ScoreFeatureManager::configure(), countOfCounts, MosesTraining::crossedNonTerm, goodTuringFlag, hierarchicalFlag, MosesTraining::ScoreFeatureManager::includeSentenceId(), MosesTraining::ExtractionPhrasePair::IncrementPrevious(), inverseFlag, kneserNeyFlag, MosesTraining::lexFlag, lexTable, MosesTraining::LexicalTable::load(), loadFunctionWords(), loadOrientationPriors(), logProbFlag, MosesTraining::ExtractionPhrasePair::Matches(), maybeLogProb(), MosesTraining::minCount, MosesTraining::minCountHierarchical, MosesTraining::negLogProb, MosesTraining::nonTermContext, MosesTraining::nonTermContextTarget, NULL, Moses::OutputFileStream::Open(), MosesTraining::orientationClassPriorsL2R(), MosesTraining::orientationClassPriorsR2L(), partsOfSpeechFlag, MosesTraining::partsOfSpeechSet, MosesTraining::pcfgFlag, MosesTraining::phraseOrientationFlag, MosesTraining::phraseOrientationPriorsFlag, phraseTableFile, processLine(), processPhrasePairs(), MosesTraining::ruleLength, MosesTraining::ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts, MosesTraining::sourceLabelSet, MosesTraining::sourceLHSCounts, MosesTraining::sourceSyntaxLabelCountsLHSFlag, MosesTraining::sourceSyntaxLabelsFlag, MosesTraining::spanLength, MosesTraining::targetConstituentBoundariesFlag, MosesTraining::targetLHSAndSourceLHSJointCounts, targetSyntacticPreferencesFlag, MosesTraining::targetSyntacticPreferencesLabelSet, MosesTraining::targetSyntacticPreferencesLHSCounts, MosesTraining::treeFragmentsFlag, MosesTraining::unalignedFlag, MosesTraining::unalignedFWFlag, MosesTraining::unpairedExtractFormatFlag, MosesTraining::ScoreFeatureManager::usage(), MosesTraining::wordAlignmentFlag, writeCountOfCounts(), writeLabelSet(), and writeLeftHandSideLabelCounts().
size_t NumNonTerminal (const PHRASE *phraseSource)
Definition at line 979 of file score-main.cpp.
References MosesTraining::Vocabulary::getWord(), isNonTerminal(), and MosesTraining::vcbS.
Referenced by outputPhrasePair().
void outputPhrasePair (const ExtractionPhrasePair &phrasePair, float totalCount, int distinctCount, std::ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog)
Definition at line 713 of file score-main.cpp.
References MosesTraining::ScoreFeatureManager::addFeatures(), begin, calcCrossedNonTerm(), COC_MAX, MosesTraining::ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(), MosesTraining::ExtractionPhrasePair::CollectAllPhraseOrientations(), MosesTraining::ExtractionPhrasePair::CollectAllPropertyValues(), computeLexicalTranslation(), computeUnalignedFWPenalty(), computeUnalignedPenalty(), count, countOfCounts, MosesTraining::crossedNonTerm, end, MosesTraining::ExtractionPhrasePair::FindBestAlignmentTargetToSource(), MosesTraining::ExtractionPhrasePair::FindBestPropertyValue(), MosesTraining::ExtractionPhrasePair::GetCount(), MosesTraining::ExtractionPhrasePair::GetPcfgScore(), MosesTraining::ExtractionPhrasePair::GetSource(), MosesTraining::ExtractionPhrasePair::GetTarget(), MosesTraining::Vocabulary::getWord(), goodTuringFlag, hierarchicalFlag, inverseFlag, isNonTerminal(), MosesTraining::ExtractionPhrasePair::IsValid(), kneserNeyFlag, MosesTraining::lexFlag, maybeLogProb(), MosesTraining::minCount, MosesTraining::minCountHierarchical, MosesTraining::nonTermContext, MosesTraining::nonTermContextTarget, NumNonTerminal(), MosesTraining::orientationClassPriorsL2R(), MosesTraining::orientationClassPriorsR2L(), partsOfSpeechFlag, MosesTraining::partsOfSpeechSet, MosesTraining::pcfgFlag, MosesTraining::phraseOrientationFlag, printSourcePhrase(), printTargetPhrase(), MosesTraining::ruleLength, MosesTraining::ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts, sort(), MosesTraining::sourceLabelSet, MosesTraining::sourceLHSCounts, MosesTraining::sourceSyntaxLabelsFlag, MosesTraining::spanLength, MosesTraining::targetConstituentBoundariesFlag, MosesTraining::targetLHSAndSourceLHSJointCounts, targetSyntacticPreferencesFlag, MosesTraining::targetSyntacticPreferencesLabelSet, MosesTraining::targetSyntacticPreferencesLHSCounts, MosesTraining::totalDistinct, MosesTraining::treeFragmentsFlag, MosesTraining::unalignedFlag, MosesTraining::unalignedFWFlag, 
MosesTraining::ExtractionPhrasePair::UpdateVocabularyFromValueTokens(), MosesTraining::vcbS, MosesTraining::vcbT, and MosesTraining::wordAlignmentFlag.
Referenced by processPhrasePairs().
void printSourcePhrase (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, std::ostream &out)
Definition at line 1228 of file score-main.cpp.
References MosesTraining::conditionOnTargetLhsFlag, MosesTraining::Vocabulary::getWord(), MosesTraining::inverseFlag, invertAlignment(), MosesTraining::isNonTerminal(), MosesTraining::unpairedExtractFormatFlag, MosesTraining::vcbS, and MosesTraining::vcbT.
Referenced by outputPhrasePair().
void printTargetPhrase (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, std::ostream &out)
Definition at line 1260 of file score-main.cpp.
References MosesTraining::conditionOnTargetLhsFlag, MosesTraining::Vocabulary::getWord(), MosesTraining::inverseFlag, MosesTraining::isNonTerminal(), MosesTraining::unpairedExtractFormatFlag, MosesTraining::vcbS, and MosesTraining::vcbT.
Referenced by outputPhrasePair().
void processLine (std::string line, int lineID, bool includeSentenceIdFlag, int &sentenceId, PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment, std::string &additionalPropertiesString, float &count, float &pcfgSum)
Definition at line 530 of file score-main.cpp.
References hierarchicalFlag, MosesTraining::Vocabulary::storeIfNew(), Moses::Tokenize(), MosesTraining::vcbS, and MosesTraining::vcbT.
Referenced by main().
void processPhrasePairs (std::vector< ExtractionPhrasePair * > &phrasePairsWithSameSource, std::ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLogProb)
Definition at line 687 of file score-main.cpp.
References outputPhrasePair().
Referenced by main().
void writeCountOfCounts (const std::string &fileNameCountOfCounts)
Definition at line 601 of file score-main.cpp.
References Moses::OutputFileStream::Close(), COC_MAX, countOfCounts, Moses::OutputFileStream::Open(), and MosesTraining::totalDistinct.
Referenced by main().
void writeLabelSet (const std::set< std::string > &labelSet, const std::string &fileName)
Definition at line 667 of file score-main.cpp.
References Moses::OutputFileStream::Close(), and Moses::OutputFileStream::Open().
Referenced by main().
void writeLeftHandSideLabelCounts (const boost::unordered_map< std::string, float > &countsLabelLHS, const boost::unordered_map< std::string, boost::unordered_map< std::string, float > * > &jointCountsLabelLHS, const std::string &fileNameLeftHandSideSourceLabelCounts, const std::string &fileNameLeftHandSideTargetSourceLabelCounts)
Definition at line 623 of file score-main.cpp.
References Moses::OutputFileStream::Close(), end, Moses::OutputFileStream::Open(), MosesTraining::sourceLHSCounts, and MosesTraining::targetLHSAndSourceLHSJointCounts.
Referenced by main().
std::set<std::string> functionWordList
Definition at line 118 of file score-main.cpp.
Referenced by computeUnalignedFWPenalty(), and loadFunctionWords().