#include <sstream>
#include <assert.h>
#include <cstdlib>
#include <cstring>
#include <map>
#include <set>
#include <vector>
#include <algorithm>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/unordered_map.hpp>
#include "ScoreFeature.h"
#include "tables-core.h"
#include "ExtractionPhrasePair.h"
#include "score.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "moses/Util.h"

Go to the source code of this file.
Namespaces | |
| namespace | MosesTraining |
Defines | |
| #define | COC_MAX 10 |
Functions | |
| std::vector< float > | MosesTraining::orientationClassPriorsL2R (4, 0) |
| std::vector< float > | MosesTraining::orientationClassPriorsR2L (4, 0) |
| void | processLine (std::string line, int lineID, bool includeSentenceIdFlag, int &sentenceId, PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment, std::string &additionalPropertiesString, float &count, float &pcfgSum) |
| void | writeCountOfCounts (const std::string &fileNameCountOfCounts) |
| void | writeLeftHandSideLabelCounts (const boost::unordered_map< std::string, float > &countsLabelLHS, const boost::unordered_map< std::string, boost::unordered_map< std::string, float > * > &jointCountsLabelLHS, const std::string &fileNameLeftHandSideSourceLabelCounts, const std::string &fileNameLeftHandSideTargetSourceLabelCounts) |
| void | writeLabelSet (const std::set< std::string > &labelSet, const std::string &fileName) |
| void | processPhrasePairs (std::vector< ExtractionPhrasePair * > &phrasePairsWithSameSource, std::ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLogProb) |
| void | outputPhrasePair (const ExtractionPhrasePair &phrasePair, float, int, std::ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog) |
| double | computeLexicalTranslation (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource) |
| double | computeUnalignedPenalty (const ALIGNMENT *alignmentTargetToSource) |
| void | loadOrientationPriors (const std::string &fileNamePhraseOrientationPriors, std::vector< float > &orientationClassPriorsL2R, std::vector< float > &orientationClassPriorsR2L) |
| void | loadFunctionWords (const std::string &fileNameFunctionWords) |
| double | computeUnalignedFWPenalty (const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource) |
| int | calcCrossedNonTerm (const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource) |
| void | printSourcePhrase (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, std::ostream &out) |
| void | printTargetPhrase (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, std::ostream &out) |
| void | invertAlignment (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment) |
| size_t | NumNonTerminal (const PHRASE *phraseSource) |
| int | main (int argc, char *argv[]) |
| bool | calcCrossedNonTerm (size_t targetPos, size_t sourcePos, const ALIGNMENT *alignmentTargetToSource) |
Variables | |
| LexicalTable | MosesTraining::lexTable |
| bool | MosesTraining::inverseFlag = false |
| bool | MosesTraining::pcfgFlag = false |
| bool | MosesTraining::phraseOrientationFlag = false |
| bool | MosesTraining::treeFragmentsFlag = false |
| bool | MosesTraining::partsOfSpeechFlag = false |
| bool | MosesTraining::sourceSyntaxLabelsFlag = false |
| bool | MosesTraining::sourceSyntaxLabelCountsLHSFlag = false |
| bool | MosesTraining::targetSyntacticPreferencesFlag = false |
| bool | MosesTraining::unpairedExtractFormatFlag = false |
| bool | MosesTraining::conditionOnTargetLhsFlag = false |
| bool | MosesTraining::wordAlignmentFlag = true |
| bool | MosesTraining::goodTuringFlag = false |
| bool | MosesTraining::kneserNeyFlag = false |
| bool | MosesTraining::logProbFlag = false |
| int | MosesTraining::negLogProb = 1 |
| bool | MosesTraining::lexFlag = true |
| bool | MosesTraining::unalignedFlag = false |
| bool | MosesTraining::unalignedFWFlag = false |
| bool | MosesTraining::crossedNonTerm = false |
| bool | MosesTraining::spanLength = false |
| bool | MosesTraining::ruleLength = false |
| bool | MosesTraining::nonTermContext = false |
| bool | MosesTraining::nonTermContextTarget = false |
| bool | MosesTraining::targetConstituentBoundariesFlag = false |
| int | MosesTraining::countOfCounts [COC_MAX+1] |
| int | MosesTraining::totalDistinct = 0 |
| float | MosesTraining::minCount = 0 |
| float | MosesTraining::minCountHierarchical = 0 |
| bool | MosesTraining::phraseOrientationPriorsFlag = false |
| boost::unordered_map < std::string, float > | MosesTraining::sourceLHSCounts |
| boost::unordered_map < std::string, boost::unordered_map < std::string, float > * > | MosesTraining::targetLHSAndSourceLHSJointCounts |
| std::set< std::string > | MosesTraining::sourceLabelSet |
| std::map< std::string, size_t > | MosesTraining::sourceLabels |
| std::vector< std::string > | MosesTraining::sourceLabelsByIndex |
| std::set< std::string > | MosesTraining::partsOfSpeechSet |
| boost::unordered_map < std::string, float > | MosesTraining::targetSyntacticPreferencesLHSCounts |
| boost::unordered_map < std::string, boost::unordered_map < std::string, float > * > | MosesTraining::ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts |
| std::set< std::string > | MosesTraining::targetSyntacticPreferencesLabelSet |
| std::map< std::string, size_t > | MosesTraining::targetSyntacticPreferencesLabels |
| std::vector< std::string > | MosesTraining::targetSyntacticPreferencesLabelsByIndex |
| std::set< std::string > | functionWordList |
| #define COC_MAX 10 |
Definition at line 62 of file score-main.cpp.
Referenced by main(), outputPhrasePair(), and writeCountOfCounts().
bool calcCrossedNonTerm (size_t targetPos, size_t sourcePos, const ALIGNMENT *alignmentTargetToSource)
Definition at line 1076 of file score-main.cpp.
int calcCrossedNonTerm (const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource)
Definition at line 1100 of file score-main.cpp.
References MosesTraining::Vocabulary::getWord(), isNonTerminal(), and MosesTraining::vcbT.
Referenced by outputPhrasePair().


double computeLexicalTranslation (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource)
Definition at line 1171 of file score-main.cpp.
References MosesTraining::Vocabulary::getWordID(), lexTable, MosesTraining::LexicalTable::permissiveLookup(), and MosesTraining::vcbS.
Referenced by outputPhrasePair().


double computeUnalignedFWPenalty (const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource)
Definition at line 1135 of file score-main.cpp.
References functionWordList, MosesTraining::Vocabulary::getWord(), and MosesTraining::vcbT.
Referenced by outputPhrasePair().


double computeUnalignedPenalty (const ALIGNMENT *alignmentTargetToSource)
Definition at line 1120 of file score-main.cpp.
Referenced by outputPhrasePair().

void invertAlignment (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *inTargetToSourceAlignment, ALIGNMENT *outSourceToTargetAlignment)
Definition at line 1293 of file score-main.cpp.
References begin, end, and MosesTraining::hierarchicalFlag.
Referenced by printSourcePhrase().

void loadFunctionWords (const std::string &fileNameFunctionWords)
Definition at line 1149 of file score-main.cpp.
References Moses::InputFileStream::Close(), functionWordList, and Moses::Tokenize().
Referenced by main().


void loadOrientationPriors (const std::string &fileNamePhraseOrientationPriors, std::vector< float > &orientationClassPriorsL2R, std::vector< float > &orientationClassPriorsR2L)
Definition at line 989 of file score-main.cpp.
References Moses::InputFileStream::Close(), count, key, and starts_with().
Referenced by main().


int main (int argc, char *argv[])
Definition at line 129 of file score-main.cpp.
References MosesTraining::ExtractionPhrasePair::Add(), MosesTraining::ExtractionPhrasePair::AddProperties(), MosesTraining::ScoreFeatureManager::addPropertiesToPhrasePair(), COC_MAX, MosesTraining::conditionOnTargetLhsFlag, MosesTraining::ScoreFeatureManager::configure(), countOfCounts, MosesTraining::crossedNonTerm, goodTuringFlag, hierarchicalFlag, MosesTraining::ScoreFeatureManager::includeSentenceId(), MosesTraining::ExtractionPhrasePair::IncrementPrevious(), inverseFlag, kneserNeyFlag, MosesTraining::lexFlag, lexTable, MosesTraining::LexicalTable::load(), loadFunctionWords(), loadOrientationPriors(), logProbFlag, MosesTraining::ExtractionPhrasePair::Matches(), maybeLogProb(), MosesTraining::minCount, MosesTraining::minCountHierarchical, MosesTraining::negLogProb, MosesTraining::nonTermContext, MosesTraining::nonTermContextTarget, NULL, Moses::OutputFileStream::Open(), MosesTraining::orientationClassPriorsL2R(), MosesTraining::orientationClassPriorsR2L(), partsOfSpeechFlag, MosesTraining::partsOfSpeechSet, MosesTraining::pcfgFlag, MosesTraining::phraseOrientationFlag, MosesTraining::phraseOrientationPriorsFlag, phraseTableFile, processLine(), processPhrasePairs(), MosesTraining::ruleLength, MosesTraining::ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts, MosesTraining::sourceLabelSet, MosesTraining::sourceLHSCounts, MosesTraining::sourceSyntaxLabelCountsLHSFlag, MosesTraining::sourceSyntaxLabelsFlag, MosesTraining::spanLength, MosesTraining::targetConstituentBoundariesFlag, MosesTraining::targetLHSAndSourceLHSJointCounts, targetSyntacticPreferencesFlag, MosesTraining::targetSyntacticPreferencesLabelSet, MosesTraining::targetSyntacticPreferencesLHSCounts, MosesTraining::treeFragmentsFlag, MosesTraining::unalignedFlag, MosesTraining::unalignedFWFlag, MosesTraining::unpairedExtractFormatFlag, MosesTraining::ScoreFeatureManager::usage(), MosesTraining::wordAlignmentFlag, writeCountOfCounts(), writeLabelSet(), and writeLeftHandSideLabelCounts().

size_t NumNonTerminal (const PHRASE *phraseSource)
Definition at line 979 of file score-main.cpp.
References MosesTraining::Vocabulary::getWord(), isNonTerminal(), and MosesTraining::vcbS.
Referenced by outputPhrasePair().


void outputPhrasePair (const ExtractionPhrasePair &phrasePair, float totalCount, int distinctCount, std::ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLog)
Definition at line 713 of file score-main.cpp.
References MosesTraining::ScoreFeatureManager::addFeatures(), begin, calcCrossedNonTerm(), COC_MAX, MosesTraining::ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(), MosesTraining::ExtractionPhrasePair::CollectAllPhraseOrientations(), MosesTraining::ExtractionPhrasePair::CollectAllPropertyValues(), computeLexicalTranslation(), computeUnalignedFWPenalty(), computeUnalignedPenalty(), count, countOfCounts, MosesTraining::crossedNonTerm, end, MosesTraining::ExtractionPhrasePair::FindBestAlignmentTargetToSource(), MosesTraining::ExtractionPhrasePair::FindBestPropertyValue(), MosesTraining::ExtractionPhrasePair::GetCount(), MosesTraining::ExtractionPhrasePair::GetPcfgScore(), MosesTraining::ExtractionPhrasePair::GetSource(), MosesTraining::ExtractionPhrasePair::GetTarget(), MosesTraining::Vocabulary::getWord(), goodTuringFlag, hierarchicalFlag, inverseFlag, isNonTerminal(), MosesTraining::ExtractionPhrasePair::IsValid(), kneserNeyFlag, MosesTraining::lexFlag, maybeLogProb(), MosesTraining::minCount, MosesTraining::minCountHierarchical, MosesTraining::nonTermContext, MosesTraining::nonTermContextTarget, NumNonTerminal(), MosesTraining::orientationClassPriorsL2R(), MosesTraining::orientationClassPriorsR2L(), partsOfSpeechFlag, MosesTraining::partsOfSpeechSet, MosesTraining::pcfgFlag, MosesTraining::phraseOrientationFlag, printSourcePhrase(), printTargetPhrase(), MosesTraining::ruleLength, MosesTraining::ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts, sort(), MosesTraining::sourceLabelSet, MosesTraining::sourceLHSCounts, MosesTraining::sourceSyntaxLabelsFlag, MosesTraining::spanLength, MosesTraining::targetConstituentBoundariesFlag, MosesTraining::targetLHSAndSourceLHSJointCounts, targetSyntacticPreferencesFlag, MosesTraining::targetSyntacticPreferencesLabelSet, MosesTraining::targetSyntacticPreferencesLHSCounts, MosesTraining::totalDistinct, MosesTraining::treeFragmentsFlag, MosesTraining::unalignedFlag, MosesTraining::unalignedFWFlag, 
MosesTraining::ExtractionPhrasePair::UpdateVocabularyFromValueTokens(), MosesTraining::vcbS, MosesTraining::vcbT, and MosesTraining::wordAlignmentFlag.
Referenced by processPhrasePairs().


void printSourcePhrase (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, std::ostream &out)
Definition at line 1228 of file score-main.cpp.
References MosesTraining::conditionOnTargetLhsFlag, MosesTraining::Vocabulary::getWord(), MosesTraining::inverseFlag, invertAlignment(), MosesTraining::isNonTerminal(), MosesTraining::unpairedExtractFormatFlag, MosesTraining::vcbS, and MosesTraining::vcbT.
Referenced by outputPhrasePair().


void printTargetPhrase (const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *targetToSourceAlignment, std::ostream &out)
Definition at line 1260 of file score-main.cpp.
References MosesTraining::conditionOnTargetLhsFlag, MosesTraining::Vocabulary::getWord(), MosesTraining::inverseFlag, MosesTraining::isNonTerminal(), MosesTraining::unpairedExtractFormatFlag, MosesTraining::vcbS, and MosesTraining::vcbT.
Referenced by outputPhrasePair().


void processLine (std::string line, int lineID, bool includeSentenceIdFlag, int &sentenceId, PHRASE *phraseSource, PHRASE *phraseTarget, ALIGNMENT *targetToSourceAlignment, std::string &additionalPropertiesString, float &count, float &pcfgSum)
Definition at line 530 of file score-main.cpp.
References hierarchicalFlag, MosesTraining::Vocabulary::storeIfNew(), Moses::Tokenize(), MosesTraining::vcbS, and MosesTraining::vcbT.
Referenced by main().


void processPhrasePairs (std::vector< ExtractionPhrasePair * > &phrasePairsWithSameSource, std::ostream &phraseTableFile, const ScoreFeatureManager &featureManager, const MaybeLog &maybeLogProb)
Definition at line 687 of file score-main.cpp.
References outputPhrasePair().
Referenced by main().


void writeCountOfCounts (const std::string &fileNameCountOfCounts)
Definition at line 601 of file score-main.cpp.
References Moses::OutputFileStream::Close(), COC_MAX, countOfCounts, Moses::OutputFileStream::Open(), and MosesTraining::totalDistinct.
Referenced by main().


void writeLabelSet (const std::set< std::string > &labelSet, const std::string &fileName)
Definition at line 667 of file score-main.cpp.
References Moses::OutputFileStream::Close(), and Moses::OutputFileStream::Open().
Referenced by main().


void writeLeftHandSideLabelCounts (const boost::unordered_map< std::string, float > &countsLabelLHS, const boost::unordered_map< std::string, boost::unordered_map< std::string, float > * > &jointCountsLabelLHS, const std::string &fileNameLeftHandSideSourceLabelCounts, const std::string &fileNameLeftHandSideTargetSourceLabelCounts)
Definition at line 623 of file score-main.cpp.
References Moses::OutputFileStream::Close(), end, Moses::OutputFileStream::Open(), MosesTraining::sourceLHSCounts, and MosesTraining::targetLHSAndSourceLHSJointCounts.
Referenced by main().


std::set<std::string> functionWordList
Definition at line 118 of file score-main.cpp.
Referenced by computeUnalignedFWPenalty(), and loadFunctionWords().
Generated by Doxygen 1.5.9