Namespaces | |
namespace | Syntax |
Classes | |
class | AlignmentElement |
class | AlignmentPhrase |
class | Domain |
class | DomainFeature |
class | SubsetDomainFeature |
class | SparseSubsetDomainFeature |
class | IndicatorDomainFeature |
class | SparseIndicatorDomainFeature |
class | RatioDomainFeature |
class | SparseRatioDomainFeature |
class | WordCount |
class | Vocab |
class | ExtractLex |
class | ExtractTask |
class | ExtractedRule |
class | ExtractionPhrasePair |
class | Hole |
class | HoleSourceOrderer |
class | HoleCollection |
class | InternalStructFeature |
class | InternalStructFeatureDense |
class | InternalStructFeatureSparse |
class | PhraseExtractionOptions |
class | PhraseOrientation |
class | PropertiesConsolidator |
class | RuleExist |
struct | RuleExtractionOptions |
class | LexicalTable |
struct | MaybeLog |
class | ScoreFeatureArgumentException |
struct | ScoreFeatureContext |
class | ScoreFeature |
class | ScoreFeatureManager |
class | SentenceAlignment |
class | SentenceAlignmentWithSyntax |
class | PhraseAlignment |
struct | SyntaxNode |
class | SyntaxNodeCollection |
class | Vocabulary |
class | PhraseTable |
class | TTable |
class | DTable |
class | XmlException |
Typedefs | |
typedef std::vector< std::pair < int, int > > | Alignment |
typedef pair< int, int > | HPhraseVertex |
typedef pair< HPhraseVertex, HPhraseVertex > | HPhrase |
typedef vector< HPhrase > | HPhraseVector |
typedef map< int, set< int > > | HSentenceVertices |
typedef std::vector< std::set < size_t > > | ALIGNMENT |
typedef std::map< int, std::set< int > > | HSenteceVertices |
typedef std::list< Hole > | HoleList |
typedef boost::shared_ptr < ScoreFeature > | ScoreFeaturePtr |
typedef Syntax::Tree< SyntaxNode > | SyntaxTree |
typedef std::string | WORD |
typedef unsigned int | WORD_ID |
typedef std::vector< WORD_ID > | PHRASE |
typedef unsigned int | PHRASE_ID |
typedef std::vector< std::pair < PHRASE_ID, double > > | PHRASEPROBVEC |
Enumerations | |
enum | REO_MODEL_TYPE { REO_MSD, REO_MSLR, REO_MONO } |
enum | REO_POS { LEFT, RIGHT, DLEFT, DRIGHT, UNKNOWN } |
Functions | |
void | ReadAlignment (const std::string &s, Alignment &a) |
void | FlipAlignment (Alignment &a) |
std::ostream & | operator<< (std::ostream &out, const WordCount &obj) |
REO_POS | getOrientWordModel (SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool, int, int, int, int, int, int, int, bool(*)(int, int), bool(*)(int, int)) |
REO_POS | getOrientPhraseModel (SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool, int, int, int, int, int, int, int, bool(*)(int, int), bool(*)(int, int), const HSentenceVertices &, const HSentenceVertices &) |
REO_POS | getOrientHierModel (SentenceAlignmentWithSyntax &, REO_MODEL_TYPE, bool, bool, int, int, int, int, int, int, int, bool(*)(int, int), bool(*)(int, int), const HSentenceVertices &, const HSentenceVertices &, const HSentenceVertices &, const HSentenceVertices &, REO_POS) |
void | insertVertex (HSentenceVertices &, int, int) |
void | insertPhraseVertices (HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, int, int, int, int) |
string | getOrientString (REO_POS, REO_MODEL_TYPE) |
bool | ge (int, int) |
bool | le (int, int) |
bool | lt (int, int) |
bool | isAligned (SentenceAlignmentWithSyntax &, int, int) |
std::vector< float > | orientationClassPriorsL2R (4, 0) |
std::vector< float > | orientationClassPriorsR2L (4, 0) |
bool | isNonTerminal (const std::string &word) |
void | addBoundaryWords (vector< string > &phrase) |
std::vector< std::string > | Tokenize (const std::string &str, const std::string &delimiters=" \t") |
std::string | Trim (const std::string &str, const std::string dropChars=" \t\n\r") |
string | ParseXmlTagAttribute (const string &tag, const string &attributeName) |
void | ParseXmlTagAttributes (const std::string &s, std::map< std::string, std::string > &attributes) |
string | TrimXml (const string &str) |
bool | isXmlTag (const string &tag) |
string | unescape (const string &str) |
vector< string > | TokenizeXml (const string &str) |
bool | ProcessAndStripXMLTags (string &line, SyntaxNodeCollection &nodeCollection, set< string > &labelCollection, map< string, int > &topLabelCollection, bool unescapeSpecialChars) |
std::string | ParseXmlTagAttribute (const std::string &tag, const std::string &attributeName) |
std::string | TrimXml (const std::string &str) |
bool | isXmlTag (const std::string &tag) |
std::vector< std::string > | TokenizeXml (const std::string &str) |
bool | ProcessAndStripXMLTags (std::string &line, SyntaxNodeCollection &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection, bool unescape=true) |
std::string | unescape (const std::string &str) |
Variables | |
int | sentenceOffset = 0 |
Vocabulary | vcbT |
Vocabulary | vcbS |
bool | hierarchicalFlag = false |
LexicalTable | lexTable |
bool | inverseFlag = false |
bool | pcfgFlag = false |
bool | phraseOrientationFlag = false |
bool | treeFragmentsFlag = false |
bool | partsOfSpeechFlag = false |
bool | sourceSyntaxLabelsFlag = false |
bool | sourceSyntaxLabelCountsLHSFlag = false |
bool | targetSyntacticPreferencesFlag = false |
bool | unpairedExtractFormatFlag = false |
bool | conditionOnTargetLhsFlag = false |
bool | wordAlignmentFlag = true |
bool | goodTuringFlag = false |
bool | kneserNeyFlag = false |
bool | logProbFlag = false |
int | negLogProb = 1 |
bool | lexFlag = true |
bool | unalignedFlag = false |
bool | unalignedFWFlag = false |
bool | crossedNonTerm = false |
bool | spanLength = false |
bool | ruleLength = false |
bool | nonTermContext = false |
bool | nonTermContextTarget = false |
bool | targetConstituentBoundariesFlag = false |
int | countOfCounts [COC_MAX+1] |
int | totalDistinct = 0 |
float | minCount = 0 |
float | minCountHierarchical = 0 |
bool | phraseOrientationPriorsFlag = false |
boost::unordered_map < std::string, float > | sourceLHSCounts |
boost::unordered_map < std::string, boost::unordered_map < std::string, float > * > | targetLHSAndSourceLHSJointCounts |
std::set< std::string > | sourceLabelSet |
std::map< std::string, size_t > | sourceLabels |
std::vector< std::string > | sourceLabelsByIndex |
std::set< std::string > | partsOfSpeechSet |
boost::unordered_map < std::string, float > | targetSyntacticPreferencesLHSCounts |
boost::unordered_map < std::string, boost::unordered_map < std::string, float > * > | ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts |
std::set< std::string > | targetSyntacticPreferencesLabelSet |
std::map< std::string, size_t > | targetSyntacticPreferencesLabels |
std::vector< std::string > | targetSyntacticPreferencesLabelsByIndex |
typedef std::vector< std::set<size_t> > MosesTraining::ALIGNMENT |
Definition at line 32 of file ExtractionPhrasePair.h.
typedef std::vector<std::pair<int, int> > MosesTraining::Alignment |
Definition at line 29 of file Alignment.h.
typedef std::list<Hole> MosesTraining::HoleList |
typedef std::pair< HPhraseVertex, HPhraseVertex > MosesTraining::HPhrase |
Definition at line 34 of file extract-main.cpp.
typedef std::vector< HPhrase > MosesTraining::HPhraseVector |
Definition at line 37 of file extract-main.cpp.
typedef std::pair< int, int > MosesTraining::HPhraseVertex |
Definition at line 30 of file extract-main.cpp.
typedef std::map<int, std::set<int> > MosesTraining::HSenteceVertices |
Definition at line 33 of file hierarchical.h.
typedef std::map< int, std::set< int > > MosesTraining::HSentenceVertices |
Definition at line 41 of file extract-main.cpp.
typedef std::vector< WORD_ID > MosesTraining::PHRASE |
Definition at line 33 of file tables-core.h.
typedef unsigned int MosesTraining::PHRASE_ID |
Definition at line 34 of file tables-core.h.
typedef std::vector< std::pair< PHRASE_ID, double > > MosesTraining::PHRASEPROBVEC |
Definition at line 49 of file tables-core.h.
typedef boost::shared_ptr<ScoreFeature> MosesTraining::ScoreFeaturePtr |
Definition at line 102 of file ScoreFeature.h.
typedef Syntax::Tree<SyntaxNode> MosesTraining::SyntaxTree |
Definition at line 10 of file SyntaxTree.h.
typedef std::string MosesTraining::WORD |
Definition at line 18 of file tables-core.h.
typedef unsigned int MosesTraining::WORD_ID |
Definition at line 19 of file tables-core.h.
Definition at line 28 of file PhraseExtractionOptions.h.
void MosesTraining::addBoundaryWords | ( | vector< string > & | phrase | ) |
Definition at line 36 of file SentenceAlignment.cpp.
Referenced by MosesTraining::SentenceAlignment::processSourceSentence(), and MosesTraining::SentenceAlignment::processTargetSentence().
void MosesTraining::FlipAlignment | ( | Alignment & | a | ) |
Definition at line 63 of file Alignment.cpp.
References swap().
Referenced by MosesTraining::Syntax::GHKM::ExtractGHKM::Main().
bool MosesTraining::ge | ( | int | first, | |
int | second | |||
) |
Definition at line 685 of file extract-main.cpp.
REO_POS MosesTraining::getOrientHierModel | ( | SentenceAlignmentWithSyntax & | sentence, | |
REO_MODEL_TYPE | modelType, | |||
bool | connectedLeftTop, | |||
bool | connectedRightTop, | |||
int | startF, | |||
int | endF, | |||
int | startE, | |||
int | endE, | |||
int | countF, | |||
int | zero, | |||
int | unit, | |||
bool(*)(int, int) | ge, | |||
bool(*)(int, int) | lt, | |||
const HSentenceVertices & | inBottomRight, | |||
const HSentenceVertices & | inBottomLeft, | |||
const HSentenceVertices & | outBottomRight, | |||
const HSentenceVertices & | outBottomLeft, | |||
REO_POS | phraseOrient | |||
) |
REO_POS MosesTraining::getOrientPhraseModel | ( | SentenceAlignmentWithSyntax & | sentence, | |
REO_MODEL_TYPE | modelType, | |||
bool | connectedLeftTop, | |||
bool | connectedRightTop, | |||
int | startF, | |||
int | endF, | |||
int | startE, | |||
int | endE, | |||
int | countF, | |||
int | zero, | |||
int | unit, | |||
bool(*)(int, int) | ge, | |||
bool(*)(int, int) | lt, | |||
const HSentenceVertices & | inBottomRight, | |||
const HSentenceVertices & | inBottomLeft | |||
) |
string MosesTraining::getOrientString | ( | REO_POS | orient, | |
REO_MODEL_TYPE | modelType | |||
) |
REO_POS MosesTraining::getOrientWordModel | ( | SentenceAlignmentWithSyntax & | sentence, | |
REO_MODEL_TYPE | modelType, | |||
bool | connectedLeftTop, | |||
bool | connectedRightTop, | |||
int | startF, | |||
int | endF, | |||
int | startE, | |||
int | endE, | |||
int | countF, | |||
int | zero, | |||
int | unit, | |||
bool(*)(int, int) | ge, | |||
bool(*)(int, int) | lt | |||
) |
Definition at line 555 of file extract-main.cpp.
References DLEFT, DRIGHT, isAligned(), LEFT, REO_MONO, REO_MSD, RIGHT, and UNKNOWN.
void MosesTraining::insertPhraseVertices | ( | HSentenceVertices & | topLeft, | |
HSentenceVertices & | topRight, | |||
HSentenceVertices & | bottomLeft, | |||
HSentenceVertices & | bottomRight, | |||
int | startF, | |||
int | startE, | |||
int | endF, | |||
int | endE | |||
) |
Definition at line 710 of file extract-main.cpp.
References insertVertex().
void MosesTraining::insertVertex | ( | HSentenceVertices & | corners, | |
int | x, | |||
int | y | |||
) |
Definition at line 700 of file extract-main.cpp.
Referenced by insertPhraseVertices().
bool MosesTraining::isAligned | ( | SentenceAlignmentWithSyntax & | sentence, | |
int | fi, | |||
int | ei | |||
) |
Definition at line 669 of file extract-main.cpp.
References MosesTraining::SentenceAlignment::alignedToT, MosesTraining::SentenceAlignment::source, and MosesTraining::SentenceAlignment::target.
Referenced by getOrientWordModel().
bool MosesTraining::isNonTerminal | ( | const std::string & | word | ) | [inline] |
Definition at line 42 of file score.h.
Referenced by MosesTraining::ExtractionPhrasePair::MatchesAlignment(), printSourcePhrase(), and printTargetPhrase().
bool MosesTraining::isXmlTag | ( | const std::string & | tag | ) |
bool MosesTraining::isXmlTag | ( | const string & | tag | ) |
Check if the token is an XML tag, i.e. starts with "<"
tag | token to be checked |
Definition at line 141 of file XmlTree.cpp.
Referenced by ProcessAndStripXMLTags().
bool MosesTraining::le | ( | int | first, | |
int | second | |||
) |
Definition at line 690 of file extract-main.cpp.
bool MosesTraining::lt | ( | int | first, | |
int | second | |||
) |
Definition at line 695 of file extract-main.cpp.
std::ostream& MosesTraining::operator<< | ( | std::ostream & | out, | |
const WordCount & | obj | |||
) |
Definition at line 206 of file extract-lex-main.cpp.
References MosesTraining::WordCount::GetCount().
std::vector<float> MosesTraining::orientationClassPriorsL2R | ( | 4 | , | |
0 | ||||
) |
std::vector<float> MosesTraining::orientationClassPriorsR2L | ( | 4 | , | |
0 | ||||
) |
std::string MosesTraining::ParseXmlTagAttribute | ( | const std::string & | tag, | |
const std::string & | attributeName | |||
) |
string MosesTraining::ParseXmlTagAttribute | ( | const string & | tag, | |
const string & | attributeName | |||
) |
Definition at line 64 of file XmlTree.cpp.
Referenced by ProcessAndStripXMLTags().
void MosesTraining::ParseXmlTagAttributes | ( | const std::string & | s, | |
std::map< std::string, std::string > & | attributes | |||
) |
Definition at line 85 of file XmlTree.cpp.
Referenced by ProcessAndStripXMLTags().
bool MosesTraining::ProcessAndStripXMLTags | ( | std::string & | line, | |
SyntaxNodeCollection & | tree, | |||
std::set< std::string > & | labelCollection, | |||
std::map< std::string, int > & | topLabelCollection, | |||
bool | unescape = true | |||
) |
bool MosesTraining::ProcessAndStripXMLTags | ( | string & | line, | |
SyntaxNodeCollection & | nodeCollection, | |||
set< string > & | labelCollection, | |||
map< string, int > & | topLabelCollection, | |||
bool | unescapeSpecialChars | |||
) |
Process a sentence with XML-style annotation of syntactic nodes.
line[in,out] | in: sentence, out: sentence without the XML | |
nodeCollection[out] | the collection of SyntaxNode objects for this sentence | |
labelCollection[out] | label values are inserted into this set | |
topLabelCollection[out] | top labels (key) and their counts (value) are inserted into this map | |
unescapeSpecialChars | flag indicating whether XML special characters should be unescaped |
Definition at line 259 of file XmlTree.cpp.
References MosesTraining::SyntaxNodeCollection::AddNode(), MosesTraining::SyntaxNode::attributes, MosesTraining::SyntaxNodeCollection::GetNodes(), isXmlTag(), MosesTraining::SyntaxNode::label, n, ParseXmlTagAttribute(), ParseXmlTagAttributes(), Tokenize(), TokenizeXml(), Trim(), TrimXml(), and unescape().
Referenced by MosesTraining::Syntax::XmlTreeParser::Parse(), MosesTraining::SentenceAlignmentWithSyntax::processSourceSentence(), and MosesTraining::SentenceAlignmentWithSyntax::processTargetSentence().
void MosesTraining::ReadAlignment | ( | const std::string & | s, | |
Alignment & | a | |||
) |
Definition at line 31 of file Alignment.cpp.
References begin, end, and src.
Referenced by MosesTraining::Syntax::GHKM::ExtractGHKM::Main().
std::vector<std::string> MosesTraining::Tokenize | ( | const std::string & | str, | |
const std::string & | delimiters = " \t" | |||
) | [inline] |
Definition at line 36 of file XmlTree.cpp.
Referenced by MosesTraining::LexicalTable::load(), ProcessAndStripXMLTags(), and MosesTraining::ExtractionPhrasePair::UpdateVocabularyFromValueTokens().
std::vector<std::string> MosesTraining::TokenizeXml | ( | const std::string & | str | ) |
vector<string> MosesTraining::TokenizeXml | ( | const string & | str | ) |
Split up the input character string into tokens made up of either XML tags or text. Example: this <b> is a </b> test . => (this ), (<b>), ( is a ), (</b>), ( test .)
str | input string |
Definition at line 209 of file XmlTree.cpp.
Referenced by ProcessAndStripXMLTags().
std::string MosesTraining::Trim | ( | const std::string & | str, | |
const std::string | dropChars = " \t\n\r" | |||
) |
Definition at line 57 of file XmlTree.cpp.
Referenced by ParseXmlTagAttributes(), and ProcessAndStripXMLTags().
std::string MosesTraining::TrimXml | ( | const std::string & | str | ) |
string MosesTraining::TrimXml | ( | const string & | str | ) |
Remove "<" and ">" from XML tag
str | xml token to be stripped |
Definition at line 121 of file XmlTree.cpp.
Referenced by ProcessAndStripXMLTags().
std::string MosesTraining::unescape | ( | const std::string & | str | ) |
string MosesTraining::unescape | ( | const string & | str | ) |
Unescape XML special characters.
Definition at line 149 of file XmlTree.cpp.
Referenced by ProcessAndStripXMLTags().
bool MosesTraining::conditionOnTargetLhsFlag = false |
Definition at line 56 of file score-main.cpp.
Referenced by main(), printSourcePhrase(), and printTargetPhrase().
int MosesTraining::countOfCounts[COC_MAX+1] |
Definition at line 73 of file score-main.cpp.
bool MosesTraining::crossedNonTerm = false |
bool MosesTraining::goodTuringFlag = false |
Definition at line 58 of file score-main.cpp.
bool MosesTraining::hierarchicalFlag = false |
Definition at line 47 of file score-main.cpp.
Referenced by invertAlignment(), main(), MosesTraining::ExtractionPhrasePair::MatchesAlignment(), outputPhrasePair(), and processLine().
bool MosesTraining::inverseFlag = false |
Definition at line 46 of file score-main.cpp.
Referenced by printSourcePhrase(), and printTargetPhrase().
bool MosesTraining::kneserNeyFlag = false |
Definition at line 59 of file score-main.cpp.
bool MosesTraining::lexFlag = true |
Definition at line 45 of file score-main.cpp.
bool MosesTraining::logProbFlag = false |
Definition at line 60 of file score-main.cpp.
float MosesTraining::minCount = 0 |
float MosesTraining::minCountHierarchical = 0 |
int MosesTraining::negLogProb = 1 |
bool MosesTraining::nonTermContext = false |
bool MosesTraining::nonTermContextTarget = false |
bool MosesTraining::partsOfSpeechFlag = false |
Definition at line 51 of file score-main.cpp.
std::set<std::string> MosesTraining::partsOfSpeechSet |
bool MosesTraining::pcfgFlag = false |
bool MosesTraining::phraseOrientationFlag = false |
bool MosesTraining::phraseOrientationPriorsFlag = false |
bool MosesTraining::ruleLength = false |
boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > MosesTraining::ruleTargetLHSAndTargetSyntacticPreferencesLHSJointCounts |
int MosesTraining::sentenceOffset = 0 |
std::map<std::string,size_t> MosesTraining::sourceLabels |
Definition at line 82 of file score-main.cpp.
Referenced by AlignedSentenceSyntax::CreateNonTerms(), and MosesTraining::Syntax::GHKM::ExtractGHKM::Main().
std::vector<std::string> MosesTraining::sourceLabelsByIndex |
Definition at line 83 of file score-main.cpp.
std::set<std::string> MosesTraining::sourceLabelSet |
boost::unordered_map<std::string,float> MosesTraining::sourceLHSCounts |
Definition at line 79 of file score-main.cpp.
Referenced by Moses::SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCountFile(), main(), outputPhrasePair(), and writeLeftHandSideLabelCounts().
bool MosesTraining::sourceSyntaxLabelCountsLHSFlag = false |
bool MosesTraining::sourceSyntaxLabelsFlag = false |
bool MosesTraining::spanLength = false |
bool MosesTraining::targetConstituentBoundariesFlag = false |
boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > MosesTraining::targetLHSAndSourceLHSJointCounts |
Definition at line 80 of file score-main.cpp.
Referenced by main(), outputPhrasePair(), and writeLeftHandSideLabelCounts().
bool MosesTraining::targetSyntacticPreferencesFlag = false |
Definition at line 54 of file score-main.cpp.
std::map<std::string,size_t> MosesTraining::targetSyntacticPreferencesLabels |
Definition at line 90 of file score-main.cpp.
std::vector<std::string> MosesTraining::targetSyntacticPreferencesLabelsByIndex |
Definition at line 91 of file score-main.cpp.
std::set<std::string> MosesTraining::targetSyntacticPreferencesLabelSet |
boost::unordered_map<std::string,float> MosesTraining::targetSyntacticPreferencesLHSCounts |
int MosesTraining::totalDistinct = 0 |
Definition at line 74 of file score-main.cpp.
Referenced by outputPhrasePair(), and writeCountOfCounts().
bool MosesTraining::treeFragmentsFlag = false |
bool MosesTraining::unalignedFlag = false |
bool MosesTraining::unalignedFWFlag = false |
bool MosesTraining::unpairedExtractFormatFlag = false |
Definition at line 55 of file score-main.cpp.
Referenced by main(), printSourcePhrase(), and printTargetPhrase().
Vocabulary MosesTraining::vcbS |
Definition at line 97 of file score-main.cpp.
Referenced by computeLexicalTranslation(), MosesTraining::LexicalTable::load(), NumNonTerminal(), outputPhrasePair(), printSourcePhrase(), printTargetPhrase(), and processLine().
Vocabulary MosesTraining::vcbT |
Definition at line 96 of file score-main.cpp.
Referenced by calcCrossedNonTerm(), computeUnalignedFWPenalty(), MosesTraining::LexicalTable::load(), MosesTraining::ExtractionPhrasePair::MatchesAlignment(), outputPhrasePair(), printSourcePhrase(), printTargetPhrase(), and processLine().
bool MosesTraining::wordAlignmentFlag = true |