#pragma once

#include <cstddef>
#include <map>
#include <ostream>
#include <set>
#include <string>
#include <vector>

#include "ExtractionPhrasePair.h"
#include "OutputFileStream.h"

#include "syntax-common/tool.h"

#include "LexicalTable.h"
#include "Options.h"
#include "RuleSymbol.h"
#include "TokenizedRuleHalf.h"
#include "Vocabulary.h"

00020 namespace MosesTraining
00021 {
00022 namespace Syntax
00023 {
00024 namespace ScoreStsg
00025 {
00026
00027 class RuleGroup;
00028 class RuleTableWriter;
00029
00030 class ScoreStsg : public Tool
00031 {
00032 public:
00033 ScoreStsg();
00034
00035 virtual int Main(int argc, char *argv[]);
00036
00037 private:
00038 static const int kCountOfCountsMax;
00039
00040 double ComputeLexProb(const std::vector<RuleSymbol> &,
00041 const std::vector<RuleSymbol> &,
00042 const ALIGNMENT &);
00043
00044 void ParseAlignmentString(const std::string &, int,
00045 ALIGNMENT &);
00046
00047 void ProcessOptions(int, char *[], Options &) const;
00048
00049 void ProcessRuleGroup(const RuleGroup &, RuleTableWriter &);
00050
00051 void ProcessRuleGroupOrDie(const RuleGroup &, RuleTableWriter &,
00052 std::size_t, std::size_t);
00053
00054 void TokenizeRuleHalf(const std::string &, TokenizedRuleHalf &);
00055
00056 Options m_options;
00057 Vocabulary m_srcVocab;
00058 Vocabulary m_tgtVocab;
00059 LexicalTable m_lexTable;
00060 std::vector<int> m_countOfCounts;
00061 int m_totalDistinct;
00062 TokenizedRuleHalf m_sourceHalf;
00063 TokenizedRuleHalf m_targetHalf;
00064 ALIGNMENT m_tgtToSrc;
00065 };
00066
00067 }
00068 }
00069 }