00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_SentenceStats_h
00023 #define moses_SentenceStats_h
00024
00025 #include <iostream>
00026 #include <string>
00027 #include <vector>
00028 #include <ctime>
00029 #include "Timer.h"
00030 #include "Phrase.h"
00031 #include "Hypothesis.h"
00032 #include "TypeDef.h"
00033 #include "InputType.h"
00034 #include "Util.h"
00035
00036 namespace Moses
00037 {
00038
/**
 * Record of a single hypothesis recombination: how many source words were
 * involved and the scores of the two hypotheses that were recombined.
 */
struct RecombinationInfo {
  /**
   * Creates an empty record with every field set to zero, so that a
   * default-constructed object never exposes indeterminate values.
   */
  RecombinationInfo()
    : numSourceWords(0), betterProb(0.0f), worseProb(0.0f) {}

  /**
   * \param srcWords number of source words stored in numSourceWords
   * \param gProb    score stored in betterProb
   * \param bProb    score stored in worseProb
   */
  RecombinationInfo(size_t srcWords, float gProb, float bProb)
    : numSourceWords(srcWords), betterProb(gProb), worseProb(bProb) {}

  size_t numSourceWords;        //!< source-word count supplied at construction
  float betterProb, worseProb;  //!< scores of the better / worse hypothesis
};
00048
00052 class SentenceStats
00053 {
00054 public:
00055
00056
00057
00058
00059 SentenceStats(const InputType& source) {
00060 Initialize(source);
00061 }
00062 void Initialize(const InputType& source) {
00063 m_numHyposCreated = 0;
00064 m_numHyposPopped = 0;
00065 m_numHyposPruned = 0;
00066 m_numHyposDiscarded = 0;
00067 m_numHyposEarlyDiscarded = 0;
00068 m_numHyposNotBuilt = 0;
00069 m_totalSourceWords = source.GetSize();
00070 m_recombinationInfos.clear();
00071 m_deletedWords.clear();
00072 m_insertedWords.clear();
00073 }
00074
00075
00076
00077
00078 void CalcFinalStats(const Hypothesis& bestHypo);
00079
00080 unsigned int GetTotalHypos() const {
00081 return m_numHyposCreated + m_numHyposNotBuilt;
00082 }
00083 unsigned int GetNumHyposPopped() const {
00084 return m_numHyposPopped;
00085 }
00086 size_t GetNumHyposRecombined() const {
00087 return m_recombinationInfos.size();
00088 }
00089 unsigned int GetNumHyposPruned() const {
00090 return m_numHyposPruned;
00091 }
00092 unsigned int GetNumHyposDiscarded() const {
00093 return m_numHyposDiscarded;
00094 }
00095 unsigned int GetNumHyposEarlyDiscarded() const {
00096 return m_numHyposEarlyDiscarded;
00097 }
00098 unsigned int GetNumHyposNotBuilt() const {
00099 return m_numHyposNotBuilt;
00100 }
00101 double GetTimeCollectOpts() const {
00102 return m_timeCollectOpts.get_elapsed_time();
00103 }
00104 double GetTimeBuildHyp() const {
00105 return m_timeBuildHyp.get_elapsed_time();
00106 }
00107 double GetTimeCalcLM() const {
00108 return m_timeCalcLM.get_elapsed_time();
00109 }
00110 double GetTimeOtherScore() const {
00111 return m_timeOtherScore.get_elapsed_time();
00112 }
00113 double GetTimeEstimateScore() const {
00114 return m_timeEstimateScore.get_elapsed_time();
00115 }
00116 double GetTimeStack() const {
00117 return m_timeStack.get_elapsed_time();
00118 }
00119 double GetTimeSetupCubes() const {
00120 return m_timeSetupCubes.get_elapsed_time();
00121 }
00122 double GetTimeManageCubes() const {
00123 return m_timeManageCubes.get_elapsed_time();
00124 }
00125 double GetTimeTotal() const {
00126 return m_timeTotal.get_elapsed_time();
00127 }
00128 size_t GetTotalSourceWords() const {
00129 return m_totalSourceWords;
00130 }
00131 size_t GetNumWordsDeleted() const {
00132 return m_deletedWords.size();
00133 }
00134 size_t GetNumWordsInserted() const {
00135 return m_insertedWords.size();
00136 }
00137 const std::vector<const Phrase*>& GetDeletedWords() const {
00138 return m_deletedWords;
00139 }
00140 const std::vector<std::string>& GetInsertedWords() const {
00141 return m_insertedWords;
00142 }
00143
00144 void AddRecombination(const Hypothesis& worseHypo, const Hypothesis& betterHypo) {
00145 m_recombinationInfos.push_back(RecombinationInfo(worseHypo.GetWordsBitmap().GetNumWordsCovered(),
00146 betterHypo.GetFutureScore(), worseHypo.GetFutureScore()));
00147 }
00148 void AddCreated() {
00149 m_numHyposCreated++;
00150 }
00151 void AddPopped() {
00152 m_numHyposPopped++;
00153 }
00154 void AddPruning() {
00155 m_numHyposPruned++;
00156 }
00157 void AddEarlyDiscarded() {
00158 m_numHyposEarlyDiscarded++;
00159 }
00160 void AddNotBuilt() {
00161 m_numHyposNotBuilt++;
00162 }
00163 void AddDiscarded() {
00164 m_numHyposDiscarded++;
00165 }
00166
00167 void StartTimeCollectOpts() {
00168 m_timeCollectOpts.start();
00169 }
00170 void StopTimeCollectOpts() {
00171 m_timeCollectOpts.stop();
00172 }
00173 void StartTimeBuildHyp() {
00174 m_timeBuildHyp.start();
00175 }
00176 void StopTimeBuildHyp() {
00177 m_timeBuildHyp.stop();
00178 }
00179 void StartTimeCalcLM() {
00180 m_timeCalcLM.start();
00181 }
00182 void StopTimeCalcLM() {
00183 m_timeCalcLM.stop();
00184 }
00185 void StartTimeOtherScore() {
00186 m_timeOtherScore.start();
00187 }
00188 void StopTimeOtherScore() {
00189 m_timeOtherScore.stop();
00190 }
00191 void StartTimeEstimateScore() {
00192 m_timeEstimateScore.start();
00193 }
00194 void StopTimeEstimateScore() {
00195 m_timeEstimateScore.stop();
00196 }
00197 void StartTimeSetupCubes() {
00198 m_timeSetupCubes.start();
00199 }
00200 void StopTimeSetupCubes() {
00201 m_timeSetupCubes.stop();
00202 }
00203 void StartTimeManageCubes() {
00204 m_timeManageCubes.start();
00205 }
00206 void StopTimeManageCubes() {
00207 m_timeManageCubes.stop();
00208 }
00209 void StartTimeStack() {
00210 m_timeStack.start();
00211 }
00212 void StopTimeStack() {
00213 m_timeStack.stop();
00214 }
00215 void StartTimeTotal() {
00216 m_timeTotal.start();
00217 }
00218 void StopTimeTotal() {
00219 m_timeTotal.stop();
00220 }
00221
00222 protected:
00223
00224
00225
00226
00227 void AddDeletedWords(const Hypothesis& hypo);
00228
00229
00230
00231
00232
00233 std::vector<RecombinationInfo> m_recombinationInfos;
00234 unsigned int m_numHyposCreated;
00235 unsigned int m_numHyposPopped;
00236 unsigned int m_numHyposPruned;
00237 unsigned int m_numHyposDiscarded;
00238 unsigned int m_numHyposEarlyDiscarded;
00239 unsigned int m_numHyposNotBuilt;
00240 Timer m_timeCollectOpts;
00241 Timer m_timeBuildHyp;
00242 Timer m_timeEstimateScore;
00243 Timer m_timeOtherScore;
00244 Timer m_timeCalcLM;
00245 Timer m_timeStack;
00246 Timer m_timeSetupCubes;
00247 Timer m_timeManageCubes;
00248 Timer m_timeTotal;
00249
00250
00251 size_t m_totalSourceWords;
00252 std::vector<const Phrase*> m_deletedWords;
00253 std::vector<std::string> m_insertedWords;
00254 };
00255
00256 inline std::ostream& operator<<(std::ostream& os, const SentenceStats& ss)
00257 {
00258 double totalTime = ss.GetTimeTotal();
00259 double otherTime = totalTime - (ss.GetTimeCollectOpts() + ss.GetTimeBuildHyp() + ss.GetTimeEstimateScore() + ss.GetTimeCalcLM() + ss.GetTimeOtherScore() + ss.GetTimeStack() + ss.GetTimeSetupCubes() + ss.GetTimeManageCubes());
00260
00261 return os << "total hypotheses considered = " << ss.GetTotalHypos() << std::endl
00262 << " number popped from cube = " << ss.GetNumHyposPopped() << std::endl
00263 << " number not built = " << ss.GetNumHyposNotBuilt() << std::endl
00264 << " number discarded early = " << ss.GetNumHyposEarlyDiscarded() << std::endl
00265 << " number discarded = " << ss.GetNumHyposDiscarded() << std::endl
00266 << " number recombined = " << ss.GetNumHyposRecombined() << std::endl
00267 << " number pruned = " << ss.GetNumHyposPruned() << std::endl
00268
00269 << "time to collect opts " << ss.GetTimeCollectOpts() << " (" << (int)(100 * ss.GetTimeCollectOpts()/totalTime) << "%)" << std::endl
00270 << " create hyps " << ss.GetTimeBuildHyp() << " (" << (int)(100 * ss.GetTimeBuildHyp()/totalTime) << "%)" << std::endl
00271 << " estimate score " << ss.GetTimeEstimateScore() << " (" << (int)(100 * ss.GetTimeEstimateScore()/totalTime) << "%)" << std::endl
00272 << " calc lm " << ss.GetTimeCalcLM() << " (" << (int)(100 * ss.GetTimeCalcLM()/totalTime) << "%)" << std::endl
00273 << " other hyp score " << ss.GetTimeOtherScore() << " (" << (int)(100 * ss.GetTimeOtherScore()/totalTime) << "%)" << std::endl
00274 << " set up cubes " << ss.GetTimeSetupCubes() << " (" << (int)(100 * ss.GetTimeSetupCubes()/totalTime) << "%)" << std::endl
00275 << " manage cubes " << ss.GetTimeManageCubes() << " (" << (int)(100 * ss.GetTimeManageCubes()/totalTime) << "%)" << std::endl
00276 << " manage stacks " << ss.GetTimeStack() << " (" << (int)(100 * ss.GetTimeStack()/totalTime) << "%)" << std::endl
00277 << " other " << otherTime << " (" << (int)(100 * otherTime/totalTime) << "%)" << std::endl
00278
00279 << "total source words = " << ss.GetTotalSourceWords() << std::endl
00280 << " words deleted = " << ss.GetNumWordsDeleted() << " (" << Join(" ", ss.GetDeletedWords()) << ")" << std::endl
00281 << " words inserted = " << ss.GetNumWordsInserted() << " (" << Join(" ", ss.GetInsertedWords()) << ")" << std::endl;
00282 }
00283
00284 }
00285 #endif