00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <cassert>
00004 #include <algorithm>
00005 #include <functional>
00006 #include <boost/filesystem.hpp>
00007 #include "pruneGeneration.h"
00008 #include "moses/InputFileStream.h"
00009 #include "moses/OutputFileStream.h"
00010
00011 using namespace std;
00012
00013 int main(int argc, char **argv)
00014 {
00015 cerr << "Starting" << endl;
00016 int limit = atoi(argv[1]);
00017 string inPathStem = argv[2];
00018 string outPathStem = argv[3];
00019
00020 namespace fs = boost::filesystem;
00021
00022
00023 fs::path p(inPathStem);
00024 fs::path dir = p.parent_path();
00025
00026
00027 fs::path fileStem = p.filename();
00028 string fileStemStr = fileStem.native();
00029 size_t fileStemStrSize = fileStemStr.size();
00030
00031
00032
00033 fs::directory_iterator end_iter;
00034 for( fs::directory_iterator dir_iter(dir) ; dir_iter != end_iter ; ++dir_iter) {
00035 if (fs::is_regular_file(dir_iter->status())) {
00036 fs::path currPath = *dir_iter;
00037 string currPathStr = currPath.native();
00038
00039
00040 fs::path currFile = currPath.filename();
00041 string currFileStr = currFile.native();
00042
00043 if (currFileStr.find(fileStemStr) == 0) {
00044
00045
00046 string suffix = currFileStr.substr(fileStemStrSize, currFileStr.size() - fileStemStrSize);
00047 string outPath = outPathStem + suffix;
00048 cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;
00049
00050 Moses::InputFileStream inStrme(currPathStr);
00051 Moses::OutputFileStream outStrme(outPath);
00052 Process(limit, inStrme, outStrme);
00053
00054 }
00055 }
00056 }
00057
00058 cerr << "Finished" << endl;
00059 }
00060
00061 void Process(int limit, istream &inStrme, ostream &outStrme)
00062 {
00063 vector<Rec> records;
00064 string prevInWord;
00065 string line;
00066 while (getline(inStrme, line)) {
00067 vector<string> toks;
00068 Tokenize(toks, line);
00069 assert(toks.size() == 4);
00070
00071 if (prevInWord != toks[0]) {
00072 Output(outStrme, records, limit);
00073 records.clear();
00074 }
00075
00076
00077 float prob = atof(toks[2].c_str());
00078 records.push_back(Rec(prob, line));
00079
00080 prevInWord = toks[0];
00081 }
00082
00083
00084 Output(outStrme, records, limit);
00085 records.clear();
00086
00087 }
00088
00089 void Output(ostream &outStrme, vector<Rec> &records, int limit)
00090 {
00091 std::sort(records.rbegin(), records.rend());
00092
00093 for (size_t i = 0; i < limit && i < records.size(); ++i) {
00094 const Rec &rec = records[i];
00095 outStrme << rec.line << endl;
00096 }
00097 }
00098