00001 /****************************************************************************** 00002 IrstLM: IRST Language Model Toolkit, compile LM 00003 Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy 00004 00005 This library is free software; you can redistribute it and/or 00006 modify it under the terms of the GNU Lesser General Public 00007 License as published by the Free Software Foundation; either 00008 version 2.1 of the License, or (at your option) any later version. 00009 00010 This library is distributed in the hope that it will be useful, 00011 but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 Lesser General Public License for more details. 00014 00015 You should have received a copy of the GNU Lesser General Public 00016 License along with this library; if not, write to the Free Software 00017 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00018 00019 ******************************************************************************/ 00020 //class managing a collection of documents for PLSA 00021 00022 00023 class doc 00024 { 00025 bool binary; //is file in binary format? 00026 mfstream* df; //doc file descriptor 00027 char* dfname; //doc file name 00028 dictionary* dict; 00029 00030 public: 00031 int cd; //current doc index 00032 int n; //number of docs 00033 int m; //number of words in the current doc 00034 int* V; //words in current doc 00035 int* N; //frequencies in doc 00036 int* T; //temporary frequencies 00037 00038 doc(dictionary* d,char* docfname); 00039 ~doc(); 00040 int count(); 00041 int open(); 00042 int save(char* fname); 00043 int savernd(char* fname,int num); 00044 int save(char* fname,int bsz); 00045 int reset(); 00046 int read(); 00047 }; 00048