* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#ifndef SVMT_HASH_H
+
#include <stdint.h>
#ifdef __cplusplus
-
extern "C"
{
- #endif
-
- typedef struct hash_t
- {
- /* array of hash nodes */
- struct hash_node_t **bucket;
- int size; /* size of the array */
- int entries; /* number of entries in table */
- int downshift; /* shift cound, used in hash function */
- int mask; /* used to select bits for hashing */
- } hash_t;
-
- typedef struct hash_node_t
- {
- uintptr_t data; /* data in hash node */
- const char * key; /* key for hash lookup */
- struct hash_node_t *next;/* next node in hash chain */
- } hash_node_t;
-
- #define HASH_FAIL -1
-
- void hash_init(hash_t *, int);
-
- uintptr_t hash_lookup (const hash_t *, const char *);
-
- uintptr_t hash_insert (hash_t *, const char *, uintptr_t);
-
- uintptr_t hash_delete (hash_t *, const char *);
-
- void hash_destroy(hash_t *);
-
- char *hash_stats (hash_t *);
-
- void hash_print(hash_t *,FILE *f);
+#endif
- void rebuild_table(hash_t *);
+typedef struct hash_t /* hash table header: bucket array plus bookkeeping fields */
+{
+ struct hash_node_t **bucket; /* array of pointers to hash nodes */
+ int size; /* size of the bucket array */
+ int entries; /* number of entries in table */
+ int downshift; /* shift count, used in hash function */
+ int mask; /* used to select bits for hashing */
+} hash_t;
+
+typedef struct hash_node_t /* one key/data entry, singly linked into a hash chain */
+{
+ uintptr_t data; /* data stored in this node (pointer-sized unsigned integer) */
+ const char * key; /* key for hash lookup; const, so never written through this pointer */
+ struct hash_node_t *next; /* next node in hash chain */
+} hash_node_t;
+
+#define HASH_FAIL -1 /* failure code; NOTE(review): lookup/insert/delete return uintptr_t, where -1 converts to UINTPTR_MAX; confirm how callers compare */
+
+void hash_init(hash_t *, int); /* NOTE(review): the int is presumably the initial bucket count; confirm in the implementation */
+uintptr_t hash_lookup (const hash_t *, const char *); /* table is const-qualified: lookup takes it read-only */
+uintptr_t hash_insert (hash_t *, const char *, uintptr_t); /* arguments: table, key, data */
+uintptr_t hash_delete (hash_t *, const char *); /* arguments: table, key */
+void hash_destroy(hash_t *);
+char *hash_stats (hash_t *); /* NOTE(review): ownership of the returned string is not visible here; confirm who frees it */
+void hash_print(hash_t *,FILE *f); /* NOTE(review): FILE needs <stdio.h>; only <stdint.h> is included in the visible part of this header */
+void rebuild_table(hash_t *); /* the only declaration here without the hash_ prefix */
- #ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
+
+#define SVMT_HASH_H
+#endif
-/*
- * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef LEARNER_H
-
-struct samples_counter_t
-{
- char *key;
- int positive;
- int negative;
-};
-
-class learner
-{
- private:
- int learnerNumAMBP,learnerNumUNKP;
- hash_t *learnerAMBP_H,*learnerUNKP_H;
- swindow *sw;
- simpleList learnerFeatureList,*learnerAMBP_L,*learnerUNKP_L;
- FILE *fKnown,*fUnknown;
- int learnerNumFeatures;
- char obtainAtrChar(FILE *channel);
- int obtainAtrInt(FILE *channel,int *endAtr);
- void learnerCreateFeatureList(char *name, simpleList *featureList);
- simpleList *learnerTransformHashInList(hash_t *tptr);
- void learnerCreateDefaultFile(const char *modelName, const char *str);
- void learnerCreatePOSFile(char *modelName, int is_ambp, hash_t *h);
- void learnerCount(char *name, int *sentences, int *words);
- int learnerExecSVMlight(char *svmdir, char *options, char *posFile, char *outFile);
- int learnerLeftToRight(simpleList *, simpleList *, dictionary *dKnown, dictionary *dUnknown,/*mapping *mKnown, mapping *mUnknown,*/int numWrds, int inicio);
- int learnerRightToLeft(simpleList *,simpleList *, dictionary *dKnown, dictionary *dUnknown,/*mapping *mKnown, mapping *mUnknown,*/int numWrds, int inicio);
- void learnerGetFeatures(nodo *elem, stack_t *stk,dictionary *d, simpleList *featureList, int direction);
- void learnerGenerateFeatures(nodo *elem,simpleList *featureList, dictionary *d, int direction);
- void learnerGenerateFeaturesUnk(nodo *elem,simpleList *featureList, dictionary *d, dictionary *dUnk, int direction);
- void learnerTrainModel(char *trainingFileName,dictionary *d, int numModel,int direction, int numSent, int numWords, int numChunks);
- weightRepository *learnerBuiltWeightRepository(weightRepository *wr,mapping *m,char *pos,char *fileName);
- hash_t *learnerBuiltBias(hash_t *,char *pos,char *fileName);
- void learnerDestroyBias(hash_t *h);
- int learnerIsPossiblePOS(char *wrd, char *pos, int Known_or_Unknown);
- simpleList *learnerGetPotser(char *wrd, int Known_or_Unknown, dictionary *d);
- void learnerTraining(FILE *f,char *modelName, int numModel,int LR_or_RL,int K_or_U,dictionary *d,simpleList *lpos);
- void learnerPrintMessage(int numModel, int K_or_U, int LR_or_RL,int is_fex);
- int learnerNumChunks(char *trainingFileName,float percentage,int nSentences);
- int learnerIsInsideList(simpleList *l, char *key);
- void learnerDoLearn(FILE *f,int numModel,int LR_or_RL,int K_or_U,dictionary *d,simpleList *lPosToTrain);
- void learnerDressNakedSetTrain(dictionary *d,mapping *m,FILE *f, char* pos, int numModel, int direction, int K_or_U,int *nPos,int *nNeg);
- void learnerPushSample(char *wrd,int numModel,int direction, int Known_or_Unknown, char *pos,char *samplePos,char *features,dictionary *d, int *nNeg, int *nPos);
- void learnerPushSampleUnk(char *wrd,int numModel,int direction, int Known_or_Unknown,char *pos, char *samplePos, char *features,dictionary *d, int *nNeg, int *nPos);
- char *learnerCreateFeatureString(FILE *f,mapping *m);
-
- char *read_feature_list_from_config_file(FILE *f, char *first_feature);
- void read_config_file(const char *config_file);
-
- public:
- learner(char *modelName);
- learner();
- ~learner();
- void learnerRun(char *train);
-};
-
-#define LEARNER_H
-#endif
+/*
+ * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef SVMT_LEARNER_H
+
+struct samples_counter_t // a key string paired with two integer counters
+{
+ char *key; // NOTE(review): ownership of this string is not visible here; confirm who allocates and frees it
+ int positive; // presumably the number of positive samples seen for key; TODO confirm
+ int negative; // presumably the number of negative samples seen for key; TODO confirm
+};
+
+class learner // private state and helpers; the public surface is two constructors, the destructor and learnerRun()
+{
+private:
+ int learnerNumAMBP,learnerNumUNKP;
+ hash_t *learnerAMBP_H,*learnerUNKP_H; // NOTE(review): AMBP/UNKP presumably mean ambiguous-POS / unknown-POS; confirm in the implementation
+ swindow *sw;
+ simpleList learnerFeatureList,*learnerAMBP_L,*learnerUNKP_L;
+ FILE *fKnown,*fUnknown;
+ int learnerNumFeatures;
+ char obtainAtrChar(FILE *channel);
+ int obtainAtrInt(FILE *channel,int *endAtr);
+ void learnerCreateFeatureList(char *name, simpleList *featureList);
+ simpleList *learnerTransformHashInList(hash_t *tptr);
+ void learnerCreateDefaultFile(const char *modelName, const char *str);
+ void learnerCreatePOSFile(char *modelName, int is_ambp, hash_t *h);
+ void learnerCount(char *name, int *sentences, int *words);
+ int learnerExecSVMlight(char *svmdir, char *options, char *posFile, char *outFile);
+ int learnerLeftToRight(simpleList *, simpleList *, dictionary *dKnown, dictionary *dUnknown,/*mapping *mKnown, mapping *mUnknown,*/int numWrds, int inicio);
+ int learnerRightToLeft(simpleList *,simpleList *, dictionary *dKnown, dictionary *dUnknown,/*mapping *mKnown, mapping *mUnknown,*/int numWrds, int inicio);
+ void learnerGetFeatures(nodo *elem, stack_t *stk,dictionary *d, simpleList *featureList, int direction);
+ void learnerGenerateFeatures(nodo *elem,simpleList *featureList, dictionary *d, int direction);
+ void learnerGenerateFeaturesUnk(nodo *elem,simpleList *featureList, dictionary *d, dictionary *dUnk, int direction);
+ void learnerTrainModel(char *trainingFileName,dictionary *d, int numModel,int direction, int numSent, int numWords, int numChunks);
+ weightRepository *learnerBuiltWeightRepository(weightRepository *wr,mapping *m,char *pos,char *fileName);
+ hash_t *learnerBuiltBias(hash_t *,char *pos,char *fileName);
+ void learnerDestroyBias(hash_t *h);
+ int learnerIsPossiblePOS(char *wrd, char *pos, int Known_or_Unknown);
+ simpleList *learnerGetPotser(char *wrd, int Known_or_Unknown, dictionary *d);
+ void learnerTraining(FILE *f,char *modelName, int numModel,int LR_or_RL,int K_or_U,dictionary *d,simpleList *lpos);
+ void learnerPrintMessage(int numModel, int K_or_U, int LR_or_RL,int is_fex);
+ int learnerNumChunks(char *trainingFileName,float percentage,int nSentences);
+ int learnerIsInsideList(simpleList *l, char *key);
+ void learnerDoLearn(FILE *f,int numModel,int LR_or_RL,int K_or_U,dictionary *d,simpleList *lPosToTrain);
+ void learnerDressNakedSetTrain(dictionary *d,mapping *m,FILE *f, char* pos, int numModel, int direction, int K_or_U,int *nPos,int *nNeg);
+ void learnerPushSample(char *wrd,int numModel,int direction, int Known_or_Unknown, char *pos,char *samplePos,char *features,dictionary *d, int *nNeg, int *nPos);
+ void learnerPushSampleUnk(char *wrd,int numModel,int direction, int Known_or_Unknown,char *pos, char *samplePos, char *features,dictionary *d, int *nNeg, int *nPos);
+ char *learnerCreateFeatureString(FILE *f,mapping *m);
+
+ char *read_feature_list_from_config_file(FILE *f, char *first_feature);
+ void read_config_file(const char *config_file);
+
+public:
+ learner(char *modelName);
+ learner();
+ ~learner();
+ void learnerRun(char *train);
+};
+
+#define SVMT_LEARNER_H
+#endif
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#ifndef MARKS_H
+#ifndef SVMT_MARKS_H
-#define SLASTW "Swn" //Last Word
-#define WMARK "w" //Words
-#define PMARK "p" //POS
-#define KMARK "k" //Ambiguity Classes
-#define MMARK "m" //Maybe
-#define MFTMARK "f" //Most Frequent Tag --> f(-1) --> f-1:NN
-#define PREFIX_MARK "a" //prefixes
-#define SUFFIX_MARK "z" //Suffixes
-#define CHAR_A_MARK "ca" //Character, counting from the beggining of the begining of the token (starting at 1)
-#define CHAR_Z_MARK "cz" //Character, counting from the end of the begining of the token (starting at 1)
-#define LENGTH_MARK "L" //token length
-#define START_CAPITAL_MARK "SA" //start with upper case
-#define START_LOWER_MARK "sa" //start with lower case
-#define START_NUMBER_MARK "SN" //start with number
-#define ALL_UPPER_MARK "AA" //all upper case
-#define ALL_LOWER_MARK "aa" //all lower case
-#define CONTAIN_CAP_MARK "CA" //contains a capital letter
-#define CONTAIN_CAPS_MARK "CAA" //contains several capital letters
-#define CONTAIN_PERIOD_MARK "CP" //contains period
-#define CONTAIN_COMMA_MARK "CC" //contains comma
-#define CONTAIN_NUMBER_MARK "CN" //contains number
-#define MULTIWORD_MARK "MW" //contains underscores (multiword)
+#define SLASTW "Swn" //Last Word
+#define WMARK "w" //Words
+#define PMARK "p" //POS
+#define KMARK "k" //Ambiguity Classes
+#define MMARK "m" //Maybe
+#define MFTMARK "f" //Most Frequent Tag --> f(-1) --> f-1:NN
+#define PREFIX_MARK "a" //Prefixes
+#define SUFFIX_MARK "z" //Suffixes
+#define CHAR_A_MARK "ca" //Character, counting from the beginning of the token (starting at 1)
+#define CHAR_Z_MARK "cz" //Character, counting from the end of the token (starting at 1)
+#define LENGTH_MARK "L" //token length
+#define START_CAPITAL_MARK "SA" //start with upper case
+#define START_LOWER_MARK "sa" //start with lower case
+#define START_NUMBER_MARK "SN" //start with number
+#define ALL_UPPER_MARK "AA" //all upper case
+#define ALL_LOWER_MARK "aa" //all lower case
+#define CONTAIN_CAP_MARK "CA" //contains a capital letter
+#define CONTAIN_CAPS_MARK "CAA" //contains several capital letters
+#define CONTAIN_PERIOD_MARK "CP" //contains period
+#define CONTAIN_COMMA_MARK "CC" //contains comma
+#define CONTAIN_NUMBER_MARK "CN" //contains number
+#define MULTIWORD_MARK "MW" //contains underscores (multiword)
-#define MARKS_H
+#define SVMT_MARKS_H
#endif
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#ifndef WEIGHT_H
+#ifndef SVMT_WEIGHT_H
typedef struct weight_node_t
{
@@ -26,27 +26,26 @@ typedef struct weight_node_t
class weightRepository
{
- private:
- hash_t wr;
+private:
+ hash_t wr; // hash table held by value; the only data member declared in this class
- //ADD 180705
- char *wrGetMergeInput(hash_t *tptr, float filter);
- //char *wrGetMergeInput(hash_t *tptr); //DEL 180705
- FILE *openFile(char *name, char mode[]);
- void wrReadMergeModel(FILE *in,float filter);
- char wrSaltarBlancs(FILE *in,char c,int jmp);
- void wrAddPOS(uintptr_t obj, char* pos, long double weight);
- public:
- long double wrGetWeight(const char *feature,char *pos);
- void wrAdd(char *feature, char* pos, long double weight);
- //void wrWrite(const char *outName); //DEL 180705
- //ADD 180705
- void wrWrite(const char *outName, float filter);
- void wrWriteHash(hash_t *tptr,FILE *f,char separador);
- weightRepository(char *fileName,float filter);
- weightRepository();
- ~weightRepository();
+ //char *wrGetMergeInput(hash_t *tptr); //DEL 180705 (old signature, without the filter parameter)
+ char *wrGetMergeInput(hash_t *tptr, float filter); //ADD 180705 (signature with the filter parameter)
+ FILE *openFile(char *name, char mode[]);
+ void wrReadMergeModel(FILE *in,float filter);
+ char wrSaltarBlancs(FILE *in,char c,int jmp); // NOTE(review): name is Catalan/Spanish for "skip blanks"; behaviour to be confirmed in the implementation
+ void wrAddPOS(uintptr_t obj, char* pos, long double weight);
+
+public:
+ long double wrGetWeight(const char *feature,char *pos);
+ void wrAdd(char *feature, char* pos, long double weight);
+ //void wrWrite(const char *outName); //DEL 180705 (old signature, without the filter parameter)
+ void wrWrite(const char *outName, float filter); //ADD 180705 (signature with the filter parameter)
+ void wrWriteHash(hash_t *tptr,FILE *f,char separador);
+ weightRepository(char *fileName,float filter);
+ weightRepository();
+ ~weightRepository();
};
-#define WEIGHT_H
+#define SVMT_WEIGHT_H
#endif