/*************************************************************************************************
 * The core API of Hyper Estraier
 *                                                      Copyright (C) 2004-2005 Mikio Hirabayashi
 * This file is part of Hyper Estraier.
 * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License or any later version.  Hyper Estraier is distributed in the hope
 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 * You should have received a copy of the GNU Lesser General Public License along with Hyper
 * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307 USA.
 *************************************************************************************************/


#ifndef _ESTRAIER_H                      /* duplication check */
#define _ESTRAIER_H

#if defined(__cplusplus)                 /* export for C++ */
extern "C" {
#endif



/*************************************************************************************************
 * common settings
 *************************************************************************************************/


/* version string of Hyper Estraier */
extern const char *est_version;



/*************************************************************************************************
 * underlying headers
 *************************************************************************************************/


#include <depot.h>
#include <curia.h>
#include <cabin.h>
#include <villa.h>
#include <stdlib.h>



/*************************************************************************************************
 * API for document
 *************************************************************************************************/


/* Predefined attribute names of a document.  Each of them begins with `@'. */
#define ESTDATTRID     "@id"             /* name of the attribute of the ID */
#define ESTDATTRURI    "@uri"            /* name of the attribute of the URI */
#define ESTDATTRCDATE  "@cdate"          /* name of the attribute of the creation date */
#define ESTDATTRMDATE  "@mdate"          /* name of the attribute of the modification date */
#define ESTDATTRTITLE  "@title"          /* name of the attribute of the title */
#define ESTDATTRAUTHOR "@author"         /* name of the attribute of the author */
#define ESTDATTRTYPE   "@type"           /* name of the attribute of the content type */
#define ESTDATTRLANG   "@lang"           /* name of the attribute of the language */
#define ESTDATTRSIZE   "@size"           /* name of the attribute of the entity size */

/* Structure of a document: an ID number, a map of attributes and a list of text sentences. */
typedef struct {                         /* type of structure for a document */
  int id;                                /* identification number (see `est_doc_id') */
  CBMAP *attrs;                          /* map of attributes */
  CBLIST *dtexts;                        /* list of sentences of the shown text */
} ESTDOC;


/* Create a document object.
   The return value is a new document object. */
ESTDOC *est_doc_new(void);


/* Create a document object from draft data.
   `draft' specifies a string of draft data.
   The return value is a document object made from the draft data. */
ESTDOC *est_doc_new_from_draft(const char *draft);


/* Destroy a document object.
   `doc' specifies the document object to be destroyed. */
void est_doc_delete(ESTDOC *doc);


/* Add an attribute to a document object.
   `doc' specifies a document object.
   `name' specifies the name of the attribute.
   `value' specifies the value of the attribute.  If it is `NULL', the attribute is removed. */
void est_doc_add_attr(ESTDOC *doc, const char *name, const char *value);


/* Add a sentence of text to a document object.
   `doc' specifies a document object.
   `text' specifies the sentence of text to be added. */
void est_doc_add_text(ESTDOC *doc, const char *text);


/* Add a hidden sentence to a document object.
   `doc' specifies a document object.
   `text' specifies the hidden sentence to be added. */
void est_doc_add_hidden_text(ESTDOC *doc, const char *text);


/* Get the ID number of a document object.
   `doc' specifies a document object.
   The return value is the ID number of the document object.  If the object has not been
   registered yet, -1 is returned. */
int est_doc_id(ESTDOC *doc);


/* Get a list of the attribute names of a document object.
   `doc' specifies a document object.
   The return value is a new list object of the attribute names of the document object.  Because
   the object of the return value is opened with the function `cblistopen', it should be closed
   with the function `cblistclose' when it is no longer in use. */
CBLIST *est_doc_attr_names(ESTDOC *doc);


/* Get the value of an attribute of a document object.
   `doc' specifies a document object.
   `name' specifies the name of an attribute.
   The return value is the value of the attribute, or `NULL' if it does not exist.  The lifetime
   of the returned string is tied to that of the document object. */
const char *est_doc_attr(ESTDOC *doc, const char *name);


/* Get a list of the sentences of the text of a document object.
   `doc' specifies a document object.
   The return value is a list object of the sentences of the text of the document object.  The
   lifetime of the returned object is tied to that of the document object. */
const CBLIST *est_doc_texts(ESTDOC *doc);


/* Concatenate the sentences of the text of a document object.
   `doc' specifies a document object.
   The return value is the concatenated sentences of the document object.  Because the region of
   the return value is allocated with the `malloc' call, it should be released with the `free'
   call when it is no longer in use. */
char *est_doc_cat_texts(ESTDOC *doc);


/* Dump the draft data of a document object.
   `doc' specifies a document object.
   The return value is the draft data of the document object.  Because the region of the return
   value is allocated with the `malloc' call, it should be released with the `free' call when it
   is no longer in use. */
char *est_doc_dump_draft(ESTDOC *doc);


/* Make a snippet of the body text of a document object.
   `doc' specifies a document object.
   `words' specifies a list object of words to be highlighted.
   `wwidth' specifies the whole width of the result.
   `hwidth' specifies the width of the string picked up from the beginning of the text.
   `awidth' specifies the width of the strings picked up around each highlighted word.
   The return value is a snippet string of the body text of the document object.  The snippet
   consists of lines of tab separated values.  Each line is a string to be shown.  Though most
   lines have only one field, some lines have two fields.  If the second field exists, the first
   field is to be shown highlighted, and the second field is its normalized form.  Because the
   region of the return value is allocated with the `malloc' call, it should be released with the
   `free' call when it is no longer in use. */
char *est_doc_make_snippet(ESTDOC *doc, const CBLIST *words, int wwidth, int hwidth, int awidth);


/* Check whether the text of a document object includes all of the specified words.
   `doc' specifies a document object.
   `words' specifies a list object of words to be checked.
   The return value is true if every specified word is found, else it is false. */
int est_doc_scan_words(ESTDOC *doc, const CBLIST *words);



/*************************************************************************************************
 * API for search conditions
 *************************************************************************************************/


/* special tokens of the search phrase */
#define ESTOPUVSET     "[UVSET]"         /* universal set */
#define ESTOPSIMILAR   "[SIMILAR]"       /* similarity search */

/* set operators of the search phrase */
#define ESTOPUNION     "OR"              /* union (disjunction) */
#define ESTOPISECT     "AND"             /* intersection (conjunction) */
#define ESTOPDIFF      "ANDNOT"          /* difference (intersection with negation) */
#define ESTOPWITH      "WITH"            /* delimiter for elements */

/* operators of attribute expressions */
#define ESTOPSTREQ     "STREQ"           /* string is equal */
#define ESTOPSTRNE     "STRNE"           /* string is not equal */
#define ESTOPSTRINC    "STRINC"          /* string is included in */
#define ESTOPSTRBW     "STRBW"           /* string begins with */
#define ESTOPSTREW     "STREW"           /* string ends with */
#define ESTOPNUMEQ     "NUMEQ"           /* number or date is equal */
#define ESTOPNUMNE     "NUMNE"           /* number or date is not equal */
#define ESTOPNUMGT     "NUMGT"           /* number or date is greater than */
#define ESTOPNUMGE     "NUMGE"           /* number or date is greater than or equal to */
#define ESTOPNUMLT     "NUMLT"           /* number or date is less than */
#define ESTOPNUMLE     "NUMLE"           /* number or date is less than or equal to */
#define ESTOPREGEX     "REGEX"           /* string matches regular expressions */

/* operators of order expressions */
#define ESTORDSTRA     "STRA"            /* strings in ascending order */
#define ESTORDSTRD     "STRD"            /* strings in descending order */
#define ESTORDNUMA     "NUMA"            /* numbers in ascending order */
#define ESTORDNUMD     "NUMD"            /* numbers in descending order */

/* Structure of search conditions: the phrase, attribute expressions, order and options. */
typedef struct {                         /* type of structure for search conditions */
  char *phrase;                          /* search phrase */
  int gstep;                             /* step of N-gram */
  int tfidf;                             /* whether with TF-IDF tuning */
  int simple;                            /* whether with the simplified phrase */
  CBLIST *attrs;                         /* conditions with attributes */
  char *order;                           /* sorting order */
  int max;                               /* maximum number of retrieval */
  int scfb;                              /* whether to feed back scores */
  int *scores;                           /* array of scores */
  int snum;                              /* number of elements of the score array */
  int opts;                              /* options for preservation */
} ESTCOND;

/* Options for `est_cond_set_options'.  They are bit flags and can be combined by bitwise or. */
enum {                                   /* enumeration for options */
  ESTCONDSURE = 1 << 0,                  /* check every N-gram key */
  ESTCONDUSU = 1 << 1,                   /* check N-gram keys skipping by one (the default) */
  ESTCONDFAST = 1 << 2,                  /* check N-gram keys skipping by two */
  ESTCONDAGIT = 1 << 3,                  /* check N-gram keys skipping by three */
  ESTCONDNOIDF = 1 << 4,                 /* without TF-IDF tuning */
  ESTCONDSIMPLE = 1 << 10,               /* with the simplified phrase */
  ESTCONDSCFB = 1 << 30                  /* feed back scores (for debug) */
};


/* Create a condition object.
   The return value is a new object of search conditions. */
ESTCOND *est_cond_new(void);


/* Destroy a condition object.
   `cond' specifies the condition object to be destroyed. */
void est_cond_delete(ESTCOND *cond);


/* Set the search phrase of a condition object.
   `cond' specifies a condition object.
   `phrase' specifies the search phrase. */
void est_cond_set_phrase(ESTCOND *cond, const char *phrase);


/* Add an expression for an attribute to a condition object.
   `cond' specifies a condition object.
   `expr' specifies the expression for an attribute. */
void est_cond_add_attr(ESTCOND *cond, const char *expr);


/* Set the order of a condition object.
   `cond' specifies a condition object.
   `expr' specifies an expression for the order.  By default, the order is by score, descending. */
void est_cond_set_order(ESTCOND *cond, const char *expr);


/* Set the maximum number of retrieval of a condition object.
   `cond' specifies a condition object.
   `max' specifies the maximum number of retrieval.  By default, the number of retrieval is
   unlimited. */
void est_cond_set_max(ESTCOND *cond, int max);


/* Set the options of retrieval of a condition object.
   `cond' specifies a condition object.
   `options' specifies options: `ESTCONDSURE' specifies that it checks every N-gram key,
   `ESTCONDUSU', which is the default, specifies that it checks N-gram keys skipping one key,
   `ESTCONDFAST' skips two keys, `ESTCONDAGIT' skips three keys, `ESTCONDNOIDF' specifies not to
   perform TF-IDF tuning, `ESTCONDSIMPLE' specifies to use the simplified phrase.  The options
   can be combined by bitwise or.  If keys are skipped, the search speed is improved but the
   relevance ratio decreases. */
void est_cond_set_options(ESTCOND *cond, int options);



/*************************************************************************************************
 * API for database
 *************************************************************************************************/


#define ESTIDXDMAX     16                /* max number of handles of the inverted index */

/* Structure of the inverted index, which holds up to `ESTIDXDMAX' database handles. */
typedef struct {                         /* type of structure for the inverted index */
  char *name;                            /* name of the database */
  int omode;                             /* open mode */
  VILLA *dbs[ESTIDXDMAX];                /* database handles */
  int dnum;                              /* number of divisions */
  VILLA *cdb;                            /* current database handle */
} ESTIDX;

/* Structure of a database object: the underlying database handles, the error state, the
   counters, the caches with their limits, and the callback functions. */
typedef struct {                         /* type of structure for a database object */
  char *name;                            /* name of the database */
  DEPOT *metadb;                         /* handle of the meta database */
  ESTIDX *idxdb;                         /* handles of the inverted indexes */
  VILLA *fwmdb;                          /* handle of the database for forward matching */
  CURIA *attrdb;                         /* handle of the database for attributes */
  CURIA *textdb;                         /* handle of the database for texts */
  VILLA *listdb;                         /* handle of the database for document list */
  int ecode;                             /* last happened error code */
  int fatal;                             /* whether to have a fatal error */
  int dseq;                              /* sequence for document IDs */
  int dnum;                              /* number of the documents */
  int amode;                             /* mode of text analyzer */
  CBMAP *idxcc;                          /* cache for the inverted index */
  size_t icsiz;                          /* power of the cache */
  size_t icmax;                          /* max size of the cache */
  CBMAP *outcc;                          /* cache for deleted documents */
  CBMAP *keycc;                          /* cache for keys for TF-IDF */
  int  kcmnum;                           /* max number of the key cache */
  CBMAP *attrcc;                         /* cache for attributes */
  int acmnum;                            /* max number of the attribute cache */
  CBMAP *textcc;                         /* cache for texts */
  int tcmnum;                            /* max number of the text cache */
  CBMAP *spacc;                          /* special cache for attributes */
  int scmnum;                            /* max number of the special cache */
  char *scname;                          /* name of the attribute for the special cache */
  void (*cbinfo)(const char *);          /* callback function to inform of events */
  CBMAP *(*cbvec)(void *, int, void *);  /* callback function to create a vector */
  void *vecdata;                         /* arbitrary object for the vectorizer */
  CBMAP *metacc;                         /* cache for meta data */
} ESTDB;

/* Error codes.  `est_err_msg' gives the string of each code. */
enum {                                   /* enumeration for error codes */
  ESTENOERR,                             /* no error */
  ESTEINVAL,                             /* invalid argument */
  ESTEACCES,                             /* access forbidden */
  ESTELOCK,                              /* lock failure */
  ESTEDB,                                /* database problem */
  ESTEIO,                                /* I/O problem */
  ESTENOITEM,                            /* no item */
  ESTEMISC = 9999                        /* miscellaneous */
};

/* Open modes for `est_db_open'.  They are bit flags and can be combined by bitwise or. */
enum {                                   /* enumeration for open modes */
  ESTDBREADER = 1 << 0,                  /* open as a reader */
  ESTDBWRITER = 1 << 1,                  /* open as a writer */
  ESTDBCREAT = 1 << 2,                   /* a writer creating */
  ESTDBTRUNC = 1 << 3,                   /* a writer truncating */
  ESTDBNOLCK = 1 << 4,                   /* open without locking */
  ESTDBLCKNB = 1 << 5,                   /* lock without blocking */
  ESTDBPERFNG = 1 << 6                   /* use perfect N-gram analyzer */
};

/* Options for `est_db_put_doc'. */
enum {                                   /* enumeration for options of document registration */
  ESTPDCLEAN = 1 << 0                    /* clean up dispensable regions */
};

/* Options for `est_db_out_doc'. */
enum {                                   /* enumeration for options of document deletion */
  ESTODCLEAN = 1 << 0                    /* clean up dispensable regions */
};

/* Options for `est_db_optimize'.  They can be combined by bitwise or. */
enum {                                   /* enumeration for options of optimization */
  ESTOPTNOPURGE = 1 << 0,                /* omit purging dispensable regions of deleted documents */
  ESTOPTNODBOPT = 1 << 1                 /* omit optimization of the database files */
};

/* Options for `est_db_get_doc'.  They can be combined by bitwise or. */
enum {                                   /* enumeration for options of document retrieval */
  ESTGDNOATTR = 1 << 0,                  /* no attributes */
  ESTGDNOTEXT = 1 << 1                   /* no text */
};


/* Get the message string of an error code.
   `ecode' specifies an error code.
   The return value is the message string of the error code. */
const char *est_err_msg(int ecode);


/* Open a database.
   `name' specifies the name of a database directory.
   `omode' specifies open modes: `ESTDBWRITER' as a writer, `ESTDBREADER' as a reader.  If the
   mode is `ESTDBWRITER', the following may be added by bitwise or: `ESTDBCREAT', which means it
   creates a new database if one does not exist, `ESTDBTRUNC', which means it creates a new
   database regardless of whether one exists.  Both `ESTDBREADER' and `ESTDBWRITER' can be
   combined by bitwise or with `ESTDBNOLCK', which means it opens a database file without file
   locking, or `ESTDBLCKNB', which means locking is performed without blocking.  If `ESTDBNOLCK'
   is used, the application is responsible for exclusion control.  `ESTDBCREAT' can be combined
   by bitwise or with `ESTDBPERFNG', which means N-gram analysis is performed against European
   text also.
   `ecp' specifies the pointer to a variable to which the error code is assigned.
   The return value is a database object of the database, or `NULL' on failure. */
ESTDB *est_db_open(const char *name, int omode, int *ecp);


/* Close a database.
   `db' specifies a database object.
   `ecp' specifies the pointer to a variable to which the error code is assigned.
   The return value is true on success, else it is false. */
int est_db_close(ESTDB *db, int *ecp);


/* Get the code of the last error of a database.
   `db' specifies a database object.
   The return value is the code of the last error that happened on the database. */
int est_db_error(ESTDB *db);


/* Check whether a database has a fatal error.
   `db' specifies a database object.
   The return value is true if the database has a fatal error, else it is false. */
int est_db_fatal(ESTDB *db);


/* Flush index words in the cache of a database.
   `db' specifies a database object connected as a writer.
   `max' specifies the maximum number of words to be flushed.  If it is not more than zero, all
   words are flushed.
   The return value is true on success, else it is false. */
int est_db_flush(ESTDB *db, int max);


/* Synchronize the updated contents of a database.
   `db' specifies a database object connected as a writer.
   The return value is true on success, else it is false. */
int est_db_sync(ESTDB *db);


/* Optimize a database.
   `db' specifies a database object connected as a writer.
   `options' specifies options: `ESTOPTNOPURGE' to omit purging dispensable regions of deleted
   documents, `ESTOPTNODBOPT' to omit optimization of the database files.  The two can be
   combined by bitwise or.
   The return value is true on success, else it is false. */
int est_db_optimize(ESTDB *db, int options);


/* Add a document to a database.
   `db' specifies a database object connected as a writer.
   `doc' specifies a document object.  The document object should have the URI attribute.
   `options' specifies options: `ESTPDCLEAN' to clean up dispensable regions of the overwritten
   document.
   The return value is true on success, else it is false.
   If the URI attribute is the same as that of an existing document in the database, the
   existing one is deleted. */
int est_db_put_doc(ESTDB *db, ESTDOC *doc, int options);


/* Remove a document from a database.
   `db' specifies a database object connected as a writer.
   `id' specifies the ID number of a registered document.
   `options' specifies options: `ESTODCLEAN' to clean up dispensable regions of the deleted
   document.
   The return value is true on success, else it is false. */
int est_db_out_doc(ESTDB *db, int id, int options);


/* Retrieve a document from a database.
   `db' specifies a database object.
   `id' specifies the ID number of a registered document.
   `options' specifies options: `ESTGDNOATTR' to ignore attributes, `ESTGDNOTEXT' to ignore
   the body text.  The two can be combined by bitwise or.
   The return value is a document object.  On error, `NULL' is returned. */
ESTDOC *est_db_get_doc(ESTDB *db, int id, int options);


/* Retrieve the value of an attribute of a document in a database.
   `db' specifies a database object.
   `id' specifies the ID number of a registered document.
   `name' specifies the name of an attribute.
   The return value is the value of the attribute, or `NULL' if it does not exist.  Because the
   region of the return value is allocated with the `malloc' call, it should be released with
   the `free' call when it is no longer in use. */
char *est_db_get_doc_attr(ESTDB *db, int id, const char *name);


/* Get the ID of the document specified by a URI.
   `db' specifies a database object.
   `uri' specifies the URI of a registered document.
   The return value is the ID of the document.  On error, -1 is returned. */
int est_db_uri_to_id(ESTDB *db, const char *uri);


/* Extract the keywords of a document object.
   `db' specifies a database object for TF-IDF tuning.  If it is `NULL', it is not used.
   `doc' specifies a document object.
   `max' specifies the maximum number of keywords to be extracted.
   The return value is a new map object of keywords and their scores as decimal strings.  Because
   the object of the return value is opened with the function `cbmapopen', it should be closed
   with the function `cbmapclose' when it is no longer in use. */
CBMAP *est_db_etch_doc(ESTDB *db, ESTDOC *doc, int max);


/* Initialize the iterator of a database.
   `db' specifies a database object.
   The return value is true on success, else it is false. */
int est_db_iter_init(ESTDB *db);


/* Get the next ID of the iterator of a database.
   `db' specifies a database object.
   The return value is the next ID.  If there are no more documents, 0 is returned.  On error,
   -1 is returned. */
int est_db_iter_next(ESTDB *db);


/* Get the name of a database.
   `db' specifies a database object.
   The return value is the name of the database.  The lifetime of the returned string is tied to
   that of the database object. */
const char *est_db_name(ESTDB *db);


/* Get the number of the documents in a database.
   `db' specifies a database object.
   The return value is the number of the documents in the database. */
int est_db_doc_num(ESTDB *db);


/* Get the number of the unique words in a database.
   `db' specifies a database object.
   The return value is the number of the unique words in the database. */
int est_db_word_num(ESTDB *db);


/* Get the size of a database.
   `db' specifies a database object.
   The return value is the size of the database.
   NOTE(review): the unit of the size (presumably bytes) is not stated here -- confirm. */
double est_db_size(ESTDB *db);


/* Search a database for documents corresponding to a condition.
   `db' specifies a database object.
   `cond' specifies a condition object.
   `nump' specifies the pointer to a variable to which the number of elements in the result is
   assigned.
   `hints' specifies a map object into which the number of documents corresponding to each word
   is stored.  If a word is in a negative condition, the number is negative.  The element whose
   key is an empty string specifies the number of the whole result.  If it is `NULL', it is not
   used.
   The return value is an array whose elements are the ID numbers of corresponding documents.
   This function never fails.  Even if no document corresponds or an error occurs, an empty
   array is returned.  Because the region of the return value is allocated with the `malloc'
   call, it should be released with the `free' call when it is no longer in use. */
int *est_db_search(ESTDB *db, ESTCOND *cond, int *nump, CBMAP *hints);


/* Set the maximum size of the cache memory of a database.
   `db' specifies a database object.
   `size' specifies the maximum size of the index cache.  By default, it is 64MB.  If it is not
   more than 0, the current size is not changed.
   `anum' specifies the maximum number of cached records for document attributes.  By default, it
   is 8192.  If it is not more than 0, the current number is not changed.
   `tnum' specifies the maximum number of cached records for document texts.  By default, it is
   1024.  If it is not more than 0, the current number is not changed. */
void est_db_set_cache_size(ESTDB *db, size_t size, int anum, int tnum);


/* Set the special cache for narrowing and sorting with document attributes.
   `db' specifies a database object.
   `name' specifies the name of an attribute of documents.
   `num' specifies the maximum number of cached records. */
void est_db_set_special_cache(ESTDB *db, const char *name, int num);



/*************************************************************************************************
 * features for experts
 *************************************************************************************************/


/* NOTE(review): identifiers beginning with an underscore followed by an uppercase letter are
   reserved for the implementation by the C standard.  These names are part of the published
   interface, so they are kept as they are. */
#define _EST_VERSION   "0.5.3"           /* version string (presumably of this package) */
#define _EST_LIBVER    200               /* presumably the version number of the library */
#define _EST_PROTVER   "0.9"             /* presumably the version of a protocol; confirm */

/* Languages, used as the preferred language of `est_enc_name'. */
enum {                                   /* enumeration for languages */
  ESTLANGEN,                             /* English */
  ESTLANGJA,                             /* Japanese */
  ESTLANGZH,                             /* Chinese */
  ESTLANGKO,                             /* Korean */
  ESTLANGMISC                            /* miscellaneous */
};


/* Break a sentence of text and extract words.
   `text' specifies a sentence of text.
   `list' specifies a list object to which the extracted words are added.
   `norm' specifies whether to normalize the text.
   `tail' specifies whether to pick up the odd N-gram at the end. */
void est_break_text(const char *text, CBLIST *list, int norm, int tail);


/* Break a sentence of text and extract words using the perfect N-gram analyzer.
   `text' specifies a sentence of text.
   `list' specifies a list object to which the extracted words are added.
   `norm' specifies whether to normalize the text.
   `tail' specifies whether to pick up the odd N-gram at the end. */
void est_break_text_perfng(const char *text, CBLIST *list, int norm, int tail);


/* Convert the character encoding of a string.
   `ptr' specifies the pointer to a region.
   `size' specifies the size of the region.  If it is negative, the size is assigned with
   `strlen(ptr)'.
   `icode' specifies the name of the encoding of the input string.
   `ocode' specifies the name of the encoding of the output string.
   `sp' specifies the pointer to a variable to which the size of the region of the return
   value is assigned.  If it is `NULL', it is not used.
   `mp' specifies the pointer to a variable to which the number of characters missed by failure
   of conversion is assigned.  If it is `NULL', it is not used.
   If successful, the return value is the pointer to the result object, else, it is `NULL'.
   Because an additional zero code is appended at the end of the region of the return value,
   the return value can be treated as a character string.  Because the region of the return
   value is allocated with the `malloc' call, it should be released with the `free' call when it
   is no longer in use. */
char *est_iconv(const char *ptr, int size, const char *icode, const char *ocode,
                int *sp, int *mp);


/* Detect the encoding of a string automatically.
   `ptr' specifies the pointer to a region.
   `size' specifies the size of the region.  If it is negative, the size is assigned with
   `strlen(ptr)'.
   `plang' specifies a preferred language.  For now, `ESTLANGEN', `ESTLANGJA', `ESTLANGZH',
   and `ESTLANGKO' are supported.
   The return value is the name of the encoding of the string. */
const char *est_enc_name(const char *ptr, int size, int plang);


/* Convert a UTF-8 string into UTF-16BE.
   `ptr' specifies the pointer to a region.
   `size' specifies the size of the region.
   `sp' specifies the pointer to a variable to which the size of the region of the return
   value is assigned.
   The return value is the pointer to the result object.  Because an additional zero code is
   appended at the end of the region of the return value, the return value can be treated as a
   character string.  Because the region of the return value is allocated with the `malloc' call,
   it should be released with the `free' call when it is no longer in use. */
char *est_uconv_in(const char *ptr, int size, int *sp);


/* Convert a UTF-16BE string into UTF-8.
   `ptr' specifies the pointer to a region.
   `size' specifies the size of the region.
   `sp' specifies the pointer to a variable to which the size of the region of the return
   value is assigned.  If it is `NULL', it is not used.
   The return value is the pointer to the result object.  Because an additional zero code is
   appended at the end of the region of the return value, the return value can be treated as a
   character string.  Because the region of the return value is allocated with the `malloc' call,
   it should be released with the `free' call when it is no longer in use. */
char *est_uconv_out(const char *ptr, int size, int *sp);


/* Compress a serial object with ZLIB.
   `ptr' specifies the pointer to a region.
   `size' specifies the size of the region.  If it is negative, the size is assigned with
   `strlen(ptr)'.
   `sp' specifies the pointer to a variable to which the size of the region of the return
   value is assigned.
   If successful, the return value is the pointer to the result object, else, it is `NULL'.
   Because the region of the return value is allocated with the `malloc' call, it should be
   released with the `free' call when it is no longer in use. */
char *est_deflate(const char *ptr, int size, int *sp);


/* Decompress a serial object compressed with ZLIB.
   `ptr' specifies the pointer to a region.
   `size' specifies the size of the region.
   `sp' specifies the pointer to a variable to which the size of the region of the return
   value is assigned.  If it is `NULL', it is not used.
   If successful, the return value is the pointer to the result object, else, it is `NULL'.
   Because an additional zero code is appended at the end of the region of the return value,
   the return value can be treated as a character string.  Because the region of the return
   value is allocated with the `malloc' call, it should be released with the `free' call when it
   is no longer in use. */
char *est_inflate(const char *ptr, int size, int *sp);


/* Get the border string for the draft data of documents.
   The return value is the border string for the draft data of documents. */
const char *est_border_str(void);


/* Get a real random number.
   The return value is a real random number between 0.0 and 1.0. */
double est_random(void);


/* Get a random number in normal distribution.
   The return value is a random number in normal distribution, between 0.0 and 1.0. */
double est_random_nd(void);


/* Get an MD5 hash string of a key string.
   `key' specifies the string to be hashed.
   The return value is the MD5 hash string of the key string.  Because the region of the return
   value is allocated with the `malloc' call, it should be released with the `free' call when
   it is no longer in use. */
char *est_make_crypt(const char *key);


/* Check whether a key matches an MD5 hash string.
   `key' specifies the string to be checked.
   `hash' specifies the MD5 hash string to check the key against.
   The return value is true if the key matches the hash string; otherwise it is false. */
int est_match_crypt(const char *key, const char *hash);


/* Get the hidden texts of a document object.
   `doc' specifies a document object.
   The return value is the concatenated sentences of the hidden texts of the document object.
   The lifetime of the returned string is tied to that of the document object. */
const char *est_doc_hidden_texts(ESTDOC *doc);


/* Get the phrase of a condition object.
   `cond' specifies a condition object.
   The return value is the phrase of the condition object, or `NULL' if no phrase is specified.
   The lifetime of the returned string is tied to that of the condition object. */
const char *est_cond_phrase(ESTCOND *cond);


/* Get a list object of attribute expressions of a condition object.
   `cond' specifies a condition object.
   The return value is a list object of the attribute expressions of the condition object, or
   `NULL' if none is specified.  The lifetime of the returned object is tied to that of the
   condition object. */
const CBLIST *est_cond_attrs(ESTCOND *cond);


/* Get the order expression of a condition object.
   `cond' specifies a condition object.
   The return value is the order expression of the condition object, or `NULL' if none is
   specified.  The lifetime of the returned string is tied to that of the condition
   object. */
const char *est_cond_order(ESTCOND *cond);


/* Get the maximum number of retrieval of a condition object.
   `cond' specifies a condition object.
   The return value is the maximum number of retrieval of the condition object, or -1 if no
   maximum is specified. */
int est_cond_max(ESTCOND *cond);


/* Get the options of a condition object.
   `cond' specifies a condition object.
   The return value is the options of the condition object, as an integer. */
int est_cond_options(ESTCOND *cond);


/* Get the score of a document corresponding to a condition object.
   `cond' specifies a condition object.
   `index' specifies the index of an element of the result array of `est_db_search'.
   The return value is the score of that element, or -1 if the index is out of bounds. */
int est_cond_score(ESTCOND *cond, int index);


/* Set the error code of a database.
   `db' specifies a database object.
   `ecode' specifies the error code to set. */
void est_db_set_ecode(ESTDB *db, int ecode);


/* Edit attributes of a document object in a database.
   `db' specifies a database object connected as a writer.
   `doc' specifies a document object.
   The return value is true if successful; otherwise it is false. */
int est_db_edit_doc(ESTDB *db, ESTDOC *doc);


/* Add a piece of meta data to a database.
   `db' specifies a database object connected as a writer.
   `name' specifies the name of the piece of meta data.
   `value' specifies the value of the meta data.  If it is `NULL', the meta data is removed. */
void est_db_add_meta(ESTDB *db, const char *name, const char *value);


/* Get a list of names of meta data of a database.
   `db' specifies a database object.
   The return value is a new list object of the meta data names of the database.  Because the
   object of the return value is opened with the function `cblistopen', it should be closed with
   the function `cblistclose' when it is no longer in use. */
CBLIST *est_db_meta_names(ESTDB *db);


/* Get the value of a piece of meta data of a database.
   `db' specifies a database object.
   `name' specifies the name of the piece of meta data.
   The return value is the value of the meta data, or `NULL' if it does not exist.  Because the
   region of the return value is allocated with the `malloc' call, it should be released with
   the `free' call when it is no longer in use. */
char *est_db_meta(ESTDB *db, const char *name);


/* Get the number of records in the cache memory of a database.
   `db' specifies a database object.
   The return value is the number of records in the cache memory of the database. */
int est_db_cache_num(ESTDB *db);


/* Set the callback function to inform of database events.
   `db' specifies a database object.
   `func' specifies the pointer to the callback function.  The argument passed to the callback
   is the message of each event. */
void est_db_set_informer(ESTDB *db, void (*func)(const char *));


/* Set the callback function to create a vector of keywords of a document.
   `db' specifies a database object.
   `func' specifies the pointer to the callback function.  The arguments of the callback are,
   in order, the database object (passed as `void *'), the ID of a document, and an arbitrary
   pointer.  The return value of the callback is a new map object conforming to the return
   value of `est_db_etch_doc'.
   `data' specifies the pointer to an object given as the third argument of the callback. */
void est_db_set_vectorizer(ESTDB *db, CBMAP *(*func)(void *, int, void *), void *data);


/* Fill the cache for keys for TF-IDF.
   `db' specifies the database object whose key cache is filled. */
void est_db_fill_key_cache(ESTDB *db);


/* Make a directory.
   `path' specifies the path of the new directory.
   The return value is true if successful; otherwise it is false. */
int est_mkdir(const char *path);


/* Remove a directory and its contents recursively.
   `path' specifies the path of the directory to remove.
   The return value is true if successful; otherwise it is false. */
int est_rmdir_rec(const char *path);


/* Get the canonicalized absolute pathname of a file.
   `path' specifies the path of a file.
   The return value is the canonicalized absolute pathname of the file.  Because the region of
   the return value is allocated with the `malloc' call, it should be released with the `free'
   call when it is no longer in use. */
char *est_realpath(const char *path);


/* Get the time of day in milliseconds.
   The return value is the time of day in milliseconds, expressed as a `double'. */
double est_gettimeofday(void);


/* Suspend execution for an interval given in microseconds.
   `usec' specifies the number of microseconds to sleep for. */
void est_usleep(unsigned long usec);


/* Send a signal to a process.
   `pid' specifies the PID of the target process.
   `sig' specifies the signal code to send.
   The return value is true if successful; otherwise it is false. */
int est_kill(int pid, int sig);


/* Get the media type of an extension.
   `ext' specifies the extension of a file path.
   The return value is the media type of the extension. */
const char *est_ext_type(const char *ext);



#if defined(__cplusplus)                 /* export for C++ */
}
#endif

#endif                                   /* duplication check */


/* END OF FILE */
