cabinet:我对Tokyo Cabinet的理解



Tokyo Cabinet 是一个DBM的实现。这里的数据库由一系列key-value对的记录构成。key和value都可以是任意长度的字节序列,既可以是二进制也可以是字符串。这里没有数据类型和数据表的概念。

当作为Hash表数据库使用时,每个key必须是不同的,因此无法存储两个key相同的记录。提供了以下访问方法:提供key、value参数来存储记录,按key删除记录,按key来读取记录。另外,遍历key也被支持,虽然顺序是任意的、不能被保证。这些方法跟Unix标准的DBM(例如GDBM、NDBM等)是相同的,但是比它们的性能要好得多(因此可以替代它们)。

当按B+树来存储时,拥有相同key的记录也能被存储。像hash表一样,读取、存储、删除的函数也都有提供。记录按照用户提供的比较函数来排序存储。可以采用顺序或倒序的游标来读取每一条记录。依照这个原理,向前的字符串匹配搜索和整数区间搜索也实现了。另外,B+树的事务也是可用的。

As for database of fixed-length array, records are stored with unique natural numbers. It is impossible to store two or more records with overlapping keys. Moreover, the length of each record is limited by the specified length. Provided operations are the same as ones of hash database.
On-memory database supports "bnum", "capnum", and "capsiz". Hash database supports "mode", "bnum", "apow", "fpow", "opts", "rcnum", and "xmsiz". B+ tree database supports "mode", "lmemb", "nmemb", "bnum", "apow", "fpow", "opts", "lcnum", "ncnum", and "xmsiz". Fixed-length database supports "mode", "width", and "limsiz". "capnum" specifies the capacity number of records. "capsiz" specifies the capacity size of using memory. Records spilled the capacity are removed by the storing order. "mode" can contain "w" of writer, "r" of reader, "c" of creating, "t" of truncating, "e" of no locking, and "f" of non-blocking lock. The default mode is relevant to "wc". "opts" can contain "l" of large option, "d" of Deflate option, "b" of BZIP2 option, and "t" of TCBS option. For example, "casket.tch#bnum=1000000#opts=ld" means that the name of the database file is "casket.tch", and the bucket number is 1000000, and the options are large and Deflate.



---------------------
# <tcutil.h>
# <tcadb.h>
# <stdlib.h>
# <stdbool.h>
# <std.h>

( argc, char **argv){

TCADB *adb;
char *key, *value;



/* create the object */
adb = tcadb;

/* open the database */
(!tcadbopen(adb, "casket.tch")){
fprf(stderr, "open error\n");
}

/* store records */
(!tcadbput2(adb, "foo", "hop") ||
!tcadbput2(adb, "bar", "step") ||
!tcadbput2(adb, "baz", "jump")){
fprf(stderr, "put error\n");
}

/* retrieve records */
value = tcadbget2(adb, "foo");
(value){
prf("%s\n", value);
free(value);
} {
fprf(stderr, "get error\n");
}

/* traverse records */
tcadbiterinit(adb);
while((key = tcadbiternext2(adb)) != NULL){
value = tcadbget2(adb, key);
(value){
prf("%s:%s\n", key, value);
free(value);
}
free(key);
}

/* close the database */
(!tcadbclose(adb)){
fprf(stderr, "close error\n");
}

/* delete the object */
tcadbdel(adb);

0;
}

___________________________________________

pythonanydbm接口

anydbm.open(filename[, flag[, mode]])

Open the database file filename and return a corresponding object.

If the database file already exists, the whichdb module is used to determine its type and the appropriate module is used; if it does not exist, the first module listed above that can be imported is used.

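The optional flag argument can be 'r' to open an existing database for reading only, 'w' to open an existing database for reading and writing, 'c' to create the database if it doesn't exist, or 'n', which will always create a new empty database. If not specified, the default value is 'r'.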
# Record some values
db['www.python.org'] = 'Python Website'
db['www.cnn.com'] = 'Cable News Network'

# Loop through contents. Other dictionary methods
# such as .keys, .values also work.
for k, v in db.iteritems:
pr k, '\t', v

# Storing a non- key or value will raise an exception (most
# likely a TypeError).
db['www.yahoo.com'] = 4

# Close when done.
db.close

___________________________________________

Python gdbm封装源代码

/* DBM module using dictionary interface */
/* Author: Anthony Baxter, after dbmmodule.c */
/* Doc strings: Mitch Chapman */


#include "Python.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "gdbm.h"

#if defined(WIN32) && !defined(__CYGWIN__)
#include "gdbmerrno.h"
extern const char * gdbm_strerror(gdbm_error);
#endif

/* Module-level docstring, passed to Py_InitModule4() by the init function. */
PyDoc_STRVAR(gdbmmodule__doc__,
"This module provides an interface to the GNU DBM (GDBM) library.\n\
\n\
This module is quite similar to the dbm module, but uses GDBM instead to\n\
provide some additional functionality.  Please note that the file formats\n\
created by GDBM and dbm are incompatible. \n\
\n\
GDBM objects behave like mappings (dictionaries), except that keys and\n\
values are always strings.  Printing a GDBM object doesn't print the\n\
keys and values, and the items() and values() methods are not\n\
supported.");

typedef struct {
PyObject_HEAD
di_size; /* -1 means recompute */
GDBM_FILE di_dbm;
} dbmobject;

PyTypeObject Dbmtype;

# is_dbmobject(v) ((v)->ob_type &Dbmtype)
# check_dbmobject_open(v) ((v)->di_dbm NULL) \
{ PyErr_SetString(DbmError, "GDBM object has already been closed"); \
NULL; }



PyObject *DbmError;

/* Docstring for the gdbm object type (used as tp_doc of Dbmtype). */
PyDoc_STRVAR(gdbm_object__doc__,
"This object represents a GDBM database.\n\
GDBM objects behave like mappings (dictionaries), except that keys and\n\
values are always strings.  Printing a GDBM object doesn't print the\n\
keys and values, and the items() and values() methods are not\n\
supported.\n\
\n\
GDBM objects also support additional operations such as firstkey,\n\
nextkey, reorganize, and sync.");



PyObject *
dbmobject(char *file, flags, mode)
{
dbmobject *dp;

dp = PyObject_New(dbmobject, &Dbmtype);
(dp NULL)
NULL;
dp->di_size = -1;
errno = 0;
((dp->di_dbm = gdbm_open(file, 0, flags, mode, NULL)) 0) {
(errno != 0)
PyErr_SetFromErrno(DbmError);

PyErr_SetString(DbmError, gdbm_strerror(gdbm_errno));
Py_DECREF(dp);
NULL;
}
(PyObject *)dp;
}

/* Methods */

/* Destructor: close the database if it is still open, then free the object. */
static void
dbm_dealloc(dbmobject *dp)
{
    if (dp->di_dbm)
        gdbm_close(dp->di_dbm);
    PyObject_Del(dp);
}

Py_ssize_t
dbm_length(dbmobject *dp)
{
(dp->di_dbm NULL) {
PyErr_SetString(DbmError, "GDBM object has already been closed");
-1;
}
(dp->di_size < 0) {
datum key,okey;
size;
okey.dsize=0;
okey.dptr=NULL;

size = 0;
for (key=gdbm_firstkey(dp->di_dbm); key.dptr;
key = gdbm_nextkey(dp->di_dbm,okey)) {
size;
(okey.dsize) free(okey.dptr);
okey=key;
}
dp->di_size = size;
}
dp->di_size;
}

/*
 * mp_subscript: implement db[key].
 *
 * Returns a new string holding the stored value, or NULL with an exception
 * set: whatever PyArg_Parse raised for a non-string key, gdbm.error if the
 * database is closed, KeyError if the key is absent.
 */
static PyObject *
dbm_subscript(dbmobject *dp, PyObject *key)
{
    PyObject *v;
    datum drec, krec;

    if (!PyArg_Parse(key, "s#", &krec.dptr, &krec.dsize))
        return NULL;

    if (dp->di_dbm == NULL) {
        PyErr_SetString(DbmError,
                        "GDBM object has already been closed");
        return NULL;
    }
    drec = gdbm_fetch(dp->di_dbm, krec);
    if (drec.dptr == 0) {
        PyErr_SetString(PyExc_KeyError,
                        PyString_AS_STRING((PyStringObject *)key));
        return NULL;
    }
    v = PyString_FromStringAndSize(drec.dptr, drec.dsize);
    /* The fetched buffer is ours to free once it has been copied. */
    free(drec.dptr);
    return v;
}


/*
 * mp_ass_subscript: implement db[v] = w and del db[v] (w == NULL).
 *
 * Returns 0 on success, -1 with an exception set: TypeError for non-string
 * key or value, gdbm.error if the database is closed or the store fails,
 * KeyError if the key to delete is absent.
 */
static int
dbm_ass_sub(dbmobject *dp, PyObject *v, PyObject *w)
{
    datum krec, drec;

    if (!PyArg_Parse(v, "s#", &krec.dptr, &krec.dsize)) {
        PyErr_SetString(PyExc_TypeError,
                        "gdbm mappings have string indices only");
        return -1;
    }
    if (dp->di_dbm == NULL) {
        PyErr_SetString(DbmError,
                        "GDBM object has already been closed");
        return -1;
    }
    /* Any modification invalidates the cached record count. */
    dp->di_size = -1;
    if (w == NULL) {
        if (gdbm_delete(dp->di_dbm, krec) < 0) {
            PyErr_SetString(PyExc_KeyError,
                            PyString_AS_STRING((PyStringObject *)v));
            return -1;
        }
    }
    else {
        if (!PyArg_Parse(w, "s#", &drec.dptr, &drec.dsize)) {
            PyErr_SetString(PyExc_TypeError,
                            "gdbm mappings have string elements only");
            return -1;
        }
        /* Clear errno so a stale value is not reported for this store. */
        errno = 0;
        if (gdbm_store(dp->di_dbm, krec, drec, GDBM_REPLACE) < 0) {
            if (errno != 0)
                PyErr_SetFromErrno(DbmError);
            else
                PyErr_SetString(DbmError,
                                gdbm_strerror(gdbm_errno));
            return -1;
        }
    }
    return 0;
}

/* Mapping protocol table: len(db), db[key], and db[key] = value / del db[key]. */
static PyMappingMethods dbm_as_mapping = {
    (lenfunc)dbm_length,            /*mp_length*/
    (binaryfunc)dbm_subscript,      /*mp_subscript*/
    (objobjargproc)dbm_ass_sub,     /*mp_ass_subscript*/
};

PyDoc_STRVAR(dbm_close__doc__,
"close() -> None\n\
Closes the database.");

/* close(): close the GDBM handle if open and mark the object closed.
   Calling it on an already-closed object is a no-op.  Returns None. */
static PyObject *
dbm_close(dbmobject *dp, PyObject *unused)
{
    if (dp->di_dbm)
        gdbm_close(dp->di_dbm);
    dp->di_dbm = NULL;
    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(dbm_keys__doc__,
"keys() -> list_of_keys\n\
Get a list of all keys in the database.");

/*
 * keys(): build a Python list holding every key in the database.
 *
 * Returns a new list, or NULL with an exception set (bad receiver, closed
 * database, or allocation failure).  On every exit path the key buffer
 * currently held is freed.
 */
static PyObject *
dbm_keys(dbmobject *dp, PyObject *unused)
{
    PyObject *v, *item;
    datum key, nextkey;
    int err;

    if (dp == NULL || !is_dbmobject(dp)) {
        PyErr_BadInternalCall();
        return NULL;
    }
    check_dbmobject_open(dp);

    v = PyList_New(0);
    if (v == NULL)
        return NULL;

    key = gdbm_firstkey(dp->di_dbm);
    while (key.dptr) {
        item = PyString_FromStringAndSize(key.dptr, key.dsize);
        if (item == NULL) {
            free(key.dptr);
            Py_DECREF(v);
            return NULL;
        }
        err = PyList_Append(v, item);
        /* The list holds its own reference now (or the append failed). */
        Py_DECREF(item);
        if (err != 0) {
            free(key.dptr);
            Py_DECREF(v);
            return NULL;
        }
        /* Fetch the successor before releasing the current key buffer. */
        nextkey = gdbm_nextkey(dp->di_dbm, key);
        free(key.dptr);
        key = nextkey;
    }
    return v;
}

PyDoc_STRVAR(dbm_has_key__doc__,
"has_key(key) -> boolean\n\
Find out whether or not the database contains a given key.");

/* has_key(key): return the result of gdbm_exists() as a Python int, or NULL
   with an exception set for a bad argument or a closed database. */
static PyObject *
dbm_has_key(dbmobject *dp, PyObject *args)
{
    datum key;

    if (!PyArg_ParseTuple(args, "s#:has_key", &key.dptr, &key.dsize))
        return NULL;
    check_dbmobject_open(dp);
    return PyInt_FromLong((long) gdbm_exists(dp->di_dbm, key));
}

PyDoc_STRVAR(dbm_firstkey__doc__,
"firstkey() -> key\n\
It's possible to loop over every key in the database using this method\n\
and the nextkey() method.  The traversal is ordered by GDBM's internal\n\
hash values, and won't be sorted by the key values.  This method\n\
returns the starting key.");

/* firstkey(): return the first key of a traversal as a new string, or None
   when gdbm_firstkey() yields no key.  NULL if the database is closed. */
static PyObject *
dbm_firstkey(dbmobject *dp, PyObject *unused)
{
    PyObject *v;
    datum key;

    check_dbmobject_open(dp);
    key = gdbm_firstkey(dp->di_dbm);
    if (key.dptr) {
        v = PyString_FromStringAndSize(key.dptr, key.dsize);
        free(key.dptr);
        return v;
    }
    else {
        Py_INCREF(Py_None);
        return Py_None;
    }
}

PyDoc_STRVAR(dbm_nextkey__doc__,
"nextkey(key) -> next_key\n\
Returns the key that follows key in the traversal.\n\
The following code prints every key in the database db, without having\n\
to create a list in memory that contains them all:\n\
\n\
      k = db.firstkey()\n\
      while k != None:\n\
          print k\n\
          k = db.nextkey(k)");

/* nextkey(key): return the key following `key` as a new string, or None
   when gdbm_nextkey() yields no key.  NULL on bad argument or closed db. */
static PyObject *
dbm_nextkey(dbmobject *dp, PyObject *args)
{
    PyObject *v;
    datum key, nextkey;

    if (!PyArg_ParseTuple(args, "s#:nextkey", &key.dptr, &key.dsize))
        return NULL;
    check_dbmobject_open(dp);
    nextkey = gdbm_nextkey(dp->di_dbm, key);
    if (nextkey.dptr) {
        v = PyString_FromStringAndSize(nextkey.dptr, nextkey.dsize);
        free(nextkey.dptr);
        return v;
    }
    else {
        Py_INCREF(Py_None);
        return Py_None;
    }
}

PyDoc_STRVAR(dbm_reorganize__doc__,
"reorganize() -> None\n\
If you have carried out a lot of deletions and would like to shrink\n\
the space used by the GDBM file, this routine will reorganize the\n\
database.  GDBM will not shorten the length of a database file except\n\
by using this reorganization; otherwise, deleted file space will be\n\
kept and reused as new (key,value) pairs are added.");

/* reorganize(): call gdbm_reorganize().  Returns None, or NULL with
   gdbm.error set (from errno when non-zero, otherwise from gdbm_errno). */
static PyObject *
dbm_reorganize(dbmobject *dp, PyObject *unused)
{
    check_dbmobject_open(dp);
    /* Clear errno so a stale value is not reported for this call. */
    errno = 0;
    if (gdbm_reorganize(dp->di_dbm) < 0) {
        if (errno != 0)
            PyErr_SetFromErrno(DbmError);
        else
            PyErr_SetString(DbmError, gdbm_strerror(gdbm_errno));
        return NULL;
    }
    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(dbm_sync__doc__,
"sync() -> None\n\
When the database has been opened in fast mode, this method forces\n\
any unwritten data to be written to the disk.");

/* sync(): call gdbm_sync() on the open database.  Returns None, or NULL
   with gdbm.error set if the database has been closed. */
static PyObject *
dbm_sync(dbmobject *dp, PyObject *unused)
{
    check_dbmobject_open(dp);
    gdbm_sync(dp->di_dbm);
    Py_INCREF(Py_None);
    return Py_None;
}

PyMethodDef dbm_methods = {
{"close", (PyCFunction)dbm_close, METH_NOARGS, dbm_close__doc__},
{"keys", (PyCFunction)dbm_keys, METH_NOARGS, dbm_keys__doc__},
{"has_key", (PyCFunction)dbm_has_key, METH_VARARGS, dbm_has_key__doc__},
{"firstkey", (PyCFunction)dbm_firstkey,METH_NOARGS, dbm_firstkey__doc__},
{"nextkey", (PyCFunction)dbm_nextkey, METH_VARARGS, dbm_nextkey__doc__},
{"reorganize",(PyCFunction)dbm_reorganize,METH_NOARGS, dbm_reorganize__doc__},
{"sync", (PyCFunction)dbm_sync, METH_NOARGS, dbm_sync__doc__},
{NULL, NULL} /* sentinel */
};

/* tp_getattr: resolve attribute `name` against the dbm_methods table. */
static PyObject *
dbm_getattr(dbmobject *dp, char *name)
{
    return Py_FindMethod(dbm_methods, (PyObject *)dp, name);
}


/* Type object for gdbm database objects.  ob_type is left as 0 here and is
   filled in with &PyType_Type by the module init function. */
PyTypeObject Dbmtype = {
    PyObject_HEAD_INIT(0)
    0,
    "gdbm.gdbm",
    sizeof(dbmobject),
    0,
    (destructor)dbm_dealloc,            /*tp_dealloc*/
    0,                                  /*tp_print*/
    (getattrfunc)dbm_getattr,           /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    &dbm_as_mapping,                    /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    0,                                  /*tp_xxx4*/
    gdbm_object__doc__,                 /*tp_doc*/
};

/* ----------------------------------------------------------------- */

PyDoc_STRVAR(dbmopen__doc__,
"open(filename, [flags, [mode]]) -> dbm_object\n\
Open a dbm database and a dbm object. The filename argument is\n\
the name of the database file.\n\
\n\
The optional flags argument can be 'r' (to open an existing database\n\
for reading _disibledevent=>


reading and writing), 'c' (which creates the database it doesn't\n\
exist), or 'n' (which always creates a empty database).\n\
\n\
Some versions of gdbm support additional flags which must be\n\
appended to _disibledevent=> lags;
mode = 0666;

(!PyArg_ParseTuple(args, "s|si:open", &name, &flags, &mode))
NULL;
switch (flags[0]) {
'r':
lags = GDBM_READER;
;
'w':
lags = GDBM_WRITER;
;
'c':
lags = GDBM_WRCREAT;
;
'n':
lags = GDBM_NEWDB;
;
default:
PyErr_SetString(DbmError,
"First flag must be _disibledevent=>char buf[40];
switch (*flags) {
#def GDBM_FAST
'f':
lags |= GDBM_FAST;
;
#end
#def GDBM_SYNC
's':
lags |= GDBM_SYNC;
;
#end
#def GDBM_NOLOCK
'u':
lags |= GDBM_NOLOCK;
;
#end
default:
PyOS_snprf(buf, (buf), "Flag '%c' is not supported.",
*flags);
PyErr_SetString(DbmError, buf);
NULL;
}
}

dbmobject(name, lags, mode);
}

/* Flag characters accepted by open(): the four access modes, plus one
   letter for each optional GDBM flag the library defines at compile time. */
static char dbmmodule_open_flags[] = "rwcn"
#ifdef GDBM_FAST
                                     "f"
#endif
#ifdef GDBM_SYNC
                                     "s"
#endif
#ifdef GDBM_NOLOCK
                                     "u"
#endif
                                     ;

/* Module-level function table: only open() is exported. */
static PyMethodDef dbmmodule_methods[] = {
    { "open", (PyCFunction)dbmopen, METH_VARARGS, dbmopen__doc__},
    { 0, 0 },
};

PyMODINIT_FUNC
initgdbm(void) {
PyObject *m, *d, *s;

Dbmtype.ob_type = &PyType_Type;
m = Py_InitModule4("gdbm", dbmmodule_methods,
gdbmmodule__doc__, (PyObject *)NULL,
PYTHON_API_VERSION);
(m NULL)
;
d = PyModule_GetDict(m);
DbmError = PyErr_NewException("gdbm.error", NULL, NULL);
(DbmError != NULL) {
PyDict_SetItemString(d, "error", DbmError);
s = PyString_FromString(dbmmodule_open_flags);
PyDict_SetItemString(d, "open_flags", s);
Py_DECREF(s);
}
}
Tags:  cabinet.dll cabinetmanager opencabinet cabinet

延伸阅读

最新评论

发表评论