/*****************************************************************************

  Copyright (c) 2001, 2002 Zope Foundation and Contributors.
  All Rights Reserved.

  This software is subject to the provisions of the Zope Public License,
  Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
  THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
  WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
  FOR A PARTICULAR PURPOSE

 ****************************************************************************/

/****************************************************************************
 Set operations
 ****************************************************************************/
-
- #define SETOPTEMPLATE_C "$Id$\n"
-
- #ifdef KEY_CHECK
- static int
- nextKeyAsSet(SetIteration *i)
- {
- if (i->position >= 0) {
- if (i->position) {
- DECREF_KEY(i->key);
- i->position = -1;
- }
- else
- i->position = 1;
- }
- return 0;
- }
- #endif
-
- /* initSetIteration
- *
- * Start the set iteration protocol. See the comments at struct SetIteration.
- *
- * Arguments
- * i The address of a SetIteration control struct.
- * s The address of the set, bucket, BTree, ..., to be iterated.
- * useValues Boolean; if true, and s has values (is a mapping), copy
- * them into i->value each time i->next() is called; else
- * ignore s's values even if s is a mapping.
- *
- * Return
- * 0 on success; -1 and an exception set if error.
- * i.usesValue is set to 1 (true) if s has values and useValues was
- * true; else usesValue is set to 0 (false).
- * i.set gets a new reference to s, or to some other object used to
- * iterate over s.
- * i.position is set to 0.
- * i.next is set to an appropriate iteration function.
- * i.key and i.value are left alone.
- *
- * Internal
- * i.position < 0 means iteration terminated.
- * i.position = 0 means iteration hasn't yet begun (next() hasn't
- * been called yet).
- * In all other cases, i.key, and possibly i.value, own references.
- * These must be cleaned up, either by next() routines, or by
- * finiSetIteration.
- * next() routines must ensure the above. They should return without
- * doing anything when i.position < 0.
- * It's the responsibility of {init, fini}setIteration to clean up
- * the reference in i.set, and to ensure that no stale references
- * live in i.key or i.value if iteration terminates abnormally.
- * A SetIteration struct has been cleaned up iff i.set is NULL.
- */
- static int
- initSetIteration(SetIteration *i, PyObject *s, int useValues)
- {
- i->set = NULL;
- i->position = -1; /* set to 0 only on normal return */
- i->usesValue = 0; /* assume it's a set or that values aren't iterated */
-
- if (PyObject_IsInstance(s, (PyObject *)&BucketType))
- {
- i->set = s;
- Py_INCREF(s);
-
- if (useValues)
- {
- i->usesValue = 1;
- i->next = nextBucket;
- }
- else
- i->next = nextSet;
- }
- else if (PyObject_IsInstance(s, (PyObject *)&SetType))
- {
- i->set = s;
- Py_INCREF(s);
- i->next = nextSet;
- }
- else if (PyObject_IsInstance(s, (PyObject *)&BTreeType))
- {
- i->set = BTree_rangeSearch(BTREE(s), NULL, NULL, 'i');
- UNLESS(i->set) return -1;
-
- if (useValues)
- {
- i->usesValue = 1;
- i->next = nextBTreeItems;
- }
- else
- i->next = nextTreeSetItems;
- }
- else if (PyObject_IsInstance(s, (PyObject *)&TreeSetType))
- {
- i->set = BTree_rangeSearch(BTREE(s), NULL, NULL, 'k');
- UNLESS(i->set) return -1;
- i->next = nextTreeSetItems;
- }
- #ifdef KEY_CHECK
- else if (KEY_CHECK(s))
- {
- int copied = 1;
- COPY_KEY_FROM_ARG(i->key, s, copied);
- UNLESS (copied) return -1;
-
- INCREF_KEY(i->key);
- i->set = s;
- Py_INCREF(s);
- i->next = nextKeyAsSet;
- }
- #endif
- else
- {
- PyErr_SetString(PyExc_TypeError, "invalid argument");
- return -1;
- }
-
- i->position = 0;
-
- return 0;
- }
-
- #ifndef MERGE_WEIGHT
- #define MERGE_WEIGHT(O, w) (O)
- #endif
-
- static int
- copyRemaining(Bucket *r, SetIteration *i, int merge,
-
- /* See comment # 42 */
- #ifdef MERGE
- VALUE_TYPE w)
- #else
- int w)
- #endif
- {
- while (i->position >= 0)
- {
- if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) return -1;
- COPY_KEY(r->keys[r->len], i->key);
- INCREF_KEY(r->keys[r->len]);
-
- if (merge)
- {
- COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i->value, w));
- INCREF_VALUE(r->values[r->len]);
- }
- r->len++;
- if (i->next(i) < 0) return -1;
- }
-
- return 0;
- }
-
- /* This is the workhorse for all set merge operations: the weighted and
- * unweighted flavors of union and intersection, and set difference. The
- * algorithm is conceptually simple but the code is complicated due to all
- * the options.
- *
- * s1, s2
- * The input collections to be merged.
- *
- * usevalues1, usevalues2
- * Booleans. In the output, should values from s1 (or s2) be used? This
- * only makes sense when an operation intends to support mapping outputs;
- * these should both be false for operations that want pure set outputs.
- *
- * w1, w2
- * If usevalues1(2) are true, these are the weights to apply to the
- * input values.
- *
- * c1
- * Boolean. Should keys that appear in c1 but not c2 appear in the output?
- * c12
- * Boolean. Should keys that appear in both inputs appear in the output?
- * c2
- * Boolean. Should keys that appear in c2 but not c1 appear in the output?
- *
- * Returns NULL if error, else a Set or Bucket, depending on whether a set or
- * mapping was requested.
- */
- static PyObject *
- set_operation(PyObject *s1, PyObject *s2,
- int usevalues1, int usevalues2,
-
- /* Comment # 42
-
- The following ifdef works around a template/type problem
-
- Weights are passed as integers. In particular, the weight passed by
- difference is one. This works fine in the int value and float value
- cases but makes no sense in the object value case. In the object
- value case, we don't do merging, so we don't use the weights, so it
- doesn't matter what they are.
- */
- #ifdef MERGE
- VALUE_TYPE w1, VALUE_TYPE w2,
- #else
- int w1, int w2,
- #endif
- int c1, int c12, int c2)
-
-
- {
- Bucket *r=0;
- SetIteration i1 = {0,0,0}, i2 = {0,0,0};
- int cmp, merge;
-
- if (initSetIteration(&i1, s1, usevalues1) < 0) goto err;
- if (initSetIteration(&i2, s2, usevalues2) < 0) goto err;
- merge = i1.usesValue | i2.usesValue;
-
- if (merge)
- {
- #ifndef MERGE
- if (c12 && i1.usesValue && i2.usesValue) goto invalid_set_operation;
- #endif
- if (! i1.usesValue&& i2.usesValue)
- {
- SetIteration t;
- int i;
-
- /* See comment # 42 above */
- #ifdef MERGE
- VALUE_TYPE v;
- #else
- int v;
- #endif
-
- t=i1; i1=i2; i2=t;
- i=c1; c1=c2; c2=i;
- v=w1; w1=w2; w2=v;
- }
- #ifdef MERGE_DEFAULT
- i1.value=MERGE_DEFAULT;
- i2.value=MERGE_DEFAULT;
- #else
- if (i1.usesValue)
- {
- if (! i2.usesValue && c2) goto invalid_set_operation;
- }
- else
- {
- if (c1 || c12) goto invalid_set_operation;
- }
- #endif
-
- UNLESS(r=BUCKET(PyObject_CallObject(OBJECT(&BucketType), NULL)))
- goto err;
- }
- else
- {
- UNLESS(r=BUCKET(PyObject_CallObject(OBJECT(&SetType), NULL)))
- goto err;
- }
-
- if (i1.next(&i1) < 0) goto err;
- if (i2.next(&i2) < 0) goto err;
-
- while (i1.position >= 0 && i2.position >= 0)
- {
- TEST_KEY_SET_OR(cmp, i1.key, i2.key) goto err;
- if(cmp < 0)
- {
- if(c1)
- {
- if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
- COPY_KEY(r->keys[r->len], i1.key);
- INCREF_KEY(r->keys[r->len]);
- if (merge)
- {
- COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i1.value, w1));
- INCREF_VALUE(r->values[r->len]);
- }
- r->len++;
- }
- if (i1.next(&i1) < 0) goto err;
- }
- else if(cmp==0)
- {
- if(c12)
- {
- if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
- COPY_KEY(r->keys[r->len], i1.key);
- INCREF_KEY(r->keys[r->len]);
- if (merge)
- {
- #ifdef MERGE
- r->values[r->len] = MERGE(i1.value, w1, i2.value, w2);
- #else
- COPY_VALUE(r->values[r->len], i1.value);
- INCREF_VALUE(r->values[r->len]);
- #endif
- }
- r->len++;
- }
- if (i1.next(&i1) < 0) goto err;
- if (i2.next(&i2) < 0) goto err;
- }
- else
- {
- if(c2)
- {
- if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
- COPY_KEY(r->keys[r->len], i2.key);
- INCREF_KEY(r->keys[r->len]);
- if (merge)
- {
- COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i2.value, w2));
- INCREF_VALUE(r->values[r->len]);
- }
- r->len++;
- }
- if (i2.next(&i2) < 0) goto err;
- }
- }
- if(c1 && copyRemaining(r, &i1, merge, w1) < 0) goto err;
- if(c2 && copyRemaining(r, &i2, merge, w2) < 0) goto err;
-
-
- finiSetIteration(&i1);
- finiSetIteration(&i2);
-
- return OBJECT(r);
-
- #ifndef MERGE_DEFAULT
- invalid_set_operation:
- PyErr_SetString(PyExc_TypeError, "invalid set operation");
- #endif
-
- err:
- finiSetIteration(&i1);
- finiSetIteration(&i2);
- Py_XDECREF(r);
- return NULL;
- }
-
- static PyObject *
- difference_m(PyObject *ignored, PyObject *args)
- {
- PyObject *o1, *o2;
-
- UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL;
-
-
- if (o1 == Py_None || o2 == Py_None)
- {
- /* difference(None, X) -> None; difference(X, None) -> X */
- Py_INCREF(o1);
- return o1;
- }
-
- return set_operation(o1, o2, 1, 0, /* preserve values from o1, ignore o2's */
- 1, 0, /* o1's values multiplied by 1 */
- 1, 0, 0); /* take only keys unique to o1 */
- }
-
- static PyObject *
- union_m(PyObject *ignored, PyObject *args)
- {
- PyObject *o1, *o2;
-
- UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL;
-
- if (o1 == Py_None)
- {
- Py_INCREF(o2);
- return o2;
- }
- else if (o2 == Py_None)
- {
- Py_INCREF(o1);
- return o1;
- }
-
- return set_operation(o1, o2, 0, 0, /* ignore values in both */
- 1, 1, /* the weights are irrelevant */
- 1, 1, 1); /* take all keys */
- }
-
- static PyObject *
- intersection_m(PyObject *ignored, PyObject *args)
- {
- PyObject *o1, *o2;
-
- UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL;
-
- if (o1 == Py_None)
- {
- Py_INCREF(o2);
- return o2;
- }
- else if (o2 == Py_None)
- {
- Py_INCREF(o1);
- return o1;
- }
-
- return set_operation(o1, o2, 0, 0, /* ignore values in both */
- 1, 1, /* the weights are irrelevant */
- 0, 1, 0); /* take only keys common to both */
- }
-
- #ifdef MERGE
-
- static PyObject *
- wunion_m(PyObject *ignored, PyObject *args)
- {
- PyObject *o1, *o2;
- VALUE_TYPE w1 = 1, w2 = 1;
-
- UNLESS(PyArg_ParseTuple(args, "OO|" VALUE_PARSE VALUE_PARSE,
- &o1, &o2, &w1, &w2)
- ) return NULL;
-
- if (o1 == Py_None)
- return Py_BuildValue(VALUE_PARSE "O", (o2 == Py_None ? 0 : w2), o2);
- else if (o2 == Py_None)
- return Py_BuildValue(VALUE_PARSE "O", w1, o1);
-
- o1 = set_operation(o1, o2, 1, 1, w1, w2, 1, 1, 1);
- if (o1)
- ASSIGN(o1, Py_BuildValue(VALUE_PARSE "O", (VALUE_TYPE)1, o1));
-
- return o1;
- }
-
- static PyObject *
- wintersection_m(PyObject *ignored, PyObject *args)
- {
- PyObject *o1, *o2;
- VALUE_TYPE w1 = 1, w2 = 1;
-
- UNLESS(PyArg_ParseTuple(args, "OO|" VALUE_PARSE VALUE_PARSE,
- &o1, &o2, &w1, &w2)
- ) return NULL;
-
- if (o1 == Py_None)
- return Py_BuildValue(VALUE_PARSE "O", (o2 == Py_None ? 0 : w2), o2);
- else if (o2 == Py_None)
- return Py_BuildValue(VALUE_PARSE "O", w1, o1);
-
- o1 = set_operation(o1, o2, 1, 1, w1, w2, 0, 1, 0);
- if (o1)
- ASSIGN(o1, Py_BuildValue(VALUE_PARSE "O",
- ((o1->ob_type == (PyTypeObject*)(&SetType)) ? w2+w1 : 1),
- o1));
-
- return o1;
- }
-
- #endif
-
- #ifdef MULTI_INT_UNION
- #include "sorters.c"
-
- /* Input is a sequence of integer sets (or convertible to sets by the
- set iteration protocol). Output is the union of the sets. The point
- is to run much faster than doing pairs of unions.
- */
- static PyObject *
- multiunion_m(PyObject *ignored, PyObject *args)
- {
- PyObject *seq; /* input sequence */
- int n; /* length of input sequence */
- PyObject *set = NULL; /* an element of the input sequence */
- Bucket *result; /* result set */
- SetIteration setiter = {0};
- int i;
-
- UNLESS(PyArg_ParseTuple(args, "O", &seq))
- return NULL;
-
- n = PyObject_Length(seq);
- if (n < 0)
- return NULL;
-
- /* Construct an empty result set. */
- result = BUCKET(PyObject_CallObject(OBJECT(&SetType), NULL));
- if (result == NULL)
- return NULL;
-
- /* For each set in the input sequence, append its elements to the result
- set. At this point, we ignore the possibility of duplicates. */
- for (i = 0; i < n; ++i) {
- set = PySequence_GetItem(seq, i);
- if (set == NULL)
- goto Error;
-
- /* If set is a bucket, do a straight resize + memcpy. */
- if (set->ob_type == (PyTypeObject*)&SetType ||
- set->ob_type == (PyTypeObject*)&BucketType)
- {
- Bucket *b = BUCKET(set);
- int status = 0;
-
- UNLESS (PER_USE(b)) goto Error;
- if (b->len)
- status = bucket_append(result, b, 0, b->len, 0, i < n-1);
- PER_UNUSE(b);
- if (status < 0) goto Error;
- }
- else {
- /* No cheap way: iterate over set's elements one at a time. */
- if (initSetIteration(&setiter, set, 0) < 0) goto Error;
- if (setiter.next(&setiter) < 0) goto Error;
- while (setiter.position >= 0) {
- if (result->len >= result->size && Bucket_grow(result, -1, 1) < 0)
- goto Error;
- COPY_KEY(result->keys[result->len], setiter.key);
- ++result->len;
- /* We know the key is an int, so no need to incref it. */
- if (setiter.next(&setiter) < 0) goto Error;
- }
- finiSetIteration(&setiter);
- }
- Py_DECREF(set);
- set = NULL;
- }
-
- /* Combine, sort, remove duplicates, and reset the result's len.
- If the set shrinks (which happens if and only if there are
- duplicates), no point to realloc'ing the set smaller, as we
- expect the result set to be short-lived.
- */
- if (result->len > 0) {
- size_t newlen; /* number of elements in final result set */
- newlen = sort_int_nodups(result->keys, (size_t)result->len);
- result->len = (int)newlen;
- }
- return (PyObject *)result;
-
- Error:
- Py_DECREF(result);
- Py_XDECREF(set);
- finiSetIteration(&setiter);
- return NULL;
- }
- #endif
|