/***************************************************************************** Copyright (c) 2001, 2002 Zope Foundation and Contributors. All Rights Reserved. This software is subject to the provisions of the Zope Public License, Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE ****************************************************************************/ /**************************************************************************** Set operations ****************************************************************************/ #define SETOPTEMPLATE_C "$Id$\n" #ifdef KEY_CHECK static int nextKeyAsSet(SetIteration *i) { if (i->position >= 0) { if (i->position) { DECREF_KEY(i->key); i->position = -1; } else i->position = 1; } return 0; } #endif /* initSetIteration * * Start the set iteration protocol. See the comments at struct SetIteration. * * Arguments * i The address of a SetIteration control struct. * s The address of the set, bucket, BTree, ..., to be iterated. * useValues Boolean; if true, and s has values (is a mapping), copy * them into i->value each time i->next() is called; else * ignore s's values even if s is a mapping. * * Return * 0 on success; -1 and an exception set if error. * i.usesValue is set to 1 (true) if s has values and useValues was * true; else usesValue is set to 0 (false). * i.set gets a new reference to s, or to some other object used to * iterate over s. * i.position is set to 0. * i.next is set to an appropriate iteration function. * i.key and i.value are left alone. * * Internal * i.position < 0 means iteration terminated. * i.position = 0 means iteration hasn't yet begun (next() hasn't * been called yet). * In all other cases, i.key, and possibly i.value, own references. * These must be cleaned up, either by next() routines, or by * finiSetIteration. * next() routines must ensure the above. They should return without * doing anything when i.position < 0. * It's the responsibility of {init, fini}setIteration to clean up * the reference in i.set, and to ensure that no stale references * live in i.key or i.value if iteration terminates abnormally. * A SetIteration struct has been cleaned up iff i.set is NULL. */ static int initSetIteration(SetIteration *i, PyObject *s, int useValues) { i->set = NULL; i->position = -1; /* set to 0 only on normal return */ i->usesValue = 0; /* assume it's a set or that values aren't iterated */ if (PyObject_IsInstance(s, (PyObject *)&BucketType)) { i->set = s; Py_INCREF(s); if (useValues) { i->usesValue = 1; i->next = nextBucket; } else i->next = nextSet; } else if (PyObject_IsInstance(s, (PyObject *)&SetType)) { i->set = s; Py_INCREF(s); i->next = nextSet; } else if (PyObject_IsInstance(s, (PyObject *)&BTreeType)) { i->set = BTree_rangeSearch(BTREE(s), NULL, NULL, 'i'); UNLESS(i->set) return -1; if (useValues) { i->usesValue = 1; i->next = nextBTreeItems; } else i->next = nextTreeSetItems; } else if (PyObject_IsInstance(s, (PyObject *)&TreeSetType)) { i->set = BTree_rangeSearch(BTREE(s), NULL, NULL, 'k'); UNLESS(i->set) return -1; i->next = nextTreeSetItems; } #ifdef KEY_CHECK else if (KEY_CHECK(s)) { int copied = 1; COPY_KEY_FROM_ARG(i->key, s, copied); UNLESS (copied) return -1; INCREF_KEY(i->key); i->set = s; Py_INCREF(s); i->next = nextKeyAsSet; } #endif else { PyErr_SetString(PyExc_TypeError, "invalid argument"); return -1; } i->position = 0; return 0; } #ifndef MERGE_WEIGHT #define MERGE_WEIGHT(O, w) (O) #endif static int copyRemaining(Bucket *r, SetIteration *i, int merge, /* See comment # 42 */ #ifdef MERGE VALUE_TYPE w) #else int w) #endif { while (i->position >= 0) { if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) return -1; COPY_KEY(r->keys[r->len], i->key); INCREF_KEY(r->keys[r->len]); if (merge) { COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i->value, w)); INCREF_VALUE(r->values[r->len]); } r->len++; if (i->next(i) < 0) return -1; } return 0; } /* This is the workhorse for all set merge operations: the weighted and * unweighted flavors of union and intersection, and set difference. The * algorithm is conceptually simple but the code is complicated due to all * the options. * * s1, s2 * The input collections to be merged. * * usevalues1, usevalues2 * Booleans. In the output, should values from s1 (or s2) be used? This * only makes sense when an operation intends to support mapping outputs; * these should both be false for operations that want pure set outputs. * * w1, w2 * If usevalues1(2) are true, these are the weights to apply to the * input values. * * c1 * Boolean. Should keys that appear in c1 but not c2 appear in the output? * c12 * Boolean. Should keys that appear in both inputs appear in the output? * c2 * Boolean. Should keys that appear in c2 but not c1 appear in the output? * * Returns NULL if error, else a Set or Bucket, depending on whether a set or * mapping was requested. */ static PyObject * set_operation(PyObject *s1, PyObject *s2, int usevalues1, int usevalues2, /* Comment # 42 The following ifdef works around a template/type problem Weights are passed as integers. In particular, the weight passed by difference is one. This works fine in the int value and float value cases but makes no sense in the object value case. In the object value case, we don't do merging, so we don't use the weights, so it doesn't matter what they are. */ #ifdef MERGE VALUE_TYPE w1, VALUE_TYPE w2, #else int w1, int w2, #endif int c1, int c12, int c2) { Bucket *r=0; SetIteration i1 = {0,0,0}, i2 = {0,0,0}; int cmp, merge; if (initSetIteration(&i1, s1, usevalues1) < 0) goto err; if (initSetIteration(&i2, s2, usevalues2) < 0) goto err; merge = i1.usesValue | i2.usesValue; if (merge) { #ifndef MERGE if (c12 && i1.usesValue && i2.usesValue) goto invalid_set_operation; #endif if (! i1.usesValue&& i2.usesValue) { SetIteration t; int i; /* See comment # 42 above */ #ifdef MERGE VALUE_TYPE v; #else int v; #endif t=i1; i1=i2; i2=t; i=c1; c1=c2; c2=i; v=w1; w1=w2; w2=v; } #ifdef MERGE_DEFAULT i1.value=MERGE_DEFAULT; i2.value=MERGE_DEFAULT; #else if (i1.usesValue) { if (! i2.usesValue && c2) goto invalid_set_operation; } else { if (c1 || c12) goto invalid_set_operation; } #endif UNLESS(r=BUCKET(PyObject_CallObject(OBJECT(&BucketType), NULL))) goto err; } else { UNLESS(r=BUCKET(PyObject_CallObject(OBJECT(&SetType), NULL))) goto err; } if (i1.next(&i1) < 0) goto err; if (i2.next(&i2) < 0) goto err; while (i1.position >= 0 && i2.position >= 0) { TEST_KEY_SET_OR(cmp, i1.key, i2.key) goto err; if(cmp < 0) { if(c1) { if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err; COPY_KEY(r->keys[r->len], i1.key); INCREF_KEY(r->keys[r->len]); if (merge) { COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i1.value, w1)); INCREF_VALUE(r->values[r->len]); } r->len++; } if (i1.next(&i1) < 0) goto err; } else if(cmp==0) { if(c12) { if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err; COPY_KEY(r->keys[r->len], i1.key); INCREF_KEY(r->keys[r->len]); if (merge) { #ifdef MERGE r->values[r->len] = MERGE(i1.value, w1, i2.value, w2); #else COPY_VALUE(r->values[r->len], i1.value); INCREF_VALUE(r->values[r->len]); #endif } r->len++; } if (i1.next(&i1) < 0) goto err; if (i2.next(&i2) < 0) goto err; } else { if(c2) { if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err; COPY_KEY(r->keys[r->len], i2.key); INCREF_KEY(r->keys[r->len]); if (merge) { COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i2.value, w2)); INCREF_VALUE(r->values[r->len]); } r->len++; } if (i2.next(&i2) < 0) goto err; } } if(c1 && copyRemaining(r, &i1, merge, w1) < 0) goto err; if(c2 && copyRemaining(r, &i2, merge, w2) < 0) goto err; finiSetIteration(&i1); finiSetIteration(&i2); return OBJECT(r); #ifndef MERGE_DEFAULT invalid_set_operation: PyErr_SetString(PyExc_TypeError, "invalid set operation"); #endif err: finiSetIteration(&i1); finiSetIteration(&i2); Py_XDECREF(r); return NULL; } static PyObject * difference_m(PyObject *ignored, PyObject *args) { PyObject *o1, *o2; UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL; if (o1 == Py_None || o2 == Py_None) { /* difference(None, X) -> None; difference(X, None) -> X */ Py_INCREF(o1); return o1; } return set_operation(o1, o2, 1, 0, /* preserve values from o1, ignore o2's */ 1, 0, /* o1's values multiplied by 1 */ 1, 0, 0); /* take only keys unique to o1 */ } static PyObject * union_m(PyObject *ignored, PyObject *args) { PyObject *o1, *o2; UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL; if (o1 == Py_None) { Py_INCREF(o2); return o2; } else if (o2 == Py_None) { Py_INCREF(o1); return o1; } return set_operation(o1, o2, 0, 0, /* ignore values in both */ 1, 1, /* the weights are irrelevant */ 1, 1, 1); /* take all keys */ } static PyObject * intersection_m(PyObject *ignored, PyObject *args) { PyObject *o1, *o2; UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL; if (o1 == Py_None) { Py_INCREF(o2); return o2; } else if (o2 == Py_None) { Py_INCREF(o1); return o1; } return set_operation(o1, o2, 0, 0, /* ignore values in both */ 1, 1, /* the weights are irrelevant */ 0, 1, 0); /* take only keys common to both */ } #ifdef MERGE static PyObject * wunion_m(PyObject *ignored, PyObject *args) { PyObject *o1, *o2; VALUE_TYPE w1 = 1, w2 = 1; UNLESS(PyArg_ParseTuple(args, "OO|" VALUE_PARSE VALUE_PARSE, &o1, &o2, &w1, &w2) ) return NULL; if (o1 == Py_None) return Py_BuildValue(VALUE_PARSE "O", (o2 == Py_None ? 0 : w2), o2); else if (o2 == Py_None) return Py_BuildValue(VALUE_PARSE "O", w1, o1); o1 = set_operation(o1, o2, 1, 1, w1, w2, 1, 1, 1); if (o1) ASSIGN(o1, Py_BuildValue(VALUE_PARSE "O", (VALUE_TYPE)1, o1)); return o1; } static PyObject * wintersection_m(PyObject *ignored, PyObject *args) { PyObject *o1, *o2; VALUE_TYPE w1 = 1, w2 = 1; UNLESS(PyArg_ParseTuple(args, "OO|" VALUE_PARSE VALUE_PARSE, &o1, &o2, &w1, &w2) ) return NULL; if (o1 == Py_None) return Py_BuildValue(VALUE_PARSE "O", (o2 == Py_None ? 0 : w2), o2); else if (o2 == Py_None) return Py_BuildValue(VALUE_PARSE "O", w1, o1); o1 = set_operation(o1, o2, 1, 1, w1, w2, 0, 1, 0); if (o1) ASSIGN(o1, Py_BuildValue(VALUE_PARSE "O", ((o1->ob_type == (PyTypeObject*)(&SetType)) ? w2+w1 : 1), o1)); return o1; } #endif #ifdef MULTI_INT_UNION #include "sorters.c" /* Input is a sequence of integer sets (or convertible to sets by the set iteration protocol). Output is the union of the sets. The point is to run much faster than doing pairs of unions. */ static PyObject * multiunion_m(PyObject *ignored, PyObject *args) { PyObject *seq; /* input sequence */ int n; /* length of input sequence */ PyObject *set = NULL; /* an element of the input sequence */ Bucket *result; /* result set */ SetIteration setiter = {0}; int i; UNLESS(PyArg_ParseTuple(args, "O", &seq)) return NULL; n = PyObject_Length(seq); if (n < 0) return NULL; /* Construct an empty result set. */ result = BUCKET(PyObject_CallObject(OBJECT(&SetType), NULL)); if (result == NULL) return NULL; /* For each set in the input sequence, append its elements to the result set. At this point, we ignore the possibility of duplicates. */ for (i = 0; i < n; ++i) { set = PySequence_GetItem(seq, i); if (set == NULL) goto Error; /* If set is a bucket, do a straight resize + memcpy. */ if (set->ob_type == (PyTypeObject*)&SetType || set->ob_type == (PyTypeObject*)&BucketType) { Bucket *b = BUCKET(set); int status = 0; UNLESS (PER_USE(b)) goto Error; if (b->len) status = bucket_append(result, b, 0, b->len, 0, i < n-1); PER_UNUSE(b); if (status < 0) goto Error; } else { /* No cheap way: iterate over set's elements one at a time. */ if (initSetIteration(&setiter, set, 0) < 0) goto Error; if (setiter.next(&setiter) < 0) goto Error; while (setiter.position >= 0) { if (result->len >= result->size && Bucket_grow(result, -1, 1) < 0) goto Error; COPY_KEY(result->keys[result->len], setiter.key); ++result->len; /* We know the key is an int, so no need to incref it. */ if (setiter.next(&setiter) < 0) goto Error; } finiSetIteration(&setiter); } Py_DECREF(set); set = NULL; } /* Combine, sort, remove duplicates, and reset the result's len. If the set shrinks (which happens if and only if there are duplicates), no point to realloc'ing the set smaller, as we expect the result set to be short-lived. */ if (result->len > 0) { size_t newlen; /* number of elements in final result set */ newlen = sort_int_nodups(result->keys, (size_t)result->len); result->len = (int)newlen; } return (PyObject *)result; Error: Py_DECREF(result); Py_XDECREF(set); finiSetIteration(&setiter); return NULL; } #endif