Development of an internal social media platform with personalised dashboards for students
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

SetOpTemplate.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. /*****************************************************************************
  2. Copyright (c) 2001, 2002 Zope Foundation and Contributors.
  3. All Rights Reserved.
  4. This software is subject to the provisions of the Zope Public License,
  5. Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
  6. THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
  7. WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  8. WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
  9. FOR A PARTICULAR PURPOSE
  10. ****************************************************************************/
  11. /****************************************************************************
  12. Set operations
  13. ****************************************************************************/
  14. #define SETOPTEMPLATE_C "$Id$\n"
  15. #ifdef KEY_CHECK
  16. static int
  17. nextKeyAsSet(SetIteration *i)
  18. {
  19. if (i->position >= 0) {
  20. if (i->position) {
  21. DECREF_KEY(i->key);
  22. i->position = -1;
  23. }
  24. else
  25. i->position = 1;
  26. }
  27. return 0;
  28. }
  29. #endif
  30. /* initSetIteration
  31. *
  32. * Start the set iteration protocol. See the comments at struct SetIteration.
  33. *
  34. * Arguments
  35. * i The address of a SetIteration control struct.
  36. * s The address of the set, bucket, BTree, ..., to be iterated.
  37. * useValues Boolean; if true, and s has values (is a mapping), copy
  38. * them into i->value each time i->next() is called; else
  39. * ignore s's values even if s is a mapping.
  40. *
  41. * Return
  42. * 0 on success; -1 and an exception set if error.
  43. * i.usesValue is set to 1 (true) if s has values and useValues was
  44. * true; else usesValue is set to 0 (false).
  45. * i.set gets a new reference to s, or to some other object used to
  46. * iterate over s.
  47. * i.position is set to 0.
  48. * i.next is set to an appropriate iteration function.
  49. * i.key and i.value are left alone.
  50. *
  51. * Internal
  52. * i.position < 0 means iteration terminated.
  53. * i.position = 0 means iteration hasn't yet begun (next() hasn't
  54. * been called yet).
  55. * In all other cases, i.key, and possibly i.value, own references.
  56. * These must be cleaned up, either by next() routines, or by
  57. * finiSetIteration.
  58. * next() routines must ensure the above. They should return without
  59. * doing anything when i.position < 0.
  60. * It's the responsibility of {init, fini}setIteration to clean up
  61. * the reference in i.set, and to ensure that no stale references
  62. * live in i.key or i.value if iteration terminates abnormally.
  63. * A SetIteration struct has been cleaned up iff i.set is NULL.
  64. */
  65. static int
  66. initSetIteration(SetIteration *i, PyObject *s, int useValues)
  67. {
  68. i->set = NULL;
  69. i->position = -1; /* set to 0 only on normal return */
  70. i->usesValue = 0; /* assume it's a set or that values aren't iterated */
  71. if (PyObject_IsInstance(s, (PyObject *)&BucketType))
  72. {
  73. i->set = s;
  74. Py_INCREF(s);
  75. if (useValues)
  76. {
  77. i->usesValue = 1;
  78. i->next = nextBucket;
  79. }
  80. else
  81. i->next = nextSet;
  82. }
  83. else if (PyObject_IsInstance(s, (PyObject *)&SetType))
  84. {
  85. i->set = s;
  86. Py_INCREF(s);
  87. i->next = nextSet;
  88. }
  89. else if (PyObject_IsInstance(s, (PyObject *)&BTreeType))
  90. {
  91. i->set = BTree_rangeSearch(BTREE(s), NULL, NULL, 'i');
  92. UNLESS(i->set) return -1;
  93. if (useValues)
  94. {
  95. i->usesValue = 1;
  96. i->next = nextBTreeItems;
  97. }
  98. else
  99. i->next = nextTreeSetItems;
  100. }
  101. else if (PyObject_IsInstance(s, (PyObject *)&TreeSetType))
  102. {
  103. i->set = BTree_rangeSearch(BTREE(s), NULL, NULL, 'k');
  104. UNLESS(i->set) return -1;
  105. i->next = nextTreeSetItems;
  106. }
  107. #ifdef KEY_CHECK
  108. else if (KEY_CHECK(s))
  109. {
  110. int copied = 1;
  111. COPY_KEY_FROM_ARG(i->key, s, copied);
  112. UNLESS (copied) return -1;
  113. INCREF_KEY(i->key);
  114. i->set = s;
  115. Py_INCREF(s);
  116. i->next = nextKeyAsSet;
  117. }
  118. #endif
  119. else
  120. {
  121. PyErr_SetString(PyExc_TypeError, "invalid argument");
  122. return -1;
  123. }
  124. i->position = 0;
  125. return 0;
  126. }
  127. #ifndef MERGE_WEIGHT
  128. #define MERGE_WEIGHT(O, w) (O)
  129. #endif
  130. static int
  131. copyRemaining(Bucket *r, SetIteration *i, int merge,
  132. /* See comment # 42 */
  133. #ifdef MERGE
  134. VALUE_TYPE w)
  135. #else
  136. int w)
  137. #endif
  138. {
  139. while (i->position >= 0)
  140. {
  141. if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) return -1;
  142. COPY_KEY(r->keys[r->len], i->key);
  143. INCREF_KEY(r->keys[r->len]);
  144. if (merge)
  145. {
  146. COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i->value, w));
  147. INCREF_VALUE(r->values[r->len]);
  148. }
  149. r->len++;
  150. if (i->next(i) < 0) return -1;
  151. }
  152. return 0;
  153. }
  154. /* This is the workhorse for all set merge operations: the weighted and
  155. * unweighted flavors of union and intersection, and set difference. The
  156. * algorithm is conceptually simple but the code is complicated due to all
  157. * the options.
  158. *
  159. * s1, s2
  160. * The input collections to be merged.
  161. *
  162. * usevalues1, usevalues2
  163. * Booleans. In the output, should values from s1 (or s2) be used? This
  164. * only makes sense when an operation intends to support mapping outputs;
  165. * these should both be false for operations that want pure set outputs.
  166. *
  167. * w1, w2
  168. * If usevalues1(2) are true, these are the weights to apply to the
  169. * input values.
  170. *
  171. * c1
  172. * Boolean. Should keys that appear in c1 but not c2 appear in the output?
  173. * c12
  174. * Boolean. Should keys that appear in both inputs appear in the output?
  175. * c2
  176. * Boolean. Should keys that appear in c2 but not c1 appear in the output?
  177. *
  178. * Returns NULL if error, else a Set or Bucket, depending on whether a set or
  179. * mapping was requested.
  180. */
  181. static PyObject *
  182. set_operation(PyObject *s1, PyObject *s2,
  183. int usevalues1, int usevalues2,
  184. /* Comment # 42
  185. The following ifdef works around a template/type problem
  186. Weights are passed as integers. In particular, the weight passed by
  187. difference is one. This works fine in the int value and float value
  188. cases but makes no sense in the object value case. In the object
  189. value case, we don't do merging, so we don't use the weights, so it
  190. doesn't matter what they are.
  191. */
  192. #ifdef MERGE
  193. VALUE_TYPE w1, VALUE_TYPE w2,
  194. #else
  195. int w1, int w2,
  196. #endif
  197. int c1, int c12, int c2)
  198. {
  199. Bucket *r=0;
  200. SetIteration i1 = {0,0,0}, i2 = {0,0,0};
  201. int cmp, merge;
  202. if (initSetIteration(&i1, s1, usevalues1) < 0) goto err;
  203. if (initSetIteration(&i2, s2, usevalues2) < 0) goto err;
  204. merge = i1.usesValue | i2.usesValue;
  205. if (merge)
  206. {
  207. #ifndef MERGE
  208. if (c12 && i1.usesValue && i2.usesValue) goto invalid_set_operation;
  209. #endif
  210. if (! i1.usesValue&& i2.usesValue)
  211. {
  212. SetIteration t;
  213. int i;
  214. /* See comment # 42 above */
  215. #ifdef MERGE
  216. VALUE_TYPE v;
  217. #else
  218. int v;
  219. #endif
  220. t=i1; i1=i2; i2=t;
  221. i=c1; c1=c2; c2=i;
  222. v=w1; w1=w2; w2=v;
  223. }
  224. #ifdef MERGE_DEFAULT
  225. i1.value=MERGE_DEFAULT;
  226. i2.value=MERGE_DEFAULT;
  227. #else
  228. if (i1.usesValue)
  229. {
  230. if (! i2.usesValue && c2) goto invalid_set_operation;
  231. }
  232. else
  233. {
  234. if (c1 || c12) goto invalid_set_operation;
  235. }
  236. #endif
  237. UNLESS(r=BUCKET(PyObject_CallObject(OBJECT(&BucketType), NULL)))
  238. goto err;
  239. }
  240. else
  241. {
  242. UNLESS(r=BUCKET(PyObject_CallObject(OBJECT(&SetType), NULL)))
  243. goto err;
  244. }
  245. if (i1.next(&i1) < 0) goto err;
  246. if (i2.next(&i2) < 0) goto err;
  247. while (i1.position >= 0 && i2.position >= 0)
  248. {
  249. TEST_KEY_SET_OR(cmp, i1.key, i2.key) goto err;
  250. if(cmp < 0)
  251. {
  252. if(c1)
  253. {
  254. if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
  255. COPY_KEY(r->keys[r->len], i1.key);
  256. INCREF_KEY(r->keys[r->len]);
  257. if (merge)
  258. {
  259. COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i1.value, w1));
  260. INCREF_VALUE(r->values[r->len]);
  261. }
  262. r->len++;
  263. }
  264. if (i1.next(&i1) < 0) goto err;
  265. }
  266. else if(cmp==0)
  267. {
  268. if(c12)
  269. {
  270. if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
  271. COPY_KEY(r->keys[r->len], i1.key);
  272. INCREF_KEY(r->keys[r->len]);
  273. if (merge)
  274. {
  275. #ifdef MERGE
  276. r->values[r->len] = MERGE(i1.value, w1, i2.value, w2);
  277. #else
  278. COPY_VALUE(r->values[r->len], i1.value);
  279. INCREF_VALUE(r->values[r->len]);
  280. #endif
  281. }
  282. r->len++;
  283. }
  284. if (i1.next(&i1) < 0) goto err;
  285. if (i2.next(&i2) < 0) goto err;
  286. }
  287. else
  288. {
  289. if(c2)
  290. {
  291. if(r->len >= r->size && Bucket_grow(r, -1, ! merge) < 0) goto err;
  292. COPY_KEY(r->keys[r->len], i2.key);
  293. INCREF_KEY(r->keys[r->len]);
  294. if (merge)
  295. {
  296. COPY_VALUE(r->values[r->len], MERGE_WEIGHT(i2.value, w2));
  297. INCREF_VALUE(r->values[r->len]);
  298. }
  299. r->len++;
  300. }
  301. if (i2.next(&i2) < 0) goto err;
  302. }
  303. }
  304. if(c1 && copyRemaining(r, &i1, merge, w1) < 0) goto err;
  305. if(c2 && copyRemaining(r, &i2, merge, w2) < 0) goto err;
  306. finiSetIteration(&i1);
  307. finiSetIteration(&i2);
  308. return OBJECT(r);
  309. #ifndef MERGE_DEFAULT
  310. invalid_set_operation:
  311. PyErr_SetString(PyExc_TypeError, "invalid set operation");
  312. #endif
  313. err:
  314. finiSetIteration(&i1);
  315. finiSetIteration(&i2);
  316. Py_XDECREF(r);
  317. return NULL;
  318. }
  319. static PyObject *
  320. difference_m(PyObject *ignored, PyObject *args)
  321. {
  322. PyObject *o1, *o2;
  323. UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL;
  324. if (o1 == Py_None || o2 == Py_None)
  325. {
  326. /* difference(None, X) -> None; difference(X, None) -> X */
  327. Py_INCREF(o1);
  328. return o1;
  329. }
  330. return set_operation(o1, o2, 1, 0, /* preserve values from o1, ignore o2's */
  331. 1, 0, /* o1's values multiplied by 1 */
  332. 1, 0, 0); /* take only keys unique to o1 */
  333. }
  334. static PyObject *
  335. union_m(PyObject *ignored, PyObject *args)
  336. {
  337. PyObject *o1, *o2;
  338. UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL;
  339. if (o1 == Py_None)
  340. {
  341. Py_INCREF(o2);
  342. return o2;
  343. }
  344. else if (o2 == Py_None)
  345. {
  346. Py_INCREF(o1);
  347. return o1;
  348. }
  349. return set_operation(o1, o2, 0, 0, /* ignore values in both */
  350. 1, 1, /* the weights are irrelevant */
  351. 1, 1, 1); /* take all keys */
  352. }
  353. static PyObject *
  354. intersection_m(PyObject *ignored, PyObject *args)
  355. {
  356. PyObject *o1, *o2;
  357. UNLESS(PyArg_ParseTuple(args, "OO", &o1, &o2)) return NULL;
  358. if (o1 == Py_None)
  359. {
  360. Py_INCREF(o2);
  361. return o2;
  362. }
  363. else if (o2 == Py_None)
  364. {
  365. Py_INCREF(o1);
  366. return o1;
  367. }
  368. return set_operation(o1, o2, 0, 0, /* ignore values in both */
  369. 1, 1, /* the weights are irrelevant */
  370. 0, 1, 0); /* take only keys common to both */
  371. }
  372. #ifdef MERGE
  373. static PyObject *
  374. wunion_m(PyObject *ignored, PyObject *args)
  375. {
  376. PyObject *o1, *o2;
  377. VALUE_TYPE w1 = 1, w2 = 1;
  378. UNLESS(PyArg_ParseTuple(args, "OO|" VALUE_PARSE VALUE_PARSE,
  379. &o1, &o2, &w1, &w2)
  380. ) return NULL;
  381. if (o1 == Py_None)
  382. return Py_BuildValue(VALUE_PARSE "O", (o2 == Py_None ? 0 : w2), o2);
  383. else if (o2 == Py_None)
  384. return Py_BuildValue(VALUE_PARSE "O", w1, o1);
  385. o1 = set_operation(o1, o2, 1, 1, w1, w2, 1, 1, 1);
  386. if (o1)
  387. ASSIGN(o1, Py_BuildValue(VALUE_PARSE "O", (VALUE_TYPE)1, o1));
  388. return o1;
  389. }
  390. static PyObject *
  391. wintersection_m(PyObject *ignored, PyObject *args)
  392. {
  393. PyObject *o1, *o2;
  394. VALUE_TYPE w1 = 1, w2 = 1;
  395. UNLESS(PyArg_ParseTuple(args, "OO|" VALUE_PARSE VALUE_PARSE,
  396. &o1, &o2, &w1, &w2)
  397. ) return NULL;
  398. if (o1 == Py_None)
  399. return Py_BuildValue(VALUE_PARSE "O", (o2 == Py_None ? 0 : w2), o2);
  400. else if (o2 == Py_None)
  401. return Py_BuildValue(VALUE_PARSE "O", w1, o1);
  402. o1 = set_operation(o1, o2, 1, 1, w1, w2, 0, 1, 0);
  403. if (o1)
  404. ASSIGN(o1, Py_BuildValue(VALUE_PARSE "O",
  405. ((o1->ob_type == (PyTypeObject*)(&SetType)) ? w2+w1 : 1),
  406. o1));
  407. return o1;
  408. }
  409. #endif
  410. #ifdef MULTI_INT_UNION
  411. #include "sorters.c"
  412. /* Input is a sequence of integer sets (or convertible to sets by the
  413. set iteration protocol). Output is the union of the sets. The point
  414. is to run much faster than doing pairs of unions.
  415. */
  416. static PyObject *
  417. multiunion_m(PyObject *ignored, PyObject *args)
  418. {
  419. PyObject *seq; /* input sequence */
  420. int n; /* length of input sequence */
  421. PyObject *set = NULL; /* an element of the input sequence */
  422. Bucket *result; /* result set */
  423. SetIteration setiter = {0};
  424. int i;
  425. UNLESS(PyArg_ParseTuple(args, "O", &seq))
  426. return NULL;
  427. n = PyObject_Length(seq);
  428. if (n < 0)
  429. return NULL;
  430. /* Construct an empty result set. */
  431. result = BUCKET(PyObject_CallObject(OBJECT(&SetType), NULL));
  432. if (result == NULL)
  433. return NULL;
  434. /* For each set in the input sequence, append its elements to the result
  435. set. At this point, we ignore the possibility of duplicates. */
  436. for (i = 0; i < n; ++i) {
  437. set = PySequence_GetItem(seq, i);
  438. if (set == NULL)
  439. goto Error;
  440. /* If set is a bucket, do a straight resize + memcpy. */
  441. if (set->ob_type == (PyTypeObject*)&SetType ||
  442. set->ob_type == (PyTypeObject*)&BucketType)
  443. {
  444. Bucket *b = BUCKET(set);
  445. int status = 0;
  446. UNLESS (PER_USE(b)) goto Error;
  447. if (b->len)
  448. status = bucket_append(result, b, 0, b->len, 0, i < n-1);
  449. PER_UNUSE(b);
  450. if (status < 0) goto Error;
  451. }
  452. else {
  453. /* No cheap way: iterate over set's elements one at a time. */
  454. if (initSetIteration(&setiter, set, 0) < 0) goto Error;
  455. if (setiter.next(&setiter) < 0) goto Error;
  456. while (setiter.position >= 0) {
  457. if (result->len >= result->size && Bucket_grow(result, -1, 1) < 0)
  458. goto Error;
  459. COPY_KEY(result->keys[result->len], setiter.key);
  460. ++result->len;
  461. /* We know the key is an int, so no need to incref it. */
  462. if (setiter.next(&setiter) < 0) goto Error;
  463. }
  464. finiSetIteration(&setiter);
  465. }
  466. Py_DECREF(set);
  467. set = NULL;
  468. }
  469. /* Combine, sort, remove duplicates, and reset the result's len.
  470. If the set shrinks (which happens if and only if there are
  471. duplicates), no point to realloc'ing the set smaller, as we
  472. expect the result set to be short-lived.
  473. */
  474. if (result->len > 0) {
  475. size_t newlen; /* number of elements in final result set */
  476. newlen = sort_int_nodups(result->keys, (size_t)result->len);
  477. result->len = (int)newlen;
  478. }
  479. return (PyObject *)result;
  480. Error:
  481. Py_DECREF(result);
  482. Py_XDECREF(set);
  483. finiSetIteration(&setiter);
  484. return NULL;
  485. }
  486. #endif