Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

speedups.c 5.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /* C implementation of performance sensitive functions. */
  #define PY_SSIZE_T_CLEAN
  #include <Python.h>
  #include <stdint.h> /* uint8_t, uint32_t, uint64_t */
  #include <string.h> /* memcpy */
  #if __ARM_NEON
  #include <arm_neon.h>
  #elif __SSE2__
  #include <emmintrin.h>
  #endif
  10. static const Py_ssize_t MASK_LEN = 4; /* Required mask length in bytes; apply_mask rejects any other length and uses MASK_LEN - 1 as the index mask when cycling through the mask bytes. */
  11. /* Similar to PyBytes_AsStringAndSize, but accepts more types */
  12. static int
  13. _PyBytesLike_AsStringAndSize(PyObject *obj, PyObject **tmp, char **buffer, Py_ssize_t *length)
  14. {
  15. // This supports bytes, bytearrays, and memoryview objects,
  16. // which are common data structures for handling byte streams.
  17. // websockets.framing.prepare_data() returns only these types.
  18. // If *tmp isn't NULL, the caller gets a new reference.
  19. if (PyBytes_Check(obj))
  20. {
  21. *tmp = NULL;
  22. *buffer = PyBytes_AS_STRING(obj);
  23. *length = PyBytes_GET_SIZE(obj);
  24. }
  25. else if (PyByteArray_Check(obj))
  26. {
  27. *tmp = NULL;
  28. *buffer = PyByteArray_AS_STRING(obj);
  29. *length = PyByteArray_GET_SIZE(obj);
  30. }
  31. else if (PyMemoryView_Check(obj))
  32. {
  33. *tmp = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
  34. if (*tmp == NULL)
  35. {
  36. return -1;
  37. }
  38. Py_buffer *mv_buf;
  39. mv_buf = PyMemoryView_GET_BUFFER(*tmp);
  40. *buffer = mv_buf->buf;
  41. *length = mv_buf->len;
  42. }
  43. else
  44. {
  45. PyErr_Format(
  46. PyExc_TypeError,
  47. "expected a bytes-like object, %.200s found",
  48. Py_TYPE(obj)->tp_name);
  49. return -1;
  50. }
  51. return 0;
  52. }
  53. /* C implementation of websockets.utils.apply_mask */
  54. static PyObject *
  55. apply_mask(PyObject *self, PyObject *args, PyObject *kwds)
  56. {
  57. // In order to support various bytes-like types, accept any Python object.
  58. static char *kwlist[] = {"data", "mask", NULL};
  59. PyObject *input_obj;
  60. PyObject *mask_obj;
  61. // A pointer to a char * + length will be extracted from the data and mask
  62. // arguments, possibly via a Py_buffer.
  63. PyObject *input_tmp = NULL;
  64. char *input;
  65. Py_ssize_t input_len;
  66. PyObject *mask_tmp = NULL;
  67. char *mask;
  68. Py_ssize_t mask_len;
  69. // Initialize a PyBytesObject then get a pointer to the underlying char *
  70. // in order to avoid an extra memory copy in PyBytes_FromStringAndSize.
  71. PyObject *result = NULL;
  72. char *output;
  73. // Other variables.
  74. Py_ssize_t i = 0;
  75. // Parse inputs.
  76. if (!PyArg_ParseTupleAndKeywords(
  77. args, kwds, "OO", kwlist, &input_obj, &mask_obj))
  78. {
  79. goto exit;
  80. }
  81. if (_PyBytesLike_AsStringAndSize(input_obj, &input_tmp, &input, &input_len) == -1)
  82. {
  83. goto exit;
  84. }
  85. if (_PyBytesLike_AsStringAndSize(mask_obj, &mask_tmp, &mask, &mask_len) == -1)
  86. {
  87. goto exit;
  88. }
  89. if (mask_len != MASK_LEN)
  90. {
  91. PyErr_SetString(PyExc_ValueError, "mask must contain 4 bytes");
  92. goto exit;
  93. }
  94. // Create output.
  95. result = PyBytes_FromStringAndSize(NULL, input_len);
  96. if (result == NULL)
  97. {
  98. goto exit;
  99. }
  100. // Since we just created result, we don't need error checks.
  101. output = PyBytes_AS_STRING(result);
  102. // Perform the masking operation.
  103. // Apparently GCC cannot figure out the following optimizations by itself.
  104. // We need a new scope for MSVC 2010 (non C99 friendly)
  105. {
  106. #if __ARM_NEON
  107. // With NEON support, XOR by blocks of 16 bytes = 128 bits.
  108. Py_ssize_t input_len_128 = input_len & ~15;
  109. uint8x16_t mask_128 = vreinterpretq_u8_u32(vdupq_n_u32(*(uint32_t *)mask));
  110. for (; i < input_len_128; i += 16)
  111. {
  112. uint8x16_t in_128 = vld1q_u8((uint8_t *)(input + i));
  113. uint8x16_t out_128 = veorq_u8(in_128, mask_128);
  114. vst1q_u8((uint8_t *)(output + i), out_128);
  115. }
  116. #elif __SSE2__
  117. // With SSE2 support, XOR by blocks of 16 bytes = 128 bits.
  118. // Since we cannot control the 16-bytes alignment of input and output
  119. // buffers, we rely on loadu/storeu rather than load/store.
  120. Py_ssize_t input_len_128 = input_len & ~15;
  121. __m128i mask_128 = _mm_set1_epi32(*(uint32_t *)mask);
  122. for (; i < input_len_128; i += 16)
  123. {
  124. __m128i in_128 = _mm_loadu_si128((__m128i *)(input + i));
  125. __m128i out_128 = _mm_xor_si128(in_128, mask_128);
  126. _mm_storeu_si128((__m128i *)(output + i), out_128);
  127. }
  128. #else
  129. // Without SSE2 support, XOR by blocks of 8 bytes = 64 bits.
  130. // We assume the memory allocator aligns everything on 8 bytes boundaries.
  131. Py_ssize_t input_len_64 = input_len & ~7;
  132. uint32_t mask_32 = *(uint32_t *)mask;
  133. uint64_t mask_64 = ((uint64_t)mask_32 << 32) | (uint64_t)mask_32;
  134. for (; i < input_len_64; i += 8)
  135. {
  136. *(uint64_t *)(output + i) = *(uint64_t *)(input + i) ^ mask_64;
  137. }
  138. #endif
  139. }
  140. // XOR the remainder of the input byte by byte.
  141. for (; i < input_len; i++)
  142. {
  143. output[i] = input[i] ^ mask[i & (MASK_LEN - 1)];
  144. }
  145. exit:
  146. Py_XDECREF(input_tmp);
  147. Py_XDECREF(mask_tmp);
  148. return result;
  149. }
  150. static PyMethodDef speedups_methods[] = {
  151. {
  152. "apply_mask",
  153. (PyCFunction)apply_mask,
  154. METH_VARARGS | METH_KEYWORDS,
  155. "Apply masking to the data of a WebSocket message.",
  156. },
  157. {NULL, NULL, 0, NULL}, /* Sentinel */
  158. };
  159. static struct PyModuleDef speedups_module = {
  160. PyModuleDef_HEAD_INIT,
  161. "websocket.speedups", /* m_name */
  162. "C implementation of performance sensitive functions.",
  163. /* m_doc */
  164. -1, /* m_size */
  165. speedups_methods, /* m_methods */
  166. NULL,
  167. NULL,
  168. NULL,
  169. NULL
  170. };
  171. PyMODINIT_FUNC
  172. PyInit_speedups(void)
  173. {
  174. return PyModule_Create(&speedups_module);
  175. }